xref: /reactos/drivers/filesystems/btrfs/write.c (revision 3edf37e2)
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
/* Per-device-stripe bookkeeping used while preparing a striped write.
 * The prepare_raid*_write routines in this file fill in start/end from the
 * offsets returned by get_raid0_offset and allocate mdl with a length of
 * (end - start). */
20 typedef struct {
21     uint64_t start;      /* first byte of the write within this stripe */
22     uint64_t end;        /* one past the last byte of the write within this stripe */
23     uint8_t* data;       /* NOTE(review): presumably a bounce buffer for the stripe - confirm against users */
24     PMDL mdl;            /* MDL describing the pages to be written to this stripe */
25     uint64_t irp_offset; /* NOTE(review): not used in the visible code; presumably an offset into the originating IRP's buffer - confirm */
26 } write_stripe;
27 
28 _Function_class_(IO_COMPLETION_ROUTINE)
29 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
30 
31 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback);
32 
33 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
34 extern tCcCopyWriteEx fCcCopyWriteEx;
35 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
36 extern bool diskacc;
37 
38 bool find_data_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t length, uint64_t* address) {
39     LIST_ENTRY* le;
40     space* s;
41 
42     TRACE("(%p, %I64x, %I64x, %p)\n", Vcb, c->offset, length, address);
43 
44     if (length > c->chunk_item->size - c->used)
45         return false;
46 
47     if (!c->cache_loaded) {
48         NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
49 
50         if (!NT_SUCCESS(Status)) {
51             ERR("load_cache_chunk returned %08x\n", Status);
52             return false;
53         }
54     }
55 
56     if (IsListEmpty(&c->space_size))
57         return false;
58 
59     le = c->space_size.Flink;
60     while (le != &c->space_size) {
61         s = CONTAINING_RECORD(le, space, list_entry_size);
62 
63         if (s->size == length) {
64             *address = s->address;
65             return true;
66         } else if (s->size < length) {
67             if (le == c->space_size.Flink)
68                 return false;
69 
70             s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
71 
72             *address = s->address;
73             return true;
74         }
75 
76         le = le->Flink;
77     }
78 
79     s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
80 
81     if (s->size > length) {
82         *address = s->address;
83         return true;
84     }
85 
86     return false;
87 }
88 
89 chunk* get_chunk_from_address(device_extension* Vcb, uint64_t address) {
90     LIST_ENTRY* le2;
91 
92     ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
93 
94     le2 = Vcb->chunks.Flink;
95     while (le2 != &Vcb->chunks) {
96         chunk* c = CONTAINING_RECORD(le2, chunk, list_entry);
97 
98         if (address >= c->offset && address < c->offset + c->chunk_item->size) {
99             ExReleaseResourceLite(&Vcb->chunk_lock);
100             return c;
101         }
102 
103         le2 = le2->Flink;
104     }
105 
106     ExReleaseResourceLite(&Vcb->chunk_lock);
107 
108     return NULL;
109 }
110 
/* Candidate placement for one stripe of a chunk being allocated: the
 * free-space entry chosen from a device's space list and the device it
 * belongs to. alloc_chunk uses dh->address as the stripe's device offset
 * and dh->size as an upper bound on the stripe size. */
111 typedef struct {
112     space* dh;      /* chosen free-space entry ("hole") on the device */
113     device* device; /* device that owns dh */
114 } stripe;
115 
116 static uint64_t find_new_chunk_address(device_extension* Vcb, uint64_t size) {
117     uint64_t lastaddr;
118     LIST_ENTRY* le;
119 
120     lastaddr = 0xc00000;
121 
122     le = Vcb->chunks.Flink;
123     while (le != &Vcb->chunks) {
124         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
125 
126         if (c->offset >= lastaddr + size)
127             return lastaddr;
128 
129         lastaddr = c->offset + c->chunk_item->size;
130 
131         le = le->Flink;
132     }
133 
134     return lastaddr;
135 }
136 
137 static bool find_new_dup_stripes(device_extension* Vcb, stripe* stripes, uint64_t max_stripe_size, bool full_size) {
138     uint64_t devusage = 0xffffffffffffffff;
139     space *devdh1 = NULL, *devdh2 = NULL;
140     LIST_ENTRY* le;
141     device* dev2 = NULL;
142 
143     le = Vcb->devices.Flink;
144 
145     while (le != &Vcb->devices) {
146         device* dev = CONTAINING_RECORD(le, device, list_entry);
147 
148         if (!dev->readonly && !dev->reloc && dev->devobj) {
149             uint64_t usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
150 
151             // favour devices which have been used the least
152             if (usage < devusage) {
153                 if (!IsListEmpty(&dev->space)) {
154                     LIST_ENTRY* le2;
155                     space *dh1 = NULL, *dh2 = NULL;
156 
157                     le2 = dev->space.Flink;
158                     while (le2 != &dev->space) {
159                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
160 
161                         if (dh->size >= max_stripe_size && (!dh1 || !dh2 || dh->size < dh1->size)) {
162                             dh2 = dh1;
163                             dh1 = dh;
164                         }
165 
166                         le2 = le2->Flink;
167                     }
168 
169                     if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
170                         dev2 = dev;
171                         devusage = usage;
172                         devdh1 = dh1;
173                         devdh2 = dh2 ? dh2 : dh1;
174                     }
175                 }
176             }
177         }
178 
179         le = le->Flink;
180     }
181 
182     if (!devdh1) {
183         uint64_t size = 0;
184 
185         // Can't find hole of at least max_stripe_size; look for the largest one we can find
186 
187         if (full_size)
188             return false;
189 
190         le = Vcb->devices.Flink;
191         while (le != &Vcb->devices) {
192             device* dev = CONTAINING_RECORD(le, device, list_entry);
193 
194             if (!dev->readonly && !dev->reloc) {
195                 if (!IsListEmpty(&dev->space)) {
196                     LIST_ENTRY* le2;
197                     space *dh1 = NULL, *dh2 = NULL;
198 
199                     le2 = dev->space.Flink;
200                     while (le2 != &dev->space) {
201                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
202 
203                         if (!dh1 || !dh2 || dh->size < dh1->size) {
204                             dh2 = dh1;
205                             dh1 = dh;
206                         }
207 
208                         le2 = le2->Flink;
209                     }
210 
211                     if (dh1) {
212                         uint64_t devsize;
213 
214                         if (dh2)
215                             devsize = max(dh1->size / 2, min(dh1->size, dh2->size));
216                         else
217                             devsize = dh1->size / 2;
218 
219                         if (devsize > size) {
220                             dev2 = dev;
221                             devdh1 = dh1;
222 
223                             if (dh2 && min(dh1->size, dh2->size) > dh1->size / 2)
224                                 devdh2 = dh2;
225                             else
226                                 devdh2 = dh1;
227 
228                             size = devsize;
229                         }
230                     }
231                 }
232             }
233 
234             le = le->Flink;
235         }
236 
237         if (!devdh1)
238             return false;
239     }
240 
241     stripes[0].device = stripes[1].device = dev2;
242     stripes[0].dh = devdh1;
243     stripes[1].dh = devdh2;
244 
245     return true;
246 }
247 
248 static bool find_new_stripe(device_extension* Vcb, stripe* stripes, uint16_t i, uint64_t max_stripe_size, bool allow_missing, bool full_size) {
249     uint64_t k, devusage = 0xffffffffffffffff;
250     space* devdh = NULL;
251     LIST_ENTRY* le;
252     device* dev2 = NULL;
253 
254     le = Vcb->devices.Flink;
255     while (le != &Vcb->devices) {
256         device* dev = CONTAINING_RECORD(le, device, list_entry);
257         uint64_t usage;
258         bool skip = false;
259 
260         if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
261             le = le->Flink;
262             continue;
263         }
264 
265         // skip this device if it already has a stripe
266         if (i > 0) {
267             for (k = 0; k < i; k++) {
268                 if (stripes[k].device == dev) {
269                     skip = true;
270                     break;
271                 }
272             }
273         }
274 
275         if (!skip) {
276             usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
277 
278             // favour devices which have been used the least
279             if (usage < devusage) {
280                 if (!IsListEmpty(&dev->space)) {
281                     LIST_ENTRY* le2;
282 
283                     le2 = dev->space.Flink;
284                     while (le2 != &dev->space) {
285                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
286 
287                         if ((dev2 != dev && dh->size >= max_stripe_size) ||
288                             (dev2 == dev && dh->size >= max_stripe_size && dh->size < devdh->size)
289                         ) {
290                             devdh = dh;
291                             dev2 = dev;
292                             devusage = usage;
293                         }
294 
295                         le2 = le2->Flink;
296                     }
297                 }
298             }
299         }
300 
301         le = le->Flink;
302     }
303 
304     if (!devdh) {
305         // Can't find hole of at least max_stripe_size; look for the largest one we can find
306 
307         if (full_size)
308             return false;
309 
310         le = Vcb->devices.Flink;
311         while (le != &Vcb->devices) {
312             device* dev = CONTAINING_RECORD(le, device, list_entry);
313             bool skip = false;
314 
315             if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
316                 le = le->Flink;
317                 continue;
318             }
319 
320             // skip this device if it already has a stripe
321             if (i > 0) {
322                 for (k = 0; k < i; k++) {
323                     if (stripes[k].device == dev) {
324                         skip = true;
325                         break;
326                     }
327                 }
328             }
329 
330             if (!skip) {
331                 if (!IsListEmpty(&dev->space)) {
332                     LIST_ENTRY* le2;
333 
334                     le2 = dev->space.Flink;
335                     while (le2 != &dev->space) {
336                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
337 
338                         if (!devdh || devdh->size < dh->size) {
339                             devdh = dh;
340                             dev2 = dev;
341                         }
342 
343                         le2 = le2->Flink;
344                     }
345                 }
346             }
347 
348             le = le->Flink;
349         }
350 
351         if (!devdh)
352             return false;
353     }
354 
355     stripes[i].dh = devdh;
356     stripes[i].device = dev2;
357 
358     return true;
359 }
360 
361 NTSTATUS alloc_chunk(device_extension* Vcb, uint64_t flags, chunk** pc, bool full_size) {
362     NTSTATUS Status;
363     uint64_t max_stripe_size, max_chunk_size, stripe_size, stripe_length, factor;
364     uint64_t total_size = 0, logaddr;
365     uint16_t i, type, num_stripes, sub_stripes, max_stripes, min_stripes, allowed_missing;
366     stripe* stripes = NULL;
367     uint16_t cisize;
368     CHUNK_ITEM_STRIPE* cis;
369     chunk* c = NULL;
370     space* s = NULL;
371     LIST_ENTRY* le;
372 
373     le = Vcb->devices.Flink;
374     while (le != &Vcb->devices) {
375         device* dev = CONTAINING_RECORD(le, device, list_entry);
376         total_size += dev->devitem.num_bytes;
377 
378         le = le->Flink;
379     }
380 
381     TRACE("total_size = %I64x\n", total_size);
382 
383     // We purposely check for DATA first - mixed blocks have the same size
384     // as DATA ones.
385     if (flags & BLOCK_FLAG_DATA) {
386         max_stripe_size = 0x40000000; // 1 GB
387         max_chunk_size = 10 * max_stripe_size;
388     } else if (flags & BLOCK_FLAG_METADATA) {
389         if (total_size > 0xC80000000) // 50 GB
390             max_stripe_size = 0x40000000; // 1 GB
391         else
392             max_stripe_size = 0x10000000; // 256 MB
393 
394         max_chunk_size = max_stripe_size;
395     } else if (flags & BLOCK_FLAG_SYSTEM) {
396         max_stripe_size = 0x2000000; // 32 MB
397         max_chunk_size = 2 * max_stripe_size;
398     } else {
399         ERR("unknown chunk type\n");
400         return STATUS_INTERNAL_ERROR;
401     }
402 
403     if (flags & BLOCK_FLAG_DUPLICATE) {
404         min_stripes = 2;
405         max_stripes = 2;
406         sub_stripes = 0;
407         type = BLOCK_FLAG_DUPLICATE;
408         allowed_missing = 0;
409     } else if (flags & BLOCK_FLAG_RAID0) {
410         min_stripes = 2;
411         max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
412         sub_stripes = 0;
413         type = BLOCK_FLAG_RAID0;
414         allowed_missing = 0;
415     } else if (flags & BLOCK_FLAG_RAID1) {
416         min_stripes = 2;
417         max_stripes = 2;
418         sub_stripes = 1;
419         type = BLOCK_FLAG_RAID1;
420         allowed_missing = 1;
421     } else if (flags & BLOCK_FLAG_RAID10) {
422         min_stripes = 4;
423         max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
424         sub_stripes = 2;
425         type = BLOCK_FLAG_RAID10;
426         allowed_missing = 1;
427     } else if (flags & BLOCK_FLAG_RAID5) {
428         min_stripes = 3;
429         max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
430         sub_stripes = 1;
431         type = BLOCK_FLAG_RAID5;
432         allowed_missing = 1;
433     } else if (flags & BLOCK_FLAG_RAID6) {
434         min_stripes = 4;
435         max_stripes = 257;
436         sub_stripes = 1;
437         type = BLOCK_FLAG_RAID6;
438         allowed_missing = 2;
439     } else { // SINGLE
440         min_stripes = 1;
441         max_stripes = 1;
442         sub_stripes = 1;
443         type = 0;
444         allowed_missing = 0;
445     }
446 
447     if (max_chunk_size > total_size / 10) {  // cap at 10%
448         max_chunk_size = total_size / 10;
449         max_stripe_size = max_chunk_size / min_stripes;
450     }
451 
452     TRACE("would allocate a new chunk of %I64x bytes and stripe %I64x\n", max_chunk_size, max_stripe_size);
453 
454     stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * max_stripes, ALLOC_TAG);
455     if (!stripes) {
456         ERR("out of memory\n");
457         Status = STATUS_INSUFFICIENT_RESOURCES;
458         goto end;
459     }
460 
461     num_stripes = 0;
462 
463     if (type == BLOCK_FLAG_DUPLICATE) {
464         if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size, full_size)) {
465             Status = STATUS_DISK_FULL;
466             goto end;
467         }
468         else
469             num_stripes = max_stripes;
470     } else {
471         for (i = 0; i < max_stripes; i++) {
472             if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, false, full_size))
473                 break;
474             else
475                 num_stripes++;
476         }
477     }
478 
479     if (num_stripes < min_stripes && Vcb->options.allow_degraded && allowed_missing > 0) {
480         uint16_t added_missing = 0;
481 
482         for (i = num_stripes; i < max_stripes; i++) {
483             if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, true, full_size))
484                 break;
485             else {
486                 added_missing++;
487                 if (added_missing >= allowed_missing)
488                     break;
489             }
490         }
491 
492         num_stripes += added_missing;
493     }
494 
495     // for RAID10, round down to an even number of stripes
496     if (type == BLOCK_FLAG_RAID10 && (num_stripes % sub_stripes) != 0) {
497         num_stripes -= num_stripes % sub_stripes;
498     }
499 
500     if (num_stripes < min_stripes) {
501         WARN("found %u stripes, needed at least %u\n", num_stripes, min_stripes);
502         Status = STATUS_DISK_FULL;
503         goto end;
504     }
505 
506     c = ExAllocatePoolWithTag(NonPagedPool, sizeof(chunk), ALLOC_TAG);
507     if (!c) {
508         ERR("out of memory\n");
509         Status = STATUS_INSUFFICIENT_RESOURCES;
510         goto end;
511     }
512 
513     c->devices = NULL;
514 
515     cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
516     c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, cisize, ALLOC_TAG);
517     if (!c->chunk_item) {
518         ERR("out of memory\n");
519         Status = STATUS_INSUFFICIENT_RESOURCES;
520         goto end;
521     }
522 
523     stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
524 
525     if (type == BLOCK_FLAG_DUPLICATE && stripes[1].dh == stripes[0].dh)
526         stripe_size = min(stripes[0].dh->size / 2, max_stripe_size);
527     else {
528         stripe_size = max_stripe_size;
529         for (i = 0; i < num_stripes; i++) {
530             if (stripes[i].dh->size < stripe_size)
531                 stripe_size = stripes[i].dh->size;
532         }
533     }
534 
535     if (type == 0 || type == BLOCK_FLAG_DUPLICATE || type == BLOCK_FLAG_RAID1)
536         factor = 1;
537     else if (type == BLOCK_FLAG_RAID0)
538         factor = num_stripes;
539     else if (type == BLOCK_FLAG_RAID10)
540         factor = num_stripes / sub_stripes;
541     else if (type == BLOCK_FLAG_RAID5)
542         factor = num_stripes - 1;
543     else if (type == BLOCK_FLAG_RAID6)
544         factor = num_stripes - 2;
545 
546     if (stripe_size * factor > max_chunk_size)
547         stripe_size = max_chunk_size / factor;
548 
549     if (stripe_size % stripe_length > 0)
550         stripe_size -= stripe_size % stripe_length;
551 
552     if (stripe_size == 0) {
553         ERR("not enough free space found (stripe_size == 0)\n");
554         Status = STATUS_DISK_FULL;
555         goto end;
556     }
557 
558     c->chunk_item->size = stripe_size * factor;
559     c->chunk_item->root_id = Vcb->extent_root->id;
560     c->chunk_item->stripe_length = stripe_length;
561     c->chunk_item->type = flags;
562     c->chunk_item->opt_io_alignment = (uint32_t)c->chunk_item->stripe_length;
563     c->chunk_item->opt_io_width = (uint32_t)c->chunk_item->stripe_length;
564     c->chunk_item->sector_size = stripes[0].device->devitem.minimal_io_size;
565     c->chunk_item->num_stripes = num_stripes;
566     c->chunk_item->sub_stripes = sub_stripes;
567 
568     c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
569     if (!c->devices) {
570         ERR("out of memory\n");
571         Status = STATUS_INSUFFICIENT_RESOURCES;
572         goto end;
573     }
574 
575     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
576     for (i = 0; i < num_stripes; i++) {
577         cis[i].dev_id = stripes[i].device->devitem.dev_id;
578 
579         if (type == BLOCK_FLAG_DUPLICATE && i == 1 && stripes[i].dh == stripes[0].dh)
580             cis[i].offset = stripes[0].dh->address + stripe_size;
581         else
582             cis[i].offset = stripes[i].dh->address;
583 
584         cis[i].dev_uuid = stripes[i].device->devitem.device_uuid;
585 
586         c->devices[i] = stripes[i].device;
587     }
588 
589     logaddr = find_new_chunk_address(Vcb, c->chunk_item->size);
590 
591     Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
592 
593     c->size = cisize;
594     c->offset = logaddr;
595     c->used = c->oldused = 0;
596     c->cache = c->old_cache = NULL;
597     c->readonly = false;
598     c->reloc = false;
599     c->last_alloc_set = false;
600     c->last_stripe = 0;
601     c->cache_loaded = true;
602     c->changed = false;
603     c->space_changed = false;
604     c->balance_num = 0;
605 
606     InitializeListHead(&c->space);
607     InitializeListHead(&c->space_size);
608     InitializeListHead(&c->deleting);
609     InitializeListHead(&c->changed_extents);
610 
611     InitializeListHead(&c->range_locks);
612     ExInitializeResourceLite(&c->range_locks_lock);
613     KeInitializeEvent(&c->range_locks_event, NotificationEvent, false);
614 
615     InitializeListHead(&c->partial_stripes);
616     ExInitializeResourceLite(&c->partial_stripes_lock);
617 
618     ExInitializeResourceLite(&c->lock);
619     ExInitializeResourceLite(&c->changed_extents_lock);
620 
621     s = ExAllocatePoolWithTag(NonPagedPool, sizeof(space), ALLOC_TAG);
622     if (!s) {
623         ERR("out of memory\n");
624         Status = STATUS_INSUFFICIENT_RESOURCES;
625         goto end;
626     }
627 
628     s->address = c->offset;
629     s->size = c->chunk_item->size;
630     InsertTailList(&c->space, &s->list_entry);
631     InsertTailList(&c->space_size, &s->list_entry_size);
632 
633     protect_superblocks(c);
634 
635     for (i = 0; i < num_stripes; i++) {
636         stripes[i].device->devitem.bytes_used += stripe_size;
637 
638         space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL, NULL);
639     }
640 
641     Status = STATUS_SUCCESS;
642 
643     if (flags & BLOCK_FLAG_RAID5 || flags & BLOCK_FLAG_RAID6)
644         Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_RAID56;
645 
646 end:
647     if (stripes)
648         ExFreePool(stripes);
649 
650     if (!NT_SUCCESS(Status)) {
651         if (c) {
652             if (c->devices)
653                 ExFreePool(c->devices);
654 
655             if (c->chunk_item)
656                 ExFreePool(c->chunk_item);
657 
658             ExFreePool(c);
659         }
660 
661         if (s) ExFreePool(s);
662     } else {
663         bool done = false;
664 
665         le = Vcb->chunks.Flink;
666         while (le != &Vcb->chunks) {
667             chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
668 
669             if (c2->offset > c->offset) {
670                 InsertHeadList(le->Blink, &c->list_entry);
671                 done = true;
672                 break;
673             }
674 
675             le = le->Flink;
676         }
677 
678         if (!done)
679             InsertTailList(&Vcb->chunks, &c->list_entry);
680 
681         c->created = true;
682         c->changed = true;
683         c->space_changed = true;
684         c->list_entry_balance.Flink = NULL;
685 
686         *pc = c;
687     }
688 
689     return Status;
690 }
691 
692 static NTSTATUS prepare_raid0_write(_Pre_satisfies_(_Curr_->chunk_item->num_stripes>0) _In_ chunk* c, _In_ uint64_t address, _In_reads_bytes_(length) void* data,
693                                     _In_ uint32_t length, _In_ write_stripe* stripes, _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
694     uint64_t startoff, endoff;
695     uint16_t startoffstripe, endoffstripe, stripenum;
696     uint64_t pos, *stripeoff;
697     uint32_t i;
698     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
699     PMDL master_mdl;
700     PFN_NUMBER* pfns;
701 
702     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
703     if (!stripeoff) {
704         ERR("out of memory\n");
705         return STATUS_INSUFFICIENT_RESOURCES;
706     }
707 
708     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
709     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
710 
711     if (file_write) {
712         master_mdl = Irp->MdlAddress;
713 
714         pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
715         pfns = &pfns[irp_offset >> PAGE_SHIFT];
716     } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
717         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
718         if (!wtc->scratch) {
719             ERR("out of memory\n");
720             return STATUS_INSUFFICIENT_RESOURCES;
721         }
722 
723         RtlCopyMemory(wtc->scratch, data, length);
724 
725         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
726         if (!master_mdl) {
727             ERR("out of memory\n");
728             return STATUS_INSUFFICIENT_RESOURCES;
729         }
730 
731         MmBuildMdlForNonPagedPool(master_mdl);
732 
733         wtc->mdl = master_mdl;
734 
735         pfns = (PFN_NUMBER*)(master_mdl + 1);
736     } else {
737         NTSTATUS Status = STATUS_SUCCESS;
738 
739         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
740         if (!master_mdl) {
741             ERR("out of memory\n");
742             return STATUS_INSUFFICIENT_RESOURCES;
743         }
744 
745         _SEH2_TRY {
746             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
747         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
748             Status = _SEH2_GetExceptionCode();
749         } _SEH2_END;
750 
751         if (!NT_SUCCESS(Status)) {
752             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
753             IoFreeMdl(master_mdl);
754             return Status;
755         }
756 
757         wtc->mdl = master_mdl;
758 
759         pfns = (PFN_NUMBER*)(master_mdl + 1);
760     }
761 
762     for (i = 0; i < c->chunk_item->num_stripes; i++) {
763         if (startoffstripe > i)
764             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
765         else if (startoffstripe == i)
766             stripes[i].start = startoff;
767         else
768             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
769 
770         if (endoffstripe > i)
771             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
772         else if (endoffstripe == i)
773             stripes[i].end = endoff + 1;
774         else
775             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
776 
777         if (stripes[i].start != stripes[i].end) {
778             stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
779             if (!stripes[i].mdl) {
780                 ERR("IoAllocateMdl failed\n");
781                 ExFreePool(stripeoff);
782                 return STATUS_INSUFFICIENT_RESOURCES;
783             }
784         }
785     }
786 
787     pos = 0;
788     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
789 
790     stripenum = startoffstripe;
791 
792     while (pos < length) {
793         PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum].mdl + 1);
794 
795         if (pos == 0) {
796             uint32_t writelen = (uint32_t)min(stripes[stripenum].end - stripes[stripenum].start,
797                                           c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));
798 
799             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
800 
801             stripeoff[stripenum] += writelen;
802             pos += writelen;
803         } else if (length - pos < c->chunk_item->stripe_length) {
804             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
805             break;
806         } else {
807             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
808 
809             stripeoff[stripenum] += c->chunk_item->stripe_length;
810             pos += c->chunk_item->stripe_length;
811         }
812 
813         stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
814     }
815 
816     ExFreePool(stripeoff);
817 
818     return STATUS_SUCCESS;
819 }
820 
821 static NTSTATUS prepare_raid10_write(_Pre_satisfies_(_Curr_->chunk_item->sub_stripes>0&&_Curr_->chunk_item->num_stripes>=_Curr_->chunk_item->sub_stripes) _In_ chunk* c,
822                                      _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_stripe* stripes,
823                                      _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
824     uint64_t startoff, endoff;
825     uint16_t startoffstripe, endoffstripe, stripenum;
826     uint64_t pos, *stripeoff;
827     uint32_t i;
828     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
829     PMDL master_mdl;
830     PFN_NUMBER* pfns;
831 
832     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
833     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
834 
835     stripenum = startoffstripe;
836     startoffstripe *= c->chunk_item->sub_stripes;
837     endoffstripe *= c->chunk_item->sub_stripes;
838 
839     if (file_write) {
840         master_mdl = Irp->MdlAddress;
841 
842         pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
843         pfns = &pfns[irp_offset >> PAGE_SHIFT];
844     } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
845         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
846         if (!wtc->scratch) {
847             ERR("out of memory\n");
848             return STATUS_INSUFFICIENT_RESOURCES;
849         }
850 
851         RtlCopyMemory(wtc->scratch, data, length);
852 
853         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
854         if (!master_mdl) {
855             ERR("out of memory\n");
856             return STATUS_INSUFFICIENT_RESOURCES;
857         }
858 
859         MmBuildMdlForNonPagedPool(master_mdl);
860 
861         wtc->mdl = master_mdl;
862 
863         pfns = (PFN_NUMBER*)(master_mdl + 1);
864     } else {
865         NTSTATUS Status = STATUS_SUCCESS;
866 
867         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
868         if (!master_mdl) {
869             ERR("out of memory\n");
870             return STATUS_INSUFFICIENT_RESOURCES;
871         }
872 
873         _SEH2_TRY {
874             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
875         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
876             Status = _SEH2_GetExceptionCode();
877         } _SEH2_END;
878 
879         if (!NT_SUCCESS(Status)) {
880             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
881             IoFreeMdl(master_mdl);
882             return Status;
883         }
884 
885         wtc->mdl = master_mdl;
886 
887         pfns = (PFN_NUMBER*)(master_mdl + 1);
888     }
889 
890     for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
891         uint16_t j;
892 
893         if (startoffstripe > i)
894             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
895         else if (startoffstripe == i)
896             stripes[i].start = startoff;
897         else
898             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
899 
900         if (endoffstripe > i)
901             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
902         else if (endoffstripe == i)
903             stripes[i].end = endoff + 1;
904         else
905             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
906 
907         stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
908         if (!stripes[i].mdl) {
909             ERR("IoAllocateMdl failed\n");
910             return STATUS_INSUFFICIENT_RESOURCES;
911         }
912 
913         for (j = 1; j < c->chunk_item->sub_stripes; j++) {
914             stripes[i+j].start = stripes[i].start;
915             stripes[i+j].end = stripes[i].end;
916             stripes[i+j].data = stripes[i].data;
917             stripes[i+j].mdl = stripes[i].mdl;
918         }
919     }
920 
921     pos = 0;
922 
923     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
924     if (!stripeoff) {
925         ERR("out of memory\n");
926         return STATUS_INSUFFICIENT_RESOURCES;
927     }
928 
929     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
930 
931     while (pos < length) {
932         PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum * c->chunk_item->sub_stripes].mdl + 1);
933 
934         if (pos == 0) {
935             uint32_t writelen = (uint32_t)min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
936                                           c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
937 
938             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
939 
940             stripeoff[stripenum] += writelen;
941             pos += writelen;
942         } else if (length - pos < c->chunk_item->stripe_length) {
943             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
944             break;
945         } else {
946             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
947 
948             stripeoff[stripenum] += c->chunk_item->stripe_length;
949             pos += c->chunk_item->stripe_length;
950         }
951 
952         stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
953     }
954 
955     ExFreePool(stripeoff);
956 
957     return STATUS_SUCCESS;
958 }
959 
960 static NTSTATUS add_partial_stripe(device_extension* Vcb, chunk *c, uint64_t address, uint32_t length, void* data) {
961     NTSTATUS Status;
962     LIST_ENTRY* le;
963     partial_stripe* ps;
964     uint64_t stripe_addr;
965     uint16_t num_data_stripes;
966 
967     num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
968     stripe_addr = address - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
969 
970     ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
971 
972     le = c->partial_stripes.Flink;
973     while (le != &c->partial_stripes) {
974         ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
975 
976         if (ps->address == stripe_addr) {
977             // update existing entry
978 
979             RtlCopyMemory(ps->data + address - stripe_addr, data, length);
980             RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size);
981 
982             // if now filled, flush
983             if (RtlAreBitsClear(&ps->bmp, 0, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size))) {
984                 Status = flush_partial_stripe(Vcb, c, ps);
985                 if (!NT_SUCCESS(Status)) {
986                     ERR("flush_partial_stripe returned %08x\n", Status);
987                     goto end;
988                 }
989 
990                 RemoveEntryList(&ps->list_entry);
991 
992                 if (ps->bmparr)
993                     ExFreePool(ps->bmparr);
994 
995                 ExFreePool(ps);
996             }
997 
998             Status = STATUS_SUCCESS;
999             goto end;
1000         } else if (ps->address > stripe_addr)
1001             break;
1002 
1003         le = le->Flink;
1004     }
1005 
1006     // add new entry
1007 
1008     ps = ExAllocatePoolWithTag(NonPagedPool, offsetof(partial_stripe, data[0]) + (ULONG)(num_data_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
1009     if (!ps) {
1010         ERR("out of memory\n");
1011         Status = STATUS_INSUFFICIENT_RESOURCES;
1012         goto end;
1013     }
1014 
1015     ps->bmplen = (ULONG)(num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size;
1016 
1017     ps->address = stripe_addr;
1018     ps->bmparr = ExAllocatePoolWithTag(NonPagedPool, (size_t)sector_align(((ps->bmplen / 8) + 1), sizeof(ULONG)), ALLOC_TAG);
1019     if (!ps->bmparr) {
1020         ERR("out of memory\n");
1021         ExFreePool(ps);
1022         Status = STATUS_INSUFFICIENT_RESOURCES;
1023         goto end;
1024     }
1025 
1026     RtlInitializeBitMap(&ps->bmp, ps->bmparr, ps->bmplen);
1027     RtlSetAllBits(&ps->bmp);
1028 
1029     RtlCopyMemory(ps->data + address - stripe_addr, data, length);
1030     RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size);
1031 
1032     InsertHeadList(le->Blink, &ps->list_entry);
1033 
1034     Status = STATUS_SUCCESS;
1035 
1036 end:
1037     ExReleaseResourceLite(&c->partial_stripes_lock);
1038 
1039     return Status;
1040 }
1041 
1042 typedef struct {
1043     PMDL mdl;
1044     PFN_NUMBER* pfns;
1045 } log_stripe;
1046 
1047 static NTSTATUS prepare_raid5_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1048                                     uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1049     uint64_t startoff, endoff, parity_start, parity_end;
1050     uint16_t startoffstripe, endoffstripe, parity, num_data_stripes = c->chunk_item->num_stripes - 1;
1051     uint64_t pos, parity_pos, *stripeoff = NULL;
1052     uint32_t i;
1053     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1054     PMDL master_mdl;
1055     NTSTATUS Status;
1056     PFN_NUMBER *pfns, *parity_pfns;
1057     log_stripe* log_stripes = NULL;
1058 
1059     if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1060         uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1061 
1062         delta = min(irp_offset + length, delta);
1063         Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + irp_offset + length - delta);
1064         if (!NT_SUCCESS(Status)) {
1065             ERR("add_partial_stripe returned %08x\n", Status);
1066             goto exit;
1067         }
1068 
1069         length -= (uint32_t)delta;
1070     }
1071 
1072     if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1073         uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1074 
1075         Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, (uint8_t*)data + irp_offset);
1076         if (!NT_SUCCESS(Status)) {
1077             ERR("add_partial_stripe returned %08x\n", Status);
1078             goto exit;
1079         }
1080 
1081         address += delta;
1082         length -= (uint32_t)delta;
1083         irp_offset += delta;
1084     }
1085 
1086     if (length == 0) {
1087         Status = STATUS_SUCCESS;
1088         goto exit;
1089     }
1090 
1091     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1092     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1093 
1094     pos = 0;
1095     while (pos < length) {
1096         parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1097 
1098         if (pos == 0) {
1099             uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1100             ULONG skip, writelen;
1101 
1102             i = startoffstripe;
1103             while (stripe != parity) {
1104                 if (i == startoffstripe) {
1105                     writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1106 
1107                     stripes[stripe].start = startoff;
1108                     stripes[stripe].end = startoff + writelen;
1109 
1110                     pos += writelen;
1111 
1112                     if (pos == length)
1113                         break;
1114                 } else {
1115                     writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1116 
1117                     stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1118                     stripes[stripe].end = stripes[stripe].start + writelen;
1119 
1120                     pos += writelen;
1121 
1122                     if (pos == length)
1123                         break;
1124                 }
1125 
1126                 i++;
1127                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1128             }
1129 
1130             if (pos == length)
1131                 break;
1132 
1133             for (i = 0; i < startoffstripe; i++) {
1134                 stripe = (parity + i + 1) % c->chunk_item->num_stripes;
1135 
1136                 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1137             }
1138 
1139             stripes[parity].start = stripes[parity].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1140 
1141             if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1142                 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1143 
1144                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1145                     stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1146                 }
1147 
1148                 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1149             }
1150         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1151             for (i = 0; i < c->chunk_item->num_stripes; i++) {
1152                 stripes[i].end += c->chunk_item->stripe_length;
1153             }
1154 
1155             pos += c->chunk_item->stripe_length * num_data_stripes;
1156         } else {
1157             uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1158 
1159             i = 0;
1160             while (stripe != parity) {
1161                 if (endoffstripe == i) {
1162                     stripes[stripe].end = endoff + 1;
1163                     break;
1164                 } else if (endoffstripe > i)
1165                     stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1166 
1167                 i++;
1168                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1169             }
1170 
1171             break;
1172         }
1173     }
1174 
1175     parity_start = 0xffffffffffffffff;
1176     parity_end = 0;
1177 
1178     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1179         if (stripes[i].start != 0 || stripes[i].end != 0) {
1180             parity_start = min(stripes[i].start, parity_start);
1181             parity_end = max(stripes[i].end, parity_end);
1182         }
1183     }
1184 
1185     if (parity_end == parity_start) {
1186         Status = STATUS_SUCCESS;
1187         goto exit;
1188     }
1189 
1190     parity = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1191     stripes[parity].start = parity_start;
1192 
1193     parity = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1194     stripes[parity].end = parity_end;
1195 
1196     log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1197     if (!log_stripes) {
1198         ERR("out of memory\n");
1199         Status = STATUS_INSUFFICIENT_RESOURCES;
1200         goto exit;
1201     }
1202 
1203     RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1204 
1205     for (i = 0; i < num_data_stripes; i++) {
1206         log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1207         if (!log_stripes[i].mdl) {
1208             ERR("out of memory\n");
1209             Status = STATUS_INSUFFICIENT_RESOURCES;
1210             goto exit;
1211         }
1212 
1213         log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1214         log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1215     }
1216 
1217     wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1218     if (!wtc->parity1) {
1219         ERR("out of memory\n");
1220         Status = STATUS_INSUFFICIENT_RESOURCES;
1221         goto exit;
1222     }
1223 
1224     wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1225     if (!wtc->parity1_mdl) {
1226         ERR("out of memory\n");
1227         Status = STATUS_INSUFFICIENT_RESOURCES;
1228         goto exit;
1229     }
1230 
1231     MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1232 
1233     if (file_write)
1234         master_mdl = Irp->MdlAddress;
1235     else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1236         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1237         if (!wtc->scratch) {
1238             ERR("out of memory\n");
1239             Status = STATUS_INSUFFICIENT_RESOURCES;
1240             goto exit;
1241         }
1242 
1243         RtlCopyMemory(wtc->scratch, (uint8_t*)data + irp_offset, length);
1244 
1245         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1246         if (!master_mdl) {
1247             ERR("out of memory\n");
1248             Status = STATUS_INSUFFICIENT_RESOURCES;
1249             goto exit;
1250         }
1251 
1252         MmBuildMdlForNonPagedPool(master_mdl);
1253 
1254         wtc->mdl = master_mdl;
1255     } else {
1256         master_mdl = IoAllocateMdl((uint8_t*)data + irp_offset, length, false, false, NULL);
1257         if (!master_mdl) {
1258             ERR("out of memory\n");
1259             Status = STATUS_INSUFFICIENT_RESOURCES;
1260             goto exit;
1261         }
1262 
1263         Status = STATUS_SUCCESS;
1264 
1265         _SEH2_TRY {
1266             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1267         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1268             Status = _SEH2_GetExceptionCode();
1269         } _SEH2_END;
1270 
1271         if (!NT_SUCCESS(Status)) {
1272             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1273             IoFreeMdl(master_mdl);
1274             return Status;
1275         }
1276 
1277         wtc->mdl = master_mdl;
1278     }
1279 
1280     pfns = (PFN_NUMBER*)(master_mdl + 1);
1281     parity_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1282 
1283     if (file_write)
1284         pfns = &pfns[irp_offset >> PAGE_SHIFT];
1285 
1286     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1287         if (stripes[i].start != stripes[i].end) {
1288             stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1289             if (!stripes[i].mdl) {
1290                 ERR("IoAllocateMdl failed\n");
1291                 Status = STATUS_INSUFFICIENT_RESOURCES;
1292                 goto exit;
1293             }
1294         }
1295     }
1296 
1297     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1298     if (!stripeoff) {
1299         ERR("out of memory\n");
1300         Status = STATUS_INSUFFICIENT_RESOURCES;
1301         goto exit;
1302     }
1303 
1304     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1305 
1306     pos = 0;
1307     parity_pos = 0;
1308 
1309     while (pos < length) {
1310         PFN_NUMBER* stripe_pfns;
1311 
1312         parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1313 
1314         if (pos == 0) {
1315             uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1316             uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1317                                                             c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1318             uint32_t maxwritelen = writelen;
1319 
1320             stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1321 
1322             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1323 
1324             RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1325             log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1326 
1327             stripeoff[stripe] = writelen;
1328             pos += writelen;
1329 
1330             stripe = (stripe + 1) % c->chunk_item->num_stripes;
1331             i = startoffstripe + 1;
1332 
1333             while (stripe != parity) {
1334                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1335                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1336 
1337                 if (writelen == 0)
1338                     break;
1339 
1340                 if (writelen > maxwritelen)
1341                     maxwritelen = writelen;
1342 
1343                 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1344 
1345                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1346                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1347 
1348                 stripeoff[stripe] = writelen;
1349                 pos += writelen;
1350 
1351                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1352                 i++;
1353             }
1354 
1355             stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1356 
1357             RtlCopyMemory(stripe_pfns, parity_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1358             stripeoff[parity] = maxwritelen;
1359             parity_pos = maxwritelen;
1360         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1361             uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1362 
1363             i = 0;
1364             while (stripe != parity) {
1365                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1366 
1367                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1368 
1369                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1370                 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1371 
1372                 stripeoff[stripe] += c->chunk_item->stripe_length;
1373                 pos += c->chunk_item->stripe_length;
1374 
1375                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1376                 i++;
1377             }
1378 
1379             stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1380 
1381             RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1382             stripeoff[parity] += c->chunk_item->stripe_length;
1383             parity_pos += c->chunk_item->stripe_length;
1384         } else {
1385             uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1386             uint32_t writelen, maxwritelen = 0;
1387 
1388             i = 0;
1389             while (pos < length) {
1390                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1391                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1392 
1393                 if (writelen == 0)
1394                     break;
1395 
1396                 if (writelen > maxwritelen)
1397                     maxwritelen = writelen;
1398 
1399                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1400 
1401                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1402                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1403 
1404                 stripeoff[stripe] += writelen;
1405                 pos += writelen;
1406 
1407                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1408                 i++;
1409             }
1410 
1411             stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1412 
1413             RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1414         }
1415     }
1416 
1417     for (i = 0; i < num_data_stripes; i++) {
1418         uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[i].mdl, priority);
1419 
1420         if (i == 0)
1421             RtlCopyMemory(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1422         else
1423             do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1424     }
1425 
1426     Status = STATUS_SUCCESS;
1427 
1428 exit:
1429     if (log_stripes) {
1430         for (i = 0; i < num_data_stripes; i++) {
1431             if (log_stripes[i].mdl)
1432                 IoFreeMdl(log_stripes[i].mdl);
1433         }
1434 
1435         ExFreePool(log_stripes);
1436     }
1437 
1438     if (stripeoff)
1439         ExFreePool(stripeoff);
1440 
1441     return Status;
1442 }
1443 
1444 static NTSTATUS prepare_raid6_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1445                                     uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1446     uint64_t startoff, endoff, parity_start, parity_end;
1447     uint16_t startoffstripe, endoffstripe, parity1, num_data_stripes = c->chunk_item->num_stripes - 2;
1448     uint64_t pos, parity_pos, *stripeoff = NULL;
1449     uint32_t i;
1450     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1451     PMDL master_mdl;
1452     NTSTATUS Status;
1453     PFN_NUMBER *pfns, *parity1_pfns, *parity2_pfns;
1454     log_stripe* log_stripes = NULL;
1455 
1456     if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1457         uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1458 
1459         delta = min(irp_offset + length, delta);
1460         Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + irp_offset + length - delta);
1461         if (!NT_SUCCESS(Status)) {
1462             ERR("add_partial_stripe returned %08x\n", Status);
1463             goto exit;
1464         }
1465 
1466         length -= (uint32_t)delta;
1467     }
1468 
1469     if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1470         uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1471 
1472         Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, (uint8_t*)data + irp_offset);
1473         if (!NT_SUCCESS(Status)) {
1474             ERR("add_partial_stripe returned %08x\n", Status);
1475             goto exit;
1476         }
1477 
1478         address += delta;
1479         length -= (uint32_t)delta;
1480         irp_offset += delta;
1481     }
1482 
1483     if (length == 0) {
1484         Status = STATUS_SUCCESS;
1485         goto exit;
1486     }
1487 
1488     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1489     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1490 
1491     pos = 0;
1492     while (pos < length) {
1493         parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1494 
1495         if (pos == 0) {
1496             uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes;
1497             uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1498             ULONG skip, writelen;
1499 
1500             i = startoffstripe;
1501             while (stripe != parity1) {
1502                 if (i == startoffstripe) {
1503                     writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1504 
1505                     stripes[stripe].start = startoff;
1506                     stripes[stripe].end = startoff + writelen;
1507 
1508                     pos += writelen;
1509 
1510                     if (pos == length)
1511                         break;
1512                 } else {
1513                     writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1514 
1515                     stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1516                     stripes[stripe].end = stripes[stripe].start + writelen;
1517 
1518                     pos += writelen;
1519 
1520                     if (pos == length)
1521                         break;
1522                 }
1523 
1524                 i++;
1525                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1526             }
1527 
1528             if (pos == length)
1529                 break;
1530 
1531             for (i = 0; i < startoffstripe; i++) {
1532                 stripe = (parity1 + i + 2) % c->chunk_item->num_stripes;
1533 
1534                 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1535             }
1536 
1537             stripes[parity1].start = stripes[parity1].end = stripes[parity2].start = stripes[parity2].end =
1538                 startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1539 
1540             if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1541                 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1542 
1543                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1544                     stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1545                 }
1546 
1547                 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1548             }
1549         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1550             for (i = 0; i < c->chunk_item->num_stripes; i++) {
1551                 stripes[i].end += c->chunk_item->stripe_length;
1552             }
1553 
1554             pos += c->chunk_item->stripe_length * num_data_stripes;
1555         } else {
1556             uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1557 
1558             i = 0;
1559             while (stripe != parity1) {
1560                 if (endoffstripe == i) {
1561                     stripes[stripe].end = endoff + 1;
1562                     break;
1563                 } else if (endoffstripe > i)
1564                     stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1565 
1566                 i++;
1567                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1568             }
1569 
1570             break;
1571         }
1572     }
1573 
1574     parity_start = 0xffffffffffffffff;
1575     parity_end = 0;
1576 
1577     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1578         if (stripes[i].start != 0 || stripes[i].end != 0) {
1579             parity_start = min(stripes[i].start, parity_start);
1580             parity_end = max(stripes[i].end, parity_end);
1581         }
1582     }
1583 
1584     if (parity_end == parity_start) {
1585         Status = STATUS_SUCCESS;
1586         goto exit;
1587     }
1588 
1589     parity1 = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1590     stripes[parity1].start = stripes[(parity1 + 1) % c->chunk_item->num_stripes].start = parity_start;
1591 
1592     parity1 = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1593     stripes[parity1].end = stripes[(parity1 + 1) % c->chunk_item->num_stripes].end = parity_end;
1594 
1595     log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1596     if (!log_stripes) {
1597         ERR("out of memory\n");
1598         Status = STATUS_INSUFFICIENT_RESOURCES;
1599         goto exit;
1600     }
1601 
1602     RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1603 
1604     for (i = 0; i < num_data_stripes; i++) {
1605         log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1606         if (!log_stripes[i].mdl) {
1607             ERR("out of memory\n");
1608             Status = STATUS_INSUFFICIENT_RESOURCES;
1609             goto exit;
1610         }
1611 
1612         log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1613         log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1614     }
1615 
1616     wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1617     if (!wtc->parity1) {
1618         ERR("out of memory\n");
1619         Status = STATUS_INSUFFICIENT_RESOURCES;
1620         goto exit;
1621     }
1622 
1623     wtc->parity2 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1624     if (!wtc->parity2) {
1625         ERR("out of memory\n");
1626         Status = STATUS_INSUFFICIENT_RESOURCES;
1627         goto exit;
1628     }
1629 
1630     wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1631     if (!wtc->parity1_mdl) {
1632         ERR("out of memory\n");
1633         Status = STATUS_INSUFFICIENT_RESOURCES;
1634         goto exit;
1635     }
1636 
1637     MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1638 
1639     wtc->parity2_mdl = IoAllocateMdl(wtc->parity2, (ULONG)(parity_end - parity_start), false, false, NULL);
1640     if (!wtc->parity2_mdl) {
1641         ERR("out of memory\n");
1642         Status = STATUS_INSUFFICIENT_RESOURCES;
1643         goto exit;
1644     }
1645 
1646     MmBuildMdlForNonPagedPool(wtc->parity2_mdl);
1647 
1648     if (file_write)
1649         master_mdl = Irp->MdlAddress;
1650     else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1651         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1652         if (!wtc->scratch) {
1653             ERR("out of memory\n");
1654             Status = STATUS_INSUFFICIENT_RESOURCES;
1655             goto exit;
1656         }
1657 
1658         RtlCopyMemory(wtc->scratch, (uint8_t*)data + irp_offset, length);
1659 
1660         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1661         if (!master_mdl) {
1662             ERR("out of memory\n");
1663             Status = STATUS_INSUFFICIENT_RESOURCES;
1664             goto exit;
1665         }
1666 
1667         MmBuildMdlForNonPagedPool(master_mdl);
1668 
1669         wtc->mdl = master_mdl;
1670     } else {
1671         master_mdl = IoAllocateMdl((uint8_t*)data + irp_offset, length, false, false, NULL);
1672         if (!master_mdl) {
1673             ERR("out of memory\n");
1674             Status = STATUS_INSUFFICIENT_RESOURCES;
1675             goto exit;
1676         }
1677 
1678         Status = STATUS_SUCCESS;
1679 
1680         _SEH2_TRY {
1681             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1682         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1683             Status = _SEH2_GetExceptionCode();
1684         } _SEH2_END;
1685 
1686         if (!NT_SUCCESS(Status)) {
1687             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1688             IoFreeMdl(master_mdl);
1689             goto exit;
1690         }
1691 
1692         wtc->mdl = master_mdl;
1693     }
1694 
1695     pfns = (PFN_NUMBER*)(master_mdl + 1);
1696     parity1_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1697     parity2_pfns = (PFN_NUMBER*)(wtc->parity2_mdl + 1);
1698 
1699     if (file_write)
1700         pfns = &pfns[irp_offset >> PAGE_SHIFT];
1701 
1702     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1703         if (stripes[i].start != stripes[i].end) {
1704             stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1705             if (!stripes[i].mdl) {
1706                 ERR("IoAllocateMdl failed\n");
1707                 Status = STATUS_INSUFFICIENT_RESOURCES;
1708                 goto exit;
1709             }
1710         }
1711     }
1712 
1713     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1714     if (!stripeoff) {
1715         ERR("out of memory\n");
1716         Status = STATUS_INSUFFICIENT_RESOURCES;
1717         goto exit;
1718     }
1719 
1720     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1721 
1722     pos = 0;
1723     parity_pos = 0;
1724 
1725     while (pos < length) {
1726         PFN_NUMBER* stripe_pfns;
1727 
1728         parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1729 
1730         if (pos == 0) {
1731             uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes, parity2;
1732             uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1733                                                             c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1734             uint32_t maxwritelen = writelen;
1735 
1736             stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1737 
1738             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1739 
1740             RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1741             log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1742 
1743             stripeoff[stripe] = writelen;
1744             pos += writelen;
1745 
1746             stripe = (stripe + 1) % c->chunk_item->num_stripes;
1747             i = startoffstripe + 1;
1748 
1749             while (stripe != parity1) {
1750                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1751                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1752 
1753                 if (writelen == 0)
1754                     break;
1755 
1756                 if (writelen > maxwritelen)
1757                     maxwritelen = writelen;
1758 
1759                 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1760 
1761                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1762                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1763 
1764                 stripeoff[stripe] = writelen;
1765                 pos += writelen;
1766 
1767                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1768                 i++;
1769             }
1770 
1771             stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1772             RtlCopyMemory(stripe_pfns, parity1_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1773             stripeoff[parity1] = maxwritelen;
1774 
1775             parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1776 
1777             stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1778             RtlCopyMemory(stripe_pfns, parity2_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1779             stripeoff[parity2] = maxwritelen;
1780 
1781             parity_pos = maxwritelen;
1782         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1783             uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1784 
1785             i = 0;
1786             while (stripe != parity1) {
1787                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1788 
1789                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1790 
1791                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1792                 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1793 
1794                 stripeoff[stripe] += c->chunk_item->stripe_length;
1795                 pos += c->chunk_item->stripe_length;
1796 
1797                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1798                 i++;
1799             }
1800 
1801             stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1802             RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1803             stripeoff[parity1] += c->chunk_item->stripe_length;
1804 
1805             parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1806 
1807             stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1808             RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1809             stripeoff[parity2] += c->chunk_item->stripe_length;
1810 
1811             parity_pos += c->chunk_item->stripe_length;
1812         } else {
1813             uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1814             uint32_t writelen, maxwritelen = 0;
1815 
1816             i = 0;
1817             while (pos < length) {
1818                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1819                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1820 
1821                 if (writelen == 0)
1822                     break;
1823 
1824                 if (writelen > maxwritelen)
1825                     maxwritelen = writelen;
1826 
1827                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1828 
1829                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1830                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1831 
1832                 stripeoff[stripe] += writelen;
1833                 pos += writelen;
1834 
1835                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1836                 i++;
1837             }
1838 
1839             stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1840             RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1841 
1842             parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1843 
1844             stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1845             RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1846         }
1847     }
1848 
1849     for (i = 0; i < num_data_stripes; i++) {
1850         uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[c->chunk_item->num_stripes - 3 - i].mdl, priority);
1851 
1852         if (i == 0) {
1853             RtlCopyMemory(wtc->parity1, ss, (ULONG)(parity_end - parity_start));
1854             RtlCopyMemory(wtc->parity2, ss, (ULONG)(parity_end - parity_start));
1855         } else {
1856             do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1857 
1858             galois_double(wtc->parity2, (uint32_t)(parity_end - parity_start));
1859             do_xor(wtc->parity2, ss, (uint32_t)(parity_end - parity_start));
1860         }
1861     }
1862 
1863     Status = STATUS_SUCCESS;
1864 
1865 exit:
1866     if (log_stripes) {
1867         for (i = 0; i < num_data_stripes; i++) {
1868             if (log_stripes[i].mdl)
1869                 IoFreeMdl(log_stripes[i].mdl);
1870         }
1871 
1872         ExFreePool(log_stripes);
1873     }
1874 
1875     if (stripeoff)
1876         ExFreePool(stripeoff);
1877 
1878     return Status;
1879 }
1880 
1881 NTSTATUS write_data(_In_ device_extension* Vcb, _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_data_context* wtc,
1882                     _In_opt_ PIRP Irp, _In_opt_ chunk* c, _In_ bool file_write, _In_ uint64_t irp_offset, _In_ ULONG priority) {
1883     NTSTATUS Status;
1884     uint32_t i;
1885     CHUNK_ITEM_STRIPE* cis;
1886     write_stripe* stripes = NULL;
1887     uint64_t total_writing = 0;
1888     ULONG allowed_missing, missing;
1889 
1890     TRACE("(%p, %I64x, %p, %x)\n", Vcb, address, data, length);
1891 
1892     if (!c) {
1893         c = get_chunk_from_address(Vcb, address);
1894         if (!c) {
1895             ERR("could not get chunk for address %I64x\n", address);
1896             return STATUS_INTERNAL_ERROR;
1897         }
1898     }
1899 
1900     stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1901     if (!stripes) {
1902         ERR("out of memory\n");
1903         return STATUS_INSUFFICIENT_RESOURCES;
1904     }
1905 
1906     RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
1907 
1908     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1909 
1910     if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
1911         Status = prepare_raid0_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1912         if (!NT_SUCCESS(Status)) {
1913             ERR("prepare_raid0_write returned %08x\n", Status);
1914             goto prepare_failed;
1915         }
1916 
1917         allowed_missing = 0;
1918     } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
1919         Status = prepare_raid10_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1920         if (!NT_SUCCESS(Status)) {
1921             ERR("prepare_raid10_write returned %08x\n", Status);
1922             goto prepare_failed;
1923         }
1924 
1925         allowed_missing = 1;
1926     } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
1927         Status = prepare_raid5_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1928         if (!NT_SUCCESS(Status)) {
1929             ERR("prepare_raid5_write returned %08x\n", Status);
1930             goto prepare_failed;
1931         }
1932 
1933         allowed_missing = 1;
1934     } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
1935         Status = prepare_raid6_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1936         if (!NT_SUCCESS(Status)) {
1937             ERR("prepare_raid6_write returned %08x\n", Status);
1938             goto prepare_failed;
1939         }
1940 
1941         allowed_missing = 2;
1942     } else {  // write same data to every location - SINGLE, DUP, RAID1
1943         for (i = 0; i < c->chunk_item->num_stripes; i++) {
1944             stripes[i].start = address - c->offset;
1945             stripes[i].end = stripes[i].start + length;
1946             stripes[i].data = data;
1947             stripes[i].irp_offset = irp_offset;
1948 
1949             if (c->devices[i]->devobj) {
1950                 if (file_write) {
1951                     uint8_t* va;
1952                     ULONG writelen = (ULONG)(stripes[i].end - stripes[i].start);
1953 
1954                     va = (uint8_t*)MmGetMdlVirtualAddress(Irp->MdlAddress) + stripes[i].irp_offset;
1955 
1956                     stripes[i].mdl = IoAllocateMdl(va, writelen, false, false, NULL);
1957                     if (!stripes[i].mdl) {
1958                         ERR("IoAllocateMdl failed\n");
1959                         Status = STATUS_INSUFFICIENT_RESOURCES;
1960                         goto prepare_failed;
1961                     }
1962 
1963                     IoBuildPartialMdl(Irp->MdlAddress, stripes[i].mdl, va, writelen);
1964                 } else {
1965                     stripes[i].mdl = IoAllocateMdl(stripes[i].data, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1966                     if (!stripes[i].mdl) {
1967                         ERR("IoAllocateMdl failed\n");
1968                         Status = STATUS_INSUFFICIENT_RESOURCES;
1969                         goto prepare_failed;
1970                     }
1971 
1972                     Status = STATUS_SUCCESS;
1973 
1974                     _SEH2_TRY {
1975                         MmProbeAndLockPages(stripes[i].mdl, KernelMode, IoReadAccess);
1976                     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1977                         Status = _SEH2_GetExceptionCode();
1978                     } _SEH2_END;
1979 
1980                     if (!NT_SUCCESS(Status)) {
1981                         ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1982                         IoFreeMdl(stripes[i].mdl);
1983                         stripes[i].mdl = NULL;
1984                         goto prepare_failed;
1985                     }
1986                 }
1987             }
1988         }
1989 
1990         allowed_missing = c->chunk_item->num_stripes - 1;
1991     }
1992 
1993     missing = 0;
1994     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1995         if (!c->devices[i]->devobj)
1996             missing++;
1997     }
1998 
1999     if (missing > allowed_missing) {
2000         ERR("cannot write as %u missing devices (maximum %u)\n", missing, allowed_missing);
2001         Status = STATUS_DEVICE_NOT_READY;
2002         goto prepare_failed;
2003     }
2004 
2005     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2006         write_data_stripe* stripe;
2007         PIO_STACK_LOCATION IrpSp;
2008 
2009         stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
2010         if (!stripe) {
2011             ERR("out of memory\n");
2012             Status = STATUS_INSUFFICIENT_RESOURCES;
2013             goto end;
2014         }
2015 
2016         if (stripes[i].start == stripes[i].end || !c->devices[i]->devobj) {
2017             stripe->status = WriteDataStatus_Ignore;
2018             stripe->Irp = NULL;
2019             stripe->buf = stripes[i].data;
2020             stripe->mdl = NULL;
2021         } else {
2022             stripe->context = (struct _write_data_context*)wtc;
2023             stripe->buf = stripes[i].data;
2024             stripe->device = c->devices[i];
2025             RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
2026             stripe->status = WriteDataStatus_Pending;
2027             stripe->mdl = stripes[i].mdl;
2028 
2029             if (!Irp) {
2030                 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, false);
2031 
2032                 if (!stripe->Irp) {
2033                     ERR("IoAllocateIrp failed\n");
2034                     ExFreePool(stripe);
2035                     Status = STATUS_INSUFFICIENT_RESOURCES;
2036                     goto end;
2037                 }
2038             } else {
2039                 stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
2040 
2041                 if (!stripe->Irp) {
2042                     ERR("IoMakeAssociatedIrp failed\n");
2043                     ExFreePool(stripe);
2044                     Status = STATUS_INSUFFICIENT_RESOURCES;
2045                     goto end;
2046                 }
2047             }
2048 
2049             IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2050             IrpSp->MajorFunction = IRP_MJ_WRITE;
2051             IrpSp->FileObject = stripe->device->fileobj;
2052 
2053             if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
2054                 stripe->Irp->AssociatedIrp.SystemBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2055 
2056                 stripe->Irp->Flags = IRP_BUFFERED_IO;
2057             } else if (stripe->device->devobj->Flags & DO_DIRECT_IO)
2058                 stripe->Irp->MdlAddress = stripe->mdl;
2059             else
2060                 stripe->Irp->UserBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2061 
2062 #ifdef DEBUG_PARANOID
2063             if (stripes[i].end < stripes[i].start) {
2064                 ERR("trying to write stripe with negative length (%I64x < %I64x)\n", stripes[i].end, stripes[i].start);
2065                 int3;
2066             }
2067 #endif
2068 
2069             IrpSp->Parameters.Write.Length = (ULONG)(stripes[i].end - stripes[i].start);
2070             IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset;
2071 
2072             total_writing += IrpSp->Parameters.Write.Length;
2073 
2074             stripe->Irp->UserIosb = &stripe->iosb;
2075             wtc->stripes_left++;
2076 
2077             IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, true, true, true);
2078         }
2079 
2080         InsertTailList(&wtc->stripes, &stripe->list_entry);
2081     }
2082 
2083     if (diskacc)
2084         fFsRtlUpdateDiskCounters(0, total_writing);
2085 
2086     Status = STATUS_SUCCESS;
2087 
2088 end:
2089 
2090     if (stripes) ExFreePool(stripes);
2091 
2092     if (!NT_SUCCESS(Status))
2093         free_write_data_stripes(wtc);
2094 
2095     return Status;
2096 
2097 prepare_failed:
2098     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2099         if (stripes[i].mdl && (i == 0 || stripes[i].mdl != stripes[i-1].mdl)) {
2100             if (stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2101                 MmUnlockPages(stripes[i].mdl);
2102 
2103             IoFreeMdl(stripes[i].mdl);
2104         }
2105     }
2106 
2107     if (wtc->parity1_mdl) {
2108         if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2109             MmUnlockPages(wtc->parity1_mdl);
2110 
2111         IoFreeMdl(wtc->parity1_mdl);
2112         wtc->parity1_mdl = NULL;
2113     }
2114 
2115     if (wtc->parity2_mdl) {
2116         if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2117             MmUnlockPages(wtc->parity2_mdl);
2118 
2119         IoFreeMdl(wtc->parity2_mdl);
2120         wtc->parity2_mdl = NULL;
2121     }
2122 
2123     if (wtc->mdl) {
2124         if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2125             MmUnlockPages(wtc->mdl);
2126 
2127         IoFreeMdl(wtc->mdl);
2128         wtc->mdl = NULL;
2129     }
2130 
2131     if (wtc->parity1) {
2132         ExFreePool(wtc->parity1);
2133         wtc->parity1 = NULL;
2134     }
2135 
2136     if (wtc->parity2) {
2137         ExFreePool(wtc->parity2);
2138         wtc->parity2 = NULL;
2139     }
2140 
2141     if (wtc->scratch) {
2142         ExFreePool(wtc->scratch);
2143         wtc->scratch = NULL;
2144     }
2145 
2146     ExFreePool(stripes);
2147     return Status;
2148 }
2149 
2150 void get_raid56_lock_range(chunk* c, uint64_t address, uint64_t length, uint64_t* lockaddr, uint64_t* locklen) {
2151     uint64_t startoff, endoff;
2152     uint16_t startoffstripe, endoffstripe, datastripes;
2153 
2154     datastripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
2155 
2156     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, datastripes, &startoff, &startoffstripe);
2157     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, datastripes, &endoff, &endoffstripe);
2158 
2159     startoff -= startoff % c->chunk_item->stripe_length;
2160     endoff = sector_align(endoff, c->chunk_item->stripe_length);
2161 
2162     *lockaddr = c->offset + (startoff * datastripes);
2163     *locklen = (endoff - startoff) * datastripes;
2164 }
2165 
2166 NTSTATUS write_data_complete(device_extension* Vcb, uint64_t address, void* data, uint32_t length, PIRP Irp, chunk* c, bool file_write, uint64_t irp_offset, ULONG priority) {
2167     write_data_context wtc;
2168     NTSTATUS Status;
2169     uint64_t lockaddr, locklen;
2170 
2171     KeInitializeEvent(&wtc.Event, NotificationEvent, false);
2172     InitializeListHead(&wtc.stripes);
2173     wtc.stripes_left = 0;
2174     wtc.parity1 = wtc.parity2 = wtc.scratch = NULL;
2175     wtc.mdl = wtc.parity1_mdl = wtc.parity2_mdl = NULL;
2176 
2177     if (!c) {
2178         c = get_chunk_from_address(Vcb, address);
2179         if (!c) {
2180             ERR("could not get chunk for address %I64x\n", address);
2181             return STATUS_INTERNAL_ERROR;
2182         }
2183     }
2184 
2185     if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
2186         get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
2187         chunk_lock_range(Vcb, c, lockaddr, locklen);
2188     }
2189 
2190     _SEH2_TRY {
2191         Status = write_data(Vcb, address, data, length, &wtc, Irp, c, file_write, irp_offset, priority);
2192     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2193         Status = _SEH2_GetExceptionCode();
2194     } _SEH2_END;
2195 
2196     if (!NT_SUCCESS(Status)) {
2197         ERR("write_data returned %08x\n", Status);
2198 
2199         if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2200             chunk_unlock_range(Vcb, c, lockaddr, locklen);
2201 
2202         free_write_data_stripes(&wtc);
2203         return Status;
2204     }
2205 
2206     if (wtc.stripes.Flink != &wtc.stripes) {
2207         // launch writes and wait
2208         LIST_ENTRY* le = wtc.stripes.Flink;
2209         bool no_wait = true;
2210 
2211         while (le != &wtc.stripes) {
2212             write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2213 
2214             if (stripe->status != WriteDataStatus_Ignore) {
2215                 IoCallDriver(stripe->device->devobj, stripe->Irp);
2216                 no_wait = false;
2217             }
2218 
2219             le = le->Flink;
2220         }
2221 
2222         if (!no_wait)
2223             KeWaitForSingleObject(&wtc.Event, Executive, KernelMode, false, NULL);
2224 
2225         le = wtc.stripes.Flink;
2226         while (le != &wtc.stripes) {
2227             write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2228 
2229             if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
2230                 Status = stripe->iosb.Status;
2231 
2232                 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2233                 break;
2234             }
2235 
2236             le = le->Flink;
2237         }
2238 
2239         free_write_data_stripes(&wtc);
2240     }
2241 
2242     if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2243         chunk_unlock_range(Vcb, c, lockaddr, locklen);
2244 
2245     return Status;
2246 }
2247 
2248 _Function_class_(IO_COMPLETION_ROUTINE)
2249 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2250     write_data_stripe* stripe = conptr;
2251     write_data_context* context = (write_data_context*)stripe->context;
2252     LIST_ENTRY* le;
2253 
2254     UNUSED(DeviceObject);
2255 
2256     // FIXME - we need a lock here
2257 
2258     if (stripe->status == WriteDataStatus_Cancelling) {
2259         stripe->status = WriteDataStatus_Cancelled;
2260         goto end;
2261     }
2262 
2263     stripe->iosb = Irp->IoStatus;
2264 
2265     if (NT_SUCCESS(Irp->IoStatus.Status)) {
2266         stripe->status = WriteDataStatus_Success;
2267     } else {
2268         le = context->stripes.Flink;
2269 
2270         stripe->status = WriteDataStatus_Error;
2271 
2272         while (le != &context->stripes) {
2273             write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2274 
2275             if (s2->status == WriteDataStatus_Pending) {
2276                 s2->status = WriteDataStatus_Cancelling;
2277                 IoCancelIrp(s2->Irp);
2278             }
2279 
2280             le = le->Flink;
2281         }
2282     }
2283 
2284 end:
2285     if (InterlockedDecrement(&context->stripes_left) == 0)
2286         KeSetEvent(&context->Event, 0, false);
2287 
2288     return STATUS_MORE_PROCESSING_REQUIRED;
2289 }
2290 
2291 void free_write_data_stripes(write_data_context* wtc) {
2292     LIST_ENTRY* le;
2293     PMDL last_mdl = NULL;
2294 
2295     if (wtc->parity1_mdl) {
2296         if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2297             MmUnlockPages(wtc->parity1_mdl);
2298 
2299         IoFreeMdl(wtc->parity1_mdl);
2300     }
2301 
2302     if (wtc->parity2_mdl) {
2303         if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2304             MmUnlockPages(wtc->parity2_mdl);
2305 
2306         IoFreeMdl(wtc->parity2_mdl);
2307     }
2308 
2309     if (wtc->mdl) {
2310         if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2311             MmUnlockPages(wtc->mdl);
2312 
2313         IoFreeMdl(wtc->mdl);
2314     }
2315 
2316     if (wtc->parity1)
2317         ExFreePool(wtc->parity1);
2318 
2319     if (wtc->parity2)
2320         ExFreePool(wtc->parity2);
2321 
2322     if (wtc->scratch)
2323         ExFreePool(wtc->scratch);
2324 
2325     le = wtc->stripes.Flink;
2326     while (le != &wtc->stripes) {
2327         write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2328 
2329         if (stripe->mdl && stripe->mdl != last_mdl) {
2330             if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2331                 MmUnlockPages(stripe->mdl);
2332 
2333             IoFreeMdl(stripe->mdl);
2334         }
2335 
2336         last_mdl = stripe->mdl;
2337 
2338         if (stripe->Irp)
2339             IoFreeIrp(stripe->Irp);
2340 
2341         le = le->Flink;
2342     }
2343 
2344     while (!IsListEmpty(&wtc->stripes)) {
2345         write_data_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&wtc->stripes), write_data_stripe, list_entry);
2346 
2347         ExFreePool(stripe);
2348     }
2349 }
2350 
2351 void add_extent(_In_ fcb* fcb, _In_ LIST_ENTRY* prevextle, _In_ __drv_aliasesMem extent* newext) {
2352     LIST_ENTRY* le = prevextle->Flink;
2353 
2354     while (le != &fcb->extents) {
2355         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2356 
2357         if (ext->offset >= newext->offset) {
2358             InsertHeadList(ext->list_entry.Blink, &newext->list_entry);
2359             return;
2360         }
2361 
2362         le = le->Flink;
2363     }
2364 
2365     InsertTailList(&fcb->extents, &newext->list_entry);
2366 }
2367 
2368 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t end_data, PIRP Irp, LIST_ENTRY* rollback) {
2369     NTSTATUS Status;
2370     LIST_ENTRY* le;
2371 
2372     le = fcb->extents.Flink;
2373 
2374     while (le != &fcb->extents) {
2375         LIST_ENTRY* le2 = le->Flink;
2376         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2377         EXTENT_DATA* ed = &ext->extent_data;
2378         EXTENT_DATA2* ed2 = NULL;
2379         uint64_t len;
2380 
2381         if (!ext->ignore) {
2382             if (ed->type != EXTENT_TYPE_INLINE)
2383                 ed2 = (EXTENT_DATA2*)ed->data;
2384 
2385             len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
2386 
2387             if (ext->offset < end_data && ext->offset + len > start_data) {
2388                 if (ed->type == EXTENT_TYPE_INLINE) {
2389                     if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2390                         remove_fcb_extent(fcb, ext, rollback);
2391 
2392                         fcb->inode_item.st_blocks -= len;
2393                         fcb->inode_item_changed = true;
2394                     } else {
2395                         ERR("trying to split inline extent\n");
2396 #ifdef DEBUG_PARANOID
2397                         int3;
2398 #endif
2399                         return STATUS_INTERNAL_ERROR;
2400                     }
2401                 } else if (ed->type != EXTENT_TYPE_INLINE) {
2402                     if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2403                         if (ed2->size != 0) {
2404                             chunk* c;
2405 
2406                             fcb->inode_item.st_blocks -= len;
2407                             fcb->inode_item_changed = true;
2408 
2409                             c = get_chunk_from_address(Vcb, ed2->address);
2410 
2411                             if (!c) {
2412                                 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2413                             } else {
2414                                 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
2415                                                                    fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
2416                                 if (!NT_SUCCESS(Status)) {
2417                                     ERR("update_changed_extent_ref returned %08x\n", Status);
2418                                     goto end;
2419                                 }
2420                             }
2421                         }
2422 
2423                         remove_fcb_extent(fcb, ext, rollback);
2424                     } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
2425                         EXTENT_DATA2* ned2;
2426                         extent* newext;
2427 
2428                         if (ed2->size != 0) {
2429                             fcb->inode_item.st_blocks -= end_data - ext->offset;
2430                             fcb->inode_item_changed = true;
2431                         }
2432 
2433                         newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2434                         if (!newext) {
2435                             ERR("out of memory\n");
2436                             Status = STATUS_INSUFFICIENT_RESOURCES;
2437                             goto end;
2438                         }
2439 
2440                         ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2441 
2442                         newext->extent_data.generation = Vcb->superblock.generation;
2443                         newext->extent_data.decoded_size = ed->decoded_size;
2444                         newext->extent_data.compression = ed->compression;
2445                         newext->extent_data.encryption = ed->encryption;
2446                         newext->extent_data.encoding = ed->encoding;
2447                         newext->extent_data.type = ed->type;
2448                         ned2->address = ed2->address;
2449                         ned2->size = ed2->size;
2450                         ned2->offset = ed2->offset + (end_data - ext->offset);
2451                         ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);
2452 
2453                         newext->offset = end_data;
2454                         newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2455                         newext->unique = ext->unique;
2456                         newext->ignore = false;
2457                         newext->inserted = true;
2458 
2459                         if (ext->csum) {
2460                             if (ed->compression == BTRFS_COMPRESSION_NONE) {
2461                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2462                                 if (!newext->csum) {
2463                                     ERR("out of memory\n");
2464                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2465                                     ExFreePool(newext);
2466                                     goto end;
2467                                 }
2468 
2469                                 RtlCopyMemory(newext->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size],
2470                                               (ULONG)(ned2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size));
2471                             } else {
2472                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2473                                 if (!newext->csum) {
2474                                     ERR("out of memory\n");
2475                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2476                                     ExFreePool(newext);
2477                                     goto end;
2478                                 }
2479 
2480                                 RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size));
2481                             }
2482                         } else
2483                             newext->csum = NULL;
2484 
2485                         add_extent(fcb, &ext->list_entry, newext);
2486 
2487                         remove_fcb_extent(fcb, ext, rollback);
2488                     } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
2489                         EXTENT_DATA2* ned2;
2490                         extent* newext;
2491 
2492                         if (ed2->size != 0) {
2493                             fcb->inode_item.st_blocks -= ext->offset + len - start_data;
2494                             fcb->inode_item_changed = true;
2495                         }
2496 
2497                         newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2498                         if (!newext) {
2499                             ERR("out of memory\n");
2500                             Status = STATUS_INSUFFICIENT_RESOURCES;
2501                             goto end;
2502                         }
2503 
2504                         ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2505 
2506                         newext->extent_data.generation = Vcb->superblock.generation;
2507                         newext->extent_data.decoded_size = ed->decoded_size;
2508                         newext->extent_data.compression = ed->compression;
2509                         newext->extent_data.encryption = ed->encryption;
2510                         newext->extent_data.encoding = ed->encoding;
2511                         newext->extent_data.type = ed->type;
2512                         ned2->address = ed2->address;
2513                         ned2->size = ed2->size;
2514                         ned2->offset = ed2->offset;
2515                         ned2->num_bytes = start_data - ext->offset;
2516 
2517                         newext->offset = ext->offset;
2518                         newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2519                         newext->unique = ext->unique;
2520                         newext->ignore = false;
2521                         newext->inserted = true;
2522 
2523                         if (ext->csum) {
2524                             if (ed->compression == BTRFS_COMPRESSION_NONE) {
2525                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2526                                 if (!newext->csum) {
2527                                     ERR("out of memory\n");
2528                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2529                                     ExFreePool(newext);
2530                                     goto end;
2531                                 }
2532 
2533                                 RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ned2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size));
2534                             } else {
2535                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2536                                 if (!newext->csum) {
2537                                     ERR("out of memory\n");
2538                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2539                                     ExFreePool(newext);
2540                                     goto end;
2541                                 }
2542 
2543                                 RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size));
2544                             }
2545                         } else
2546                             newext->csum = NULL;
2547 
2548                         InsertHeadList(&ext->list_entry, &newext->list_entry);
2549 
2550                         remove_fcb_extent(fcb, ext, rollback);
2551                     } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
2552                         EXTENT_DATA2 *neda2, *nedb2;
2553                         extent *newext1, *newext2;
2554 
2555                         if (ed2->size != 0) {
2556                             chunk* c;
2557 
2558                             fcb->inode_item.st_blocks -= end_data - start_data;
2559                             fcb->inode_item_changed = true;
2560 
2561                             c = get_chunk_from_address(Vcb, ed2->address);
2562 
2563                             if (!c) {
2564                                 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2565                             } else {
2566                                 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
2567                                                                    fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
2568                                 if (!NT_SUCCESS(Status)) {
2569                                     ERR("update_changed_extent_ref returned %08x\n", Status);
2570                                     goto end;
2571                                 }
2572                             }
2573                         }
2574 
2575                         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2576                         if (!newext1) {
2577                             ERR("out of memory\n");
2578                             Status = STATUS_INSUFFICIENT_RESOURCES;
2579                             goto end;
2580                         }
2581 
2582                         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2583                         if (!newext2) {
2584                             ERR("out of memory\n");
2585                             Status = STATUS_INSUFFICIENT_RESOURCES;
2586                             ExFreePool(newext1);
2587                             goto end;
2588                         }
2589 
2590                         neda2 = (EXTENT_DATA2*)newext1->extent_data.data;
2591 
2592                         newext1->extent_data.generation = Vcb->superblock.generation;
2593                         newext1->extent_data.decoded_size = ed->decoded_size;
2594                         newext1->extent_data.compression = ed->compression;
2595                         newext1->extent_data.encryption = ed->encryption;
2596                         newext1->extent_data.encoding = ed->encoding;
2597                         newext1->extent_data.type = ed->type;
2598                         neda2->address = ed2->address;
2599                         neda2->size = ed2->size;
2600                         neda2->offset = ed2->offset;
2601                         neda2->num_bytes = start_data - ext->offset;
2602 
2603                         nedb2 = (EXTENT_DATA2*)newext2->extent_data.data;
2604 
2605                         newext2->extent_data.generation = Vcb->superblock.generation;
2606                         newext2->extent_data.decoded_size = ed->decoded_size;
2607                         newext2->extent_data.compression = ed->compression;
2608                         newext2->extent_data.encryption = ed->encryption;
2609                         newext2->extent_data.encoding = ed->encoding;
2610                         newext2->extent_data.type = ed->type;
2611                         nedb2->address = ed2->address;
2612                         nedb2->size = ed2->size;
2613                         nedb2->offset = ed2->offset + (end_data - ext->offset);
2614                         nedb2->num_bytes = ext->offset + len - end_data;
2615 
2616                         newext1->offset = ext->offset;
2617                         newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2618                         newext1->unique = ext->unique;
2619                         newext1->ignore = false;
2620                         newext1->inserted = true;
2621 
2622                         newext2->offset = end_data;
2623                         newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2624                         newext2->unique = ext->unique;
2625                         newext2->ignore = false;
2626                         newext2->inserted = true;
2627 
2628                         if (ext->csum) {
2629                             if (ed->compression == BTRFS_COMPRESSION_NONE) {
2630                                 newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(neda2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2631                                 if (!newext1->csum) {
2632                                     ERR("out of memory\n");
2633                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2634                                     ExFreePool(newext1);
2635                                     ExFreePool(newext2);
2636                                     goto end;
2637                                 }
2638 
2639                                 newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(nedb2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2640                                 if (!newext2->csum) {
2641                                     ERR("out of memory\n");
2642                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2643                                     ExFreePool(newext1->csum);
2644                                     ExFreePool(newext1);
2645                                     ExFreePool(newext2);
2646                                     goto end;
2647                                 }
2648 
2649                                 RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(neda2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size));
2650                                 RtlCopyMemory(newext2->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size],
2651                                               (ULONG)(nedb2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size));
2652                             } else {
2653                                 newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2654                                 if (!newext1->csum) {
2655                                     ERR("out of memory\n");
2656                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2657                                     ExFreePool(newext1);
2658                                     ExFreePool(newext2);
2659                                     goto end;
2660                                 }
2661 
2662                                 newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2663                                 if (!newext2->csum) {
2664                                     ERR("out of memory\n");
2665                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2666                                     ExFreePool(newext1->csum);
2667                                     ExFreePool(newext1);
2668                                     ExFreePool(newext2);
2669                                     goto end;
2670                                 }
2671 
2672                                 RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size));
2673                                 RtlCopyMemory(newext2->csum, ext->csum, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size));
2674                             }
2675                         } else {
2676                             newext1->csum = NULL;
2677                             newext2->csum = NULL;
2678                         }
2679 
2680                         InsertHeadList(&ext->list_entry, &newext1->list_entry);
2681                         add_extent(fcb, &newext1->list_entry, newext2);
2682 
2683                         remove_fcb_extent(fcb, ext, rollback);
2684                     }
2685                 }
2686             }
2687         }
2688 
2689         le = le2;
2690     }
2691 
2692     Status = STATUS_SUCCESS;
2693 
2694 end:
2695     fcb->extents_changed = true;
2696     mark_fcb_dirty(fcb);
2697 
2698     return Status;
2699 }
2700 
2701 void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) {
2702     rollback_extent* re;
2703 
2704     re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2705     if (!re) {
2706         ERR("out of memory\n");
2707         return;
2708     }
2709 
2710     re->fcb = fcb;
2711     re->ext = ext;
2712 
2713     add_rollback(rollback, ROLLBACK_INSERT_EXTENT, re);
2714 }
2715 
2716 #ifdef _MSC_VER
2717 #pragma warning(push)
2718 #pragma warning(suppress: 28194)
2719 #endif
2720 NTSTATUS add_extent_to_fcb(_In_ fcb* fcb, _In_ uint64_t offset, _In_reads_bytes_(edsize) EXTENT_DATA* ed, _In_ uint16_t edsize,
2721                            _In_ bool unique, _In_opt_ _When_(return >= 0, __drv_aliasesMem) uint32_t* csum, _In_ LIST_ENTRY* rollback) {
2722     extent* ext;
2723     LIST_ENTRY* le;
2724 
2725     ext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + edsize, ALLOC_TAG);
2726     if (!ext) {
2727         ERR("out of memory\n");
2728         return STATUS_INSUFFICIENT_RESOURCES;
2729     }
2730 
2731     ext->offset = offset;
2732     ext->datalen = edsize;
2733     ext->unique = unique;
2734     ext->ignore = false;
2735     ext->inserted = true;
2736     ext->csum = csum;
2737 
2738     RtlCopyMemory(&ext->extent_data, ed, edsize);
2739 
2740     le = fcb->extents.Flink;
2741     while (le != &fcb->extents) {
2742         extent* oldext = CONTAINING_RECORD(le, extent, list_entry);
2743 
2744         if (oldext->offset >= offset) {
2745             InsertHeadList(le->Blink, &ext->list_entry);
2746             goto end;
2747         }
2748 
2749         le = le->Flink;
2750     }
2751 
2752     InsertTailList(&fcb->extents, &ext->list_entry);
2753 
2754 end:
2755     add_insert_extent_rollback(rollback, fcb, ext);
2756 
2757     return STATUS_SUCCESS;
2758 }
2759 #ifdef _MSC_VER
2760 #pragma warning(pop)
2761 #endif
2762 
2763 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) {
2764     if (!ext->ignore) {
2765         rollback_extent* re;
2766 
2767         ext->ignore = true;
2768 
2769         re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2770         if (!re) {
2771             ERR("out of memory\n");
2772             return;
2773         }
2774 
2775         re->fcb = fcb;
2776         re->ext = ext;
2777 
2778         add_rollback(rollback, ROLLBACK_DELETE_EXTENT, re);
2779     }
2780 }
2781 
2782 NTSTATUS calc_csum(_In_ device_extension* Vcb, _In_reads_bytes_(sectors*Vcb->superblock.sector_size) uint8_t* data,
2783                    _In_ uint32_t sectors, _Out_writes_bytes_(sectors*sizeof(uint32_t)) uint32_t* csum) {
2784     NTSTATUS Status;
2785     calc_job* cj;
2786 
2787     // From experimenting, it seems that 40 sectors is roughly the crossover
2788     // point where offloading the crc32 calculation becomes worth it.
2789 
2790     if (sectors < 40 || get_num_of_processors() < 2) {
2791         ULONG j;
2792 
2793         for (j = 0; j < sectors; j++) {
2794             csum[j] = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
2795         }
2796 
2797         return STATUS_SUCCESS;
2798     }
2799 
2800     Status = add_calc_job(Vcb, data, sectors, csum, &cj);
2801     if (!NT_SUCCESS(Status)) {
2802         ERR("add_calc_job returned %08x\n", Status);
2803         return Status;
2804     }
2805 
2806     KeWaitForSingleObject(&cj->event, Executive, KernelMode, false, NULL);
2807     free_calc_job(cj);
2808 
2809     return STATUS_SUCCESS;
2810 }
2811 
2812 _Requires_lock_held_(c->lock)
2813 _When_(return != 0, _Releases_lock_(c->lock))
2814 bool insert_extent_chunk(_In_ device_extension* Vcb, _In_ fcb* fcb, _In_ chunk* c, _In_ uint64_t start_data, _In_ uint64_t length, _In_ bool prealloc, _In_opt_ void* data,
2815                          _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback, _In_ uint8_t compression, _In_ uint64_t decoded_size, _In_ bool file_write, _In_ uint64_t irp_offset) {
2816     uint64_t address;
2817     NTSTATUS Status;
2818     EXTENT_DATA* ed;
2819     EXTENT_DATA2* ed2;
2820     uint16_t edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
2821     uint32_t* csum = NULL;
2822 
2823     TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %I64x, %u, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, rollback);
2824 
2825     if (!find_data_address_in_chunk(Vcb, c, length, &address))
2826         return false;
2827 
2828     // add extent data to inode
2829     ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
2830     if (!ed) {
2831         ERR("out of memory\n");
2832         return false;
2833     }
2834 
2835     ed->generation = Vcb->superblock.generation;
2836     ed->decoded_size = decoded_size;
2837     ed->compression = compression;
2838     ed->encryption = BTRFS_ENCRYPTION_NONE;
2839     ed->encoding = BTRFS_ENCODING_NONE;
2840     ed->type = prealloc ? EXTENT_TYPE_PREALLOC : EXTENT_TYPE_REGULAR;
2841 
2842     ed2 = (EXTENT_DATA2*)ed->data;
2843     ed2->address = address;
2844     ed2->size = length;
2845     ed2->offset = 0;
2846     ed2->num_bytes = decoded_size;
2847 
2848     if (!prealloc && data && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
2849         ULONG sl = (ULONG)(length / Vcb->superblock.sector_size);
2850 
2851         csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
2852         if (!csum) {
2853             ERR("out of memory\n");
2854             ExFreePool(ed);
2855             return false;
2856         }
2857 
2858         Status = calc_csum(Vcb, data, sl, csum);
2859         if (!NT_SUCCESS(Status)) {
2860             ERR("calc_csum returned %08x\n", Status);
2861             ExFreePool(csum);
2862             ExFreePool(ed);
2863             return false;
2864         }
2865     }
2866 
2867     Status = add_extent_to_fcb(fcb, start_data, ed, edsize, true, csum, rollback);
2868     if (!NT_SUCCESS(Status)) {
2869         ERR("add_extent_to_fcb returned %08x\n", Status);
2870         if (csum) ExFreePool(csum);
2871         ExFreePool(ed);
2872         return false;
2873     }
2874 
2875     ExFreePool(ed);
2876 
2877     c->used += length;
2878     space_list_subtract(c, false, address, length, rollback);
2879 
2880     fcb->inode_item.st_blocks += decoded_size;
2881 
2882     fcb->extents_changed = true;
2883     fcb->inode_item_changed = true;
2884     mark_fcb_dirty(fcb);
2885 
2886     ExAcquireResourceExclusiveLite(&c->changed_extents_lock, true);
2887 
2888     add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
2889 
2890     ExReleaseResourceLite(&c->changed_extents_lock);
2891 
2892     release_chunk_lock(c, Vcb);
2893 
2894     if (data) {
2895         Status = write_data_complete(Vcb, address, data, (uint32_t)length, Irp, NULL, file_write, irp_offset,
2896                                      fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2897         if (!NT_SUCCESS(Status))
2898             ERR("write_data_complete returned %08x\n", Status);
2899     }
2900 
2901     return true;
2902 }
2903 
2904 static bool try_extend_data(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
2905                             PIRP Irp, uint64_t* written, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
2906     bool success = false;
2907     EXTENT_DATA* ed;
2908     EXTENT_DATA2* ed2;
2909     chunk* c;
2910     LIST_ENTRY* le;
2911     extent* ext = NULL;
2912 
2913     le = fcb->extents.Flink;
2914 
2915     while (le != &fcb->extents) {
2916         extent* nextext = CONTAINING_RECORD(le, extent, list_entry);
2917 
2918         if (!nextext->ignore) {
2919             if (nextext->offset == start_data) {
2920                 ext = nextext;
2921                 break;
2922             } else if (nextext->offset > start_data)
2923                 break;
2924 
2925             ext = nextext;
2926         }
2927 
2928         le = le->Flink;
2929     }
2930 
2931     if (!ext)
2932         return false;
2933 
2934     ed = &ext->extent_data;
2935 
2936     if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
2937         TRACE("not extending extent which is not regular or prealloc\n");
2938         return false;
2939     }
2940 
2941     ed2 = (EXTENT_DATA2*)ed->data;
2942 
2943     if (ext->offset + ed2->num_bytes != start_data) {
2944         TRACE("last EXTENT_DATA does not run up to start_data (%I64x + %I64x != %I64x)\n", ext->offset, ed2->num_bytes, start_data);
2945         return false;
2946     }
2947 
2948     c = get_chunk_from_address(Vcb, ed2->address);
2949 
2950     if (c->reloc || c->readonly || c->chunk_item->type != Vcb->data_flags)
2951         return false;
2952 
2953     acquire_chunk_lock(c, Vcb);
2954 
2955     if (length > c->chunk_item->size - c->used) {
2956         release_chunk_lock(c, Vcb);
2957         return false;
2958     }
2959 
2960     if (!c->cache_loaded) {
2961         NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
2962 
2963         if (!NT_SUCCESS(Status)) {
2964             ERR("load_cache_chunk returned %08x\n", Status);
2965             release_chunk_lock(c, Vcb);
2966             return false;
2967         }
2968     }
2969 
2970     le = c->space.Flink;
2971     while (le != &c->space) {
2972         space* s = CONTAINING_RECORD(le, space, list_entry);
2973 
2974         if (s->address == ed2->address + ed2->size) {
2975             uint64_t newlen = min(min(s->size, length), MAX_EXTENT_SIZE);
2976 
2977             success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset);
2978 
2979             if (success)
2980                 *written += newlen;
2981             else
2982                 release_chunk_lock(c, Vcb);
2983 
2984             return success;
2985         } else if (s->address > ed2->address + ed2->size)
2986             break;
2987 
2988         le = le->Flink;
2989     }
2990 
2991     release_chunk_lock(c, Vcb);
2992 
2993     return false;
2994 }
2995 
2996 static NTSTATUS insert_chunk_fragmented(fcb* fcb, uint64_t start, uint64_t length, uint8_t* data, bool prealloc, LIST_ENTRY* rollback) {
2997     LIST_ENTRY* le;
2998     uint64_t flags = fcb->Vcb->data_flags;
2999     bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3000     NTSTATUS Status;
3001     chunk* c;
3002 
3003     ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3004 
3005     // first create as many chunks as we can
3006     do {
3007         Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3008     } while (NT_SUCCESS(Status));
3009 
3010     if (Status != STATUS_DISK_FULL) {
3011         ERR("alloc_chunk returned %08x\n", Status);
3012         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3013         return Status;
3014     }
3015 
3016     le = fcb->Vcb->chunks.Flink;
3017     while (le != &fcb->Vcb->chunks) {
3018         c = CONTAINING_RECORD(le, chunk, list_entry);
3019 
3020         if (!c->readonly && !c->reloc) {
3021             acquire_chunk_lock(c, fcb->Vcb);
3022 
3023             if (c->chunk_item->type == flags) {
3024                 while (!IsListEmpty(&c->space_size) && length > 0) {
3025                     space* s = CONTAINING_RECORD(c->space_size.Flink, space, list_entry_size);
3026                     uint64_t extlen = min(length, s->size);
3027 
3028                     if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, prealloc && !page_file, data, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3029                         start += extlen;
3030                         length -= extlen;
3031                         if (data) data += extlen;
3032 
3033                         acquire_chunk_lock(c, fcb->Vcb);
3034                     }
3035                 }
3036             }
3037 
3038             release_chunk_lock(c, fcb->Vcb);
3039 
3040             if (length == 0)
3041                 break;
3042         }
3043 
3044         le = le->Flink;
3045     }
3046 
3047     ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3048 
3049     return length == 0 ? STATUS_SUCCESS : STATUS_DISK_FULL;
3050 }
3051 
3052 static NTSTATUS insert_prealloc_extent(fcb* fcb, uint64_t start, uint64_t length, LIST_ENTRY* rollback) {
3053     LIST_ENTRY* le;
3054     chunk* c;
3055     uint64_t flags;
3056     NTSTATUS Status;
3057     bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3058 
3059     flags = fcb->Vcb->data_flags;
3060 
3061     do {
3062         uint64_t extlen = min(MAX_EXTENT_SIZE, length);
3063 
3064         ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3065 
3066         le = fcb->Vcb->chunks.Flink;
3067         while (le != &fcb->Vcb->chunks) {
3068             c = CONTAINING_RECORD(le, chunk, list_entry);
3069 
3070             if (!c->readonly && !c->reloc) {
3071                 acquire_chunk_lock(c, fcb->Vcb);
3072 
3073                 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3074                     if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3075                         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3076                         goto cont;
3077                     }
3078                 }
3079 
3080                 release_chunk_lock(c, fcb->Vcb);
3081             }
3082 
3083             le = le->Flink;
3084         }
3085 
3086         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3087 
3088         ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, true);
3089 
3090         Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3091 
3092         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3093 
3094         if (!NT_SUCCESS(Status)) {
3095             ERR("alloc_chunk returned %08x\n", Status);
3096             goto end;
3097         }
3098 
3099         acquire_chunk_lock(c, fcb->Vcb);
3100 
3101         if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3102             if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0))
3103                 goto cont;
3104         }
3105 
3106         release_chunk_lock(c, fcb->Vcb);
3107 
3108         Status = insert_chunk_fragmented(fcb, start, length, NULL, true, rollback);
3109         if (!NT_SUCCESS(Status))
3110             ERR("insert_chunk_fragmented returned %08x\n", Status);
3111 
3112         goto end;
3113 
3114 cont:
3115         length -= extlen;
3116         start += extlen;
3117     } while (length > 0);
3118 
3119     Status = STATUS_SUCCESS;
3120 
3121 end:
3122     return Status;
3123 }
3124 
3125 static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
3126                               PIRP Irp, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
3127     NTSTATUS Status;
3128     LIST_ENTRY* le;
3129     chunk* c;
3130     uint64_t flags, orig_length = length, written = 0;
3131 
3132     TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data);
3133 
3134     if (start_data > 0) {
3135         try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, file_write, irp_offset, rollback);
3136 
3137         if (written == length)
3138             return STATUS_SUCCESS;
3139         else if (written > 0) {
3140             start_data += written;
3141             irp_offset += written;
3142             length -= written;
3143             data = &((uint8_t*)data)[written];
3144         }
3145     }
3146 
3147     flags = Vcb->data_flags;
3148 
3149     while (written < orig_length) {
3150         uint64_t newlen = min(length, MAX_EXTENT_SIZE);
3151         bool done = false;
3152 
3153         // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB.
3154         // First, see if we can write the extent part to an existing chunk.
3155 
3156         ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
3157 
3158         le = Vcb->chunks.Flink;
3159         while (le != &Vcb->chunks) {
3160             c = CONTAINING_RECORD(le, chunk, list_entry);
3161 
3162             if (!c->readonly && !c->reloc) {
3163                 acquire_chunk_lock(c, Vcb);
3164 
3165                 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3166                     insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3167                     written += newlen;
3168 
3169                     if (written == orig_length) {
3170                         ExReleaseResourceLite(&Vcb->chunk_lock);
3171                         return STATUS_SUCCESS;
3172                     } else {
3173                         done = true;
3174                         start_data += newlen;
3175                         irp_offset += newlen;
3176                         length -= newlen;
3177                         data = &((uint8_t*)data)[newlen];
3178                         break;
3179                     }
3180                 } else
3181                     release_chunk_lock(c, Vcb);
3182             }
3183 
3184             le = le->Flink;
3185         }
3186 
3187         ExReleaseResourceLite(&Vcb->chunk_lock);
3188 
3189         if (done) continue;
3190 
3191         // Otherwise, see if we can put it in a new chunk.
3192 
3193         ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
3194 
3195         Status = alloc_chunk(Vcb, flags, &c, false);
3196 
3197         ExReleaseResourceLite(&Vcb->chunk_lock);
3198 
3199         if (!NT_SUCCESS(Status)) {
3200             ERR("alloc_chunk returned %08x\n", Status);
3201             return Status;
3202         }
3203 
3204         if (c) {
3205             acquire_chunk_lock(c, Vcb);
3206 
3207             if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3208                 insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3209                 written += newlen;
3210 
3211                 if (written == orig_length)
3212                     return STATUS_SUCCESS;
3213                 else {
3214                     done = true;
3215                     start_data += newlen;
3216                     irp_offset += newlen;
3217                     length -= newlen;
3218                     data = &((uint8_t*)data)[newlen];
3219                 }
3220             } else
3221                 release_chunk_lock(c, Vcb);
3222         }
3223 
3224         if (!done) {
3225             Status = insert_chunk_fragmented(fcb, start_data, length, data, false, rollback);
3226             if (!NT_SUCCESS(Status))
3227                 ERR("insert_chunk_fragmented returned %08x\n", Status);
3228 
3229             return Status;
3230         }
3231     }
3232 
3233     return STATUS_DISK_FULL;
3234 }
3235 
3236 NTSTATUS truncate_file(fcb* fcb, uint64_t end, PIRP Irp, LIST_ENTRY* rollback) {
3237     NTSTATUS Status;
3238 
3239     // FIXME - convert into inline extent if short enough
3240 
3241     if (end > 0 && fcb_is_inline(fcb)) {
3242         uint8_t* buf;
3243         bool make_inline = end <= fcb->Vcb->options.max_inline;
3244 
3245         buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(make_inline ? (offsetof(EXTENT_DATA, data[0]) + end) : sector_align(end, fcb->Vcb->superblock.sector_size)), ALLOC_TAG);
3246         if (!buf) {
3247             ERR("out of memory\n");
3248             return STATUS_INSUFFICIENT_RESOURCES;
3249         }
3250 
3251         Status = read_file(fcb, make_inline ? (buf + offsetof(EXTENT_DATA, data[0])) : buf, 0, end, NULL, Irp);
3252         if (!NT_SUCCESS(Status)) {
3253             ERR("read_file returned %08x\n", Status);
3254             ExFreePool(buf);
3255             return Status;
3256         }
3257 
3258         Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3259         if (!NT_SUCCESS(Status)) {
3260             ERR("excise_extents returned %08x\n", Status);
3261             ExFreePool(buf);
3262             return Status;
3263         }
3264 
3265         if (!make_inline) {
3266             RtlZeroMemory(buf + end, (ULONG)(sector_align(end, fcb->Vcb->superblock.sector_size) - end));
3267 
3268             Status = do_write_file(fcb, 0, sector_align(end, fcb->Vcb->superblock.sector_size), buf, Irp, false, 0, rollback);
3269             if (!NT_SUCCESS(Status)) {
3270                 ERR("do_write_file returned %08x\n", Status);
3271                 ExFreePool(buf);
3272                 return Status;
3273             }
3274         } else {
3275             EXTENT_DATA* ed = (EXTENT_DATA*)buf;
3276 
3277             ed->generation = fcb->Vcb->superblock.generation;
3278             ed->decoded_size = end;
3279             ed->compression = BTRFS_COMPRESSION_NONE;
3280             ed->encryption = BTRFS_ENCRYPTION_NONE;
3281             ed->encoding = BTRFS_ENCODING_NONE;
3282             ed->type = EXTENT_TYPE_INLINE;
3283 
3284             Status = add_extent_to_fcb(fcb, 0, ed, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end), false, NULL, rollback);
3285             if (!NT_SUCCESS(Status)) {
3286                 ERR("add_extent_to_fcb returned %08x\n", Status);
3287                 ExFreePool(buf);
3288                 return Status;
3289             }
3290 
3291             fcb->inode_item.st_blocks += end;
3292         }
3293 
3294         ExFreePool(buf);
3295         return STATUS_SUCCESS;
3296     }
3297 
3298     Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
3299                             sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
3300     if (!NT_SUCCESS(Status)) {
3301         ERR("excise_extents returned %08x\n", Status);
3302         return Status;
3303     }
3304 
3305     fcb->inode_item.st_size = end;
3306     fcb->inode_item_changed = true;
3307     TRACE("setting st_size to %I64x\n", end);
3308 
3309     fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
3310     fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
3311     fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
3312     // FIXME - inform cache manager of this
3313 
3314     TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
3315 
3316     return STATUS_SUCCESS;
3317 }
3318 
3319 NTSTATUS extend_file(fcb* fcb, file_ref* fileref, uint64_t end, bool prealloc, PIRP Irp, LIST_ENTRY* rollback) {
3320     uint64_t oldalloc, newalloc;
3321     bool cur_inline;
3322     NTSTATUS Status;
3323 
3324     TRACE("(%p, %p, %x, %u)\n", fcb, fileref, end, prealloc);
3325 
3326     if (fcb->ads) {
3327         if (end > 0xffff)
3328             return STATUS_DISK_FULL;
3329 
3330         return stream_set_end_of_file_information(fcb->Vcb, (uint16_t)end, fcb, fileref, false);
3331     } else {
3332         extent* ext = NULL;
3333         LIST_ENTRY* le;
3334 
3335         le = fcb->extents.Blink;
3336         while (le != &fcb->extents) {
3337             extent* ext2 = CONTAINING_RECORD(le, extent, list_entry);
3338 
3339             if (!ext2->ignore) {
3340                 ext = ext2;
3341                 break;
3342             }
3343 
3344             le = le->Blink;
3345         }
3346 
3347         oldalloc = 0;
3348         if (ext) {
3349             EXTENT_DATA* ed = &ext->extent_data;
3350             EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3351 
3352             oldalloc = ext->offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
3353             cur_inline = ed->type == EXTENT_TYPE_INLINE;
3354 
3355             if (cur_inline && end > fcb->Vcb->options.max_inline) {
3356                 uint64_t origlength, length;
3357                 uint8_t* data;
3358 
3359                 TRACE("giving inline file proper extents\n");
3360 
3361                 origlength = ed->decoded_size;
3362 
3363                 cur_inline = false;
3364 
3365                 length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
3366 
3367                 data = ExAllocatePoolWithTag(PagedPool, (ULONG)length, ALLOC_TAG);
3368                 if (!data) {
3369                     ERR("could not allocate %I64x bytes for data\n", length);
3370                     return STATUS_INSUFFICIENT_RESOURCES;
3371                 }
3372 
3373                 Status = read_file(fcb, data, 0, origlength, NULL, Irp);
3374                 if (!NT_SUCCESS(Status)) {
3375                     ERR("read_file returned %08x\n", Status);
3376                     ExFreePool(data);
3377                     return Status;
3378                 }
3379 
3380                 RtlZeroMemory(data + origlength, (ULONG)(length - origlength));
3381 
3382                 Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3383                 if (!NT_SUCCESS(Status)) {
3384                     ERR("excise_extents returned %08x\n", Status);
3385                     ExFreePool(data);
3386                     return Status;
3387                 }
3388 
3389                 Status = do_write_file(fcb, 0, length, data, Irp, false, 0, rollback);
3390                 if (!NT_SUCCESS(Status)) {
3391                     ERR("do_write_file returned %08x\n", Status);
3392                     ExFreePool(data);
3393                     return Status;
3394                 }
3395 
3396                 oldalloc = ext->offset + length;
3397 
3398                 ExFreePool(data);
3399             }
3400 
3401             if (cur_inline) {
3402                 uint16_t edsize;
3403 
3404                 if (end > oldalloc) {
3405                     edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end - ext->offset);
3406                     ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3407 
3408                     if (!ed) {
3409                         ERR("out of memory\n");
3410                         return STATUS_INSUFFICIENT_RESOURCES;
3411                     }
3412 
3413                     ed->generation = fcb->Vcb->superblock.generation;
3414                     ed->decoded_size = end - ext->offset;
3415                     ed->compression = BTRFS_COMPRESSION_NONE;
3416                     ed->encryption = BTRFS_ENCRYPTION_NONE;
3417                     ed->encoding = BTRFS_ENCODING_NONE;
3418                     ed->type = EXTENT_TYPE_INLINE;
3419 
3420                     Status = read_file(fcb, ed->data, ext->offset, oldalloc, NULL, Irp);
3421                     if (!NT_SUCCESS(Status)) {
3422                         ERR("read_file returned %08x\n", Status);
3423                         ExFreePool(ed);
3424                         return Status;
3425                     }
3426 
3427                     RtlZeroMemory(ed->data + oldalloc - ext->offset, (ULONG)(end - oldalloc));
3428 
3429                     remove_fcb_extent(fcb, ext, rollback);
3430 
3431                     Status = add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback);
3432                     if (!NT_SUCCESS(Status)) {
3433                         ERR("add_extent_to_fcb returned %08x\n", Status);
3434                         ExFreePool(ed);
3435                         return Status;
3436                     }
3437 
3438                     ExFreePool(ed);
3439 
3440                     fcb->extents_changed = true;
3441                     mark_fcb_dirty(fcb);
3442                 }
3443 
3444                 TRACE("extending inline file (oldalloc = %I64x, end = %I64x)\n", oldalloc, end);
3445 
3446                 fcb->inode_item.st_size = end;
3447                 TRACE("setting st_size to %I64x\n", end);
3448 
3449                 fcb->inode_item.st_blocks = end;
3450 
3451                 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3452             } else {
3453                 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3454 
3455                 if (newalloc > oldalloc) {
3456                     if (prealloc) {
3457                         // FIXME - try and extend previous extent first
3458 
3459                         Status = insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback);
3460 
3461                         if (!NT_SUCCESS(Status)) {
3462                             ERR("insert_prealloc_extent returned %08x\n", Status);
3463                             return Status;
3464                         }
3465                     }
3466 
3467                     fcb->extents_changed = true;
3468                 }
3469 
3470                 fcb->inode_item.st_size = end;
3471                 fcb->inode_item_changed = true;
3472                 mark_fcb_dirty(fcb);
3473 
3474                 TRACE("setting st_size to %I64x\n", end);
3475 
3476                 TRACE("newalloc = %I64x\n", newalloc);
3477 
3478                 fcb->Header.AllocationSize.QuadPart = newalloc;
3479                 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3480             }
3481         } else {
3482             if (end > fcb->Vcb->options.max_inline) {
3483                 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3484 
3485                 if (prealloc) {
3486                     Status = insert_prealloc_extent(fcb, 0, newalloc, rollback);
3487 
3488                     if (!NT_SUCCESS(Status)) {
3489                         ERR("insert_prealloc_extent returned %08x\n", Status);
3490                         return Status;
3491                     }
3492                 }
3493 
3494                 fcb->extents_changed = true;
3495                 fcb->inode_item_changed = true;
3496                 mark_fcb_dirty(fcb);
3497 
3498                 fcb->inode_item.st_size = end;
3499                 TRACE("setting st_size to %I64x\n", end);
3500 
3501                 TRACE("newalloc = %I64x\n", newalloc);
3502 
3503                 fcb->Header.AllocationSize.QuadPart = newalloc;
3504                 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3505             } else {
3506                 EXTENT_DATA* ed;
3507                 uint16_t edsize;
3508 
3509                 edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end);
3510                 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3511 
3512                 if (!ed) {
3513                     ERR("out of memory\n");
3514                     return STATUS_INSUFFICIENT_RESOURCES;
3515                 }
3516 
3517                 ed->generation = fcb->Vcb->superblock.generation;
3518                 ed->decoded_size = end;
3519                 ed->compression = BTRFS_COMPRESSION_NONE;
3520                 ed->encryption = BTRFS_ENCRYPTION_NONE;
3521                 ed->encoding = BTRFS_ENCODING_NONE;
3522                 ed->type = EXTENT_TYPE_INLINE;
3523 
3524                 RtlZeroMemory(ed->data, (ULONG)end);
3525 
3526                 Status = add_extent_to_fcb(fcb, 0, ed, edsize, false, NULL, rollback);
3527                 if (!NT_SUCCESS(Status)) {
3528                     ERR("add_extent_to_fcb returned %08x\n", Status);
3529                     ExFreePool(ed);
3530                     return Status;
3531                 }
3532 
3533                 ExFreePool(ed);
3534 
3535                 fcb->extents_changed = true;
3536                 fcb->inode_item_changed = true;
3537                 mark_fcb_dirty(fcb);
3538 
3539                 fcb->inode_item.st_size = end;
3540                 TRACE("setting st_size to %I64x\n", end);
3541 
3542                 fcb->inode_item.st_blocks = end;
3543 
3544                 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3545             }
3546         }
3547     }
3548 
3549     return STATUS_SUCCESS;
3550 }
3551 
3552 static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, uint64_t start_data, uint64_t end_data, void* data, uint64_t* written,
3553                                        PIRP Irp, bool file_write, uint64_t irp_offset, ULONG priority, LIST_ENTRY* rollback) {
3554     EXTENT_DATA* ed = &ext->extent_data;
3555     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3556     NTSTATUS Status;
3557     chunk* c = NULL;
3558 
3559     if (start_data <= ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace all
3560         extent* newext;
3561 
3562         newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3563         if (!newext) {
3564             ERR("out of memory\n");
3565             return STATUS_INSUFFICIENT_RESOURCES;
3566         }
3567 
3568         RtlCopyMemory(&newext->extent_data, &ext->extent_data, ext->datalen);
3569 
3570         newext->extent_data.type = EXTENT_TYPE_REGULAR;
3571 
3572         Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)ed2->num_bytes, Irp,
3573                                      NULL, file_write, irp_offset + ext->offset - start_data, priority);
3574         if (!NT_SUCCESS(Status)) {
3575             ERR("write_data_complete returned %08x\n", Status);
3576             return Status;
3577         }
3578 
3579         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3580             ULONG sl = (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size);
3581             uint32_t* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
3582 
3583             if (!csum) {
3584                 ERR("out of memory\n");
3585                 ExFreePool(newext);
3586                 return STATUS_INSUFFICIENT_RESOURCES;
3587             }
3588 
3589             Status = calc_csum(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3590             if (!NT_SUCCESS(Status)) {
3591                 ERR("calc_csum returned %08x\n", Status);
3592                 ExFreePool(csum);
3593                 ExFreePool(newext);
3594                 return Status;
3595             }
3596 
3597             newext->csum = csum;
3598         } else
3599             newext->csum = NULL;
3600 
3601         *written = ed2->num_bytes;
3602 
3603         newext->offset = ext->offset;
3604         newext->datalen = ext->datalen;
3605         newext->unique = ext->unique;
3606         newext->ignore = false;
3607         newext->inserted = true;
3608         InsertHeadList(&ext->list_entry, &newext->list_entry);
3609 
3610         add_insert_extent_rollback(rollback, fcb, newext);
3611 
3612         remove_fcb_extent(fcb, ext, rollback);
3613 
3614         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3615     } else if (start_data <= ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace beginning
3616         EXTENT_DATA2* ned2;
3617         extent *newext1, *newext2;
3618 
3619         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3620         if (!newext1) {
3621             ERR("out of memory\n");
3622             return STATUS_INSUFFICIENT_RESOURCES;
3623         }
3624 
3625         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3626         if (!newext2) {
3627             ERR("out of memory\n");
3628             ExFreePool(newext1);
3629             return STATUS_INSUFFICIENT_RESOURCES;
3630         }
3631 
3632         RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3633         newext1->extent_data.type = EXTENT_TYPE_REGULAR;
3634         ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3635         ned2->num_bytes = end_data - ext->offset;
3636 
3637         RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3638         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3639         ned2->offset += end_data - ext->offset;
3640         ned2->num_bytes -= end_data - ext->offset;
3641 
3642         Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)(end_data - ext->offset),
3643                                      Irp, NULL, file_write, irp_offset + ext->offset - start_data, priority);
3644         if (!NT_SUCCESS(Status)) {
3645             ERR("write_data_complete returned %08x\n", Status);
3646             ExFreePool(newext1);
3647             ExFreePool(newext2);
3648             return Status;
3649         }
3650 
3651         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3652             ULONG sl = (ULONG)((end_data - ext->offset) / fcb->Vcb->superblock.sector_size);
3653             uint32_t* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
3654 
3655             if (!csum) {
3656                 ERR("out of memory\n");
3657                 ExFreePool(newext1);
3658                 ExFreePool(newext2);
3659                 return STATUS_INSUFFICIENT_RESOURCES;
3660             }
3661 
3662             Status = calc_csum(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3663             if (!NT_SUCCESS(Status)) {
3664                 ERR("calc_csum returned %08x\n", Status);
3665                 ExFreePool(newext1);
3666                 ExFreePool(newext2);
3667                 ExFreePool(csum);
3668                 return Status;
3669             }
3670 
3671             newext1->csum = csum;
3672         } else
3673             newext1->csum = NULL;
3674 
3675         *written = end_data - ext->offset;
3676 
3677         newext1->offset = ext->offset;
3678         newext1->datalen = ext->datalen;
3679         newext1->unique = ext->unique;
3680         newext1->ignore = false;
3681         newext1->inserted = true;
3682         InsertHeadList(&ext->list_entry, &newext1->list_entry);
3683 
3684         add_insert_extent_rollback(rollback, fcb, newext1);
3685 
3686         newext2->offset = end_data;
3687         newext2->datalen = ext->datalen;
3688         newext2->unique = ext->unique;
3689         newext2->ignore = false;
3690         newext2->inserted = true;
3691         newext2->csum = NULL;
3692         add_extent(fcb, &newext1->list_entry, newext2);
3693 
3694         add_insert_extent_rollback(rollback, fcb, newext2);
3695 
3696         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3697 
3698         if (!c)
3699             ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3700         else {
3701             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3702                                                 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3703 
3704             if (!NT_SUCCESS(Status)) {
3705                 ERR("update_changed_extent_ref returned %08x\n", Status);
3706                 return Status;
3707             }
3708         }
3709 
3710         remove_fcb_extent(fcb, ext, rollback);
3711     } else if (start_data > ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace end
3712         EXTENT_DATA2* ned2;
3713         extent *newext1, *newext2;
3714 
3715         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3716         if (!newext1) {
3717             ERR("out of memory\n");
3718             return STATUS_INSUFFICIENT_RESOURCES;
3719         }
3720 
3721         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3722         if (!newext2) {
3723             ERR("out of memory\n");
3724             ExFreePool(newext1);
3725             return STATUS_INSUFFICIENT_RESOURCES;
3726         }
3727 
3728         RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3729 
3730         ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3731         ned2->num_bytes = start_data - ext->offset;
3732 
3733         RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3734 
3735         newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3736         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3737         ned2->offset += start_data - ext->offset;
3738         ned2->num_bytes = ext->offset + ed2->num_bytes - start_data;
3739 
3740         Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)ned2->num_bytes, Irp, NULL, file_write, irp_offset, priority);
3741         if (!NT_SUCCESS(Status)) {
3742             ERR("write_data_complete returned %08x\n", Status);
3743             ExFreePool(newext1);
3744             ExFreePool(newext2);
3745             return Status;
3746         }
3747 
3748         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3749             ULONG sl = (ULONG)(ned2->num_bytes / fcb->Vcb->superblock.sector_size);
3750             uint32_t* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
3751 
3752             if (!csum) {
3753                 ERR("out of memory\n");
3754                 ExFreePool(newext1);
3755                 ExFreePool(newext2);
3756                 return STATUS_INSUFFICIENT_RESOURCES;
3757             }
3758 
3759             Status = calc_csum(fcb->Vcb, data, sl, csum);
3760             if (!NT_SUCCESS(Status)) {
3761                 ERR("calc_csum returned %08x\n", Status);
3762                 ExFreePool(newext1);
3763                 ExFreePool(newext2);
3764                 ExFreePool(csum);
3765                 return Status;
3766             }
3767 
3768             newext2->csum = csum;
3769         } else
3770             newext2->csum = NULL;
3771 
3772         *written = ned2->num_bytes;
3773 
3774         newext1->offset = ext->offset;
3775         newext1->datalen = ext->datalen;
3776         newext1->unique = ext->unique;
3777         newext1->ignore = false;
3778         newext1->inserted = true;
3779         newext1->csum = NULL;
3780         InsertHeadList(&ext->list_entry, &newext1->list_entry);
3781 
3782         add_insert_extent_rollback(rollback, fcb, newext1);
3783 
3784         newext2->offset = start_data;
3785         newext2->datalen = ext->datalen;
3786         newext2->unique = ext->unique;
3787         newext2->ignore = false;
3788         newext2->inserted = true;
3789         add_extent(fcb, &newext1->list_entry, newext2);
3790 
3791         add_insert_extent_rollback(rollback, fcb, newext2);
3792 
3793         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3794 
3795         if (!c)
3796             ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3797         else {
3798             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3799                                                fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3800 
3801             if (!NT_SUCCESS(Status)) {
3802                 ERR("update_changed_extent_ref returned %08x\n", Status);
3803                 return Status;
3804             }
3805         }
3806 
3807         remove_fcb_extent(fcb, ext, rollback);
3808     } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle
3809         EXTENT_DATA2* ned2;
3810         extent *newext1, *newext2, *newext3;
3811 
3812         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3813         if (!newext1) {
3814             ERR("out of memory\n");
3815             return STATUS_INSUFFICIENT_RESOURCES;
3816         }
3817 
3818         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3819         if (!newext2) {
3820             ERR("out of memory\n");
3821             ExFreePool(newext1);
3822             return STATUS_INSUFFICIENT_RESOURCES;
3823         }
3824 
3825         newext3 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3826         if (!newext3) {
3827             ERR("out of memory\n");
3828             ExFreePool(newext1);
3829             ExFreePool(newext2);
3830             return STATUS_INSUFFICIENT_RESOURCES;
3831         }
3832 
3833         RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3834         RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3835         RtlCopyMemory(&newext3->extent_data, &ext->extent_data, ext->datalen);
3836 
3837         ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3838         ned2->num_bytes = start_data - ext->offset;
3839 
3840         newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3841         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3842         ned2->offset += start_data - ext->offset;
3843         ned2->num_bytes = end_data - start_data;
3844 
3845         ned2 = (EXTENT_DATA2*)newext3->extent_data.data;
3846         ned2->offset += end_data - ext->offset;
3847         ned2->num_bytes -= end_data - ext->offset;
3848 
3849         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3850         Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)(end_data - start_data), Irp, NULL, file_write, irp_offset, priority);
3851         if (!NT_SUCCESS(Status)) {
3852             ERR("write_data_complete returned %08x\n", Status);
3853             ExFreePool(newext1);
3854             ExFreePool(newext2);
3855             ExFreePool(newext3);
3856             return Status;
3857         }
3858 
3859         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3860             ULONG sl = (ULONG)((end_data - start_data) / fcb->Vcb->superblock.sector_size);
3861             uint32_t* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
3862 
3863             if (!csum) {
3864                 ERR("out of memory\n");
3865                 ExFreePool(newext1);
3866                 ExFreePool(newext2);
3867                 ExFreePool(newext3);
3868                 return STATUS_INSUFFICIENT_RESOURCES;
3869             }
3870 
3871             Status = calc_csum(fcb->Vcb, data, sl, csum);
3872             if (!NT_SUCCESS(Status)) {
3873                 ERR("calc_csum returned %08x\n", Status);
3874                 ExFreePool(newext1);
3875                 ExFreePool(newext2);
3876                 ExFreePool(newext3);
3877                 ExFreePool(csum);
3878                 return Status;
3879             }
3880 
3881             newext2->csum = csum;
3882         } else
3883             newext2->csum = NULL;
3884 
3885         *written = end_data - start_data;
3886 
3887         newext1->offset = ext->offset;
3888         newext1->datalen = ext->datalen;
3889         newext1->unique = ext->unique;
3890         newext1->ignore = false;
3891         newext1->inserted = true;
3892         newext1->csum = NULL;
3893         InsertHeadList(&ext->list_entry, &newext1->list_entry);
3894 
3895         add_insert_extent_rollback(rollback, fcb, newext1);
3896 
3897         newext2->offset = start_data;
3898         newext2->datalen = ext->datalen;
3899         newext2->unique = ext->unique;
3900         newext2->ignore = false;
3901         newext2->inserted = true;
3902         add_extent(fcb, &newext1->list_entry, newext2);
3903 
3904         add_insert_extent_rollback(rollback, fcb, newext2);
3905 
3906         newext3->offset = end_data;
3907         newext3->datalen = ext->datalen;
3908         newext3->unique = ext->unique;
3909         newext3->ignore = false;
3910         newext3->inserted = true;
3911         newext3->csum = NULL;
3912         add_extent(fcb, &newext2->list_entry, newext3);
3913 
3914         add_insert_extent_rollback(rollback, fcb, newext3);
3915 
3916         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3917 
3918         if (!c)
3919             ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3920         else {
3921             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2,
3922                                                fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3923 
3924             if (!NT_SUCCESS(Status)) {
3925                 ERR("update_changed_extent_ref returned %08x\n", Status);
3926                 return Status;
3927             }
3928         }
3929 
3930         remove_fcb_extent(fcb, ext, rollback);
3931     }
3932 
3933     if (c)
3934         c->changed = true;
3935 
3936     return STATUS_SUCCESS;
3937 }
3938 
3939 NTSTATUS do_write_file(fcb* fcb, uint64_t start, uint64_t end_data, void* data, PIRP Irp, bool file_write, uint32_t irp_offset, LIST_ENTRY* rollback) {
3940     NTSTATUS Status;
3941     LIST_ENTRY *le, *le2;
3942     uint64_t written = 0, length = end_data - start;
3943     uint64_t last_cow_start;
3944     ULONG priority = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority;
3945 #ifdef DEBUG_PARANOID
3946     uint64_t last_off;
3947 #endif
3948     bool extents_changed = false;
3949 
3950     last_cow_start = 0;
3951 
3952     le = fcb->extents.Flink;
3953     while (le != &fcb->extents) {
3954         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
3955 
3956         le2 = le->Flink;
3957 
3958         if (!ext->ignore) {
3959             EXTENT_DATA* ed = &ext->extent_data;
3960             EXTENT_DATA2* ed2 = ed->type == EXTENT_TYPE_INLINE ? NULL : (EXTENT_DATA2*)ed->data;
3961             uint64_t len;
3962 
3963             len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
3964 
3965             if (ext->offset + len <= start)
3966                 goto nextitem;
3967 
3968             if (ext->offset > start + written + length)
3969                 break;
3970 
3971             if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique && ed->compression == BTRFS_COMPRESSION_NONE) {
3972                 if (max(last_cow_start, start + written) < ext->offset) {
3973                     uint64_t start_write = max(last_cow_start, start + written);
3974 
3975                     extents_changed = true;
3976 
3977                     Status = excise_extents(fcb->Vcb, fcb, start_write, ext->offset, Irp, rollback);
3978                     if (!NT_SUCCESS(Status)) {
3979                         ERR("excise_extents returned %08x\n", Status);
3980                         return Status;
3981                     }
3982 
3983                     Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
3984                     if (!NT_SUCCESS(Status)) {
3985                         ERR("insert_extent returned %08x\n", Status);
3986                         return Status;
3987                     }
3988 
3989                     written += ext->offset - start_write;
3990                     length -= ext->offset - start_write;
3991 
3992                     if (length == 0)
3993                         break;
3994                 }
3995 
3996                 if (ed->type == EXTENT_TYPE_REGULAR) {
3997                     uint64_t writeaddr = ed2->address + ed2->offset + start + written - ext->offset;
3998                     uint64_t write_len = min(len, length);
3999                     chunk* c;
4000 
4001                     TRACE("doing non-COW write to %I64x\n", writeaddr);
4002 
4003                     Status = write_data_complete(fcb->Vcb, writeaddr, (uint8_t*)data + written, (uint32_t)write_len, Irp, NULL, file_write, irp_offset + written, priority);
4004                     if (!NT_SUCCESS(Status)) {
4005                         ERR("write_data_complete returned %08x\n", Status);
4006                         return Status;
4007                     }
4008 
4009                     c = get_chunk_from_address(fcb->Vcb, writeaddr);
4010                     if (c)
4011                         c->changed = true;
4012 
4013                     // This shouldn't ever get called - nocow files should always also be nosum.
4014                     if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
4015                         calc_csum(fcb->Vcb, (uint8_t*)data + written, (uint32_t)(write_len / fcb->Vcb->superblock.sector_size),
4016                                   &ext->csum[(start + written - ext->offset) / fcb->Vcb->superblock.sector_size]);
4017 
4018                         ext->inserted = true;
4019                         extents_changed = true;
4020                     }
4021 
4022                     written += write_len;
4023                     length -= write_len;
4024 
4025                     if (length == 0)
4026                         break;
4027                 } else if (ed->type == EXTENT_TYPE_PREALLOC) {
4028                     uint64_t write_len;
4029 
4030                     Status = do_write_file_prealloc(fcb, ext, start + written, end_data, (uint8_t*)data + written, &write_len,
4031                                                     Irp, file_write, irp_offset + written, priority, rollback);
4032                     if (!NT_SUCCESS(Status)) {
4033                         ERR("do_write_file_prealloc returned %08x\n", Status);
4034                         return Status;
4035                     }
4036 
4037                     extents_changed = true;
4038 
4039                     written += write_len;
4040                     length -= write_len;
4041 
4042                     if (length == 0)
4043                         break;
4044                 }
4045 
4046                 last_cow_start = ext->offset + len;
4047             }
4048         }
4049 
4050 nextitem:
4051         le = le2;
4052     }
4053 
4054     if (length > 0) {
4055         uint64_t start_write = max(last_cow_start, start + written);
4056 
4057         extents_changed = true;
4058 
4059         Status = excise_extents(fcb->Vcb, fcb, start_write, end_data, Irp, rollback);
4060         if (!NT_SUCCESS(Status)) {
4061             ERR("excise_extents returned %08x\n", Status);
4062             return Status;
4063         }
4064 
4065         Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
4066         if (!NT_SUCCESS(Status)) {
4067             ERR("insert_extent returned %08x\n", Status);
4068             return Status;
4069         }
4070     }
4071 
4072 #ifdef DEBUG_PARANOID
4073     last_off = 0xffffffffffffffff;
4074 
4075     le = fcb->extents.Flink;
4076     while (le != &fcb->extents) {
4077         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4078 
4079         if (!ext->ignore) {
4080             if (ext->offset == last_off) {
4081                 ERR("offset %I64x duplicated\n", ext->offset);
4082                 int3;
4083             } else if (ext->offset < last_off && last_off != 0xffffffffffffffff) {
4084                 ERR("offsets out of order\n");
4085                 int3;
4086             }
4087 
4088             last_off = ext->offset;
4089         }
4090 
4091         le = le->Flink;
4092     }
4093 #endif
4094 
4095     if (extents_changed) {
4096         fcb->extents_changed = true;
4097         mark_fcb_dirty(fcb);
4098     }
4099 
4100     return STATUS_SUCCESS;
4101 }
4102 
4103 NTSTATUS write_compressed(fcb* fcb, uint64_t start_data, uint64_t end_data, void* data, PIRP Irp, LIST_ENTRY* rollback) {
4104     NTSTATUS Status;
4105     uint64_t i;
4106 
4107     for (i = 0; i < sector_align(end_data - start_data, COMPRESSED_EXTENT_SIZE) / COMPRESSED_EXTENT_SIZE; i++) {
4108         uint64_t s2, e2;
4109         bool compressed;
4110 
4111         s2 = start_data + (i * COMPRESSED_EXTENT_SIZE);
4112         e2 = min(s2 + COMPRESSED_EXTENT_SIZE, end_data);
4113 
4114         Status = write_compressed_bit(fcb, s2, e2, (uint8_t*)data + (i * COMPRESSED_EXTENT_SIZE), &compressed, Irp, rollback);
4115 
4116         if (!NT_SUCCESS(Status)) {
4117             ERR("write_compressed_bit returned %08x\n", Status);
4118             return Status;
4119         }
4120 
4121         // If the first 128 KB of a file is incompressible, we set the nocompress flag so we don't
4122         // bother with the rest of it.
4123         if (s2 == 0 && e2 == COMPRESSED_EXTENT_SIZE && !compressed && !fcb->Vcb->options.compress_force) {
4124             fcb->inode_item.flags |= BTRFS_INODE_NOCOMPRESS;
4125             fcb->inode_item_changed = true;
4126             mark_fcb_dirty(fcb);
4127 
4128             // write subsequent data non-compressed
4129             if (e2 < end_data) {
4130                 Status = do_write_file(fcb, e2, end_data, (uint8_t*)data + e2, Irp, false, 0, rollback);
4131 
4132                 if (!NT_SUCCESS(Status)) {
4133                     ERR("do_write_file returned %08x\n", Status);
4134                     return Status;
4135                 }
4136             }
4137 
4138             return STATUS_SUCCESS;
4139         }
4140     }
4141 
4142     return STATUS_SUCCESS;
4143 }
4144 
4145 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, bool paging_io, bool no_cache,
4146                      bool wait, bool deferred_write, bool write_irp, LIST_ENTRY* rollback) {
4147     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4148     PFILE_OBJECT FileObject = IrpSp->FileObject;
4149     EXTENT_DATA* ed2;
4150     uint64_t off64, newlength, start_data, end_data;
4151     uint32_t bufhead;
4152     bool make_inline;
4153     INODE_ITEM* origii;
4154     bool changed_length = false;
4155     NTSTATUS Status;
4156     LARGE_INTEGER time;
4157     BTRFS_TIME now;
4158     fcb* fcb;
4159     ccb* ccb;
4160     file_ref* fileref;
4161     bool paging_lock = false, acquired_fcb_lock = false, acquired_tree_lock = false, pagefile;
4162     ULONG filter = 0;
4163 
4164     TRACE("(%p, %p, %I64x, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
4165 
4166     if (*length == 0) {
4167         TRACE("returning success for zero-length write\n");
4168         return STATUS_SUCCESS;
4169     }
4170 
4171     if (!FileObject) {
4172         ERR("error - FileObject was NULL\n");
4173         return STATUS_ACCESS_DENIED;
4174     }
4175 
4176     fcb = FileObject->FsContext;
4177     ccb = FileObject->FsContext2;
4178     fileref = ccb ? ccb->fileref : NULL;
4179 
4180     if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
4181         WARN("tried to write to something other than a file or symlink (inode %I64x, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
4182         return STATUS_INVALID_DEVICE_REQUEST;
4183     }
4184 
4185     if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1)
4186         offset = fcb->Header.FileSize;
4187 
4188     off64 = offset.QuadPart;
4189 
4190     TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
4191 
4192     if (!no_cache && !CcCanIWrite(FileObject, *length, wait, deferred_write))
4193         return STATUS_PENDING;
4194 
4195     if (!wait && no_cache)
4196         return STATUS_PENDING;
4197 
4198     if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
4199         IO_STATUS_BLOCK iosb;
4200 
4201         ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, true);
4202 
4203         CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
4204 
4205         if (!NT_SUCCESS(iosb.Status)) {
4206             ExReleaseResourceLite(fcb->Header.PagingIoResource);
4207             ERR("CcFlushCache returned %08x\n", iosb.Status);
4208             return iosb.Status;
4209         }
4210 
4211         paging_lock = true;
4212 
4213         CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, false);
4214     }
4215 
4216     if (paging_io) {
4217         if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, wait)) {
4218             Status = STATUS_PENDING;
4219             goto end;
4220         } else
4221             paging_lock = true;
4222     }
4223 
4224     pagefile = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE && paging_io;
4225 
4226     if (!pagefile && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
4227         if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4228             Status = STATUS_PENDING;
4229             goto end;
4230         } else
4231             acquired_tree_lock = true;
4232     }
4233 
4234     if (pagefile) {
4235         if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
4236             Status = STATUS_PENDING;
4237             goto end;
4238         } else
4239             acquired_fcb_lock = true;
4240     } else if (!ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
4241         if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
4242             Status = STATUS_PENDING;
4243             goto end;
4244         } else
4245             acquired_fcb_lock = true;
4246     }
4247 
4248     newlength = fcb->ads ? fcb->adsdata.Length : fcb->inode_item.st_size;
4249 
4250     if (fcb->deleted)
4251         newlength = 0;
4252 
4253     TRACE("newlength = %I64x\n", newlength);
4254 
4255     if (off64 + *length > newlength) {
4256         if (paging_io) {
4257             if (off64 >= newlength) {
4258                 TRACE("paging IO tried to write beyond end of file (file size = %I64x, offset = %I64x, length = %x)\n", newlength, off64, *length);
4259                 TRACE("FileObject: AllocationSize = %I64x, FileSize = %I64x, ValidDataLength = %I64x\n",
4260                     fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
4261                 Status = STATUS_SUCCESS;
4262                 goto end;
4263             }
4264 
4265             *length = (ULONG)(newlength - off64);
4266         } else {
4267             newlength = off64 + *length;
4268             changed_length = true;
4269 
4270             TRACE("extending length to %I64x\n", newlength);
4271         }
4272     }
4273 
4274     if (fcb->ads)
4275         make_inline = false;
4276     else if (fcb->type == BTRFS_TYPE_SYMLINK)
4277         make_inline = newlength <= (Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - offsetof(EXTENT_DATA, data[0]));
4278     else
4279         make_inline = newlength <= fcb->Vcb->options.max_inline;
4280 
4281     if (changed_length) {
4282         if (newlength > (uint64_t)fcb->Header.AllocationSize.QuadPart) {
4283             if (!acquired_tree_lock) {
4284                 // We need to acquire the tree lock if we don't have it already -
4285                 // we can't give an inline file proper extents at the same time as we're
4286                 // doing a flush.
4287                 if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4288                     Status = STATUS_PENDING;
4289                     goto end;
4290                 } else
4291                     acquired_tree_lock = true;
4292             }
4293 
4294             Status = extend_file(fcb, fileref, newlength, false, Irp, rollback);
4295             if (!NT_SUCCESS(Status)) {
4296                 ERR("extend_file returned %08x\n", Status);
4297                 goto end;
4298             }
4299         } else if (!fcb->ads)
4300             fcb->inode_item.st_size = newlength;
4301 
4302         fcb->Header.FileSize.QuadPart = newlength;
4303         fcb->Header.ValidDataLength.QuadPart = newlength;
4304 
4305         TRACE("AllocationSize = %I64x\n", fcb->Header.AllocationSize.QuadPart);
4306         TRACE("FileSize = %I64x\n", fcb->Header.FileSize.QuadPart);
4307         TRACE("ValidDataLength = %I64x\n", fcb->Header.ValidDataLength.QuadPart);
4308     }
4309 
4310     if (!no_cache) {
4311         Status = STATUS_SUCCESS;
4312 
4313         _SEH2_TRY {
4314             if (!FileObject->PrivateCacheMap || changed_length) {
4315                 CC_FILE_SIZES ccfs;
4316 
4317                 ccfs.AllocationSize = fcb->Header.AllocationSize;
4318                 ccfs.FileSize = fcb->Header.FileSize;
4319                 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4320 
4321                 if (!FileObject->PrivateCacheMap)
4322                     init_file_cache(FileObject, &ccfs);
4323 
4324                 CcSetFileSizes(FileObject, &ccfs);
4325             }
4326 
4327             if (IrpSp->MinorFunction & IRP_MN_MDL) {
4328                 CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus);
4329 
4330                 Status = Irp->IoStatus.Status;
4331                 goto end;
4332             } else {
4333                 if (fCcCopyWriteEx) {
4334                     TRACE("CcCopyWriteEx(%p, %I64x, %x, %u, %p, %p)\n", FileObject, off64, *length, wait, buf, Irp->Tail.Overlay.Thread);
4335                     if (!fCcCopyWriteEx(FileObject, &offset, *length, wait, buf, Irp->Tail.Overlay.Thread)) {
4336                         Status = STATUS_PENDING;
4337                         goto end;
4338                     }
4339                     TRACE("CcCopyWriteEx finished\n");
4340                 } else {
4341                     TRACE("CcCopyWrite(%p, %I64x, %x, %u, %p)\n", FileObject, off64, *length, wait, buf);
4342                     if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
4343                         Status = STATUS_PENDING;
4344                         goto end;
4345                     }
4346                     TRACE("CcCopyWrite finished\n");
4347                 }
4348             }
4349         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4350             Status = _SEH2_GetExceptionCode();
4351         } _SEH2_END;
4352 
4353         if (changed_length) {
4354             queue_notification_fcb(fcb->ads ? fileref->parent : fileref, fcb->ads ? FILE_NOTIFY_CHANGE_STREAM_SIZE : FILE_NOTIFY_CHANGE_SIZE,
4355                                    fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED, fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4356         }
4357 
4358         goto end;
4359     }
4360 
4361     if (fcb->ads) {
4362         if (changed_length) {
4363             char* data2;
4364 
4365             if (newlength > fcb->adsmaxlen) {
4366                 ERR("error - xattr too long (%I64u > %u)\n", newlength, fcb->adsmaxlen);
4367                 Status = STATUS_DISK_FULL;
4368                 goto end;
4369             }
4370 
4371             data2 = ExAllocatePoolWithTag(PagedPool, (ULONG)newlength, ALLOC_TAG);
4372             if (!data2) {
4373                 ERR("out of memory\n");
4374                 Status = STATUS_INSUFFICIENT_RESOURCES;
4375                 goto end;
4376             }
4377 
4378             if (fcb->adsdata.Buffer) {
4379                 RtlCopyMemory(data2, fcb->adsdata.Buffer, fcb->adsdata.Length);
4380                 ExFreePool(fcb->adsdata.Buffer);
4381             }
4382 
4383             if (newlength > fcb->adsdata.Length)
4384                 RtlZeroMemory(&data2[fcb->adsdata.Length], (ULONG)(newlength - fcb->adsdata.Length));
4385 
4386 
4387             fcb->adsdata.Buffer = data2;
4388             fcb->adsdata.Length = fcb->adsdata.MaximumLength = (USHORT)newlength;
4389 
4390             fcb->Header.AllocationSize.QuadPart = newlength;
4391             fcb->Header.FileSize.QuadPart = newlength;
4392             fcb->Header.ValidDataLength.QuadPart = newlength;
4393         }
4394 
4395         if (*length > 0)
4396             RtlCopyMemory(&fcb->adsdata.Buffer[off64], buf, *length);
4397 
4398         fcb->Header.ValidDataLength.QuadPart = newlength;
4399 
4400         mark_fcb_dirty(fcb);
4401 
4402         if (fileref)
4403             mark_fileref_dirty(fileref);
4404     } else {
4405         bool compress = write_fcb_compressed(fcb), no_buf = false;
4406         uint8_t* data;
4407 
4408         if (make_inline) {
4409             start_data = 0;
4410             end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
4411             bufhead = sizeof(EXTENT_DATA) - 1;
4412         } else if (compress) {
4413             start_data = off64 & ~(uint64_t)(COMPRESSED_EXTENT_SIZE - 1);
4414             end_data = min(sector_align(off64 + *length, COMPRESSED_EXTENT_SIZE),
4415                            sector_align(newlength, fcb->Vcb->superblock.sector_size));
4416             bufhead = 0;
4417         } else {
4418             start_data = off64 & ~(uint64_t)(fcb->Vcb->superblock.sector_size - 1);
4419             end_data = sector_align(off64 + *length, fcb->Vcb->superblock.sector_size);
4420             bufhead = 0;
4421         }
4422 
4423         if (fcb_is_inline(fcb))
4424             end_data = max(end_data, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size));
4425 
4426         fcb->Header.ValidDataLength.QuadPart = newlength;
4427         TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
4428 
4429         if (!make_inline && !compress && off64 == start_data && off64 + *length == end_data) {
4430             data = buf;
4431             no_buf = true;
4432         } else {
4433             data = ExAllocatePoolWithTag(PagedPool, (ULONG)(end_data - start_data + bufhead), ALLOC_TAG);
4434             if (!data) {
4435                 ERR("out of memory\n");
4436                 Status = STATUS_INSUFFICIENT_RESOURCES;
4437                 goto end;
4438             }
4439 
4440             RtlZeroMemory(data + bufhead, (ULONG)(end_data - start_data));
4441 
4442             TRACE("start_data = %I64x\n", start_data);
4443             TRACE("end_data = %I64x\n", end_data);
4444 
4445             if (off64 > start_data || off64 + *length < end_data) {
4446                 if (changed_length) {
4447                     if (fcb->inode_item.st_size > start_data)
4448                         Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp);
4449                     else
4450                         Status = STATUS_SUCCESS;
4451                 } else
4452                     Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp);
4453 
4454                 if (!NT_SUCCESS(Status)) {
4455                     ERR("read_file returned %08x\n", Status);
4456                     ExFreePool(data);
4457                     goto end;
4458                 }
4459             }
4460 
4461             RtlCopyMemory(data + bufhead + off64 - start_data, buf, *length);
4462         }
4463 
4464         if (make_inline) {
4465             Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback);
4466             if (!NT_SUCCESS(Status)) {
4467                 ERR("error - excise_extents returned %08x\n", Status);
4468                 ExFreePool(data);
4469                 goto end;
4470             }
4471 
4472             ed2 = (EXTENT_DATA*)data;
4473             ed2->generation = fcb->Vcb->superblock.generation;
4474             ed2->decoded_size = newlength;
4475             ed2->compression = BTRFS_COMPRESSION_NONE;
4476             ed2->encryption = BTRFS_ENCRYPTION_NONE;
4477             ed2->encoding = BTRFS_ENCODING_NONE;
4478             ed2->type = EXTENT_TYPE_INLINE;
4479 
4480             Status = add_extent_to_fcb(fcb, 0, ed2, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + newlength), false, NULL, rollback);
4481             if (!NT_SUCCESS(Status)) {
4482                 ERR("add_extent_to_fcb returned %08x\n", Status);
4483                 ExFreePool(data);
4484                 goto end;
4485             }
4486 
4487             fcb->inode_item.st_blocks += newlength;
4488         } else if (compress) {
4489             Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback);
4490 
4491             if (!NT_SUCCESS(Status)) {
4492                 ERR("write_compressed returned %08x\n", Status);
4493                 ExFreePool(data);
4494                 goto end;
4495             }
4496         } else {
4497             if (write_irp && Irp->MdlAddress && no_buf) {
4498                 bool locked = Irp->MdlAddress->MdlFlags & (MDL_PAGES_LOCKED | MDL_PARTIAL);
4499 
4500                 if (!locked) {
4501                     Status = STATUS_SUCCESS;
4502 
4503                     _SEH2_TRY {
4504                         MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
4505                     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4506                         Status = _SEH2_GetExceptionCode();
4507                     } _SEH2_END;
4508 
4509                     if (!NT_SUCCESS(Status)) {
4510                         ERR("MmProbeAndLockPages threw exception %08x\n", Status);
4511                         goto end;
4512                     }
4513                 }
4514 
4515                 _SEH2_TRY {
4516                     Status = do_write_file(fcb, start_data, end_data, data, Irp, true, 0, rollback);
4517                 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4518                     Status = _SEH2_GetExceptionCode();
4519                 } _SEH2_END;
4520 
4521                 if (!locked)
4522                     MmUnlockPages(Irp->MdlAddress);
4523             } else {
4524                 _SEH2_TRY {
4525                     Status = do_write_file(fcb, start_data, end_data, data, Irp, false, 0, rollback);
4526                 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4527                     Status = _SEH2_GetExceptionCode();
4528                 } _SEH2_END;
4529             }
4530 
4531             if (!NT_SUCCESS(Status)) {
4532                 ERR("do_write_file returned %08x\n", Status);
4533                 if (!no_buf) ExFreePool(data);
4534                 goto end;
4535             }
4536         }
4537 
4538         if (!no_buf)
4539             ExFreePool(data);
4540     }
4541 
4542     KeQuerySystemTime(&time);
4543     win_time_to_unix(time, &now);
4544 
4545     if (!pagefile) {
4546         if (fcb->ads) {
4547             if (fileref && fileref->parent)
4548                 origii = &fileref->parent->fcb->inode_item;
4549             else {
4550                 ERR("no parent fcb found for stream\n");
4551                 Status = STATUS_INTERNAL_ERROR;
4552                 goto end;
4553             }
4554         } else
4555             origii = &fcb->inode_item;
4556 
4557         origii->transid = Vcb->superblock.generation;
4558         origii->sequence++;
4559 
4560         if (!ccb->user_set_change_time)
4561             origii->st_ctime = now;
4562 
4563         if (!fcb->ads) {
4564             if (changed_length) {
4565                 TRACE("setting st_size to %I64x\n", newlength);
4566                 origii->st_size = newlength;
4567                 filter |= FILE_NOTIFY_CHANGE_SIZE;
4568             }
4569 
4570             fcb->inode_item_changed = true;
4571         } else {
4572             fileref->parent->fcb->inode_item_changed = true;
4573 
4574             if (changed_length)
4575                 filter |= FILE_NOTIFY_CHANGE_STREAM_SIZE;
4576 
4577             filter |= FILE_NOTIFY_CHANGE_STREAM_WRITE;
4578         }
4579 
4580         if (!ccb->user_set_write_time) {
4581             origii->st_mtime = now;
4582             filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
4583         }
4584 
4585         mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb);
4586     }
4587 
4588     if (changed_length) {
4589         CC_FILE_SIZES ccfs;
4590 
4591         ccfs.AllocationSize = fcb->Header.AllocationSize;
4592         ccfs.FileSize = fcb->Header.FileSize;
4593         ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4594 
4595         _SEH2_TRY {
4596             CcSetFileSizes(FileObject, &ccfs);
4597         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4598             Status = _SEH2_GetExceptionCode();
4599             goto end;
4600         } _SEH2_END;
4601     }
4602 
4603     fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
4604     fcb->subvol->root_item.ctime = now;
4605 
4606     Status = STATUS_SUCCESS;
4607 
4608     if (filter != 0)
4609         queue_notification_fcb(fcb->ads ? fileref->parent : fileref, filter, fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED,
4610                                fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4611 
4612 end:
4613     if (NT_SUCCESS(Status) && FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
4614         TRACE("CurrentByteOffset was: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4615         FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
4616         TRACE("CurrentByteOffset now: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4617     }
4618 
4619     if (acquired_fcb_lock)
4620         ExReleaseResourceLite(fcb->Header.Resource);
4621 
4622     if (acquired_tree_lock)
4623         ExReleaseResourceLite(&Vcb->tree_lock);
4624 
4625     if (paging_lock)
4626         ExReleaseResourceLite(fcb->Header.PagingIoResource);
4627 
4628     return Status;
4629 }
4630 
4631 NTSTATUS write_file(device_extension* Vcb, PIRP Irp, bool wait, bool deferred_write) {
4632     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4633     void* buf;
4634     NTSTATUS Status;
4635     LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
4636     PFILE_OBJECT FileObject = IrpSp->FileObject;
4637     fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4638     LIST_ENTRY rollback;
4639 
4640     InitializeListHead(&rollback);
4641 
4642     TRACE("write\n");
4643 
4644     Irp->IoStatus.Information = 0;
4645 
4646     TRACE("offset = %I64x\n", offset.QuadPart);
4647     TRACE("length = %x\n", IrpSp->Parameters.Write.Length);
4648 
4649     if (!Irp->AssociatedIrp.SystemBuffer) {
4650         buf = map_user_buffer(Irp, fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
4651 
4652         if (Irp->MdlAddress && !buf) {
4653             ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
4654             Status = STATUS_INSUFFICIENT_RESOURCES;
4655             goto exit;
4656         }
4657     } else
4658         buf = Irp->AssociatedIrp.SystemBuffer;
4659 
4660     TRACE("buf = %p\n", buf);
4661 
4662     if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
4663         WARN("tried to write to locked region\n");
4664         Status = STATUS_FILE_LOCK_CONFLICT;
4665         goto exit;
4666     }
4667 
4668     Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE,
4669                          wait, deferred_write, true, &rollback);
4670 
4671     if (Status == STATUS_PENDING)
4672         goto exit;
4673     else if (!NT_SUCCESS(Status)) {
4674         ERR("write_file2 returned %08x\n", Status);
4675         goto exit;
4676     }
4677 
4678     if (NT_SUCCESS(Status)) {
4679         Irp->IoStatus.Information = IrpSp->Parameters.Write.Length;
4680 
4681         if (diskacc && Status != STATUS_PENDING && Irp->Flags & IRP_NOCACHE) {
4682             PETHREAD thread = NULL;
4683 
4684             if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
4685                 thread = Irp->Tail.Overlay.Thread;
4686             else if (!IoIsSystemThread(PsGetCurrentThread()))
4687                 thread = PsGetCurrentThread();
4688             else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
4689                 thread = PsGetCurrentThread();
4690 
4691             if (thread)
4692                 fPsUpdateDiskCounters(PsGetThreadProcess(thread), 0, IrpSp->Parameters.Write.Length, 0, 1, 0);
4693         }
4694     }
4695 
4696 exit:
4697     if (NT_SUCCESS(Status))
4698         clear_rollback(&rollback);
4699     else
4700         do_rollback(Vcb, &rollback);
4701 
4702     return Status;
4703 }
4704 
4705 _Dispatch_type_(IRP_MJ_WRITE)
4706 _Function_class_(DRIVER_DISPATCH)
4707 NTSTATUS __stdcall drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
4708     NTSTATUS Status;
4709     bool top_level;
4710     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4711     device_extension* Vcb = DeviceObject->DeviceExtension;
4712     PFILE_OBJECT FileObject = IrpSp->FileObject;
4713     fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4714     ccb* ccb = FileObject ? FileObject->FsContext2 : NULL;
4715     bool wait = FileObject ? IoIsOperationSynchronous(Irp) : true;
4716 
4717     FsRtlEnterFileSystem();
4718 
4719     top_level = is_top_level(Irp);
4720 
4721     if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
4722         Status = vol_write(DeviceObject, Irp);
4723         goto exit;
4724     } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
4725         Status = STATUS_INVALID_PARAMETER;
4726         goto end;
4727     }
4728 
4729     if (!fcb) {
4730         ERR("fcb was NULL\n");
4731         Status = STATUS_INVALID_PARAMETER;
4732         goto end;
4733     }
4734 
4735     if (!ccb) {
4736         ERR("ccb was NULL\n");
4737         Status = STATUS_INVALID_PARAMETER;
4738         goto end;
4739     }
4740 
4741     if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) {
4742         WARN("insufficient permissions\n");
4743         Status = STATUS_ACCESS_DENIED;
4744         goto end;
4745     }
4746 
4747     if (fcb == Vcb->volume_fcb) {
4748         if (!Vcb->locked || Vcb->locked_fileobj != FileObject) {
4749             ERR("trying to write to volume when not locked, or locked with another FileObject\n");
4750             Status = STATUS_ACCESS_DENIED;
4751             goto end;
4752         }
4753 
4754         TRACE("writing directly to volume\n");
4755 
4756         IoSkipCurrentIrpStackLocation(Irp);
4757 
4758         Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
4759         goto exit;
4760     }
4761 
4762     if (is_subvol_readonly(fcb->subvol, Irp)) {
4763         Status = STATUS_ACCESS_DENIED;
4764         goto end;
4765     }
4766 
4767     if (Vcb->readonly) {
4768         Status = STATUS_MEDIA_WRITE_PROTECTED;
4769         goto end;
4770     }
4771 
4772     _SEH2_TRY {
4773         if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
4774             CcMdlWriteComplete(IrpSp->FileObject, &IrpSp->Parameters.Write.ByteOffset, Irp->MdlAddress);
4775 
4776             Irp->MdlAddress = NULL;
4777             Status = STATUS_SUCCESS;
4778         } else {
4779             if (!(Irp->Flags & IRP_PAGING_IO))
4780                 FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);
4781 
4782             // Don't offload jobs when doing paging IO - otherwise this can lead to
4783             // deadlocks in CcCopyWrite.
4784             if (Irp->Flags & IRP_PAGING_IO)
4785                 wait = true;
4786 
4787             Status = write_file(Vcb, Irp, wait, false);
4788         }
4789     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4790         Status = _SEH2_GetExceptionCode();
4791     } _SEH2_END;
4792 
4793 end:
4794     Irp->IoStatus.Status = Status;
4795 
4796     TRACE("wrote %u bytes\n", Irp->IoStatus.Information);
4797 
4798     if (Status != STATUS_PENDING)
4799         IoCompleteRequest(Irp, IO_NO_INCREMENT);
4800     else {
4801         IoMarkIrpPending(Irp);
4802 
4803         if (!add_thread_job(Vcb, Irp))
4804             Status = do_write_job(Vcb, Irp);
4805     }
4806 
4807 exit:
4808     if (top_level)
4809         IoSetTopLevelIrp(NULL);
4810 
4811     TRACE("returning %08x\n", Status);
4812 
4813     FsRtlExitFileSystem();
4814 
4815     return Status;
4816 }
4817