xref: /reactos/drivers/filesystems/btrfs/write.c (revision 6e0cf03d)
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
// Per-stripe bookkeeping for one write: the byte range destined for the stripe and the MDL describing it.
20 typedef struct {
21     uint64_t start;      // start of the range for this stripe, as computed by the prepare_*_write helpers
22     uint64_t end;        // end of the range (exclusive); prepare_raid0_write allocates no MDL when start == end
23     uint8_t* data;       // NOTE(review): not touched in the code visible here - presumably a data buffer; confirm
24     PMDL mdl;            // MDL allocated with length (end - start); its PFN array is filled from the master MDL
25     uint64_t irp_offset; // NOTE(review): not used in the code visible here; confirm against the callers
26 } write_stripe;
27 
// Forward declaration of an I/O completion routine; being static, its definition lives elsewhere in this translation unit.
28 _Function_class_(IO_COMPLETION_ROUTINE)
29 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
30 
// Forward declaration; all three pointer arguments are declared non-null. The definition is not part of this section.
31 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) __attribute__((nonnull(1, 2, 3)));
32 
// Objects defined in another translation unit.
// NOTE(review): the f* names look like optional kernel routine pointers resolved at load time - confirm
// against their definitions; none of them is used in the code visible in this section.
33 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
34 extern tCcCopyWriteEx fCcCopyWriteEx;
35 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
36 extern bool diskacc;
37 
38 __attribute__((nonnull(1, 2, 4)))
find_data_address_in_chunk(device_extension * Vcb,chunk * c,uint64_t length,uint64_t * address)39 bool find_data_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t length, uint64_t* address) {
40     LIST_ENTRY* le;
41     space* s;
42 
43     TRACE("(%p, %I64x, %I64x, %p)\n", Vcb, c->offset, length, address);
44 
45     if (length > c->chunk_item->size - c->used)
46         return false;
47 
48     if (!c->cache_loaded) {
49         NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
50 
51         if (!NT_SUCCESS(Status)) {
52             ERR("load_cache_chunk returned %08lx\n", Status);
53             return false;
54         }
55     }
56 
57     if (IsListEmpty(&c->space_size))
58         return false;
59 
60     le = c->space_size.Flink;
61     while (le != &c->space_size) {
62         s = CONTAINING_RECORD(le, space, list_entry_size);
63 
64         if (s->size == length) {
65             *address = s->address;
66             return true;
67         } else if (s->size < length) {
68             if (le == c->space_size.Flink)
69                 return false;
70 
71             s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
72 
73             *address = s->address;
74             return true;
75         }
76 
77         le = le->Flink;
78     }
79 
80     s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
81 
82     if (s->size > length) {
83         *address = s->address;
84         return true;
85     }
86 
87     return false;
88 }
89 
90 __attribute__((nonnull(1)))
get_chunk_from_address(device_extension * Vcb,uint64_t address)91 chunk* get_chunk_from_address(device_extension* Vcb, uint64_t address) {
92     LIST_ENTRY* le2;
93 
94     ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
95 
96     le2 = Vcb->chunks.Flink;
97     while (le2 != &Vcb->chunks) {
98         chunk* c = CONTAINING_RECORD(le2, chunk, list_entry);
99 
100         if (address >= c->offset && address < c->offset + c->chunk_item->size) {
101             ExReleaseResourceLite(&Vcb->chunk_lock);
102             return c;
103         }
104 
105         le2 = le2->Flink;
106     }
107 
108     ExReleaseResourceLite(&Vcb->chunk_lock);
109 
110     return NULL;
111 }
112 
// Candidate placement for one stripe of a new chunk, as filled in by find_new_stripe / find_new_dup_stripes.
113 typedef struct {
114     space* dh;      // free-space entry picked from a device's space list
115     device* device; // device the stripe will be placed on
116 } stripe;
117 
118 __attribute__((nonnull(1)))
find_new_chunk_address(device_extension * Vcb,uint64_t size)119 static uint64_t find_new_chunk_address(device_extension* Vcb, uint64_t size) {
120     uint64_t lastaddr;
121     LIST_ENTRY* le;
122 
123     lastaddr = 0xc00000;
124 
125     le = Vcb->chunks.Flink;
126     while (le != &Vcb->chunks) {
127         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
128 
129         if (c->offset >= lastaddr + size)
130             return lastaddr;
131 
132         lastaddr = c->offset + c->chunk_item->size;
133 
134         le = le->Flink;
135     }
136 
137     return lastaddr;
138 }
139 
140 __attribute__((nonnull(1,2)))
find_new_dup_stripes(device_extension * Vcb,stripe * stripes,uint64_t max_stripe_size,bool full_size)141 static bool find_new_dup_stripes(device_extension* Vcb, stripe* stripes, uint64_t max_stripe_size, bool full_size) {
142     uint64_t devusage = 0xffffffffffffffff;
143     space *devdh1 = NULL, *devdh2 = NULL;
144     LIST_ENTRY* le;
145     device* dev2 = NULL;
146 
147     le = Vcb->devices.Flink;
148 
149     while (le != &Vcb->devices) {
150         device* dev = CONTAINING_RECORD(le, device, list_entry);
151 
152         if (!dev->readonly && !dev->reloc && dev->devobj) {
153             uint64_t usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
154 
155             // favour devices which have been used the least
156             if (usage < devusage) {
157                 if (!IsListEmpty(&dev->space)) {
158                     LIST_ENTRY* le2;
159                     space *dh1 = NULL, *dh2 = NULL;
160 
161                     le2 = dev->space.Flink;
162                     while (le2 != &dev->space) {
163                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
164 
165                         if (dh->size >= max_stripe_size && (!dh1 || !dh2 || dh->size < dh1->size)) {
166                             dh2 = dh1;
167                             dh1 = dh;
168                         }
169 
170                         le2 = le2->Flink;
171                     }
172 
173                     if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
174                         dev2 = dev;
175                         devusage = usage;
176                         devdh1 = dh1;
177                         devdh2 = dh2 ? dh2 : dh1;
178                     }
179                 }
180             }
181         }
182 
183         le = le->Flink;
184     }
185 
186     if (!devdh1) {
187         uint64_t size = 0;
188 
189         // Can't find hole of at least max_stripe_size; look for the largest one we can find
190 
191         if (full_size)
192             return false;
193 
194         le = Vcb->devices.Flink;
195         while (le != &Vcb->devices) {
196             device* dev = CONTAINING_RECORD(le, device, list_entry);
197 
198             if (!dev->readonly && !dev->reloc) {
199                 if (!IsListEmpty(&dev->space)) {
200                     LIST_ENTRY* le2;
201                     space *dh1 = NULL, *dh2 = NULL;
202 
203                     le2 = dev->space.Flink;
204                     while (le2 != &dev->space) {
205                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
206 
207                         if (!dh1 || !dh2 || dh->size < dh1->size) {
208                             dh2 = dh1;
209                             dh1 = dh;
210                         }
211 
212                         le2 = le2->Flink;
213                     }
214 
215                     if (dh1) {
216                         uint64_t devsize;
217 
218                         if (dh2)
219                             devsize = max(dh1->size / 2, min(dh1->size, dh2->size));
220                         else
221                             devsize = dh1->size / 2;
222 
223                         if (devsize > size) {
224                             dev2 = dev;
225                             devdh1 = dh1;
226 
227                             if (dh2 && min(dh1->size, dh2->size) > dh1->size / 2)
228                                 devdh2 = dh2;
229                             else
230                                 devdh2 = dh1;
231 
232                             size = devsize;
233                         }
234                     }
235                 }
236             }
237 
238             le = le->Flink;
239         }
240 
241         if (!devdh1)
242             return false;
243     }
244 
245     stripes[0].device = stripes[1].device = dev2;
246     stripes[0].dh = devdh1;
247     stripes[1].dh = devdh2;
248 
249     return true;
250 }
251 
252 __attribute__((nonnull(1,2)))
find_new_stripe(device_extension * Vcb,stripe * stripes,uint16_t i,uint64_t max_stripe_size,bool allow_missing,bool full_size)253 static bool find_new_stripe(device_extension* Vcb, stripe* stripes, uint16_t i, uint64_t max_stripe_size, bool allow_missing, bool full_size) {
254     uint64_t k, devusage = 0xffffffffffffffff;
255     space* devdh = NULL;
256     LIST_ENTRY* le;
257     device* dev2 = NULL;
258 
259     le = Vcb->devices.Flink;
260     while (le != &Vcb->devices) {
261         device* dev = CONTAINING_RECORD(le, device, list_entry);
262         uint64_t usage;
263         bool skip = false;
264 
265         if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
266             le = le->Flink;
267             continue;
268         }
269 
270         // skip this device if it already has a stripe
271         if (i > 0) {
272             for (k = 0; k < i; k++) {
273                 if (stripes[k].device == dev) {
274                     skip = true;
275                     break;
276                 }
277             }
278         }
279 
280         if (!skip) {
281             usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
282 
283             // favour devices which have been used the least
284             if (usage < devusage) {
285                 if (!IsListEmpty(&dev->space)) {
286                     LIST_ENTRY* le2;
287 
288                     le2 = dev->space.Flink;
289                     while (le2 != &dev->space) {
290                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
291 
292                         if ((dev2 != dev && dh->size >= max_stripe_size) ||
293                             (dev2 == dev && dh->size >= max_stripe_size && dh->size < devdh->size)
294                         ) {
295                             devdh = dh;
296                             dev2 = dev;
297                             devusage = usage;
298                         }
299 
300                         le2 = le2->Flink;
301                     }
302                 }
303             }
304         }
305 
306         le = le->Flink;
307     }
308 
309     if (!devdh) {
310         // Can't find hole of at least max_stripe_size; look for the largest one we can find
311 
312         if (full_size)
313             return false;
314 
315         le = Vcb->devices.Flink;
316         while (le != &Vcb->devices) {
317             device* dev = CONTAINING_RECORD(le, device, list_entry);
318             bool skip = false;
319 
320             if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
321                 le = le->Flink;
322                 continue;
323             }
324 
325             // skip this device if it already has a stripe
326             if (i > 0) {
327                 for (k = 0; k < i; k++) {
328                     if (stripes[k].device == dev) {
329                         skip = true;
330                         break;
331                     }
332                 }
333             }
334 
335             if (!skip) {
336                 if (!IsListEmpty(&dev->space)) {
337                     LIST_ENTRY* le2;
338 
339                     le2 = dev->space.Flink;
340                     while (le2 != &dev->space) {
341                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
342 
343                         if (!devdh || devdh->size < dh->size) {
344                             devdh = dh;
345                             dev2 = dev;
346                         }
347 
348                         le2 = le2->Flink;
349                     }
350                 }
351             }
352 
353             le = le->Flink;
354         }
355 
356         if (!devdh)
357             return false;
358     }
359 
360     stripes[i].dh = devdh;
361     stripes[i].device = dev2;
362 
363     return true;
364 }
365 
366 __attribute__((nonnull(1,3)))
alloc_chunk(device_extension * Vcb,uint64_t flags,chunk ** pc,bool full_size)367 NTSTATUS alloc_chunk(device_extension* Vcb, uint64_t flags, chunk** pc, bool full_size) {
368     NTSTATUS Status;
369     uint64_t max_stripe_size, max_chunk_size, stripe_size, stripe_length, factor;
370     uint64_t total_size = 0, logaddr;
371     uint16_t i, type, num_stripes, sub_stripes, max_stripes, min_stripes, allowed_missing;
372     stripe* stripes = NULL;
373     uint16_t cisize;
374     CHUNK_ITEM_STRIPE* cis;
375     chunk* c = NULL;
376     space* s = NULL;
377     LIST_ENTRY* le;
378 
379     le = Vcb->devices.Flink;
380     while (le != &Vcb->devices) {
381         device* dev = CONTAINING_RECORD(le, device, list_entry);
382         total_size += dev->devitem.num_bytes;
383 
384         le = le->Flink;
385     }
386 
387     TRACE("total_size = %I64x\n", total_size);
388 
389     // We purposely check for DATA first - mixed blocks have the same size
390     // as DATA ones.
391     if (flags & BLOCK_FLAG_DATA) {
392         max_stripe_size = 0x40000000; // 1 GB
393         max_chunk_size = 10 * max_stripe_size;
394     } else if (flags & BLOCK_FLAG_METADATA) {
395         if (total_size > 0xC80000000) // 50 GB
396             max_stripe_size = 0x40000000; // 1 GB
397         else
398             max_stripe_size = 0x10000000; // 256 MB
399 
400         max_chunk_size = max_stripe_size;
401     } else if (flags & BLOCK_FLAG_SYSTEM) {
402         max_stripe_size = 0x2000000; // 32 MB
403         max_chunk_size = 2 * max_stripe_size;
404     } else {
405         ERR("unknown chunk type\n");
406         return STATUS_INTERNAL_ERROR;
407     }
408 
409     if (flags & BLOCK_FLAG_DUPLICATE) {
410         min_stripes = 2;
411         max_stripes = 2;
412         sub_stripes = 0;
413         type = BLOCK_FLAG_DUPLICATE;
414         allowed_missing = 0;
415     } else if (flags & BLOCK_FLAG_RAID0) {
416         min_stripes = 2;
417         max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
418         sub_stripes = 0;
419         type = BLOCK_FLAG_RAID0;
420         allowed_missing = 0;
421     } else if (flags & BLOCK_FLAG_RAID1) {
422         min_stripes = 2;
423         max_stripes = 2;
424         sub_stripes = 1;
425         type = BLOCK_FLAG_RAID1;
426         allowed_missing = 1;
427     } else if (flags & BLOCK_FLAG_RAID10) {
428         min_stripes = 4;
429         max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
430         sub_stripes = 2;
431         type = BLOCK_FLAG_RAID10;
432         allowed_missing = 1;
433     } else if (flags & BLOCK_FLAG_RAID5) {
434         min_stripes = 3;
435         max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
436         sub_stripes = 1;
437         type = BLOCK_FLAG_RAID5;
438         allowed_missing = 1;
439     } else if (flags & BLOCK_FLAG_RAID6) {
440         min_stripes = 4;
441         max_stripes = 257;
442         sub_stripes = 1;
443         type = BLOCK_FLAG_RAID6;
444         allowed_missing = 2;
445     } else if (flags & BLOCK_FLAG_RAID1C3) {
446         min_stripes = 3;
447         max_stripes = 3;
448         sub_stripes = 1;
449         type = BLOCK_FLAG_RAID1C3;
450         allowed_missing = 2;
451     } else if (flags & BLOCK_FLAG_RAID1C4) {
452         min_stripes = 4;
453         max_stripes = 4;
454         sub_stripes = 1;
455         type = BLOCK_FLAG_RAID1C4;
456         allowed_missing = 3;
457     } else { // SINGLE
458         min_stripes = 1;
459         max_stripes = 1;
460         sub_stripes = 1;
461         type = 0;
462         allowed_missing = 0;
463     }
464 
465     if (max_chunk_size > total_size / 10) {  // cap at 10%
466         max_chunk_size = total_size / 10;
467         max_stripe_size = max_chunk_size / min_stripes;
468     }
469 
470     if (max_stripe_size > total_size / (10 * min_stripes))
471         max_stripe_size = total_size / (10 * min_stripes);
472 
473     TRACE("would allocate a new chunk of %I64x bytes and stripe %I64x\n", max_chunk_size, max_stripe_size);
474 
475     stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * max_stripes, ALLOC_TAG);
476     if (!stripes) {
477         ERR("out of memory\n");
478         Status = STATUS_INSUFFICIENT_RESOURCES;
479         goto end;
480     }
481 
482     num_stripes = 0;
483 
484     if (type == BLOCK_FLAG_DUPLICATE) {
485         if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size, full_size)) {
486             Status = STATUS_DISK_FULL;
487             goto end;
488         } else
489             num_stripes = max_stripes;
490     } else {
491         for (i = 0; i < max_stripes; i++) {
492             if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, false, full_size))
493                 break;
494             else
495                 num_stripes++;
496         }
497     }
498 
499     if (num_stripes < min_stripes && Vcb->options.allow_degraded && allowed_missing > 0) {
500         uint16_t added_missing = 0;
501 
502         for (i = num_stripes; i < max_stripes; i++) {
503             if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, true, full_size))
504                 break;
505             else {
506                 added_missing++;
507                 if (added_missing >= allowed_missing)
508                     break;
509             }
510         }
511 
512         num_stripes += added_missing;
513     }
514 
515     // for RAID10, round down to an even number of stripes
516     if (type == BLOCK_FLAG_RAID10 && (num_stripes % sub_stripes) != 0) {
517         num_stripes -= num_stripes % sub_stripes;
518     }
519 
520     if (num_stripes < min_stripes) {
521         WARN("found %u stripes, needed at least %u\n", num_stripes, min_stripes);
522         Status = STATUS_DISK_FULL;
523         goto end;
524     }
525 
526     c = ExAllocatePoolWithTag(NonPagedPool, sizeof(chunk), ALLOC_TAG);
527     if (!c) {
528         ERR("out of memory\n");
529         Status = STATUS_INSUFFICIENT_RESOURCES;
530         goto end;
531     }
532 
533     c->devices = NULL;
534 
535     cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
536     c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, cisize, ALLOC_TAG);
537     if (!c->chunk_item) {
538         ERR("out of memory\n");
539         Status = STATUS_INSUFFICIENT_RESOURCES;
540         goto end;
541     }
542 
543     stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
544 
545     if (type == BLOCK_FLAG_DUPLICATE && stripes[1].dh == stripes[0].dh)
546         stripe_size = min(stripes[0].dh->size / 2, max_stripe_size);
547     else {
548         stripe_size = max_stripe_size;
549         for (i = 0; i < num_stripes; i++) {
550             if (stripes[i].dh->size < stripe_size)
551                 stripe_size = stripes[i].dh->size;
552         }
553     }
554 
555     if (type == BLOCK_FLAG_RAID0)
556         factor = num_stripes;
557     else if (type == BLOCK_FLAG_RAID10)
558         factor = num_stripes / sub_stripes;
559     else if (type == BLOCK_FLAG_RAID5)
560         factor = num_stripes - 1;
561     else if (type == BLOCK_FLAG_RAID6)
562         factor = num_stripes - 2;
563     else
564         factor = 1; // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
565 
566     if (stripe_size * factor > max_chunk_size)
567         stripe_size = max_chunk_size / factor;
568 
569     if (stripe_size % stripe_length > 0)
570         stripe_size -= stripe_size % stripe_length;
571 
572     if (stripe_size == 0) {
573         ERR("not enough free space found (stripe_size == 0)\n");
574         Status = STATUS_DISK_FULL;
575         goto end;
576     }
577 
578     c->chunk_item->size = stripe_size * factor;
579     c->chunk_item->root_id = Vcb->extent_root->id;
580     c->chunk_item->stripe_length = stripe_length;
581     c->chunk_item->type = flags;
582     c->chunk_item->opt_io_alignment = (uint32_t)c->chunk_item->stripe_length;
583     c->chunk_item->opt_io_width = (uint32_t)c->chunk_item->stripe_length;
584     c->chunk_item->sector_size = stripes[0].device->devitem.minimal_io_size;
585     c->chunk_item->num_stripes = num_stripes;
586     c->chunk_item->sub_stripes = sub_stripes;
587 
588     c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
589     if (!c->devices) {
590         ERR("out of memory\n");
591         Status = STATUS_INSUFFICIENT_RESOURCES;
592         goto end;
593     }
594 
595     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
596     for (i = 0; i < num_stripes; i++) {
597         cis[i].dev_id = stripes[i].device->devitem.dev_id;
598 
599         if (type == BLOCK_FLAG_DUPLICATE && i == 1 && stripes[i].dh == stripes[0].dh)
600             cis[i].offset = stripes[0].dh->address + stripe_size;
601         else
602             cis[i].offset = stripes[i].dh->address;
603 
604         cis[i].dev_uuid = stripes[i].device->devitem.device_uuid;
605 
606         c->devices[i] = stripes[i].device;
607     }
608 
609     logaddr = find_new_chunk_address(Vcb, c->chunk_item->size);
610 
611     Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
612 
613     c->size = cisize;
614     c->offset = logaddr;
615     c->used = c->oldused = 0;
616     c->cache = c->old_cache = NULL;
617     c->readonly = false;
618     c->reloc = false;
619     c->last_alloc_set = false;
620     c->last_stripe = 0;
621     c->cache_loaded = true;
622     c->changed = false;
623     c->space_changed = false;
624     c->balance_num = 0;
625 
626     InitializeListHead(&c->space);
627     InitializeListHead(&c->space_size);
628     InitializeListHead(&c->deleting);
629     InitializeListHead(&c->changed_extents);
630 
631     InitializeListHead(&c->range_locks);
632     ExInitializeResourceLite(&c->range_locks_lock);
633     KeInitializeEvent(&c->range_locks_event, NotificationEvent, false);
634 
635     InitializeListHead(&c->partial_stripes);
636     ExInitializeResourceLite(&c->partial_stripes_lock);
637 
638     ExInitializeResourceLite(&c->lock);
639     ExInitializeResourceLite(&c->changed_extents_lock);
640 
641     s = ExAllocatePoolWithTag(NonPagedPool, sizeof(space), ALLOC_TAG);
642     if (!s) {
643         ERR("out of memory\n");
644         Status = STATUS_INSUFFICIENT_RESOURCES;
645         goto end;
646     }
647 
648     s->address = c->offset;
649     s->size = c->chunk_item->size;
650     InsertTailList(&c->space, &s->list_entry);
651     InsertTailList(&c->space_size, &s->list_entry_size);
652 
653     protect_superblocks(c);
654 
655     for (i = 0; i < num_stripes; i++) {
656         stripes[i].device->devitem.bytes_used += stripe_size;
657 
658         space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL, NULL);
659     }
660 
661     Status = STATUS_SUCCESS;
662 
663     if (flags & BLOCK_FLAG_RAID5 || flags & BLOCK_FLAG_RAID6)
664         Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_RAID56;
665 
666 end:
667     if (stripes)
668         ExFreePool(stripes);
669 
670     if (!NT_SUCCESS(Status)) {
671         if (c) {
672             if (c->devices)
673                 ExFreePool(c->devices);
674 
675             if (c->chunk_item)
676                 ExFreePool(c->chunk_item);
677 
678             ExFreePool(c);
679         }
680 
681         if (s) ExFreePool(s);
682     } else {
683         bool done = false;
684 
685         le = Vcb->chunks.Flink;
686         while (le != &Vcb->chunks) {
687             chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
688 
689             if (c2->offset > c->offset) {
690                 InsertHeadList(le->Blink, &c->list_entry);
691                 done = true;
692                 break;
693             }
694 
695             le = le->Flink;
696         }
697 
698         if (!done)
699             InsertTailList(&Vcb->chunks, &c->list_entry);
700 
701         c->created = true;
702         c->changed = true;
703         c->space_changed = true;
704         c->list_entry_balance.Flink = NULL;
705 
706         *pc = c;
707     }
708 
709     return Status;
710 }
711 
712 __attribute__((nonnull(1,3,5,8)))
713 static NTSTATUS prepare_raid0_write(_Pre_satisfies_(_Curr_->chunk_item->num_stripes>0) _In_ chunk* c, _In_ uint64_t address, _In_reads_bytes_(length) void* data,
714                                     _In_ uint32_t length, _In_ write_stripe* stripes, _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
715     uint64_t startoff, endoff;
716     uint16_t startoffstripe, endoffstripe, stripenum;
717     uint64_t pos, *stripeoff;
718     uint32_t i;
719     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
720     PMDL master_mdl;
721     PFN_NUMBER* pfns;
722 
723     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
724     if (!stripeoff) {
725         ERR("out of memory\n");
726         return STATUS_INSUFFICIENT_RESOURCES;
727     }
728 
729     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
730     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
731 
732     if (file_write) {
733         master_mdl = Irp->MdlAddress;
734 
735         pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
736         pfns = &pfns[irp_offset >> PAGE_SHIFT];
737     } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
738         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
739         if (!wtc->scratch) {
740             ERR("out of memory\n");
741             return STATUS_INSUFFICIENT_RESOURCES;
742         }
743 
744         RtlCopyMemory(wtc->scratch, data, length);
745 
746         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
747         if (!master_mdl) {
748             ERR("out of memory\n");
749             return STATUS_INSUFFICIENT_RESOURCES;
750         }
751 
752         MmBuildMdlForNonPagedPool(master_mdl);
753 
754         wtc->mdl = master_mdl;
755 
756         pfns = (PFN_NUMBER*)(master_mdl + 1);
757     } else {
758         NTSTATUS Status = STATUS_SUCCESS;
759 
760         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
761         if (!master_mdl) {
762             ERR("out of memory\n");
763             return STATUS_INSUFFICIENT_RESOURCES;
764         }
765 
766         _SEH2_TRY {
767             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)768         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
769             Status = _SEH2_GetExceptionCode();
770         } _SEH2_END;
771 
772         if (!NT_SUCCESS(Status)) {
773             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
774             IoFreeMdl(master_mdl);
775             return Status;
776         }
777 
778         wtc->mdl = master_mdl;
779 
780         pfns = (PFN_NUMBER*)(master_mdl + 1);
781     }
782 
783     for (i = 0; i < c->chunk_item->num_stripes; i++) {
784         if (startoffstripe > i)
785             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
786         else if (startoffstripe == i)
787             stripes[i].start = startoff;
788         else
789             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
790 
791         if (endoffstripe > i)
792             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
793         else if (endoffstripe == i)
794             stripes[i].end = endoff + 1;
795         else
796             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
797 
798         if (stripes[i].start != stripes[i].end) {
799             stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
800             if (!stripes[i].mdl) {
801                 ERR("IoAllocateMdl failed\n");
802                 ExFreePool(stripeoff);
803                 return STATUS_INSUFFICIENT_RESOURCES;
804             }
805         }
806     }
807 
808     pos = 0;
809     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
810 
811     stripenum = startoffstripe;
812 
813     while (pos < length) {
814         PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum].mdl + 1);
815 
816         if (pos == 0) {
817             uint32_t writelen = (uint32_t)min(stripes[stripenum].end - stripes[stripenum].start,
818                                           c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));
819 
820             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
821 
822             stripeoff[stripenum] += writelen;
823             pos += writelen;
824         } else if (length - pos < c->chunk_item->stripe_length) {
825             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
826             break;
827         } else {
828             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
829 
830             stripeoff[stripenum] += c->chunk_item->stripe_length;
831             pos += c->chunk_item->stripe_length;
832         }
833 
834         stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
835     }
836 
837     ExFreePool(stripeoff);
838 
839     return STATUS_SUCCESS;
840 }
841 
842 __attribute__((nonnull(1,3,5,8)))
843 static NTSTATUS prepare_raid10_write(_Pre_satisfies_(_Curr_->chunk_item->sub_stripes>0&&_Curr_->chunk_item->num_stripes>=_Curr_->chunk_item->sub_stripes) _In_ chunk* c,
844                                      _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_stripe* stripes,
845                                      _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
846     uint64_t startoff, endoff;
847     uint16_t startoffstripe, endoffstripe, stripenum;
848     uint64_t pos, *stripeoff;
849     uint32_t i;
850     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
851     PMDL master_mdl;
852     PFN_NUMBER* pfns;
853 
854     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
855     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
856 
857     stripenum = startoffstripe;
858     startoffstripe *= c->chunk_item->sub_stripes;
859     endoffstripe *= c->chunk_item->sub_stripes;
860 
861     if (file_write) {
862         master_mdl = Irp->MdlAddress;
863 
864         pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
865         pfns = &pfns[irp_offset >> PAGE_SHIFT];
866     } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
867         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
868         if (!wtc->scratch) {
869             ERR("out of memory\n");
870             return STATUS_INSUFFICIENT_RESOURCES;
871         }
872 
873         RtlCopyMemory(wtc->scratch, data, length);
874 
875         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
876         if (!master_mdl) {
877             ERR("out of memory\n");
878             return STATUS_INSUFFICIENT_RESOURCES;
879         }
880 
881         MmBuildMdlForNonPagedPool(master_mdl);
882 
883         wtc->mdl = master_mdl;
884 
885         pfns = (PFN_NUMBER*)(master_mdl + 1);
886     } else {
887         NTSTATUS Status = STATUS_SUCCESS;
888 
889         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
890         if (!master_mdl) {
891             ERR("out of memory\n");
892             return STATUS_INSUFFICIENT_RESOURCES;
893         }
894 
895         _SEH2_TRY {
896             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)897         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
898             Status = _SEH2_GetExceptionCode();
899         } _SEH2_END;
900 
901         if (!NT_SUCCESS(Status)) {
902             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
903             IoFreeMdl(master_mdl);
904             return Status;
905         }
906 
907         wtc->mdl = master_mdl;
908 
909         pfns = (PFN_NUMBER*)(master_mdl + 1);
910     }
911 
912     for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
913         uint16_t j;
914 
915         if (startoffstripe > i)
916             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
917         else if (startoffstripe == i)
918             stripes[i].start = startoff;
919         else
920             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
921 
922         if (endoffstripe > i)
923             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
924         else if (endoffstripe == i)
925             stripes[i].end = endoff + 1;
926         else
927             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
928 
929         stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
930         if (!stripes[i].mdl) {
931             ERR("IoAllocateMdl failed\n");
932             return STATUS_INSUFFICIENT_RESOURCES;
933         }
934 
935         for (j = 1; j < c->chunk_item->sub_stripes; j++) {
936             stripes[i+j].start = stripes[i].start;
937             stripes[i+j].end = stripes[i].end;
938             stripes[i+j].data = stripes[i].data;
939             stripes[i+j].mdl = stripes[i].mdl;
940         }
941     }
942 
943     pos = 0;
944 
945     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
946     if (!stripeoff) {
947         ERR("out of memory\n");
948         return STATUS_INSUFFICIENT_RESOURCES;
949     }
950 
951     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
952 
953     while (pos < length) {
954         PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum * c->chunk_item->sub_stripes].mdl + 1);
955 
956         if (pos == 0) {
957             uint32_t writelen = (uint32_t)min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
958                                           c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
959 
960             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
961 
962             stripeoff[stripenum] += writelen;
963             pos += writelen;
964         } else if (length - pos < c->chunk_item->stripe_length) {
965             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
966             break;
967         } else {
968             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
969 
970             stripeoff[stripenum] += c->chunk_item->stripe_length;
971             pos += c->chunk_item->stripe_length;
972         }
973 
974         stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
975     }
976 
977     ExFreePool(stripeoff);
978 
979     return STATUS_SUCCESS;
980 }
981 
982 __attribute__((nonnull(1,2,5)))
add_partial_stripe(device_extension * Vcb,chunk * c,uint64_t address,uint32_t length,void * data)983 static NTSTATUS add_partial_stripe(device_extension* Vcb, chunk* c, uint64_t address, uint32_t length, void* data) {
984     NTSTATUS Status;
985     LIST_ENTRY* le;
986     partial_stripe* ps;
987     uint64_t stripe_addr;
988     uint16_t num_data_stripes;
989 
990     num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
991     stripe_addr = address - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
992 
993     ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
994 
995     le = c->partial_stripes.Flink;
996     while (le != &c->partial_stripes) {
997         ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
998 
999         if (ps->address == stripe_addr) {
1000             // update existing entry
1001 
1002             RtlCopyMemory(ps->data + address - stripe_addr, data, length);
1003             RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) >> Vcb->sector_shift), length >> Vcb->sector_shift);
1004 
1005             // if now filled, flush
1006             if (RtlAreBitsClear(&ps->bmp, 0, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) >> Vcb->sector_shift))) {
1007                 Status = flush_partial_stripe(Vcb, c, ps);
1008                 if (!NT_SUCCESS(Status)) {
1009                     ERR("flush_partial_stripe returned %08lx\n", Status);
1010                     goto end;
1011                 }
1012 
1013                 RemoveEntryList(&ps->list_entry);
1014 
1015                 if (ps->bmparr)
1016                     ExFreePool(ps->bmparr);
1017 
1018                 ExFreePool(ps);
1019             }
1020 
1021             Status = STATUS_SUCCESS;
1022             goto end;
1023         } else if (ps->address > stripe_addr)
1024             break;
1025 
1026         le = le->Flink;
1027     }
1028 
1029     // add new entry
1030 
1031     ps = ExAllocatePoolWithTag(NonPagedPool, offsetof(partial_stripe, data[0]) + (ULONG)(num_data_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
1032     if (!ps) {
1033         ERR("out of memory\n");
1034         Status = STATUS_INSUFFICIENT_RESOURCES;
1035         goto end;
1036     }
1037 
1038     ps->bmplen = (ULONG)(num_data_stripes * c->chunk_item->stripe_length) >> Vcb->sector_shift;
1039 
1040     ps->address = stripe_addr;
1041     ps->bmparr = ExAllocatePoolWithTag(NonPagedPool, (size_t)sector_align(((ps->bmplen / 8) + 1), sizeof(ULONG)), ALLOC_TAG);
1042     if (!ps->bmparr) {
1043         ERR("out of memory\n");
1044         ExFreePool(ps);
1045         Status = STATUS_INSUFFICIENT_RESOURCES;
1046         goto end;
1047     }
1048 
1049     RtlInitializeBitMap(&ps->bmp, ps->bmparr, ps->bmplen);
1050     RtlSetAllBits(&ps->bmp);
1051 
1052     RtlCopyMemory(ps->data + address - stripe_addr, data, length);
1053     RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) >> Vcb->sector_shift), length >> Vcb->sector_shift);
1054 
1055     InsertHeadList(le->Blink, &ps->list_entry);
1056 
1057     Status = STATUS_SUCCESS;
1058 
1059 end:
1060     ExReleaseResourceLite(&c->partial_stripes_lock);
1061 
1062     return Status;
1063 }
1064 
1065 typedef struct {
1066     PMDL mdl;
1067     PFN_NUMBER* pfns;
1068 } log_stripe;
1069 
1070 __attribute__((nonnull(1,2,4,6,10)))
prepare_raid5_write(device_extension * Vcb,chunk * c,uint64_t address,void * data,uint32_t length,write_stripe * stripes,PIRP Irp,uint64_t irp_offset,ULONG priority,write_data_context * wtc)1071 static NTSTATUS prepare_raid5_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1072                                     uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1073     uint64_t startoff, endoff, parity_start, parity_end;
1074     uint16_t startoffstripe, endoffstripe, parity, num_data_stripes = c->chunk_item->num_stripes - 1;
1075     uint64_t pos, parity_pos, *stripeoff = NULL;
1076     uint32_t i;
1077     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1078     PMDL master_mdl;
1079     NTSTATUS Status;
1080     PFN_NUMBER *pfns, *parity_pfns;
1081     log_stripe* log_stripes = NULL;
1082 
1083     if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1084         uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1085 
1086         delta = min(length, delta);
1087         Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + length - delta);
1088         if (!NT_SUCCESS(Status)) {
1089             ERR("add_partial_stripe returned %08lx\n", Status);
1090             goto exit;
1091         }
1092 
1093         length -= (uint32_t)delta;
1094     }
1095 
1096     if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1097         uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1098 
1099         Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, data);
1100         if (!NT_SUCCESS(Status)) {
1101             ERR("add_partial_stripe returned %08lx\n", Status);
1102             goto exit;
1103         }
1104 
1105         address += delta;
1106         length -= (uint32_t)delta;
1107         irp_offset += delta;
1108         data = (uint8_t*)data + delta;
1109     }
1110 
1111     if (length == 0) {
1112         Status = STATUS_SUCCESS;
1113         goto exit;
1114     }
1115 
1116     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1117     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1118 
1119     pos = 0;
1120     while (pos < length) {
1121         parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1122 
1123         if (pos == 0) {
1124             uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1125             ULONG skip, writelen;
1126 
1127             i = startoffstripe;
1128             while (stripe != parity) {
1129                 if (i == startoffstripe) {
1130                     writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1131 
1132                     stripes[stripe].start = startoff;
1133                     stripes[stripe].end = startoff + writelen;
1134 
1135                     pos += writelen;
1136 
1137                     if (pos == length)
1138                         break;
1139                 } else {
1140                     writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1141 
1142                     stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1143                     stripes[stripe].end = stripes[stripe].start + writelen;
1144 
1145                     pos += writelen;
1146 
1147                     if (pos == length)
1148                         break;
1149                 }
1150 
1151                 i++;
1152                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1153             }
1154 
1155             if (pos == length)
1156                 break;
1157 
1158             for (i = 0; i < startoffstripe; i++) {
1159                 stripe = (parity + i + 1) % c->chunk_item->num_stripes;
1160 
1161                 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1162             }
1163 
1164             stripes[parity].start = stripes[parity].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1165 
1166             if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1167                 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1168 
1169                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1170                     stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1171                 }
1172 
1173                 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1174             }
1175         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1176             for (i = 0; i < c->chunk_item->num_stripes; i++) {
1177                 stripes[i].end += c->chunk_item->stripe_length;
1178             }
1179 
1180             pos += c->chunk_item->stripe_length * num_data_stripes;
1181         } else {
1182             uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1183 
1184             i = 0;
1185             while (stripe != parity) {
1186                 if (endoffstripe == i) {
1187                     stripes[stripe].end = endoff + 1;
1188                     break;
1189                 } else if (endoffstripe > i)
1190                     stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1191 
1192                 i++;
1193                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1194             }
1195 
1196             break;
1197         }
1198     }
1199 
1200     parity_start = 0xffffffffffffffff;
1201     parity_end = 0;
1202 
1203     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1204         if (stripes[i].start != 0 || stripes[i].end != 0) {
1205             parity_start = min(stripes[i].start, parity_start);
1206             parity_end = max(stripes[i].end, parity_end);
1207         }
1208     }
1209 
1210     if (parity_end == parity_start) {
1211         Status = STATUS_SUCCESS;
1212         goto exit;
1213     }
1214 
1215     parity = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1216     stripes[parity].start = parity_start;
1217 
1218     parity = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1219     stripes[parity].end = parity_end;
1220 
1221     log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1222     if (!log_stripes) {
1223         ERR("out of memory\n");
1224         Status = STATUS_INSUFFICIENT_RESOURCES;
1225         goto exit;
1226     }
1227 
1228     RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1229 
1230     for (i = 0; i < num_data_stripes; i++) {
1231         log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1232         if (!log_stripes[i].mdl) {
1233             ERR("out of memory\n");
1234             Status = STATUS_INSUFFICIENT_RESOURCES;
1235             goto exit;
1236         }
1237 
1238         log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1239         log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1240     }
1241 
1242     wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1243     if (!wtc->parity1) {
1244         ERR("out of memory\n");
1245         Status = STATUS_INSUFFICIENT_RESOURCES;
1246         goto exit;
1247     }
1248 
1249     wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1250     if (!wtc->parity1_mdl) {
1251         ERR("out of memory\n");
1252         Status = STATUS_INSUFFICIENT_RESOURCES;
1253         goto exit;
1254     }
1255 
1256     MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1257 
1258     if (file_write)
1259         master_mdl = Irp->MdlAddress;
1260     else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1261         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1262         if (!wtc->scratch) {
1263             ERR("out of memory\n");
1264             Status = STATUS_INSUFFICIENT_RESOURCES;
1265             goto exit;
1266         }
1267 
1268         RtlCopyMemory(wtc->scratch, data, length);
1269 
1270         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1271         if (!master_mdl) {
1272             ERR("out of memory\n");
1273             Status = STATUS_INSUFFICIENT_RESOURCES;
1274             goto exit;
1275         }
1276 
1277         MmBuildMdlForNonPagedPool(master_mdl);
1278 
1279         wtc->mdl = master_mdl;
1280     } else {
1281         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
1282         if (!master_mdl) {
1283             ERR("out of memory\n");
1284             Status = STATUS_INSUFFICIENT_RESOURCES;
1285             goto exit;
1286         }
1287 
1288         Status = STATUS_SUCCESS;
1289 
1290         _SEH2_TRY {
1291             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1292         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1293             Status = _SEH2_GetExceptionCode();
1294         } _SEH2_END;
1295 
1296         if (!NT_SUCCESS(Status)) {
1297             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1298             IoFreeMdl(master_mdl);
1299             return Status;
1300         }
1301 
1302         wtc->mdl = master_mdl;
1303     }
1304 
1305     pfns = (PFN_NUMBER*)(master_mdl + 1);
1306     parity_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1307 
1308     if (file_write)
1309         pfns = &pfns[irp_offset >> PAGE_SHIFT];
1310 
1311     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1312         if (stripes[i].start != stripes[i].end) {
1313             stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1314             if (!stripes[i].mdl) {
1315                 ERR("IoAllocateMdl failed\n");
1316                 Status = STATUS_INSUFFICIENT_RESOURCES;
1317                 goto exit;
1318             }
1319         }
1320     }
1321 
1322     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1323     if (!stripeoff) {
1324         ERR("out of memory\n");
1325         Status = STATUS_INSUFFICIENT_RESOURCES;
1326         goto exit;
1327     }
1328 
1329     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1330 
1331     pos = 0;
1332     parity_pos = 0;
1333 
1334     while (pos < length) {
1335         PFN_NUMBER* stripe_pfns;
1336 
1337         parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1338 
1339         if (pos == 0) {
1340             uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1341             uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1342                                                             c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1343             uint32_t maxwritelen = writelen;
1344 
1345             stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1346 
1347             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1348 
1349             RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1350             log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1351 
1352             stripeoff[stripe] = writelen;
1353             pos += writelen;
1354 
1355             stripe = (stripe + 1) % c->chunk_item->num_stripes;
1356             i = startoffstripe + 1;
1357 
1358             while (stripe != parity) {
1359                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1360                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1361 
1362                 if (writelen == 0)
1363                     break;
1364 
1365                 if (writelen > maxwritelen)
1366                     maxwritelen = writelen;
1367 
1368                 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1369 
1370                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1371                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1372 
1373                 stripeoff[stripe] = writelen;
1374                 pos += writelen;
1375 
1376                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1377                 i++;
1378             }
1379 
1380             stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1381 
1382             RtlCopyMemory(stripe_pfns, parity_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1383             stripeoff[parity] = maxwritelen;
1384             parity_pos = maxwritelen;
1385         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1386             uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1387 
1388             i = 0;
1389             while (stripe != parity) {
1390                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1391 
1392                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1393 
1394                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1395                 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1396 
1397                 stripeoff[stripe] += c->chunk_item->stripe_length;
1398                 pos += c->chunk_item->stripe_length;
1399 
1400                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1401                 i++;
1402             }
1403 
1404             stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1405 
1406             RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1407             stripeoff[parity] += c->chunk_item->stripe_length;
1408             parity_pos += c->chunk_item->stripe_length;
1409         } else {
1410             uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1411             uint32_t writelen, maxwritelen = 0;
1412 
1413             i = 0;
1414             while (pos < length) {
1415                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1416                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1417 
1418                 if (writelen == 0)
1419                     break;
1420 
1421                 if (writelen > maxwritelen)
1422                     maxwritelen = writelen;
1423 
1424                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1425 
1426                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1427                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1428 
1429                 stripeoff[stripe] += writelen;
1430                 pos += writelen;
1431 
1432                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1433                 i++;
1434             }
1435 
1436             stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1437 
1438             RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1439         }
1440     }
1441 
1442     for (i = 0; i < num_data_stripes; i++) {
1443         uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[i].mdl, priority);
1444 
1445         if (i == 0)
1446             RtlCopyMemory(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1447         else
1448             do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1449     }
1450 
1451     Status = STATUS_SUCCESS;
1452 
1453 exit:
1454     if (log_stripes) {
1455         for (i = 0; i < num_data_stripes; i++) {
1456             if (log_stripes[i].mdl)
1457                 IoFreeMdl(log_stripes[i].mdl);
1458         }
1459 
1460         ExFreePool(log_stripes);
1461     }
1462 
1463     if (stripeoff)
1464         ExFreePool(stripeoff);
1465 
1466     return Status;
1467 }
1468 
1469 __attribute__((nonnull(1,2,4,6,10)))
prepare_raid6_write(device_extension * Vcb,chunk * c,uint64_t address,void * data,uint32_t length,write_stripe * stripes,PIRP Irp,uint64_t irp_offset,ULONG priority,write_data_context * wtc)1470 static NTSTATUS prepare_raid6_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1471                                     uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1472     uint64_t startoff, endoff, parity_start, parity_end;
1473     uint16_t startoffstripe, endoffstripe, parity1, num_data_stripes = c->chunk_item->num_stripes - 2;
1474     uint64_t pos, parity_pos, *stripeoff = NULL;
1475     uint32_t i;
1476     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1477     PMDL master_mdl;
1478     NTSTATUS Status;
1479     PFN_NUMBER *pfns, *parity1_pfns, *parity2_pfns;
1480     log_stripe* log_stripes = NULL;
1481 
1482     if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1483         uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1484 
1485         delta = min(length, delta);
1486         Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + length - delta);
1487         if (!NT_SUCCESS(Status)) {
1488             ERR("add_partial_stripe returned %08lx\n", Status);
1489             goto exit;
1490         }
1491 
1492         length -= (uint32_t)delta;
1493     }
1494 
1495     if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1496         uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1497 
1498         Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, data);
1499         if (!NT_SUCCESS(Status)) {
1500             ERR("add_partial_stripe returned %08lx\n", Status);
1501             goto exit;
1502         }
1503 
1504         address += delta;
1505         length -= (uint32_t)delta;
1506         irp_offset += delta;
1507         data = (uint8_t*)data + delta;
1508     }
1509 
1510     if (length == 0) {
1511         Status = STATUS_SUCCESS;
1512         goto exit;
1513     }
1514 
1515     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1516     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1517 
1518     pos = 0;
1519     while (pos < length) {
1520         parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1521 
1522         if (pos == 0) {
1523             uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes;
1524             uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1525             ULONG skip, writelen;
1526 
1527             i = startoffstripe;
1528             while (stripe != parity1) {
1529                 if (i == startoffstripe) {
1530                     writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1531 
1532                     stripes[stripe].start = startoff;
1533                     stripes[stripe].end = startoff + writelen;
1534 
1535                     pos += writelen;
1536 
1537                     if (pos == length)
1538                         break;
1539                 } else {
1540                     writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1541 
1542                     stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1543                     stripes[stripe].end = stripes[stripe].start + writelen;
1544 
1545                     pos += writelen;
1546 
1547                     if (pos == length)
1548                         break;
1549                 }
1550 
1551                 i++;
1552                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1553             }
1554 
1555             if (pos == length)
1556                 break;
1557 
1558             for (i = 0; i < startoffstripe; i++) {
1559                 stripe = (parity1 + i + 2) % c->chunk_item->num_stripes;
1560 
1561                 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1562             }
1563 
1564             stripes[parity1].start = stripes[parity1].end = stripes[parity2].start = stripes[parity2].end =
1565                 startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1566 
1567             if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1568                 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1569 
1570                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1571                     stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1572                 }
1573 
1574                 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1575             }
1576         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1577             for (i = 0; i < c->chunk_item->num_stripes; i++) {
1578                 stripes[i].end += c->chunk_item->stripe_length;
1579             }
1580 
1581             pos += c->chunk_item->stripe_length * num_data_stripes;
1582         } else {
1583             uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1584 
1585             i = 0;
1586             while (stripe != parity1) {
1587                 if (endoffstripe == i) {
1588                     stripes[stripe].end = endoff + 1;
1589                     break;
1590                 } else if (endoffstripe > i)
1591                     stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1592 
1593                 i++;
1594                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1595             }
1596 
1597             break;
1598         }
1599     }
1600 
1601     parity_start = 0xffffffffffffffff;
1602     parity_end = 0;
1603 
1604     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1605         if (stripes[i].start != 0 || stripes[i].end != 0) {
1606             parity_start = min(stripes[i].start, parity_start);
1607             parity_end = max(stripes[i].end, parity_end);
1608         }
1609     }
1610 
1611     if (parity_end == parity_start) {
1612         Status = STATUS_SUCCESS;
1613         goto exit;
1614     }
1615 
1616     parity1 = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1617     stripes[parity1].start = stripes[(parity1 + 1) % c->chunk_item->num_stripes].start = parity_start;
1618 
1619     parity1 = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1620     stripes[parity1].end = stripes[(parity1 + 1) % c->chunk_item->num_stripes].end = parity_end;
1621 
1622     log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1623     if (!log_stripes) {
1624         ERR("out of memory\n");
1625         Status = STATUS_INSUFFICIENT_RESOURCES;
1626         goto exit;
1627     }
1628 
1629     RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1630 
1631     for (i = 0; i < num_data_stripes; i++) {
1632         log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1633         if (!log_stripes[i].mdl) {
1634             ERR("out of memory\n");
1635             Status = STATUS_INSUFFICIENT_RESOURCES;
1636             goto exit;
1637         }
1638 
1639         log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1640         log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1641     }
1642 
1643     wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1644     if (!wtc->parity1) {
1645         ERR("out of memory\n");
1646         Status = STATUS_INSUFFICIENT_RESOURCES;
1647         goto exit;
1648     }
1649 
1650     wtc->parity2 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1651     if (!wtc->parity2) {
1652         ERR("out of memory\n");
1653         Status = STATUS_INSUFFICIENT_RESOURCES;
1654         goto exit;
1655     }
1656 
1657     wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1658     if (!wtc->parity1_mdl) {
1659         ERR("out of memory\n");
1660         Status = STATUS_INSUFFICIENT_RESOURCES;
1661         goto exit;
1662     }
1663 
1664     MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1665 
1666     wtc->parity2_mdl = IoAllocateMdl(wtc->parity2, (ULONG)(parity_end - parity_start), false, false, NULL);
1667     if (!wtc->parity2_mdl) {
1668         ERR("out of memory\n");
1669         Status = STATUS_INSUFFICIENT_RESOURCES;
1670         goto exit;
1671     }
1672 
1673     MmBuildMdlForNonPagedPool(wtc->parity2_mdl);
1674 
1675     if (file_write)
1676         master_mdl = Irp->MdlAddress;
1677     else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1678         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1679         if (!wtc->scratch) {
1680             ERR("out of memory\n");
1681             Status = STATUS_INSUFFICIENT_RESOURCES;
1682             goto exit;
1683         }
1684 
1685         RtlCopyMemory(wtc->scratch, data, length);
1686 
1687         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1688         if (!master_mdl) {
1689             ERR("out of memory\n");
1690             Status = STATUS_INSUFFICIENT_RESOURCES;
1691             goto exit;
1692         }
1693 
1694         MmBuildMdlForNonPagedPool(master_mdl);
1695 
1696         wtc->mdl = master_mdl;
1697     } else {
1698         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
1699         if (!master_mdl) {
1700             ERR("out of memory\n");
1701             Status = STATUS_INSUFFICIENT_RESOURCES;
1702             goto exit;
1703         }
1704 
1705         Status = STATUS_SUCCESS;
1706 
1707         _SEH2_TRY {
1708             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1709         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1710             Status = _SEH2_GetExceptionCode();
1711         } _SEH2_END;
1712 
1713         if (!NT_SUCCESS(Status)) {
1714             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1715             IoFreeMdl(master_mdl);
1716             goto exit;
1717         }
1718 
1719         wtc->mdl = master_mdl;
1720     }
1721 
1722     pfns = (PFN_NUMBER*)(master_mdl + 1);
1723     parity1_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1724     parity2_pfns = (PFN_NUMBER*)(wtc->parity2_mdl + 1);
1725 
1726     if (file_write)
1727         pfns = &pfns[irp_offset >> PAGE_SHIFT];
1728 
1729     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1730         if (stripes[i].start != stripes[i].end) {
1731             stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1732             if (!stripes[i].mdl) {
1733                 ERR("IoAllocateMdl failed\n");
1734                 Status = STATUS_INSUFFICIENT_RESOURCES;
1735                 goto exit;
1736             }
1737         }
1738     }
1739 
1740     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1741     if (!stripeoff) {
1742         ERR("out of memory\n");
1743         Status = STATUS_INSUFFICIENT_RESOURCES;
1744         goto exit;
1745     }
1746 
1747     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1748 
1749     pos = 0;
1750     parity_pos = 0;
1751 
1752     while (pos < length) {
1753         PFN_NUMBER* stripe_pfns;
1754 
1755         parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1756 
1757         if (pos == 0) {
1758             uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes, parity2;
1759             uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1760                                                             c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1761             uint32_t maxwritelen = writelen;
1762 
1763             stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1764 
1765             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1766 
1767             RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1768             log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1769 
1770             stripeoff[stripe] = writelen;
1771             pos += writelen;
1772 
1773             stripe = (stripe + 1) % c->chunk_item->num_stripes;
1774             i = startoffstripe + 1;
1775 
1776             while (stripe != parity1) {
1777                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1778                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1779 
1780                 if (writelen == 0)
1781                     break;
1782 
1783                 if (writelen > maxwritelen)
1784                     maxwritelen = writelen;
1785 
1786                 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1787 
1788                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1789                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1790 
1791                 stripeoff[stripe] = writelen;
1792                 pos += writelen;
1793 
1794                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1795                 i++;
1796             }
1797 
1798             stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1799             RtlCopyMemory(stripe_pfns, parity1_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1800             stripeoff[parity1] = maxwritelen;
1801 
1802             parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1803 
1804             stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1805             RtlCopyMemory(stripe_pfns, parity2_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1806             stripeoff[parity2] = maxwritelen;
1807 
1808             parity_pos = maxwritelen;
1809         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1810             uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1811 
1812             i = 0;
1813             while (stripe != parity1) {
1814                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1815 
1816                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1817 
1818                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1819                 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1820 
1821                 stripeoff[stripe] += c->chunk_item->stripe_length;
1822                 pos += c->chunk_item->stripe_length;
1823 
1824                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1825                 i++;
1826             }
1827 
1828             stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1829             RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1830             stripeoff[parity1] += c->chunk_item->stripe_length;
1831 
1832             parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1833 
1834             stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1835             RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1836             stripeoff[parity2] += c->chunk_item->stripe_length;
1837 
1838             parity_pos += c->chunk_item->stripe_length;
1839         } else {
1840             uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1841             uint32_t writelen, maxwritelen = 0;
1842 
1843             i = 0;
1844             while (pos < length) {
1845                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1846                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1847 
1848                 if (writelen == 0)
1849                     break;
1850 
1851                 if (writelen > maxwritelen)
1852                     maxwritelen = writelen;
1853 
1854                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1855 
1856                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1857                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1858 
1859                 stripeoff[stripe] += writelen;
1860                 pos += writelen;
1861 
1862                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1863                 i++;
1864             }
1865 
1866             stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1867             RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1868 
1869             parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1870 
1871             stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1872             RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1873         }
1874     }
1875 
1876     for (i = 0; i < num_data_stripes; i++) {
1877         uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[c->chunk_item->num_stripes - 3 - i].mdl, priority);
1878 
1879         if (i == 0) {
1880             RtlCopyMemory(wtc->parity1, ss, (ULONG)(parity_end - parity_start));
1881             RtlCopyMemory(wtc->parity2, ss, (ULONG)(parity_end - parity_start));
1882         } else {
1883             do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1884 
1885             galois_double(wtc->parity2, (uint32_t)(parity_end - parity_start));
1886             do_xor(wtc->parity2, ss, (uint32_t)(parity_end - parity_start));
1887         }
1888     }
1889 
1890     Status = STATUS_SUCCESS;
1891 
1892 exit:
1893     if (log_stripes) {
1894         for (i = 0; i < num_data_stripes; i++) {
1895             if (log_stripes[i].mdl)
1896                 IoFreeMdl(log_stripes[i].mdl);
1897         }
1898 
1899         ExFreePool(log_stripes);
1900     }
1901 
1902     if (stripeoff)
1903         ExFreePool(stripeoff);
1904 
1905     return Status;
1906 }
1907 
1908 __attribute__((nonnull(1,3,5)))
write_data(_In_ device_extension * Vcb,_In_ uint64_t address,_In_reads_bytes_ (length)void * data,_In_ uint32_t length,_In_ write_data_context * wtc,_In_opt_ PIRP Irp,_In_opt_ chunk * c,_In_ bool file_write,_In_ uint64_t irp_offset,_In_ ULONG priority)1909 NTSTATUS write_data(_In_ device_extension* Vcb, _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_data_context* wtc,
1910                     _In_opt_ PIRP Irp, _In_opt_ chunk* c, _In_ bool file_write, _In_ uint64_t irp_offset, _In_ ULONG priority) {
1911     NTSTATUS Status;
1912     uint32_t i;
1913     CHUNK_ITEM_STRIPE* cis;
1914     write_stripe* stripes = NULL;
1915     uint64_t total_writing = 0;
1916     ULONG allowed_missing, missing;
1917 
1918     TRACE("(%p, %I64x, %p, %x)\n", Vcb, address, data, length);
1919 
1920     if (!c) {
1921         c = get_chunk_from_address(Vcb, address);
1922         if (!c) {
1923             ERR("could not get chunk for address %I64x\n", address);
1924             return STATUS_INTERNAL_ERROR;
1925         }
1926     }
1927 
1928     stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1929     if (!stripes) {
1930         ERR("out of memory\n");
1931         return STATUS_INSUFFICIENT_RESOURCES;
1932     }
1933 
1934     RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
1935 
1936     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1937 
1938     if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
1939         Status = prepare_raid0_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1940         if (!NT_SUCCESS(Status)) {
1941             ERR("prepare_raid0_write returned %08lx\n", Status);
1942             goto prepare_failed;
1943         }
1944 
1945         allowed_missing = 0;
1946     } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
1947         Status = prepare_raid10_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1948         if (!NT_SUCCESS(Status)) {
1949             ERR("prepare_raid10_write returned %08lx\n", Status);
1950             goto prepare_failed;
1951         }
1952 
1953         allowed_missing = 1;
1954     } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
1955         Status = prepare_raid5_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1956         if (!NT_SUCCESS(Status)) {
1957             ERR("prepare_raid5_write returned %08lx\n", Status);
1958             goto prepare_failed;
1959         }
1960 
1961         allowed_missing = 1;
1962     } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
1963         Status = prepare_raid6_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1964         if (!NT_SUCCESS(Status)) {
1965             ERR("prepare_raid6_write returned %08lx\n", Status);
1966             goto prepare_failed;
1967         }
1968 
1969         allowed_missing = 2;
1970     } else {  // write same data to every location - SINGLE, DUP, RAID1, RAID1C3, RAID1C4
1971         for (i = 0; i < c->chunk_item->num_stripes; i++) {
1972             stripes[i].start = address - c->offset;
1973             stripes[i].end = stripes[i].start + length;
1974             stripes[i].data = data;
1975             stripes[i].irp_offset = irp_offset;
1976 
1977             if (c->devices[i]->devobj) {
1978                 if (file_write) {
1979                     uint8_t* va;
1980                     ULONG writelen = (ULONG)(stripes[i].end - stripes[i].start);
1981 
1982                     va = (uint8_t*)MmGetMdlVirtualAddress(Irp->MdlAddress) + stripes[i].irp_offset;
1983 
1984                     stripes[i].mdl = IoAllocateMdl(va, writelen, false, false, NULL);
1985                     if (!stripes[i].mdl) {
1986                         ERR("IoAllocateMdl failed\n");
1987                         Status = STATUS_INSUFFICIENT_RESOURCES;
1988                         goto prepare_failed;
1989                     }
1990 
1991                     IoBuildPartialMdl(Irp->MdlAddress, stripes[i].mdl, va, writelen);
1992                 } else {
1993                     stripes[i].mdl = IoAllocateMdl(stripes[i].data, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1994                     if (!stripes[i].mdl) {
1995                         ERR("IoAllocateMdl failed\n");
1996                         Status = STATUS_INSUFFICIENT_RESOURCES;
1997                         goto prepare_failed;
1998                     }
1999 
2000                     Status = STATUS_SUCCESS;
2001 
2002                     _SEH2_TRY {
2003                         MmProbeAndLockPages(stripes[i].mdl, KernelMode, IoReadAccess);
2004                     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2005                         Status = _SEH2_GetExceptionCode();
2006                     } _SEH2_END;
2007 
2008                     if (!NT_SUCCESS(Status)) {
2009                         ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2010                         IoFreeMdl(stripes[i].mdl);
2011                         stripes[i].mdl = NULL;
2012                         goto prepare_failed;
2013                     }
2014                 }
2015             }
2016         }
2017 
2018         allowed_missing = c->chunk_item->num_stripes - 1;
2019     }
2020 
2021     missing = 0;
2022     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2023         if (!c->devices[i]->devobj)
2024             missing++;
2025     }
2026 
2027     if (missing > allowed_missing) {
2028         ERR("cannot write as %lu missing devices (maximum %lu)\n", missing, allowed_missing);
2029         Status = STATUS_DEVICE_NOT_READY;
2030         goto prepare_failed;
2031     }
2032 
2033     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2034         write_data_stripe* stripe;
2035         PIO_STACK_LOCATION IrpSp;
2036 
2037         stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
2038         if (!stripe) {
2039             ERR("out of memory\n");
2040             Status = STATUS_INSUFFICIENT_RESOURCES;
2041             goto end;
2042         }
2043 
2044         if (stripes[i].start == stripes[i].end || !c->devices[i]->devobj) {
2045             stripe->status = WriteDataStatus_Ignore;
2046             stripe->Irp = NULL;
2047             stripe->buf = stripes[i].data;
2048             stripe->mdl = NULL;
2049         } else {
2050             stripe->context = (struct _write_data_context*)wtc;
2051             stripe->buf = stripes[i].data;
2052             stripe->device = c->devices[i];
2053             RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
2054             stripe->status = WriteDataStatus_Pending;
2055             stripe->mdl = stripes[i].mdl;
2056 
2057             if (!Irp) {
2058                 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, false);
2059 
2060                 if (!stripe->Irp) {
2061                     ERR("IoAllocateIrp failed\n");
2062                     ExFreePool(stripe);
2063                     Status = STATUS_INSUFFICIENT_RESOURCES;
2064                     goto end;
2065                 }
2066             } else {
2067                 stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
2068 
2069                 if (!stripe->Irp) {
2070                     ERR("IoMakeAssociatedIrp failed\n");
2071                     ExFreePool(stripe);
2072                     Status = STATUS_INSUFFICIENT_RESOURCES;
2073                     goto end;
2074                 }
2075             }
2076 
2077             IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2078             IrpSp->MajorFunction = IRP_MJ_WRITE;
2079             IrpSp->FileObject = stripe->device->fileobj;
2080 
2081             if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
2082                 stripe->Irp->AssociatedIrp.SystemBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2083 
2084                 stripe->Irp->Flags = IRP_BUFFERED_IO;
2085             } else if (stripe->device->devobj->Flags & DO_DIRECT_IO)
2086                 stripe->Irp->MdlAddress = stripe->mdl;
2087             else
2088                 stripe->Irp->UserBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2089 
2090 #ifdef DEBUG_PARANOID
2091             if (stripes[i].end < stripes[i].start) {
2092                 ERR("trying to write stripe with negative length (%I64x < %I64x)\n", stripes[i].end, stripes[i].start);
2093                 int3;
2094             }
2095 #endif
2096 
2097             IrpSp->Parameters.Write.Length = (ULONG)(stripes[i].end - stripes[i].start);
2098             IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset;
2099 
2100             total_writing += IrpSp->Parameters.Write.Length;
2101 
2102             stripe->Irp->UserIosb = &stripe->iosb;
2103             wtc->stripes_left++;
2104 
2105             IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, true, true, true);
2106         }
2107 
2108         InsertTailList(&wtc->stripes, &stripe->list_entry);
2109     }
2110 
2111     if (diskacc)
2112         fFsRtlUpdateDiskCounters(0, total_writing);
2113 
2114     Status = STATUS_SUCCESS;
2115 
2116 end:
2117 
2118     if (stripes) ExFreePool(stripes);
2119 
2120     if (!NT_SUCCESS(Status))
2121         free_write_data_stripes(wtc);
2122 
2123     return Status;
2124 
2125 prepare_failed:
2126     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2127         if (stripes[i].mdl && (i == 0 || stripes[i].mdl != stripes[i-1].mdl)) {
2128             if (stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2129                 MmUnlockPages(stripes[i].mdl);
2130 
2131             IoFreeMdl(stripes[i].mdl);
2132         }
2133     }
2134 
2135     if (wtc->parity1_mdl) {
2136         if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2137             MmUnlockPages(wtc->parity1_mdl);
2138 
2139         IoFreeMdl(wtc->parity1_mdl);
2140         wtc->parity1_mdl = NULL;
2141     }
2142 
2143     if (wtc->parity2_mdl) {
2144         if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2145             MmUnlockPages(wtc->parity2_mdl);
2146 
2147         IoFreeMdl(wtc->parity2_mdl);
2148         wtc->parity2_mdl = NULL;
2149     }
2150 
2151     if (wtc->mdl) {
2152         if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2153             MmUnlockPages(wtc->mdl);
2154 
2155         IoFreeMdl(wtc->mdl);
2156         wtc->mdl = NULL;
2157     }
2158 
2159     if (wtc->parity1) {
2160         ExFreePool(wtc->parity1);
2161         wtc->parity1 = NULL;
2162     }
2163 
2164     if (wtc->parity2) {
2165         ExFreePool(wtc->parity2);
2166         wtc->parity2 = NULL;
2167     }
2168 
2169     if (wtc->scratch) {
2170         ExFreePool(wtc->scratch);
2171         wtc->scratch = NULL;
2172     }
2173 
2174     ExFreePool(stripes);
2175     return Status;
2176 }
2177 
2178 __attribute__((nonnull(1,4,5)))
get_raid56_lock_range(chunk * c,uint64_t address,uint64_t length,uint64_t * lockaddr,uint64_t * locklen)2179 void get_raid56_lock_range(chunk* c, uint64_t address, uint64_t length, uint64_t* lockaddr, uint64_t* locklen) {
2180     uint64_t startoff, endoff;
2181     uint16_t startoffstripe, endoffstripe, datastripes;
2182 
2183     datastripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
2184 
2185     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, datastripes, &startoff, &startoffstripe);
2186     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, datastripes, &endoff, &endoffstripe);
2187 
2188     startoff -= startoff % c->chunk_item->stripe_length;
2189     endoff = sector_align(endoff, c->chunk_item->stripe_length);
2190 
2191     *lockaddr = c->offset + (startoff * datastripes);
2192     *locklen = (endoff - startoff) * datastripes;
2193 }
2194 
2195 __attribute__((nonnull(1,3)))
write_data_complete(device_extension * Vcb,uint64_t address,void * data,uint32_t length,PIRP Irp,chunk * c,bool file_write,uint64_t irp_offset,ULONG priority)2196 NTSTATUS write_data_complete(device_extension* Vcb, uint64_t address, void* data, uint32_t length, PIRP Irp, chunk* c, bool file_write, uint64_t irp_offset, ULONG priority) {
2197     write_data_context wtc;
2198     NTSTATUS Status;
2199     uint64_t lockaddr, locklen;
2200 
2201     KeInitializeEvent(&wtc.Event, NotificationEvent, false);
2202     InitializeListHead(&wtc.stripes);
2203     wtc.stripes_left = 0;
2204     wtc.parity1 = wtc.parity2 = wtc.scratch = NULL;
2205     wtc.mdl = wtc.parity1_mdl = wtc.parity2_mdl = NULL;
2206 
2207     if (!c) {
2208         c = get_chunk_from_address(Vcb, address);
2209         if (!c) {
2210             ERR("could not get chunk for address %I64x\n", address);
2211             return STATUS_INTERNAL_ERROR;
2212         }
2213     }
2214 
2215     if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
2216         get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
2217         chunk_lock_range(Vcb, c, lockaddr, locklen);
2218     }
2219 
2220     _SEH2_TRY {
2221         Status = write_data(Vcb, address, data, length, &wtc, Irp, c, file_write, irp_offset, priority);
2222     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2223         Status = _SEH2_GetExceptionCode();
2224     } _SEH2_END;
2225 
2226     if (!NT_SUCCESS(Status)) {
2227         ERR("write_data returned %08lx\n", Status);
2228 
2229         if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2230             chunk_unlock_range(Vcb, c, lockaddr, locklen);
2231 
2232         free_write_data_stripes(&wtc);
2233         return Status;
2234     }
2235 
2236     if (wtc.stripes.Flink != &wtc.stripes) {
2237         // launch writes and wait
2238         LIST_ENTRY* le = wtc.stripes.Flink;
2239         bool no_wait = true;
2240 
2241         while (le != &wtc.stripes) {
2242             write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2243 
2244             if (stripe->status != WriteDataStatus_Ignore) {
2245                 IoCallDriver(stripe->device->devobj, stripe->Irp);
2246                 no_wait = false;
2247             }
2248 
2249             le = le->Flink;
2250         }
2251 
2252         if (!no_wait)
2253             KeWaitForSingleObject(&wtc.Event, Executive, KernelMode, false, NULL);
2254 
2255         le = wtc.stripes.Flink;
2256         while (le != &wtc.stripes) {
2257             write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2258 
2259             if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
2260                 Status = stripe->iosb.Status;
2261 
2262                 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2263                 break;
2264             }
2265 
2266             le = le->Flink;
2267         }
2268 
2269         free_write_data_stripes(&wtc);
2270     }
2271 
2272     if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2273         chunk_unlock_range(Vcb, c, lockaddr, locklen);
2274 
2275     return Status;
2276 }
2277 
2278 __attribute__((nonnull(2,3)))
_Function_class_(IO_COMPLETION_ROUTINE)2279 _Function_class_(IO_COMPLETION_ROUTINE)
2280 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2281     write_data_stripe* stripe = conptr;
2282     write_data_context* context = (write_data_context*)stripe->context;
2283     LIST_ENTRY* le;
2284 
2285     UNUSED(DeviceObject);
2286 
2287     // FIXME - we need a lock here
2288 
2289     if (stripe->status == WriteDataStatus_Cancelling) {
2290         stripe->status = WriteDataStatus_Cancelled;
2291         goto end;
2292     }
2293 
2294     stripe->iosb = Irp->IoStatus;
2295 
2296     if (NT_SUCCESS(Irp->IoStatus.Status)) {
2297         stripe->status = WriteDataStatus_Success;
2298     } else {
2299         le = context->stripes.Flink;
2300 
2301         stripe->status = WriteDataStatus_Error;
2302 
2303         while (le != &context->stripes) {
2304             write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2305 
2306             if (s2->status == WriteDataStatus_Pending) {
2307                 s2->status = WriteDataStatus_Cancelling;
2308                 IoCancelIrp(s2->Irp);
2309             }
2310 
2311             le = le->Flink;
2312         }
2313     }
2314 
2315 end:
2316     if (InterlockedDecrement(&context->stripes_left) == 0)
2317         KeSetEvent(&context->Event, 0, false);
2318 
2319     return STATUS_MORE_PROCESSING_REQUIRED;
2320 }
2321 
2322 __attribute__((nonnull(1)))
free_write_data_stripes(write_data_context * wtc)2323 void free_write_data_stripes(write_data_context* wtc) {
2324     LIST_ENTRY* le;
2325     PMDL last_mdl = NULL;
2326 
2327     if (wtc->parity1_mdl) {
2328         if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2329             MmUnlockPages(wtc->parity1_mdl);
2330 
2331         IoFreeMdl(wtc->parity1_mdl);
2332     }
2333 
2334     if (wtc->parity2_mdl) {
2335         if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2336             MmUnlockPages(wtc->parity2_mdl);
2337 
2338         IoFreeMdl(wtc->parity2_mdl);
2339     }
2340 
2341     if (wtc->mdl) {
2342         if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2343             MmUnlockPages(wtc->mdl);
2344 
2345         IoFreeMdl(wtc->mdl);
2346     }
2347 
2348     if (wtc->parity1)
2349         ExFreePool(wtc->parity1);
2350 
2351     if (wtc->parity2)
2352         ExFreePool(wtc->parity2);
2353 
2354     if (wtc->scratch)
2355         ExFreePool(wtc->scratch);
2356 
2357     le = wtc->stripes.Flink;
2358     while (le != &wtc->stripes) {
2359         write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2360 
2361         if (stripe->mdl && stripe->mdl != last_mdl) {
2362             if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2363                 MmUnlockPages(stripe->mdl);
2364 
2365             IoFreeMdl(stripe->mdl);
2366         }
2367 
2368         last_mdl = stripe->mdl;
2369 
2370         if (stripe->Irp)
2371             IoFreeIrp(stripe->Irp);
2372 
2373         le = le->Flink;
2374     }
2375 
2376     while (!IsListEmpty(&wtc->stripes)) {
2377         write_data_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&wtc->stripes), write_data_stripe, list_entry);
2378 
2379         ExFreePool(stripe);
2380     }
2381 }
2382 
2383 __attribute__((nonnull(1,2,3)))
add_extent(_In_ fcb * fcb,_In_ LIST_ENTRY * prevextle,_In_ __drv_aliasesMem extent * newext)2384 void add_extent(_In_ fcb* fcb, _In_ LIST_ENTRY* prevextle, _In_ __drv_aliasesMem extent* newext) {
2385     LIST_ENTRY* le = prevextle->Flink;
2386 
2387     while (le != &fcb->extents) {
2388         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2389 
2390         if (ext->offset >= newext->offset) {
2391             InsertHeadList(ext->list_entry.Blink, &newext->list_entry);
2392             return;
2393         }
2394 
2395         le = le->Flink;
2396     }
2397 
2398     InsertTailList(&fcb->extents, &newext->list_entry);
2399 }
2400 
2401 __attribute__((nonnull(1,2,6)))
excise_extents(device_extension * Vcb,fcb * fcb,uint64_t start_data,uint64_t end_data,PIRP Irp,LIST_ENTRY * rollback)2402 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t end_data, PIRP Irp, LIST_ENTRY* rollback) {
2403     NTSTATUS Status;
2404     LIST_ENTRY* le;
2405 
2406     le = fcb->extents.Flink;
2407 
2408     while (le != &fcb->extents) {
2409         LIST_ENTRY* le2 = le->Flink;
2410         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2411 
2412         if (!ext->ignore) {
2413             EXTENT_DATA* ed = &ext->extent_data;
2414             uint64_t len;
2415 
2416             if (ed->type == EXTENT_TYPE_INLINE)
2417                 len = ed->decoded_size;
2418             else
2419                 len = ((EXTENT_DATA2*)ed->data)->num_bytes;
2420 
2421             if (ext->offset < end_data && ext->offset + len > start_data) {
2422                 if (ed->type == EXTENT_TYPE_INLINE) {
2423                     if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2424                         remove_fcb_extent(fcb, ext, rollback);
2425 
2426                         fcb->inode_item.st_blocks -= len;
2427                         fcb->inode_item_changed = true;
2428                     } else {
2429                         ERR("trying to split inline extent\n");
2430 #ifdef DEBUG_PARANOID
2431                         int3;
2432 #endif
2433                         return STATUS_INTERNAL_ERROR;
2434                     }
2435                 } else {
2436                     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
2437 
2438                     if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2439                         if (ed2->size != 0) {
2440                             chunk* c;
2441 
2442                             fcb->inode_item.st_blocks -= len;
2443                             fcb->inode_item_changed = true;
2444 
2445                             c = get_chunk_from_address(Vcb, ed2->address);
2446 
2447                             if (!c) {
2448                                 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2449                             } else {
2450                                 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
2451                                                                    fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
2452                                 if (!NT_SUCCESS(Status)) {
2453                                     ERR("update_changed_extent_ref returned %08lx\n", Status);
2454                                     goto end;
2455                                 }
2456                             }
2457                         }
2458 
2459                         remove_fcb_extent(fcb, ext, rollback);
2460                     } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
2461                         EXTENT_DATA2* ned2;
2462                         extent* newext;
2463 
2464                         if (ed2->size != 0) {
2465                             fcb->inode_item.st_blocks -= end_data - ext->offset;
2466                             fcb->inode_item_changed = true;
2467                         }
2468 
2469                         newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2470                         if (!newext) {
2471                             ERR("out of memory\n");
2472                             Status = STATUS_INSUFFICIENT_RESOURCES;
2473                             goto end;
2474                         }
2475 
2476                         ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2477 
2478                         newext->extent_data.generation = Vcb->superblock.generation;
2479                         newext->extent_data.decoded_size = ed->decoded_size;
2480                         newext->extent_data.compression = ed->compression;
2481                         newext->extent_data.encryption = ed->encryption;
2482                         newext->extent_data.encoding = ed->encoding;
2483                         newext->extent_data.type = ed->type;
2484                         ned2->address = ed2->address;
2485                         ned2->size = ed2->size;
2486                         ned2->offset = ed2->offset + (end_data - ext->offset);
2487                         ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);
2488 
2489                         newext->offset = end_data;
2490                         newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2491                         newext->unique = ext->unique;
2492                         newext->ignore = false;
2493                         newext->inserted = true;
2494 
2495                         if (ext->csum) {
2496                             if (ed->compression == BTRFS_COMPRESSION_NONE) {
2497                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2498                                 if (!newext->csum) {
2499                                     ERR("out of memory\n");
2500                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2501                                     ExFreePool(newext);
2502                                     goto end;
2503                                 }
2504 
2505                                 RtlCopyMemory(newext->csum, (uint8_t*)ext->csum + (((end_data - ext->offset) * Vcb->csum_size) >> Vcb->sector_shift),
2506                                               (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
2507                             } else {
2508                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2509                                 if (!newext->csum) {
2510                                     ERR("out of memory\n");
2511                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2512                                     ExFreePool(newext);
2513                                     goto end;
2514                                 }
2515 
2516                                 RtlCopyMemory(newext->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
2517                             }
2518                         } else
2519                             newext->csum = NULL;
2520 
2521                         add_extent(fcb, &ext->list_entry, newext);
2522 
2523                         remove_fcb_extent(fcb, ext, rollback);
2524                     } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
2525                         EXTENT_DATA2* ned2;
2526                         extent* newext;
2527 
2528                         if (ed2->size != 0) {
2529                             fcb->inode_item.st_blocks -= ext->offset + len - start_data;
2530                             fcb->inode_item_changed = true;
2531                         }
2532 
2533                         newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2534                         if (!newext) {
2535                             ERR("out of memory\n");
2536                             Status = STATUS_INSUFFICIENT_RESOURCES;
2537                             goto end;
2538                         }
2539 
2540                         ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2541 
2542                         newext->extent_data.generation = Vcb->superblock.generation;
2543                         newext->extent_data.decoded_size = ed->decoded_size;
2544                         newext->extent_data.compression = ed->compression;
2545                         newext->extent_data.encryption = ed->encryption;
2546                         newext->extent_data.encoding = ed->encoding;
2547                         newext->extent_data.type = ed->type;
2548                         ned2->address = ed2->address;
2549                         ned2->size = ed2->size;
2550                         ned2->offset = ed2->offset;
2551                         ned2->num_bytes = start_data - ext->offset;
2552 
2553                         newext->offset = ext->offset;
2554                         newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2555                         newext->unique = ext->unique;
2556                         newext->ignore = false;
2557                         newext->inserted = true;
2558 
2559                         if (ext->csum) {
2560                             if (ed->compression == BTRFS_COMPRESSION_NONE) {
2561                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2562                                 if (!newext->csum) {
2563                                     ERR("out of memory\n");
2564                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2565                                     ExFreePool(newext);
2566                                     goto end;
2567                                 }
2568 
2569                                 RtlCopyMemory(newext->csum, ext->csum, (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
2570                             } else {
2571                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2572                                 if (!newext->csum) {
2573                                     ERR("out of memory\n");
2574                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2575                                     ExFreePool(newext);
2576                                     goto end;
2577                                 }
2578 
2579                                 RtlCopyMemory(newext->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
2580                             }
2581                         } else
2582                             newext->csum = NULL;
2583 
2584                         InsertHeadList(&ext->list_entry, &newext->list_entry);
2585 
2586                         remove_fcb_extent(fcb, ext, rollback);
2587                     } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
2588                         EXTENT_DATA2 *neda2, *nedb2;
2589                         extent *newext1, *newext2;
2590 
2591                         if (ed2->size != 0) {
2592                             chunk* c;
2593 
2594                             fcb->inode_item.st_blocks -= end_data - start_data;
2595                             fcb->inode_item_changed = true;
2596 
2597                             c = get_chunk_from_address(Vcb, ed2->address);
2598 
2599                             if (!c) {
2600                                 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2601                             } else {
2602                                 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
2603                                                                    fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
2604                                 if (!NT_SUCCESS(Status)) {
2605                                     ERR("update_changed_extent_ref returned %08lx\n", Status);
2606                                     goto end;
2607                                 }
2608                             }
2609                         }
2610 
2611                         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2612                         if (!newext1) {
2613                             ERR("out of memory\n");
2614                             Status = STATUS_INSUFFICIENT_RESOURCES;
2615                             goto end;
2616                         }
2617 
2618                         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2619                         if (!newext2) {
2620                             ERR("out of memory\n");
2621                             Status = STATUS_INSUFFICIENT_RESOURCES;
2622                             ExFreePool(newext1);
2623                             goto end;
2624                         }
2625 
2626                         neda2 = (EXTENT_DATA2*)newext1->extent_data.data;
2627 
2628                         newext1->extent_data.generation = Vcb->superblock.generation;
2629                         newext1->extent_data.decoded_size = ed->decoded_size;
2630                         newext1->extent_data.compression = ed->compression;
2631                         newext1->extent_data.encryption = ed->encryption;
2632                         newext1->extent_data.encoding = ed->encoding;
2633                         newext1->extent_data.type = ed->type;
2634                         neda2->address = ed2->address;
2635                         neda2->size = ed2->size;
2636                         neda2->offset = ed2->offset;
2637                         neda2->num_bytes = start_data - ext->offset;
2638 
2639                         nedb2 = (EXTENT_DATA2*)newext2->extent_data.data;
2640 
2641                         newext2->extent_data.generation = Vcb->superblock.generation;
2642                         newext2->extent_data.decoded_size = ed->decoded_size;
2643                         newext2->extent_data.compression = ed->compression;
2644                         newext2->extent_data.encryption = ed->encryption;
2645                         newext2->extent_data.encoding = ed->encoding;
2646                         newext2->extent_data.type = ed->type;
2647                         nedb2->address = ed2->address;
2648                         nedb2->size = ed2->size;
2649                         nedb2->offset = ed2->offset + (end_data - ext->offset);
2650                         nedb2->num_bytes = ext->offset + len - end_data;
2651 
2652                         newext1->offset = ext->offset;
2653                         newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2654                         newext1->unique = ext->unique;
2655                         newext1->ignore = false;
2656                         newext1->inserted = true;
2657 
2658                         newext2->offset = end_data;
2659                         newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2660                         newext2->unique = ext->unique;
2661                         newext2->ignore = false;
2662                         newext2->inserted = true;
2663 
2664                         if (ext->csum) {
2665                             if (ed->compression == BTRFS_COMPRESSION_NONE) {
2666                                 newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((neda2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2667                                 if (!newext1->csum) {
2668                                     ERR("out of memory\n");
2669                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2670                                     ExFreePool(newext1);
2671                                     ExFreePool(newext2);
2672                                     goto end;
2673                                 }
2674 
2675                                 newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((nedb2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2676                                 if (!newext2->csum) {
2677                                     ERR("out of memory\n");
2678                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2679                                     ExFreePool(newext1->csum);
2680                                     ExFreePool(newext1);
2681                                     ExFreePool(newext2);
2682                                     goto end;
2683                                 }
2684 
2685                                 RtlCopyMemory(newext1->csum, ext->csum, (ULONG)((neda2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
2686                                 RtlCopyMemory(newext2->csum, (uint8_t*)ext->csum + (((end_data - ext->offset) * Vcb->csum_size) >> Vcb->sector_shift),
2687                                               (ULONG)((nedb2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
2688                             } else {
2689                                 newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2690                                 if (!newext1->csum) {
2691                                     ERR("out of memory\n");
2692                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2693                                     ExFreePool(newext1);
2694                                     ExFreePool(newext2);
2695                                     goto end;
2696                                 }
2697 
2698                                 newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2699                                 if (!newext2->csum) {
2700                                     ERR("out of memory\n");
2701                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2702                                     ExFreePool(newext1->csum);
2703                                     ExFreePool(newext1);
2704                                     ExFreePool(newext2);
2705                                     goto end;
2706                                 }
2707 
2708                                 RtlCopyMemory(newext1->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
2709                                 RtlCopyMemory(newext2->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
2710                             }
2711                         } else {
2712                             newext1->csum = NULL;
2713                             newext2->csum = NULL;
2714                         }
2715 
2716                         InsertHeadList(&ext->list_entry, &newext1->list_entry);
2717                         add_extent(fcb, &newext1->list_entry, newext2);
2718 
2719                         remove_fcb_extent(fcb, ext, rollback);
2720                     }
2721                 }
2722             }
2723         }
2724 
2725         le = le2;
2726     }
2727 
2728     Status = STATUS_SUCCESS;
2729 
2730 end:
2731     fcb->extents_changed = true;
2732     mark_fcb_dirty(fcb);
2733 
2734     return Status;
2735 }
2736 
2737 __attribute__((nonnull(1,2,3)))
add_insert_extent_rollback(LIST_ENTRY * rollback,fcb * fcb,extent * ext)2738 static void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) {
2739     rollback_extent* re;
2740 
2741     re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2742     if (!re) {
2743         ERR("out of memory\n");
2744         return;
2745     }
2746 
2747     re->fcb = fcb;
2748     re->ext = ext;
2749 
2750     add_rollback(rollback, ROLLBACK_INSERT_EXTENT, re);
2751 }
2752 
2753 #ifdef _MSC_VER
2754 #pragma warning(push)
2755 #pragma warning(suppress: 28194)
2756 #endif
2757 __attribute__((nonnull(1,3,7)))
2758 NTSTATUS add_extent_to_fcb(_In_ fcb* fcb, _In_ uint64_t offset, _In_reads_bytes_(edsize) EXTENT_DATA* ed, _In_ uint16_t edsize,
2759                            _In_ bool unique, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* csum, _In_ LIST_ENTRY* rollback) {
2760     extent* ext;
2761     LIST_ENTRY* le;
2762 
2763     ext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + edsize, ALLOC_TAG);
2764     if (!ext) {
2765         ERR("out of memory\n");
2766         return STATUS_INSUFFICIENT_RESOURCES;
2767     }
2768 
2769     ext->offset = offset;
2770     ext->datalen = edsize;
2771     ext->unique = unique;
2772     ext->ignore = false;
2773     ext->inserted = true;
2774     ext->csum = csum;
2775 
2776     RtlCopyMemory(&ext->extent_data, ed, edsize);
2777 
2778     le = fcb->extents.Flink;
2779     while (le != &fcb->extents) {
2780         extent* oldext = CONTAINING_RECORD(le, extent, list_entry);
2781 
2782         if (oldext->offset >= offset) {
2783             InsertHeadList(le->Blink, &ext->list_entry);
2784             goto end;
2785         }
2786 
2787         le = le->Flink;
2788     }
2789 
2790     InsertTailList(&fcb->extents, &ext->list_entry);
2791 
2792 end:
2793     add_insert_extent_rollback(rollback, fcb, ext);
2794 
2795     return STATUS_SUCCESS;
2796 }
2797 #ifdef _MSC_VER
2798 #pragma warning(pop)
2799 #endif
2800 
2801 __attribute__((nonnull(1, 2, 3)))
remove_fcb_extent(fcb * fcb,extent * ext,LIST_ENTRY * rollback)2802 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) {
2803     if (!ext->ignore) {
2804         rollback_extent* re;
2805 
2806         ext->ignore = true;
2807 
2808         re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2809         if (!re) {
2810             ERR("out of memory\n");
2811             return;
2812         }
2813 
2814         re->fcb = fcb;
2815         re->ext = ext;
2816 
2817         add_rollback(rollback, ROLLBACK_DELETE_EXTENT, re);
2818     }
2819 }
2820 
2821 _Requires_lock_held_(c->lock)
2822 _When_(return != 0, _Releases_lock_(c->lock))
2823 __attribute__((nonnull(1,2,3,9)))
insert_extent_chunk(_In_ device_extension * Vcb,_In_ fcb * fcb,_In_ chunk * c,_In_ uint64_t start_data,_In_ uint64_t length,_In_ bool prealloc,_In_opt_ void * data,_In_opt_ PIRP Irp,_In_ LIST_ENTRY * rollback,_In_ uint8_t compression,_In_ uint64_t decoded_size,_In_ bool file_write,_In_ uint64_t irp_offset)2824 bool insert_extent_chunk(_In_ device_extension* Vcb, _In_ fcb* fcb, _In_ chunk* c, _In_ uint64_t start_data, _In_ uint64_t length, _In_ bool prealloc, _In_opt_ void* data,
2825                          _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback, _In_ uint8_t compression, _In_ uint64_t decoded_size, _In_ bool file_write, _In_ uint64_t irp_offset) {
2826     uint64_t address;
2827     NTSTATUS Status;
2828     EXTENT_DATA* ed;
2829     EXTENT_DATA2* ed2;
2830     uint16_t edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
2831     void* csum = NULL;
2832 
2833     TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %I64x, %u, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, rollback);
2834 
2835     if (!find_data_address_in_chunk(Vcb, c, length, &address))
2836         return false;
2837 
2838     // add extent data to inode
2839     ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
2840     if (!ed) {
2841         ERR("out of memory\n");
2842         return false;
2843     }
2844 
2845     ed->generation = Vcb->superblock.generation;
2846     ed->decoded_size = decoded_size;
2847     ed->compression = compression;
2848     ed->encryption = BTRFS_ENCRYPTION_NONE;
2849     ed->encoding = BTRFS_ENCODING_NONE;
2850     ed->type = prealloc ? EXTENT_TYPE_PREALLOC : EXTENT_TYPE_REGULAR;
2851 
2852     ed2 = (EXTENT_DATA2*)ed->data;
2853     ed2->address = address;
2854     ed2->size = length;
2855     ed2->offset = 0;
2856     ed2->num_bytes = decoded_size;
2857 
2858     if (!prealloc && data && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
2859         ULONG sl = (ULONG)(length >> Vcb->sector_shift);
2860 
2861         csum = ExAllocatePoolWithTag(PagedPool, sl * Vcb->csum_size, ALLOC_TAG);
2862         if (!csum) {
2863             ERR("out of memory\n");
2864             ExFreePool(ed);
2865             return false;
2866         }
2867 
2868         do_calc_job(Vcb, data, sl, csum);
2869     }
2870 
2871     Status = add_extent_to_fcb(fcb, start_data, ed, edsize, true, csum, rollback);
2872     if (!NT_SUCCESS(Status)) {
2873         ERR("add_extent_to_fcb returned %08lx\n", Status);
2874         if (csum) ExFreePool(csum);
2875         ExFreePool(ed);
2876         return false;
2877     }
2878 
2879     ExFreePool(ed);
2880 
2881     c->used += length;
2882     space_list_subtract(c, address, length, rollback);
2883 
2884     fcb->inode_item.st_blocks += decoded_size;
2885 
2886     fcb->extents_changed = true;
2887     fcb->inode_item_changed = true;
2888     mark_fcb_dirty(fcb);
2889 
2890     ExAcquireResourceExclusiveLite(&c->changed_extents_lock, true);
2891 
2892     add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
2893 
2894     ExReleaseResourceLite(&c->changed_extents_lock);
2895 
2896     release_chunk_lock(c, Vcb);
2897 
2898     if (data) {
2899         Status = write_data_complete(Vcb, address, data, (uint32_t)length, Irp, NULL, file_write, irp_offset,
2900                                      fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2901         if (!NT_SUCCESS(Status))
2902             ERR("write_data_complete returned %08lx\n", Status);
2903     }
2904 
2905     return true;
2906 }
2907 
2908 __attribute__((nonnull(1,2,5,7,10)))
try_extend_data(device_extension * Vcb,fcb * fcb,uint64_t start_data,uint64_t length,void * data,PIRP Irp,uint64_t * written,bool file_write,uint64_t irp_offset,LIST_ENTRY * rollback)2909 static bool try_extend_data(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
2910                             PIRP Irp, uint64_t* written, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
2911     bool success = false;
2912     EXTENT_DATA* ed;
2913     EXTENT_DATA2* ed2;
2914     chunk* c;
2915     LIST_ENTRY* le;
2916     extent* ext = NULL;
2917 
2918     le = fcb->extents.Flink;
2919 
2920     while (le != &fcb->extents) {
2921         extent* nextext = CONTAINING_RECORD(le, extent, list_entry);
2922 
2923         if (!nextext->ignore) {
2924             if (nextext->offset == start_data) {
2925                 ext = nextext;
2926                 break;
2927             } else if (nextext->offset > start_data)
2928                 break;
2929 
2930             ext = nextext;
2931         }
2932 
2933         le = le->Flink;
2934     }
2935 
2936     if (!ext)
2937         return false;
2938 
2939     ed = &ext->extent_data;
2940 
2941     if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
2942         TRACE("not extending extent which is not regular or prealloc\n");
2943         return false;
2944     }
2945 
2946     ed2 = (EXTENT_DATA2*)ed->data;
2947 
2948     if (ext->offset + ed2->num_bytes != start_data) {
2949         TRACE("last EXTENT_DATA does not run up to start_data (%I64x + %I64x != %I64x)\n", ext->offset, ed2->num_bytes, start_data);
2950         return false;
2951     }
2952 
2953     c = get_chunk_from_address(Vcb, ed2->address);
2954 
2955     if (c->reloc || c->readonly || c->chunk_item->type != Vcb->data_flags)
2956         return false;
2957 
2958     acquire_chunk_lock(c, Vcb);
2959 
2960     if (length > c->chunk_item->size - c->used) {
2961         release_chunk_lock(c, Vcb);
2962         return false;
2963     }
2964 
2965     if (!c->cache_loaded) {
2966         NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
2967 
2968         if (!NT_SUCCESS(Status)) {
2969             ERR("load_cache_chunk returned %08lx\n", Status);
2970             release_chunk_lock(c, Vcb);
2971             return false;
2972         }
2973     }
2974 
2975     le = c->space.Flink;
2976     while (le != &c->space) {
2977         space* s = CONTAINING_RECORD(le, space, list_entry);
2978 
2979         if (s->address == ed2->address + ed2->size) {
2980             uint64_t newlen = min(min(s->size, length), MAX_EXTENT_SIZE);
2981 
2982             success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset);
2983 
2984             if (success)
2985                 *written += newlen;
2986             else
2987                 release_chunk_lock(c, Vcb);
2988 
2989             return success;
2990         } else if (s->address > ed2->address + ed2->size)
2991             break;
2992 
2993         le = le->Flink;
2994     }
2995 
2996     release_chunk_lock(c, Vcb);
2997 
2998     return false;
2999 }
3000 
3001 __attribute__((nonnull(1)))
insert_chunk_fragmented(fcb * fcb,uint64_t start,uint64_t length,uint8_t * data,bool prealloc,LIST_ENTRY * rollback)3002 static NTSTATUS insert_chunk_fragmented(fcb* fcb, uint64_t start, uint64_t length, uint8_t* data, bool prealloc, LIST_ENTRY* rollback) {
3003     LIST_ENTRY* le;
3004     uint64_t flags = fcb->Vcb->data_flags;
3005     bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3006     NTSTATUS Status;
3007     chunk* c;
3008 
3009     ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3010 
3011     // first create as many chunks as we can
3012     do {
3013         Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3014     } while (NT_SUCCESS(Status));
3015 
3016     if (Status != STATUS_DISK_FULL) {
3017         ERR("alloc_chunk returned %08lx\n", Status);
3018         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3019         return Status;
3020     }
3021 
3022     le = fcb->Vcb->chunks.Flink;
3023     while (le != &fcb->Vcb->chunks) {
3024         c = CONTAINING_RECORD(le, chunk, list_entry);
3025 
3026         if (!c->readonly && !c->reloc) {
3027             acquire_chunk_lock(c, fcb->Vcb);
3028 
3029             if (c->chunk_item->type == flags) {
3030                 while (!IsListEmpty(&c->space_size) && length > 0) {
3031                     space* s = CONTAINING_RECORD(c->space_size.Flink, space, list_entry_size);
3032                     uint64_t extlen = min(length, s->size);
3033 
3034                     if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, prealloc && !page_file, data, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3035                         start += extlen;
3036                         length -= extlen;
3037                         if (data) data += extlen;
3038 
3039                         acquire_chunk_lock(c, fcb->Vcb);
3040                     }
3041                 }
3042             }
3043 
3044             release_chunk_lock(c, fcb->Vcb);
3045 
3046             if (length == 0)
3047                 break;
3048         }
3049 
3050         le = le->Flink;
3051     }
3052 
3053     ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3054 
3055     return length == 0 ? STATUS_SUCCESS : STATUS_DISK_FULL;
3056 }
3057 
3058 __attribute__((nonnull(1,4)))
insert_prealloc_extent(fcb * fcb,uint64_t start,uint64_t length,LIST_ENTRY * rollback)3059 static NTSTATUS insert_prealloc_extent(fcb* fcb, uint64_t start, uint64_t length, LIST_ENTRY* rollback) {
3060     LIST_ENTRY* le;
3061     chunk* c;
3062     uint64_t flags;
3063     NTSTATUS Status;
3064     bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3065 
3066     flags = fcb->Vcb->data_flags;
3067 
3068     do {
3069         uint64_t extlen = min(MAX_EXTENT_SIZE, length);
3070 
3071         ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3072 
3073         le = fcb->Vcb->chunks.Flink;
3074         while (le != &fcb->Vcb->chunks) {
3075             c = CONTAINING_RECORD(le, chunk, list_entry);
3076 
3077             if (!c->readonly && !c->reloc) {
3078                 acquire_chunk_lock(c, fcb->Vcb);
3079 
3080                 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3081                     if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3082                         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3083                         goto cont;
3084                     }
3085                 }
3086 
3087                 release_chunk_lock(c, fcb->Vcb);
3088             }
3089 
3090             le = le->Flink;
3091         }
3092 
3093         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3094 
3095         ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, true);
3096 
3097         Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3098 
3099         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3100 
3101         if (!NT_SUCCESS(Status)) {
3102             ERR("alloc_chunk returned %08lx\n", Status);
3103             goto end;
3104         }
3105 
3106         acquire_chunk_lock(c, fcb->Vcb);
3107 
3108         if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3109             if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0))
3110                 goto cont;
3111         }
3112 
3113         release_chunk_lock(c, fcb->Vcb);
3114 
3115         Status = insert_chunk_fragmented(fcb, start, length, NULL, true, rollback);
3116         if (!NT_SUCCESS(Status))
3117             ERR("insert_chunk_fragmented returned %08lx\n", Status);
3118 
3119         goto end;
3120 
3121 cont:
3122         length -= extlen;
3123         start += extlen;
3124     } while (length > 0);
3125 
3126     Status = STATUS_SUCCESS;
3127 
3128 end:
3129     return Status;
3130 }
3131 
3132 __attribute__((nonnull(1,2,5,9)))
insert_extent(device_extension * Vcb,fcb * fcb,uint64_t start_data,uint64_t length,void * data,PIRP Irp,bool file_write,uint64_t irp_offset,LIST_ENTRY * rollback)3133 static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
3134                               PIRP Irp, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
3135     NTSTATUS Status;
3136     LIST_ENTRY* le;
3137     chunk* c;
3138     uint64_t flags, orig_length = length, written = 0;
3139 
3140     TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data);
3141 
3142     if (start_data > 0) {
3143         try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, file_write, irp_offset, rollback);
3144 
3145         if (written == length)
3146             return STATUS_SUCCESS;
3147         else if (written > 0) {
3148             start_data += written;
3149             irp_offset += written;
3150             length -= written;
3151             data = &((uint8_t*)data)[written];
3152         }
3153     }
3154 
3155     flags = Vcb->data_flags;
3156 
3157     while (written < orig_length) {
3158         uint64_t newlen = min(length, MAX_EXTENT_SIZE);
3159         bool done = false;
3160 
3161         // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB.
3162         // First, see if we can write the extent part to an existing chunk.
3163 
3164         ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
3165 
3166         le = Vcb->chunks.Flink;
3167         while (le != &Vcb->chunks) {
3168             c = CONTAINING_RECORD(le, chunk, list_entry);
3169 
3170             if (!c->readonly && !c->reloc) {
3171                 acquire_chunk_lock(c, Vcb);
3172 
3173                 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3174                     insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3175                     written += newlen;
3176 
3177                     if (written == orig_length) {
3178                         ExReleaseResourceLite(&Vcb->chunk_lock);
3179                         return STATUS_SUCCESS;
3180                     } else {
3181                         done = true;
3182                         start_data += newlen;
3183                         irp_offset += newlen;
3184                         length -= newlen;
3185                         data = &((uint8_t*)data)[newlen];
3186                         break;
3187                     }
3188                 } else
3189                     release_chunk_lock(c, Vcb);
3190             }
3191 
3192             le = le->Flink;
3193         }
3194 
3195         ExReleaseResourceLite(&Vcb->chunk_lock);
3196 
3197         if (done) continue;
3198 
3199         // Otherwise, see if we can put it in a new chunk.
3200 
3201         ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
3202 
3203         Status = alloc_chunk(Vcb, flags, &c, false);
3204 
3205         ExReleaseResourceLite(&Vcb->chunk_lock);
3206 
3207         if (!NT_SUCCESS(Status)) {
3208             ERR("alloc_chunk returned %08lx\n", Status);
3209             return Status;
3210         }
3211 
3212         if (c) {
3213             acquire_chunk_lock(c, Vcb);
3214 
3215             if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3216                 insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3217                 written += newlen;
3218 
3219                 if (written == orig_length)
3220                     return STATUS_SUCCESS;
3221                 else {
3222                     done = true;
3223                     start_data += newlen;
3224                     irp_offset += newlen;
3225                     length -= newlen;
3226                     data = &((uint8_t*)data)[newlen];
3227                 }
3228             } else
3229                 release_chunk_lock(c, Vcb);
3230         }
3231 
3232         if (!done) {
3233             Status = insert_chunk_fragmented(fcb, start_data, length, data, false, rollback);
3234             if (!NT_SUCCESS(Status))
3235                 ERR("insert_chunk_fragmented returned %08lx\n", Status);
3236 
3237             return Status;
3238         }
3239     }
3240 
3241     return STATUS_DISK_FULL;
3242 }
3243 
3244 __attribute__((nonnull(1,4)))
truncate_file(fcb * fcb,uint64_t end,PIRP Irp,LIST_ENTRY * rollback)3245 NTSTATUS truncate_file(fcb* fcb, uint64_t end, PIRP Irp, LIST_ENTRY* rollback) {
3246     NTSTATUS Status;
3247 
3248     // FIXME - convert into inline extent if short enough
3249 
3250     if (end > 0 && fcb_is_inline(fcb)) {
3251         uint8_t* buf;
3252         bool make_inline = end <= fcb->Vcb->options.max_inline;
3253 
3254         buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(make_inline ? (offsetof(EXTENT_DATA, data[0]) + end) : sector_align(end, fcb->Vcb->superblock.sector_size)), ALLOC_TAG);
3255         if (!buf) {
3256             ERR("out of memory\n");
3257             return STATUS_INSUFFICIENT_RESOURCES;
3258         }
3259 
3260         Status = read_file(fcb, make_inline ? (buf + offsetof(EXTENT_DATA, data[0])) : buf, 0, end, NULL, Irp);
3261         if (!NT_SUCCESS(Status)) {
3262             ERR("read_file returned %08lx\n", Status);
3263             ExFreePool(buf);
3264             return Status;
3265         }
3266 
3267         Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3268         if (!NT_SUCCESS(Status)) {
3269             ERR("excise_extents returned %08lx\n", Status);
3270             ExFreePool(buf);
3271             return Status;
3272         }
3273 
3274         if (!make_inline) {
3275             RtlZeroMemory(buf + end, (ULONG)(sector_align(end, fcb->Vcb->superblock.sector_size) - end));
3276 
3277             Status = do_write_file(fcb, 0, sector_align(end, fcb->Vcb->superblock.sector_size), buf, Irp, false, 0, rollback);
3278             if (!NT_SUCCESS(Status)) {
3279                 ERR("do_write_file returned %08lx\n", Status);
3280                 ExFreePool(buf);
3281                 return Status;
3282             }
3283         } else {
3284             EXTENT_DATA* ed = (EXTENT_DATA*)buf;
3285 
3286             ed->generation = fcb->Vcb->superblock.generation;
3287             ed->decoded_size = end;
3288             ed->compression = BTRFS_COMPRESSION_NONE;
3289             ed->encryption = BTRFS_ENCRYPTION_NONE;
3290             ed->encoding = BTRFS_ENCODING_NONE;
3291             ed->type = EXTENT_TYPE_INLINE;
3292 
3293             Status = add_extent_to_fcb(fcb, 0, ed, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end), false, NULL, rollback);
3294             if (!NT_SUCCESS(Status)) {
3295                 ERR("add_extent_to_fcb returned %08lx\n", Status);
3296                 ExFreePool(buf);
3297                 return Status;
3298             }
3299 
3300             fcb->inode_item.st_blocks += end;
3301 
3302             fcb->inode_item.st_size = end;
3303             fcb->inode_item_changed = true;
3304             TRACE("setting st_size to %I64x\n", end);
3305 
3306             fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
3307             fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
3308             fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
3309         }
3310 
3311         ExFreePool(buf);
3312         return STATUS_SUCCESS;
3313     }
3314 
3315     Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
3316                             sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
3317     if (!NT_SUCCESS(Status)) {
3318         ERR("excise_extents returned %08lx\n", Status);
3319         return Status;
3320     }
3321 
3322     fcb->inode_item.st_size = end;
3323     fcb->inode_item_changed = true;
3324     TRACE("setting st_size to %I64x\n", end);
3325 
3326     fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
3327     fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
3328     fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
3329     // FIXME - inform cache manager of this
3330 
3331     TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
3332 
3333     return STATUS_SUCCESS;
3334 }
3335 
3336 __attribute__((nonnull(1,6)))
extend_file(fcb * fcb,file_ref * fileref,uint64_t end,bool prealloc,PIRP Irp,LIST_ENTRY * rollback)3337 NTSTATUS extend_file(fcb* fcb, file_ref* fileref, uint64_t end, bool prealloc, PIRP Irp, LIST_ENTRY* rollback) {
3338     uint64_t oldalloc, newalloc;
3339     bool cur_inline;
3340     NTSTATUS Status;
3341 
3342     TRACE("(%p, %p, %I64x, %u)\n", fcb, fileref, end, prealloc);
3343 
3344     if (fcb->ads) {
3345         if (end > 0xffff)
3346             return STATUS_DISK_FULL;
3347 
3348         return stream_set_end_of_file_information(fcb->Vcb, (uint16_t)end, fcb, fileref, false);
3349     } else {
3350         extent* ext = NULL;
3351         LIST_ENTRY* le;
3352 
3353         le = fcb->extents.Blink;
3354         while (le != &fcb->extents) {
3355             extent* ext2 = CONTAINING_RECORD(le, extent, list_entry);
3356 
3357             if (!ext2->ignore) {
3358                 ext = ext2;
3359                 break;
3360             }
3361 
3362             le = le->Blink;
3363         }
3364 
3365         oldalloc = 0;
3366         if (ext) {
3367             EXTENT_DATA* ed = &ext->extent_data;
3368             EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3369 
3370             oldalloc = ext->offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
3371             cur_inline = ed->type == EXTENT_TYPE_INLINE;
3372 
3373             if (cur_inline && end > fcb->Vcb->options.max_inline) {
3374                 uint64_t origlength, length;
3375                 uint8_t* data;
3376 
3377                 TRACE("giving inline file proper extents\n");
3378 
3379                 origlength = ed->decoded_size;
3380 
3381                 cur_inline = false;
3382 
3383                 length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
3384 
3385                 data = ExAllocatePoolWithTag(PagedPool, (ULONG)length, ALLOC_TAG);
3386                 if (!data) {
3387                     ERR("could not allocate %I64x bytes for data\n", length);
3388                     return STATUS_INSUFFICIENT_RESOURCES;
3389                 }
3390 
3391                 Status = read_file(fcb, data, 0, origlength, NULL, Irp);
3392                 if (!NT_SUCCESS(Status)) {
3393                     ERR("read_file returned %08lx\n", Status);
3394                     ExFreePool(data);
3395                     return Status;
3396                 }
3397 
3398                 RtlZeroMemory(data + origlength, (ULONG)(length - origlength));
3399 
3400                 Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3401                 if (!NT_SUCCESS(Status)) {
3402                     ERR("excise_extents returned %08lx\n", Status);
3403                     ExFreePool(data);
3404                     return Status;
3405                 }
3406 
3407                 Status = do_write_file(fcb, 0, length, data, Irp, false, 0, rollback);
3408                 if (!NT_SUCCESS(Status)) {
3409                     ERR("do_write_file returned %08lx\n", Status);
3410                     ExFreePool(data);
3411                     return Status;
3412                 }
3413 
3414                 oldalloc = ext->offset + length;
3415 
3416                 ExFreePool(data);
3417             }
3418 
3419             if (cur_inline) {
3420                 uint16_t edsize;
3421 
3422                 if (end > oldalloc) {
3423                     edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end - ext->offset);
3424                     ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3425 
3426                     if (!ed) {
3427                         ERR("out of memory\n");
3428                         return STATUS_INSUFFICIENT_RESOURCES;
3429                     }
3430 
3431                     ed->generation = fcb->Vcb->superblock.generation;
3432                     ed->decoded_size = end - ext->offset;
3433                     ed->compression = BTRFS_COMPRESSION_NONE;
3434                     ed->encryption = BTRFS_ENCRYPTION_NONE;
3435                     ed->encoding = BTRFS_ENCODING_NONE;
3436                     ed->type = EXTENT_TYPE_INLINE;
3437 
3438                     Status = read_file(fcb, ed->data, ext->offset, oldalloc, NULL, Irp);
3439                     if (!NT_SUCCESS(Status)) {
3440                         ERR("read_file returned %08lx\n", Status);
3441                         ExFreePool(ed);
3442                         return Status;
3443                     }
3444 
3445                     RtlZeroMemory(ed->data + oldalloc - ext->offset, (ULONG)(end - oldalloc));
3446 
3447                     remove_fcb_extent(fcb, ext, rollback);
3448 
3449                     Status = add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback);
3450                     if (!NT_SUCCESS(Status)) {
3451                         ERR("add_extent_to_fcb returned %08lx\n", Status);
3452                         ExFreePool(ed);
3453                         return Status;
3454                     }
3455 
3456                     ExFreePool(ed);
3457 
3458                     fcb->extents_changed = true;
3459                     mark_fcb_dirty(fcb);
3460                 }
3461 
3462                 TRACE("extending inline file (oldalloc = %I64x, end = %I64x)\n", oldalloc, end);
3463 
3464                 fcb->inode_item.st_size = end;
3465                 TRACE("setting st_size to %I64x\n", end);
3466 
3467                 fcb->inode_item.st_blocks = end;
3468 
3469                 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3470             } else {
3471                 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3472 
3473                 if (newalloc > oldalloc) {
3474                     if (prealloc) {
3475                         // FIXME - try and extend previous extent first
3476 
3477                         Status = insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback);
3478 
3479                         if (!NT_SUCCESS(Status) && Status != STATUS_DISK_FULL) {
3480                             ERR("insert_prealloc_extent returned %08lx\n", Status);
3481                             return Status;
3482                         }
3483                     }
3484 
3485                     fcb->extents_changed = true;
3486                 }
3487 
3488                 fcb->inode_item.st_size = end;
3489                 fcb->inode_item_changed = true;
3490                 mark_fcb_dirty(fcb);
3491 
3492                 TRACE("setting st_size to %I64x\n", end);
3493 
3494                 TRACE("newalloc = %I64x\n", newalloc);
3495 
3496                 fcb->Header.AllocationSize.QuadPart = newalloc;
3497                 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3498             }
3499         } else {
3500             if (end > fcb->Vcb->options.max_inline) {
3501                 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3502 
3503                 if (prealloc) {
3504                     Status = insert_prealloc_extent(fcb, 0, newalloc, rollback);
3505 
3506                     if (!NT_SUCCESS(Status) && Status != STATUS_DISK_FULL) {
3507                         ERR("insert_prealloc_extent returned %08lx\n", Status);
3508                         return Status;
3509                     }
3510                 }
3511 
3512                 fcb->extents_changed = true;
3513                 fcb->inode_item_changed = true;
3514                 mark_fcb_dirty(fcb);
3515 
3516                 fcb->inode_item.st_size = end;
3517                 TRACE("setting st_size to %I64x\n", end);
3518 
3519                 TRACE("newalloc = %I64x\n", newalloc);
3520 
3521                 fcb->Header.AllocationSize.QuadPart = newalloc;
3522                 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3523             } else {
3524                 EXTENT_DATA* ed;
3525                 uint16_t edsize;
3526 
3527                 edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end);
3528                 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3529 
3530                 if (!ed) {
3531                     ERR("out of memory\n");
3532                     return STATUS_INSUFFICIENT_RESOURCES;
3533                 }
3534 
3535                 ed->generation = fcb->Vcb->superblock.generation;
3536                 ed->decoded_size = end;
3537                 ed->compression = BTRFS_COMPRESSION_NONE;
3538                 ed->encryption = BTRFS_ENCRYPTION_NONE;
3539                 ed->encoding = BTRFS_ENCODING_NONE;
3540                 ed->type = EXTENT_TYPE_INLINE;
3541 
3542                 RtlZeroMemory(ed->data, (ULONG)end);
3543 
3544                 Status = add_extent_to_fcb(fcb, 0, ed, edsize, false, NULL, rollback);
3545                 if (!NT_SUCCESS(Status)) {
3546                     ERR("add_extent_to_fcb returned %08lx\n", Status);
3547                     ExFreePool(ed);
3548                     return Status;
3549                 }
3550 
3551                 ExFreePool(ed);
3552 
3553                 fcb->extents_changed = true;
3554                 fcb->inode_item_changed = true;
3555                 mark_fcb_dirty(fcb);
3556 
3557                 fcb->inode_item.st_size = end;
3558                 TRACE("setting st_size to %I64x\n", end);
3559 
3560                 fcb->inode_item.st_blocks = end;
3561 
3562                 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3563             }
3564         }
3565     }
3566 
3567     return STATUS_SUCCESS;
3568 }
3569 
3570 __attribute__((nonnull(1,2,5,6,11)))
do_write_file_prealloc(fcb * fcb,extent * ext,uint64_t start_data,uint64_t end_data,void * data,uint64_t * written,PIRP Irp,bool file_write,uint64_t irp_offset,ULONG priority,LIST_ENTRY * rollback)3571 static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, uint64_t start_data, uint64_t end_data, void* data, uint64_t* written,
3572                                        PIRP Irp, bool file_write, uint64_t irp_offset, ULONG priority, LIST_ENTRY* rollback) {
3573     EXTENT_DATA* ed = &ext->extent_data;
3574     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3575     NTSTATUS Status;
3576     chunk* c = NULL;
3577 
3578     if (start_data <= ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace all
3579         extent* newext;
3580 
3581         newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3582         if (!newext) {
3583             ERR("out of memory\n");
3584             return STATUS_INSUFFICIENT_RESOURCES;
3585         }
3586 
3587         RtlCopyMemory(&newext->extent_data, &ext->extent_data, ext->datalen);
3588 
3589         newext->extent_data.type = EXTENT_TYPE_REGULAR;
3590 
3591         Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)ed2->num_bytes, Irp,
3592                                      NULL, file_write, irp_offset + ext->offset - start_data, priority);
3593         if (!NT_SUCCESS(Status)) {
3594             ERR("write_data_complete returned %08lx\n", Status);
3595             return Status;
3596         }
3597 
3598         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3599             ULONG sl = (ULONG)(ed2->num_bytes >> fcb->Vcb->sector_shift);
3600             void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3601 
3602             if (!csum) {
3603                 ERR("out of memory\n");
3604                 ExFreePool(newext);
3605                 return STATUS_INSUFFICIENT_RESOURCES;
3606             }
3607 
3608             do_calc_job(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3609 
3610             newext->csum = csum;
3611         } else
3612             newext->csum = NULL;
3613 
3614         *written = ed2->num_bytes;
3615 
3616         newext->offset = ext->offset;
3617         newext->datalen = ext->datalen;
3618         newext->unique = ext->unique;
3619         newext->ignore = false;
3620         newext->inserted = true;
3621         InsertHeadList(&ext->list_entry, &newext->list_entry);
3622 
3623         add_insert_extent_rollback(rollback, fcb, newext);
3624 
3625         remove_fcb_extent(fcb, ext, rollback);
3626 
3627         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3628     } else if (start_data <= ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace beginning
3629         EXTENT_DATA2* ned2;
3630         extent *newext1, *newext2;
3631 
3632         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3633         if (!newext1) {
3634             ERR("out of memory\n");
3635             return STATUS_INSUFFICIENT_RESOURCES;
3636         }
3637 
3638         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3639         if (!newext2) {
3640             ERR("out of memory\n");
3641             ExFreePool(newext1);
3642             return STATUS_INSUFFICIENT_RESOURCES;
3643         }
3644 
3645         RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3646         newext1->extent_data.type = EXTENT_TYPE_REGULAR;
3647         ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3648         ned2->num_bytes = end_data - ext->offset;
3649 
3650         RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3651         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3652         ned2->offset += end_data - ext->offset;
3653         ned2->num_bytes -= end_data - ext->offset;
3654 
3655         Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)(end_data - ext->offset),
3656                                      Irp, NULL, file_write, irp_offset + ext->offset - start_data, priority);
3657         if (!NT_SUCCESS(Status)) {
3658             ERR("write_data_complete returned %08lx\n", Status);
3659             ExFreePool(newext1);
3660             ExFreePool(newext2);
3661             return Status;
3662         }
3663 
3664         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3665             ULONG sl = (ULONG)((end_data - ext->offset) >> fcb->Vcb->sector_shift);
3666             void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3667 
3668             if (!csum) {
3669                 ERR("out of memory\n");
3670                 ExFreePool(newext1);
3671                 ExFreePool(newext2);
3672                 return STATUS_INSUFFICIENT_RESOURCES;
3673             }
3674 
3675             do_calc_job(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3676 
3677             newext1->csum = csum;
3678         } else
3679             newext1->csum = NULL;
3680 
3681         *written = end_data - ext->offset;
3682 
3683         newext1->offset = ext->offset;
3684         newext1->datalen = ext->datalen;
3685         newext1->unique = ext->unique;
3686         newext1->ignore = false;
3687         newext1->inserted = true;
3688         InsertHeadList(&ext->list_entry, &newext1->list_entry);
3689 
3690         add_insert_extent_rollback(rollback, fcb, newext1);
3691 
3692         newext2->offset = end_data;
3693         newext2->datalen = ext->datalen;
3694         newext2->unique = ext->unique;
3695         newext2->ignore = false;
3696         newext2->inserted = true;
3697         newext2->csum = NULL;
3698         add_extent(fcb, &newext1->list_entry, newext2);
3699 
3700         add_insert_extent_rollback(rollback, fcb, newext2);
3701 
3702         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3703 
3704         if (!c)
3705             ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3706         else {
3707             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3708                                                 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3709 
3710             if (!NT_SUCCESS(Status)) {
3711                 ERR("update_changed_extent_ref returned %08lx\n", Status);
3712                 return Status;
3713             }
3714         }
3715 
3716         remove_fcb_extent(fcb, ext, rollback);
3717     } else if (start_data > ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace end
3718         EXTENT_DATA2* ned2;
3719         extent *newext1, *newext2;
3720 
3721         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3722         if (!newext1) {
3723             ERR("out of memory\n");
3724             return STATUS_INSUFFICIENT_RESOURCES;
3725         }
3726 
3727         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3728         if (!newext2) {
3729             ERR("out of memory\n");
3730             ExFreePool(newext1);
3731             return STATUS_INSUFFICIENT_RESOURCES;
3732         }
3733 
3734         RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3735 
3736         ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3737         ned2->num_bytes = start_data - ext->offset;
3738 
3739         RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3740 
3741         newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3742         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3743         ned2->offset += start_data - ext->offset;
3744         ned2->num_bytes = ext->offset + ed2->num_bytes - start_data;
3745 
3746         Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)ned2->num_bytes, Irp, NULL, file_write, irp_offset, priority);
3747         if (!NT_SUCCESS(Status)) {
3748             ERR("write_data_complete returned %08lx\n", Status);
3749             ExFreePool(newext1);
3750             ExFreePool(newext2);
3751             return Status;
3752         }
3753 
3754         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3755             ULONG sl = (ULONG)(ned2->num_bytes >> fcb->Vcb->sector_shift);
3756             void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3757 
3758             if (!csum) {
3759                 ERR("out of memory\n");
3760                 ExFreePool(newext1);
3761                 ExFreePool(newext2);
3762                 return STATUS_INSUFFICIENT_RESOURCES;
3763             }
3764 
3765             do_calc_job(fcb->Vcb, data, sl, csum);
3766 
3767             newext2->csum = csum;
3768         } else
3769             newext2->csum = NULL;
3770 
3771         *written = ned2->num_bytes;
3772 
3773         newext1->offset = ext->offset;
3774         newext1->datalen = ext->datalen;
3775         newext1->unique = ext->unique;
3776         newext1->ignore = false;
3777         newext1->inserted = true;
3778         newext1->csum = NULL;
3779         InsertHeadList(&ext->list_entry, &newext1->list_entry);
3780 
3781         add_insert_extent_rollback(rollback, fcb, newext1);
3782 
3783         newext2->offset = start_data;
3784         newext2->datalen = ext->datalen;
3785         newext2->unique = ext->unique;
3786         newext2->ignore = false;
3787         newext2->inserted = true;
3788         add_extent(fcb, &newext1->list_entry, newext2);
3789 
3790         add_insert_extent_rollback(rollback, fcb, newext2);
3791 
3792         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3793 
3794         if (!c)
3795             ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3796         else {
3797             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3798                                                fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3799 
3800             if (!NT_SUCCESS(Status)) {
3801                 ERR("update_changed_extent_ref returned %08lx\n", Status);
3802                 return Status;
3803             }
3804         }
3805 
3806         remove_fcb_extent(fcb, ext, rollback);
3807     } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle
3808         EXTENT_DATA2* ned2;
3809         extent *newext1, *newext2, *newext3;
3810 
3811         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3812         if (!newext1) {
3813             ERR("out of memory\n");
3814             return STATUS_INSUFFICIENT_RESOURCES;
3815         }
3816 
3817         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3818         if (!newext2) {
3819             ERR("out of memory\n");
3820             ExFreePool(newext1);
3821             return STATUS_INSUFFICIENT_RESOURCES;
3822         }
3823 
3824         newext3 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3825         if (!newext3) {
3826             ERR("out of memory\n");
3827             ExFreePool(newext1);
3828             ExFreePool(newext2);
3829             return STATUS_INSUFFICIENT_RESOURCES;
3830         }
3831 
3832         RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3833         RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3834         RtlCopyMemory(&newext3->extent_data, &ext->extent_data, ext->datalen);
3835 
3836         ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3837         ned2->num_bytes = start_data - ext->offset;
3838 
3839         newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3840         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3841         ned2->offset += start_data - ext->offset;
3842         ned2->num_bytes = end_data - start_data;
3843 
3844         ned2 = (EXTENT_DATA2*)newext3->extent_data.data;
3845         ned2->offset += end_data - ext->offset;
3846         ned2->num_bytes -= end_data - ext->offset;
3847 
3848         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3849         Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)(end_data - start_data), Irp, NULL, file_write, irp_offset, priority);
3850         if (!NT_SUCCESS(Status)) {
3851             ERR("write_data_complete returned %08lx\n", Status);
3852             ExFreePool(newext1);
3853             ExFreePool(newext2);
3854             ExFreePool(newext3);
3855             return Status;
3856         }
3857 
3858         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3859             ULONG sl = (ULONG)((end_data - start_data) >> fcb->Vcb->sector_shift);
3860             void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3861 
3862             if (!csum) {
3863                 ERR("out of memory\n");
3864                 ExFreePool(newext1);
3865                 ExFreePool(newext2);
3866                 ExFreePool(newext3);
3867                 return STATUS_INSUFFICIENT_RESOURCES;
3868             }
3869 
3870             do_calc_job(fcb->Vcb, data, sl, csum);
3871 
3872             newext2->csum = csum;
3873         } else
3874             newext2->csum = NULL;
3875 
3876         *written = end_data - start_data;
3877 
3878         newext1->offset = ext->offset;
3879         newext1->datalen = ext->datalen;
3880         newext1->unique = ext->unique;
3881         newext1->ignore = false;
3882         newext1->inserted = true;
3883         newext1->csum = NULL;
3884         InsertHeadList(&ext->list_entry, &newext1->list_entry);
3885 
3886         add_insert_extent_rollback(rollback, fcb, newext1);
3887 
3888         newext2->offset = start_data;
3889         newext2->datalen = ext->datalen;
3890         newext2->unique = ext->unique;
3891         newext2->ignore = false;
3892         newext2->inserted = true;
3893         add_extent(fcb, &newext1->list_entry, newext2);
3894 
3895         add_insert_extent_rollback(rollback, fcb, newext2);
3896 
3897         newext3->offset = end_data;
3898         newext3->datalen = ext->datalen;
3899         newext3->unique = ext->unique;
3900         newext3->ignore = false;
3901         newext3->inserted = true;
3902         newext3->csum = NULL;
3903         add_extent(fcb, &newext2->list_entry, newext3);
3904 
3905         add_insert_extent_rollback(rollback, fcb, newext3);
3906 
3907         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3908 
3909         if (!c)
3910             ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3911         else {
3912             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2,
3913                                                fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3914 
3915             if (!NT_SUCCESS(Status)) {
3916                 ERR("update_changed_extent_ref returned %08lx\n", Status);
3917                 return Status;
3918             }
3919         }
3920 
3921         remove_fcb_extent(fcb, ext, rollback);
3922     }
3923 
3924     if (c)
3925         c->changed = true;
3926 
3927     return STATUS_SUCCESS;
3928 }
3929 
3930 __attribute__((nonnull(1, 4)))
do_write_file(fcb * fcb,uint64_t start,uint64_t end_data,void * data,PIRP Irp,bool file_write,uint32_t irp_offset,LIST_ENTRY * rollback)3931 NTSTATUS do_write_file(fcb* fcb, uint64_t start, uint64_t end_data, void* data, PIRP Irp, bool file_write, uint32_t irp_offset, LIST_ENTRY* rollback) {
3932     NTSTATUS Status;
3933     LIST_ENTRY *le, *le2;
3934     uint64_t written = 0, length = end_data - start;
3935     uint64_t last_cow_start;
3936     ULONG priority = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority;
3937 #ifdef DEBUG_PARANOID
3938     uint64_t last_off;
3939 #endif
3940     bool extents_changed = false;
3941 
3942     last_cow_start = 0;
3943 
3944     le = fcb->extents.Flink;
3945     while (le != &fcb->extents) {
3946         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
3947 
3948         le2 = le->Flink;
3949 
3950         if (!ext->ignore) {
3951             EXTENT_DATA* ed = &ext->extent_data;
3952             uint64_t len;
3953 
3954             if (ed->type == EXTENT_TYPE_INLINE)
3955                 len = ed->decoded_size;
3956             else
3957                 len = ((EXTENT_DATA2*)ed->data)->num_bytes;
3958 
3959             if (ext->offset + len <= start)
3960                 goto nextitem;
3961 
3962             if (ext->offset > start + written + length)
3963                 break;
3964 
3965             if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique && ed->compression == BTRFS_COMPRESSION_NONE) {
3966                 if (max(last_cow_start, start + written) < ext->offset) {
3967                     uint64_t start_write = max(last_cow_start, start + written);
3968 
3969                     extents_changed = true;
3970 
3971                     Status = excise_extents(fcb->Vcb, fcb, start_write, ext->offset, Irp, rollback);
3972                     if (!NT_SUCCESS(Status)) {
3973                         ERR("excise_extents returned %08lx\n", Status);
3974                         return Status;
3975                     }
3976 
3977                     Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
3978                     if (!NT_SUCCESS(Status)) {
3979                         ERR("insert_extent returned %08lx\n", Status);
3980                         return Status;
3981                     }
3982 
3983                     written += ext->offset - start_write;
3984                     length -= ext->offset - start_write;
3985 
3986                     if (length == 0)
3987                         break;
3988                 }
3989 
3990                 if (ed->type == EXTENT_TYPE_REGULAR) {
3991                     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3992                     uint64_t writeaddr = ed2->address + ed2->offset + start + written - ext->offset;
3993                     uint64_t write_len = min(len, length);
3994                     chunk* c;
3995 
3996                     TRACE("doing non-COW write to %I64x\n", writeaddr);
3997 
3998                     Status = write_data_complete(fcb->Vcb, writeaddr, (uint8_t*)data + written, (uint32_t)write_len, Irp, NULL, file_write, irp_offset + written, priority);
3999                     if (!NT_SUCCESS(Status)) {
4000                         ERR("write_data_complete returned %08lx\n", Status);
4001                         return Status;
4002                     }
4003 
4004                     c = get_chunk_from_address(fcb->Vcb, writeaddr);
4005                     if (c)
4006                         c->changed = true;
4007 
4008                     // This shouldn't ever get called - nocow files should always also be nosum.
4009                     if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
4010                         do_calc_job(fcb->Vcb, (uint8_t*)data + written, (uint32_t)(write_len >> fcb->Vcb->sector_shift),
4011                                     (uint8_t*)ext->csum + (((start + written - ext->offset) * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift));
4012 
4013                         ext->inserted = true;
4014                         extents_changed = true;
4015                     }
4016 
4017                     written += write_len;
4018                     length -= write_len;
4019 
4020                     if (length == 0)
4021                         break;
4022                 } else if (ed->type == EXTENT_TYPE_PREALLOC) {
4023                     uint64_t write_len;
4024 
4025                     Status = do_write_file_prealloc(fcb, ext, start + written, end_data, (uint8_t*)data + written, &write_len,
4026                                                     Irp, file_write, irp_offset + written, priority, rollback);
4027                     if (!NT_SUCCESS(Status)) {
4028                         ERR("do_write_file_prealloc returned %08lx\n", Status);
4029                         return Status;
4030                     }
4031 
4032                     extents_changed = true;
4033 
4034                     written += write_len;
4035                     length -= write_len;
4036 
4037                     if (length == 0)
4038                         break;
4039                 }
4040 
4041                 last_cow_start = ext->offset + len;
4042             }
4043         }
4044 
4045 nextitem:
4046         le = le2;
4047     }
4048 
4049     if (length > 0) {
4050         uint64_t start_write = max(last_cow_start, start + written);
4051 
4052         extents_changed = true;
4053 
4054         Status = excise_extents(fcb->Vcb, fcb, start_write, end_data, Irp, rollback);
4055         if (!NT_SUCCESS(Status)) {
4056             ERR("excise_extents returned %08lx\n", Status);
4057             return Status;
4058         }
4059 
4060         Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
4061         if (!NT_SUCCESS(Status)) {
4062             ERR("insert_extent returned %08lx\n", Status);
4063             return Status;
4064         }
4065     }
4066 
4067 #ifdef DEBUG_PARANOID
4068     last_off = 0xffffffffffffffff;
4069 
4070     le = fcb->extents.Flink;
4071     while (le != &fcb->extents) {
4072         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4073 
4074         if (!ext->ignore) {
4075             if (ext->offset == last_off) {
4076                 ERR("offset %I64x duplicated\n", ext->offset);
4077                 int3;
4078             } else if (ext->offset < last_off && last_off != 0xffffffffffffffff) {
4079                 ERR("offsets out of order\n");
4080                 int3;
4081             }
4082 
4083             last_off = ext->offset;
4084         }
4085 
4086         le = le->Flink;
4087     }
4088 #endif
4089 
4090     if (extents_changed) {
4091         fcb->extents_changed = true;
4092         mark_fcb_dirty(fcb);
4093     }
4094 
4095     return STATUS_SUCCESS;
4096 }
4097 
4098 __attribute__((nonnull(1,2,4,5,11)))
write_file2(device_extension * Vcb,PIRP Irp,LARGE_INTEGER offset,void * buf,ULONG * length,bool paging_io,bool no_cache,bool wait,bool deferred_write,bool write_irp,LIST_ENTRY * rollback)4099 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, bool paging_io, bool no_cache,
4100                      bool wait, bool deferred_write, bool write_irp, LIST_ENTRY* rollback) {
4101     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4102     PFILE_OBJECT FileObject = IrpSp->FileObject;
4103     EXTENT_DATA* ed2;
4104     uint64_t off64, newlength, start_data, end_data;
4105     uint32_t bufhead;
4106     bool make_inline;
4107     INODE_ITEM* origii;
4108     bool changed_length = false;
4109     NTSTATUS Status;
4110     LARGE_INTEGER time;
4111     BTRFS_TIME now;
4112     fcb* fcb;
4113     ccb* ccb;
4114     file_ref* fileref;
4115     bool paging_lock = false, acquired_fcb_lock = false, acquired_tree_lock = false, pagefile;
4116     ULONG filter = 0;
4117 
4118     TRACE("(%p, %p, %I64x, %p, %lx, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
4119 
4120     if (*length == 0) {
4121         TRACE("returning success for zero-length write\n");
4122         return STATUS_SUCCESS;
4123     }
4124 
4125     if (!FileObject) {
4126         ERR("error - FileObject was NULL\n");
4127         return STATUS_ACCESS_DENIED;
4128     }
4129 
4130     fcb = FileObject->FsContext;
4131     ccb = FileObject->FsContext2;
4132     fileref = ccb ? ccb->fileref : NULL;
4133 
4134     if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
4135         WARN("tried to write to something other than a file or symlink (inode %I64x, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
4136         return STATUS_INVALID_DEVICE_REQUEST;
4137     }
4138 
4139     if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1)
4140         offset = fcb->Header.FileSize;
4141 
4142     off64 = offset.QuadPart;
4143 
4144     TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
4145 
4146     if (!no_cache && !CcCanIWrite(FileObject, *length, wait, deferred_write))
4147         return STATUS_PENDING;
4148 
4149     if (!wait && no_cache)
4150         return STATUS_PENDING;
4151 
4152     if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
4153         IO_STATUS_BLOCK iosb;
4154 
4155         ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, true);
4156 
4157         CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
4158 
4159         if (!NT_SUCCESS(iosb.Status)) {
4160             ExReleaseResourceLite(fcb->Header.PagingIoResource);
4161             ERR("CcFlushCache returned %08lx\n", iosb.Status);
4162             return iosb.Status;
4163         }
4164 
4165         paging_lock = true;
4166 
4167         CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, false);
4168     }
4169 
4170     if (paging_io) {
4171         if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, wait)) {
4172             Status = STATUS_PENDING;
4173             goto end;
4174         } else
4175             paging_lock = true;
4176     }
4177 
4178     pagefile = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE && paging_io;
4179 
4180     if (!pagefile && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
4181         if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4182             Status = STATUS_PENDING;
4183             goto end;
4184         } else
4185             acquired_tree_lock = true;
4186     }
4187 
4188     if (pagefile) {
4189         if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
4190             Status = STATUS_PENDING;
4191             goto end;
4192         } else
4193             acquired_fcb_lock = true;
4194     } else if (!ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
4195         if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
4196             Status = STATUS_PENDING;
4197             goto end;
4198         } else
4199             acquired_fcb_lock = true;
4200     }
4201 
4202     newlength = fcb->ads ? fcb->adsdata.Length : fcb->inode_item.st_size;
4203 
4204     if (fcb->deleted)
4205         newlength = 0;
4206 
4207     TRACE("newlength = %I64x\n", newlength);
4208 
4209     if (off64 + *length > newlength) {
4210         if (paging_io) {
4211             if (off64 >= newlength) {
4212                 TRACE("paging IO tried to write beyond end of file (file size = %I64x, offset = %I64x, length = %lx)\n", newlength, off64, *length);
4213                 TRACE("FileObject: AllocationSize = %I64x, FileSize = %I64x, ValidDataLength = %I64x\n",
4214                     fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
4215                 Irp->IoStatus.Information = 0;
4216                 Status = STATUS_SUCCESS;
4217                 goto end;
4218             }
4219 
4220             *length = (ULONG)(newlength - off64);
4221         } else {
4222             newlength = off64 + *length;
4223             changed_length = true;
4224 
4225             TRACE("extending length to %I64x\n", newlength);
4226         }
4227     }
4228 
4229     if (fcb->ads)
4230         make_inline = false;
4231     else
4232         make_inline = newlength <= fcb->Vcb->options.max_inline;
4233 
4234     if (changed_length) {
4235         if (newlength > (uint64_t)fcb->Header.AllocationSize.QuadPart) {
4236             if (!acquired_tree_lock) {
4237                 // We need to acquire the tree lock if we don't have it already -
4238                 // we can't give an inline file proper extents at the same time as we're
4239                 // doing a flush.
4240                 if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4241                     Status = STATUS_PENDING;
4242                     goto end;
4243                 } else
4244                     acquired_tree_lock = true;
4245             }
4246 
4247             Status = extend_file(fcb, fileref, newlength, false, Irp, rollback);
4248             if (!NT_SUCCESS(Status)) {
4249                 ERR("extend_file returned %08lx\n", Status);
4250                 goto end;
4251             }
4252         } else if (!fcb->ads)
4253             fcb->inode_item.st_size = newlength;
4254 
4255         fcb->Header.FileSize.QuadPart = newlength;
4256         fcb->Header.ValidDataLength.QuadPart = newlength;
4257 
4258         TRACE("AllocationSize = %I64x\n", fcb->Header.AllocationSize.QuadPart);
4259         TRACE("FileSize = %I64x\n", fcb->Header.FileSize.QuadPart);
4260         TRACE("ValidDataLength = %I64x\n", fcb->Header.ValidDataLength.QuadPart);
4261     }
4262 
4263     if (!no_cache) {
4264         Status = STATUS_SUCCESS;
4265 
4266         _SEH2_TRY {
4267             if (!FileObject->PrivateCacheMap || changed_length) {
4268                 CC_FILE_SIZES ccfs;
4269 
4270                 ccfs.AllocationSize = fcb->Header.AllocationSize;
4271                 ccfs.FileSize = fcb->Header.FileSize;
4272                 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4273 
4274                 if (!FileObject->PrivateCacheMap)
4275                     init_file_cache(FileObject, &ccfs);
4276 
4277                 CcSetFileSizes(FileObject, &ccfs);
4278             }
4279 
4280             if (IrpSp->MinorFunction & IRP_MN_MDL) {
4281                 CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus);
4282 
4283                 Status = Irp->IoStatus.Status;
4284                 goto end;
4285             } else {
4286                 /* We have to wait in CcCopyWrite - if we return STATUS_PENDING and add this to the work queue,
4287                  * it can result in CcFlushCache being called before the job has run. See ifstest ReadWriteTest. */
4288 
4289                 if (fCcCopyWriteEx) {
4290                     TRACE("CcCopyWriteEx(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, off64, *length, true, buf, Irp->Tail.Overlay.Thread);
4291                     if (!fCcCopyWriteEx(FileObject, &offset, *length, true, buf, Irp->Tail.Overlay.Thread)) {
4292                         Status = STATUS_PENDING;
4293                         goto end;
4294                     }
4295                     TRACE("CcCopyWriteEx finished\n");
4296                 } else {
4297                     TRACE("CcCopyWrite(%p, %I64x, %lx, %u, %p)\n", FileObject, off64, *length, true, buf);
4298                     if (!CcCopyWrite(FileObject, &offset, *length, true, buf)) {
4299                         Status = STATUS_PENDING;
4300                         goto end;
4301                     }
4302                     TRACE("CcCopyWrite finished\n");
4303                 }
4304 
4305                 Irp->IoStatus.Information = *length;
4306             }
4307         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4308             Status = _SEH2_GetExceptionCode();
4309         } _SEH2_END;
4310 
4311         if (changed_length) {
4312             queue_notification_fcb(fcb->ads ? fileref->parent : fileref, fcb->ads ? FILE_NOTIFY_CHANGE_STREAM_SIZE : FILE_NOTIFY_CHANGE_SIZE,
4313                                    fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED, fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4314         }
4315 
4316         goto end;
4317     }
4318 
4319     if (fcb->ads) {
4320         if (changed_length) {
4321             char* data2;
4322 
4323             if (newlength > fcb->adsmaxlen) {
4324                 ERR("error - xattr too long (%I64u > %lu)\n", newlength, fcb->adsmaxlen);
4325                 Status = STATUS_DISK_FULL;
4326                 goto end;
4327             }
4328 
4329             data2 = ExAllocatePoolWithTag(PagedPool, (ULONG)newlength, ALLOC_TAG);
4330             if (!data2) {
4331                 ERR("out of memory\n");
4332                 Status = STATUS_INSUFFICIENT_RESOURCES;
4333                 goto end;
4334             }
4335 
4336             if (fcb->adsdata.Buffer) {
4337                 RtlCopyMemory(data2, fcb->adsdata.Buffer, fcb->adsdata.Length);
4338                 ExFreePool(fcb->adsdata.Buffer);
4339             }
4340 
4341             if (newlength > fcb->adsdata.Length)
4342                 RtlZeroMemory(&data2[fcb->adsdata.Length], (ULONG)(newlength - fcb->adsdata.Length));
4343 
4344 
4345             fcb->adsdata.Buffer = data2;
4346             fcb->adsdata.Length = fcb->adsdata.MaximumLength = (USHORT)newlength;
4347 
4348             fcb->Header.AllocationSize.QuadPart = newlength;
4349             fcb->Header.FileSize.QuadPart = newlength;
4350             fcb->Header.ValidDataLength.QuadPart = newlength;
4351         }
4352 
4353         if (*length > 0)
4354             RtlCopyMemory(&fcb->adsdata.Buffer[off64], buf, *length);
4355 
4356         fcb->Header.ValidDataLength.QuadPart = newlength;
4357 
4358         mark_fcb_dirty(fcb);
4359 
4360         if (fileref)
4361             mark_fileref_dirty(fileref);
4362     } else {
4363         bool compress = write_fcb_compressed(fcb), no_buf = false;
4364         uint8_t* data;
4365 
4366         if (make_inline) {
4367             start_data = 0;
4368             end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
4369             bufhead = sizeof(EXTENT_DATA) - 1;
4370         } else if (compress) {
4371             start_data = off64 & ~(uint64_t)(COMPRESSED_EXTENT_SIZE - 1);
4372             end_data = min(sector_align(off64 + *length, COMPRESSED_EXTENT_SIZE),
4373                            sector_align(newlength, fcb->Vcb->superblock.sector_size));
4374             bufhead = 0;
4375         } else {
4376             start_data = off64 & ~(uint64_t)(fcb->Vcb->superblock.sector_size - 1);
4377             end_data = sector_align(off64 + *length, fcb->Vcb->superblock.sector_size);
4378             bufhead = 0;
4379         }
4380 
4381         if (fcb_is_inline(fcb))
4382             end_data = max(end_data, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size));
4383 
4384         fcb->Header.ValidDataLength.QuadPart = newlength;
4385         TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
4386 
4387         if (!make_inline && !compress && off64 == start_data && off64 + *length == end_data) {
4388             data = buf;
4389             no_buf = true;
4390         } else {
4391             data = ExAllocatePoolWithTag(PagedPool, (ULONG)(end_data - start_data + bufhead), ALLOC_TAG);
4392             if (!data) {
4393                 ERR("out of memory\n");
4394                 Status = STATUS_INSUFFICIENT_RESOURCES;
4395                 goto end;
4396             }
4397 
4398             RtlZeroMemory(data + bufhead, (ULONG)(end_data - start_data));
4399 
4400             TRACE("start_data = %I64x\n", start_data);
4401             TRACE("end_data = %I64x\n", end_data);
4402 
4403             if (off64 > start_data || off64 + *length < end_data) {
4404                 if (changed_length) {
4405                     if (fcb->inode_item.st_size > start_data)
4406                         Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp);
4407                     else
4408                         Status = STATUS_SUCCESS;
4409                 } else
4410                     Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp);
4411 
4412                 if (!NT_SUCCESS(Status)) {
4413                     ERR("read_file returned %08lx\n", Status);
4414                     ExFreePool(data);
4415                     goto end;
4416                 }
4417             }
4418 
4419             RtlCopyMemory(data + bufhead + off64 - start_data, buf, *length);
4420         }
4421 
4422         if (make_inline) {
4423             Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback);
4424             if (!NT_SUCCESS(Status)) {
4425                 ERR("error - excise_extents returned %08lx\n", Status);
4426                 ExFreePool(data);
4427                 goto end;
4428             }
4429 
4430             ed2 = (EXTENT_DATA*)data;
4431             ed2->generation = fcb->Vcb->superblock.generation;
4432             ed2->decoded_size = newlength;
4433             ed2->compression = BTRFS_COMPRESSION_NONE;
4434             ed2->encryption = BTRFS_ENCRYPTION_NONE;
4435             ed2->encoding = BTRFS_ENCODING_NONE;
4436             ed2->type = EXTENT_TYPE_INLINE;
4437 
4438             Status = add_extent_to_fcb(fcb, 0, ed2, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + newlength), false, NULL, rollback);
4439             if (!NT_SUCCESS(Status)) {
4440                 ERR("add_extent_to_fcb returned %08lx\n", Status);
4441                 ExFreePool(data);
4442                 goto end;
4443             }
4444 
4445             fcb->inode_item.st_blocks += newlength;
4446         } else if (compress) {
4447             Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback);
4448 
4449             if (!NT_SUCCESS(Status)) {
4450                 ERR("write_compressed returned %08lx\n", Status);
4451                 ExFreePool(data);
4452                 goto end;
4453             }
4454         } else {
4455             if (write_irp && Irp->MdlAddress && no_buf) {
4456                 bool locked = Irp->MdlAddress->MdlFlags & (MDL_PAGES_LOCKED | MDL_PARTIAL);
4457 
4458                 if (!locked) {
4459                     Status = STATUS_SUCCESS;
4460 
4461                     _SEH2_TRY {
4462                         MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
4463                     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4464                         Status = _SEH2_GetExceptionCode();
4465                     } _SEH2_END;
4466 
4467                     if (!NT_SUCCESS(Status)) {
4468                         ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
4469                         goto end;
4470                     }
4471                 }
4472 
4473                 _SEH2_TRY {
4474                     Status = do_write_file(fcb, start_data, end_data, data, Irp, true, 0, rollback);
4475                 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4476                     Status = _SEH2_GetExceptionCode();
4477                 } _SEH2_END;
4478 
4479                 if (!locked)
4480                     MmUnlockPages(Irp->MdlAddress);
4481             } else {
4482                 _SEH2_TRY {
4483                     Status = do_write_file(fcb, start_data, end_data, data, Irp, false, 0, rollback);
4484                 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4485                     Status = _SEH2_GetExceptionCode();
4486                 } _SEH2_END;
4487             }
4488 
4489             if (!NT_SUCCESS(Status)) {
4490                 ERR("do_write_file returned %08lx\n", Status);
4491                 if (!no_buf) ExFreePool(data);
4492                 goto end;
4493             }
4494         }
4495 
4496         if (!no_buf)
4497             ExFreePool(data);
4498     }
4499 
4500     KeQuerySystemTime(&time);
4501     win_time_to_unix(time, &now);
4502 
4503     if (!pagefile) {
4504         if (fcb->ads) {
4505             if (fileref && fileref->parent)
4506                 origii = &fileref->parent->fcb->inode_item;
4507             else {
4508                 ERR("no parent fcb found for stream\n");
4509                 Status = STATUS_INTERNAL_ERROR;
4510                 goto end;
4511             }
4512         } else
4513             origii = &fcb->inode_item;
4514 
4515         origii->transid = Vcb->superblock.generation;
4516         origii->sequence++;
4517 
4518         if (!ccb->user_set_change_time)
4519             origii->st_ctime = now;
4520 
4521         if (!fcb->ads) {
4522             if (changed_length) {
4523                 TRACE("setting st_size to %I64x\n", newlength);
4524                 origii->st_size = newlength;
4525                 filter |= FILE_NOTIFY_CHANGE_SIZE;
4526             }
4527 
4528             fcb->inode_item_changed = true;
4529         } else {
4530             fileref->parent->fcb->inode_item_changed = true;
4531 
4532             if (changed_length)
4533                 filter |= FILE_NOTIFY_CHANGE_STREAM_SIZE;
4534 
4535             filter |= FILE_NOTIFY_CHANGE_STREAM_WRITE;
4536         }
4537 
4538         if (!ccb->user_set_write_time) {
4539             origii->st_mtime = now;
4540             filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
4541         }
4542 
4543         mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb);
4544     }
4545 
4546     if (changed_length) {
4547         CC_FILE_SIZES ccfs;
4548 
4549         ccfs.AllocationSize = fcb->Header.AllocationSize;
4550         ccfs.FileSize = fcb->Header.FileSize;
4551         ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4552 
4553         _SEH2_TRY {
4554             CcSetFileSizes(FileObject, &ccfs);
4555         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4556             Status = _SEH2_GetExceptionCode();
4557             goto end;
4558         } _SEH2_END;
4559     }
4560 
4561     fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
4562     fcb->subvol->root_item.ctime = now;
4563 
4564     Status = STATUS_SUCCESS;
4565     Irp->IoStatus.Information = *length;
4566 
4567     if (filter != 0)
4568         queue_notification_fcb(fcb->ads ? fileref->parent : fileref, filter, fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED,
4569                                fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4570 
4571 end:
4572     if (NT_SUCCESS(Status) && FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
4573         TRACE("CurrentByteOffset was: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4574         FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
4575         TRACE("CurrentByteOffset now: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4576     }
4577 
4578     if (acquired_fcb_lock)
4579         ExReleaseResourceLite(fcb->Header.Resource);
4580 
4581     if (acquired_tree_lock)
4582         ExReleaseResourceLite(&Vcb->tree_lock);
4583 
4584     if (paging_lock)
4585         ExReleaseResourceLite(fcb->Header.PagingIoResource);
4586 
4587     return Status;
4588 }
4589 
4590 __attribute__((nonnull(1,2)))
write_file(device_extension * Vcb,PIRP Irp,bool wait,bool deferred_write)4591 NTSTATUS write_file(device_extension* Vcb, PIRP Irp, bool wait, bool deferred_write) {
4592     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4593     void* buf;
4594     NTSTATUS Status;
4595     LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
4596     PFILE_OBJECT FileObject = IrpSp->FileObject;
4597     fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4598     LIST_ENTRY rollback;
4599 
4600     InitializeListHead(&rollback);
4601 
4602     TRACE("write\n");
4603 
4604     Irp->IoStatus.Information = 0;
4605 
4606     TRACE("offset = %I64x\n", offset.QuadPart);
4607     TRACE("length = %lx\n", IrpSp->Parameters.Write.Length);
4608 
4609     if (!Irp->AssociatedIrp.SystemBuffer) {
4610         buf = map_user_buffer(Irp, fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
4611 
4612         if (Irp->MdlAddress && !buf) {
4613             ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
4614             Status = STATUS_INSUFFICIENT_RESOURCES;
4615             goto exit;
4616         }
4617     } else
4618         buf = Irp->AssociatedIrp.SystemBuffer;
4619 
4620     TRACE("buf = %p\n", buf);
4621 
4622     if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
4623         WARN("tried to write to locked region\n");
4624         Status = STATUS_FILE_LOCK_CONFLICT;
4625         goto exit;
4626     }
4627 
4628     Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE,
4629                          wait, deferred_write, true, &rollback);
4630 
4631     if (Status == STATUS_PENDING)
4632         goto exit;
4633     else if (!NT_SUCCESS(Status)) {
4634         ERR("write_file2 returned %08lx\n", Status);
4635         goto exit;
4636     }
4637 
4638     if (NT_SUCCESS(Status)) {
4639         if (diskacc && Status != STATUS_PENDING && Irp->Flags & IRP_NOCACHE) {
4640             PETHREAD thread = NULL;
4641 
4642             if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
4643                 thread = Irp->Tail.Overlay.Thread;
4644             else if (!IoIsSystemThread(PsGetCurrentThread()))
4645                 thread = PsGetCurrentThread();
4646             else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
4647                 thread = PsGetCurrentThread();
4648 
4649             if (thread)
4650                 fPsUpdateDiskCounters(PsGetThreadProcess(thread), 0, IrpSp->Parameters.Write.Length, 0, 1, 0);
4651         }
4652     }
4653 
4654 exit:
4655     if (NT_SUCCESS(Status))
4656         clear_rollback(&rollback);
4657     else
4658         do_rollback(Vcb, &rollback);
4659 
4660     return Status;
4661 }
4662 
4663 _Dispatch_type_(IRP_MJ_WRITE)
_Function_class_(DRIVER_DISPATCH)4664 _Function_class_(DRIVER_DISPATCH)
4665 __attribute__((nonnull(1,2)))
4666 NTSTATUS __stdcall drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
4667     NTSTATUS Status;
4668     bool top_level;
4669     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4670     device_extension* Vcb = DeviceObject->DeviceExtension;
4671     PFILE_OBJECT FileObject = IrpSp->FileObject;
4672     fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4673     ccb* ccb = FileObject ? FileObject->FsContext2 : NULL;
4674     bool wait = FileObject ? IoIsOperationSynchronous(Irp) : true;
4675 
4676     FsRtlEnterFileSystem();
4677 
4678     top_level = is_top_level(Irp);
4679 
4680     if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
4681         Status = vol_write(DeviceObject, Irp);
4682         goto exit;
4683     } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
4684         Status = STATUS_INVALID_PARAMETER;
4685         goto end;
4686     }
4687 
4688     if (!fcb) {
4689         ERR("fcb was NULL\n");
4690         Status = STATUS_INVALID_PARAMETER;
4691         goto end;
4692     }
4693 
4694     if (!ccb) {
4695         ERR("ccb was NULL\n");
4696         Status = STATUS_INVALID_PARAMETER;
4697         goto end;
4698     }
4699 
4700     if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) {
4701         WARN("insufficient permissions\n");
4702         Status = STATUS_ACCESS_DENIED;
4703         goto end;
4704     }
4705 
4706     if (fcb == Vcb->volume_fcb) {
4707         if (!Vcb->locked || Vcb->locked_fileobj != FileObject) {
4708             ERR("trying to write to volume when not locked, or locked with another FileObject\n");
4709             Status = STATUS_ACCESS_DENIED;
4710             goto end;
4711         }
4712 
4713         TRACE("writing directly to volume\n");
4714 
4715         IoSkipCurrentIrpStackLocation(Irp);
4716 
4717         Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
4718         goto exit;
4719     }
4720 
4721     if (is_subvol_readonly(fcb->subvol, Irp)) {
4722         Status = STATUS_ACCESS_DENIED;
4723         goto end;
4724     }
4725 
4726     if (Vcb->readonly) {
4727         Status = STATUS_MEDIA_WRITE_PROTECTED;
4728         goto end;
4729     }
4730 
4731     _SEH2_TRY {
4732         if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
4733             CcMdlWriteComplete(IrpSp->FileObject, &IrpSp->Parameters.Write.ByteOffset, Irp->MdlAddress);
4734 
4735             Irp->MdlAddress = NULL;
4736             Status = STATUS_SUCCESS;
4737         } else {
4738             if (!(Irp->Flags & IRP_PAGING_IO))
4739                 FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);
4740 
4741             // Don't offload jobs when doing paging IO - otherwise this can lead to
4742             // deadlocks in CcCopyWrite.
4743             if (Irp->Flags & IRP_PAGING_IO)
4744                 wait = true;
4745 
4746             Status = write_file(Vcb, Irp, wait, false);
4747         }
4748     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4749         Status = _SEH2_GetExceptionCode();
4750     } _SEH2_END;
4751 
4752 end:
4753     Irp->IoStatus.Status = Status;
4754 
4755     TRACE("wrote %Iu bytes\n", Irp->IoStatus.Information);
4756 
4757     if (Status != STATUS_PENDING)
4758         IoCompleteRequest(Irp, IO_NO_INCREMENT);
4759     else {
4760         IoMarkIrpPending(Irp);
4761 
4762         if (!add_thread_job(Vcb, Irp))
4763             Status = do_write_job(Vcb, Irp);
4764     }
4765 
4766 exit:
4767     if (top_level)
4768         IoSetTopLevelIrp(NULL);
4769 
4770     TRACE("returning %08lx\n", Status);
4771 
4772     FsRtlExitFileSystem();
4773 
4774     return Status;
4775 }
4776