1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 #include <ata.h>
20 #include <ntddscsi.h>
21 #include <ntddstor.h>
22 
23 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
24 
25 // #define DEBUG_WRITE_LOOPS
26 
27 typedef struct {
28     KEVENT Event;
29     IO_STATUS_BLOCK iosb;
30 } write_context;
31 
32 typedef struct {
33     EXTENT_ITEM_TREE eit;
34     uint8_t type;
35     TREE_BLOCK_REF tbr;
36 } EXTENT_ITEM_TREE2;
37 
38 typedef struct {
39     EXTENT_ITEM ei;
40     uint8_t type;
41     TREE_BLOCK_REF tbr;
42 } EXTENT_ITEM_SKINNY_METADATA;
43 
44 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp);
45 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
46 
47 #ifndef _MSC_VER // not in mingw yet
48 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
49 #endif
50 
51 _Function_class_(IO_COMPLETION_ROUTINE)
52 static NTSTATUS __stdcall write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
53     write_context* context = conptr;
54 
55     UNUSED(DeviceObject);
56 
57     context->iosb = Irp->IoStatus;
58     KeSetEvent(&context->Event, 0, false);
59 
60     return STATUS_MORE_PROCESSING_REQUIRED;
61 }
62 
63 NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ PFILE_OBJECT fileobj, _In_ uint64_t address,
64                          _In_reads_bytes_(length) void* data, _In_ uint32_t length) {
65     NTSTATUS Status;
66     LARGE_INTEGER offset;
67     PIRP Irp;
68     PIO_STACK_LOCATION IrpSp;
69     write_context context;
70 
71     TRACE("(%p, %I64x, %p, %x)\n", device, address, data, length);
72 
73     RtlZeroMemory(&context, sizeof(write_context));
74 
75     KeInitializeEvent(&context.Event, NotificationEvent, false);
76 
77     offset.QuadPart = address;
78 
79     Irp = IoAllocateIrp(device->StackSize, false);
80 
81     if (!Irp) {
82         ERR("IoAllocateIrp failed\n");
83         return STATUS_INSUFFICIENT_RESOURCES;
84     }
85 
86     IrpSp = IoGetNextIrpStackLocation(Irp);
87     IrpSp->MajorFunction = IRP_MJ_WRITE;
88     IrpSp->FileObject = fileobj;
89 
90     if (device->Flags & DO_BUFFERED_IO) {
91         Irp->AssociatedIrp.SystemBuffer = data;
92 
93         Irp->Flags = IRP_BUFFERED_IO;
94     } else if (device->Flags & DO_DIRECT_IO) {
95         Irp->MdlAddress = IoAllocateMdl(data, length, false, false, NULL);
96         if (!Irp->MdlAddress) {
97             DbgPrint("IoAllocateMdl failed\n");
98             Status = STATUS_INSUFFICIENT_RESOURCES;
99             goto exit;
100         }
101 
102         Status = STATUS_SUCCESS;
103 
104         _SEH2_TRY {
105             MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
106         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
107             Status = _SEH2_GetExceptionCode();
108         } _SEH2_END;
109 
110         if (!NT_SUCCESS(Status)) {
111             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
112             IoFreeMdl(Irp->MdlAddress);
113             goto exit;
114         }
115     } else {
116         Irp->UserBuffer = data;
117     }
118 
119     IrpSp->Parameters.Write.Length = length;
120     IrpSp->Parameters.Write.ByteOffset = offset;
121 
122     Irp->UserIosb = &context.iosb;
123 
124     Irp->UserEvent = &context.Event;
125 
126     IoSetCompletionRoutine(Irp, write_completion, &context, true, true, true);
127 
128     Status = IoCallDriver(device, Irp);
129 
130     if (Status == STATUS_PENDING) {
131         KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
132         Status = context.iosb.Status;
133     }
134 
135     if (!NT_SUCCESS(Status)) {
136         ERR("IoCallDriver returned %08x\n", Status);
137     }
138 
139     if (device->Flags & DO_DIRECT_IO) {
140         MmUnlockPages(Irp->MdlAddress);
141         IoFreeMdl(Irp->MdlAddress);
142     }
143 
144 exit:
145     IoFreeIrp(Irp);
146 
147     return Status;
148 }
149 
150 static void add_trim_entry(device* dev, uint64_t address, uint64_t size) {
151     space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
152     if (!s) {
153         ERR("out of memory\n");
154         return;
155     }
156 
157     s->address = address;
158     s->size = size;
159     dev->num_trim_entries++;
160 
161     InsertTailList(&dev->trim_list, &s->list_entry);
162 }
163 
164 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
165     ULONG type;
166 
167     if (Vcb->trim && !Vcb->options.no_trim) {
168         if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
169             type = BLOCK_FLAG_DUPLICATE;
170         else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
171             type = BLOCK_FLAG_RAID0;
172         else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
173             type = BLOCK_FLAG_DUPLICATE;
174         else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
175             type = BLOCK_FLAG_RAID10;
176         else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
177             type = BLOCK_FLAG_RAID5;
178         else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
179             type = BLOCK_FLAG_RAID6;
180         else // SINGLE
181             type = BLOCK_FLAG_DUPLICATE;
182     }
183 
184     while (!IsListEmpty(&c->deleting)) {
185         space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
186 
187         if (Vcb->trim && !Vcb->options.no_trim && (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
188             CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
189 
190             if (type == BLOCK_FLAG_DUPLICATE) {
191                 uint16_t i;
192 
193                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
194                     if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
195                         add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size);
196                 }
197             } else if (type == BLOCK_FLAG_RAID0) {
198                 uint64_t startoff, endoff;
199                 uint16_t startoffstripe, endoffstripe, i;
200 
201                 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
202                 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
203 
204                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
205                     if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) {
206                         uint64_t stripestart, stripeend;
207 
208                         if (startoffstripe > i)
209                             stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
210                         else if (startoffstripe == i)
211                             stripestart = startoff;
212                         else
213                             stripestart = startoff - (startoff % c->chunk_item->stripe_length);
214 
215                         if (endoffstripe > i)
216                             stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
217                         else if (endoffstripe == i)
218                             stripeend = endoff + 1;
219                         else
220                             stripeend = endoff - (endoff % c->chunk_item->stripe_length);
221 
222                         if (stripestart != stripeend)
223                             add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart);
224                     }
225                 }
226             } else if (type == BLOCK_FLAG_RAID10) {
227                 uint64_t startoff, endoff;
228                 uint16_t sub_stripes, startoffstripe, endoffstripe, i;
229 
230                 sub_stripes = max(1, c->chunk_item->sub_stripes);
231 
232                 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
233                 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
234 
235                 startoffstripe *= sub_stripes;
236                 endoffstripe *= sub_stripes;
237 
238                 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
239                     ULONG j;
240                     uint64_t stripestart, stripeend;
241 
242                     if (startoffstripe > i)
243                         stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
244                     else if (startoffstripe == i)
245                         stripestart = startoff;
246                     else
247                         stripestart = startoff - (startoff % c->chunk_item->stripe_length);
248 
249                     if (endoffstripe > i)
250                         stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
251                     else if (endoffstripe == i)
252                         stripeend = endoff + 1;
253                     else
254                         stripeend = endoff - (endoff % c->chunk_item->stripe_length);
255 
256                     if (stripestart != stripeend) {
257                         for (j = 0; j < sub_stripes; j++) {
258                             if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim)
259                                 add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart);
260                         }
261                     }
262                 }
263             }
264             // FIXME - RAID5(?), RAID6(?)
265         }
266 
267         RemoveEntryList(&s->list_entry);
268         ExFreePool(s);
269     }
270 }
271 
272 typedef struct {
273     DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa;
274     ATA_PASS_THROUGH_EX apte;
275     PIRP Irp;
276     IO_STATUS_BLOCK iosb;
277 #ifdef DEBUG_TRIM_EMULATION
278     PMDL mdl;
279     void* buf;
280 #endif
281 } ioctl_context_stripe;
282 
283 typedef struct {
284     KEVENT Event;
285     LONG left;
286     ioctl_context_stripe* stripes;
287 } ioctl_context;
288 
289 _Function_class_(IO_COMPLETION_ROUTINE)
290 static NTSTATUS __stdcall ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
291     ioctl_context* context = (ioctl_context*)conptr;
292     LONG left2 = InterlockedDecrement(&context->left);
293 
294     UNUSED(DeviceObject);
295     UNUSED(Irp);
296 
297     if (left2 == 0)
298         KeSetEvent(&context->Event, 0, false);
299 
300     return STATUS_MORE_PROCESSING_REQUIRED;
301 }
302 
#ifdef DEBUG_TRIM_EMULATION
/* Debug stand-in for a real trim: overwrites every range on dev->trim_list
 * with zeroes, issuing one write IRP per range and waiting for all of them.
 *
 * The list itself is left untouched; the caller frees it. Ranges whose IRP,
 * buffer or MDL cannot be allocated are skipped after logging.
 *
 * NOTE(review): range sizes are truncated to 32 bits for the buffer and the
 * write length, so ranges of 4 GiB or more are not fully covered. */
static void trim_emulation(device* dev) {
    LIST_ENTRY* le;
    ioctl_context context;
    unsigned int i = 0, count = 0;

    le = dev->trim_list.Flink;
    while (le != &dev->trim_list) {
        count++;
        le = le->Flink;
    }

    // Nothing to send means nothing would ever signal the event.
    if (count == 0)
        return;

    context.left = count;

    KeInitializeEvent(&context.Event, NotificationEvent, false);

    context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * count, ALLOC_TAG);
    if (!context.stripes) {
        ERR("out of memory\n");
        return;
    }

    RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * count);

    i = 0;
    le = dev->trim_list.Flink;
    while (le != &dev->trim_list) {
        ioctl_context_stripe* stripe = &context.stripes[i];
        space* s = CONTAINING_RECORD(le, space, list_entry);
        PIO_STACK_LOCATION IrpSp;
        bool sent = false;

        WARN("(%I64x, %I64x)\n", s->address, s->size);

        stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);

        if (!stripe->Irp) {
            ERR("IoAllocateIrp failed\n");
            goto next;
        }

        IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
        IrpSp->MajorFunction = IRP_MJ_WRITE;
        IrpSp->FileObject = dev->fileobj;

        stripe->buf = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)s->size, ALLOC_TAG);

        if (!stripe->buf) {
            ERR("out of memory\n");
            goto next;
        }

        RtlZeroMemory(stripe->buf, (uint32_t)s->size); // FIXME - randomize instead?

        stripe->mdl = IoAllocateMdl(stripe->buf, (uint32_t)s->size, false, false, NULL);

        if (!stripe->mdl) {
            ERR("IoAllocateMdl failed\n");
            goto next;
        }

        MmBuildMdlForNonPagedPool(stripe->mdl);

        stripe->Irp->MdlAddress = stripe->mdl;

        IrpSp->Parameters.Write.ByteOffset.QuadPart = s->address;
        IrpSp->Parameters.Write.Length = (uint32_t)s->size;

        stripe->Irp->UserIosb = &stripe->iosb;

        IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);

        IoCallDriver(dev->devobj, stripe->Irp);
        sent = true;

next:
        // A request that was never dispatched never reaches ioctl_completion,
        // so account for it here; otherwise the wait below would never end.
        if (!sent && InterlockedDecrement(&context.left) == 0)
            KeSetEvent(&context.Event, 0, false);

        i++;

        le = le->Flink;
    }

    KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);

    for (i = 0; i < count; i++) {
        ioctl_context_stripe* stripe = &context.stripes[i];

        if (stripe->mdl)
            IoFreeMdl(stripe->mdl);

        if (stripe->buf)
            ExFreePool(stripe->buf);

        // ioctl_completion returns STATUS_MORE_PROCESSING_REQUIRED, so the
        // IRPs remain ours to free.
        if (stripe->Irp)
            IoFreeIrp(stripe->Irp);
    }

    ExFreePool(context.stripes);
}
#endif
392 
393 static void clean_space_cache(device_extension* Vcb) {
394     LIST_ENTRY* le;
395     chunk* c;
396 #ifndef DEBUG_TRIM_EMULATION
397     ULONG num;
398 #endif
399 
400     TRACE("(%p)\n", Vcb);
401 
402     ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
403 
404     le = Vcb->chunks.Flink;
405     while (le != &Vcb->chunks) {
406         c = CONTAINING_RECORD(le, chunk, list_entry);
407 
408         if (c->space_changed) {
409             acquire_chunk_lock(c, Vcb);
410 
411             if (c->space_changed)
412                 clean_space_cache_chunk(Vcb, c);
413 
414             c->space_changed = false;
415 
416             release_chunk_lock(c, Vcb);
417         }
418 
419         le = le->Flink;
420     }
421 
422     ExReleaseResourceLite(&Vcb->chunk_lock);
423 
424     if (Vcb->trim && !Vcb->options.no_trim) {
425 #ifndef DEBUG_TRIM_EMULATION
426         ioctl_context context;
427         ULONG total_num;
428 
429         context.left = 0;
430 
431         le = Vcb->devices.Flink;
432         while (le != &Vcb->devices) {
433             device* dev = CONTAINING_RECORD(le, device, list_entry);
434 
435             if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0)
436                 context.left++;
437 
438             le = le->Flink;
439         }
440 
441         if (context.left == 0)
442             return;
443 
444         total_num = context.left;
445         num = 0;
446 
447         KeInitializeEvent(&context.Event, NotificationEvent, false);
448 
449         context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
450         if (!context.stripes) {
451             ERR("out of memory\n");
452             return;
453         }
454 
455         RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
456 #endif
457 
458         le = Vcb->devices.Flink;
459         while (le != &Vcb->devices) {
460             device* dev = CONTAINING_RECORD(le, device, list_entry);
461 
462             if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) {
463 #ifdef DEBUG_TRIM_EMULATION
464                 trim_emulation(dev);
465 #else
466                 LIST_ENTRY* le2;
467                 ioctl_context_stripe* stripe = &context.stripes[num];
468                 DEVICE_DATA_SET_RANGE* ranges;
469                 ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i;
470                 PIO_STACK_LOCATION IrpSp;
471 
472                 stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
473                 if (!stripe->dmdsa) {
474                     ERR("out of memory\n");
475                     goto nextdev;
476                 }
477 
478                 stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
479                 stripe->dmdsa->Action = DeviceDsmAction_Trim;
480                 stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED;
481                 stripe->dmdsa->ParameterBlockOffset = 0;
482                 stripe->dmdsa->ParameterBlockLength = 0;
483                 stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t));
484                 stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
485 
486                 ranges = (DEVICE_DATA_SET_RANGE*)((uint8_t*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset);
487 
488                 i = 0;
489 
490                 le2 = dev->trim_list.Flink;
491                 while (le2 != &dev->trim_list) {
492                     space* s = CONTAINING_RECORD(le2, space, list_entry);
493 
494                     ranges[i].StartingOffset = s->address;
495                     ranges[i].LengthInBytes = s->size;
496                     i++;
497 
498                     le2 = le2->Flink;
499                 }
500 
501                 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);
502 
503                 if (!stripe->Irp) {
504                     ERR("IoAllocateIrp failed\n");
505                     goto nextdev;
506                 }
507 
508                 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
509                 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
510                 IrpSp->FileObject = dev->fileobj;
511 
512                 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES;
513                 IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen;
514                 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0;
515 
516                 stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa;
517                 stripe->Irp->Flags |= IRP_BUFFERED_IO;
518                 stripe->Irp->UserBuffer = NULL;
519                 stripe->Irp->UserIosb = &stripe->iosb;
520 
521                 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);
522 
523                 IoCallDriver(dev->devobj, stripe->Irp);
524 
525 nextdev:
526 #endif
527                 while (!IsListEmpty(&dev->trim_list)) {
528                     space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry);
529                     ExFreePool(s);
530                 }
531 
532                 dev->num_trim_entries = 0;
533 
534 #ifndef DEBUG_TRIM_EMULATION
535                 num++;
536 #endif
537             }
538 
539             le = le->Flink;
540         }
541 
542 #ifndef DEBUG_TRIM_EMULATION
543         KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
544 
545         for (num = 0; num < total_num; num++) {
546             if (context.stripes[num].dmdsa)
547                 ExFreePool(context.stripes[num].dmdsa);
548 
549             if (context.stripes[num].Irp)
550                 IoFreeIrp(context.stripes[num].Irp);
551         }
552 
553         ExFreePool(context.stripes);
554 #endif
555     }
556 }
557 
558 static bool trees_consistent(device_extension* Vcb) {
559     ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
560     LIST_ENTRY* le;
561 
562     le = Vcb->trees.Flink;
563     while (le != &Vcb->trees) {
564         tree* t = CONTAINING_RECORD(le, tree, list_entry);
565 
566         if (t->write) {
567             if (t->header.num_items == 0 && t->parent) {
568 #ifdef DEBUG_WRITE_LOOPS
569                 ERR("empty tree found, looping again\n");
570 #endif
571                 return false;
572             }
573 
574             if (t->size > maxsize) {
575 #ifdef DEBUG_WRITE_LOOPS
576                 ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
577 #endif
578                 return false;
579             }
580 
581             if (!t->has_new_address) {
582 #ifdef DEBUG_WRITE_LOOPS
583                 ERR("tree found without new address, looping again\n");
584 #endif
585                 return false;
586             }
587         }
588 
589         le = le->Flink;
590     }
591 
592     return true;
593 }
594 
595 static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) {
596     ULONG level;
597     LIST_ENTRY* le;
598 
599     for (level = 0; level <= 255; level++) {
600         bool nothing_found = true;
601 
602         TRACE("level = %u\n", level);
603 
604         le = Vcb->trees.Flink;
605         while (le != &Vcb->trees) {
606             tree* t = CONTAINING_RECORD(le, tree, list_entry);
607 
608             if (t->write && t->header.level == level) {
609                 TRACE("tree %p: root = %I64x, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
610 
611                 nothing_found = false;
612 
613                 if (t->parent) {
614                     if (!t->parent->write)
615                         TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
616 
617                     t->parent->write = true;
618                 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
619                     KEY searchkey;
620                     traverse_ptr tp;
621                     NTSTATUS Status;
622 #ifdef __REACTOS__
623                     tree* t2;
624 #endif
625 
626                     searchkey.obj_id = t->root->id;
627                     searchkey.obj_type = TYPE_ROOT_ITEM;
628                     searchkey.offset = 0xffffffffffffffff;
629 
630                     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
631                     if (!NT_SUCCESS(Status)) {
632                         ERR("error - find_item returned %08x\n", Status);
633                         return Status;
634                     }
635 
636                     if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
637                         ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
638                         return STATUS_INTERNAL_ERROR;
639                     }
640 
641                     if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
642                         ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
643 
644                         if (!ri) {
645                             ERR("out of memory\n");
646                             return STATUS_INSUFFICIENT_RESOURCES;
647                         }
648 
649                         RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
650 
651                         Status = delete_tree_item(Vcb, &tp);
652                         if (!NT_SUCCESS(Status)) {
653                             ERR("delete_tree_item returned %08x\n", Status);
654                             ExFreePool(ri);
655                             return Status;
656                         }
657 
658                         Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
659                         if (!NT_SUCCESS(Status)) {
660                             ERR("insert_tree_item returned %08x\n", Status);
661                             ExFreePool(ri);
662                             return Status;
663                         }
664                     }
665 
666 #ifndef __REACTOS__
667                     tree* t2 = tp.tree;
668 #else
669                     t2 = tp.tree;
670 #endif
671                     while (t2) {
672                         t2->write = true;
673 
674                         t2 = t2->parent;
675                     }
676                 }
677             }
678 
679             le = le->Flink;
680         }
681 
682         if (nothing_found)
683             break;
684     }
685 
686     return STATUS_SUCCESS;
687 }
688 
689 static void add_parents_to_cache(tree* t) {
690     while (t->parent) {
691         t = t->parent;
692         t->write = true;
693     }
694 }
695 
696 static bool insert_tree_extent_skinny(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t address, PIRP Irp, LIST_ENTRY* rollback) {
697     NTSTATUS Status;
698     EXTENT_ITEM_SKINNY_METADATA* eism;
699     traverse_ptr insert_tp;
700 
701     eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
702     if (!eism) {
703         ERR("out of memory\n");
704         return false;
705     }
706 
707     eism->ei.refcount = 1;
708     eism->ei.generation = Vcb->superblock.generation;
709     eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
710     eism->type = TYPE_TREE_BLOCK_REF;
711     eism->tbr.offset = root_id;
712 
713     Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp);
714     if (!NT_SUCCESS(Status)) {
715         ERR("insert_tree_item returned %08x\n", Status);
716         ExFreePool(eism);
717         return false;
718     }
719 
720     acquire_chunk_lock(c, Vcb);
721 
722     space_list_subtract(c, false, address, Vcb->superblock.node_size, rollback);
723 
724     release_chunk_lock(c, Vcb);
725 
726     add_parents_to_cache(insert_tp.tree);
727 
728     return true;
729 }
730 
731 bool find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t* address) {
732     LIST_ENTRY* le;
733     space* s;
734 
735     TRACE("(%p, %I64x, %p)\n", Vcb, c->offset, address);
736 
737     if (Vcb->superblock.node_size > c->chunk_item->size - c->used)
738         return false;
739 
740     if (!c->cache_loaded) {
741         NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
742 
743         if (!NT_SUCCESS(Status)) {
744             ERR("load_cache_chunk returned %08x\n", Status);
745             return false;
746         }
747     }
748 
749     if (IsListEmpty(&c->space_size))
750         return false;
751 
752     if (!c->last_alloc_set) {
753         s = CONTAINING_RECORD(c->space.Blink, space, list_entry);
754 
755         c->last_alloc = s->address;
756         c->last_alloc_set = true;
757 
758         if (s->size >= Vcb->superblock.node_size) {
759             *address = s->address;
760             c->last_alloc += Vcb->superblock.node_size;
761             return true;
762         }
763     }
764 
765     le = c->space.Flink;
766     while (le != &c->space) {
767         s = CONTAINING_RECORD(le, space, list_entry);
768 
769         if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) {
770             *address = c->last_alloc;
771             c->last_alloc += Vcb->superblock.node_size;
772             return true;
773         }
774 
775         le = le->Flink;
776     }
777 
778     le = c->space_size.Flink;
779     while (le != &c->space_size) {
780         s = CONTAINING_RECORD(le, space, list_entry_size);
781 
782         if (s->size == Vcb->superblock.node_size) {
783             *address = s->address;
784             c->last_alloc = s->address + Vcb->superblock.node_size;
785             return true;
786         } else if (s->size < Vcb->superblock.node_size) {
787             if (le == c->space_size.Flink)
788                 return false;
789 
790             s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
791 
792             *address = s->address;
793             c->last_alloc = s->address + Vcb->superblock.node_size;
794 
795             return true;
796         }
797 
798         le = le->Flink;
799     }
800 
801     s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
802 
803     if (s->size > Vcb->superblock.node_size) {
804         *address = s->address;
805         c->last_alloc = s->address + Vcb->superblock.node_size;
806         return true;
807     }
808 
809     return false;
810 }
811 
812 static bool insert_tree_extent(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t* new_address, PIRP Irp, LIST_ENTRY* rollback) {
813     NTSTATUS Status;
814     uint64_t address;
815     EXTENT_ITEM_TREE2* eit2;
816     traverse_ptr insert_tp;
817 
818     TRACE("(%p, %x, %I64x, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback);
819 
820     if (!find_metadata_address_in_chunk(Vcb, c, &address))
821         return false;
822 
823     if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
824         bool b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
825 
826         if (b)
827             *new_address = address;
828 
829         return b;
830     }
831 
832     eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
833     if (!eit2) {
834         ERR("out of memory\n");
835         return false;
836     }
837 
838     eit2->eit.extent_item.refcount = 1;
839     eit2->eit.extent_item.generation = Vcb->superblock.generation;
840     eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
841     eit2->eit.level = level;
842     eit2->type = TYPE_TREE_BLOCK_REF;
843     eit2->tbr.offset = root_id;
844 
845     Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp);
846     if (!NT_SUCCESS(Status)) {
847         ERR("insert_tree_item returned %08x\n", Status);
848         ExFreePool(eit2);
849         return false;
850     }
851 
852     acquire_chunk_lock(c, Vcb);
853 
854     space_list_subtract(c, false, address, Vcb->superblock.node_size, rollback);
855 
856     release_chunk_lock(c, Vcb);
857 
858     add_parents_to_cache(insert_tp.tree);
859 
860     *new_address = address;
861 
862     return true;
863 }
864 
865 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
866     NTSTATUS Status;
867     chunk *origchunk = NULL, *c;
868     LIST_ENTRY* le;
869     uint64_t flags, addr;
870 
871     if (t->root->id == BTRFS_ROOT_CHUNK)
872         flags = Vcb->system_flags;
873     else
874         flags = Vcb->metadata_flags;
875 
876     if (t->has_address) {
877         origchunk = get_chunk_from_address(Vcb, t->header.address);
878 
879         if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags &&
880             insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
881             t->new_address = addr;
882             t->has_new_address = true;
883             return STATUS_SUCCESS;
884         }
885     }
886 
887     ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
888 
889     le = Vcb->chunks.Flink;
890     while (le != &Vcb->chunks) {
891         c = CONTAINING_RECORD(le, chunk, list_entry);
892 
893         if (!c->readonly && !c->reloc) {
894             acquire_chunk_lock(c, Vcb);
895 
896             if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
897                 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
898                     release_chunk_lock(c, Vcb);
899                     ExReleaseResourceLite(&Vcb->chunk_lock);
900                     t->new_address = addr;
901                     t->has_new_address = true;
902                     return STATUS_SUCCESS;
903                 }
904             }
905 
906             release_chunk_lock(c, Vcb);
907         }
908 
909         le = le->Flink;
910     }
911 
912     // allocate new chunk if necessary
913 
914     Status = alloc_chunk(Vcb, flags, &c, false);
915 
916     if (!NT_SUCCESS(Status)) {
917         ERR("alloc_chunk returned %08x\n", Status);
918         ExReleaseResourceLite(&Vcb->chunk_lock);
919         return Status;
920     }
921 
922     acquire_chunk_lock(c, Vcb);
923 
924     if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
925         if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
926             release_chunk_lock(c, Vcb);
927             ExReleaseResourceLite(&Vcb->chunk_lock);
928             t->new_address = addr;
929             t->has_new_address = true;
930             return STATUS_SUCCESS;
931         }
932     }
933 
934     release_chunk_lock(c, Vcb);
935 
936     ExReleaseResourceLite(&Vcb->chunk_lock);
937 
938     ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
939 
940     return STATUS_DISK_FULL;
941 }
942 
943 static NTSTATUS reduce_tree_extent(device_extension* Vcb, uint64_t address, tree* t, uint64_t parent_root, uint8_t level, PIRP Irp, LIST_ENTRY* rollback) {
944     NTSTATUS Status;
945     uint64_t rc, root;
946 
947     TRACE("(%p, %I64x, %p)\n", Vcb, address, t);
948 
949     rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
950     if (rc == 0) {
951         ERR("error - refcount for extent %I64x was 0\n", address);
952         return STATUS_INTERNAL_ERROR;
953     }
954 
955     if (!t || t->parent)
956         root = parent_root;
957     else
958         root = t->header.tree_id;
959 
960     Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp);
961     if (!NT_SUCCESS(Status)) {
962         ERR("decrease_extent_refcount_tree returned %08x\n", Status);
963         return Status;
964     }
965 
966     if (rc == 1) {
967         chunk* c = get_chunk_from_address(Vcb, address);
968 
969         if (c) {
970             acquire_chunk_lock(c, Vcb);
971 
972             if (!c->cache_loaded) {
973                 Status = load_cache_chunk(Vcb, c, NULL);
974 
975                 if (!NT_SUCCESS(Status)) {
976                     ERR("load_cache_chunk returned %08x\n", Status);
977                     release_chunk_lock(c, Vcb);
978                     return Status;
979                 }
980             }
981 
982             c->used -= Vcb->superblock.node_size;
983 
984             space_list_add(c, address, Vcb->superblock.node_size, rollback);
985 
986             release_chunk_lock(c, Vcb);
987         } else
988             ERR("could not find chunk for address %I64x\n", address);
989     }
990 
991     return STATUS_SUCCESS;
992 }
993 
994 static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, bool old) {
995     LIST_ENTRY *le2, *list;
996     changed_extent_ref* cer;
997 
998     list = old ? &ce->old_refs : &ce->refs;
999 
1000     le2 = list->Flink;
1001     while (le2 != list) {
1002         cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1003 
1004         if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) {
1005             cer->edr.count += edr->count;
1006             goto end;
1007         }
1008 
1009         le2 = le2->Flink;
1010     }
1011 
1012     cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
1013     if (!cer) {
1014         ERR("out of memory\n");
1015         return STATUS_INSUFFICIENT_RESOURCES;
1016     }
1017 
1018     cer->type = TYPE_EXTENT_DATA_REF;
1019     RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF));
1020     InsertTailList(list, &cer->list_entry);
1021 
1022 end:
1023     if (old)
1024         ce->old_count += edr->count;
1025     else
1026         ce->count += edr->count;
1027 
1028     return STATUS_SUCCESS;
1029 }
1030 
1031 static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, bool old) {
1032     LIST_ENTRY *le2, *list;
1033     changed_extent_ref* cer;
1034 
1035     list = old ? &ce->old_refs : &ce->refs;
1036 
1037     le2 = list->Flink;
1038     while (le2 != list) {
1039         cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1040 
1041         if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) {
1042             cer->sdr.count += sdr->count;
1043             goto end;
1044         }
1045 
1046         le2 = le2->Flink;
1047     }
1048 
1049     cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
1050     if (!cer) {
1051         ERR("out of memory\n");
1052         return STATUS_INSUFFICIENT_RESOURCES;
1053     }
1054 
1055     cer->type = TYPE_SHARED_DATA_REF;
1056     RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF));
1057     InsertTailList(list, &cer->list_entry);
1058 
1059 end:
1060     if (old)
1061         ce->old_count += sdr->count;
1062     else
1063         ce->count += sdr->count;
1064 
1065     return STATUS_SUCCESS;
1066 }
1067 
1068 static bool shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
1069     KEY searchkey;
1070     traverse_ptr tp;
1071     NTSTATUS Status;
1072 
1073     if (!t->updated_extents && t->has_address) {
1074         Status = update_tree_extents(Vcb, t, Irp, rollback);
1075         if (!NT_SUCCESS(Status)) {
1076             ERR("update_tree_extents returned %08x\n", Status);
1077             return false;
1078         }
1079     }
1080 
1081     searchkey.obj_id = t->header.address;
1082     searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
1083     searchkey.offset = 0xffffffffffffffff;
1084 
1085     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
1086     if (!NT_SUCCESS(Status)) {
1087         ERR("error - find_item returned %08x\n", Status);
1088         return false;
1089     }
1090 
1091     if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM))
1092         return false;
1093     else
1094         return true;
1095 }
1096 
1097 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
1098     NTSTATUS Status;
1099     uint64_t rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
1100     uint64_t flags = get_extent_flags(Vcb, t->header.address, Irp);
1101 
1102     if (rc == 0) {
1103         ERR("refcount for extent %I64x was 0\n", t->header.address);
1104         return STATUS_INTERNAL_ERROR;
1105     }
1106 
1107     if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1108         TREE_BLOCK_REF tbr;
1109         bool unique = rc > 1 ? false : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : false);
1110 
1111         if (t->header.level == 0) {
1112             LIST_ENTRY* le;
1113 
1114             le = t->itemlist.Flink;
1115             while (le != &t->itemlist) {
1116                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1117 
1118                 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1119                     EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1120 
1121                     if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1122                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1123 
1124                         if (ed2->size > 0) {
1125                             EXTENT_DATA_REF edr;
1126                             changed_extent* ce = NULL;
1127                             chunk* c = get_chunk_from_address(Vcb, ed2->address);
1128 
1129                             if (c) {
1130                                 LIST_ENTRY* le2;
1131 
1132                                 le2 = c->changed_extents.Flink;
1133                                 while (le2 != &c->changed_extents) {
1134                                     changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1135 
1136                                     if (ce2->address == ed2->address) {
1137                                         ce = ce2;
1138                                         break;
1139                                     }
1140 
1141                                     le2 = le2->Flink;
1142                                 }
1143                             }
1144 
1145                             edr.root = t->root->id;
1146                             edr.objid = td->key.obj_id;
1147                             edr.offset = td->key.offset - ed2->offset;
1148                             edr.count = 1;
1149 
1150                             if (ce) {
1151                                 Status = add_changed_extent_ref_edr(ce, &edr, true);
1152                                 if (!NT_SUCCESS(Status)) {
1153                                     ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1154                                     return Status;
1155                                 }
1156 
1157                                 Status = add_changed_extent_ref_edr(ce, &edr, false);
1158                                 if (!NT_SUCCESS(Status)) {
1159                                     ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1160                                     return Status;
1161                                 }
1162                             }
1163 
1164                             Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1165                             if (!NT_SUCCESS(Status)) {
1166                                 ERR("increase_extent_refcount returned %08x\n", Status);
1167                                 return Status;
1168                             }
1169 
1170                             if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1171                                 uint64_t sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
1172 
1173                                 if (sdrrc > 0) {
1174                                     SHARED_DATA_REF sdr;
1175 
1176                                     sdr.offset = t->header.address;
1177                                     sdr.count = 1;
1178 
1179                                     Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
1180                                                                       t->header.address, ce ? ce->superseded : false, Irp);
1181                                     if (!NT_SUCCESS(Status)) {
1182                                         ERR("decrease_extent_refcount returned %08x\n", Status);
1183                                         return Status;
1184                                     }
1185 
1186                                     if (ce) {
1187                                         LIST_ENTRY* le2;
1188 
1189                                         le2 = ce->refs.Flink;
1190                                         while (le2 != &ce->refs) {
1191                                             changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1192 
1193                                             if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1194                                                 ce->count--;
1195                                                 cer->sdr.count--;
1196                                                 break;
1197                                             }
1198 
1199                                             le2 = le2->Flink;
1200                                         }
1201 
1202                                         le2 = ce->old_refs.Flink;
1203                                         while (le2 != &ce->old_refs) {
1204                                             changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1205 
1206                                             if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1207                                                 ce->old_count--;
1208 
1209                                                 if (cer->sdr.count > 1)
1210                                                     cer->sdr.count--;
1211                                                 else {
1212                                                     RemoveEntryList(&cer->list_entry);
1213                                                     ExFreePool(cer);
1214                                                 }
1215 
1216                                                 break;
1217                                             }
1218 
1219                                             le2 = le2->Flink;
1220                                         }
1221                                     }
1222                                 }
1223                             }
1224 
1225                             // FIXME - clear shared flag if unique?
1226                         }
1227                     }
1228                 }
1229 
1230                 le = le->Flink;
1231             }
1232         } else {
1233             LIST_ENTRY* le;
1234 
1235             le = t->itemlist.Flink;
1236             while (le != &t->itemlist) {
1237                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1238 
1239                 if (!td->inserted) {
1240                     tbr.offset = t->root->id;
1241 
1242                     Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF,
1243                                                       &tbr, &td->key, t->header.level - 1, Irp);
1244                     if (!NT_SUCCESS(Status)) {
1245                         ERR("increase_extent_refcount returned %08x\n", Status);
1246                         return Status;
1247                     }
1248 
1249                     if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1250                         uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
1251 
1252                         if (sbrrc > 0) {
1253                             SHARED_BLOCK_REF sbr;
1254 
1255                             sbr.offset = t->header.address;
1256 
1257                             Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1258                                                               t->header.address, false, Irp);
1259                             if (!NT_SUCCESS(Status)) {
1260                                 ERR("decrease_extent_refcount returned %08x\n", Status);
1261                                 return Status;
1262                             }
1263                         }
1264                     }
1265 
1266                     // FIXME - clear shared flag if unique?
1267                 }
1268 
1269                 le = le->Flink;
1270             }
1271         }
1272 
1273         if (unique) {
1274             uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
1275 
1276             if (sbrrc == 1) {
1277                 SHARED_BLOCK_REF sbr;
1278 
1279                 sbr.offset = t->parent->header.address;
1280 
1281                 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1282                                                   t->parent->header.address, false, Irp);
1283                 if (!NT_SUCCESS(Status)) {
1284                     ERR("decrease_extent_refcount returned %08x\n", Status);
1285                     return Status;
1286                 }
1287             }
1288         }
1289 
1290         if (t->parent)
1291             tbr.offset = t->parent->header.tree_id;
1292         else
1293             tbr.offset = t->header.tree_id;
1294 
1295         Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
1296                                           t->parent ? &t->paritem->key : NULL, t->header.level, Irp);
1297         if (!NT_SUCCESS(Status)) {
1298             ERR("increase_extent_refcount returned %08x\n", Status);
1299             return Status;
1300         }
1301 
1302         // FIXME - clear shared flag if unique?
1303 
1304         t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1305     }
1306 
1307     if (rc > 1 || t->header.tree_id == t->root->id) {
1308         Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback);
1309 
1310         if (!NT_SUCCESS(Status)) {
1311             ERR("reduce_tree_extent returned %08x\n", Status);
1312             return Status;
1313         }
1314     }
1315 
1316     t->has_address = false;
1317 
1318     if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
1319         if (t->header.tree_id == t->root->id) {
1320             flags |= EXTENT_ITEM_SHARED_BACKREFS;
1321             update_extent_flags(Vcb, t->header.address, flags, Irp);
1322         }
1323 
1324         if (t->header.level > 0) {
1325             LIST_ENTRY* le;
1326 
1327             le = t->itemlist.Flink;
1328             while (le != &t->itemlist) {
1329                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1330 
1331                 if (!td->inserted) {
1332                     if (t->header.tree_id == t->root->id) {
1333                         SHARED_BLOCK_REF sbr;
1334 
1335                         sbr.offset = t->header.address;
1336 
1337                         Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp);
1338                     } else {
1339                         TREE_BLOCK_REF tbr;
1340 
1341                         tbr.offset = t->root->id;
1342 
1343                         Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp);
1344                     }
1345 
1346                     if (!NT_SUCCESS(Status)) {
1347                         ERR("increase_extent_refcount returned %08x\n", Status);
1348                         return Status;
1349                     }
1350                 }
1351 
1352                 le = le->Flink;
1353             }
1354         } else {
1355             LIST_ENTRY* le;
1356 
1357             le = t->itemlist.Flink;
1358             while (le != &t->itemlist) {
1359                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1360 
1361                 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1362                     EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1363 
1364                     if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1365                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1366 
1367                         if (ed2->size > 0) {
1368                             changed_extent* ce = NULL;
1369                             chunk* c = get_chunk_from_address(Vcb, ed2->address);
1370 
1371                             if (c) {
1372                                 LIST_ENTRY* le2;
1373 
1374                                 le2 = c->changed_extents.Flink;
1375                                 while (le2 != &c->changed_extents) {
1376                                     changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1377 
1378                                     if (ce2->address == ed2->address) {
1379                                         ce = ce2;
1380                                         break;
1381                                     }
1382 
1383                                     le2 = le2->Flink;
1384                                 }
1385                             }
1386 
1387                             if (t->header.tree_id == t->root->id) {
1388                                 SHARED_DATA_REF sdr;
1389 
1390                                 sdr.offset = t->header.address;
1391                                 sdr.count = 1;
1392 
1393                                 if (ce) {
1394                                     Status = add_changed_extent_ref_sdr(ce, &sdr, true);
1395                                     if (!NT_SUCCESS(Status)) {
1396                                         ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1397                                         return Status;
1398                                     }
1399 
1400                                     Status = add_changed_extent_ref_sdr(ce, &sdr, false);
1401                                     if (!NT_SUCCESS(Status)) {
1402                                         ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1403                                         return Status;
1404                                     }
1405                                 }
1406 
1407                                 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp);
1408                             } else {
1409                                 EXTENT_DATA_REF edr;
1410 
1411                                 edr.root = t->root->id;
1412                                 edr.objid = td->key.obj_id;
1413                                 edr.offset = td->key.offset - ed2->offset;
1414                                 edr.count = 1;
1415 
1416                                 if (ce) {
1417                                     Status = add_changed_extent_ref_edr(ce, &edr, true);
1418                                     if (!NT_SUCCESS(Status)) {
1419                                         ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1420                                         return Status;
1421                                     }
1422 
1423                                     Status = add_changed_extent_ref_edr(ce, &edr, false);
1424                                     if (!NT_SUCCESS(Status)) {
1425                                         ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1426                                         return Status;
1427                                     }
1428                                 }
1429 
1430                                 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1431                             }
1432 
1433                             if (!NT_SUCCESS(Status)) {
1434                                 ERR("increase_extent_refcount returned %08x\n", Status);
1435                                 return Status;
1436                             }
1437                         }
1438                     }
1439                 }
1440 
1441                 le = le->Flink;
1442             }
1443         }
1444     }
1445 
1446     t->updated_extents = true;
1447     t->header.tree_id = t->root->id;
1448 
1449     return STATUS_SUCCESS;
1450 }
1451 
1452 static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
1453     LIST_ENTRY* le;
1454     NTSTATUS Status;
1455     bool changed = false;
1456     uint8_t max_level = 0, level;
1457 
1458     TRACE("(%p)\n", Vcb);
1459 
1460     le = Vcb->trees.Flink;
1461     while (le != &Vcb->trees) {
1462         tree* t = CONTAINING_RECORD(le, tree, list_entry);
1463 
1464         if (t->write && !t->has_new_address) {
1465             chunk* c;
1466 
1467             if (t->has_address) {
1468                 c = get_chunk_from_address(Vcb, t->header.address);
1469 
1470                 if (c) {
1471                     if (!c->cache_loaded) {
1472                         acquire_chunk_lock(c, Vcb);
1473 
1474                         if (!c->cache_loaded) {
1475                             Status = load_cache_chunk(Vcb, c, NULL);
1476 
1477                             if (!NT_SUCCESS(Status)) {
1478                                 ERR("load_cache_chunk returned %08x\n", Status);
1479                                 release_chunk_lock(c, Vcb);
1480                                 return Status;
1481                             }
1482                         }
1483 
1484                         release_chunk_lock(c, Vcb);
1485                     }
1486                 }
1487             }
1488 
1489             Status = get_tree_new_address(Vcb, t, Irp, rollback);
1490             if (!NT_SUCCESS(Status)) {
1491                 ERR("get_tree_new_address returned %08x\n", Status);
1492                 return Status;
1493             }
1494 
1495             TRACE("allocated extent %I64x\n", t->new_address);
1496 
1497             c = get_chunk_from_address(Vcb, t->new_address);
1498 
1499             if (c)
1500                 c->used += Vcb->superblock.node_size;
1501             else {
1502                 ERR("could not find chunk for address %I64x\n", t->new_address);
1503                 return STATUS_INTERNAL_ERROR;
1504             }
1505 
1506             changed = true;
1507 
1508             if (t->header.level > max_level)
1509                 max_level = t->header.level;
1510         }
1511 
1512         le = le->Flink;
1513     }
1514 
1515     if (!changed)
1516         return STATUS_SUCCESS;
1517 
1518     level = max_level;
1519     do {
1520         le = Vcb->trees.Flink;
1521         while (le != &Vcb->trees) {
1522             tree* t = CONTAINING_RECORD(le, tree, list_entry);
1523 
1524             if (t->write && !t->updated_extents && t->has_address && t->header.level == level) {
1525                 Status = update_tree_extents(Vcb, t, Irp, rollback);
1526                 if (!NT_SUCCESS(Status)) {
1527                     ERR("update_tree_extents returned %08x\n", Status);
1528                     return Status;
1529                 }
1530             }
1531 
1532             le = le->Flink;
1533         }
1534 
1535         if (level == 0)
1536             break;
1537 
1538         level--;
1539     } while (true);
1540 
1541     return STATUS_SUCCESS;
1542 }
1543 
1544 static NTSTATUS update_root_root(device_extension* Vcb, bool no_cache, PIRP Irp, LIST_ENTRY* rollback) {
1545     LIST_ENTRY* le;
1546     NTSTATUS Status;
1547 
1548     TRACE("(%p)\n", Vcb);
1549 
1550     le = Vcb->trees.Flink;
1551     while (le != &Vcb->trees) {
1552         tree* t = CONTAINING_RECORD(le, tree, list_entry);
1553 
1554         if (t->write && !t->parent) {
1555             if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1556                 KEY searchkey;
1557                 traverse_ptr tp;
1558 
1559                 searchkey.obj_id = t->root->id;
1560                 searchkey.obj_type = TYPE_ROOT_ITEM;
1561                 searchkey.offset = 0xffffffffffffffff;
1562 
1563                 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1564                 if (!NT_SUCCESS(Status)) {
1565                     ERR("error - find_item returned %08x\n", Status);
1566                     return Status;
1567                 }
1568 
1569                 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1570                     ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
1571                     return STATUS_INTERNAL_ERROR;
1572                 }
1573 
1574                 TRACE("updating the address for root %I64x to %I64x\n", searchkey.obj_id, t->new_address);
1575 
1576                 t->root->root_item.block_number = t->new_address;
1577                 t->root->root_item.root_level = t->header.level;
1578                 t->root->root_item.generation = Vcb->superblock.generation;
1579                 t->root->root_item.generation2 = Vcb->superblock.generation;
1580 
1581                 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
1582 
1583                 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1584             }
1585 
1586             t->root->treeholder.address = t->new_address;
1587             t->root->treeholder.generation = Vcb->superblock.generation;
1588         }
1589 
1590         le = le->Flink;
1591     }
1592 
1593     if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
1594         ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
1595         Status = update_chunk_caches(Vcb, Irp, rollback);
1596         ExReleaseResourceLite(&Vcb->chunk_lock);
1597 
1598         if (!NT_SUCCESS(Status)) {
1599             ERR("update_chunk_caches returned %08x\n", Status);
1600             return Status;
1601         }
1602     }
1603 
1604     return STATUS_SUCCESS;
1605 }
1606 
1607 NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, bool no_free) {
1608     chunk* c;
1609     LIST_ENTRY* le;
1610     tree_write* tw;
1611     NTSTATUS Status;
1612     ULONG i, num_bits;
1613     write_data_context* wtc;
1614     ULONG bit_num = 0;
1615     bool raid56 = false;
1616 
1617     // merge together runs
1618     c = NULL;
1619     le = tree_writes->Flink;
1620     while (le != tree_writes) {
1621         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1622 
1623         if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
1624             c = get_chunk_from_address(Vcb, tw->address);
1625         else {
1626             tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
1627 
1628             if (tw->address == tw2->address + tw2->length) {
1629                 uint8_t* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG);
1630 
1631                 if (!data) {
1632                     ERR("out of memory\n");
1633                     return STATUS_INSUFFICIENT_RESOURCES;
1634                 }
1635 
1636                 RtlCopyMemory(data, tw2->data, tw2->length);
1637                 RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
1638 
1639                 if (!no_free || tw2->allocated)
1640                     ExFreePool(tw2->data);
1641 
1642                 tw2->data = data;
1643                 tw2->length += tw->length;
1644                 tw2->allocated = true;
1645 
1646                 if (!no_free || tw->allocated)
1647                     ExFreePool(tw->data);
1648 
1649                 RemoveEntryList(&tw->list_entry);
1650                 ExFreePool(tw);
1651 
1652                 le = tw2->list_entry.Flink;
1653                 continue;
1654             }
1655         }
1656 
1657         tw->c = c;
1658 
1659         if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6))
1660             raid56 = true;
1661 
1662         le = le->Flink;
1663     }
1664 
1665     num_bits = 0;
1666 
1667     le = tree_writes->Flink;
1668     while (le != tree_writes) {
1669         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1670 
1671         num_bits++;
1672 
1673         le = le->Flink;
1674     }
1675 
1676     wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG);
1677     if (!wtc) {
1678         ERR("out of memory\n");
1679         return STATUS_INSUFFICIENT_RESOURCES;
1680     }
1681 
1682     le = tree_writes->Flink;
1683 
1684     while (le != tree_writes) {
1685         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1686 
1687         TRACE("address: %I64x, size: %x\n", tw->address, tw->length);
1688 
1689         KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, false);
1690         InitializeListHead(&wtc[bit_num].stripes);
1691         wtc[bit_num].need_wait = false;
1692         wtc[bit_num].stripes_left = 0;
1693         wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL;
1694         wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL;
1695 
1696         Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, false, 0, HighPagePriority);
1697         if (!NT_SUCCESS(Status)) {
1698             ERR("write_data returned %08x\n", Status);
1699 
1700             for (i = 0; i < num_bits; i++) {
1701                 free_write_data_stripes(&wtc[i]);
1702             }
1703             ExFreePool(wtc);
1704 
1705             return Status;
1706         }
1707 
1708         bit_num++;
1709 
1710         le = le->Flink;
1711     }
1712 
1713     for (i = 0; i < num_bits; i++) {
1714         if (wtc[i].stripes.Flink != &wtc[i].stripes) {
1715             // launch writes and wait
1716             le = wtc[i].stripes.Flink;
1717             while (le != &wtc[i].stripes) {
1718                 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1719 
1720                 if (stripe->status != WriteDataStatus_Ignore) {
1721                     wtc[i].need_wait = true;
1722                     IoCallDriver(stripe->device->devobj, stripe->Irp);
1723                 }
1724 
1725                 le = le->Flink;
1726             }
1727         }
1728     }
1729 
1730     for (i = 0; i < num_bits; i++) {
1731         if (wtc[i].need_wait)
1732             KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, false, NULL);
1733     }
1734 
1735     for (i = 0; i < num_bits; i++) {
1736         le = wtc[i].stripes.Flink;
1737         while (le != &wtc[i].stripes) {
1738             write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1739 
1740             if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
1741                 Status = stripe->iosb.Status;
1742                 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
1743                 break;
1744             }
1745 
1746             le = le->Flink;
1747         }
1748 
1749         free_write_data_stripes(&wtc[i]);
1750     }
1751 
1752     ExFreePool(wtc);
1753 
1754     if (raid56) {
1755         c = NULL;
1756 
1757         le = tree_writes->Flink;
1758         while (le != tree_writes) {
1759             tw = CONTAINING_RECORD(le, tree_write, list_entry);
1760 
1761             if (tw->c != c) {
1762                 c = tw->c;
1763 
1764                 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
1765 
1766                 while (!IsListEmpty(&c->partial_stripes)) {
1767                     partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
1768 
1769                     Status = flush_partial_stripe(Vcb, c, ps);
1770 
1771                     if (ps->bmparr)
1772                         ExFreePool(ps->bmparr);
1773 
1774                     ExFreePool(ps);
1775 
1776                     if (!NT_SUCCESS(Status)) {
1777                         ERR("flush_partial_stripe returned %08x\n", Status);
1778                         ExReleaseResourceLite(&c->partial_stripes_lock);
1779                         return Status;
1780                     }
1781                 }
1782 
1783                 ExReleaseResourceLite(&c->partial_stripes_lock);
1784             }
1785 
1786             le = le->Flink;
1787         }
1788     }
1789 
1790     return STATUS_SUCCESS;
1791 }
1792 
1793 static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
1794     ULONG level;
1795     uint8_t *data, *body;
1796     uint32_t crc32;
1797     NTSTATUS Status;
1798     LIST_ENTRY* le;
1799     LIST_ENTRY tree_writes;
1800     tree_write* tw;
1801 
1802     TRACE("(%p)\n", Vcb);
1803 
1804     InitializeListHead(&tree_writes);
1805 
1806     for (level = 0; level <= 255; level++) {
1807         bool nothing_found = true;
1808 
1809         TRACE("level = %u\n", level);
1810 
1811         le = Vcb->trees.Flink;
1812         while (le != &Vcb->trees) {
1813             tree* t = CONTAINING_RECORD(le, tree, list_entry);
1814 
1815             if (t->write && t->header.level == level) {
1816                 KEY firstitem, searchkey;
1817                 LIST_ENTRY* le2;
1818                 traverse_ptr tp;
1819 
1820                 if (!t->has_new_address) {
1821                     ERR("error - tried to write tree with no new address\n");
1822                     return STATUS_INTERNAL_ERROR;
1823                 }
1824 
1825                 le2 = t->itemlist.Flink;
1826                 while (le2 != &t->itemlist) {
1827                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1828                     if (!td->ignore) {
1829                         firstitem = td->key;
1830                         break;
1831                     }
1832                     le2 = le2->Flink;
1833                 }
1834 
1835                 if (t->parent) {
1836                     t->paritem->key = firstitem;
1837                     t->paritem->treeholder.address = t->new_address;
1838                     t->paritem->treeholder.generation = Vcb->superblock.generation;
1839                 }
1840 
1841                 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
1842                     EXTENT_ITEM_TREE* eit;
1843 
1844                     searchkey.obj_id = t->new_address;
1845                     searchkey.obj_type = TYPE_EXTENT_ITEM;
1846                     searchkey.offset = Vcb->superblock.node_size;
1847 
1848                     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
1849                     if (!NT_SUCCESS(Status)) {
1850                         ERR("error - find_item returned %08x\n", Status);
1851                         return Status;
1852                     }
1853 
1854                     if (keycmp(searchkey, tp.item->key)) {
1855                         ERR("could not find %I64x,%x,%I64x in extent_root (found %I64x,%x,%I64x instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
1856                         return STATUS_INTERNAL_ERROR;
1857                     }
1858 
1859                     if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
1860                         ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
1861                         return STATUS_INTERNAL_ERROR;
1862                     }
1863 
1864                     eit = (EXTENT_ITEM_TREE*)tp.item->data;
1865                     eit->firstitem = firstitem;
1866                 }
1867 
1868                 nothing_found = false;
1869             }
1870 
1871             le = le->Flink;
1872         }
1873 
1874         if (nothing_found)
1875             break;
1876     }
1877 
1878     TRACE("allocated tree extents\n");
1879 
1880     le = Vcb->trees.Flink;
1881     while (le != &Vcb->trees) {
1882         tree* t = CONTAINING_RECORD(le, tree, list_entry);
1883         LIST_ENTRY* le2;
1884 #ifdef DEBUG_PARANOID
1885         uint32_t num_items = 0, size = 0;
1886         bool crash = false;
1887 #endif
1888 
1889         if (t->write) {
1890 #ifdef DEBUG_PARANOID
1891             bool first = true;
1892             KEY lastkey;
1893 
1894             le2 = t->itemlist.Flink;
1895             while (le2 != &t->itemlist) {
1896                 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1897                 if (!td->ignore) {
1898                     num_items++;
1899 
1900                     if (!first) {
1901                         if (keycmp(td->key, lastkey) == 0) {
1902                             ERR("(%I64x,%x,%I64x): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1903                             crash = true;
1904                         } else if (keycmp(td->key, lastkey) == -1) {
1905                             ERR("(%I64x,%x,%I64x): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1906                             crash = true;
1907                         }
1908                     } else
1909                         first = false;
1910 
1911                     lastkey = td->key;
1912 
1913                     if (t->header.level == 0)
1914                         size += td->size;
1915                 }
1916                 le2 = le2->Flink;
1917             }
1918 
1919             if (t->header.level == 0)
1920                 size += num_items * sizeof(leaf_node);
1921             else
1922                 size += num_items * sizeof(internal_node);
1923 
1924             if (num_items != t->header.num_items) {
1925                 ERR("tree %I64x, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
1926                 crash = true;
1927             }
1928 
1929             if (size != t->size) {
1930                 ERR("tree %I64x, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
1931                 crash = true;
1932             }
1933 
1934             if (t->header.num_items == 0 && t->parent) {
1935                 ERR("tree %I64x, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
1936                 crash = true;
1937             }
1938 
1939             if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
1940                 ERR("tree %I64x, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
1941                 crash = true;
1942             }
1943 
1944             if (crash) {
1945                 ERR("tree %p\n", t);
1946                 le2 = t->itemlist.Flink;
1947                 while (le2 != &t->itemlist) {
1948                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1949                     if (!td->ignore) {
1950                         ERR("%I64x,%x,%I64x inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
1951                     }
1952                     le2 = le2->Flink;
1953                 }
1954                 int3;
1955             }
1956 #endif
1957             t->header.address = t->new_address;
1958             t->header.generation = Vcb->superblock.generation;
1959             t->header.tree_id = t->root->id;
1960             t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
1961             t->header.fs_uuid = Vcb->superblock.uuid;
1962             t->has_address = true;
1963 
1964             data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
1965             if (!data) {
1966                 ERR("out of memory\n");
1967                 Status = STATUS_INSUFFICIENT_RESOURCES;
1968                 goto end;
1969             }
1970 
1971             body = data + sizeof(tree_header);
1972 
1973             RtlCopyMemory(data, &t->header, sizeof(tree_header));
1974             RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
1975 
1976             if (t->header.level == 0) {
1977                 leaf_node* itemptr = (leaf_node*)body;
1978                 int i = 0;
1979                 uint8_t* dataptr = data + Vcb->superblock.node_size;
1980 
1981                 le2 = t->itemlist.Flink;
1982                 while (le2 != &t->itemlist) {
1983                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1984                     if (!td->ignore) {
1985                         dataptr = dataptr - td->size;
1986 
1987                         itemptr[i].key = td->key;
1988                         itemptr[i].offset = (uint32_t)((uint8_t*)dataptr - (uint8_t*)body);
1989                         itemptr[i].size = td->size;
1990                         i++;
1991 
1992                         if (td->size > 0)
1993                             RtlCopyMemory(dataptr, td->data, td->size);
1994                     }
1995 
1996                     le2 = le2->Flink;
1997                 }
1998             } else {
1999                 internal_node* itemptr = (internal_node*)body;
2000                 int i = 0;
2001 
2002                 le2 = t->itemlist.Flink;
2003                 while (le2 != &t->itemlist) {
2004                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2005                     if (!td->ignore) {
2006                         itemptr[i].key = td->key;
2007                         itemptr[i].address = td->treeholder.address;
2008                         itemptr[i].generation = td->treeholder.generation;
2009                         i++;
2010                     }
2011 
2012                     le2 = le2->Flink;
2013                 }
2014             }
2015 
2016             crc32 = calc_crc32c(0xffffffff, (uint8_t*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
2017             crc32 = ~crc32;
2018             *((uint32_t*)data) = crc32;
2019             TRACE("setting crc32 to %08x\n", crc32);
2020 
2021             tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
2022             if (!tw) {
2023                 ERR("out of memory\n");
2024                 ExFreePool(data);
2025                 Status = STATUS_INSUFFICIENT_RESOURCES;
2026                 goto end;
2027             }
2028 
2029             tw->address = t->new_address;
2030             tw->length = Vcb->superblock.node_size;
2031             tw->data = data;
2032             tw->allocated = false;
2033 
2034             if (IsListEmpty(&tree_writes))
2035                 InsertTailList(&tree_writes, &tw->list_entry);
2036             else {
2037                 bool inserted = false;
2038 
2039                 le2 = tree_writes.Flink;
2040                 while (le2 != &tree_writes) {
2041                     tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
2042 
2043                     if (tw2->address > tw->address) {
2044                         InsertHeadList(le2->Blink, &tw->list_entry);
2045                         inserted = true;
2046                         break;
2047                     }
2048 
2049                     le2 = le2->Flink;
2050                 }
2051 
2052                 if (!inserted)
2053                     InsertTailList(&tree_writes, &tw->list_entry);
2054             }
2055         }
2056 
2057         le = le->Flink;
2058     }
2059 
2060     Status = do_tree_writes(Vcb, &tree_writes, false);
2061     if (!NT_SUCCESS(Status)) {
2062         ERR("do_tree_writes returned %08x\n", Status);
2063         goto end;
2064     }
2065 
2066     Status = STATUS_SUCCESS;
2067 
2068 end:
2069     while (!IsListEmpty(&tree_writes)) {
2070         le = RemoveHeadList(&tree_writes);
2071         tw = CONTAINING_RECORD(le, tree_write, list_entry);
2072 
2073         if (tw->data)
2074             ExFreePool(tw->data);
2075 
2076         ExFreePool(tw);
2077     }
2078 
2079     return Status;
2080 }
2081 
2082 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
2083     KEY searchkey;
2084     traverse_ptr tp;
2085 
2086     RtlZeroMemory(sb, sizeof(superblock_backup));
2087 
2088     sb->root_tree_addr = Vcb->superblock.root_tree_addr;
2089     sb->root_tree_generation = Vcb->superblock.generation;
2090     sb->root_level = Vcb->superblock.root_level;
2091 
2092     sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
2093     sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
2094     sb->chunk_root_level = Vcb->superblock.chunk_root_level;
2095 
2096     searchkey.obj_id = BTRFS_ROOT_EXTENT;
2097     searchkey.obj_type = TYPE_ROOT_ITEM;
2098     searchkey.offset = 0xffffffffffffffff;
2099 
2100     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2101         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2102             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2103 
2104             sb->extent_tree_addr = ri->block_number;
2105             sb->extent_tree_generation = ri->generation;
2106             sb->extent_root_level = ri->root_level;
2107         }
2108     }
2109 
2110     searchkey.obj_id = BTRFS_ROOT_FSTREE;
2111 
2112     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2113         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2114             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2115 
2116             sb->fs_tree_addr = ri->block_number;
2117             sb->fs_tree_generation = ri->generation;
2118             sb->fs_root_level = ri->root_level;
2119         }
2120     }
2121 
2122     searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2123 
2124     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2125         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2126             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2127 
2128             sb->dev_root_addr = ri->block_number;
2129             sb->dev_root_generation = ri->generation;
2130             sb->dev_root_level = ri->root_level;
2131         }
2132     }
2133 
2134     searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2135 
2136     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2137         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2138             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2139 
2140             sb->csum_root_addr = ri->block_number;
2141             sb->csum_root_generation = ri->generation;
2142             sb->csum_root_level = ri->root_level;
2143         }
2144     }
2145 
2146     sb->total_bytes = Vcb->superblock.total_bytes;
2147     sb->bytes_used = Vcb->superblock.bytes_used;
2148     sb->num_devices = Vcb->superblock.num_devices;
2149 }
2150 
2151 typedef struct {
2152     void* context;
2153     uint8_t* buf;
2154     PMDL mdl;
2155     device* device;
2156     NTSTATUS Status;
2157     PIRP Irp;
2158     LIST_ENTRY list_entry;
2159 } write_superblocks_stripe;
2160 
2161 typedef struct _write_superblocks_context {
2162     KEVENT Event;
2163     LIST_ENTRY stripes;
2164     LONG left;
2165 } write_superblocks_context;
2166 
2167 _Function_class_(IO_COMPLETION_ROUTINE)
2168 static NTSTATUS __stdcall write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2169     write_superblocks_stripe* stripe = conptr;
2170     write_superblocks_context* context = stripe->context;
2171 
2172     UNUSED(DeviceObject);
2173 
2174     stripe->Status = Irp->IoStatus.Status;
2175 
2176     if (InterlockedDecrement(&context->left) == 0)
2177         KeSetEvent(&context->Event, 0, false);
2178 
2179     return STATUS_MORE_PROCESSING_REQUIRED;
2180 }
2181 
2182 static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) {
2183     unsigned int i = 0;
2184 
2185     // All the documentation says that the Linux driver only writes one superblock
2186     // if it thinks a disk is an SSD, but this doesn't seem to be the case!
2187 
2188     while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2189         ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
2190         superblock* sb;
2191         uint32_t crc32;
2192         write_superblocks_stripe* stripe;
2193         PIO_STACK_LOCATION IrpSp;
2194 
2195         sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG);
2196         if (!sb) {
2197             ERR("out of memory\n");
2198             return STATUS_INSUFFICIENT_RESOURCES;
2199         }
2200 
2201         RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock));
2202 
2203         if (sblen > sizeof(superblock))
2204             RtlZeroMemory((uint8_t*)sb + sizeof(superblock), sblen - sizeof(superblock));
2205 
2206         RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM));
2207         sb->sb_phys_addr = superblock_addrs[i];
2208 
2209         crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
2210         RtlCopyMemory(&sb->checksum, &crc32, sizeof(uint32_t));
2211 
2212         stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG);
2213         if (!stripe) {
2214             ERR("out of memory\n");
2215             ExFreePool(sb);
2216             return STATUS_INSUFFICIENT_RESOURCES;
2217         }
2218 
2219         stripe->buf = (uint8_t*)sb;
2220 
2221         stripe->Irp = IoAllocateIrp(device->devobj->StackSize, false);
2222         if (!stripe->Irp) {
2223             ERR("IoAllocateIrp failed\n");
2224             ExFreePool(stripe);
2225             ExFreePool(sb);
2226             return STATUS_INSUFFICIENT_RESOURCES;
2227         }
2228 
2229         IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2230         IrpSp->MajorFunction = IRP_MJ_WRITE;
2231         IrpSp->FileObject = device->fileobj;
2232 
2233         if (i == 0)
2234             IrpSp->Flags |= SL_WRITE_THROUGH;
2235 
2236         if (device->devobj->Flags & DO_BUFFERED_IO) {
2237             stripe->Irp->AssociatedIrp.SystemBuffer = sb;
2238             stripe->mdl = NULL;
2239 
2240             stripe->Irp->Flags = IRP_BUFFERED_IO;
2241         } else if (device->devobj->Flags & DO_DIRECT_IO) {
2242             stripe->mdl = IoAllocateMdl(sb, sblen, false, false, NULL);
2243             if (!stripe->mdl) {
2244                 ERR("IoAllocateMdl failed\n");
2245                 IoFreeIrp(stripe->Irp);
2246                 ExFreePool(stripe);
2247                 ExFreePool(sb);
2248                 return STATUS_INSUFFICIENT_RESOURCES;
2249             }
2250 
2251             stripe->Irp->MdlAddress = stripe->mdl;
2252 
2253             MmBuildMdlForNonPagedPool(stripe->mdl);
2254         } else {
2255             stripe->Irp->UserBuffer = sb;
2256             stripe->mdl = NULL;
2257         }
2258 
2259         IrpSp->Parameters.Write.Length = sblen;
2260         IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i];
2261 
2262         IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, true, true, true);
2263 
2264         stripe->context = context;
2265         stripe->device = device;
2266         InsertTailList(&context->stripes, &stripe->list_entry);
2267 
2268         context->left++;
2269 
2270         i++;
2271     }
2272 
2273     if (i == 0)
2274         ERR("no superblocks written!\n");
2275 
2276     return STATUS_SUCCESS;
2277 }
2278 
2279 static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
2280     uint64_t i;
2281     NTSTATUS Status;
2282     LIST_ENTRY* le;
2283     write_superblocks_context context;
2284 
2285     TRACE("(%p)\n", Vcb);
2286 
2287     le = Vcb->trees.Flink;
2288     while (le != &Vcb->trees) {
2289         tree* t = CONTAINING_RECORD(le, tree, list_entry);
2290 
2291         if (t->write && !t->parent) {
2292             if (t->root == Vcb->root_root) {
2293                 Vcb->superblock.root_tree_addr = t->new_address;
2294                 Vcb->superblock.root_level = t->header.level;
2295             } else if (t->root == Vcb->chunk_root) {
2296                 Vcb->superblock.chunk_tree_addr = t->new_address;
2297                 Vcb->superblock.chunk_root_generation = t->header.generation;
2298                 Vcb->superblock.chunk_root_level = t->header.level;
2299             }
2300         }
2301 
2302         le = le->Flink;
2303     }
2304 
2305     for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2306         RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2307     }
2308 
2309     update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
2310 
2311     KeInitializeEvent(&context.Event, NotificationEvent, false);
2312     InitializeListHead(&context.stripes);
2313     context.left = 0;
2314 
2315     le = Vcb->devices.Flink;
2316     while (le != &Vcb->devices) {
2317         device* dev = CONTAINING_RECORD(le, device, list_entry);
2318 
2319         if (dev->devobj && !dev->readonly) {
2320             Status = write_superblock(Vcb, dev, &context);
2321             if (!NT_SUCCESS(Status)) {
2322                 ERR("write_superblock returned %08x\n", Status);
2323                 goto end;
2324             }
2325         }
2326 
2327         le = le->Flink;
2328     }
2329 
2330     if (IsListEmpty(&context.stripes)) {
2331         ERR("error - not writing any superblocks\n");
2332         Status = STATUS_INTERNAL_ERROR;
2333         goto end;
2334     }
2335 
2336     le = context.stripes.Flink;
2337     while (le != &context.stripes) {
2338         write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2339 
2340         IoCallDriver(stripe->device->devobj, stripe->Irp);
2341 
2342         le = le->Flink;
2343     }
2344 
2345     KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2346 
2347     le = context.stripes.Flink;
2348     while (le != &context.stripes) {
2349         write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2350 
2351         if (!NT_SUCCESS(stripe->Status)) {
2352             ERR("device %I64x returned %08x\n", stripe->device->devitem.dev_id, stripe->Status);
2353             log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2354             Status = stripe->Status;
2355             goto end;
2356         }
2357 
2358         le = le->Flink;
2359     }
2360 
2361     Status = STATUS_SUCCESS;
2362 
2363 end:
2364     while (!IsListEmpty(&context.stripes)) {
2365         write_superblocks_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&context.stripes), write_superblocks_stripe, list_entry);
2366 
2367         if (stripe->mdl) {
2368             if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2369                 MmUnlockPages(stripe->mdl);
2370 
2371             IoFreeMdl(stripe->mdl);
2372         }
2373 
2374         if (stripe->Irp)
2375             IoFreeIrp(stripe->Irp);
2376 
2377         if (stripe->buf)
2378             ExFreePool(stripe->buf);
2379 
2380         ExFreePool(stripe);
2381     }
2382 
2383     return Status;
2384 }
2385 
2386 static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
2387     LIST_ENTRY *le, *le2;
2388     NTSTATUS Status;
2389     uint64_t old_size;
2390 
2391     if (ce->count == 0 && ce->old_count == 0) {
2392         while (!IsListEmpty(&ce->refs)) {
2393             changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry);
2394             ExFreePool(cer);
2395         }
2396 
2397         while (!IsListEmpty(&ce->old_refs)) {
2398             changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry);
2399             ExFreePool(cer);
2400         }
2401 
2402         goto end;
2403     }
2404 
2405     le = ce->refs.Flink;
2406     while (le != &ce->refs) {
2407         changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2408         uint32_t old_count = 0;
2409 
2410         if (cer->type == TYPE_EXTENT_DATA_REF) {
2411             le2 = ce->old_refs.Flink;
2412             while (le2 != &ce->old_refs) {
2413                 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2414 
2415                 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2416                     old_count = cer2->edr.count;
2417                     break;
2418                 }
2419 
2420                 le2 = le2->Flink;
2421             }
2422 
2423             old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2424 
2425             if (cer->edr.count > old_count) {
2426                 Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp);
2427 
2428                 if (!NT_SUCCESS(Status)) {
2429                     ERR("increase_extent_refcount_data returned %08x\n", Status);
2430                     return Status;
2431                 }
2432             }
2433         } else if (cer->type == TYPE_SHARED_DATA_REF) {
2434             le2 = ce->old_refs.Flink;
2435             while (le2 != &ce->old_refs) {
2436                 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2437 
2438                 if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
2439                     RemoveEntryList(&cer2->list_entry);
2440                     ExFreePool(cer2);
2441                     break;
2442                 }
2443 
2444                 le2 = le2->Flink;
2445             }
2446         }
2447 
2448         le = le->Flink;
2449     }
2450 
2451     le = ce->refs.Flink;
2452     while (le != &ce->refs) {
2453         changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2454         LIST_ENTRY* le3 = le->Flink;
2455         uint32_t old_count = 0;
2456 
2457         if (cer->type == TYPE_EXTENT_DATA_REF) {
2458             le2 = ce->old_refs.Flink;
2459             while (le2 != &ce->old_refs) {
2460                 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2461 
2462                 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2463                     old_count = cer2->edr.count;
2464 
2465                     RemoveEntryList(&cer2->list_entry);
2466                     ExFreePool(cer2);
2467                     break;
2468                 }
2469 
2470                 le2 = le2->Flink;
2471             }
2472 
2473             old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2474 
2475             if (cer->edr.count < old_count) {
2476                 Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
2477                                                        old_count - cer->edr.count, ce->superseded, Irp);
2478 
2479                 if (!NT_SUCCESS(Status)) {
2480                     ERR("decrease_extent_refcount_data returned %08x\n", Status);
2481                     return Status;
2482                 }
2483             }
2484 
2485             if (ce->size != ce->old_size && ce->old_count > 0) {
2486                 KEY searchkey;
2487                 traverse_ptr tp;
2488                 void* data;
2489 
2490                 searchkey.obj_id = ce->address;
2491                 searchkey.obj_type = TYPE_EXTENT_ITEM;
2492                 searchkey.offset = ce->old_size;
2493 
2494                 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
2495                 if (!NT_SUCCESS(Status)) {
2496                     ERR("error - find_item returned %08x\n", Status);
2497                     return Status;
2498                 }
2499 
2500                 if (keycmp(searchkey, tp.item->key)) {
2501                     ERR("could not find (%I64x,%x,%I64x) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2502                     return STATUS_INTERNAL_ERROR;
2503                 }
2504 
2505                 if (tp.item->size > 0) {
2506                     data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2507 
2508                     if (!data) {
2509                         ERR("out of memory\n");
2510                         return STATUS_INSUFFICIENT_RESOURCES;
2511                     }
2512 
2513                     RtlCopyMemory(data, tp.item->data, tp.item->size);
2514                 } else
2515                     data = NULL;
2516 
2517                 Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp);
2518                 if (!NT_SUCCESS(Status)) {
2519                     ERR("insert_tree_item returned %08x\n", Status);
2520                     if (data) ExFreePool(data);
2521                     return Status;
2522                 }
2523 
2524                 Status = delete_tree_item(Vcb, &tp);
2525                 if (!NT_SUCCESS(Status)) {
2526                     ERR("delete_tree_item returned %08x\n", Status);
2527                     return Status;
2528                 }
2529             }
2530         }
2531 
2532         RemoveEntryList(&cer->list_entry);
2533         ExFreePool(cer);
2534 
2535         le = le3;
2536     }
2537 
2538 #ifdef DEBUG_PARANOID
2539     if (!IsListEmpty(&ce->old_refs))
2540         WARN("old_refs not empty\n");
2541 #endif
2542 
2543 end:
2544     if (ce->count == 0 && !ce->superseded) {
2545         c->used -= ce->size;
2546         space_list_add(c, ce->address, ce->size, rollback);
2547     }
2548 
2549     RemoveEntryList(&ce->list_entry);
2550     ExFreePool(ce);
2551 
2552     return STATUS_SUCCESS;
2553 }
2554 
2555 void add_checksum_entry(device_extension* Vcb, uint64_t address, ULONG length, uint32_t* csum, PIRP Irp) {
2556     KEY searchkey;
2557     traverse_ptr tp, next_tp;
2558     NTSTATUS Status;
2559     uint64_t startaddr, endaddr;
2560     ULONG len;
2561     uint32_t* checksums;
2562     RTL_BITMAP bmp;
2563     ULONG* bmparr;
2564     ULONG runlength, index;
2565 
2566     TRACE("(%p, %I64x, %x, %p, %p)\n", Vcb, address, length, csum, Irp);
2567 
2568     searchkey.obj_id = EXTENT_CSUM_ID;
2569     searchkey.obj_type = TYPE_EXTENT_CSUM;
2570     searchkey.offset = address;
2571 
2572     // FIXME - create checksum_root if it doesn't exist at all
2573 
2574     Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
2575     if (Status == STATUS_NOT_FOUND) { // tree is completely empty
2576         if (csum) { // not deleted
2577             ULONG length2 = length;
2578             uint64_t off = address;
2579             uint32_t* data = csum;
2580 
2581             do {
2582                 uint16_t il = (uint16_t)min(length2, MAX_CSUM_SIZE / sizeof(uint32_t));
2583 
2584                 checksums = ExAllocatePoolWithTag(PagedPool, il * sizeof(uint32_t), ALLOC_TAG);
2585                 if (!checksums) {
2586                     ERR("out of memory\n");
2587                     return;
2588                 }
2589 
2590                 RtlCopyMemory(checksums, data, il * sizeof(uint32_t));
2591 
2592                 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums,
2593                                           il * sizeof(uint32_t), NULL, Irp);
2594                 if (!NT_SUCCESS(Status)) {
2595                     ERR("insert_tree_item returned %08x\n", Status);
2596                     ExFreePool(checksums);
2597                     return;
2598                 }
2599 
2600                 length2 -= il;
2601 
2602                 if (length2 > 0) {
2603                     off += il * Vcb->superblock.sector_size;
2604                     data += il;
2605                 }
2606             } while (length2 > 0);
2607         }
2608     } else if (!NT_SUCCESS(Status)) {
2609         ERR("find_item returned %08x\n", Status);
2610         return;
2611     } else {
2612         uint32_t tplen;
2613 
2614         // FIXME - check entry is TYPE_EXTENT_CSUM?
2615 
2616         if (tp.item->key.offset < address && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(uint32_t)) >= address)
2617             startaddr = tp.item->key.offset;
2618         else
2619             startaddr = address;
2620 
2621         searchkey.obj_id = EXTENT_CSUM_ID;
2622         searchkey.obj_type = TYPE_EXTENT_CSUM;
2623         searchkey.offset = address + (length * Vcb->superblock.sector_size);
2624 
2625         Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
2626         if (!NT_SUCCESS(Status)) {
2627             ERR("find_item returned %08x\n", Status);
2628             return;
2629         }
2630 
2631         tplen = tp.item->size / sizeof(uint32_t);
2632 
2633         if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= address + (length * Vcb->superblock.sector_size))
2634             endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
2635         else
2636             endaddr = address + (length * Vcb->superblock.sector_size);
2637 
2638         TRACE("cs starts at %I64x (%x sectors)\n", address, length);
2639         TRACE("startaddr = %I64x\n", startaddr);
2640         TRACE("endaddr = %I64x\n", endaddr);
2641 
2642         len = (ULONG)((endaddr - startaddr) / Vcb->superblock.sector_size);
2643 
2644         checksums = ExAllocatePoolWithTag(PagedPool, sizeof(uint32_t) * len, ALLOC_TAG);
2645         if (!checksums) {
2646             ERR("out of memory\n");
2647             return;
2648         }
2649 
2650         bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
2651         if (!bmparr) {
2652             ERR("out of memory\n");
2653             ExFreePool(checksums);
2654             return;
2655         }
2656 
2657         RtlInitializeBitMap(&bmp, bmparr, len);
2658         RtlSetAllBits(&bmp);
2659 
2660         searchkey.obj_id = EXTENT_CSUM_ID;
2661         searchkey.obj_type = TYPE_EXTENT_CSUM;
2662         searchkey.offset = address;
2663 
2664         Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
2665         if (!NT_SUCCESS(Status)) {
2666             ERR("find_item returned %08x\n", Status);
2667             ExFreePool(checksums);
2668             ExFreePool(bmparr);
2669             return;
2670         }
2671 
2672         // set bit = free space, cleared bit = allocated sector
2673 
2674         while (tp.item->key.offset < endaddr) {
2675             if (tp.item->key.offset >= startaddr) {
2676                 if (tp.item->size > 0) {
2677                     ULONG itemlen = (ULONG)min((len - (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size) * sizeof(uint32_t), tp.item->size);
2678 
2679                     RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, itemlen);
2680                     RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) / Vcb->superblock.sector_size), itemlen / sizeof(uint32_t));
2681                 }
2682 
2683                 Status = delete_tree_item(Vcb, &tp);
2684                 if (!NT_SUCCESS(Status)) {
2685                     ERR("delete_tree_item returned %08x\n", Status);
2686                     ExFreePool(checksums);
2687                     ExFreePool(bmparr);
2688                     return;
2689                 }
2690             }
2691 
2692             if (find_next_item(Vcb, &tp, &next_tp, false, Irp)) {
2693                 tp = next_tp;
2694             } else
2695                 break;
2696         }
2697 
2698         if (!csum) { // deleted
2699             RtlSetBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2700         } else {
2701             RtlCopyMemory(&checksums[(address - startaddr) / Vcb->superblock.sector_size], csum, length * sizeof(uint32_t));
2702             RtlClearBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2703         }
2704 
2705         runlength = RtlFindFirstRunClear(&bmp, &index);
2706 
2707         while (runlength != 0) {
2708             if (index >= len)
2709                 break;
2710 
2711             if (index + runlength >= len) {
2712                 runlength = len - index;
2713 
2714                 if (runlength == 0)
2715                     break;
2716             }
2717 
2718             do {
2719                 uint16_t rl;
2720                 uint64_t off;
2721                 uint32_t* data;
2722 
2723                 if (runlength * sizeof(uint32_t) > MAX_CSUM_SIZE)
2724                     rl = MAX_CSUM_SIZE / sizeof(uint32_t);
2725                 else
2726                     rl = (uint16_t)runlength;
2727 
2728                 data = ExAllocatePoolWithTag(PagedPool, sizeof(uint32_t) * rl, ALLOC_TAG);
2729                 if (!data) {
2730                     ERR("out of memory\n");
2731                     ExFreePool(bmparr);
2732                     ExFreePool(checksums);
2733                     return;
2734                 }
2735 
2736                 RtlCopyMemory(data, &checksums[index], sizeof(uint32_t) * rl);
2737 
2738                 off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size);
2739 
2740                 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(uint32_t) * rl, NULL, Irp);
2741                 if (!NT_SUCCESS(Status)) {
2742                     ERR("insert_tree_item returned %08x\n", Status);
2743                     ExFreePool(data);
2744                     ExFreePool(bmparr);
2745                     ExFreePool(checksums);
2746                     return;
2747                 }
2748 
2749                 runlength -= rl;
2750                 index += rl;
2751             } while (runlength > 0);
2752 
2753             runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
2754         }
2755 
2756         ExFreePool(bmparr);
2757         ExFreePool(checksums);
2758     }
2759 }
2760 
2761 static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
2762     LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
2763     chunk* c;
2764     KEY searchkey;
2765     traverse_ptr tp;
2766     BLOCK_GROUP_ITEM* bgi;
2767     NTSTATUS Status;
2768 
2769     TRACE("(%p)\n", Vcb);
2770 
2771     ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
2772 
2773     while (le != &Vcb->chunks) {
2774         c = CONTAINING_RECORD(le, chunk, list_entry);
2775 
2776         acquire_chunk_lock(c, Vcb);
2777 
2778         if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) {
2779             Status = load_cache_chunk(Vcb, c, NULL);
2780 
2781             if (!NT_SUCCESS(Status)) {
2782                 ERR("load_cache_chunk returned %08x\n", Status);
2783                 release_chunk_lock(c, Vcb);
2784                 goto end;
2785             }
2786         }
2787 
2788         le2 = c->changed_extents.Flink;
2789         while (le2 != &c->changed_extents) {
2790             LIST_ENTRY* le3 = le2->Flink;
2791             changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);
2792 
2793             Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
2794             if (!NT_SUCCESS(Status)) {
2795                 ERR("flush_changed_extent returned %08x\n", Status);
2796                 release_chunk_lock(c, Vcb);
2797                 goto end;
2798             }
2799 
2800             le2 = le3;
2801         }
2802 
2803         // This is usually done by update_chunks, but we have to check again in case any new chunks
2804         // have been allocated since.
2805         if (c->created) {
2806             Status = create_chunk(Vcb, c, Irp);
2807             if (!NT_SUCCESS(Status)) {
2808                 ERR("create_chunk returned %08x\n", Status);
2809                 release_chunk_lock(c, Vcb);
2810                 goto end;
2811             }
2812         }
2813 
2814         if (c->old_cache) {
2815             if (c->old_cache->dirty) {
2816                 LIST_ENTRY batchlist;
2817 
2818                 InitializeListHead(&batchlist);
2819 
2820                 Status = flush_fcb(c->old_cache, false, &batchlist, Irp);
2821                 if (!NT_SUCCESS(Status)) {
2822                     ERR("flush_fcb returned %08x\n", Status);
2823                     release_chunk_lock(c, Vcb);
2824                     clear_batch_list(Vcb, &batchlist);
2825                     goto end;
2826                 }
2827 
2828                 Status = commit_batch_list(Vcb, &batchlist, Irp);
2829                 if (!NT_SUCCESS(Status)) {
2830                     ERR("commit_batch_list returned %08x\n", Status);
2831                     release_chunk_lock(c, Vcb);
2832                     goto end;
2833                 }
2834             }
2835 
2836             free_fcb(c->old_cache);
2837 
2838             if (c->old_cache->refcount == 0)
2839                 reap_fcb(c->old_cache);
2840 
2841             c->old_cache = NULL;
2842         }
2843 
2844         if (c->used != c->oldused) {
2845 #ifdef __REACTOS__
2846             uint64_t old_phys_used, phys_used;
2847 #endif
2848             searchkey.obj_id = c->offset;
2849             searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2850             searchkey.offset = c->chunk_item->size;
2851 
2852             Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
2853             if (!NT_SUCCESS(Status)) {
2854                 ERR("error - find_item returned %08x\n", Status);
2855                 release_chunk_lock(c, Vcb);
2856                 goto end;
2857             }
2858 
2859             if (keycmp(searchkey, tp.item->key)) {
2860                 ERR("could not find (%I64x,%x,%I64x) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2861                 Status = STATUS_INTERNAL_ERROR;
2862                 release_chunk_lock(c, Vcb);
2863                 goto end;
2864             }
2865 
2866             if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2867                 ERR("(%I64x,%x,%I64x) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2868                 Status = STATUS_INTERNAL_ERROR;
2869                 release_chunk_lock(c, Vcb);
2870                 goto end;
2871             }
2872 
2873             bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2874             if (!bgi) {
2875                 ERR("out of memory\n");
2876                 Status = STATUS_INSUFFICIENT_RESOURCES;
2877                 release_chunk_lock(c, Vcb);
2878                 goto end;
2879             }
2880 
2881             RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2882             bgi->used = c->used;
2883 
2884 #ifdef DEBUG_PARANOID
2885             if (bgi->used & 0x8000000000000000) {
2886                 ERR("refusing to write BLOCK_GROUP_ITEM with negative usage value (%I64x)", bgi->used);
2887                 int3;
2888             }
2889 #endif
2890 
2891             TRACE("adjusting usage of chunk %I64x to %I64x\n", c->offset, c->used);
2892 
2893             Status = delete_tree_item(Vcb, &tp);
2894             if (!NT_SUCCESS(Status)) {
2895                 ERR("delete_tree_item returned %08x\n", Status);
2896                 ExFreePool(bgi);
2897                 release_chunk_lock(c, Vcb);
2898                 goto end;
2899             }
2900 
2901             Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp);
2902             if (!NT_SUCCESS(Status)) {
2903                 ERR("insert_tree_item returned %08x\n", Status);
2904                 ExFreePool(bgi);
2905                 release_chunk_lock(c, Vcb);
2906                 goto end;
2907             }
2908 
2909 #ifndef __REACTOS__
2910             uint64_t old_phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused);
2911             uint64_t phys_used = chunk_estimate_phys_size(Vcb, c, c->used);
2912 #else
2913             old_phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused);
2914             phys_used = chunk_estimate_phys_size(Vcb, c, c->used);
2915 #endif
2916 
2917             if (Vcb->superblock.bytes_used + phys_used > old_phys_used)
2918                 Vcb->superblock.bytes_used += phys_used - old_phys_used;
2919             else
2920                 Vcb->superblock.bytes_used = 0;
2921 
2922             c->oldused = c->used;
2923         }
2924 
2925         release_chunk_lock(c, Vcb);
2926 
2927         le = le->Flink;
2928     }
2929 
2930     Status = STATUS_SUCCESS;
2931 
2932 end:
2933     ExReleaseResourceLite(&Vcb->chunk_lock);
2934 
2935     return Status;
2936 }
2937 
2938 static void get_first_item(tree* t, KEY* key) {
2939     LIST_ENTRY* le;
2940 
2941     le = t->itemlist.Flink;
2942     while (le != &t->itemlist) {
2943         tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2944 
2945         *key = td->key;
2946         return;
2947     }
2948 }
2949 
2950 static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, uint32_t numitems, uint32_t size) {
2951     tree *nt, *pt;
2952     tree_data* td;
2953     tree_data* oldlastitem;
2954 
2955     TRACE("splitting tree in %I64x at (%I64x,%x,%I64x)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2956 
2957     nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2958     if (!nt) {
2959         ERR("out of memory\n");
2960         return STATUS_INSUFFICIENT_RESOURCES;
2961     }
2962 
2963     if (t->header.level > 0) {
2964         nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2965         if (!nt->nonpaged) {
2966             ERR("out of memory\n");
2967             ExFreePool(nt);
2968             return STATUS_INSUFFICIENT_RESOURCES;
2969         }
2970 
2971         ExInitializeFastMutex(&nt->nonpaged->mutex);
2972     } else
2973         nt->nonpaged = NULL;
2974 
2975     RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2976     nt->header.address = 0;
2977     nt->header.generation = Vcb->superblock.generation;
2978     nt->header.num_items = t->header.num_items - numitems;
2979     nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
2980 
2981     nt->has_address = false;
2982     nt->Vcb = Vcb;
2983     nt->parent = t->parent;
2984 
2985 #ifdef DEBUG_PARANOID
2986     if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
2987 #endif
2988 
2989     nt->root = t->root;
2990     nt->new_address = 0;
2991     nt->has_new_address = false;
2992     nt->updated_extents = false;
2993     nt->uniqueness_determined = true;
2994     nt->is_unique = true;
2995     nt->list_entry_hash.Flink = NULL;
2996     nt->buf = NULL;
2997     InitializeListHead(&nt->itemlist);
2998 
2999     oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
3000 
3001     nt->itemlist.Flink = &newfirstitem->list_entry;
3002     nt->itemlist.Blink = t->itemlist.Blink;
3003     nt->itemlist.Flink->Blink = &nt->itemlist;
3004     nt->itemlist.Blink->Flink = &nt->itemlist;
3005 
3006     t->itemlist.Blink = &oldlastitem->list_entry;
3007     t->itemlist.Blink->Flink = &t->itemlist;
3008 
3009     nt->size = t->size - size;
3010     t->size = size;
3011     t->header.num_items = numitems;
3012     nt->write = true;
3013 
3014     InsertTailList(&Vcb->trees, &nt->list_entry);
3015 
3016     if (nt->header.level > 0) {
3017         LIST_ENTRY* le = nt->itemlist.Flink;
3018 
3019         while (le != &nt->itemlist) {
3020             tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3021 
3022             if (td2->treeholder.tree) {
3023                 td2->treeholder.tree->parent = nt;
3024 #ifdef DEBUG_PARANOID
3025                 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
3026 #endif
3027             }
3028 
3029             le = le->Flink;
3030         }
3031     } else {
3032         LIST_ENTRY* le = nt->itemlist.Flink;
3033 
3034         while (le != &nt->itemlist) {
3035             tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3036 
3037             if (!td2->inserted && td2->data) {
3038                 uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);
3039 
3040                 if (!data) {
3041                     ERR("out of memory\n");
3042                     return STATUS_INSUFFICIENT_RESOURCES;
3043                 }
3044 
3045                 RtlCopyMemory(data, td2->data, td2->size);
3046                 td2->data = data;
3047                 td2->inserted = true;
3048             }
3049 
3050             le = le->Flink;
3051         }
3052     }
3053 
3054     if (nt->parent) {
3055         td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3056         if (!td) {
3057             ERR("out of memory\n");
3058             return STATUS_INSUFFICIENT_RESOURCES;
3059         }
3060 
3061         td->key = newfirstitem->key;
3062 
3063         InsertHeadList(&t->paritem->list_entry, &td->list_entry);
3064 
3065         td->ignore = false;
3066         td->inserted = true;
3067         td->treeholder.tree = nt;
3068         nt->paritem = td;
3069 
3070         nt->parent->header.num_items++;
3071         nt->parent->size += sizeof(internal_node);
3072 
3073         goto end;
3074     }
3075 
3076     TRACE("adding new tree parent\n");
3077 
3078     if (nt->header.level == 255) {
3079         ERR("cannot add parent to tree at level 255\n");
3080         return STATUS_INTERNAL_ERROR;
3081     }
3082 
3083     pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
3084     if (!pt) {
3085         ERR("out of memory\n");
3086         return STATUS_INSUFFICIENT_RESOURCES;
3087     }
3088 
3089     pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
3090     if (!pt->nonpaged) {
3091         ERR("out of memory\n");
3092         ExFreePool(pt);
3093         return STATUS_INSUFFICIENT_RESOURCES;
3094     }
3095 
3096     ExInitializeFastMutex(&pt->nonpaged->mutex);
3097 
3098     RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
3099     pt->header.address = 0;
3100     pt->header.num_items = 2;
3101     pt->header.level = nt->header.level + 1;
3102     pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
3103 
3104     pt->has_address = false;
3105     pt->Vcb = Vcb;
3106     pt->parent = NULL;
3107     pt->paritem = NULL;
3108     pt->root = t->root;
3109     pt->new_address = 0;
3110     pt->has_new_address = false;
3111     pt->updated_extents = false;
3112     pt->size = pt->header.num_items * sizeof(internal_node);
3113     pt->uniqueness_determined = true;
3114     pt->is_unique = true;
3115     pt->list_entry_hash.Flink = NULL;
3116     pt->buf = NULL;
3117     InitializeListHead(&pt->itemlist);
3118 
3119     InsertTailList(&Vcb->trees, &pt->list_entry);
3120 
3121     td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3122     if (!td) {
3123         ERR("out of memory\n");
3124         return STATUS_INSUFFICIENT_RESOURCES;
3125     }
3126 
3127     get_first_item(t, &td->key);
3128     td->ignore = false;
3129     td->inserted = false;
3130     td->treeholder.address = 0;
3131     td->treeholder.generation = Vcb->superblock.generation;
3132     td->treeholder.tree = t;
3133     InsertTailList(&pt->itemlist, &td->list_entry);
3134     t->paritem = td;
3135 
3136     td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3137     if (!td) {
3138         ERR("out of memory\n");
3139         return STATUS_INSUFFICIENT_RESOURCES;
3140     }
3141 
3142     td->key = newfirstitem->key;
3143     td->ignore = false;
3144     td->inserted = false;
3145     td->treeholder.address = 0;
3146     td->treeholder.generation = Vcb->superblock.generation;
3147     td->treeholder.tree = nt;
3148     InsertTailList(&pt->itemlist, &td->list_entry);
3149     nt->paritem = td;
3150 
3151     pt->write = true;
3152 
3153     t->root->treeholder.tree = pt;
3154 
3155     t->parent = pt;
3156     nt->parent = pt;
3157 
3158 #ifdef DEBUG_PARANOID
3159     if (t->parent && t->parent->header.level <= t->header.level) int3;
3160     if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
3161 #endif
3162 
3163 end:
3164     t->root->root_item.bytes_used += Vcb->superblock.node_size;
3165 
3166     return STATUS_SUCCESS;
3167 }
3168 
3169 static NTSTATUS split_tree(device_extension* Vcb, tree* t) {
3170     LIST_ENTRY* le;
3171     uint32_t size, ds, numitems;
3172 
3173     size = 0;
3174     numitems = 0;
3175 
3176     // FIXME - naïve implementation: maximizes number of filled trees
3177 
3178     le = t->itemlist.Flink;
3179     while (le != &t->itemlist) {
3180         tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3181 
3182         if (!td->ignore) {
3183             if (t->header.level == 0)
3184                 ds = sizeof(leaf_node) + td->size;
3185             else
3186                 ds = sizeof(internal_node);
3187 
3188             if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) {
3189                 ERR("(%I64x,%x,%I64x) in tree %I64x is too large (%x > %x)\n",
3190                     td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id,
3191                     ds, Vcb->superblock.node_size - sizeof(tree_header));
3192                 return STATUS_INTERNAL_ERROR;
3193             }
3194 
3195             // FIXME - move back if previous item was deleted item with same key
3196             if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
3197                 return split_tree_at(Vcb, t, td, numitems, size);
3198 
3199             size += ds;
3200             numitems++;
3201         }
3202 
3203         le = le->Flink;
3204     }
3205 
3206     return STATUS_SUCCESS;
3207 }
3208 
3209 bool is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) {
3210     KEY searchkey;
3211     traverse_ptr tp;
3212     NTSTATUS Status;
3213     bool ret = false;
3214     EXTENT_ITEM* ei;
3215     uint8_t* type;
3216 
3217     if (t->uniqueness_determined)
3218         return t->is_unique;
3219 
3220     if (t->parent && !is_tree_unique(Vcb, t->parent, Irp))
3221         goto end;
3222 
3223     if (t->has_address) {
3224         searchkey.obj_id = t->header.address;
3225         searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
3226         searchkey.offset = 0xffffffffffffffff;
3227 
3228         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3229         if (!NT_SUCCESS(Status)) {
3230             ERR("error - find_item returned %08x\n", Status);
3231             goto end;
3232         }
3233 
3234         if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM))
3235             goto end;
3236 
3237         if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
3238             goto end;
3239 
3240         if (tp.item->size < sizeof(EXTENT_ITEM))
3241             goto end;
3242 
3243         ei = (EXTENT_ITEM*)tp.item->data;
3244 
3245         if (ei->refcount > 1)
3246             goto end;
3247 
3248         if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
3249             EXTENT_ITEM2* ei2;
3250 
3251             if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
3252                 goto end;
3253 
3254             ei2 = (EXTENT_ITEM2*)&ei[1];
3255             type = (uint8_t*)&ei2[1];
3256         } else
3257             type = (uint8_t*)&ei[1];
3258 
3259         if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF)
3260             goto end;
3261     }
3262 
3263     ret = true;
3264 
3265 end:
3266     t->is_unique = ret;
3267     t->uniqueness_determined = true;
3268 
3269     return ret;
3270 }
3271 
// Tries to enlarge tree t using the next non-ignored sibling under the same
// parent node (the "next tree").
//
// If both trees fit into one node, the sibling's items are appended to t and
// the sibling is removed (its extent reduced, its parent item dropped, the tree
// freed). Otherwise items are moved one at a time from the front of the sibling
// to the end of t until t reaches the average size of the two.
//
// Vcb            - volume being written
// t              - tree to enlarge; t->parent and t->paritem are dereferenced
//                  unconditionally, so t must not be a root node
// done           - set to false on entry; set to true if a merge happened or
//                  at least one item was moved
// done_deletions - set to true if the sibling's parent item was marked as
//                  ignored; never reset to false here
// Irp, rollback  - passed through to the helpers that touch the extent tree
//
// Returns STATUS_SUCCESS (also when there is no sibling, or the sibling is not
// unique), STATUS_INSUFFICIENT_RESOURCES if an item copy cannot be allocated,
// or the failure status of do_load_tree / update_tree_extents /
// reduce_tree_extent. On an error return the trees may already have been
// partially modified.
static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, bool* done, bool* done_deletions, PIRP Irp, LIST_ENTRY* rollback) {
    LIST_ENTRY* le;
    tree_data* nextparitem = NULL;
    NTSTATUS Status;
    tree *next_tree, *par;

    *done = false;

    TRACE("trying to amalgamate tree in root %I64x, level %x (size %u)\n", t->root->id, t->header.level, t->size);

    // Find the first non-ignored item following t's own entry in the parent's item list.
    // FIXME - doesn't capture everything, as it doesn't ascend
    le = t->paritem->list_entry.Flink;
    while (le != &t->parent->itemlist) {
        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

        if (!td->ignore) {
            nextparitem = td;
            break;
        }

        le = le->Flink;
    }

    // t is the last live child of its parent - nothing to merge with
    if (!nextparitem)
        return STATUS_SUCCESS;

    TRACE("nextparitem: key = %I64x,%x,%I64x\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);

    // make sure the sibling is loaded before inspecting it
    if (!nextparitem->treeholder.tree) {
        Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, NULL);
        if (!NT_SUCCESS(Status)) {
            ERR("do_load_tree returned %08x\n", Status);
            return Status;
        }
    }

    // only siblings that is_tree_unique accepts are touched
    if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
        return STATUS_SUCCESS;

    next_tree = nextparitem->treeholder.tree;

    if (!next_tree->updated_extents && next_tree->has_address) {
        Status = update_tree_extents(Vcb, next_tree, Irp, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("update_tree_extents returned %08x\n", Status);
            return Status;
        }
    }

    // Do the contents of both trees fit into a single node (node size minus header)?
    if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
        // merge two trees into one

        t->header.num_items += next_tree->header.num_items;
        t->size += next_tree->size;

        if (next_tree->header.level > 0) {
            // internal node: re-parent every child tree that is already loaded
            le = next_tree->itemlist.Flink;

            while (le != &next_tree->itemlist) {
                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

                if (td2->treeholder.tree) {
                    td2->treeholder.tree->parent = t;
#ifdef DEBUG_PARANOID
                    if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
#endif
                }

                td2->inserted = true;
                le = le->Flink;
            }
        } else {
            // leaf: give every not-yet-inserted item with data its own pool copy
            // NOTE(review): presumably the original data pointer refers into
            // next_tree's buffer, which goes away with free_tree below - confirm
            le = next_tree->itemlist.Flink;

            while (le != &next_tree->itemlist) {
                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

                if (!td2->inserted && td2->data) {
                    uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, td2->data, td2->size);
                    td2->data = data;
                    td2->inserted = true;
                }

                le = le->Flink;
            }
        }

        // splice next_tree's whole item list onto the tail of t's item list
        t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
        t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
        t->itemlist.Blink = next_tree->itemlist.Blink;
        t->itemlist.Blink->Flink = &t->itemlist;

        // leave next_tree with an empty, self-referencing list
        next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;

        next_tree->header.num_items = 0;
        next_tree->size = 0;

        if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
            Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08x\n", Status);
                return Status;
            }
        } else if (next_tree->has_address) {
            Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08x\n", Status);
                return Status;
            }
        }

        // drop the sibling's entry from the parent's accounting
        if (!nextparitem->ignore) {
            nextparitem->ignore = true;
            next_tree->parent->header.num_items--;
            next_tree->parent->size -= sizeof(internal_node);

            *done_deletions = true;
        }

        // flag every ancestor of the removed sibling for writing
        par = next_tree->parent;
        while (par) {
            par->write = true;
            par = par->parent;
        }

        // unlink and free the parent item, then the sibling itself
        RemoveEntryList(&nextparitem->list_entry);
        ExFreePool(next_tree->paritem);
        next_tree->paritem = NULL;

        next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;

        free_tree(next_tree);

        *done = true;
    } else {
        // rebalance by moving items from second tree into first
        ULONG avg_size = (t->size + next_tree->size) / 2;
        KEY firstitem = {0, 0, 0};
        bool changed = false;

        TRACE("attempting rebalance\n");

        // Always take the current head of next_tree's list; stop once t has
        // reached the average size or next_tree is down to a single item.
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
            ULONG size;

            // ignored items take up no space in either tree's accounting
            if (!td->ignore) {
                if (next_tree->header.level == 0)
                    size = sizeof(leaf_node) + td->size;
                else
                    size = sizeof(internal_node);
            } else
                size = 0;

            // note the strict '<' here, unlike the '<=' used for the merge test above
            if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
                RemoveEntryList(&td->list_entry);
                InsertTailList(&t->itemlist, &td->list_entry);

                if (next_tree->header.level > 0 && td->treeholder.tree) {
                    td->treeholder.tree->parent = t;
#ifdef DEBUG_PARANOID
                    if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
#endif
                } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) {
                    // leaf item that has not been inserted yet: give it its own copy of the data
                    uint8_t* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, td->data, td->size);
                    td->data = data;
                }

                td->inserted = true;

                if (!td->ignore) {
                    next_tree->size -= size;
                    t->size += size;
                    next_tree->header.num_items--;
                    t->header.num_items++;
                }

                changed = true;
            } else
                break;

            // the moved entry is gone from next_tree, so restart from its new head
            le = next_tree->itemlist.Flink;
        }

        // The key of the first remaining live item becomes the sibling's key in
        // the parent; firstitem stays {0,0,0} if no live item is left.
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

            if (!td->ignore) {
                firstitem = td->key;
                break;
            }

            le = le->Flink;
        }

        // FIXME - once ascension is working, make this work with parent's parent, etc.
        if (next_tree->paritem)
            next_tree->paritem->key = firstitem;

        // flag next_tree and all of its ancestors for writing (done even if nothing moved)
        par = next_tree;
        while (par) {
            par->write = true;
            par = par->parent;
        }

        if (changed)
            *done = true;
    }

    return STATUS_SUCCESS;
}
3501 
3502 static NTSTATUS update_extent_level(device_extension* Vcb, uint64_t address, tree* t, uint8_t level, PIRP Irp) {
3503     KEY searchkey;
3504     traverse_ptr tp;
3505     NTSTATUS Status;
3506 
3507     if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3508         searchkey.obj_id = address;
3509         searchkey.obj_type = TYPE_METADATA_ITEM;
3510         searchkey.offset = t->header.level;
3511 
3512         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3513         if (!NT_SUCCESS(Status)) {
3514             ERR("error - find_item returned %08x\n", Status);
3515             return Status;
3516         }
3517 
3518         if (!keycmp(tp.item->key, searchkey)) {
3519             EXTENT_ITEM_SKINNY_METADATA* eism;
3520 
3521             if (tp.item->size > 0) {
3522                 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3523 
3524                 if (!eism) {
3525                     ERR("out of memory\n");
3526                     return STATUS_INSUFFICIENT_RESOURCES;
3527                 }
3528 
3529                 RtlCopyMemory(eism, tp.item->data, tp.item->size);
3530             } else
3531                 eism = NULL;
3532 
3533             Status = delete_tree_item(Vcb, &tp);
3534             if (!NT_SUCCESS(Status)) {
3535                 ERR("delete_tree_item returned %08x\n", Status);
3536                 if (eism) ExFreePool(eism);
3537                 return Status;
3538             }
3539 
3540             Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp);
3541             if (!NT_SUCCESS(Status)) {
3542                 ERR("insert_tree_item returned %08x\n", Status);
3543                 if (eism) ExFreePool(eism);
3544                 return Status;
3545             }
3546 
3547             return STATUS_SUCCESS;
3548         }
3549     }
3550 
3551     searchkey.obj_id = address;
3552     searchkey.obj_type = TYPE_EXTENT_ITEM;
3553     searchkey.offset = 0xffffffffffffffff;
3554 
3555     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3556     if (!NT_SUCCESS(Status)) {
3557         ERR("error - find_item returned %08x\n", Status);
3558         return Status;
3559     }
3560 
3561     if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3562         EXTENT_ITEM_TREE* eit;
3563 
3564         if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
3565             ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
3566             return STATUS_INTERNAL_ERROR;
3567         }
3568 
3569         eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3570 
3571         if (!eit) {
3572             ERR("out of memory\n");
3573             return STATUS_INSUFFICIENT_RESOURCES;
3574         }
3575 
3576         RtlCopyMemory(eit, tp.item->data, tp.item->size);
3577 
3578         Status = delete_tree_item(Vcb, &tp);
3579         if (!NT_SUCCESS(Status)) {
3580             ERR("delete_tree_item returned %08x\n", Status);
3581             ExFreePool(eit);
3582             return Status;
3583         }
3584 
3585         eit->level = level;
3586 
3587         Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp);
3588         if (!NT_SUCCESS(Status)) {
3589             ERR("insert_tree_item returned %08x\n", Status);
3590             ExFreePool(eit);
3591             return Status;
3592         }
3593 
3594         return STATUS_SUCCESS;
3595     }
3596 
3597     ERR("could not find EXTENT_ITEM for address %I64x\n", address);
3598 
3599     return STATUS_INTERNAL_ERROR;
3600 }
3601 
3602 static NTSTATUS update_tree_extents_recursive(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
3603     NTSTATUS Status;
3604 
3605     if (t->parent && !t->parent->updated_extents && t->parent->has_address) {
3606         Status = update_tree_extents_recursive(Vcb, t->parent, Irp, rollback);
3607         if (!NT_SUCCESS(Status))
3608             return Status;
3609     }
3610 
3611     Status = update_tree_extents(Vcb, t, Irp, rollback);
3612     if (!NT_SUCCESS(Status)) {
3613         ERR("update_tree_extents returned %08x\n", Status);
3614         return Status;
3615     }
3616 
3617     return STATUS_SUCCESS;
3618 }
3619 
// Restructures the in-memory trees of the volume that are flagged for writing
// (t->write). Works in three passes over Vcb->trees:
//
//  1. Level by level from 0 upwards: empty trees that have a parent are removed
//     (extent reduced, parent item dropped, tree freed); an empty tree without
//     a parent has its level reset to 0; trees larger than a node's payload
//     (node_size minus tree_header) are split. The pass stops at the first
//     level that has no trees flagged for writing.
//  2. For every level handled in pass 1: trees holding less than half a node's
//     payload are amalgamated with their next sibling via try_tree_amalgamate,
//     repeatedly while that makes progress and the tree is still under-full.
//  3. Only if something was deleted: top-level trees (no parent) above level 0
//     that are left with exactly one item are removed, and their single child
//     becomes the new top of the root.
//
// Vcb      - volume whose trees are processed
// Irp      - passed through to the helpers
// rollback - passed through to the helpers that modify the extent tree
//
// Returns STATUS_SUCCESS, or the first failure status returned by
// reduce_tree_extent, update_extent_level, update_tree_extents_recursive,
// split_tree, try_tree_amalgamate or find_item.
static NTSTATUS do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
    ULONG level, max_level;
    uint32_t min_size;
    bool empty, done_deletions = false;
    NTSTATUS Status;
    tree* t;

    TRACE("(%p)\n", Vcb);

    max_level = 0;

    // Pass 1: delete empty trees and split overlarge ones, lowest level first
    for (level = 0; level <= 255; level++) {
        LIST_ENTRY *le, *nextle;

        empty = true;

        TRACE("doing level %u\n", level);

        le = Vcb->trees.Flink;

        while (le != &Vcb->trees) {
            t = CONTAINING_RECORD(le, tree, list_entry);

            // successor is fetched up front, before t is possibly freed below
            // NOTE(review): presumably free_tree unlinks t from Vcb->trees - confirm
            nextle = le->Flink;

            if (t->write && t->header.level == level) {
                empty = false;

                if (t->header.num_items == 0) {
                    if (t->parent) {
                        // empty non-root tree: remove it entirely
                        done_deletions = true;

                        TRACE("deleting tree in root %I64x\n", t->root->id);

                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        if (t->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }

                            t->has_new_address = false;
                        } else if (t->has_address) {
                            Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }

                            t->has_address = false;
                        }

                        // take the tree's entry out of the parent's accounting
                        if (!t->paritem->ignore) {
                            t->paritem->ignore = true;
                            t->parent->header.num_items--;
                            t->parent->size -= sizeof(internal_node);
                        }

                        RemoveEntryList(&t->paritem->list_entry);
                        ExFreePool(t->paritem);
                        t->paritem = NULL;

                        free_tree(t);
                    } else if (t->header.level != 0) {
                        // empty top-level tree above level 0: keep it, but turn it into a level-0 tree
                        if (t->has_new_address) {
                            Status = update_extent_level(Vcb, t->new_address, t, 0, Irp);

                            if (!NT_SUCCESS(Status)) {
                                ERR("update_extent_level returned %08x\n", Status);
                                return Status;
                            }
                        }

                        t->header.level = 0;
                    }
                } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
                    TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));

                    // extents of t (and of any ancestors still pending) are updated before splitting
                    if (!t->updated_extents && t->has_address) {
                        Status = update_tree_extents_recursive(Vcb, t, Irp, rollback);
                        if (!NT_SUCCESS(Status)) {
                            ERR("update_tree_extents_recursive returned %08x\n", Status);
                            return Status;
                        }
                    }

                    Status = split_tree(Vcb, t);

                    if (!NT_SUCCESS(Status)) {
                        ERR("split_tree returned %08x\n", Status);
                        return Status;
                    }
                }
            }

            le = nextle;
        }

        // remember the highest level that had work; stop at the first level without any
        if (!empty) {
            max_level = level;
        } else {
            TRACE("nothing found for level %u\n", level);
            break;
        }
    }

    // Pass 2: amalgamate trees that are less than half full
    min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;

    for (level = 0; level <= max_level; level++) {
        LIST_ENTRY* le;

        le = Vcb->trees.Flink;

        while (le != &Vcb->trees) {
            t = CONTAINING_RECORD(le, tree, list_entry);

            // candidates: written, non-empty, non-root, under-full, not in the
            // BTRFS_ROOT_FREE_SPACE root, and accepted by is_tree_unique
            if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size &&
                t->root->id != BTRFS_ROOT_FREE_SPACE && is_tree_unique(Vcb, t, Irp)) {
                bool done;

                // keep going while the last attempt changed something and t is still under-full
                do {
                    Status = try_tree_amalgamate(Vcb, t, &done, &done_deletions, Irp, rollback);
                    if (!NT_SUCCESS(Status)) {
                        ERR("try_tree_amalgamate returned %08x\n", Status);
                        return Status;
                    }
                } while (done && t->size < min_size);
            }

            // t itself is never freed in this pass, so its Flink can be read afterwards
            le = le->Flink;
        }
    }

    // Pass 3:
    // simplify trees if top tree only has one entry

    if (done_deletions) {
        // highest level first; level 0 is never collapsed
        for (level = max_level; level > 0; level--) {
            LIST_ENTRY *le, *nextle;

            le = Vcb->trees.Flink;
            while (le != &Vcb->trees) {
                nextle = le->Flink;
                t = CONTAINING_RECORD(le, tree, list_entry);

                if (t->write && t->header.level == level) {
                    if (!t->parent && t->header.num_items == 1) {
                        LIST_ENTRY* le2 = t->itemlist.Flink;
                        tree_data* td = NULL;
                        tree* child_tree = NULL;

                        // locate the single live item; td is dereferenced below
                        // without a NULL check, so the list must not be empty
                        while (le2 != &t->itemlist) {
                            td = CONTAINING_RECORD(le2, tree_data, list_entry);
                            if (!td->ignore)
                                break;
                            le2 = le2->Flink;
                        }

                        TRACE("deleting top-level tree in root %I64x with one item\n", t->root->id);

                        if (t->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }

                            t->has_new_address = false;
                        } else if (t->has_address) {
                            Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }

                            t->has_address = false;
                        }

                        // NOTE(review): the result in tp is not used; presumably the search is
                        // done for its side effect of loading the child tree - confirm
                        if (!td->treeholder.tree) { // load first item if not already loaded
                            KEY searchkey = {0,0,0};
                            traverse_ptr tp;

                            Status = find_item(Vcb, t->root, &tp, &searchkey, false, Irp);
                            if (!NT_SUCCESS(Status)) {
                                ERR("error - find_item returned %08x\n", Status);
                                return Status;
                            }
                        }

                        child_tree = td->treeholder.tree;

                        // detach the child from the tree that is about to be freed
                        if (child_tree) {
                            child_tree->parent = NULL;
                            child_tree->paritem = NULL;
                        }

                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        free_tree(t);

                        // the child becomes the top tree of its root
                        if (child_tree)
                            child_tree->root->treeholder.tree = child_tree;
                    }
                }

                le = nextle;
            }
        }
    }

    return STATUS_SUCCESS;
}
3837 
3838 static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, uint8_t level, tree* parent, PIRP Irp, LIST_ENTRY* rollback) {
3839     NTSTATUS Status;
3840 
3841     if (!th->tree) {
3842         uint8_t* buf;
3843         chunk* c;
3844 
3845         buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
3846         if (!buf) {
3847             ERR("out of memory\n");
3848             return STATUS_INSUFFICIENT_RESOURCES;
3849         }
3850 
3851         Status = read_data(Vcb, th->address, Vcb->superblock.node_size, NULL, true, buf, NULL,
3852                            &c, Irp, th->generation, false, NormalPagePriority);
3853         if (!NT_SUCCESS(Status)) {
3854             ERR("read_data returned 0x%08x\n", Status);
3855             ExFreePool(buf);
3856             return Status;
3857         }
3858 
3859         Status = load_tree(Vcb, th->address, buf, r, &th->tree);
3860 
3861         if (!th->tree || th->tree->buf != buf)
3862             ExFreePool(buf);
3863 
3864         if (!NT_SUCCESS(Status)) {
3865             ERR("load_tree(%I64x) returned %08x\n", th->address, Status);
3866             return Status;
3867         }
3868     }
3869 
3870     if (level > 0) {
3871         LIST_ENTRY* le = th->tree->itemlist.Flink;
3872 
3873         while (le != &th->tree->itemlist) {
3874             tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3875 
3876             if (!td->ignore) {
3877                 Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, th->tree, Irp, rollback);
3878 
3879                 if (!NT_SUCCESS(Status)) {
3880                     ERR("remove_root_extents returned %08x\n", Status);
3881                     return Status;
3882                 }
3883             }
3884 
3885             le = le->Flink;
3886         }
3887     }
3888 
3889     if (th->tree && !th->tree->updated_extents && th->tree->has_address) {
3890         Status = update_tree_extents(Vcb, th->tree, Irp, rollback);
3891         if (!NT_SUCCESS(Status)) {
3892             ERR("update_tree_extents returned %08x\n", Status);
3893             return Status;
3894         }
3895     }
3896 
3897     if (!th->tree || th->tree->has_address) {
3898         Status = reduce_tree_extent(Vcb, th->address, NULL, parent ? parent->header.tree_id : r->id, level, Irp, rollback);
3899 
3900         if (!NT_SUCCESS(Status)) {
3901             ERR("reduce_tree_extent(%I64x) returned %08x\n", th->address, Status);
3902             return Status;
3903         }
3904     }
3905 
3906     return STATUS_SUCCESS;
3907 }
3908 
3909 static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
3910     NTSTATUS Status;
3911     KEY searchkey;
3912     traverse_ptr tp;
3913 
3914     Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback);
3915     if (!NT_SUCCESS(Status)) {
3916         ERR("remove_root_extents returned %08x\n", Status);
3917         return Status;
3918     }
3919 
3920     // remove entries in uuid root (tree 9)
3921     if (Vcb->uuid_root) {
3922         RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(uint64_t));
3923         searchkey.obj_type = TYPE_SUBVOL_UUID;
3924         RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
3925 
3926         if (searchkey.obj_id != 0 || searchkey.offset != 0) {
3927             Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
3928             if (!NT_SUCCESS(Status)) {
3929                 WARN("find_item returned %08x\n", Status);
3930             } else {
3931                 if (!keycmp(tp.item->key, searchkey)) {
3932                     Status = delete_tree_item(Vcb, &tp);
3933                     if (!NT_SUCCESS(Status)) {
3934                         ERR("delete_tree_item returned %08x\n", Status);
3935                         return Status;
3936                     }
3937                 } else
3938                     WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3939             }
3940         }
3941 
3942         if (r->root_item.rtransid > 0) {
3943             RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid.uuid[0], sizeof(uint64_t));
3944             searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
3945             RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
3946 
3947             Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
3948             if (!NT_SUCCESS(Status))
3949                 WARN("find_item returned %08x\n", Status);
3950             else {
3951                 if (!keycmp(tp.item->key, searchkey)) {
3952                     if (tp.item->size == sizeof(uint64_t)) {
3953                         uint64_t* id = (uint64_t*)tp.item->data;
3954 
3955                         if (*id == r->id) {
3956                             Status = delete_tree_item(Vcb, &tp);
3957                             if (!NT_SUCCESS(Status)) {
3958                                 ERR("delete_tree_item returned %08x\n", Status);
3959                                 return Status;
3960                             }
3961                         }
3962                     } else if (tp.item->size > sizeof(uint64_t)) {
3963                         ULONG i;
3964                         uint64_t* ids = (uint64_t*)tp.item->data;
3965 
3966                         for (i = 0; i < tp.item->size / sizeof(uint64_t); i++) {
3967                             if (ids[i] == r->id) {
3968                                 uint64_t* ne;
3969 
3970                                 ne = ExAllocatePoolWithTag(PagedPool, tp.item->size - sizeof(uint64_t), ALLOC_TAG);
3971                                 if (!ne) {
3972                                     ERR("out of memory\n");
3973                                     return STATUS_INSUFFICIENT_RESOURCES;
3974                                 }
3975 
3976                                 if (i > 0)
3977                                     RtlCopyMemory(ne, ids, sizeof(uint64_t) * i);
3978 
3979                                 if ((i + 1) * sizeof(uint64_t) < tp.item->size)
3980                                     RtlCopyMemory(&ne[i], &ids[i + 1], tp.item->size - ((i + 1) * sizeof(uint64_t)));
3981 
3982                                 Status = delete_tree_item(Vcb, &tp);
3983                                 if (!NT_SUCCESS(Status)) {
3984                                     ERR("delete_tree_item returned %08x\n", Status);
3985                                     ExFreePool(ne);
3986                                     return Status;
3987                                 }
3988 
3989                                 Status = insert_tree_item(Vcb, Vcb->uuid_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
3990                                                           ne, tp.item->size - sizeof(uint64_t), NULL, Irp);
3991                                 if (!NT_SUCCESS(Status)) {
3992                                     ERR("insert_tree_item returned %08x\n", Status);
3993                                     ExFreePool(ne);
3994                                     return Status;
3995                                 }
3996 
3997                                 break;
3998                             }
3999                         }
4000                     }
4001                 } else
4002                     WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
4003             }
4004         }
4005     }
4006 
4007     // delete ROOT_ITEM
4008 
4009     searchkey.obj_id = r->id;
4010     searchkey.obj_type = TYPE_ROOT_ITEM;
4011     searchkey.offset = 0xffffffffffffffff;
4012 
4013     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
4014     if (!NT_SUCCESS(Status)) {
4015         ERR("find_item returned %08x\n", Status);
4016         return Status;
4017     }
4018 
4019     if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
4020         Status = delete_tree_item(Vcb, &tp);
4021 
4022         if (!NT_SUCCESS(Status)) {
4023             ERR("delete_tree_item returned %08x\n", Status);
4024             return Status;
4025         }
4026     } else
4027         WARN("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
4028 
4029     // delete items in tree cache
4030 
4031     free_trees_root(Vcb, r);
4032 
4033     return STATUS_SUCCESS;
4034 }
4035 
4036 static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
4037     LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2;
4038     NTSTATUS Status;
4039 
4040     while (le != &Vcb->drop_roots) {
4041         root* r = CONTAINING_RECORD(le, root, list_entry);
4042 
4043         le2 = le->Flink;
4044 
4045         Status = drop_root(Vcb, r, Irp, rollback);
4046         if (!NT_SUCCESS(Status)) {
4047             ERR("drop_root(%I64x) returned %08x\n", r->id, Status);
4048             return Status;
4049         }
4050 
4051         le = le2;
4052     }
4053 
4054     return STATUS_SUCCESS;
4055 }
4056 
4057 NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp) {
4058     KEY searchkey;
4059     traverse_ptr tp;
4060     DEV_ITEM* di;
4061     NTSTATUS Status;
4062 
4063     searchkey.obj_id = 1;
4064     searchkey.obj_type = TYPE_DEV_ITEM;
4065     searchkey.offset = device->devitem.dev_id;
4066 
4067     Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
4068     if (!NT_SUCCESS(Status)) {
4069         ERR("error - find_item returned %08x\n", Status);
4070         return Status;
4071     }
4072 
4073     if (keycmp(tp.item->key, searchkey)) {
4074         ERR("error - could not find DEV_ITEM for device %I64x\n", device->devitem.dev_id);
4075         return STATUS_INTERNAL_ERROR;
4076     }
4077 
4078     Status = delete_tree_item(Vcb, &tp);
4079     if (!NT_SUCCESS(Status)) {
4080         ERR("delete_tree_item returned %08x\n", Status);
4081         return Status;
4082     }
4083 
4084     di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
4085     if (!di) {
4086         ERR("out of memory\n");
4087         return STATUS_INSUFFICIENT_RESOURCES;
4088     }
4089 
4090     RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
4091 
4092     Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
4093     if (!NT_SUCCESS(Status)) {
4094         ERR("insert_tree_item returned %08x\n", Status);
4095         ExFreePool(di);
4096         return Status;
4097     }
4098 
4099     return STATUS_SUCCESS;
4100 }
4101 
4102 static void regen_bootstrap(device_extension* Vcb) {
4103     sys_chunk* sc2;
4104     USHORT i = 0;
4105     LIST_ENTRY* le;
4106 
4107     i = 0;
4108     le = Vcb->sys_chunks.Flink;
4109     while (le != &Vcb->sys_chunks) {
4110         sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4111 
4112         TRACE("%I64x,%x,%I64x\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
4113 
4114         RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
4115         i += sizeof(KEY);
4116 
4117         RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
4118         i += sc2->size;
4119 
4120         le = le->Flink;
4121     }
4122 }
4123 
4124 static NTSTATUS add_to_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset, void* data, uint16_t size) {
4125     sys_chunk* sc;
4126     LIST_ENTRY* le;
4127 
4128     if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
4129         ERR("error - bootstrap is full\n");
4130         return STATUS_INTERNAL_ERROR;
4131     }
4132 
4133     sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
4134     if (!sc) {
4135         ERR("out of memory\n");
4136         return STATUS_INSUFFICIENT_RESOURCES;
4137     }
4138 
4139     sc->key.obj_id = obj_id;
4140     sc->key.obj_type = obj_type;
4141     sc->key.offset = offset;
4142     sc->size = size;
4143     sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
4144     if (!sc->data) {
4145         ERR("out of memory\n");
4146         ExFreePool(sc);
4147         return STATUS_INSUFFICIENT_RESOURCES;
4148     }
4149 
4150     RtlCopyMemory(sc->data, data, sc->size);
4151 
4152     le = Vcb->sys_chunks.Flink;
4153     while (le != &Vcb->sys_chunks) {
4154         sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4155 
4156         if (keycmp(sc2->key, sc->key) == 1)
4157             break;
4158 
4159         le = le->Flink;
4160     }
4161     InsertTailList(le, &sc->list_entry);
4162 
4163     Vcb->superblock.n += sizeof(KEY) + size;
4164 
4165     regen_bootstrap(Vcb);
4166 
4167     return STATUS_SUCCESS;
4168 }
4169 
4170 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp) {
4171     CHUNK_ITEM* ci;
4172     CHUNK_ITEM_STRIPE* cis;
4173     BLOCK_GROUP_ITEM* bgi;
4174     uint16_t i, factor;
4175     NTSTATUS Status;
4176 
4177     ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
4178     if (!ci) {
4179         ERR("out of memory\n");
4180         return STATUS_INSUFFICIENT_RESOURCES;
4181     }
4182 
4183     RtlCopyMemory(ci, c->chunk_item, c->size);
4184 
4185     Status = insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp);
4186     if (!NT_SUCCESS(Status)) {
4187         ERR("insert_tree_item failed\n");
4188         ExFreePool(ci);
4189         return Status;
4190     }
4191 
4192     if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
4193         Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
4194         if (!NT_SUCCESS(Status)) {
4195             ERR("add_to_bootstrap returned %08x\n", Status);
4196             return Status;
4197         }
4198     }
4199 
4200     // add BLOCK_GROUP_ITEM to tree 2
4201 
4202     bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
4203     if (!bgi) {
4204         ERR("out of memory\n");
4205         return STATUS_INSUFFICIENT_RESOURCES;
4206     }
4207 
4208     bgi->used = c->used;
4209     bgi->chunk_tree = 0x100;
4210     bgi->flags = c->chunk_item->type;
4211 
4212     Status = insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp);
4213     if (!NT_SUCCESS(Status)) {
4214         ERR("insert_tree_item failed\n");
4215         ExFreePool(bgi);
4216         return Status;
4217     }
4218 
4219     if (c->chunk_item->type & BLOCK_FLAG_RAID0)
4220         factor = c->chunk_item->num_stripes;
4221     else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
4222         factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
4223     else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
4224         factor = c->chunk_item->num_stripes - 1;
4225     else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
4226         factor = c->chunk_item->num_stripes - 2;
4227     else // SINGLE, DUPLICATE, RAID1
4228         factor = 1;
4229 
4230     // add DEV_EXTENTs to tree 4
4231 
4232     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
4233 
4234     for (i = 0; i < c->chunk_item->num_stripes; i++) {
4235         DEV_EXTENT* de;
4236 
4237         de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
4238         if (!de) {
4239             ERR("out of memory\n");
4240             return STATUS_INSUFFICIENT_RESOURCES;
4241         }
4242 
4243         de->chunktree = Vcb->chunk_root->id;
4244         de->objid = 0x100;
4245         de->address = c->offset;
4246         de->length = c->chunk_item->size / factor;
4247         de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
4248 
4249         Status = insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp);
4250         if (!NT_SUCCESS(Status)) {
4251             ERR("insert_tree_item returned %08x\n", Status);
4252             ExFreePool(de);
4253             return Status;
4254         }
4255 
4256         // FIXME - no point in calling this twice for the same device
4257         Status = update_dev_item(Vcb, c->devices[i], Irp);
4258         if (!NT_SUCCESS(Status)) {
4259             ERR("update_dev_item returned %08x\n", Status);
4260             return Status;
4261         }
4262     }
4263 
4264     c->created = false;
4265     c->oldused = c->used;
4266 
4267     Vcb->superblock.bytes_used += chunk_estimate_phys_size(Vcb, c, c->used);
4268 
4269     return STATUS_SUCCESS;
4270 }
4271 
4272 static void remove_from_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset) {
4273     sys_chunk* sc2;
4274     LIST_ENTRY* le;
4275 
4276     le = Vcb->sys_chunks.Flink;
4277     while (le != &Vcb->sys_chunks) {
4278         sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4279 
4280         if (sc2->key.obj_id == obj_id && sc2->key.obj_type == obj_type && sc2->key.offset == offset) {
4281             RemoveEntryList(&sc2->list_entry);
4282 
4283             Vcb->superblock.n -= sizeof(KEY) + sc2->size;
4284 
4285             ExFreePool(sc2->data);
4286             ExFreePool(sc2);
4287             regen_bootstrap(Vcb);
4288             return;
4289         }
4290 
4291         le = le->Flink;
4292     }
4293 }
4294 
4295 static NTSTATUS set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name, uint16_t namelen,
4296                           uint32_t crc32, uint8_t* data, uint16_t datalen) {
4297     NTSTATUS Status;
4298     uint16_t xasize;
4299     DIR_ITEM* xa;
4300 
4301     TRACE("(%p, %I64x, %I64x, %.*s, %08x, %p, %u)\n", Vcb, subvol->id, inode, namelen, name, crc32, data, datalen);
4302 
4303     xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen + datalen;
4304 
4305     xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4306     if (!xa) {
4307         ERR("out of memory\n");
4308         return STATUS_INSUFFICIENT_RESOURCES;
4309     }
4310 
4311     xa->key.obj_id = 0;
4312     xa->key.obj_type = 0;
4313     xa->key.offset = 0;
4314     xa->transid = Vcb->superblock.generation;
4315     xa->m = datalen;
4316     xa->n = namelen;
4317     xa->type = BTRFS_TYPE_EA;
4318     RtlCopyMemory(xa->name, name, namelen);
4319     RtlCopyMemory(xa->name + namelen, data, datalen);
4320 
4321     Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr);
4322     if (!NT_SUCCESS(Status)) {
4323         ERR("insert_tree_item_batch returned %08x\n", Status);
4324         ExFreePool(xa);
4325         return Status;
4326     }
4327 
4328     return STATUS_SUCCESS;
4329 }
4330 
4331 static NTSTATUS delete_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name,
4332                              uint16_t namelen, uint32_t crc32) {
4333     NTSTATUS Status;
4334     uint16_t xasize;
4335     DIR_ITEM* xa;
4336 
4337     TRACE("(%p, %I64x, %I64x, %.*s, %08x)\n", Vcb, subvol->id, inode, namelen, name, crc32);
4338 
4339     xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen;
4340 
4341     xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4342     if (!xa) {
4343         ERR("out of memory\n");
4344         return STATUS_INSUFFICIENT_RESOURCES;
4345     }
4346 
4347     xa->key.obj_id = 0;
4348     xa->key.obj_type = 0;
4349     xa->key.offset = 0;
4350     xa->transid = Vcb->superblock.generation;
4351     xa->m = 0;
4352     xa->n = namelen;
4353     xa->type = BTRFS_TYPE_EA;
4354     RtlCopyMemory(xa->name, name, namelen);
4355 
4356     Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_DeleteXattr);
4357     if (!NT_SUCCESS(Status)) {
4358         ERR("insert_tree_item_batch returned %08x\n", Status);
4359         ExFreePool(xa);
4360         return Status;
4361     }
4362 
4363     return STATUS_SUCCESS;
4364 }
4365 
4366 static NTSTATUS insert_sparse_extent(fcb* fcb, LIST_ENTRY* batchlist, uint64_t start, uint64_t length) {
4367     NTSTATUS Status;
4368     EXTENT_DATA* ed;
4369     EXTENT_DATA2* ed2;
4370 
4371     TRACE("((%I64x, %I64x), %I64x, %I64x)\n", fcb->subvol->id, fcb->inode, start, length);
4372 
4373     ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4374     if (!ed) {
4375         ERR("out of memory\n");
4376         return STATUS_INSUFFICIENT_RESOURCES;
4377     }
4378 
4379     ed->generation = fcb->Vcb->superblock.generation;
4380     ed->decoded_size = length;
4381     ed->compression = BTRFS_COMPRESSION_NONE;
4382     ed->encryption = BTRFS_ENCRYPTION_NONE;
4383     ed->encoding = BTRFS_ENCODING_NONE;
4384     ed->type = EXTENT_TYPE_REGULAR;
4385 
4386     ed2 = (EXTENT_DATA2*)ed->data;
4387     ed2->address = 0;
4388     ed2->size = 0;
4389     ed2->offset = 0;
4390     ed2->num_bytes = length;
4391 
4392     Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), Batch_Insert);
4393     if (!NT_SUCCESS(Status)) {
4394         ERR("insert_tree_item_batch returned %08x\n", Status);
4395         ExFreePool(ed);
4396         return Status;
4397     }
4398 
4399     return STATUS_SUCCESS;
4400 }
4401 
4402 #ifdef _MSC_VER
4403 #pragma warning(push)
4404 #pragma warning(suppress: 28194)
4405 #endif
4406 NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, uint64_t objid, uint8_t objtype, uint64_t offset,
4407                                 _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data, uint16_t datalen, enum batch_operation operation) {
4408     LIST_ENTRY* le;
4409     batch_root* br = NULL;
4410     batch_item* bi;
4411 
4412     le = batchlist->Flink;
4413     while (le != batchlist) {
4414         batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry);
4415 
4416         if (br2->r == r) {
4417             br = br2;
4418             break;
4419         }
4420 
4421         le = le->Flink;
4422     }
4423 
4424     if (!br) {
4425         br = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG);
4426         if (!br) {
4427             ERR("out of memory\n");
4428             return STATUS_INSUFFICIENT_RESOURCES;
4429         }
4430 
4431         br->r = r;
4432         InitializeListHead(&br->items);
4433         InsertTailList(batchlist, &br->list_entry);
4434     }
4435 
4436     bi = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
4437     if (!bi) {
4438         ERR("out of memory\n");
4439         return STATUS_INSUFFICIENT_RESOURCES;
4440     }
4441 
4442     bi->key.obj_id = objid;
4443     bi->key.obj_type = objtype;
4444     bi->key.offset = offset;
4445     bi->data = data;
4446     bi->datalen = datalen;
4447     bi->operation = operation;
4448 
4449     le = br->items.Blink;
4450     while (le != &br->items) {
4451         batch_item* bi2 = CONTAINING_RECORD(le, batch_item, list_entry);
4452         int cmp = keycmp(bi2->key, bi->key);
4453 
4454         if (cmp == -1 || (cmp == 0 && bi->operation >= bi2->operation)) {
4455             InsertHeadList(&bi2->list_entry, &bi->list_entry);
4456             return STATUS_SUCCESS;
4457         }
4458 
4459         le = le->Blink;
4460     }
4461 
4462     InsertHeadList(&br->items, &bi->list_entry);
4463 
4464     return STATUS_SUCCESS;
4465 }
4466 #ifdef _MSC_VER
4467 #pragma warning(pop)
4468 #endif
4469 
// Working record used by rationalize_extents: one entry per distinct on-disk
// extent address referenced by the file's eligible extents.
typedef struct {
    uint64_t address;       // start address of the on-disk extent (from EXTENT_DATA2.address)
    uint64_t length;        // length of the on-disk extent (from EXTENT_DATA2.size)
    uint64_t offset;        // ext->offset - ed2->offset of the first referencing extent seen
    bool changed;           // set once another range has been merged into this one
    chunk* chunk;           // chunk containing address; NULL until it has been looked up
    uint64_t skip_start;    // smallest ed2->offset over all referencing extents, i.e. leading bytes nothing uses
    uint64_t skip_end;      // smallest (size - offset - num_bytes) over all referencing extents, i.e. unused trailing bytes
    LIST_ENTRY list_entry;  // link in rationalize_extents' list, which is kept sorted by address
} extent_range;
4480 
// Tidies up the on-disk extents referenced by fcb's extent list; called from
// flush_fcb when the extents have changed.
//
// Only extents that are REGULAR or PREALLOC, uncompressed, marked unique and
// not sparse (EXTENT_DATA2.size != 0) are considered. The work is done in
// stages:
//   1. build an address-sorted list of extent_ranges, one per on-disk extent,
//      recording how many bytes at its start and end no EXTENT_DATA refers to;
//   2. look up the chunk each range lives in;
//   3. if any range has unused bytes at either end, trim them off and hand the
//      space to space_list_add for the chunk;
//   4. merge ranges that are physically adjacent within the same chunk, as
//      long as the result does not exceed MAX_EXTENT_SIZE;
//   5. rewrite the EXTENT_DATA2 of every extent lying inside a merged range.
//
// The function returns nothing: on any error it logs, stops, and frees its
// temporary list. Changes already applied at that point are left in place.
static void rationalize_extents(fcb* fcb, PIRP Irp) {
    LIST_ENTRY* le;
    LIST_ENTRY extent_ranges;
    extent_range* er;
    bool changed = false, truncating = false;
    uint32_t num_extents = 0;

    InitializeListHead(&extent_ranges);

    // Stage 1: collect one extent_range per distinct extent address
    le = fcb->extents.Flink;
    while (le != &fcb->extents) {
        extent* ext = CONTAINING_RECORD(le, extent, list_entry);

        if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;

            if (ed2->size != 0) {
                LIST_ENTRY* le2;

                le2 = extent_ranges.Flink;
                while (le2 != &extent_ranges) {
                    extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);

                    if (er2->address == ed2->address) {
                        // already have a range for this extent: narrow the unused head/tail
                        // to what every reference leaves untouched
                        er2->skip_start = min(er2->skip_start, ed2->offset);
                        er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes);
                        goto cont;
                    } else if (er2->address > ed2->address)
                        break; // list is sorted, so the new range belongs just before er2

                    le2 = le2->Flink;
                }

                er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside?
                if (!er) {
                    ERR("out of memory\n");
                    goto end;
                }

                er->address = ed2->address;
                er->length = ed2->size;
                er->offset = ext->offset - ed2->offset;
                er->changed = false;
                er->chunk = NULL;
                er->skip_start = ed2->offset;
                er->skip_end = ed2->size - ed2->offset - ed2->num_bytes;

                // note: this is only evaluated when the range is created; a later reference
                // may still reduce skip_start/skip_end to 0, which stage 3 re-checks per range
                if (er->skip_start != 0 || er->skip_end != 0)
                    truncating = true;

                // insert before le2 (which is the list head if no greater address was found)
                InsertHeadList(le2->Blink, &er->list_entry);
                num_extents++;
            }
        }

cont:
        le = le->Flink;
    }

    // nothing to merge and nothing to trim
    if (num_extents == 0 || (num_extents == 1 && !truncating))
        goto end;

    // Stage 2: find the chunk for each range
    le = extent_ranges.Flink;
    while (le != &extent_ranges) {
        er = CONTAINING_RECORD(le, extent_range, list_entry);

        if (!er->chunk) {
            LIST_ENTRY* le2;

            er->chunk = get_chunk_from_address(fcb->Vcb, er->address);

            if (!er->chunk) {
                ERR("get_chunk_from_address(%I64x) failed\n", er->address);
                goto end;
            }

            // give the same chunk to any later range whose address falls inside it,
            // saving a lookup for each of those
            le2 = le->Flink;
            while (le2 != &extent_ranges) {
                extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);

                if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size)
                    er2->chunk = er->chunk;

                le2 = le2->Flink;
            }
        }

        le = le->Flink;
    }

    // Stage 3
    if (truncating) {
        // truncate beginning or end of extent if unused

        le = extent_ranges.Flink;
        while (le != &extent_ranges) {
            er = CONTAINING_RECORD(le, extent_range, list_entry);

            if (er->skip_start > 0) {
                // re-point every extent that references this range at the trimmed extent
                LIST_ENTRY* le2 = fcb->extents.Flink;
                while (le2 != &fcb->extents) {
                    extent* ext = CONTAINING_RECORD(le2, extent, list_entry);

                    if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;

                        if (ed2->size != 0 && ed2->address == er->address) {
                            NTSTATUS Status;

                            // record a -1 reference change against the old (address, size)...
                            Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                               -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
                            if (!NT_SUCCESS(Status)) {
                                ERR("update_changed_extent_ref returned %08x\n", Status);
                                goto end;
                            }

                            // ...move the start of the extent forward by skip_start...
                            ext->extent_data.decoded_size -= er->skip_start;
                            ed2->size -= er->skip_start;
                            ed2->address += er->skip_start;
                            ed2->offset -= er->skip_start;

                            // ...and a +1 reference change against the new (address, size)
                            add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                   1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
                        }
                    }

                    le2 = le2->Flink;
                }

                // NULL csum: presumably removes the checksums covering the trimmed sectors -
                // TODO confirm against add_checksum_entry
                if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
                    add_checksum_entry(fcb->Vcb, er->address, (ULONG)(er->skip_start / fcb->Vcb->superblock.sector_size), NULL, NULL);

                acquire_chunk_lock(er->chunk, fcb->Vcb);

                if (!er->chunk->cache_loaded) {
                    NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);

                    if (!NT_SUCCESS(Status)) {
                        ERR("load_cache_chunk returned %08x\n", Status);
                        release_chunk_lock(er->chunk, fcb->Vcb);
                        goto end;
                    }
                }

                er->chunk->used -= er->skip_start;

                // er->address is still the old start here, so this covers exactly the trimmed head
                space_list_add(er->chunk, er->address, er->skip_start, NULL);

                release_chunk_lock(er->chunk, fcb->Vcb);

                // the range now starts where the extents above were moved to
                er->address += er->skip_start;
                er->length -= er->skip_start;
            }

            if (er->skip_end > 0) {
                // same procedure for the unused tail; er->address already reflects any head trim,
                // matching the updated ed2->address values
                LIST_ENTRY* le2 = fcb->extents.Flink;
                while (le2 != &fcb->extents) {
                    extent* ext = CONTAINING_RECORD(le2, extent, list_entry);

                    if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;

                        if (ed2->size != 0 && ed2->address == er->address) {
                            NTSTATUS Status;

                            Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                               -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
                            if (!NT_SUCCESS(Status)) {
                                ERR("update_changed_extent_ref returned %08x\n", Status);
                                goto end;
                            }

                            // only the size shrinks; address and offset are unaffected by a tail trim
                            ext->extent_data.decoded_size -= er->skip_end;
                            ed2->size -= er->skip_end;

                            add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                   1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
                        }
                    }

                    le2 = le2->Flink;
                }

                if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
                    add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, (ULONG)(er->skip_end / fcb->Vcb->superblock.sector_size), NULL, NULL);

                acquire_chunk_lock(er->chunk, fcb->Vcb);

                if (!er->chunk->cache_loaded) {
                    NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);

                    if (!NT_SUCCESS(Status)) {
                        ERR("load_cache_chunk returned %08x\n", Status);
                        release_chunk_lock(er->chunk, fcb->Vcb);
                        goto end;
                    }
                }

                er->chunk->used -= er->skip_end;

                space_list_add(er->chunk, er->address + er->length - er->skip_end, er->skip_end, NULL);

                release_chunk_lock(er->chunk, fcb->Vcb);

                er->length -= er->skip_end;
            }

            le = le->Flink;
        }
    }

    // merging needs at least two ranges
    if (num_extents < 2)
        goto end;

    // Stage 4
    // merge together adjacent extents
    le = extent_ranges.Flink;
    while (le != &extent_ranges) {
        er = CONTAINING_RECORD(le, extent_range, list_entry);

        if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) {
            extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry);

            if (er->chunk == er2->chunk) {
                // er2 must start exactly where er ends on disk, and its offset must not
                // fall below er's offset plus length
                if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) {
                    if (er->length + er2->length <= MAX_EXTENT_SIZE) {
                        er->length += er2->length;
                        er->changed = true;

                        RemoveEntryList(&er2->list_entry);
                        ExFreePool(er2);

                        changed = true;
                        // stay on er (le is not advanced) so it can absorb its new neighbour too
                        continue;
                    }
                }
            }
        }

        le = le->Flink;
    }

    if (!changed)
        goto end;

    // Stage 5: point every extent inside a merged range at the merged extent
    le = fcb->extents.Flink;
    while (le != &fcb->extents) {
        extent* ext = CONTAINING_RECORD(le, extent, list_entry);

        if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;

            if (ed2->size != 0) {
                LIST_ENTRY* le2;

                le2 = extent_ranges.Flink;
                while (le2 != &extent_ranges) {
                    extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);

                    // the extent's old on-disk span lies wholly inside a range that was merged
                    if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) {
                        NTSTATUS Status;

                        Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                           -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
                        if (!NT_SUCCESS(Status)) {
                            ERR("update_changed_extent_ref returned %08x\n", Status);
                            goto end;
                        }

                        // re-express the reference relative to the start of the merged extent
                        ed2->offset += ed2->address - er2->address;
                        ed2->address = er2->address;
                        ed2->size = er2->length;
                        ext->extent_data.decoded_size = ed2->size;

                        add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                               1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);

                        break;
                    }

                    le2 = le2->Flink;
                }
            }
        }

        le = le->Flink;
    }

end:
    // free whatever is left of the temporary range list
    while (!IsListEmpty(&extent_ranges)) {
        le = RemoveHeadList(&extent_ranges);
        er = CONTAINING_RECORD(le, extent_range, list_entry);

        ExFreePool(er);
    }
}
4775 
4776 NTSTATUS flush_fcb(fcb* fcb, bool cache, LIST_ENTRY* batchlist, PIRP Irp) {
4777     traverse_ptr tp;
4778     KEY searchkey;
4779     NTSTATUS Status;
4780     INODE_ITEM* ii;
4781     uint64_t ii_offset;
4782 #ifdef DEBUG_PARANOID
4783     uint64_t old_size = 0;
4784     bool extents_changed;
4785 #endif
4786 
4787     if (fcb->ads) {
4788         if (fcb->deleted) {
4789             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, fcb->adshash);
4790             if (!NT_SUCCESS(Status)) {
4791                 ERR("delete_xattr returned %08x\n", Status);
4792                 goto end;
4793             }
4794         } else {
4795             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length,
4796                                fcb->adshash, (uint8_t*)fcb->adsdata.Buffer, fcb->adsdata.Length);
4797             if (!NT_SUCCESS(Status)) {
4798                 ERR("set_xattr returned %08x\n", Status);
4799                 goto end;
4800             }
4801         }
4802 
4803         Status = STATUS_SUCCESS;
4804         goto end;
4805     }
4806 
4807     if (fcb->deleted) {
4808         Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode);
4809         if (!NT_SUCCESS(Status)) {
4810             ERR("insert_tree_item_batch returned %08x\n", Status);
4811             goto end;
4812         }
4813 
4814         if (fcb->marked_as_orphan) {
4815             Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE,
4816                                             fcb->inode, NULL, 0, Batch_Delete);
4817             if (!NT_SUCCESS(Status)) {
4818                 ERR("insert_tree_item_batch returned %08x\n", Status);
4819                 goto end;
4820             }
4821         }
4822 
4823         Status = STATUS_SUCCESS;
4824         goto end;
4825     }
4826 
4827 #ifdef DEBUG_PARANOID
4828     extents_changed = fcb->extents_changed;
4829 #endif
4830 
4831     if (fcb->extents_changed) {
4832         LIST_ENTRY* le;
4833         bool prealloc = false, extents_inline = false;
4834         uint64_t last_end;
4835 
4836         // delete ignored extent items
4837         le = fcb->extents.Flink;
4838         while (le != &fcb->extents) {
4839             LIST_ENTRY* le2 = le->Flink;
4840             extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4841 
4842             if (ext->ignore) {
4843                 RemoveEntryList(&ext->list_entry);
4844 
4845                 if (ext->csum)
4846                     ExFreePool(ext->csum);
4847 
4848                 ExFreePool(ext);
4849             }
4850 
4851             le = le2;
4852         }
4853 
4854         le = fcb->extents.Flink;
4855         while (le != &fcb->extents) {
4856             extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4857 
4858             if (ext->inserted && ext->csum && ext->extent_data.type == EXTENT_TYPE_REGULAR) {
4859                 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4860 
4861                 if (ed2->size > 0) { // not sparse
4862                     if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE)
4863                         add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size), ext->csum, Irp);
4864                     else
4865                         add_checksum_entry(fcb->Vcb, ed2->address, (ULONG)(ed2->size / fcb->Vcb->superblock.sector_size), ext->csum, Irp);
4866                 }
4867             }
4868 
4869             le = le->Flink;
4870         }
4871 
4872         if (!IsListEmpty(&fcb->extents)) {
4873             rationalize_extents(fcb, Irp);
4874 
4875             // merge together adjacent EXTENT_DATAs pointing to same extent
4876 
4877             le = fcb->extents.Flink;
4878             while (le != &fcb->extents) {
4879                 LIST_ENTRY* le2 = le->Flink;
4880                 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4881 
4882                 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
4883                     extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
4884 
4885                     if (ext->extent_data.type == nextext->extent_data.type) {
4886                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4887                         EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->extent_data.data;
4888 
4889                         if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
4890                             nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
4891                             chunk* c;
4892 
4893                             if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->csum) {
4894                                 ULONG len = (ULONG)((ed2->num_bytes + ned2->num_bytes) / fcb->Vcb->superblock.sector_size);
4895                                 uint32_t* csum;
4896 
4897                                 csum = ExAllocatePoolWithTag(NonPagedPool, len * sizeof(uint32_t), ALLOC_TAG);
4898                                 if (!csum) {
4899                                     ERR("out of memory\n");
4900                                     Status = STATUS_INSUFFICIENT_RESOURCES;
4901                                     goto end;
4902                                 }
4903 
4904                                 RtlCopyMemory(csum, ext->csum, (ULONG)(ed2->num_bytes * sizeof(uint32_t) / fcb->Vcb->superblock.sector_size));
4905                                 RtlCopyMemory(&csum[ed2->num_bytes / fcb->Vcb->superblock.sector_size], nextext->csum,
4906                                               (ULONG)(ned2->num_bytes * sizeof(uint32_t) / fcb->Vcb->superblock.sector_size));
4907 
4908                                 ExFreePool(ext->csum);
4909                                 ext->csum = csum;
4910                             }
4911 
4912                             ext->extent_data.generation = fcb->Vcb->superblock.generation;
4913                             ed2->num_bytes += ned2->num_bytes;
4914 
4915                             RemoveEntryList(&nextext->list_entry);
4916 
4917                             if (nextext->csum)
4918                                 ExFreePool(nextext->csum);
4919 
4920                             ExFreePool(nextext);
4921 
4922                             c = get_chunk_from_address(fcb->Vcb, ed2->address);
4923 
4924                             if (!c) {
4925                                 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
4926                             } else {
4927                                 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
4928                                                                 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
4929                                 if (!NT_SUCCESS(Status)) {
4930                                     ERR("update_changed_extent_ref returned %08x\n", Status);
4931                                     goto end;
4932                                 }
4933                             }
4934 
4935                             le2 = le;
4936                         }
4937                     }
4938                 }
4939 
4940                 le = le2;
4941             }
4942         }
4943 
4944         if (!fcb->created) {
4945             // delete existing EXTENT_DATA items
4946 
4947             Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, NULL, 0, Batch_DeleteExtentData);
4948             if (!NT_SUCCESS(Status)) {
4949                 ERR("insert_tree_item_batch returned %08x\n", Status);
4950                 goto end;
4951             }
4952         }
4953 
4954         // add new EXTENT_DATAs
4955 
4956         last_end = 0;
4957 
4958         le = fcb->extents.Flink;
4959         while (le != &fcb->extents) {
4960             extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4961             EXTENT_DATA* ed;
4962 
4963             ext->inserted = false;
4964 
4965             if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
4966                 Status = insert_sparse_extent(fcb, batchlist, last_end, ext->offset - last_end);
4967                 if (!NT_SUCCESS(Status)) {
4968                     ERR("insert_sparse_extent returned %08x\n", Status);
4969                     goto end;
4970                 }
4971             }
4972 
4973             ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
4974             if (!ed) {
4975                 ERR("out of memory\n");
4976                 Status = STATUS_INSUFFICIENT_RESOURCES;
4977                 goto end;
4978             }
4979 
4980             RtlCopyMemory(ed, &ext->extent_data, ext->datalen);
4981 
4982             Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset,
4983                                             ed, ext->datalen, Batch_Insert);
4984             if (!NT_SUCCESS(Status)) {
4985                 ERR("insert_tree_item_batch returned %08x\n", Status);
4986                 goto end;
4987             }
4988 
4989             if (ed->type == EXTENT_TYPE_PREALLOC)
4990                 prealloc = true;
4991 
4992             if (ed->type == EXTENT_TYPE_INLINE)
4993                 extents_inline = true;
4994 
4995             if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
4996                 if (ed->type == EXTENT_TYPE_INLINE)
4997                     last_end = ext->offset + ed->decoded_size;
4998                 else {
4999                     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
5000 
5001                     last_end = ext->offset + ed2->num_bytes;
5002                 }
5003             }
5004 
5005             le = le->Flink;
5006         }
5007 
5008         if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
5009             sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
5010             Status = insert_sparse_extent(fcb, batchlist, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end);
5011             if (!NT_SUCCESS(Status)) {
5012                 ERR("insert_sparse_extent returned %08x\n", Status);
5013                 goto end;
5014             }
5015         }
5016 
5017         // update prealloc flag in INODE_ITEM
5018 
5019         if (!prealloc)
5020             fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
5021         else
5022             fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
5023 
5024         fcb->inode_item_changed = true;
5025 
5026         fcb->extents_changed = false;
5027     }
5028 
5029     if ((!fcb->created && fcb->inode_item_changed) || cache) {
5030         searchkey.obj_id = fcb->inode;
5031         searchkey.obj_type = TYPE_INODE_ITEM;
5032         searchkey.offset = 0xffffffffffffffff;
5033 
5034         Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp);
5035         if (!NT_SUCCESS(Status)) {
5036             ERR("error - find_item returned %08x\n", Status);
5037             goto end;
5038         }
5039 
5040         if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
5041             if (cache) {
5042                 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
5043                 if (!ii) {
5044                     ERR("out of memory\n");
5045                     Status = STATUS_INSUFFICIENT_RESOURCES;
5046                     goto end;
5047                 }
5048 
5049                 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
5050 
5051                 Status = insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp);
5052                 if (!NT_SUCCESS(Status)) {
5053                     ERR("insert_tree_item returned %08x\n", Status);
5054                     goto end;
5055                 }
5056 
5057                 ii_offset = 0;
5058             } else {
5059                 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id);
5060                 Status = STATUS_INTERNAL_ERROR;
5061                 goto end;
5062             }
5063         } else {
5064 #ifdef DEBUG_PARANOID
5065             INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
5066 
5067             old_size = ii2->st_size;
5068 #endif
5069 
5070             ii_offset = tp.item->key.offset;
5071         }
5072 
5073         if (!cache) {
5074             Status = delete_tree_item(fcb->Vcb, &tp);
5075             if (!NT_SUCCESS(Status)) {
5076                 ERR("delete_tree_item returned %08x\n", Status);
5077                 goto end;
5078             }
5079         } else {
5080             searchkey.obj_id = fcb->inode;
5081             searchkey.obj_type = TYPE_INODE_ITEM;
5082             searchkey.offset = ii_offset;
5083 
5084             Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp);
5085             if (!NT_SUCCESS(Status)) {
5086                 ERR("error - find_item returned %08x\n", Status);
5087                 goto end;
5088             }
5089 
5090             if (keycmp(tp.item->key, searchkey)) {
5091                 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id);
5092                 Status = STATUS_INTERNAL_ERROR;
5093                 goto end;
5094             } else
5095                 RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
5096         }
5097 
5098 #ifdef DEBUG_PARANOID
5099         if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
5100             ERR("error - size has changed but extents not marked as changed\n");
5101             int3;
5102         }
5103 #endif
5104     } else
5105         ii_offset = 0;
5106 
5107     fcb->created = false;
5108 
5109     if (!cache && fcb->inode_item_changed) {
5110         ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
5111         if (!ii) {
5112             ERR("out of memory\n");
5113             Status = STATUS_INSUFFICIENT_RESOURCES;
5114             goto end;
5115         }
5116 
5117         RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
5118 
5119         Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM),
5120                                         Batch_Insert);
5121         if (!NT_SUCCESS(Status)) {
5122             ERR("insert_tree_item_batch returned %08x\n", Status);
5123             goto end;
5124         }
5125 
5126         fcb->inode_item_changed = false;
5127     }
5128 
5129     if (fcb->sd_dirty) {
5130         if (!fcb->sd_deleted) {
5131             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1,
5132                                EA_NTACL_HASH, (uint8_t*)fcb->sd, (uint16_t)RtlLengthSecurityDescriptor(fcb->sd));
5133             if (!NT_SUCCESS(Status)) {
5134                 ERR("set_xattr returned %08x\n", Status);
5135                 goto end;
5136             }
5137         } else {
5138             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1, EA_NTACL_HASH);
5139             if (!NT_SUCCESS(Status)) {
5140                 ERR("delete_xattr returned %08x\n", Status);
5141                 goto end;
5142             }
5143         }
5144 
5145         fcb->sd_deleted = false;
5146         fcb->sd_dirty = false;
5147     }
5148 
5149     if (fcb->atts_changed) {
5150         if (!fcb->atts_deleted) {
5151             uint8_t val[16], *val2;
5152             ULONG atts = fcb->atts;
5153 
5154             TRACE("inserting new DOSATTRIB xattr\n");
5155 
5156             if (fcb->inode == SUBVOL_ROOT_INODE)
5157                 atts &= ~FILE_ATTRIBUTE_READONLY;
5158 
5159             val2 = &val[sizeof(val) - 1];
5160 
5161             do {
5162                 uint8_t c = atts % 16;
5163                 *val2 = c <= 9 ? (c + '0') : (c - 0xa + 'a');
5164 
5165                 val2--;
5166                 atts >>= 4;
5167             } while (atts != 0);
5168 
5169             *val2 = 'x';
5170             val2--;
5171             *val2 = '0';
5172 
5173             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1,
5174                                EA_DOSATTRIB_HASH, val2, (uint16_t)(val + sizeof(val) - val2));
5175             if (!NT_SUCCESS(Status)) {
5176                 ERR("set_xattr returned %08x\n", Status);
5177                 goto end;
5178             }
5179         } else {
5180             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1, EA_DOSATTRIB_HASH);
5181             if (!NT_SUCCESS(Status)) {
5182                 ERR("delete_xattr returned %08x\n", Status);
5183                 goto end;
5184             }
5185         }
5186 
5187         fcb->atts_changed = false;
5188         fcb->atts_deleted = false;
5189     }
5190 
5191     if (fcb->reparse_xattr_changed) {
5192         if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
5193             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1,
5194                                EA_REPARSE_HASH, (uint8_t*)fcb->reparse_xattr.Buffer, (uint16_t)fcb->reparse_xattr.Length);
5195             if (!NT_SUCCESS(Status)) {
5196                 ERR("set_xattr returned %08x\n", Status);
5197                 goto end;
5198             }
5199         } else {
5200             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1, EA_REPARSE_HASH);
5201             if (!NT_SUCCESS(Status)) {
5202                 ERR("delete_xattr returned %08x\n", Status);
5203                 goto end;
5204             }
5205         }
5206 
5207         fcb->reparse_xattr_changed = false;
5208     }
5209 
5210     if (fcb->ea_changed) {
5211         if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) {
5212             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1,
5213                                EA_EA_HASH, (uint8_t*)fcb->ea_xattr.Buffer, (uint16_t)fcb->ea_xattr.Length);
5214             if (!NT_SUCCESS(Status)) {
5215                 ERR("set_xattr returned %08x\n", Status);
5216                 goto end;
5217             }
5218         } else {
5219             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1, EA_EA_HASH);
5220             if (!NT_SUCCESS(Status)) {
5221                 ERR("delete_xattr returned %08x\n", Status);
5222                 goto end;
5223             }
5224         }
5225 
5226         fcb->ea_changed = false;
5227     }
5228 
5229     if (fcb->prop_compression_changed) {
5230         if (fcb->prop_compression == PropCompression_None) {
5231             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, EA_PROP_COMPRESSION_HASH);
5232             if (!NT_SUCCESS(Status)) {
5233                 ERR("delete_xattr returned %08x\n", Status);
5234                 goto end;
5235             }
5236         } else if (fcb->prop_compression == PropCompression_Zlib) {
5237             static const char zlib[] = "zlib";
5238 
5239             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5240                                EA_PROP_COMPRESSION_HASH, (uint8_t*)zlib, sizeof(zlib) - 1);
5241             if (!NT_SUCCESS(Status)) {
5242                 ERR("set_xattr returned %08x\n", Status);
5243                 goto end;
5244             }
5245         } else if (fcb->prop_compression == PropCompression_LZO) {
5246             static const char lzo[] = "lzo";
5247 
5248             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5249                                EA_PROP_COMPRESSION_HASH, (uint8_t*)lzo, sizeof(lzo) - 1);
5250             if (!NT_SUCCESS(Status)) {
5251                 ERR("set_xattr returned %08x\n", Status);
5252                 goto end;
5253             }
5254         } else if (fcb->prop_compression == PropCompression_ZSTD) {
5255             static const char zstd[] = "zstd";
5256 
5257             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5258                                EA_PROP_COMPRESSION_HASH, (uint8_t*)zstd, sizeof(zstd) - 1);
5259             if (!NT_SUCCESS(Status)) {
5260                 ERR("set_xattr returned %08x\n", Status);
5261                 goto end;
5262             }
5263         }
5264 
5265         fcb->prop_compression_changed = false;
5266     }
5267 
5268     if (fcb->xattrs_changed) {
5269         LIST_ENTRY* le;
5270 
5271         le = fcb->xattrs.Flink;
5272         while (le != &fcb->xattrs) {
5273             xattr* xa = CONTAINING_RECORD(le, xattr, list_entry);
5274             LIST_ENTRY* le2 = le->Flink;
5275 
5276             if (xa->dirty) {
5277                 uint32_t hash = calc_crc32c(0xfffffffe, (uint8_t*)xa->data, xa->namelen);
5278 
5279                 if (xa->valuelen == 0) {
5280                     Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, hash);
5281                     if (!NT_SUCCESS(Status)) {
5282                         ERR("delete_xattr returned %08x\n", Status);
5283                         goto end;
5284                     }
5285 
5286                     RemoveEntryList(&xa->list_entry);
5287                     ExFreePool(xa);
5288                 } else {
5289                     Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen,
5290                                        hash, (uint8_t*)&xa->data[xa->namelen], xa->valuelen);
5291                     if (!NT_SUCCESS(Status)) {
5292                         ERR("set_xattr returned %08x\n", Status);
5293                         goto end;
5294                     }
5295 
5296                     xa->dirty = false;
5297                 }
5298             }
5299 
5300             le = le2;
5301         }
5302 
5303         fcb->xattrs_changed = false;
5304     }
5305 
5306     if ((fcb->case_sensitive_set && !fcb->case_sensitive)) {
5307         Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE,
5308                               sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH);
5309         if (!NT_SUCCESS(Status)) {
5310             ERR("delete_xattr returned %08x\n", Status);
5311             goto end;
5312         }
5313 
5314         fcb->case_sensitive_set = false;
5315     } else if ((!fcb->case_sensitive_set && fcb->case_sensitive)) {
5316         Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE,
5317                            sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH, (uint8_t*)"1", 1);
5318         if (!NT_SUCCESS(Status)) {
5319             ERR("set_xattr returned %08x\n", Status);
5320             goto end;
5321         }
5322 
5323         fcb->case_sensitive_set = true;
5324     }
5325 
5326     if (fcb->inode_item.st_nlink == 0 && !fcb->marked_as_orphan) { // mark as orphan
5327         Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE,
5328                                         fcb->inode, NULL, 0, Batch_Insert);
5329         if (!NT_SUCCESS(Status)) {
5330             ERR("insert_tree_item_batch returned %08x\n", Status);
5331             goto end;
5332         }
5333 
5334         fcb->marked_as_orphan = true;
5335     }
5336 
5337     Status = STATUS_SUCCESS;
5338 
5339 end:
5340     if (fcb->dirty) {
5341         bool lock = false;
5342 
5343         fcb->dirty = false;
5344 
5345         if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->dirty_fcbs_lock)) {
5346             ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, true);
5347             lock = true;
5348         }
5349 
5350         RemoveEntryList(&fcb->list_entry_dirty);
5351 
5352         if (lock)
5353             ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock);
5354     }
5355 
5356     return Status;
5357 }
5358 
5359 void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, uint64_t address, uint64_t size) {
5360     int i;
5361     ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
5362 
5363     i = 0;
5364     while (superblock_addrs[i] != 0) {
5365         if (superblock_addrs[i] + sblen >= address && superblock_addrs[i] < address + size) {
5366             if (superblock_addrs[i] > address)
5367                 add_trim_entry(dev, address, superblock_addrs[i] - address);
5368 
5369             if (size <= superblock_addrs[i] + sblen - address)
5370                 return;
5371 
5372             size -= superblock_addrs[i] + sblen - address;
5373             address = superblock_addrs[i] + sblen;
5374         } else if (superblock_addrs[i] > address + size)
5375             break;
5376 
5377         i++;
5378     }
5379 
5380     add_trim_entry(dev, address, size);
5381 }
5382 
5383 static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5384     NTSTATUS Status;
5385     KEY searchkey;
5386     traverse_ptr tp;
5387     uint64_t i, factor;
5388 #ifdef __REACTOS__
5389     uint64_t phys_used;
5390 #endif
5391     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];;
5392 
5393     TRACE("dropping chunk %I64x\n", c->offset);
5394 
5395     if (c->chunk_item->type & BLOCK_FLAG_RAID0)
5396         factor = c->chunk_item->num_stripes;
5397     else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
5398         factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
5399     else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
5400         factor = c->chunk_item->num_stripes - 1;
5401     else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
5402         factor = c->chunk_item->num_stripes - 2;
5403     else // SINGLE, DUPLICATE, RAID1
5404         factor = 1;
5405 
5406     // do TRIM
5407     if (Vcb->trim && !Vcb->options.no_trim) {
5408         uint64_t len = c->chunk_item->size / factor;
5409 
5410         for (i = 0; i < c->chunk_item->num_stripes; i++) {
5411             if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
5412                 add_trim_entry_avoid_sb(Vcb, c->devices[i], cis[i].offset, len);
5413         }
5414     }
5415 
5416     if (!c->cache) {
5417         Status = load_stored_free_space_cache(Vcb, c, true, Irp);
5418 
5419         if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND)
5420             WARN("load_stored_free_space_cache returned %08x\n", Status);
5421     }
5422 
5423     // remove free space cache
5424     if (c->cache) {
5425         c->cache->deleted = true;
5426 
5427         Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback);
5428         if (!NT_SUCCESS(Status)) {
5429             ERR("excise_extents returned %08x\n", Status);
5430             return Status;
5431         }
5432 
5433         Status = flush_fcb(c->cache, true, batchlist, Irp);
5434 
5435         free_fcb(c->cache);
5436 
5437         if (c->cache->refcount == 0)
5438             reap_fcb(c->cache);
5439 
5440         if (!NT_SUCCESS(Status)) {
5441             ERR("flush_fcb returned %08x\n", Status);
5442             return Status;
5443         }
5444 
5445         searchkey.obj_id = FREE_SPACE_CACHE_ID;
5446         searchkey.obj_type = 0;
5447         searchkey.offset = c->offset;
5448 
5449         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
5450         if (!NT_SUCCESS(Status)) {
5451             ERR("error - find_item returned %08x\n", Status);
5452             return Status;
5453         }
5454 
5455         if (!keycmp(tp.item->key, searchkey)) {
5456             Status = delete_tree_item(Vcb, &tp);
5457             if (!NT_SUCCESS(Status)) {
5458                 ERR("delete_tree_item returned %08x\n", Status);
5459                 return Status;
5460             }
5461         }
5462     }
5463 
5464     if (Vcb->space_root) {
5465         Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size,
5466                                         NULL, 0, Batch_DeleteFreeSpace);
5467         if (!NT_SUCCESS(Status)) {
5468             ERR("insert_tree_item_batch returned %08x\n", Status);
5469             return Status;
5470         }
5471     }
5472 
5473     for (i = 0; i < c->chunk_item->num_stripes; i++) {
5474         if (!c->created) {
5475             // remove DEV_EXTENTs from tree 4
5476             searchkey.obj_id = cis[i].dev_id;
5477             searchkey.obj_type = TYPE_DEV_EXTENT;
5478             searchkey.offset = cis[i].offset;
5479 
5480             Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp);
5481             if (!NT_SUCCESS(Status)) {
5482                 ERR("error - find_item returned %08x\n", Status);
5483                 return Status;
5484             }
5485 
5486             if (!keycmp(tp.item->key, searchkey)) {
5487                 Status = delete_tree_item(Vcb, &tp);
5488                 if (!NT_SUCCESS(Status)) {
5489                     ERR("delete_tree_item returned %08x\n", Status);
5490                     return Status;
5491                 }
5492 
5493                 if (tp.item->size >= sizeof(DEV_EXTENT)) {
5494                     DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
5495 
5496                     c->devices[i]->devitem.bytes_used -= de->length;
5497 
5498                     if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5499                         if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + de->length > Vcb->balance.opts[0].drange_start)
5500                             space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5501                     } else
5502                         space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, NULL, rollback);
5503                 }
5504             } else
5505                 WARN("could not find (%I64x,%x,%I64x) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
5506         } else {
5507             uint64_t len = c->chunk_item->size / factor;
5508 
5509             c->devices[i]->devitem.bytes_used -= len;
5510 
5511             if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5512                 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + len > Vcb->balance.opts[0].drange_start)
5513                     space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5514             } else
5515                 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, NULL, rollback);
5516         }
5517     }
5518 
5519     // modify DEV_ITEMs in chunk tree
5520     for (i = 0; i < c->chunk_item->num_stripes; i++) {
5521         if (c->devices[i]) {
5522             uint64_t j;
5523             DEV_ITEM* di;
5524 
5525             searchkey.obj_id = 1;
5526             searchkey.obj_type = TYPE_DEV_ITEM;
5527             searchkey.offset = c->devices[i]->devitem.dev_id;
5528 
5529             Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
5530             if (!NT_SUCCESS(Status)) {
5531                 ERR("error - find_item returned %08x\n", Status);
5532                 return Status;
5533             }
5534 
5535             if (!keycmp(tp.item->key, searchkey)) {
5536                 Status = delete_tree_item(Vcb, &tp);
5537                 if (!NT_SUCCESS(Status)) {
5538                     ERR("delete_tree_item returned %08x\n", Status);
5539                     return Status;
5540                 }
5541 
5542                 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
5543                 if (!di) {
5544                     ERR("out of memory\n");
5545                     return STATUS_INSUFFICIENT_RESOURCES;
5546                 }
5547 
5548                 RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
5549 
5550                 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
5551                 if (!NT_SUCCESS(Status)) {
5552                     ERR("insert_tree_item returned %08x\n", Status);
5553                     return Status;
5554                 }
5555             }
5556 
5557             for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
5558                 if (c->devices[j] == c->devices[i])
5559                     c->devices[j] = NULL;
5560             }
5561         }
5562     }
5563 
5564     if (!c->created) {
5565         // remove CHUNK_ITEM from chunk tree
5566         searchkey.obj_id = 0x100;
5567         searchkey.obj_type = TYPE_CHUNK_ITEM;
5568         searchkey.offset = c->offset;
5569 
5570         Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
5571         if (!NT_SUCCESS(Status)) {
5572             ERR("error - find_item returned %08x\n", Status);
5573             return Status;
5574         }
5575 
5576         if (!keycmp(tp.item->key, searchkey)) {
5577             Status = delete_tree_item(Vcb, &tp);
5578 
5579             if (!NT_SUCCESS(Status)) {
5580                 ERR("delete_tree_item returned %08x\n", Status);
5581                 return Status;
5582             }
5583         } else
5584             WARN("could not find CHUNK_ITEM for chunk %I64x\n", c->offset);
5585 
5586         // remove BLOCK_GROUP_ITEM from extent tree
5587         searchkey.obj_id = c->offset;
5588         searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
5589         searchkey.offset = 0xffffffffffffffff;
5590 
5591         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
5592         if (!NT_SUCCESS(Status)) {
5593             ERR("error - find_item returned %08x\n", Status);
5594             return Status;
5595         }
5596 
5597         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
5598             Status = delete_tree_item(Vcb, &tp);
5599 
5600             if (!NT_SUCCESS(Status)) {
5601                 ERR("delete_tree_item returned %08x\n", Status);
5602                 return Status;
5603             }
5604         } else
5605             WARN("could not find BLOCK_GROUP_ITEM for chunk %I64x\n", c->offset);
5606     }
5607 
5608     if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
5609         remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
5610 
5611     RemoveEntryList(&c->list_entry);
5612 
5613     // clear raid56 incompat flag if dropping last RAID5/6 chunk
5614 
5615     if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
5616         LIST_ENTRY* le;
5617         bool clear_flag = true;
5618 
5619         le = Vcb->chunks.Flink;
5620         while (le != &Vcb->chunks) {
5621             chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
5622 
5623             if (c2->chunk_item->type & BLOCK_FLAG_RAID5 || c2->chunk_item->type & BLOCK_FLAG_RAID6) {
5624                 clear_flag = false;
5625                 break;
5626             }
5627 
5628             le = le->Flink;
5629         }
5630 
5631         if (clear_flag)
5632             Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID56;
5633     }
5634 
5635 #ifndef __REACTOS__
5636     uint64_t phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused);
5637 #else
5638     phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused);
5639 #endif
5640 
5641     if (phys_used < Vcb->superblock.bytes_used)
5642         Vcb->superblock.bytes_used -= phys_used;
5643     else
5644         Vcb->superblock.bytes_used = 0;
5645 
5646     ExFreePool(c->chunk_item);
5647     ExFreePool(c->devices);
5648 
5649     while (!IsListEmpty(&c->space)) {
5650         space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
5651 
5652         RemoveEntryList(&s->list_entry);
5653         ExFreePool(s);
5654     }
5655 
5656     while (!IsListEmpty(&c->deleting)) {
5657         space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
5658 
5659         RemoveEntryList(&s->list_entry);
5660         ExFreePool(s);
5661     }
5662 
5663     release_chunk_lock(c, Vcb);
5664 
5665     ExDeleteResourceLite(&c->partial_stripes_lock);
5666     ExDeleteResourceLite(&c->range_locks_lock);
5667     ExDeleteResourceLite(&c->lock);
5668     ExDeleteResourceLite(&c->changed_extents_lock);
5669 
5670     ExFreePool(c);
5671 
5672     return STATUS_SUCCESS;
5673 }
5674 
// Fills a range of a partial stripe's buffer (ps->data) from disk.
//
// offset and len are counted in sectors from the start of ps->data; startoff is the
// byte offset added to each stripe's cis[].offset to find the stripe on its device.
// parity is the index of the last stripe in rotation order: the first logical stripe
// lives on stripe (parity + 1) % num_stripes.
//
// If the device holding the wanted stripe has no devobj, the data is rebuilt from the
// other stripes - by XOR for RAID5 chunks, otherwise by the second fallback branch
// (presumably RAID6; the RAID6 flag itself isn't tested here - TODO confirm).
//
// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if a scratch buffer can't be
// allocated, STATUS_UNEXPECTED_IO_ERROR if too many stripes are unavailable to rebuild
// from, or (on the direct and RAID5 paths) the failing status of sync_read_phys.
static NTSTATUS partial_stripe_read(device_extension* Vcb, chunk* c, partial_stripe* ps, uint64_t startoff, uint16_t parity, ULONG offset, ULONG len) {
    NTSTATUS Status;
    // number of sectors in one stripe
    ULONG sl = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size);
    // the stripe descriptors start at the address immediately following the CHUNK_ITEM
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];

    while (len > 0) {
        // clamp this pass so that it stops at the end of the current stripe
        ULONG readlen = min(offset + len, offset + (sl - (offset % sl))) - offset;
        uint16_t stripe;

        // physical stripe holding this range: logical stripe (offset / sl), counted from the one after parity
        stripe = (parity + (offset / sl) + 1) % c->chunk_item->num_stripes;

        if (c->devices[stripe]->devobj) {
            // device present - read straight into the partial stripe buffer
            Status = sync_read_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
                                    readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), false);
            if (!NT_SUCCESS(Status)) {
                ERR("sync_read_phys returned %08x\n", Status);
                return Status;
            }
        } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
            // device missing on RAID5: rebuild by XORing the same range of every other stripe
            uint16_t i;
            uint8_t* scratch;

            scratch = ExAllocatePoolWithTag(NonPagedPool, readlen * Vcb->superblock.sector_size, ALLOC_TAG);
            if (!scratch) {
                ERR("out of memory\n");
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            for (i = 0; i < c->chunk_item->num_stripes; i++) {
                if (i != stripe) {
                    // a second missing device leaves nothing to rebuild from
                    if (!c->devices[i]->devobj) {
                        ExFreePool(scratch);
                        return STATUS_UNEXPECTED_IO_ERROR;
                    }

                    // the first stripe visited seeds the destination; the rest are read into scratch and XORed in
                    if (i == 0 || (stripe == 0 && i == 1)) {
                        Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
                                                readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08x\n", Status);
                            ExFreePool(scratch);
                            return Status;
                        }
                    } else {
                        Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
                                                readlen * Vcb->superblock.sector_size, scratch, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08x\n", Status);
                            ExFreePool(scratch);
                            return Status;
                        }

                        do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch, readlen * Vcb->superblock.sector_size);
                    }
                }
            }

            ExFreePool(scratch);
        } else {
            // device missing and chunk not flagged RAID5 (presumably RAID6 - TODO confirm)
            uint8_t* scratch;
            uint16_t k, i, logstripe, error_stripe, num_errors = 0;

            // one slot of readlen sectors per stripe, plus two extra slots; the first extra slot
            // (index num_stripes) is handed to raid6_recover2 as its last argument below
            scratch = ExAllocatePoolWithTag(NonPagedPool, (c->chunk_item->num_stripes + 2) * readlen * Vcb->superblock.sector_size, ALLOC_TAG);
            if (!scratch) {
                ERR("out of memory\n");
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            // k is the slot in rotation order (starting just after parity), i the matching physical stripe
            i = (parity + 1) % c->chunk_item->num_stripes;
            for (k = 0; k < c->chunk_item->num_stripes; k++) {
                if (i != stripe) {
                    if (c->devices[i]->devobj) {
                        Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
                                                readlen * Vcb->superblock.sector_size, scratch + (k * readlen * Vcb->superblock.sector_size), false);
                        if (!NT_SUCCESS(Status)) {
                            // a failed read is counted as a lost stripe rather than returned
                            ERR("sync_read_phys returned %08x\n", Status);
                            num_errors++;
                            error_stripe = k;
                        }
                    } else {
                        num_errors++;
                        error_stripe = k;
                    }

                    // more than one lost stripe in addition to the wanted one - give up
                    if (num_errors > 1) {
                        ExFreePool(scratch);
                        return STATUS_UNEXPECTED_IO_ERROR;
                    }
                } else
                    logstripe = k; // slot of the stripe being rebuilt

                i = (i + 1) % c->chunk_item->num_stripes;
            }

            // Nothing else was lost, or only the final slot (the stripe at index parity) was:
            // XOR together slots 0 to num_stripes - 2, leaving out the slot being rebuilt.
            if (num_errors == 0 || error_stripe == c->chunk_item->num_stripes - 1) {
                for (k = 0; k < c->chunk_item->num_stripes - 1; k++) {
                    if (k != logstripe) {
                        // first slot visited is copied, the rest are XORed in
                        if (k == 0 || (k == 1 && logstripe == 0)) {
                            RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size),
                                          readlen * Vcb->superblock.sector_size);
                        } else {
                            do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size),
                                   readlen * Vcb->superblock.sector_size);
                        }
                    }
                }
            } else {
                // another slot before the final one was lost as well; presumably raid6_recover2 rebuilds
                // slot logstripe into its last argument - confirm against its definition
                raid6_recover2(scratch, c->chunk_item->num_stripes, readlen * Vcb->superblock.sector_size, logstripe,
                               error_stripe, scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size));

                RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size),
                              readlen * Vcb->superblock.sector_size);
            }

            ExFreePool(scratch);
        }

        offset += readlen;
        len -= readlen;
    }

    return STATUS_SUCCESS;
}
5798 
// Completes one partial stripe of a parity-RAID chunk and writes it out.
//
// In order:
//  1. reads from disk (via partial_stripe_read) every sector range of the stripe that
//     is not covered by a clear run in ps->bmp - presumably clear bits mark sectors
//     already held in ps->data; TODO confirm against the code that fills in ps->bmp;
//  2. zeroes the parts of ps->data that overlap entries of c->space and c->deleting;
//  3. writes each data stripe to its device, skipping devices that have no devobj;
//  4. computes and writes parity: a single XOR stripe if the chunk is flagged RAID5,
//     otherwise two parity stripes (presumably RAID6 P and Q - the RAID6 flag isn't
//     tested here).
//
// When the RAID5 parity device is present the parity is accumulated in place, so the
// first stripe_length bytes of ps->data no longer hold data afterwards.
//
// Returns STATUS_SUCCESS, the first failing status from partial_stripe_read or
// write_data_phys, or STATUS_INSUFFICIENT_RESOURCES if the parity scratch buffer
// can't be allocated.
NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps) {
    NTSTATUS Status;
    uint16_t parity2, stripe, startoffstripe;
    uint8_t* data;
    uint64_t startoff;
    ULONG runlength, index, last1;
    // the stripe descriptors start at the address immediately following the CHUNK_ITEM
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
    LIST_ENTRY* le;
    // one parity stripe if flagged RAID5, two otherwise
    uint16_t k, num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
    // number of data bytes covered by one full stripe
    uint64_t ps_length = num_data_stripes * c->chunk_item->stripe_length;
    ULONG stripe_length = (ULONG)c->chunk_item->stripe_length;

    // FIXME - do writes asynchronously?

    // only startoff is used below; startoffstripe is an output that is never read here
    get_raid0_offset(ps->address - c->offset, stripe_length, num_data_stripes, &startoff, &startoffstripe);

    // index of the last stripe in rotation order for this stripe row; it moves on by one each row
    parity2 = (((ps->address - c->offset) / ps_length) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;

    // read data (or reconstruct if degraded)

    // last1 is the first sector after the previous clear run; each gap [last1, index) is read from disk
    runlength = RtlFindFirstRunClear(&ps->bmp, &index);
    last1 = 0;

    while (runlength != 0) {
        if (index >= ps->bmplen)
            break;

        // trim a run that extends past the end of the bitmap
        if (index + runlength >= ps->bmplen) {
            runlength = ps->bmplen - index;

            if (runlength == 0)
                break;
        }

        if (index > last1) {
            Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, index - last1);
            if (!NT_SUCCESS(Status)) {
                ERR("partial_stripe_read returned %08x\n", Status);
                return Status;
            }
        }

        last1 = index + runlength;

        runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
    }

    // read whatever is left between the last clear run and the end of the stripe
    if (last1 < ps_length / Vcb->superblock.sector_size) {
        Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, (ULONG)((ps_length / Vcb->superblock.sector_size) - last1));
        if (!NT_SUCCESS(Status)) {
            ERR("partial_stripe_read returned %08x\n", Status);
            return Status;
        }
    }

    // set unallocated data to 0
    le = c->space.Flink;
    while (le != &c->space) {
        space* s = CONTAINING_RECORD(le, space, list_entry);

        if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
            // clip the entry to the extent of this stripe
            uint64_t start = max(ps->address, s->address);
            uint64_t end = min(ps->address + ps_length, s->address + s->size);

            RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
        } else if (s->address >= ps->address + ps_length) // stops at the first entry past the stripe - presumably the list is sorted by address
            break;

        le = le->Flink;
    }

    // same again for the entries of c->deleting
    le = c->deleting.Flink;
    while (le != &c->deleting) {
        space* s = CONTAINING_RECORD(le, space, list_entry);

        if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
            uint64_t start = max(ps->address, s->address);
            uint64_t end = min(ps->address + ps_length, s->address + s->size);

            RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
        } else if (s->address >= ps->address + ps_length)
            break;

        le = le->Flink;
    }

    // first data stripe is the one after parity2
    stripe = (parity2 + 1) % c->chunk_item->num_stripes;

    // write the data stripes, skipping any device that has no devobj
    data = ps->data;
    for (k = 0; k < num_data_stripes; k++) {
        if (c->devices[stripe]->devobj) {
            Status = write_data_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff, data, stripe_length);
            if (!NT_SUCCESS(Status)) {
                ERR("write_data_phys returned %08x\n", Status);
                return Status;
            }
        }

        data += stripe_length;
        stripe = (stripe + 1) % c->chunk_item->num_stripes;
    }

    // write parity
    if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
        if (c->devices[parity2]->devobj) {
            uint16_t i;

            // XOR every other data stripe into the first one, in place
            for (i = 1; i < c->chunk_item->num_stripes - 1; i++) {
                do_xor(ps->data, ps->data + (i * stripe_length), stripe_length);
            }

            Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff, ps->data, stripe_length);
            if (!NT_SUCCESS(Status)) {
                ERR("write_data_phys returned %08x\n", Status);
                return Status;
            }
        }
    } else {
        // the first parity stripe sits immediately before parity2 in the rotation
        uint16_t parity1 = (parity2 + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;

        if (c->devices[parity1]->devobj || c->devices[parity2]->devobj) {
            uint8_t* scratch;
            uint16_t i;

            // first half accumulates the parity1 stripe, second half the parity2 stripe
            scratch = ExAllocatePoolWithTag(NonPagedPool, stripe_length * 2, ALLOC_TAG);
            if (!scratch) {
                ERR("out of memory\n");
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            // walk the data stripes from the last (index num_stripes - 3) down to 0
            i = c->chunk_item->num_stripes - 3;

            while (true) {
                if (i == c->chunk_item->num_stripes - 3) {
                    // last data stripe seeds both halves
                    RtlCopyMemory(scratch, ps->data + (i * stripe_length), stripe_length);
                    RtlCopyMemory(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
                } else {
                    do_xor(scratch, ps->data + (i * stripe_length), stripe_length);

                    // second half: galois_double the running value, then XOR in this stripe
                    galois_double(scratch + stripe_length, stripe_length);
                    do_xor(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
                }

                if (i == 0)
                    break;

                i--;
            }

            if (c->devices[parity1]->devobj) {
                Status = write_data_phys(c->devices[parity1]->devobj, c->devices[parity1]->fileobj, cis[parity1].offset + startoff, scratch, stripe_length);
                if (!NT_SUCCESS(Status)) {
                    ERR("write_data_phys returned %08x\n", Status);
                    ExFreePool(scratch);
                    return Status;
                }
            }

            if (c->devices[parity2]->devobj) {
                Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff,
                                         scratch + stripe_length, stripe_length);
                if (!NT_SUCCESS(Status)) {
                    ERR("write_data_phys returned %08x\n", Status);
                    ExFreePool(scratch);
                    return Status;
                }
            }

            ExFreePool(scratch);
        }
    }

    return STATUS_SUCCESS;
}
5973 
5974 static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5975     LIST_ENTRY *le, *le2;
5976     NTSTATUS Status;
5977     uint64_t used_minus_cache;
5978 
5979     ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
5980 
5981     // FIXME - do tree chunks before data chunks
5982 
5983     le = Vcb->chunks.Flink;
5984     while (le != &Vcb->chunks) {
5985         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
5986 
5987         le2 = le->Flink;
5988 
5989         if (c->changed) {
5990             acquire_chunk_lock(c, Vcb);
5991 
5992             // flush partial stripes
5993             if (!Vcb->readonly && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) {
5994                 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
5995 
5996                 while (!IsListEmpty(&c->partial_stripes)) {
5997                     partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
5998 
5999                     Status = flush_partial_stripe(Vcb, c, ps);
6000 
6001                     if (ps->bmparr)
6002                         ExFreePool(ps->bmparr);
6003 
6004                     ExFreePool(ps);
6005 
6006                     if (!NT_SUCCESS(Status)) {
6007                         ERR("flush_partial_stripe returned %08x\n", Status);
6008                         ExReleaseResourceLite(&c->partial_stripes_lock);
6009                         release_chunk_lock(c, Vcb);
6010                         ExReleaseResourceLite(&Vcb->chunk_lock);
6011                         return Status;
6012                     }
6013                 }
6014 
6015                 ExReleaseResourceLite(&c->partial_stripes_lock);
6016             }
6017 
6018             if (c->list_entry_balance.Flink) {
6019                 release_chunk_lock(c, Vcb);
6020                 le = le2;
6021                 continue;
6022             }
6023 
6024             if (c->space_changed || c->created) {
6025                 bool created = c->created;
6026 
6027                 used_minus_cache = c->used;
6028 
6029                 // subtract self-hosted cache
6030                 if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
6031                     LIST_ENTRY* le3;
6032 
6033                     le3 = c->cache->extents.Flink;
6034                     while (le3 != &c->cache->extents) {
6035                         extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
6036                         EXTENT_DATA* ed = &ext->extent_data;
6037 
6038                         if (!ext->ignore) {
6039                             if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
6040                                 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
6041 
6042                                 if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
6043                                     used_minus_cache -= ed2->size;
6044                             }
6045                         }
6046 
6047                         le3 = le3->Flink;
6048                     }
6049                 }
6050 
6051                 if (used_minus_cache == 0) {
6052                     Status = drop_chunk(Vcb, c, batchlist, Irp, rollback);
6053                     if (!NT_SUCCESS(Status)) {
6054                         ERR("drop_chunk returned %08x\n", Status);
6055                         release_chunk_lock(c, Vcb);
6056                         ExReleaseResourceLite(&Vcb->chunk_lock);
6057                         return Status;
6058                     }
6059 
6060                     // c is now freed, so avoid releasing non-existent lock
6061                     le = le2;
6062                     continue;
6063                 } else if (c->created) {
6064                     Status = create_chunk(Vcb, c, Irp);
6065                     if (!NT_SUCCESS(Status)) {
6066                         ERR("create_chunk returned %08x\n", Status);
6067                         release_chunk_lock(c, Vcb);
6068                         ExReleaseResourceLite(&Vcb->chunk_lock);
6069                         return Status;
6070                     }
6071                 }
6072 
6073                 if (used_minus_cache > 0 || created)
6074                     release_chunk_lock(c, Vcb);
6075             } else
6076                 release_chunk_lock(c, Vcb);
6077         }
6078 
6079         le = le2;
6080     }
6081 
6082     ExReleaseResourceLite(&Vcb->chunk_lock);
6083 
6084     return STATUS_SUCCESS;
6085 }
6086 
6087 static NTSTATUS delete_root_ref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, uint64_t parinode, PANSI_STRING utf8, PIRP Irp) {
6088     KEY searchkey;
6089     traverse_ptr tp;
6090     NTSTATUS Status;
6091 
6092     searchkey.obj_id = parsubvolid;
6093     searchkey.obj_type = TYPE_ROOT_REF;
6094     searchkey.offset = subvolid;
6095 
6096     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6097     if (!NT_SUCCESS(Status)) {
6098         ERR("error - find_item returned %08x\n", Status);
6099         return Status;
6100     }
6101 
6102     if (!keycmp(searchkey, tp.item->key)) {
6103         if (tp.item->size < sizeof(ROOT_REF)) {
6104             ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
6105             return STATUS_INTERNAL_ERROR;
6106         } else {
6107             ROOT_REF* rr;
6108             ULONG len;
6109 
6110             rr = (ROOT_REF*)tp.item->data;
6111             len = tp.item->size;
6112 
6113             do {
6114                 uint16_t itemlen;
6115 
6116                 if (len < sizeof(ROOT_REF) || len < offsetof(ROOT_REF, name[0]) + rr->n) {
6117                     ERR("(%I64x,%x,%I64x) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6118                     break;
6119                 }
6120 
6121                 itemlen = (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n;
6122 
6123                 if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
6124                     uint16_t newlen = tp.item->size - itemlen;
6125 
6126                     Status = delete_tree_item(Vcb, &tp);
6127                     if (!NT_SUCCESS(Status)) {
6128                         ERR("delete_tree_item returned %08x\n", Status);
6129                         return Status;
6130                     }
6131 
6132                     if (newlen == 0) {
6133                         TRACE("deleting (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6134                     } else {
6135                         uint8_t *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
6136 
6137                         if (!newrr) {
6138                             ERR("out of memory\n");
6139                             return STATUS_INSUFFICIENT_RESOURCES;
6140                         }
6141 
6142                         TRACE("modifying (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6143 
6144                         if ((uint8_t*)rr > tp.item->data) {
6145                             RtlCopyMemory(newrr, tp.item->data, (uint8_t*)rr - tp.item->data);
6146                             rroff = newrr + ((uint8_t*)rr - tp.item->data);
6147                         } else {
6148                             rroff = newrr;
6149                         }
6150 
6151                         if ((uint8_t*)&rr->name[rr->n] < tp.item->data + tp.item->size)
6152                             RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((uint8_t*)&rr->name[rr->n] - tp.item->data));
6153 
6154                         Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp);
6155                         if (!NT_SUCCESS(Status)) {
6156                             ERR("insert_tree_item returned %08x\n", Status);
6157                             ExFreePool(newrr);
6158                             return Status;
6159                         }
6160                     }
6161 
6162                     break;
6163                 }
6164 
6165                 if (len > itemlen) {
6166                     len -= itemlen;
6167                     rr = (ROOT_REF*)&rr->name[rr->n];
6168                 } else
6169                     break;
6170             } while (len > 0);
6171         }
6172     } else {
6173         WARN("could not find ROOT_REF entry for subvol %I64x in %I64x\n", searchkey.offset, searchkey.obj_id);
6174         return STATUS_NOT_FOUND;
6175     }
6176 
6177     return STATUS_SUCCESS;
6178 }
6179 
6180 #ifdef _MSC_VER
6181 #pragma warning(push)
6182 #pragma warning(suppress: 28194)
6183 #endif
6184 static NTSTATUS add_root_ref(_In_ device_extension* Vcb, _In_ uint64_t subvolid, _In_ uint64_t parsubvolid, _In_ __drv_aliasesMem ROOT_REF* rr, _In_opt_ PIRP Irp) {
6185     KEY searchkey;
6186     traverse_ptr tp;
6187     NTSTATUS Status;
6188 
6189     searchkey.obj_id = parsubvolid;
6190     searchkey.obj_type = TYPE_ROOT_REF;
6191     searchkey.offset = subvolid;
6192 
6193     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6194     if (!NT_SUCCESS(Status)) {
6195         ERR("error - find_item returned %08x\n", Status);
6196         return Status;
6197     }
6198 
6199     if (!keycmp(searchkey, tp.item->key)) {
6200         uint16_t rrsize = tp.item->size + (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n;
6201         uint8_t* rr2;
6202 
6203         rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
6204         if (!rr2) {
6205             ERR("out of memory\n");
6206             return STATUS_INSUFFICIENT_RESOURCES;
6207         }
6208 
6209         if (tp.item->size > 0)
6210             RtlCopyMemory(rr2, tp.item->data, tp.item->size);
6211 
6212         RtlCopyMemory(rr2 + tp.item->size, rr, offsetof(ROOT_REF, name[0]) + rr->n);
6213         ExFreePool(rr);
6214 
6215         Status = delete_tree_item(Vcb, &tp);
6216         if (!NT_SUCCESS(Status)) {
6217             ERR("delete_tree_item returned %08x\n", Status);
6218             ExFreePool(rr2);
6219             return Status;
6220         }
6221 
6222         Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp);
6223         if (!NT_SUCCESS(Status)) {
6224             ERR("insert_tree_item returned %08x\n", Status);
6225             ExFreePool(rr2);
6226             return Status;
6227         }
6228     } else {
6229         Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n, NULL, Irp);
6230         if (!NT_SUCCESS(Status)) {
6231             ERR("insert_tree_item returned %08x\n", Status);
6232             ExFreePool(rr);
6233             return Status;
6234         }
6235     }
6236 
6237     return STATUS_SUCCESS;
6238 }
6239 #ifdef _MSC_VER
6240 #pragma warning(pop)
6241 #endif
6242 
6243 static NTSTATUS update_root_backref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, PIRP Irp) {
6244     KEY searchkey;
6245     traverse_ptr tp;
6246     uint8_t* data;
6247     uint16_t datalen;
6248     NTSTATUS Status;
6249 
6250     searchkey.obj_id = parsubvolid;
6251     searchkey.obj_type = TYPE_ROOT_REF;
6252     searchkey.offset = subvolid;
6253 
6254     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6255     if (!NT_SUCCESS(Status)) {
6256         ERR("error - find_item returned %08x\n", Status);
6257         return Status;
6258     }
6259 
6260     if (!keycmp(tp.item->key, searchkey) && tp.item->size > 0) {
6261         datalen = tp.item->size;
6262 
6263         data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
6264         if (!data) {
6265             ERR("out of memory\n");
6266             return STATUS_INSUFFICIENT_RESOURCES;
6267         }
6268 
6269         RtlCopyMemory(data, tp.item->data, datalen);
6270     } else {
6271         datalen = 0;
6272         data = NULL;
6273     }
6274 
6275     searchkey.obj_id = subvolid;
6276     searchkey.obj_type = TYPE_ROOT_BACKREF;
6277     searchkey.offset = parsubvolid;
6278 
6279     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6280     if (!NT_SUCCESS(Status)) {
6281         ERR("error - find_item returned %08x\n", Status);
6282 
6283         if (datalen > 0)
6284             ExFreePool(data);
6285 
6286         return Status;
6287     }
6288 
6289     if (!keycmp(tp.item->key, searchkey)) {
6290         Status = delete_tree_item(Vcb, &tp);
6291         if (!NT_SUCCESS(Status)) {
6292             ERR("delete_tree_item returned %08x\n", Status);
6293 
6294             if (datalen > 0)
6295                 ExFreePool(data);
6296 
6297             return Status;
6298         }
6299     }
6300 
6301     if (datalen > 0) {
6302         Status = insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp);
6303         if (!NT_SUCCESS(Status)) {
6304             ERR("insert_tree_item returned %08x\n", Status);
6305             ExFreePool(data);
6306             return Status;
6307         }
6308     }
6309 
6310     return STATUS_SUCCESS;
6311 }
6312 
6313 static NTSTATUS add_root_item_to_cache(device_extension* Vcb, uint64_t root, PIRP Irp) {
6314     KEY searchkey;
6315     traverse_ptr tp;
6316     NTSTATUS Status;
6317 
6318     searchkey.obj_id = root;
6319     searchkey.obj_type = TYPE_ROOT_ITEM;
6320     searchkey.offset = 0xffffffffffffffff;
6321 
6322     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6323     if (!NT_SUCCESS(Status)) {
6324         ERR("error - find_item returned %08x\n", Status);
6325         return Status;
6326     }
6327 
6328     if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6329         ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
6330         return STATUS_INTERNAL_ERROR;
6331     }
6332 
6333     if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
6334         ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6335         if (!ri) {
6336             ERR("out of memory\n");
6337             return STATUS_INSUFFICIENT_RESOURCES;
6338         }
6339 
6340         if (tp.item->size > 0)
6341             RtlCopyMemory(ri, tp.item->data, tp.item->size);
6342 
6343         RtlZeroMemory(((uint8_t*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
6344 
6345         Status = delete_tree_item(Vcb, &tp);
6346         if (!NT_SUCCESS(Status)) {
6347             ERR("delete_tree_item returned %08x\n", Status);
6348             ExFreePool(ri);
6349             return Status;
6350         }
6351 
6352         Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6353         if (!NT_SUCCESS(Status)) {
6354             ERR("insert_tree_item returned %08x\n", Status);
6355             ExFreePool(ri);
6356             return Status;
6357         }
6358     } else {
6359         tp.tree->write = true;
6360     }
6361 
6362     return STATUS_SUCCESS;
6363 }
6364 
6365 static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp) {
6366     NTSTATUS Status;
6367 
6368     // if fileref created and then immediately deleted, do nothing
6369     if (fileref->created && fileref->deleted) {
6370         fileref->dirty = false;
6371         return STATUS_SUCCESS;
6372     }
6373 
6374     if (fileref->fcb->ads) {
6375         fileref->dirty = false;
6376         return STATUS_SUCCESS;
6377     }
6378 
6379     if (fileref->created) {
6380         uint16_t disize;
6381         DIR_ITEM *di, *di2;
6382         uint32_t crc32;
6383 
6384         crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6385 
6386         disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6387         di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6388         if (!di) {
6389             ERR("out of memory\n");
6390             return STATUS_INSUFFICIENT_RESOURCES;
6391         }
6392 
6393         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6394             di->key.obj_id = fileref->fcb->inode;
6395             di->key.obj_type = TYPE_INODE_ITEM;
6396             di->key.offset = 0;
6397         } else { // subvolume
6398             di->key.obj_id = fileref->fcb->subvol->id;
6399             di->key.obj_type = TYPE_ROOT_ITEM;
6400             di->key.offset = 0xffffffffffffffff;
6401         }
6402 
6403         di->transid = fileref->fcb->Vcb->superblock.generation;
6404         di->m = 0;
6405         di->n = (uint16_t)fileref->dc->utf8.Length;
6406         di->type = fileref->fcb->type;
6407         RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6408 
6409         di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6410         if (!di2) {
6411             ERR("out of memory\n");
6412             return STATUS_INSUFFICIENT_RESOURCES;
6413         }
6414 
6415         RtlCopyMemory(di2, di, disize);
6416 
6417         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6418                                         fileref->dc->index, di, disize, Batch_Insert);
6419         if (!NT_SUCCESS(Status)) {
6420             ERR("insert_tree_item_batch returned %08x\n", Status);
6421             return Status;
6422         }
6423 
6424         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6425                                         di2, disize, Batch_DirItem);
6426         if (!NT_SUCCESS(Status)) {
6427             ERR("insert_tree_item_batch returned %08x\n", Status);
6428             return Status;
6429         }
6430 
6431         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6432             INODE_REF* ir;
6433 
6434             ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6435             if (!ir) {
6436                 ERR("out of memory\n");
6437                 return STATUS_INSUFFICIENT_RESOURCES;
6438             }
6439 
6440             ir->index = fileref->dc->index;
6441             ir->n = fileref->dc->utf8.Length;
6442             RtlCopyMemory(ir->name, fileref->dc->utf8.Buffer, ir->n);
6443 
6444             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6445                                             ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef);
6446             if (!NT_SUCCESS(Status)) {
6447                 ERR("insert_tree_item_batch returned %08x\n", Status);
6448                 return Status;
6449             }
6450         } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) {
6451             ULONG rrlen;
6452             ROOT_REF* rr;
6453 
6454             rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6455 
6456             rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6457             if (!rr) {
6458                 ERR("out of memory\n");
6459                 return STATUS_INSUFFICIENT_RESOURCES;
6460             }
6461 
6462             rr->dir = fileref->parent->fcb->inode;
6463             rr->index = fileref->dc->index;
6464             rr->n = fileref->dc->utf8.Length;
6465             RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6466 
6467             Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6468             if (!NT_SUCCESS(Status)) {
6469                 ERR("add_root_ref returned %08x\n", Status);
6470                 return Status;
6471             }
6472 
6473             Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6474             if (!NT_SUCCESS(Status)) {
6475                 ERR("update_root_backref returned %08x\n", Status);
6476                 return Status;
6477             }
6478         }
6479 
6480         fileref->created = false;
6481     } else if (fileref->deleted) {
6482         uint32_t crc32;
6483         ANSI_STRING* name;
6484         DIR_ITEM* di;
6485 
6486         name = &fileref->oldutf8;
6487 
6488         crc32 = calc_crc32c(0xfffffffe, (uint8_t*)name->Buffer, name->Length);
6489 
6490         di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG);
6491         if (!di) {
6492             ERR("out of memory\n");
6493             return STATUS_INSUFFICIENT_RESOURCES;
6494         }
6495 
6496         di->m = 0;
6497         di->n = name->Length;
6498         RtlCopyMemory(di->name, name->Buffer, name->Length);
6499 
6500         // delete DIR_ITEM (0x54)
6501 
6502         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6503                                         crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem);
6504         if (!NT_SUCCESS(Status)) {
6505             ERR("insert_tree_item_batch returned %08x\n", Status);
6506             return Status;
6507         }
6508 
6509         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6510             INODE_REF* ir;
6511 
6512             // delete INODE_REF (0xc)
6513 
6514             ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG);
6515             if (!ir) {
6516                 ERR("out of memory\n");
6517                 return STATUS_INSUFFICIENT_RESOURCES;
6518             }
6519 
6520             ir->index = fileref->oldindex;
6521             ir->n = name->Length;
6522             RtlCopyMemory(ir->name, name->Buffer, name->Length);
6523 
6524             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF,
6525                                             fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef);
6526             if (!NT_SUCCESS(Status)) {
6527                 ERR("insert_tree_item_batch returned %08x\n", Status);
6528                 return Status;
6529             }
6530         } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6531             Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp);
6532             if (!NT_SUCCESS(Status)) {
6533                 ERR("delete_root_ref returned %08x\n", Status);
6534                 return Status;
6535             }
6536 
6537             Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6538             if (!NT_SUCCESS(Status)) {
6539                 ERR("update_root_backref returned %08x\n", Status);
6540                 return Status;
6541             }
6542         }
6543 
6544         // delete DIR_INDEX (0x60)
6545 
6546         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6547                                         fileref->oldindex, NULL, 0, Batch_Delete);
6548         if (!NT_SUCCESS(Status)) {
6549             ERR("insert_tree_item_batch returned %08x\n", Status);
6550             return Status;
6551         }
6552 
6553         if (fileref->oldutf8.Buffer) {
6554             ExFreePool(fileref->oldutf8.Buffer);
6555             fileref->oldutf8.Buffer = NULL;
6556         }
6557     } else { // rename or change type
6558         PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->dc->utf8;
6559         uint32_t crc32, oldcrc32;
6560         uint16_t disize;
6561         DIR_ITEM *olddi, *di, *di2;
6562 
6563         crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6564 
6565         if (!fileref->oldutf8.Buffer)
6566             oldcrc32 = crc32;
6567         else
6568             oldcrc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
6569 
6570         olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG);
6571         if (!olddi) {
6572             ERR("out of memory\n");
6573             return STATUS_INSUFFICIENT_RESOURCES;
6574         }
6575 
6576         olddi->m = 0;
6577         olddi->n = (uint16_t)oldutf8->Length;
6578         RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length);
6579 
6580         // delete DIR_ITEM (0x54)
6581 
6582         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6583                                         oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem);
6584         if (!NT_SUCCESS(Status)) {
6585             ERR("insert_tree_item_batch returned %08x\n", Status);
6586             ExFreePool(olddi);
6587             return Status;
6588         }
6589 
6590         // add DIR_ITEM (0x54)
6591 
6592         disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6593         di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6594         if (!di) {
6595             ERR("out of memory\n");
6596             return STATUS_INSUFFICIENT_RESOURCES;
6597         }
6598 
6599         di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6600         if (!di2) {
6601             ERR("out of memory\n");
6602             ExFreePool(di);
6603             return STATUS_INSUFFICIENT_RESOURCES;
6604         }
6605 
6606         if (fileref->dc)
6607             di->key = fileref->dc->key;
6608         else if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6609             di->key.obj_id = fileref->fcb->inode;
6610             di->key.obj_type = TYPE_INODE_ITEM;
6611             di->key.offset = 0;
6612         } else { // subvolume
6613             di->key.obj_id = fileref->fcb->subvol->id;
6614             di->key.obj_type = TYPE_ROOT_ITEM;
6615             di->key.offset = 0xffffffffffffffff;
6616         }
6617 
6618         di->transid = fileref->fcb->Vcb->superblock.generation;
6619         di->m = 0;
6620         di->n = (uint16_t)fileref->dc->utf8.Length;
6621         di->type = fileref->fcb->type;
6622         RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6623 
6624         RtlCopyMemory(di2, di, disize);
6625 
6626         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6627                                         di, disize, Batch_DirItem);
6628         if (!NT_SUCCESS(Status)) {
6629             ERR("insert_tree_item_batch returned %08x\n", Status);
6630             ExFreePool(di2);
6631             ExFreePool(di);
6632             return Status;
6633         }
6634 
6635         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6636             INODE_REF *ir, *ir2;
6637 
6638             // delete INODE_REF (0xc)
6639 
6640             ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG);
6641             if (!ir) {
6642                 ERR("out of memory\n");
6643                 ExFreePool(di2);
6644                 return STATUS_INSUFFICIENT_RESOURCES;
6645             }
6646 
6647             ir->index = fileref->dc->index;
6648             ir->n = oldutf8->Length;
6649             RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n);
6650 
6651             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6652                                             ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef);
6653             if (!NT_SUCCESS(Status)) {
6654                 ERR("insert_tree_item_batch returned %08x\n", Status);
6655                 ExFreePool(ir);
6656                 ExFreePool(di2);
6657                 return Status;
6658             }
6659 
6660             // add INODE_REF (0xc)
6661 
6662             ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6663             if (!ir2) {
6664                 ERR("out of memory\n");
6665                 ExFreePool(di2);
6666                 return STATUS_INSUFFICIENT_RESOURCES;
6667             }
6668 
6669             ir2->index = fileref->dc->index;
6670             ir2->n = fileref->dc->utf8.Length;
6671             RtlCopyMemory(ir2->name, fileref->dc->utf8.Buffer, ir2->n);
6672 
6673             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6674                                             ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef);
6675             if (!NT_SUCCESS(Status)) {
6676                 ERR("insert_tree_item_batch returned %08x\n", Status);
6677                 ExFreePool(ir2);
6678                 ExFreePool(di2);
6679                 return Status;
6680             }
6681         } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6682             ULONG rrlen;
6683             ROOT_REF* rr;
6684 
6685             Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp);
6686             if (!NT_SUCCESS(Status)) {
6687                 ERR("delete_root_ref returned %08x\n", Status);
6688                 ExFreePool(di2);
6689                 return Status;
6690             }
6691 
6692             rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6693 
6694             rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6695             if (!rr) {
6696                 ERR("out of memory\n");
6697                 ExFreePool(di2);
6698                 return STATUS_INSUFFICIENT_RESOURCES;
6699             }
6700 
6701             rr->dir = fileref->parent->fcb->inode;
6702             rr->index = fileref->dc->index;
6703             rr->n = fileref->dc->utf8.Length;
6704             RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6705 
6706             Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6707             if (!NT_SUCCESS(Status)) {
6708                 ERR("add_root_ref returned %08x\n", Status);
6709                 ExFreePool(di2);
6710                 return Status;
6711             }
6712 
6713             Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6714             if (!NT_SUCCESS(Status)) {
6715                 ERR("update_root_backref returned %08x\n", Status);
6716                 ExFreePool(di2);
6717                 return Status;
6718             }
6719         }
6720 
6721         // delete DIR_INDEX (0x60)
6722 
6723         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6724                                         fileref->dc->index, NULL, 0, Batch_Delete);
6725         if (!NT_SUCCESS(Status)) {
6726             ERR("insert_tree_item_batch returned %08x\n", Status);
6727             ExFreePool(di2);
6728             return Status;
6729         }
6730 
6731         // add DIR_INDEX (0x60)
6732 
6733        Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6734                                        fileref->dc->index, di2, disize, Batch_Insert);
6735        if (!NT_SUCCESS(Status)) {
6736             ERR("insert_tree_item_batch returned %08x\n", Status);
6737             ExFreePool(di2);
6738             return Status;
6739         }
6740 
6741         if (fileref->oldutf8.Buffer) {
6742             ExFreePool(fileref->oldutf8.Buffer);
6743             fileref->oldutf8.Buffer = NULL;
6744         }
6745     }
6746 
6747     fileref->dirty = false;
6748 
6749     return STATUS_SUCCESS;
6750 }
6751 
6752 static void flush_disk_caches(device_extension* Vcb) {
6753     LIST_ENTRY* le;
6754     ioctl_context context;
6755     ULONG num;
6756 #ifdef __REACTOS__
6757     unsigned int i;
6758 #endif
6759 
6760     context.left = 0;
6761 
6762     le = Vcb->devices.Flink;
6763 
6764     while (le != &Vcb->devices) {
6765         device* dev = CONTAINING_RECORD(le, device, list_entry);
6766 
6767         if (dev->devobj && !dev->readonly && dev->can_flush)
6768             context.left++;
6769 
6770         le = le->Flink;
6771     }
6772 
6773     if (context.left == 0)
6774         return;
6775 
6776     num = 0;
6777 
6778     KeInitializeEvent(&context.Event, NotificationEvent, false);
6779 
6780     context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
6781     if (!context.stripes) {
6782         ERR("out of memory\n");
6783         return;
6784     }
6785 
6786     RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
6787 
6788     le = Vcb->devices.Flink;
6789 
6790     while (le != &Vcb->devices) {
6791         device* dev = CONTAINING_RECORD(le, device, list_entry);
6792 
6793         if (dev->devobj && !dev->readonly && dev->can_flush) {
6794             PIO_STACK_LOCATION IrpSp;
6795             ioctl_context_stripe* stripe = &context.stripes[num];
6796 
6797             RtlZeroMemory(&stripe->apte, sizeof(ATA_PASS_THROUGH_EX));
6798 
6799             stripe->apte.Length = sizeof(ATA_PASS_THROUGH_EX);
6800             stripe->apte.TimeOutValue = 5;
6801             stripe->apte.CurrentTaskFile[6] = IDE_COMMAND_FLUSH_CACHE;
6802 
6803             stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);
6804 
6805             if (!stripe->Irp) {
6806                 ERR("IoAllocateIrp failed\n");
6807                 goto nextdev;
6808             }
6809 
6810             IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
6811             IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
6812             IrpSp->FileObject = dev->fileobj;
6813 
6814             IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_ATA_PASS_THROUGH;
6815             IrpSp->Parameters.DeviceIoControl.InputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
6816             IrpSp->Parameters.DeviceIoControl.OutputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
6817 
6818             stripe->Irp->AssociatedIrp.SystemBuffer = &stripe->apte;
6819             stripe->Irp->Flags |= IRP_BUFFERED_IO | IRP_INPUT_OPERATION;
6820             stripe->Irp->UserBuffer = &stripe->apte;
6821             stripe->Irp->UserIosb = &stripe->iosb;
6822 
6823             IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);
6824 
6825             IoCallDriver(dev->devobj, stripe->Irp);
6826 
6827 nextdev:
6828             num++;
6829         }
6830 
6831         le = le->Flink;
6832     }
6833 
6834     KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
6835 
6836 #ifndef __REACTOS__
6837     for (unsigned int i = 0; i < num; i++) {
6838 #else
6839     for (i = 0; i < num; i++) {
6840 #endif
6841         if (context.stripes[i].Irp)
6842             IoFreeIrp(context.stripes[i].Irp);
6843     }
6844 
6845     ExFreePool(context.stripes);
6846 }
6847 
6848 static NTSTATUS flush_changed_dev_stats(device_extension* Vcb, device* dev, PIRP Irp) {
6849     NTSTATUS Status;
6850     KEY searchkey;
6851     traverse_ptr tp;
6852     uint16_t statslen;
6853     uint64_t* stats;
6854 
6855     searchkey.obj_id = 0;
6856     searchkey.obj_type = TYPE_DEV_STATS;
6857     searchkey.offset = dev->devitem.dev_id;
6858 
6859     Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp);
6860     if (!NT_SUCCESS(Status)) {
6861         ERR("find_item returned %08x\n", Status);
6862         return Status;
6863     }
6864 
6865     if (!keycmp(tp.item->key, searchkey)) {
6866         Status = delete_tree_item(Vcb, &tp);
6867         if (!NT_SUCCESS(Status)) {
6868             ERR("delete_tree_item returned %08x\n", Status);
6869             return Status;
6870         }
6871     }
6872 
6873     statslen = sizeof(uint64_t) * 5;
6874     stats = ExAllocatePoolWithTag(PagedPool, statslen, ALLOC_TAG);
6875     if (!stats) {
6876         ERR("out of memory\n");
6877         return STATUS_INSUFFICIENT_RESOURCES;
6878     }
6879 
6880     RtlCopyMemory(stats, dev->stats, statslen);
6881 
6882     Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, dev->devitem.dev_id, stats, statslen, NULL, Irp);
6883     if (!NT_SUCCESS(Status)) {
6884         ERR("insert_tree_item returned %08x\n", Status);
6885         ExFreePool(stats);
6886         return Status;
6887     }
6888 
6889     return STATUS_SUCCESS;
6890 }
6891 
6892 static NTSTATUS flush_subvol(device_extension* Vcb, root* r, PIRP Irp) {
6893     NTSTATUS Status;
6894 
6895     if (r != Vcb->root_root && r != Vcb->chunk_root) {
6896         KEY searchkey;
6897         traverse_ptr tp;
6898         ROOT_ITEM* ri;
6899 
6900         searchkey.obj_id = r->id;
6901         searchkey.obj_type = TYPE_ROOT_ITEM;
6902         searchkey.offset = 0xffffffffffffffff;
6903 
6904         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6905         if (!NT_SUCCESS(Status)) {
6906             ERR("error - find_item returned %08x\n", Status);
6907             return Status;
6908         }
6909 
6910         if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6911             ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
6912             return STATUS_INTERNAL_ERROR;
6913         }
6914 
6915         ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6916         if (!ri) {
6917             ERR("out of memory\n");
6918             return STATUS_INSUFFICIENT_RESOURCES;
6919         }
6920 
6921         RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
6922 
6923         Status = delete_tree_item(Vcb, &tp);
6924         if (!NT_SUCCESS(Status)) {
6925             ERR("delete_tree_item returned %08x\n", Status);
6926             return Status;
6927         }
6928 
6929         Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6930         if (!NT_SUCCESS(Status)) {
6931             ERR("insert_tree_item returned %08x\n", Status);
6932             return Status;
6933         }
6934     }
6935 
6936     if (r->received) {
6937         KEY searchkey;
6938         traverse_ptr tp;
6939 
6940         if (!Vcb->uuid_root) {
6941             root* uuid_root;
6942 
6943             TRACE("uuid root doesn't exist, creating it\n");
6944 
6945             Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, false, 0, Irp);
6946 
6947             if (!NT_SUCCESS(Status)) {
6948                 ERR("create_root returned %08x\n", Status);
6949                 return Status;
6950             }
6951 
6952             Vcb->uuid_root = uuid_root;
6953         }
6954 
6955         RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid, sizeof(uint64_t));
6956         searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
6957         RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
6958 
6959         Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
6960         if (!NT_SUCCESS(Status)) {
6961             ERR("find_item returned %08x\n", Status);
6962             return Status;
6963         }
6964 
6965         if (!keycmp(tp.item->key, searchkey)) {
6966             if (tp.item->size + sizeof(uint64_t) <= Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)) {
6967                 uint64_t* ids;
6968 
6969                 ids = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(uint64_t), ALLOC_TAG);
6970                 if (!ids) {
6971                     ERR("out of memory\n");
6972                     return STATUS_INSUFFICIENT_RESOURCES;
6973                 }
6974 
6975                 RtlCopyMemory(ids, tp.item->data, tp.item->size);
6976                 RtlCopyMemory((uint8_t*)ids + tp.item->size, &r->id, sizeof(uint64_t));
6977 
6978                 Status = delete_tree_item(Vcb, &tp);
6979                 if (!NT_SUCCESS(Status)) {
6980                     ERR("delete_tree_item returned %08x\n", Status);
6981                     ExFreePool(ids);
6982                     return Status;
6983                 }
6984 
6985                 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ids, tp.item->size + sizeof(uint64_t), NULL, Irp);
6986                 if (!NT_SUCCESS(Status)) {
6987                     ERR("insert_tree_item returned %08x\n", Status);
6988                     ExFreePool(ids);
6989                     return Status;
6990                 }
6991             }
6992         } else {
6993             uint64_t* root_num;
6994 
6995             root_num = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t), ALLOC_TAG);
6996             if (!root_num) {
6997                 ERR("out of memory\n");
6998                 return STATUS_INSUFFICIENT_RESOURCES;
6999             }
7000 
7001             *root_num = r->id;
7002 
7003             Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(uint64_t), NULL, Irp);
7004             if (!NT_SUCCESS(Status)) {
7005                 ERR("insert_tree_item returned %08x\n", Status);
7006                 ExFreePool(root_num);
7007                 return Status;
7008             }
7009         }
7010 
7011         r->received = false;
7012     }
7013 
7014     r->dirty = false;
7015 
7016     return STATUS_SUCCESS;
7017 }
7018 
7019 static NTSTATUS test_not_full(device_extension* Vcb) {
7020     uint64_t reserve, could_alloc, free_space;
7021     LIST_ENTRY* le;
7022 
7023     // This function ensures we drop into readonly mode if we're about to leave very little
7024     // space for metadata - this is similar to the "global reserve" of the Linux driver.
7025     // Otherwise we might completely fill our space, at which point due to COW we can't
7026     // delete anything in order to fix this.
7027 
7028     reserve = Vcb->extent_root->root_item.bytes_used;
7029     reserve += Vcb->root_root->root_item.bytes_used;
7030     if (Vcb->checksum_root) reserve += Vcb->checksum_root->root_item.bytes_used;
7031 
7032     reserve = max(reserve, 0x1000000); // 16 M
7033     reserve = min(reserve, 0x20000000); // 512 M
7034 
7035     // Find out how much space would be available for new metadata chunks
7036 
7037     could_alloc = 0;
7038 
7039     if (Vcb->metadata_flags & BLOCK_FLAG_RAID5) {
7040         uint64_t s1 = 0, s2 = 0, s3 = 0;
7041 
7042         le = Vcb->devices.Flink;
7043         while (le != &Vcb->devices) {
7044             device* dev = CONTAINING_RECORD(le, device, list_entry);
7045 
7046             if (!dev->readonly) {
7047                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7048 
7049                 if (space >= s1) {
7050                     s3 = s2;
7051                     s2 = s1;
7052                     s1 = space;
7053                 } else if (space >= s2) {
7054                     s3 = s2;
7055                     s2 = space;
7056                 } else if (space >= s3)
7057                     s3 = space;
7058             }
7059 
7060             le = le->Flink;
7061         }
7062 
7063         could_alloc = s3 * 2;
7064     } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID6)) {
7065         uint64_t s1 = 0, s2 = 0, s3 = 0, s4 = 0;
7066 
7067         le = Vcb->devices.Flink;
7068         while (le != &Vcb->devices) {
7069             device* dev = CONTAINING_RECORD(le, device, list_entry);
7070 
7071             if (!dev->readonly) {
7072                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7073 
7074                 if (space >= s1) {
7075                     s4 = s3;
7076                     s3 = s2;
7077                     s2 = s1;
7078                     s1 = space;
7079                 } else if (space >= s2) {
7080                     s4 = s3;
7081                     s3 = s2;
7082                     s2 = space;
7083                 } else if (space >= s3) {
7084                     s4 = s3;
7085                     s3 = space;
7086                 } else if (space >= s4)
7087                     s4 = space;
7088             }
7089 
7090             le = le->Flink;
7091         }
7092 
7093         could_alloc = s4 * 2;
7094     } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1)) {
7095         uint64_t s1 = 0, s2 = 0;
7096 
7097         le = Vcb->devices.Flink;
7098         while (le != &Vcb->devices) {
7099             device* dev = CONTAINING_RECORD(le, device, list_entry);
7100 
7101             if (!dev->readonly) {
7102                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7103 
7104                 if (space >= s1) {
7105                     s2 = s1;
7106                     s1 = space;
7107                 } else if (space >= s2)
7108                     s2 = space;
7109             }
7110 
7111             le = le->Flink;
7112         }
7113 
7114         if (Vcb->metadata_flags & BLOCK_FLAG_RAID1)
7115             could_alloc = s2;
7116         else // RAID0
7117             could_alloc = s2 * 2;
7118     } else if (Vcb->metadata_flags & BLOCK_FLAG_DUPLICATE) {
7119         le = Vcb->devices.Flink;
7120         while (le != &Vcb->devices) {
7121             device* dev = CONTAINING_RECORD(le, device, list_entry);
7122 
7123             if (!dev->readonly) {
7124                 uint64_t space = (dev->devitem.num_bytes - dev->devitem.bytes_used) / 2;
7125 
7126                 could_alloc = max(could_alloc, space);
7127             }
7128 
7129             le = le->Flink;
7130         }
7131     } else { // SINGLE
7132         le = Vcb->devices.Flink;
7133         while (le != &Vcb->devices) {
7134             device* dev = CONTAINING_RECORD(le, device, list_entry);
7135 
7136             if (!dev->readonly) {
7137                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7138 
7139                 could_alloc = max(could_alloc, space);
7140             }
7141 
7142             le = le->Flink;
7143         }
7144     }
7145 
7146     if (could_alloc >= reserve)
7147         return STATUS_SUCCESS;
7148 
7149     free_space = 0;
7150 
7151     le = Vcb->chunks.Flink;
7152     while (le != &Vcb->chunks) {
7153         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7154 
7155         if (!c->reloc && !c->readonly && c->chunk_item->type & BLOCK_FLAG_METADATA) {
7156             free_space += c->chunk_item->size - c->used;
7157 
7158             if (free_space + could_alloc >= reserve)
7159                 return STATUS_SUCCESS;
7160         }
7161 
7162         le = le->Flink;
7163     }
7164 
7165     return STATUS_DISK_FULL;
7166 }
7167 
7168 static NTSTATUS check_for_orphans_root(device_extension* Vcb, root* r, PIRP Irp) {
7169     NTSTATUS Status;
7170     KEY searchkey;
7171     traverse_ptr tp;
7172     LIST_ENTRY rollback;
7173 
7174     TRACE("(%p, %p)\n", Vcb, r);
7175 
7176     InitializeListHead(&rollback);
7177 
7178     searchkey.obj_id = BTRFS_ORPHAN_INODE_OBJID;
7179     searchkey.obj_type = TYPE_ORPHAN_INODE;
7180     searchkey.offset = 0;
7181 
7182     Status = find_item(Vcb, r, &tp, &searchkey, false, Irp);
7183     if (!NT_SUCCESS(Status)) {
7184         ERR("find_item returned %08x\n", Status);
7185         return Status;
7186     }
7187 
7188     do {
7189         traverse_ptr next_tp;
7190 
7191         if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
7192             break;
7193 
7194         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
7195             fcb* fcb;
7196 
7197             TRACE("removing orphaned inode %I64x\n", tp.item->key.offset);
7198 
7199             Status = open_fcb(Vcb, r, tp.item->key.offset, 0, NULL, false, NULL, &fcb, PagedPool, Irp);
7200             if (!NT_SUCCESS(Status))
7201                 ERR("open_fcb returned %08x\n", Status);
7202             else {
7203                 if (fcb->inode_item.st_nlink == 0) {
7204                     if (fcb->type != BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0) {
7205                         Status = excise_extents(Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size), Irp, &rollback);
7206                         if (!NT_SUCCESS(Status)) {
7207                             ERR("excise_extents returned %08x\n", Status);
7208                             goto end;
7209                         }
7210                     }
7211 
7212                     fcb->deleted = true;
7213 
7214                     mark_fcb_dirty(fcb);
7215                 }
7216 
7217                 free_fcb(fcb);
7218 
7219                 Status = delete_tree_item(Vcb, &tp);
7220                 if (!NT_SUCCESS(Status)) {
7221                     ERR("delete_tree_item returned %08x\n", Status);
7222                     goto end;
7223                 }
7224             }
7225         }
7226 
7227         if (find_next_item(Vcb, &tp, &next_tp, false, Irp))
7228             tp = next_tp;
7229         else
7230             break;
7231     } while (true);
7232 
7233     Status = STATUS_SUCCESS;
7234 
7235     clear_rollback(&rollback);
7236 
7237 end:
7238     do_rollback(Vcb, &rollback);
7239 
7240     return Status;
7241 }
7242 
7243 static NTSTATUS check_for_orphans(device_extension* Vcb, PIRP Irp) {
7244     NTSTATUS Status;
7245     LIST_ENTRY* le;
7246 
7247     if (IsListEmpty(&Vcb->dirty_filerefs))
7248         return STATUS_SUCCESS;
7249 
7250     le = Vcb->dirty_filerefs.Flink;
7251     while (le != &Vcb->dirty_filerefs) {
7252         file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry_dirty);
7253 
7254         if (!fr->fcb->subvol->checked_for_orphans) {
7255             Status = check_for_orphans_root(Vcb, fr->fcb->subvol, Irp);
7256             if (!NT_SUCCESS(Status)) {
7257                 ERR("check_for_orphans_root returned %08x\n", Status);
7258                 return Status;
7259             }
7260 
7261             fr->fcb->subvol->checked_for_orphans = true;
7262         }
7263 
7264         le = le->Flink;
7265     }
7266 
7267     return STATUS_SUCCESS;
7268 }
7269 
7270 static NTSTATUS do_write2(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
7271     NTSTATUS Status;
7272     LIST_ENTRY *le, batchlist;
7273     bool cache_changed = false;
7274     volume_device_extension* vde;
7275     bool no_cache = false;
7276 #ifdef DEBUG_FLUSH_TIMES
7277     uint64_t filerefs = 0, fcbs = 0;
7278     LARGE_INTEGER freq, time1, time2;
7279 #endif
7280 #ifdef DEBUG_WRITE_LOOPS
7281     UINT loops = 0;
7282 #endif
7283 
7284     TRACE("(%p)\n", Vcb);
7285 
7286     InitializeListHead(&batchlist);
7287 
7288 #ifdef DEBUG_FLUSH_TIMES
7289     time1 = KeQueryPerformanceCounter(&freq);
7290 #endif
7291 
7292     Status = check_for_orphans(Vcb, Irp);
7293     if (!NT_SUCCESS(Status)) {
7294         ERR("check_for_orphans returned %08x\n", Status);
7295         return Status;
7296     }
7297 
7298     ExAcquireResourceExclusiveLite(&Vcb->dirty_filerefs_lock, true);
7299 
7300     while (!IsListEmpty(&Vcb->dirty_filerefs)) {
7301         file_ref* fr = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_filerefs), file_ref, list_entry_dirty);
7302 
7303         flush_fileref(fr, &batchlist, Irp);
7304         free_fileref(fr);
7305 
7306 #ifdef DEBUG_FLUSH_TIMES
7307         filerefs++;
7308 #endif
7309     }
7310 
7311     ExReleaseResourceLite(&Vcb->dirty_filerefs_lock);
7312 
7313     Status = commit_batch_list(Vcb, &batchlist, Irp);
7314     if (!NT_SUCCESS(Status)) {
7315         ERR("commit_batch_list returned %08x\n", Status);
7316         return Status;
7317     }
7318 
7319 #ifdef DEBUG_FLUSH_TIMES
7320     time2 = KeQueryPerformanceCounter(NULL);
7321 
7322     ERR("flushed %I64u filerefs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7323 
7324     time1 = KeQueryPerformanceCounter(&freq);
7325 #endif
7326 
7327     // We process deleted streams first, so we don't run over our xattr
7328     // limit unless we absolutely have to.
7329     // We also process deleted normal files, to avoid any problems
7330     // caused by inode collisions.
7331 
7332     ExAcquireResourceExclusiveLite(&Vcb->dirty_fcbs_lock, true);
7333 
7334     le = Vcb->dirty_fcbs.Flink;
7335     while (le != &Vcb->dirty_fcbs) {
7336         fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7337         LIST_ENTRY* le2 = le->Flink;
7338 
7339         if (fcb->deleted) {
7340             ExAcquireResourceExclusiveLite(fcb->Header.Resource, true);
7341             Status = flush_fcb(fcb, false, &batchlist, Irp);
7342             ExReleaseResourceLite(fcb->Header.Resource);
7343 
7344             free_fcb(fcb);
7345 
7346             if (!NT_SUCCESS(Status)) {
7347                 ERR("flush_fcb returned %08x\n", Status);
7348                 clear_batch_list(Vcb, &batchlist);
7349                 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7350                 return Status;
7351             }
7352 
7353 #ifdef DEBUG_FLUSH_TIMES
7354             fcbs++;
7355 #endif
7356         }
7357 
7358         le = le2;
7359     }
7360 
7361     Status = commit_batch_list(Vcb, &batchlist, Irp);
7362     if (!NT_SUCCESS(Status)) {
7363         ERR("commit_batch_list returned %08x\n", Status);
7364         ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7365         return Status;
7366     }
7367 
7368     le = Vcb->dirty_fcbs.Flink;
7369     while (le != &Vcb->dirty_fcbs) {
7370         fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7371         LIST_ENTRY* le2 = le->Flink;
7372 
7373         if (fcb->subvol != Vcb->root_root) {
7374             ExAcquireResourceExclusiveLite(fcb->Header.Resource, true);
7375             Status = flush_fcb(fcb, false, &batchlist, Irp);
7376             ExReleaseResourceLite(fcb->Header.Resource);
7377             free_fcb(fcb);
7378 
7379             if (!NT_SUCCESS(Status)) {
7380                 ERR("flush_fcb returned %08x\n", Status);
7381                 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7382                 return Status;
7383             }
7384 
7385 #ifdef DEBUG_FLUSH_TIMES
7386             fcbs++;
7387 #endif
7388         }
7389 
7390         le = le2;
7391     }
7392 
7393     ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7394 
7395     Status = commit_batch_list(Vcb, &batchlist, Irp);
7396     if (!NT_SUCCESS(Status)) {
7397         ERR("commit_batch_list returned %08x\n", Status);
7398         return Status;
7399     }
7400 
7401 #ifdef DEBUG_FLUSH_TIMES
7402     time2 = KeQueryPerformanceCounter(NULL);
7403 
7404     ERR("flushed %I64u fcbs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7405 #endif
7406 
7407     // no need to get dirty_subvols_lock here, as we have tree_lock exclusively
7408     while (!IsListEmpty(&Vcb->dirty_subvols)) {
7409         root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_subvols), root, list_entry_dirty);
7410 
7411         Status = flush_subvol(Vcb, r, Irp);
7412         if (!NT_SUCCESS(Status)) {
7413             ERR("flush_subvol returned %08x\n", Status);
7414             return Status;
7415         }
7416     }
7417 
7418     if (!IsListEmpty(&Vcb->drop_roots)) {
7419         Status = drop_roots(Vcb, Irp, rollback);
7420 
7421         if (!NT_SUCCESS(Status)) {
7422             ERR("drop_roots returned %08x\n", Status);
7423             return Status;
7424         }
7425     }
7426 
7427     Status = update_chunks(Vcb, &batchlist, Irp, rollback);
7428 
7429     if (!NT_SUCCESS(Status)) {
7430         ERR("update_chunks returned %08x\n", Status);
7431         return Status;
7432     }
7433 
7434     Status = commit_batch_list(Vcb, &batchlist, Irp);
7435 
7436     // If only changing superblock, e.g. changing label, we still need to rewrite
7437     // the root tree so the generations match, otherwise you won't be able to mount on Linux.
7438     if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
7439         KEY searchkey;
7440 
7441         traverse_ptr tp;
7442 
7443         searchkey.obj_id = 0;
7444         searchkey.obj_type = 0;
7445         searchkey.offset = 0;
7446 
7447         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
7448         if (!NT_SUCCESS(Status)) {
7449             ERR("error - find_item returned %08x\n", Status);
7450             return Status;
7451         }
7452 
7453         Vcb->root_root->treeholder.tree->write = true;
7454     }
7455 
7456     // make sure we always update the extent tree
7457     Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp);
7458     if (!NT_SUCCESS(Status)) {
7459         ERR("add_root_item_to_cache returned %08x\n", Status);
7460         return Status;
7461     }
7462 
7463     if (Vcb->stats_changed) {
7464         le = Vcb->devices.Flink;
7465         while (le != &Vcb->devices) {
7466             device* dev = CONTAINING_RECORD(le, device, list_entry);
7467 
7468             if (dev->stats_changed) {
7469                 Status = flush_changed_dev_stats(Vcb, dev, Irp);
7470                 if (!NT_SUCCESS(Status)) {
7471                     ERR("flush_changed_dev_stats returned %08x\n", Status);
7472                     return Status;
7473                 }
7474                 dev->stats_changed = false;
7475             }
7476 
7477             le = le->Flink;
7478         }
7479 
7480         Vcb->stats_changed = false;
7481     }
7482 
7483     do {
7484         Status = add_parents(Vcb, Irp);
7485         if (!NT_SUCCESS(Status)) {
7486             ERR("add_parents returned %08x\n", Status);
7487             goto end;
7488         }
7489 
7490         Status = allocate_tree_extents(Vcb, Irp, rollback);
7491         if (!NT_SUCCESS(Status)) {
7492             ERR("allocate_tree_extents returned %08x\n", Status);
7493             goto end;
7494         }
7495 
7496         Status = do_splits(Vcb, Irp, rollback);
7497         if (!NT_SUCCESS(Status)) {
7498             ERR("do_splits returned %08x\n", Status);
7499             goto end;
7500         }
7501 
7502         Status = update_chunk_usage(Vcb, Irp, rollback);
7503         if (!NT_SUCCESS(Status)) {
7504             ERR("update_chunk_usage returned %08x\n", Status);
7505             goto end;
7506         }
7507 
7508         if (!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
7509             if (!no_cache) {
7510                 Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
7511                 if (!NT_SUCCESS(Status)) {
7512                     WARN("allocate_cache returned %08x\n", Status);
7513                     no_cache = true;
7514                     cache_changed = false;
7515                 }
7516             }
7517         } else {
7518             Status = update_chunk_caches_tree(Vcb, Irp);
7519             if (!NT_SUCCESS(Status)) {
7520                 ERR("update_chunk_caches_tree returned %08x\n", Status);
7521                 goto end;
7522             }
7523         }
7524 
7525 #ifdef DEBUG_WRITE_LOOPS
7526         loops++;
7527 
7528         if (cache_changed)
7529             ERR("cache has changed, looping again\n");
7530 #endif
7531     } while (cache_changed || !trees_consistent(Vcb));
7532 
7533 #ifdef DEBUG_WRITE_LOOPS
7534     ERR("%u loops\n", loops);
7535 #endif
7536 
7537     TRACE("trees consistent\n");
7538 
7539     Status = update_root_root(Vcb, no_cache, Irp, rollback);
7540     if (!NT_SUCCESS(Status)) {
7541         ERR("update_root_root returned %08x\n", Status);
7542         goto end;
7543     }
7544 
7545     Status = write_trees(Vcb, Irp);
7546     if (!NT_SUCCESS(Status)) {
7547         ERR("write_trees returned %08x\n", Status);
7548         goto end;
7549     }
7550 
7551     Status = test_not_full(Vcb);
7552     if (!NT_SUCCESS(Status)) {
7553         ERR("test_not_full returned %08x\n", Status);
7554         goto end;
7555     }
7556 
7557 #ifdef DEBUG_PARANOID
7558     le = Vcb->trees.Flink;
7559     while (le != &Vcb->trees) {
7560         tree* t = CONTAINING_RECORD(le, tree, list_entry);
7561         KEY searchkey;
7562         traverse_ptr tp;
7563 
7564         searchkey.obj_id = t->header.address;
7565         searchkey.obj_type = TYPE_METADATA_ITEM;
7566         searchkey.offset = 0xffffffffffffffff;
7567 
7568         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
7569         if (!NT_SUCCESS(Status)) {
7570             ERR("error - find_item returned %08x\n", Status);
7571             goto end;
7572         }
7573 
7574         if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7575             searchkey.obj_id = t->header.address;
7576             searchkey.obj_type = TYPE_EXTENT_ITEM;
7577             searchkey.offset = 0xffffffffffffffff;
7578 
7579             Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
7580             if (!NT_SUCCESS(Status)) {
7581                 ERR("error - find_item returned %08x\n", Status);
7582                 goto end;
7583             }
7584 
7585             if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7586                 ERR("error - could not find entry in extent tree for tree at %I64x\n", t->header.address);
7587                 Status = STATUS_INTERNAL_ERROR;
7588                 goto end;
7589             }
7590         }
7591 
7592         le = le->Flink;
7593     }
7594 #endif
7595 
7596     Vcb->superblock.cache_generation = Vcb->superblock.generation;
7597 
7598     if (!Vcb->options.no_barrier)
7599         flush_disk_caches(Vcb);
7600 
7601     Status = write_superblocks(Vcb, Irp);
7602     if (!NT_SUCCESS(Status)) {
7603         ERR("write_superblocks returned %08x\n", Status);
7604         goto end;
7605     }
7606 
7607     vde = Vcb->vde;
7608 
7609     if (vde) {
7610         pdo_device_extension* pdode = vde->pdode;
7611 
7612         ExAcquireResourceSharedLite(&pdode->child_lock, true);
7613 
7614         le = pdode->children.Flink;
7615 
7616         while (le != &pdode->children) {
7617             volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
7618 
7619             vc->generation = Vcb->superblock.generation;
7620             le = le->Flink;
7621         }
7622 
7623         ExReleaseResourceLite(&pdode->child_lock);
7624     }
7625 
7626     clean_space_cache(Vcb);
7627 
7628     le = Vcb->chunks.Flink;
7629     while (le != &Vcb->chunks) {
7630         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7631 
7632         c->changed = false;
7633         c->space_changed = false;
7634 
7635         le = le->Flink;
7636     }
7637 
7638     Vcb->superblock.generation++;
7639 
7640     Status = STATUS_SUCCESS;
7641 
7642     le = Vcb->trees.Flink;
7643     while (le != &Vcb->trees) {
7644         tree* t = CONTAINING_RECORD(le, tree, list_entry);
7645 
7646         t->write = false;
7647 
7648         le = le->Flink;
7649     }
7650 
7651     Vcb->need_write = false;
7652 
7653     while (!IsListEmpty(&Vcb->drop_roots)) {
7654         root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->drop_roots), root, list_entry);
7655 
7656         ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
7657         ExFreePool(r->nonpaged);
7658         ExFreePool(r);
7659     }
7660 
7661 end:
7662     TRACE("do_write returning %08x\n", Status);
7663 
7664     return Status;
7665 }
7666 
7667 NTSTATUS do_write(device_extension* Vcb, PIRP Irp) {
7668     LIST_ENTRY rollback;
7669     NTSTATUS Status;
7670 
7671     InitializeListHead(&rollback);
7672 
7673     Status = do_write2(Vcb, Irp, &rollback);
7674 
7675     if (!NT_SUCCESS(Status)) {
7676         ERR("do_write2 returned %08x, dropping into readonly mode\n", Status);
7677         Vcb->readonly = true;
7678         FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_FORCED_CLOSED);
7679         do_rollback(Vcb, &rollback);
7680     } else
7681         clear_rollback(&rollback);
7682 
7683     return Status;
7684 }
7685 
7686 static void do_flush(device_extension* Vcb) {
7687     NTSTATUS Status;
7688 
7689     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true);
7690 
7691     if (Vcb->need_write && !Vcb->readonly)
7692         Status = do_write(Vcb, NULL);
7693     else
7694         Status = STATUS_SUCCESS;
7695 
7696     free_trees(Vcb);
7697 
7698     if (!NT_SUCCESS(Status))
7699         ERR("do_write returned %08x\n", Status);
7700 
7701     ExReleaseResourceLite(&Vcb->tree_lock);
7702 }
7703 
7704 _Function_class_(KSTART_ROUTINE)
7705 void __stdcall flush_thread(void* context) {
7706     DEVICE_OBJECT* devobj = context;
7707     device_extension* Vcb = devobj->DeviceExtension;
7708     LARGE_INTEGER due_time;
7709 
7710     ObReferenceObject(devobj);
7711 
7712     KeInitializeTimer(&Vcb->flush_thread_timer);
7713 
7714     due_time.QuadPart = (uint64_t)Vcb->options.flush_interval * -10000000;
7715 
7716     KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7717 
7718     while (true) {
7719         KeWaitForSingleObject(&Vcb->flush_thread_timer, Executive, KernelMode, false, NULL);
7720 
7721         if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing)
7722             break;
7723 
7724         if (!Vcb->locked)
7725             do_flush(Vcb);
7726 
7727         KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7728     }
7729 
7730     ObDereferenceObject(devobj);
7731     KeCancelTimer(&Vcb->flush_thread_timer);
7732 
7733     KeSetEvent(&Vcb->flush_thread_finished, 0, false);
7734 
7735     PsTerminateSystemThread(STATUS_SUCCESS);
7736 }
7737