1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 #include <ata.h>
20 #include <ntddscsi.h>
21 #include <ntddstor.h>
22 
// 4096 bytes less the sizes of tree_header and leaf_node.
// NOTE(review): presumably the largest checksum payload that fits in a single
// item of a 4096-byte leaf - confirm against the users of this macro.
#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))

// Uncomment to make trees_consistent() report, via ERR, why it returned FALSE.
// #define DEBUG_WRITE_LOOPS
26 
/* State shared between write_data_phys and write_completion for one write IRP. */
typedef struct {
    KEVENT Event; // set by write_completion once the IRP has completed
    IO_STATUS_BLOCK iosb; // copy of the completed IRP's IoStatus
} write_context;
31 
/* Item data inserted by insert_tree_extent under a TYPE_EXTENT_ITEM key:
 * an EXTENT_ITEM_TREE immediately followed by a single inline reference. */
typedef struct {
    EXTENT_ITEM_TREE eit;
    UINT8 type; // insert_tree_extent stores TYPE_TREE_BLOCK_REF here
    TREE_BLOCK_REF tbr; // insert_tree_extent stores the owning root's id in tbr.offset
} EXTENT_ITEM_TREE2;
37 
/* Item data inserted by insert_tree_extent_skinny under a TYPE_METADATA_ITEM key:
 * a plain EXTENT_ITEM immediately followed by a single inline reference. */
typedef struct {
    EXTENT_ITEM ei;
    UINT8 type; // insert_tree_extent_skinny stores TYPE_TREE_BLOCK_REF here
    TREE_BLOCK_REF tbr; // insert_tree_extent_skinny stores the owning root's id in tbr.offset
} EXTENT_ITEM_SKINNY_METADATA;
43 
// Forward declarations of file-local helpers (presumably defined further down in this file).
static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp);
static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
46 
// Fallback definition for non-MSVC builds; clean_space_cache passes this value
// in the Flags field of its trim request.
#ifndef _MSC_VER // not in mingw yet
#define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
#endif
50 
51 _Function_class_(IO_COMPLETION_ROUTINE)
52 #ifdef __REACTOS__
53 static NTSTATUS NTAPI write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
54 #else
55 static NTSTATUS write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
56 #endif
57     write_context* context = conptr;
58 
59     UNUSED(DeviceObject);
60 
61     context->iosb = Irp->IoStatus;
62     KeSetEvent(&context->Event, 0, FALSE);
63 
64     return STATUS_MORE_PROCESSING_REQUIRED;
65 }
66 
67 NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length) {
68     NTSTATUS Status;
69     LARGE_INTEGER offset;
70     PIRP Irp;
71     PIO_STACK_LOCATION IrpSp;
72     write_context context;
73 
74     TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
75 
76     RtlZeroMemory(&context, sizeof(write_context));
77 
78     KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
79 
80     offset.QuadPart = address;
81 
82     Irp = IoAllocateIrp(device->StackSize, FALSE);
83 
84     if (!Irp) {
85         ERR("IoAllocateIrp failed\n");
86         return STATUS_INSUFFICIENT_RESOURCES;
87     }
88 
89     IrpSp = IoGetNextIrpStackLocation(Irp);
90     IrpSp->MajorFunction = IRP_MJ_WRITE;
91 
92     if (device->Flags & DO_BUFFERED_IO) {
93         Irp->AssociatedIrp.SystemBuffer = data;
94 
95         Irp->Flags = IRP_BUFFERED_IO;
96     } else if (device->Flags & DO_DIRECT_IO) {
97         Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
98         if (!Irp->MdlAddress) {
99             DbgPrint("IoAllocateMdl failed\n");
100             Status = STATUS_INSUFFICIENT_RESOURCES;
101             goto exit;
102         }
103 
104         Status = STATUS_SUCCESS;
105 
106         _SEH2_TRY {
107             MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
108         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
109             Status = _SEH2_GetExceptionCode();
110         } _SEH2_END;
111 
112         if (!NT_SUCCESS(Status)) {
113             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
114             IoFreeMdl(Irp->MdlAddress);
115             goto exit;
116         }
117     } else {
118         Irp->UserBuffer = data;
119     }
120 
121     IrpSp->Parameters.Write.Length = length;
122     IrpSp->Parameters.Write.ByteOffset = offset;
123 
124     Irp->UserIosb = &context.iosb;
125 
126     Irp->UserEvent = &context.Event;
127 
128     IoSetCompletionRoutine(Irp, write_completion, &context, TRUE, TRUE, TRUE);
129 
130     Status = IoCallDriver(device, Irp);
131 
132     if (Status == STATUS_PENDING) {
133         KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
134         Status = context.iosb.Status;
135     }
136 
137     if (!NT_SUCCESS(Status)) {
138         ERR("IoCallDriver returned %08x\n", Status);
139     }
140 
141     if (device->Flags & DO_DIRECT_IO) {
142         MmUnlockPages(Irp->MdlAddress);
143         IoFreeMdl(Irp->MdlAddress);
144     }
145 
146 exit:
147     IoFreeIrp(Irp);
148 
149     return Status;
150 }
151 
152 static void add_trim_entry(device* dev, UINT64 address, UINT64 size) {
153     space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
154     if (!s) {
155         ERR("out of memory\n");
156         return;
157     }
158 
159     s->address = address;
160     s->size = size;
161     dev->num_trim_entries++;
162 
163     InsertTailList(&dev->trim_list, &s->list_entry);
164 }
165 
166 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
167     ULONG type;
168 
169     if (Vcb->trim && !Vcb->options.no_trim) {
170         if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
171             type = BLOCK_FLAG_DUPLICATE;
172         else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
173             type = BLOCK_FLAG_RAID0;
174         else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
175             type = BLOCK_FLAG_DUPLICATE;
176         else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
177             type = BLOCK_FLAG_RAID10;
178         else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
179             type = BLOCK_FLAG_RAID5;
180         else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
181             type = BLOCK_FLAG_RAID6;
182         else // SINGLE
183             type = BLOCK_FLAG_DUPLICATE;
184     }
185 
186     while (!IsListEmpty(&c->deleting)) {
187         space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
188 
189         if (Vcb->trim && !Vcb->options.no_trim && (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
190             CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
191 
192             if (type == BLOCK_FLAG_DUPLICATE) {
193                 UINT16 i;
194 
195                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
196                     if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
197                         add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size);
198                 }
199             } else if (type == BLOCK_FLAG_RAID0) {
200                 UINT64 startoff, endoff;
201                 UINT16 startoffstripe, endoffstripe, i;
202 
203                 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
204                 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
205 
206                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
207                     if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) {
208                         UINT64 stripestart, stripeend;
209 
210                         if (startoffstripe > i)
211                             stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
212                         else if (startoffstripe == i)
213                             stripestart = startoff;
214                         else
215                             stripestart = startoff - (startoff % c->chunk_item->stripe_length);
216 
217                         if (endoffstripe > i)
218                             stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
219                         else if (endoffstripe == i)
220                             stripeend = endoff + 1;
221                         else
222                             stripeend = endoff - (endoff % c->chunk_item->stripe_length);
223 
224                         if (stripestart != stripeend)
225                             add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart);
226                     }
227                 }
228             } else if (type == BLOCK_FLAG_RAID10) {
229                 UINT64 startoff, endoff;
230                 UINT16 sub_stripes, startoffstripe, endoffstripe, i;
231 
232                 sub_stripes = max(1, c->chunk_item->sub_stripes);
233 
234                 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
235                 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
236 
237                 startoffstripe *= sub_stripes;
238                 endoffstripe *= sub_stripes;
239 
240                 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
241                     ULONG j;
242                     UINT64 stripestart, stripeend;
243 
244                     if (startoffstripe > i)
245                         stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
246                     else if (startoffstripe == i)
247                         stripestart = startoff;
248                     else
249                         stripestart = startoff - (startoff % c->chunk_item->stripe_length);
250 
251                     if (endoffstripe > i)
252                         stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
253                     else if (endoffstripe == i)
254                         stripeend = endoff + 1;
255                     else
256                         stripeend = endoff - (endoff % c->chunk_item->stripe_length);
257 
258                     if (stripestart != stripeend) {
259                         for (j = 0; j < sub_stripes; j++) {
260                             if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim)
261                                 add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart);
262                         }
263                     }
264                 }
265             }
266             // FIXME - RAID5(?), RAID6(?)
267         }
268 
269         RemoveEntryList(&s->list_entry);
270         ExFreePool(s);
271     }
272 }
273 
/* Per-device state for one trim request issued by clean_space_cache. */
typedef struct {
    DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa; // request buffer: header followed by the DEVICE_DATA_SET_RANGE array
    ATA_PASS_THROUGH_EX apte; // NOTE(review): not referenced by clean_space_cache - confirm whether still needed
    PIRP Irp; // IRP carrying IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES to the device
    IO_STATUS_BLOCK iosb; // installed as the IRP's UserIosb
} ioctl_context_stripe;
280 
/* State shared between clean_space_cache and ioctl_completion for one batch of trim requests. */
typedef struct {
    KEVENT Event; // set by ioctl_completion when `left` reaches zero
    LONG left; // number of requests still outstanding; decremented with InterlockedDecrement
    ioctl_context_stripe* stripes; // one element per device taking part in the batch
} ioctl_context;
286 
287 _Function_class_(IO_COMPLETION_ROUTINE)
288 #ifdef __REACTOS__
289 static NTSTATUS NTAPI ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
290 #else
291 static NTSTATUS ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
292 #endif
293     ioctl_context* context = (ioctl_context*)conptr;
294     LONG left2 = InterlockedDecrement(&context->left);
295 
296     UNUSED(DeviceObject);
297     UNUSED(Irp);
298 
299     if (left2 == 0)
300         KeSetEvent(&context->Event, 0, FALSE);
301 
302     return STATUS_MORE_PROCESSING_REQUIRED;
303 }
304 
305 static void clean_space_cache(device_extension* Vcb) {
306     LIST_ENTRY* le;
307     chunk* c;
308     ULONG num;
309 
310     TRACE("(%p)\n", Vcb);
311 
312     ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
313 
314     le = Vcb->chunks.Flink;
315     while (le != &Vcb->chunks) {
316         c = CONTAINING_RECORD(le, chunk, list_entry);
317 
318         if (c->space_changed) {
319             ExAcquireResourceExclusiveLite(&c->lock, TRUE);
320 
321             if (c->space_changed)
322                 clean_space_cache_chunk(Vcb, c);
323 
324             c->space_changed = FALSE;
325 
326             ExReleaseResourceLite(&c->lock);
327         }
328 
329         le = le->Flink;
330     }
331 
332     ExReleaseResourceLite(&Vcb->chunk_lock);
333 
334     if (Vcb->trim && !Vcb->options.no_trim) {
335         ioctl_context context;
336         ULONG total_num;
337 
338         context.left = 0;
339 
340         le = Vcb->devices.Flink;
341         while (le != &Vcb->devices) {
342             device* dev = CONTAINING_RECORD(le, device, list_entry);
343 
344             if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0)
345                 context.left++;
346 
347             le = le->Flink;
348         }
349 
350         if (context.left == 0)
351             return;
352 
353         total_num = context.left;
354         num = 0;
355 
356         KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
357 
358         context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
359         if (!context.stripes) {
360             ERR("out of memory\n");
361             return;
362         }
363 
364         RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
365 
366         le = Vcb->devices.Flink;
367         while (le != &Vcb->devices) {
368             device* dev = CONTAINING_RECORD(le, device, list_entry);
369 
370             if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) {
371                 LIST_ENTRY* le2;
372                 ioctl_context_stripe* stripe = &context.stripes[num];
373                 DEVICE_DATA_SET_RANGE* ranges;
374                 ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i;
375                 PIO_STACK_LOCATION IrpSp;
376 
377                 stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
378                 if (!stripe->dmdsa) {
379                     ERR("out of memory\n");
380                     goto nextdev;
381                 }
382 
383                 stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
384                 stripe->dmdsa->Action = DeviceDsmAction_Trim;
385                 stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED;
386                 stripe->dmdsa->ParameterBlockOffset = 0;
387                 stripe->dmdsa->ParameterBlockLength = 0;
388                 stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64));
389                 stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
390 
391                 ranges = (DEVICE_DATA_SET_RANGE*)((UINT8*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset);
392 
393                 i = 0;
394 
395                 le2 = dev->trim_list.Flink;
396                 while (le2 != &dev->trim_list) {
397                     space* s = CONTAINING_RECORD(le2, space, list_entry);
398 
399                     ranges[i].StartingOffset = s->address;
400                     ranges[i].LengthInBytes = s->size;
401                     i++;
402 
403                     le2 = le2->Flink;
404                 }
405 
406                 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, FALSE);
407 
408                 if (!stripe->Irp) {
409                     ERR("IoAllocateIrp failed\n");
410                     goto nextdev;
411                 }
412 
413                 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
414                 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
415 
416                 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES;
417                 IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen;
418                 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0;
419 
420                 stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa;
421                 stripe->Irp->Flags |= IRP_BUFFERED_IO;
422                 stripe->Irp->UserBuffer = NULL;
423                 stripe->Irp->UserIosb = &stripe->iosb;
424 
425                 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, TRUE, TRUE, TRUE);
426 
427                 IoCallDriver(dev->devobj, stripe->Irp);
428 
429 nextdev:
430                 while (!IsListEmpty(&dev->trim_list)) {
431                     space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry);
432                     ExFreePool(s);
433                 }
434 
435                 dev->num_trim_entries = 0;
436 
437                 num++;
438             }
439 
440             le = le->Flink;
441         }
442 
443         KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
444 
445         for (num = 0; num < total_num; num++) {
446             if (context.stripes[num].dmdsa)
447                 ExFreePool(context.stripes[num].dmdsa);
448         }
449 
450         ExFreePool(context.stripes);
451     }
452 }
453 
454 static BOOL trees_consistent(device_extension* Vcb) {
455     ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
456     LIST_ENTRY* le;
457 
458     le = Vcb->trees.Flink;
459     while (le != &Vcb->trees) {
460         tree* t = CONTAINING_RECORD(le, tree, list_entry);
461 
462         if (t->write) {
463             if (t->header.num_items == 0 && t->parent) {
464 #ifdef DEBUG_WRITE_LOOPS
465                 ERR("empty tree found, looping again\n");
466 #endif
467                 return FALSE;
468             }
469 
470             if (t->size > maxsize) {
471 #ifdef DEBUG_WRITE_LOOPS
472                 ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
473 #endif
474                 return FALSE;
475             }
476 
477             if (!t->has_new_address) {
478 #ifdef DEBUG_WRITE_LOOPS
479                 ERR("tree found without new address, looping again\n");
480 #endif
481                 return FALSE;
482             }
483         }
484 
485         le = le->Flink;
486     }
487 
488     return TRUE;
489 }
490 
491 static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) {
492     ULONG level;
493     LIST_ENTRY* le;
494 
495     for (level = 0; level <= 255; level++) {
496         BOOL nothing_found = TRUE;
497 
498         TRACE("level = %u\n", level);
499 
500         le = Vcb->trees.Flink;
501         while (le != &Vcb->trees) {
502             tree* t = CONTAINING_RECORD(le, tree, list_entry);
503 
504             if (t->write && t->header.level == level) {
505                 TRACE("tree %p: root = %llx, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
506 
507                 nothing_found = FALSE;
508 
509                 if (t->parent) {
510                     if (!t->parent->write)
511                         TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
512 
513                     t->parent->write = TRUE;
514                 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
515                     KEY searchkey;
516                     traverse_ptr tp;
517                     NTSTATUS Status;
518 
519                     searchkey.obj_id = t->root->id;
520                     searchkey.obj_type = TYPE_ROOT_ITEM;
521                     searchkey.offset = 0xffffffffffffffff;
522 
523                     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
524                     if (!NT_SUCCESS(Status)) {
525                         ERR("error - find_item returned %08x\n", Status);
526                         return Status;
527                     }
528 
529                     if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
530                         ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
531                         return STATUS_INTERNAL_ERROR;
532                     }
533 
534                     if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
535                         ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
536 
537                         if (!ri) {
538                             ERR("out of memory\n");
539                             return STATUS_INSUFFICIENT_RESOURCES;
540                         }
541 
542                         RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
543 
544                         Status = delete_tree_item(Vcb, &tp);
545                         if (!NT_SUCCESS(Status)) {
546                             ERR("delete_tree_item returned %08x\n", Status);
547                             ExFreePool(ri);
548                             return Status;
549                         }
550 
551                         Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
552                         if (!NT_SUCCESS(Status)) {
553                             ERR("insert_tree_item returned %08x\n", Status);
554                             ExFreePool(ri);
555                             return Status;
556                         }
557                     }
558                 }
559             }
560 
561             le = le->Flink;
562         }
563 
564         if (nothing_found)
565             break;
566     }
567 
568     return STATUS_SUCCESS;
569 }
570 
571 static void add_parents_to_cache(tree* t) {
572     while (t->parent) {
573         t = t->parent;
574         t->write = TRUE;
575     }
576 }
577 
578 static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, PIRP Irp, LIST_ENTRY* rollback) {
579     NTSTATUS Status;
580     EXTENT_ITEM_SKINNY_METADATA* eism;
581     traverse_ptr insert_tp;
582 
583     eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
584     if (!eism) {
585         ERR("out of memory\n");
586         return FALSE;
587     }
588 
589     eism->ei.refcount = 1;
590     eism->ei.generation = Vcb->superblock.generation;
591     eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
592     eism->type = TYPE_TREE_BLOCK_REF;
593     eism->tbr.offset = root_id;
594 
595     Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp);
596     if (!NT_SUCCESS(Status)) {
597         ERR("insert_tree_item returned %08x\n", Status);
598         ExFreePool(eism);
599         return FALSE;
600     }
601 
602     ExAcquireResourceExclusiveLite(&c->lock, TRUE);
603 
604     space_list_subtract(c, FALSE, address, Vcb->superblock.node_size, rollback);
605 
606     ExReleaseResourceLite(&c->lock);
607 
608     add_parents_to_cache(insert_tp.tree);
609 
610     return TRUE;
611 }
612 
613 BOOL find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, UINT64* address) {
614     LIST_ENTRY* le;
615     space* s;
616 
617     TRACE("(%p, %llx, %p)\n", Vcb, c->offset, address);
618 
619     if (Vcb->superblock.node_size > c->chunk_item->size - c->used)
620         return FALSE;
621 
622     if (!c->cache_loaded) {
623         NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
624 
625         if (!NT_SUCCESS(Status)) {
626             ERR("load_cache_chunk returned %08x\n", Status);
627             return FALSE;
628         }
629     }
630 
631     if (IsListEmpty(&c->space_size))
632         return FALSE;
633 
634     if (!c->last_alloc_set) {
635         s = CONTAINING_RECORD(c->space.Blink, space, list_entry);
636 
637         c->last_alloc = s->address;
638         c->last_alloc_set = TRUE;
639 
640         if (s->size >= Vcb->superblock.node_size) {
641             *address = s->address;
642             c->last_alloc += Vcb->superblock.node_size;
643             return TRUE;
644         }
645     }
646 
647     le = c->space.Flink;
648     while (le != &c->space) {
649         s = CONTAINING_RECORD(le, space, list_entry);
650 
651         if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) {
652             *address = c->last_alloc;
653             c->last_alloc += Vcb->superblock.node_size;
654             return TRUE;
655         }
656 
657         le = le->Flink;
658     }
659 
660     le = c->space_size.Flink;
661     while (le != &c->space_size) {
662         s = CONTAINING_RECORD(le, space, list_entry_size);
663 
664         if (s->size == Vcb->superblock.node_size) {
665             *address = s->address;
666             c->last_alloc = s->address + Vcb->superblock.node_size;
667             return TRUE;
668         } else if (s->size < Vcb->superblock.node_size) {
669             if (le == c->space_size.Flink)
670                 return FALSE;
671 
672             s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
673 
674             *address = s->address;
675             c->last_alloc = s->address + Vcb->superblock.node_size;
676 
677             return TRUE;
678         }
679 
680         le = le->Flink;
681     }
682 
683     s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
684 
685     if (s->size > Vcb->superblock.node_size) {
686         *address = s->address;
687         c->last_alloc = s->address + Vcb->superblock.node_size;
688         return TRUE;
689     }
690 
691     return FALSE;
692 }
693 
694 static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, PIRP Irp, LIST_ENTRY* rollback) {
695     NTSTATUS Status;
696     UINT64 address;
697     EXTENT_ITEM_TREE2* eit2;
698     traverse_ptr insert_tp;
699 
700     TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback);
701 
702     if (!find_metadata_address_in_chunk(Vcb, c, &address))
703         return FALSE;
704 
705     if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
706         BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
707 
708         if (b)
709             *new_address = address;
710 
711         return b;
712     }
713 
714     eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
715     if (!eit2) {
716         ERR("out of memory\n");
717         return FALSE;
718     }
719 
720     eit2->eit.extent_item.refcount = 1;
721     eit2->eit.extent_item.generation = Vcb->superblock.generation;
722     eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
723     eit2->eit.level = level;
724     eit2->type = TYPE_TREE_BLOCK_REF;
725     eit2->tbr.offset = root_id;
726 
727     Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp);
728     if (!NT_SUCCESS(Status)) {
729         ERR("insert_tree_item returned %08x\n", Status);
730         ExFreePool(eit2);
731         return FALSE;
732     }
733 
734     ExAcquireResourceExclusiveLite(&c->lock, TRUE);
735 
736     space_list_subtract(c, FALSE, address, Vcb->superblock.node_size, rollback);
737 
738     ExReleaseResourceLite(&c->lock);
739 
740     add_parents_to_cache(insert_tp.tree);
741 
742     *new_address = address;
743 
744     return TRUE;
745 }
746 
747 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
748     NTSTATUS Status;
749     chunk *origchunk = NULL, *c;
750     LIST_ENTRY* le;
751     UINT64 flags, addr;
752 
753     if (t->root->id == BTRFS_ROOT_CHUNK)
754         flags = Vcb->system_flags;
755     else
756         flags = Vcb->metadata_flags;
757 
758     if (t->has_address) {
759         origchunk = get_chunk_from_address(Vcb, t->header.address);
760 
761         if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags &&
762             insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
763             t->new_address = addr;
764             t->has_new_address = TRUE;
765             return STATUS_SUCCESS;
766         }
767     }
768 
769     ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
770 
771     le = Vcb->chunks.Flink;
772     while (le != &Vcb->chunks) {
773         c = CONTAINING_RECORD(le, chunk, list_entry);
774 
775         if (!c->readonly && !c->reloc) {
776             ExAcquireResourceExclusiveLite(&c->lock, TRUE);
777 
778             if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
779                 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
780                     ExReleaseResourceLite(&c->lock);
781                     ExReleaseResourceLite(&Vcb->chunk_lock);
782                     t->new_address = addr;
783                     t->has_new_address = TRUE;
784                     return STATUS_SUCCESS;
785                 }
786             }
787 
788             ExReleaseResourceLite(&c->lock);
789         }
790 
791         le = le->Flink;
792     }
793 
794     // allocate new chunk if necessary
795 
796     Status = alloc_chunk(Vcb, flags, &c, FALSE);
797 
798     if (!NT_SUCCESS(Status)) {
799         ERR("alloc_chunk returned %08x\n", Status);
800         ExReleaseResourceLite(&Vcb->chunk_lock);
801         return Status;
802     }
803 
804     ExAcquireResourceExclusiveLite(&c->lock, TRUE);
805 
806     if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
807         if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
808             ExReleaseResourceLite(&c->lock);
809             ExReleaseResourceLite(&Vcb->chunk_lock);
810             t->new_address = addr;
811             t->has_new_address = TRUE;
812             return STATUS_SUCCESS;
813         }
814     }
815 
816     ExReleaseResourceLite(&c->lock);
817 
818     ExReleaseResourceLite(&Vcb->chunk_lock);
819 
820     ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
821 
822     return STATUS_DISK_FULL;
823 }
824 
825 static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, UINT64 parent_root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
826     NTSTATUS Status;
827     UINT64 rc, root;
828 
829     TRACE("(%p, %llx, %p)\n", Vcb, address, t);
830 
831     rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
832     if (rc == 0) {
833         ERR("error - refcount for extent %llx was 0\n", address);
834         return STATUS_INTERNAL_ERROR;
835     }
836 
837     if (!t || t->parent)
838         root = parent_root;
839     else
840         root = t->header.tree_id;
841 
842     Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp);
843     if (!NT_SUCCESS(Status)) {
844         ERR("decrease_extent_refcount_tree returned %08x\n", Status);
845         return Status;
846     }
847 
848     if (rc == 1) {
849         chunk* c = get_chunk_from_address(Vcb, address);
850 
851         if (c) {
852             ExAcquireResourceExclusiveLite(&c->lock, TRUE);
853 
854             if (!c->cache_loaded) {
855                 Status = load_cache_chunk(Vcb, c, NULL);
856 
857                 if (!NT_SUCCESS(Status)) {
858                     ERR("load_cache_chunk returned %08x\n", Status);
859                     ExReleaseResourceLite(&c->lock);
860                     return Status;
861                 }
862             }
863 
864             c->used -= Vcb->superblock.node_size;
865 
866             space_list_add(c, address, Vcb->superblock.node_size, rollback);
867 
868             ExReleaseResourceLite(&c->lock);
869         } else
870             ERR("could not find chunk for address %llx\n", address);
871     }
872 
873     return STATUS_SUCCESS;
874 }
875 
876 static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, BOOL old) {
877     LIST_ENTRY *le2, *list;
878     changed_extent_ref* cer;
879 
880     list = old ? &ce->old_refs : &ce->refs;
881 
882     le2 = list->Flink;
883     while (le2 != list) {
884         cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
885 
886         if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) {
887             cer->edr.count += edr->count;
888             goto end;
889         }
890 
891         le2 = le2->Flink;
892     }
893 
894     cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
895     if (!cer) {
896         ERR("out of memory\n");
897         return STATUS_INSUFFICIENT_RESOURCES;
898     }
899 
900     cer->type = TYPE_EXTENT_DATA_REF;
901     RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF));
902     InsertTailList(list, &cer->list_entry);
903 
904 end:
905     if (old)
906         ce->old_count += edr->count;
907     else
908         ce->count += edr->count;
909 
910     return STATUS_SUCCESS;
911 }
912 
913 static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, BOOL old) {
914     LIST_ENTRY *le2, *list;
915     changed_extent_ref* cer;
916 
917     list = old ? &ce->old_refs : &ce->refs;
918 
919     le2 = list->Flink;
920     while (le2 != list) {
921         cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
922 
923         if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) {
924             cer->sdr.count += sdr->count;
925             goto end;
926         }
927 
928         le2 = le2->Flink;
929     }
930 
931     cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
932     if (!cer) {
933         ERR("out of memory\n");
934         return STATUS_INSUFFICIENT_RESOURCES;
935     }
936 
937     cer->type = TYPE_SHARED_DATA_REF;
938     RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF));
939     InsertTailList(list, &cer->list_entry);
940 
941 end:
942     if (old)
943         ce->old_count += sdr->count;
944     else
945         ce->count += sdr->count;
946 
947     return STATUS_SUCCESS;
948 }
949 
950 static BOOL shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
951     KEY searchkey;
952     traverse_ptr tp;
953     NTSTATUS Status;
954 
955     if (!t->updated_extents && t->has_address) {
956         Status = update_tree_extents(Vcb, t, Irp, rollback);
957         if (!NT_SUCCESS(Status)) {
958             ERR("update_tree_extents returned %08x\n", Status);
959             return FALSE;
960         }
961     }
962 
963     searchkey.obj_id = t->header.address;
964     searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
965     searchkey.offset = 0xffffffffffffffff;
966 
967     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
968     if (!NT_SUCCESS(Status)) {
969         ERR("error - find_item returned %08x\n", Status);
970         return FALSE;
971     }
972 
973     if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM))
974         return FALSE;
975     else
976         return TRUE;
977 }
978 
979 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
980     NTSTATUS Status;
981     UINT64 rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
982     UINT64 flags = get_extent_flags(Vcb, t->header.address, Irp);
983 
984     if (rc == 0) {
985         ERR("refcount for extent %llx was 0\n", t->header.address);
986         return STATUS_INTERNAL_ERROR;
987     }
988 
989     if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
990         TREE_BLOCK_REF tbr;
991         BOOL unique = rc > 1 ? FALSE : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : FALSE);
992 
993         if (t->header.level == 0) {
994             LIST_ENTRY* le;
995 
996             le = t->itemlist.Flink;
997             while (le != &t->itemlist) {
998                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
999 
1000                 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1001                     EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1002 
1003                     if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1004                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1005 
1006                         if (ed2->size > 0) {
1007                             EXTENT_DATA_REF edr;
1008                             changed_extent* ce = NULL;
1009                             chunk* c = get_chunk_from_address(Vcb, ed2->address);
1010 
1011                             if (c) {
1012                                 LIST_ENTRY* le2;
1013 
1014                                 le2 = c->changed_extents.Flink;
1015                                 while (le2 != &c->changed_extents) {
1016                                     changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1017 
1018                                     if (ce2->address == ed2->address) {
1019                                         ce = ce2;
1020                                         break;
1021                                     }
1022 
1023                                     le2 = le2->Flink;
1024                                 }
1025                             }
1026 
1027                             edr.root = t->root->id;
1028                             edr.objid = td->key.obj_id;
1029                             edr.offset = td->key.offset - ed2->offset;
1030                             edr.count = 1;
1031 
1032                             if (ce) {
1033                                 Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
1034                                 if (!NT_SUCCESS(Status)) {
1035                                     ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1036                                     return Status;
1037                                 }
1038 
1039                                 Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
1040                                 if (!NT_SUCCESS(Status)) {
1041                                     ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1042                                     return Status;
1043                                 }
1044                             }
1045 
1046                             Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1047                             if (!NT_SUCCESS(Status)) {
1048                                 ERR("increase_extent_refcount returned %08x\n", Status);
1049                                 return Status;
1050                             }
1051 
1052                             if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1053                                 UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
1054 
1055                                 if (sdrrc > 0) {
1056                                     SHARED_DATA_REF sdr;
1057 
1058                                     sdr.offset = t->header.address;
1059                                     sdr.count = 1;
1060 
1061                                     Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
1062                                                                       t->header.address, ce ? ce->superseded : FALSE, Irp);
1063                                     if (!NT_SUCCESS(Status)) {
1064                                         ERR("decrease_extent_refcount returned %08x\n", Status);
1065                                         return Status;
1066                                     }
1067 
1068                                     if (ce) {
1069                                         LIST_ENTRY* le2;
1070 
1071                                         le2 = ce->refs.Flink;
1072                                         while (le2 != &ce->refs) {
1073                                             changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1074 
1075                                             if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1076                                                 ce->count--;
1077                                                 cer->sdr.count--;
1078                                                 break;
1079                                             }
1080 
1081                                             le2 = le2->Flink;
1082                                         }
1083 
1084                                         le2 = ce->old_refs.Flink;
1085                                         while (le2 != &ce->old_refs) {
1086                                             changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1087 
1088                                             if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1089                                                 ce->old_count--;
1090 
1091                                                 if (cer->sdr.count > 1)
1092                                                     cer->sdr.count--;
1093                                                 else {
1094                                                     RemoveEntryList(&cer->list_entry);
1095                                                     ExFreePool(cer);
1096                                                 }
1097 
1098                                                 break;
1099                                             }
1100 
1101                                             le2 = le2->Flink;
1102                                         }
1103                                     }
1104                                 }
1105                             }
1106 
1107                             // FIXME - clear shared flag if unique?
1108                         }
1109                     }
1110                 }
1111 
1112                 le = le->Flink;
1113             }
1114         } else {
1115             LIST_ENTRY* le;
1116 
1117             le = t->itemlist.Flink;
1118             while (le != &t->itemlist) {
1119                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1120 
1121                 if (!td->inserted) {
1122                     tbr.offset = t->root->id;
1123 
1124                     Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF,
1125                                                       &tbr, &td->key, t->header.level - 1, Irp);
1126                     if (!NT_SUCCESS(Status)) {
1127                         ERR("increase_extent_refcount returned %08x\n", Status);
1128                         return Status;
1129                     }
1130 
1131                     if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1132                         UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
1133 
1134                         if (sbrrc > 0) {
1135                             SHARED_BLOCK_REF sbr;
1136 
1137                             sbr.offset = t->header.address;
1138 
1139                             Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1140                                                               t->header.address, FALSE, Irp);
1141                             if (!NT_SUCCESS(Status)) {
1142                                 ERR("decrease_extent_refcount returned %08x\n", Status);
1143                                 return Status;
1144                             }
1145                         }
1146                     }
1147 
1148                     // FIXME - clear shared flag if unique?
1149                 }
1150 
1151                 le = le->Flink;
1152             }
1153         }
1154 
1155         if (unique) {
1156             UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
1157 
1158             if (sbrrc == 1) {
1159                 SHARED_BLOCK_REF sbr;
1160 
1161                 sbr.offset = t->parent->header.address;
1162 
1163                 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1164                                                   t->parent->header.address, FALSE, Irp);
1165                 if (!NT_SUCCESS(Status)) {
1166                     ERR("decrease_extent_refcount returned %08x\n", Status);
1167                     return Status;
1168                 }
1169             }
1170         }
1171 
1172         if (t->parent)
1173             tbr.offset = t->parent->header.tree_id;
1174         else
1175             tbr.offset = t->header.tree_id;
1176 
1177         Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
1178                                           t->parent ? &t->paritem->key : NULL, t->header.level, Irp);
1179         if (!NT_SUCCESS(Status)) {
1180             ERR("increase_extent_refcount returned %08x\n", Status);
1181             return Status;
1182         }
1183 
1184         // FIXME - clear shared flag if unique?
1185 
1186         t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1187     }
1188 
1189     if (rc > 1 || t->header.tree_id == t->root->id) {
1190         Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback);
1191 
1192         if (!NT_SUCCESS(Status)) {
1193             ERR("reduce_tree_extent returned %08x\n", Status);
1194             return Status;
1195         }
1196     }
1197 
1198     t->has_address = FALSE;
1199 
1200     if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
1201         if (t->header.tree_id == t->root->id) {
1202             flags |= EXTENT_ITEM_SHARED_BACKREFS;
1203             update_extent_flags(Vcb, t->header.address, flags, Irp);
1204         }
1205 
1206         if (t->header.level > 0) {
1207             LIST_ENTRY* le;
1208 
1209             le = t->itemlist.Flink;
1210             while (le != &t->itemlist) {
1211                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1212 
1213                 if (!td->inserted) {
1214                     if (t->header.tree_id == t->root->id) {
1215                         SHARED_BLOCK_REF sbr;
1216 
1217                         sbr.offset = t->header.address;
1218 
1219                         Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp);
1220                     } else {
1221                         TREE_BLOCK_REF tbr;
1222 
1223                         tbr.offset = t->root->id;
1224 
1225                         Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp);
1226                     }
1227 
1228                     if (!NT_SUCCESS(Status)) {
1229                         ERR("increase_extent_refcount returned %08x\n", Status);
1230                         return Status;
1231                     }
1232                 }
1233 
1234                 le = le->Flink;
1235             }
1236         } else {
1237             LIST_ENTRY* le;
1238 
1239             le = t->itemlist.Flink;
1240             while (le != &t->itemlist) {
1241                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1242 
1243                 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1244                     EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1245 
1246                     if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1247                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1248 
1249                         if (ed2->size > 0) {
1250                             changed_extent* ce = NULL;
1251                             chunk* c = get_chunk_from_address(Vcb, ed2->address);
1252 
1253                             if (c) {
1254                                 LIST_ENTRY* le2;
1255 
1256                                 le2 = c->changed_extents.Flink;
1257                                 while (le2 != &c->changed_extents) {
1258                                     changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1259 
1260                                     if (ce2->address == ed2->address) {
1261                                         ce = ce2;
1262                                         break;
1263                                     }
1264 
1265                                     le2 = le2->Flink;
1266                                 }
1267                             }
1268 
1269                             if (t->header.tree_id == t->root->id) {
1270                                 SHARED_DATA_REF sdr;
1271 
1272                                 sdr.offset = t->header.address;
1273                                 sdr.count = 1;
1274 
1275                                 if (ce) {
1276                                     Status = add_changed_extent_ref_sdr(ce, &sdr, TRUE);
1277                                     if (!NT_SUCCESS(Status)) {
1278                                         ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1279                                         return Status;
1280                                     }
1281 
1282                                     Status = add_changed_extent_ref_sdr(ce, &sdr, FALSE);
1283                                     if (!NT_SUCCESS(Status)) {
1284                                         ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1285                                         return Status;
1286                                     }
1287                                 }
1288 
1289                                 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp);
1290                             } else {
1291                                 EXTENT_DATA_REF edr;
1292 
1293                                 edr.root = t->root->id;
1294                                 edr.objid = td->key.obj_id;
1295                                 edr.offset = td->key.offset - ed2->offset;
1296                                 edr.count = 1;
1297 
1298                                 if (ce) {
1299                                     Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
1300                                     if (!NT_SUCCESS(Status)) {
1301                                         ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1302                                         return Status;
1303                                     }
1304 
1305                                     Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
1306                                     if (!NT_SUCCESS(Status)) {
1307                                         ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1308                                         return Status;
1309                                     }
1310                                 }
1311 
1312                                 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1313                             }
1314 
1315                             if (!NT_SUCCESS(Status)) {
1316                                 ERR("increase_extent_refcount returned %08x\n", Status);
1317                                 return Status;
1318                             }
1319                         }
1320                     }
1321                 }
1322 
1323                 le = le->Flink;
1324             }
1325         }
1326     }
1327 
1328     t->updated_extents = TRUE;
1329     t->header.tree_id = t->root->id;
1330 
1331     return STATUS_SUCCESS;
1332 }
1333 
1334 static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
1335     LIST_ENTRY* le;
1336     NTSTATUS Status;
1337     BOOL changed = FALSE;
1338     UINT8 max_level = 0, level;
1339 
1340     TRACE("(%p)\n", Vcb);
1341 
1342     le = Vcb->trees.Flink;
1343     while (le != &Vcb->trees) {
1344         tree* t = CONTAINING_RECORD(le, tree, list_entry);
1345 
1346         if (t->write && !t->has_new_address) {
1347             chunk* c;
1348 
1349             if (t->has_address) {
1350                 c = get_chunk_from_address(Vcb, t->header.address);
1351 
1352                 if (c) {
1353                     if (!c->cache_loaded) {
1354                         ExAcquireResourceExclusiveLite(&c->lock, TRUE);
1355 
1356                         if (!c->cache_loaded) {
1357                             Status = load_cache_chunk(Vcb, c, NULL);
1358 
1359                             if (!NT_SUCCESS(Status)) {
1360                                 ERR("load_cache_chunk returned %08x\n", Status);
1361                                 ExReleaseResourceLite(&c->lock);
1362                                 return Status;
1363                             }
1364                         }
1365 
1366                         ExReleaseResourceLite(&c->lock);
1367                     }
1368                 }
1369             }
1370 
1371             Status = get_tree_new_address(Vcb, t, Irp, rollback);
1372             if (!NT_SUCCESS(Status)) {
1373                 ERR("get_tree_new_address returned %08x\n", Status);
1374                 return Status;
1375             }
1376 
1377             TRACE("allocated extent %llx\n", t->new_address);
1378 
1379             c = get_chunk_from_address(Vcb, t->new_address);
1380 
1381             if (c)
1382                 c->used += Vcb->superblock.node_size;
1383             else {
1384                 ERR("could not find chunk for address %llx\n", t->new_address);
1385                 return STATUS_INTERNAL_ERROR;
1386             }
1387 
1388             changed = TRUE;
1389 
1390             if (t->header.level > max_level)
1391                 max_level = t->header.level;
1392         }
1393 
1394         le = le->Flink;
1395     }
1396 
1397     if (!changed)
1398         return STATUS_SUCCESS;
1399 
1400     level = max_level;
1401     do {
1402         le = Vcb->trees.Flink;
1403         while (le != &Vcb->trees) {
1404             tree* t = CONTAINING_RECORD(le, tree, list_entry);
1405 
1406             if (t->write && !t->updated_extents && t->has_address && t->header.level == level) {
1407                 Status = update_tree_extents(Vcb, t, Irp, rollback);
1408                 if (!NT_SUCCESS(Status)) {
1409                     ERR("update_tree_extents returned %08x\n", Status);
1410                     return Status;
1411                 }
1412             }
1413 
1414             le = le->Flink;
1415         }
1416 
1417         if (level == 0)
1418             break;
1419 
1420         level--;
1421     } while (TRUE);
1422 
1423     return STATUS_SUCCESS;
1424 }
1425 
1426 static NTSTATUS update_root_root(device_extension* Vcb, BOOL no_cache, PIRP Irp, LIST_ENTRY* rollback) {
1427     LIST_ENTRY* le;
1428     NTSTATUS Status;
1429 
1430     TRACE("(%p)\n", Vcb);
1431 
1432     le = Vcb->trees.Flink;
1433     while (le != &Vcb->trees) {
1434         tree* t = CONTAINING_RECORD(le, tree, list_entry);
1435 
1436         if (t->write && !t->parent) {
1437             if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1438                 KEY searchkey;
1439                 traverse_ptr tp;
1440 
1441                 searchkey.obj_id = t->root->id;
1442                 searchkey.obj_type = TYPE_ROOT_ITEM;
1443                 searchkey.offset = 0xffffffffffffffff;
1444 
1445                 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
1446                 if (!NT_SUCCESS(Status)) {
1447                     ERR("error - find_item returned %08x\n", Status);
1448                     return Status;
1449                 }
1450 
1451                 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1452                     ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1453                     return STATUS_INTERNAL_ERROR;
1454                 }
1455 
1456                 TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, t->new_address);
1457 
1458                 t->root->root_item.block_number = t->new_address;
1459                 t->root->root_item.root_level = t->header.level;
1460                 t->root->root_item.generation = Vcb->superblock.generation;
1461                 t->root->root_item.generation2 = Vcb->superblock.generation;
1462 
1463                 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
1464 
1465                 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1466             }
1467 
1468             t->root->treeholder.address = t->new_address;
1469             t->root->treeholder.generation = Vcb->superblock.generation;
1470         }
1471 
1472         le = le->Flink;
1473     }
1474 
1475     if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
1476         ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
1477         Status = update_chunk_caches(Vcb, Irp, rollback);
1478         ExReleaseResourceLite(&Vcb->chunk_lock);
1479 
1480         if (!NT_SUCCESS(Status)) {
1481             ERR("update_chunk_caches returned %08x\n", Status);
1482             return Status;
1483         }
1484     }
1485 
1486     return STATUS_SUCCESS;
1487 }
1488 
1489 NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, BOOL no_free) {
1490     chunk* c;
1491     LIST_ENTRY* le;
1492     tree_write* tw;
1493     NTSTATUS Status;
1494     ULONG i, num_bits;
1495     write_data_context* wtc;
1496     ULONG bit_num = 0;
1497     BOOL raid56 = FALSE;
1498 
1499     // merge together runs
1500     c = NULL;
1501     le = tree_writes->Flink;
1502     while (le != tree_writes) {
1503         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1504 
1505         if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
1506             c = get_chunk_from_address(Vcb, tw->address);
1507         else {
1508             tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
1509 
1510             if (tw->address == tw2->address + tw2->length) {
1511                 UINT8* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG);
1512 
1513                 if (!data) {
1514                     ERR("out of memory\n");
1515                     return STATUS_INSUFFICIENT_RESOURCES;
1516                 }
1517 
1518                 RtlCopyMemory(data, tw2->data, tw2->length);
1519                 RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
1520 
1521                 if (!no_free)
1522                     ExFreePool(tw2->data);
1523 
1524                 tw2->data = data;
1525                 tw2->length += tw->length;
1526 
1527                 if (!no_free) // FIXME - what if we allocated this just now?
1528                     ExFreePool(tw->data);
1529 
1530                 RemoveEntryList(&tw->list_entry);
1531                 ExFreePool(tw);
1532 
1533                 le = tw2->list_entry.Flink;
1534                 continue;
1535             }
1536         }
1537 
1538         tw->c = c;
1539 
1540         if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6))
1541             raid56 = TRUE;
1542 
1543         le = le->Flink;
1544     }
1545 
1546     num_bits = 0;
1547 
1548     le = tree_writes->Flink;
1549     while (le != tree_writes) {
1550         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1551 
1552         num_bits++;
1553 
1554         le = le->Flink;
1555     }
1556 
1557     wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG);
1558     if (!wtc) {
1559         ERR("out of memory\n");
1560         return STATUS_INSUFFICIENT_RESOURCES;
1561     }
1562 
1563     le = tree_writes->Flink;
1564 
1565     while (le != tree_writes) {
1566         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1567 
1568         TRACE("address: %llx, size: %x\n", tw->address, tw->length);
1569 
1570         KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, FALSE);
1571         InitializeListHead(&wtc[bit_num].stripes);
1572         wtc[bit_num].need_wait = FALSE;
1573         wtc[bit_num].stripes_left = 0;
1574         wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL;
1575         wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL;
1576 
1577         Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, FALSE, 0, HighPagePriority);
1578         if (!NT_SUCCESS(Status)) {
1579             ERR("write_data returned %08x\n", Status);
1580 
1581             for (i = 0; i < num_bits; i++) {
1582                 free_write_data_stripes(&wtc[i]);
1583             }
1584             ExFreePool(wtc);
1585 
1586             return Status;
1587         }
1588 
1589         bit_num++;
1590 
1591         le = le->Flink;
1592     }
1593 
1594     for (i = 0; i < num_bits; i++) {
1595         if (wtc[i].stripes.Flink != &wtc[i].stripes) {
1596             // launch writes and wait
1597             le = wtc[i].stripes.Flink;
1598             while (le != &wtc[i].stripes) {
1599                 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1600 
1601                 if (stripe->status != WriteDataStatus_Ignore) {
1602                     wtc[i].need_wait = TRUE;
1603                     IoCallDriver(stripe->device->devobj, stripe->Irp);
1604                 }
1605 
1606                 le = le->Flink;
1607             }
1608         }
1609     }
1610 
1611     for (i = 0; i < num_bits; i++) {
1612         if (wtc[i].need_wait)
1613             KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, FALSE, NULL);
1614     }
1615 
1616     for (i = 0; i < num_bits; i++) {
1617         le = wtc[i].stripes.Flink;
1618         while (le != &wtc[i].stripes) {
1619             write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1620 
1621             if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
1622                 Status = stripe->iosb.Status;
1623                 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
1624                 break;
1625             }
1626 
1627             le = le->Flink;
1628         }
1629 
1630         free_write_data_stripes(&wtc[i]);
1631     }
1632 
1633     ExFreePool(wtc);
1634 
1635     if (raid56) {
1636         c = NULL;
1637 
1638         le = tree_writes->Flink;
1639         while (le != tree_writes) {
1640             tw = CONTAINING_RECORD(le, tree_write, list_entry);
1641 
1642             if (tw->c != c) {
1643                 c = tw->c;
1644 
1645                 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE);
1646 
1647                 while (!IsListEmpty(&c->partial_stripes)) {
1648                     partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
1649 
1650                     Status = flush_partial_stripe(Vcb, c, ps);
1651 
1652                     if (ps->bmparr)
1653                         ExFreePool(ps->bmparr);
1654 
1655                     ExFreePool(ps);
1656 
1657                     if (!NT_SUCCESS(Status)) {
1658                         ERR("flush_partial_stripe returned %08x\n", Status);
1659                         ExReleaseResourceLite(&c->partial_stripes_lock);
1660                         return Status;
1661                     }
1662                 }
1663 
1664                 ExReleaseResourceLite(&c->partial_stripes_lock);
1665             }
1666 
1667             le = le->Flink;
1668         }
1669     }
1670 
1671     return STATUS_SUCCESS;
1672 }
1673 
1674 static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
1675     ULONG level;
1676     UINT8 *data, *body;
1677     UINT32 crc32;
1678     NTSTATUS Status;
1679     LIST_ENTRY* le;
1680     LIST_ENTRY tree_writes;
1681     tree_write* tw;
1682 
1683     TRACE("(%p)\n", Vcb);
1684 
1685     InitializeListHead(&tree_writes);
1686 
1687     for (level = 0; level <= 255; level++) {
1688         BOOL nothing_found = TRUE;
1689 
1690         TRACE("level = %u\n", level);
1691 
1692         le = Vcb->trees.Flink;
1693         while (le != &Vcb->trees) {
1694             tree* t = CONTAINING_RECORD(le, tree, list_entry);
1695 
1696             if (t->write && t->header.level == level) {
1697                 KEY firstitem, searchkey;
1698                 LIST_ENTRY* le2;
1699                 traverse_ptr tp;
1700 
1701                 if (!t->has_new_address) {
1702                     ERR("error - tried to write tree with no new address\n");
1703                     return STATUS_INTERNAL_ERROR;
1704                 }
1705 
1706                 le2 = t->itemlist.Flink;
1707                 while (le2 != &t->itemlist) {
1708                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1709                     if (!td->ignore) {
1710                         firstitem = td->key;
1711                         break;
1712                     }
1713                     le2 = le2->Flink;
1714                 }
1715 
1716                 if (t->parent) {
1717                     t->paritem->key = firstitem;
1718                     t->paritem->treeholder.address = t->new_address;
1719                     t->paritem->treeholder.generation = Vcb->superblock.generation;
1720                 }
1721 
1722                 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
1723                     EXTENT_ITEM_TREE* eit;
1724 
1725                     searchkey.obj_id = t->new_address;
1726                     searchkey.obj_type = TYPE_EXTENT_ITEM;
1727                     searchkey.offset = Vcb->superblock.node_size;
1728 
1729                     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
1730                     if (!NT_SUCCESS(Status)) {
1731                         ERR("error - find_item returned %08x\n", Status);
1732                         return Status;
1733                     }
1734 
1735                     if (keycmp(searchkey, tp.item->key)) {
1736                         ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
1737                         return STATUS_INTERNAL_ERROR;
1738                     }
1739 
1740                     if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
1741                         ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
1742                         return STATUS_INTERNAL_ERROR;
1743                     }
1744 
1745                     eit = (EXTENT_ITEM_TREE*)tp.item->data;
1746                     eit->firstitem = firstitem;
1747                 }
1748 
1749                 nothing_found = FALSE;
1750             }
1751 
1752             le = le->Flink;
1753         }
1754 
1755         if (nothing_found)
1756             break;
1757     }
1758 
1759     TRACE("allocated tree extents\n");
1760 
1761     le = Vcb->trees.Flink;
1762     while (le != &Vcb->trees) {
1763         tree* t = CONTAINING_RECORD(le, tree, list_entry);
1764         LIST_ENTRY* le2;
1765 #ifdef DEBUG_PARANOID
1766         UINT32 num_items = 0, size = 0;
1767         BOOL crash = FALSE;
1768 #endif
1769 
1770         if (t->write) {
1771 #ifdef DEBUG_PARANOID
1772             BOOL first = TRUE;
1773             KEY lastkey;
1774 
1775             le2 = t->itemlist.Flink;
1776             while (le2 != &t->itemlist) {
1777                 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1778                 if (!td->ignore) {
1779                     num_items++;
1780 
1781                     if (!first) {
1782                         if (keycmp(td->key, lastkey) == 0) {
1783                             ERR("(%llx,%x,%llx): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1784                             crash = TRUE;
1785                         } else if (keycmp(td->key, lastkey) == -1) {
1786                             ERR("(%llx,%x,%llx): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1787                             crash = TRUE;
1788                         }
1789                     } else
1790                         first = FALSE;
1791 
1792                     lastkey = td->key;
1793 
1794                     if (t->header.level == 0)
1795                         size += td->size;
1796                 }
1797                 le2 = le2->Flink;
1798             }
1799 
1800             if (t->header.level == 0)
1801                 size += num_items * sizeof(leaf_node);
1802             else
1803                 size += num_items * sizeof(internal_node);
1804 
1805             if (num_items != t->header.num_items) {
1806                 ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
1807                 crash = TRUE;
1808             }
1809 
1810             if (size != t->size) {
1811                 ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
1812                 crash = TRUE;
1813             }
1814 
1815             if (t->header.num_items == 0 && t->parent) {
1816                 ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
1817                 crash = TRUE;
1818             }
1819 
1820             if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
1821                 ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
1822                 crash = TRUE;
1823             }
1824 
1825             if (crash) {
1826                 ERR("tree %p\n", t);
1827                 le2 = t->itemlist.Flink;
1828                 while (le2 != &t->itemlist) {
1829                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1830                     if (!td->ignore) {
1831                         ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
1832                     }
1833                     le2 = le2->Flink;
1834                 }
1835                 int3;
1836             }
1837 #endif
1838             t->header.address = t->new_address;
1839             t->header.generation = Vcb->superblock.generation;
1840             t->header.tree_id = t->root->id;
1841             t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
1842             t->header.fs_uuid = Vcb->superblock.uuid;
1843             t->has_address = TRUE;
1844 
1845             data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
1846             if (!data) {
1847                 ERR("out of memory\n");
1848                 Status = STATUS_INSUFFICIENT_RESOURCES;
1849                 goto end;
1850             }
1851 
1852             body = data + sizeof(tree_header);
1853 
1854             RtlCopyMemory(data, &t->header, sizeof(tree_header));
1855             RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
1856 
1857             if (t->header.level == 0) {
1858                 leaf_node* itemptr = (leaf_node*)body;
1859                 int i = 0;
1860                 UINT8* dataptr = data + Vcb->superblock.node_size;
1861 
1862                 le2 = t->itemlist.Flink;
1863                 while (le2 != &t->itemlist) {
1864                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1865                     if (!td->ignore) {
1866                         dataptr = dataptr - td->size;
1867 
1868                         itemptr[i].key = td->key;
1869                         itemptr[i].offset = (UINT32)((UINT8*)dataptr - (UINT8*)body);
1870                         itemptr[i].size = td->size;
1871                         i++;
1872 
1873                         if (td->size > 0)
1874                             RtlCopyMemory(dataptr, td->data, td->size);
1875                     }
1876 
1877                     le2 = le2->Flink;
1878                 }
1879             } else {
1880                 internal_node* itemptr = (internal_node*)body;
1881                 int i = 0;
1882 
1883                 le2 = t->itemlist.Flink;
1884                 while (le2 != &t->itemlist) {
1885                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1886                     if (!td->ignore) {
1887                         itemptr[i].key = td->key;
1888                         itemptr[i].address = td->treeholder.address;
1889                         itemptr[i].generation = td->treeholder.generation;
1890                         i++;
1891                     }
1892 
1893                     le2 = le2->Flink;
1894                 }
1895             }
1896 
1897             crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
1898             crc32 = ~crc32;
1899             *((UINT32*)data) = crc32;
1900             TRACE("setting crc32 to %08x\n", crc32);
1901 
1902             tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
1903             if (!tw) {
1904                 ERR("out of memory\n");
1905                 ExFreePool(data);
1906                 Status = STATUS_INSUFFICIENT_RESOURCES;
1907                 goto end;
1908             }
1909 
1910             tw->address = t->new_address;
1911             tw->length = Vcb->superblock.node_size;
1912             tw->data = data;
1913 
1914             if (IsListEmpty(&tree_writes))
1915                 InsertTailList(&tree_writes, &tw->list_entry);
1916             else {
1917                 BOOL inserted = FALSE;
1918 
1919                 le2 = tree_writes.Flink;
1920                 while (le2 != &tree_writes) {
1921                     tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
1922 
1923                     if (tw2->address > tw->address) {
1924                         InsertHeadList(le2->Blink, &tw->list_entry);
1925                         inserted = TRUE;
1926                         break;
1927                     }
1928 
1929                     le2 = le2->Flink;
1930                 }
1931 
1932                 if (!inserted)
1933                     InsertTailList(&tree_writes, &tw->list_entry);
1934             }
1935         }
1936 
1937         le = le->Flink;
1938     }
1939 
1940     Status = do_tree_writes(Vcb, &tree_writes, FALSE);
1941     if (!NT_SUCCESS(Status)) {
1942         ERR("do_tree_writes returned %08x\n", Status);
1943         goto end;
1944     }
1945 
1946     Status = STATUS_SUCCESS;
1947 
1948 end:
1949     while (!IsListEmpty(&tree_writes)) {
1950         le = RemoveHeadList(&tree_writes);
1951         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1952 
1953         if (tw->data)
1954             ExFreePool(tw->data);
1955 
1956         ExFreePool(tw);
1957     }
1958 
1959     return Status;
1960 }
1961 
1962 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
1963     KEY searchkey;
1964     traverse_ptr tp;
1965 
1966     RtlZeroMemory(sb, sizeof(superblock_backup));
1967 
1968     sb->root_tree_addr = Vcb->superblock.root_tree_addr;
1969     sb->root_tree_generation = Vcb->superblock.generation;
1970     sb->root_level = Vcb->superblock.root_level;
1971 
1972     sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
1973     sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
1974     sb->chunk_root_level = Vcb->superblock.chunk_root_level;
1975 
1976     searchkey.obj_id = BTRFS_ROOT_EXTENT;
1977     searchkey.obj_type = TYPE_ROOT_ITEM;
1978     searchkey.offset = 0xffffffffffffffff;
1979 
1980     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
1981         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
1982             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
1983 
1984             sb->extent_tree_addr = ri->block_number;
1985             sb->extent_tree_generation = ri->generation;
1986             sb->extent_root_level = ri->root_level;
1987         }
1988     }
1989 
1990     searchkey.obj_id = BTRFS_ROOT_FSTREE;
1991 
1992     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
1993         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
1994             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
1995 
1996             sb->fs_tree_addr = ri->block_number;
1997             sb->fs_tree_generation = ri->generation;
1998             sb->fs_root_level = ri->root_level;
1999         }
2000     }
2001 
2002     searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2003 
2004     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
2005         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2006             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2007 
2008             sb->dev_root_addr = ri->block_number;
2009             sb->dev_root_generation = ri->generation;
2010             sb->dev_root_level = ri->root_level;
2011         }
2012     }
2013 
2014     searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2015 
2016     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
2017         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2018             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2019 
2020             sb->csum_root_addr = ri->block_number;
2021             sb->csum_root_generation = ri->generation;
2022             sb->csum_root_level = ri->root_level;
2023         }
2024     }
2025 
2026     sb->total_bytes = Vcb->superblock.total_bytes;
2027     sb->bytes_used = Vcb->superblock.bytes_used;
2028     sb->num_devices = Vcb->superblock.num_devices;
2029 }
2030 
/* One queued superblock write (a single superblock copy on a single device).
 * Created by write_superblock and released by write_superblocks. */
typedef struct {
    void* context;          // the write_superblocks_context this stripe belongs to
    UINT8* buf;             // sector-aligned superblock image to be written
    PMDL mdl;               // MDL describing buf for direct-I/O devices, otherwise NULL
    device* device;         // target device
    NTSTATUS Status;        // result of the write, stored by write_superblock_completion
    PIRP Irp;               // the write IRP for this copy
    LIST_ENTRY list_entry;  // links the stripe into write_superblocks_context.stripes
} write_superblocks_stripe;
2040 
/* Shared state for one batch of superblock writes. */
typedef struct _write_superblocks_context {
    KEVENT Event;       // set by the completion routine once left reaches zero
    LIST_ENTRY stripes; // list of write_superblocks_stripe
    LONG left;          // writes still outstanding; incremented per queued stripe, decremented on completion
} write_superblocks_context;
2046 
2047 _Function_class_(IO_COMPLETION_ROUTINE)
2048 #ifdef __REACTOS__
2049 static NTSTATUS NTAPI write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2050 #else
2051 static NTSTATUS write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2052 #endif
2053     write_superblocks_stripe* stripe = conptr;
2054     write_superblocks_context* context = stripe->context;
2055 
2056     UNUSED(DeviceObject);
2057 
2058     stripe->Status = Irp->IoStatus.Status;
2059 
2060     if (InterlockedDecrement(&context->left) == 0)
2061         KeSetEvent(&context->Event, 0, FALSE);
2062 
2063     return STATUS_MORE_PROCESSING_REQUIRED;
2064 }
2065 
2066 static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) {
2067     unsigned int i = 0;
2068 
2069     // All the documentation says that the Linux driver only writes one superblock
2070     // if it thinks a disk is an SSD, but this doesn't seem to be the case!
2071 
2072     while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2073         ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
2074         superblock* sb;
2075         UINT32 crc32;
2076         write_superblocks_stripe* stripe;
2077         PIO_STACK_LOCATION IrpSp;
2078 
2079         sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG);
2080         if (!sb) {
2081             ERR("out of memory\n");
2082             return STATUS_INSUFFICIENT_RESOURCES;
2083         }
2084 
2085         RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock));
2086 
2087         if (sblen > sizeof(superblock))
2088             RtlZeroMemory((UINT8*)sb + sizeof(superblock), sblen - sizeof(superblock));
2089 
2090         RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM));
2091         sb->sb_phys_addr = superblock_addrs[i];
2092 
2093         crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
2094         RtlCopyMemory(&sb->checksum, &crc32, sizeof(UINT32));
2095 
2096         stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG);
2097         if (!stripe) {
2098             ERR("out of memory\n");
2099             ExFreePool(sb);
2100             return STATUS_INSUFFICIENT_RESOURCES;
2101         }
2102 
2103         stripe->buf = (UINT8*)sb;
2104 
2105         stripe->Irp = IoAllocateIrp(device->devobj->StackSize, FALSE);
2106         if (!stripe->Irp) {
2107             ERR("IoAllocateIrp failed\n");
2108             ExFreePool(stripe);
2109             ExFreePool(sb);
2110             return STATUS_INSUFFICIENT_RESOURCES;
2111         }
2112 
2113         IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2114         IrpSp->MajorFunction = IRP_MJ_WRITE;
2115 
2116         if (i == 0)
2117             IrpSp->Flags |= SL_WRITE_THROUGH;
2118 
2119         if (device->devobj->Flags & DO_BUFFERED_IO) {
2120             stripe->Irp->AssociatedIrp.SystemBuffer = sb;
2121             stripe->mdl = NULL;
2122 
2123             stripe->Irp->Flags = IRP_BUFFERED_IO;
2124         } else if (device->devobj->Flags & DO_DIRECT_IO) {
2125             stripe->mdl = IoAllocateMdl(sb, sblen, FALSE, FALSE, NULL);
2126             if (!stripe->mdl) {
2127                 ERR("IoAllocateMdl failed\n");
2128                 IoFreeIrp(stripe->Irp);
2129                 ExFreePool(stripe);
2130                 ExFreePool(sb);
2131                 return STATUS_INSUFFICIENT_RESOURCES;
2132             }
2133 
2134             stripe->Irp->MdlAddress = stripe->mdl;
2135 
2136             MmBuildMdlForNonPagedPool(stripe->mdl);
2137         } else {
2138             stripe->Irp->UserBuffer = sb;
2139             stripe->mdl = NULL;
2140         }
2141 
2142         IrpSp->Parameters.Write.Length = sblen;
2143         IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i];
2144 
2145         IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, TRUE, TRUE, TRUE);
2146 
2147         stripe->context = context;
2148         stripe->device = device;
2149         InsertTailList(&context->stripes, &stripe->list_entry);
2150 
2151         context->left++;
2152 
2153         i++;
2154     }
2155 
2156     if (i == 0)
2157         ERR("no superblocks written!\n");
2158 
2159     return STATUS_SUCCESS;
2160 }
2161 
2162 static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
2163     UINT64 i;
2164     NTSTATUS Status;
2165     LIST_ENTRY* le;
2166     write_superblocks_context context;
2167 
2168     TRACE("(%p)\n", Vcb);
2169 
2170     le = Vcb->trees.Flink;
2171     while (le != &Vcb->trees) {
2172         tree* t = CONTAINING_RECORD(le, tree, list_entry);
2173 
2174         if (t->write && !t->parent) {
2175             if (t->root == Vcb->root_root) {
2176                 Vcb->superblock.root_tree_addr = t->new_address;
2177                 Vcb->superblock.root_level = t->header.level;
2178             } else if (t->root == Vcb->chunk_root) {
2179                 Vcb->superblock.chunk_tree_addr = t->new_address;
2180                 Vcb->superblock.chunk_root_generation = t->header.generation;
2181                 Vcb->superblock.chunk_root_level = t->header.level;
2182             }
2183         }
2184 
2185         le = le->Flink;
2186     }
2187 
2188     for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2189         RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2190     }
2191 
2192     update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
2193 
2194     KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
2195     InitializeListHead(&context.stripes);
2196     context.left = 0;
2197 
2198     le = Vcb->devices.Flink;
2199     while (le != &Vcb->devices) {
2200         device* dev = CONTAINING_RECORD(le, device, list_entry);
2201 
2202         if (dev->devobj && !dev->readonly) {
2203             Status = write_superblock(Vcb, dev, &context);
2204             if (!NT_SUCCESS(Status)) {
2205                 ERR("write_superblock returned %08x\n", Status);
2206                 goto end;
2207             }
2208         }
2209 
2210         le = le->Flink;
2211     }
2212 
2213     if (IsListEmpty(&context.stripes)) {
2214         ERR("error - not writing any superblocks\n");
2215         Status = STATUS_INTERNAL_ERROR;
2216         goto end;
2217     }
2218 
2219     le = context.stripes.Flink;
2220     while (le != &context.stripes) {
2221         write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2222 
2223         IoCallDriver(stripe->device->devobj, stripe->Irp);
2224 
2225         le = le->Flink;
2226     }
2227 
2228     KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2229 
2230     le = context.stripes.Flink;
2231     while (le != &context.stripes) {
2232         write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2233 
2234         if (!NT_SUCCESS(stripe->Status)) {
2235             ERR("device %llx returned %08x\n", stripe->device->devitem.dev_id, stripe->Status);
2236             log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2237             Status = stripe->Status;
2238             goto end;
2239         }
2240 
2241         le = le->Flink;
2242     }
2243 
2244     Status = STATUS_SUCCESS;
2245 
2246 end:
2247     while (!IsListEmpty(&context.stripes)) {
2248         write_superblocks_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&context.stripes), write_superblocks_stripe, list_entry);
2249 
2250         if (stripe->mdl) {
2251             if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2252                 MmUnlockPages(stripe->mdl);
2253 
2254             IoFreeMdl(stripe->mdl);
2255         }
2256 
2257         if (stripe->Irp)
2258             IoFreeIrp(stripe->Irp);
2259 
2260         if (stripe->buf)
2261             ExFreePool(stripe->buf);
2262 
2263         ExFreePool(stripe);
2264     }
2265 
2266     return Status;
2267 }
2268 
/* Applies the reference changes recorded in a changed_extent to the extent
 * tree, then unlinks and frees the changed_extent.
 *
 * Vcb      - volume.
 * c        - chunk containing the extent; its used counter is reduced and
 *            space_list_add is called when the extent ends up unreferenced.
 * ce       - the changed extent; freed on success.
 * Irp      - passed through to the tree helpers.
 * rollback - passed through to space_list_add.
 *
 * Returns STATUS_SUCCESS, STATUS_INTERNAL_ERROR if the old extent item cannot
 * be found, STATUS_INSUFFICIENT_RESOURCES on allocation failure, or the
 * failing status from a tree helper. On any error return ce is left on its
 * list and is not freed (entries already processed have been removed from
 * its ref lists).
 */
static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
    LIST_ENTRY *le, *le2;
    NTSTATUS Status;
    UINT64 old_size;

    // No references before or after: nothing to change in the extent tree,
    // so just discard both ref lists.
    if (ce->count == 0 && ce->old_count == 0) {
        while (!IsListEmpty(&ce->refs)) {
            changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry);
            ExFreePool(cer);
        }

        while (!IsListEmpty(&ce->old_refs)) {
            changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry);
            ExFreePool(cer);
        }

        goto end;
    }

    // First pass: apply all refcount increases. Nothing is removed from
    // ce->refs here; matching shared-data old refs are dropped.
    le = ce->refs.Flink;
    while (le != &ce->refs) {
        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
        UINT32 old_count = 0;

        if (cer->type == TYPE_EXTENT_DATA_REF) {
            // look up the previous count for the same (root, objid, offset); stays 0 if there is none
            le2 = ce->old_refs.Flink;
            while (le2 != &ce->old_refs) {
                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);

                if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
                    old_count = cer2->edr.count;
                    break;
                }

                le2 = le2->Flink;
            }

            // use the old size if the extent had references before, otherwise the current size
            old_size = ce->old_count > 0 ? ce->old_size : ce->size;

            if (cer->edr.count > old_count) {
                Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp);

                if (!NT_SUCCESS(Status)) {
                    ERR("increase_extent_refcount_data returned %08x\n", Status);
                    return Status;
                }
            }
        } else if (cer->type == TYPE_SHARED_DATA_REF) {
            // discard the old shared ref with the same offset, if any
            le2 = ce->old_refs.Flink;
            while (le2 != &ce->old_refs) {
                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);

                if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
                    RemoveEntryList(&cer2->list_entry);
                    ExFreePool(cer2);
                    break;
                }

                le2 = le2->Flink;
            }
        }

        le = le->Flink;
    }

    // Second pass: apply refcount decreases, re-key the extent item if the
    // size changed, and free each ref as it is processed.
    le = ce->refs.Flink;
    while (le != &ce->refs) {
        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
        LIST_ENTRY* le3 = le->Flink; // saved because cer is unlinked and freed at the bottom of the loop
        UINT32 old_count = 0;

        if (cer->type == TYPE_EXTENT_DATA_REF) {
            // same lookup as in the first pass, but this time the old ref is consumed
            le2 = ce->old_refs.Flink;
            while (le2 != &ce->old_refs) {
                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);

                if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
                    old_count = cer2->edr.count;

                    RemoveEntryList(&cer2->list_entry);
                    ExFreePool(cer2);
                    break;
                }

                le2 = le2->Flink;
            }

            old_size = ce->old_count > 0 ? ce->old_size : ce->size;

            if (cer->edr.count < old_count) {
                Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
                                                       old_count - cer->edr.count, ce->superseded, Irp);

                if (!NT_SUCCESS(Status)) {
                    ERR("decrease_extent_refcount_data returned %08x\n", Status);
                    return Status;
                }
            }

            // The extent's size is the key offset of its EXTENT_ITEM, so a
            // size change means copying the item under a key with the new
            // size and deleting the one keyed by the old size.
            if (ce->size != ce->old_size && ce->old_count > 0) {
                KEY searchkey;
                traverse_ptr tp;
                void* data;

                searchkey.obj_id = ce->address;
                searchkey.obj_type = TYPE_EXTENT_ITEM;
                searchkey.offset = ce->old_size;

                Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("error - find_item returned %08x\n", Status);
                    return Status;
                }

                if (keycmp(searchkey, tp.item->key)) {
                    ERR("could not find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
                    return STATUS_INTERNAL_ERROR;
                }

                if (tp.item->size > 0) {
                    data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, tp.item->data, tp.item->size);
                } else
                    data = NULL;

                // NOTE(review): data is freed here only when the insert fails; presumably
                // insert_tree_item takes ownership on success - confirm
                Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("insert_tree_item returned %08x\n", Status);
                    if (data) ExFreePool(data);
                    return Status;
                }

                Status = delete_tree_item(Vcb, &tp);
                if (!NT_SUCCESS(Status)) {
                    ERR("delete_tree_item returned %08x\n", Status);
                    return Status;
                }
            }
        }

        RemoveEntryList(&cer->list_entry);
        ExFreePool(cer);

        le = le3;
    }

#ifdef DEBUG_PARANOID
    if (!IsListEmpty(&ce->old_refs))
        WARN("old_refs not empty\n");
#endif

end:
    // Extent no longer referenced and not superseded: take its size off the
    // chunk's used counter and pass the range to space_list_add.
    if (ce->count == 0 && !ce->superseded) {
        c->used -= ce->size;
        space_list_add(c, ce->address, ce->size, rollback);
    }

    RemoveEntryList(&ce->list_entry);
    ExFreePool(ce);

    return STATUS_SUCCESS;
}
2437 
2438 void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp) {
2439     KEY searchkey;
2440     traverse_ptr tp, next_tp;
2441     NTSTATUS Status;
2442     UINT64 startaddr, endaddr;
2443     ULONG len;
2444     UINT32* checksums;
2445     RTL_BITMAP bmp;
2446     ULONG* bmparr;
2447     ULONG runlength, index;
2448 
2449     searchkey.obj_id = EXTENT_CSUM_ID;
2450     searchkey.obj_type = TYPE_EXTENT_CSUM;
2451     searchkey.offset = address;
2452 
2453     // FIXME - create checksum_root if it doesn't exist at all
2454 
2455     Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
2456     if (Status == STATUS_NOT_FOUND) { // tree is completely empty
2457         if (csum) { // not deleted
2458             ULONG length2 = length;
2459             UINT64 off = address;
2460             UINT32* data = csum;
2461 
2462             do {
2463                 UINT16 il = (UINT16)min(length2, MAX_CSUM_SIZE / sizeof(UINT32));
2464 
2465                 checksums = ExAllocatePoolWithTag(PagedPool, il * sizeof(UINT32), ALLOC_TAG);
2466                 if (!checksums) {
2467                     ERR("out of memory\n");
2468                     return;
2469                 }
2470 
2471                 RtlCopyMemory(checksums, data, il * sizeof(UINT32));
2472 
2473                 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums,
2474                                           il * sizeof(UINT32), NULL, Irp);
2475                 if (!NT_SUCCESS(Status)) {
2476                     ERR("insert_tree_item returned %08x\n", Status);
2477                     ExFreePool(checksums);
2478                     return;
2479                 }
2480 
2481                 length2 -= il;
2482 
2483                 if (length2 > 0) {
2484                     off += il * Vcb->superblock.sector_size;
2485                     data += il;
2486                 }
2487             } while (length2 > 0);
2488         }
2489     } else if (!NT_SUCCESS(Status)) {
2490         ERR("find_item returned %08x\n", Status);
2491         return;
2492     } else {
2493         UINT32 tplen;
2494 
2495         // FIXME - check entry is TYPE_EXTENT_CSUM?
2496 
2497         if (tp.item->key.offset < address && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= address)
2498             startaddr = tp.item->key.offset;
2499         else
2500             startaddr = address;
2501 
2502         searchkey.obj_id = EXTENT_CSUM_ID;
2503         searchkey.obj_type = TYPE_EXTENT_CSUM;
2504         searchkey.offset = address + (length * Vcb->superblock.sector_size);
2505 
2506         Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
2507         if (!NT_SUCCESS(Status)) {
2508             ERR("find_item returned %08x\n", Status);
2509             return;
2510         }
2511 
2512         tplen = tp.item->size / sizeof(UINT32);
2513 
2514         if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= address + (length * Vcb->superblock.sector_size))
2515             endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
2516         else
2517             endaddr = address + (length * Vcb->superblock.sector_size);
2518 
2519         TRACE("cs starts at %llx (%x sectors)\n", address, length);
2520         TRACE("startaddr = %llx\n", startaddr);
2521         TRACE("endaddr = %llx\n", endaddr);
2522 
2523         len = (ULONG)((endaddr - startaddr) / Vcb->superblock.sector_size);
2524 
2525         checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
2526         if (!checksums) {
2527             ERR("out of memory\n");
2528             return;
2529         }
2530 
2531         bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
2532         if (!bmparr) {
2533             ERR("out of memory\n");
2534             ExFreePool(checksums);
2535             return;
2536         }
2537 
2538         RtlInitializeBitMap(&bmp, bmparr, len);
2539         RtlSetAllBits(&bmp);
2540 
2541         searchkey.obj_id = EXTENT_CSUM_ID;
2542         searchkey.obj_type = TYPE_EXTENT_CSUM;
2543         searchkey.offset = address;
2544 
2545         Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
2546         if (!NT_SUCCESS(Status)) {
2547             ERR("find_item returned %08x\n", Status);
2548             ExFreePool(checksums);
2549             ExFreePool(bmparr);
2550             return;
2551         }
2552 
2553         // set bit = free space, cleared bit = allocated sector
2554 
2555         while (tp.item->key.offset < endaddr) {
2556             if (tp.item->key.offset >= startaddr) {
2557                 if (tp.item->size > 0) {
2558                     ULONG itemlen = (ULONG)min((len - (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size) * sizeof(UINT32), tp.item->size);
2559 
2560                     RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, itemlen);
2561                     RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) / Vcb->superblock.sector_size), itemlen / sizeof(UINT32));
2562                 }
2563 
2564                 Status = delete_tree_item(Vcb, &tp);
2565                 if (!NT_SUCCESS(Status)) {
2566                     ERR("delete_tree_item returned %08x\n", Status);
2567                     ExFreePool(checksums);
2568                     ExFreePool(bmparr);
2569                     return;
2570                 }
2571             }
2572 
2573             if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
2574                 tp = next_tp;
2575             } else
2576                 break;
2577         }
2578 
2579         if (!csum) { // deleted
2580             RtlSetBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2581         } else {
2582             RtlCopyMemory(&checksums[(address - startaddr) / Vcb->superblock.sector_size], csum, length * sizeof(UINT32));
2583             RtlClearBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2584         }
2585 
2586         runlength = RtlFindFirstRunClear(&bmp, &index);
2587 
2588         while (runlength != 0) {
2589             do {
2590                 UINT16 rl;
2591                 UINT64 off;
2592                 UINT32* data;
2593 
2594                 if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
2595                     rl = MAX_CSUM_SIZE / sizeof(UINT32);
2596                 else
2597                     rl = (UINT16)runlength;
2598 
2599                 data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
2600                 if (!data) {
2601                     ERR("out of memory\n");
2602                     ExFreePool(bmparr);
2603                     ExFreePool(checksums);
2604                     return;
2605                 }
2606 
2607                 RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
2608 
2609                 off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size);
2610 
2611                 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(UINT32) * rl, NULL, Irp);
2612                 if (!NT_SUCCESS(Status)) {
2613                     ERR("insert_tree_item returned %08x\n", Status);
2614                     ExFreePool(data);
2615                     ExFreePool(bmparr);
2616                     ExFreePool(checksums);
2617                     return;
2618                 }
2619 
2620                 runlength -= rl;
2621                 index += rl;
2622             } while (runlength > 0);
2623 
2624             runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
2625         }
2626 
2627         ExFreePool(bmparr);
2628         ExFreePool(checksums);
2629     }
2630 }
2631 
2632 static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
2633     LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
2634     chunk* c;
2635     KEY searchkey;
2636     traverse_ptr tp;
2637     BLOCK_GROUP_ITEM* bgi;
2638     NTSTATUS Status;
2639 
2640     TRACE("(%p)\n", Vcb);
2641 
2642     ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
2643 
2644     while (le != &Vcb->chunks) {
2645         c = CONTAINING_RECORD(le, chunk, list_entry);
2646 
2647         ExAcquireResourceExclusiveLite(&c->lock, TRUE);
2648 
2649         if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) {
2650             Status = load_cache_chunk(Vcb, c, NULL);
2651 
2652             if (!NT_SUCCESS(Status)) {
2653                 ERR("load_cache_chunk returned %08x\n", Status);
2654                 ExReleaseResourceLite(&c->lock);
2655                 goto end;
2656             }
2657         }
2658 
2659         le2 = c->changed_extents.Flink;
2660         while (le2 != &c->changed_extents) {
2661             LIST_ENTRY* le3 = le2->Flink;
2662             changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);
2663 
2664             Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
2665             if (!NT_SUCCESS(Status)) {
2666                 ERR("flush_changed_extent returned %08x\n", Status);
2667                 ExReleaseResourceLite(&c->lock);
2668                 goto end;
2669             }
2670 
2671             le2 = le3;
2672         }
2673 
2674         // This is usually done by update_chunks, but we have to check again in case any new chunks
2675         // have been allocated since.
2676         if (c->created) {
2677             Status = create_chunk(Vcb, c, Irp);
2678             if (!NT_SUCCESS(Status)) {
2679                 ERR("create_chunk returned %08x\n", Status);
2680                 ExReleaseResourceLite(&c->lock);
2681                 goto end;
2682             }
2683         }
2684 
2685         if (c->old_cache) {
2686             if (c->old_cache->dirty) {
2687                 LIST_ENTRY batchlist;
2688 
2689                 InitializeListHead(&batchlist);
2690 
2691                 Status = flush_fcb(c->old_cache, FALSE, &batchlist, Irp);
2692                 if (!NT_SUCCESS(Status)) {
2693                     ERR("flush_fcb returned %08x\n", Status);
2694                     ExReleaseResourceLite(&c->lock);
2695                     clear_batch_list(Vcb, &batchlist);
2696                     goto end;
2697                 }
2698 
2699                 Status = commit_batch_list(Vcb, &batchlist, Irp);
2700                 if (!NT_SUCCESS(Status)) {
2701                     ERR("commit_batch_list returned %08x\n", Status);
2702                     ExReleaseResourceLite(&c->lock);
2703                     goto end;
2704                 }
2705             }
2706 
2707             free_fcb(Vcb, c->old_cache);
2708             c->old_cache = NULL;
2709         }
2710 
2711         if (c->used != c->oldused) {
2712             searchkey.obj_id = c->offset;
2713             searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2714             searchkey.offset = c->chunk_item->size;
2715 
2716             Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
2717             if (!NT_SUCCESS(Status)) {
2718                 ERR("error - find_item returned %08x\n", Status);
2719                 ExReleaseResourceLite(&c->lock);
2720                 goto end;
2721             }
2722 
2723             if (keycmp(searchkey, tp.item->key)) {
2724                 ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2725                 Status = STATUS_INTERNAL_ERROR;
2726                 ExReleaseResourceLite(&c->lock);
2727                 goto end;
2728             }
2729 
2730             if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2731                 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2732                 Status = STATUS_INTERNAL_ERROR;
2733                 ExReleaseResourceLite(&c->lock);
2734                 goto end;
2735             }
2736 
2737             bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2738             if (!bgi) {
2739                 ERR("out of memory\n");
2740                 Status = STATUS_INSUFFICIENT_RESOURCES;
2741                 ExReleaseResourceLite(&c->lock);
2742                 goto end;
2743             }
2744 
2745             RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2746             bgi->used = c->used;
2747 
2748             TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
2749 
2750             Status = delete_tree_item(Vcb, &tp);
2751             if (!NT_SUCCESS(Status)) {
2752                 ERR("delete_tree_item returned %08x\n", Status);
2753                 ExFreePool(bgi);
2754                 ExReleaseResourceLite(&c->lock);
2755                 goto end;
2756             }
2757 
2758             Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp);
2759             if (!NT_SUCCESS(Status)) {
2760                 ERR("insert_tree_item returned %08x\n", Status);
2761                 ExFreePool(bgi);
2762                 ExReleaseResourceLite(&c->lock);
2763                 goto end;
2764             }
2765 
2766             TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2767 
2768             Vcb->superblock.bytes_used += c->used - c->oldused;
2769 
2770             TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2771 
2772             c->oldused = c->used;
2773         }
2774 
2775         ExReleaseResourceLite(&c->lock);
2776 
2777         le = le->Flink;
2778     }
2779 
2780     Status = STATUS_SUCCESS;
2781 
2782 end:
2783     ExReleaseResourceLite(&Vcb->chunk_lock);
2784 
2785     return Status;
2786 }
2787 
2788 static void get_first_item(tree* t, KEY* key) {
2789     LIST_ENTRY* le;
2790 
2791     le = t->itemlist.Flink;
2792     while (le != &t->itemlist) {
2793         tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2794 
2795         *key = td->key;
2796         return;
2797     }
2798 }
2799 
2800 static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
2801     tree *nt, *pt;
2802     tree_data* td;
2803     tree_data* oldlastitem;
2804 
2805     TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2806 
2807     nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2808     if (!nt) {
2809         ERR("out of memory\n");
2810         return STATUS_INSUFFICIENT_RESOURCES;
2811     }
2812 
2813     RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2814     nt->header.address = 0;
2815     nt->header.generation = Vcb->superblock.generation;
2816     nt->header.num_items = t->header.num_items - numitems;
2817     nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
2818 
2819     nt->has_address = FALSE;
2820     nt->Vcb = Vcb;
2821     nt->parent = t->parent;
2822 
2823 #ifdef DEBUG_PARANOID
2824     if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
2825 #endif
2826 
2827     nt->root = t->root;
2828     nt->new_address = 0;
2829     nt->has_new_address = FALSE;
2830     nt->updated_extents = FALSE;
2831     nt->uniqueness_determined = TRUE;
2832     nt->is_unique = TRUE;
2833     nt->list_entry_hash.Flink = NULL;
2834     nt->buf = NULL;
2835     InitializeListHead(&nt->itemlist);
2836 
2837     oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2838 
2839     nt->itemlist.Flink = &newfirstitem->list_entry;
2840     nt->itemlist.Blink = t->itemlist.Blink;
2841     nt->itemlist.Flink->Blink = &nt->itemlist;
2842     nt->itemlist.Blink->Flink = &nt->itemlist;
2843 
2844     t->itemlist.Blink = &oldlastitem->list_entry;
2845     t->itemlist.Blink->Flink = &t->itemlist;
2846 
2847     nt->size = t->size - size;
2848     t->size = size;
2849     t->header.num_items = numitems;
2850     nt->write = TRUE;
2851 
2852     InsertTailList(&Vcb->trees, &nt->list_entry);
2853 
2854     if (nt->header.level > 0) {
2855         LIST_ENTRY* le = nt->itemlist.Flink;
2856 
2857         while (le != &nt->itemlist) {
2858             tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2859 
2860             if (td2->treeholder.tree) {
2861                 td2->treeholder.tree->parent = nt;
2862 #ifdef DEBUG_PARANOID
2863                 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
2864 #endif
2865             }
2866 
2867             le = le->Flink;
2868         }
2869     } else {
2870         LIST_ENTRY* le = nt->itemlist.Flink;
2871 
2872         while (le != &nt->itemlist) {
2873             tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2874 
2875             if (!td2->inserted && td2->data) {
2876                 UINT8* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);
2877 
2878                 if (!data) {
2879                     ERR("out of memory\n");
2880                     return STATUS_INSUFFICIENT_RESOURCES;
2881                 }
2882 
2883                 RtlCopyMemory(data, td2->data, td2->size);
2884                 td2->data = data;
2885                 td2->inserted = TRUE;
2886             }
2887 
2888             le = le->Flink;
2889         }
2890     }
2891 
2892     if (nt->parent) {
2893         td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
2894         if (!td) {
2895             ERR("out of memory\n");
2896             return STATUS_INSUFFICIENT_RESOURCES;
2897         }
2898 
2899         td->key = newfirstitem->key;
2900 
2901         InsertHeadList(&t->paritem->list_entry, &td->list_entry);
2902 
2903         td->ignore = FALSE;
2904         td->inserted = TRUE;
2905         td->treeholder.tree = nt;
2906         nt->paritem = td;
2907 
2908         nt->parent->header.num_items++;
2909         nt->parent->size += sizeof(internal_node);
2910 
2911         goto end;
2912     }
2913 
2914     TRACE("adding new tree parent\n");
2915 
2916     if (nt->header.level == 255) {
2917         ERR("cannot add parent to tree at level 255\n");
2918         return STATUS_INTERNAL_ERROR;
2919     }
2920 
2921     pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2922     if (!pt) {
2923         ERR("out of memory\n");
2924         return STATUS_INSUFFICIENT_RESOURCES;
2925     }
2926 
2927     RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
2928     pt->header.address = 0;
2929     pt->header.num_items = 2;
2930     pt->header.level = nt->header.level + 1;
2931     pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
2932 
2933     pt->has_address = FALSE;
2934     pt->Vcb = Vcb;
2935     pt->parent = NULL;
2936     pt->paritem = NULL;
2937     pt->root = t->root;
2938     pt->new_address = 0;
2939     pt->has_new_address = FALSE;
2940     pt->updated_extents = FALSE;
2941     pt->size = pt->header.num_items * sizeof(internal_node);
2942     pt->uniqueness_determined = TRUE;
2943     pt->is_unique = TRUE;
2944     pt->list_entry_hash.Flink = NULL;
2945     pt->buf = NULL;
2946     InitializeListHead(&pt->itemlist);
2947 
2948     InsertTailList(&Vcb->trees, &pt->list_entry);
2949 
2950     td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
2951     if (!td) {
2952         ERR("out of memory\n");
2953         return STATUS_INSUFFICIENT_RESOURCES;
2954     }
2955 
2956     get_first_item(t, &td->key);
2957     td->ignore = FALSE;
2958     td->inserted = FALSE;
2959     td->treeholder.address = 0;
2960     td->treeholder.generation = Vcb->superblock.generation;
2961     td->treeholder.tree = t;
2962     InsertTailList(&pt->itemlist, &td->list_entry);
2963     t->paritem = td;
2964 
2965     td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
2966     if (!td) {
2967         ERR("out of memory\n");
2968         return STATUS_INSUFFICIENT_RESOURCES;
2969     }
2970 
2971     td->key = newfirstitem->key;
2972     td->ignore = FALSE;
2973     td->inserted = FALSE;
2974     td->treeholder.address = 0;
2975     td->treeholder.generation = Vcb->superblock.generation;
2976     td->treeholder.tree = nt;
2977     InsertTailList(&pt->itemlist, &td->list_entry);
2978     nt->paritem = td;
2979 
2980     pt->write = TRUE;
2981 
2982     t->root->treeholder.tree = pt;
2983 
2984     t->parent = pt;
2985     nt->parent = pt;
2986 
2987 #ifdef DEBUG_PARANOID
2988     if (t->parent && t->parent->header.level <= t->header.level) int3;
2989     if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
2990 #endif
2991 
2992 end:
2993     t->root->root_item.bytes_used += Vcb->superblock.node_size;
2994 
2995     return STATUS_SUCCESS;
2996 }
2997 
2998 static NTSTATUS split_tree(device_extension* Vcb, tree* t) {
2999     LIST_ENTRY* le;
3000     UINT32 size, ds, numitems;
3001 
3002     size = 0;
3003     numitems = 0;
3004 
3005     // FIXME - naïve implementation: maximizes number of filled trees
3006 
3007     le = t->itemlist.Flink;
3008     while (le != &t->itemlist) {
3009         tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3010 
3011         if (!td->ignore) {
3012             if (t->header.level == 0)
3013                 ds = sizeof(leaf_node) + td->size;
3014             else
3015                 ds = sizeof(internal_node);
3016 
3017             if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) {
3018                 ERR("(%llx,%x,%llx) in tree %llx is too large (%x > %x)\n",
3019                     td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id,
3020                     ds, Vcb->superblock.node_size - sizeof(tree_header));
3021                 return STATUS_INTERNAL_ERROR;
3022             }
3023 
3024             // FIXME - move back if previous item was deleted item with same key
3025             if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
3026                 return split_tree_at(Vcb, t, td, numitems, size);
3027 
3028             size += ds;
3029             numitems++;
3030         }
3031 
3032         le = le->Flink;
3033     }
3034 
3035     return STATUS_SUCCESS;
3036 }
3037 
3038 BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) {
3039     KEY searchkey;
3040     traverse_ptr tp;
3041     NTSTATUS Status;
3042     BOOL ret = FALSE;
3043     EXTENT_ITEM* ei;
3044     UINT8* type;
3045 
3046     if (t->uniqueness_determined)
3047         return t->is_unique;
3048 
3049     if (t->parent && !is_tree_unique(Vcb, t->parent, Irp))
3050         goto end;
3051 
3052     if (t->has_address) {
3053         searchkey.obj_id = t->header.address;
3054         searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
3055         searchkey.offset = 0xffffffffffffffff;
3056 
3057         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
3058         if (!NT_SUCCESS(Status)) {
3059             ERR("error - find_item returned %08x\n", Status);
3060             goto end;
3061         }
3062 
3063         if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM))
3064             goto end;
3065 
3066         if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
3067             goto end;
3068 
3069         if (tp.item->size < sizeof(EXTENT_ITEM))
3070             goto end;
3071 
3072         ei = (EXTENT_ITEM*)tp.item->data;
3073 
3074         if (ei->refcount > 1)
3075             goto end;
3076 
3077         if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
3078             EXTENT_ITEM2* ei2;
3079 
3080             if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
3081                 goto end;
3082 
3083             ei2 = (EXTENT_ITEM2*)&ei[1];
3084             type = (UINT8*)&ei2[1];
3085         } else
3086             type = (UINT8*)&ei[1];
3087 
3088         if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF)
3089             goto end;
3090     }
3091 
3092     ret = TRUE;
3093 
3094 end:
3095     t->is_unique = ret;
3096     t->uniqueness_determined = TRUE;
3097 
3098     return ret;
3099 }
3100 
3101 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, BOOL* done, BOOL* done_deletions, PIRP Irp, LIST_ENTRY* rollback) {
3102     LIST_ENTRY* le;
3103     tree_data* nextparitem = NULL;
3104     NTSTATUS Status;
3105     tree *next_tree, *par;
3106     BOOL loaded;
3107 
3108     *done = FALSE;
3109 
3110     TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
3111 
3112     // FIXME - doesn't capture everything, as it doesn't ascend
3113     le = t->paritem->list_entry.Flink;
3114     while (le != &t->parent->itemlist) {
3115         tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3116 
3117         if (!td->ignore) {
3118             nextparitem = td;
3119             break;
3120         }
3121 
3122         le = le->Flink;
3123     }
3124 
3125     if (!nextparitem)
3126         return STATUS_SUCCESS;
3127 
3128     TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
3129 
3130     Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded, NULL);
3131     if (!NT_SUCCESS(Status)) {
3132         ERR("do_load_tree returned %08x\n", Status);
3133         return Status;
3134     }
3135 
3136     if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
3137         return STATUS_SUCCESS;
3138 
3139     next_tree = nextparitem->treeholder.tree;
3140 
3141     if (!next_tree->updated_extents && next_tree->has_address) {
3142         Status = update_tree_extents(Vcb, next_tree, Irp, rollback);
3143         if (!NT_SUCCESS(Status)) {
3144             ERR("update_tree_extents returned %08x\n", Status);
3145             return Status;
3146         }
3147     }
3148 
3149     if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
3150         // merge two trees into one
3151 
3152         t->header.num_items += next_tree->header.num_items;
3153         t->size += next_tree->size;
3154 
3155         if (next_tree->header.level > 0) {
3156             le = next_tree->itemlist.Flink;
3157 
3158             while (le != &next_tree->itemlist) {
3159                 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3160 
3161                 if (td2->treeholder.tree) {
3162                     td2->treeholder.tree->parent = t;
3163 #ifdef DEBUG_PARANOID
3164                     if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
3165 #endif
3166                 }
3167 
3168                 td2->inserted = TRUE;
3169                 le = le->Flink;
3170             }
3171         } else {
3172             le = next_tree->itemlist.Flink;
3173 
3174             while (le != &next_tree->itemlist) {
3175                 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3176 
3177                 if (!td2->inserted && td2->data) {
3178                     UINT8* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);
3179 
3180                     if (!data) {
3181                         ERR("out of memory\n");
3182                         return STATUS_INSUFFICIENT_RESOURCES;
3183                     }
3184 
3185                     RtlCopyMemory(data, td2->data, td2->size);
3186                     td2->data = data;
3187                     td2->inserted = TRUE;
3188                 }
3189 
3190                 le = le->Flink;
3191             }
3192         }
3193 
3194         t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
3195         t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
3196         t->itemlist.Blink = next_tree->itemlist.Blink;
3197         t->itemlist.Blink->Flink = &t->itemlist;
3198 
3199         next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
3200 
3201         next_tree->header.num_items = 0;
3202         next_tree->size = 0;
3203 
3204         if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
3205             Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);
3206 
3207             if (!NT_SUCCESS(Status)) {
3208                 ERR("reduce_tree_extent returned %08x\n", Status);
3209                 return Status;
3210             }
3211         } else if (next_tree->has_address) {
3212             Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);
3213 
3214             if (!NT_SUCCESS(Status)) {
3215                 ERR("reduce_tree_extent returned %08x\n", Status);
3216                 return Status;
3217             }
3218         }
3219 
3220         if (!nextparitem->ignore) {
3221             nextparitem->ignore = TRUE;
3222             next_tree->parent->header.num_items--;
3223             next_tree->parent->size -= sizeof(internal_node);
3224 
3225             *done_deletions = TRUE;
3226         }
3227 
3228         par = next_tree->parent;
3229         while (par) {
3230             par->write = TRUE;
3231             par = par->parent;
3232         }
3233 
3234         RemoveEntryList(&nextparitem->list_entry);
3235         ExFreePool(next_tree->paritem);
3236         next_tree->paritem = NULL;
3237 
3238         next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
3239 
3240         free_tree(next_tree);
3241 
3242         *done = TRUE;
3243     } else {
3244         // rebalance by moving items from second tree into first
3245         ULONG avg_size = (t->size + next_tree->size) / 2;
3246         KEY firstitem = {0, 0, 0};
3247         BOOL changed = FALSE;
3248 
3249         TRACE("attempting rebalance\n");
3250 
3251         le = next_tree->itemlist.Flink;
3252         while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
3253             tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3254             ULONG size;
3255 
3256             if (!td->ignore) {
3257                 if (next_tree->header.level == 0)
3258                     size = sizeof(leaf_node) + td->size;
3259                 else
3260                     size = sizeof(internal_node);
3261             } else
3262                 size = 0;
3263 
3264             if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
3265                 RemoveEntryList(&td->list_entry);
3266                 InsertTailList(&t->itemlist, &td->list_entry);
3267 
3268                 if (next_tree->header.level > 0 && td->treeholder.tree) {
3269                     td->treeholder.tree->parent = t;
3270 #ifdef DEBUG_PARANOID
3271                     if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
3272 #endif
3273                 } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) {
3274                     UINT8* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG);
3275 
3276                     if (!data) {
3277                         ERR("out of memory\n");
3278                         return STATUS_INSUFFICIENT_RESOURCES;
3279                     }
3280 
3281                     RtlCopyMemory(data, td->data, td->size);
3282                     td->data = data;
3283                 }
3284 
3285                 td->inserted = TRUE;
3286 
3287                 if (!td->ignore) {
3288                     next_tree->size -= size;
3289                     t->size += size;
3290                     next_tree->header.num_items--;
3291                     t->header.num_items++;
3292                 }
3293 
3294                 changed = TRUE;
3295             } else
3296                 break;
3297 
3298             le = next_tree->itemlist.Flink;
3299         }
3300 
3301         le = next_tree->itemlist.Flink;
3302         while (le != &next_tree->itemlist) {
3303             tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3304 
3305             if (!td->ignore) {
3306                 firstitem = td->key;
3307                 break;
3308             }
3309 
3310             le = le->Flink;
3311         }
3312 
3313         // FIXME - once ascension is working, make this work with parent's parent, etc.
3314         if (next_tree->paritem)
3315             next_tree->paritem->key = firstitem;
3316 
3317         par = next_tree;
3318         while (par) {
3319             par->write = TRUE;
3320             par = par->parent;
3321         }
3322 
3323         if (changed)
3324             *done = TRUE;
3325     }
3326 
3327     return STATUS_SUCCESS;
3328 }
3329 
3330 static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, PIRP Irp) {
3331     KEY searchkey;
3332     traverse_ptr tp;
3333     NTSTATUS Status;
3334 
3335     if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3336         searchkey.obj_id = address;
3337         searchkey.obj_type = TYPE_METADATA_ITEM;
3338         searchkey.offset = t->header.level;
3339 
3340         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
3341         if (!NT_SUCCESS(Status)) {
3342             ERR("error - find_item returned %08x\n", Status);
3343             return Status;
3344         }
3345 
3346         if (!keycmp(tp.item->key, searchkey)) {
3347             EXTENT_ITEM_SKINNY_METADATA* eism;
3348 
3349             if (tp.item->size > 0) {
3350                 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3351 
3352                 if (!eism) {
3353                     ERR("out of memory\n");
3354                     return STATUS_INSUFFICIENT_RESOURCES;
3355                 }
3356 
3357                 RtlCopyMemory(eism, tp.item->data, tp.item->size);
3358             } else
3359                 eism = NULL;
3360 
3361             Status = delete_tree_item(Vcb, &tp);
3362             if (!NT_SUCCESS(Status)) {
3363                 ERR("delete_tree_item returned %08x\n", Status);
3364                 if (eism) ExFreePool(eism);
3365                 return Status;
3366             }
3367 
3368             Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp);
3369             if (!NT_SUCCESS(Status)) {
3370                 ERR("insert_tree_item returned %08x\n", Status);
3371                 if (eism) ExFreePool(eism);
3372                 return Status;
3373             }
3374 
3375             return STATUS_SUCCESS;
3376         }
3377     }
3378 
3379     searchkey.obj_id = address;
3380     searchkey.obj_type = TYPE_EXTENT_ITEM;
3381     searchkey.offset = 0xffffffffffffffff;
3382 
3383     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
3384     if (!NT_SUCCESS(Status)) {
3385         ERR("error - find_item returned %08x\n", Status);
3386         return Status;
3387     }
3388 
3389     if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3390         EXTENT_ITEM_TREE* eit;
3391 
3392         if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
3393             ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
3394             return STATUS_INTERNAL_ERROR;
3395         }
3396 
3397         eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3398 
3399         if (!eit) {
3400             ERR("out of memory\n");
3401             return STATUS_INSUFFICIENT_RESOURCES;
3402         }
3403 
3404         RtlCopyMemory(eit, tp.item->data, tp.item->size);
3405 
3406         Status = delete_tree_item(Vcb, &tp);
3407         if (!NT_SUCCESS(Status)) {
3408             ERR("delete_tree_item returned %08x\n", Status);
3409             ExFreePool(eit);
3410             return Status;
3411         }
3412 
3413         eit->level = level;
3414 
3415         Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp);
3416         if (!NT_SUCCESS(Status)) {
3417             ERR("insert_tree_item returned %08x\n", Status);
3418             ExFreePool(eit);
3419             return Status;
3420         }
3421 
3422         return STATUS_SUCCESS;
3423     }
3424 
3425     ERR("could not find EXTENT_ITEM for address %llx\n", address);
3426 
3427     return STATUS_INTERNAL_ERROR;
3428 }
3429 
3430 static NTSTATUS update_tree_extents_recursive(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
3431     NTSTATUS Status;
3432 
3433     if (t->parent && !t->parent->updated_extents && t->parent->has_address) {
3434         Status = update_tree_extents_recursive(Vcb, t->parent, Irp, rollback);
3435         if (!NT_SUCCESS(Status))
3436             return Status;
3437     }
3438 
3439     Status = update_tree_extents(Vcb, t, Irp, rollback);
3440     if (!NT_SUCCESS(Status)) {
3441         ERR("update_tree_extents returned %08x\n", Status);
3442         return Status;
3443     }
3444 
3445     return STATUS_SUCCESS;
3446 }
3447 
3448 static NTSTATUS do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
3449     ULONG level, max_level;
3450     UINT32 min_size;
3451     BOOL empty, done_deletions = FALSE;
3452     NTSTATUS Status;
3453     tree* t;
3454 
3455     TRACE("(%p)\n", Vcb);
3456 
3457     max_level = 0;
3458 
3459     for (level = 0; level <= 255; level++) {
3460         LIST_ENTRY *le, *nextle;
3461 
3462         empty = TRUE;
3463 
3464         TRACE("doing level %u\n", level);
3465 
3466         le = Vcb->trees.Flink;
3467 
3468         while (le != &Vcb->trees) {
3469             t = CONTAINING_RECORD(le, tree, list_entry);
3470 
3471             nextle = le->Flink;
3472 
3473             if (t->write && t->header.level == level) {
3474                 empty = FALSE;
3475 
3476                 if (t->header.num_items == 0) {
3477                     if (t->parent) {
3478                         done_deletions = TRUE;
3479 
3480                         TRACE("deleting tree in root %llx\n", t->root->id);
3481 
3482                         t->root->root_item.bytes_used -= Vcb->superblock.node_size;
3483 
3484                         if (t->has_new_address) { // delete associated EXTENT_ITEM
3485                             Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);
3486 
3487                             if (!NT_SUCCESS(Status)) {
3488                                 ERR("reduce_tree_extent returned %08x\n", Status);
3489                                 return Status;
3490                             }
3491 
3492                             t->has_new_address = FALSE;
3493                         } else if (t->has_address) {
3494                             Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);
3495 
3496                             if (!NT_SUCCESS(Status)) {
3497                                 ERR("reduce_tree_extent returned %08x\n", Status);
3498                                 return Status;
3499                             }
3500 
3501                             t->has_address = FALSE;
3502                         }
3503 
3504                         if (!t->paritem->ignore) {
3505                             t->paritem->ignore = TRUE;
3506                             t->parent->header.num_items--;
3507                             t->parent->size -= sizeof(internal_node);
3508                         }
3509 
3510                         RemoveEntryList(&t->paritem->list_entry);
3511                         ExFreePool(t->paritem);
3512                         t->paritem = NULL;
3513 
3514                         free_tree(t);
3515                     } else if (t->header.level != 0) {
3516                         if (t->has_new_address) {
3517                             Status = update_extent_level(Vcb, t->new_address, t, 0, Irp);
3518 
3519                             if (!NT_SUCCESS(Status)) {
3520                                 ERR("update_extent_level returned %08x\n", Status);
3521                                 return Status;
3522                             }
3523                         }
3524 
3525                         t->header.level = 0;
3526                     }
3527                 } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
3528                     TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));
3529 
3530                     if (!t->updated_extents && t->has_address) {
3531                         Status = update_tree_extents_recursive(Vcb, t, Irp, rollback);
3532                         if (!NT_SUCCESS(Status)) {
3533                             ERR("update_tree_extents_recursive returned %08x\n", Status);
3534                             return Status;
3535                         }
3536                     }
3537 
3538                     Status = split_tree(Vcb, t);
3539 
3540                     if (!NT_SUCCESS(Status)) {
3541                         ERR("split_tree returned %08x\n", Status);
3542                         return Status;
3543                     }
3544                 }
3545             }
3546 
3547             le = nextle;
3548         }
3549 
3550         if (!empty) {
3551             max_level = level;
3552         } else {
3553             TRACE("nothing found for level %u\n", level);
3554             break;
3555         }
3556     }
3557 
3558     min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
3559 
3560     for (level = 0; level <= max_level; level++) {
3561         LIST_ENTRY* le;
3562 
3563         le = Vcb->trees.Flink;
3564 
3565         while (le != &Vcb->trees) {
3566             t = CONTAINING_RECORD(le, tree, list_entry);
3567 
3568             if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size &&
3569                 t->root->id != BTRFS_ROOT_FREE_SPACE && is_tree_unique(Vcb, t, Irp)) {
3570                 BOOL done;
3571 
3572                 do {
3573                     Status = try_tree_amalgamate(Vcb, t, &done, &done_deletions, Irp, rollback);
3574                     if (!NT_SUCCESS(Status)) {
3575                         ERR("try_tree_amalgamate returned %08x\n", Status);
3576                         return Status;
3577                     }
3578                 } while (done && t->size < min_size);
3579             }
3580 
3581             le = le->Flink;
3582         }
3583     }
3584 
3585     // simplify trees if top tree only has one entry
3586 
3587     if (done_deletions) {
3588         for (level = max_level; level > 0; level--) {
3589             LIST_ENTRY *le, *nextle;
3590 
3591             le = Vcb->trees.Flink;
3592             while (le != &Vcb->trees) {
3593                 nextle = le->Flink;
3594                 t = CONTAINING_RECORD(le, tree, list_entry);
3595 
3596                 if (t->write && t->header.level == level) {
3597                     if (!t->parent && t->header.num_items == 1) {
3598                         LIST_ENTRY* le2 = t->itemlist.Flink;
3599                         tree_data* td = NULL;
3600                         tree* child_tree = NULL;
3601 
3602                         while (le2 != &t->itemlist) {
3603                             td = CONTAINING_RECORD(le2, tree_data, list_entry);
3604                             if (!td->ignore)
3605                                 break;
3606                             le2 = le2->Flink;
3607                         }
3608 
3609                         TRACE("deleting top-level tree in root %llx with one item\n", t->root->id);
3610 
3611                         if (t->has_new_address) { // delete associated EXTENT_ITEM
3612                             Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback);
3613 
3614                             if (!NT_SUCCESS(Status)) {
3615                                 ERR("reduce_tree_extent returned %08x\n", Status);
3616                                 return Status;
3617                             }
3618 
3619                             t->has_new_address = FALSE;
3620                         } else if (t->has_address) {
3621                             Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback);
3622 
3623                             if (!NT_SUCCESS(Status)) {
3624                                 ERR("reduce_tree_extent returned %08x\n", Status);
3625                                 return Status;
3626                             }
3627 
3628                             t->has_address = FALSE;
3629                         }
3630 
3631                         if (!td->treeholder.tree) { // load first item if not already loaded
3632                             KEY searchkey = {0,0,0};
3633                             traverse_ptr tp;
3634 
3635                             Status = find_item(Vcb, t->root, &tp, &searchkey, FALSE, Irp);
3636                             if (!NT_SUCCESS(Status)) {
3637                                 ERR("error - find_item returned %08x\n", Status);
3638                                 return Status;
3639                             }
3640                         }
3641 
3642                         child_tree = td->treeholder.tree;
3643 
3644                         if (child_tree) {
3645                             child_tree->parent = NULL;
3646                             child_tree->paritem = NULL;
3647                         }
3648 
3649                         t->root->root_item.bytes_used -= Vcb->superblock.node_size;
3650 
3651                         free_tree(t);
3652 
3653                         if (child_tree)
3654                             child_tree->root->treeholder.tree = child_tree;
3655                     }
3656                 }
3657 
3658                 le = nextle;
3659             }
3660         }
3661     }
3662 
3663     return STATUS_SUCCESS;
3664 }
3665 
3666 static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, tree* parent, PIRP Irp, LIST_ENTRY* rollback) {
3667     NTSTATUS Status;
3668 
3669     if (!th->tree) {
3670         Status = load_tree(Vcb, th->address, r, &th->tree, th->generation, NULL);
3671 
3672         if (!NT_SUCCESS(Status)) {
3673             ERR("load_tree(%llx) returned %08x\n", th->address, Status);
3674             return Status;
3675         }
3676     }
3677 
3678     if (level > 0) {
3679         LIST_ENTRY* le = th->tree->itemlist.Flink;
3680 
3681         while (le != &th->tree->itemlist) {
3682             tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3683 
3684             if (!td->ignore) {
3685                 Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, th->tree, Irp, rollback);
3686 
3687                 if (!NT_SUCCESS(Status)) {
3688                     ERR("remove_root_extents returned %08x\n", Status);
3689                     return Status;
3690                 }
3691             }
3692 
3693             le = le->Flink;
3694         }
3695     }
3696 
3697     if (th->tree && !th->tree->updated_extents && th->tree->has_address) {
3698         Status = update_tree_extents(Vcb, th->tree, Irp, rollback);
3699         if (!NT_SUCCESS(Status)) {
3700             ERR("update_tree_extents returned %08x\n", Status);
3701             return Status;
3702         }
3703     }
3704 
3705     if (!th->tree || th->tree->has_address) {
3706         Status = reduce_tree_extent(Vcb, th->address, NULL, parent ? parent->header.tree_id : r->id, level, Irp, rollback);
3707 
3708         if (!NT_SUCCESS(Status)) {
3709             ERR("reduce_tree_extent(%llx) returned %08x\n", th->address, Status);
3710             return Status;
3711         }
3712     }
3713 
3714     return STATUS_SUCCESS;
3715 }
3716 
3717 static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
3718     NTSTATUS Status;
3719     KEY searchkey;
3720     traverse_ptr tp;
3721 
3722     Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback);
3723     if (!NT_SUCCESS(Status)) {
3724         ERR("remove_root_extents returned %08x\n", Status);
3725         return Status;
3726     }
3727 
3728     // remove entries in uuid root (tree 9)
3729     if (Vcb->uuid_root) {
3730         RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(UINT64));
3731         searchkey.obj_type = TYPE_SUBVOL_UUID;
3732         RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
3733 
3734         if (searchkey.obj_id != 0 || searchkey.offset != 0) {
3735             Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
3736             if (!NT_SUCCESS(Status)) {
3737                 WARN("find_item returned %08x\n", Status);
3738             } else {
3739                 if (!keycmp(tp.item->key, searchkey)) {
3740                     Status = delete_tree_item(Vcb, &tp);
3741                     if (!NT_SUCCESS(Status)) {
3742                         ERR("delete_tree_item returned %08x\n", Status);
3743                         return Status;
3744                     }
3745                 } else
3746                     WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3747             }
3748         }
3749 
3750         if (r->root_item.rtransid > 0) {
3751             RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid.uuid[0], sizeof(UINT64));
3752             searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
3753             RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
3754 
3755             Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
3756             if (!NT_SUCCESS(Status))
3757                 WARN("find_item returned %08x\n", Status);
3758             else {
3759                 if (!keycmp(tp.item->key, searchkey)) {
3760                     if (tp.item->size == sizeof(UINT64)) {
3761                         UINT64* id = (UINT64*)tp.item->data;
3762 
3763                         if (*id == r->id) {
3764                             Status = delete_tree_item(Vcb, &tp);
3765                             if (!NT_SUCCESS(Status)) {
3766                                 ERR("delete_tree_item returned %08x\n", Status);
3767                                 return Status;
3768                             }
3769                         }
3770                     } else if (tp.item->size > sizeof(UINT64)) {
3771                         ULONG i;
3772                         UINT64* ids = (UINT64*)tp.item->data;
3773 
3774                         for (i = 0; i < tp.item->size / sizeof(UINT64); i++) {
3775                             if (ids[i] == r->id) {
3776                                 UINT64* ne;
3777 
3778                                 ne = ExAllocatePoolWithTag(PagedPool, tp.item->size - sizeof(UINT64), ALLOC_TAG);
3779                                 if (!ne) {
3780                                     ERR("out of memory\n");
3781                                     return STATUS_INSUFFICIENT_RESOURCES;
3782                                 }
3783 
3784                                 if (i > 0)
3785                                     RtlCopyMemory(ne, ids, sizeof(UINT64) * i);
3786 
3787                                 if ((i + 1) * sizeof(UINT64) < tp.item->size)
3788                                     RtlCopyMemory(&ne[i], &ids[i + 1], tp.item->size - ((i + 1) * sizeof(UINT64)));
3789 
3790                                 Status = delete_tree_item(Vcb, &tp);
3791                                 if (!NT_SUCCESS(Status)) {
3792                                     ERR("delete_tree_item returned %08x\n", Status);
3793                                     ExFreePool(ne);
3794                                     return Status;
3795                                 }
3796 
3797                                 Status = insert_tree_item(Vcb, Vcb->uuid_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
3798                                                           ne, tp.item->size - sizeof(UINT64), NULL, Irp);
3799                                 if (!NT_SUCCESS(Status)) {
3800                                     ERR("insert_tree_item returned %08x\n", Status);
3801                                     ExFreePool(ne);
3802                                     return Status;
3803                                 }
3804 
3805                                 break;
3806                             }
3807                         }
3808                     }
3809                 } else
3810                     WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3811             }
3812         }
3813     }
3814 
3815     // delete ROOT_ITEM
3816 
3817     searchkey.obj_id = r->id;
3818     searchkey.obj_type = TYPE_ROOT_ITEM;
3819     searchkey.offset = 0xffffffffffffffff;
3820 
3821     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
3822     if (!NT_SUCCESS(Status)) {
3823         ERR("find_item returned %08x\n", Status);
3824         return Status;
3825     }
3826 
3827     if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3828         Status = delete_tree_item(Vcb, &tp);
3829 
3830         if (!NT_SUCCESS(Status)) {
3831             ERR("delete_tree_item returned %08x\n", Status);
3832             return Status;
3833         }
3834     } else
3835         WARN("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3836 
3837     // delete items in tree cache
3838 
3839     free_trees_root(Vcb, r);
3840 
3841     return STATUS_SUCCESS;
3842 }
3843 
3844 static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
3845     LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2;
3846     NTSTATUS Status;
3847 
3848     while (le != &Vcb->drop_roots) {
3849         root* r = CONTAINING_RECORD(le, root, list_entry);
3850 
3851         le2 = le->Flink;
3852 
3853         Status = drop_root(Vcb, r, Irp, rollback);
3854         if (!NT_SUCCESS(Status)) {
3855             ERR("drop_root(%llx) returned %08x\n", r->id, Status);
3856             return Status;
3857         }
3858 
3859         le = le2;
3860     }
3861 
3862     return STATUS_SUCCESS;
3863 }
3864 
3865 NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp) {
3866     KEY searchkey;
3867     traverse_ptr tp;
3868     DEV_ITEM* di;
3869     NTSTATUS Status;
3870 
3871     searchkey.obj_id = 1;
3872     searchkey.obj_type = TYPE_DEV_ITEM;
3873     searchkey.offset = device->devitem.dev_id;
3874 
3875     Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
3876     if (!NT_SUCCESS(Status)) {
3877         ERR("error - find_item returned %08x\n", Status);
3878         return Status;
3879     }
3880 
3881     if (keycmp(tp.item->key, searchkey)) {
3882         ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
3883         return STATUS_INTERNAL_ERROR;
3884     }
3885 
3886     Status = delete_tree_item(Vcb, &tp);
3887     if (!NT_SUCCESS(Status)) {
3888         ERR("delete_tree_item returned %08x\n", Status);
3889         return Status;
3890     }
3891 
3892     di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
3893     if (!di) {
3894         ERR("out of memory\n");
3895         return STATUS_INSUFFICIENT_RESOURCES;
3896     }
3897 
3898     RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
3899 
3900     Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
3901     if (!NT_SUCCESS(Status)) {
3902         ERR("insert_tree_item returned %08x\n", Status);
3903         ExFreePool(di);
3904         return Status;
3905     }
3906 
3907     return STATUS_SUCCESS;
3908 }
3909 
3910 static void regen_bootstrap(device_extension* Vcb) {
3911     sys_chunk* sc2;
3912     USHORT i = 0;
3913     LIST_ENTRY* le;
3914 
3915     i = 0;
3916     le = Vcb->sys_chunks.Flink;
3917     while (le != &Vcb->sys_chunks) {
3918         sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
3919 
3920         TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
3921 
3922         RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
3923         i += sizeof(KEY);
3924 
3925         RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
3926         i += sc2->size;
3927 
3928         le = le->Flink;
3929     }
3930 }
3931 
3932 static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, UINT16 size) {
3933     sys_chunk* sc;
3934     LIST_ENTRY* le;
3935 
3936     if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
3937         ERR("error - bootstrap is full\n");
3938         return STATUS_INTERNAL_ERROR;
3939     }
3940 
3941     sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
3942     if (!sc) {
3943         ERR("out of memory\n");
3944         return STATUS_INSUFFICIENT_RESOURCES;
3945     }
3946 
3947     sc->key.obj_id = obj_id;
3948     sc->key.obj_type = obj_type;
3949     sc->key.offset = offset;
3950     sc->size = size;
3951     sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
3952     if (!sc->data) {
3953         ERR("out of memory\n");
3954         ExFreePool(sc);
3955         return STATUS_INSUFFICIENT_RESOURCES;
3956     }
3957 
3958     RtlCopyMemory(sc->data, data, sc->size);
3959 
3960     le = Vcb->sys_chunks.Flink;
3961     while (le != &Vcb->sys_chunks) {
3962         sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
3963 
3964         if (keycmp(sc2->key, sc->key) == 1)
3965             break;
3966 
3967         le = le->Flink;
3968     }
3969     InsertTailList(le, &sc->list_entry);
3970 
3971     Vcb->superblock.n += sizeof(KEY) + size;
3972 
3973     regen_bootstrap(Vcb);
3974 
3975     return STATUS_SUCCESS;
3976 }
3977 
3978 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp) {
3979     CHUNK_ITEM* ci;
3980     CHUNK_ITEM_STRIPE* cis;
3981     BLOCK_GROUP_ITEM* bgi;
3982     UINT16 i, factor;
3983     NTSTATUS Status;
3984 
3985     ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
3986     if (!ci) {
3987         ERR("out of memory\n");
3988         return STATUS_INSUFFICIENT_RESOURCES;
3989     }
3990 
3991     RtlCopyMemory(ci, c->chunk_item, c->size);
3992 
3993     Status = insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp);
3994     if (!NT_SUCCESS(Status)) {
3995         ERR("insert_tree_item failed\n");
3996         ExFreePool(ci);
3997         return Status;
3998     }
3999 
4000     if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
4001         Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
4002         if (!NT_SUCCESS(Status)) {
4003             ERR("add_to_bootstrap returned %08x\n", Status);
4004             return Status;
4005         }
4006     }
4007 
4008     // add BLOCK_GROUP_ITEM to tree 2
4009 
4010     bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
4011     if (!bgi) {
4012         ERR("out of memory\n");
4013         return STATUS_INSUFFICIENT_RESOURCES;
4014     }
4015 
4016     bgi->used = c->used;
4017     bgi->chunk_tree = 0x100;
4018     bgi->flags = c->chunk_item->type;
4019 
4020     Status = insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp);
4021     if (!NT_SUCCESS(Status)) {
4022         ERR("insert_tree_item failed\n");
4023         ExFreePool(bgi);
4024         return Status;
4025     }
4026 
4027     if (c->chunk_item->type & BLOCK_FLAG_RAID0)
4028         factor = c->chunk_item->num_stripes;
4029     else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
4030         factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
4031     else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
4032         factor = c->chunk_item->num_stripes - 1;
4033     else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
4034         factor = c->chunk_item->num_stripes - 2;
4035     else // SINGLE, DUPLICATE, RAID1
4036         factor = 1;
4037 
4038     // add DEV_EXTENTs to tree 4
4039 
4040     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
4041 
4042     for (i = 0; i < c->chunk_item->num_stripes; i++) {
4043         DEV_EXTENT* de;
4044 
4045         de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
4046         if (!de) {
4047             ERR("out of memory\n");
4048             return STATUS_INSUFFICIENT_RESOURCES;
4049         }
4050 
4051         de->chunktree = Vcb->chunk_root->id;
4052         de->objid = 0x100;
4053         de->address = c->offset;
4054         de->length = c->chunk_item->size / factor;
4055         de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
4056 
4057         Status = insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp);
4058         if (!NT_SUCCESS(Status)) {
4059             ERR("insert_tree_item returned %08x\n", Status);
4060             ExFreePool(de);
4061             return Status;
4062         }
4063 
4064         // FIXME - no point in calling this twice for the same device
4065         Status = update_dev_item(Vcb, c->devices[i], Irp);
4066         if (!NT_SUCCESS(Status)) {
4067             ERR("update_dev_item returned %08x\n", Status);
4068             return Status;
4069         }
4070     }
4071 
4072     c->created = FALSE;
4073 
4074     return STATUS_SUCCESS;
4075 }
4076 
4077 static void remove_from_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset) {
4078     sys_chunk* sc2;
4079     LIST_ENTRY* le;
4080 
4081     le = Vcb->sys_chunks.Flink;
4082     while (le != &Vcb->sys_chunks) {
4083         sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4084 
4085         if (sc2->key.obj_id == obj_id && sc2->key.obj_type == obj_type && sc2->key.offset == offset) {
4086             RemoveEntryList(&sc2->list_entry);
4087 
4088             Vcb->superblock.n -= sizeof(KEY) + sc2->size;
4089 
4090             ExFreePool(sc2->data);
4091             ExFreePool(sc2);
4092             regen_bootstrap(Vcb);
4093             return;
4094         }
4095 
4096         le = le->Flink;
4097     }
4098 }
4099 
4100 static NTSTATUS set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, UINT64 inode, char* name, UINT16 namelen,
4101                           UINT32 crc32, UINT8* data, UINT16 datalen) {
4102     NTSTATUS Status;
4103     UINT16 xasize;
4104     DIR_ITEM* xa;
4105 
4106     TRACE("(%p, %llx, %llx, %.*s, %08x, %p, %u)\n", Vcb, subvol->id, inode, namelen, name, crc32, data, datalen);
4107 
4108     xasize = (UINT16)offsetof(DIR_ITEM, name[0]) + namelen + datalen;
4109 
4110     xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4111     if (!xa) {
4112         ERR("out of memory\n");
4113         return STATUS_INSUFFICIENT_RESOURCES;
4114     }
4115 
4116     xa->key.obj_id = 0;
4117     xa->key.obj_type = 0;
4118     xa->key.offset = 0;
4119     xa->transid = Vcb->superblock.generation;
4120     xa->m = datalen;
4121     xa->n = namelen;
4122     xa->type = BTRFS_TYPE_EA;
4123     RtlCopyMemory(xa->name, name, namelen);
4124     RtlCopyMemory(xa->name + namelen, data, datalen);
4125 
4126     Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr);
4127     if (!NT_SUCCESS(Status)) {
4128         ERR("insert_tree_item_batch returned %08x\n", Status);
4129         ExFreePool(xa);
4130         return Status;
4131     }
4132 
4133     return STATUS_SUCCESS;
4134 }
4135 
4136 static NTSTATUS delete_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, UINT64 inode, char* name,
4137                              UINT16 namelen, UINT32 crc32) {
4138     NTSTATUS Status;
4139     UINT16 xasize;
4140     DIR_ITEM* xa;
4141 
4142     TRACE("(%p, %llx, %llx, %.*s, %08x)\n", Vcb, subvol->id, inode, namelen, name, crc32);
4143 
4144     xasize = (UINT16)offsetof(DIR_ITEM, name[0]) + namelen;
4145 
4146     xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4147     if (!xa) {
4148         ERR("out of memory\n");
4149         return STATUS_INSUFFICIENT_RESOURCES;
4150     }
4151 
4152     xa->key.obj_id = 0;
4153     xa->key.obj_type = 0;
4154     xa->key.offset = 0;
4155     xa->transid = Vcb->superblock.generation;
4156     xa->m = 0;
4157     xa->n = namelen;
4158     xa->type = BTRFS_TYPE_EA;
4159     RtlCopyMemory(xa->name, name, namelen);
4160 
4161     Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_DeleteXattr);
4162     if (!NT_SUCCESS(Status)) {
4163         ERR("insert_tree_item_batch returned %08x\n", Status);
4164         ExFreePool(xa);
4165         return Status;
4166     }
4167 
4168     return STATUS_SUCCESS;
4169 }
4170 
4171 static NTSTATUS insert_sparse_extent(fcb* fcb, LIST_ENTRY* batchlist, UINT64 start, UINT64 length) {
4172     NTSTATUS Status;
4173     EXTENT_DATA* ed;
4174     EXTENT_DATA2* ed2;
4175 
4176     TRACE("((%llx, %llx), %llx, %llx)\n", fcb->subvol->id, fcb->inode, start, length);
4177 
4178     ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4179     if (!ed) {
4180         ERR("out of memory\n");
4181         return STATUS_INSUFFICIENT_RESOURCES;
4182     }
4183 
4184     ed->generation = fcb->Vcb->superblock.generation;
4185     ed->decoded_size = length;
4186     ed->compression = BTRFS_COMPRESSION_NONE;
4187     ed->encryption = BTRFS_ENCRYPTION_NONE;
4188     ed->encoding = BTRFS_ENCODING_NONE;
4189     ed->type = EXTENT_TYPE_REGULAR;
4190 
4191     ed2 = (EXTENT_DATA2*)ed->data;
4192     ed2->address = 0;
4193     ed2->size = 0;
4194     ed2->offset = 0;
4195     ed2->num_bytes = length;
4196 
4197     Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), Batch_Insert);
4198     if (!NT_SUCCESS(Status)) {
4199         ERR("insert_tree_item_batch returned %08x\n", Status);
4200         ExFreePool(ed);
4201         return Status;
4202     }
4203 
4204     return STATUS_SUCCESS;
4205 }
4206 
4207 #ifdef _MSC_VER
4208 #pragma warning(push)
4209 #pragma warning(suppress: 28194)
4210 #endif
4211 NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT8 objtype, UINT64 offset,
4212                                 _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data, UINT16 datalen, enum batch_operation operation) {
4213     LIST_ENTRY* le;
4214     batch_root* br = NULL;
4215     batch_item* bi;
4216 
4217     le = batchlist->Flink;
4218     while (le != batchlist) {
4219         batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry);
4220 
4221         if (br2->r == r) {
4222             br = br2;
4223             break;
4224         }
4225 
4226         le = le->Flink;
4227     }
4228 
4229     if (!br) {
4230         br = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG);
4231         if (!br) {
4232             ERR("out of memory\n");
4233             return STATUS_INSUFFICIENT_RESOURCES;
4234         }
4235 
4236         br->r = r;
4237         InitializeListHead(&br->items);
4238         InsertTailList(batchlist, &br->list_entry);
4239     }
4240 
4241     bi = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
4242     if (!bi) {
4243         ERR("out of memory\n");
4244         return STATUS_INSUFFICIENT_RESOURCES;
4245     }
4246 
4247     bi->key.obj_id = objid;
4248     bi->key.obj_type = objtype;
4249     bi->key.offset = offset;
4250     bi->data = data;
4251     bi->datalen = datalen;
4252     bi->operation = operation;
4253 
4254     le = br->items.Blink;
4255     while (le != &br->items) {
4256         batch_item* bi2 = CONTAINING_RECORD(le, batch_item, list_entry);
4257         int cmp = keycmp(bi2->key, bi->key);
4258 
4259         if (cmp == -1 || (cmp == 0 && bi->operation >= bi2->operation)) {
4260             InsertHeadList(&bi2->list_entry, &bi->list_entry);
4261             return STATUS_SUCCESS;
4262         }
4263 
4264         le = le->Blink;
4265     }
4266 
4267     InsertHeadList(&br->items, &bi->list_entry);
4268 
4269     return STATUS_SUCCESS;
4270 }
4271 #ifdef _MSC_VER
4272 #pragma warning(pop)
4273 #endif
4274 
/*
 * Scratch record used by rationalize_extents(): one entry per distinct on-disk
 * extent address referenced by the file's eligible extents. Entries are kept
 * on a list in ascending address order.
 */
4275 typedef struct {
4276     UINT64 address; // on-disk address of the extent (from EXTENT_DATA2.address); advanced when the start is trimmed
4277     UINT64 length; // size of the on-disk extent (from EXTENT_DATA2.size); shrinks on trim, grows on merge
4278     UINT64 offset; // file offset corresponding to the start of the extent (ext->offset - ed2->offset)
4279     BOOL changed; // set once a following range has been merged into this one
4280     chunk* chunk; // chunk containing address; NULL until looked up
4281     UINT64 skip_start; // smallest EXTENT_DATA2.offset among the file extents using this address
4282     UINT64 skip_end; // smallest (size - offset - num_bytes) among the file extents using this address
4283     LIST_ENTRY list_entry; // links the record into rationalize_extents' local list
4284 } extent_range;
4285 
/*
 * Tidies the on-disk extents referenced by fcb->extents.
 *
 * Only extents that are EXTENT_TYPE_REGULAR or EXTENT_TYPE_PREALLOC, uncompressed,
 * flagged unique, and have a non-zero EXTENT_DATA2.size are considered. The work
 * is done in phases:
 *
 *   1. Build a list of extent_range records, one per distinct extent address,
 *      sorted by address, noting how many bytes at the start and end of each
 *      on-disk extent no file extent refers to.
 *   2. Look up the chunk for every range.
 *   3. If any range was created with unreferenced bytes at either end, cut those
 *      bytes off: the file extents are re-pointed, chunk->used is reduced and
 *      space_list_add() is called for the trimmed region.
 *   4. Merge ranges that sit back-to-back in the same chunk, provided the file
 *      offsets do not go backwards and the result does not exceed MAX_EXTENT_SIZE.
 *   5. Rewrite every file extent lying inside a merged range so that it refers
 *      to the merged range.
 *
 * fcb - file whose extent list is processed; entries are modified in place
 * Irp - passed through to update_changed_extent_ref()
 *
 * There is no return value. On any failure an error is logged and the function
 * jumps to the cleanup code, which only frees the local range list; changes
 * already made before the failure are left as they are.
 */
4286 static void rationalize_extents(fcb* fcb, PIRP Irp) {
4287     LIST_ENTRY* le;
4288     LIST_ENTRY extent_ranges;
4289     extent_range* er;
4290     BOOL changed = FALSE, truncating = FALSE;
4291     UINT32 num_extents = 0;
4292 
4293     InitializeListHead(&extent_ranges);
4294 
         // Phase 1: collect one extent_range per distinct extent address.
4295     le = fcb->extents.Flink;
4296     while (le != &fcb->extents) {
4297         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4298 
4299         if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4300             EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4301 
4302             if (ed2->size != 0) {
4303                 LIST_ENTRY* le2;
4304 
4305                 le2 = extent_ranges.Flink;
4306                 while (le2 != &extent_ranges) {
4307                     extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4308 
                         // Address already recorded: only the part unused by every
                         // referrer may be trimmed, so keep the minimum at each end.
4309                     if (er2->address == ed2->address) {
4310                         er2->skip_start = min(er2->skip_start, ed2->offset);
4311                         er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes);
4312                         goto cont;
4313                     } else if (er2->address > ed2->address)
4314                         break;
4315 
4316                     le2 = le2->Flink;
4317                 }
4318 
4319                 er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside?
4320                 if (!er) {
4321                     ERR("out of memory\n");
4322                     goto end;
4323                 }
4324 
4325                 er->address = ed2->address;
4326                 er->length = ed2->size;
4327                 er->offset = ext->offset - ed2->offset;
4328                 er->changed = FALSE;
4329                 er->chunk = NULL;
4330                 er->skip_start = ed2->offset;
4331                 er->skip_end = ed2->size - ed2->offset - ed2->num_bytes;
4332 
                     // Note: truncating is decided from the first referrer only; the
                     // min() updates above may later bring the skips down to zero.
4333                 if (er->skip_start != 0 || er->skip_end != 0)
4334                     truncating = TRUE;
4335 
                     // le2 is the first range with a higher address, or the list head;
                     // inserting before it keeps the list in ascending address order.
4336                 InsertHeadList(le2->Blink, &er->list_entry);
4337                 num_extents++;
4338             }
4339         }
4340 
4341 cont:
4342         le = le->Flink;
4343     }
4344 
         // Nothing to merge (fewer than two ranges) and nothing to trim.
4345     if (num_extents == 0 || (num_extents == 1 && !truncating))
4346         goto end;
4347 
         // Phase 2: find the chunk for each range. Once a chunk is known it is also
         // assigned to later ranges whose address falls inside it, saving lookups.
4348     le = extent_ranges.Flink;
4349     while (le != &extent_ranges) {
4350         er = CONTAINING_RECORD(le, extent_range, list_entry);
4351 
4352         if (!er->chunk) {
4353             LIST_ENTRY* le2;
4354 
4355             er->chunk = get_chunk_from_address(fcb->Vcb, er->address);
4356 
4357             if (!er->chunk) {
4358                 ERR("get_chunk_from_address(%llx) failed\n", er->address);
4359                 goto end;
4360             }
4361 
4362             le2 = le->Flink;
4363             while (le2 != &extent_ranges) {
4364                 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4365 
4366                 if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size)
4367                     er2->chunk = er->chunk;
4368 
4369                 le2 = le2->Flink;
4370             }
4371         }
4372 
4373         le = le->Flink;
4374     }
4375 
         // Phase 3: trim.
4376     if (truncating) {
4377         // truncate beginning or end of extent if unused
4378 
4379         le = extent_ranges.Flink;
4380         while (le != &extent_ranges) {
4381             er = CONTAINING_RECORD(le, extent_range, list_entry);
4382 
4383             if (er->skip_start > 0) {
                     // Re-point every file extent that uses this on-disk extent:
                     // record a -1 count change for the old (address, size), move
                     // the extent start forward, then record +1 for the new one.
4384                 LIST_ENTRY* le2 = fcb->extents.Flink;
4385                 while (le2 != &fcb->extents) {
4386                     extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4387 
4388                     if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4389                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4390 
4391                         if (ed2->size != 0 && ed2->address == er->address) {
4392                             NTSTATUS Status;
4393 
4394                             Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4395                                                                -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
4396                             if (!NT_SUCCESS(Status)) {
4397                                 ERR("update_changed_extent_ref returned %08x\n", Status);
4398                                 goto end;
4399                             }
4400 
4401                             ext->extent_data.decoded_size -= er->skip_start;
4402                             ed2->size -= er->skip_start;
4403                             ed2->address += er->skip_start;
4404                             ed2->offset -= er->skip_start;
4405 
4406                             add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4407                                                    1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4408                         }
4409                     }
4410 
4411                     le2 = le2->Flink;
4412                 }
4413 
                     // NOTE(review): a NULL csum presumably asks add_checksum_entry to
                     // drop the checksums covering the trimmed sectors - confirm.
4414                 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4415                     add_checksum_entry(fcb->Vcb, er->address, (ULONG)(er->skip_start / fcb->Vcb->superblock.sector_size), NULL, NULL);
4416 
                     // Chunk accounting is adjusted with the chunk lock held exclusively.
4417                 ExAcquireResourceExclusiveLite(&er->chunk->lock, TRUE);
4418 
4419                 if (!er->chunk->cache_loaded) {
4420                     NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4421 
4422                     if (!NT_SUCCESS(Status)) {
4423                         ERR("load_cache_chunk returned %08x\n", Status);
4424                         ExReleaseResourceLite(&er->chunk->lock);
4425                         goto end;
4426                     }
4427                 }
4428 
4429                 er->chunk->used -= er->skip_start;
4430 
4431                 space_list_add(er->chunk, er->address, er->skip_start, NULL);
4432 
4433                 ExReleaseResourceLite(&er->chunk->lock);
4434 
                     // Keep the range in step with the extents just rewritten, so the
                     // address comparison in the skip_end pass below still matches.
4435                 er->address += er->skip_start;
4436                 er->length -= er->skip_start;
4437             }
4438 
4439             if (er->skip_end > 0) {
                     // Same procedure for the tail; only the size shrinks here, the
                     // address and offset are unaffected.
4440                 LIST_ENTRY* le2 = fcb->extents.Flink;
4441                 while (le2 != &fcb->extents) {
4442                     extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4443 
4444                     if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4445                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4446 
4447                         if (ed2->size != 0 && ed2->address == er->address) {
4448                             NTSTATUS Status;
4449 
4450                             Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4451                                                                -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
4452                             if (!NT_SUCCESS(Status)) {
4453                                 ERR("update_changed_extent_ref returned %08x\n", Status);
4454                                 goto end;
4455                             }
4456 
4457                             ext->extent_data.decoded_size -= er->skip_end;
4458                             ed2->size -= er->skip_end;
4459 
4460                             add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4461                                                    1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4462                         }
4463                     }
4464 
4465                     le2 = le2->Flink;
4466                 }
4467 
4468                 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4469                     add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, (ULONG)(er->skip_end / fcb->Vcb->superblock.sector_size), NULL, NULL);
4470 
4471                 ExAcquireResourceExclusiveLite(&er->chunk->lock, TRUE);
4472 
4473                 if (!er->chunk->cache_loaded) {
4474                     NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4475 
4476                     if (!NT_SUCCESS(Status)) {
4477                         ERR("load_cache_chunk returned %08x\n", Status);
4478                         ExReleaseResourceLite(&er->chunk->lock);
4479                         goto end;
4480                     }
4481                 }
4482 
4483                 er->chunk->used -= er->skip_end;
4484 
4485                 space_list_add(er->chunk, er->address + er->length - er->skip_end, er->skip_end, NULL);
4486 
4487                 ExReleaseResourceLite(&er->chunk->lock);
4488 
4489                 er->length -= er->skip_end;
4490             }
4491 
4492             le = le->Flink;
4493         }
4494     }
4495 
         // Merging needs at least two ranges.
4496     if (num_extents < 2)
4497         goto end;
4498 
         // Phase 4.
4499     // merge together adjacent extents
4500     le = extent_ranges.Flink;
4501     while (le != &extent_ranges) {
4502         er = CONTAINING_RECORD(le, extent_range, list_entry);
4503 
4504         if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) {
4505             extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry);
4506 
4507             if (er->chunk == er2->chunk) {
                     // er2 must start exactly where er ends on disk, and must not map
                     // to a file offset earlier than the end of er.
4508                 if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) {
4509                     if (er->length + er2->length <= MAX_EXTENT_SIZE) {
4510                         er->length += er2->length;
4511                         er->changed = TRUE;
4512 
4513                         RemoveEntryList(&er2->list_entry);
4514                         ExFreePool(er2);
4515 
4516                         changed = TRUE;
                             // stay on er so that it is tested against its new neighbour
4517                         continue;
4518                     }
4519                 }
4520             }
4521         }
4522 
4523         le = le->Flink;
4524     }
4525 
4526     if (!changed)
4527         goto end;
4528 
         // Phase 5: point each file extent lying inside a merged range at that range.
4529     le = fcb->extents.Flink;
4530     while (le != &fcb->extents) {
4531         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4532 
4533         if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4534             EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4535 
4536             if (ed2->size != 0) {
4537                 LIST_ENTRY* le2;
4538 
4539                 le2 = extent_ranges.Flink;
4540                 while (le2 != &extent_ranges) {
4541                     extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4542 
4543                     if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) {
4544                         NTSTATUS Status;
4545 
4546                         Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4547                                                            -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
4548                         if (!NT_SUCCESS(Status)) {
4549                             ERR("update_changed_extent_ref returned %08x\n", Status);
4550                             goto end;
4551                         }
4552 
                             // The extent now begins at the start of the merged range, so
                             // the offset grows by the distance the start address moved back.
4553                         ed2->offset += ed2->address - er2->address;
4554                         ed2->address = er2->address;
4555                         ed2->size = er2->length;
4556                         ext->extent_data.decoded_size = ed2->size;
4557 
4558                         add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4559                                                1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4560 
4561                         break;
4562                     }
4563 
4564                     le2 = le2->Flink;
4565                 }
4566             }
4567         }
4568 
4569         le = le->Flink;
4570     }
4571 
         // Cleanup, reached on success and on every error path: free the range list.
4572 end:
4573     while (!IsListEmpty(&extent_ranges)) {
4574         le = RemoveHeadList(&extent_ranges);
4575         er = CONTAINING_RECORD(le, extent_range, list_entry);
4576 
4577         ExFreePool(er);
4578     }
4579 }
4580 
4581 NTSTATUS flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp) {
4582     traverse_ptr tp;
4583     KEY searchkey;
4584     NTSTATUS Status;
4585     INODE_ITEM* ii;
4586     UINT64 ii_offset;
4587 #ifdef DEBUG_PARANOID
4588     UINT64 old_size = 0;
4589     BOOL extents_changed;
4590 #endif
4591 
4592     if (fcb->ads) {
4593         if (fcb->deleted) {
4594             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, fcb->adshash);
4595             if (!NT_SUCCESS(Status)) {
4596                 ERR("delete_xattr returned %08x\n", Status);
4597                 goto end;
4598             }
4599         } else {
4600             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length,
4601                                fcb->adshash, (UINT8*)fcb->adsdata.Buffer, fcb->adsdata.Length);
4602             if (!NT_SUCCESS(Status)) {
4603                 ERR("set_xattr returned %08x\n", Status);
4604                 goto end;
4605             }
4606         }
4607 
4608         Status = STATUS_SUCCESS;
4609         goto end;
4610     }
4611 
4612     if (fcb->deleted) {
4613         Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode);
4614         if (!NT_SUCCESS(Status)) {
4615             ERR("insert_tree_item_batch returned %08x\n", Status);
4616             goto end;
4617         }
4618 
4619         Status = STATUS_SUCCESS;
4620         goto end;
4621     }
4622 
4623 #ifdef DEBUG_PARANOID
4624     extents_changed = fcb->extents_changed;
4625 #endif
4626 
4627     if (fcb->extents_changed) {
4628         LIST_ENTRY* le;
4629         BOOL prealloc = FALSE, extents_inline = FALSE;
4630         UINT64 last_end;
4631 
4632         // delete ignored extent items
4633         le = fcb->extents.Flink;
4634         while (le != &fcb->extents) {
4635             LIST_ENTRY* le2 = le->Flink;
4636             extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4637 
4638             if (ext->ignore) {
4639                 RemoveEntryList(&ext->list_entry);
4640 
4641                 if (ext->csum)
4642                     ExFreePool(ext->csum);
4643 
4644                 ExFreePool(ext);
4645             }
4646 
4647             le = le2;
4648         }
4649 
4650         le = fcb->extents.Flink;
4651         while (le != &fcb->extents) {
4652             extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4653 
4654             if (ext->inserted && ext->csum && ext->extent_data.type == EXTENT_TYPE_REGULAR) {
4655                 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4656 
4657                 if (ed2->size > 0) { // not sparse
4658                     if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE)
4659                         add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size), ext->csum, Irp);
4660                     else
4661                         add_checksum_entry(fcb->Vcb, ed2->address, (ULONG)(ed2->size / fcb->Vcb->superblock.sector_size), ext->csum, Irp);
4662                 }
4663             }
4664 
4665             le = le->Flink;
4666         }
4667 
4668         if (!IsListEmpty(&fcb->extents)) {
4669             rationalize_extents(fcb, Irp);
4670 
4671             // merge together adjacent EXTENT_DATAs pointing to same extent
4672 
4673             le = fcb->extents.Flink;
4674             while (le != &fcb->extents) {
4675                 LIST_ENTRY* le2 = le->Flink;
4676                 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4677 
4678                 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
4679                     extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
4680 
4681                     if (ext->extent_data.type == nextext->extent_data.type) {
4682                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4683                         EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->extent_data.data;
4684 
4685                         if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
4686                             nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
4687                             chunk* c;
4688 
4689                             if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->csum) {
4690                                 ULONG len = (ULONG)((ed2->num_bytes + ned2->num_bytes) / fcb->Vcb->superblock.sector_size);
4691                                 UINT32* csum;
4692 
4693                                 csum = ExAllocatePoolWithTag(NonPagedPool, len * sizeof(UINT32), ALLOC_TAG);
4694                                 if (!csum) {
4695                                     ERR("out of memory\n");
4696                                     Status = STATUS_INSUFFICIENT_RESOURCES;
4697                                     goto end;
4698                                 }
4699 
4700                                 RtlCopyMemory(csum, ext->csum, (ULONG)(ed2->num_bytes * sizeof(UINT32) / fcb->Vcb->superblock.sector_size));
4701                                 RtlCopyMemory(&csum[ed2->num_bytes / fcb->Vcb->superblock.sector_size], nextext->csum,
4702                                               (ULONG)(ned2->num_bytes * sizeof(UINT32) / fcb->Vcb->superblock.sector_size));
4703 
4704                                 ExFreePool(ext->csum);
4705                                 ext->csum = csum;
4706                             }
4707 
4708                             ext->extent_data.generation = fcb->Vcb->superblock.generation;
4709                             ed2->num_bytes += ned2->num_bytes;
4710 
4711                             RemoveEntryList(&nextext->list_entry);
4712 
4713                             if (nextext->csum)
4714                                 ExFreePool(nextext->csum);
4715 
4716                             ExFreePool(nextext);
4717 
4718                             c = get_chunk_from_address(fcb->Vcb, ed2->address);
4719 
4720                             if (!c) {
4721                                 ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
4722                             } else {
4723                                 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
4724                                                                 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
4725                                 if (!NT_SUCCESS(Status)) {
4726                                     ERR("update_changed_extent_ref returned %08x\n", Status);
4727                                     goto end;
4728                                 }
4729                             }
4730 
4731                             le2 = le;
4732                         }
4733                     }
4734                 }
4735 
4736                 le = le2;
4737             }
4738         }
4739 
4740         if (!fcb->created) {
4741             // delete existing EXTENT_DATA items
4742 
4743             Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, NULL, 0, Batch_DeleteExtentData);
4744             if (!NT_SUCCESS(Status)) {
4745                 ERR("insert_tree_item_batch returned %08x\n", Status);
4746                 goto end;
4747             }
4748         }
4749 
4750         // add new EXTENT_DATAs
4751 
4752         last_end = 0;
4753 
4754         le = fcb->extents.Flink;
4755         while (le != &fcb->extents) {
4756             extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4757             EXTENT_DATA* ed;
4758 
4759             ext->inserted = FALSE;
4760 
4761             if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
4762                 Status = insert_sparse_extent(fcb, batchlist, last_end, ext->offset - last_end);
4763                 if (!NT_SUCCESS(Status)) {
4764                     ERR("insert_sparse_extent returned %08x\n", Status);
4765                     goto end;
4766                 }
4767             }
4768 
4769             ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
4770             if (!ed) {
4771                 ERR("out of memory\n");
4772                 Status = STATUS_INSUFFICIENT_RESOURCES;
4773                 goto end;
4774             }
4775 
4776             RtlCopyMemory(ed, &ext->extent_data, ext->datalen);
4777 
4778             Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset,
4779                                             ed, ext->datalen, Batch_Insert);
4780             if (!NT_SUCCESS(Status)) {
4781                 ERR("insert_tree_item_batch returned %08x\n", Status);
4782                 goto end;
4783             }
4784 
4785             if (ed->type == EXTENT_TYPE_PREALLOC)
4786                 prealloc = TRUE;
4787 
4788             if (ed->type == EXTENT_TYPE_INLINE)
4789                 extents_inline = TRUE;
4790 
4791             if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
4792                 if (ed->type == EXTENT_TYPE_INLINE)
4793                     last_end = ext->offset + ed->decoded_size;
4794                 else {
4795                     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
4796 
4797                     last_end = ext->offset + ed2->num_bytes;
4798                 }
4799             }
4800 
4801             le = le->Flink;
4802         }
4803 
4804         if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
4805             sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
4806             Status = insert_sparse_extent(fcb, batchlist, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end);
4807             if (!NT_SUCCESS(Status)) {
4808                 ERR("insert_sparse_extent returned %08x\n", Status);
4809                 goto end;
4810             }
4811         }
4812 
4813         // update prealloc flag in INODE_ITEM
4814 
4815         if (!prealloc)
4816             fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
4817         else
4818             fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
4819 
4820         fcb->inode_item_changed = TRUE;
4821 
4822         fcb->extents_changed = FALSE;
4823     }
4824 
4825     if ((!fcb->created && fcb->inode_item_changed) || cache) {
4826         searchkey.obj_id = fcb->inode;
4827         searchkey.obj_type = TYPE_INODE_ITEM;
4828         searchkey.offset = 0xffffffffffffffff;
4829 
4830         Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
4831         if (!NT_SUCCESS(Status)) {
4832             ERR("error - find_item returned %08x\n", Status);
4833             goto end;
4834         }
4835 
4836         if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
4837             if (cache) {
4838                 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
4839                 if (!ii) {
4840                     ERR("out of memory\n");
4841                     Status = STATUS_INSUFFICIENT_RESOURCES;
4842                     goto end;
4843                 }
4844 
4845                 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
4846 
4847                 Status = insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp);
4848                 if (!NT_SUCCESS(Status)) {
4849                     ERR("insert_tree_item returned %08x\n", Status);
4850                     goto end;
4851                 }
4852 
4853                 ii_offset = 0;
4854             } else {
4855                 ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
4856                 Status = STATUS_INTERNAL_ERROR;
4857                 goto end;
4858             }
4859         } else {
4860 #ifdef DEBUG_PARANOID
4861             INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
4862 
4863             old_size = ii2->st_size;
4864 #endif
4865 
4866             ii_offset = tp.item->key.offset;
4867         }
4868 
4869         if (!cache) {
4870             Status = delete_tree_item(fcb->Vcb, &tp);
4871             if (!NT_SUCCESS(Status)) {
4872                 ERR("delete_tree_item returned %08x\n", Status);
4873                 goto end;
4874             }
4875         } else {
4876             searchkey.obj_id = fcb->inode;
4877             searchkey.obj_type = TYPE_INODE_ITEM;
4878             searchkey.offset = ii_offset;
4879 
4880             Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
4881             if (!NT_SUCCESS(Status)) {
4882                 ERR("error - find_item returned %08x\n", Status);
4883                 goto end;
4884             }
4885 
4886             if (keycmp(tp.item->key, searchkey)) {
4887                 ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
4888                 Status = STATUS_INTERNAL_ERROR;
4889                 goto end;
4890             } else
4891                 RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
4892         }
4893 
4894 #ifdef DEBUG_PARANOID
4895         if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
4896             ERR("error - size has changed but extents not marked as changed\n");
4897             int3;
4898         }
4899 #endif
4900     } else
4901         ii_offset = 0;
4902 
4903     fcb->created = FALSE;
4904 
4905     if (!cache && fcb->inode_item_changed) {
4906         ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
4907         if (!ii) {
4908             ERR("out of memory\n");
4909             Status = STATUS_INSUFFICIENT_RESOURCES;
4910             goto end;
4911         }
4912 
4913         RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
4914 
4915         Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM),
4916                                         Batch_Insert);
4917         if (!NT_SUCCESS(Status)) {
4918             ERR("insert_tree_item_batch returned %08x\n", Status);
4919             goto end;
4920         }
4921 
4922         fcb->inode_item_changed = FALSE;
4923     }
4924 
4925     if (fcb->sd_dirty) {
4926         if (!fcb->sd_deleted) {
4927             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, (UINT16)strlen(EA_NTACL),
4928                                EA_NTACL_HASH, (UINT8*)fcb->sd, (UINT16)RtlLengthSecurityDescriptor(fcb->sd));
4929             if (!NT_SUCCESS(Status)) {
4930                 ERR("set_xattr returned %08x\n", Status);
4931                 goto end;
4932             }
4933         } else {
4934             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, (UINT16)strlen(EA_NTACL), EA_NTACL_HASH);
4935             if (!NT_SUCCESS(Status)) {
4936                 ERR("delete_xattr returned %08x\n", Status);
4937                 goto end;
4938             }
4939         }
4940 
4941         fcb->sd_deleted = FALSE;
4942         fcb->sd_dirty = FALSE;
4943     }
4944 
4945     if (fcb->atts_changed) {
4946         if (!fcb->atts_deleted) {
4947             UINT8 val[16], *val2;
4948             ULONG atts = fcb->atts;
4949 
4950             TRACE("inserting new DOSATTRIB xattr\n");
4951 
4952             if (fcb->inode == SUBVOL_ROOT_INODE)
4953                 atts &= ~FILE_ATTRIBUTE_READONLY;
4954 
4955             val2 = &val[sizeof(val) - 1];
4956 
4957             do {
4958                 UINT8 c = atts % 16;
4959                 *val2 = c <= 9 ? (c + '0') : (c - 0xa + 'a');
4960 
4961                 val2--;
4962                 atts >>= 4;
4963             } while (atts != 0);
4964 
4965             *val2 = 'x';
4966             val2--;
4967             *val2 = '0';
4968 
4969             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, (UINT16)strlen(EA_DOSATTRIB),
4970                                EA_DOSATTRIB_HASH, val2, (UINT16)(val + sizeof(val) - val2));
4971             if (!NT_SUCCESS(Status)) {
4972                 ERR("set_xattr returned %08x\n", Status);
4973                 goto end;
4974             }
4975         } else {
4976             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, (UINT16)strlen(EA_DOSATTRIB), EA_DOSATTRIB_HASH);
4977             if (!NT_SUCCESS(Status)) {
4978                 ERR("delete_xattr returned %08x\n", Status);
4979                 goto end;
4980             }
4981         }
4982 
4983         fcb->atts_changed = FALSE;
4984         fcb->atts_deleted = FALSE;
4985     }
4986 
4987     if (fcb->reparse_xattr_changed) {
4988         if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
4989             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, (UINT16)strlen(EA_REPARSE),
4990                                EA_REPARSE_HASH, (UINT8*)fcb->reparse_xattr.Buffer, (UINT16)fcb->reparse_xattr.Length);
4991             if (!NT_SUCCESS(Status)) {
4992                 ERR("set_xattr returned %08x\n", Status);
4993                 goto end;
4994             }
4995         } else {
4996             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, (UINT16)strlen(EA_REPARSE), EA_REPARSE_HASH);
4997             if (!NT_SUCCESS(Status)) {
4998                 ERR("delete_xattr returned %08x\n", Status);
4999                 goto end;
5000             }
5001         }
5002 
5003         fcb->reparse_xattr_changed = FALSE;
5004     }
5005 
5006     if (fcb->ea_changed) {
5007         if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) {
5008             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, (UINT16)strlen(EA_EA),
5009                                EA_EA_HASH, (UINT8*)fcb->ea_xattr.Buffer, (UINT16)fcb->ea_xattr.Length);
5010             if (!NT_SUCCESS(Status)) {
5011                 ERR("set_xattr returned %08x\n", Status);
5012                 goto end;
5013             }
5014         } else {
5015             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, (UINT16)strlen(EA_EA), EA_EA_HASH);
5016             if (!NT_SUCCESS(Status)) {
5017                 ERR("delete_xattr returned %08x\n", Status);
5018                 goto end;
5019             }
5020         }
5021 
5022         fcb->ea_changed = FALSE;
5023     }
5024 
5025     if (fcb->prop_compression_changed) {
5026         if (fcb->prop_compression == PropCompression_None) {
5027             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, (UINT16)strlen(EA_PROP_COMPRESSION), EA_PROP_COMPRESSION_HASH);
5028             if (!NT_SUCCESS(Status)) {
5029                 ERR("delete_xattr returned %08x\n", Status);
5030                 goto end;
5031             }
5032         } else if (fcb->prop_compression == PropCompression_Zlib) {
5033             const char zlib[] = "zlib";
5034 
5035             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, (UINT16)strlen(EA_PROP_COMPRESSION),
5036                                EA_PROP_COMPRESSION_HASH, (UINT8*)zlib, (UINT16)strlen(zlib));
5037             if (!NT_SUCCESS(Status)) {
5038                 ERR("set_xattr returned %08x\n", Status);
5039                 goto end;
5040             }
5041         } else if (fcb->prop_compression == PropCompression_LZO) {
5042             const char lzo[] = "lzo";
5043 
5044             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, (UINT16)strlen(EA_PROP_COMPRESSION),
5045                                EA_PROP_COMPRESSION_HASH, (UINT8*)lzo, (UINT16)strlen(lzo));
5046             if (!NT_SUCCESS(Status)) {
5047                 ERR("set_xattr returned %08x\n", Status);
5048                 goto end;
5049             }
5050         }
5051 
5052         fcb->prop_compression_changed = FALSE;
5053     }
5054 
5055     if (fcb->xattrs_changed) {
5056         LIST_ENTRY* le;
5057 
5058         le = fcb->xattrs.Flink;
5059         while (le != &fcb->xattrs) {
5060             xattr* xa = CONTAINING_RECORD(le, xattr, list_entry);
5061             LIST_ENTRY* le2 = le->Flink;
5062 
5063             if (xa->dirty) {
5064                 UINT32 hash = calc_crc32c(0xfffffffe, (UINT8*)xa->data, xa->namelen);
5065 
5066                 if (xa->valuelen == 0) {
5067                     Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, hash);
5068                     if (!NT_SUCCESS(Status)) {
5069                         ERR("delete_xattr returned %08x\n", Status);
5070                         goto end;
5071                     }
5072 
5073                     RemoveEntryList(&xa->list_entry);
5074                     ExFreePool(xa);
5075                 } else {
5076                     Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen,
5077                                        hash, (UINT8*)&xa->data[xa->namelen], xa->valuelen);
5078                     if (!NT_SUCCESS(Status)) {
5079                         ERR("set_xattr returned %08x\n", Status);
5080                         goto end;
5081                     }
5082 
5083                     xa->dirty = FALSE;
5084                 }
5085             }
5086 
5087             le = le2;
5088         }
5089 
5090         fcb->xattrs_changed = FALSE;
5091     }
5092 
5093     Status = STATUS_SUCCESS;
5094 
5095 end:
5096     if (fcb->dirty) {
5097         BOOL lock = FALSE;
5098 
5099         fcb->dirty = FALSE;
5100 
5101         if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->dirty_fcbs_lock)) {
5102             ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, TRUE);
5103             lock = TRUE;
5104         }
5105 
5106         RemoveEntryList(&fcb->list_entry_dirty);
5107 
5108         if (lock)
5109             ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock);
5110     }
5111 
5112     return Status;
5113 }
5114 
5115 void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, UINT64 address, UINT64 size) {
5116     int i;
5117     ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
5118 
5119     i = 0;
5120     while (superblock_addrs[i] != 0) {
5121         if (superblock_addrs[i] + sblen >= address && superblock_addrs[i] < address + size) {
5122             if (superblock_addrs[i] > address)
5123                 add_trim_entry(dev, address, superblock_addrs[i] - address);
5124 
5125             if (size <= superblock_addrs[i] + sblen - address)
5126                 return;
5127 
5128             size -= superblock_addrs[i] + sblen - address;
5129             address = superblock_addrs[i] + sblen;
5130         } else if (superblock_addrs[i] > address + size)
5131             break;
5132 
5133         i++;
5134     }
5135 
5136     add_trim_entry(dev, address, size);
5137 }
5138 
5139 static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5140     NTSTATUS Status;
5141     KEY searchkey;
5142     traverse_ptr tp;
5143     UINT64 i, factor;
5144     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];;
5145 
5146     TRACE("dropping chunk %llx\n", c->offset);
5147 
5148     if (c->chunk_item->type & BLOCK_FLAG_RAID0)
5149         factor = c->chunk_item->num_stripes;
5150     else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
5151         factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
5152     else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
5153         factor = c->chunk_item->num_stripes - 1;
5154     else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
5155         factor = c->chunk_item->num_stripes - 2;
5156     else // SINGLE, DUPLICATE, RAID1
5157         factor = 1;
5158 
5159     // do TRIM
5160     if (Vcb->trim && !Vcb->options.no_trim) {
5161         UINT64 len = c->chunk_item->size / factor;
5162 
5163         for (i = 0; i < c->chunk_item->num_stripes; i++) {
5164             if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
5165                 add_trim_entry_avoid_sb(Vcb, c->devices[i], cis[i].offset, len);
5166         }
5167     }
5168 
5169     if (!c->cache) {
5170         Status = load_stored_free_space_cache(Vcb, c, TRUE, Irp);
5171 
5172         if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND)
5173             WARN("load_stored_free_space_cache returned %08x\n", Status);
5174     }
5175 
5176     // remove free space cache
5177     if (c->cache) {
5178         c->cache->deleted = TRUE;
5179 
5180         Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback);
5181         if (!NT_SUCCESS(Status)) {
5182             ERR("excise_extents returned %08x\n", Status);
5183             return Status;
5184         }
5185 
5186         Status = flush_fcb(c->cache, TRUE, batchlist, Irp);
5187 
5188         free_fcb(Vcb, c->cache);
5189 
5190         if (!NT_SUCCESS(Status)) {
5191             ERR("flush_fcb returned %08x\n", Status);
5192             return Status;
5193         }
5194 
5195         searchkey.obj_id = FREE_SPACE_CACHE_ID;
5196         searchkey.obj_type = 0;
5197         searchkey.offset = c->offset;
5198 
5199         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
5200         if (!NT_SUCCESS(Status)) {
5201             ERR("error - find_item returned %08x\n", Status);
5202             return Status;
5203         }
5204 
5205         if (!keycmp(tp.item->key, searchkey)) {
5206             Status = delete_tree_item(Vcb, &tp);
5207             if (!NT_SUCCESS(Status)) {
5208                 ERR("delete_tree_item returned %08x\n", Status);
5209                 return Status;
5210             }
5211         }
5212     }
5213 
5214     if (Vcb->space_root) {
5215         Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size,
5216                                         NULL, 0, Batch_DeleteFreeSpace);
5217         if (!NT_SUCCESS(Status)) {
5218             ERR("insert_tree_item_batch returned %08x\n", Status);
5219             return Status;
5220         }
5221     }
5222 
5223     for (i = 0; i < c->chunk_item->num_stripes; i++) {
5224         if (!c->created) {
5225             // remove DEV_EXTENTs from tree 4
5226             searchkey.obj_id = cis[i].dev_id;
5227             searchkey.obj_type = TYPE_DEV_EXTENT;
5228             searchkey.offset = cis[i].offset;
5229 
5230             Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp);
5231             if (!NT_SUCCESS(Status)) {
5232                 ERR("error - find_item returned %08x\n", Status);
5233                 return Status;
5234             }
5235 
5236             if (!keycmp(tp.item->key, searchkey)) {
5237                 Status = delete_tree_item(Vcb, &tp);
5238                 if (!NT_SUCCESS(Status)) {
5239                     ERR("delete_tree_item returned %08x\n", Status);
5240                     return Status;
5241                 }
5242 
5243                 if (tp.item->size >= sizeof(DEV_EXTENT)) {
5244                     DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
5245 
5246                     c->devices[i]->devitem.bytes_used -= de->length;
5247 
5248                     if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5249                         if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + de->length > Vcb->balance.opts[0].drange_start)
5250                             space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5251                     } else
5252                         space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, NULL, rollback);
5253                 }
5254             } else
5255                 WARN("could not find (%llx,%x,%llx) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
5256         } else {
5257             UINT64 len = c->chunk_item->size / factor;
5258 
5259             c->devices[i]->devitem.bytes_used -= len;
5260 
5261             if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5262                 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + len > Vcb->balance.opts[0].drange_start)
5263                     space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5264             } else
5265                 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, NULL, rollback);
5266         }
5267     }
5268 
5269     // modify DEV_ITEMs in chunk tree
5270     for (i = 0; i < c->chunk_item->num_stripes; i++) {
5271         if (c->devices[i]) {
5272             UINT64 j;
5273             DEV_ITEM* di;
5274 
5275             searchkey.obj_id = 1;
5276             searchkey.obj_type = TYPE_DEV_ITEM;
5277             searchkey.offset = c->devices[i]->devitem.dev_id;
5278 
5279             Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
5280             if (!NT_SUCCESS(Status)) {
5281                 ERR("error - find_item returned %08x\n", Status);
5282                 return Status;
5283             }
5284 
5285             if (keycmp(tp.item->key, searchkey)) {
5286                 ERR("error - could not find DEV_ITEM for device %llx\n", searchkey.offset);
5287                 return STATUS_INTERNAL_ERROR;
5288             }
5289 
5290             Status = delete_tree_item(Vcb, &tp);
5291             if (!NT_SUCCESS(Status)) {
5292                 ERR("delete_tree_item returned %08x\n", Status);
5293                 return Status;
5294             }
5295 
5296             di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
5297             if (!di) {
5298                 ERR("out of memory\n");
5299                 return STATUS_INSUFFICIENT_RESOURCES;
5300             }
5301 
5302             RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
5303 
5304             Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
5305             if (!NT_SUCCESS(Status)) {
5306                 ERR("insert_tree_item returned %08x\n", Status);
5307                 return Status;
5308             }
5309 
5310             for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
5311                 if (c->devices[j] == c->devices[i])
5312                     c->devices[j] = NULL;
5313             }
5314         }
5315     }
5316 
5317     if (!c->created) {
5318         // remove CHUNK_ITEM from chunk tree
5319         searchkey.obj_id = 0x100;
5320         searchkey.obj_type = TYPE_CHUNK_ITEM;
5321         searchkey.offset = c->offset;
5322 
5323         Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
5324         if (!NT_SUCCESS(Status)) {
5325             ERR("error - find_item returned %08x\n", Status);
5326             return Status;
5327         }
5328 
5329         if (!keycmp(tp.item->key, searchkey)) {
5330             Status = delete_tree_item(Vcb, &tp);
5331 
5332             if (!NT_SUCCESS(Status)) {
5333                 ERR("delete_tree_item returned %08x\n", Status);
5334                 return Status;
5335             }
5336         } else
5337             WARN("could not find CHUNK_ITEM for chunk %llx\n", c->offset);
5338 
5339         // remove BLOCK_GROUP_ITEM from extent tree
5340         searchkey.obj_id = c->offset;
5341         searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
5342         searchkey.offset = 0xffffffffffffffff;
5343 
5344         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
5345         if (!NT_SUCCESS(Status)) {
5346             ERR("error - find_item returned %08x\n", Status);
5347             return Status;
5348         }
5349 
5350         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
5351             Status = delete_tree_item(Vcb, &tp);
5352 
5353             if (!NT_SUCCESS(Status)) {
5354                 ERR("delete_tree_item returned %08x\n", Status);
5355                 return Status;
5356             }
5357         } else
5358             WARN("could not find BLOCK_GROUP_ITEM for chunk %llx\n", c->offset);
5359     }
5360 
5361     if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
5362         remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
5363 
5364     RemoveEntryList(&c->list_entry);
5365 
5366     // clear raid56 incompat flag if dropping last RAID5/6 chunk
5367 
5368     if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
5369         LIST_ENTRY* le;
5370         BOOL clear_flag = TRUE;
5371 
5372         le = Vcb->chunks.Flink;
5373         while (le != &Vcb->chunks) {
5374             chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
5375 
5376             if (c2->chunk_item->type & BLOCK_FLAG_RAID5 || c2->chunk_item->type & BLOCK_FLAG_RAID6) {
5377                 clear_flag = FALSE;
5378                 break;
5379             }
5380 
5381             le = le->Flink;
5382         }
5383 
5384         if (clear_flag)
5385             Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID56;
5386     }
5387 
5388     Vcb->superblock.bytes_used -= c->oldused;
5389 
5390     ExFreePool(c->chunk_item);
5391     ExFreePool(c->devices);
5392 
5393     while (!IsListEmpty(&c->space)) {
5394         space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
5395 
5396         RemoveEntryList(&s->list_entry);
5397         ExFreePool(s);
5398     }
5399 
5400     while (!IsListEmpty(&c->deleting)) {
5401         space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
5402 
5403         RemoveEntryList(&s->list_entry);
5404         ExFreePool(s);
5405     }
5406 
5407     ExDeleteResourceLite(&c->partial_stripes_lock);
5408     ExDeleteResourceLite(&c->range_locks_lock);
5409     ExDeleteResourceLite(&c->lock);
5410     ExDeleteResourceLite(&c->changed_extents_lock);
5411 
5412     ExFreePool(c);
5413 
5414     return STATUS_SUCCESS;
5415 }
5416 
5417 static NTSTATUS partial_stripe_read(device_extension* Vcb, chunk* c, partial_stripe* ps, UINT64 startoff, UINT16 parity, ULONG offset, ULONG len) {
5418     NTSTATUS Status;
5419     ULONG sl = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size);
5420     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5421 
5422     while (len > 0) {
5423         ULONG readlen = min(offset + len, offset + (sl - (offset % sl))) - offset;
5424         UINT16 stripe;
5425 
5426         stripe = (parity + (offset / sl) + 1) % c->chunk_item->num_stripes;
5427 
5428         if (c->devices[stripe]->devobj) {
5429             Status = sync_read_phys(c->devices[stripe]->devobj, cis[stripe].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5430                                     readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), FALSE);
5431             if (!NT_SUCCESS(Status)) {
5432                 ERR("sync_read_phys returned %08x\n", Status);
5433                 return Status;
5434             }
5435         } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
5436             UINT16 i;
5437             UINT8* scratch;
5438 
5439             scratch = ExAllocatePoolWithTag(NonPagedPool, readlen * Vcb->superblock.sector_size, ALLOC_TAG);
5440             if (!scratch) {
5441                 ERR("out of memory\n");
5442                 return STATUS_INSUFFICIENT_RESOURCES;
5443             }
5444 
5445             for (i = 0; i < c->chunk_item->num_stripes; i++) {
5446                 if (i != stripe) {
5447                     if (!c->devices[i]->devobj) {
5448                         ExFreePool(scratch);
5449                         return STATUS_UNEXPECTED_IO_ERROR;
5450                     }
5451 
5452                     if (i == 0 || (stripe == 0 && i == 1)) {
5453                         Status = sync_read_phys(c->devices[i]->devobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5454                                                 readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), FALSE);
5455                         if (!NT_SUCCESS(Status)) {
5456                             ERR("sync_read_phys returned %08x\n", Status);
5457                             ExFreePool(scratch);
5458                             return Status;
5459                         }
5460                     } else {
5461                         Status = sync_read_phys(c->devices[i]->devobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5462                                                 readlen * Vcb->superblock.sector_size, scratch, FALSE);
5463                         if (!NT_SUCCESS(Status)) {
5464                             ERR("sync_read_phys returned %08x\n", Status);
5465                             ExFreePool(scratch);
5466                             return Status;
5467                         }
5468 
5469                         do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch, readlen * Vcb->superblock.sector_size);
5470                     }
5471                 }
5472             }
5473 
5474             ExFreePool(scratch);
5475         } else {
5476             UINT8* scratch;
5477             UINT16 k, i, logstripe, error_stripe, num_errors = 0;
5478 
5479             scratch = ExAllocatePoolWithTag(NonPagedPool, (c->chunk_item->num_stripes + 2) * readlen * Vcb->superblock.sector_size, ALLOC_TAG);
5480             if (!scratch) {
5481                 ERR("out of memory\n");
5482                 return STATUS_INSUFFICIENT_RESOURCES;
5483             }
5484 
5485             i = (parity + 1) % c->chunk_item->num_stripes;
5486             for (k = 0; k < c->chunk_item->num_stripes; k++) {
5487                 if (i != stripe) {
5488                     if (c->devices[i]->devobj) {
5489                         Status = sync_read_phys(c->devices[i]->devobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5490                                                 readlen * Vcb->superblock.sector_size, scratch + (k * readlen * Vcb->superblock.sector_size), FALSE);
5491                         if (!NT_SUCCESS(Status)) {
5492                             ERR("sync_read_phys returned %08x\n", Status);
5493                             num_errors++;
5494                             error_stripe = k;
5495                         }
5496                     } else {
5497                         num_errors++;
5498                         error_stripe = k;
5499                     }
5500 
5501                     if (num_errors > 1) {
5502                         ExFreePool(scratch);
5503                         return STATUS_UNEXPECTED_IO_ERROR;
5504                     }
5505                 } else
5506                     logstripe = k;
5507 
5508                 i = (i + 1) % c->chunk_item->num_stripes;
5509             }
5510 
5511             if (num_errors == 0 || error_stripe == c->chunk_item->num_stripes - 1) {
5512                 for (k = 0; k < c->chunk_item->num_stripes - 1; k++) {
5513                     if (k != logstripe) {
5514                         if (k == 0 || (k == 1 && logstripe == 0)) {
5515                             RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size),
5516                                           readlen * Vcb->superblock.sector_size);
5517                         } else {
5518                             do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size),
5519                                    readlen * Vcb->superblock.sector_size);
5520                         }
5521                     }
5522                 }
5523             } else {
5524                 raid6_recover2(scratch, c->chunk_item->num_stripes, readlen * Vcb->superblock.sector_size, logstripe,
5525                                error_stripe, scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size));
5526 
5527                 RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size),
5528                               readlen * Vcb->superblock.sector_size);
5529             }
5530 
5531             ExFreePool(scratch);
5532         }
5533 
5534         offset += readlen;
5535         len -= readlen;
5536     }
5537 
5538     return STATUS_SUCCESS;
5539 }
5540 
5541 NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps) {
5542     NTSTATUS Status;
5543     UINT16 parity2, stripe, startoffstripe;
5544     UINT8* data;
5545     UINT64 startoff;
5546     ULONG runlength, index, last1;
5547     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5548     LIST_ENTRY* le;
5549     UINT16 k, num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
5550     UINT64 ps_length = num_data_stripes * c->chunk_item->stripe_length;
5551     ULONG stripe_length = (ULONG)c->chunk_item->stripe_length;
5552 
5553     // FIXME - do writes asynchronously?
5554 
5555     get_raid0_offset(ps->address - c->offset, stripe_length, num_data_stripes, &startoff, &startoffstripe);
5556 
5557     parity2 = (((ps->address - c->offset) / ps_length) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
5558 
5559     // read data (or reconstruct if degraded)
5560 
5561     runlength = RtlFindFirstRunClear(&ps->bmp, &index);
5562     last1 = 0;
5563 
5564     while (runlength != 0) {
5565         if (index > last1) {
5566             Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, index - last1);
5567             if (!NT_SUCCESS(Status)) {
5568                 ERR("partial_stripe_read returned %08x\n", Status);
5569                 return Status;
5570             }
5571         }
5572 
5573         last1 = index + runlength;
5574 
5575         runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
5576     }
5577 
5578     if (last1 < ps_length / Vcb->superblock.sector_size) {
5579         Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, (ULONG)((ps_length / Vcb->superblock.sector_size) - last1));
5580         if (!NT_SUCCESS(Status)) {
5581             ERR("partial_stripe_read returned %08x\n", Status);
5582             return Status;
5583         }
5584     }
5585 
5586     // set unallocated data to 0
5587     le = c->space.Flink;
5588     while (le != &c->space) {
5589         space* s = CONTAINING_RECORD(le, space, list_entry);
5590 
5591         if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
5592             UINT64 start = max(ps->address, s->address);
5593             UINT64 end = min(ps->address + ps_length, s->address + s->size);
5594 
5595             RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
5596         } else if (s->address >= ps->address + ps_length)
5597             break;
5598 
5599         le = le->Flink;
5600     }
5601 
5602     le = c->deleting.Flink;
5603     while (le != &c->deleting) {
5604         space* s = CONTAINING_RECORD(le, space, list_entry);
5605 
5606         if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
5607             UINT64 start = max(ps->address, s->address);
5608             UINT64 end = min(ps->address + ps_length, s->address + s->size);
5609 
5610             RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
5611         } else if (s->address >= ps->address + ps_length)
5612             break;
5613 
5614         le = le->Flink;
5615     }
5616 
5617     stripe = (parity2 + 1) % c->chunk_item->num_stripes;
5618 
5619     data = ps->data;
5620     for (k = 0; k < num_data_stripes; k++) {
5621         if (c->devices[stripe]->devobj) {
5622             Status = write_data_phys(c->devices[stripe]->devobj, cis[stripe].offset + startoff, data, stripe_length);
5623             if (!NT_SUCCESS(Status)) {
5624                 ERR("write_data_phys returned %08x\n", Status);
5625                 return Status;
5626             }
5627         }
5628 
5629         data += stripe_length;
5630         stripe = (stripe + 1) % c->chunk_item->num_stripes;
5631     }
5632 
5633     // write parity
5634     if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
5635         if (c->devices[parity2]->devobj) {
5636             UINT16 i;
5637 
5638             for (i = 1; i < c->chunk_item->num_stripes - 1; i++) {
5639                 do_xor(ps->data, ps->data + (i * stripe_length), stripe_length);
5640             }
5641 
5642             Status = write_data_phys(c->devices[parity2]->devobj, cis[parity2].offset + startoff, ps->data, stripe_length);
5643             if (!NT_SUCCESS(Status)) {
5644                 ERR("write_data_phys returned %08x\n", Status);
5645                 return Status;
5646             }
5647         }
5648     } else {
5649         UINT16 parity1 = (parity2 + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
5650 
5651         if (c->devices[parity1]->devobj || c->devices[parity2]->devobj) {
5652             UINT8* scratch;
5653             UINT16 i;
5654 
5655             scratch = ExAllocatePoolWithTag(NonPagedPool, stripe_length * 2, ALLOC_TAG);
5656             if (!scratch) {
5657                 ERR("out of memory\n");
5658                 return STATUS_INSUFFICIENT_RESOURCES;
5659             }
5660 
5661             i = c->chunk_item->num_stripes - 3;
5662 
5663             while (TRUE) {
5664                 if (i == c->chunk_item->num_stripes - 3) {
5665                     RtlCopyMemory(scratch, ps->data + (i * stripe_length), stripe_length);
5666                     RtlCopyMemory(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
5667                 } else {
5668                     do_xor(scratch, ps->data + (i * stripe_length), stripe_length);
5669 
5670                     galois_double(scratch + stripe_length, stripe_length);
5671                     do_xor(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
5672                 }
5673 
5674                 if (i == 0)
5675                     break;
5676 
5677                 i--;
5678             }
5679 
5680             if (c->devices[parity1]->devobj) {
5681                 Status = write_data_phys(c->devices[parity1]->devobj, cis[parity1].offset + startoff, scratch, stripe_length);
5682                 if (!NT_SUCCESS(Status)) {
5683                     ERR("write_data_phys returned %08x\n", Status);
5684                     ExFreePool(scratch);
5685                     return Status;
5686                 }
5687             }
5688 
5689             if (c->devices[parity2]->devobj) {
5690                 Status = write_data_phys(c->devices[parity2]->devobj, cis[parity2].offset + startoff, scratch + stripe_length, stripe_length);
5691                 if (!NT_SUCCESS(Status)) {
5692                     ERR("write_data_phys returned %08x\n", Status);
5693                     ExFreePool(scratch);
5694                     return Status;
5695                 }
5696             }
5697 
5698             ExFreePool(scratch);
5699         }
5700     }
5701 
5702     return STATUS_SUCCESS;
5703 }
5704 
5705 static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5706     LIST_ENTRY *le, *le2;
5707     NTSTATUS Status;
5708     UINT64 used_minus_cache;
5709 
5710     ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
5711 
5712     // FIXME - do tree chunks before data chunks
5713 
5714     le = Vcb->chunks.Flink;
5715     while (le != &Vcb->chunks) {
5716         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
5717 
5718         le2 = le->Flink;
5719 
5720         if (c->changed) {
5721             ExAcquireResourceExclusiveLite(&c->lock, TRUE);
5722 
5723             // flush partial stripes
5724             if (!Vcb->readonly && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) {
5725                 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE);
5726 
5727                 while (!IsListEmpty(&c->partial_stripes)) {
5728                     partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
5729 
5730                     Status = flush_partial_stripe(Vcb, c, ps);
5731 
5732                     if (ps->bmparr)
5733                         ExFreePool(ps->bmparr);
5734 
5735                     ExFreePool(ps);
5736 
5737                     if (!NT_SUCCESS(Status)) {
5738                         ERR("flush_partial_stripe returned %08x\n", Status);
5739                         ExReleaseResourceLite(&c->partial_stripes_lock);
5740                         ExReleaseResourceLite(&c->lock);
5741                         ExReleaseResourceLite(&Vcb->chunk_lock);
5742                         return Status;
5743                     }
5744                 }
5745 
5746                 ExReleaseResourceLite(&c->partial_stripes_lock);
5747             }
5748 
5749             if (c->list_entry_balance.Flink) {
5750                 ExReleaseResourceLite(&c->lock);
5751                 le = le2;
5752                 continue;
5753             }
5754 
5755             if (c->space_changed || c->created) {
5756                 used_minus_cache = c->used;
5757 
5758                 // subtract self-hosted cache
5759                 if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
5760                     LIST_ENTRY* le3;
5761 
5762                     le3 = c->cache->extents.Flink;
5763                     while (le3 != &c->cache->extents) {
5764                         extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
5765                         EXTENT_DATA* ed = &ext->extent_data;
5766 
5767                         if (!ext->ignore) {
5768                             if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
5769                                 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
5770 
5771                                 if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
5772                                     used_minus_cache -= ed2->size;
5773                             }
5774                         }
5775 
5776                         le3 = le3->Flink;
5777                     }
5778                 }
5779 
5780                 if (used_minus_cache == 0) {
5781                     Status = drop_chunk(Vcb, c, batchlist, Irp, rollback);
5782                     if (!NT_SUCCESS(Status)) {
5783                         ERR("drop_chunk returned %08x\n", Status);
5784                         ExReleaseResourceLite(&c->lock);
5785                         ExReleaseResourceLite(&Vcb->chunk_lock);
5786                         return Status;
5787                     }
5788                 } else if (c->created) {
5789                     Status = create_chunk(Vcb, c, Irp);
5790                     if (!NT_SUCCESS(Status)) {
5791                         ERR("create_chunk returned %08x\n", Status);
5792                         ExReleaseResourceLite(&c->lock);
5793                         ExReleaseResourceLite(&Vcb->chunk_lock);
5794                         return Status;
5795                     }
5796                 }
5797 
5798                 if (used_minus_cache > 0)
5799                     ExReleaseResourceLite(&c->lock);
5800             }
5801         }
5802 
5803         le = le2;
5804     }
5805 
5806     ExReleaseResourceLite(&Vcb->chunk_lock);
5807 
5808     return STATUS_SUCCESS;
5809 }
5810 
5811 static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, UINT64 parinode, PANSI_STRING utf8, PIRP Irp) {
5812     KEY searchkey;
5813     traverse_ptr tp;
5814     NTSTATUS Status;
5815 
5816     searchkey.obj_id = parsubvolid;
5817     searchkey.obj_type = TYPE_ROOT_REF;
5818     searchkey.offset = subvolid;
5819 
5820     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
5821     if (!NT_SUCCESS(Status)) {
5822         ERR("error - find_item returned %08x\n", Status);
5823         return Status;
5824     }
5825 
5826     if (!keycmp(searchkey, tp.item->key)) {
5827         if (tp.item->size < sizeof(ROOT_REF)) {
5828             ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
5829             return STATUS_INTERNAL_ERROR;
5830         } else {
5831             ROOT_REF* rr;
5832             ULONG len;
5833 
5834             rr = (ROOT_REF*)tp.item->data;
5835             len = tp.item->size;
5836 
5837             do {
5838                 UINT16 itemlen;
5839 
5840                 if (len < sizeof(ROOT_REF) || len < offsetof(ROOT_REF, name[0]) + rr->n) {
5841                     ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5842                     break;
5843                 }
5844 
5845                 itemlen = (UINT16)offsetof(ROOT_REF, name[0]) + rr->n;
5846 
5847                 if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
5848                     UINT16 newlen = tp.item->size - itemlen;
5849 
5850                     Status = delete_tree_item(Vcb, &tp);
5851                     if (!NT_SUCCESS(Status)) {
5852                         ERR("delete_tree_item returned %08x\n", Status);
5853                         return Status;
5854                     }
5855 
5856                     if (newlen == 0) {
5857                         TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5858                     } else {
5859                         UINT8 *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
5860 
5861                         if (!newrr) {
5862                             ERR("out of memory\n");
5863                             return STATUS_INSUFFICIENT_RESOURCES;
5864                         }
5865 
5866                         TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5867 
5868                         if ((UINT8*)rr > tp.item->data) {
5869                             RtlCopyMemory(newrr, tp.item->data, (UINT8*)rr - tp.item->data);
5870                             rroff = newrr + ((UINT8*)rr - tp.item->data);
5871                         } else {
5872                             rroff = newrr;
5873                         }
5874 
5875                         if ((UINT8*)&rr->name[rr->n] < tp.item->data + tp.item->size)
5876                             RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((UINT8*)&rr->name[rr->n] - tp.item->data));
5877 
5878                         Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp);
5879                         if (!NT_SUCCESS(Status)) {
5880                             ERR("insert_tree_item returned %08x\n", Status);
5881                             ExFreePool(newrr);
5882                             return Status;
5883                         }
5884                     }
5885 
5886                     break;
5887                 }
5888 
5889                 if (len > itemlen) {
5890                     len -= itemlen;
5891                     rr = (ROOT_REF*)&rr->name[rr->n];
5892                 } else
5893                     break;
5894             } while (len > 0);
5895         }
5896     } else {
5897         WARN("could not find ROOT_REF entry for subvol %llx in %llx\n", searchkey.offset, searchkey.obj_id);
5898         return STATUS_NOT_FOUND;
5899     }
5900 
5901     return STATUS_SUCCESS;
5902 }
5903 
5904 #ifdef _MSC_VER
5905 #pragma warning(push)
5906 #pragma warning(suppress: 28194)
5907 #endif
5908 static NTSTATUS add_root_ref(_In_ device_extension* Vcb, _In_ UINT64 subvolid, _In_ UINT64 parsubvolid, _In_ __drv_aliasesMem ROOT_REF* rr, _In_opt_ PIRP Irp) {
5909     KEY searchkey;
5910     traverse_ptr tp;
5911     NTSTATUS Status;
5912 
5913     searchkey.obj_id = parsubvolid;
5914     searchkey.obj_type = TYPE_ROOT_REF;
5915     searchkey.offset = subvolid;
5916 
5917     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
5918     if (!NT_SUCCESS(Status)) {
5919         ERR("error - find_item returned %08x\n", Status);
5920         return Status;
5921     }
5922 
5923     if (!keycmp(searchkey, tp.item->key)) {
5924         UINT16 rrsize = tp.item->size + (UINT16)offsetof(ROOT_REF, name[0]) + rr->n;
5925         UINT8* rr2;
5926 
5927         rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
5928         if (!rr2) {
5929             ERR("out of memory\n");
5930             return STATUS_INSUFFICIENT_RESOURCES;
5931         }
5932 
5933         if (tp.item->size > 0)
5934             RtlCopyMemory(rr2, tp.item->data, tp.item->size);
5935 
5936         RtlCopyMemory(rr2 + tp.item->size, rr, offsetof(ROOT_REF, name[0]) + rr->n);
5937         ExFreePool(rr);
5938 
5939         Status = delete_tree_item(Vcb, &tp);
5940         if (!NT_SUCCESS(Status)) {
5941             ERR("delete_tree_item returned %08x\n", Status);
5942             ExFreePool(rr2);
5943             return Status;
5944         }
5945 
5946         Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp);
5947         if (!NT_SUCCESS(Status)) {
5948             ERR("insert_tree_item returned %08x\n", Status);
5949             ExFreePool(rr2);
5950             return Status;
5951         }
5952     } else {
5953         Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, (UINT16)offsetof(ROOT_REF, name[0]) + rr->n, NULL, Irp);
5954         if (!NT_SUCCESS(Status)) {
5955             ERR("insert_tree_item returned %08x\n", Status);
5956             ExFreePool(rr);
5957             return Status;
5958         }
5959     }
5960 
5961     return STATUS_SUCCESS;
5962 }
5963 #ifdef _MSC_VER
5964 #pragma warning(pop)
5965 #endif
5966 
5967 static NTSTATUS update_root_backref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, PIRP Irp) {
5968     KEY searchkey;
5969     traverse_ptr tp;
5970     UINT8* data;
5971     UINT16 datalen;
5972     NTSTATUS Status;
5973 
5974     searchkey.obj_id = parsubvolid;
5975     searchkey.obj_type = TYPE_ROOT_REF;
5976     searchkey.offset = subvolid;
5977 
5978     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
5979     if (!NT_SUCCESS(Status)) {
5980         ERR("error - find_item returned %08x\n", Status);
5981         return Status;
5982     }
5983 
5984     if (!keycmp(tp.item->key, searchkey) && tp.item->size > 0) {
5985         datalen = tp.item->size;
5986 
5987         data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
5988         if (!data) {
5989             ERR("out of memory\n");
5990             return STATUS_INSUFFICIENT_RESOURCES;
5991         }
5992 
5993         RtlCopyMemory(data, tp.item->data, datalen);
5994     } else {
5995         datalen = 0;
5996         data = NULL;
5997     }
5998 
5999     searchkey.obj_id = subvolid;
6000     searchkey.obj_type = TYPE_ROOT_BACKREF;
6001     searchkey.offset = parsubvolid;
6002 
6003     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
6004     if (!NT_SUCCESS(Status)) {
6005         ERR("error - find_item returned %08x\n", Status);
6006 
6007         if (datalen > 0)
6008             ExFreePool(data);
6009 
6010         return Status;
6011     }
6012 
6013     if (!keycmp(tp.item->key, searchkey)) {
6014         Status = delete_tree_item(Vcb, &tp);
6015         if (!NT_SUCCESS(Status)) {
6016             ERR("delete_tree_item returned %08x\n", Status);
6017 
6018             if (datalen > 0)
6019                 ExFreePool(data);
6020 
6021             return Status;
6022         }
6023     }
6024 
6025     if (datalen > 0) {
6026         Status = insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp);
6027         if (!NT_SUCCESS(Status)) {
6028             ERR("insert_tree_item returned %08x\n", Status);
6029             ExFreePool(data);
6030             return Status;
6031         }
6032     }
6033 
6034     return STATUS_SUCCESS;
6035 }
6036 
6037 static NTSTATUS add_root_item_to_cache(device_extension* Vcb, UINT64 root, PIRP Irp) {
6038     KEY searchkey;
6039     traverse_ptr tp;
6040     NTSTATUS Status;
6041 
6042     searchkey.obj_id = root;
6043     searchkey.obj_type = TYPE_ROOT_ITEM;
6044     searchkey.offset = 0xffffffffffffffff;
6045 
6046     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
6047     if (!NT_SUCCESS(Status)) {
6048         ERR("error - find_item returned %08x\n", Status);
6049         return Status;
6050     }
6051 
6052     if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6053         ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
6054         return STATUS_INTERNAL_ERROR;
6055     }
6056 
6057     if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
6058         ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6059         if (!ri) {
6060             ERR("out of memory\n");
6061             return STATUS_INSUFFICIENT_RESOURCES;
6062         }
6063 
6064         if (tp.item->size > 0)
6065             RtlCopyMemory(ri, tp.item->data, tp.item->size);
6066 
6067         RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
6068 
6069         Status = delete_tree_item(Vcb, &tp);
6070         if (!NT_SUCCESS(Status)) {
6071             ERR("delete_tree_item returned %08x\n", Status);
6072             ExFreePool(ri);
6073             return Status;
6074         }
6075 
6076         Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6077         if (!NT_SUCCESS(Status)) {
6078             ERR("insert_tree_item returned %08x\n", Status);
6079             ExFreePool(ri);
6080             return Status;
6081         }
6082     } else {
6083         tp.tree->write = TRUE;
6084     }
6085 
6086     return STATUS_SUCCESS;
6087 }
6088 
6089 static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp) {
6090     NTSTATUS Status;
6091 
6092     // if fileref created and then immediately deleted, do nothing
6093     if (fileref->created && fileref->deleted) {
6094         fileref->dirty = FALSE;
6095         return STATUS_SUCCESS;
6096     }
6097 
6098     if (fileref->fcb->ads) {
6099         fileref->dirty = FALSE;
6100         return STATUS_SUCCESS;
6101     }
6102 
6103     if (fileref->created) {
6104         UINT16 disize;
6105         DIR_ITEM *di, *di2;
6106         UINT32 crc32;
6107 
6108         crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6109 
6110         disize = (UINT16)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6111         di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6112         if (!di) {
6113             ERR("out of memory\n");
6114             return STATUS_INSUFFICIENT_RESOURCES;
6115         }
6116 
6117         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6118             di->key.obj_id = fileref->fcb->inode;
6119             di->key.obj_type = TYPE_INODE_ITEM;
6120             di->key.offset = 0;
6121         } else { // subvolume
6122             di->key.obj_id = fileref->fcb->subvol->id;
6123             di->key.obj_type = TYPE_ROOT_ITEM;
6124             di->key.offset = 0xffffffffffffffff;
6125         }
6126 
6127         di->transid = fileref->fcb->Vcb->superblock.generation;
6128         di->m = 0;
6129         di->n = (UINT16)fileref->dc->utf8.Length;
6130         di->type = fileref->fcb->type;
6131         RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6132 
6133         di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6134         if (!di2) {
6135             ERR("out of memory\n");
6136             return STATUS_INSUFFICIENT_RESOURCES;
6137         }
6138 
6139         RtlCopyMemory(di2, di, disize);
6140 
6141         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6142                                         fileref->dc->index, di, disize, Batch_Insert);
6143         if (!NT_SUCCESS(Status)) {
6144             ERR("insert_tree_item_batch returned %08x\n", Status);
6145             return Status;
6146         }
6147 
6148         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6149                                         di2, disize, Batch_DirItem);
6150         if (!NT_SUCCESS(Status)) {
6151             ERR("insert_tree_item_batch returned %08x\n", Status);
6152             return Status;
6153         }
6154 
6155         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6156             INODE_REF* ir;
6157 
6158             ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6159             if (!ir) {
6160                 ERR("out of memory\n");
6161                 return STATUS_INSUFFICIENT_RESOURCES;
6162             }
6163 
6164             ir->index = fileref->dc->index;
6165             ir->n = fileref->dc->utf8.Length;
6166             RtlCopyMemory(ir->name, fileref->dc->utf8.Buffer, ir->n);
6167 
6168             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6169                                             ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef);
6170             if (!NT_SUCCESS(Status)) {
6171                 ERR("insert_tree_item_batch returned %08x\n", Status);
6172                 return Status;
6173             }
6174         } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) {
6175             ULONG rrlen;
6176             ROOT_REF* rr;
6177 
6178             rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6179 
6180             rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6181             if (!rr) {
6182                 ERR("out of memory\n");
6183                 return STATUS_INSUFFICIENT_RESOURCES;
6184             }
6185 
6186             rr->dir = fileref->parent->fcb->inode;
6187             rr->index = fileref->dc->index;
6188             rr->n = fileref->dc->utf8.Length;
6189             RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6190 
6191             Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6192             if (!NT_SUCCESS(Status)) {
6193                 ERR("add_root_ref returned %08x\n", Status);
6194                 return Status;
6195             }
6196 
6197             Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6198             if (!NT_SUCCESS(Status)) {
6199                 ERR("update_root_backref returned %08x\n", Status);
6200                 return Status;
6201             }
6202         }
6203 
6204         fileref->created = FALSE;
6205     } else if (fileref->deleted) {
6206         UINT32 crc32;
6207         ANSI_STRING* name;
6208         DIR_ITEM* di;
6209 
6210         name = &fileref->oldutf8;
6211 
6212         crc32 = calc_crc32c(0xfffffffe, (UINT8*)name->Buffer, name->Length);
6213 
6214         TRACE("deleting %.*S\n", file_desc_fileref(fileref));
6215 
6216         di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG);
6217         if (!di) {
6218             ERR("out of memory\n");
6219             return STATUS_INSUFFICIENT_RESOURCES;
6220         }
6221 
6222         di->m = 0;
6223         di->n = name->Length;
6224         RtlCopyMemory(di->name, name->Buffer, name->Length);
6225 
6226         // delete DIR_ITEM (0x54)
6227 
6228         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6229                                         crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem);
6230         if (!NT_SUCCESS(Status)) {
6231             ERR("insert_tree_item_batch returned %08x\n", Status);
6232             return Status;
6233         }
6234 
6235         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6236             INODE_REF* ir;
6237 
6238             // delete INODE_REF (0xc)
6239 
6240             ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG);
6241             if (!ir) {
6242                 ERR("out of memory\n");
6243                 return STATUS_INSUFFICIENT_RESOURCES;
6244             }
6245 
6246             ir->index = fileref->oldindex;
6247             ir->n = name->Length;
6248             RtlCopyMemory(ir->name, name->Buffer, name->Length);
6249 
6250             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF,
6251                                             fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef);
6252             if (!NT_SUCCESS(Status)) {
6253                 ERR("insert_tree_item_batch returned %08x\n", Status);
6254                 return Status;
6255             }
6256         } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6257             Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp);
6258             if (!NT_SUCCESS(Status)) {
6259                 ERR("delete_root_ref returned %08x\n", Status);
6260                 return Status;
6261             }
6262 
6263             Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6264             if (!NT_SUCCESS(Status)) {
6265                 ERR("update_root_backref returned %08x\n", Status);
6266                 return Status;
6267             }
6268         }
6269 
6270         // delete DIR_INDEX (0x60)
6271 
6272         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6273                                         fileref->oldindex, NULL, 0, Batch_Delete);
6274         if (!NT_SUCCESS(Status)) {
6275             ERR("insert_tree_item_batch returned %08x\n", Status);
6276             return Status;
6277         }
6278 
6279         if (fileref->oldutf8.Buffer) {
6280             ExFreePool(fileref->oldutf8.Buffer);
6281             fileref->oldutf8.Buffer = NULL;
6282         }
6283     } else { // rename or change type
6284         PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->dc->utf8;
6285         UINT32 crc32, oldcrc32;
6286         UINT16 disize;
6287         DIR_ITEM *olddi, *di, *di2;
6288 
6289         crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6290 
6291         if (!fileref->oldutf8.Buffer)
6292             oldcrc32 = crc32;
6293         else
6294             oldcrc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
6295 
6296         olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG);
6297         if (!olddi) {
6298             ERR("out of memory\n");
6299             return STATUS_INSUFFICIENT_RESOURCES;
6300         }
6301 
6302         olddi->m = 0;
6303         olddi->n = (UINT16)oldutf8->Length;
6304         RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length);
6305 
6306         // delete DIR_ITEM (0x54)
6307 
6308         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6309                                         oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem);
6310         if (!NT_SUCCESS(Status)) {
6311             ERR("insert_tree_item_batch returned %08x\n", Status);
6312             ExFreePool(olddi);
6313             return Status;
6314         }
6315 
6316         // add DIR_ITEM (0x54)
6317 
6318         disize = (UINT16)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6319         di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6320         if (!di) {
6321             ERR("out of memory\n");
6322             return STATUS_INSUFFICIENT_RESOURCES;
6323         }
6324 
6325         di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6326         if (!di2) {
6327             ERR("out of memory\n");
6328             ExFreePool(di);
6329             return STATUS_INSUFFICIENT_RESOURCES;
6330         }
6331 
6332         if (fileref->dc)
6333             di->key = fileref->dc->key;
6334         else if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6335             di->key.obj_id = fileref->fcb->inode;
6336             di->key.obj_type = TYPE_INODE_ITEM;
6337             di->key.offset = 0;
6338         } else { // subvolume
6339             di->key.obj_id = fileref->fcb->subvol->id;
6340             di->key.obj_type = TYPE_ROOT_ITEM;
6341             di->key.offset = 0xffffffffffffffff;
6342         }
6343 
6344         di->transid = fileref->fcb->Vcb->superblock.generation;
6345         di->m = 0;
6346         di->n = (UINT16)fileref->dc->utf8.Length;
6347         di->type = fileref->fcb->type;
6348         RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6349 
6350         RtlCopyMemory(di2, di, disize);
6351 
6352         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6353                                         di, disize, Batch_DirItem);
6354         if (!NT_SUCCESS(Status)) {
6355             ERR("insert_tree_item_batch returned %08x\n", Status);
6356             ExFreePool(di2);
6357             ExFreePool(di);
6358             return Status;
6359         }
6360 
6361         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6362             INODE_REF *ir, *ir2;
6363 
6364             // delete INODE_REF (0xc)
6365 
6366             ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG);
6367             if (!ir) {
6368                 ERR("out of memory\n");
6369                 ExFreePool(di2);
6370                 return STATUS_INSUFFICIENT_RESOURCES;
6371             }
6372 
6373             ir->index = fileref->dc->index;
6374             ir->n = oldutf8->Length;
6375             RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n);
6376 
6377             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6378                                             ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef);
6379             if (!NT_SUCCESS(Status)) {
6380                 ERR("insert_tree_item_batch returned %08x\n", Status);
6381                 ExFreePool(ir);
6382                 ExFreePool(di2);
6383                 return Status;
6384             }
6385 
6386             // add INODE_REF (0xc)
6387 
6388             ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6389             if (!ir2) {
6390                 ERR("out of memory\n");
6391                 ExFreePool(di2);
6392                 return STATUS_INSUFFICIENT_RESOURCES;
6393             }
6394 
6395             ir2->index = fileref->dc->index;
6396             ir2->n = fileref->dc->utf8.Length;
6397             RtlCopyMemory(ir2->name, fileref->dc->utf8.Buffer, ir2->n);
6398 
6399             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6400                                             ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef);
6401             if (!NT_SUCCESS(Status)) {
6402                 ERR("insert_tree_item_batch returned %08x\n", Status);
6403                 ExFreePool(ir2);
6404                 ExFreePool(di2);
6405                 return Status;
6406             }
6407         } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6408             ULONG rrlen;
6409             ROOT_REF* rr;
6410 
6411             Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp);
6412             if (!NT_SUCCESS(Status)) {
6413                 ERR("delete_root_ref returned %08x\n", Status);
6414                 ExFreePool(di2);
6415                 return Status;
6416             }
6417 
6418             rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6419 
6420             rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6421             if (!rr) {
6422                 ERR("out of memory\n");
6423                 ExFreePool(di2);
6424                 return STATUS_INSUFFICIENT_RESOURCES;
6425             }
6426 
6427             rr->dir = fileref->parent->fcb->inode;
6428             rr->index = fileref->dc->index;
6429             rr->n = fileref->dc->utf8.Length;
6430             RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6431 
6432             Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6433             if (!NT_SUCCESS(Status)) {
6434                 ERR("add_root_ref returned %08x\n", Status);
6435                 ExFreePool(di2);
6436                 return Status;
6437             }
6438 
6439             Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6440             if (!NT_SUCCESS(Status)) {
6441                 ERR("update_root_backref returned %08x\n", Status);
6442                 ExFreePool(di2);
6443                 return Status;
6444             }
6445         }
6446 
6447         // delete DIR_INDEX (0x60)
6448 
6449         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6450                                         fileref->dc->index, NULL, 0, Batch_Delete);
6451         if (!NT_SUCCESS(Status)) {
6452             ERR("insert_tree_item_batch returned %08x\n", Status);
6453             ExFreePool(di2);
6454             return Status;
6455         }
6456 
6457         // add DIR_INDEX (0x60)
6458 
6459        Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6460                                        fileref->dc->index, di2, disize, Batch_Insert);
6461        if (!NT_SUCCESS(Status)) {
6462             ERR("insert_tree_item_batch returned %08x\n", Status);
6463             ExFreePool(di2);
6464             return Status;
6465         }
6466 
6467         if (fileref->oldutf8.Buffer) {
6468             ExFreePool(fileref->oldutf8.Buffer);
6469             fileref->oldutf8.Buffer = NULL;
6470         }
6471     }
6472 
6473     fileref->dirty = FALSE;
6474 
6475     return STATUS_SUCCESS;
6476 }
6477 
6478 static void flush_disk_caches(device_extension* Vcb) {
6479     LIST_ENTRY* le;
6480     ioctl_context context;
6481     ULONG num;
6482 
6483     context.left = 0;
6484 
6485     le = Vcb->devices.Flink;
6486 
6487     while (le != &Vcb->devices) {
6488         device* dev = CONTAINING_RECORD(le, device, list_entry);
6489 
6490         if (dev->devobj && !dev->readonly && dev->can_flush)
6491             context.left++;
6492 
6493         le = le->Flink;
6494     }
6495 
6496     if (context.left == 0)
6497         return;
6498 
6499     num = 0;
6500 
6501     KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
6502 
6503     context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
6504     if (!context.stripes) {
6505         ERR("out of memory\n");
6506         return;
6507     }
6508 
6509     RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
6510 
6511     le = Vcb->devices.Flink;
6512 
6513     while (le != &Vcb->devices) {
6514         device* dev = CONTAINING_RECORD(le, device, list_entry);
6515 
6516         if (dev->devobj && !dev->readonly && dev->can_flush) {
6517             PIO_STACK_LOCATION IrpSp;
6518             ioctl_context_stripe* stripe = &context.stripes[num];
6519 
6520             RtlZeroMemory(&stripe->apte, sizeof(ATA_PASS_THROUGH_EX));
6521 
6522             stripe->apte.Length = sizeof(ATA_PASS_THROUGH_EX);
6523             stripe->apte.TimeOutValue = 5;
6524             stripe->apte.CurrentTaskFile[6] = IDE_COMMAND_FLUSH_CACHE;
6525 
6526             stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, FALSE);
6527 
6528             if (!stripe->Irp) {
6529                 ERR("IoAllocateIrp failed\n");
6530                 goto nextdev;
6531             }
6532 
6533             IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
6534             IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
6535 
6536             IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_ATA_PASS_THROUGH;
6537             IrpSp->Parameters.DeviceIoControl.InputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
6538             IrpSp->Parameters.DeviceIoControl.OutputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
6539 
6540             stripe->Irp->AssociatedIrp.SystemBuffer = &stripe->apte;
6541             stripe->Irp->Flags |= IRP_BUFFERED_IO | IRP_INPUT_OPERATION;
6542             stripe->Irp->UserBuffer = &stripe->apte;
6543             stripe->Irp->UserIosb = &stripe->iosb;
6544 
6545             IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, TRUE, TRUE, TRUE);
6546 
6547             IoCallDriver(dev->devobj, stripe->Irp);
6548 
6549 nextdev:
6550             num++;
6551         }
6552 
6553         le = le->Flink;
6554     }
6555 
6556     KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
6557 
6558     ExFreePool(context.stripes);
6559 }
6560 
6561 static NTSTATUS flush_changed_dev_stats(device_extension* Vcb, device* dev, PIRP Irp) {
6562     NTSTATUS Status;
6563     KEY searchkey;
6564     traverse_ptr tp;
6565     UINT16 statslen;
6566     UINT64* stats;
6567 
6568     searchkey.obj_id = 0;
6569     searchkey.obj_type = TYPE_DEV_STATS;
6570     searchkey.offset = dev->devitem.dev_id;
6571 
6572     Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp);
6573     if (!NT_SUCCESS(Status)) {
6574         ERR("find_item returned %08x\n", Status);
6575         return Status;
6576     }
6577 
6578     if (!keycmp(tp.item->key, searchkey)) {
6579         Status = delete_tree_item(Vcb, &tp);
6580         if (!NT_SUCCESS(Status)) {
6581             ERR("delete_tree_item returned %08x\n", Status);
6582             return Status;
6583         }
6584     }
6585 
6586     statslen = sizeof(UINT64) * 5;
6587     stats = ExAllocatePoolWithTag(PagedPool, statslen, ALLOC_TAG);
6588     if (!stats) {
6589         ERR("out of memory\n");
6590         return STATUS_INSUFFICIENT_RESOURCES;
6591     }
6592 
6593     RtlCopyMemory(stats, dev->stats, statslen);
6594 
6595     Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, dev->devitem.dev_id, stats, statslen, NULL, Irp);
6596     if (!NT_SUCCESS(Status)) {
6597         ERR("insert_tree_item returned %08x\n", Status);
6598         ExFreePool(stats);
6599         return Status;
6600     }
6601 
6602     return STATUS_SUCCESS;
6603 }
6604 
6605 static NTSTATUS flush_subvol(device_extension* Vcb, root* r, PIRP Irp) {
6606     NTSTATUS Status;
6607 
6608     if (r != Vcb->root_root && r != Vcb->chunk_root) {
6609         KEY searchkey;
6610         traverse_ptr tp;
6611         ROOT_ITEM* ri;
6612 
6613         searchkey.obj_id = r->id;
6614         searchkey.obj_type = TYPE_ROOT_ITEM;
6615         searchkey.offset = 0xffffffffffffffff;
6616 
6617         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
6618         if (!NT_SUCCESS(Status)) {
6619             ERR("error - find_item returned %08x\n", Status);
6620             return Status;
6621         }
6622 
6623         if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6624             ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
6625             return STATUS_INTERNAL_ERROR;
6626         }
6627 
6628         ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6629         if (!ri) {
6630             ERR("out of memory\n");
6631             return STATUS_INSUFFICIENT_RESOURCES;
6632         }
6633 
6634         RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
6635 
6636         Status = delete_tree_item(Vcb, &tp);
6637         if (!NT_SUCCESS(Status)) {
6638             ERR("delete_tree_item returned %08x\n", Status);
6639             return Status;
6640         }
6641 
6642         Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6643         if (!NT_SUCCESS(Status)) {
6644             ERR("insert_tree_item returned %08x\n", Status);
6645             return Status;
6646         }
6647     }
6648 
6649     if (r->received) {
6650         KEY searchkey;
6651         traverse_ptr tp;
6652 
6653         if (!Vcb->uuid_root) {
6654             root* uuid_root;
6655 
6656             TRACE("uuid root doesn't exist, creating it\n");
6657 
6658             Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, FALSE, 0, Irp);
6659 
6660             if (!NT_SUCCESS(Status)) {
6661                 ERR("create_root returned %08x\n", Status);
6662                 return Status;
6663             }
6664 
6665             Vcb->uuid_root = uuid_root;
6666         }
6667 
6668         RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid, sizeof(UINT64));
6669         searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
6670         RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
6671 
6672         Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
6673         if (!NT_SUCCESS(Status)) {
6674             ERR("find_item returned %08x\n", Status);
6675             return Status;
6676         }
6677 
6678         if (!keycmp(tp.item->key, searchkey)) {
6679             if (tp.item->size + sizeof(UINT64) <= Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)) {
6680                 UINT64* ids;
6681 
6682                 ids = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(UINT64), ALLOC_TAG);
6683                 if (!ids) {
6684                     ERR("out of memory\n");
6685                     return STATUS_INSUFFICIENT_RESOURCES;
6686                 }
6687 
6688                 RtlCopyMemory(ids, tp.item->data, tp.item->size);
6689                 RtlCopyMemory((UINT8*)ids + tp.item->size, &r->id, sizeof(UINT64));
6690 
6691                 Status = delete_tree_item(Vcb, &tp);
6692                 if (!NT_SUCCESS(Status)) {
6693                     ERR("delete_tree_item returned %08x\n", Status);
6694                     ExFreePool(ids);
6695                     return Status;
6696                 }
6697 
6698                 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ids, tp.item->size + sizeof(UINT64), NULL, Irp);
6699                 if (!NT_SUCCESS(Status)) {
6700                     ERR("insert_tree_item returned %08x\n", Status);
6701                     ExFreePool(ids);
6702                     return Status;
6703                 }
6704             }
6705         } else {
6706             UINT64* root_num;
6707 
6708             root_num = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64), ALLOC_TAG);
6709             if (!root_num) {
6710                 ERR("out of memory\n");
6711                 return STATUS_INSUFFICIENT_RESOURCES;
6712             }
6713 
6714             *root_num = r->id;
6715 
6716             Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(UINT64), NULL, Irp);
6717             if (!NT_SUCCESS(Status)) {
6718                 ERR("insert_tree_item returned %08x\n", Status);
6719                 ExFreePool(root_num);
6720                 return Status;
6721             }
6722         }
6723 
6724         r->received = FALSE;
6725     }
6726 
6727     r->dirty = FALSE;
6728 
6729     return STATUS_SUCCESS;
6730 }
6731 
6732 static NTSTATUS test_not_full(device_extension* Vcb) {
6733     UINT64 reserve, could_alloc, free_space;
6734     LIST_ENTRY* le;
6735 
6736     // This function ensures we drop into readonly mode if we're about to leave very little
6737     // space for metadata - this is similar to the "global reserve" of the Linux driver.
6738     // Otherwise we might completely fill our space, at which point due to COW we can't
6739     // delete anything in order to fix this.
6740 
6741     reserve = Vcb->extent_root->root_item.bytes_used;
6742     reserve += Vcb->root_root->root_item.bytes_used;
6743     if (Vcb->checksum_root) reserve += Vcb->checksum_root->root_item.bytes_used;
6744 
6745     reserve = max(reserve, 0x1000000); // 16 M
6746     reserve = min(reserve, 0x20000000); // 512 M
6747 
6748     // Find out how much space would be available for new metadata chunks
6749 
6750     could_alloc = 0;
6751 
6752     if (Vcb->metadata_flags & BLOCK_FLAG_RAID5) {
6753         UINT64 s1 = 0, s2 = 0, s3 = 0;
6754 
6755         le = Vcb->devices.Flink;
6756         while (le != &Vcb->devices) {
6757             device* dev = CONTAINING_RECORD(le, device, list_entry);
6758 
6759             if (!dev->readonly) {
6760                 UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used;
6761 
6762                 if (space >= s1) {
6763                     s3 = s2;
6764                     s2 = s1;
6765                     s1 = space;
6766                 } else if (space >= s2) {
6767                     s3 = s2;
6768                     s2 = space;
6769                 } else if (space >= s3)
6770                     s3 = space;
6771             }
6772 
6773             le = le->Flink;
6774         }
6775 
6776         could_alloc = s3 * 2;
6777     } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID6)) {
6778         UINT64 s1 = 0, s2 = 0, s3 = 0, s4 = 0;
6779 
6780         le = Vcb->devices.Flink;
6781         while (le != &Vcb->devices) {
6782             device* dev = CONTAINING_RECORD(le, device, list_entry);
6783 
6784             if (!dev->readonly) {
6785                 UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used;
6786 
6787                 if (space >= s1) {
6788                     s4 = s3;
6789                     s3 = s2;
6790                     s2 = s1;
6791                     s1 = space;
6792                 } else if (space >= s2) {
6793                     s4 = s3;
6794                     s3 = s2;
6795                     s2 = space;
6796                 } else if (space >= s3) {
6797                     s4 = s3;
6798                     s3 = space;
6799                 } else if (space >= s4)
6800                     s4 = space;
6801             }
6802 
6803             le = le->Flink;
6804         }
6805 
6806         could_alloc = s4 * 2;
6807     } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1)) {
6808         UINT64 s1 = 0, s2 = 0;
6809 
6810         le = Vcb->devices.Flink;
6811         while (le != &Vcb->devices) {
6812             device* dev = CONTAINING_RECORD(le, device, list_entry);
6813 
6814             if (!dev->readonly) {
6815                 UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used;
6816 
6817                 if (space >= s1) {
6818                     s2 = s1;
6819                     s1 = space;
6820                 } else if (space >= s2)
6821                     s2 = space;
6822             }
6823 
6824             le = le->Flink;
6825         }
6826 
6827         if (Vcb->metadata_flags & BLOCK_FLAG_RAID1)
6828             could_alloc = s2;
6829         else // RAID0
6830             could_alloc = s2 * 2;
6831     } else if (Vcb->metadata_flags & BLOCK_FLAG_DUPLICATE) {
6832         le = Vcb->devices.Flink;
6833         while (le != &Vcb->devices) {
6834             device* dev = CONTAINING_RECORD(le, device, list_entry);
6835 
6836             if (!dev->readonly) {
6837                 UINT64 space = (dev->devitem.num_bytes - dev->devitem.bytes_used) / 2;
6838 
6839                 could_alloc = max(could_alloc, space);
6840             }
6841 
6842             le = le->Flink;
6843         }
6844     } else { // SINGLE
6845         le = Vcb->devices.Flink;
6846         while (le != &Vcb->devices) {
6847             device* dev = CONTAINING_RECORD(le, device, list_entry);
6848 
6849             if (!dev->readonly) {
6850                 UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used;
6851 
6852                 could_alloc = max(could_alloc, space);
6853             }
6854 
6855             le = le->Flink;
6856         }
6857     }
6858 
6859     if (could_alloc >= reserve)
6860         return STATUS_SUCCESS;
6861 
6862     free_space = 0;
6863 
6864     le = Vcb->chunks.Flink;
6865     while (le != &Vcb->chunks) {
6866         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
6867 
6868         if (!c->reloc && !c->readonly && c->chunk_item->type & BLOCK_FLAG_METADATA) {
6869             free_space += c->chunk_item->size - c->used;
6870 
6871             if (free_space + could_alloc >= reserve)
6872                 return STATUS_SUCCESS;
6873         }
6874 
6875         le = le->Flink;
6876     }
6877 
6878     return STATUS_DISK_FULL;
6879 }
6880 
6881 static NTSTATUS do_write2(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
6882     NTSTATUS Status;
6883     LIST_ENTRY *le, batchlist;
6884     BOOL cache_changed = FALSE;
6885     volume_device_extension* vde;
6886     BOOL no_cache = FALSE;
6887 #ifdef DEBUG_FLUSH_TIMES
6888     UINT64 filerefs = 0, fcbs = 0;
6889     LARGE_INTEGER freq, time1, time2;
6890 #endif
6891 #ifdef DEBUG_WRITE_LOOPS
6892     UINT loops = 0;
6893 #endif
6894 
6895     TRACE("(%p)\n", Vcb);
6896 
6897     InitializeListHead(&batchlist);
6898 
6899 #ifdef DEBUG_FLUSH_TIMES
6900     time1 = KeQueryPerformanceCounter(&freq);
6901 #endif
6902 
6903     ExAcquireResourceExclusiveLite(&Vcb->dirty_filerefs_lock, TRUE);
6904 
6905     while (!IsListEmpty(&Vcb->dirty_filerefs)) {
6906         file_ref* fr = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_filerefs), file_ref, list_entry_dirty);
6907 
6908         flush_fileref(fr, &batchlist, Irp);
6909         free_fileref(Vcb, fr);
6910 
6911 #ifdef DEBUG_FLUSH_TIMES
6912         filerefs++;
6913 #endif
6914     }
6915 
6916     ExReleaseResourceLite(&Vcb->dirty_filerefs_lock);
6917 
6918     Status = commit_batch_list(Vcb, &batchlist, Irp);
6919     if (!NT_SUCCESS(Status)) {
6920         ERR("commit_batch_list returned %08x\n", Status);
6921         return Status;
6922     }
6923 
6924 #ifdef DEBUG_FLUSH_TIMES
6925     time2 = KeQueryPerformanceCounter(NULL);
6926 
6927     ERR("flushed %llu filerefs in %llu (freq = %llu)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
6928 
6929     time1 = KeQueryPerformanceCounter(&freq);
6930 #endif
6931 
6932     // We process deleted streams first, so we don't run over our xattr
6933     // limit unless we absolutely have to.
6934     // We also process deleted normal files, to avoid any problems
6935     // caused by inode collisions.
6936 
6937     ExAcquireResourceExclusiveLite(&Vcb->dirty_fcbs_lock, TRUE);
6938 
6939     le = Vcb->dirty_fcbs.Flink;
6940     while (le != &Vcb->dirty_fcbs) {
6941         fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
6942         LIST_ENTRY* le2 = le->Flink;
6943 
6944         if (fcb->deleted) {
6945             ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
6946             Status = flush_fcb(fcb, FALSE, &batchlist, Irp);
6947             ExReleaseResourceLite(fcb->Header.Resource);
6948 
6949             free_fcb(Vcb, fcb);
6950 
6951             if (!NT_SUCCESS(Status)) {
6952                 ERR("flush_fcb returned %08x\n", Status);
6953                 clear_batch_list(Vcb, &batchlist);
6954                 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
6955                 return Status;
6956             }
6957 
6958 #ifdef DEBUG_FLUSH_TIMES
6959             fcbs++;
6960 #endif
6961         }
6962 
6963         le = le2;
6964     }
6965 
6966     Status = commit_batch_list(Vcb, &batchlist, Irp);
6967     if (!NT_SUCCESS(Status)) {
6968         ERR("commit_batch_list returned %08x\n", Status);
6969         ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
6970         return Status;
6971     }
6972 
6973     le = Vcb->dirty_fcbs.Flink;
6974     while (le != &Vcb->dirty_fcbs) {
6975         fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
6976         LIST_ENTRY* le2 = le->Flink;
6977 
6978         if (fcb->subvol != Vcb->root_root) {
6979             ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
6980             Status = flush_fcb(fcb, FALSE, &batchlist, Irp);
6981             ExReleaseResourceLite(fcb->Header.Resource);
6982             free_fcb(Vcb, fcb);
6983 
6984             if (!NT_SUCCESS(Status)) {
6985                 ERR("flush_fcb returned %08x\n", Status);
6986                 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
6987                 return Status;
6988             }
6989 
6990 #ifdef DEBUG_FLUSH_TIMES
6991             fcbs++;
6992 #endif
6993         }
6994 
6995         le = le2;
6996     }
6997 
6998     ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
6999 
7000     Status = commit_batch_list(Vcb, &batchlist, Irp);
7001     if (!NT_SUCCESS(Status)) {
7002         ERR("commit_batch_list returned %08x\n", Status);
7003         return Status;
7004     }
7005 
7006 #ifdef DEBUG_FLUSH_TIMES
7007     time2 = KeQueryPerformanceCounter(NULL);
7008 
7009     ERR("flushed %llu fcbs in %llu (freq = %llu)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7010 #endif
7011 
7012     // no need to get dirty_subvols_lock here, as we have tree_lock exclusively
7013     while (!IsListEmpty(&Vcb->dirty_subvols)) {
7014         root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_subvols), root, list_entry_dirty);
7015 
7016         Status = flush_subvol(Vcb, r, Irp);
7017         if (!NT_SUCCESS(Status)) {
7018             ERR("flush_subvol returned %08x\n", Status);
7019             return Status;
7020         }
7021     }
7022 
7023     if (!IsListEmpty(&Vcb->drop_roots)) {
7024         Status = drop_roots(Vcb, Irp, rollback);
7025 
7026         if (!NT_SUCCESS(Status)) {
7027             ERR("drop_roots returned %08x\n", Status);
7028             return Status;
7029         }
7030     }
7031 
7032     Status = update_chunks(Vcb, &batchlist, Irp, rollback);
7033 
7034     if (!NT_SUCCESS(Status)) {
7035         ERR("update_chunks returned %08x\n", Status);
7036         return Status;
7037     }
7038 
7039     Status = commit_batch_list(Vcb, &batchlist, Irp);
7040 
7041     // If only changing superblock, e.g. changing label, we still need to rewrite
7042     // the root tree so the generations match, otherwise you won't be able to mount on Linux.
7043     if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
7044         KEY searchkey;
7045 
7046         traverse_ptr tp;
7047 
7048         searchkey.obj_id = 0;
7049         searchkey.obj_type = 0;
7050         searchkey.offset = 0;
7051 
7052         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
7053         if (!NT_SUCCESS(Status)) {
7054             ERR("error - find_item returned %08x\n", Status);
7055             return Status;
7056         }
7057 
7058         Vcb->root_root->treeholder.tree->write = TRUE;
7059     }
7060 
7061     // make sure we always update the extent tree
7062     Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp);
7063     if (!NT_SUCCESS(Status)) {
7064         ERR("add_root_item_to_cache returned %08x\n", Status);
7065         return Status;
7066     }
7067 
7068     if (Vcb->stats_changed) {
7069         le = Vcb->devices.Flink;
7070         while (le != &Vcb->devices) {
7071             device* dev = CONTAINING_RECORD(le, device, list_entry);
7072 
7073             if (dev->stats_changed) {
7074                 Status = flush_changed_dev_stats(Vcb, dev, Irp);
7075                 if (!NT_SUCCESS(Status)) {
7076                     ERR("flush_changed_dev_stats returned %08x\n", Status);
7077                     return Status;
7078                 }
7079                 dev->stats_changed = FALSE;
7080             }
7081 
7082             le = le->Flink;
7083         }
7084 
7085         Vcb->stats_changed = FALSE;
7086     }
7087 
7088     do {
7089         Status = add_parents(Vcb, Irp);
7090         if (!NT_SUCCESS(Status)) {
7091             ERR("add_parents returned %08x\n", Status);
7092             goto end;
7093         }
7094 
7095         Status = allocate_tree_extents(Vcb, Irp, rollback);
7096         if (!NT_SUCCESS(Status)) {
7097             ERR("allocate_tree_extents returned %08x\n", Status);
7098             goto end;
7099         }
7100 
7101         Status = do_splits(Vcb, Irp, rollback);
7102         if (!NT_SUCCESS(Status)) {
7103             ERR("do_splits returned %08x\n", Status);
7104             goto end;
7105         }
7106 
7107         Status = update_chunk_usage(Vcb, Irp, rollback);
7108         if (!NT_SUCCESS(Status)) {
7109             ERR("update_chunk_usage returned %08x\n", Status);
7110             goto end;
7111         }
7112 
7113         if (!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
7114             if (!no_cache) {
7115                 Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
7116                 if (!NT_SUCCESS(Status)) {
7117                     WARN("allocate_cache returned %08x\n", Status);
7118                     no_cache = TRUE;
7119                     cache_changed = FALSE;
7120                 }
7121             }
7122         } else {
7123             Status = update_chunk_caches_tree(Vcb, Irp);
7124             if (!NT_SUCCESS(Status)) {
7125                 ERR("update_chunk_caches_tree returned %08x\n", Status);
7126                 goto end;
7127             }
7128         }
7129 
7130 #ifdef DEBUG_WRITE_LOOPS
7131         loops++;
7132 
7133         if (cache_changed)
7134             ERR("cache has changed, looping again\n");
7135 #endif
7136     } while (cache_changed || !trees_consistent(Vcb));
7137 
7138 #ifdef DEBUG_WRITE_LOOPS
7139     ERR("%u loops\n", loops);
7140 #endif
7141 
7142     TRACE("trees consistent\n");
7143 
7144     Status = update_root_root(Vcb, no_cache, Irp, rollback);
7145     if (!NT_SUCCESS(Status)) {
7146         ERR("update_root_root returned %08x\n", Status);
7147         goto end;
7148     }
7149 
7150     Status = write_trees(Vcb, Irp);
7151     if (!NT_SUCCESS(Status)) {
7152         ERR("write_trees returned %08x\n", Status);
7153         goto end;
7154     }
7155 
7156     Status = test_not_full(Vcb);
7157     if (!NT_SUCCESS(Status)) {
7158         ERR("test_not_full returned %08x\n", Status);
7159         goto end;
7160     }
7161 
7162 #ifdef DEBUG_PARANOID
7163     le = Vcb->trees.Flink;
7164     while (le != &Vcb->trees) {
7165         tree* t = CONTAINING_RECORD(le, tree, list_entry);
7166         KEY searchkey;
7167         traverse_ptr tp;
7168 
7169         searchkey.obj_id = t->header.address;
7170         searchkey.obj_type = TYPE_METADATA_ITEM;
7171         searchkey.offset = 0xffffffffffffffff;
7172 
7173         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
7174         if (!NT_SUCCESS(Status)) {
7175             ERR("error - find_item returned %08x\n", Status);
7176             goto end;
7177         }
7178 
7179         if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7180             searchkey.obj_id = t->header.address;
7181             searchkey.obj_type = TYPE_EXTENT_ITEM;
7182             searchkey.offset = 0xffffffffffffffff;
7183 
7184             Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
7185             if (!NT_SUCCESS(Status)) {
7186                 ERR("error - find_item returned %08x\n", Status);
7187                 goto end;
7188             }
7189 
7190             if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7191                 ERR("error - could not find entry in extent tree for tree at %llx\n", t->header.address);
7192                 Status = STATUS_INTERNAL_ERROR;
7193                 goto end;
7194             }
7195         }
7196 
7197         le = le->Flink;
7198     }
7199 #endif
7200 
7201     Vcb->superblock.cache_generation = Vcb->superblock.generation;
7202 
7203     if (!Vcb->options.no_barrier)
7204         flush_disk_caches(Vcb);
7205 
7206     Status = write_superblocks(Vcb, Irp);
7207     if (!NT_SUCCESS(Status)) {
7208         ERR("write_superblocks returned %08x\n", Status);
7209         goto end;
7210     }
7211 
7212     vde = Vcb->vde;
7213 
7214     if (vde) {
7215         pdo_device_extension* pdode = vde->pdode;
7216 
7217         ExAcquireResourceSharedLite(&pdode->child_lock, TRUE);
7218 
7219         le = pdode->children.Flink;
7220 
7221         while (le != &pdode->children) {
7222             volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
7223 
7224             vc->generation = Vcb->superblock.generation;
7225             le = le->Flink;
7226         }
7227 
7228         ExReleaseResourceLite(&pdode->child_lock);
7229     }
7230 
7231     clean_space_cache(Vcb);
7232 
7233     le = Vcb->chunks.Flink;
7234     while (le != &Vcb->chunks) {
7235         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7236 
7237         c->changed = FALSE;
7238         c->space_changed = FALSE;
7239 
7240         le = le->Flink;
7241     }
7242 
7243     Vcb->superblock.generation++;
7244 
7245     Status = STATUS_SUCCESS;
7246 
7247     le = Vcb->trees.Flink;
7248     while (le != &Vcb->trees) {
7249         tree* t = CONTAINING_RECORD(le, tree, list_entry);
7250 
7251         t->write = FALSE;
7252 
7253         le = le->Flink;
7254     }
7255 
7256     Vcb->need_write = FALSE;
7257 
7258     while (!IsListEmpty(&Vcb->drop_roots)) {
7259         root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->drop_roots), root, list_entry);
7260 
7261         ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
7262         ExFreePool(r->nonpaged);
7263         ExFreePool(r);
7264     }
7265 
7266 end:
7267     TRACE("do_write returning %08x\n", Status);
7268 
7269     return Status;
7270 }
7271 
7272 NTSTATUS do_write(device_extension* Vcb, PIRP Irp) {
7273     LIST_ENTRY rollback;
7274     NTSTATUS Status;
7275 
7276     InitializeListHead(&rollback);
7277 
7278     Status = do_write2(Vcb, Irp, &rollback);
7279 
7280     if (!NT_SUCCESS(Status)) {
7281         ERR("do_write2 returned %08x, dropping into readonly mode\n", Status);
7282         Vcb->readonly = TRUE;
7283         FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_FORCED_CLOSED);
7284         do_rollback(Vcb, &rollback);
7285     } else
7286         clear_rollback(&rollback);
7287 
7288     return Status;
7289 }
7290 
7291 #ifdef DEBUG_STATS
7292 static void print_stats(device_extension* Vcb) {
7293     LARGE_INTEGER freq;
7294 
7295     ERR("READ STATS:\n");
7296     ERR("number of reads: %llu\n", Vcb->stats.num_reads);
7297     ERR("data read: %llu bytes\n", Vcb->stats.data_read);
7298     ERR("total time taken: %llu\n", Vcb->stats.read_total_time);
7299     ERR("csum time taken: %llu\n", Vcb->stats.read_csum_time);
7300     ERR("disk time taken: %llu\n", Vcb->stats.read_disk_time);
7301     ERR("other time taken: %llu\n", Vcb->stats.read_total_time - Vcb->stats.read_csum_time - Vcb->stats.read_disk_time);
7302 
7303     KeQueryPerformanceCounter(&freq);
7304 
7305     ERR("OPEN STATS (freq = %llu):\n", freq.QuadPart);
7306     ERR("number of opens: %llu\n", Vcb->stats.num_opens);
7307     ERR("total time taken: %llu\n", Vcb->stats.open_total_time);
7308     ERR("number of overwrites: %llu\n", Vcb->stats.num_overwrites);
7309     ERR("total time taken: %llu\n", Vcb->stats.overwrite_total_time);
7310     ERR("number of creates: %llu\n", Vcb->stats.num_creates);
7311     ERR("calls to open_fcb: %llu\n", Vcb->stats.open_fcb_calls);
7312     ERR("time spent in open_fcb: %llu\n", Vcb->stats.open_fcb_time);
7313     ERR("calls to open_fileref_child: %llu\n", Vcb->stats.open_fileref_child_calls);
7314     ERR("time spent in open_fileref_child: %llu\n", Vcb->stats.open_fileref_child_time);
7315     ERR("time spent waiting for fcb_lock: %llu\n", Vcb->stats.fcb_lock_time);
7316     ERR("total time taken: %llu\n", Vcb->stats.create_total_time);
7317 
7318     RtlZeroMemory(&Vcb->stats, sizeof(debug_stats));
7319 }
7320 #endif
7321 
7322 static void do_flush(device_extension* Vcb) {
7323     NTSTATUS Status;
7324 
7325     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
7326 
7327 #ifdef DEBUG_STATS
7328     print_stats(Vcb);
7329 #endif
7330 
7331     if (Vcb->need_write && !Vcb->readonly)
7332         Status = do_write(Vcb, NULL);
7333     else
7334         Status = STATUS_SUCCESS;
7335 
7336     free_trees(Vcb);
7337 
7338     if (!NT_SUCCESS(Status))
7339         ERR("do_write returned %08x\n", Status);
7340 
7341     ExReleaseResourceLite(&Vcb->tree_lock);
7342 }
7343 
7344 _Function_class_(KSTART_ROUTINE)
7345 #ifdef __REACTOS__
7346 void NTAPI flush_thread(void* context) {
7347 #else
7348 void flush_thread(void* context) {
7349 #endif
7350     DEVICE_OBJECT* devobj = context;
7351     device_extension* Vcb = devobj->DeviceExtension;
7352     LARGE_INTEGER due_time;
7353 
7354     ObReferenceObject(devobj);
7355 
7356     KeInitializeTimer(&Vcb->flush_thread_timer);
7357 
7358     due_time.QuadPart = (UINT64)Vcb->options.flush_interval * -10000000;
7359 
7360     KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7361 
7362     while (TRUE) {
7363         KeWaitForSingleObject(&Vcb->flush_thread_timer, Executive, KernelMode, FALSE, NULL);
7364 
7365         if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing)
7366             break;
7367 
7368         if (!Vcb->locked)
7369             do_flush(Vcb);
7370 
7371         KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7372     }
7373 
7374     ObDereferenceObject(devobj);
7375     KeCancelTimer(&Vcb->flush_thread_timer);
7376 
7377     KeSetEvent(&Vcb->flush_thread_finished, 0, FALSE);
7378 
7379     PsTerminateSystemThread(STATUS_SUCCESS);
7380 }
7381