1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 #include "xxhash.h"
20 #include "crc32c.h"
21 #include <ata.h>
22 #include <ntddscsi.h>
23 #include <ntddstor.h>
24 
/* cf. __MAX_CSUM_ITEMS in Linux - it needs sizeof(leaf_node) bytes free
 * so it can do a split. Linux tries to get it so a run will fit in a
 * sector, but the MAX_CSUM_ITEMS logic is wrong...
 *
 * The value is a byte budget: 4096 minus one tree_header and two leaf_node
 * entries. */
#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - (2 * sizeof(leaf_node)))

// Uncomment to have trees_consistent() log (via ERR) why it rejected a tree.
// #define DEBUG_WRITE_LOOPS

// NOTE(review): presumably an upper bound on items handled per batch; it is
// not referenced in this part of the file - confirm against its users.
#define BATCH_ITEM_LIMIT 1000
33 
/* Per-request state shared between write_data_phys and write_completion. */
typedef struct {
    KEVENT Event;          // set by write_completion once the IRP has completed
    IO_STATUS_BLOCK iosb;  // receives a copy of Irp->IoStatus on completion
} write_context;
38 
/* Payload inserted by insert_tree_extent for a TYPE_EXTENT_ITEM describing a
 * tree block: the extent item followed by one inline reference. */
typedef struct {
    EXTENT_ITEM_TREE eit;
    uint8_t type;         // inline reference type; set to TYPE_TREE_BLOCK_REF
    TREE_BLOCK_REF tbr;   // tbr.offset is set to the id of the owning root
} EXTENT_ITEM_TREE2;
44 
/* Payload inserted by insert_tree_extent_skinny for a TYPE_METADATA_ITEM:
 * a plain extent item followed by one inline reference. The tree level is
 * carried in the key offset rather than in this structure. */
typedef struct {
    EXTENT_ITEM ei;
    uint8_t type;         // inline reference type; set to TYPE_TREE_BLOCK_REF
    TREE_BLOCK_REF tbr;   // tbr.offset is set to the id of the owning root
} EXTENT_ITEM_SKINNY_METADATA;
50 
/* Forward declarations of static helpers whose definitions are not part of
 * the code immediately below. */
static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp);
static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);

static NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, uint64_t objid,
                                       uint8_t objtype, uint64_t offset, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data,
                                       uint16_t datalen, enum batch_operation operation);
57 
_Function_class_(IO_COMPLETION_ROUTINE)58 _Function_class_(IO_COMPLETION_ROUTINE)
59 static NTSTATUS __stdcall write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
60     write_context* context = conptr;
61 
62     UNUSED(DeviceObject);
63 
64     context->iosb = Irp->IoStatus;
65     KeSetEvent(&context->Event, 0, false);
66 
67     return STATUS_MORE_PROCESSING_REQUIRED;
68 }
69 
write_data_phys(_In_ PDEVICE_OBJECT device,_In_ PFILE_OBJECT fileobj,_In_ uint64_t address,_In_reads_bytes_ (length)void * data,_In_ uint32_t length)70 NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ PFILE_OBJECT fileobj, _In_ uint64_t address,
71                          _In_reads_bytes_(length) void* data, _In_ uint32_t length) {
72     NTSTATUS Status;
73     LARGE_INTEGER offset;
74     PIRP Irp;
75     PIO_STACK_LOCATION IrpSp;
76     write_context context;
77 
78     TRACE("(%p, %I64x, %p, %x)\n", device, address, data, length);
79 
80     RtlZeroMemory(&context, sizeof(write_context));
81 
82     KeInitializeEvent(&context.Event, NotificationEvent, false);
83 
84     offset.QuadPart = address;
85 
86     Irp = IoAllocateIrp(device->StackSize, false);
87 
88     if (!Irp) {
89         ERR("IoAllocateIrp failed\n");
90         return STATUS_INSUFFICIENT_RESOURCES;
91     }
92 
93     IrpSp = IoGetNextIrpStackLocation(Irp);
94     IrpSp->MajorFunction = IRP_MJ_WRITE;
95     IrpSp->FileObject = fileobj;
96 
97     if (device->Flags & DO_BUFFERED_IO) {
98         Irp->AssociatedIrp.SystemBuffer = data;
99 
100         Irp->Flags = IRP_BUFFERED_IO;
101     } else if (device->Flags & DO_DIRECT_IO) {
102         Irp->MdlAddress = IoAllocateMdl(data, length, false, false, NULL);
103         if (!Irp->MdlAddress) {
104             DbgPrint("IoAllocateMdl failed\n");
105             Status = STATUS_INSUFFICIENT_RESOURCES;
106             goto exit;
107         }
108 
109         Status = STATUS_SUCCESS;
110 
111         _SEH2_TRY {
112             MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
113         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
114             Status = _SEH2_GetExceptionCode();
115         } _SEH2_END;
116 
117         if (!NT_SUCCESS(Status)) {
118             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
119             IoFreeMdl(Irp->MdlAddress);
120             goto exit;
121         }
122     } else {
123         Irp->UserBuffer = data;
124     }
125 
126     IrpSp->Parameters.Write.Length = length;
127     IrpSp->Parameters.Write.ByteOffset = offset;
128 
129     Irp->UserIosb = &context.iosb;
130 
131     Irp->UserEvent = &context.Event;
132 
133     IoSetCompletionRoutine(Irp, write_completion, &context, true, true, true);
134 
135     Status = IoCallDriver(device, Irp);
136 
137     if (Status == STATUS_PENDING) {
138         KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
139         Status = context.iosb.Status;
140     }
141 
142     if (!NT_SUCCESS(Status)) {
143         ERR("IoCallDriver returned %08lx\n", Status);
144     }
145 
146     if (device->Flags & DO_DIRECT_IO) {
147         MmUnlockPages(Irp->MdlAddress);
148         IoFreeMdl(Irp->MdlAddress);
149     }
150 
151 exit:
152     IoFreeIrp(Irp);
153 
154     return Status;
155 }
156 
/* Queues the byte range [address, address + size) on dev's trim list and
 * bumps dev->num_trim_entries. If the list node cannot be allocated the
 * range is dropped after logging an error. */
static void add_trim_entry(device* dev, uint64_t address, uint64_t size) {
    space* entry;

    entry = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);

    if (entry == NULL) {
        ERR("out of memory\n");
        return;
    }

    entry->address = address;
    entry->size = size;

    InsertTailList(&dev->trim_list, &entry->list_entry);
    dev->num_trim_entries++;
}
170 
/* Computes the part of one device stripe that a striped logical range covers.
 *
 * startoff/startoffstripe and endoff/endoffstripe are the per-stripe offset
 * and stripe index produced by get_raid0_offset for the first and the last
 * byte of the range. `stripe` is the stripe being examined. The result is
 * the half-open range [*stripestart, *stripeend); the two are equal when
 * this stripe holds none of the range. */
static void calc_trim_stripe_range(uint64_t startoff, uint16_t startoffstripe, uint64_t endoff, uint16_t endoffstripe,
                                   uint16_t stripe, uint64_t stripe_length, uint64_t* stripestart, uint64_t* stripeend) {
    if (startoffstripe == stripe) {
        *stripestart = startoff;
    } else {
        // Round down to the start of the current stripe row...
        *stripestart = startoff - (startoff % stripe_length);

        // ...and skip to the next row if the range begins on a later stripe.
        if (startoffstripe > stripe)
            *stripestart += stripe_length;
    }

    if (endoffstripe == stripe) {
        *stripeend = endoff + 1;
    } else {
        *stripeend = endoff - (endoff % stripe_length);

        if (endoffstripe > stripe)
            *stripeend += stripe_length;
    }
}

/* Converts every range on c->deleting into per-device trim entries.
 *
 * Mirrored layouts (DUPLICATE, RAID1, RAID1C3, RAID1C4) and SINGLE queue the
 * same range on every stripe; RAID0 and RAID10 split the range across
 * stripes. RAID5 and RAID6 chunks currently queue nothing. Only devices that
 * are present, writable and trim-capable receive entries. Nothing is queued
 * for metadata chunks when the no_barrier option is set. */
static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
    LIST_ENTRY* entry;
    CHUNK_ITEM_STRIPE* cis;
    ULONG layout;

    // Flag precedence matters here: the first matching flag wins.
    if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
        layout = BLOCK_FLAG_DUPLICATE;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
        layout = BLOCK_FLAG_RAID0;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
        layout = BLOCK_FLAG_DUPLICATE;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
        layout = BLOCK_FLAG_RAID10;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
        layout = BLOCK_FLAG_RAID5;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
        layout = BLOCK_FLAG_RAID6;
    else // RAID1C3, RAID1C4 and SINGLE are all handled like DUPLICATE
        layout = BLOCK_FLAG_DUPLICATE;

    // This test does not depend on the individual range, so do it once.
    if (Vcb->options.no_barrier && (c->chunk_item->type & BLOCK_FLAG_METADATA))
        return;

    // The stripe descriptors follow the chunk item in memory.
    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];

    for (entry = c->deleting.Flink; entry != &c->deleting; entry = entry->Flink) {
        space* s = CONTAINING_RECORD(entry, space, list_entry);

        if (layout == BLOCK_FLAG_DUPLICATE) {
            uint16_t i;

            for (i = 0; i < c->chunk_item->num_stripes; i++) {
                if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
                    add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size);
            }
        } else if (layout == BLOCK_FLAG_RAID0) {
            uint64_t startoff, endoff;
            uint16_t startoffstripe, endoffstripe, i;

            get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
            get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);

            for (i = 0; i < c->chunk_item->num_stripes; i++) {
                uint64_t stripestart, stripeend;

                if (!c->devices[i] || !c->devices[i]->devobj || c->devices[i]->readonly || !c->devices[i]->trim)
                    continue;

                calc_trim_stripe_range(startoff, startoffstripe, endoff, endoffstripe, i,
                                       c->chunk_item->stripe_length, &stripestart, &stripeend);

                if (stripestart != stripeend)
                    add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart);
            }
        } else if (layout == BLOCK_FLAG_RAID10) {
            uint64_t startoff, endoff;
            uint16_t sub_stripes, startoffstripe, endoffstripe, i;

            sub_stripes = max(1, c->chunk_item->sub_stripes);

            get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
            get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);

            // Convert mirror-group indices into indices of each group's first device.
            startoffstripe *= sub_stripes;
            endoffstripe *= sub_stripes;

            for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
                ULONG j;
                uint64_t stripestart, stripeend;

                calc_trim_stripe_range(startoff, startoffstripe, endoff, endoffstripe, i,
                                       c->chunk_item->stripe_length, &stripestart, &stripeend);

                if (stripestart == stripeend)
                    continue;

                // Every mirror in the group gets the same range.
                for (j = 0; j < sub_stripes; j++) {
                    if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim)
                        add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart);
                }
            }
        }
        // FIXME - RAID5(?), RAID6(?)
    }
}
281 
/* Per-device state for one asynchronous trim request. */
typedef struct {
    DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa; // request header plus range array, passed as the IRP's system buffer
    ATA_PASS_THROUGH_EX apte;                 // NOTE(review): not referenced by the code visible here - confirm whether still needed
    PIRP Irp;                                 // IRP carrying the request; freed by the issuer after completion
    IO_STATUS_BLOCK iosb;                     // installed as the IRP's UserIosb
#ifdef DEBUG_TRIM_EMULATION
    PMDL mdl;                                 // describes buf for the emulated trim write
    void* buf;                                // zero-filled buffer written over the trimmed range
#endif
} ioctl_context_stripe;
292 
/* Shared state for a batch of requests completed by ioctl_completion. */
typedef struct {
    KEVENT Event;                  // signalled when `left` drops to zero
    LONG left;                     // number of requests still outstanding; decremented atomically
    ioctl_context_stripe* stripes; // one element per request in the batch
} ioctl_context;
298 
_Function_class_(IO_COMPLETION_ROUTINE)299 _Function_class_(IO_COMPLETION_ROUTINE)
300 static NTSTATUS __stdcall ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
301     ioctl_context* context = (ioctl_context*)conptr;
302     LONG left2 = InterlockedDecrement(&context->left);
303 
304     UNUSED(DeviceObject);
305     UNUSED(Irp);
306 
307     if (left2 == 0)
308         KeSetEvent(&context->Event, 0, false);
309 
310     return STATUS_MORE_PROCESSING_REQUIRED;
311 }
312 
#ifdef DEBUG_TRIM_EMULATION
/* Debug stand-in for a real trim: overwrites every range queued on
 * dev->trim_list with zeroes, issuing one write IRP per range and waiting
 * for all of them to finish.
 *
 * The entries on dev->trim_list are left in place; the caller is
 * responsible for removing them. Failures to set up an individual write are
 * logged and that range is skipped. */
static void trim_emulation(device* dev) {
    LIST_ENTRY* le;
    ioctl_context context;
    unsigned int i, count = 0;

    le = dev->trim_list.Flink;
    while (le != &dev->trim_list) {
        count++;
        le = le->Flink;
    }

    // With nothing queued no completion would ever signal the event.
    if (count == 0)
        return;

    context.left = count;

    KeInitializeEvent(&context.Event, NotificationEvent, false);

    context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * count, ALLOC_TAG);
    if (!context.stripes) {
        ERR("out of memory\n");
        return;
    }

    RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * count);

    i = 0;
    le = dev->trim_list.Flink;
    while (le != &dev->trim_list) {
        ioctl_context_stripe* stripe = &context.stripes[i];
        space* s = CONTAINING_RECORD(le, space, list_entry);
        bool sent = false;

        WARN("(%I64x, %I64x)\n", s->address, s->size);

        stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);

        if (!stripe->Irp) {
            ERR("IoAllocateIrp failed\n");
        } else {
            PIO_STACK_LOCATION IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
            IrpSp->MajorFunction = IRP_MJ_WRITE;
            IrpSp->FileObject = dev->fileobj;

            stripe->buf = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)s->size, ALLOC_TAG);

            if (!stripe->buf) {
                ERR("out of memory\n");
            } else {
                RtlZeroMemory(stripe->buf, (uint32_t)s->size); // FIXME - randomize instead?

                stripe->mdl = IoAllocateMdl(stripe->buf, (uint32_t)s->size, false, false, NULL);

                if (!stripe->mdl) {
                    ERR("IoAllocateMdl failed\n");
                } else {
                    MmBuildMdlForNonPagedPool(stripe->mdl);

                    stripe->Irp->MdlAddress = stripe->mdl;

                    IrpSp->Parameters.Write.ByteOffset.QuadPart = s->address;
                    IrpSp->Parameters.Write.Length = (uint32_t)s->size;

                    stripe->Irp->UserIosb = &stripe->iosb;

                    IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);

                    IoCallDriver(dev->devobj, stripe->Irp);
                    sent = true;
                }
            }
        }

        // A request that was never sent will never reach ioctl_completion, so
        // account for it here; otherwise the wait below would block forever.
        if (!sent && InterlockedDecrement(&context.left) == 0)
            KeSetEvent(&context.Event, 0, false);

        i++;

        le = le->Flink;
    }

    KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);

    for (i = 0; i < count; i++) {
        ioctl_context_stripe* stripe = &context.stripes[i];

        if (stripe->mdl)
            IoFreeMdl(stripe->mdl);

        if (stripe->buf)
            ExFreePool(stripe->buf);

        // The completion routine keeps ownership of the IRP with us, so it
        // has to be released here.
        if (stripe->Irp)
            IoFreeIrp(stripe->Irp);
    }

    ExFreePool(context.stripes);
}
#endif
402 
clean_space_cache(device_extension * Vcb)403 static void clean_space_cache(device_extension* Vcb) {
404     LIST_ENTRY* le;
405     chunk* c;
406 #ifndef DEBUG_TRIM_EMULATION
407     ULONG num;
408 #endif
409 
410     TRACE("(%p)\n", Vcb);
411 
412     ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
413 
414     le = Vcb->chunks.Flink;
415     while (le != &Vcb->chunks) {
416         c = CONTAINING_RECORD(le, chunk, list_entry);
417 
418         if (c->space_changed) {
419             acquire_chunk_lock(c, Vcb);
420 
421             if (c->space_changed) {
422                 if (Vcb->trim && !Vcb->options.no_trim)
423                     clean_space_cache_chunk(Vcb, c);
424 
425                 space_list_merge(&c->space, &c->space_size, &c->deleting);
426 
427                 while (!IsListEmpty(&c->deleting)) {
428                     space* s = CONTAINING_RECORD(RemoveHeadList(&c->deleting), space, list_entry);
429 
430                     ExFreePool(s);
431                 }
432             }
433 
434             c->space_changed = false;
435 
436             release_chunk_lock(c, Vcb);
437         }
438 
439         le = le->Flink;
440     }
441 
442     ExReleaseResourceLite(&Vcb->chunk_lock);
443 
444     if (Vcb->trim && !Vcb->options.no_trim) {
445 #ifndef DEBUG_TRIM_EMULATION
446         ioctl_context context;
447         ULONG total_num;
448 
449         context.left = 0;
450 
451         le = Vcb->devices.Flink;
452         while (le != &Vcb->devices) {
453             device* dev = CONTAINING_RECORD(le, device, list_entry);
454 
455             if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0)
456                 context.left++;
457 
458             le = le->Flink;
459         }
460 
461         if (context.left == 0)
462             return;
463 
464         total_num = context.left;
465         num = 0;
466 
467         KeInitializeEvent(&context.Event, NotificationEvent, false);
468 
469         context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
470         if (!context.stripes) {
471             ERR("out of memory\n");
472             return;
473         }
474 
475         RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
476 #endif
477 
478         le = Vcb->devices.Flink;
479         while (le != &Vcb->devices) {
480             device* dev = CONTAINING_RECORD(le, device, list_entry);
481 
482             if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) {
483 #ifdef DEBUG_TRIM_EMULATION
484                 trim_emulation(dev);
485 #else
486                 LIST_ENTRY* le2;
487                 ioctl_context_stripe* stripe = &context.stripes[num];
488                 DEVICE_DATA_SET_RANGE* ranges;
489                 ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i;
490                 PIO_STACK_LOCATION IrpSp;
491 
492                 stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
493                 if (!stripe->dmdsa) {
494                     ERR("out of memory\n");
495                     goto nextdev;
496                 }
497 
498                 stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
499                 stripe->dmdsa->Action = DeviceDsmAction_Trim;
500                 stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED;
501                 stripe->dmdsa->ParameterBlockOffset = 0;
502                 stripe->dmdsa->ParameterBlockLength = 0;
503                 stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t));
504                 stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
505 
506                 ranges = (DEVICE_DATA_SET_RANGE*)((uint8_t*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset);
507 
508                 i = 0;
509 
510                 le2 = dev->trim_list.Flink;
511                 while (le2 != &dev->trim_list) {
512                     space* s = CONTAINING_RECORD(le2, space, list_entry);
513 
514                     ranges[i].StartingOffset = s->address;
515                     ranges[i].LengthInBytes = s->size;
516                     i++;
517 
518                     le2 = le2->Flink;
519                 }
520 
521                 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);
522 
523                 if (!stripe->Irp) {
524                     ERR("IoAllocateIrp failed\n");
525                     goto nextdev;
526                 }
527 
528                 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
529                 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
530                 IrpSp->FileObject = dev->fileobj;
531 
532                 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES;
533                 IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen;
534                 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0;
535 
536                 stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa;
537                 stripe->Irp->Flags |= IRP_BUFFERED_IO;
538                 stripe->Irp->UserBuffer = NULL;
539                 stripe->Irp->UserIosb = &stripe->iosb;
540 
541                 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);
542 
543                 IoCallDriver(dev->devobj, stripe->Irp);
544 
545 nextdev:
546 #endif
547                 while (!IsListEmpty(&dev->trim_list)) {
548                     space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry);
549                     ExFreePool(s);
550                 }
551 
552                 dev->num_trim_entries = 0;
553 
554 #ifndef DEBUG_TRIM_EMULATION
555                 num++;
556 #endif
557             }
558 
559             le = le->Flink;
560         }
561 
562 #ifndef DEBUG_TRIM_EMULATION
563         KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
564 
565         for (num = 0; num < total_num; num++) {
566             if (context.stripes[num].dmdsa)
567                 ExFreePool(context.stripes[num].dmdsa);
568 
569             if (context.stripes[num].Irp)
570                 IoFreeIrp(context.stripes[num].Irp);
571         }
572 
573         ExFreePool(context.stripes);
574 #endif
575     }
576 }
577 
/* Reports whether every tree marked for writing is ready to be written.
 *
 * Returns false as soon as a tree with `write` set is found that
 *   - is empty but still attached to a parent,
 *   - is larger than a node can hold (node_size minus the tree header), or
 *   - has not yet been given a new address.
 * Returns true if no such tree exists. With DEBUG_WRITE_LOOPS defined the
 * reason for a rejection is logged. */
static bool trees_consistent(device_extension* Vcb) {
    const ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
    LIST_ENTRY* entry;

    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
        tree* t = CONTAINING_RECORD(entry, tree, list_entry);

        if (!t->write)
            continue;

        if (t->parent && t->header.num_items == 0) {
#ifdef DEBUG_WRITE_LOOPS
            ERR("empty tree found, looping again\n");
#endif
            return false;
        }

        if (t->size > maxsize) {
#ifdef DEBUG_WRITE_LOOPS
            ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
#endif
            return false;
        }

        if (!t->has_new_address) {
#ifdef DEBUG_WRITE_LOOPS
            ERR("tree found without new address, looping again\n");
#endif
            return false;
        }
    }

    return true;
}
614 
add_parents(device_extension * Vcb,PIRP Irp)615 static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) {
616     ULONG level;
617     LIST_ENTRY* le;
618 
619     for (level = 0; level <= 255; level++) {
620         bool nothing_found = true;
621 
622         TRACE("level = %lu\n", level);
623 
624         le = Vcb->trees.Flink;
625         while (le != &Vcb->trees) {
626             tree* t = CONTAINING_RECORD(le, tree, list_entry);
627 
628             if (t->write && t->header.level == level) {
629                 TRACE("tree %p: root = %I64x, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
630 
631                 nothing_found = false;
632 
633                 if (t->parent) {
634                     if (!t->parent->write)
635                         TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
636 
637                     t->parent->write = true;
638                 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
639                     KEY searchkey;
640                     traverse_ptr tp;
641                     NTSTATUS Status;
642 
643                     searchkey.obj_id = t->root->id;
644                     searchkey.obj_type = TYPE_ROOT_ITEM;
645                     searchkey.offset = 0xffffffffffffffff;
646 
647                     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
648                     if (!NT_SUCCESS(Status)) {
649                         ERR("error - find_item returned %08lx\n", Status);
650                         return Status;
651                     }
652 
653                     if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
654                         ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
655                         return STATUS_INTERNAL_ERROR;
656                     }
657 
658                     if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
659                         ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
660 
661                         if (!ri) {
662                             ERR("out of memory\n");
663                             return STATUS_INSUFFICIENT_RESOURCES;
664                         }
665 
666                         RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
667 
668                         Status = delete_tree_item(Vcb, &tp);
669                         if (!NT_SUCCESS(Status)) {
670                             ERR("delete_tree_item returned %08lx\n", Status);
671                             ExFreePool(ri);
672                             return Status;
673                         }
674 
675                         Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
676                         if (!NT_SUCCESS(Status)) {
677                             ERR("insert_tree_item returned %08lx\n", Status);
678                             ExFreePool(ri);
679                             return Status;
680                         }
681                     }
682 
683                     tree* t2 = tp.tree;
684                     while (t2) {
685                         t2->write = true;
686 
687                         t2 = t2->parent;
688                     }
689                 }
690             }
691 
692             le = le->Flink;
693         }
694 
695         if (nothing_found)
696             break;
697     }
698 
699     return STATUS_SUCCESS;
700 }
701 
/* Marks every ancestor of t (but not t itself) as needing to be written. */
static void add_parents_to_cache(tree* t) {
    tree* ancestor;

    for (ancestor = t->parent; ancestor != NULL; ancestor = ancestor->parent)
        ancestor->write = true;
}
708 
/* Records a newly allocated tree block in the extent tree using the skinny
 * metadata format, then removes the block from chunk c's free space.
 *
 * The item is keyed (address, TYPE_METADATA_ITEM, level) and carries a single
 * inline TREE_BLOCK_REF pointing at root_id, with a reference count of 1 and
 * the superblock's current generation.
 *
 * Returns true on success; false if the item could not be allocated or
 * inserted, in which case the chunk's free space is left untouched. */
static bool insert_tree_extent_skinny(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t address, PIRP Irp, LIST_ENTRY* rollback) {
    traverse_ptr insert_tp;
    NTSTATUS Status;
    EXTENT_ITEM_SKINNY_METADATA* item;

    item = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);

    if (item == NULL) {
        ERR("out of memory\n");
        return false;
    }

    item->ei.refcount = 1;
    item->ei.generation = Vcb->superblock.generation;
    item->ei.flags = EXTENT_ITEM_TREE_BLOCK;
    item->type = TYPE_TREE_BLOCK_REF;
    item->tbr.offset = root_id;

    Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, item,
                              sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp);

    if (!NT_SUCCESS(Status)) {
        ERR("insert_tree_item returned %08lx\n", Status);
        ExFreePool(item);
        return false;
    }

    // The block is now in use: take it out of the chunk's free space.
    acquire_chunk_lock(c, Vcb);
    space_list_subtract(c, address, Vcb->superblock.node_size, rollback);
    release_chunk_lock(c, Vcb);

    // The extent-tree leaf that received the item has changed, so its
    // ancestors need writing too.
    add_parents_to_cache(insert_tp.tree);

    return true;
}
743 
/* Picks an address inside chunk c for a new tree block of node_size bytes.
 *
 * On success *address receives the chosen address, c->last_alloc is advanced
 * to just past it, and true is returned. The free-space lists themselves are
 * not modified. The search order is:
 *   1. on first use, the last entry of the address-ordered free list;
 *   2. directly at c->last_alloc, if a free range covers it;
 *   3. via the size-ordered list: an exact-size range, or failing that the
 *      entry just before the first range that is too small;
 *   4. the last entry of the size-ordered list if it is larger than a node.
 *
 * Returns false if the chunk lacks the space, its free-space cache cannot be
 * loaded, or no suitable range is found. */
bool find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t* address) {
    const uint64_t node_size = Vcb->superblock.node_size;
    LIST_ENTRY* le;
    space* s;

    TRACE("(%p, %I64x, %p)\n", Vcb, c->offset, address);

    // Quick reject: not enough unused bytes in the chunk at all.
    if (node_size > c->chunk_item->size - c->used)
        return false;

    if (!c->cache_loaded) {
        NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);

        if (!NT_SUCCESS(Status)) {
            ERR("load_cache_chunk returned %08lx\n", Status);
            return false;
        }
    }

    if (IsListEmpty(&c->space_size))
        return false;

    if (!c->last_alloc_set) {
        // First allocation from this chunk: seed the cursor from the last
        // entry of the address-ordered list.
        s = CONTAINING_RECORD(c->space.Blink, space, list_entry);

        c->last_alloc = s->address;
        c->last_alloc_set = true;

        if (s->size >= node_size) {
            *address = s->address;
            c->last_alloc += node_size;
            return true;
        }
    }

    // Prefer continuing exactly where the previous allocation ended.
    for (le = c->space.Flink; le != &c->space; le = le->Flink) {
        s = CONTAINING_RECORD(le, space, list_entry);

        if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + node_size) {
            *address = c->last_alloc;
            c->last_alloc += node_size;
            return true;
        }
    }

    // Otherwise walk the size-ordered list until a range is no longer
    // strictly larger than a node.
    for (le = c->space_size.Flink; le != &c->space_size; le = le->Flink) {
        s = CONTAINING_RECORD(le, space, list_entry_size);

        if (s->size > node_size)
            continue;

        if (s->size < node_size) {
            // Too small. If this is the very first entry nothing fits;
            // otherwise fall back to the entry before it.
            if (le == c->space_size.Flink)
                return false;

            s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
        }

        *address = s->address;
        c->last_alloc = s->address + node_size;

        return true;
    }

    // Every entry was larger than a node: use the last one.
    s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);

    if (s->size > node_size) {
        *address = s->address;
        c->last_alloc = s->address + node_size;
        return true;
    }

    return false;
}
824 
/* Allocates space for a tree block in chunk c and records it in the extent
 * tree.
 *
 * An address is chosen with find_metadata_address_in_chunk. If the
 * filesystem uses skinny metadata the work is delegated to
 * insert_tree_extent_skinny; otherwise an item keyed
 * (address, TYPE_EXTENT_ITEM, node_size) is inserted, carrying the level and
 * one inline TREE_BLOCK_REF to root_id, and the block is removed from the
 * chunk's free space.
 *
 * Returns true and stores the address in *new_address on success. Returns
 * false, leaving *new_address unwritten, if no address is available or the
 * extent item cannot be allocated or inserted. */
static bool insert_tree_extent(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t* new_address, PIRP Irp, LIST_ENTRY* rollback) {
    traverse_ptr insert_tp;
    NTSTATUS Status;
    EXTENT_ITEM_TREE2* item;
    uint64_t address;

    TRACE("(%p, %x, %I64x, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, Irp, rollback);

    if (!find_metadata_address_in_chunk(Vcb, c, &address))
        return false;

    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
        if (!insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback))
            return false;

        *new_address = address;

        return true;
    }

    item = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);

    if (item == NULL) {
        ERR("out of memory\n");
        return false;
    }

    item->eit.extent_item.refcount = 1;
    item->eit.extent_item.generation = Vcb->superblock.generation;
    item->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
    item->eit.level = level;
    item->type = TYPE_TREE_BLOCK_REF;
    item->tbr.offset = root_id;

    Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, item,
                              sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp);

    if (!NT_SUCCESS(Status)) {
        ERR("insert_tree_item returned %08lx\n", Status);
        ExFreePool(item);
        return false;
    }

    // The block is now in use: take it out of the chunk's free space.
    acquire_chunk_lock(c, Vcb);
    space_list_subtract(c, address, Vcb->superblock.node_size, rollback);
    release_chunk_lock(c, Vcb);

    add_parents_to_cache(insert_tp.tree);

    *new_address = address;

    return true;
}
877 
get_tree_new_address(device_extension * Vcb,tree * t,PIRP Irp,LIST_ENTRY * rollback)878 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
879     NTSTATUS Status;
880     chunk *origchunk = NULL, *c;
881     LIST_ENTRY* le;
882     uint64_t flags, addr;
883 
884     if (t->root->id == BTRFS_ROOT_CHUNK)
885         flags = Vcb->system_flags;
886     else
887         flags = Vcb->metadata_flags;
888 
889     if (t->has_address) {
890         origchunk = get_chunk_from_address(Vcb, t->header.address);
891 
892         if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags &&
893             insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
894             t->new_address = addr;
895             t->has_new_address = true;
896             return STATUS_SUCCESS;
897         }
898     }
899 
900     ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
901 
902     le = Vcb->chunks.Flink;
903     while (le != &Vcb->chunks) {
904         c = CONTAINING_RECORD(le, chunk, list_entry);
905 
906         if (!c->readonly && !c->reloc) {
907             acquire_chunk_lock(c, Vcb);
908 
909             if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
910                 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
911                     release_chunk_lock(c, Vcb);
912                     ExReleaseResourceLite(&Vcb->chunk_lock);
913                     t->new_address = addr;
914                     t->has_new_address = true;
915                     return STATUS_SUCCESS;
916                 }
917             }
918 
919             release_chunk_lock(c, Vcb);
920         }
921 
922         le = le->Flink;
923     }
924 
925     // allocate new chunk if necessary
926 
927     Status = alloc_chunk(Vcb, flags, &c, false);
928 
929     if (!NT_SUCCESS(Status)) {
930         ERR("alloc_chunk returned %08lx\n", Status);
931         ExReleaseResourceLite(&Vcb->chunk_lock);
932         return Status;
933     }
934 
935     acquire_chunk_lock(c, Vcb);
936 
937     if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
938         if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
939             release_chunk_lock(c, Vcb);
940             ExReleaseResourceLite(&Vcb->chunk_lock);
941             t->new_address = addr;
942             t->has_new_address = true;
943             return STATUS_SUCCESS;
944         }
945     }
946 
947     release_chunk_lock(c, Vcb);
948 
949     ExReleaseResourceLite(&Vcb->chunk_lock);
950 
951     ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
952 
953     return STATUS_DISK_FULL;
954 }
955 
reduce_tree_extent(device_extension * Vcb,uint64_t address,tree * t,uint64_t parent_root,uint8_t level,PIRP Irp,LIST_ENTRY * rollback)956 static NTSTATUS reduce_tree_extent(device_extension* Vcb, uint64_t address, tree* t, uint64_t parent_root, uint8_t level, PIRP Irp, LIST_ENTRY* rollback) {
957     NTSTATUS Status;
958     uint64_t rc, root;
959 
960     TRACE("(%p, %I64x, %p)\n", Vcb, address, t);
961 
962     rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
963     if (rc == 0) {
964         ERR("error - refcount for extent %I64x was 0\n", address);
965         return STATUS_INTERNAL_ERROR;
966     }
967 
968     if (!t || t->parent)
969         root = parent_root;
970     else
971         root = t->header.tree_id;
972 
973     Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp);
974     if (!NT_SUCCESS(Status)) {
975         ERR("decrease_extent_refcount_tree returned %08lx\n", Status);
976         return Status;
977     }
978 
979     if (rc == 1) {
980         chunk* c = get_chunk_from_address(Vcb, address);
981 
982         if (c) {
983             acquire_chunk_lock(c, Vcb);
984 
985             if (!c->cache_loaded) {
986                 Status = load_cache_chunk(Vcb, c, NULL);
987 
988                 if (!NT_SUCCESS(Status)) {
989                     ERR("load_cache_chunk returned %08lx\n", Status);
990                     release_chunk_lock(c, Vcb);
991                     return Status;
992                 }
993             }
994 
995             c->used -= Vcb->superblock.node_size;
996 
997             space_list_add(c, address, Vcb->superblock.node_size, rollback);
998 
999             release_chunk_lock(c, Vcb);
1000         } else
1001             ERR("could not find chunk for address %I64x\n", address);
1002     }
1003 
1004     return STATUS_SUCCESS;
1005 }
1006 
add_changed_extent_ref_edr(changed_extent * ce,EXTENT_DATA_REF * edr,bool old)1007 static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, bool old) {
1008     LIST_ENTRY *le2, *list;
1009     changed_extent_ref* cer;
1010 
1011     list = old ? &ce->old_refs : &ce->refs;
1012 
1013     le2 = list->Flink;
1014     while (le2 != list) {
1015         cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1016 
1017         if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) {
1018             cer->edr.count += edr->count;
1019             goto end;
1020         }
1021 
1022         le2 = le2->Flink;
1023     }
1024 
1025     cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
1026     if (!cer) {
1027         ERR("out of memory\n");
1028         return STATUS_INSUFFICIENT_RESOURCES;
1029     }
1030 
1031     cer->type = TYPE_EXTENT_DATA_REF;
1032     RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF));
1033     InsertTailList(list, &cer->list_entry);
1034 
1035 end:
1036     if (old)
1037         ce->old_count += edr->count;
1038     else
1039         ce->count += edr->count;
1040 
1041     return STATUS_SUCCESS;
1042 }
1043 
add_changed_extent_ref_sdr(changed_extent * ce,SHARED_DATA_REF * sdr,bool old)1044 static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, bool old) {
1045     LIST_ENTRY *le2, *list;
1046     changed_extent_ref* cer;
1047 
1048     list = old ? &ce->old_refs : &ce->refs;
1049 
1050     le2 = list->Flink;
1051     while (le2 != list) {
1052         cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1053 
1054         if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) {
1055             cer->sdr.count += sdr->count;
1056             goto end;
1057         }
1058 
1059         le2 = le2->Flink;
1060     }
1061 
1062     cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
1063     if (!cer) {
1064         ERR("out of memory\n");
1065         return STATUS_INSUFFICIENT_RESOURCES;
1066     }
1067 
1068     cer->type = TYPE_SHARED_DATA_REF;
1069     RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF));
1070     InsertTailList(list, &cer->list_entry);
1071 
1072 end:
1073     if (old)
1074         ce->old_count += sdr->count;
1075     else
1076         ce->count += sdr->count;
1077 
1078     return STATUS_SUCCESS;
1079 }
1080 
shared_tree_is_unique(device_extension * Vcb,tree * t,PIRP Irp,LIST_ENTRY * rollback)1081 static bool shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
1082     KEY searchkey;
1083     traverse_ptr tp;
1084     NTSTATUS Status;
1085 
1086     if (!t->updated_extents && t->has_address) {
1087         Status = update_tree_extents(Vcb, t, Irp, rollback);
1088         if (!NT_SUCCESS(Status)) {
1089             ERR("update_tree_extents returned %08lx\n", Status);
1090             return false;
1091         }
1092     }
1093 
1094     searchkey.obj_id = t->header.address;
1095     searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
1096     searchkey.offset = 0xffffffffffffffff;
1097 
1098     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
1099     if (!NT_SUCCESS(Status)) {
1100         ERR("error - find_item returned %08lx\n", Status);
1101         return false;
1102     }
1103 
1104     if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM))
1105         return false;
1106     else
1107         return true;
1108 }
1109 
update_tree_extents(device_extension * Vcb,tree * t,PIRP Irp,LIST_ENTRY * rollback)1110 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
1111     NTSTATUS Status;
1112     uint64_t rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
1113     uint64_t flags = get_extent_flags(Vcb, t->header.address, Irp);
1114 
1115     if (rc == 0) {
1116         ERR("refcount for extent %I64x was 0\n", t->header.address);
1117         return STATUS_INTERNAL_ERROR;
1118     }
1119 
1120     if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1121         TREE_BLOCK_REF tbr;
1122         bool unique = rc > 1 ? false : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : false);
1123 
1124         if (t->header.level == 0) {
1125             LIST_ENTRY* le;
1126 
1127             le = t->itemlist.Flink;
1128             while (le != &t->itemlist) {
1129                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1130 
1131                 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1132                     EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1133 
1134                     if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1135                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1136 
1137                         if (ed2->size > 0) {
1138                             EXTENT_DATA_REF edr;
1139                             changed_extent* ce = NULL;
1140                             chunk* c = get_chunk_from_address(Vcb, ed2->address);
1141 
1142                             if (c) {
1143                                 LIST_ENTRY* le2;
1144 
1145                                 le2 = c->changed_extents.Flink;
1146                                 while (le2 != &c->changed_extents) {
1147                                     changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1148 
1149                                     if (ce2->address == ed2->address) {
1150                                         ce = ce2;
1151                                         break;
1152                                     }
1153 
1154                                     le2 = le2->Flink;
1155                                 }
1156                             }
1157 
1158                             edr.root = t->root->id;
1159                             edr.objid = td->key.obj_id;
1160                             edr.offset = td->key.offset - ed2->offset;
1161                             edr.count = 1;
1162 
1163                             if (ce) {
1164                                 Status = add_changed_extent_ref_edr(ce, &edr, true);
1165                                 if (!NT_SUCCESS(Status)) {
1166                                     ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1167                                     return Status;
1168                                 }
1169 
1170                                 Status = add_changed_extent_ref_edr(ce, &edr, false);
1171                                 if (!NT_SUCCESS(Status)) {
1172                                     ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1173                                     return Status;
1174                                 }
1175                             }
1176 
1177                             Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1178                             if (!NT_SUCCESS(Status)) {
1179                                 ERR("increase_extent_refcount returned %08lx\n", Status);
1180                                 return Status;
1181                             }
1182 
1183                             if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1184                                 uint64_t sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
1185 
1186                                 if (sdrrc > 0) {
1187                                     SHARED_DATA_REF sdr;
1188 
1189                                     sdr.offset = t->header.address;
1190                                     sdr.count = 1;
1191 
1192                                     Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
1193                                                                       t->header.address, ce ? ce->superseded : false, Irp);
1194                                     if (!NT_SUCCESS(Status)) {
1195                                         ERR("decrease_extent_refcount returned %08lx\n", Status);
1196                                         return Status;
1197                                     }
1198 
1199                                     if (ce) {
1200                                         LIST_ENTRY* le2;
1201 
1202                                         le2 = ce->refs.Flink;
1203                                         while (le2 != &ce->refs) {
1204                                             changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1205 
1206                                             if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1207                                                 ce->count--;
1208                                                 cer->sdr.count--;
1209                                                 break;
1210                                             }
1211 
1212                                             le2 = le2->Flink;
1213                                         }
1214 
1215                                         le2 = ce->old_refs.Flink;
1216                                         while (le2 != &ce->old_refs) {
1217                                             changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1218 
1219                                             if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1220                                                 ce->old_count--;
1221 
1222                                                 if (cer->sdr.count > 1)
1223                                                     cer->sdr.count--;
1224                                                 else {
1225                                                     RemoveEntryList(&cer->list_entry);
1226                                                     ExFreePool(cer);
1227                                                 }
1228 
1229                                                 break;
1230                                             }
1231 
1232                                             le2 = le2->Flink;
1233                                         }
1234                                     }
1235                                 }
1236                             }
1237 
1238                             // FIXME - clear shared flag if unique?
1239                         }
1240                     }
1241                 }
1242 
1243                 le = le->Flink;
1244             }
1245         } else {
1246             LIST_ENTRY* le;
1247 
1248             le = t->itemlist.Flink;
1249             while (le != &t->itemlist) {
1250                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1251 
1252                 if (!td->inserted) {
1253                     tbr.offset = t->root->id;
1254 
1255                     Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF,
1256                                                       &tbr, &td->key, t->header.level - 1, Irp);
1257                     if (!NT_SUCCESS(Status)) {
1258                         ERR("increase_extent_refcount returned %08lx\n", Status);
1259                         return Status;
1260                     }
1261 
1262                     if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1263                         uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
1264 
1265                         if (sbrrc > 0) {
1266                             SHARED_BLOCK_REF sbr;
1267 
1268                             sbr.offset = t->header.address;
1269 
1270                             Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1271                                                               t->header.address, false, Irp);
1272                             if (!NT_SUCCESS(Status)) {
1273                                 ERR("decrease_extent_refcount returned %08lx\n", Status);
1274                                 return Status;
1275                             }
1276                         }
1277                     }
1278 
1279                     // FIXME - clear shared flag if unique?
1280                 }
1281 
1282                 le = le->Flink;
1283             }
1284         }
1285 
1286         if (unique) {
1287             uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
1288 
1289             if (sbrrc == 1) {
1290                 SHARED_BLOCK_REF sbr;
1291 
1292                 sbr.offset = t->parent->header.address;
1293 
1294                 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1295                                                   t->parent->header.address, false, Irp);
1296                 if (!NT_SUCCESS(Status)) {
1297                     ERR("decrease_extent_refcount returned %08lx\n", Status);
1298                     return Status;
1299                 }
1300             }
1301         }
1302 
1303         if (t->parent)
1304             tbr.offset = t->parent->header.tree_id;
1305         else
1306             tbr.offset = t->header.tree_id;
1307 
1308         Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
1309                                           t->parent ? &t->paritem->key : NULL, t->header.level, Irp);
1310         if (!NT_SUCCESS(Status)) {
1311             ERR("increase_extent_refcount returned %08lx\n", Status);
1312             return Status;
1313         }
1314 
1315         // FIXME - clear shared flag if unique?
1316 
1317         t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1318     }
1319 
1320     if (rc > 1 || t->header.tree_id == t->root->id) {
1321         Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback);
1322 
1323         if (!NT_SUCCESS(Status)) {
1324             ERR("reduce_tree_extent returned %08lx\n", Status);
1325             return Status;
1326         }
1327     }
1328 
1329     t->has_address = false;
1330 
1331     if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
1332         if (t->header.tree_id == t->root->id) {
1333             flags |= EXTENT_ITEM_SHARED_BACKREFS;
1334             update_extent_flags(Vcb, t->header.address, flags, Irp);
1335         }
1336 
1337         if (t->header.level > 0) {
1338             LIST_ENTRY* le;
1339 
1340             le = t->itemlist.Flink;
1341             while (le != &t->itemlist) {
1342                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1343 
1344                 if (!td->inserted) {
1345                     if (t->header.tree_id == t->root->id) {
1346                         SHARED_BLOCK_REF sbr;
1347 
1348                         sbr.offset = t->header.address;
1349 
1350                         Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp);
1351                     } else {
1352                         TREE_BLOCK_REF tbr;
1353 
1354                         tbr.offset = t->root->id;
1355 
1356                         Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp);
1357                     }
1358 
1359                     if (!NT_SUCCESS(Status)) {
1360                         ERR("increase_extent_refcount returned %08lx\n", Status);
1361                         return Status;
1362                     }
1363                 }
1364 
1365                 le = le->Flink;
1366             }
1367         } else {
1368             LIST_ENTRY* le;
1369 
1370             le = t->itemlist.Flink;
1371             while (le != &t->itemlist) {
1372                 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1373 
1374                 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1375                     EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1376 
1377                     if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1378                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1379 
1380                         if (ed2->size > 0) {
1381                             changed_extent* ce = NULL;
1382                             chunk* c = get_chunk_from_address(Vcb, ed2->address);
1383 
1384                             if (c) {
1385                                 LIST_ENTRY* le2;
1386 
1387                                 le2 = c->changed_extents.Flink;
1388                                 while (le2 != &c->changed_extents) {
1389                                     changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1390 
1391                                     if (ce2->address == ed2->address) {
1392                                         ce = ce2;
1393                                         break;
1394                                     }
1395 
1396                                     le2 = le2->Flink;
1397                                 }
1398                             }
1399 
1400                             if (t->header.tree_id == t->root->id) {
1401                                 SHARED_DATA_REF sdr;
1402 
1403                                 sdr.offset = t->header.address;
1404                                 sdr.count = 1;
1405 
1406                                 if (ce) {
1407                                     Status = add_changed_extent_ref_sdr(ce, &sdr, true);
1408                                     if (!NT_SUCCESS(Status)) {
1409                                         ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1410                                         return Status;
1411                                     }
1412 
1413                                     Status = add_changed_extent_ref_sdr(ce, &sdr, false);
1414                                     if (!NT_SUCCESS(Status)) {
1415                                         ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1416                                         return Status;
1417                                     }
1418                                 }
1419 
1420                                 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp);
1421                             } else {
1422                                 EXTENT_DATA_REF edr;
1423 
1424                                 edr.root = t->root->id;
1425                                 edr.objid = td->key.obj_id;
1426                                 edr.offset = td->key.offset - ed2->offset;
1427                                 edr.count = 1;
1428 
1429                                 if (ce) {
1430                                     Status = add_changed_extent_ref_edr(ce, &edr, true);
1431                                     if (!NT_SUCCESS(Status)) {
1432                                         ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1433                                         return Status;
1434                                     }
1435 
1436                                     Status = add_changed_extent_ref_edr(ce, &edr, false);
1437                                     if (!NT_SUCCESS(Status)) {
1438                                         ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1439                                         return Status;
1440                                     }
1441                                 }
1442 
1443                                 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1444                             }
1445 
1446                             if (!NT_SUCCESS(Status)) {
1447                                 ERR("increase_extent_refcount returned %08lx\n", Status);
1448                                 return Status;
1449                             }
1450                         }
1451                     }
1452                 }
1453 
1454                 le = le->Flink;
1455             }
1456         }
1457     }
1458 
1459     t->updated_extents = true;
1460     t->header.tree_id = t->root->id;
1461 
1462     return STATUS_SUCCESS;
1463 }
1464 
allocate_tree_extents(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)1465 static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
1466     LIST_ENTRY* le;
1467     NTSTATUS Status;
1468     bool changed = false;
1469     uint8_t max_level = 0, level;
1470 
1471     TRACE("(%p)\n", Vcb);
1472 
1473     le = Vcb->trees.Flink;
1474     while (le != &Vcb->trees) {
1475         tree* t = CONTAINING_RECORD(le, tree, list_entry);
1476 
1477         if (t->write && !t->has_new_address) {
1478             chunk* c;
1479 
1480             if (t->has_address) {
1481                 c = get_chunk_from_address(Vcb, t->header.address);
1482 
1483                 if (c) {
1484                     if (!c->cache_loaded) {
1485                         acquire_chunk_lock(c, Vcb);
1486 
1487                         if (!c->cache_loaded) {
1488                             Status = load_cache_chunk(Vcb, c, NULL);
1489 
1490                             if (!NT_SUCCESS(Status)) {
1491                                 ERR("load_cache_chunk returned %08lx\n", Status);
1492                                 release_chunk_lock(c, Vcb);
1493                                 return Status;
1494                             }
1495                         }
1496 
1497                         release_chunk_lock(c, Vcb);
1498                     }
1499                 }
1500             }
1501 
1502             Status = get_tree_new_address(Vcb, t, Irp, rollback);
1503             if (!NT_SUCCESS(Status)) {
1504                 ERR("get_tree_new_address returned %08lx\n", Status);
1505                 return Status;
1506             }
1507 
1508             TRACE("allocated extent %I64x\n", t->new_address);
1509 
1510             c = get_chunk_from_address(Vcb, t->new_address);
1511 
1512             if (c)
1513                 c->used += Vcb->superblock.node_size;
1514             else {
1515                 ERR("could not find chunk for address %I64x\n", t->new_address);
1516                 return STATUS_INTERNAL_ERROR;
1517             }
1518 
1519             changed = true;
1520 
1521             if (t->header.level > max_level)
1522                 max_level = t->header.level;
1523         }
1524 
1525         le = le->Flink;
1526     }
1527 
1528     if (!changed)
1529         return STATUS_SUCCESS;
1530 
1531     level = max_level;
1532     do {
1533         le = Vcb->trees.Flink;
1534         while (le != &Vcb->trees) {
1535             tree* t = CONTAINING_RECORD(le, tree, list_entry);
1536 
1537             if (t->write && !t->updated_extents && t->has_address && t->header.level == level) {
1538                 Status = update_tree_extents(Vcb, t, Irp, rollback);
1539                 if (!NT_SUCCESS(Status)) {
1540                     ERR("update_tree_extents returned %08lx\n", Status);
1541                     return Status;
1542                 }
1543             }
1544 
1545             le = le->Flink;
1546         }
1547 
1548         if (level == 0)
1549             break;
1550 
1551         level--;
1552     } while (true);
1553 
1554     return STATUS_SUCCESS;
1555 }
1556 
update_root_root(device_extension * Vcb,bool no_cache,PIRP Irp,LIST_ENTRY * rollback)1557 static NTSTATUS update_root_root(device_extension* Vcb, bool no_cache, PIRP Irp, LIST_ENTRY* rollback) {
1558     LIST_ENTRY* le;
1559     NTSTATUS Status;
1560 
1561     TRACE("(%p)\n", Vcb);
1562 
1563     le = Vcb->trees.Flink;
1564     while (le != &Vcb->trees) {
1565         tree* t = CONTAINING_RECORD(le, tree, list_entry);
1566 
1567         if (t->write && !t->parent) {
1568             if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1569                 KEY searchkey;
1570                 traverse_ptr tp;
1571 
1572                 searchkey.obj_id = t->root->id;
1573                 searchkey.obj_type = TYPE_ROOT_ITEM;
1574                 searchkey.offset = 0xffffffffffffffff;
1575 
1576                 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1577                 if (!NT_SUCCESS(Status)) {
1578                     ERR("error - find_item returned %08lx\n", Status);
1579                     return Status;
1580                 }
1581 
1582                 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1583                     ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
1584                     return STATUS_INTERNAL_ERROR;
1585                 }
1586 
1587                 TRACE("updating the address for root %I64x to %I64x\n", searchkey.obj_id, t->new_address);
1588 
1589                 t->root->root_item.block_number = t->new_address;
1590                 t->root->root_item.root_level = t->header.level;
1591                 t->root->root_item.generation = Vcb->superblock.generation;
1592                 t->root->root_item.generation2 = Vcb->superblock.generation;
1593 
1594                 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
1595 
1596                 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1597             }
1598 
1599             t->root->treeholder.address = t->new_address;
1600             t->root->treeholder.generation = Vcb->superblock.generation;
1601         }
1602 
1603         le = le->Flink;
1604     }
1605 
1606     if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
1607         ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
1608         Status = update_chunk_caches(Vcb, Irp, rollback);
1609         ExReleaseResourceLite(&Vcb->chunk_lock);
1610 
1611         if (!NT_SUCCESS(Status)) {
1612             ERR("update_chunk_caches returned %08lx\n", Status);
1613             return Status;
1614         }
1615     }
1616 
1617     return STATUS_SUCCESS;
1618 }
1619 
do_tree_writes(device_extension * Vcb,LIST_ENTRY * tree_writes,bool no_free)1620 NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, bool no_free) {
1621     chunk* c;
1622     LIST_ENTRY* le;
1623     tree_write* tw;
1624     NTSTATUS Status;
1625     ULONG i, num_bits;
1626     write_data_context* wtc;
1627     ULONG bit_num = 0;
1628     bool raid56 = false;
1629 
1630     // merge together runs
1631     c = NULL;
1632     le = tree_writes->Flink;
1633     while (le != tree_writes) {
1634         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1635 
1636         if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
1637             c = get_chunk_from_address(Vcb, tw->address);
1638         else {
1639             tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
1640 
1641             if (tw->address == tw2->address + tw2->length) {
1642                 uint8_t* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG);
1643 
1644                 if (!data) {
1645                     ERR("out of memory\n");
1646                     return STATUS_INSUFFICIENT_RESOURCES;
1647                 }
1648 
1649                 RtlCopyMemory(data, tw2->data, tw2->length);
1650                 RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
1651 
1652                 if (!no_free || tw2->allocated)
1653                     ExFreePool(tw2->data);
1654 
1655                 tw2->data = data;
1656                 tw2->length += tw->length;
1657                 tw2->allocated = true;
1658 
1659                 if (!no_free || tw->allocated)
1660                     ExFreePool(tw->data);
1661 
1662                 RemoveEntryList(&tw->list_entry);
1663                 ExFreePool(tw);
1664 
1665                 le = tw2->list_entry.Flink;
1666                 continue;
1667             }
1668         }
1669 
1670         tw->c = c;
1671 
1672         if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6))
1673             raid56 = true;
1674 
1675         le = le->Flink;
1676     }
1677 
1678     num_bits = 0;
1679 
1680     le = tree_writes->Flink;
1681     while (le != tree_writes) {
1682         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1683 
1684         num_bits++;
1685 
1686         le = le->Flink;
1687     }
1688 
1689     wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG);
1690     if (!wtc) {
1691         ERR("out of memory\n");
1692         return STATUS_INSUFFICIENT_RESOURCES;
1693     }
1694 
1695     le = tree_writes->Flink;
1696 
1697     while (le != tree_writes) {
1698         tw = CONTAINING_RECORD(le, tree_write, list_entry);
1699 
1700         TRACE("address: %I64x, size: %x\n", tw->address, tw->length);
1701 
1702         KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, false);
1703         InitializeListHead(&wtc[bit_num].stripes);
1704         wtc[bit_num].need_wait = false;
1705         wtc[bit_num].stripes_left = 0;
1706         wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL;
1707         wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL;
1708 
1709         Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, false, 0, HighPagePriority);
1710         if (!NT_SUCCESS(Status)) {
1711             ERR("write_data returned %08lx\n", Status);
1712 
1713             for (i = 0; i < num_bits; i++) {
1714                 free_write_data_stripes(&wtc[i]);
1715             }
1716             ExFreePool(wtc);
1717 
1718             return Status;
1719         }
1720 
1721         bit_num++;
1722 
1723         le = le->Flink;
1724     }
1725 
1726     for (i = 0; i < num_bits; i++) {
1727         if (wtc[i].stripes.Flink != &wtc[i].stripes) {
1728             // launch writes and wait
1729             le = wtc[i].stripes.Flink;
1730             while (le != &wtc[i].stripes) {
1731                 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1732 
1733                 if (stripe->status != WriteDataStatus_Ignore) {
1734                     wtc[i].need_wait = true;
1735                     IoCallDriver(stripe->device->devobj, stripe->Irp);
1736                 }
1737 
1738                 le = le->Flink;
1739             }
1740         }
1741     }
1742 
1743     for (i = 0; i < num_bits; i++) {
1744         if (wtc[i].need_wait)
1745             KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, false, NULL);
1746     }
1747 
1748     for (i = 0; i < num_bits; i++) {
1749         le = wtc[i].stripes.Flink;
1750         while (le != &wtc[i].stripes) {
1751             write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1752 
1753             if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
1754                 Status = stripe->iosb.Status;
1755                 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
1756                 break;
1757             }
1758 
1759             le = le->Flink;
1760         }
1761 
1762         free_write_data_stripes(&wtc[i]);
1763     }
1764 
1765     ExFreePool(wtc);
1766 
1767     if (raid56) {
1768         c = NULL;
1769 
1770         le = tree_writes->Flink;
1771         while (le != tree_writes) {
1772             tw = CONTAINING_RECORD(le, tree_write, list_entry);
1773 
1774             if (tw->c != c) {
1775                 c = tw->c;
1776 
1777                 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
1778 
1779                 while (!IsListEmpty(&c->partial_stripes)) {
1780                     partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
1781 
1782                     Status = flush_partial_stripe(Vcb, c, ps);
1783 
1784                     if (ps->bmparr)
1785                         ExFreePool(ps->bmparr);
1786 
1787                     ExFreePool(ps);
1788 
1789                     if (!NT_SUCCESS(Status)) {
1790                         ERR("flush_partial_stripe returned %08lx\n", Status);
1791                         ExReleaseResourceLite(&c->partial_stripes_lock);
1792                         return Status;
1793                     }
1794                 }
1795 
1796                 ExReleaseResourceLite(&c->partial_stripes_lock);
1797             }
1798 
1799             le = le->Flink;
1800         }
1801     }
1802 
1803     return STATUS_SUCCESS;
1804 }
1805 
/* Computes the checksum of a tree block and stores it at the start of th.
 *
 * The checksummed region begins at th->fs_uuid and is node_size minus
 * sizeof(th->csum) bytes long. The algorithm is selected by the superblock's
 * csum_type; an unrecognised type leaves the block untouched. */
void calc_tree_checksum(device_extension* Vcb, tree_header* th) {
    uint8_t* payload = (uint8_t*)&th->fs_uuid;
    size_t payload_len = Vcb->superblock.node_size - sizeof(th->csum);

    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C:
            *((uint32_t*)th) = ~calc_crc32c(0xffffffff, payload, payload_len);
            break;

        case CSUM_TYPE_XXHASH:
            *((uint64_t*)th) = XXH64(payload, payload_len, 0);
            break;

        case CSUM_TYPE_SHA256:
            calc_sha256((uint8_t*)th, payload, payload_len);
            break;

        case CSUM_TYPE_BLAKE2:
            blake2b((uint8_t*)th, BLAKE2_HASH_SIZE, payload, payload_len);
            break;

        default:
            break;
    }
}
1825 
write_trees(device_extension * Vcb,PIRP Irp)1826 static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
1827     ULONG level;
1828     uint8_t *data, *body;
1829     NTSTATUS Status;
1830     LIST_ENTRY* le;
1831     LIST_ENTRY tree_writes;
1832     tree_write* tw;
1833 
1834     TRACE("(%p)\n", Vcb);
1835 
1836     InitializeListHead(&tree_writes);
1837 
1838     for (level = 0; level <= 255; level++) {
1839         bool nothing_found = true;
1840 
1841         TRACE("level = %lu\n", level);
1842 
1843         le = Vcb->trees.Flink;
1844         while (le != &Vcb->trees) {
1845             tree* t = CONTAINING_RECORD(le, tree, list_entry);
1846 
1847             if (t->write && t->header.level == level) {
1848                 KEY firstitem, searchkey;
1849                 LIST_ENTRY* le2;
1850                 traverse_ptr tp;
1851 
1852                 if (!t->has_new_address) {
1853                     ERR("error - tried to write tree with no new address\n");
1854                     return STATUS_INTERNAL_ERROR;
1855                 }
1856 
1857                 le2 = t->itemlist.Flink;
1858                 while (le2 != &t->itemlist) {
1859                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1860                     if (!td->ignore) {
1861                         firstitem = td->key;
1862                         break;
1863                     }
1864                     le2 = le2->Flink;
1865                 }
1866 
1867                 if (t->parent) {
1868                     t->paritem->key = firstitem;
1869                     t->paritem->treeholder.address = t->new_address;
1870                     t->paritem->treeholder.generation = Vcb->superblock.generation;
1871                 }
1872 
1873                 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
1874                     EXTENT_ITEM_TREE* eit;
1875 
1876                     searchkey.obj_id = t->new_address;
1877                     searchkey.obj_type = TYPE_EXTENT_ITEM;
1878                     searchkey.offset = Vcb->superblock.node_size;
1879 
1880                     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
1881                     if (!NT_SUCCESS(Status)) {
1882                         ERR("error - find_item returned %08lx\n", Status);
1883                         return Status;
1884                     }
1885 
1886                     if (keycmp(searchkey, tp.item->key)) {
1887                         ERR("could not find %I64x,%x,%I64x in extent_root (found %I64x,%x,%I64x instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
1888                         return STATUS_INTERNAL_ERROR;
1889                     }
1890 
1891                     if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
1892                         ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
1893                         return STATUS_INTERNAL_ERROR;
1894                     }
1895 
1896                     eit = (EXTENT_ITEM_TREE*)tp.item->data;
1897                     eit->firstitem = firstitem;
1898                 }
1899 
1900                 nothing_found = false;
1901             }
1902 
1903             le = le->Flink;
1904         }
1905 
1906         if (nothing_found)
1907             break;
1908     }
1909 
1910     TRACE("allocated tree extents\n");
1911 
1912     le = Vcb->trees.Flink;
1913     while (le != &Vcb->trees) {
1914         tree* t = CONTAINING_RECORD(le, tree, list_entry);
1915         LIST_ENTRY* le2;
1916 #ifdef DEBUG_PARANOID
1917         uint32_t num_items = 0, size = 0;
1918         bool crash = false;
1919 #endif
1920 
1921         if (t->write) {
1922 #ifdef DEBUG_PARANOID
1923             bool first = true;
1924             KEY lastkey;
1925 
1926             le2 = t->itemlist.Flink;
1927             while (le2 != &t->itemlist) {
1928                 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1929                 if (!td->ignore) {
1930                     num_items++;
1931 
1932                     if (!first) {
1933                         if (keycmp(td->key, lastkey) == 0) {
1934                             ERR("(%I64x,%x,%I64x): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1935                             crash = true;
1936                         } else if (keycmp(td->key, lastkey) == -1) {
1937                             ERR("(%I64x,%x,%I64x): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1938                             crash = true;
1939                         }
1940                     } else
1941                         first = false;
1942 
1943                     lastkey = td->key;
1944 
1945                     if (t->header.level == 0)
1946                         size += td->size;
1947                 }
1948                 le2 = le2->Flink;
1949             }
1950 
1951             if (t->header.level == 0)
1952                 size += num_items * sizeof(leaf_node);
1953             else
1954                 size += num_items * sizeof(internal_node);
1955 
1956             if (num_items != t->header.num_items) {
1957                 ERR("tree %I64x, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
1958                 crash = true;
1959             }
1960 
1961             if (size != t->size) {
1962                 ERR("tree %I64x, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
1963                 crash = true;
1964             }
1965 
1966             if (t->header.num_items == 0 && t->parent) {
1967                 ERR("tree %I64x, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
1968                 crash = true;
1969             }
1970 
1971             if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
1972                 ERR("tree %I64x, level %x: tried to write overlarge tree (%x > %Ix)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
1973                 crash = true;
1974             }
1975 
1976             if (crash) {
1977                 ERR("tree %p\n", t);
1978                 le2 = t->itemlist.Flink;
1979                 while (le2 != &t->itemlist) {
1980                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1981                     if (!td->ignore) {
1982                         ERR("%I64x,%x,%I64x inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
1983                     }
1984                     le2 = le2->Flink;
1985                 }
1986                 int3;
1987             }
1988 #endif
1989             t->header.address = t->new_address;
1990             t->header.generation = Vcb->superblock.generation;
1991             t->header.tree_id = t->root->id;
1992             t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
1993             t->header.fs_uuid = Vcb->superblock.metadata_uuid;
1994             t->has_address = true;
1995 
1996             data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
1997             if (!data) {
1998                 ERR("out of memory\n");
1999                 Status = STATUS_INSUFFICIENT_RESOURCES;
2000                 goto end;
2001             }
2002 
2003             body = data + sizeof(tree_header);
2004 
2005             RtlCopyMemory(data, &t->header, sizeof(tree_header));
2006             RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
2007 
2008             if (t->header.level == 0) {
2009                 leaf_node* itemptr = (leaf_node*)body;
2010                 int i = 0;
2011                 uint8_t* dataptr = data + Vcb->superblock.node_size;
2012 
2013                 le2 = t->itemlist.Flink;
2014                 while (le2 != &t->itemlist) {
2015                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2016                     if (!td->ignore) {
2017                         dataptr = dataptr - td->size;
2018 
2019                         itemptr[i].key = td->key;
2020                         itemptr[i].offset = (uint32_t)((uint8_t*)dataptr - (uint8_t*)body);
2021                         itemptr[i].size = td->size;
2022                         i++;
2023 
2024                         if (td->size > 0)
2025                             RtlCopyMemory(dataptr, td->data, td->size);
2026                     }
2027 
2028                     le2 = le2->Flink;
2029                 }
2030             } else {
2031                 internal_node* itemptr = (internal_node*)body;
2032                 int i = 0;
2033 
2034                 le2 = t->itemlist.Flink;
2035                 while (le2 != &t->itemlist) {
2036                     tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2037                     if (!td->ignore) {
2038                         itemptr[i].key = td->key;
2039                         itemptr[i].address = td->treeholder.address;
2040                         itemptr[i].generation = td->treeholder.generation;
2041                         i++;
2042                     }
2043 
2044                     le2 = le2->Flink;
2045                 }
2046             }
2047 
2048             calc_tree_checksum(Vcb, (tree_header*)data);
2049 
2050             tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
2051             if (!tw) {
2052                 ERR("out of memory\n");
2053                 ExFreePool(data);
2054                 Status = STATUS_INSUFFICIENT_RESOURCES;
2055                 goto end;
2056             }
2057 
2058             tw->address = t->new_address;
2059             tw->length = Vcb->superblock.node_size;
2060             tw->data = data;
2061             tw->allocated = false;
2062 
2063             if (IsListEmpty(&tree_writes))
2064                 InsertTailList(&tree_writes, &tw->list_entry);
2065             else {
2066                 bool inserted = false;
2067 
2068                 le2 = tree_writes.Flink;
2069                 while (le2 != &tree_writes) {
2070                     tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
2071 
2072                     if (tw2->address > tw->address) {
2073                         InsertHeadList(le2->Blink, &tw->list_entry);
2074                         inserted = true;
2075                         break;
2076                     }
2077 
2078                     le2 = le2->Flink;
2079                 }
2080 
2081                 if (!inserted)
2082                     InsertTailList(&tree_writes, &tw->list_entry);
2083             }
2084         }
2085 
2086         le = le->Flink;
2087     }
2088 
2089     Status = do_tree_writes(Vcb, &tree_writes, false);
2090     if (!NT_SUCCESS(Status)) {
2091         ERR("do_tree_writes returned %08lx\n", Status);
2092         goto end;
2093     }
2094 
2095     Status = STATUS_SUCCESS;
2096 
2097 end:
2098     while (!IsListEmpty(&tree_writes)) {
2099         le = RemoveHeadList(&tree_writes);
2100         tw = CONTAINING_RECORD(le, tree_write, list_entry);
2101 
2102         if (tw->data)
2103             ExFreePool(tw->data);
2104 
2105         ExFreePool(tw);
2106     }
2107 
2108     return Status;
2109 }
2110 
update_backup_superblock(device_extension * Vcb,superblock_backup * sb,PIRP Irp)2111 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
2112     KEY searchkey;
2113     traverse_ptr tp;
2114 
2115     RtlZeroMemory(sb, sizeof(superblock_backup));
2116 
2117     sb->root_tree_addr = Vcb->superblock.root_tree_addr;
2118     sb->root_tree_generation = Vcb->superblock.generation;
2119     sb->root_level = Vcb->superblock.root_level;
2120 
2121     sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
2122     sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
2123     sb->chunk_root_level = Vcb->superblock.chunk_root_level;
2124 
2125     searchkey.obj_id = BTRFS_ROOT_EXTENT;
2126     searchkey.obj_type = TYPE_ROOT_ITEM;
2127     searchkey.offset = 0xffffffffffffffff;
2128 
2129     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2130         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2131             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2132 
2133             sb->extent_tree_addr = ri->block_number;
2134             sb->extent_tree_generation = ri->generation;
2135             sb->extent_root_level = ri->root_level;
2136         }
2137     }
2138 
2139     searchkey.obj_id = BTRFS_ROOT_FSTREE;
2140 
2141     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2142         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2143             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2144 
2145             sb->fs_tree_addr = ri->block_number;
2146             sb->fs_tree_generation = ri->generation;
2147             sb->fs_root_level = ri->root_level;
2148         }
2149     }
2150 
2151     searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2152 
2153     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2154         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2155             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2156 
2157             sb->dev_root_addr = ri->block_number;
2158             sb->dev_root_generation = ri->generation;
2159             sb->dev_root_level = ri->root_level;
2160         }
2161     }
2162 
2163     searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2164 
2165     if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2166         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2167             ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2168 
2169             sb->csum_root_addr = ri->block_number;
2170             sb->csum_root_generation = ri->generation;
2171             sb->csum_root_level = ri->root_level;
2172         }
2173     }
2174 
2175     sb->total_bytes = Vcb->superblock.total_bytes;
2176     sb->bytes_used = Vcb->superblock.bytes_used;
2177     sb->num_devices = Vcb->superblock.num_devices;
2178 }
2179 
/* One pending superblock write: a single superblock copy destined for one
 * location on one device. */
typedef struct {
    void* context;          // owning write_superblocks_context, read back in the completion routine
    uint8_t* buf;           // pool buffer holding the superblock copy to be written
    PMDL mdl;               // MDL describing buf for direct-I/O devices, otherwise NULL
    device* device;         // device the write is sent to
    NTSTATUS Status;        // result of the write, filled in by the completion routine
    PIRP Irp;               // IRP carrying the write
    LIST_ENTRY list_entry;  // link in write_superblocks_context.stripes
} write_superblocks_stripe;
2189 
/* Shared state for one round of superblock writes across all devices. */
typedef struct _write_superblocks_context {
    KEVENT Event;       // set by the completion routine once left drops to zero
    LIST_ENTRY stripes; // list of write_superblocks_stripe entries
    LONG left;          // number of queued writes that have not completed yet
} write_superblocks_context;
2195 
_Function_class_(IO_COMPLETION_ROUTINE)2196 _Function_class_(IO_COMPLETION_ROUTINE)
2197 static NTSTATUS __stdcall write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2198     write_superblocks_stripe* stripe = conptr;
2199     write_superblocks_context* context = stripe->context;
2200 
2201     UNUSED(DeviceObject);
2202 
2203     stripe->Status = Irp->IoStatus.Status;
2204 
2205     if (InterlockedDecrement(&context->left) == 0)
2206         KeSetEvent(&context->Event, 0, false);
2207 
2208     return STATUS_MORE_PROCESSING_REQUIRED;
2209 }
2210 
/* Computes the checksum of a superblock and stores it at the start of sb.
 *
 * The checksummed region begins at sb->uuid and covers sizeof(superblock)
 * minus sizeof(sb->checksum) bytes. The algorithm is chosen by sb->csum_type;
 * an unrecognised type leaves the superblock untouched. */
static void calc_superblock_checksum(superblock* sb) {
    uint8_t* payload = (uint8_t*)&sb->uuid;

    if (sb->csum_type == CSUM_TYPE_CRC32C) {
        uint32_t crc = calc_crc32c(0xffffffff, payload, (ULONG)(sizeof(superblock) - sizeof(sb->checksum)));

        *(uint32_t*)sb = ~crc;
    } else if (sb->csum_type == CSUM_TYPE_XXHASH) {
        *(uint64_t*)sb = XXH64(payload, sizeof(superblock) - sizeof(sb->checksum), 0);
    } else if (sb->csum_type == CSUM_TYPE_SHA256) {
        calc_sha256((uint8_t*)sb, payload, sizeof(superblock) - sizeof(sb->checksum));
    } else if (sb->csum_type == CSUM_TYPE_BLAKE2) {
        blake2b((uint8_t*)sb, BLAKE2_HASH_SIZE, payload, sizeof(superblock) - sizeof(sb->checksum));
    }
}
2230 
write_superblock(device_extension * Vcb,device * device,write_superblocks_context * context)2231 static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) {
2232     unsigned int i = 0;
2233 
2234     // All the documentation says that the Linux driver only writes one superblock
2235     // if it thinks a disk is an SSD, but this doesn't seem to be the case!
2236 
2237     while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2238         ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
2239         superblock* sb;
2240         write_superblocks_stripe* stripe;
2241         PIO_STACK_LOCATION IrpSp;
2242 
2243         sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG);
2244         if (!sb) {
2245             ERR("out of memory\n");
2246             return STATUS_INSUFFICIENT_RESOURCES;
2247         }
2248 
2249         RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock));
2250 
2251         if (sblen > sizeof(superblock))
2252             RtlZeroMemory((uint8_t*)sb + sizeof(superblock), sblen - sizeof(superblock));
2253 
2254         RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM));
2255         sb->sb_phys_addr = superblock_addrs[i];
2256 
2257         calc_superblock_checksum(sb);
2258 
2259         stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG);
2260         if (!stripe) {
2261             ERR("out of memory\n");
2262             ExFreePool(sb);
2263             return STATUS_INSUFFICIENT_RESOURCES;
2264         }
2265 
2266         stripe->buf = (uint8_t*)sb;
2267 
2268         stripe->Irp = IoAllocateIrp(device->devobj->StackSize, false);
2269         if (!stripe->Irp) {
2270             ERR("IoAllocateIrp failed\n");
2271             ExFreePool(stripe);
2272             ExFreePool(sb);
2273             return STATUS_INSUFFICIENT_RESOURCES;
2274         }
2275 
2276         IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2277         IrpSp->MajorFunction = IRP_MJ_WRITE;
2278         IrpSp->FileObject = device->fileobj;
2279 
2280         if (i == 0)
2281             IrpSp->Flags |= SL_WRITE_THROUGH;
2282 
2283         if (device->devobj->Flags & DO_BUFFERED_IO) {
2284             stripe->Irp->AssociatedIrp.SystemBuffer = sb;
2285             stripe->mdl = NULL;
2286 
2287             stripe->Irp->Flags = IRP_BUFFERED_IO;
2288         } else if (device->devobj->Flags & DO_DIRECT_IO) {
2289             stripe->mdl = IoAllocateMdl(sb, sblen, false, false, NULL);
2290             if (!stripe->mdl) {
2291                 ERR("IoAllocateMdl failed\n");
2292                 IoFreeIrp(stripe->Irp);
2293                 ExFreePool(stripe);
2294                 ExFreePool(sb);
2295                 return STATUS_INSUFFICIENT_RESOURCES;
2296             }
2297 
2298             stripe->Irp->MdlAddress = stripe->mdl;
2299 
2300             MmBuildMdlForNonPagedPool(stripe->mdl);
2301         } else {
2302             stripe->Irp->UserBuffer = sb;
2303             stripe->mdl = NULL;
2304         }
2305 
2306         IrpSp->Parameters.Write.Length = sblen;
2307         IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i];
2308 
2309         IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, true, true, true);
2310 
2311         stripe->context = context;
2312         stripe->device = device;
2313         InsertTailList(&context->stripes, &stripe->list_entry);
2314 
2315         context->left++;
2316 
2317         i++;
2318     }
2319 
2320     if (i == 0)
2321         ERR("no superblocks written!\n");
2322 
2323     return STATUS_SUCCESS;
2324 }
2325 
write_superblocks(device_extension * Vcb,PIRP Irp)2326 static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
2327     uint64_t i;
2328     NTSTATUS Status;
2329     LIST_ENTRY* le;
2330     write_superblocks_context context;
2331 
2332     TRACE("(%p)\n", Vcb);
2333 
2334     le = Vcb->trees.Flink;
2335     while (le != &Vcb->trees) {
2336         tree* t = CONTAINING_RECORD(le, tree, list_entry);
2337 
2338         if (t->write && !t->parent) {
2339             if (t->root == Vcb->root_root) {
2340                 Vcb->superblock.root_tree_addr = t->new_address;
2341                 Vcb->superblock.root_level = t->header.level;
2342             } else if (t->root == Vcb->chunk_root) {
2343                 Vcb->superblock.chunk_tree_addr = t->new_address;
2344                 Vcb->superblock.chunk_root_generation = t->header.generation;
2345                 Vcb->superblock.chunk_root_level = t->header.level;
2346             }
2347         }
2348 
2349         le = le->Flink;
2350     }
2351 
2352     for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2353         RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2354     }
2355 
2356     update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
2357 
2358     KeInitializeEvent(&context.Event, NotificationEvent, false);
2359     InitializeListHead(&context.stripes);
2360     context.left = 0;
2361 
2362     le = Vcb->devices.Flink;
2363     while (le != &Vcb->devices) {
2364         device* dev = CONTAINING_RECORD(le, device, list_entry);
2365 
2366         if (dev->devobj && !dev->readonly) {
2367             Status = write_superblock(Vcb, dev, &context);
2368             if (!NT_SUCCESS(Status)) {
2369                 ERR("write_superblock returned %08lx\n", Status);
2370                 goto end;
2371             }
2372         }
2373 
2374         le = le->Flink;
2375     }
2376 
2377     if (IsListEmpty(&context.stripes)) {
2378         ERR("error - not writing any superblocks\n");
2379         Status = STATUS_INTERNAL_ERROR;
2380         goto end;
2381     }
2382 
2383     le = context.stripes.Flink;
2384     while (le != &context.stripes) {
2385         write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2386 
2387         IoCallDriver(stripe->device->devobj, stripe->Irp);
2388 
2389         le = le->Flink;
2390     }
2391 
2392     KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2393 
2394     le = context.stripes.Flink;
2395     while (le != &context.stripes) {
2396         write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2397 
2398         if (!NT_SUCCESS(stripe->Status)) {
2399             ERR("device %I64x returned %08lx\n", stripe->device->devitem.dev_id, stripe->Status);
2400             log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2401             Status = stripe->Status;
2402             goto end;
2403         }
2404 
2405         le = le->Flink;
2406     }
2407 
2408     Status = STATUS_SUCCESS;
2409 
2410 end:
2411     while (!IsListEmpty(&context.stripes)) {
2412         write_superblocks_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&context.stripes), write_superblocks_stripe, list_entry);
2413 
2414         if (stripe->mdl) {
2415             if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2416                 MmUnlockPages(stripe->mdl);
2417 
2418             IoFreeMdl(stripe->mdl);
2419         }
2420 
2421         if (stripe->Irp)
2422             IoFreeIrp(stripe->Irp);
2423 
2424         if (stripe->buf)
2425             ExFreePool(stripe->buf);
2426 
2427         ExFreePool(stripe);
2428     }
2429 
2430     return Status;
2431 }
2432 
flush_changed_extent(device_extension * Vcb,chunk * c,changed_extent * ce,PIRP Irp,LIST_ENTRY * rollback)2433 static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
2434     LIST_ENTRY *le, *le2;
2435     NTSTATUS Status;
2436     uint64_t old_size;
2437 
2438     if (ce->count == 0 && ce->old_count == 0) {
2439         while (!IsListEmpty(&ce->refs)) {
2440             changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry);
2441             ExFreePool(cer);
2442         }
2443 
2444         while (!IsListEmpty(&ce->old_refs)) {
2445             changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry);
2446             ExFreePool(cer);
2447         }
2448 
2449         goto end;
2450     }
2451 
2452     le = ce->refs.Flink;
2453     while (le != &ce->refs) {
2454         changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2455         uint32_t old_count = 0;
2456 
2457         if (cer->type == TYPE_EXTENT_DATA_REF) {
2458             le2 = ce->old_refs.Flink;
2459             while (le2 != &ce->old_refs) {
2460                 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2461 
2462                 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2463                     old_count = cer2->edr.count;
2464                     break;
2465                 }
2466 
2467                 le2 = le2->Flink;
2468             }
2469 
2470             old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2471 
2472             if (cer->edr.count > old_count) {
2473                 Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp);
2474 
2475                 if (!NT_SUCCESS(Status)) {
2476                     ERR("increase_extent_refcount_data returned %08lx\n", Status);
2477                     return Status;
2478                 }
2479             }
2480         } else if (cer->type == TYPE_SHARED_DATA_REF) {
2481             le2 = ce->old_refs.Flink;
2482             while (le2 != &ce->old_refs) {
2483                 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2484 
2485                 if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
2486                     RemoveEntryList(&cer2->list_entry);
2487                     ExFreePool(cer2);
2488                     break;
2489                 }
2490 
2491                 le2 = le2->Flink;
2492             }
2493         }
2494 
2495         le = le->Flink;
2496     }
2497 
2498     le = ce->refs.Flink;
2499     while (le != &ce->refs) {
2500         changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2501         LIST_ENTRY* le3 = le->Flink;
2502         uint32_t old_count = 0;
2503 
2504         if (cer->type == TYPE_EXTENT_DATA_REF) {
2505             le2 = ce->old_refs.Flink;
2506             while (le2 != &ce->old_refs) {
2507                 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2508 
2509                 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2510                     old_count = cer2->edr.count;
2511 
2512                     RemoveEntryList(&cer2->list_entry);
2513                     ExFreePool(cer2);
2514                     break;
2515                 }
2516 
2517                 le2 = le2->Flink;
2518             }
2519 
2520             old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2521 
2522             if (cer->edr.count < old_count) {
2523                 Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
2524                                                        old_count - cer->edr.count, ce->superseded, Irp);
2525 
2526                 if (!NT_SUCCESS(Status)) {
2527                     ERR("decrease_extent_refcount_data returned %08lx\n", Status);
2528                     return Status;
2529                 }
2530             }
2531 
2532             if (ce->size != ce->old_size && ce->old_count > 0) {
2533                 KEY searchkey;
2534                 traverse_ptr tp;
2535                 void* data;
2536 
2537                 searchkey.obj_id = ce->address;
2538                 searchkey.obj_type = TYPE_EXTENT_ITEM;
2539                 searchkey.offset = ce->old_size;
2540 
2541                 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
2542                 if (!NT_SUCCESS(Status)) {
2543                     ERR("error - find_item returned %08lx\n", Status);
2544                     return Status;
2545                 }
2546 
2547                 if (keycmp(searchkey, tp.item->key)) {
2548                     ERR("could not find (%I64x,%x,%I64x) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2549                     return STATUS_INTERNAL_ERROR;
2550                 }
2551 
2552                 if (tp.item->size > 0) {
2553                     data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2554 
2555                     if (!data) {
2556                         ERR("out of memory\n");
2557                         return STATUS_INSUFFICIENT_RESOURCES;
2558                     }
2559 
2560                     RtlCopyMemory(data, tp.item->data, tp.item->size);
2561                 } else
2562                     data = NULL;
2563 
2564                 Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp);
2565                 if (!NT_SUCCESS(Status)) {
2566                     ERR("insert_tree_item returned %08lx\n", Status);
2567                     if (data) ExFreePool(data);
2568                     return Status;
2569                 }
2570 
2571                 Status = delete_tree_item(Vcb, &tp);
2572                 if (!NT_SUCCESS(Status)) {
2573                     ERR("delete_tree_item returned %08lx\n", Status);
2574                     return Status;
2575                 }
2576             }
2577         }
2578 
2579         RemoveEntryList(&cer->list_entry);
2580         ExFreePool(cer);
2581 
2582         le = le3;
2583     }
2584 
2585 #ifdef DEBUG_PARANOID
2586     if (!IsListEmpty(&ce->old_refs))
2587         WARN("old_refs not empty\n");
2588 #endif
2589 
2590 end:
2591     if (ce->count == 0 && !ce->superseded) {
2592         c->used -= ce->size;
2593         space_list_add(c, ce->address, ce->size, rollback);
2594     }
2595 
2596     RemoveEntryList(&ce->list_entry);
2597     ExFreePool(ce);
2598 
2599     return STATUS_SUCCESS;
2600 }
2601 
/* Adds, replaces or removes the checksums for a run of sectors in the
 * checksum tree.
 *
 *   Vcb     - volume; supplies checksum_root, csum_size and sector_shift
 *   address - address of the first sector of the run
 *   length  - length of the run, in sectors
 *   csum    - length * Vcb->csum_size bytes of checksums to store, or NULL
 *             to delete whatever checksums exist for the run
 *   Irp     - passed through to the tree functions
 *
 * If the tree is empty the checksums are simply inserted, split into items
 * of at most MAX_CSUM_SIZE bytes. Otherwise the existing items overlapping
 * or touching the run are read into one buffer and deleted, the new
 * checksums are overlaid (or the run is marked as empty), and every
 * contiguous stretch of checksummed sectors is written back as new items.
 *
 * The function returns nothing: failures are logged with ERR and the
 * function returns early, possibly having applied only part of the change. */
void add_checksum_entry(device_extension* Vcb, uint64_t address, ULONG length, void* csum, PIRP Irp) {
    KEY searchkey;
    traverse_ptr tp, next_tp;
    NTSTATUS Status;
    uint64_t startaddr, endaddr;
    // One past the last byte of the run. length is widened before shifting so that
    // the byte count can't be truncated to 32 bits.
    const uint64_t rangeend = address + ((uint64_t)length << Vcb->sector_shift);
    ULONG len;
    RTL_BITMAP bmp;
    ULONG* bmparr;
    ULONG runlength, index;

    TRACE("(%p, %I64x, %lx, %p, %p)\n", Vcb, address, length, csum, Irp);

    searchkey.obj_id = EXTENT_CSUM_ID;
    searchkey.obj_type = TYPE_EXTENT_CSUM;
    searchkey.offset = address;

    // FIXME - create checksum_root if it doesn't exist at all

    Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
    if (Status == STATUS_NOT_FOUND) { // tree is completely empty
        if (csum) { // not deleted
            ULONG length2 = length;
            uint64_t off = address;
            void* data = csum;

            // nothing to merge with - insert the run in item-sized pieces
            do {
                uint16_t il = (uint16_t)min(length2, MAX_CSUM_SIZE / Vcb->csum_size);

                void* checksums = ExAllocatePoolWithTag(PagedPool, il * Vcb->csum_size, ALLOC_TAG);
                if (!checksums) {
                    ERR("out of memory\n");
                    return;
                }

                RtlCopyMemory(checksums, data, il * Vcb->csum_size);

                Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums,
                                          il * Vcb->csum_size, NULL, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("insert_tree_item returned %08lx\n", Status);
                    ExFreePool(checksums);
                    return;
                }

                length2 -= il;

                if (length2 > 0) {
                    off += (uint64_t)il << Vcb->sector_shift;
                    data = (uint8_t*)data + (il * Vcb->csum_size);
                }
            } while (length2 > 0);
        }
    } else if (!NT_SUCCESS(Status)) {
        ERR("find_item returned %08lx\n", Status);
        return;
    } else {
        uint32_t tplen;
        uint64_t tpend;
        void* checksums;

        // FIXME - check entry is TYPE_EXTENT_CSUM?

        // If the item found at or before address reaches up to the run, work from its start
        if (tp.item->key.offset < address && tp.item->key.offset + (((uint64_t)tp.item->size << Vcb->sector_shift) / Vcb->csum_size) >= address)
            startaddr = tp.item->key.offset;
        else
            startaddr = address;

        searchkey.obj_id = EXTENT_CSUM_ID;
        searchkey.obj_type = TYPE_EXTENT_CSUM;
        searchkey.offset = rangeend;

        Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
        if (!NT_SUCCESS(Status)) {
            ERR("find_item returned %08lx\n", Status);
            return;
        }

        tplen = tp.item->size / Vcb->csum_size;
        tpend = tp.item->key.offset + ((uint64_t)tplen << Vcb->sector_shift);

        // Likewise, if the item found at the end of the run extends past it, work up to its end
        if (tpend >= rangeend)
            endaddr = tpend;
        else
            endaddr = rangeend;

        TRACE("cs starts at %I64x (%lx sectors)\n", address, length);
        TRACE("startaddr = %I64x\n", startaddr);
        TRACE("endaddr = %I64x\n", endaddr);

        // number of sectors in the working window [startaddr, endaddr)
        len = (ULONG)((endaddr - startaddr) >> Vcb->sector_shift);

        checksums = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * len, ALLOC_TAG);
        if (!checksums) {
            ERR("out of memory\n");
            return;
        }

        bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
        if (!bmparr) {
            ERR("out of memory\n");
            ExFreePool(checksums);
            return;
        }

        RtlInitializeBitMap(&bmp, bmparr, len);
        RtlSetAllBits(&bmp);

        searchkey.obj_id = EXTENT_CSUM_ID;
        searchkey.obj_type = TYPE_EXTENT_CSUM;
        searchkey.offset = address;

        Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
        if (!NT_SUCCESS(Status)) {
            ERR("find_item returned %08lx\n", Status);
            ExFreePool(checksums);
            ExFreePool(bmparr);
            return;
        }

        // set bit = free space, cleared bit = allocated sector

        // Pull every existing item inside the window into the buffer, deleting it from the tree
        while (tp.item->key.offset < endaddr) {
            if (tp.item->key.offset >= startaddr) {
                if (tp.item->size > 0) {
                    // clamp so that an item can't be copied past the end of the buffer
                    ULONG itemlen = (ULONG)min((len - ((tp.item->key.offset - startaddr) >> Vcb->sector_shift)) * Vcb->csum_size, tp.item->size);

                    RtlCopyMemory((uint8_t*)checksums + (((tp.item->key.offset - startaddr) * Vcb->csum_size) >> Vcb->sector_shift),
                                  tp.item->data, itemlen);
                    RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) >> Vcb->sector_shift), itemlen / Vcb->csum_size);
                }

                Status = delete_tree_item(Vcb, &tp);
                if (!NT_SUCCESS(Status)) {
                    ERR("delete_tree_item returned %08lx\n", Status);
                    ExFreePool(checksums);
                    ExFreePool(bmparr);
                    return;
                }
            }

            if (find_next_item(Vcb, &tp, &next_tp, false, Irp)) {
                tp = next_tp;
            } else
                break;
        }

        // Overlay the caller's change on the window
        if (!csum) { // deleted
            RtlSetBits(&bmp, (ULONG)((address - startaddr) >> Vcb->sector_shift), length);
        } else {
            RtlCopyMemory((uint8_t*)checksums + (((address - startaddr) * Vcb->csum_size) >> Vcb->sector_shift),
                          csum, length * Vcb->csum_size);
            RtlClearBits(&bmp, (ULONG)((address - startaddr) >> Vcb->sector_shift), length);
        }

        // Write each run of checksummed sectors back as one or more items

        runlength = RtlFindFirstRunClear(&bmp, &index);

        while (runlength != 0) {
            if (index >= len)
                break;

            if (index + runlength >= len) {
                runlength = len - index;

                if (runlength == 0)
                    break;
            }

            do {
                uint16_t rl;
                uint64_t off;
                void* data;

                // Compare in sectors rather than bytes, so that the product can't overflow
                if (runlength > MAX_CSUM_SIZE / Vcb->csum_size)
                    rl = (uint16_t)(MAX_CSUM_SIZE / Vcb->csum_size);
                else
                    rl = (uint16_t)runlength;

                data = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * rl, ALLOC_TAG);
                if (!data) {
                    ERR("out of memory\n");
                    ExFreePool(bmparr);
                    ExFreePool(checksums);
                    return;
                }

                RtlCopyMemory(data, (uint8_t*)checksums + (Vcb->csum_size * index), Vcb->csum_size * rl);

                off = startaddr + ((uint64_t)index << Vcb->sector_shift);

                Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, Vcb->csum_size * rl, NULL, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("insert_tree_item returned %08lx\n", Status);
                    ExFreePool(data);
                    ExFreePool(bmparr);
                    ExFreePool(checksums);
                    return;
                }

                runlength -= rl;
                index += rl;
            } while (runlength > 0);

            runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
        }

        ExFreePool(bmparr);
        ExFreePool(checksums);
    }
}
2809 
update_chunk_usage(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)2810 static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
2811     LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
2812     chunk* c;
2813     KEY searchkey;
2814     traverse_ptr tp;
2815     BLOCK_GROUP_ITEM* bgi;
2816     NTSTATUS Status;
2817 
2818     TRACE("(%p)\n", Vcb);
2819 
2820     ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
2821 
2822     while (le != &Vcb->chunks) {
2823         c = CONTAINING_RECORD(le, chunk, list_entry);
2824 
2825         acquire_chunk_lock(c, Vcb);
2826 
2827         if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) {
2828             Status = load_cache_chunk(Vcb, c, NULL);
2829 
2830             if (!NT_SUCCESS(Status)) {
2831                 ERR("load_cache_chunk returned %08lx\n", Status);
2832                 release_chunk_lock(c, Vcb);
2833                 goto end;
2834             }
2835         }
2836 
2837         le2 = c->changed_extents.Flink;
2838         while (le2 != &c->changed_extents) {
2839             LIST_ENTRY* le3 = le2->Flink;
2840             changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);
2841 
2842             Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
2843             if (!NT_SUCCESS(Status)) {
2844                 ERR("flush_changed_extent returned %08lx\n", Status);
2845                 release_chunk_lock(c, Vcb);
2846                 goto end;
2847             }
2848 
2849             le2 = le3;
2850         }
2851 
2852         // This is usually done by update_chunks, but we have to check again in case any new chunks
2853         // have been allocated since.
2854         if (c->created) {
2855             Status = create_chunk(Vcb, c, Irp);
2856             if (!NT_SUCCESS(Status)) {
2857                 ERR("create_chunk returned %08lx\n", Status);
2858                 release_chunk_lock(c, Vcb);
2859                 goto end;
2860             }
2861         }
2862 
2863         if (c->old_cache) {
2864             if (c->old_cache->dirty) {
2865                 LIST_ENTRY batchlist;
2866 
2867                 InitializeListHead(&batchlist);
2868 
2869                 Status = flush_fcb(c->old_cache, false, &batchlist, Irp);
2870                 if (!NT_SUCCESS(Status)) {
2871                     ERR("flush_fcb returned %08lx\n", Status);
2872                     release_chunk_lock(c, Vcb);
2873                     clear_batch_list(Vcb, &batchlist);
2874                     goto end;
2875                 }
2876 
2877                 Status = commit_batch_list(Vcb, &batchlist, Irp);
2878                 if (!NT_SUCCESS(Status)) {
2879                     ERR("commit_batch_list returned %08lx\n", Status);
2880                     release_chunk_lock(c, Vcb);
2881                     goto end;
2882                 }
2883             }
2884 
2885             free_fcb(c->old_cache);
2886 
2887             if (c->old_cache->refcount == 0)
2888                 reap_fcb(c->old_cache);
2889 
2890             c->old_cache = NULL;
2891         }
2892 
2893         if (c->used != c->oldused) {
2894             searchkey.obj_id = c->offset;
2895             searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2896             searchkey.offset = c->chunk_item->size;
2897 
2898             Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
2899             if (!NT_SUCCESS(Status)) {
2900                 ERR("error - find_item returned %08lx\n", Status);
2901                 release_chunk_lock(c, Vcb);
2902                 goto end;
2903             }
2904 
2905             if (keycmp(searchkey, tp.item->key)) {
2906                 ERR("could not find (%I64x,%x,%I64x) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2907                 Status = STATUS_INTERNAL_ERROR;
2908                 release_chunk_lock(c, Vcb);
2909                 goto end;
2910             }
2911 
2912             if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2913                 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2914                 Status = STATUS_INTERNAL_ERROR;
2915                 release_chunk_lock(c, Vcb);
2916                 goto end;
2917             }
2918 
2919             bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2920             if (!bgi) {
2921                 ERR("out of memory\n");
2922                 Status = STATUS_INSUFFICIENT_RESOURCES;
2923                 release_chunk_lock(c, Vcb);
2924                 goto end;
2925             }
2926 
2927             RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2928             bgi->used = c->used;
2929 
2930 #ifdef DEBUG_PARANOID
2931             if (bgi->used & 0x8000000000000000) {
2932                 ERR("refusing to write BLOCK_GROUP_ITEM with negative usage value (%I64x)\n", bgi->used);
2933                 int3;
2934             }
2935 #endif
2936 
2937             TRACE("adjusting usage of chunk %I64x to %I64x\n", c->offset, c->used);
2938 
2939             Status = delete_tree_item(Vcb, &tp);
2940             if (!NT_SUCCESS(Status)) {
2941                 ERR("delete_tree_item returned %08lx\n", Status);
2942                 ExFreePool(bgi);
2943                 release_chunk_lock(c, Vcb);
2944                 goto end;
2945             }
2946 
2947             Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp);
2948             if (!NT_SUCCESS(Status)) {
2949                 ERR("insert_tree_item returned %08lx\n", Status);
2950                 ExFreePool(bgi);
2951                 release_chunk_lock(c, Vcb);
2952                 goto end;
2953             }
2954 
2955             Vcb->superblock.bytes_used += c->used - c->oldused;
2956             c->oldused = c->used;
2957         }
2958 
2959         release_chunk_lock(c, Vcb);
2960 
2961         le = le->Flink;
2962     }
2963 
2964     Status = STATUS_SUCCESS;
2965 
2966 end:
2967     ExReleaseResourceLite(&Vcb->chunk_lock);
2968 
2969     return Status;
2970 }
2971 
/* Copies the key of the first entry on t's item list into *key.
 *
 * The entry's ignore flag is not looked at. If the list is empty, *key is
 * left untouched. */
static void get_first_item(tree* t, KEY* key) {
    LIST_ENTRY* first = t->itemlist.Flink;

    if (first == &t->itemlist)
        return;

    *key = CONTAINING_RECORD(first, tree_data, list_entry)->key;
}
2983 
split_tree_at(device_extension * Vcb,tree * t,tree_data * newfirstitem,uint32_t numitems,uint32_t size)2984 static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, uint32_t numitems, uint32_t size) {
2985     tree *nt, *pt;
2986     tree_data* td;
2987     tree_data* oldlastitem;
2988 
2989     TRACE("splitting tree in %I64x at (%I64x,%x,%I64x)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2990 
2991     nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2992     if (!nt) {
2993         ERR("out of memory\n");
2994         return STATUS_INSUFFICIENT_RESOURCES;
2995     }
2996 
2997     if (t->header.level > 0) {
2998         nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2999         if (!nt->nonpaged) {
3000             ERR("out of memory\n");
3001             ExFreePool(nt);
3002             return STATUS_INSUFFICIENT_RESOURCES;
3003         }
3004 
3005         ExInitializeFastMutex(&nt->nonpaged->mutex);
3006     } else
3007         nt->nonpaged = NULL;
3008 
3009     RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
3010     nt->header.address = 0;
3011     nt->header.generation = Vcb->superblock.generation;
3012     nt->header.num_items = t->header.num_items - numitems;
3013     nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
3014 
3015     nt->has_address = false;
3016     nt->Vcb = Vcb;
3017     nt->parent = t->parent;
3018 
3019 #ifdef DEBUG_PARANOID
3020     if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
3021 #endif
3022 
3023     nt->root = t->root;
3024     nt->new_address = 0;
3025     nt->has_new_address = false;
3026     nt->updated_extents = false;
3027     nt->uniqueness_determined = true;
3028     nt->is_unique = true;
3029     nt->list_entry_hash.Flink = NULL;
3030     nt->buf = NULL;
3031     InitializeListHead(&nt->itemlist);
3032 
3033     oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
3034 
3035     nt->itemlist.Flink = &newfirstitem->list_entry;
3036     nt->itemlist.Blink = t->itemlist.Blink;
3037     nt->itemlist.Flink->Blink = &nt->itemlist;
3038     nt->itemlist.Blink->Flink = &nt->itemlist;
3039 
3040     t->itemlist.Blink = &oldlastitem->list_entry;
3041     t->itemlist.Blink->Flink = &t->itemlist;
3042 
3043     nt->size = t->size - size;
3044     t->size = size;
3045     t->header.num_items = numitems;
3046     nt->write = true;
3047 
3048     InsertTailList(&Vcb->trees, &nt->list_entry);
3049 
3050     if (nt->header.level > 0) {
3051         LIST_ENTRY* le = nt->itemlist.Flink;
3052 
3053         while (le != &nt->itemlist) {
3054             tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3055 
3056             if (td2->treeholder.tree) {
3057                 td2->treeholder.tree->parent = nt;
3058 #ifdef DEBUG_PARANOID
3059                 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
3060 #endif
3061             }
3062 
3063             le = le->Flink;
3064         }
3065     } else {
3066         LIST_ENTRY* le = nt->itemlist.Flink;
3067 
3068         while (le != &nt->itemlist) {
3069             tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3070 
3071             if (!td2->inserted && td2->data) {
3072                 uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);
3073 
3074                 if (!data) {
3075                     ERR("out of memory\n");
3076                     return STATUS_INSUFFICIENT_RESOURCES;
3077                 }
3078 
3079                 RtlCopyMemory(data, td2->data, td2->size);
3080                 td2->data = data;
3081                 td2->inserted = true;
3082             }
3083 
3084             le = le->Flink;
3085         }
3086     }
3087 
3088     if (nt->parent) {
3089         td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3090         if (!td) {
3091             ERR("out of memory\n");
3092             return STATUS_INSUFFICIENT_RESOURCES;
3093         }
3094 
3095         td->key = newfirstitem->key;
3096 
3097         InsertHeadList(&t->paritem->list_entry, &td->list_entry);
3098 
3099         td->ignore = false;
3100         td->inserted = true;
3101         td->treeholder.tree = nt;
3102         nt->paritem = td;
3103 
3104         nt->parent->header.num_items++;
3105         nt->parent->size += sizeof(internal_node);
3106 
3107         goto end;
3108     }
3109 
3110     TRACE("adding new tree parent\n");
3111 
3112     if (nt->header.level == 255) {
3113         ERR("cannot add parent to tree at level 255\n");
3114         return STATUS_INTERNAL_ERROR;
3115     }
3116 
3117     pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
3118     if (!pt) {
3119         ERR("out of memory\n");
3120         return STATUS_INSUFFICIENT_RESOURCES;
3121     }
3122 
3123     pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
3124     if (!pt->nonpaged) {
3125         ERR("out of memory\n");
3126         ExFreePool(pt);
3127         return STATUS_INSUFFICIENT_RESOURCES;
3128     }
3129 
3130     ExInitializeFastMutex(&pt->nonpaged->mutex);
3131 
3132     RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
3133     pt->header.address = 0;
3134     pt->header.num_items = 2;
3135     pt->header.level = nt->header.level + 1;
3136     pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
3137 
3138     pt->has_address = false;
3139     pt->Vcb = Vcb;
3140     pt->parent = NULL;
3141     pt->paritem = NULL;
3142     pt->root = t->root;
3143     pt->new_address = 0;
3144     pt->has_new_address = false;
3145     pt->updated_extents = false;
3146     pt->size = pt->header.num_items * sizeof(internal_node);
3147     pt->uniqueness_determined = true;
3148     pt->is_unique = true;
3149     pt->list_entry_hash.Flink = NULL;
3150     pt->buf = NULL;
3151     InitializeListHead(&pt->itemlist);
3152 
3153     InsertTailList(&Vcb->trees, &pt->list_entry);
3154 
3155     td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3156     if (!td) {
3157         ERR("out of memory\n");
3158         return STATUS_INSUFFICIENT_RESOURCES;
3159     }
3160 
3161     get_first_item(t, &td->key);
3162     td->ignore = false;
3163     td->inserted = false;
3164     td->treeholder.address = 0;
3165     td->treeholder.generation = Vcb->superblock.generation;
3166     td->treeholder.tree = t;
3167     InsertTailList(&pt->itemlist, &td->list_entry);
3168     t->paritem = td;
3169 
3170     td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3171     if (!td) {
3172         ERR("out of memory\n");
3173         return STATUS_INSUFFICIENT_RESOURCES;
3174     }
3175 
3176     td->key = newfirstitem->key;
3177     td->ignore = false;
3178     td->inserted = false;
3179     td->treeholder.address = 0;
3180     td->treeholder.generation = Vcb->superblock.generation;
3181     td->treeholder.tree = nt;
3182     InsertTailList(&pt->itemlist, &td->list_entry);
3183     nt->paritem = td;
3184 
3185     pt->write = true;
3186 
3187     t->root->treeholder.tree = pt;
3188 
3189     t->parent = pt;
3190     nt->parent = pt;
3191 
3192 #ifdef DEBUG_PARANOID
3193     if (t->parent && t->parent->header.level <= t->header.level) int3;
3194     if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
3195 #endif
3196 
3197 end:
3198     t->root->root_item.bytes_used += Vcb->superblock.node_size;
3199 
3200     return STATUS_SUCCESS;
3201 }
3202 
split_tree(device_extension * Vcb,tree * t)3203 static NTSTATUS split_tree(device_extension* Vcb, tree* t) {
3204     LIST_ENTRY* le;
3205     uint32_t size, ds, numitems;
3206 
3207     size = 0;
3208     numitems = 0;
3209 
3210     // FIXME - naïve implementation: maximizes number of filled trees
3211 
3212     le = t->itemlist.Flink;
3213     while (le != &t->itemlist) {
3214         tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3215 
3216         if (!td->ignore) {
3217             if (t->header.level == 0)
3218                 ds = sizeof(leaf_node) + td->size;
3219             else
3220                 ds = sizeof(internal_node);
3221 
3222             if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) {
3223                 ERR("(%I64x,%x,%I64x) in tree %I64x is too large (%x > %Ix)\n",
3224                     td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id,
3225                     ds, Vcb->superblock.node_size - sizeof(tree_header));
3226                 return STATUS_INTERNAL_ERROR;
3227             }
3228 
3229             // FIXME - move back if previous item was deleted item with same key
3230             if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
3231                 return split_tree_at(Vcb, t, td, numitems, size);
3232 
3233             size += ds;
3234             numitems++;
3235         }
3236 
3237         le = le->Flink;
3238     }
3239 
3240     return STATUS_SUCCESS;
3241 }
3242 
is_tree_unique(device_extension * Vcb,tree * t,PIRP Irp)3243 bool is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) {
3244     KEY searchkey;
3245     traverse_ptr tp;
3246     NTSTATUS Status;
3247     bool ret = false;
3248     EXTENT_ITEM* ei;
3249     uint8_t* type;
3250 
3251     if (t->uniqueness_determined)
3252         return t->is_unique;
3253 
3254     if (t->parent && !is_tree_unique(Vcb, t->parent, Irp))
3255         goto end;
3256 
3257     if (t->has_address) {
3258         searchkey.obj_id = t->header.address;
3259         searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
3260         searchkey.offset = 0xffffffffffffffff;
3261 
3262         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3263         if (!NT_SUCCESS(Status)) {
3264             ERR("error - find_item returned %08lx\n", Status);
3265             goto end;
3266         }
3267 
3268         if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM))
3269             goto end;
3270 
3271         if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
3272             goto end;
3273 
3274         if (tp.item->size < sizeof(EXTENT_ITEM))
3275             goto end;
3276 
3277         ei = (EXTENT_ITEM*)tp.item->data;
3278 
3279         if (ei->refcount > 1)
3280             goto end;
3281 
3282         if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
3283             EXTENT_ITEM2* ei2;
3284 
3285             if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
3286                 goto end;
3287 
3288             ei2 = (EXTENT_ITEM2*)&ei[1];
3289             type = (uint8_t*)&ei2[1];
3290         } else
3291             type = (uint8_t*)&ei[1];
3292 
3293         if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF)
3294             goto end;
3295     }
3296 
3297     ret = true;
3298 
3299 end:
3300     t->is_unique = ret;
3301     t->uniqueness_determined = true;
3302 
3303     return ret;
3304 }
3305 
/*
 * Tries to combine tree t with the next live sibling under the same parent.
 *
 * Two outcomes are possible once the sibling has been located (and loaded if
 * necessary):
 *   - merge:     if the items of both trees fit in one node, the sibling's
 *                whole item list is appended to t, the sibling's extent is
 *                released, its parent item is removed and the sibling is freed;
 *   - rebalance: otherwise items are moved from the front of the sibling to
 *                the tail of t until t reaches the average of the two sizes,
 *                and the sibling's parent key is updated to its new first item.
 *
 * Parameters:
 *   t               tree to grow; t->parent and t->paritem are dereferenced
 *                   unconditionally, so the caller must only pass non-root trees
 *   done            out: true if a merge happened or at least one item moved
 *   done_deletions  out: set to true (never cleared) when a parent item was
 *                   marked ignored by a merge
 *   Irp, rollback   passed through to the helpers called here
 *
 * Returns STATUS_SUCCESS when nothing could be done as well as on success;
 * otherwise the failing status of do_load_tree, update_tree_extents or
 * reduce_tree_extent, or STATUS_INSUFFICIENT_RESOURCES on allocation failure.
 */
static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, bool* done, bool* done_deletions, PIRP Irp, LIST_ENTRY* rollback) {
    LIST_ENTRY* le;
    tree_data* nextparitem = NULL;
    NTSTATUS Status;
    tree *next_tree, *par;

    *done = false;

    TRACE("trying to amalgamate tree in root %I64x, level %x (size %u)\n", t->root->id, t->header.level, t->size);

    // find the first non-ignored item following t's own entry in the parent
    // FIXME - doesn't capture everything, as it doesn't ascend
    le = t->paritem->list_entry.Flink;
    while (le != &t->parent->itemlist) {
        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

        if (!td->ignore) {
            nextparitem = td;
            break;
        }

        le = le->Flink;
    }

    // t is the last live child of its parent: nothing to amalgamate with
    if (!nextparitem)
        return STATUS_SUCCESS;

    TRACE("nextparitem: key = %I64x,%x,%I64x\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);

    // make sure the sibling is in memory
    if (!nextparitem->treeholder.tree) {
        Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, NULL);
        if (!NT_SUCCESS(Status)) {
            ERR("do_load_tree returned %08lx\n", Status);
            return Status;
        }
    }

    // a sibling that is not unique is left alone
    if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
        return STATUS_SUCCESS;

    next_tree = nextparitem->treeholder.tree;

    if (!next_tree->updated_extents && next_tree->has_address) {
        Status = update_tree_extents(Vcb, next_tree, Irp, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("update_tree_extents returned %08lx\n", Status);
            return Status;
        }
    }

    if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
        // merge two trees into one

        t->header.num_items += next_tree->header.num_items;
        t->size += next_tree->size;

        if (next_tree->header.level > 0) {
            // internal node: re-parent every loaded child to t
            le = next_tree->itemlist.Flink;

            while (le != &next_tree->itemlist) {
                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

                if (td2->treeholder.tree) {
                    td2->treeholder.tree->parent = t;
#ifdef DEBUG_PARANOID
                    if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
#endif
                }

                td2->inserted = true;
                le = le->Flink;
            }
        } else {
            // leaf: give each not-yet-inserted item its own pool copy of its data
            // NOTE(review): presumably the original pointer refers to memory owned by
            // next_tree, which is freed below - confirm against load_tree / free_tree
            le = next_tree->itemlist.Flink;

            while (le != &next_tree->itemlist) {
                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

                if (!td2->inserted && td2->data) {
                    uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, td2->data, td2->size);
                    td2->data = data;
                    td2->inserted = true;
                }

                le = le->Flink;
            }
        }

        // splice next_tree's entire item list onto the tail of t's list
        t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
        t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
        t->itemlist.Blink = next_tree->itemlist.Blink;
        t->itemlist.Blink->Flink = &t->itemlist;

        // leave next_tree with an empty list
        next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;

        next_tree->header.num_items = 0;
        next_tree->size = 0;

        if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
            Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08lx\n", Status);
                return Status;
            }
        } else if (next_tree->has_address) {
            Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08lx\n", Status);
                return Status;
            }
        }

        // drop the sibling's pointer from the parent's accounting
        if (!nextparitem->ignore) {
            nextparitem->ignore = true;
            next_tree->parent->header.num_items--;
            next_tree->parent->size -= sizeof(internal_node);

            *done_deletions = true;
        }

        // every ancestor has to be rewritten
        par = next_tree->parent;
        while (par) {
            par->write = true;
            par = par->parent;
        }

        // unlink and free the parent item, then the now-empty sibling itself
        RemoveEntryList(&nextparitem->list_entry);
        ExFreePool(next_tree->paritem);
        next_tree->paritem = NULL;

        next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;

        free_tree(next_tree);

        *done = true;
    } else {
        // rebalance by moving items from second tree into first
        ULONG avg_size = (t->size + next_tree->size) / 2;
        KEY firstitem = {0, 0, 0};
        bool changed = false;

        TRACE("attempting rebalance\n");

        // keep taking the sibling's head item until t reaches the average size,
        // always leaving at least one live item behind in the sibling
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
            ULONG size;

            // ignored items occupy no space but are moved along with the rest
            if (!td->ignore) {
                if (next_tree->header.level == 0)
                    size = sizeof(leaf_node) + td->size;
                else
                    size = sizeof(internal_node);
            } else
                size = 0;

            if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
                RemoveEntryList(&td->list_entry);
                InsertTailList(&t->itemlist, &td->list_entry);

                if (next_tree->header.level > 0 && td->treeholder.tree) {
                    td->treeholder.tree->parent = t;
#ifdef DEBUG_PARANOID
                    if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
#endif
                } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) {
                    // same private-copy step as in the merge path above
                    uint8_t* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, td->data, td->size);
                    td->data = data;
                }

                td->inserted = true;

                if (!td->ignore) {
                    next_tree->size -= size;
                    t->size += size;
                    next_tree->header.num_items--;
                    t->header.num_items++;
                }

                changed = true;
            } else
                break;

            // the moved entry is gone from next_tree, so continue from its new head
            le = next_tree->itemlist.Flink;
        }

        // the sibling's key in the parent must equal its first remaining live item
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

            if (!td->ignore) {
                firstitem = td->key;
                break;
            }

            le = le->Flink;
        }

        // FIXME - once ascension is working, make this work with parent's parent, etc.
        if (next_tree->paritem)
            next_tree->paritem->key = firstitem;

        // mark the sibling and all of its ancestors for writing
        par = next_tree;
        while (par) {
            par->write = true;
            par = par->parent;
        }

        if (changed)
            *done = true;
    }

    return STATUS_SUCCESS;
}
3535 
update_extent_level(device_extension * Vcb,uint64_t address,tree * t,uint8_t level,PIRP Irp)3536 static NTSTATUS update_extent_level(device_extension* Vcb, uint64_t address, tree* t, uint8_t level, PIRP Irp) {
3537     KEY searchkey;
3538     traverse_ptr tp;
3539     NTSTATUS Status;
3540 
3541     if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3542         searchkey.obj_id = address;
3543         searchkey.obj_type = TYPE_METADATA_ITEM;
3544         searchkey.offset = t->header.level;
3545 
3546         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3547         if (!NT_SUCCESS(Status)) {
3548             ERR("error - find_item returned %08lx\n", Status);
3549             return Status;
3550         }
3551 
3552         if (!keycmp(tp.item->key, searchkey)) {
3553             EXTENT_ITEM_SKINNY_METADATA* eism;
3554 
3555             if (tp.item->size > 0) {
3556                 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3557 
3558                 if (!eism) {
3559                     ERR("out of memory\n");
3560                     return STATUS_INSUFFICIENT_RESOURCES;
3561                 }
3562 
3563                 RtlCopyMemory(eism, tp.item->data, tp.item->size);
3564             } else
3565                 eism = NULL;
3566 
3567             Status = delete_tree_item(Vcb, &tp);
3568             if (!NT_SUCCESS(Status)) {
3569                 ERR("delete_tree_item returned %08lx\n", Status);
3570                 if (eism) ExFreePool(eism);
3571                 return Status;
3572             }
3573 
3574             Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp);
3575             if (!NT_SUCCESS(Status)) {
3576                 ERR("insert_tree_item returned %08lx\n", Status);
3577                 if (eism) ExFreePool(eism);
3578                 return Status;
3579             }
3580 
3581             return STATUS_SUCCESS;
3582         }
3583     }
3584 
3585     searchkey.obj_id = address;
3586     searchkey.obj_type = TYPE_EXTENT_ITEM;
3587     searchkey.offset = 0xffffffffffffffff;
3588 
3589     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3590     if (!NT_SUCCESS(Status)) {
3591         ERR("error - find_item returned %08lx\n", Status);
3592         return Status;
3593     }
3594 
3595     if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3596         EXTENT_ITEM_TREE* eit;
3597 
3598         if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
3599             ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
3600             return STATUS_INTERNAL_ERROR;
3601         }
3602 
3603         eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3604 
3605         if (!eit) {
3606             ERR("out of memory\n");
3607             return STATUS_INSUFFICIENT_RESOURCES;
3608         }
3609 
3610         RtlCopyMemory(eit, tp.item->data, tp.item->size);
3611 
3612         Status = delete_tree_item(Vcb, &tp);
3613         if (!NT_SUCCESS(Status)) {
3614             ERR("delete_tree_item returned %08lx\n", Status);
3615             ExFreePool(eit);
3616             return Status;
3617         }
3618 
3619         eit->level = level;
3620 
3621         Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp);
3622         if (!NT_SUCCESS(Status)) {
3623             ERR("insert_tree_item returned %08lx\n", Status);
3624             ExFreePool(eit);
3625             return Status;
3626         }
3627 
3628         return STATUS_SUCCESS;
3629     }
3630 
3631     ERR("could not find EXTENT_ITEM for address %I64x\n", address);
3632 
3633     return STATUS_INTERNAL_ERROR;
3634 }
3635 
update_tree_extents_recursive(device_extension * Vcb,tree * t,PIRP Irp,LIST_ENTRY * rollback)3636 static NTSTATUS update_tree_extents_recursive(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
3637     NTSTATUS Status;
3638 
3639     if (t->parent && !t->parent->updated_extents && t->parent->has_address) {
3640         Status = update_tree_extents_recursive(Vcb, t->parent, Irp, rollback);
3641         if (!NT_SUCCESS(Status))
3642             return Status;
3643     }
3644 
3645     Status = update_tree_extents(Vcb, t, Irp, rollback);
3646     if (!NT_SUCCESS(Status)) {
3647         ERR("update_tree_extents returned %08lx\n", Status);
3648         return Status;
3649     }
3650 
3651     return STATUS_SUCCESS;
3652 }
3653 
do_splits(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)3654 static NTSTATUS do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
3655     ULONG level, max_level;
3656     uint32_t min_size, min_size_fst;
3657     bool empty, done_deletions = false;
3658     NTSTATUS Status;
3659     tree* t;
3660 
3661     TRACE("(%p)\n", Vcb);
3662 
3663     max_level = 0;
3664 
3665     for (level = 0; level <= 255; level++) {
3666         LIST_ENTRY *le, *nextle;
3667 
3668         empty = true;
3669 
3670         TRACE("doing level %lu\n", level);
3671 
3672         le = Vcb->trees.Flink;
3673 
3674         while (le != &Vcb->trees) {
3675             t = CONTAINING_RECORD(le, tree, list_entry);
3676 
3677             nextle = le->Flink;
3678 
3679             if (t->write && t->header.level == level) {
3680                 empty = false;
3681 
3682                 if (t->header.num_items == 0) {
3683                     if (t->parent) {
3684                         done_deletions = true;
3685 
3686                         TRACE("deleting tree in root %I64x\n", t->root->id);
3687 
3688                         t->root->root_item.bytes_used -= Vcb->superblock.node_size;
3689 
3690                         if (t->has_new_address) { // delete associated EXTENT_ITEM
3691                             Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);
3692 
3693                             if (!NT_SUCCESS(Status)) {
3694                                 ERR("reduce_tree_extent returned %08lx\n", Status);
3695                                 return Status;
3696                             }
3697 
3698                             t->has_new_address = false;
3699                         } else if (t->has_address) {
3700                             Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);
3701 
3702                             if (!NT_SUCCESS(Status)) {
3703                                 ERR("reduce_tree_extent returned %08lx\n", Status);
3704                                 return Status;
3705                             }
3706 
3707                             t->has_address = false;
3708                         }
3709 
3710                         if (!t->paritem->ignore) {
3711                             t->paritem->ignore = true;
3712                             t->parent->header.num_items--;
3713                             t->parent->size -= sizeof(internal_node);
3714                         }
3715 
3716                         RemoveEntryList(&t->paritem->list_entry);
3717                         ExFreePool(t->paritem);
3718                         t->paritem = NULL;
3719 
3720                         free_tree(t);
3721                     } else if (t->header.level != 0) {
3722                         if (t->has_new_address) {
3723                             Status = update_extent_level(Vcb, t->new_address, t, 0, Irp);
3724 
3725                             if (!NT_SUCCESS(Status)) {
3726                                 ERR("update_extent_level returned %08lx\n", Status);
3727                                 return Status;
3728                             }
3729                         }
3730 
3731                         t->header.level = 0;
3732                     }
3733                 } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
3734                     TRACE("splitting overlarge tree (%x > %Ix)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));
3735 
3736                     if (!t->updated_extents && t->has_address) {
3737                         Status = update_tree_extents_recursive(Vcb, t, Irp, rollback);
3738                         if (!NT_SUCCESS(Status)) {
3739                             ERR("update_tree_extents_recursive returned %08lx\n", Status);
3740                             return Status;
3741                         }
3742                     }
3743 
3744                     Status = split_tree(Vcb, t);
3745 
3746                     if (!NT_SUCCESS(Status)) {
3747                         ERR("split_tree returned %08lx\n", Status);
3748                         return Status;
3749                     }
3750                 }
3751             }
3752 
3753             le = nextle;
3754         }
3755 
3756         if (!empty) {
3757             max_level = level;
3758         } else {
3759             TRACE("nothing found for level %lu\n", level);
3760             break;
3761         }
3762     }
3763 
3764     min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
3765     min_size_fst = (Vcb->superblock.node_size - sizeof(tree_header)) / 4;
3766 
3767     for (level = 0; level <= max_level; level++) {
3768         LIST_ENTRY* le;
3769 
3770         le = Vcb->trees.Flink;
3771 
3772         while (le != &Vcb->trees) {
3773             t = CONTAINING_RECORD(le, tree, list_entry);
3774 
3775             if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent &&
3776                 ((t->size < min_size && t->root->id != BTRFS_ROOT_FREE_SPACE) || (t->size < min_size_fst && t->root->id == BTRFS_ROOT_FREE_SPACE)) &&
3777                 is_tree_unique(Vcb, t, Irp)) {
3778                 bool done;
3779 
3780                 do {
3781                     Status = try_tree_amalgamate(Vcb, t, &done, &done_deletions, Irp, rollback);
3782                     if (!NT_SUCCESS(Status)) {
3783                         ERR("try_tree_amalgamate returned %08lx\n", Status);
3784                         return Status;
3785                     }
3786                 } while (done && t->size < min_size);
3787             }
3788 
3789             le = le->Flink;
3790         }
3791     }
3792 
3793     // simplify trees if top tree only has one entry
3794 
3795     if (done_deletions) {
3796         for (level = max_level; level > 0; level--) {
3797             LIST_ENTRY *le, *nextle;
3798 
3799             le = Vcb->trees.Flink;
3800             while (le != &Vcb->trees) {
3801                 nextle = le->Flink;
3802                 t = CONTAINING_RECORD(le, tree, list_entry);
3803 
3804                 if (t->write && t->header.level == level) {
3805                     if (!t->parent && t->header.num_items == 1) {
3806                         LIST_ENTRY* le2 = t->itemlist.Flink;
3807                         tree_data* td = NULL;
3808                         tree* child_tree = NULL;
3809 
3810                         while (le2 != &t->itemlist) {
3811                             td = CONTAINING_RECORD(le2, tree_data, list_entry);
3812                             if (!td->ignore)
3813                                 break;
3814                             le2 = le2->Flink;
3815                         }
3816 
3817                         TRACE("deleting top-level tree in root %I64x with one item\n", t->root->id);
3818 
3819                         if (t->has_new_address) { // delete associated EXTENT_ITEM
3820                             Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback);
3821 
3822                             if (!NT_SUCCESS(Status)) {
3823                                 ERR("reduce_tree_extent returned %08lx\n", Status);
3824                                 return Status;
3825                             }
3826 
3827                             t->has_new_address = false;
3828                         } else if (t->has_address) {
3829                             Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback);
3830 
3831                             if (!NT_SUCCESS(Status)) {
3832                                 ERR("reduce_tree_extent returned %08lx\n", Status);
3833                                 return Status;
3834                             }
3835 
3836                             t->has_address = false;
3837                         }
3838 
3839                         if (!td->treeholder.tree) { // load first item if not already loaded
3840                             KEY searchkey = {0,0,0};
3841                             traverse_ptr tp;
3842 
3843                             Status = find_item(Vcb, t->root, &tp, &searchkey, false, Irp);
3844                             if (!NT_SUCCESS(Status)) {
3845                                 ERR("error - find_item returned %08lx\n", Status);
3846                                 return Status;
3847                             }
3848                         }
3849 
3850                         child_tree = td->treeholder.tree;
3851 
3852                         if (child_tree) {
3853                             child_tree->parent = NULL;
3854                             child_tree->paritem = NULL;
3855                         }
3856 
3857                         t->root->root_item.bytes_used -= Vcb->superblock.node_size;
3858 
3859                         free_tree(t);
3860 
3861                         if (child_tree)
3862                             child_tree->root->treeholder.tree = child_tree;
3863                     }
3864                 }
3865 
3866                 le = nextle;
3867             }
3868         }
3869     }
3870 
3871     return STATUS_SUCCESS;
3872 }
3873 
remove_root_extents(device_extension * Vcb,root * r,tree_holder * th,uint8_t level,tree * parent,PIRP Irp,LIST_ENTRY * rollback)3874 static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, uint8_t level, tree* parent, PIRP Irp, LIST_ENTRY* rollback) {
3875     NTSTATUS Status;
3876 
3877     if (!th->tree) {
3878         uint8_t* buf;
3879         chunk* c;
3880 
3881         buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
3882         if (!buf) {
3883             ERR("out of memory\n");
3884             return STATUS_INSUFFICIENT_RESOURCES;
3885         }
3886 
3887         Status = read_data(Vcb, th->address, Vcb->superblock.node_size, NULL, true, buf, NULL,
3888                            &c, Irp, th->generation, false, NormalPagePriority);
3889         if (!NT_SUCCESS(Status)) {
3890             ERR("read_data returned 0x%08lx\n", Status);
3891             ExFreePool(buf);
3892             return Status;
3893         }
3894 
3895         Status = load_tree(Vcb, th->address, buf, r, &th->tree);
3896 
3897         if (!th->tree || th->tree->buf != buf)
3898             ExFreePool(buf);
3899 
3900         if (!NT_SUCCESS(Status)) {
3901             ERR("load_tree(%I64x) returned %08lx\n", th->address, Status);
3902             return Status;
3903         }
3904     }
3905 
3906     if (level > 0) {
3907         LIST_ENTRY* le = th->tree->itemlist.Flink;
3908 
3909         while (le != &th->tree->itemlist) {
3910             tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3911 
3912             if (!td->ignore) {
3913                 Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, th->tree, Irp, rollback);
3914 
3915                 if (!NT_SUCCESS(Status)) {
3916                     ERR("remove_root_extents returned %08lx\n", Status);
3917                     return Status;
3918                 }
3919             }
3920 
3921             le = le->Flink;
3922         }
3923     }
3924 
3925     if (th->tree && !th->tree->updated_extents && th->tree->has_address) {
3926         Status = update_tree_extents(Vcb, th->tree, Irp, rollback);
3927         if (!NT_SUCCESS(Status)) {
3928             ERR("update_tree_extents returned %08lx\n", Status);
3929             return Status;
3930         }
3931     }
3932 
3933     if (!th->tree || th->tree->has_address) {
3934         Status = reduce_tree_extent(Vcb, th->address, NULL, parent ? parent->header.tree_id : r->id, level, Irp, rollback);
3935 
3936         if (!NT_SUCCESS(Status)) {
3937             ERR("reduce_tree_extent(%I64x) returned %08lx\n", th->address, Status);
3938             return Status;
3939         }
3940     }
3941 
3942     return STATUS_SUCCESS;
3943 }
3944 
drop_root(device_extension * Vcb,root * r,PIRP Irp,LIST_ENTRY * rollback)3945 static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
3946     NTSTATUS Status;
3947     KEY searchkey;
3948     traverse_ptr tp;
3949 
3950     Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback);
3951     if (!NT_SUCCESS(Status)) {
3952         ERR("remove_root_extents returned %08lx\n", Status);
3953         return Status;
3954     }
3955 
3956     // remove entries in uuid root (tree 9)
3957     if (Vcb->uuid_root) {
3958         RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(uint64_t));
3959         searchkey.obj_type = TYPE_SUBVOL_UUID;
3960         RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
3961 
3962         if (searchkey.obj_id != 0 || searchkey.offset != 0) {
3963             Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
3964             if (!NT_SUCCESS(Status)) {
3965                 WARN("find_item returned %08lx\n", Status);
3966             } else {
3967                 if (!keycmp(tp.item->key, searchkey)) {
3968                     Status = delete_tree_item(Vcb, &tp);
3969                     if (!NT_SUCCESS(Status)) {
3970                         ERR("delete_tree_item returned %08lx\n", Status);
3971                         return Status;
3972                     }
3973                 } else
3974                     WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3975             }
3976         }
3977 
3978         if (r->root_item.rtransid > 0) {
3979             RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid.uuid[0], sizeof(uint64_t));
3980             searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
3981             RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
3982 
3983             Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
3984             if (!NT_SUCCESS(Status))
3985                 WARN("find_item returned %08lx\n", Status);
3986             else {
3987                 if (!keycmp(tp.item->key, searchkey)) {
3988                     if (tp.item->size == sizeof(uint64_t)) {
3989                         uint64_t* id = (uint64_t*)tp.item->data;
3990 
3991                         if (*id == r->id) {
3992                             Status = delete_tree_item(Vcb, &tp);
3993                             if (!NT_SUCCESS(Status)) {
3994                                 ERR("delete_tree_item returned %08lx\n", Status);
3995                                 return Status;
3996                             }
3997                         }
3998                     } else if (tp.item->size > sizeof(uint64_t)) {
3999                         ULONG i;
4000                         uint64_t* ids = (uint64_t*)tp.item->data;
4001 
4002                         for (i = 0; i < tp.item->size / sizeof(uint64_t); i++) {
4003                             if (ids[i] == r->id) {
4004                                 uint64_t* ne;
4005 
4006                                 ne = ExAllocatePoolWithTag(PagedPool, tp.item->size - sizeof(uint64_t), ALLOC_TAG);
4007                                 if (!ne) {
4008                                     ERR("out of memory\n");
4009                                     return STATUS_INSUFFICIENT_RESOURCES;
4010                                 }
4011 
4012                                 if (i > 0)
4013                                     RtlCopyMemory(ne, ids, sizeof(uint64_t) * i);
4014 
4015                                 if ((i + 1) * sizeof(uint64_t) < tp.item->size)
4016                                     RtlCopyMemory(&ne[i], &ids[i + 1], tp.item->size - ((i + 1) * sizeof(uint64_t)));
4017 
4018                                 Status = delete_tree_item(Vcb, &tp);
4019                                 if (!NT_SUCCESS(Status)) {
4020                                     ERR("delete_tree_item returned %08lx\n", Status);
4021                                     ExFreePool(ne);
4022                                     return Status;
4023                                 }
4024 
4025                                 Status = insert_tree_item(Vcb, Vcb->uuid_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
4026                                                           ne, tp.item->size - sizeof(uint64_t), NULL, Irp);
4027                                 if (!NT_SUCCESS(Status)) {
4028                                     ERR("insert_tree_item returned %08lx\n", Status);
4029                                     ExFreePool(ne);
4030                                     return Status;
4031                                 }
4032 
4033                                 break;
4034                             }
4035                         }
4036                     }
4037                 } else
4038                     WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
4039             }
4040         }
4041     }
4042 
4043     // delete ROOT_ITEM
4044 
4045     searchkey.obj_id = r->id;
4046     searchkey.obj_type = TYPE_ROOT_ITEM;
4047     searchkey.offset = 0xffffffffffffffff;
4048 
4049     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
4050     if (!NT_SUCCESS(Status)) {
4051         ERR("find_item returned %08lx\n", Status);
4052         return Status;
4053     }
4054 
4055     if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
4056         Status = delete_tree_item(Vcb, &tp);
4057 
4058         if (!NT_SUCCESS(Status)) {
4059             ERR("delete_tree_item returned %08lx\n", Status);
4060             return Status;
4061         }
4062     } else
4063         WARN("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
4064 
4065     // delete items in tree cache
4066 
4067     free_trees_root(Vcb, r);
4068 
4069     return STATUS_SUCCESS;
4070 }
4071 
drop_roots(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)4072 static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
4073     LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2;
4074     NTSTATUS Status;
4075 
4076     while (le != &Vcb->drop_roots) {
4077         root* r = CONTAINING_RECORD(le, root, list_entry);
4078 
4079         le2 = le->Flink;
4080 
4081         Status = drop_root(Vcb, r, Irp, rollback);
4082         if (!NT_SUCCESS(Status)) {
4083             ERR("drop_root(%I64x) returned %08lx\n", r->id, Status);
4084             return Status;
4085         }
4086 
4087         le = le2;
4088     }
4089 
4090     return STATUS_SUCCESS;
4091 }
4092 
update_dev_item(device_extension * Vcb,device * device,PIRP Irp)4093 NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp) {
4094     KEY searchkey;
4095     traverse_ptr tp;
4096     DEV_ITEM* di;
4097     NTSTATUS Status;
4098 
4099     searchkey.obj_id = 1;
4100     searchkey.obj_type = TYPE_DEV_ITEM;
4101     searchkey.offset = device->devitem.dev_id;
4102 
4103     Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
4104     if (!NT_SUCCESS(Status)) {
4105         ERR("error - find_item returned %08lx\n", Status);
4106         return Status;
4107     }
4108 
4109     if (keycmp(tp.item->key, searchkey)) {
4110         ERR("error - could not find DEV_ITEM for device %I64x\n", device->devitem.dev_id);
4111         return STATUS_INTERNAL_ERROR;
4112     }
4113 
4114     Status = delete_tree_item(Vcb, &tp);
4115     if (!NT_SUCCESS(Status)) {
4116         ERR("delete_tree_item returned %08lx\n", Status);
4117         return Status;
4118     }
4119 
4120     di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
4121     if (!di) {
4122         ERR("out of memory\n");
4123         return STATUS_INSUFFICIENT_RESOURCES;
4124     }
4125 
4126     RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
4127 
4128     Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
4129     if (!NT_SUCCESS(Status)) {
4130         ERR("insert_tree_item returned %08lx\n", Status);
4131         ExFreePool(di);
4132         return Status;
4133     }
4134 
4135     return STATUS_SUCCESS;
4136 }
4137 
/* Rewrites Vcb->superblock.sys_chunk_array from the Vcb->sys_chunks list.
 * Entries are packed back to back in list order, each as its KEY followed
 * directly by its `size` bytes of data. No bounds check is done here. */
static void regen_bootstrap(device_extension* Vcb) {
    LIST_ENTRY* le;
    USHORT pos = 0; // write position within sys_chunk_array

    for (le = Vcb->sys_chunks.Flink; le != &Vcb->sys_chunks; le = le->Flink) {
        sys_chunk* entry = CONTAINING_RECORD(le, sys_chunk, list_entry);

        TRACE("%I64x,%x,%I64x\n", entry->key.obj_id, entry->key.obj_type, entry->key.offset);

        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[pos], &entry->key, sizeof(KEY));
        pos += sizeof(KEY);

        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[pos], entry->data, entry->size);
        pos += entry->size;
    }
}
4159 
add_to_bootstrap(device_extension * Vcb,uint64_t obj_id,uint8_t obj_type,uint64_t offset,void * data,uint16_t size)4160 static NTSTATUS add_to_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset, void* data, uint16_t size) {
4161     sys_chunk* sc;
4162     LIST_ENTRY* le;
4163 
4164     if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
4165         ERR("error - bootstrap is full\n");
4166         return STATUS_INTERNAL_ERROR;
4167     }
4168 
4169     sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
4170     if (!sc) {
4171         ERR("out of memory\n");
4172         return STATUS_INSUFFICIENT_RESOURCES;
4173     }
4174 
4175     sc->key.obj_id = obj_id;
4176     sc->key.obj_type = obj_type;
4177     sc->key.offset = offset;
4178     sc->size = size;
4179     sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
4180     if (!sc->data) {
4181         ERR("out of memory\n");
4182         ExFreePool(sc);
4183         return STATUS_INSUFFICIENT_RESOURCES;
4184     }
4185 
4186     RtlCopyMemory(sc->data, data, sc->size);
4187 
4188     le = Vcb->sys_chunks.Flink;
4189     while (le != &Vcb->sys_chunks) {
4190         sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4191 
4192         if (keycmp(sc2->key, sc->key) == 1)
4193             break;
4194 
4195         le = le->Flink;
4196     }
4197     InsertTailList(le, &sc->list_entry);
4198 
4199     Vcb->superblock.n += sizeof(KEY) + size;
4200 
4201     regen_bootstrap(Vcb);
4202 
4203     return STATUS_SUCCESS;
4204 }
4205 
create_chunk(device_extension * Vcb,chunk * c,PIRP Irp)4206 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp) {
4207     CHUNK_ITEM* ci;
4208     CHUNK_ITEM_STRIPE* cis;
4209     BLOCK_GROUP_ITEM* bgi;
4210     uint16_t i, factor;
4211     NTSTATUS Status;
4212 
4213     ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
4214     if (!ci) {
4215         ERR("out of memory\n");
4216         return STATUS_INSUFFICIENT_RESOURCES;
4217     }
4218 
4219     RtlCopyMemory(ci, c->chunk_item, c->size);
4220 
4221     Status = insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp);
4222     if (!NT_SUCCESS(Status)) {
4223         ERR("insert_tree_item failed\n");
4224         ExFreePool(ci);
4225         return Status;
4226     }
4227 
4228     if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
4229         Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
4230         if (!NT_SUCCESS(Status)) {
4231             ERR("add_to_bootstrap returned %08lx\n", Status);
4232             return Status;
4233         }
4234     }
4235 
4236     // add BLOCK_GROUP_ITEM to tree 2
4237 
4238     bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
4239     if (!bgi) {
4240         ERR("out of memory\n");
4241         return STATUS_INSUFFICIENT_RESOURCES;
4242     }
4243 
4244     bgi->used = c->used;
4245     bgi->chunk_tree = 0x100;
4246     bgi->flags = c->chunk_item->type;
4247 
4248     Status = insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp);
4249     if (!NT_SUCCESS(Status)) {
4250         ERR("insert_tree_item failed\n");
4251         ExFreePool(bgi);
4252         return Status;
4253     }
4254 
4255     if (c->chunk_item->type & BLOCK_FLAG_RAID0)
4256         factor = c->chunk_item->num_stripes;
4257     else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
4258         factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
4259     else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
4260         factor = c->chunk_item->num_stripes - 1;
4261     else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
4262         factor = c->chunk_item->num_stripes - 2;
4263     else // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
4264         factor = 1;
4265 
4266     // add DEV_EXTENTs to tree 4
4267 
4268     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
4269 
4270     for (i = 0; i < c->chunk_item->num_stripes; i++) {
4271         DEV_EXTENT* de;
4272 
4273         de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
4274         if (!de) {
4275             ERR("out of memory\n");
4276             return STATUS_INSUFFICIENT_RESOURCES;
4277         }
4278 
4279         de->chunktree = Vcb->chunk_root->id;
4280         de->objid = 0x100;
4281         de->address = c->offset;
4282         de->length = c->chunk_item->size / factor;
4283         de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
4284 
4285         Status = insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp);
4286         if (!NT_SUCCESS(Status)) {
4287             ERR("insert_tree_item returned %08lx\n", Status);
4288             ExFreePool(de);
4289             return Status;
4290         }
4291 
4292         // FIXME - no point in calling this twice for the same device
4293         Status = update_dev_item(Vcb, c->devices[i], Irp);
4294         if (!NT_SUCCESS(Status)) {
4295             ERR("update_dev_item returned %08lx\n", Status);
4296             return Status;
4297         }
4298     }
4299 
4300     c->created = false;
4301     c->oldused = c->used;
4302 
4303     Vcb->superblock.bytes_used += c->used;
4304 
4305     return STATUS_SUCCESS;
4306 }
4307 
/* Removes the first entry of Vcb->sys_chunks whose key equals
 * (obj_id, obj_type, offset): it is unlinked and freed, superblock.n is
 * reduced by the size of its key plus data, and sys_chunk_array is
 * regenerated. Does nothing if no entry matches. */
static void remove_from_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset) {
    LIST_ENTRY* le;

    for (le = Vcb->sys_chunks.Flink; le != &Vcb->sys_chunks; le = le->Flink) {
        sys_chunk* entry = CONTAINING_RECORD(le, sys_chunk, list_entry);

        if (entry->key.obj_id != obj_id || entry->key.obj_type != obj_type || entry->key.offset != offset)
            continue;

        RemoveEntryList(&entry->list_entry);

        Vcb->superblock.n -= sizeof(KEY) + entry->size;

        ExFreePool(entry->data);
        ExFreePool(entry);

        regen_bootstrap(Vcb);
        return;
    }
}
4330 
set_xattr(device_extension * Vcb,LIST_ENTRY * batchlist,root * subvol,uint64_t inode,char * name,uint16_t namelen,uint32_t crc32,uint8_t * data,uint16_t datalen)4331 static NTSTATUS set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name, uint16_t namelen,
4332                           uint32_t crc32, uint8_t* data, uint16_t datalen) {
4333     NTSTATUS Status;
4334     uint16_t xasize;
4335     DIR_ITEM* xa;
4336 
4337     TRACE("(%p, %I64x, %I64x, %.*s, %08x, %p, %u)\n", Vcb, subvol->id, inode, namelen, name, crc32, data, datalen);
4338 
4339     xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen + datalen;
4340 
4341     xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4342     if (!xa) {
4343         ERR("out of memory\n");
4344         return STATUS_INSUFFICIENT_RESOURCES;
4345     }
4346 
4347     xa->key.obj_id = 0;
4348     xa->key.obj_type = 0;
4349     xa->key.offset = 0;
4350     xa->transid = Vcb->superblock.generation;
4351     xa->m = datalen;
4352     xa->n = namelen;
4353     xa->type = BTRFS_TYPE_EA;
4354     RtlCopyMemory(xa->name, name, namelen);
4355     RtlCopyMemory(xa->name + namelen, data, datalen);
4356 
4357     Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr);
4358     if (!NT_SUCCESS(Status)) {
4359         ERR("insert_tree_item_batch returned %08lx\n", Status);
4360         ExFreePool(xa);
4361         return Status;
4362     }
4363 
4364     return STATUS_SUCCESS;
4365 }
4366 
delete_xattr(device_extension * Vcb,LIST_ENTRY * batchlist,root * subvol,uint64_t inode,char * name,uint16_t namelen,uint32_t crc32)4367 static NTSTATUS delete_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name,
4368                              uint16_t namelen, uint32_t crc32) {
4369     NTSTATUS Status;
4370     uint16_t xasize;
4371     DIR_ITEM* xa;
4372 
4373     TRACE("(%p, %I64x, %I64x, %.*s, %08x)\n", Vcb, subvol->id, inode, namelen, name, crc32);
4374 
4375     xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen;
4376 
4377     xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4378     if (!xa) {
4379         ERR("out of memory\n");
4380         return STATUS_INSUFFICIENT_RESOURCES;
4381     }
4382 
4383     xa->key.obj_id = 0;
4384     xa->key.obj_type = 0;
4385     xa->key.offset = 0;
4386     xa->transid = Vcb->superblock.generation;
4387     xa->m = 0;
4388     xa->n = namelen;
4389     xa->type = BTRFS_TYPE_EA;
4390     RtlCopyMemory(xa->name, name, namelen);
4391 
4392     Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_DeleteXattr);
4393     if (!NT_SUCCESS(Status)) {
4394         ERR("insert_tree_item_batch returned %08lx\n", Status);
4395         ExFreePool(xa);
4396         return Status;
4397     }
4398 
4399     return STATUS_SUCCESS;
4400 }
4401 
insert_sparse_extent(fcb * fcb,LIST_ENTRY * batchlist,uint64_t start,uint64_t length)4402 static NTSTATUS insert_sparse_extent(fcb* fcb, LIST_ENTRY* batchlist, uint64_t start, uint64_t length) {
4403     NTSTATUS Status;
4404     EXTENT_DATA* ed;
4405     EXTENT_DATA2* ed2;
4406 
4407     TRACE("((%I64x, %I64x), %I64x, %I64x)\n", fcb->subvol->id, fcb->inode, start, length);
4408 
4409     ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4410     if (!ed) {
4411         ERR("out of memory\n");
4412         return STATUS_INSUFFICIENT_RESOURCES;
4413     }
4414 
4415     ed->generation = fcb->Vcb->superblock.generation;
4416     ed->decoded_size = length;
4417     ed->compression = BTRFS_COMPRESSION_NONE;
4418     ed->encryption = BTRFS_ENCRYPTION_NONE;
4419     ed->encoding = BTRFS_ENCODING_NONE;
4420     ed->type = EXTENT_TYPE_REGULAR;
4421 
4422     ed2 = (EXTENT_DATA2*)ed->data;
4423     ed2->address = 0;
4424     ed2->size = 0;
4425     ed2->offset = 0;
4426     ed2->num_bytes = length;
4427 
4428     Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), Batch_Insert);
4429     if (!NT_SUCCESS(Status)) {
4430         ERR("insert_tree_item_batch returned %08lx\n", Status);
4431         ExFreePool(ed);
4432         return Status;
4433     }
4434 
4435     return STATUS_SUCCESS;
4436 }
4437 
split_batch_item_list(batch_item_ind * bii)4438 static NTSTATUS split_batch_item_list(batch_item_ind* bii) {
4439     LIST_ENTRY* le;
4440     unsigned int i = 0;
4441     LIST_ENTRY* midpoint = NULL;
4442     batch_item_ind* bii2;
4443     batch_item* midpoint_item;
4444     LIST_ENTRY* before_midpoint;
4445 
4446     le = bii->items.Flink;
4447     while (le != &bii->items) {
4448         if (i >= bii->num_items / 2) {
4449             midpoint = le;
4450             break;
4451         }
4452 
4453         i++;
4454 
4455         le = le->Flink;
4456     }
4457 
4458     if (!midpoint)
4459         return STATUS_SUCCESS;
4460 
4461     // make sure items on either side of split don't have same key
4462 
4463     while (midpoint->Blink != &bii->items) {
4464         batch_item* item = CONTAINING_RECORD(midpoint, batch_item, list_entry);
4465         batch_item* prev = CONTAINING_RECORD(midpoint->Blink, batch_item, list_entry);
4466 
4467         if (item->key.obj_id != prev->key.obj_id)
4468             break;
4469 
4470         if (item->key.obj_type != prev->key.obj_type)
4471             break;
4472 
4473         if (item->key.offset != prev->key.offset)
4474             break;
4475 
4476         midpoint = midpoint->Blink;
4477         i--;
4478     }
4479 
4480     if (midpoint->Blink == &bii->items)
4481         return STATUS_SUCCESS;
4482 
4483     bii2 = ExAllocatePoolWithTag(PagedPool, sizeof(batch_item_ind), ALLOC_TAG);
4484     if (!bii2) {
4485         ERR("out of memory\n");
4486         return STATUS_INSUFFICIENT_RESOURCES;
4487     }
4488 
4489     midpoint_item = CONTAINING_RECORD(midpoint, batch_item, list_entry);
4490 
4491     bii2->key.obj_id = midpoint_item->key.obj_id;
4492     bii2->key.obj_type = midpoint_item->key.obj_type;
4493     bii2->key.offset = midpoint_item->key.offset;
4494 
4495     bii2->num_items = bii->num_items - i;
4496     bii->num_items = i;
4497 
4498     before_midpoint = midpoint->Blink;
4499 
4500     bii2->items.Flink = midpoint;
4501     midpoint->Blink = &bii2->items;
4502     bii2->items.Blink = bii->items.Blink;
4503     bii->items.Blink->Flink = &bii2->items;
4504 
4505     bii->items.Blink = before_midpoint;
4506     before_midpoint->Flink = &bii->items;
4507 
4508     InsertHeadList(&bii->list_entry, &bii2->list_entry);
4509 
4510     return STATUS_SUCCESS;
4511 }
4512 
4513 #ifdef _MSC_VER
4514 #pragma warning(push)
4515 #pragma warning(suppress: 28194)
4516 #endif
4517 static NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, uint64_t objid,
4518                                        uint8_t objtype, uint64_t offset, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data,
4519                                        uint16_t datalen, enum batch_operation operation) {
4520     LIST_ENTRY* le;
4521     batch_root* br = NULL;
4522     batch_item* bi;
4523 
4524     le = batchlist->Flink;
4525     while (le != batchlist) {
4526         batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry);
4527 
4528         if (br2->r == r) {
4529             br = br2;
4530             break;
4531         }
4532 
4533         le = le->Flink;
4534     }
4535 
4536     if (!br) {
4537         br = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG);
4538         if (!br) {
4539             ERR("out of memory\n");
4540             return STATUS_INSUFFICIENT_RESOURCES;
4541         }
4542 
4543         br->r = r;
4544         InitializeListHead(&br->items_ind);
4545         InsertTailList(batchlist, &br->list_entry);
4546     }
4547 
4548     if (IsListEmpty(&br->items_ind)) {
4549         batch_item_ind* bii;
4550 
4551         bii = ExAllocatePoolWithTag(PagedPool, sizeof(batch_item_ind), ALLOC_TAG);
4552         if (!bii) {
4553             ERR("out of memory\n");
4554             return STATUS_INSUFFICIENT_RESOURCES;
4555         }
4556 
4557         bii->key.obj_id = 0;
4558         bii->key.obj_type = 0;
4559         bii->key.offset = 0;
4560         InitializeListHead(&bii->items);
4561         bii->num_items = 0;
4562         InsertTailList(&br->items_ind, &bii->list_entry);
4563     }
4564 
4565     bi = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
4566     if (!bi) {
4567         ERR("out of memory\n");
4568         return STATUS_INSUFFICIENT_RESOURCES;
4569     }
4570 
4571     bi->key.obj_id = objid;
4572     bi->key.obj_type = objtype;
4573     bi->key.offset = offset;
4574     bi->data = data;
4575     bi->datalen = datalen;
4576     bi->operation = operation;
4577 
4578     le = br->items_ind.Blink;
4579     while (le != &br->items_ind) {
4580         LIST_ENTRY* le2;
4581         batch_item_ind* bii = CONTAINING_RECORD(le, batch_item_ind, list_entry);
4582 
4583         if (keycmp(bii->key, bi->key) == 1) {
4584             le = le->Blink;
4585             continue;
4586         }
4587 
4588         le2 = bii->items.Blink;
4589         while (le2 != &bii->items) {
4590             batch_item* bi2 = CONTAINING_RECORD(le2, batch_item, list_entry);
4591             int cmp = keycmp(bi2->key, bi->key);
4592 
4593             if (cmp == -1 || (cmp == 0 && bi->operation >= bi2->operation)) {
4594                 InsertHeadList(&bi2->list_entry, &bi->list_entry);
4595                 bii->num_items++;
4596                 goto end;
4597             }
4598 
4599             le2 = le2->Blink;
4600         }
4601 
4602         InsertHeadList(&bii->items, &bi->list_entry);
4603         bii->num_items++;
4604 
4605 end:
4606         if (bii->num_items > BATCH_ITEM_LIMIT)
4607             return split_batch_item_list(bii);
4608 
4609         return STATUS_SUCCESS;
4610     }
4611 
4612     return STATUS_INTERNAL_ERROR;
4613 }
4614 #ifdef _MSC_VER
4615 #pragma warning(pop)
4616 #endif
4617 
// One distinct on-disk data extent referenced by a file, as tracked by
// rationalize_extents. Entries are kept on a local list ordered by ascending
// address.
typedef struct {
    uint64_t address;       // start address of the extent (from EXTENT_DATA2.address); advanced when the start is trimmed
    uint64_t length;        // length of the extent (from EXTENT_DATA2.size); shrinks on trimming, grows on merging
    uint64_t offset;        // file offset of the extent's first byte (extent's file offset minus EXTENT_DATA2.offset)
    bool changed;           // set once a following range has been merged into this one
    chunk* chunk;           // chunk containing `address`; NULL until looked up
    uint64_t skip_start;    // smallest EXTENT_DATA2.offset of any file extent using this address, i.e. unused bytes at the start
    uint64_t skip_end;      // smallest number of bytes left unused at the end by any file extent using this address
    LIST_ENTRY list_entry;  // links this range into the list of ranges
} extent_range;
4628 
rationalize_extents(fcb * fcb,PIRP Irp)4629 static void rationalize_extents(fcb* fcb, PIRP Irp) {
4630     LIST_ENTRY* le;
4631     LIST_ENTRY extent_ranges;
4632     extent_range* er;
4633     bool changed = false, truncating = false;
4634     uint32_t num_extents = 0;
4635 
4636     InitializeListHead(&extent_ranges);
4637 
4638     le = fcb->extents.Flink;
4639     while (le != &fcb->extents) {
4640         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4641 
4642         if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4643             EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4644 
4645             if (ed2->size != 0) {
4646                 LIST_ENTRY* le2;
4647 
4648                 le2 = extent_ranges.Flink;
4649                 while (le2 != &extent_ranges) {
4650                     extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4651 
4652                     if (er2->address == ed2->address) {
4653                         er2->skip_start = min(er2->skip_start, ed2->offset);
4654                         er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes);
4655                         goto cont;
4656                     } else if (er2->address > ed2->address)
4657                         break;
4658 
4659                     le2 = le2->Flink;
4660                 }
4661 
4662                 er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside?
4663                 if (!er) {
4664                     ERR("out of memory\n");
4665                     goto end;
4666                 }
4667 
4668                 er->address = ed2->address;
4669                 er->length = ed2->size;
4670                 er->offset = ext->offset - ed2->offset;
4671                 er->changed = false;
4672                 er->chunk = NULL;
4673                 er->skip_start = ed2->offset;
4674                 er->skip_end = ed2->size - ed2->offset - ed2->num_bytes;
4675 
4676                 if (er->skip_start != 0 || er->skip_end != 0)
4677                     truncating = true;
4678 
4679                 InsertHeadList(le2->Blink, &er->list_entry);
4680                 num_extents++;
4681             }
4682         }
4683 
4684 cont:
4685         le = le->Flink;
4686     }
4687 
4688     if (num_extents == 0 || (num_extents == 1 && !truncating))
4689         goto end;
4690 
4691     le = extent_ranges.Flink;
4692     while (le != &extent_ranges) {
4693         er = CONTAINING_RECORD(le, extent_range, list_entry);
4694 
4695         if (!er->chunk) {
4696             LIST_ENTRY* le2;
4697 
4698             er->chunk = get_chunk_from_address(fcb->Vcb, er->address);
4699 
4700             if (!er->chunk) {
4701                 ERR("get_chunk_from_address(%I64x) failed\n", er->address);
4702                 goto end;
4703             }
4704 
4705             le2 = le->Flink;
4706             while (le2 != &extent_ranges) {
4707                 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4708 
4709                 if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size)
4710                     er2->chunk = er->chunk;
4711 
4712                 le2 = le2->Flink;
4713             }
4714         }
4715 
4716         le = le->Flink;
4717     }
4718 
4719     if (truncating) {
4720         // truncate beginning or end of extent if unused
4721 
4722         le = extent_ranges.Flink;
4723         while (le != &extent_ranges) {
4724             er = CONTAINING_RECORD(le, extent_range, list_entry);
4725 
4726             if (er->skip_start > 0) {
4727                 LIST_ENTRY* le2 = fcb->extents.Flink;
4728                 while (le2 != &fcb->extents) {
4729                     extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4730 
4731                     if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4732                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4733 
4734                         if (ed2->size != 0 && ed2->address == er->address) {
4735                             NTSTATUS Status;
4736 
4737                             Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4738                                                                -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
4739                             if (!NT_SUCCESS(Status)) {
4740                                 ERR("update_changed_extent_ref returned %08lx\n", Status);
4741                                 goto end;
4742                             }
4743 
4744                             ext->extent_data.decoded_size -= er->skip_start;
4745                             ed2->size -= er->skip_start;
4746                             ed2->address += er->skip_start;
4747                             ed2->offset -= er->skip_start;
4748 
4749                             add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4750                                                    1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4751                         }
4752                     }
4753 
4754                     le2 = le2->Flink;
4755                 }
4756 
4757                 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4758                     add_checksum_entry(fcb->Vcb, er->address, (ULONG)(er->skip_start >> fcb->Vcb->sector_shift), NULL, NULL);
4759 
4760                 acquire_chunk_lock(er->chunk, fcb->Vcb);
4761 
4762                 if (!er->chunk->cache_loaded) {
4763                     NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4764 
4765                     if (!NT_SUCCESS(Status)) {
4766                         ERR("load_cache_chunk returned %08lx\n", Status);
4767                         release_chunk_lock(er->chunk, fcb->Vcb);
4768                         goto end;
4769                     }
4770                 }
4771 
4772                 er->chunk->used -= er->skip_start;
4773 
4774                 space_list_add(er->chunk, er->address, er->skip_start, NULL);
4775 
4776                 release_chunk_lock(er->chunk, fcb->Vcb);
4777 
4778                 er->address += er->skip_start;
4779                 er->length -= er->skip_start;
4780             }
4781 
4782             if (er->skip_end > 0) {
4783                 LIST_ENTRY* le2 = fcb->extents.Flink;
4784                 while (le2 != &fcb->extents) {
4785                     extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4786 
4787                     if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4788                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4789 
4790                         if (ed2->size != 0 && ed2->address == er->address) {
4791                             NTSTATUS Status;
4792 
4793                             Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4794                                                                -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
4795                             if (!NT_SUCCESS(Status)) {
4796                                 ERR("update_changed_extent_ref returned %08lx\n", Status);
4797                                 goto end;
4798                             }
4799 
4800                             ext->extent_data.decoded_size -= er->skip_end;
4801                             ed2->size -= er->skip_end;
4802 
4803                             add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4804                                                    1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4805                         }
4806                     }
4807 
4808                     le2 = le2->Flink;
4809                 }
4810 
4811                 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4812                     add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, (ULONG)(er->skip_end >> fcb->Vcb->sector_shift), NULL, NULL);
4813 
4814                 acquire_chunk_lock(er->chunk, fcb->Vcb);
4815 
4816                 if (!er->chunk->cache_loaded) {
4817                     NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4818 
4819                     if (!NT_SUCCESS(Status)) {
4820                         ERR("load_cache_chunk returned %08lx\n", Status);
4821                         release_chunk_lock(er->chunk, fcb->Vcb);
4822                         goto end;
4823                     }
4824                 }
4825 
4826                 er->chunk->used -= er->skip_end;
4827 
4828                 space_list_add(er->chunk, er->address + er->length - er->skip_end, er->skip_end, NULL);
4829 
4830                 release_chunk_lock(er->chunk, fcb->Vcb);
4831 
4832                 er->length -= er->skip_end;
4833             }
4834 
4835             le = le->Flink;
4836         }
4837     }
4838 
4839     if (num_extents < 2)
4840         goto end;
4841 
4842     // merge together adjacent extents
4843     le = extent_ranges.Flink;
4844     while (le != &extent_ranges) {
4845         er = CONTAINING_RECORD(le, extent_range, list_entry);
4846 
4847         if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) {
4848             extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry);
4849 
4850             if (er->chunk == er2->chunk) {
4851                 if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) {
4852                     if (er->length + er2->length <= MAX_EXTENT_SIZE) {
4853                         er->length += er2->length;
4854                         er->changed = true;
4855 
4856                         RemoveEntryList(&er2->list_entry);
4857                         ExFreePool(er2);
4858 
4859                         changed = true;
4860                         continue;
4861                     }
4862                 }
4863             }
4864         }
4865 
4866         le = le->Flink;
4867     }
4868 
4869     if (!changed)
4870         goto end;
4871 
4872     le = fcb->extents.Flink;
4873     while (le != &fcb->extents) {
4874         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4875 
4876         if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4877             EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4878 
4879             if (ed2->size != 0) {
4880                 LIST_ENTRY* le2;
4881 
4882                 le2 = extent_ranges.Flink;
4883                 while (le2 != &extent_ranges) {
4884                     extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4885 
4886                     if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) {
4887                         NTSTATUS Status;
4888 
4889                         Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4890                                                            -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
4891                         if (!NT_SUCCESS(Status)) {
4892                             ERR("update_changed_extent_ref returned %08lx\n", Status);
4893                             goto end;
4894                         }
4895 
4896                         ed2->offset += ed2->address - er2->address;
4897                         ed2->address = er2->address;
4898                         ed2->size = er2->length;
4899                         ext->extent_data.decoded_size = ed2->size;
4900 
4901                         add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4902                                                1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4903 
4904                         break;
4905                     }
4906 
4907                     le2 = le2->Flink;
4908                 }
4909             }
4910         }
4911 
4912         le = le->Flink;
4913     }
4914 
4915 end:
4916     while (!IsListEmpty(&extent_ranges)) {
4917         le = RemoveHeadList(&extent_ranges);
4918         er = CONTAINING_RECORD(le, extent_range, list_entry);
4919 
4920         ExFreePool(er);
4921     }
4922 }
4923 
flush_fcb(fcb * fcb,bool cache,LIST_ENTRY * batchlist,PIRP Irp)4924 NTSTATUS flush_fcb(fcb* fcb, bool cache, LIST_ENTRY* batchlist, PIRP Irp) {
4925     traverse_ptr tp;
4926     KEY searchkey;
4927     NTSTATUS Status;
4928     INODE_ITEM* ii;
4929     uint64_t ii_offset;
4930 #ifdef DEBUG_PARANOID
4931     uint64_t old_size = 0;
4932     bool extents_changed;
4933 #endif
4934 
4935     if (fcb->ads) {
4936         if (fcb->deleted) {
4937             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, fcb->adshash);
4938             if (!NT_SUCCESS(Status)) {
4939                 ERR("delete_xattr returned %08lx\n", Status);
4940                 goto end;
4941             }
4942         } else {
4943             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length,
4944                                fcb->adshash, (uint8_t*)fcb->adsdata.Buffer, fcb->adsdata.Length);
4945             if (!NT_SUCCESS(Status)) {
4946                 ERR("set_xattr returned %08lx\n", Status);
4947                 goto end;
4948             }
4949         }
4950 
4951         Status = STATUS_SUCCESS;
4952         goto end;
4953     }
4954 
4955     if (fcb->deleted) {
4956         Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode);
4957         if (!NT_SUCCESS(Status)) {
4958             ERR("insert_tree_item_batch returned %08lx\n", Status);
4959             goto end;
4960         }
4961 
4962         if (fcb->marked_as_orphan) {
4963             Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE,
4964                                             fcb->inode, NULL, 0, Batch_Delete);
4965             if (!NT_SUCCESS(Status)) {
4966                 ERR("insert_tree_item_batch returned %08lx\n", Status);
4967                 goto end;
4968             }
4969         }
4970 
4971         Status = STATUS_SUCCESS;
4972         goto end;
4973     }
4974 
4975 #ifdef DEBUG_PARANOID
4976     extents_changed = fcb->extents_changed;
4977 #endif
4978 
4979     if (fcb->extents_changed) {
4980         LIST_ENTRY* le;
4981         bool prealloc = false, extents_inline = false;
4982         uint64_t last_end;
4983 
4984         // delete ignored extent items
4985         le = fcb->extents.Flink;
4986         while (le != &fcb->extents) {
4987             LIST_ENTRY* le2 = le->Flink;
4988             extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4989 
4990             if (ext->ignore) {
4991                 RemoveEntryList(&ext->list_entry);
4992 
4993                 if (ext->csum)
4994                     ExFreePool(ext->csum);
4995 
4996                 ExFreePool(ext);
4997             }
4998 
4999             le = le2;
5000         }
5001 
5002         le = fcb->extents.Flink;
5003         while (le != &fcb->extents) {
5004             extent* ext = CONTAINING_RECORD(le, extent, list_entry);
5005 
5006             if (ext->inserted && ext->csum && ext->extent_data.type == EXTENT_TYPE_REGULAR) {
5007                 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
5008 
5009                 if (ed2->size > 0) { // not sparse
5010                     if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE)
5011                         add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, (ULONG)(ed2->num_bytes >> fcb->Vcb->sector_shift), ext->csum, Irp);
5012                     else
5013                         add_checksum_entry(fcb->Vcb, ed2->address, (ULONG)(ed2->size >> fcb->Vcb->sector_shift), ext->csum, Irp);
5014                 }
5015             }
5016 
5017             le = le->Flink;
5018         }
5019 
5020         if (!IsListEmpty(&fcb->extents)) {
5021             rationalize_extents(fcb, Irp);
5022 
5023             // merge together adjacent EXTENT_DATAs pointing to same extent
5024 
5025             le = fcb->extents.Flink;
5026             while (le != &fcb->extents) {
5027                 LIST_ENTRY* le2 = le->Flink;
5028                 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
5029 
5030                 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
5031                     extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
5032 
5033                     if (ext->extent_data.type == nextext->extent_data.type) {
5034                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
5035                         EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->extent_data.data;
5036 
5037                         if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
5038                             nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
5039                             chunk* c;
5040 
5041                             if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->csum) {
5042                                 ULONG len = (ULONG)((ed2->num_bytes + ned2->num_bytes) >> fcb->Vcb->sector_shift);
5043                                 void* csum;
5044 
5045                                 csum = ExAllocatePoolWithTag(NonPagedPool, len * fcb->Vcb->csum_size, ALLOC_TAG);
5046                                 if (!csum) {
5047                                     ERR("out of memory\n");
5048                                     Status = STATUS_INSUFFICIENT_RESOURCES;
5049                                     goto end;
5050                                 }
5051 
5052                                 RtlCopyMemory(csum, ext->csum, (ULONG)((ed2->num_bytes * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift));
5053                                 RtlCopyMemory((uint8_t*)csum + ((ed2->num_bytes * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift), nextext->csum,
5054                                               (ULONG)((ned2->num_bytes * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift));
5055 
5056                                 ExFreePool(ext->csum);
5057                                 ext->csum = csum;
5058                             }
5059 
5060                             ext->extent_data.generation = fcb->Vcb->superblock.generation;
5061                             ed2->num_bytes += ned2->num_bytes;
5062 
5063                             RemoveEntryList(&nextext->list_entry);
5064 
5065                             if (nextext->csum)
5066                                 ExFreePool(nextext->csum);
5067 
5068                             ExFreePool(nextext);
5069 
5070                             c = get_chunk_from_address(fcb->Vcb, ed2->address);
5071 
5072                             if (!c) {
5073                                 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
5074                             } else {
5075                                 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
5076                                                                 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
5077                                 if (!NT_SUCCESS(Status)) {
5078                                     ERR("update_changed_extent_ref returned %08lx\n", Status);
5079                                     goto end;
5080                                 }
5081                             }
5082 
5083                             le2 = le;
5084                         }
5085                     }
5086                 }
5087 
5088                 le = le2;
5089             }
5090         }
5091 
5092         if (!fcb->created) {
5093             // delete existing EXTENT_DATA items
5094 
5095             Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, NULL, 0, Batch_DeleteExtentData);
5096             if (!NT_SUCCESS(Status)) {
5097                 ERR("insert_tree_item_batch returned %08lx\n", Status);
5098                 goto end;
5099             }
5100         }
5101 
5102         // add new EXTENT_DATAs
5103 
5104         last_end = 0;
5105 
5106         le = fcb->extents.Flink;
5107         while (le != &fcb->extents) {
5108             extent* ext = CONTAINING_RECORD(le, extent, list_entry);
5109             EXTENT_DATA* ed;
5110 
5111             ext->inserted = false;
5112 
5113             if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
5114                 Status = insert_sparse_extent(fcb, batchlist, last_end, ext->offset - last_end);
5115                 if (!NT_SUCCESS(Status)) {
5116                     ERR("insert_sparse_extent returned %08lx\n", Status);
5117                     goto end;
5118                 }
5119             }
5120 
5121             ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
5122             if (!ed) {
5123                 ERR("out of memory\n");
5124                 Status = STATUS_INSUFFICIENT_RESOURCES;
5125                 goto end;
5126             }
5127 
5128             RtlCopyMemory(ed, &ext->extent_data, ext->datalen);
5129 
5130             Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset,
5131                                             ed, ext->datalen, Batch_Insert);
5132             if (!NT_SUCCESS(Status)) {
5133                 ERR("insert_tree_item_batch returned %08lx\n", Status);
5134                 goto end;
5135             }
5136 
5137             if (ed->type == EXTENT_TYPE_PREALLOC)
5138                 prealloc = true;
5139 
5140             if (ed->type == EXTENT_TYPE_INLINE)
5141                 extents_inline = true;
5142 
5143             if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
5144                 if (ed->type == EXTENT_TYPE_INLINE)
5145                     last_end = ext->offset + ed->decoded_size;
5146                 else {
5147                     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
5148 
5149                     last_end = ext->offset + ed2->num_bytes;
5150                 }
5151             }
5152 
5153             le = le->Flink;
5154         }
5155 
5156         if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
5157             sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
5158             Status = insert_sparse_extent(fcb, batchlist, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end);
5159             if (!NT_SUCCESS(Status)) {
5160                 ERR("insert_sparse_extent returned %08lx\n", Status);
5161                 goto end;
5162             }
5163         }
5164 
5165         // update prealloc flag in INODE_ITEM
5166 
5167         if (!prealloc)
5168             fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
5169         else
5170             fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
5171 
5172         fcb->inode_item_changed = true;
5173 
5174         fcb->extents_changed = false;
5175     }
5176 
5177     if ((!fcb->created && fcb->inode_item_changed) || cache) {
5178         searchkey.obj_id = fcb->inode;
5179         searchkey.obj_type = TYPE_INODE_ITEM;
5180         searchkey.offset = 0xffffffffffffffff;
5181 
5182         Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp);
5183         if (!NT_SUCCESS(Status)) {
5184             ERR("error - find_item returned %08lx\n", Status);
5185             goto end;
5186         }
5187 
5188         if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
5189             if (cache) {
5190                 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
5191                 if (!ii) {
5192                     ERR("out of memory\n");
5193                     Status = STATUS_INSUFFICIENT_RESOURCES;
5194                     goto end;
5195                 }
5196 
5197                 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
5198 
5199                 Status = insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp);
5200                 if (!NT_SUCCESS(Status)) {
5201                     ERR("insert_tree_item returned %08lx\n", Status);
5202                     goto end;
5203                 }
5204 
5205                 ii_offset = 0;
5206             } else {
5207                 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id);
5208                 Status = STATUS_INTERNAL_ERROR;
5209                 goto end;
5210             }
5211         } else {
5212 #ifdef DEBUG_PARANOID
5213             INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
5214 
5215             old_size = ii2->st_size;
5216 #endif
5217 
5218             ii_offset = tp.item->key.offset;
5219         }
5220 
5221         if (!cache) {
5222             Status = delete_tree_item(fcb->Vcb, &tp);
5223             if (!NT_SUCCESS(Status)) {
5224                 ERR("delete_tree_item returned %08lx\n", Status);
5225                 goto end;
5226             }
5227         } else {
5228             searchkey.obj_id = fcb->inode;
5229             searchkey.obj_type = TYPE_INODE_ITEM;
5230             searchkey.offset = ii_offset;
5231 
5232             Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp);
5233             if (!NT_SUCCESS(Status)) {
5234                 ERR("error - find_item returned %08lx\n", Status);
5235                 goto end;
5236             }
5237 
5238             if (keycmp(tp.item->key, searchkey)) {
5239                 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id);
5240                 Status = STATUS_INTERNAL_ERROR;
5241                 goto end;
5242             } else
5243                 RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
5244         }
5245 
5246 #ifdef DEBUG_PARANOID
5247         if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
5248             ERR("error - size has changed but extents not marked as changed\n");
5249             int3;
5250         }
5251 #endif
5252     } else
5253         ii_offset = 0;
5254 
5255     fcb->created = false;
5256 
5257     if (!cache && fcb->inode_item_changed) {
5258         ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
5259         if (!ii) {
5260             ERR("out of memory\n");
5261             Status = STATUS_INSUFFICIENT_RESOURCES;
5262             goto end;
5263         }
5264 
5265         RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
5266 
5267         Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM),
5268                                         Batch_Insert);
5269         if (!NT_SUCCESS(Status)) {
5270             ERR("insert_tree_item_batch returned %08lx\n", Status);
5271             goto end;
5272         }
5273 
5274         fcb->inode_item_changed = false;
5275     }
5276 
5277     if (fcb->sd_dirty) {
5278         if (!fcb->sd_deleted) {
5279             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1,
5280                                EA_NTACL_HASH, (uint8_t*)fcb->sd, (uint16_t)RtlLengthSecurityDescriptor(fcb->sd));
5281             if (!NT_SUCCESS(Status)) {
5282                 ERR("set_xattr returned %08lx\n", Status);
5283                 goto end;
5284             }
5285         } else {
5286             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1, EA_NTACL_HASH);
5287             if (!NT_SUCCESS(Status)) {
5288                 ERR("delete_xattr returned %08lx\n", Status);
5289                 goto end;
5290             }
5291         }
5292 
5293         fcb->sd_deleted = false;
5294         fcb->sd_dirty = false;
5295     }
5296 
5297     if (fcb->atts_changed) {
5298         if (!fcb->atts_deleted) {
5299             uint8_t val[16], *val2;
5300             ULONG atts = fcb->atts;
5301 
5302             TRACE("inserting new DOSATTRIB xattr\n");
5303 
5304             if (fcb->inode == SUBVOL_ROOT_INODE)
5305                 atts &= ~FILE_ATTRIBUTE_READONLY;
5306 
5307             val2 = &val[sizeof(val) - 1];
5308 
5309             do {
5310                 uint8_t c = atts % 16;
5311                 *val2 = c <= 9 ? (c + '0') : (c - 0xa + 'a');
5312 
5313                 val2--;
5314                 atts >>= 4;
5315             } while (atts != 0);
5316 
5317             *val2 = 'x';
5318             val2--;
5319             *val2 = '0';
5320 
5321             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1,
5322                                EA_DOSATTRIB_HASH, val2, (uint16_t)(val + sizeof(val) - val2));
5323             if (!NT_SUCCESS(Status)) {
5324                 ERR("set_xattr returned %08lx\n", Status);
5325                 goto end;
5326             }
5327         } else {
5328             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1, EA_DOSATTRIB_HASH);
5329             if (!NT_SUCCESS(Status)) {
5330                 ERR("delete_xattr returned %08lx\n", Status);
5331                 goto end;
5332             }
5333         }
5334 
5335         fcb->atts_changed = false;
5336         fcb->atts_deleted = false;
5337     }
5338 
5339     if (fcb->reparse_xattr_changed) {
5340         if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
5341             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1,
5342                                EA_REPARSE_HASH, (uint8_t*)fcb->reparse_xattr.Buffer, (uint16_t)fcb->reparse_xattr.Length);
5343             if (!NT_SUCCESS(Status)) {
5344                 ERR("set_xattr returned %08lx\n", Status);
5345                 goto end;
5346             }
5347         } else {
5348             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1, EA_REPARSE_HASH);
5349             if (!NT_SUCCESS(Status)) {
5350                 ERR("delete_xattr returned %08lx\n", Status);
5351                 goto end;
5352             }
5353         }
5354 
5355         fcb->reparse_xattr_changed = false;
5356     }
5357 
5358     if (fcb->ea_changed) {
5359         if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) {
5360             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1,
5361                                EA_EA_HASH, (uint8_t*)fcb->ea_xattr.Buffer, (uint16_t)fcb->ea_xattr.Length);
5362             if (!NT_SUCCESS(Status)) {
5363                 ERR("set_xattr returned %08lx\n", Status);
5364                 goto end;
5365             }
5366         } else {
5367             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1, EA_EA_HASH);
5368             if (!NT_SUCCESS(Status)) {
5369                 ERR("delete_xattr returned %08lx\n", Status);
5370                 goto end;
5371             }
5372         }
5373 
5374         fcb->ea_changed = false;
5375     }
5376 
5377     if (fcb->prop_compression_changed) {
5378         if (fcb->prop_compression == PropCompression_None) {
5379             Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, EA_PROP_COMPRESSION_HASH);
5380             if (!NT_SUCCESS(Status)) {
5381                 ERR("delete_xattr returned %08lx\n", Status);
5382                 goto end;
5383             }
5384         } else if (fcb->prop_compression == PropCompression_Zlib) {
5385             static const char zlib[] = "zlib";
5386 
5387             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5388                                EA_PROP_COMPRESSION_HASH, (uint8_t*)zlib, sizeof(zlib) - 1);
5389             if (!NT_SUCCESS(Status)) {
5390                 ERR("set_xattr returned %08lx\n", Status);
5391                 goto end;
5392             }
5393         } else if (fcb->prop_compression == PropCompression_LZO) {
5394             static const char lzo[] = "lzo";
5395 
5396             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5397                                EA_PROP_COMPRESSION_HASH, (uint8_t*)lzo, sizeof(lzo) - 1);
5398             if (!NT_SUCCESS(Status)) {
5399                 ERR("set_xattr returned %08lx\n", Status);
5400                 goto end;
5401             }
5402         } else if (fcb->prop_compression == PropCompression_ZSTD) {
5403             static const char zstd[] = "zstd";
5404 
5405             Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5406                                EA_PROP_COMPRESSION_HASH, (uint8_t*)zstd, sizeof(zstd) - 1);
5407             if (!NT_SUCCESS(Status)) {
5408                 ERR("set_xattr returned %08lx\n", Status);
5409                 goto end;
5410             }
5411         }
5412 
5413         fcb->prop_compression_changed = false;
5414     }
5415 
5416     if (fcb->xattrs_changed) {
5417         LIST_ENTRY* le;
5418 
5419         le = fcb->xattrs.Flink;
5420         while (le != &fcb->xattrs) {
5421             xattr* xa = CONTAINING_RECORD(le, xattr, list_entry);
5422             LIST_ENTRY* le2 = le->Flink;
5423 
5424             if (xa->dirty) {
5425                 uint32_t hash = calc_crc32c(0xfffffffe, (uint8_t*)xa->data, xa->namelen);
5426 
5427                 if (xa->valuelen == 0) {
5428                     Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, hash);
5429                     if (!NT_SUCCESS(Status)) {
5430                         ERR("delete_xattr returned %08lx\n", Status);
5431                         goto end;
5432                     }
5433 
5434                     RemoveEntryList(&xa->list_entry);
5435                     ExFreePool(xa);
5436                 } else {
5437                     Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen,
5438                                        hash, (uint8_t*)&xa->data[xa->namelen], xa->valuelen);
5439                     if (!NT_SUCCESS(Status)) {
5440                         ERR("set_xattr returned %08lx\n", Status);
5441                         goto end;
5442                     }
5443 
5444                     xa->dirty = false;
5445                 }
5446             }
5447 
5448             le = le2;
5449         }
5450 
5451         fcb->xattrs_changed = false;
5452     }
5453 
5454     if ((fcb->case_sensitive_set && !fcb->case_sensitive)) {
5455         Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE,
5456                               sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH);
5457         if (!NT_SUCCESS(Status)) {
5458             ERR("delete_xattr returned %08lx\n", Status);
5459             goto end;
5460         }
5461 
5462         fcb->case_sensitive_set = false;
5463     } else if ((!fcb->case_sensitive_set && fcb->case_sensitive)) {
5464         Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE,
5465                            sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH, (uint8_t*)"1", 1);
5466         if (!NT_SUCCESS(Status)) {
5467             ERR("set_xattr returned %08lx\n", Status);
5468             goto end;
5469         }
5470 
5471         fcb->case_sensitive_set = true;
5472     }
5473 
5474     if (fcb->inode_item.st_nlink == 0 && !fcb->marked_as_orphan) { // mark as orphan
5475         Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE,
5476                                         fcb->inode, NULL, 0, Batch_Insert);
5477         if (!NT_SUCCESS(Status)) {
5478             ERR("insert_tree_item_batch returned %08lx\n", Status);
5479             goto end;
5480         }
5481 
5482         fcb->marked_as_orphan = true;
5483     }
5484 
5485     Status = STATUS_SUCCESS;
5486 
5487 end:
5488     if (fcb->dirty) {
5489         bool lock = false;
5490 
5491         fcb->dirty = false;
5492 
5493         if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->dirty_fcbs_lock)) {
5494             ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, true);
5495             lock = true;
5496         }
5497 
5498         RemoveEntryList(&fcb->list_entry_dirty);
5499 
5500         if (lock)
5501             ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock);
5502     }
5503 
5504     return Status;
5505 }
5506 
/* Queues a TRIM of [address, address + size) on dev, leaving out any part of
 * the range that overlaps a superblock copy listed in superblock_addrs.
 * The pieces that remain are passed to add_trim_entry. */
void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, uint64_t address, uint64_t size) {
    // length reserved for each superblock copy, rounded up to whole sectors
    const ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
    int idx;

    // superblock_addrs is terminated by a zero entry
    for (idx = 0; superblock_addrs[idx] != 0; idx++) {
        const uint64_t sb_start = superblock_addrs[idx];
        const uint64_t sb_end = sb_start + sblen;
        uint64_t consumed;

        if (!(sb_end >= address && sb_start < address + size)) {
            // this copy does not touch the range; stop once we are past its end
            if (sb_start > address + size)
                break;

            continue;
        }

        // trim whatever lies in front of the superblock
        if (sb_start > address)
            add_trim_entry(dev, address, sb_start - address);

        // bytes of the range used up so far, through to the end of the superblock
        consumed = sb_end - address;

        if (size <= consumed)
            return;

        // carry on with the remainder after the superblock
        size -= consumed;
        address = sb_end;
    }

    add_trim_entry(dev, address, size);
}
5530 
drop_chunk(device_extension * Vcb,chunk * c,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)5531 static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5532     NTSTATUS Status;
5533     KEY searchkey;
5534     traverse_ptr tp;
5535     uint64_t i, factor;
5536     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];;
5537 
5538     TRACE("dropping chunk %I64x\n", c->offset);
5539 
5540     if (c->chunk_item->type & BLOCK_FLAG_RAID0)
5541         factor = c->chunk_item->num_stripes;
5542     else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
5543         factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
5544     else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
5545         factor = c->chunk_item->num_stripes - 1;
5546     else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
5547         factor = c->chunk_item->num_stripes - 2;
5548     else // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
5549         factor = 1;
5550 
5551     // do TRIM
5552     if (Vcb->trim && !Vcb->options.no_trim) {
5553         uint64_t len = c->chunk_item->size / factor;
5554 
5555         for (i = 0; i < c->chunk_item->num_stripes; i++) {
5556             if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
5557                 add_trim_entry_avoid_sb(Vcb, c->devices[i], cis[i].offset, len);
5558         }
5559     }
5560 
5561     if (!c->cache) {
5562         Status = load_stored_free_space_cache(Vcb, c, true, Irp);
5563 
5564         if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND)
5565             WARN("load_stored_free_space_cache returned %08lx\n", Status);
5566     }
5567 
5568     // remove free space cache
5569     if (c->cache) {
5570         c->cache->deleted = true;
5571 
5572         Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback);
5573         if (!NT_SUCCESS(Status)) {
5574             ERR("excise_extents returned %08lx\n", Status);
5575             return Status;
5576         }
5577 
5578         Status = flush_fcb(c->cache, true, batchlist, Irp);
5579 
5580         free_fcb(c->cache);
5581 
5582         if (c->cache->refcount == 0)
5583             reap_fcb(c->cache);
5584 
5585         if (!NT_SUCCESS(Status)) {
5586             ERR("flush_fcb returned %08lx\n", Status);
5587             return Status;
5588         }
5589 
5590         searchkey.obj_id = FREE_SPACE_CACHE_ID;
5591         searchkey.obj_type = 0;
5592         searchkey.offset = c->offset;
5593 
5594         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
5595         if (!NT_SUCCESS(Status)) {
5596             ERR("error - find_item returned %08lx\n", Status);
5597             return Status;
5598         }
5599 
5600         if (!keycmp(tp.item->key, searchkey)) {
5601             Status = delete_tree_item(Vcb, &tp);
5602             if (!NT_SUCCESS(Status)) {
5603                 ERR("delete_tree_item returned %08lx\n", Status);
5604                 return Status;
5605             }
5606         }
5607     }
5608 
5609     if (Vcb->space_root) {
5610         Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size,
5611                                         NULL, 0, Batch_DeleteFreeSpace);
5612         if (!NT_SUCCESS(Status)) {
5613             ERR("insert_tree_item_batch returned %08lx\n", Status);
5614             return Status;
5615         }
5616     }
5617 
5618     for (i = 0; i < c->chunk_item->num_stripes; i++) {
5619         if (!c->created) {
5620             // remove DEV_EXTENTs from tree 4
5621             searchkey.obj_id = cis[i].dev_id;
5622             searchkey.obj_type = TYPE_DEV_EXTENT;
5623             searchkey.offset = cis[i].offset;
5624 
5625             Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp);
5626             if (!NT_SUCCESS(Status)) {
5627                 ERR("error - find_item returned %08lx\n", Status);
5628                 return Status;
5629             }
5630 
5631             if (!keycmp(tp.item->key, searchkey)) {
5632                 Status = delete_tree_item(Vcb, &tp);
5633                 if (!NT_SUCCESS(Status)) {
5634                     ERR("delete_tree_item returned %08lx\n", Status);
5635                     return Status;
5636                 }
5637 
5638                 if (tp.item->size >= sizeof(DEV_EXTENT)) {
5639                     DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
5640 
5641                     c->devices[i]->devitem.bytes_used -= de->length;
5642 
5643                     if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5644                         if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + de->length > Vcb->balance.opts[0].drange_start)
5645                             space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5646                     } else
5647                         space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, NULL, rollback);
5648                 }
5649             } else
5650                 WARN("could not find (%I64x,%x,%I64x) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
5651         } else {
5652             uint64_t len = c->chunk_item->size / factor;
5653 
5654             c->devices[i]->devitem.bytes_used -= len;
5655 
5656             if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5657                 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + len > Vcb->balance.opts[0].drange_start)
5658                     space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5659             } else
5660                 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, NULL, rollback);
5661         }
5662     }
5663 
5664     // modify DEV_ITEMs in chunk tree
5665     for (i = 0; i < c->chunk_item->num_stripes; i++) {
5666         if (c->devices[i]) {
5667             uint64_t j;
5668             DEV_ITEM* di;
5669 
5670             searchkey.obj_id = 1;
5671             searchkey.obj_type = TYPE_DEV_ITEM;
5672             searchkey.offset = c->devices[i]->devitem.dev_id;
5673 
5674             Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
5675             if (!NT_SUCCESS(Status)) {
5676                 ERR("error - find_item returned %08lx\n", Status);
5677                 return Status;
5678             }
5679 
5680             if (!keycmp(tp.item->key, searchkey)) {
5681                 Status = delete_tree_item(Vcb, &tp);
5682                 if (!NT_SUCCESS(Status)) {
5683                     ERR("delete_tree_item returned %08lx\n", Status);
5684                     return Status;
5685                 }
5686 
5687                 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
5688                 if (!di) {
5689                     ERR("out of memory\n");
5690                     return STATUS_INSUFFICIENT_RESOURCES;
5691                 }
5692 
5693                 RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
5694 
5695                 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
5696                 if (!NT_SUCCESS(Status)) {
5697                     ERR("insert_tree_item returned %08lx\n", Status);
5698                     return Status;
5699                 }
5700             }
5701 
5702             for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
5703                 if (c->devices[j] == c->devices[i])
5704                     c->devices[j] = NULL;
5705             }
5706         }
5707     }
5708 
5709     if (!c->created) {
5710         // remove CHUNK_ITEM from chunk tree
5711         searchkey.obj_id = 0x100;
5712         searchkey.obj_type = TYPE_CHUNK_ITEM;
5713         searchkey.offset = c->offset;
5714 
5715         Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
5716         if (!NT_SUCCESS(Status)) {
5717             ERR("error - find_item returned %08lx\n", Status);
5718             return Status;
5719         }
5720 
5721         if (!keycmp(tp.item->key, searchkey)) {
5722             Status = delete_tree_item(Vcb, &tp);
5723 
5724             if (!NT_SUCCESS(Status)) {
5725                 ERR("delete_tree_item returned %08lx\n", Status);
5726                 return Status;
5727             }
5728         } else
5729             WARN("could not find CHUNK_ITEM for chunk %I64x\n", c->offset);
5730 
5731         // remove BLOCK_GROUP_ITEM from extent tree
5732         searchkey.obj_id = c->offset;
5733         searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
5734         searchkey.offset = 0xffffffffffffffff;
5735 
5736         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
5737         if (!NT_SUCCESS(Status)) {
5738             ERR("error - find_item returned %08lx\n", Status);
5739             return Status;
5740         }
5741 
5742         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
5743             Status = delete_tree_item(Vcb, &tp);
5744 
5745             if (!NT_SUCCESS(Status)) {
5746                 ERR("delete_tree_item returned %08lx\n", Status);
5747                 return Status;
5748             }
5749         } else
5750             WARN("could not find BLOCK_GROUP_ITEM for chunk %I64x\n", c->offset);
5751     }
5752 
5753     if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
5754         remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
5755 
5756     RemoveEntryList(&c->list_entry);
5757 
5758     // clear raid56 incompat flag if dropping last RAID5/6 chunk
5759 
5760     if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
5761         LIST_ENTRY* le;
5762         bool clear_flag = true;
5763 
5764         le = Vcb->chunks.Flink;
5765         while (le != &Vcb->chunks) {
5766             chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
5767 
5768             if (c2->chunk_item->type & BLOCK_FLAG_RAID5 || c2->chunk_item->type & BLOCK_FLAG_RAID6) {
5769                 clear_flag = false;
5770                 break;
5771             }
5772 
5773             le = le->Flink;
5774         }
5775 
5776         if (clear_flag)
5777             Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID56;
5778     }
5779 
5780     // clear raid1c34 incompat flag if dropping last RAID5/6 chunk
5781 
5782     if (c->chunk_item->type & BLOCK_FLAG_RAID1C3 || c->chunk_item->type & BLOCK_FLAG_RAID1C4) {
5783         LIST_ENTRY* le;
5784         bool clear_flag = true;
5785 
5786         le = Vcb->chunks.Flink;
5787         while (le != &Vcb->chunks) {
5788             chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
5789 
5790             if (c2->chunk_item->type & BLOCK_FLAG_RAID1C3 || c2->chunk_item->type & BLOCK_FLAG_RAID1C4) {
5791                 clear_flag = false;
5792                 break;
5793             }
5794 
5795             le = le->Flink;
5796         }
5797 
5798         if (clear_flag)
5799             Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID1C34;
5800     }
5801 
5802     Vcb->superblock.bytes_used -= c->oldused;
5803 
5804     ExFreePool(c->chunk_item);
5805     ExFreePool(c->devices);
5806 
5807     while (!IsListEmpty(&c->space)) {
5808         space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
5809 
5810         RemoveEntryList(&s->list_entry);
5811         ExFreePool(s);
5812     }
5813 
5814     while (!IsListEmpty(&c->deleting)) {
5815         space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
5816 
5817         RemoveEntryList(&s->list_entry);
5818         ExFreePool(s);
5819     }
5820 
5821     release_chunk_lock(c, Vcb);
5822 
5823     ExDeleteResourceLite(&c->partial_stripes_lock);
5824     ExDeleteResourceLite(&c->range_locks_lock);
5825     ExDeleteResourceLite(&c->lock);
5826     ExDeleteResourceLite(&c->changed_extents_lock);
5827 
5828     ExFreePool(c);
5829 
5830     return STATUS_SUCCESS;
5831 }
5832 
partial_stripe_read(device_extension * Vcb,chunk * c,partial_stripe * ps,uint64_t startoff,uint16_t parity,ULONG offset,ULONG len)5833 static NTSTATUS partial_stripe_read(device_extension* Vcb, chunk* c, partial_stripe* ps, uint64_t startoff, uint16_t parity, ULONG offset, ULONG len) {
5834     NTSTATUS Status;
5835     ULONG sl = (ULONG)(c->chunk_item->stripe_length >> Vcb->sector_shift);
5836     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5837 
5838     while (len > 0) {
5839         ULONG readlen = min(offset + len, offset + (sl - (offset % sl))) - offset;
5840         uint16_t stripe;
5841 
5842         stripe = (parity + (offset / sl) + 1) % c->chunk_item->num_stripes;
5843 
5844         if (c->devices[stripe]->devobj) {
5845             Status = sync_read_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff + ((offset % sl) << Vcb->sector_shift),
5846                                     readlen << Vcb->sector_shift, ps->data + (offset << Vcb->sector_shift), false);
5847             if (!NT_SUCCESS(Status)) {
5848                 ERR("sync_read_phys returned %08lx\n", Status);
5849                 return Status;
5850             }
5851         } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
5852             uint16_t i;
5853             uint8_t* scratch;
5854 
5855             scratch = ExAllocatePoolWithTag(NonPagedPool, readlen << Vcb->sector_shift, ALLOC_TAG);
5856             if (!scratch) {
5857                 ERR("out of memory\n");
5858                 return STATUS_INSUFFICIENT_RESOURCES;
5859             }
5860 
5861             for (i = 0; i < c->chunk_item->num_stripes; i++) {
5862                 if (i != stripe) {
5863                     if (!c->devices[i]->devobj) {
5864                         ExFreePool(scratch);
5865                         return STATUS_UNEXPECTED_IO_ERROR;
5866                     }
5867 
5868                     if (i == 0 || (stripe == 0 && i == 1)) {
5869                         Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) << Vcb->sector_shift),
5870                                                 readlen << Vcb->sector_shift, ps->data + (offset << Vcb->sector_shift), false);
5871                         if (!NT_SUCCESS(Status)) {
5872                             ERR("sync_read_phys returned %08lx\n", Status);
5873                             ExFreePool(scratch);
5874                             return Status;
5875                         }
5876                     } else {
5877                         Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) << Vcb->sector_shift),
5878                                                 readlen << Vcb->sector_shift, scratch, false);
5879                         if (!NT_SUCCESS(Status)) {
5880                             ERR("sync_read_phys returned %08lx\n", Status);
5881                             ExFreePool(scratch);
5882                             return Status;
5883                         }
5884 
5885                         do_xor(ps->data + (offset << Vcb->sector_shift), scratch, readlen << Vcb->sector_shift);
5886                     }
5887                 }
5888             }
5889 
5890             ExFreePool(scratch);
5891         } else {
5892             uint8_t* scratch;
5893             uint16_t k, i, logstripe, error_stripe, num_errors = 0;
5894 
5895             scratch = ExAllocatePoolWithTag(NonPagedPool, (c->chunk_item->num_stripes + 2) * readlen << Vcb->sector_shift, ALLOC_TAG);
5896             if (!scratch) {
5897                 ERR("out of memory\n");
5898                 return STATUS_INSUFFICIENT_RESOURCES;
5899             }
5900 
5901             i = (parity + 1) % c->chunk_item->num_stripes;
5902             logstripe = (c->chunk_item->num_stripes + c->chunk_item->num_stripes - 1 - parity + stripe) % c->chunk_item->num_stripes;
5903 
5904             for (k = 0; k < c->chunk_item->num_stripes; k++) {
5905                 if (i != stripe) {
5906                     if (c->devices[i]->devobj) {
5907                         Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) << Vcb->sector_shift),
5908                                                 readlen << Vcb->sector_shift, scratch + (k * readlen << Vcb->sector_shift), false);
5909                         if (!NT_SUCCESS(Status)) {
5910                             ERR("sync_read_phys returned %08lx\n", Status);
5911                             num_errors++;
5912                             error_stripe = k;
5913                         }
5914                     } else {
5915                         num_errors++;
5916                         error_stripe = k;
5917                     }
5918 
5919                     if (num_errors > 1) {
5920                         ExFreePool(scratch);
5921                         return STATUS_UNEXPECTED_IO_ERROR;
5922                     }
5923                 }
5924 
5925                 i = (i + 1) % c->chunk_item->num_stripes;
5926             }
5927 
5928             if (num_errors == 0 || error_stripe == c->chunk_item->num_stripes - 1) {
5929                 for (k = 0; k < c->chunk_item->num_stripes - 1; k++) {
5930                     if (k != logstripe) {
5931                         if (k == 0 || (k == 1 && logstripe == 0)) {
5932                             RtlCopyMemory(ps->data + (offset << Vcb->sector_shift), scratch + (k * readlen << Vcb->sector_shift),
5933                                           readlen << Vcb->sector_shift);
5934                         } else {
5935                             do_xor(ps->data + (offset << Vcb->sector_shift), scratch + (k * readlen << Vcb->sector_shift),
5936                                    readlen << Vcb->sector_shift);
5937                         }
5938                     }
5939                 }
5940             } else {
5941                 raid6_recover2(scratch, c->chunk_item->num_stripes, readlen << Vcb->sector_shift, logstripe,
5942                                error_stripe, scratch + (c->chunk_item->num_stripes * readlen << Vcb->sector_shift));
5943 
5944                 RtlCopyMemory(ps->data + (offset << Vcb->sector_shift), scratch + (c->chunk_item->num_stripes * readlen << Vcb->sector_shift),
5945                               readlen << Vcb->sector_shift);
5946             }
5947 
5948             ExFreePool(scratch);
5949         }
5950 
5951         offset += readlen;
5952         len -= readlen;
5953     }
5954 
5955     return STATUS_SUCCESS;
5956 }
5957 
flush_partial_stripe(device_extension * Vcb,chunk * c,partial_stripe * ps)5958 NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps) {
5959     NTSTATUS Status;
5960     uint16_t parity2, stripe, startoffstripe;
5961     uint8_t* data;
5962     uint64_t startoff;
5963     ULONG runlength, index, last1;
5964     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5965     LIST_ENTRY* le;
5966     uint16_t k, num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
5967     uint64_t ps_length = num_data_stripes * c->chunk_item->stripe_length;
5968     ULONG stripe_length = (ULONG)c->chunk_item->stripe_length;
5969 
5970     // FIXME - do writes asynchronously?
5971 
5972     get_raid0_offset(ps->address - c->offset, stripe_length, num_data_stripes, &startoff, &startoffstripe);
5973 
5974     parity2 = (((ps->address - c->offset) / ps_length) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
5975 
5976     // read data (or reconstruct if degraded)
5977 
5978     runlength = RtlFindFirstRunClear(&ps->bmp, &index);
5979     last1 = 0;
5980 
5981     while (runlength != 0) {
5982         if (index >= ps->bmplen)
5983             break;
5984 
5985         if (index + runlength >= ps->bmplen) {
5986             runlength = ps->bmplen - index;
5987 
5988             if (runlength == 0)
5989                 break;
5990         }
5991 
5992         if (index > last1) {
5993             Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, index - last1);
5994             if (!NT_SUCCESS(Status)) {
5995                 ERR("partial_stripe_read returned %08lx\n", Status);
5996                 return Status;
5997             }
5998         }
5999 
6000         last1 = index + runlength;
6001 
6002         runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
6003     }
6004 
6005     if (last1 < ps_length >> Vcb->sector_shift) {
6006         Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, (ULONG)((ps_length >> Vcb->sector_shift) - last1));
6007         if (!NT_SUCCESS(Status)) {
6008             ERR("partial_stripe_read returned %08lx\n", Status);
6009             return Status;
6010         }
6011     }
6012 
6013     // set unallocated data to 0
6014     le = c->space.Flink;
6015     while (le != &c->space) {
6016         space* s = CONTAINING_RECORD(le, space, list_entry);
6017 
6018         if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
6019             uint64_t start = max(ps->address, s->address);
6020             uint64_t end = min(ps->address + ps_length, s->address + s->size);
6021 
6022             RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
6023         } else if (s->address >= ps->address + ps_length)
6024             break;
6025 
6026         le = le->Flink;
6027     }
6028 
6029     le = c->deleting.Flink;
6030     while (le != &c->deleting) {
6031         space* s = CONTAINING_RECORD(le, space, list_entry);
6032 
6033         if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
6034             uint64_t start = max(ps->address, s->address);
6035             uint64_t end = min(ps->address + ps_length, s->address + s->size);
6036 
6037             RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
6038         } else if (s->address >= ps->address + ps_length)
6039             break;
6040 
6041         le = le->Flink;
6042     }
6043 
6044     stripe = (parity2 + 1) % c->chunk_item->num_stripes;
6045 
6046     data = ps->data;
6047     for (k = 0; k < num_data_stripes; k++) {
6048         if (c->devices[stripe]->devobj) {
6049             Status = write_data_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff, data, stripe_length);
6050             if (!NT_SUCCESS(Status)) {
6051                 ERR("write_data_phys returned %08lx\n", Status);
6052                 return Status;
6053             }
6054         }
6055 
6056         data += stripe_length;
6057         stripe = (stripe + 1) % c->chunk_item->num_stripes;
6058     }
6059 
6060     // write parity
6061     if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
6062         if (c->devices[parity2]->devobj) {
6063             uint16_t i;
6064 
6065             for (i = 1; i < c->chunk_item->num_stripes - 1; i++) {
6066                 do_xor(ps->data, ps->data + (i * stripe_length), stripe_length);
6067             }
6068 
6069             Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff, ps->data, stripe_length);
6070             if (!NT_SUCCESS(Status)) {
6071                 ERR("write_data_phys returned %08lx\n", Status);
6072                 return Status;
6073             }
6074         }
6075     } else {
6076         uint16_t parity1 = (parity2 + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
6077 
6078         if (c->devices[parity1]->devobj || c->devices[parity2]->devobj) {
6079             uint8_t* scratch;
6080             uint16_t i;
6081 
6082             scratch = ExAllocatePoolWithTag(NonPagedPool, stripe_length * 2, ALLOC_TAG);
6083             if (!scratch) {
6084                 ERR("out of memory\n");
6085                 return STATUS_INSUFFICIENT_RESOURCES;
6086             }
6087 
6088             i = c->chunk_item->num_stripes - 3;
6089 
6090             while (true) {
6091                 if (i == c->chunk_item->num_stripes - 3) {
6092                     RtlCopyMemory(scratch, ps->data + (i * stripe_length), stripe_length);
6093                     RtlCopyMemory(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
6094                 } else {
6095                     do_xor(scratch, ps->data + (i * stripe_length), stripe_length);
6096 
6097                     galois_double(scratch + stripe_length, stripe_length);
6098                     do_xor(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
6099                 }
6100 
6101                 if (i == 0)
6102                     break;
6103 
6104                 i--;
6105             }
6106 
6107             if (c->devices[parity1]->devobj) {
6108                 Status = write_data_phys(c->devices[parity1]->devobj, c->devices[parity1]->fileobj, cis[parity1].offset + startoff, scratch, stripe_length);
6109                 if (!NT_SUCCESS(Status)) {
6110                     ERR("write_data_phys returned %08lx\n", Status);
6111                     ExFreePool(scratch);
6112                     return Status;
6113                 }
6114             }
6115 
6116             if (c->devices[parity2]->devobj) {
6117                 Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff,
6118                                          scratch + stripe_length, stripe_length);
6119                 if (!NT_SUCCESS(Status)) {
6120                     ERR("write_data_phys returned %08lx\n", Status);
6121                     ExFreePool(scratch);
6122                     return Status;
6123                 }
6124             }
6125 
6126             ExFreePool(scratch);
6127         }
6128     }
6129 
6130     return STATUS_SUCCESS;
6131 }
6132 
update_chunks(device_extension * Vcb,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)6133 static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
6134     LIST_ENTRY *le, *le2;
6135     NTSTATUS Status;
6136     uint64_t used_minus_cache;
6137 
6138     ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
6139 
6140     // FIXME - do tree chunks before data chunks
6141 
6142     le = Vcb->chunks.Flink;
6143     while (le != &Vcb->chunks) {
6144         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
6145 
6146         le2 = le->Flink;
6147 
6148         if (c->changed) {
6149             acquire_chunk_lock(c, Vcb);
6150 
6151             // flush partial stripes
6152             if (!Vcb->readonly && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) {
6153                 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
6154 
6155                 while (!IsListEmpty(&c->partial_stripes)) {
6156                     partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
6157 
6158                     Status = flush_partial_stripe(Vcb, c, ps);
6159 
6160                     if (ps->bmparr)
6161                         ExFreePool(ps->bmparr);
6162 
6163                     ExFreePool(ps);
6164 
6165                     if (!NT_SUCCESS(Status)) {
6166                         ERR("flush_partial_stripe returned %08lx\n", Status);
6167                         ExReleaseResourceLite(&c->partial_stripes_lock);
6168                         release_chunk_lock(c, Vcb);
6169                         ExReleaseResourceLite(&Vcb->chunk_lock);
6170                         return Status;
6171                     }
6172                 }
6173 
6174                 ExReleaseResourceLite(&c->partial_stripes_lock);
6175             }
6176 
6177             if (c->list_entry_balance.Flink) {
6178                 release_chunk_lock(c, Vcb);
6179                 le = le2;
6180                 continue;
6181             }
6182 
6183             if (c->space_changed || c->created) {
6184                 bool created = c->created;
6185 
6186                 used_minus_cache = c->used;
6187 
6188                 // subtract self-hosted cache
6189                 if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
6190                     LIST_ENTRY* le3;
6191 
6192                     le3 = c->cache->extents.Flink;
6193                     while (le3 != &c->cache->extents) {
6194                         extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
6195                         EXTENT_DATA* ed = &ext->extent_data;
6196 
6197                         if (!ext->ignore) {
6198                             if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
6199                                 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
6200 
6201                                 if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
6202                                     used_minus_cache -= ed2->size;
6203                             }
6204                         }
6205 
6206                         le3 = le3->Flink;
6207                     }
6208                 }
6209 
6210                 if (used_minus_cache == 0) {
6211                     Status = drop_chunk(Vcb, c, batchlist, Irp, rollback);
6212                     if (!NT_SUCCESS(Status)) {
6213                         ERR("drop_chunk returned %08lx\n", Status);
6214                         release_chunk_lock(c, Vcb);
6215                         ExReleaseResourceLite(&Vcb->chunk_lock);
6216                         return Status;
6217                     }
6218 
6219                     // c is now freed, so avoid releasing non-existent lock
6220                     le = le2;
6221                     continue;
6222                 } else if (c->created) {
6223                     Status = create_chunk(Vcb, c, Irp);
6224                     if (!NT_SUCCESS(Status)) {
6225                         ERR("create_chunk returned %08lx\n", Status);
6226                         release_chunk_lock(c, Vcb);
6227                         ExReleaseResourceLite(&Vcb->chunk_lock);
6228                         return Status;
6229                     }
6230                 }
6231 
6232                 if (used_minus_cache > 0 || created)
6233                     release_chunk_lock(c, Vcb);
6234             } else
6235                 release_chunk_lock(c, Vcb);
6236         }
6237 
6238         le = le2;
6239     }
6240 
6241     ExReleaseResourceLite(&Vcb->chunk_lock);
6242 
6243     return STATUS_SUCCESS;
6244 }
6245 
delete_root_ref(device_extension * Vcb,uint64_t subvolid,uint64_t parsubvolid,uint64_t parinode,PANSI_STRING utf8,PIRP Irp)6246 static NTSTATUS delete_root_ref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, uint64_t parinode, PANSI_STRING utf8, PIRP Irp) {
6247     KEY searchkey;
6248     traverse_ptr tp;
6249     NTSTATUS Status;
6250 
6251     searchkey.obj_id = parsubvolid;
6252     searchkey.obj_type = TYPE_ROOT_REF;
6253     searchkey.offset = subvolid;
6254 
6255     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6256     if (!NT_SUCCESS(Status)) {
6257         ERR("error - find_item returned %08lx\n", Status);
6258         return Status;
6259     }
6260 
6261     if (!keycmp(searchkey, tp.item->key)) {
6262         if (tp.item->size < sizeof(ROOT_REF)) {
6263             ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
6264             return STATUS_INTERNAL_ERROR;
6265         } else {
6266             ROOT_REF* rr;
6267             ULONG len;
6268 
6269             rr = (ROOT_REF*)tp.item->data;
6270             len = tp.item->size;
6271 
6272             do {
6273                 uint16_t itemlen;
6274 
6275                 if (len < sizeof(ROOT_REF) || len < offsetof(ROOT_REF, name[0]) + rr->n) {
6276                     ERR("(%I64x,%x,%I64x) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6277                     break;
6278                 }
6279 
6280                 itemlen = (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n;
6281 
6282                 if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
6283                     uint16_t newlen = tp.item->size - itemlen;
6284 
6285                     Status = delete_tree_item(Vcb, &tp);
6286                     if (!NT_SUCCESS(Status)) {
6287                         ERR("delete_tree_item returned %08lx\n", Status);
6288                         return Status;
6289                     }
6290 
6291                     if (newlen == 0) {
6292                         TRACE("deleting (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6293                     } else {
6294                         uint8_t *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
6295 
6296                         if (!newrr) {
6297                             ERR("out of memory\n");
6298                             return STATUS_INSUFFICIENT_RESOURCES;
6299                         }
6300 
6301                         TRACE("modifying (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6302 
6303                         if ((uint8_t*)rr > tp.item->data) {
6304                             RtlCopyMemory(newrr, tp.item->data, (uint8_t*)rr - tp.item->data);
6305                             rroff = newrr + ((uint8_t*)rr - tp.item->data);
6306                         } else {
6307                             rroff = newrr;
6308                         }
6309 
6310                         if ((uint8_t*)&rr->name[rr->n] < tp.item->data + tp.item->size)
6311                             RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((uint8_t*)&rr->name[rr->n] - tp.item->data));
6312 
6313                         Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp);
6314                         if (!NT_SUCCESS(Status)) {
6315                             ERR("insert_tree_item returned %08lx\n", Status);
6316                             ExFreePool(newrr);
6317                             return Status;
6318                         }
6319                     }
6320 
6321                     break;
6322                 }
6323 
6324                 if (len > itemlen) {
6325                     len -= itemlen;
6326                     rr = (ROOT_REF*)&rr->name[rr->n];
6327                 } else
6328                     break;
6329             } while (len > 0);
6330         }
6331     } else {
6332         WARN("could not find ROOT_REF entry for subvol %I64x in %I64x\n", searchkey.offset, searchkey.obj_id);
6333         return STATUS_NOT_FOUND;
6334     }
6335 
6336     return STATUS_SUCCESS;
6337 }
6338 
6339 #ifdef _MSC_VER
6340 #pragma warning(push)
6341 #pragma warning(suppress: 28194)
6342 #endif
add_root_ref(_In_ device_extension * Vcb,_In_ uint64_t subvolid,_In_ uint64_t parsubvolid,_In_ __drv_aliasesMem ROOT_REF * rr,_In_opt_ PIRP Irp)6343 static NTSTATUS add_root_ref(_In_ device_extension* Vcb, _In_ uint64_t subvolid, _In_ uint64_t parsubvolid, _In_ __drv_aliasesMem ROOT_REF* rr, _In_opt_ PIRP Irp) {
6344     KEY searchkey;
6345     traverse_ptr tp;
6346     NTSTATUS Status;
6347 
6348     searchkey.obj_id = parsubvolid;
6349     searchkey.obj_type = TYPE_ROOT_REF;
6350     searchkey.offset = subvolid;
6351 
6352     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6353     if (!NT_SUCCESS(Status)) {
6354         ERR("error - find_item returned %08lx\n", Status);
6355         return Status;
6356     }
6357 
6358     if (!keycmp(searchkey, tp.item->key)) {
6359         uint16_t rrsize = tp.item->size + (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n;
6360         uint8_t* rr2;
6361 
6362         rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
6363         if (!rr2) {
6364             ERR("out of memory\n");
6365             return STATUS_INSUFFICIENT_RESOURCES;
6366         }
6367 
6368         if (tp.item->size > 0)
6369             RtlCopyMemory(rr2, tp.item->data, tp.item->size);
6370 
6371         RtlCopyMemory(rr2 + tp.item->size, rr, offsetof(ROOT_REF, name[0]) + rr->n);
6372         ExFreePool(rr);
6373 
6374         Status = delete_tree_item(Vcb, &tp);
6375         if (!NT_SUCCESS(Status)) {
6376             ERR("delete_tree_item returned %08lx\n", Status);
6377             ExFreePool(rr2);
6378             return Status;
6379         }
6380 
6381         Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp);
6382         if (!NT_SUCCESS(Status)) {
6383             ERR("insert_tree_item returned %08lx\n", Status);
6384             ExFreePool(rr2);
6385             return Status;
6386         }
6387     } else {
6388         Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n, NULL, Irp);
6389         if (!NT_SUCCESS(Status)) {
6390             ERR("insert_tree_item returned %08lx\n", Status);
6391             ExFreePool(rr);
6392             return Status;
6393         }
6394     }
6395 
6396     return STATUS_SUCCESS;
6397 }
6398 #ifdef _MSC_VER
6399 #pragma warning(pop)
6400 #endif
6401 
update_root_backref(device_extension * Vcb,uint64_t subvolid,uint64_t parsubvolid,PIRP Irp)6402 static NTSTATUS update_root_backref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, PIRP Irp) {
6403     KEY searchkey;
6404     traverse_ptr tp;
6405     uint8_t* data;
6406     uint16_t datalen;
6407     NTSTATUS Status;
6408 
6409     searchkey.obj_id = parsubvolid;
6410     searchkey.obj_type = TYPE_ROOT_REF;
6411     searchkey.offset = subvolid;
6412 
6413     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6414     if (!NT_SUCCESS(Status)) {
6415         ERR("error - find_item returned %08lx\n", Status);
6416         return Status;
6417     }
6418 
6419     if (!keycmp(tp.item->key, searchkey) && tp.item->size > 0) {
6420         datalen = tp.item->size;
6421 
6422         data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
6423         if (!data) {
6424             ERR("out of memory\n");
6425             return STATUS_INSUFFICIENT_RESOURCES;
6426         }
6427 
6428         RtlCopyMemory(data, tp.item->data, datalen);
6429     } else {
6430         datalen = 0;
6431         data = NULL;
6432     }
6433 
6434     searchkey.obj_id = subvolid;
6435     searchkey.obj_type = TYPE_ROOT_BACKREF;
6436     searchkey.offset = parsubvolid;
6437 
6438     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6439     if (!NT_SUCCESS(Status)) {
6440         ERR("error - find_item returned %08lx\n", Status);
6441 
6442         if (datalen > 0)
6443             ExFreePool(data);
6444 
6445         return Status;
6446     }
6447 
6448     if (!keycmp(tp.item->key, searchkey)) {
6449         Status = delete_tree_item(Vcb, &tp);
6450         if (!NT_SUCCESS(Status)) {
6451             ERR("delete_tree_item returned %08lx\n", Status);
6452 
6453             if (datalen > 0)
6454                 ExFreePool(data);
6455 
6456             return Status;
6457         }
6458     }
6459 
6460     if (datalen > 0) {
6461         Status = insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp);
6462         if (!NT_SUCCESS(Status)) {
6463             ERR("insert_tree_item returned %08lx\n", Status);
6464             ExFreePool(data);
6465             return Status;
6466         }
6467     }
6468 
6469     return STATUS_SUCCESS;
6470 }
6471 
add_root_item_to_cache(device_extension * Vcb,uint64_t root,PIRP Irp)6472 static NTSTATUS add_root_item_to_cache(device_extension* Vcb, uint64_t root, PIRP Irp) {
6473     KEY searchkey;
6474     traverse_ptr tp;
6475     NTSTATUS Status;
6476 
6477     searchkey.obj_id = root;
6478     searchkey.obj_type = TYPE_ROOT_ITEM;
6479     searchkey.offset = 0xffffffffffffffff;
6480 
6481     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6482     if (!NT_SUCCESS(Status)) {
6483         ERR("error - find_item returned %08lx\n", Status);
6484         return Status;
6485     }
6486 
6487     if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6488         ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
6489         return STATUS_INTERNAL_ERROR;
6490     }
6491 
6492     if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
6493         ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6494         if (!ri) {
6495             ERR("out of memory\n");
6496             return STATUS_INSUFFICIENT_RESOURCES;
6497         }
6498 
6499         if (tp.item->size > 0)
6500             RtlCopyMemory(ri, tp.item->data, tp.item->size);
6501 
6502         RtlZeroMemory(((uint8_t*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
6503 
6504         Status = delete_tree_item(Vcb, &tp);
6505         if (!NT_SUCCESS(Status)) {
6506             ERR("delete_tree_item returned %08lx\n", Status);
6507             ExFreePool(ri);
6508             return Status;
6509         }
6510 
6511         Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6512         if (!NT_SUCCESS(Status)) {
6513             ERR("insert_tree_item returned %08lx\n", Status);
6514             ExFreePool(ri);
6515             return Status;
6516         }
6517     } else {
6518         tp.tree->write = true;
6519     }
6520 
6521     return STATUS_SUCCESS;
6522 }
6523 
flush_fileref(file_ref * fileref,LIST_ENTRY * batchlist,PIRP Irp)6524 static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp) {
6525     NTSTATUS Status;
6526 
6527     // if fileref created and then immediately deleted, do nothing
6528     if (fileref->created && fileref->deleted) {
6529         fileref->dirty = false;
6530         return STATUS_SUCCESS;
6531     }
6532 
6533     if (fileref->fcb->ads) {
6534         fileref->dirty = false;
6535         return STATUS_SUCCESS;
6536     }
6537 
6538     if (fileref->created) {
6539         uint16_t disize;
6540         DIR_ITEM *di, *di2;
6541         uint32_t crc32;
6542 
6543         crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6544 
6545         disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6546         di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6547         if (!di) {
6548             ERR("out of memory\n");
6549             return STATUS_INSUFFICIENT_RESOURCES;
6550         }
6551 
6552         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6553             di->key.obj_id = fileref->fcb->inode;
6554             di->key.obj_type = TYPE_INODE_ITEM;
6555             di->key.offset = 0;
6556         } else { // subvolume
6557             di->key.obj_id = fileref->fcb->subvol->id;
6558             di->key.obj_type = TYPE_ROOT_ITEM;
6559             di->key.offset = 0xffffffffffffffff;
6560         }
6561 
6562         di->transid = fileref->fcb->Vcb->superblock.generation;
6563         di->m = 0;
6564         di->n = (uint16_t)fileref->dc->utf8.Length;
6565         di->type = fileref->fcb->type;
6566         RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6567 
6568         di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6569         if (!di2) {
6570             ERR("out of memory\n");
6571             return STATUS_INSUFFICIENT_RESOURCES;
6572         }
6573 
6574         RtlCopyMemory(di2, di, disize);
6575 
6576         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6577                                         fileref->dc->index, di, disize, Batch_Insert);
6578         if (!NT_SUCCESS(Status)) {
6579             ERR("insert_tree_item_batch returned %08lx\n", Status);
6580             return Status;
6581         }
6582 
6583         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6584                                         di2, disize, Batch_DirItem);
6585         if (!NT_SUCCESS(Status)) {
6586             ERR("insert_tree_item_batch returned %08lx\n", Status);
6587             return Status;
6588         }
6589 
6590         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6591             INODE_REF* ir;
6592 
6593             ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6594             if (!ir) {
6595                 ERR("out of memory\n");
6596                 return STATUS_INSUFFICIENT_RESOURCES;
6597             }
6598 
6599             ir->index = fileref->dc->index;
6600             ir->n = fileref->dc->utf8.Length;
6601             RtlCopyMemory(ir->name, fileref->dc->utf8.Buffer, ir->n);
6602 
6603             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6604                                             ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef);
6605             if (!NT_SUCCESS(Status)) {
6606                 ERR("insert_tree_item_batch returned %08lx\n", Status);
6607                 return Status;
6608             }
6609         } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) {
6610             ULONG rrlen;
6611             ROOT_REF* rr;
6612 
6613             rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6614 
6615             rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6616             if (!rr) {
6617                 ERR("out of memory\n");
6618                 return STATUS_INSUFFICIENT_RESOURCES;
6619             }
6620 
6621             rr->dir = fileref->parent->fcb->inode;
6622             rr->index = fileref->dc->index;
6623             rr->n = fileref->dc->utf8.Length;
6624             RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6625 
6626             Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6627             if (!NT_SUCCESS(Status)) {
6628                 ERR("add_root_ref returned %08lx\n", Status);
6629                 return Status;
6630             }
6631 
6632             Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6633             if (!NT_SUCCESS(Status)) {
6634                 ERR("update_root_backref returned %08lx\n", Status);
6635                 return Status;
6636             }
6637         }
6638 
6639         fileref->created = false;
6640     } else if (fileref->deleted) {
6641         uint32_t crc32;
6642         ANSI_STRING* name;
6643         DIR_ITEM* di;
6644 
6645         name = &fileref->oldutf8;
6646 
6647         crc32 = calc_crc32c(0xfffffffe, (uint8_t*)name->Buffer, name->Length);
6648 
6649         di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG);
6650         if (!di) {
6651             ERR("out of memory\n");
6652             return STATUS_INSUFFICIENT_RESOURCES;
6653         }
6654 
6655         di->m = 0;
6656         di->n = name->Length;
6657         RtlCopyMemory(di->name, name->Buffer, name->Length);
6658 
6659         // delete DIR_ITEM (0x54)
6660 
6661         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6662                                         crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem);
6663         if (!NT_SUCCESS(Status)) {
6664             ERR("insert_tree_item_batch returned %08lx\n", Status);
6665             return Status;
6666         }
6667 
6668         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6669             INODE_REF* ir;
6670 
6671             // delete INODE_REF (0xc)
6672 
6673             ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG);
6674             if (!ir) {
6675                 ERR("out of memory\n");
6676                 return STATUS_INSUFFICIENT_RESOURCES;
6677             }
6678 
6679             ir->index = fileref->oldindex;
6680             ir->n = name->Length;
6681             RtlCopyMemory(ir->name, name->Buffer, name->Length);
6682 
6683             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF,
6684                                             fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef);
6685             if (!NT_SUCCESS(Status)) {
6686                 ERR("insert_tree_item_batch returned %08lx\n", Status);
6687                 return Status;
6688             }
6689         } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6690             Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp);
6691             if (!NT_SUCCESS(Status)) {
6692                 ERR("delete_root_ref returned %08lx\n", Status);
6693                 return Status;
6694             }
6695 
6696             Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6697             if (!NT_SUCCESS(Status)) {
6698                 ERR("update_root_backref returned %08lx\n", Status);
6699                 return Status;
6700             }
6701         }
6702 
6703         // delete DIR_INDEX (0x60)
6704 
6705         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6706                                         fileref->oldindex, NULL, 0, Batch_Delete);
6707         if (!NT_SUCCESS(Status)) {
6708             ERR("insert_tree_item_batch returned %08lx\n", Status);
6709             return Status;
6710         }
6711 
6712         if (fileref->oldutf8.Buffer) {
6713             ExFreePool(fileref->oldutf8.Buffer);
6714             fileref->oldutf8.Buffer = NULL;
6715         }
6716     } else { // rename or change type
6717         PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->dc->utf8;
6718         uint32_t crc32, oldcrc32;
6719         uint16_t disize;
6720         DIR_ITEM *olddi, *di, *di2;
6721 
6722         crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6723 
6724         if (!fileref->oldutf8.Buffer)
6725             oldcrc32 = crc32;
6726         else
6727             oldcrc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
6728 
6729         olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG);
6730         if (!olddi) {
6731             ERR("out of memory\n");
6732             return STATUS_INSUFFICIENT_RESOURCES;
6733         }
6734 
6735         olddi->m = 0;
6736         olddi->n = (uint16_t)oldutf8->Length;
6737         RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length);
6738 
6739         // delete DIR_ITEM (0x54)
6740 
6741         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6742                                         oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem);
6743         if (!NT_SUCCESS(Status)) {
6744             ERR("insert_tree_item_batch returned %08lx\n", Status);
6745             ExFreePool(olddi);
6746             return Status;
6747         }
6748 
6749         // add DIR_ITEM (0x54)
6750 
6751         disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6752         di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6753         if (!di) {
6754             ERR("out of memory\n");
6755             return STATUS_INSUFFICIENT_RESOURCES;
6756         }
6757 
6758         di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6759         if (!di2) {
6760             ERR("out of memory\n");
6761             ExFreePool(di);
6762             return STATUS_INSUFFICIENT_RESOURCES;
6763         }
6764 
6765         if (fileref->dc)
6766             di->key = fileref->dc->key;
6767         else if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6768             di->key.obj_id = fileref->fcb->inode;
6769             di->key.obj_type = TYPE_INODE_ITEM;
6770             di->key.offset = 0;
6771         } else { // subvolume
6772             di->key.obj_id = fileref->fcb->subvol->id;
6773             di->key.obj_type = TYPE_ROOT_ITEM;
6774             di->key.offset = 0xffffffffffffffff;
6775         }
6776 
6777         di->transid = fileref->fcb->Vcb->superblock.generation;
6778         di->m = 0;
6779         di->n = (uint16_t)fileref->dc->utf8.Length;
6780         di->type = fileref->fcb->type;
6781         RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6782 
6783         RtlCopyMemory(di2, di, disize);
6784 
6785         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6786                                         di, disize, Batch_DirItem);
6787         if (!NT_SUCCESS(Status)) {
6788             ERR("insert_tree_item_batch returned %08lx\n", Status);
6789             ExFreePool(di2);
6790             ExFreePool(di);
6791             return Status;
6792         }
6793 
6794         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6795             INODE_REF *ir, *ir2;
6796 
6797             // delete INODE_REF (0xc)
6798 
6799             ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG);
6800             if (!ir) {
6801                 ERR("out of memory\n");
6802                 ExFreePool(di2);
6803                 return STATUS_INSUFFICIENT_RESOURCES;
6804             }
6805 
6806             ir->index = fileref->dc->index;
6807             ir->n = oldutf8->Length;
6808             RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n);
6809 
6810             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6811                                             ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef);
6812             if (!NT_SUCCESS(Status)) {
6813                 ERR("insert_tree_item_batch returned %08lx\n", Status);
6814                 ExFreePool(ir);
6815                 ExFreePool(di2);
6816                 return Status;
6817             }
6818 
6819             // add INODE_REF (0xc)
6820 
6821             ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6822             if (!ir2) {
6823                 ERR("out of memory\n");
6824                 ExFreePool(di2);
6825                 return STATUS_INSUFFICIENT_RESOURCES;
6826             }
6827 
6828             ir2->index = fileref->dc->index;
6829             ir2->n = fileref->dc->utf8.Length;
6830             RtlCopyMemory(ir2->name, fileref->dc->utf8.Buffer, ir2->n);
6831 
6832             Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6833                                             ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef);
6834             if (!NT_SUCCESS(Status)) {
6835                 ERR("insert_tree_item_batch returned %08lx\n", Status);
6836                 ExFreePool(ir2);
6837                 ExFreePool(di2);
6838                 return Status;
6839             }
6840         } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6841             ULONG rrlen;
6842             ROOT_REF* rr;
6843 
6844             Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp);
6845             if (!NT_SUCCESS(Status)) {
6846                 ERR("delete_root_ref returned %08lx\n", Status);
6847                 ExFreePool(di2);
6848                 return Status;
6849             }
6850 
6851             rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6852 
6853             rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6854             if (!rr) {
6855                 ERR("out of memory\n");
6856                 ExFreePool(di2);
6857                 return STATUS_INSUFFICIENT_RESOURCES;
6858             }
6859 
6860             rr->dir = fileref->parent->fcb->inode;
6861             rr->index = fileref->dc->index;
6862             rr->n = fileref->dc->utf8.Length;
6863             RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6864 
6865             Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6866             if (!NT_SUCCESS(Status)) {
6867                 ERR("add_root_ref returned %08lx\n", Status);
6868                 ExFreePool(di2);
6869                 return Status;
6870             }
6871 
6872             Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6873             if (!NT_SUCCESS(Status)) {
6874                 ERR("update_root_backref returned %08lx\n", Status);
6875                 ExFreePool(di2);
6876                 return Status;
6877             }
6878         }
6879 
6880         // delete DIR_INDEX (0x60)
6881 
6882         Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6883                                         fileref->dc->index, NULL, 0, Batch_Delete);
6884         if (!NT_SUCCESS(Status)) {
6885             ERR("insert_tree_item_batch returned %08lx\n", Status);
6886             ExFreePool(di2);
6887             return Status;
6888         }
6889 
6890         // add DIR_INDEX (0x60)
6891 
6892        Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6893                                        fileref->dc->index, di2, disize, Batch_Insert);
6894        if (!NT_SUCCESS(Status)) {
6895             ERR("insert_tree_item_batch returned %08lx\n", Status);
6896             ExFreePool(di2);
6897             return Status;
6898         }
6899 
6900         if (fileref->oldutf8.Buffer) {
6901             ExFreePool(fileref->oldutf8.Buffer);
6902             fileref->oldutf8.Buffer = NULL;
6903         }
6904     }
6905 
6906     fileref->dirty = false;
6907 
6908     return STATUS_SUCCESS;
6909 }
6910 
/* Sends an ATA pass-through request carrying IDE_COMMAND_FLUSH_CACHE to every
 * device of the volume that has a device object, is not read-only and has
 * can_flush set, then waits until all of the requests have completed.
 *
 * The work is done in three passes over Vcb->devices:
 *  1. count the eligible devices, to size the stripe array;
 *  2. allocate and fill in one IRP per eligible device;
 *  3. send the IRPs that were successfully allocated.
 *
 * context.left is set to the number of IRPs that will actually be sent, and
 * only before the first one is dispatched. Counting a device whose IRP could
 * not be allocated would leave the final wait with a completion that never
 * arrives.
 * NOTE(review): this relies on ioctl_completion (defined elsewhere)
 * decrementing context.left once per completed IRP and signalling
 * context.Event when it reaches zero - confirm against its definition.
 *
 * The three passes assume the device list and the per-device flags do not
 * change while this function runs; the num < count checks only stop the
 * stripe array from being overrun if that assumption is ever violated.
 *
 * Failures are logged and otherwise ignored; nothing is returned. */
static void flush_disk_caches(device_extension* Vcb) {
    LIST_ENTRY* le;
    ioctl_context context;
    ULONG num, count;

    // pass 1: how many devices are we going to talk to?
    count = 0;

    le = Vcb->devices.Flink;

    while (le != &Vcb->devices) {
        device* dev = CONTAINING_RECORD(le, device, list_entry);

        if (dev->devobj && !dev->readonly && dev->can_flush)
            count++;

        le = le->Flink;
    }

    if (count == 0)
        return;

    KeInitializeEvent(&context.Event, NotificationEvent, false);

    context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * count, ALLOC_TAG);
    if (!context.stripes) {
        ERR("out of memory\n");
        return;
    }

    RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * count);

    // pass 2: build the requests, but do not send anything yet
    context.left = 0;
    num = 0;

    le = Vcb->devices.Flink;

    while (le != &Vcb->devices) {
        device* dev = CONTAINING_RECORD(le, device, list_entry);

        if (dev->devobj && !dev->readonly && dev->can_flush && num < count) {
            ioctl_context_stripe* stripe = &context.stripes[num];

            num++;

            RtlZeroMemory(&stripe->apte, sizeof(ATA_PASS_THROUGH_EX));

            stripe->apte.Length = sizeof(ATA_PASS_THROUGH_EX);
            stripe->apte.TimeOutValue = 5;
            stripe->apte.CurrentTaskFile[6] = IDE_COMMAND_FLUSH_CACHE;

            stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);

            if (!stripe->Irp) {
                // this device is skipped and deliberately not counted in context.left
                ERR("IoAllocateIrp failed\n");
            } else {
                PIO_STACK_LOCATION IrpSp = IoGetNextIrpStackLocation(stripe->Irp);

                IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
                IrpSp->FileObject = dev->fileobj;

                IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_ATA_PASS_THROUGH;
                IrpSp->Parameters.DeviceIoControl.InputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
                IrpSp->Parameters.DeviceIoControl.OutputBufferLength = sizeof(ATA_PASS_THROUGH_EX);

                // the same apte structure serves as both input and output buffer
                stripe->Irp->AssociatedIrp.SystemBuffer = &stripe->apte;
                stripe->Irp->Flags |= IRP_BUFFERED_IO | IRP_INPUT_OPERATION;
                stripe->Irp->UserBuffer = &stripe->apte;
                stripe->Irp->UserIosb = &stripe->iosb;

                IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);

                context.left++;
            }
        }

        le = le->Flink;
    }

    // every allocation failed: nothing to send, nothing to wait for
    if (context.left == 0) {
        ExFreePool(context.stripes);
        return;
    }

    // pass 3: dispatch the requests that were built
    num = 0;

    le = Vcb->devices.Flink;

    while (le != &Vcb->devices) {
        device* dev = CONTAINING_RECORD(le, device, list_entry);

        if (dev->devobj && !dev->readonly && dev->can_flush && num < count) {
            if (context.stripes[num].Irp)
                IoCallDriver(dev->devobj, context.stripes[num].Irp);

            num++;
        }

        le = le->Flink;
    }

    KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);

    for (unsigned int i = 0; i < num; i++) {
        if (context.stripes[i].Irp)
            IoFreeIrp(context.stripes[i].Irp);
    }

    ExFreePool(context.stripes);
}
6999 
flush_changed_dev_stats(device_extension * Vcb,device * dev,PIRP Irp)7000 static NTSTATUS flush_changed_dev_stats(device_extension* Vcb, device* dev, PIRP Irp) {
7001     NTSTATUS Status;
7002     KEY searchkey;
7003     traverse_ptr tp;
7004     uint16_t statslen;
7005     uint64_t* stats;
7006 
7007     searchkey.obj_id = 0;
7008     searchkey.obj_type = TYPE_DEV_STATS;
7009     searchkey.offset = dev->devitem.dev_id;
7010 
7011     Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp);
7012     if (!NT_SUCCESS(Status)) {
7013         ERR("find_item returned %08lx\n", Status);
7014         return Status;
7015     }
7016 
7017     if (!keycmp(tp.item->key, searchkey)) {
7018         Status = delete_tree_item(Vcb, &tp);
7019         if (!NT_SUCCESS(Status)) {
7020             ERR("delete_tree_item returned %08lx\n", Status);
7021             return Status;
7022         }
7023     }
7024 
7025     statslen = sizeof(uint64_t) * 5;
7026     stats = ExAllocatePoolWithTag(PagedPool, statslen, ALLOC_TAG);
7027     if (!stats) {
7028         ERR("out of memory\n");
7029         return STATUS_INSUFFICIENT_RESOURCES;
7030     }
7031 
7032     RtlCopyMemory(stats, dev->stats, statslen);
7033 
7034     Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, dev->devitem.dev_id, stats, statslen, NULL, Irp);
7035     if (!NT_SUCCESS(Status)) {
7036         ERR("insert_tree_item returned %08lx\n", Status);
7037         ExFreePool(stats);
7038         return Status;
7039     }
7040 
7041     return STATUS_SUCCESS;
7042 }
7043 
flush_subvol(device_extension * Vcb,root * r,PIRP Irp)7044 static NTSTATUS flush_subvol(device_extension* Vcb, root* r, PIRP Irp) {
7045     NTSTATUS Status;
7046 
7047     if (r != Vcb->root_root && r != Vcb->chunk_root) {
7048         KEY searchkey;
7049         traverse_ptr tp;
7050         ROOT_ITEM* ri;
7051 
7052         searchkey.obj_id = r->id;
7053         searchkey.obj_type = TYPE_ROOT_ITEM;
7054         searchkey.offset = 0xffffffffffffffff;
7055 
7056         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
7057         if (!NT_SUCCESS(Status)) {
7058             ERR("error - find_item returned %08lx\n", Status);
7059             return Status;
7060         }
7061 
7062         if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7063             ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
7064             return STATUS_INTERNAL_ERROR;
7065         }
7066 
7067         ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
7068         if (!ri) {
7069             ERR("out of memory\n");
7070             return STATUS_INSUFFICIENT_RESOURCES;
7071         }
7072 
7073         RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
7074 
7075         Status = delete_tree_item(Vcb, &tp);
7076         if (!NT_SUCCESS(Status)) {
7077             ERR("delete_tree_item returned %08lx\n", Status);
7078             return Status;
7079         }
7080 
7081         Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
7082         if (!NT_SUCCESS(Status)) {
7083             ERR("insert_tree_item returned %08lx\n", Status);
7084             return Status;
7085         }
7086     }
7087 
7088     if (r->received) {
7089         KEY searchkey;
7090         traverse_ptr tp;
7091 
7092         if (!Vcb->uuid_root) {
7093             root* uuid_root;
7094 
7095             TRACE("uuid root doesn't exist, creating it\n");
7096 
7097             Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, false, 0, Irp);
7098 
7099             if (!NT_SUCCESS(Status)) {
7100                 ERR("create_root returned %08lx\n", Status);
7101                 return Status;
7102             }
7103 
7104             Vcb->uuid_root = uuid_root;
7105         }
7106 
7107         RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid, sizeof(uint64_t));
7108         searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
7109         RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
7110 
7111         Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
7112         if (!NT_SUCCESS(Status)) {
7113             ERR("find_item returned %08lx\n", Status);
7114             return Status;
7115         }
7116 
7117         if (!keycmp(tp.item->key, searchkey)) {
7118             if (tp.item->size + sizeof(uint64_t) <= Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)) {
7119                 uint64_t* ids;
7120 
7121                 ids = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(uint64_t), ALLOC_TAG);
7122                 if (!ids) {
7123                     ERR("out of memory\n");
7124                     return STATUS_INSUFFICIENT_RESOURCES;
7125                 }
7126 
7127                 RtlCopyMemory(ids, tp.item->data, tp.item->size);
7128                 RtlCopyMemory((uint8_t*)ids + tp.item->size, &r->id, sizeof(uint64_t));
7129 
7130                 Status = delete_tree_item(Vcb, &tp);
7131                 if (!NT_SUCCESS(Status)) {
7132                     ERR("delete_tree_item returned %08lx\n", Status);
7133                     ExFreePool(ids);
7134                     return Status;
7135                 }
7136 
7137                 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ids, tp.item->size + sizeof(uint64_t), NULL, Irp);
7138                 if (!NT_SUCCESS(Status)) {
7139                     ERR("insert_tree_item returned %08lx\n", Status);
7140                     ExFreePool(ids);
7141                     return Status;
7142                 }
7143             }
7144         } else {
7145             uint64_t* root_num;
7146 
7147             root_num = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t), ALLOC_TAG);
7148             if (!root_num) {
7149                 ERR("out of memory\n");
7150                 return STATUS_INSUFFICIENT_RESOURCES;
7151             }
7152 
7153             *root_num = r->id;
7154 
7155             Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(uint64_t), NULL, Irp);
7156             if (!NT_SUCCESS(Status)) {
7157                 ERR("insert_tree_item returned %08lx\n", Status);
7158                 ExFreePool(root_num);
7159                 return Status;
7160             }
7161         }
7162 
7163         r->received = false;
7164     }
7165 
7166     r->dirty = false;
7167 
7168     return STATUS_SUCCESS;
7169 }
7170 
test_not_full(device_extension * Vcb)7171 static NTSTATUS test_not_full(device_extension* Vcb) {
7172     uint64_t reserve, could_alloc, free_space;
7173     LIST_ENTRY* le;
7174 
7175     // This function ensures we drop into readonly mode if we're about to leave very little
7176     // space for metadata - this is similar to the "global reserve" of the Linux driver.
7177     // Otherwise we might completely fill our space, at which point due to COW we can't
7178     // delete anything in order to fix this.
7179 
7180     reserve = Vcb->extent_root->root_item.bytes_used;
7181     reserve += Vcb->root_root->root_item.bytes_used;
7182     if (Vcb->checksum_root) reserve += Vcb->checksum_root->root_item.bytes_used;
7183 
7184     reserve = max(reserve, 0x1000000); // 16 M
7185     reserve = min(reserve, 0x20000000); // 512 M
7186 
7187     // Find out how much space would be available for new metadata chunks
7188 
7189     could_alloc = 0;
7190 
7191     if (Vcb->metadata_flags & BLOCK_FLAG_RAID5) {
7192         uint64_t s1 = 0, s2 = 0, s3 = 0;
7193 
7194         le = Vcb->devices.Flink;
7195         while (le != &Vcb->devices) {
7196             device* dev = CONTAINING_RECORD(le, device, list_entry);
7197 
7198             if (!dev->readonly) {
7199                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7200 
7201                 if (space >= s1) {
7202                     s3 = s2;
7203                     s2 = s1;
7204                     s1 = space;
7205                 } else if (space >= s2) {
7206                     s3 = s2;
7207                     s2 = space;
7208                 } else if (space >= s3)
7209                     s3 = space;
7210             }
7211 
7212             le = le->Flink;
7213         }
7214 
7215         could_alloc = s3 * 2;
7216     } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID6)) {
7217         uint64_t s1 = 0, s2 = 0, s3 = 0, s4 = 0;
7218 
7219         le = Vcb->devices.Flink;
7220         while (le != &Vcb->devices) {
7221             device* dev = CONTAINING_RECORD(le, device, list_entry);
7222 
7223             if (!dev->readonly) {
7224                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7225 
7226                 if (space >= s1) {
7227                     s4 = s3;
7228                     s3 = s2;
7229                     s2 = s1;
7230                     s1 = space;
7231                 } else if (space >= s2) {
7232                     s4 = s3;
7233                     s3 = s2;
7234                     s2 = space;
7235                 } else if (space >= s3) {
7236                     s4 = s3;
7237                     s3 = space;
7238                 } else if (space >= s4)
7239                     s4 = space;
7240             }
7241 
7242             le = le->Flink;
7243         }
7244 
7245         could_alloc = s4 * 2;
7246     } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1)) {
7247         uint64_t s1 = 0, s2 = 0;
7248 
7249         le = Vcb->devices.Flink;
7250         while (le != &Vcb->devices) {
7251             device* dev = CONTAINING_RECORD(le, device, list_entry);
7252 
7253             if (!dev->readonly) {
7254                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7255 
7256                 if (space >= s1) {
7257                     s2 = s1;
7258                     s1 = space;
7259                 } else if (space >= s2)
7260                     s2 = space;
7261             }
7262 
7263             le = le->Flink;
7264         }
7265 
7266         if (Vcb->metadata_flags & BLOCK_FLAG_RAID1)
7267             could_alloc = s2;
7268         else // RAID0
7269             could_alloc = s2 * 2;
7270     } else if (Vcb->metadata_flags & BLOCK_FLAG_DUPLICATE) {
7271         le = Vcb->devices.Flink;
7272         while (le != &Vcb->devices) {
7273             device* dev = CONTAINING_RECORD(le, device, list_entry);
7274 
7275             if (!dev->readonly) {
7276                 uint64_t space = (dev->devitem.num_bytes - dev->devitem.bytes_used) / 2;
7277 
7278                 could_alloc = max(could_alloc, space);
7279             }
7280 
7281             le = le->Flink;
7282         }
7283     } else if (Vcb->metadata_flags & BLOCK_FLAG_RAID1C3) {
7284         uint64_t s1 = 0, s2 = 0, s3 = 0;
7285 
7286         le = Vcb->devices.Flink;
7287         while (le != &Vcb->devices) {
7288             device* dev = CONTAINING_RECORD(le, device, list_entry);
7289 
7290             if (!dev->readonly) {
7291                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7292 
7293                 if (space >= s1) {
7294                     s3 = s2;
7295                     s2 = s1;
7296                     s1 = space;
7297                 } else if (space >= s2) {
7298                     s3 = s2;
7299                     s2 = space;
7300                 } else if (space >= s3)
7301                     s3 = space;
7302             }
7303 
7304             le = le->Flink;
7305         }
7306 
7307         could_alloc = s3;
7308     } else if (Vcb->metadata_flags & BLOCK_FLAG_RAID1C4) {
7309         uint64_t s1 = 0, s2 = 0, s3 = 0, s4 = 0;
7310 
7311         le = Vcb->devices.Flink;
7312         while (le != &Vcb->devices) {
7313             device* dev = CONTAINING_RECORD(le, device, list_entry);
7314 
7315             if (!dev->readonly) {
7316                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7317 
7318                 if (space >= s1) {
7319                     s4 = s3;
7320                     s3 = s2;
7321                     s2 = s1;
7322                     s1 = space;
7323                 } else if (space >= s2) {
7324                     s4 = s3;
7325                     s3 = s2;
7326                     s2 = space;
7327                 } else if (space >= s3) {
7328                     s4 = s3;
7329                     s3 = space;
7330                 } else if (space >= s4)
7331                     s4 = space;
7332             }
7333 
7334             le = le->Flink;
7335         }
7336 
7337         could_alloc = s4;
7338     } else { // SINGLE
7339         le = Vcb->devices.Flink;
7340         while (le != &Vcb->devices) {
7341             device* dev = CONTAINING_RECORD(le, device, list_entry);
7342 
7343             if (!dev->readonly) {
7344                 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7345 
7346                 could_alloc = max(could_alloc, space);
7347             }
7348 
7349             le = le->Flink;
7350         }
7351     }
7352 
7353     if (could_alloc >= reserve)
7354         return STATUS_SUCCESS;
7355 
7356     free_space = 0;
7357 
7358     le = Vcb->chunks.Flink;
7359     while (le != &Vcb->chunks) {
7360         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7361 
7362         if (!c->reloc && !c->readonly && c->chunk_item->type & BLOCK_FLAG_METADATA) {
7363             free_space += c->chunk_item->size - c->used;
7364 
7365             if (free_space + could_alloc >= reserve)
7366                 return STATUS_SUCCESS;
7367         }
7368 
7369         le = le->Flink;
7370     }
7371 
7372     return STATUS_DISK_FULL;
7373 }
7374 
check_for_orphans_root(device_extension * Vcb,root * r,PIRP Irp)7375 static NTSTATUS check_for_orphans_root(device_extension* Vcb, root* r, PIRP Irp) {
7376     NTSTATUS Status;
7377     KEY searchkey;
7378     traverse_ptr tp;
7379     LIST_ENTRY rollback;
7380 
7381     TRACE("(%p, %p)\n", Vcb, r);
7382 
7383     InitializeListHead(&rollback);
7384 
7385     searchkey.obj_id = BTRFS_ORPHAN_INODE_OBJID;
7386     searchkey.obj_type = TYPE_ORPHAN_INODE;
7387     searchkey.offset = 0;
7388 
7389     Status = find_item(Vcb, r, &tp, &searchkey, false, Irp);
7390     if (!NT_SUCCESS(Status)) {
7391         ERR("find_item returned %08lx\n", Status);
7392         return Status;
7393     }
7394 
7395     do {
7396         traverse_ptr next_tp;
7397 
7398         if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
7399             break;
7400 
7401         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
7402             fcb* fcb;
7403 
7404             TRACE("removing orphaned inode %I64x\n", tp.item->key.offset);
7405 
7406             Status = open_fcb(Vcb, r, tp.item->key.offset, 0, NULL, false, NULL, &fcb, PagedPool, Irp);
7407             if (!NT_SUCCESS(Status))
7408                 ERR("open_fcb returned %08lx\n", Status);
7409             else {
7410                 if (fcb->inode_item.st_nlink == 0) {
7411                     if (fcb->type != BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0) {
7412                         Status = excise_extents(Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size), Irp, &rollback);
7413                         if (!NT_SUCCESS(Status)) {
7414                             ERR("excise_extents returned %08lx\n", Status);
7415                             goto end;
7416                         }
7417                     }
7418 
7419                     fcb->deleted = true;
7420 
7421                     mark_fcb_dirty(fcb);
7422                 }
7423 
7424                 free_fcb(fcb);
7425 
7426                 Status = delete_tree_item(Vcb, &tp);
7427                 if (!NT_SUCCESS(Status)) {
7428                     ERR("delete_tree_item returned %08lx\n", Status);
7429                     goto end;
7430                 }
7431             }
7432         }
7433 
7434         if (find_next_item(Vcb, &tp, &next_tp, false, Irp))
7435             tp = next_tp;
7436         else
7437             break;
7438     } while (true);
7439 
7440     Status = STATUS_SUCCESS;
7441 
7442     clear_rollback(&rollback);
7443 
7444 end:
7445     do_rollback(Vcb, &rollback);
7446 
7447     return Status;
7448 }
7449 
check_for_orphans(device_extension * Vcb,PIRP Irp)7450 static NTSTATUS check_for_orphans(device_extension* Vcb, PIRP Irp) {
7451     NTSTATUS Status;
7452     LIST_ENTRY* le;
7453 
7454     if (IsListEmpty(&Vcb->dirty_filerefs))
7455         return STATUS_SUCCESS;
7456 
7457     le = Vcb->dirty_filerefs.Flink;
7458     while (le != &Vcb->dirty_filerefs) {
7459         file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry_dirty);
7460 
7461         if (!fr->fcb->subvol->checked_for_orphans) {
7462             Status = check_for_orphans_root(Vcb, fr->fcb->subvol, Irp);
7463             if (!NT_SUCCESS(Status)) {
7464                 ERR("check_for_orphans_root returned %08lx\n", Status);
7465                 return Status;
7466             }
7467 
7468             fr->fcb->subvol->checked_for_orphans = true;
7469         }
7470 
7471         le = le->Flink;
7472     }
7473 
7474     return STATUS_SUCCESS;
7475 }
7476 
do_write2(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)7477 static NTSTATUS do_write2(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
7478     NTSTATUS Status;
7479     LIST_ENTRY *le, batchlist;
7480     bool cache_changed = false;
7481     volume_device_extension* vde;
7482     bool no_cache = false;
7483 #ifdef DEBUG_FLUSH_TIMES
7484     uint64_t filerefs = 0, fcbs = 0;
7485     LARGE_INTEGER freq, time1, time2;
7486 #endif
7487 #ifdef DEBUG_WRITE_LOOPS
7488     UINT loops = 0;
7489 #endif
7490 
7491     TRACE("(%p)\n", Vcb);
7492 
7493     InitializeListHead(&batchlist);
7494 
7495 #ifdef DEBUG_FLUSH_TIMES
7496     time1 = KeQueryPerformanceCounter(&freq);
7497 #endif
7498 
7499     Status = check_for_orphans(Vcb, Irp);
7500     if (!NT_SUCCESS(Status)) {
7501         ERR("check_for_orphans returned %08lx\n", Status);
7502         return Status;
7503     }
7504 
7505     ExAcquireResourceExclusiveLite(&Vcb->dirty_filerefs_lock, true);
7506 
7507     while (!IsListEmpty(&Vcb->dirty_filerefs)) {
7508         file_ref* fr = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_filerefs), file_ref, list_entry_dirty);
7509 
7510         flush_fileref(fr, &batchlist, Irp);
7511         free_fileref(fr);
7512 
7513 #ifdef DEBUG_FLUSH_TIMES
7514         filerefs++;
7515 #endif
7516     }
7517 
7518     ExReleaseResourceLite(&Vcb->dirty_filerefs_lock);
7519 
7520     Status = commit_batch_list(Vcb, &batchlist, Irp);
7521     if (!NT_SUCCESS(Status)) {
7522         ERR("commit_batch_list returned %08lx\n", Status);
7523         return Status;
7524     }
7525 
7526 #ifdef DEBUG_FLUSH_TIMES
7527     time2 = KeQueryPerformanceCounter(NULL);
7528 
7529     ERR("flushed %I64u filerefs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7530 
7531     time1 = KeQueryPerformanceCounter(&freq);
7532 #endif
7533 
7534     // We process deleted streams first, so we don't run over our xattr
7535     // limit unless we absolutely have to.
7536     // We also process deleted normal files, to avoid any problems
7537     // caused by inode collisions.
7538 
7539     ExAcquireResourceExclusiveLite(&Vcb->dirty_fcbs_lock, true);
7540 
7541     le = Vcb->dirty_fcbs.Flink;
7542     while (le != &Vcb->dirty_fcbs) {
7543         fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7544         LIST_ENTRY* le2 = le->Flink;
7545 
7546         if (fcb->deleted) {
7547             ExAcquireResourceExclusiveLite(fcb->Header.Resource, true);
7548             Status = flush_fcb(fcb, false, &batchlist, Irp);
7549             ExReleaseResourceLite(fcb->Header.Resource);
7550 
7551             free_fcb(fcb);
7552 
7553             if (!NT_SUCCESS(Status)) {
7554                 ERR("flush_fcb returned %08lx\n", Status);
7555                 clear_batch_list(Vcb, &batchlist);
7556                 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7557                 return Status;
7558             }
7559 
7560 #ifdef DEBUG_FLUSH_TIMES
7561             fcbs++;
7562 #endif
7563         }
7564 
7565         le = le2;
7566     }
7567 
7568     Status = commit_batch_list(Vcb, &batchlist, Irp);
7569     if (!NT_SUCCESS(Status)) {
7570         ERR("commit_batch_list returned %08lx\n", Status);
7571         ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7572         return Status;
7573     }
7574 
7575     le = Vcb->dirty_fcbs.Flink;
7576     while (le != &Vcb->dirty_fcbs) {
7577         fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7578         LIST_ENTRY* le2 = le->Flink;
7579 
7580         if (fcb->subvol != Vcb->root_root) {
7581             ExAcquireResourceExclusiveLite(fcb->Header.Resource, true);
7582             Status = flush_fcb(fcb, false, &batchlist, Irp);
7583             ExReleaseResourceLite(fcb->Header.Resource);
7584             free_fcb(fcb);
7585 
7586             if (!NT_SUCCESS(Status)) {
7587                 ERR("flush_fcb returned %08lx\n", Status);
7588                 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7589                 return Status;
7590             }
7591 
7592 #ifdef DEBUG_FLUSH_TIMES
7593             fcbs++;
7594 #endif
7595         }
7596 
7597         le = le2;
7598     }
7599 
7600     ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7601 
7602     Status = commit_batch_list(Vcb, &batchlist, Irp);
7603     if (!NT_SUCCESS(Status)) {
7604         ERR("commit_batch_list returned %08lx\n", Status);
7605         return Status;
7606     }
7607 
7608 #ifdef DEBUG_FLUSH_TIMES
7609     time2 = KeQueryPerformanceCounter(NULL);
7610 
7611     ERR("flushed %I64u fcbs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7612 #endif
7613 
7614     // no need to get dirty_subvols_lock here, as we have tree_lock exclusively
7615     while (!IsListEmpty(&Vcb->dirty_subvols)) {
7616         root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_subvols), root, list_entry_dirty);
7617 
7618         Status = flush_subvol(Vcb, r, Irp);
7619         if (!NT_SUCCESS(Status)) {
7620             ERR("flush_subvol returned %08lx\n", Status);
7621             return Status;
7622         }
7623     }
7624 
7625     if (!IsListEmpty(&Vcb->drop_roots)) {
7626         Status = drop_roots(Vcb, Irp, rollback);
7627 
7628         if (!NT_SUCCESS(Status)) {
7629             ERR("drop_roots returned %08lx\n", Status);
7630             return Status;
7631         }
7632     }
7633 
7634     Status = update_chunks(Vcb, &batchlist, Irp, rollback);
7635 
7636     if (!NT_SUCCESS(Status)) {
7637         ERR("update_chunks returned %08lx\n", Status);
7638         return Status;
7639     }
7640 
7641     Status = commit_batch_list(Vcb, &batchlist, Irp);
7642 
7643     // If only changing superblock, e.g. changing label, we still need to rewrite
7644     // the root tree so the generations match, otherwise you won't be able to mount on Linux.
7645     if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
7646         KEY searchkey;
7647 
7648         traverse_ptr tp;
7649 
7650         searchkey.obj_id = 0;
7651         searchkey.obj_type = 0;
7652         searchkey.offset = 0;
7653 
7654         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
7655         if (!NT_SUCCESS(Status)) {
7656             ERR("error - find_item returned %08lx\n", Status);
7657             return Status;
7658         }
7659 
7660         Vcb->root_root->treeholder.tree->write = true;
7661     }
7662 
7663     // make sure we always update the extent tree
7664     Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp);
7665     if (!NT_SUCCESS(Status)) {
7666         ERR("add_root_item_to_cache returned %08lx\n", Status);
7667         return Status;
7668     }
7669 
7670     if (Vcb->stats_changed) {
7671         le = Vcb->devices.Flink;
7672         while (le != &Vcb->devices) {
7673             device* dev = CONTAINING_RECORD(le, device, list_entry);
7674 
7675             if (dev->stats_changed) {
7676                 Status = flush_changed_dev_stats(Vcb, dev, Irp);
7677                 if (!NT_SUCCESS(Status)) {
7678                     ERR("flush_changed_dev_stats returned %08lx\n", Status);
7679                     return Status;
7680                 }
7681                 dev->stats_changed = false;
7682             }
7683 
7684             le = le->Flink;
7685         }
7686 
7687         Vcb->stats_changed = false;
7688     }
7689 
7690     do {
7691         Status = add_parents(Vcb, Irp);
7692         if (!NT_SUCCESS(Status)) {
7693             ERR("add_parents returned %08lx\n", Status);
7694             goto end;
7695         }
7696 
7697         Status = allocate_tree_extents(Vcb, Irp, rollback);
7698         if (!NT_SUCCESS(Status)) {
7699             ERR("allocate_tree_extents returned %08lx\n", Status);
7700             goto end;
7701         }
7702 
7703         Status = do_splits(Vcb, Irp, rollback);
7704         if (!NT_SUCCESS(Status)) {
7705             ERR("do_splits returned %08lx\n", Status);
7706             goto end;
7707         }
7708 
7709         Status = update_chunk_usage(Vcb, Irp, rollback);
7710         if (!NT_SUCCESS(Status)) {
7711             ERR("update_chunk_usage returned %08lx\n", Status);
7712             goto end;
7713         }
7714 
7715         if (!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
7716             if (!no_cache) {
7717                 Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
7718                 if (!NT_SUCCESS(Status)) {
7719                     WARN("allocate_cache returned %08lx\n", Status);
7720                     no_cache = true;
7721                     cache_changed = false;
7722                 }
7723             }
7724         } else {
7725             Status = update_chunk_caches_tree(Vcb, Irp);
7726             if (!NT_SUCCESS(Status)) {
7727                 ERR("update_chunk_caches_tree returned %08lx\n", Status);
7728                 goto end;
7729             }
7730         }
7731 
7732 #ifdef DEBUG_WRITE_LOOPS
7733         loops++;
7734 
7735         if (cache_changed)
7736             ERR("cache has changed, looping again\n");
7737 #endif
7738     } while (cache_changed || !trees_consistent(Vcb));
7739 
7740 #ifdef DEBUG_WRITE_LOOPS
7741     ERR("%u loops\n", loops);
7742 #endif
7743 
7744     TRACE("trees consistent\n");
7745 
7746     Status = update_root_root(Vcb, no_cache, Irp, rollback);
7747     if (!NT_SUCCESS(Status)) {
7748         ERR("update_root_root returned %08lx\n", Status);
7749         goto end;
7750     }
7751 
7752     Status = write_trees(Vcb, Irp);
7753     if (!NT_SUCCESS(Status)) {
7754         ERR("write_trees returned %08lx\n", Status);
7755         goto end;
7756     }
7757 
7758     Status = test_not_full(Vcb);
7759     if (!NT_SUCCESS(Status)) {
7760         ERR("test_not_full returned %08lx\n", Status);
7761         goto end;
7762     }
7763 
7764 #ifdef DEBUG_PARANOID
7765     le = Vcb->trees.Flink;
7766     while (le != &Vcb->trees) {
7767         tree* t = CONTAINING_RECORD(le, tree, list_entry);
7768         KEY searchkey;
7769         traverse_ptr tp;
7770 
7771         searchkey.obj_id = t->header.address;
7772         searchkey.obj_type = TYPE_METADATA_ITEM;
7773         searchkey.offset = 0xffffffffffffffff;
7774 
7775         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
7776         if (!NT_SUCCESS(Status)) {
7777             ERR("error - find_item returned %08lx\n", Status);
7778             goto end;
7779         }
7780 
7781         if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7782             searchkey.obj_id = t->header.address;
7783             searchkey.obj_type = TYPE_EXTENT_ITEM;
7784             searchkey.offset = 0xffffffffffffffff;
7785 
7786             Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
7787             if (!NT_SUCCESS(Status)) {
7788                 ERR("error - find_item returned %08lx\n", Status);
7789                 goto end;
7790             }
7791 
7792             if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7793                 ERR("error - could not find entry in extent tree for tree at %I64x\n", t->header.address);
7794                 Status = STATUS_INTERNAL_ERROR;
7795                 goto end;
7796             }
7797         }
7798 
7799         le = le->Flink;
7800     }
7801 #endif
7802 
7803     Vcb->superblock.cache_generation = Vcb->superblock.generation;
7804 
7805     if (!Vcb->options.no_barrier)
7806         flush_disk_caches(Vcb);
7807 
7808     Status = write_superblocks(Vcb, Irp);
7809     if (!NT_SUCCESS(Status)) {
7810         ERR("write_superblocks returned %08lx\n", Status);
7811         goto end;
7812     }
7813 
7814     vde = Vcb->vde;
7815 
7816     if (vde) {
7817         pdo_device_extension* pdode = vde->pdode;
7818 
7819         ExAcquireResourceSharedLite(&pdode->child_lock, true);
7820 
7821         le = pdode->children.Flink;
7822 
7823         while (le != &pdode->children) {
7824             volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
7825 
7826             vc->generation = Vcb->superblock.generation;
7827             le = le->Flink;
7828         }
7829 
7830         ExReleaseResourceLite(&pdode->child_lock);
7831     }
7832 
7833     clean_space_cache(Vcb);
7834 
7835     le = Vcb->chunks.Flink;
7836     while (le != &Vcb->chunks) {
7837         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7838 
7839         c->changed = false;
7840         c->space_changed = false;
7841 
7842         le = le->Flink;
7843     }
7844 
7845     Vcb->superblock.generation++;
7846 
7847     Status = STATUS_SUCCESS;
7848 
7849     le = Vcb->trees.Flink;
7850     while (le != &Vcb->trees) {
7851         tree* t = CONTAINING_RECORD(le, tree, list_entry);
7852 
7853         t->write = false;
7854 
7855         le = le->Flink;
7856     }
7857 
7858     Vcb->need_write = false;
7859 
7860     while (!IsListEmpty(&Vcb->drop_roots)) {
7861         root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->drop_roots), root, list_entry);
7862 
7863         if (IsListEmpty(&r->fcbs)) {
7864             ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
7865             ExFreePool(r->nonpaged);
7866             ExFreePool(r);
7867         } else
7868             r->dropped = true;
7869     }
7870 
7871 end:
7872     TRACE("do_write returning %08lx\n", Status);
7873 
7874     return Status;
7875 }
7876 
do_write(device_extension * Vcb,PIRP Irp)7877 NTSTATUS do_write(device_extension* Vcb, PIRP Irp) {
7878     LIST_ENTRY rollback;
7879     NTSTATUS Status;
7880 
7881     InitializeListHead(&rollback);
7882 
7883     Status = do_write2(Vcb, Irp, &rollback);
7884 
7885     if (!NT_SUCCESS(Status)) {
7886         ERR("do_write2 returned %08lx, dropping into readonly mode\n", Status);
7887         Vcb->readonly = true;
7888         FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_FORCED_CLOSED);
7889         do_rollback(Vcb, &rollback);
7890     } else
7891         clear_rollback(&rollback);
7892 
7893     return Status;
7894 }
7895 
/* Periodic flush body: with tree_lock held exclusively, writes out pending
 * changes if the volume needs it and is not read-only, then frees trees via
 * free_trees() regardless. A failed write is logged, not propagated. */
static void do_flush(device_extension* Vcb) {
    NTSTATUS Status = STATUS_SUCCESS;

    ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true);

    if (Vcb->need_write && !Vcb->readonly) {
        Status = do_write(Vcb, NULL);
    }

    free_trees(Vcb);

    if (!NT_SUCCESS(Status)) {
        ERR("do_write returned %08lx\n", Status);
    }

    ExReleaseResourceLite(&Vcb->tree_lock);
}
7913 
_Function_class_(KSTART_ROUTINE)7914 _Function_class_(KSTART_ROUTINE)
7915 void __stdcall flush_thread(void* context) {
7916     DEVICE_OBJECT* devobj = context;
7917     device_extension* Vcb = devobj->DeviceExtension;
7918     LARGE_INTEGER due_time;
7919 
7920     ObReferenceObject(devobj);
7921 
7922     KeInitializeTimer(&Vcb->flush_thread_timer);
7923 
7924     due_time.QuadPart = (uint64_t)Vcb->options.flush_interval * -10000000;
7925 
7926     KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7927 
7928     while (true) {
7929         KeWaitForSingleObject(&Vcb->flush_thread_timer, Executive, KernelMode, false, NULL);
7930 
7931         if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing)
7932             break;
7933 
7934         if (!Vcb->locked)
7935             do_flush(Vcb);
7936 
7937         KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7938     }
7939 
7940     ObDereferenceObject(devobj);
7941     KeCancelTimer(&Vcb->flush_thread_timer);
7942 
7943     KeSetEvent(&Vcb->flush_thread_finished, 0, false);
7944 
7945     PsTerminateSystemThread(STATUS_SUCCESS);
7946 }
7947