1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19 #include "xxhash.h"
20 #include "crc32c.h"
21 #include <ata.h>
22 #include <ntddscsi.h>
23 #include <ntddstor.h>
24
25 /* cf. __MAX_CSUM_ITEMS in Linux - it needs sizeof(leaf_node) bytes free
26 * so it can do a split. Linux tries to get it so a run will fit in a
27 * sector, but the MAX_CSUM_ITEMS logic is wrong... */
28 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - (2 * sizeof(leaf_node)))
29
30 // #define DEBUG_WRITE_LOOPS
31
32 #define BATCH_ITEM_LIMIT 1000
33
34 typedef struct {
35 KEVENT Event;
36 IO_STATUS_BLOCK iosb;
37 } write_context;
38
39 typedef struct {
40 EXTENT_ITEM_TREE eit;
41 uint8_t type;
42 TREE_BLOCK_REF tbr;
43 } EXTENT_ITEM_TREE2;
44
45 typedef struct {
46 EXTENT_ITEM ei;
47 uint8_t type;
48 TREE_BLOCK_REF tbr;
49 } EXTENT_ITEM_SKINNY_METADATA;
50
51 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp);
52 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
53
54 static NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, uint64_t objid,
55 uint8_t objtype, uint64_t offset, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data,
56 uint16_t datalen, enum batch_operation operation);
57
_Function_class_(IO_COMPLETION_ROUTINE)58 _Function_class_(IO_COMPLETION_ROUTINE)
59 static NTSTATUS __stdcall write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
60 write_context* context = conptr;
61
62 UNUSED(DeviceObject);
63
64 context->iosb = Irp->IoStatus;
65 KeSetEvent(&context->Event, 0, false);
66
67 return STATUS_MORE_PROCESSING_REQUIRED;
68 }
69
write_data_phys(_In_ PDEVICE_OBJECT device,_In_ PFILE_OBJECT fileobj,_In_ uint64_t address,_In_reads_bytes_ (length)void * data,_In_ uint32_t length)70 NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ PFILE_OBJECT fileobj, _In_ uint64_t address,
71 _In_reads_bytes_(length) void* data, _In_ uint32_t length) {
72 NTSTATUS Status;
73 LARGE_INTEGER offset;
74 PIRP Irp;
75 PIO_STACK_LOCATION IrpSp;
76 write_context context;
77
78 TRACE("(%p, %I64x, %p, %x)\n", device, address, data, length);
79
80 RtlZeroMemory(&context, sizeof(write_context));
81
82 KeInitializeEvent(&context.Event, NotificationEvent, false);
83
84 offset.QuadPart = address;
85
86 Irp = IoAllocateIrp(device->StackSize, false);
87
88 if (!Irp) {
89 ERR("IoAllocateIrp failed\n");
90 return STATUS_INSUFFICIENT_RESOURCES;
91 }
92
93 IrpSp = IoGetNextIrpStackLocation(Irp);
94 IrpSp->MajorFunction = IRP_MJ_WRITE;
95 IrpSp->FileObject = fileobj;
96
97 if (device->Flags & DO_BUFFERED_IO) {
98 Irp->AssociatedIrp.SystemBuffer = data;
99
100 Irp->Flags = IRP_BUFFERED_IO;
101 } else if (device->Flags & DO_DIRECT_IO) {
102 Irp->MdlAddress = IoAllocateMdl(data, length, false, false, NULL);
103 if (!Irp->MdlAddress) {
104 DbgPrint("IoAllocateMdl failed\n");
105 Status = STATUS_INSUFFICIENT_RESOURCES;
106 goto exit;
107 }
108
109 Status = STATUS_SUCCESS;
110
111 _SEH2_TRY {
112 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
113 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
114 Status = _SEH2_GetExceptionCode();
115 } _SEH2_END;
116
117 if (!NT_SUCCESS(Status)) {
118 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
119 IoFreeMdl(Irp->MdlAddress);
120 goto exit;
121 }
122 } else {
123 Irp->UserBuffer = data;
124 }
125
126 IrpSp->Parameters.Write.Length = length;
127 IrpSp->Parameters.Write.ByteOffset = offset;
128
129 Irp->UserIosb = &context.iosb;
130
131 Irp->UserEvent = &context.Event;
132
133 IoSetCompletionRoutine(Irp, write_completion, &context, true, true, true);
134
135 Status = IoCallDriver(device, Irp);
136
137 if (Status == STATUS_PENDING) {
138 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
139 Status = context.iosb.Status;
140 }
141
142 if (!NT_SUCCESS(Status)) {
143 ERR("IoCallDriver returned %08lx\n", Status);
144 }
145
146 if (device->Flags & DO_DIRECT_IO) {
147 MmUnlockPages(Irp->MdlAddress);
148 IoFreeMdl(Irp->MdlAddress);
149 }
150
151 exit:
152 IoFreeIrp(Irp);
153
154 return Status;
155 }
156
/* Appends the range (address, size) to dev's list of pending trims and bumps
 * its entry count. If the allocation fails the range is logged and dropped. */
static void add_trim_entry(device* dev, uint64_t address, uint64_t size) {
    space* entry = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);

    if (entry == NULL) {
        ERR("out of memory\n");
        return;
    }

    entry->address = address;
    entry->size = size;

    InsertTailList(&dev->trim_list, &entry->list_entry);
    dev->num_trim_entries++;
}
170
clean_space_cache_chunk(device_extension * Vcb,chunk * c)171 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
172 LIST_ENTRY* le;
173 ULONG type;
174
175 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
176 type = BLOCK_FLAG_DUPLICATE;
177 else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
178 type = BLOCK_FLAG_RAID0;
179 else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
180 type = BLOCK_FLAG_DUPLICATE;
181 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
182 type = BLOCK_FLAG_RAID10;
183 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
184 type = BLOCK_FLAG_RAID5;
185 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
186 type = BLOCK_FLAG_RAID6;
187 else if (c->chunk_item->type & BLOCK_FLAG_RAID1C3)
188 type = BLOCK_FLAG_DUPLICATE;
189 else if (c->chunk_item->type & BLOCK_FLAG_RAID1C4)
190 type = BLOCK_FLAG_DUPLICATE;
191 else // SINGLE
192 type = BLOCK_FLAG_DUPLICATE;
193
194 le = c->deleting.Flink;
195 while (le != &c->deleting) {
196 space* s = CONTAINING_RECORD(le, space, list_entry);
197
198 if (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA)) {
199 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
200
201 if (type == BLOCK_FLAG_DUPLICATE) {
202 uint16_t i;
203
204 for (i = 0; i < c->chunk_item->num_stripes; i++) {
205 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
206 add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size);
207 }
208 } else if (type == BLOCK_FLAG_RAID0) {
209 uint64_t startoff, endoff;
210 uint16_t startoffstripe, endoffstripe, i;
211
212 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
213 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
214
215 for (i = 0; i < c->chunk_item->num_stripes; i++) {
216 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) {
217 uint64_t stripestart, stripeend;
218
219 if (startoffstripe > i)
220 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
221 else if (startoffstripe == i)
222 stripestart = startoff;
223 else
224 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
225
226 if (endoffstripe > i)
227 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
228 else if (endoffstripe == i)
229 stripeend = endoff + 1;
230 else
231 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
232
233 if (stripestart != stripeend)
234 add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart);
235 }
236 }
237 } else if (type == BLOCK_FLAG_RAID10) {
238 uint64_t startoff, endoff;
239 uint16_t sub_stripes, startoffstripe, endoffstripe, i;
240
241 sub_stripes = max(1, c->chunk_item->sub_stripes);
242
243 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
244 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
245
246 startoffstripe *= sub_stripes;
247 endoffstripe *= sub_stripes;
248
249 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
250 ULONG j;
251 uint64_t stripestart, stripeend;
252
253 if (startoffstripe > i)
254 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
255 else if (startoffstripe == i)
256 stripestart = startoff;
257 else
258 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
259
260 if (endoffstripe > i)
261 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
262 else if (endoffstripe == i)
263 stripeend = endoff + 1;
264 else
265 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
266
267 if (stripestart != stripeend) {
268 for (j = 0; j < sub_stripes; j++) {
269 if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim)
270 add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart);
271 }
272 }
273 }
274 }
275 // FIXME - RAID5(?), RAID6(?)
276 }
277
278 le = le->Flink;
279 }
280 }
281
282 typedef struct {
283 DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa;
284 ATA_PASS_THROUGH_EX apte;
285 PIRP Irp;
286 IO_STATUS_BLOCK iosb;
287 #ifdef DEBUG_TRIM_EMULATION
288 PMDL mdl;
289 void* buf;
290 #endif
291 } ioctl_context_stripe;
292
293 typedef struct {
294 KEVENT Event;
295 LONG left;
296 ioctl_context_stripe* stripes;
297 } ioctl_context;
298
_Function_class_(IO_COMPLETION_ROUTINE)299 _Function_class_(IO_COMPLETION_ROUTINE)
300 static NTSTATUS __stdcall ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
301 ioctl_context* context = (ioctl_context*)conptr;
302 LONG left2 = InterlockedDecrement(&context->left);
303
304 UNUSED(DeviceObject);
305 UNUSED(Irp);
306
307 if (left2 == 0)
308 KeSetEvent(&context->Event, 0, false);
309
310 return STATUS_MORE_PROCESSING_REQUIRED;
311 }
312
#ifdef DEBUG_TRIM_EMULATION
/* Debugging aid: instead of trimming, overwrites every range on dev's trim
 * list with zeroes, issuing one write IRP per range and waiting for all of
 * them to finish. The trim list itself is left untouched.
 *
 * Ranges whose IRP, buffer or MDL cannot be allocated are logged and skipped. */
static void trim_emulation(device* dev) {
    LIST_ENTRY* le;
    ioctl_context context;
    unsigned int i = 0, count = 0;

    le = dev->trim_list.Flink;
    while (le != &dev->trim_list) {
        count++;
        le = le->Flink;
    }

    // With nothing to submit nobody would ever signal the event.
    if (count == 0)
        return;

    context.left = count;

    KeInitializeEvent(&context.Event, NotificationEvent, false);

    context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
    if (!context.stripes) {
        ERR("out of memory\n");
        return;
    }

    RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);

    i = 0;
    le = dev->trim_list.Flink;
    while (le != &dev->trim_list) {
        ioctl_context_stripe* stripe = &context.stripes[i];
        space* s = CONTAINING_RECORD(le, space, list_entry);
        // NOTE(review): the size is narrowed to 32 bits, as before; a range of
        // 4GB or more would be written only in part.
        uint32_t len = (uint32_t)s->size;
        bool sent = false;

        WARN("(%I64x, %I64x)\n", s->address, s->size);

        stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);

        if (!stripe->Irp) {
            ERR("IoAllocateIrp failed\n");
        } else {
            PIO_STACK_LOCATION IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
            IrpSp->MajorFunction = IRP_MJ_WRITE;
            IrpSp->FileObject = dev->fileobj;

            stripe->buf = ExAllocatePoolWithTag(NonPagedPool, len, ALLOC_TAG);

            if (!stripe->buf) {
                ERR("out of memory\n");
            } else {
                RtlZeroMemory(stripe->buf, len); // FIXME - randomize instead?

                stripe->mdl = IoAllocateMdl(stripe->buf, len, false, false, NULL);

                if (!stripe->mdl) {
                    ERR("IoAllocateMdl failed\n");
                } else {
                    MmBuildMdlForNonPagedPool(stripe->mdl);

                    stripe->Irp->MdlAddress = stripe->mdl;

                    IrpSp->Parameters.Write.ByteOffset.QuadPart = s->address;
                    IrpSp->Parameters.Write.Length = len;

                    stripe->Irp->UserIosb = &stripe->iosb;

                    IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);

                    IoCallDriver(dev->devobj, stripe->Irp);
                    sent = true;
                }
            }
        }

        // context.left was preset to the number of ranges. A range that was
        // never submitted will not reach ioctl_completion, so it has to be
        // accounted for here or the wait below would never return.
        if (!sent && InterlockedDecrement(&context.left) == 0)
            KeSetEvent(&context.Event, 0, false);

        i++;

        le = le->Flink;
    }

    KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);

    for (i = 0; i < count; i++) {
        ioctl_context_stripe* stripe = &context.stripes[i];

        if (stripe->mdl)
            IoFreeMdl(stripe->mdl);

        if (stripe->buf)
            ExFreePool(stripe->buf);

        // ioctl_completion returns STATUS_MORE_PROCESSING_REQUIRED, so the
        // IRPs are still ours to free.
        if (stripe->Irp)
            IoFreeIrp(stripe->Irp);
    }

    ExFreePool(context.stripes);
}
#endif
402
/* Returns the space on every changed chunk's "deleting" list to that chunk's
 * free-space lists and, if trimming is enabled, tells the devices about the
 * freed ranges.
 *
 * This happens in two phases. First, with Vcb->chunk_lock held shared, each
 * chunk marked space_changed has trim entries queued for its deleting ranges
 * (clean_space_cache_chunk), the ranges are merged into its space lists and
 * the deleting list is emptied. Then one
 * IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES request is sent to each device with
 * queued entries, and the function waits for all of them before freeing the
 * request buffers. With DEBUG_TRIM_EMULATION defined, trim_emulation is called
 * per device instead.
 *
 * Failures are logged; the function has no return value. */
static void clean_space_cache(device_extension* Vcb) {
    LIST_ENTRY* le;
    chunk* c;
#ifndef DEBUG_TRIM_EMULATION
    ULONG num;
#endif

    TRACE("(%p)\n", Vcb);

    ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);

    le = Vcb->chunks.Flink;
    while (le != &Vcb->chunks) {
        c = CONTAINING_RECORD(le, chunk, list_entry);

        if (c->space_changed) {
            acquire_chunk_lock(c, Vcb);

            // checked again now that the chunk lock is held
            if (c->space_changed) {
                if (Vcb->trim && !Vcb->options.no_trim)
                    clean_space_cache_chunk(Vcb, c);

                space_list_merge(&c->space, &c->space_size, &c->deleting);

                while (!IsListEmpty(&c->deleting)) {
                    space* s = CONTAINING_RECORD(RemoveHeadList(&c->deleting), space, list_entry);

                    ExFreePool(s);
                }
            }

            c->space_changed = false;

            release_chunk_lock(c, Vcb);
        }

        le = le->Flink;
    }

    ExReleaseResourceLite(&Vcb->chunk_lock);

    if (Vcb->trim && !Vcb->options.no_trim) {
#ifndef DEBUG_TRIM_EMULATION
        ioctl_context context;
        ULONG total_num;

        // One request will be issued per device that has anything to trim.
        context.left = 0;

        le = Vcb->devices.Flink;
        while (le != &Vcb->devices) {
            device* dev = CONTAINING_RECORD(le, device, list_entry);

            if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0)
                context.left++;

            le = le->Flink;
        }

        if (context.left == 0)
            return;

        total_num = context.left;
        num = 0;

        KeInitializeEvent(&context.Event, NotificationEvent, false);

        context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
        if (!context.stripes) {
            ERR("out of memory\n");
            return;
        }

        RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
#endif

        le = Vcb->devices.Flink;
        while (le != &Vcb->devices) {
            device* dev = CONTAINING_RECORD(le, device, list_entry);

            if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) {
#ifdef DEBUG_TRIM_EMULATION
                trim_emulation(dev);
#else
                LIST_ENTRY* le2;
                ioctl_context_stripe* stripe = &context.stripes[num];
                DEVICE_DATA_SET_RANGE* ranges;
                ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE));
                ULONG i;
                PIO_STACK_LOCATION IrpSp;

                // NOTE(review): this buffer becomes the IRP's SystemBuffer but
                // comes from PagedPool - confirm that no driver below touches
                // it at raised IRQL.
                stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
                if (!stripe->dmdsa) {
                    ERR("out of memory\n");

                    // Nothing will be sent for this device, so ioctl_completion
                    // will not run for it. Account for it here, otherwise the
                    // wait below would never be satisfied.
                    if (InterlockedDecrement(&context.left) == 0)
                        KeSetEvent(&context.Event, 0, false);

                    goto nextdev;
                }

                stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
                stripe->dmdsa->Action = DeviceDsmAction_Trim;
                stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED;
                stripe->dmdsa->ParameterBlockOffset = 0;
                stripe->dmdsa->ParameterBlockLength = 0;
                stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t));
                stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);

                // the range array follows the (aligned) header in the same buffer
                ranges = (DEVICE_DATA_SET_RANGE*)((uint8_t*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset);

                i = 0;

                le2 = dev->trim_list.Flink;
                while (le2 != &dev->trim_list) {
                    space* s = CONTAINING_RECORD(le2, space, list_entry);

                    ranges[i].StartingOffset = s->address;
                    ranges[i].LengthInBytes = s->size;
                    i++;

                    le2 = le2->Flink;
                }

                stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);

                if (!stripe->Irp) {
                    ERR("IoAllocateIrp failed\n");

                    // as above: no request, so no completion will count this one
                    if (InterlockedDecrement(&context.left) == 0)
                        KeSetEvent(&context.Event, 0, false);

                    goto nextdev;
                }

                IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
                IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
                IrpSp->FileObject = dev->fileobj;

                IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES;
                IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen;
                IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0;

                stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa;
                stripe->Irp->Flags |= IRP_BUFFERED_IO;
                stripe->Irp->UserBuffer = NULL;
                stripe->Irp->UserIosb = &stripe->iosb;

                IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);

                IoCallDriver(dev->devobj, stripe->Irp);

nextdev:
#endif
                // The ranges have been copied into the request (or could not
                // be sent), so the queue is emptied either way.
                while (!IsListEmpty(&dev->trim_list)) {
                    space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry);
                    ExFreePool(s);
                }

                dev->num_trim_entries = 0;

#ifndef DEBUG_TRIM_EMULATION
                num++;
#endif
            }

            le = le->Flink;
        }

#ifndef DEBUG_TRIM_EMULATION
        KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);

        for (num = 0; num < total_num; num++) {
            if (context.stripes[num].dmdsa)
                ExFreePool(context.stripes[num].dmdsa);

            if (context.stripes[num].Irp)
                IoFreeIrp(context.stripes[num].Irp);
        }

        ExFreePool(context.stripes);
#endif
    }
}
577
trees_consistent(device_extension * Vcb)578 static bool trees_consistent(device_extension* Vcb) {
579 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
580 LIST_ENTRY* le;
581
582 le = Vcb->trees.Flink;
583 while (le != &Vcb->trees) {
584 tree* t = CONTAINING_RECORD(le, tree, list_entry);
585
586 if (t->write) {
587 if (t->header.num_items == 0 && t->parent) {
588 #ifdef DEBUG_WRITE_LOOPS
589 ERR("empty tree found, looping again\n");
590 #endif
591 return false;
592 }
593
594 if (t->size > maxsize) {
595 #ifdef DEBUG_WRITE_LOOPS
596 ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
597 #endif
598 return false;
599 }
600
601 if (!t->has_new_address) {
602 #ifdef DEBUG_WRITE_LOOPS
603 ERR("tree found without new address, looping again\n");
604 #endif
605 return false;
606 }
607 }
608
609 le = le->Flink;
610 }
611
612 return true;
613 }
614
add_parents(device_extension * Vcb,PIRP Irp)615 static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) {
616 ULONG level;
617 LIST_ENTRY* le;
618
619 for (level = 0; level <= 255; level++) {
620 bool nothing_found = true;
621
622 TRACE("level = %lu\n", level);
623
624 le = Vcb->trees.Flink;
625 while (le != &Vcb->trees) {
626 tree* t = CONTAINING_RECORD(le, tree, list_entry);
627
628 if (t->write && t->header.level == level) {
629 TRACE("tree %p: root = %I64x, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
630
631 nothing_found = false;
632
633 if (t->parent) {
634 if (!t->parent->write)
635 TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
636
637 t->parent->write = true;
638 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
639 KEY searchkey;
640 traverse_ptr tp;
641 NTSTATUS Status;
642
643 searchkey.obj_id = t->root->id;
644 searchkey.obj_type = TYPE_ROOT_ITEM;
645 searchkey.offset = 0xffffffffffffffff;
646
647 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
648 if (!NT_SUCCESS(Status)) {
649 ERR("error - find_item returned %08lx\n", Status);
650 return Status;
651 }
652
653 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
654 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
655 return STATUS_INTERNAL_ERROR;
656 }
657
658 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
659 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
660
661 if (!ri) {
662 ERR("out of memory\n");
663 return STATUS_INSUFFICIENT_RESOURCES;
664 }
665
666 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
667
668 Status = delete_tree_item(Vcb, &tp);
669 if (!NT_SUCCESS(Status)) {
670 ERR("delete_tree_item returned %08lx\n", Status);
671 ExFreePool(ri);
672 return Status;
673 }
674
675 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
676 if (!NT_SUCCESS(Status)) {
677 ERR("insert_tree_item returned %08lx\n", Status);
678 ExFreePool(ri);
679 return Status;
680 }
681 }
682
683 tree* t2 = tp.tree;
684 while (t2) {
685 t2->write = true;
686
687 t2 = t2->parent;
688 }
689 }
690 }
691
692 le = le->Flink;
693 }
694
695 if (nothing_found)
696 break;
697 }
698
699 return STATUS_SUCCESS;
700 }
701
/* Sets the write flag on every ancestor of t. t itself is not modified. */
static void add_parents_to_cache(tree* t) {
    tree* ancestor;

    for (ancestor = t->parent; ancestor; ancestor = ancestor->parent)
        ancestor->write = true;
}
708
insert_tree_extent_skinny(device_extension * Vcb,uint8_t level,uint64_t root_id,chunk * c,uint64_t address,PIRP Irp,LIST_ENTRY * rollback)709 static bool insert_tree_extent_skinny(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t address, PIRP Irp, LIST_ENTRY* rollback) {
710 NTSTATUS Status;
711 EXTENT_ITEM_SKINNY_METADATA* eism;
712 traverse_ptr insert_tp;
713
714 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
715 if (!eism) {
716 ERR("out of memory\n");
717 return false;
718 }
719
720 eism->ei.refcount = 1;
721 eism->ei.generation = Vcb->superblock.generation;
722 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
723 eism->type = TYPE_TREE_BLOCK_REF;
724 eism->tbr.offset = root_id;
725
726 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp);
727 if (!NT_SUCCESS(Status)) {
728 ERR("insert_tree_item returned %08lx\n", Status);
729 ExFreePool(eism);
730 return false;
731 }
732
733 acquire_chunk_lock(c, Vcb);
734
735 space_list_subtract(c, address, Vcb->superblock.node_size, rollback);
736
737 release_chunk_lock(c, Vcb);
738
739 add_parents_to_cache(insert_tp.tree);
740
741 return true;
742 }
743
find_metadata_address_in_chunk(device_extension * Vcb,chunk * c,uint64_t * address)744 bool find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t* address) {
745 LIST_ENTRY* le;
746 space* s;
747
748 TRACE("(%p, %I64x, %p)\n", Vcb, c->offset, address);
749
750 if (Vcb->superblock.node_size > c->chunk_item->size - c->used)
751 return false;
752
753 if (!c->cache_loaded) {
754 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
755
756 if (!NT_SUCCESS(Status)) {
757 ERR("load_cache_chunk returned %08lx\n", Status);
758 return false;
759 }
760 }
761
762 if (IsListEmpty(&c->space_size))
763 return false;
764
765 if (!c->last_alloc_set) {
766 s = CONTAINING_RECORD(c->space.Blink, space, list_entry);
767
768 c->last_alloc = s->address;
769 c->last_alloc_set = true;
770
771 if (s->size >= Vcb->superblock.node_size) {
772 *address = s->address;
773 c->last_alloc += Vcb->superblock.node_size;
774 return true;
775 }
776 }
777
778 le = c->space.Flink;
779 while (le != &c->space) {
780 s = CONTAINING_RECORD(le, space, list_entry);
781
782 if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) {
783 *address = c->last_alloc;
784 c->last_alloc += Vcb->superblock.node_size;
785 return true;
786 }
787
788 le = le->Flink;
789 }
790
791 le = c->space_size.Flink;
792 while (le != &c->space_size) {
793 s = CONTAINING_RECORD(le, space, list_entry_size);
794
795 if (s->size == Vcb->superblock.node_size) {
796 *address = s->address;
797 c->last_alloc = s->address + Vcb->superblock.node_size;
798 return true;
799 } else if (s->size < Vcb->superblock.node_size) {
800 if (le == c->space_size.Flink)
801 return false;
802
803 s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
804
805 *address = s->address;
806 c->last_alloc = s->address + Vcb->superblock.node_size;
807
808 return true;
809 }
810
811 le = le->Flink;
812 }
813
814 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
815
816 if (s->size > Vcb->superblock.node_size) {
817 *address = s->address;
818 c->last_alloc = s->address + Vcb->superblock.node_size;
819 return true;
820 }
821
822 return false;
823 }
824
insert_tree_extent(device_extension * Vcb,uint8_t level,uint64_t root_id,chunk * c,uint64_t * new_address,PIRP Irp,LIST_ENTRY * rollback)825 static bool insert_tree_extent(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t* new_address, PIRP Irp, LIST_ENTRY* rollback) {
826 NTSTATUS Status;
827 uint64_t address;
828 EXTENT_ITEM_TREE2* eit2;
829 traverse_ptr insert_tp;
830
831 TRACE("(%p, %x, %I64x, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, Irp, rollback);
832
833 if (!find_metadata_address_in_chunk(Vcb, c, &address))
834 return false;
835
836 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
837 bool b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
838
839 if (b)
840 *new_address = address;
841
842 return b;
843 }
844
845 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
846 if (!eit2) {
847 ERR("out of memory\n");
848 return false;
849 }
850
851 eit2->eit.extent_item.refcount = 1;
852 eit2->eit.extent_item.generation = Vcb->superblock.generation;
853 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
854 eit2->eit.level = level;
855 eit2->type = TYPE_TREE_BLOCK_REF;
856 eit2->tbr.offset = root_id;
857
858 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp);
859 if (!NT_SUCCESS(Status)) {
860 ERR("insert_tree_item returned %08lx\n", Status);
861 ExFreePool(eit2);
862 return false;
863 }
864
865 acquire_chunk_lock(c, Vcb);
866
867 space_list_subtract(c, address, Vcb->superblock.node_size, rollback);
868
869 release_chunk_lock(c, Vcb);
870
871 add_parents_to_cache(insert_tp.tree);
872
873 *new_address = address;
874
875 return true;
876 }
877
get_tree_new_address(device_extension * Vcb,tree * t,PIRP Irp,LIST_ENTRY * rollback)878 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
879 NTSTATUS Status;
880 chunk *origchunk = NULL, *c;
881 LIST_ENTRY* le;
882 uint64_t flags, addr;
883
884 if (t->root->id == BTRFS_ROOT_CHUNK)
885 flags = Vcb->system_flags;
886 else
887 flags = Vcb->metadata_flags;
888
889 if (t->has_address) {
890 origchunk = get_chunk_from_address(Vcb, t->header.address);
891
892 if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags &&
893 insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
894 t->new_address = addr;
895 t->has_new_address = true;
896 return STATUS_SUCCESS;
897 }
898 }
899
900 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
901
902 le = Vcb->chunks.Flink;
903 while (le != &Vcb->chunks) {
904 c = CONTAINING_RECORD(le, chunk, list_entry);
905
906 if (!c->readonly && !c->reloc) {
907 acquire_chunk_lock(c, Vcb);
908
909 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
910 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
911 release_chunk_lock(c, Vcb);
912 ExReleaseResourceLite(&Vcb->chunk_lock);
913 t->new_address = addr;
914 t->has_new_address = true;
915 return STATUS_SUCCESS;
916 }
917 }
918
919 release_chunk_lock(c, Vcb);
920 }
921
922 le = le->Flink;
923 }
924
925 // allocate new chunk if necessary
926
927 Status = alloc_chunk(Vcb, flags, &c, false);
928
929 if (!NT_SUCCESS(Status)) {
930 ERR("alloc_chunk returned %08lx\n", Status);
931 ExReleaseResourceLite(&Vcb->chunk_lock);
932 return Status;
933 }
934
935 acquire_chunk_lock(c, Vcb);
936
937 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
938 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
939 release_chunk_lock(c, Vcb);
940 ExReleaseResourceLite(&Vcb->chunk_lock);
941 t->new_address = addr;
942 t->has_new_address = true;
943 return STATUS_SUCCESS;
944 }
945 }
946
947 release_chunk_lock(c, Vcb);
948
949 ExReleaseResourceLite(&Vcb->chunk_lock);
950
951 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
952
953 return STATUS_DISK_FULL;
954 }
955
reduce_tree_extent(device_extension * Vcb,uint64_t address,tree * t,uint64_t parent_root,uint8_t level,PIRP Irp,LIST_ENTRY * rollback)956 static NTSTATUS reduce_tree_extent(device_extension* Vcb, uint64_t address, tree* t, uint64_t parent_root, uint8_t level, PIRP Irp, LIST_ENTRY* rollback) {
957 NTSTATUS Status;
958 uint64_t rc, root;
959
960 TRACE("(%p, %I64x, %p)\n", Vcb, address, t);
961
962 rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
963 if (rc == 0) {
964 ERR("error - refcount for extent %I64x was 0\n", address);
965 return STATUS_INTERNAL_ERROR;
966 }
967
968 if (!t || t->parent)
969 root = parent_root;
970 else
971 root = t->header.tree_id;
972
973 Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp);
974 if (!NT_SUCCESS(Status)) {
975 ERR("decrease_extent_refcount_tree returned %08lx\n", Status);
976 return Status;
977 }
978
979 if (rc == 1) {
980 chunk* c = get_chunk_from_address(Vcb, address);
981
982 if (c) {
983 acquire_chunk_lock(c, Vcb);
984
985 if (!c->cache_loaded) {
986 Status = load_cache_chunk(Vcb, c, NULL);
987
988 if (!NT_SUCCESS(Status)) {
989 ERR("load_cache_chunk returned %08lx\n", Status);
990 release_chunk_lock(c, Vcb);
991 return Status;
992 }
993 }
994
995 c->used -= Vcb->superblock.node_size;
996
997 space_list_add(c, address, Vcb->superblock.node_size, rollback);
998
999 release_chunk_lock(c, Vcb);
1000 } else
1001 ERR("could not find chunk for address %I64x\n", address);
1002 }
1003
1004 return STATUS_SUCCESS;
1005 }
1006
add_changed_extent_ref_edr(changed_extent * ce,EXTENT_DATA_REF * edr,bool old)1007 static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, bool old) {
1008 LIST_ENTRY *le2, *list;
1009 changed_extent_ref* cer;
1010
1011 list = old ? &ce->old_refs : &ce->refs;
1012
1013 le2 = list->Flink;
1014 while (le2 != list) {
1015 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1016
1017 if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) {
1018 cer->edr.count += edr->count;
1019 goto end;
1020 }
1021
1022 le2 = le2->Flink;
1023 }
1024
1025 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
1026 if (!cer) {
1027 ERR("out of memory\n");
1028 return STATUS_INSUFFICIENT_RESOURCES;
1029 }
1030
1031 cer->type = TYPE_EXTENT_DATA_REF;
1032 RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF));
1033 InsertTailList(list, &cer->list_entry);
1034
1035 end:
1036 if (old)
1037 ce->old_count += edr->count;
1038 else
1039 ce->count += edr->count;
1040
1041 return STATUS_SUCCESS;
1042 }
1043
add_changed_extent_ref_sdr(changed_extent * ce,SHARED_DATA_REF * sdr,bool old)1044 static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, bool old) {
1045 LIST_ENTRY *le2, *list;
1046 changed_extent_ref* cer;
1047
1048 list = old ? &ce->old_refs : &ce->refs;
1049
1050 le2 = list->Flink;
1051 while (le2 != list) {
1052 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1053
1054 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) {
1055 cer->sdr.count += sdr->count;
1056 goto end;
1057 }
1058
1059 le2 = le2->Flink;
1060 }
1061
1062 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
1063 if (!cer) {
1064 ERR("out of memory\n");
1065 return STATUS_INSUFFICIENT_RESOURCES;
1066 }
1067
1068 cer->type = TYPE_SHARED_DATA_REF;
1069 RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF));
1070 InsertTailList(list, &cer->list_entry);
1071
1072 end:
1073 if (old)
1074 ce->old_count += sdr->count;
1075 else
1076 ce->count += sdr->count;
1077
1078 return STATUS_SUCCESS;
1079 }
1080
/* Reports whether the extent tree no longer holds an extent/metadata item
 * keyed on t's current address.
 *
 * If t still has an address and its extents have not been updated yet,
 * update_tree_extents is run on it first. The extent root is then searched
 * for (t->header.address, METADATA_ITEM or EXTENT_ITEM, max offset).
 *
 * Returns true only when the item found does not belong to t's address (or
 * is of neither extent item type). Any failure - from update_tree_extents
 * or from find_item - is logged and reported as false. */
static bool shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
    NTSTATUS Status;
    KEY searchkey;
    traverse_ptr tp;
    bool same_address, is_extent_item;

    if (!t->updated_extents && t->has_address) {
        Status = update_tree_extents(Vcb, t, Irp, rollback);

        if (!NT_SUCCESS(Status)) {
            ERR("update_tree_extents returned %08lx\n", Status);
            return false;
        }
    }

    searchkey.obj_id = t->header.address;
    searchkey.offset = 0xffffffffffffffff;

    // skinny-metadata volumes key tree blocks as METADATA_ITEM
    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
        searchkey.obj_type = TYPE_METADATA_ITEM;
    else
        searchkey.obj_type = TYPE_EXTENT_ITEM;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);

    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08lx\n", Status);
        return false;
    }

    same_address = tp.item->key.obj_id == t->header.address;
    is_extent_item = tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM;

    // an extent item still exists for this address => not unique
    return !(same_address && is_extent_item);
}
1109
update_tree_extents(device_extension * Vcb,tree * t,PIRP Irp,LIST_ENTRY * rollback)1110 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
1111 NTSTATUS Status;
1112 uint64_t rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
1113 uint64_t flags = get_extent_flags(Vcb, t->header.address, Irp);
1114
1115 if (rc == 0) {
1116 ERR("refcount for extent %I64x was 0\n", t->header.address);
1117 return STATUS_INTERNAL_ERROR;
1118 }
1119
1120 if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1121 TREE_BLOCK_REF tbr;
1122 bool unique = rc > 1 ? false : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : false);
1123
1124 if (t->header.level == 0) {
1125 LIST_ENTRY* le;
1126
1127 le = t->itemlist.Flink;
1128 while (le != &t->itemlist) {
1129 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1130
1131 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1132 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1133
1134 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1135 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1136
1137 if (ed2->size > 0) {
1138 EXTENT_DATA_REF edr;
1139 changed_extent* ce = NULL;
1140 chunk* c = get_chunk_from_address(Vcb, ed2->address);
1141
1142 if (c) {
1143 LIST_ENTRY* le2;
1144
1145 le2 = c->changed_extents.Flink;
1146 while (le2 != &c->changed_extents) {
1147 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1148
1149 if (ce2->address == ed2->address) {
1150 ce = ce2;
1151 break;
1152 }
1153
1154 le2 = le2->Flink;
1155 }
1156 }
1157
1158 edr.root = t->root->id;
1159 edr.objid = td->key.obj_id;
1160 edr.offset = td->key.offset - ed2->offset;
1161 edr.count = 1;
1162
1163 if (ce) {
1164 Status = add_changed_extent_ref_edr(ce, &edr, true);
1165 if (!NT_SUCCESS(Status)) {
1166 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1167 return Status;
1168 }
1169
1170 Status = add_changed_extent_ref_edr(ce, &edr, false);
1171 if (!NT_SUCCESS(Status)) {
1172 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1173 return Status;
1174 }
1175 }
1176
1177 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1178 if (!NT_SUCCESS(Status)) {
1179 ERR("increase_extent_refcount returned %08lx\n", Status);
1180 return Status;
1181 }
1182
1183 if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1184 uint64_t sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
1185
1186 if (sdrrc > 0) {
1187 SHARED_DATA_REF sdr;
1188
1189 sdr.offset = t->header.address;
1190 sdr.count = 1;
1191
1192 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
1193 t->header.address, ce ? ce->superseded : false, Irp);
1194 if (!NT_SUCCESS(Status)) {
1195 ERR("decrease_extent_refcount returned %08lx\n", Status);
1196 return Status;
1197 }
1198
1199 if (ce) {
1200 LIST_ENTRY* le2;
1201
1202 le2 = ce->refs.Flink;
1203 while (le2 != &ce->refs) {
1204 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1205
1206 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1207 ce->count--;
1208 cer->sdr.count--;
1209 break;
1210 }
1211
1212 le2 = le2->Flink;
1213 }
1214
1215 le2 = ce->old_refs.Flink;
1216 while (le2 != &ce->old_refs) {
1217 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1218
1219 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1220 ce->old_count--;
1221
1222 if (cer->sdr.count > 1)
1223 cer->sdr.count--;
1224 else {
1225 RemoveEntryList(&cer->list_entry);
1226 ExFreePool(cer);
1227 }
1228
1229 break;
1230 }
1231
1232 le2 = le2->Flink;
1233 }
1234 }
1235 }
1236 }
1237
1238 // FIXME - clear shared flag if unique?
1239 }
1240 }
1241 }
1242
1243 le = le->Flink;
1244 }
1245 } else {
1246 LIST_ENTRY* le;
1247
1248 le = t->itemlist.Flink;
1249 while (le != &t->itemlist) {
1250 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1251
1252 if (!td->inserted) {
1253 tbr.offset = t->root->id;
1254
1255 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF,
1256 &tbr, &td->key, t->header.level - 1, Irp);
1257 if (!NT_SUCCESS(Status)) {
1258 ERR("increase_extent_refcount returned %08lx\n", Status);
1259 return Status;
1260 }
1261
1262 if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1263 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
1264
1265 if (sbrrc > 0) {
1266 SHARED_BLOCK_REF sbr;
1267
1268 sbr.offset = t->header.address;
1269
1270 Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1271 t->header.address, false, Irp);
1272 if (!NT_SUCCESS(Status)) {
1273 ERR("decrease_extent_refcount returned %08lx\n", Status);
1274 return Status;
1275 }
1276 }
1277 }
1278
1279 // FIXME - clear shared flag if unique?
1280 }
1281
1282 le = le->Flink;
1283 }
1284 }
1285
1286 if (unique) {
1287 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
1288
1289 if (sbrrc == 1) {
1290 SHARED_BLOCK_REF sbr;
1291
1292 sbr.offset = t->parent->header.address;
1293
1294 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1295 t->parent->header.address, false, Irp);
1296 if (!NT_SUCCESS(Status)) {
1297 ERR("decrease_extent_refcount returned %08lx\n", Status);
1298 return Status;
1299 }
1300 }
1301 }
1302
1303 if (t->parent)
1304 tbr.offset = t->parent->header.tree_id;
1305 else
1306 tbr.offset = t->header.tree_id;
1307
1308 Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
1309 t->parent ? &t->paritem->key : NULL, t->header.level, Irp);
1310 if (!NT_SUCCESS(Status)) {
1311 ERR("increase_extent_refcount returned %08lx\n", Status);
1312 return Status;
1313 }
1314
1315 // FIXME - clear shared flag if unique?
1316
1317 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1318 }
1319
1320 if (rc > 1 || t->header.tree_id == t->root->id) {
1321 Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback);
1322
1323 if (!NT_SUCCESS(Status)) {
1324 ERR("reduce_tree_extent returned %08lx\n", Status);
1325 return Status;
1326 }
1327 }
1328
1329 t->has_address = false;
1330
1331 if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
1332 if (t->header.tree_id == t->root->id) {
1333 flags |= EXTENT_ITEM_SHARED_BACKREFS;
1334 update_extent_flags(Vcb, t->header.address, flags, Irp);
1335 }
1336
1337 if (t->header.level > 0) {
1338 LIST_ENTRY* le;
1339
1340 le = t->itemlist.Flink;
1341 while (le != &t->itemlist) {
1342 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1343
1344 if (!td->inserted) {
1345 if (t->header.tree_id == t->root->id) {
1346 SHARED_BLOCK_REF sbr;
1347
1348 sbr.offset = t->header.address;
1349
1350 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp);
1351 } else {
1352 TREE_BLOCK_REF tbr;
1353
1354 tbr.offset = t->root->id;
1355
1356 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp);
1357 }
1358
1359 if (!NT_SUCCESS(Status)) {
1360 ERR("increase_extent_refcount returned %08lx\n", Status);
1361 return Status;
1362 }
1363 }
1364
1365 le = le->Flink;
1366 }
1367 } else {
1368 LIST_ENTRY* le;
1369
1370 le = t->itemlist.Flink;
1371 while (le != &t->itemlist) {
1372 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1373
1374 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1375 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1376
1377 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1378 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1379
1380 if (ed2->size > 0) {
1381 changed_extent* ce = NULL;
1382 chunk* c = get_chunk_from_address(Vcb, ed2->address);
1383
1384 if (c) {
1385 LIST_ENTRY* le2;
1386
1387 le2 = c->changed_extents.Flink;
1388 while (le2 != &c->changed_extents) {
1389 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1390
1391 if (ce2->address == ed2->address) {
1392 ce = ce2;
1393 break;
1394 }
1395
1396 le2 = le2->Flink;
1397 }
1398 }
1399
1400 if (t->header.tree_id == t->root->id) {
1401 SHARED_DATA_REF sdr;
1402
1403 sdr.offset = t->header.address;
1404 sdr.count = 1;
1405
1406 if (ce) {
1407 Status = add_changed_extent_ref_sdr(ce, &sdr, true);
1408 if (!NT_SUCCESS(Status)) {
1409 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1410 return Status;
1411 }
1412
1413 Status = add_changed_extent_ref_sdr(ce, &sdr, false);
1414 if (!NT_SUCCESS(Status)) {
1415 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1416 return Status;
1417 }
1418 }
1419
1420 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp);
1421 } else {
1422 EXTENT_DATA_REF edr;
1423
1424 edr.root = t->root->id;
1425 edr.objid = td->key.obj_id;
1426 edr.offset = td->key.offset - ed2->offset;
1427 edr.count = 1;
1428
1429 if (ce) {
1430 Status = add_changed_extent_ref_edr(ce, &edr, true);
1431 if (!NT_SUCCESS(Status)) {
1432 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1433 return Status;
1434 }
1435
1436 Status = add_changed_extent_ref_edr(ce, &edr, false);
1437 if (!NT_SUCCESS(Status)) {
1438 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1439 return Status;
1440 }
1441 }
1442
1443 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1444 }
1445
1446 if (!NT_SUCCESS(Status)) {
1447 ERR("increase_extent_refcount returned %08lx\n", Status);
1448 return Status;
1449 }
1450 }
1451 }
1452 }
1453
1454 le = le->Flink;
1455 }
1456 }
1457 }
1458
1459 t->updated_extents = true;
1460 t->header.tree_id = t->root->id;
1461
1462 return STATUS_SUCCESS;
1463 }
1464
allocate_tree_extents(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)1465 static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
1466 LIST_ENTRY* le;
1467 NTSTATUS Status;
1468 bool changed = false;
1469 uint8_t max_level = 0, level;
1470
1471 TRACE("(%p)\n", Vcb);
1472
1473 le = Vcb->trees.Flink;
1474 while (le != &Vcb->trees) {
1475 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1476
1477 if (t->write && !t->has_new_address) {
1478 chunk* c;
1479
1480 if (t->has_address) {
1481 c = get_chunk_from_address(Vcb, t->header.address);
1482
1483 if (c) {
1484 if (!c->cache_loaded) {
1485 acquire_chunk_lock(c, Vcb);
1486
1487 if (!c->cache_loaded) {
1488 Status = load_cache_chunk(Vcb, c, NULL);
1489
1490 if (!NT_SUCCESS(Status)) {
1491 ERR("load_cache_chunk returned %08lx\n", Status);
1492 release_chunk_lock(c, Vcb);
1493 return Status;
1494 }
1495 }
1496
1497 release_chunk_lock(c, Vcb);
1498 }
1499 }
1500 }
1501
1502 Status = get_tree_new_address(Vcb, t, Irp, rollback);
1503 if (!NT_SUCCESS(Status)) {
1504 ERR("get_tree_new_address returned %08lx\n", Status);
1505 return Status;
1506 }
1507
1508 TRACE("allocated extent %I64x\n", t->new_address);
1509
1510 c = get_chunk_from_address(Vcb, t->new_address);
1511
1512 if (c)
1513 c->used += Vcb->superblock.node_size;
1514 else {
1515 ERR("could not find chunk for address %I64x\n", t->new_address);
1516 return STATUS_INTERNAL_ERROR;
1517 }
1518
1519 changed = true;
1520
1521 if (t->header.level > max_level)
1522 max_level = t->header.level;
1523 }
1524
1525 le = le->Flink;
1526 }
1527
1528 if (!changed)
1529 return STATUS_SUCCESS;
1530
1531 level = max_level;
1532 do {
1533 le = Vcb->trees.Flink;
1534 while (le != &Vcb->trees) {
1535 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1536
1537 if (t->write && !t->updated_extents && t->has_address && t->header.level == level) {
1538 Status = update_tree_extents(Vcb, t, Irp, rollback);
1539 if (!NT_SUCCESS(Status)) {
1540 ERR("update_tree_extents returned %08lx\n", Status);
1541 return Status;
1542 }
1543 }
1544
1545 le = le->Flink;
1546 }
1547
1548 if (level == 0)
1549 break;
1550
1551 level--;
1552 } while (true);
1553
1554 return STATUS_SUCCESS;
1555 }
1556
update_root_root(device_extension * Vcb,bool no_cache,PIRP Irp,LIST_ENTRY * rollback)1557 static NTSTATUS update_root_root(device_extension* Vcb, bool no_cache, PIRP Irp, LIST_ENTRY* rollback) {
1558 LIST_ENTRY* le;
1559 NTSTATUS Status;
1560
1561 TRACE("(%p)\n", Vcb);
1562
1563 le = Vcb->trees.Flink;
1564 while (le != &Vcb->trees) {
1565 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1566
1567 if (t->write && !t->parent) {
1568 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1569 KEY searchkey;
1570 traverse_ptr tp;
1571
1572 searchkey.obj_id = t->root->id;
1573 searchkey.obj_type = TYPE_ROOT_ITEM;
1574 searchkey.offset = 0xffffffffffffffff;
1575
1576 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1577 if (!NT_SUCCESS(Status)) {
1578 ERR("error - find_item returned %08lx\n", Status);
1579 return Status;
1580 }
1581
1582 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1583 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
1584 return STATUS_INTERNAL_ERROR;
1585 }
1586
1587 TRACE("updating the address for root %I64x to %I64x\n", searchkey.obj_id, t->new_address);
1588
1589 t->root->root_item.block_number = t->new_address;
1590 t->root->root_item.root_level = t->header.level;
1591 t->root->root_item.generation = Vcb->superblock.generation;
1592 t->root->root_item.generation2 = Vcb->superblock.generation;
1593
1594 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
1595
1596 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1597 }
1598
1599 t->root->treeholder.address = t->new_address;
1600 t->root->treeholder.generation = Vcb->superblock.generation;
1601 }
1602
1603 le = le->Flink;
1604 }
1605
1606 if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
1607 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
1608 Status = update_chunk_caches(Vcb, Irp, rollback);
1609 ExReleaseResourceLite(&Vcb->chunk_lock);
1610
1611 if (!NT_SUCCESS(Status)) {
1612 ERR("update_chunk_caches returned %08lx\n", Status);
1613 return Status;
1614 }
1615 }
1616
1617 return STATUS_SUCCESS;
1618 }
1619
do_tree_writes(device_extension * Vcb,LIST_ENTRY * tree_writes,bool no_free)1620 NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, bool no_free) {
1621 chunk* c;
1622 LIST_ENTRY* le;
1623 tree_write* tw;
1624 NTSTATUS Status;
1625 ULONG i, num_bits;
1626 write_data_context* wtc;
1627 ULONG bit_num = 0;
1628 bool raid56 = false;
1629
1630 // merge together runs
1631 c = NULL;
1632 le = tree_writes->Flink;
1633 while (le != tree_writes) {
1634 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1635
1636 if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
1637 c = get_chunk_from_address(Vcb, tw->address);
1638 else {
1639 tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
1640
1641 if (tw->address == tw2->address + tw2->length) {
1642 uint8_t* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG);
1643
1644 if (!data) {
1645 ERR("out of memory\n");
1646 return STATUS_INSUFFICIENT_RESOURCES;
1647 }
1648
1649 RtlCopyMemory(data, tw2->data, tw2->length);
1650 RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
1651
1652 if (!no_free || tw2->allocated)
1653 ExFreePool(tw2->data);
1654
1655 tw2->data = data;
1656 tw2->length += tw->length;
1657 tw2->allocated = true;
1658
1659 if (!no_free || tw->allocated)
1660 ExFreePool(tw->data);
1661
1662 RemoveEntryList(&tw->list_entry);
1663 ExFreePool(tw);
1664
1665 le = tw2->list_entry.Flink;
1666 continue;
1667 }
1668 }
1669
1670 tw->c = c;
1671
1672 if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6))
1673 raid56 = true;
1674
1675 le = le->Flink;
1676 }
1677
1678 num_bits = 0;
1679
1680 le = tree_writes->Flink;
1681 while (le != tree_writes) {
1682 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1683
1684 num_bits++;
1685
1686 le = le->Flink;
1687 }
1688
1689 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG);
1690 if (!wtc) {
1691 ERR("out of memory\n");
1692 return STATUS_INSUFFICIENT_RESOURCES;
1693 }
1694
1695 le = tree_writes->Flink;
1696
1697 while (le != tree_writes) {
1698 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1699
1700 TRACE("address: %I64x, size: %x\n", tw->address, tw->length);
1701
1702 KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, false);
1703 InitializeListHead(&wtc[bit_num].stripes);
1704 wtc[bit_num].need_wait = false;
1705 wtc[bit_num].stripes_left = 0;
1706 wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL;
1707 wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL;
1708
1709 Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, false, 0, HighPagePriority);
1710 if (!NT_SUCCESS(Status)) {
1711 ERR("write_data returned %08lx\n", Status);
1712
1713 for (i = 0; i < num_bits; i++) {
1714 free_write_data_stripes(&wtc[i]);
1715 }
1716 ExFreePool(wtc);
1717
1718 return Status;
1719 }
1720
1721 bit_num++;
1722
1723 le = le->Flink;
1724 }
1725
1726 for (i = 0; i < num_bits; i++) {
1727 if (wtc[i].stripes.Flink != &wtc[i].stripes) {
1728 // launch writes and wait
1729 le = wtc[i].stripes.Flink;
1730 while (le != &wtc[i].stripes) {
1731 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1732
1733 if (stripe->status != WriteDataStatus_Ignore) {
1734 wtc[i].need_wait = true;
1735 IoCallDriver(stripe->device->devobj, stripe->Irp);
1736 }
1737
1738 le = le->Flink;
1739 }
1740 }
1741 }
1742
1743 for (i = 0; i < num_bits; i++) {
1744 if (wtc[i].need_wait)
1745 KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, false, NULL);
1746 }
1747
1748 for (i = 0; i < num_bits; i++) {
1749 le = wtc[i].stripes.Flink;
1750 while (le != &wtc[i].stripes) {
1751 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1752
1753 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
1754 Status = stripe->iosb.Status;
1755 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
1756 break;
1757 }
1758
1759 le = le->Flink;
1760 }
1761
1762 free_write_data_stripes(&wtc[i]);
1763 }
1764
1765 ExFreePool(wtc);
1766
1767 if (raid56) {
1768 c = NULL;
1769
1770 le = tree_writes->Flink;
1771 while (le != tree_writes) {
1772 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1773
1774 if (tw->c != c) {
1775 c = tw->c;
1776
1777 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
1778
1779 while (!IsListEmpty(&c->partial_stripes)) {
1780 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
1781
1782 Status = flush_partial_stripe(Vcb, c, ps);
1783
1784 if (ps->bmparr)
1785 ExFreePool(ps->bmparr);
1786
1787 ExFreePool(ps);
1788
1789 if (!NT_SUCCESS(Status)) {
1790 ERR("flush_partial_stripe returned %08lx\n", Status);
1791 ExReleaseResourceLite(&c->partial_stripes_lock);
1792 return Status;
1793 }
1794 }
1795
1796 ExReleaseResourceLite(&c->partial_stripes_lock);
1797 }
1798
1799 le = le->Flink;
1800 }
1801 }
1802
1803 return STATUS_SUCCESS;
1804 }
1805
/* Computes the checksum of a tree block and stores it at the start of the
 * block header.
 *
 * Vcb - volume; superblock.csum_type selects the algorithm and
 *       superblock.node_size gives the size of the block th heads
 * th  - header at the start of a node_size buffer
 *
 * The hashed range starts at th->fs_uuid and is node_size minus
 * sizeof(th->csum) bytes long. CRC32C writes a 32-bit value, xxHash a
 * 64-bit value, and SHA256 / BLAKE2 write their digest from the first byte
 * of th. An unrecognised csum_type leaves the block untouched. */
void calc_tree_checksum(device_extension* Vcb, tree_header* th) {
    uint8_t* payload = (uint8_t*)&th->fs_uuid;
    size_t payload_len = Vcb->superblock.node_size - sizeof(th->csum);

    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C:
            *((uint32_t*)th) = ~calc_crc32c(0xffffffff, payload, payload_len);
            break;

        case CSUM_TYPE_XXHASH:
            *((uint64_t*)th) = XXH64(payload, payload_len, 0);
            break;

        case CSUM_TYPE_SHA256:
            calc_sha256((uint8_t*)th, payload, payload_len);
            break;

        case CSUM_TYPE_BLAKE2:
            blake2b((uint8_t*)th, BLAKE2_HASH_SIZE, payload, payload_len);
            break;

        default:
            break;
    }
}
1825
write_trees(device_extension * Vcb,PIRP Irp)1826 static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
1827 ULONG level;
1828 uint8_t *data, *body;
1829 NTSTATUS Status;
1830 LIST_ENTRY* le;
1831 LIST_ENTRY tree_writes;
1832 tree_write* tw;
1833
1834 TRACE("(%p)\n", Vcb);
1835
1836 InitializeListHead(&tree_writes);
1837
1838 for (level = 0; level <= 255; level++) {
1839 bool nothing_found = true;
1840
1841 TRACE("level = %lu\n", level);
1842
1843 le = Vcb->trees.Flink;
1844 while (le != &Vcb->trees) {
1845 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1846
1847 if (t->write && t->header.level == level) {
1848 KEY firstitem, searchkey;
1849 LIST_ENTRY* le2;
1850 traverse_ptr tp;
1851
1852 if (!t->has_new_address) {
1853 ERR("error - tried to write tree with no new address\n");
1854 return STATUS_INTERNAL_ERROR;
1855 }
1856
1857 le2 = t->itemlist.Flink;
1858 while (le2 != &t->itemlist) {
1859 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1860 if (!td->ignore) {
1861 firstitem = td->key;
1862 break;
1863 }
1864 le2 = le2->Flink;
1865 }
1866
1867 if (t->parent) {
1868 t->paritem->key = firstitem;
1869 t->paritem->treeholder.address = t->new_address;
1870 t->paritem->treeholder.generation = Vcb->superblock.generation;
1871 }
1872
1873 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
1874 EXTENT_ITEM_TREE* eit;
1875
1876 searchkey.obj_id = t->new_address;
1877 searchkey.obj_type = TYPE_EXTENT_ITEM;
1878 searchkey.offset = Vcb->superblock.node_size;
1879
1880 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
1881 if (!NT_SUCCESS(Status)) {
1882 ERR("error - find_item returned %08lx\n", Status);
1883 return Status;
1884 }
1885
1886 if (keycmp(searchkey, tp.item->key)) {
1887 ERR("could not find %I64x,%x,%I64x in extent_root (found %I64x,%x,%I64x instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
1888 return STATUS_INTERNAL_ERROR;
1889 }
1890
1891 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
1892 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
1893 return STATUS_INTERNAL_ERROR;
1894 }
1895
1896 eit = (EXTENT_ITEM_TREE*)tp.item->data;
1897 eit->firstitem = firstitem;
1898 }
1899
1900 nothing_found = false;
1901 }
1902
1903 le = le->Flink;
1904 }
1905
1906 if (nothing_found)
1907 break;
1908 }
1909
1910 TRACE("allocated tree extents\n");
1911
1912 le = Vcb->trees.Flink;
1913 while (le != &Vcb->trees) {
1914 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1915 LIST_ENTRY* le2;
1916 #ifdef DEBUG_PARANOID
1917 uint32_t num_items = 0, size = 0;
1918 bool crash = false;
1919 #endif
1920
1921 if (t->write) {
1922 #ifdef DEBUG_PARANOID
1923 bool first = true;
1924 KEY lastkey;
1925
1926 le2 = t->itemlist.Flink;
1927 while (le2 != &t->itemlist) {
1928 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1929 if (!td->ignore) {
1930 num_items++;
1931
1932 if (!first) {
1933 if (keycmp(td->key, lastkey) == 0) {
1934 ERR("(%I64x,%x,%I64x): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1935 crash = true;
1936 } else if (keycmp(td->key, lastkey) == -1) {
1937 ERR("(%I64x,%x,%I64x): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1938 crash = true;
1939 }
1940 } else
1941 first = false;
1942
1943 lastkey = td->key;
1944
1945 if (t->header.level == 0)
1946 size += td->size;
1947 }
1948 le2 = le2->Flink;
1949 }
1950
1951 if (t->header.level == 0)
1952 size += num_items * sizeof(leaf_node);
1953 else
1954 size += num_items * sizeof(internal_node);
1955
1956 if (num_items != t->header.num_items) {
1957 ERR("tree %I64x, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
1958 crash = true;
1959 }
1960
1961 if (size != t->size) {
1962 ERR("tree %I64x, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
1963 crash = true;
1964 }
1965
1966 if (t->header.num_items == 0 && t->parent) {
1967 ERR("tree %I64x, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
1968 crash = true;
1969 }
1970
1971 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
1972 ERR("tree %I64x, level %x: tried to write overlarge tree (%x > %Ix)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
1973 crash = true;
1974 }
1975
1976 if (crash) {
1977 ERR("tree %p\n", t);
1978 le2 = t->itemlist.Flink;
1979 while (le2 != &t->itemlist) {
1980 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1981 if (!td->ignore) {
1982 ERR("%I64x,%x,%I64x inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
1983 }
1984 le2 = le2->Flink;
1985 }
1986 int3;
1987 }
1988 #endif
1989 t->header.address = t->new_address;
1990 t->header.generation = Vcb->superblock.generation;
1991 t->header.tree_id = t->root->id;
1992 t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
1993 t->header.fs_uuid = Vcb->superblock.metadata_uuid;
1994 t->has_address = true;
1995
1996 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
1997 if (!data) {
1998 ERR("out of memory\n");
1999 Status = STATUS_INSUFFICIENT_RESOURCES;
2000 goto end;
2001 }
2002
2003 body = data + sizeof(tree_header);
2004
2005 RtlCopyMemory(data, &t->header, sizeof(tree_header));
2006 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
2007
2008 if (t->header.level == 0) {
2009 leaf_node* itemptr = (leaf_node*)body;
2010 int i = 0;
2011 uint8_t* dataptr = data + Vcb->superblock.node_size;
2012
2013 le2 = t->itemlist.Flink;
2014 while (le2 != &t->itemlist) {
2015 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2016 if (!td->ignore) {
2017 dataptr = dataptr - td->size;
2018
2019 itemptr[i].key = td->key;
2020 itemptr[i].offset = (uint32_t)((uint8_t*)dataptr - (uint8_t*)body);
2021 itemptr[i].size = td->size;
2022 i++;
2023
2024 if (td->size > 0)
2025 RtlCopyMemory(dataptr, td->data, td->size);
2026 }
2027
2028 le2 = le2->Flink;
2029 }
2030 } else {
2031 internal_node* itemptr = (internal_node*)body;
2032 int i = 0;
2033
2034 le2 = t->itemlist.Flink;
2035 while (le2 != &t->itemlist) {
2036 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2037 if (!td->ignore) {
2038 itemptr[i].key = td->key;
2039 itemptr[i].address = td->treeholder.address;
2040 itemptr[i].generation = td->treeholder.generation;
2041 i++;
2042 }
2043
2044 le2 = le2->Flink;
2045 }
2046 }
2047
2048 calc_tree_checksum(Vcb, (tree_header*)data);
2049
2050 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
2051 if (!tw) {
2052 ERR("out of memory\n");
2053 ExFreePool(data);
2054 Status = STATUS_INSUFFICIENT_RESOURCES;
2055 goto end;
2056 }
2057
2058 tw->address = t->new_address;
2059 tw->length = Vcb->superblock.node_size;
2060 tw->data = data;
2061 tw->allocated = false;
2062
2063 if (IsListEmpty(&tree_writes))
2064 InsertTailList(&tree_writes, &tw->list_entry);
2065 else {
2066 bool inserted = false;
2067
2068 le2 = tree_writes.Flink;
2069 while (le2 != &tree_writes) {
2070 tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
2071
2072 if (tw2->address > tw->address) {
2073 InsertHeadList(le2->Blink, &tw->list_entry);
2074 inserted = true;
2075 break;
2076 }
2077
2078 le2 = le2->Flink;
2079 }
2080
2081 if (!inserted)
2082 InsertTailList(&tree_writes, &tw->list_entry);
2083 }
2084 }
2085
2086 le = le->Flink;
2087 }
2088
2089 Status = do_tree_writes(Vcb, &tree_writes, false);
2090 if (!NT_SUCCESS(Status)) {
2091 ERR("do_tree_writes returned %08lx\n", Status);
2092 goto end;
2093 }
2094
2095 Status = STATUS_SUCCESS;
2096
2097 end:
2098 while (!IsListEmpty(&tree_writes)) {
2099 le = RemoveHeadList(&tree_writes);
2100 tw = CONTAINING_RECORD(le, tree_write, list_entry);
2101
2102 if (tw->data)
2103 ExFreePool(tw->data);
2104
2105 ExFreePool(tw);
2106 }
2107
2108 return Status;
2109 }
2110
update_backup_superblock(device_extension * Vcb,superblock_backup * sb,PIRP Irp)2111 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
2112 KEY searchkey;
2113 traverse_ptr tp;
2114
2115 RtlZeroMemory(sb, sizeof(superblock_backup));
2116
2117 sb->root_tree_addr = Vcb->superblock.root_tree_addr;
2118 sb->root_tree_generation = Vcb->superblock.generation;
2119 sb->root_level = Vcb->superblock.root_level;
2120
2121 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
2122 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
2123 sb->chunk_root_level = Vcb->superblock.chunk_root_level;
2124
2125 searchkey.obj_id = BTRFS_ROOT_EXTENT;
2126 searchkey.obj_type = TYPE_ROOT_ITEM;
2127 searchkey.offset = 0xffffffffffffffff;
2128
2129 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2130 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2131 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2132
2133 sb->extent_tree_addr = ri->block_number;
2134 sb->extent_tree_generation = ri->generation;
2135 sb->extent_root_level = ri->root_level;
2136 }
2137 }
2138
2139 searchkey.obj_id = BTRFS_ROOT_FSTREE;
2140
2141 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2142 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2143 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2144
2145 sb->fs_tree_addr = ri->block_number;
2146 sb->fs_tree_generation = ri->generation;
2147 sb->fs_root_level = ri->root_level;
2148 }
2149 }
2150
2151 searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2152
2153 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2154 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2155 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2156
2157 sb->dev_root_addr = ri->block_number;
2158 sb->dev_root_generation = ri->generation;
2159 sb->dev_root_level = ri->root_level;
2160 }
2161 }
2162
2163 searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2164
2165 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2166 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2167 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2168
2169 sb->csum_root_addr = ri->block_number;
2170 sb->csum_root_generation = ri->generation;
2171 sb->csum_root_level = ri->root_level;
2172 }
2173 }
2174
2175 sb->total_bytes = Vcb->superblock.total_bytes;
2176 sb->bytes_used = Vcb->superblock.bytes_used;
2177 sb->num_devices = Vcb->superblock.num_devices;
2178 }
2179
/* One queued superblock write (one superblock copy on one device).
 * Built by write_superblock, completed by write_superblock_completion and
 * torn down by write_superblocks. */
typedef struct {
    void* context;          // owning write_superblocks_context; read back in the completion routine
    uint8_t* buf;           // sector-aligned superblock image being written; freed by write_superblocks
    PMDL mdl;               // MDL describing buf for direct-I/O devices, otherwise NULL
    device* device;         // device this copy is written to
    NTSTATUS Status;        // completion status, copied from the IRP by the completion routine
    PIRP Irp;               // the write IRP; freed by write_superblocks
    LIST_ENTRY list_entry;  // link in write_superblocks_context.stripes
} write_superblocks_stripe;
2189
/* Shared state for one write_superblocks call: the queued stripes plus the
 * means of waiting for all of them to complete. */
typedef struct _write_superblocks_context {
    KEVENT Event;       // set by the completion routine when `left` drops to zero
    LIST_ENTRY stripes; // list of write_superblocks_stripe, linked through list_entry
    LONG left;          // number of queued writes not yet completed
} write_superblocks_context;
2195
_Function_class_(IO_COMPLETION_ROUTINE)2196 _Function_class_(IO_COMPLETION_ROUTINE)
2197 static NTSTATUS __stdcall write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2198 write_superblocks_stripe* stripe = conptr;
2199 write_superblocks_context* context = stripe->context;
2200
2201 UNUSED(DeviceObject);
2202
2203 stripe->Status = Irp->IoStatus.Status;
2204
2205 if (InterlockedDecrement(&context->left) == 0)
2206 KeSetEvent(&context->Event, 0, false);
2207
2208 return STATUS_MORE_PROCESSING_REQUIRED;
2209 }
2210
/* Computes the checksum of a superblock image and stores it at the very
 * start of *sb.
 *
 * The hash covers everything from sb->uuid onwards, i.e. sizeof(superblock)
 * minus the checksum field. The algorithm is chosen by sb->csum_type; an
 * unrecognised type leaves the checksum bytes untouched. */
static void calc_superblock_checksum(superblock* sb) {
    const size_t hashed_len = sizeof(superblock) - sizeof(sb->checksum);

    if (sb->csum_type == CSUM_TYPE_CRC32C) {
        *(uint32_t*)sb = ~calc_crc32c(0xffffffff, (uint8_t*)&sb->uuid, hashed_len);
    } else if (sb->csum_type == CSUM_TYPE_XXHASH) {
        *(uint64_t*)sb = XXH64(&sb->uuid, hashed_len, 0);
    } else if (sb->csum_type == CSUM_TYPE_SHA256) {
        calc_sha256((uint8_t*)sb, &sb->uuid, hashed_len);
    } else if (sb->csum_type == CSUM_TYPE_BLAKE2) {
        blake2b((uint8_t*)sb, BLAKE2_HASH_SIZE, &sb->uuid, hashed_len);
    }
}
2230
write_superblock(device_extension * Vcb,device * device,write_superblocks_context * context)2231 static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) {
2232 unsigned int i = 0;
2233
2234 // All the documentation says that the Linux driver only writes one superblock
2235 // if it thinks a disk is an SSD, but this doesn't seem to be the case!
2236
2237 while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2238 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
2239 superblock* sb;
2240 write_superblocks_stripe* stripe;
2241 PIO_STACK_LOCATION IrpSp;
2242
2243 sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG);
2244 if (!sb) {
2245 ERR("out of memory\n");
2246 return STATUS_INSUFFICIENT_RESOURCES;
2247 }
2248
2249 RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock));
2250
2251 if (sblen > sizeof(superblock))
2252 RtlZeroMemory((uint8_t*)sb + sizeof(superblock), sblen - sizeof(superblock));
2253
2254 RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM));
2255 sb->sb_phys_addr = superblock_addrs[i];
2256
2257 calc_superblock_checksum(sb);
2258
2259 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG);
2260 if (!stripe) {
2261 ERR("out of memory\n");
2262 ExFreePool(sb);
2263 return STATUS_INSUFFICIENT_RESOURCES;
2264 }
2265
2266 stripe->buf = (uint8_t*)sb;
2267
2268 stripe->Irp = IoAllocateIrp(device->devobj->StackSize, false);
2269 if (!stripe->Irp) {
2270 ERR("IoAllocateIrp failed\n");
2271 ExFreePool(stripe);
2272 ExFreePool(sb);
2273 return STATUS_INSUFFICIENT_RESOURCES;
2274 }
2275
2276 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2277 IrpSp->MajorFunction = IRP_MJ_WRITE;
2278 IrpSp->FileObject = device->fileobj;
2279
2280 if (i == 0)
2281 IrpSp->Flags |= SL_WRITE_THROUGH;
2282
2283 if (device->devobj->Flags & DO_BUFFERED_IO) {
2284 stripe->Irp->AssociatedIrp.SystemBuffer = sb;
2285 stripe->mdl = NULL;
2286
2287 stripe->Irp->Flags = IRP_BUFFERED_IO;
2288 } else if (device->devobj->Flags & DO_DIRECT_IO) {
2289 stripe->mdl = IoAllocateMdl(sb, sblen, false, false, NULL);
2290 if (!stripe->mdl) {
2291 ERR("IoAllocateMdl failed\n");
2292 IoFreeIrp(stripe->Irp);
2293 ExFreePool(stripe);
2294 ExFreePool(sb);
2295 return STATUS_INSUFFICIENT_RESOURCES;
2296 }
2297
2298 stripe->Irp->MdlAddress = stripe->mdl;
2299
2300 MmBuildMdlForNonPagedPool(stripe->mdl);
2301 } else {
2302 stripe->Irp->UserBuffer = sb;
2303 stripe->mdl = NULL;
2304 }
2305
2306 IrpSp->Parameters.Write.Length = sblen;
2307 IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i];
2308
2309 IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, true, true, true);
2310
2311 stripe->context = context;
2312 stripe->device = device;
2313 InsertTailList(&context->stripes, &stripe->list_entry);
2314
2315 context->left++;
2316
2317 i++;
2318 }
2319
2320 if (i == 0)
2321 ERR("no superblocks written!\n");
2322
2323 return STATUS_SUCCESS;
2324 }
2325
write_superblocks(device_extension * Vcb,PIRP Irp)2326 static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
2327 uint64_t i;
2328 NTSTATUS Status;
2329 LIST_ENTRY* le;
2330 write_superblocks_context context;
2331
2332 TRACE("(%p)\n", Vcb);
2333
2334 le = Vcb->trees.Flink;
2335 while (le != &Vcb->trees) {
2336 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2337
2338 if (t->write && !t->parent) {
2339 if (t->root == Vcb->root_root) {
2340 Vcb->superblock.root_tree_addr = t->new_address;
2341 Vcb->superblock.root_level = t->header.level;
2342 } else if (t->root == Vcb->chunk_root) {
2343 Vcb->superblock.chunk_tree_addr = t->new_address;
2344 Vcb->superblock.chunk_root_generation = t->header.generation;
2345 Vcb->superblock.chunk_root_level = t->header.level;
2346 }
2347 }
2348
2349 le = le->Flink;
2350 }
2351
2352 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2353 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2354 }
2355
2356 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
2357
2358 KeInitializeEvent(&context.Event, NotificationEvent, false);
2359 InitializeListHead(&context.stripes);
2360 context.left = 0;
2361
2362 le = Vcb->devices.Flink;
2363 while (le != &Vcb->devices) {
2364 device* dev = CONTAINING_RECORD(le, device, list_entry);
2365
2366 if (dev->devobj && !dev->readonly) {
2367 Status = write_superblock(Vcb, dev, &context);
2368 if (!NT_SUCCESS(Status)) {
2369 ERR("write_superblock returned %08lx\n", Status);
2370 goto end;
2371 }
2372 }
2373
2374 le = le->Flink;
2375 }
2376
2377 if (IsListEmpty(&context.stripes)) {
2378 ERR("error - not writing any superblocks\n");
2379 Status = STATUS_INTERNAL_ERROR;
2380 goto end;
2381 }
2382
2383 le = context.stripes.Flink;
2384 while (le != &context.stripes) {
2385 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2386
2387 IoCallDriver(stripe->device->devobj, stripe->Irp);
2388
2389 le = le->Flink;
2390 }
2391
2392 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2393
2394 le = context.stripes.Flink;
2395 while (le != &context.stripes) {
2396 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2397
2398 if (!NT_SUCCESS(stripe->Status)) {
2399 ERR("device %I64x returned %08lx\n", stripe->device->devitem.dev_id, stripe->Status);
2400 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2401 Status = stripe->Status;
2402 goto end;
2403 }
2404
2405 le = le->Flink;
2406 }
2407
2408 Status = STATUS_SUCCESS;
2409
2410 end:
2411 while (!IsListEmpty(&context.stripes)) {
2412 write_superblocks_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&context.stripes), write_superblocks_stripe, list_entry);
2413
2414 if (stripe->mdl) {
2415 if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2416 MmUnlockPages(stripe->mdl);
2417
2418 IoFreeMdl(stripe->mdl);
2419 }
2420
2421 if (stripe->Irp)
2422 IoFreeIrp(stripe->Irp);
2423
2424 if (stripe->buf)
2425 ExFreePool(stripe->buf);
2426
2427 ExFreePool(stripe);
2428 }
2429
2430 return Status;
2431 }
2432
/* Applies the reference changes recorded in `ce` to the extent tree, then
 * unlinks and frees `ce`.
 *
 * Vcb      - volume
 * c        - chunk containing the extent; its `used` counter and space list
 *            are updated if the extent ends up with no references
 * ce       - changed extent to flush; freed on success
 * Irp      - passed through to the tree functions
 * rollback - passed through to space_list_add
 *
 * Increases are applied in a first pass over ce->refs and decreases in a
 * second pass, which also frees the refs as it goes.
 *
 * Returns STATUS_SUCCESS, or the failing status of a callee. On an error
 * return `ce` is left on its list and is not freed here. */
static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
    LIST_ENTRY *le, *le2;
    NTSTATUS Status;
    uint64_t old_size;

    // No references before or after: nothing to change in the extent tree,
    // just discard both ref lists.
    if (ce->count == 0 && ce->old_count == 0) {
        while (!IsListEmpty(&ce->refs)) {
            changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry);
            ExFreePool(cer);
        }

        while (!IsListEmpty(&ce->old_refs)) {
            changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry);
            ExFreePool(cer);
        }

        goto end;
    }

    // First pass: apply refcount increases. Entries are left on ce->refs for
    // the second pass.
    le = ce->refs.Flink;
    while (le != &ce->refs) {
        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
        uint32_t old_count = 0;

        if (cer->type == TYPE_EXTENT_DATA_REF) {
            // Find the previous count for the same (root, objid, offset); it
            // stays 0 if there is no matching old ref.
            le2 = ce->old_refs.Flink;
            while (le2 != &ce->old_refs) {
                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);

                if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
                    old_count = cer2->edr.count;
                    break;
                }

                le2 = le2->Flink;
            }

            // The extent item is still keyed by its old size if it existed before
            old_size = ce->old_count > 0 ? ce->old_size : ce->size;

            if (cer->edr.count > old_count) {
                Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp);

                if (!NT_SUCCESS(Status)) {
                    ERR("increase_extent_refcount_data returned %08lx\n", Status);
                    return Status;
                }
            }
        } else if (cer->type == TYPE_SHARED_DATA_REF) {
            // Shared refs: only the matching old ref is dropped here; no
            // refcount call is made for this type in this function.
            le2 = ce->old_refs.Flink;
            while (le2 != &ce->old_refs) {
                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);

                if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
                    RemoveEntryList(&cer2->list_entry);
                    ExFreePool(cer2);
                    break;
                }

                le2 = le2->Flink;
            }
        }

        le = le->Flink;
    }

    // Second pass: apply refcount decreases, re-key the extent item if its
    // size changed, and free each ref once handled.
    le = ce->refs.Flink;
    while (le != &ce->refs) {
        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
        LIST_ENTRY* le3 = le->Flink; // saved because cer is freed at the bottom of the loop
        uint32_t old_count = 0;

        if (cer->type == TYPE_EXTENT_DATA_REF) {
            // As in the first pass, but this time the matching old ref is consumed
            le2 = ce->old_refs.Flink;
            while (le2 != &ce->old_refs) {
                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);

                if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
                    old_count = cer2->edr.count;

                    RemoveEntryList(&cer2->list_entry);
                    ExFreePool(cer2);
                    break;
                }

                le2 = le2->Flink;
            }

            old_size = ce->old_count > 0 ? ce->old_size : ce->size;

            if (cer->edr.count < old_count) {
                Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
                                                       old_count - cer->edr.count, ce->superseded, Irp);

                if (!NT_SUCCESS(Status)) {
                    ERR("decrease_extent_refcount_data returned %08lx\n", Status);
                    return Status;
                }
            }

            // The extent changed size: move its EXTENT_ITEM from the key with
            // the old size to a key with the new size, keeping the item data.
            if (ce->size != ce->old_size && ce->old_count > 0) {
                KEY searchkey;
                traverse_ptr tp;
                void* data;

                searchkey.obj_id = ce->address;
                searchkey.obj_type = TYPE_EXTENT_ITEM;
                searchkey.offset = ce->old_size;

                Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("error - find_item returned %08lx\n", Status);
                    return Status;
                }

                if (keycmp(searchkey, tp.item->key)) {
                    ERR("could not find (%I64x,%x,%I64x) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
                    return STATUS_INTERNAL_ERROR;
                }

                if (tp.item->size > 0) {
                    data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, tp.item->data, tp.item->size);
                } else
                    data = NULL;

                // The copy is freed here only if the insert fails; after a
                // successful insert this function no longer frees it
                // (presumably the new tree item owns it - confirm in insert_tree_item).
                Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("insert_tree_item returned %08lx\n", Status);
                    if (data) ExFreePool(data);
                    return Status;
                }

                Status = delete_tree_item(Vcb, &tp);
                if (!NT_SUCCESS(Status)) {
                    ERR("delete_tree_item returned %08lx\n", Status);
                    return Status;
                }
            }
        }

        RemoveEntryList(&cer->list_entry);
        ExFreePool(cer);

        le = le3;
    }

#ifdef DEBUG_PARANOID
    if (!IsListEmpty(&ce->old_refs))
        WARN("old_refs not empty\n");
#endif

end:
    // An extent with no remaining references that was not superseded gives
    // its space back to the chunk.
    if (ce->count == 0 && !ce->superseded) {
        c->used -= ce->size;
        space_list_add(c, ce->address, ce->size, rollback);
    }

    RemoveEntryList(&ce->list_entry);
    ExFreePool(ce);

    return STATUS_SUCCESS;
}
2601
/* Adds, replaces or removes the checksums for a run of sectors in the
 * checksum tree.
 *
 * Vcb     - volume
 * address - byte address of the first sector
 * length  - number of sectors
 * csum    - array of `length` checksums (Vcb->csum_size bytes each), or NULL
 *           to delete the checksums for the range
 * Irp     - passed through to the tree functions
 *
 * If the tree is empty the checksums are inserted directly, split into
 * items of at most MAX_CSUM_SIZE bytes. Otherwise every existing item that
 * overlaps or abuts the range is read into one contiguous buffer and
 * deleted, the new checksums are overlaid (or the range is marked free), and
 * each run of still-allocated sectors is re-inserted, again split at
 * MAX_CSUM_SIZE.
 *
 * There is no return value: failures are logged and the function returns
 * early, leaving whatever tree changes had already been made. */
void add_checksum_entry(device_extension* Vcb, uint64_t address, ULONG length, void* csum, PIRP Irp) {
    KEY searchkey;
    traverse_ptr tp, next_tp;
    NTSTATUS Status;
    uint64_t startaddr, endaddr;
    ULONG len;
    RTL_BITMAP bmp;
    ULONG* bmparr;
    ULONG runlength, index;

    TRACE("(%p, %I64x, %lx, %p, %p)\n", Vcb, address, length, csum, Irp);

    searchkey.obj_id = EXTENT_CSUM_ID;
    searchkey.obj_type = TYPE_EXTENT_CSUM;
    searchkey.offset = address;

    // FIXME - create checksum_root if it doesn't exist at all

    Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
    if (Status == STATUS_NOT_FOUND) { // tree is completely empty
        if (csum) { // not deleted
            ULONG length2 = length;
            uint64_t off = address;
            void* data = csum;

            // Insert in pieces no larger than one item can hold
            do {
                uint16_t il = (uint16_t)min(length2, MAX_CSUM_SIZE / Vcb->csum_size);

                void* checksums = ExAllocatePoolWithTag(PagedPool, il * Vcb->csum_size, ALLOC_TAG);
                if (!checksums) {
                    ERR("out of memory\n");
                    return;
                }

                RtlCopyMemory(checksums, data, il * Vcb->csum_size);

                Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums,
                                          il * Vcb->csum_size, NULL, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("insert_tree_item returned %08lx\n", Status);
                    ExFreePool(checksums);
                    return;
                }

                length2 -= il;

                if (length2 > 0) {
                    off += (uint64_t)il << Vcb->sector_shift;
                    data = (uint8_t*)data + (il * Vcb->csum_size);
                }
            } while (length2 > 0);
        }
    } else if (!NT_SUCCESS(Status)) {
        ERR("find_item returned %08lx\n", Status);
        return;
    } else {
        uint32_t tplen;
        void* checksums;
        // Byte address just past the requested range. `length` is a 32-bit
        // sector count, so it must be widened before shifting or the result
        // wraps for ranges of 4 GiB and above.
        uint64_t range_end = address + ((uint64_t)length << Vcb->sector_shift);

        // FIXME - check entry is TYPE_EXTENT_CSUM?

        // If the item found starts before the range and reaches it, the
        // rebuilt run has to start at that item.
        if (tp.item->key.offset < address && tp.item->key.offset + (((uint64_t)tp.item->size << Vcb->sector_shift) / Vcb->csum_size) >= address)
            startaddr = tp.item->key.offset;
        else
            startaddr = address;

        searchkey.obj_id = EXTENT_CSUM_ID;
        searchkey.obj_type = TYPE_EXTENT_CSUM;
        searchkey.offset = range_end;

        Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
        if (!NT_SUCCESS(Status)) {
            ERR("find_item returned %08lx\n", Status);
            return;
        }

        tplen = tp.item->size / Vcb->csum_size;

        // Likewise extend the run to the end of an item reaching past the range
        if (tp.item->key.offset + ((uint64_t)tplen << Vcb->sector_shift) >= range_end)
            endaddr = tp.item->key.offset + ((uint64_t)tplen << Vcb->sector_shift);
        else
            endaddr = range_end;

        TRACE("cs starts at %I64x (%lx sectors)\n", address, length);
        TRACE("startaddr = %I64x\n", startaddr);
        TRACE("endaddr = %I64x\n", endaddr);

        len = (ULONG)((endaddr - startaddr) >> Vcb->sector_shift);

        // One checksum slot per sector in [startaddr, endaddr)
        checksums = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * len, ALLOC_TAG);
        if (!checksums) {
            ERR("out of memory\n");
            return;
        }

        bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
        if (!bmparr) {
            ERR("out of memory\n");
            ExFreePool(checksums);
            return;
        }

        RtlInitializeBitMap(&bmp, bmparr, len);
        RtlSetAllBits(&bmp);

        searchkey.obj_id = EXTENT_CSUM_ID;
        searchkey.obj_type = TYPE_EXTENT_CSUM;
        searchkey.offset = address;

        Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
        if (!NT_SUCCESS(Status)) {
            ERR("find_item returned %08lx\n", Status);
            ExFreePool(checksums);
            ExFreePool(bmparr);
            return;
        }

        // set bit = free space, cleared bit = allocated sector

        // Pull every existing item inside the run into the buffer and delete it
        while (tp.item->key.offset < endaddr) {
            if (tp.item->key.offset >= startaddr) {
                if (tp.item->size > 0) {
                    // clamp so an item running past endaddr cannot overflow the buffer
                    ULONG itemlen = (ULONG)min((len - ((tp.item->key.offset - startaddr) >> Vcb->sector_shift)) * Vcb->csum_size, tp.item->size);

                    RtlCopyMemory((uint8_t*)checksums + (((tp.item->key.offset - startaddr) * Vcb->csum_size) >> Vcb->sector_shift),
                                  tp.item->data, itemlen);
                    RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) >> Vcb->sector_shift), itemlen / Vcb->csum_size);
                }

                Status = delete_tree_item(Vcb, &tp);
                if (!NT_SUCCESS(Status)) {
                    ERR("delete_tree_item returned %08lx\n", Status);
                    ExFreePool(checksums);
                    ExFreePool(bmparr);
                    return;
                }
            }

            if (find_next_item(Vcb, &tp, &next_tp, false, Irp)) {
                tp = next_tp;
            } else
                break;
        }

        // Overlay the caller's change on top of what was there
        if (!csum) { // deleted
            RtlSetBits(&bmp, (ULONG)((address - startaddr) >> Vcb->sector_shift), length);
        } else {
            RtlCopyMemory((uint8_t*)checksums + (((address - startaddr) * Vcb->csum_size) >> Vcb->sector_shift),
                          csum, length * Vcb->csum_size);
            RtlClearBits(&bmp, (ULONG)((address - startaddr) >> Vcb->sector_shift), length);
        }

        // Re-insert each run of allocated sectors
        runlength = RtlFindFirstRunClear(&bmp, &index);

        while (runlength != 0) {
            if (index >= len)
                break;

            if (index + runlength >= len) {
                runlength = len - index;

                if (runlength == 0)
                    break;
            }

            do {
                uint16_t rl;
                uint64_t off;
                void* data;

                if (runlength * Vcb->csum_size > MAX_CSUM_SIZE)
                    rl = (uint16_t)(MAX_CSUM_SIZE / Vcb->csum_size);
                else
                    rl = (uint16_t)runlength;

                data = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * rl, ALLOC_TAG);
                if (!data) {
                    ERR("out of memory\n");
                    ExFreePool(bmparr);
                    ExFreePool(checksums);
                    return;
                }

                RtlCopyMemory(data, (uint8_t*)checksums + (Vcb->csum_size * index), Vcb->csum_size * rl);

                off = startaddr + ((uint64_t)index << Vcb->sector_shift);

                Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, Vcb->csum_size * rl, NULL, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("insert_tree_item returned %08lx\n", Status);
                    ExFreePool(data);
                    ExFreePool(bmparr);
                    ExFreePool(checksums);
                    return;
                }

                runlength -= rl;
                index += rl;
            } while (runlength > 0);

            runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
        }

        ExFreePool(bmparr);
        ExFreePool(checksums);
    }
}
2809
update_chunk_usage(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)2810 static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
2811 LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
2812 chunk* c;
2813 KEY searchkey;
2814 traverse_ptr tp;
2815 BLOCK_GROUP_ITEM* bgi;
2816 NTSTATUS Status;
2817
2818 TRACE("(%p)\n", Vcb);
2819
2820 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
2821
2822 while (le != &Vcb->chunks) {
2823 c = CONTAINING_RECORD(le, chunk, list_entry);
2824
2825 acquire_chunk_lock(c, Vcb);
2826
2827 if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) {
2828 Status = load_cache_chunk(Vcb, c, NULL);
2829
2830 if (!NT_SUCCESS(Status)) {
2831 ERR("load_cache_chunk returned %08lx\n", Status);
2832 release_chunk_lock(c, Vcb);
2833 goto end;
2834 }
2835 }
2836
2837 le2 = c->changed_extents.Flink;
2838 while (le2 != &c->changed_extents) {
2839 LIST_ENTRY* le3 = le2->Flink;
2840 changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);
2841
2842 Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
2843 if (!NT_SUCCESS(Status)) {
2844 ERR("flush_changed_extent returned %08lx\n", Status);
2845 release_chunk_lock(c, Vcb);
2846 goto end;
2847 }
2848
2849 le2 = le3;
2850 }
2851
2852 // This is usually done by update_chunks, but we have to check again in case any new chunks
2853 // have been allocated since.
2854 if (c->created) {
2855 Status = create_chunk(Vcb, c, Irp);
2856 if (!NT_SUCCESS(Status)) {
2857 ERR("create_chunk returned %08lx\n", Status);
2858 release_chunk_lock(c, Vcb);
2859 goto end;
2860 }
2861 }
2862
2863 if (c->old_cache) {
2864 if (c->old_cache->dirty) {
2865 LIST_ENTRY batchlist;
2866
2867 InitializeListHead(&batchlist);
2868
2869 Status = flush_fcb(c->old_cache, false, &batchlist, Irp);
2870 if (!NT_SUCCESS(Status)) {
2871 ERR("flush_fcb returned %08lx\n", Status);
2872 release_chunk_lock(c, Vcb);
2873 clear_batch_list(Vcb, &batchlist);
2874 goto end;
2875 }
2876
2877 Status = commit_batch_list(Vcb, &batchlist, Irp);
2878 if (!NT_SUCCESS(Status)) {
2879 ERR("commit_batch_list returned %08lx\n", Status);
2880 release_chunk_lock(c, Vcb);
2881 goto end;
2882 }
2883 }
2884
2885 free_fcb(c->old_cache);
2886
2887 if (c->old_cache->refcount == 0)
2888 reap_fcb(c->old_cache);
2889
2890 c->old_cache = NULL;
2891 }
2892
2893 if (c->used != c->oldused) {
2894 searchkey.obj_id = c->offset;
2895 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2896 searchkey.offset = c->chunk_item->size;
2897
2898 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
2899 if (!NT_SUCCESS(Status)) {
2900 ERR("error - find_item returned %08lx\n", Status);
2901 release_chunk_lock(c, Vcb);
2902 goto end;
2903 }
2904
2905 if (keycmp(searchkey, tp.item->key)) {
2906 ERR("could not find (%I64x,%x,%I64x) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2907 Status = STATUS_INTERNAL_ERROR;
2908 release_chunk_lock(c, Vcb);
2909 goto end;
2910 }
2911
2912 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2913 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2914 Status = STATUS_INTERNAL_ERROR;
2915 release_chunk_lock(c, Vcb);
2916 goto end;
2917 }
2918
2919 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2920 if (!bgi) {
2921 ERR("out of memory\n");
2922 Status = STATUS_INSUFFICIENT_RESOURCES;
2923 release_chunk_lock(c, Vcb);
2924 goto end;
2925 }
2926
2927 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2928 bgi->used = c->used;
2929
2930 #ifdef DEBUG_PARANOID
2931 if (bgi->used & 0x8000000000000000) {
2932 ERR("refusing to write BLOCK_GROUP_ITEM with negative usage value (%I64x)\n", bgi->used);
2933 int3;
2934 }
2935 #endif
2936
2937 TRACE("adjusting usage of chunk %I64x to %I64x\n", c->offset, c->used);
2938
2939 Status = delete_tree_item(Vcb, &tp);
2940 if (!NT_SUCCESS(Status)) {
2941 ERR("delete_tree_item returned %08lx\n", Status);
2942 ExFreePool(bgi);
2943 release_chunk_lock(c, Vcb);
2944 goto end;
2945 }
2946
2947 Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp);
2948 if (!NT_SUCCESS(Status)) {
2949 ERR("insert_tree_item returned %08lx\n", Status);
2950 ExFreePool(bgi);
2951 release_chunk_lock(c, Vcb);
2952 goto end;
2953 }
2954
2955 Vcb->superblock.bytes_used += c->used - c->oldused;
2956 c->oldused = c->used;
2957 }
2958
2959 release_chunk_lock(c, Vcb);
2960
2961 le = le->Flink;
2962 }
2963
2964 Status = STATUS_SUCCESS;
2965
2966 end:
2967 ExReleaseResourceLite(&Vcb->chunk_lock);
2968
2969 return Status;
2970 }
2971
/* Copies the key of the first entry on t's item list into *key.
 * If the list is empty, *key is left unmodified. */
static void get_first_item(tree* t, KEY* key) {
    LIST_ENTRY* first = t->itemlist.Flink;

    if (first == &t->itemlist)
        return;

    *key = CONTAINING_RECORD(first, tree_data, list_entry)->key;
}
2983
/* Splits tree `t` in two at `newfirstitem`.
 *
 * Vcb          - volume
 * t            - tree to split; keeps the items before newfirstitem
 * newfirstitem - item of t that becomes the first item of the new tree
 * numitems     - number of items t keeps (becomes t->header.num_items)
 * size         - byte size t keeps (becomes t->size)
 *
 * A new tree `nt` is allocated at the same level and takes over the items
 * from newfirstitem to the end of t's list. If t has a parent, an entry for
 * nt is inserted into the parent directly after t's own entry. Otherwise a
 * new parent `pt` one level higher is created with entries for t and nt,
 * and it becomes the root's top-level tree.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation
 * failure, or STATUS_INTERNAL_ERROR if a parent would be needed above
 * level 255.
 *
 * NOTE(review): several of the error returns below occur after the item
 * list has been spliced and nt (or pt) linked into Vcb->trees; nothing in
 * this function undoes that work. */
static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, uint32_t numitems, uint32_t size) {
    tree *nt, *pt;
    tree_data* td;
    tree_data* oldlastitem;

    TRACE("splitting tree in %I64x at (%I64x,%x,%I64x)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);

    nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
    if (!nt) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // Only internal nodes (level > 0) get the non-paged part with its mutex
    if (t->header.level > 0) {
        nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
        if (!nt->nonpaged) {
            ERR("out of memory\n");
            ExFreePool(nt);
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        ExInitializeFastMutex(&nt->nonpaged->mutex);
    } else
        nt->nonpaged = NULL;

    // Start from a copy of t's header, then override what differs
    RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
    nt->header.address = 0;
    nt->header.generation = Vcb->superblock.generation;
    nt->header.num_items = t->header.num_items - numitems;
    nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;

    nt->has_address = false;
    nt->Vcb = Vcb;
    nt->parent = t->parent;

#ifdef DEBUG_PARANOID
    if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
#endif

    nt->root = t->root;
    nt->new_address = 0;
    nt->has_new_address = false;
    nt->updated_extents = false;
    nt->uniqueness_determined = true;
    nt->is_unique = true;
    nt->list_entry_hash.Flink = NULL;
    nt->buf = NULL;
    InitializeListHead(&nt->itemlist);

    // Remember the item just before the split point; it becomes t's last item
    oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);

    // Splice [newfirstitem, end of t's list] onto nt's list head...
    nt->itemlist.Flink = &newfirstitem->list_entry;
    nt->itemlist.Blink = t->itemlist.Blink;
    nt->itemlist.Flink->Blink = &nt->itemlist;
    nt->itemlist.Blink->Flink = &nt->itemlist;

    // ...and close t's list after oldlastitem
    t->itemlist.Blink = &oldlastitem->list_entry;
    t->itemlist.Blink->Flink = &t->itemlist;

    nt->size = t->size - size;
    t->size = size;
    t->header.num_items = numitems;
    nt->write = true;

    InsertTailList(&Vcb->trees, &nt->list_entry);

    if (nt->header.level > 0) {
        // Internal node: child trees already in memory now hang off nt
        LIST_ENTRY* le = nt->itemlist.Flink;

        while (le != &nt->itemlist) {
            tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

            if (td2->treeholder.tree) {
                td2->treeholder.tree->parent = nt;
#ifdef DEBUG_PARANOID
                if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
#endif
            }

            le = le->Flink;
        }
    } else {
        // Leaf: give every moved item that is not already marked `inserted`
        // its own copy of its data and mark it inserted (presumably because
        // such data points into t's buffer - TODO confirm).
        LIST_ENTRY* le = nt->itemlist.Flink;

        while (le != &nt->itemlist) {
            tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

            if (!td2->inserted && td2->data) {
                uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);

                if (!data) {
                    ERR("out of memory\n");
                    return STATUS_INSUFFICIENT_RESOURCES;
                }

                RtlCopyMemory(data, td2->data, td2->size);
                td2->data = data;
                td2->inserted = true;
            }

            le = le->Flink;
        }
    }

    // Existing parent: add an entry for nt right after t's entry
    if (nt->parent) {
        td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
        if (!td) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        td->key = newfirstitem->key;

        InsertHeadList(&t->paritem->list_entry, &td->list_entry);

        td->ignore = false;
        td->inserted = true;
        td->treeholder.tree = nt;
        nt->paritem = td;

        nt->parent->header.num_items++;
        nt->parent->size += sizeof(internal_node);

        goto end;
    }

    // t was the top of its root: build a new parent above t and nt
    TRACE("adding new tree parent\n");

    if (nt->header.level == 255) {
        ERR("cannot add parent to tree at level 255\n");
        return STATUS_INTERNAL_ERROR;
    }

    pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
    if (!pt) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
    if (!pt->nonpaged) {
        ERR("out of memory\n");
        ExFreePool(pt);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    ExInitializeFastMutex(&pt->nonpaged->mutex);

    RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
    pt->header.address = 0;
    pt->header.num_items = 2;
    pt->header.level = nt->header.level + 1;
    pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;

    pt->has_address = false;
    pt->Vcb = Vcb;
    pt->parent = NULL;
    pt->paritem = NULL;
    pt->root = t->root;
    pt->new_address = 0;
    pt->has_new_address = false;
    pt->updated_extents = false;
    pt->size = pt->header.num_items * sizeof(internal_node);
    pt->uniqueness_determined = true;
    pt->is_unique = true;
    pt->list_entry_hash.Flink = NULL;
    pt->buf = NULL;
    InitializeListHead(&pt->itemlist);

    InsertTailList(&Vcb->trees, &pt->list_entry);

    // First entry of the new parent: t, keyed by t's first item
    td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
    if (!td) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    get_first_item(t, &td->key);
    td->ignore = false;
    td->inserted = false;
    td->treeholder.address = 0;
    td->treeholder.generation = Vcb->superblock.generation;
    td->treeholder.tree = t;
    InsertTailList(&pt->itemlist, &td->list_entry);
    t->paritem = td;

    // Second entry: nt, keyed by the split point
    td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
    if (!td) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    td->key = newfirstitem->key;
    td->ignore = false;
    td->inserted = false;
    td->treeholder.address = 0;
    td->treeholder.generation = Vcb->superblock.generation;
    td->treeholder.tree = nt;
    InsertTailList(&pt->itemlist, &td->list_entry);
    nt->paritem = td;

    pt->write = true;

    // The root now starts at the new parent
    t->root->treeholder.tree = pt;

    t->parent = pt;
    nt->parent = pt;

#ifdef DEBUG_PARANOID
    if (t->parent && t->parent->header.level <= t->header.level) int3;
    if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
#endif

end:
    // Account one node's worth of bytes to the root on both successful paths
    t->root->root_item.bytes_used += Vcb->superblock.node_size;

    return STATUS_SUCCESS;
}
3202
split_tree(device_extension * Vcb,tree * t)3203 static NTSTATUS split_tree(device_extension* Vcb, tree* t) {
3204 LIST_ENTRY* le;
3205 uint32_t size, ds, numitems;
3206
3207 size = 0;
3208 numitems = 0;
3209
3210 // FIXME - naïve implementation: maximizes number of filled trees
3211
3212 le = t->itemlist.Flink;
3213 while (le != &t->itemlist) {
3214 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3215
3216 if (!td->ignore) {
3217 if (t->header.level == 0)
3218 ds = sizeof(leaf_node) + td->size;
3219 else
3220 ds = sizeof(internal_node);
3221
3222 if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) {
3223 ERR("(%I64x,%x,%I64x) in tree %I64x is too large (%x > %Ix)\n",
3224 td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id,
3225 ds, Vcb->superblock.node_size - sizeof(tree_header));
3226 return STATUS_INTERNAL_ERROR;
3227 }
3228
3229 // FIXME - move back if previous item was deleted item with same key
3230 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
3231 return split_tree_at(Vcb, t, td, numitems, size);
3232
3233 size += ds;
3234 numitems++;
3235 }
3236
3237 le = le->Flink;
3238 }
3239
3240 return STATUS_SUCCESS;
3241 }
3242
/* Decides whether tree t may be treated as unique, caching the answer in
 * t->is_unique / t->uniqueness_determined.
 *
 * A tree is reported as not unique if its parent is not unique, or - when it
 * has an on-disk address - if its extent item cannot be found, is a V0 item,
 * is too short, has a refcount above 1, or does not start its inline
 * references with a TYPE_TREE_BLOCK_REF. A failed lookup in the extent tree
 * is logged and also yields false. */
bool is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) {
    bool unique = false;

    if (t->uniqueness_determined)
        return t->is_unique;

    // single-pass block: any failed check breaks out with unique still false
    do {
        if (t->parent && !is_tree_unique(Vcb, t->parent, Irp))
            break;

        if (t->has_address) {
            KEY key;
            traverse_ptr tp;
            NTSTATUS Status;
            EXTENT_ITEM* ei;
            uint8_t* reftype;

            key.obj_id = t->header.address;
            key.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
            key.offset = 0xffffffffffffffff;

            Status = find_item(Vcb, Vcb->extent_root, &tp, &key, false, Irp);
            if (!NT_SUCCESS(Status)) {
                ERR("error - find_item returned %08lx\n", Status);
                break;
            }

            // the item found must describe this tree's address
            if (tp.item->key.obj_id != t->header.address)
                break;

            if (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM)
                break;

            if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
                break;

            if (tp.item->size < sizeof(EXTENT_ITEM))
                break;

            ei = (EXTENT_ITEM*)tp.item->data;

            if (ei->refcount > 1)
                break;

            // the first inline reference type follows the EXTENT_ITEM, or the
            // EXTENT_ITEM2 when a non-skinny tree block item carries one
            reftype = (uint8_t*)&ei[1];

            if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
                if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
                    break;

                reftype += sizeof(EXTENT_ITEM2);
            }

            if (reftype >= tp.item->data + tp.item->size || *reftype != TYPE_TREE_BLOCK_REF)
                break;
        }

        unique = true;
    } while (false);

    t->is_unique = unique;
    t->uniqueness_determined = true;

    return unique;
}
3305
/* Tries to fill up tree t using the contents of the next tree under the same
 * parent.
 *
 * The sibling is the tree referenced by the first non-ignored parent item
 * after t->paritem; it is loaded if it is not already in memory. t->paritem
 * and t->parent are dereferenced unconditionally, so t must have a parent.
 *
 * - If there is no such sibling, or the sibling is not unique according to
 *   is_tree_unique(), nothing is changed.
 * - If both trees together fit into one node, every item of the sibling is
 *   appended to t, the sibling's extent is released through
 *   reduce_tree_extent(), its parent item is removed and the sibling is freed.
 * - Otherwise items are moved one at a time from the front of the sibling to
 *   the end of t until t reaches the average of the two sizes, the next item
 *   would not fit, or the sibling is down to its last item.
 *
 * *done is cleared on entry and set to true when a merge happened or at least
 * one item was moved. *done_deletions is only ever set to true here (when the
 * sibling's parent item is marked ignored); it is never cleared.
 *
 * Returns STATUS_SUCCESS when nothing could be done as well as on success;
 * error statuses come from the helpers called or from a failed allocation. */
static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, bool* done, bool* done_deletions, PIRP Irp, LIST_ENTRY* rollback) {
    LIST_ENTRY* le;
    tree_data* nextparitem = NULL;
    NTSTATUS Status;
    tree *next_tree, *par;

    *done = false;

    TRACE("trying to amalgamate tree in root %I64x, level %x (size %u)\n", t->root->id, t->header.level, t->size);

    // FIXME - doesn't capture everything, as it doesn't ascend
    // look for the first live parent item following our own
    le = t->paritem->list_entry.Flink;
    while (le != &t->parent->itemlist) {
        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

        if (!td->ignore) {
            nextparitem = td;
            break;
        }

        le = le->Flink;
    }

    // t is the last live child of its parent - nothing to merge with
    if (!nextparitem)
        return STATUS_SUCCESS;

    TRACE("nextparitem: key = %I64x,%x,%I64x\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);

    if (!nextparitem->treeholder.tree) {
        // NOTE(review): NULL is passed as the last argument although Irp is available - confirm this is intended
        Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, NULL);
        if (!NT_SUCCESS(Status)) {
            ERR("do_load_tree returned %08lx\n", Status);
            return Status;
        }
    }

    if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
        return STATUS_SUCCESS;

    next_tree = nextparitem->treeholder.tree;

    if (!next_tree->updated_extents && next_tree->has_address) {
        Status = update_tree_extents(Vcb, next_tree, Irp, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("update_tree_extents returned %08lx\n", Status);
            return Status;
        }
    }

    if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
        // merge two trees into one

        t->header.num_items += next_tree->header.num_items;
        t->size += next_tree->size;

        if (next_tree->header.level > 0) {
            // internal node: re-parent every loaded child onto t
            le = next_tree->itemlist.Flink;

            while (le != &next_tree->itemlist) {
                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

                if (td2->treeholder.tree) {
                    td2->treeholder.tree->parent = t;
#ifdef DEBUG_PARANOID
                    if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
#endif
                }

                td2->inserted = true;
                le = le->Flink;
            }
        } else {
            // leaf: give every item that is not yet marked inserted its own copy
            // of its data before next_tree is freed below
            // (presumably such data points into next_tree's buffer - TODO confirm)
            le = next_tree->itemlist.Flink;

            while (le != &next_tree->itemlist) {
                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

                if (!td2->inserted && td2->data) {
                    uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, td2->data, td2->size);
                    td2->data = data;
                    td2->inserted = true;
                }

                le = le->Flink;
            }
        }

        // splice the whole of next_tree's item list onto the tail of t's list
        t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
        t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
        t->itemlist.Blink = next_tree->itemlist.Blink;
        t->itemlist.Blink->Flink = &t->itemlist;

        // leave next_tree with an empty, self-referencing list
        next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;

        next_tree->header.num_items = 0;
        next_tree->size = 0;

        if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
            Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08lx\n", Status);
                return Status;
            }
        } else if (next_tree->has_address) {
            Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08lx\n", Status);
                return Status;
            }
        }

        // drop the sibling's entry from the parent's accounting
        if (!nextparitem->ignore) {
            nextparitem->ignore = true;
            next_tree->parent->header.num_items--;
            next_tree->parent->size -= sizeof(internal_node);

            *done_deletions = true;
        }

        // every ancestor of the removed tree has to be rewritten
        par = next_tree->parent;
        while (par) {
            par->write = true;
            par = par->parent;
        }

        RemoveEntryList(&nextparitem->list_entry);
        // NOTE(review): tree_data items are allocated from Vcb->tree_data_lookaside elsewhere in this
        // file - confirm that ExFreePool is the intended way to release this one
        ExFreePool(next_tree->paritem);
        next_tree->paritem = NULL;

        next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;

        free_tree(next_tree);

        *done = true;
    } else {
        // rebalance by moving items from second tree into first
        ULONG avg_size = (t->size + next_tree->size) / 2;
        KEY firstitem = {0, 0, 0};
        bool changed = false;

        TRACE("attempting rebalance\n");

        // always take the current head of next_tree's list; stop once t has
        // reached the average size or next_tree is down to a single item
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
            ULONG size;

            // ignored items are moved too, but count as zero bytes
            if (!td->ignore) {
                if (next_tree->header.level == 0)
                    size = sizeof(leaf_node) + td->size;
                else
                    size = sizeof(internal_node);
            } else
                size = 0;

            if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
                RemoveEntryList(&td->list_entry);
                InsertTailList(&t->itemlist, &td->list_entry);

                if (next_tree->header.level > 0 && td->treeholder.tree) {
                    td->treeholder.tree->parent = t;
#ifdef DEBUG_PARANOID
                    if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
#endif
                } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) {
                    // leaf item not yet marked inserted: give it its own copy of its data
                    uint8_t* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, td->data, td->size);
                    td->data = data;
                }

                td->inserted = true;

                if (!td->ignore) {
                    next_tree->size -= size;
                    t->size += size;
                    next_tree->header.num_items--;
                    t->header.num_items++;
                }

                changed = true;
            } else
                break;

            // td has left next_tree's list, so restart from the new head
            le = next_tree->itemlist.Flink;
        }

        // the parent's key for next_tree becomes the key of its first live item
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

            if (!td->ignore) {
                firstitem = td->key;
                break;
            }

            le = le->Flink;
        }

        // FIXME - once ascension is working, make this work with parent's parent, etc.
        if (next_tree->paritem)
            next_tree->paritem->key = firstitem;

        // next_tree and all of its ancestors are marked for writing, whether or not anything moved
        par = next_tree;
        while (par) {
            par->write = true;
            par = par->parent;
        }

        if (changed)
            *done = true;
    }

    return STATUS_SUCCESS;
}
3535
update_extent_level(device_extension * Vcb,uint64_t address,tree * t,uint8_t level,PIRP Irp)3536 static NTSTATUS update_extent_level(device_extension* Vcb, uint64_t address, tree* t, uint8_t level, PIRP Irp) {
3537 KEY searchkey;
3538 traverse_ptr tp;
3539 NTSTATUS Status;
3540
3541 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3542 searchkey.obj_id = address;
3543 searchkey.obj_type = TYPE_METADATA_ITEM;
3544 searchkey.offset = t->header.level;
3545
3546 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3547 if (!NT_SUCCESS(Status)) {
3548 ERR("error - find_item returned %08lx\n", Status);
3549 return Status;
3550 }
3551
3552 if (!keycmp(tp.item->key, searchkey)) {
3553 EXTENT_ITEM_SKINNY_METADATA* eism;
3554
3555 if (tp.item->size > 0) {
3556 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3557
3558 if (!eism) {
3559 ERR("out of memory\n");
3560 return STATUS_INSUFFICIENT_RESOURCES;
3561 }
3562
3563 RtlCopyMemory(eism, tp.item->data, tp.item->size);
3564 } else
3565 eism = NULL;
3566
3567 Status = delete_tree_item(Vcb, &tp);
3568 if (!NT_SUCCESS(Status)) {
3569 ERR("delete_tree_item returned %08lx\n", Status);
3570 if (eism) ExFreePool(eism);
3571 return Status;
3572 }
3573
3574 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp);
3575 if (!NT_SUCCESS(Status)) {
3576 ERR("insert_tree_item returned %08lx\n", Status);
3577 if (eism) ExFreePool(eism);
3578 return Status;
3579 }
3580
3581 return STATUS_SUCCESS;
3582 }
3583 }
3584
3585 searchkey.obj_id = address;
3586 searchkey.obj_type = TYPE_EXTENT_ITEM;
3587 searchkey.offset = 0xffffffffffffffff;
3588
3589 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3590 if (!NT_SUCCESS(Status)) {
3591 ERR("error - find_item returned %08lx\n", Status);
3592 return Status;
3593 }
3594
3595 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3596 EXTENT_ITEM_TREE* eit;
3597
3598 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
3599 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
3600 return STATUS_INTERNAL_ERROR;
3601 }
3602
3603 eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3604
3605 if (!eit) {
3606 ERR("out of memory\n");
3607 return STATUS_INSUFFICIENT_RESOURCES;
3608 }
3609
3610 RtlCopyMemory(eit, tp.item->data, tp.item->size);
3611
3612 Status = delete_tree_item(Vcb, &tp);
3613 if (!NT_SUCCESS(Status)) {
3614 ERR("delete_tree_item returned %08lx\n", Status);
3615 ExFreePool(eit);
3616 return Status;
3617 }
3618
3619 eit->level = level;
3620
3621 Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp);
3622 if (!NT_SUCCESS(Status)) {
3623 ERR("insert_tree_item returned %08lx\n", Status);
3624 ExFreePool(eit);
3625 return Status;
3626 }
3627
3628 return STATUS_SUCCESS;
3629 }
3630
3631 ERR("could not find EXTENT_ITEM for address %I64x\n", address);
3632
3633 return STATUS_INTERNAL_ERROR;
3634 }
3635
update_tree_extents_recursive(device_extension * Vcb,tree * t,PIRP Irp,LIST_ENTRY * rollback)3636 static NTSTATUS update_tree_extents_recursive(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
3637 NTSTATUS Status;
3638
3639 if (t->parent && !t->parent->updated_extents && t->parent->has_address) {
3640 Status = update_tree_extents_recursive(Vcb, t->parent, Irp, rollback);
3641 if (!NT_SUCCESS(Status))
3642 return Status;
3643 }
3644
3645 Status = update_tree_extents(Vcb, t, Irp, rollback);
3646 if (!NT_SUCCESS(Status)) {
3647 ERR("update_tree_extents returned %08lx\n", Status);
3648 return Status;
3649 }
3650
3651 return STATUS_SUCCESS;
3652 }
3653
/* Brings every tree that is flagged for writing back to a valid size.
 *
 * Works in three passes over Vcb->trees:
 *
 * 1. Level by level, starting at 0: a written tree with no items is deleted
 *    if it has a parent (its extent is released and its parent item removed),
 *    or reset to level 0 if it is a root above level 0; a written tree larger
 *    than the node payload is split with split_tree(). The pass stops at the
 *    first level that contains no written tree; the last populated level is
 *    remembered as max_level.
 * 2. For each level up to max_level: a written, non-empty, unique tree with a
 *    parent whose size is below half the node payload (a quarter for trees in
 *    BTRFS_ROOT_FREE_SPACE) is passed to try_tree_amalgamate() repeatedly for
 *    as long as that reports progress and the tree is still below half.
 * 3. Only if something was deleted: from max_level down to 1, a written
 *    top-level tree with exactly one item is removed and its child becomes
 *    the root's tree.
 *
 * Returns the first failing status from a helper, or STATUS_SUCCESS. */
static NTSTATUS do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
    ULONG level, max_level;
    uint32_t min_size, min_size_fst;
    bool empty, done_deletions = false;
    NTSTATUS Status;
    tree* t;

    TRACE("(%p)\n", Vcb);

    max_level = 0;

    // pass 1: delete empty trees and split oversized ones, lowest level first
    for (level = 0; level <= 255; level++) {
        LIST_ENTRY *le, *nextle;

        empty = true;

        TRACE("doing level %lu\n", level);

        le = Vcb->trees.Flink;

        while (le != &Vcb->trees) {
            t = CONTAINING_RECORD(le, tree, list_entry);

            // remembered up front because t may be freed below
            nextle = le->Flink;

            if (t->write && t->header.level == level) {
                empty = false;

                if (t->header.num_items == 0) {
                    if (t->parent) {
                        done_deletions = true;

                        TRACE("deleting tree in root %I64x\n", t->root->id);

                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        if (t->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08lx\n", Status);
                                return Status;
                            }

                            t->has_new_address = false;
                        } else if (t->has_address) {
                            Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08lx\n", Status);
                                return Status;
                            }

                            t->has_address = false;
                        }

                        // take the tree out of its parent's accounting, unless already done
                        if (!t->paritem->ignore) {
                            t->paritem->ignore = true;
                            t->parent->header.num_items--;
                            t->parent->size -= sizeof(internal_node);
                        }

                        RemoveEntryList(&t->paritem->list_entry);
                        // NOTE(review): tree_data items are allocated from Vcb->tree_data_lookaside elsewhere
                        // in this file - confirm that ExFreePool is the intended way to release this one
                        ExFreePool(t->paritem);
                        t->paritem = NULL;

                        free_tree(t);
                    } else if (t->header.level != 0) {
                        // an empty root above level 0 is turned back into an empty leaf
                        if (t->has_new_address) {
                            Status = update_extent_level(Vcb, t->new_address, t, 0, Irp);

                            if (!NT_SUCCESS(Status)) {
                                ERR("update_extent_level returned %08lx\n", Status);
                                return Status;
                            }
                        }

                        t->header.level = 0;
                    }
                } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
                    TRACE("splitting overlarge tree (%x > %Ix)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));

                    if (!t->updated_extents && t->has_address) {
                        Status = update_tree_extents_recursive(Vcb, t, Irp, rollback);
                        if (!NT_SUCCESS(Status)) {
                            ERR("update_tree_extents_recursive returned %08lx\n", Status);
                            return Status;
                        }
                    }

                    Status = split_tree(Vcb, t);

                    if (!NT_SUCCESS(Status)) {
                        ERR("split_tree returned %08lx\n", Status);
                        return Status;
                    }
                }
            }

            le = nextle;
        }

        if (!empty) {
            max_level = level;
        } else {
            TRACE("nothing found for level %lu\n", level);
            break;
        }
    }

    // thresholds below which a tree is considered for amalgamation:
    // half the node payload normally, a quarter for the free-space root
    min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
    min_size_fst = (Vcb->superblock.node_size - sizeof(tree_header)) / 4;

    // pass 2: merge or rebalance undersized trees with their next sibling
    for (level = 0; level <= max_level; level++) {
        LIST_ENTRY* le;

        le = Vcb->trees.Flink;

        while (le != &Vcb->trees) {
            t = CONTAINING_RECORD(le, tree, list_entry);

            if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent &&
                ((t->size < min_size && t->root->id != BTRFS_ROOT_FREE_SPACE) || (t->size < min_size_fst && t->root->id == BTRFS_ROOT_FREE_SPACE)) &&
                is_tree_unique(Vcb, t, Irp)) {
                bool done;

                // keep going for as long as progress is made and t is still under half full
                do {
                    Status = try_tree_amalgamate(Vcb, t, &done, &done_deletions, Irp, rollback);
                    if (!NT_SUCCESS(Status)) {
                        ERR("try_tree_amalgamate returned %08lx\n", Status);
                        return Status;
                    }
                } while (done && t->size < min_size);
            }

            // read only after the call above: t itself is not freed by it
            le = le->Flink;
        }
    }

    // simplify trees if top tree only has one entry

    if (done_deletions) {
        for (level = max_level; level > 0; level--) {
            LIST_ENTRY *le, *nextle;

            le = Vcb->trees.Flink;
            while (le != &Vcb->trees) {
                nextle = le->Flink;
                t = CONTAINING_RECORD(le, tree, list_entry);

                if (t->write && t->header.level == level) {
                    if (!t->parent && t->header.num_items == 1) {
                        LIST_ENTRY* le2 = t->itemlist.Flink;
                        tree_data* td = NULL;
                        tree* child_tree = NULL;

                        // find the one live item
                        // NOTE(review): if no live item is found, td is left as the last item visited
                        // (or NULL for an empty list) and is dereferenced below
                        while (le2 != &t->itemlist) {
                            td = CONTAINING_RECORD(le2, tree_data, list_entry);
                            if (!td->ignore)
                                break;
                            le2 = le2->Flink;
                        }

                        TRACE("deleting top-level tree in root %I64x with one item\n", t->root->id);

                        if (t->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08lx\n", Status);
                                return Status;
                            }

                            t->has_new_address = false;
                        } else if (t->has_address) {
                            Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08lx\n", Status);
                                return Status;
                            }

                            t->has_address = false;
                        }

                        if (!td->treeholder.tree) { // load first item if not already loaded
                            KEY searchkey = {0,0,0};
                            traverse_ptr tp;

                            // the lookup result itself is unused; presumably the descent
                            // loads the child tree as a side effect - TODO confirm
                            Status = find_item(Vcb, t->root, &tp, &searchkey, false, Irp);
                            if (!NT_SUCCESS(Status)) {
                                ERR("error - find_item returned %08lx\n", Status);
                                return Status;
                            }
                        }

                        child_tree = td->treeholder.tree;

                        // detach the child before its old parent is freed
                        if (child_tree) {
                            child_tree->parent = NULL;
                            child_tree->paritem = NULL;
                        }

                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        free_tree(t);

                        // set only after free_tree(t), so the assignment is the last write to the root's tree pointer
                        if (child_tree)
                            child_tree->root->treeholder.tree = child_tree;
                    }
                }

                le = nextle;
            }
        }
    }

    return STATUS_SUCCESS;
}
3873
remove_root_extents(device_extension * Vcb,root * r,tree_holder * th,uint8_t level,tree * parent,PIRP Irp,LIST_ENTRY * rollback)3874 static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, uint8_t level, tree* parent, PIRP Irp, LIST_ENTRY* rollback) {
3875 NTSTATUS Status;
3876
3877 if (!th->tree) {
3878 uint8_t* buf;
3879 chunk* c;
3880
3881 buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
3882 if (!buf) {
3883 ERR("out of memory\n");
3884 return STATUS_INSUFFICIENT_RESOURCES;
3885 }
3886
3887 Status = read_data(Vcb, th->address, Vcb->superblock.node_size, NULL, true, buf, NULL,
3888 &c, Irp, th->generation, false, NormalPagePriority);
3889 if (!NT_SUCCESS(Status)) {
3890 ERR("read_data returned 0x%08lx\n", Status);
3891 ExFreePool(buf);
3892 return Status;
3893 }
3894
3895 Status = load_tree(Vcb, th->address, buf, r, &th->tree);
3896
3897 if (!th->tree || th->tree->buf != buf)
3898 ExFreePool(buf);
3899
3900 if (!NT_SUCCESS(Status)) {
3901 ERR("load_tree(%I64x) returned %08lx\n", th->address, Status);
3902 return Status;
3903 }
3904 }
3905
3906 if (level > 0) {
3907 LIST_ENTRY* le = th->tree->itemlist.Flink;
3908
3909 while (le != &th->tree->itemlist) {
3910 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3911
3912 if (!td->ignore) {
3913 Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, th->tree, Irp, rollback);
3914
3915 if (!NT_SUCCESS(Status)) {
3916 ERR("remove_root_extents returned %08lx\n", Status);
3917 return Status;
3918 }
3919 }
3920
3921 le = le->Flink;
3922 }
3923 }
3924
3925 if (th->tree && !th->tree->updated_extents && th->tree->has_address) {
3926 Status = update_tree_extents(Vcb, th->tree, Irp, rollback);
3927 if (!NT_SUCCESS(Status)) {
3928 ERR("update_tree_extents returned %08lx\n", Status);
3929 return Status;
3930 }
3931 }
3932
3933 if (!th->tree || th->tree->has_address) {
3934 Status = reduce_tree_extent(Vcb, th->address, NULL, parent ? parent->header.tree_id : r->id, level, Irp, rollback);
3935
3936 if (!NT_SUCCESS(Status)) {
3937 ERR("reduce_tree_extent(%I64x) returned %08lx\n", th->address, Status);
3938 return Status;
3939 }
3940 }
3941
3942 return STATUS_SUCCESS;
3943 }
3944
drop_root(device_extension * Vcb,root * r,PIRP Irp,LIST_ENTRY * rollback)3945 static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
3946 NTSTATUS Status;
3947 KEY searchkey;
3948 traverse_ptr tp;
3949
3950 Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback);
3951 if (!NT_SUCCESS(Status)) {
3952 ERR("remove_root_extents returned %08lx\n", Status);
3953 return Status;
3954 }
3955
3956 // remove entries in uuid root (tree 9)
3957 if (Vcb->uuid_root) {
3958 RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(uint64_t));
3959 searchkey.obj_type = TYPE_SUBVOL_UUID;
3960 RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
3961
3962 if (searchkey.obj_id != 0 || searchkey.offset != 0) {
3963 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
3964 if (!NT_SUCCESS(Status)) {
3965 WARN("find_item returned %08lx\n", Status);
3966 } else {
3967 if (!keycmp(tp.item->key, searchkey)) {
3968 Status = delete_tree_item(Vcb, &tp);
3969 if (!NT_SUCCESS(Status)) {
3970 ERR("delete_tree_item returned %08lx\n", Status);
3971 return Status;
3972 }
3973 } else
3974 WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3975 }
3976 }
3977
3978 if (r->root_item.rtransid > 0) {
3979 RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid.uuid[0], sizeof(uint64_t));
3980 searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
3981 RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
3982
3983 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
3984 if (!NT_SUCCESS(Status))
3985 WARN("find_item returned %08lx\n", Status);
3986 else {
3987 if (!keycmp(tp.item->key, searchkey)) {
3988 if (tp.item->size == sizeof(uint64_t)) {
3989 uint64_t* id = (uint64_t*)tp.item->data;
3990
3991 if (*id == r->id) {
3992 Status = delete_tree_item(Vcb, &tp);
3993 if (!NT_SUCCESS(Status)) {
3994 ERR("delete_tree_item returned %08lx\n", Status);
3995 return Status;
3996 }
3997 }
3998 } else if (tp.item->size > sizeof(uint64_t)) {
3999 ULONG i;
4000 uint64_t* ids = (uint64_t*)tp.item->data;
4001
4002 for (i = 0; i < tp.item->size / sizeof(uint64_t); i++) {
4003 if (ids[i] == r->id) {
4004 uint64_t* ne;
4005
4006 ne = ExAllocatePoolWithTag(PagedPool, tp.item->size - sizeof(uint64_t), ALLOC_TAG);
4007 if (!ne) {
4008 ERR("out of memory\n");
4009 return STATUS_INSUFFICIENT_RESOURCES;
4010 }
4011
4012 if (i > 0)
4013 RtlCopyMemory(ne, ids, sizeof(uint64_t) * i);
4014
4015 if ((i + 1) * sizeof(uint64_t) < tp.item->size)
4016 RtlCopyMemory(&ne[i], &ids[i + 1], tp.item->size - ((i + 1) * sizeof(uint64_t)));
4017
4018 Status = delete_tree_item(Vcb, &tp);
4019 if (!NT_SUCCESS(Status)) {
4020 ERR("delete_tree_item returned %08lx\n", Status);
4021 ExFreePool(ne);
4022 return Status;
4023 }
4024
4025 Status = insert_tree_item(Vcb, Vcb->uuid_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
4026 ne, tp.item->size - sizeof(uint64_t), NULL, Irp);
4027 if (!NT_SUCCESS(Status)) {
4028 ERR("insert_tree_item returned %08lx\n", Status);
4029 ExFreePool(ne);
4030 return Status;
4031 }
4032
4033 break;
4034 }
4035 }
4036 }
4037 } else
4038 WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
4039 }
4040 }
4041 }
4042
4043 // delete ROOT_ITEM
4044
4045 searchkey.obj_id = r->id;
4046 searchkey.obj_type = TYPE_ROOT_ITEM;
4047 searchkey.offset = 0xffffffffffffffff;
4048
4049 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
4050 if (!NT_SUCCESS(Status)) {
4051 ERR("find_item returned %08lx\n", Status);
4052 return Status;
4053 }
4054
4055 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
4056 Status = delete_tree_item(Vcb, &tp);
4057
4058 if (!NT_SUCCESS(Status)) {
4059 ERR("delete_tree_item returned %08lx\n", Status);
4060 return Status;
4061 }
4062 } else
4063 WARN("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
4064
4065 // delete items in tree cache
4066
4067 free_trees_root(Vcb, r);
4068
4069 return STATUS_SUCCESS;
4070 }
4071
drop_roots(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)4072 static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
4073 LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2;
4074 NTSTATUS Status;
4075
4076 while (le != &Vcb->drop_roots) {
4077 root* r = CONTAINING_RECORD(le, root, list_entry);
4078
4079 le2 = le->Flink;
4080
4081 Status = drop_root(Vcb, r, Irp, rollback);
4082 if (!NT_SUCCESS(Status)) {
4083 ERR("drop_root(%I64x) returned %08lx\n", r->id, Status);
4084 return Status;
4085 }
4086
4087 le = le2;
4088 }
4089
4090 return STATUS_SUCCESS;
4091 }
4092
update_dev_item(device_extension * Vcb,device * device,PIRP Irp)4093 NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp) {
4094 KEY searchkey;
4095 traverse_ptr tp;
4096 DEV_ITEM* di;
4097 NTSTATUS Status;
4098
4099 searchkey.obj_id = 1;
4100 searchkey.obj_type = TYPE_DEV_ITEM;
4101 searchkey.offset = device->devitem.dev_id;
4102
4103 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
4104 if (!NT_SUCCESS(Status)) {
4105 ERR("error - find_item returned %08lx\n", Status);
4106 return Status;
4107 }
4108
4109 if (keycmp(tp.item->key, searchkey)) {
4110 ERR("error - could not find DEV_ITEM for device %I64x\n", device->devitem.dev_id);
4111 return STATUS_INTERNAL_ERROR;
4112 }
4113
4114 Status = delete_tree_item(Vcb, &tp);
4115 if (!NT_SUCCESS(Status)) {
4116 ERR("delete_tree_item returned %08lx\n", Status);
4117 return Status;
4118 }
4119
4120 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
4121 if (!di) {
4122 ERR("out of memory\n");
4123 return STATUS_INSUFFICIENT_RESOURCES;
4124 }
4125
4126 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
4127
4128 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
4129 if (!NT_SUCCESS(Status)) {
4130 ERR("insert_tree_item returned %08lx\n", Status);
4131 ExFreePool(di);
4132 return Status;
4133 }
4134
4135 return STATUS_SUCCESS;
4136 }
4137
/* Rebuilds Vcb->superblock.sys_chunk_array from the Vcb->sys_chunks list.
 *
 * Each list entry is written as its KEY immediately followed by its data, in
 * list order, starting at offset 0 of the array. Bytes after the last entry
 * are left as they were.
 *
 * The array is a fixed-size buffer, so an entry that would run past
 * SYS_CHUNK_ARRAY_SIZE (the limit add_to_bootstrap checks against) is not
 * copied: the error is logged and the rebuild stops there rather than writing
 * beyond the end of the array. */
static void regen_bootstrap(device_extension* Vcb) {
    USHORT i = 0;
    LIST_ENTRY* le;

    le = Vcb->sys_chunks.Flink;
    while (le != &Vcb->sys_chunks) {
        sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);

        TRACE("%I64x,%x,%I64x\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);

        // never write past the end of the fixed-size array
        if (i + sizeof(KEY) + sc2->size > SYS_CHUNK_ARRAY_SIZE) {
            ERR("error - system chunk list does not fit in bootstrap\n");
            break;
        }

        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
        i += sizeof(KEY);

        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
        i += sc2->size;

        le = le->Flink;
    }
}
4159
add_to_bootstrap(device_extension * Vcb,uint64_t obj_id,uint8_t obj_type,uint64_t offset,void * data,uint16_t size)4160 static NTSTATUS add_to_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset, void* data, uint16_t size) {
4161 sys_chunk* sc;
4162 LIST_ENTRY* le;
4163
4164 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
4165 ERR("error - bootstrap is full\n");
4166 return STATUS_INTERNAL_ERROR;
4167 }
4168
4169 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
4170 if (!sc) {
4171 ERR("out of memory\n");
4172 return STATUS_INSUFFICIENT_RESOURCES;
4173 }
4174
4175 sc->key.obj_id = obj_id;
4176 sc->key.obj_type = obj_type;
4177 sc->key.offset = offset;
4178 sc->size = size;
4179 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
4180 if (!sc->data) {
4181 ERR("out of memory\n");
4182 ExFreePool(sc);
4183 return STATUS_INSUFFICIENT_RESOURCES;
4184 }
4185
4186 RtlCopyMemory(sc->data, data, sc->size);
4187
4188 le = Vcb->sys_chunks.Flink;
4189 while (le != &Vcb->sys_chunks) {
4190 sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4191
4192 if (keycmp(sc2->key, sc->key) == 1)
4193 break;
4194
4195 le = le->Flink;
4196 }
4197 InsertTailList(le, &sc->list_entry);
4198
4199 Vcb->superblock.n += sizeof(KEY) + size;
4200
4201 regen_bootstrap(Vcb);
4202
4203 return STATUS_SUCCESS;
4204 }
4205
create_chunk(device_extension * Vcb,chunk * c,PIRP Irp)4206 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp) {
4207 CHUNK_ITEM* ci;
4208 CHUNK_ITEM_STRIPE* cis;
4209 BLOCK_GROUP_ITEM* bgi;
4210 uint16_t i, factor;
4211 NTSTATUS Status;
4212
4213 ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
4214 if (!ci) {
4215 ERR("out of memory\n");
4216 return STATUS_INSUFFICIENT_RESOURCES;
4217 }
4218
4219 RtlCopyMemory(ci, c->chunk_item, c->size);
4220
4221 Status = insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp);
4222 if (!NT_SUCCESS(Status)) {
4223 ERR("insert_tree_item failed\n");
4224 ExFreePool(ci);
4225 return Status;
4226 }
4227
4228 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
4229 Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
4230 if (!NT_SUCCESS(Status)) {
4231 ERR("add_to_bootstrap returned %08lx\n", Status);
4232 return Status;
4233 }
4234 }
4235
4236 // add BLOCK_GROUP_ITEM to tree 2
4237
4238 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
4239 if (!bgi) {
4240 ERR("out of memory\n");
4241 return STATUS_INSUFFICIENT_RESOURCES;
4242 }
4243
4244 bgi->used = c->used;
4245 bgi->chunk_tree = 0x100;
4246 bgi->flags = c->chunk_item->type;
4247
4248 Status = insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp);
4249 if (!NT_SUCCESS(Status)) {
4250 ERR("insert_tree_item failed\n");
4251 ExFreePool(bgi);
4252 return Status;
4253 }
4254
4255 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
4256 factor = c->chunk_item->num_stripes;
4257 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
4258 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
4259 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
4260 factor = c->chunk_item->num_stripes - 1;
4261 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
4262 factor = c->chunk_item->num_stripes - 2;
4263 else // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
4264 factor = 1;
4265
4266 // add DEV_EXTENTs to tree 4
4267
4268 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
4269
4270 for (i = 0; i < c->chunk_item->num_stripes; i++) {
4271 DEV_EXTENT* de;
4272
4273 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
4274 if (!de) {
4275 ERR("out of memory\n");
4276 return STATUS_INSUFFICIENT_RESOURCES;
4277 }
4278
4279 de->chunktree = Vcb->chunk_root->id;
4280 de->objid = 0x100;
4281 de->address = c->offset;
4282 de->length = c->chunk_item->size / factor;
4283 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
4284
4285 Status = insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp);
4286 if (!NT_SUCCESS(Status)) {
4287 ERR("insert_tree_item returned %08lx\n", Status);
4288 ExFreePool(de);
4289 return Status;
4290 }
4291
4292 // FIXME - no point in calling this twice for the same device
4293 Status = update_dev_item(Vcb, c->devices[i], Irp);
4294 if (!NT_SUCCESS(Status)) {
4295 ERR("update_dev_item returned %08lx\n", Status);
4296 return Status;
4297 }
4298 }
4299
4300 c->created = false;
4301 c->oldused = c->used;
4302
4303 Vcb->superblock.bytes_used += c->used;
4304
4305 return STATUS_SUCCESS;
4306 }
4307
/* Removes the first entry in Vcb->sys_chunks whose key matches
 * (obj_id, obj_type, offset), subtracts its footprint from superblock.n,
 * frees it and regenerates the bootstrap data. Does nothing if no entry
 * matches. */
static void remove_from_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset) {
    LIST_ENTRY* cur;

    for (cur = Vcb->sys_chunks.Flink; cur != &Vcb->sys_chunks; cur = cur->Flink) {
        sys_chunk* entry = CONTAINING_RECORD(cur, sys_chunk, list_entry);

        if (entry->key.obj_id != obj_id || entry->key.obj_type != obj_type || entry->key.offset != offset)
            continue;

        RemoveEntryList(&entry->list_entry);

        Vcb->superblock.n -= sizeof(KEY) + entry->size;

        ExFreePool(entry->data);
        ExFreePool(entry);
        regen_bootstrap(Vcb);
        return;
    }
}
4330
set_xattr(device_extension * Vcb,LIST_ENTRY * batchlist,root * subvol,uint64_t inode,char * name,uint16_t namelen,uint32_t crc32,uint8_t * data,uint16_t datalen)4331 static NTSTATUS set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name, uint16_t namelen,
4332 uint32_t crc32, uint8_t* data, uint16_t datalen) {
4333 NTSTATUS Status;
4334 uint16_t xasize;
4335 DIR_ITEM* xa;
4336
4337 TRACE("(%p, %I64x, %I64x, %.*s, %08x, %p, %u)\n", Vcb, subvol->id, inode, namelen, name, crc32, data, datalen);
4338
4339 xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen + datalen;
4340
4341 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4342 if (!xa) {
4343 ERR("out of memory\n");
4344 return STATUS_INSUFFICIENT_RESOURCES;
4345 }
4346
4347 xa->key.obj_id = 0;
4348 xa->key.obj_type = 0;
4349 xa->key.offset = 0;
4350 xa->transid = Vcb->superblock.generation;
4351 xa->m = datalen;
4352 xa->n = namelen;
4353 xa->type = BTRFS_TYPE_EA;
4354 RtlCopyMemory(xa->name, name, namelen);
4355 RtlCopyMemory(xa->name + namelen, data, datalen);
4356
4357 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr);
4358 if (!NT_SUCCESS(Status)) {
4359 ERR("insert_tree_item_batch returned %08lx\n", Status);
4360 ExFreePool(xa);
4361 return Status;
4362 }
4363
4364 return STATUS_SUCCESS;
4365 }
4366
delete_xattr(device_extension * Vcb,LIST_ENTRY * batchlist,root * subvol,uint64_t inode,char * name,uint16_t namelen,uint32_t crc32)4367 static NTSTATUS delete_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name,
4368 uint16_t namelen, uint32_t crc32) {
4369 NTSTATUS Status;
4370 uint16_t xasize;
4371 DIR_ITEM* xa;
4372
4373 TRACE("(%p, %I64x, %I64x, %.*s, %08x)\n", Vcb, subvol->id, inode, namelen, name, crc32);
4374
4375 xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen;
4376
4377 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4378 if (!xa) {
4379 ERR("out of memory\n");
4380 return STATUS_INSUFFICIENT_RESOURCES;
4381 }
4382
4383 xa->key.obj_id = 0;
4384 xa->key.obj_type = 0;
4385 xa->key.offset = 0;
4386 xa->transid = Vcb->superblock.generation;
4387 xa->m = 0;
4388 xa->n = namelen;
4389 xa->type = BTRFS_TYPE_EA;
4390 RtlCopyMemory(xa->name, name, namelen);
4391
4392 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_DeleteXattr);
4393 if (!NT_SUCCESS(Status)) {
4394 ERR("insert_tree_item_batch returned %08lx\n", Status);
4395 ExFreePool(xa);
4396 return Status;
4397 }
4398
4399 return STATUS_SUCCESS;
4400 }
4401
insert_sparse_extent(fcb * fcb,LIST_ENTRY * batchlist,uint64_t start,uint64_t length)4402 static NTSTATUS insert_sparse_extent(fcb* fcb, LIST_ENTRY* batchlist, uint64_t start, uint64_t length) {
4403 NTSTATUS Status;
4404 EXTENT_DATA* ed;
4405 EXTENT_DATA2* ed2;
4406
4407 TRACE("((%I64x, %I64x), %I64x, %I64x)\n", fcb->subvol->id, fcb->inode, start, length);
4408
4409 ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4410 if (!ed) {
4411 ERR("out of memory\n");
4412 return STATUS_INSUFFICIENT_RESOURCES;
4413 }
4414
4415 ed->generation = fcb->Vcb->superblock.generation;
4416 ed->decoded_size = length;
4417 ed->compression = BTRFS_COMPRESSION_NONE;
4418 ed->encryption = BTRFS_ENCRYPTION_NONE;
4419 ed->encoding = BTRFS_ENCODING_NONE;
4420 ed->type = EXTENT_TYPE_REGULAR;
4421
4422 ed2 = (EXTENT_DATA2*)ed->data;
4423 ed2->address = 0;
4424 ed2->size = 0;
4425 ed2->offset = 0;
4426 ed2->num_bytes = length;
4427
4428 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), Batch_Insert);
4429 if (!NT_SUCCESS(Status)) {
4430 ERR("insert_tree_item_batch returned %08lx\n", Status);
4431 ExFreePool(ed);
4432 return Status;
4433 }
4434
4435 return STATUS_SUCCESS;
4436 }
4437
split_batch_item_list(batch_item_ind * bii)4438 static NTSTATUS split_batch_item_list(batch_item_ind* bii) {
4439 LIST_ENTRY* le;
4440 unsigned int i = 0;
4441 LIST_ENTRY* midpoint = NULL;
4442 batch_item_ind* bii2;
4443 batch_item* midpoint_item;
4444 LIST_ENTRY* before_midpoint;
4445
4446 le = bii->items.Flink;
4447 while (le != &bii->items) {
4448 if (i >= bii->num_items / 2) {
4449 midpoint = le;
4450 break;
4451 }
4452
4453 i++;
4454
4455 le = le->Flink;
4456 }
4457
4458 if (!midpoint)
4459 return STATUS_SUCCESS;
4460
4461 // make sure items on either side of split don't have same key
4462
4463 while (midpoint->Blink != &bii->items) {
4464 batch_item* item = CONTAINING_RECORD(midpoint, batch_item, list_entry);
4465 batch_item* prev = CONTAINING_RECORD(midpoint->Blink, batch_item, list_entry);
4466
4467 if (item->key.obj_id != prev->key.obj_id)
4468 break;
4469
4470 if (item->key.obj_type != prev->key.obj_type)
4471 break;
4472
4473 if (item->key.offset != prev->key.offset)
4474 break;
4475
4476 midpoint = midpoint->Blink;
4477 i--;
4478 }
4479
4480 if (midpoint->Blink == &bii->items)
4481 return STATUS_SUCCESS;
4482
4483 bii2 = ExAllocatePoolWithTag(PagedPool, sizeof(batch_item_ind), ALLOC_TAG);
4484 if (!bii2) {
4485 ERR("out of memory\n");
4486 return STATUS_INSUFFICIENT_RESOURCES;
4487 }
4488
4489 midpoint_item = CONTAINING_RECORD(midpoint, batch_item, list_entry);
4490
4491 bii2->key.obj_id = midpoint_item->key.obj_id;
4492 bii2->key.obj_type = midpoint_item->key.obj_type;
4493 bii2->key.offset = midpoint_item->key.offset;
4494
4495 bii2->num_items = bii->num_items - i;
4496 bii->num_items = i;
4497
4498 before_midpoint = midpoint->Blink;
4499
4500 bii2->items.Flink = midpoint;
4501 midpoint->Blink = &bii2->items;
4502 bii2->items.Blink = bii->items.Blink;
4503 bii->items.Blink->Flink = &bii2->items;
4504
4505 bii->items.Blink = before_midpoint;
4506 before_midpoint->Flink = &bii->items;
4507
4508 InsertHeadList(&bii->list_entry, &bii2->list_entry);
4509
4510 return STATUS_SUCCESS;
4511 }
4512
4513 #ifdef _MSC_VER
4514 #pragma warning(push)
4515 #pragma warning(suppress: 28194)
4516 #endif
4517 static NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, uint64_t objid,
4518 uint8_t objtype, uint64_t offset, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data,
4519 uint16_t datalen, enum batch_operation operation) {
4520 LIST_ENTRY* le;
4521 batch_root* br = NULL;
4522 batch_item* bi;
4523
4524 le = batchlist->Flink;
4525 while (le != batchlist) {
4526 batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry);
4527
4528 if (br2->r == r) {
4529 br = br2;
4530 break;
4531 }
4532
4533 le = le->Flink;
4534 }
4535
4536 if (!br) {
4537 br = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG);
4538 if (!br) {
4539 ERR("out of memory\n");
4540 return STATUS_INSUFFICIENT_RESOURCES;
4541 }
4542
4543 br->r = r;
4544 InitializeListHead(&br->items_ind);
4545 InsertTailList(batchlist, &br->list_entry);
4546 }
4547
4548 if (IsListEmpty(&br->items_ind)) {
4549 batch_item_ind* bii;
4550
4551 bii = ExAllocatePoolWithTag(PagedPool, sizeof(batch_item_ind), ALLOC_TAG);
4552 if (!bii) {
4553 ERR("out of memory\n");
4554 return STATUS_INSUFFICIENT_RESOURCES;
4555 }
4556
4557 bii->key.obj_id = 0;
4558 bii->key.obj_type = 0;
4559 bii->key.offset = 0;
4560 InitializeListHead(&bii->items);
4561 bii->num_items = 0;
4562 InsertTailList(&br->items_ind, &bii->list_entry);
4563 }
4564
4565 bi = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
4566 if (!bi) {
4567 ERR("out of memory\n");
4568 return STATUS_INSUFFICIENT_RESOURCES;
4569 }
4570
4571 bi->key.obj_id = objid;
4572 bi->key.obj_type = objtype;
4573 bi->key.offset = offset;
4574 bi->data = data;
4575 bi->datalen = datalen;
4576 bi->operation = operation;
4577
4578 le = br->items_ind.Blink;
4579 while (le != &br->items_ind) {
4580 LIST_ENTRY* le2;
4581 batch_item_ind* bii = CONTAINING_RECORD(le, batch_item_ind, list_entry);
4582
4583 if (keycmp(bii->key, bi->key) == 1) {
4584 le = le->Blink;
4585 continue;
4586 }
4587
4588 le2 = bii->items.Blink;
4589 while (le2 != &bii->items) {
4590 batch_item* bi2 = CONTAINING_RECORD(le2, batch_item, list_entry);
4591 int cmp = keycmp(bi2->key, bi->key);
4592
4593 if (cmp == -1 || (cmp == 0 && bi->operation >= bi2->operation)) {
4594 InsertHeadList(&bi2->list_entry, &bi->list_entry);
4595 bii->num_items++;
4596 goto end;
4597 }
4598
4599 le2 = le2->Blink;
4600 }
4601
4602 InsertHeadList(&bii->items, &bi->list_entry);
4603 bii->num_items++;
4604
4605 end:
4606 if (bii->num_items > BATCH_ITEM_LIMIT)
4607 return split_batch_item_list(bii);
4608
4609 return STATUS_SUCCESS;
4610 }
4611
4612 return STATUS_INTERNAL_ERROR;
4613 }
4614 #ifdef _MSC_VER
4615 #pragma warning(pop)
4616 #endif
4617
4618 typedef struct {
4619 uint64_t address;
4620 uint64_t length;
4621 uint64_t offset;
4622 bool changed;
4623 chunk* chunk;
4624 uint64_t skip_start;
4625 uint64_t skip_end;
4626 LIST_ENTRY list_entry;
4627 } extent_range;
4628
rationalize_extents(fcb * fcb,PIRP Irp)4629 static void rationalize_extents(fcb* fcb, PIRP Irp) {
4630 LIST_ENTRY* le;
4631 LIST_ENTRY extent_ranges;
4632 extent_range* er;
4633 bool changed = false, truncating = false;
4634 uint32_t num_extents = 0;
4635
4636 InitializeListHead(&extent_ranges);
4637
4638 le = fcb->extents.Flink;
4639 while (le != &fcb->extents) {
4640 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4641
4642 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4643 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4644
4645 if (ed2->size != 0) {
4646 LIST_ENTRY* le2;
4647
4648 le2 = extent_ranges.Flink;
4649 while (le2 != &extent_ranges) {
4650 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4651
4652 if (er2->address == ed2->address) {
4653 er2->skip_start = min(er2->skip_start, ed2->offset);
4654 er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes);
4655 goto cont;
4656 } else if (er2->address > ed2->address)
4657 break;
4658
4659 le2 = le2->Flink;
4660 }
4661
4662 er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside?
4663 if (!er) {
4664 ERR("out of memory\n");
4665 goto end;
4666 }
4667
4668 er->address = ed2->address;
4669 er->length = ed2->size;
4670 er->offset = ext->offset - ed2->offset;
4671 er->changed = false;
4672 er->chunk = NULL;
4673 er->skip_start = ed2->offset;
4674 er->skip_end = ed2->size - ed2->offset - ed2->num_bytes;
4675
4676 if (er->skip_start != 0 || er->skip_end != 0)
4677 truncating = true;
4678
4679 InsertHeadList(le2->Blink, &er->list_entry);
4680 num_extents++;
4681 }
4682 }
4683
4684 cont:
4685 le = le->Flink;
4686 }
4687
4688 if (num_extents == 0 || (num_extents == 1 && !truncating))
4689 goto end;
4690
4691 le = extent_ranges.Flink;
4692 while (le != &extent_ranges) {
4693 er = CONTAINING_RECORD(le, extent_range, list_entry);
4694
4695 if (!er->chunk) {
4696 LIST_ENTRY* le2;
4697
4698 er->chunk = get_chunk_from_address(fcb->Vcb, er->address);
4699
4700 if (!er->chunk) {
4701 ERR("get_chunk_from_address(%I64x) failed\n", er->address);
4702 goto end;
4703 }
4704
4705 le2 = le->Flink;
4706 while (le2 != &extent_ranges) {
4707 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4708
4709 if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size)
4710 er2->chunk = er->chunk;
4711
4712 le2 = le2->Flink;
4713 }
4714 }
4715
4716 le = le->Flink;
4717 }
4718
4719 if (truncating) {
4720 // truncate beginning or end of extent if unused
4721
4722 le = extent_ranges.Flink;
4723 while (le != &extent_ranges) {
4724 er = CONTAINING_RECORD(le, extent_range, list_entry);
4725
4726 if (er->skip_start > 0) {
4727 LIST_ENTRY* le2 = fcb->extents.Flink;
4728 while (le2 != &fcb->extents) {
4729 extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4730
4731 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4732 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4733
4734 if (ed2->size != 0 && ed2->address == er->address) {
4735 NTSTATUS Status;
4736
4737 Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4738 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
4739 if (!NT_SUCCESS(Status)) {
4740 ERR("update_changed_extent_ref returned %08lx\n", Status);
4741 goto end;
4742 }
4743
4744 ext->extent_data.decoded_size -= er->skip_start;
4745 ed2->size -= er->skip_start;
4746 ed2->address += er->skip_start;
4747 ed2->offset -= er->skip_start;
4748
4749 add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4750 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4751 }
4752 }
4753
4754 le2 = le2->Flink;
4755 }
4756
4757 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4758 add_checksum_entry(fcb->Vcb, er->address, (ULONG)(er->skip_start >> fcb->Vcb->sector_shift), NULL, NULL);
4759
4760 acquire_chunk_lock(er->chunk, fcb->Vcb);
4761
4762 if (!er->chunk->cache_loaded) {
4763 NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4764
4765 if (!NT_SUCCESS(Status)) {
4766 ERR("load_cache_chunk returned %08lx\n", Status);
4767 release_chunk_lock(er->chunk, fcb->Vcb);
4768 goto end;
4769 }
4770 }
4771
4772 er->chunk->used -= er->skip_start;
4773
4774 space_list_add(er->chunk, er->address, er->skip_start, NULL);
4775
4776 release_chunk_lock(er->chunk, fcb->Vcb);
4777
4778 er->address += er->skip_start;
4779 er->length -= er->skip_start;
4780 }
4781
4782 if (er->skip_end > 0) {
4783 LIST_ENTRY* le2 = fcb->extents.Flink;
4784 while (le2 != &fcb->extents) {
4785 extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4786
4787 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4788 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4789
4790 if (ed2->size != 0 && ed2->address == er->address) {
4791 NTSTATUS Status;
4792
4793 Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4794 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
4795 if (!NT_SUCCESS(Status)) {
4796 ERR("update_changed_extent_ref returned %08lx\n", Status);
4797 goto end;
4798 }
4799
4800 ext->extent_data.decoded_size -= er->skip_end;
4801 ed2->size -= er->skip_end;
4802
4803 add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4804 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4805 }
4806 }
4807
4808 le2 = le2->Flink;
4809 }
4810
4811 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4812 add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, (ULONG)(er->skip_end >> fcb->Vcb->sector_shift), NULL, NULL);
4813
4814 acquire_chunk_lock(er->chunk, fcb->Vcb);
4815
4816 if (!er->chunk->cache_loaded) {
4817 NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4818
4819 if (!NT_SUCCESS(Status)) {
4820 ERR("load_cache_chunk returned %08lx\n", Status);
4821 release_chunk_lock(er->chunk, fcb->Vcb);
4822 goto end;
4823 }
4824 }
4825
4826 er->chunk->used -= er->skip_end;
4827
4828 space_list_add(er->chunk, er->address + er->length - er->skip_end, er->skip_end, NULL);
4829
4830 release_chunk_lock(er->chunk, fcb->Vcb);
4831
4832 er->length -= er->skip_end;
4833 }
4834
4835 le = le->Flink;
4836 }
4837 }
4838
4839 if (num_extents < 2)
4840 goto end;
4841
4842 // merge together adjacent extents
4843 le = extent_ranges.Flink;
4844 while (le != &extent_ranges) {
4845 er = CONTAINING_RECORD(le, extent_range, list_entry);
4846
4847 if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) {
4848 extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry);
4849
4850 if (er->chunk == er2->chunk) {
4851 if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) {
4852 if (er->length + er2->length <= MAX_EXTENT_SIZE) {
4853 er->length += er2->length;
4854 er->changed = true;
4855
4856 RemoveEntryList(&er2->list_entry);
4857 ExFreePool(er2);
4858
4859 changed = true;
4860 continue;
4861 }
4862 }
4863 }
4864 }
4865
4866 le = le->Flink;
4867 }
4868
4869 if (!changed)
4870 goto end;
4871
4872 le = fcb->extents.Flink;
4873 while (le != &fcb->extents) {
4874 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4875
4876 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4877 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4878
4879 if (ed2->size != 0) {
4880 LIST_ENTRY* le2;
4881
4882 le2 = extent_ranges.Flink;
4883 while (le2 != &extent_ranges) {
4884 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4885
4886 if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) {
4887 NTSTATUS Status;
4888
4889 Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4890 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
4891 if (!NT_SUCCESS(Status)) {
4892 ERR("update_changed_extent_ref returned %08lx\n", Status);
4893 goto end;
4894 }
4895
4896 ed2->offset += ed2->address - er2->address;
4897 ed2->address = er2->address;
4898 ed2->size = er2->length;
4899 ext->extent_data.decoded_size = ed2->size;
4900
4901 add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4902 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4903
4904 break;
4905 }
4906
4907 le2 = le2->Flink;
4908 }
4909 }
4910 }
4911
4912 le = le->Flink;
4913 }
4914
4915 end:
4916 while (!IsListEmpty(&extent_ranges)) {
4917 le = RemoveHeadList(&extent_ranges);
4918 er = CONTAINING_RECORD(le, extent_range, list_entry);
4919
4920 ExFreePool(er);
4921 }
4922 }
4923
flush_fcb(fcb * fcb,bool cache,LIST_ENTRY * batchlist,PIRP Irp)4924 NTSTATUS flush_fcb(fcb* fcb, bool cache, LIST_ENTRY* batchlist, PIRP Irp) {
4925 traverse_ptr tp;
4926 KEY searchkey;
4927 NTSTATUS Status;
4928 INODE_ITEM* ii;
4929 uint64_t ii_offset;
4930 #ifdef DEBUG_PARANOID
4931 uint64_t old_size = 0;
4932 bool extents_changed;
4933 #endif
4934
4935 if (fcb->ads) {
4936 if (fcb->deleted) {
4937 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, fcb->adshash);
4938 if (!NT_SUCCESS(Status)) {
4939 ERR("delete_xattr returned %08lx\n", Status);
4940 goto end;
4941 }
4942 } else {
4943 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length,
4944 fcb->adshash, (uint8_t*)fcb->adsdata.Buffer, fcb->adsdata.Length);
4945 if (!NT_SUCCESS(Status)) {
4946 ERR("set_xattr returned %08lx\n", Status);
4947 goto end;
4948 }
4949 }
4950
4951 Status = STATUS_SUCCESS;
4952 goto end;
4953 }
4954
4955 if (fcb->deleted) {
4956 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode);
4957 if (!NT_SUCCESS(Status)) {
4958 ERR("insert_tree_item_batch returned %08lx\n", Status);
4959 goto end;
4960 }
4961
4962 if (fcb->marked_as_orphan) {
4963 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE,
4964 fcb->inode, NULL, 0, Batch_Delete);
4965 if (!NT_SUCCESS(Status)) {
4966 ERR("insert_tree_item_batch returned %08lx\n", Status);
4967 goto end;
4968 }
4969 }
4970
4971 Status = STATUS_SUCCESS;
4972 goto end;
4973 }
4974
4975 #ifdef DEBUG_PARANOID
4976 extents_changed = fcb->extents_changed;
4977 #endif
4978
4979 if (fcb->extents_changed) {
4980 LIST_ENTRY* le;
4981 bool prealloc = false, extents_inline = false;
4982 uint64_t last_end;
4983
4984 // delete ignored extent items
4985 le = fcb->extents.Flink;
4986 while (le != &fcb->extents) {
4987 LIST_ENTRY* le2 = le->Flink;
4988 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4989
4990 if (ext->ignore) {
4991 RemoveEntryList(&ext->list_entry);
4992
4993 if (ext->csum)
4994 ExFreePool(ext->csum);
4995
4996 ExFreePool(ext);
4997 }
4998
4999 le = le2;
5000 }
5001
5002 le = fcb->extents.Flink;
5003 while (le != &fcb->extents) {
5004 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
5005
5006 if (ext->inserted && ext->csum && ext->extent_data.type == EXTENT_TYPE_REGULAR) {
5007 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
5008
5009 if (ed2->size > 0) { // not sparse
5010 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE)
5011 add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, (ULONG)(ed2->num_bytes >> fcb->Vcb->sector_shift), ext->csum, Irp);
5012 else
5013 add_checksum_entry(fcb->Vcb, ed2->address, (ULONG)(ed2->size >> fcb->Vcb->sector_shift), ext->csum, Irp);
5014 }
5015 }
5016
5017 le = le->Flink;
5018 }
5019
5020 if (!IsListEmpty(&fcb->extents)) {
5021 rationalize_extents(fcb, Irp);
5022
5023 // merge together adjacent EXTENT_DATAs pointing to same extent
5024
5025 le = fcb->extents.Flink;
5026 while (le != &fcb->extents) {
5027 LIST_ENTRY* le2 = le->Flink;
5028 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
5029
5030 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
5031 extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
5032
5033 if (ext->extent_data.type == nextext->extent_data.type) {
5034 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
5035 EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->extent_data.data;
5036
5037 if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
5038 nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
5039 chunk* c;
5040
5041 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->csum) {
5042 ULONG len = (ULONG)((ed2->num_bytes + ned2->num_bytes) >> fcb->Vcb->sector_shift);
5043 void* csum;
5044
5045 csum = ExAllocatePoolWithTag(NonPagedPool, len * fcb->Vcb->csum_size, ALLOC_TAG);
5046 if (!csum) {
5047 ERR("out of memory\n");
5048 Status = STATUS_INSUFFICIENT_RESOURCES;
5049 goto end;
5050 }
5051
5052 RtlCopyMemory(csum, ext->csum, (ULONG)((ed2->num_bytes * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift));
5053 RtlCopyMemory((uint8_t*)csum + ((ed2->num_bytes * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift), nextext->csum,
5054 (ULONG)((ned2->num_bytes * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift));
5055
5056 ExFreePool(ext->csum);
5057 ext->csum = csum;
5058 }
5059
5060 ext->extent_data.generation = fcb->Vcb->superblock.generation;
5061 ed2->num_bytes += ned2->num_bytes;
5062
5063 RemoveEntryList(&nextext->list_entry);
5064
5065 if (nextext->csum)
5066 ExFreePool(nextext->csum);
5067
5068 ExFreePool(nextext);
5069
5070 c = get_chunk_from_address(fcb->Vcb, ed2->address);
5071
5072 if (!c) {
5073 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
5074 } else {
5075 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
5076 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
5077 if (!NT_SUCCESS(Status)) {
5078 ERR("update_changed_extent_ref returned %08lx\n", Status);
5079 goto end;
5080 }
5081 }
5082
5083 le2 = le;
5084 }
5085 }
5086 }
5087
5088 le = le2;
5089 }
5090 }
5091
5092 if (!fcb->created) {
5093 // delete existing EXTENT_DATA items
5094
5095 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, NULL, 0, Batch_DeleteExtentData);
5096 if (!NT_SUCCESS(Status)) {
5097 ERR("insert_tree_item_batch returned %08lx\n", Status);
5098 goto end;
5099 }
5100 }
5101
5102 // add new EXTENT_DATAs
5103
5104 last_end = 0;
5105
5106 le = fcb->extents.Flink;
5107 while (le != &fcb->extents) {
5108 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
5109 EXTENT_DATA* ed;
5110
5111 ext->inserted = false;
5112
5113 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
5114 Status = insert_sparse_extent(fcb, batchlist, last_end, ext->offset - last_end);
5115 if (!NT_SUCCESS(Status)) {
5116 ERR("insert_sparse_extent returned %08lx\n", Status);
5117 goto end;
5118 }
5119 }
5120
5121 ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
5122 if (!ed) {
5123 ERR("out of memory\n");
5124 Status = STATUS_INSUFFICIENT_RESOURCES;
5125 goto end;
5126 }
5127
5128 RtlCopyMemory(ed, &ext->extent_data, ext->datalen);
5129
5130 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset,
5131 ed, ext->datalen, Batch_Insert);
5132 if (!NT_SUCCESS(Status)) {
5133 ERR("insert_tree_item_batch returned %08lx\n", Status);
5134 goto end;
5135 }
5136
5137 if (ed->type == EXTENT_TYPE_PREALLOC)
5138 prealloc = true;
5139
5140 if (ed->type == EXTENT_TYPE_INLINE)
5141 extents_inline = true;
5142
5143 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
5144 if (ed->type == EXTENT_TYPE_INLINE)
5145 last_end = ext->offset + ed->decoded_size;
5146 else {
5147 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
5148
5149 last_end = ext->offset + ed2->num_bytes;
5150 }
5151 }
5152
5153 le = le->Flink;
5154 }
5155
5156 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
5157 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
5158 Status = insert_sparse_extent(fcb, batchlist, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end);
5159 if (!NT_SUCCESS(Status)) {
5160 ERR("insert_sparse_extent returned %08lx\n", Status);
5161 goto end;
5162 }
5163 }
5164
5165 // update prealloc flag in INODE_ITEM
5166
5167 if (!prealloc)
5168 fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
5169 else
5170 fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
5171
5172 fcb->inode_item_changed = true;
5173
5174 fcb->extents_changed = false;
5175 }
5176
5177 if ((!fcb->created && fcb->inode_item_changed) || cache) {
5178 searchkey.obj_id = fcb->inode;
5179 searchkey.obj_type = TYPE_INODE_ITEM;
5180 searchkey.offset = 0xffffffffffffffff;
5181
5182 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp);
5183 if (!NT_SUCCESS(Status)) {
5184 ERR("error - find_item returned %08lx\n", Status);
5185 goto end;
5186 }
5187
5188 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
5189 if (cache) {
5190 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
5191 if (!ii) {
5192 ERR("out of memory\n");
5193 Status = STATUS_INSUFFICIENT_RESOURCES;
5194 goto end;
5195 }
5196
5197 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
5198
5199 Status = insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp);
5200 if (!NT_SUCCESS(Status)) {
5201 ERR("insert_tree_item returned %08lx\n", Status);
5202 goto end;
5203 }
5204
5205 ii_offset = 0;
5206 } else {
5207 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id);
5208 Status = STATUS_INTERNAL_ERROR;
5209 goto end;
5210 }
5211 } else {
5212 #ifdef DEBUG_PARANOID
5213 INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
5214
5215 old_size = ii2->st_size;
5216 #endif
5217
5218 ii_offset = tp.item->key.offset;
5219 }
5220
5221 if (!cache) {
5222 Status = delete_tree_item(fcb->Vcb, &tp);
5223 if (!NT_SUCCESS(Status)) {
5224 ERR("delete_tree_item returned %08lx\n", Status);
5225 goto end;
5226 }
5227 } else {
5228 searchkey.obj_id = fcb->inode;
5229 searchkey.obj_type = TYPE_INODE_ITEM;
5230 searchkey.offset = ii_offset;
5231
5232 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp);
5233 if (!NT_SUCCESS(Status)) {
5234 ERR("error - find_item returned %08lx\n", Status);
5235 goto end;
5236 }
5237
5238 if (keycmp(tp.item->key, searchkey)) {
5239 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id);
5240 Status = STATUS_INTERNAL_ERROR;
5241 goto end;
5242 } else
5243 RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
5244 }
5245
5246 #ifdef DEBUG_PARANOID
5247 if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
5248 ERR("error - size has changed but extents not marked as changed\n");
5249 int3;
5250 }
5251 #endif
5252 } else
5253 ii_offset = 0;
5254
5255 fcb->created = false;
5256
5257 if (!cache && fcb->inode_item_changed) {
5258 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
5259 if (!ii) {
5260 ERR("out of memory\n");
5261 Status = STATUS_INSUFFICIENT_RESOURCES;
5262 goto end;
5263 }
5264
5265 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
5266
5267 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM),
5268 Batch_Insert);
5269 if (!NT_SUCCESS(Status)) {
5270 ERR("insert_tree_item_batch returned %08lx\n", Status);
5271 goto end;
5272 }
5273
5274 fcb->inode_item_changed = false;
5275 }
5276
5277 if (fcb->sd_dirty) {
5278 if (!fcb->sd_deleted) {
5279 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1,
5280 EA_NTACL_HASH, (uint8_t*)fcb->sd, (uint16_t)RtlLengthSecurityDescriptor(fcb->sd));
5281 if (!NT_SUCCESS(Status)) {
5282 ERR("set_xattr returned %08lx\n", Status);
5283 goto end;
5284 }
5285 } else {
5286 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1, EA_NTACL_HASH);
5287 if (!NT_SUCCESS(Status)) {
5288 ERR("delete_xattr returned %08lx\n", Status);
5289 goto end;
5290 }
5291 }
5292
5293 fcb->sd_deleted = false;
5294 fcb->sd_dirty = false;
5295 }
5296
5297 if (fcb->atts_changed) {
5298 if (!fcb->atts_deleted) {
5299 uint8_t val[16], *val2;
5300 ULONG atts = fcb->atts;
5301
5302 TRACE("inserting new DOSATTRIB xattr\n");
5303
5304 if (fcb->inode == SUBVOL_ROOT_INODE)
5305 atts &= ~FILE_ATTRIBUTE_READONLY;
5306
5307 val2 = &val[sizeof(val) - 1];
5308
5309 do {
5310 uint8_t c = atts % 16;
5311 *val2 = c <= 9 ? (c + '0') : (c - 0xa + 'a');
5312
5313 val2--;
5314 atts >>= 4;
5315 } while (atts != 0);
5316
5317 *val2 = 'x';
5318 val2--;
5319 *val2 = '0';
5320
5321 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1,
5322 EA_DOSATTRIB_HASH, val2, (uint16_t)(val + sizeof(val) - val2));
5323 if (!NT_SUCCESS(Status)) {
5324 ERR("set_xattr returned %08lx\n", Status);
5325 goto end;
5326 }
5327 } else {
5328 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1, EA_DOSATTRIB_HASH);
5329 if (!NT_SUCCESS(Status)) {
5330 ERR("delete_xattr returned %08lx\n", Status);
5331 goto end;
5332 }
5333 }
5334
5335 fcb->atts_changed = false;
5336 fcb->atts_deleted = false;
5337 }
5338
5339 if (fcb->reparse_xattr_changed) {
5340 if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
5341 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1,
5342 EA_REPARSE_HASH, (uint8_t*)fcb->reparse_xattr.Buffer, (uint16_t)fcb->reparse_xattr.Length);
5343 if (!NT_SUCCESS(Status)) {
5344 ERR("set_xattr returned %08lx\n", Status);
5345 goto end;
5346 }
5347 } else {
5348 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1, EA_REPARSE_HASH);
5349 if (!NT_SUCCESS(Status)) {
5350 ERR("delete_xattr returned %08lx\n", Status);
5351 goto end;
5352 }
5353 }
5354
5355 fcb->reparse_xattr_changed = false;
5356 }
5357
5358 if (fcb->ea_changed) {
5359 if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) {
5360 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1,
5361 EA_EA_HASH, (uint8_t*)fcb->ea_xattr.Buffer, (uint16_t)fcb->ea_xattr.Length);
5362 if (!NT_SUCCESS(Status)) {
5363 ERR("set_xattr returned %08lx\n", Status);
5364 goto end;
5365 }
5366 } else {
5367 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1, EA_EA_HASH);
5368 if (!NT_SUCCESS(Status)) {
5369 ERR("delete_xattr returned %08lx\n", Status);
5370 goto end;
5371 }
5372 }
5373
5374 fcb->ea_changed = false;
5375 }
5376
5377 if (fcb->prop_compression_changed) {
5378 if (fcb->prop_compression == PropCompression_None) {
5379 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, EA_PROP_COMPRESSION_HASH);
5380 if (!NT_SUCCESS(Status)) {
5381 ERR("delete_xattr returned %08lx\n", Status);
5382 goto end;
5383 }
5384 } else if (fcb->prop_compression == PropCompression_Zlib) {
5385 static const char zlib[] = "zlib";
5386
5387 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5388 EA_PROP_COMPRESSION_HASH, (uint8_t*)zlib, sizeof(zlib) - 1);
5389 if (!NT_SUCCESS(Status)) {
5390 ERR("set_xattr returned %08lx\n", Status);
5391 goto end;
5392 }
5393 } else if (fcb->prop_compression == PropCompression_LZO) {
5394 static const char lzo[] = "lzo";
5395
5396 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5397 EA_PROP_COMPRESSION_HASH, (uint8_t*)lzo, sizeof(lzo) - 1);
5398 if (!NT_SUCCESS(Status)) {
5399 ERR("set_xattr returned %08lx\n", Status);
5400 goto end;
5401 }
5402 } else if (fcb->prop_compression == PropCompression_ZSTD) {
5403 static const char zstd[] = "zstd";
5404
5405 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5406 EA_PROP_COMPRESSION_HASH, (uint8_t*)zstd, sizeof(zstd) - 1);
5407 if (!NT_SUCCESS(Status)) {
5408 ERR("set_xattr returned %08lx\n", Status);
5409 goto end;
5410 }
5411 }
5412
5413 fcb->prop_compression_changed = false;
5414 }
5415
5416 if (fcb->xattrs_changed) {
5417 LIST_ENTRY* le;
5418
5419 le = fcb->xattrs.Flink;
5420 while (le != &fcb->xattrs) {
5421 xattr* xa = CONTAINING_RECORD(le, xattr, list_entry);
5422 LIST_ENTRY* le2 = le->Flink;
5423
5424 if (xa->dirty) {
5425 uint32_t hash = calc_crc32c(0xfffffffe, (uint8_t*)xa->data, xa->namelen);
5426
5427 if (xa->valuelen == 0) {
5428 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, hash);
5429 if (!NT_SUCCESS(Status)) {
5430 ERR("delete_xattr returned %08lx\n", Status);
5431 goto end;
5432 }
5433
5434 RemoveEntryList(&xa->list_entry);
5435 ExFreePool(xa);
5436 } else {
5437 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen,
5438 hash, (uint8_t*)&xa->data[xa->namelen], xa->valuelen);
5439 if (!NT_SUCCESS(Status)) {
5440 ERR("set_xattr returned %08lx\n", Status);
5441 goto end;
5442 }
5443
5444 xa->dirty = false;
5445 }
5446 }
5447
5448 le = le2;
5449 }
5450
5451 fcb->xattrs_changed = false;
5452 }
5453
5454 if ((fcb->case_sensitive_set && !fcb->case_sensitive)) {
5455 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE,
5456 sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH);
5457 if (!NT_SUCCESS(Status)) {
5458 ERR("delete_xattr returned %08lx\n", Status);
5459 goto end;
5460 }
5461
5462 fcb->case_sensitive_set = false;
5463 } else if ((!fcb->case_sensitive_set && fcb->case_sensitive)) {
5464 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE,
5465 sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH, (uint8_t*)"1", 1);
5466 if (!NT_SUCCESS(Status)) {
5467 ERR("set_xattr returned %08lx\n", Status);
5468 goto end;
5469 }
5470
5471 fcb->case_sensitive_set = true;
5472 }
5473
5474 if (fcb->inode_item.st_nlink == 0 && !fcb->marked_as_orphan) { // mark as orphan
5475 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE,
5476 fcb->inode, NULL, 0, Batch_Insert);
5477 if (!NT_SUCCESS(Status)) {
5478 ERR("insert_tree_item_batch returned %08lx\n", Status);
5479 goto end;
5480 }
5481
5482 fcb->marked_as_orphan = true;
5483 }
5484
5485 Status = STATUS_SUCCESS;
5486
5487 end:
5488 if (fcb->dirty) {
5489 bool lock = false;
5490
5491 fcb->dirty = false;
5492
5493 if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->dirty_fcbs_lock)) {
5494 ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, true);
5495 lock = true;
5496 }
5497
5498 RemoveEntryList(&fcb->list_entry_dirty);
5499
5500 if (lock)
5501 ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock);
5502 }
5503
5504 return Status;
5505 }
5506
/* Queues TRIM entries on dev covering [address, address + size), leaving out
 * any part of the range that overlaps one of the superblock locations listed
 * in superblock_addrs.
 *
 * Vcb      volume; only its superblock sector size is read here
 * dev      device passed through to add_trim_entry
 * address  start of the range to trim
 * size     length of the range to trim */
void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, uint64_t address, uint64_t size) {
    // length reserved per superblock: sizeof(superblock) aligned to the sector size
    ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
    int n;

    // superblock_addrs is terminated by a zero entry
    for (n = 0; superblock_addrs[n] != 0; n++) {
        uint64_t sb_start = superblock_addrs[n];
        uint64_t sb_end = sb_start + sblen;
        uint64_t skipped;

        if (sb_end < address || sb_start >= address + size) {
            // no overlap - stop scanning once a superblock starts past the range
            if (sb_start > address + size)
                break;

            continue;
        }

        // trim whatever lies in front of this superblock
        if (sb_start > address)
            add_trim_entry(dev, address, sb_start - address);

        // distance from the current start to the end of the superblock
        skipped = sb_end - address;

        // nothing of the range is left after the superblock
        if (size <= skipped)
            return;

        size -= skipped;
        address = sb_end;
    }

    add_trim_entry(dev, address, size);
}
5530
drop_chunk(device_extension * Vcb,chunk * c,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)5531 static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5532 NTSTATUS Status;
5533 KEY searchkey;
5534 traverse_ptr tp;
5535 uint64_t i, factor;
5536 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];;
5537
5538 TRACE("dropping chunk %I64x\n", c->offset);
5539
5540 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
5541 factor = c->chunk_item->num_stripes;
5542 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
5543 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
5544 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
5545 factor = c->chunk_item->num_stripes - 1;
5546 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
5547 factor = c->chunk_item->num_stripes - 2;
5548 else // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
5549 factor = 1;
5550
5551 // do TRIM
5552 if (Vcb->trim && !Vcb->options.no_trim) {
5553 uint64_t len = c->chunk_item->size / factor;
5554
5555 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5556 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
5557 add_trim_entry_avoid_sb(Vcb, c->devices[i], cis[i].offset, len);
5558 }
5559 }
5560
5561 if (!c->cache) {
5562 Status = load_stored_free_space_cache(Vcb, c, true, Irp);
5563
5564 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND)
5565 WARN("load_stored_free_space_cache returned %08lx\n", Status);
5566 }
5567
5568 // remove free space cache
5569 if (c->cache) {
5570 c->cache->deleted = true;
5571
5572 Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback);
5573 if (!NT_SUCCESS(Status)) {
5574 ERR("excise_extents returned %08lx\n", Status);
5575 return Status;
5576 }
5577
5578 Status = flush_fcb(c->cache, true, batchlist, Irp);
5579
5580 free_fcb(c->cache);
5581
5582 if (c->cache->refcount == 0)
5583 reap_fcb(c->cache);
5584
5585 if (!NT_SUCCESS(Status)) {
5586 ERR("flush_fcb returned %08lx\n", Status);
5587 return Status;
5588 }
5589
5590 searchkey.obj_id = FREE_SPACE_CACHE_ID;
5591 searchkey.obj_type = 0;
5592 searchkey.offset = c->offset;
5593
5594 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
5595 if (!NT_SUCCESS(Status)) {
5596 ERR("error - find_item returned %08lx\n", Status);
5597 return Status;
5598 }
5599
5600 if (!keycmp(tp.item->key, searchkey)) {
5601 Status = delete_tree_item(Vcb, &tp);
5602 if (!NT_SUCCESS(Status)) {
5603 ERR("delete_tree_item returned %08lx\n", Status);
5604 return Status;
5605 }
5606 }
5607 }
5608
5609 if (Vcb->space_root) {
5610 Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size,
5611 NULL, 0, Batch_DeleteFreeSpace);
5612 if (!NT_SUCCESS(Status)) {
5613 ERR("insert_tree_item_batch returned %08lx\n", Status);
5614 return Status;
5615 }
5616 }
5617
5618 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5619 if (!c->created) {
5620 // remove DEV_EXTENTs from tree 4
5621 searchkey.obj_id = cis[i].dev_id;
5622 searchkey.obj_type = TYPE_DEV_EXTENT;
5623 searchkey.offset = cis[i].offset;
5624
5625 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp);
5626 if (!NT_SUCCESS(Status)) {
5627 ERR("error - find_item returned %08lx\n", Status);
5628 return Status;
5629 }
5630
5631 if (!keycmp(tp.item->key, searchkey)) {
5632 Status = delete_tree_item(Vcb, &tp);
5633 if (!NT_SUCCESS(Status)) {
5634 ERR("delete_tree_item returned %08lx\n", Status);
5635 return Status;
5636 }
5637
5638 if (tp.item->size >= sizeof(DEV_EXTENT)) {
5639 DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
5640
5641 c->devices[i]->devitem.bytes_used -= de->length;
5642
5643 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5644 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + de->length > Vcb->balance.opts[0].drange_start)
5645 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5646 } else
5647 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, NULL, rollback);
5648 }
5649 } else
5650 WARN("could not find (%I64x,%x,%I64x) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
5651 } else {
5652 uint64_t len = c->chunk_item->size / factor;
5653
5654 c->devices[i]->devitem.bytes_used -= len;
5655
5656 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5657 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + len > Vcb->balance.opts[0].drange_start)
5658 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5659 } else
5660 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, NULL, rollback);
5661 }
5662 }
5663
5664 // modify DEV_ITEMs in chunk tree
5665 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5666 if (c->devices[i]) {
5667 uint64_t j;
5668 DEV_ITEM* di;
5669
5670 searchkey.obj_id = 1;
5671 searchkey.obj_type = TYPE_DEV_ITEM;
5672 searchkey.offset = c->devices[i]->devitem.dev_id;
5673
5674 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
5675 if (!NT_SUCCESS(Status)) {
5676 ERR("error - find_item returned %08lx\n", Status);
5677 return Status;
5678 }
5679
5680 if (!keycmp(tp.item->key, searchkey)) {
5681 Status = delete_tree_item(Vcb, &tp);
5682 if (!NT_SUCCESS(Status)) {
5683 ERR("delete_tree_item returned %08lx\n", Status);
5684 return Status;
5685 }
5686
5687 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
5688 if (!di) {
5689 ERR("out of memory\n");
5690 return STATUS_INSUFFICIENT_RESOURCES;
5691 }
5692
5693 RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
5694
5695 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
5696 if (!NT_SUCCESS(Status)) {
5697 ERR("insert_tree_item returned %08lx\n", Status);
5698 return Status;
5699 }
5700 }
5701
5702 for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
5703 if (c->devices[j] == c->devices[i])
5704 c->devices[j] = NULL;
5705 }
5706 }
5707 }
5708
5709 if (!c->created) {
5710 // remove CHUNK_ITEM from chunk tree
5711 searchkey.obj_id = 0x100;
5712 searchkey.obj_type = TYPE_CHUNK_ITEM;
5713 searchkey.offset = c->offset;
5714
5715 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
5716 if (!NT_SUCCESS(Status)) {
5717 ERR("error - find_item returned %08lx\n", Status);
5718 return Status;
5719 }
5720
5721 if (!keycmp(tp.item->key, searchkey)) {
5722 Status = delete_tree_item(Vcb, &tp);
5723
5724 if (!NT_SUCCESS(Status)) {
5725 ERR("delete_tree_item returned %08lx\n", Status);
5726 return Status;
5727 }
5728 } else
5729 WARN("could not find CHUNK_ITEM for chunk %I64x\n", c->offset);
5730
5731 // remove BLOCK_GROUP_ITEM from extent tree
5732 searchkey.obj_id = c->offset;
5733 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
5734 searchkey.offset = 0xffffffffffffffff;
5735
5736 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
5737 if (!NT_SUCCESS(Status)) {
5738 ERR("error - find_item returned %08lx\n", Status);
5739 return Status;
5740 }
5741
5742 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
5743 Status = delete_tree_item(Vcb, &tp);
5744
5745 if (!NT_SUCCESS(Status)) {
5746 ERR("delete_tree_item returned %08lx\n", Status);
5747 return Status;
5748 }
5749 } else
5750 WARN("could not find BLOCK_GROUP_ITEM for chunk %I64x\n", c->offset);
5751 }
5752
5753 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
5754 remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
5755
5756 RemoveEntryList(&c->list_entry);
5757
5758 // clear raid56 incompat flag if dropping last RAID5/6 chunk
5759
5760 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
5761 LIST_ENTRY* le;
5762 bool clear_flag = true;
5763
5764 le = Vcb->chunks.Flink;
5765 while (le != &Vcb->chunks) {
5766 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
5767
5768 if (c2->chunk_item->type & BLOCK_FLAG_RAID5 || c2->chunk_item->type & BLOCK_FLAG_RAID6) {
5769 clear_flag = false;
5770 break;
5771 }
5772
5773 le = le->Flink;
5774 }
5775
5776 if (clear_flag)
5777 Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID56;
5778 }
5779
5780 // clear raid1c34 incompat flag if dropping last RAID5/6 chunk
5781
5782 if (c->chunk_item->type & BLOCK_FLAG_RAID1C3 || c->chunk_item->type & BLOCK_FLAG_RAID1C4) {
5783 LIST_ENTRY* le;
5784 bool clear_flag = true;
5785
5786 le = Vcb->chunks.Flink;
5787 while (le != &Vcb->chunks) {
5788 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
5789
5790 if (c2->chunk_item->type & BLOCK_FLAG_RAID1C3 || c2->chunk_item->type & BLOCK_FLAG_RAID1C4) {
5791 clear_flag = false;
5792 break;
5793 }
5794
5795 le = le->Flink;
5796 }
5797
5798 if (clear_flag)
5799 Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID1C34;
5800 }
5801
5802 Vcb->superblock.bytes_used -= c->oldused;
5803
5804 ExFreePool(c->chunk_item);
5805 ExFreePool(c->devices);
5806
5807 while (!IsListEmpty(&c->space)) {
5808 space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
5809
5810 RemoveEntryList(&s->list_entry);
5811 ExFreePool(s);
5812 }
5813
5814 while (!IsListEmpty(&c->deleting)) {
5815 space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
5816
5817 RemoveEntryList(&s->list_entry);
5818 ExFreePool(s);
5819 }
5820
5821 release_chunk_lock(c, Vcb);
5822
5823 ExDeleteResourceLite(&c->partial_stripes_lock);
5824 ExDeleteResourceLite(&c->range_locks_lock);
5825 ExDeleteResourceLite(&c->lock);
5826 ExDeleteResourceLite(&c->changed_extents_lock);
5827
5828 ExFreePool(c);
5829
5830 return STATUS_SUCCESS;
5831 }
5832
partial_stripe_read(device_extension * Vcb,chunk * c,partial_stripe * ps,uint64_t startoff,uint16_t parity,ULONG offset,ULONG len)5833 static NTSTATUS partial_stripe_read(device_extension* Vcb, chunk* c, partial_stripe* ps, uint64_t startoff, uint16_t parity, ULONG offset, ULONG len) {
5834 NTSTATUS Status;
5835 ULONG sl = (ULONG)(c->chunk_item->stripe_length >> Vcb->sector_shift);
5836 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5837
5838 while (len > 0) {
5839 ULONG readlen = min(offset + len, offset + (sl - (offset % sl))) - offset;
5840 uint16_t stripe;
5841
5842 stripe = (parity + (offset / sl) + 1) % c->chunk_item->num_stripes;
5843
5844 if (c->devices[stripe]->devobj) {
5845 Status = sync_read_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff + ((offset % sl) << Vcb->sector_shift),
5846 readlen << Vcb->sector_shift, ps->data + (offset << Vcb->sector_shift), false);
5847 if (!NT_SUCCESS(Status)) {
5848 ERR("sync_read_phys returned %08lx\n", Status);
5849 return Status;
5850 }
5851 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
5852 uint16_t i;
5853 uint8_t* scratch;
5854
5855 scratch = ExAllocatePoolWithTag(NonPagedPool, readlen << Vcb->sector_shift, ALLOC_TAG);
5856 if (!scratch) {
5857 ERR("out of memory\n");
5858 return STATUS_INSUFFICIENT_RESOURCES;
5859 }
5860
5861 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5862 if (i != stripe) {
5863 if (!c->devices[i]->devobj) {
5864 ExFreePool(scratch);
5865 return STATUS_UNEXPECTED_IO_ERROR;
5866 }
5867
5868 if (i == 0 || (stripe == 0 && i == 1)) {
5869 Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) << Vcb->sector_shift),
5870 readlen << Vcb->sector_shift, ps->data + (offset << Vcb->sector_shift), false);
5871 if (!NT_SUCCESS(Status)) {
5872 ERR("sync_read_phys returned %08lx\n", Status);
5873 ExFreePool(scratch);
5874 return Status;
5875 }
5876 } else {
5877 Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) << Vcb->sector_shift),
5878 readlen << Vcb->sector_shift, scratch, false);
5879 if (!NT_SUCCESS(Status)) {
5880 ERR("sync_read_phys returned %08lx\n", Status);
5881 ExFreePool(scratch);
5882 return Status;
5883 }
5884
5885 do_xor(ps->data + (offset << Vcb->sector_shift), scratch, readlen << Vcb->sector_shift);
5886 }
5887 }
5888 }
5889
5890 ExFreePool(scratch);
5891 } else {
5892 uint8_t* scratch;
5893 uint16_t k, i, logstripe, error_stripe, num_errors = 0;
5894
5895 scratch = ExAllocatePoolWithTag(NonPagedPool, (c->chunk_item->num_stripes + 2) * readlen << Vcb->sector_shift, ALLOC_TAG);
5896 if (!scratch) {
5897 ERR("out of memory\n");
5898 return STATUS_INSUFFICIENT_RESOURCES;
5899 }
5900
5901 i = (parity + 1) % c->chunk_item->num_stripes;
5902 logstripe = (c->chunk_item->num_stripes + c->chunk_item->num_stripes - 1 - parity + stripe) % c->chunk_item->num_stripes;
5903
5904 for (k = 0; k < c->chunk_item->num_stripes; k++) {
5905 if (i != stripe) {
5906 if (c->devices[i]->devobj) {
5907 Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) << Vcb->sector_shift),
5908 readlen << Vcb->sector_shift, scratch + (k * readlen << Vcb->sector_shift), false);
5909 if (!NT_SUCCESS(Status)) {
5910 ERR("sync_read_phys returned %08lx\n", Status);
5911 num_errors++;
5912 error_stripe = k;
5913 }
5914 } else {
5915 num_errors++;
5916 error_stripe = k;
5917 }
5918
5919 if (num_errors > 1) {
5920 ExFreePool(scratch);
5921 return STATUS_UNEXPECTED_IO_ERROR;
5922 }
5923 }
5924
5925 i = (i + 1) % c->chunk_item->num_stripes;
5926 }
5927
5928 if (num_errors == 0 || error_stripe == c->chunk_item->num_stripes - 1) {
5929 for (k = 0; k < c->chunk_item->num_stripes - 1; k++) {
5930 if (k != logstripe) {
5931 if (k == 0 || (k == 1 && logstripe == 0)) {
5932 RtlCopyMemory(ps->data + (offset << Vcb->sector_shift), scratch + (k * readlen << Vcb->sector_shift),
5933 readlen << Vcb->sector_shift);
5934 } else {
5935 do_xor(ps->data + (offset << Vcb->sector_shift), scratch + (k * readlen << Vcb->sector_shift),
5936 readlen << Vcb->sector_shift);
5937 }
5938 }
5939 }
5940 } else {
5941 raid6_recover2(scratch, c->chunk_item->num_stripes, readlen << Vcb->sector_shift, logstripe,
5942 error_stripe, scratch + (c->chunk_item->num_stripes * readlen << Vcb->sector_shift));
5943
5944 RtlCopyMemory(ps->data + (offset << Vcb->sector_shift), scratch + (c->chunk_item->num_stripes * readlen << Vcb->sector_shift),
5945 readlen << Vcb->sector_shift);
5946 }
5947
5948 ExFreePool(scratch);
5949 }
5950
5951 offset += readlen;
5952 len -= readlen;
5953 }
5954
5955 return STATUS_SUCCESS;
5956 }
5957
flush_partial_stripe(device_extension * Vcb,chunk * c,partial_stripe * ps)5958 NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps) {
5959 NTSTATUS Status;
5960 uint16_t parity2, stripe, startoffstripe;
5961 uint8_t* data;
5962 uint64_t startoff;
5963 ULONG runlength, index, last1;
5964 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5965 LIST_ENTRY* le;
5966 uint16_t k, num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
5967 uint64_t ps_length = num_data_stripes * c->chunk_item->stripe_length;
5968 ULONG stripe_length = (ULONG)c->chunk_item->stripe_length;
5969
5970 // FIXME - do writes asynchronously?
5971
5972 get_raid0_offset(ps->address - c->offset, stripe_length, num_data_stripes, &startoff, &startoffstripe);
5973
5974 parity2 = (((ps->address - c->offset) / ps_length) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
5975
5976 // read data (or reconstruct if degraded)
5977
5978 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
5979 last1 = 0;
5980
5981 while (runlength != 0) {
5982 if (index >= ps->bmplen)
5983 break;
5984
5985 if (index + runlength >= ps->bmplen) {
5986 runlength = ps->bmplen - index;
5987
5988 if (runlength == 0)
5989 break;
5990 }
5991
5992 if (index > last1) {
5993 Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, index - last1);
5994 if (!NT_SUCCESS(Status)) {
5995 ERR("partial_stripe_read returned %08lx\n", Status);
5996 return Status;
5997 }
5998 }
5999
6000 last1 = index + runlength;
6001
6002 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
6003 }
6004
6005 if (last1 < ps_length >> Vcb->sector_shift) {
6006 Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, (ULONG)((ps_length >> Vcb->sector_shift) - last1));
6007 if (!NT_SUCCESS(Status)) {
6008 ERR("partial_stripe_read returned %08lx\n", Status);
6009 return Status;
6010 }
6011 }
6012
6013 // set unallocated data to 0
6014 le = c->space.Flink;
6015 while (le != &c->space) {
6016 space* s = CONTAINING_RECORD(le, space, list_entry);
6017
6018 if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
6019 uint64_t start = max(ps->address, s->address);
6020 uint64_t end = min(ps->address + ps_length, s->address + s->size);
6021
6022 RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
6023 } else if (s->address >= ps->address + ps_length)
6024 break;
6025
6026 le = le->Flink;
6027 }
6028
6029 le = c->deleting.Flink;
6030 while (le != &c->deleting) {
6031 space* s = CONTAINING_RECORD(le, space, list_entry);
6032
6033 if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
6034 uint64_t start = max(ps->address, s->address);
6035 uint64_t end = min(ps->address + ps_length, s->address + s->size);
6036
6037 RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
6038 } else if (s->address >= ps->address + ps_length)
6039 break;
6040
6041 le = le->Flink;
6042 }
6043
6044 stripe = (parity2 + 1) % c->chunk_item->num_stripes;
6045
6046 data = ps->data;
6047 for (k = 0; k < num_data_stripes; k++) {
6048 if (c->devices[stripe]->devobj) {
6049 Status = write_data_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff, data, stripe_length);
6050 if (!NT_SUCCESS(Status)) {
6051 ERR("write_data_phys returned %08lx\n", Status);
6052 return Status;
6053 }
6054 }
6055
6056 data += stripe_length;
6057 stripe = (stripe + 1) % c->chunk_item->num_stripes;
6058 }
6059
6060 // write parity
6061 if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
6062 if (c->devices[parity2]->devobj) {
6063 uint16_t i;
6064
6065 for (i = 1; i < c->chunk_item->num_stripes - 1; i++) {
6066 do_xor(ps->data, ps->data + (i * stripe_length), stripe_length);
6067 }
6068
6069 Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff, ps->data, stripe_length);
6070 if (!NT_SUCCESS(Status)) {
6071 ERR("write_data_phys returned %08lx\n", Status);
6072 return Status;
6073 }
6074 }
6075 } else {
6076 uint16_t parity1 = (parity2 + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
6077
6078 if (c->devices[parity1]->devobj || c->devices[parity2]->devobj) {
6079 uint8_t* scratch;
6080 uint16_t i;
6081
6082 scratch = ExAllocatePoolWithTag(NonPagedPool, stripe_length * 2, ALLOC_TAG);
6083 if (!scratch) {
6084 ERR("out of memory\n");
6085 return STATUS_INSUFFICIENT_RESOURCES;
6086 }
6087
6088 i = c->chunk_item->num_stripes - 3;
6089
6090 while (true) {
6091 if (i == c->chunk_item->num_stripes - 3) {
6092 RtlCopyMemory(scratch, ps->data + (i * stripe_length), stripe_length);
6093 RtlCopyMemory(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
6094 } else {
6095 do_xor(scratch, ps->data + (i * stripe_length), stripe_length);
6096
6097 galois_double(scratch + stripe_length, stripe_length);
6098 do_xor(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
6099 }
6100
6101 if (i == 0)
6102 break;
6103
6104 i--;
6105 }
6106
6107 if (c->devices[parity1]->devobj) {
6108 Status = write_data_phys(c->devices[parity1]->devobj, c->devices[parity1]->fileobj, cis[parity1].offset + startoff, scratch, stripe_length);
6109 if (!NT_SUCCESS(Status)) {
6110 ERR("write_data_phys returned %08lx\n", Status);
6111 ExFreePool(scratch);
6112 return Status;
6113 }
6114 }
6115
6116 if (c->devices[parity2]->devobj) {
6117 Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff,
6118 scratch + stripe_length, stripe_length);
6119 if (!NT_SUCCESS(Status)) {
6120 ERR("write_data_phys returned %08lx\n", Status);
6121 ExFreePool(scratch);
6122 return Status;
6123 }
6124 }
6125
6126 ExFreePool(scratch);
6127 }
6128 }
6129
6130 return STATUS_SUCCESS;
6131 }
6132
update_chunks(device_extension * Vcb,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)6133 static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
6134 LIST_ENTRY *le, *le2;
6135 NTSTATUS Status;
6136 uint64_t used_minus_cache;
6137
6138 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
6139
6140 // FIXME - do tree chunks before data chunks
6141
6142 le = Vcb->chunks.Flink;
6143 while (le != &Vcb->chunks) {
6144 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
6145
6146 le2 = le->Flink;
6147
6148 if (c->changed) {
6149 acquire_chunk_lock(c, Vcb);
6150
6151 // flush partial stripes
6152 if (!Vcb->readonly && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) {
6153 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
6154
6155 while (!IsListEmpty(&c->partial_stripes)) {
6156 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
6157
6158 Status = flush_partial_stripe(Vcb, c, ps);
6159
6160 if (ps->bmparr)
6161 ExFreePool(ps->bmparr);
6162
6163 ExFreePool(ps);
6164
6165 if (!NT_SUCCESS(Status)) {
6166 ERR("flush_partial_stripe returned %08lx\n", Status);
6167 ExReleaseResourceLite(&c->partial_stripes_lock);
6168 release_chunk_lock(c, Vcb);
6169 ExReleaseResourceLite(&Vcb->chunk_lock);
6170 return Status;
6171 }
6172 }
6173
6174 ExReleaseResourceLite(&c->partial_stripes_lock);
6175 }
6176
6177 if (c->list_entry_balance.Flink) {
6178 release_chunk_lock(c, Vcb);
6179 le = le2;
6180 continue;
6181 }
6182
6183 if (c->space_changed || c->created) {
6184 bool created = c->created;
6185
6186 used_minus_cache = c->used;
6187
6188 // subtract self-hosted cache
6189 if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
6190 LIST_ENTRY* le3;
6191
6192 le3 = c->cache->extents.Flink;
6193 while (le3 != &c->cache->extents) {
6194 extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
6195 EXTENT_DATA* ed = &ext->extent_data;
6196
6197 if (!ext->ignore) {
6198 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
6199 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
6200
6201 if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
6202 used_minus_cache -= ed2->size;
6203 }
6204 }
6205
6206 le3 = le3->Flink;
6207 }
6208 }
6209
6210 if (used_minus_cache == 0) {
6211 Status = drop_chunk(Vcb, c, batchlist, Irp, rollback);
6212 if (!NT_SUCCESS(Status)) {
6213 ERR("drop_chunk returned %08lx\n", Status);
6214 release_chunk_lock(c, Vcb);
6215 ExReleaseResourceLite(&Vcb->chunk_lock);
6216 return Status;
6217 }
6218
6219 // c is now freed, so avoid releasing non-existent lock
6220 le = le2;
6221 continue;
6222 } else if (c->created) {
6223 Status = create_chunk(Vcb, c, Irp);
6224 if (!NT_SUCCESS(Status)) {
6225 ERR("create_chunk returned %08lx\n", Status);
6226 release_chunk_lock(c, Vcb);
6227 ExReleaseResourceLite(&Vcb->chunk_lock);
6228 return Status;
6229 }
6230 }
6231
6232 if (used_minus_cache > 0 || created)
6233 release_chunk_lock(c, Vcb);
6234 } else
6235 release_chunk_lock(c, Vcb);
6236 }
6237
6238 le = le2;
6239 }
6240
6241 ExReleaseResourceLite(&Vcb->chunk_lock);
6242
6243 return STATUS_SUCCESS;
6244 }
6245
delete_root_ref(device_extension * Vcb,uint64_t subvolid,uint64_t parsubvolid,uint64_t parinode,PANSI_STRING utf8,PIRP Irp)6246 static NTSTATUS delete_root_ref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, uint64_t parinode, PANSI_STRING utf8, PIRP Irp) {
6247 KEY searchkey;
6248 traverse_ptr tp;
6249 NTSTATUS Status;
6250
6251 searchkey.obj_id = parsubvolid;
6252 searchkey.obj_type = TYPE_ROOT_REF;
6253 searchkey.offset = subvolid;
6254
6255 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6256 if (!NT_SUCCESS(Status)) {
6257 ERR("error - find_item returned %08lx\n", Status);
6258 return Status;
6259 }
6260
6261 if (!keycmp(searchkey, tp.item->key)) {
6262 if (tp.item->size < sizeof(ROOT_REF)) {
6263 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
6264 return STATUS_INTERNAL_ERROR;
6265 } else {
6266 ROOT_REF* rr;
6267 ULONG len;
6268
6269 rr = (ROOT_REF*)tp.item->data;
6270 len = tp.item->size;
6271
6272 do {
6273 uint16_t itemlen;
6274
6275 if (len < sizeof(ROOT_REF) || len < offsetof(ROOT_REF, name[0]) + rr->n) {
6276 ERR("(%I64x,%x,%I64x) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6277 break;
6278 }
6279
6280 itemlen = (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n;
6281
6282 if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
6283 uint16_t newlen = tp.item->size - itemlen;
6284
6285 Status = delete_tree_item(Vcb, &tp);
6286 if (!NT_SUCCESS(Status)) {
6287 ERR("delete_tree_item returned %08lx\n", Status);
6288 return Status;
6289 }
6290
6291 if (newlen == 0) {
6292 TRACE("deleting (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6293 } else {
6294 uint8_t *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
6295
6296 if (!newrr) {
6297 ERR("out of memory\n");
6298 return STATUS_INSUFFICIENT_RESOURCES;
6299 }
6300
6301 TRACE("modifying (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6302
6303 if ((uint8_t*)rr > tp.item->data) {
6304 RtlCopyMemory(newrr, tp.item->data, (uint8_t*)rr - tp.item->data);
6305 rroff = newrr + ((uint8_t*)rr - tp.item->data);
6306 } else {
6307 rroff = newrr;
6308 }
6309
6310 if ((uint8_t*)&rr->name[rr->n] < tp.item->data + tp.item->size)
6311 RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((uint8_t*)&rr->name[rr->n] - tp.item->data));
6312
6313 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp);
6314 if (!NT_SUCCESS(Status)) {
6315 ERR("insert_tree_item returned %08lx\n", Status);
6316 ExFreePool(newrr);
6317 return Status;
6318 }
6319 }
6320
6321 break;
6322 }
6323
6324 if (len > itemlen) {
6325 len -= itemlen;
6326 rr = (ROOT_REF*)&rr->name[rr->n];
6327 } else
6328 break;
6329 } while (len > 0);
6330 }
6331 } else {
6332 WARN("could not find ROOT_REF entry for subvol %I64x in %I64x\n", searchkey.offset, searchkey.obj_id);
6333 return STATUS_NOT_FOUND;
6334 }
6335
6336 return STATUS_SUCCESS;
6337 }
6338
6339 #ifdef _MSC_VER
6340 #pragma warning(push)
6341 #pragma warning(suppress: 28194)
6342 #endif
add_root_ref(_In_ device_extension * Vcb,_In_ uint64_t subvolid,_In_ uint64_t parsubvolid,_In_ __drv_aliasesMem ROOT_REF * rr,_In_opt_ PIRP Irp)6343 static NTSTATUS add_root_ref(_In_ device_extension* Vcb, _In_ uint64_t subvolid, _In_ uint64_t parsubvolid, _In_ __drv_aliasesMem ROOT_REF* rr, _In_opt_ PIRP Irp) {
6344 KEY searchkey;
6345 traverse_ptr tp;
6346 NTSTATUS Status;
6347
6348 searchkey.obj_id = parsubvolid;
6349 searchkey.obj_type = TYPE_ROOT_REF;
6350 searchkey.offset = subvolid;
6351
6352 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6353 if (!NT_SUCCESS(Status)) {
6354 ERR("error - find_item returned %08lx\n", Status);
6355 return Status;
6356 }
6357
6358 if (!keycmp(searchkey, tp.item->key)) {
6359 uint16_t rrsize = tp.item->size + (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n;
6360 uint8_t* rr2;
6361
6362 rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
6363 if (!rr2) {
6364 ERR("out of memory\n");
6365 return STATUS_INSUFFICIENT_RESOURCES;
6366 }
6367
6368 if (tp.item->size > 0)
6369 RtlCopyMemory(rr2, tp.item->data, tp.item->size);
6370
6371 RtlCopyMemory(rr2 + tp.item->size, rr, offsetof(ROOT_REF, name[0]) + rr->n);
6372 ExFreePool(rr);
6373
6374 Status = delete_tree_item(Vcb, &tp);
6375 if (!NT_SUCCESS(Status)) {
6376 ERR("delete_tree_item returned %08lx\n", Status);
6377 ExFreePool(rr2);
6378 return Status;
6379 }
6380
6381 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp);
6382 if (!NT_SUCCESS(Status)) {
6383 ERR("insert_tree_item returned %08lx\n", Status);
6384 ExFreePool(rr2);
6385 return Status;
6386 }
6387 } else {
6388 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n, NULL, Irp);
6389 if (!NT_SUCCESS(Status)) {
6390 ERR("insert_tree_item returned %08lx\n", Status);
6391 ExFreePool(rr);
6392 return Status;
6393 }
6394 }
6395
6396 return STATUS_SUCCESS;
6397 }
6398 #ifdef _MSC_VER
6399 #pragma warning(pop)
6400 #endif
6401
update_root_backref(device_extension * Vcb,uint64_t subvolid,uint64_t parsubvolid,PIRP Irp)6402 static NTSTATUS update_root_backref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, PIRP Irp) {
6403 KEY searchkey;
6404 traverse_ptr tp;
6405 uint8_t* data;
6406 uint16_t datalen;
6407 NTSTATUS Status;
6408
6409 searchkey.obj_id = parsubvolid;
6410 searchkey.obj_type = TYPE_ROOT_REF;
6411 searchkey.offset = subvolid;
6412
6413 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6414 if (!NT_SUCCESS(Status)) {
6415 ERR("error - find_item returned %08lx\n", Status);
6416 return Status;
6417 }
6418
6419 if (!keycmp(tp.item->key, searchkey) && tp.item->size > 0) {
6420 datalen = tp.item->size;
6421
6422 data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
6423 if (!data) {
6424 ERR("out of memory\n");
6425 return STATUS_INSUFFICIENT_RESOURCES;
6426 }
6427
6428 RtlCopyMemory(data, tp.item->data, datalen);
6429 } else {
6430 datalen = 0;
6431 data = NULL;
6432 }
6433
6434 searchkey.obj_id = subvolid;
6435 searchkey.obj_type = TYPE_ROOT_BACKREF;
6436 searchkey.offset = parsubvolid;
6437
6438 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6439 if (!NT_SUCCESS(Status)) {
6440 ERR("error - find_item returned %08lx\n", Status);
6441
6442 if (datalen > 0)
6443 ExFreePool(data);
6444
6445 return Status;
6446 }
6447
6448 if (!keycmp(tp.item->key, searchkey)) {
6449 Status = delete_tree_item(Vcb, &tp);
6450 if (!NT_SUCCESS(Status)) {
6451 ERR("delete_tree_item returned %08lx\n", Status);
6452
6453 if (datalen > 0)
6454 ExFreePool(data);
6455
6456 return Status;
6457 }
6458 }
6459
6460 if (datalen > 0) {
6461 Status = insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp);
6462 if (!NT_SUCCESS(Status)) {
6463 ERR("insert_tree_item returned %08lx\n", Status);
6464 ExFreePool(data);
6465 return Status;
6466 }
6467 }
6468
6469 return STATUS_SUCCESS;
6470 }
6471
add_root_item_to_cache(device_extension * Vcb,uint64_t root,PIRP Irp)6472 static NTSTATUS add_root_item_to_cache(device_extension* Vcb, uint64_t root, PIRP Irp) {
6473 KEY searchkey;
6474 traverse_ptr tp;
6475 NTSTATUS Status;
6476
6477 searchkey.obj_id = root;
6478 searchkey.obj_type = TYPE_ROOT_ITEM;
6479 searchkey.offset = 0xffffffffffffffff;
6480
6481 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6482 if (!NT_SUCCESS(Status)) {
6483 ERR("error - find_item returned %08lx\n", Status);
6484 return Status;
6485 }
6486
6487 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6488 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
6489 return STATUS_INTERNAL_ERROR;
6490 }
6491
6492 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
6493 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6494 if (!ri) {
6495 ERR("out of memory\n");
6496 return STATUS_INSUFFICIENT_RESOURCES;
6497 }
6498
6499 if (tp.item->size > 0)
6500 RtlCopyMemory(ri, tp.item->data, tp.item->size);
6501
6502 RtlZeroMemory(((uint8_t*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
6503
6504 Status = delete_tree_item(Vcb, &tp);
6505 if (!NT_SUCCESS(Status)) {
6506 ERR("delete_tree_item returned %08lx\n", Status);
6507 ExFreePool(ri);
6508 return Status;
6509 }
6510
6511 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6512 if (!NT_SUCCESS(Status)) {
6513 ERR("insert_tree_item returned %08lx\n", Status);
6514 ExFreePool(ri);
6515 return Status;
6516 }
6517 } else {
6518 tp.tree->write = true;
6519 }
6520
6521 return STATUS_SUCCESS;
6522 }
6523
flush_fileref(file_ref * fileref,LIST_ENTRY * batchlist,PIRP Irp)6524 static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp) {
6525 NTSTATUS Status;
6526
6527 // if fileref created and then immediately deleted, do nothing
6528 if (fileref->created && fileref->deleted) {
6529 fileref->dirty = false;
6530 return STATUS_SUCCESS;
6531 }
6532
6533 if (fileref->fcb->ads) {
6534 fileref->dirty = false;
6535 return STATUS_SUCCESS;
6536 }
6537
6538 if (fileref->created) {
6539 uint16_t disize;
6540 DIR_ITEM *di, *di2;
6541 uint32_t crc32;
6542
6543 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6544
6545 disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6546 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6547 if (!di) {
6548 ERR("out of memory\n");
6549 return STATUS_INSUFFICIENT_RESOURCES;
6550 }
6551
6552 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6553 di->key.obj_id = fileref->fcb->inode;
6554 di->key.obj_type = TYPE_INODE_ITEM;
6555 di->key.offset = 0;
6556 } else { // subvolume
6557 di->key.obj_id = fileref->fcb->subvol->id;
6558 di->key.obj_type = TYPE_ROOT_ITEM;
6559 di->key.offset = 0xffffffffffffffff;
6560 }
6561
6562 di->transid = fileref->fcb->Vcb->superblock.generation;
6563 di->m = 0;
6564 di->n = (uint16_t)fileref->dc->utf8.Length;
6565 di->type = fileref->fcb->type;
6566 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6567
6568 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6569 if (!di2) {
6570 ERR("out of memory\n");
6571 return STATUS_INSUFFICIENT_RESOURCES;
6572 }
6573
6574 RtlCopyMemory(di2, di, disize);
6575
6576 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6577 fileref->dc->index, di, disize, Batch_Insert);
6578 if (!NT_SUCCESS(Status)) {
6579 ERR("insert_tree_item_batch returned %08lx\n", Status);
6580 return Status;
6581 }
6582
6583 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6584 di2, disize, Batch_DirItem);
6585 if (!NT_SUCCESS(Status)) {
6586 ERR("insert_tree_item_batch returned %08lx\n", Status);
6587 return Status;
6588 }
6589
6590 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6591 INODE_REF* ir;
6592
6593 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6594 if (!ir) {
6595 ERR("out of memory\n");
6596 return STATUS_INSUFFICIENT_RESOURCES;
6597 }
6598
6599 ir->index = fileref->dc->index;
6600 ir->n = fileref->dc->utf8.Length;
6601 RtlCopyMemory(ir->name, fileref->dc->utf8.Buffer, ir->n);
6602
6603 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6604 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef);
6605 if (!NT_SUCCESS(Status)) {
6606 ERR("insert_tree_item_batch returned %08lx\n", Status);
6607 return Status;
6608 }
6609 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) {
6610 ULONG rrlen;
6611 ROOT_REF* rr;
6612
6613 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6614
6615 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6616 if (!rr) {
6617 ERR("out of memory\n");
6618 return STATUS_INSUFFICIENT_RESOURCES;
6619 }
6620
6621 rr->dir = fileref->parent->fcb->inode;
6622 rr->index = fileref->dc->index;
6623 rr->n = fileref->dc->utf8.Length;
6624 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6625
6626 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6627 if (!NT_SUCCESS(Status)) {
6628 ERR("add_root_ref returned %08lx\n", Status);
6629 return Status;
6630 }
6631
6632 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6633 if (!NT_SUCCESS(Status)) {
6634 ERR("update_root_backref returned %08lx\n", Status);
6635 return Status;
6636 }
6637 }
6638
6639 fileref->created = false;
6640 } else if (fileref->deleted) {
6641 uint32_t crc32;
6642 ANSI_STRING* name;
6643 DIR_ITEM* di;
6644
6645 name = &fileref->oldutf8;
6646
6647 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)name->Buffer, name->Length);
6648
6649 di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG);
6650 if (!di) {
6651 ERR("out of memory\n");
6652 return STATUS_INSUFFICIENT_RESOURCES;
6653 }
6654
6655 di->m = 0;
6656 di->n = name->Length;
6657 RtlCopyMemory(di->name, name->Buffer, name->Length);
6658
6659 // delete DIR_ITEM (0x54)
6660
6661 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6662 crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem);
6663 if (!NT_SUCCESS(Status)) {
6664 ERR("insert_tree_item_batch returned %08lx\n", Status);
6665 return Status;
6666 }
6667
6668 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6669 INODE_REF* ir;
6670
6671 // delete INODE_REF (0xc)
6672
6673 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG);
6674 if (!ir) {
6675 ERR("out of memory\n");
6676 return STATUS_INSUFFICIENT_RESOURCES;
6677 }
6678
6679 ir->index = fileref->oldindex;
6680 ir->n = name->Length;
6681 RtlCopyMemory(ir->name, name->Buffer, name->Length);
6682
6683 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF,
6684 fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef);
6685 if (!NT_SUCCESS(Status)) {
6686 ERR("insert_tree_item_batch returned %08lx\n", Status);
6687 return Status;
6688 }
6689 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6690 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp);
6691 if (!NT_SUCCESS(Status)) {
6692 ERR("delete_root_ref returned %08lx\n", Status);
6693 return Status;
6694 }
6695
6696 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6697 if (!NT_SUCCESS(Status)) {
6698 ERR("update_root_backref returned %08lx\n", Status);
6699 return Status;
6700 }
6701 }
6702
6703 // delete DIR_INDEX (0x60)
6704
6705 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6706 fileref->oldindex, NULL, 0, Batch_Delete);
6707 if (!NT_SUCCESS(Status)) {
6708 ERR("insert_tree_item_batch returned %08lx\n", Status);
6709 return Status;
6710 }
6711
6712 if (fileref->oldutf8.Buffer) {
6713 ExFreePool(fileref->oldutf8.Buffer);
6714 fileref->oldutf8.Buffer = NULL;
6715 }
6716 } else { // rename or change type
6717 PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->dc->utf8;
6718 uint32_t crc32, oldcrc32;
6719 uint16_t disize;
6720 DIR_ITEM *olddi, *di, *di2;
6721
6722 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6723
6724 if (!fileref->oldutf8.Buffer)
6725 oldcrc32 = crc32;
6726 else
6727 oldcrc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
6728
6729 olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG);
6730 if (!olddi) {
6731 ERR("out of memory\n");
6732 return STATUS_INSUFFICIENT_RESOURCES;
6733 }
6734
6735 olddi->m = 0;
6736 olddi->n = (uint16_t)oldutf8->Length;
6737 RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length);
6738
6739 // delete DIR_ITEM (0x54)
6740
6741 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6742 oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem);
6743 if (!NT_SUCCESS(Status)) {
6744 ERR("insert_tree_item_batch returned %08lx\n", Status);
6745 ExFreePool(olddi);
6746 return Status;
6747 }
6748
6749 // add DIR_ITEM (0x54)
6750
6751 disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6752 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6753 if (!di) {
6754 ERR("out of memory\n");
6755 return STATUS_INSUFFICIENT_RESOURCES;
6756 }
6757
6758 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6759 if (!di2) {
6760 ERR("out of memory\n");
6761 ExFreePool(di);
6762 return STATUS_INSUFFICIENT_RESOURCES;
6763 }
6764
6765 if (fileref->dc)
6766 di->key = fileref->dc->key;
6767 else if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6768 di->key.obj_id = fileref->fcb->inode;
6769 di->key.obj_type = TYPE_INODE_ITEM;
6770 di->key.offset = 0;
6771 } else { // subvolume
6772 di->key.obj_id = fileref->fcb->subvol->id;
6773 di->key.obj_type = TYPE_ROOT_ITEM;
6774 di->key.offset = 0xffffffffffffffff;
6775 }
6776
6777 di->transid = fileref->fcb->Vcb->superblock.generation;
6778 di->m = 0;
6779 di->n = (uint16_t)fileref->dc->utf8.Length;
6780 di->type = fileref->fcb->type;
6781 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6782
6783 RtlCopyMemory(di2, di, disize);
6784
6785 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6786 di, disize, Batch_DirItem);
6787 if (!NT_SUCCESS(Status)) {
6788 ERR("insert_tree_item_batch returned %08lx\n", Status);
6789 ExFreePool(di2);
6790 ExFreePool(di);
6791 return Status;
6792 }
6793
6794 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6795 INODE_REF *ir, *ir2;
6796
6797 // delete INODE_REF (0xc)
6798
6799 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG);
6800 if (!ir) {
6801 ERR("out of memory\n");
6802 ExFreePool(di2);
6803 return STATUS_INSUFFICIENT_RESOURCES;
6804 }
6805
6806 ir->index = fileref->dc->index;
6807 ir->n = oldutf8->Length;
6808 RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n);
6809
6810 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6811 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef);
6812 if (!NT_SUCCESS(Status)) {
6813 ERR("insert_tree_item_batch returned %08lx\n", Status);
6814 ExFreePool(ir);
6815 ExFreePool(di2);
6816 return Status;
6817 }
6818
6819 // add INODE_REF (0xc)
6820
6821 ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6822 if (!ir2) {
6823 ERR("out of memory\n");
6824 ExFreePool(di2);
6825 return STATUS_INSUFFICIENT_RESOURCES;
6826 }
6827
6828 ir2->index = fileref->dc->index;
6829 ir2->n = fileref->dc->utf8.Length;
6830 RtlCopyMemory(ir2->name, fileref->dc->utf8.Buffer, ir2->n);
6831
6832 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6833 ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef);
6834 if (!NT_SUCCESS(Status)) {
6835 ERR("insert_tree_item_batch returned %08lx\n", Status);
6836 ExFreePool(ir2);
6837 ExFreePool(di2);
6838 return Status;
6839 }
6840 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6841 ULONG rrlen;
6842 ROOT_REF* rr;
6843
6844 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp);
6845 if (!NT_SUCCESS(Status)) {
6846 ERR("delete_root_ref returned %08lx\n", Status);
6847 ExFreePool(di2);
6848 return Status;
6849 }
6850
6851 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6852
6853 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6854 if (!rr) {
6855 ERR("out of memory\n");
6856 ExFreePool(di2);
6857 return STATUS_INSUFFICIENT_RESOURCES;
6858 }
6859
6860 rr->dir = fileref->parent->fcb->inode;
6861 rr->index = fileref->dc->index;
6862 rr->n = fileref->dc->utf8.Length;
6863 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6864
6865 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6866 if (!NT_SUCCESS(Status)) {
6867 ERR("add_root_ref returned %08lx\n", Status);
6868 ExFreePool(di2);
6869 return Status;
6870 }
6871
6872 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6873 if (!NT_SUCCESS(Status)) {
6874 ERR("update_root_backref returned %08lx\n", Status);
6875 ExFreePool(di2);
6876 return Status;
6877 }
6878 }
6879
6880 // delete DIR_INDEX (0x60)
6881
6882 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6883 fileref->dc->index, NULL, 0, Batch_Delete);
6884 if (!NT_SUCCESS(Status)) {
6885 ERR("insert_tree_item_batch returned %08lx\n", Status);
6886 ExFreePool(di2);
6887 return Status;
6888 }
6889
6890 // add DIR_INDEX (0x60)
6891
6892 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6893 fileref->dc->index, di2, disize, Batch_Insert);
6894 if (!NT_SUCCESS(Status)) {
6895 ERR("insert_tree_item_batch returned %08lx\n", Status);
6896 ExFreePool(di2);
6897 return Status;
6898 }
6899
6900 if (fileref->oldutf8.Buffer) {
6901 ExFreePool(fileref->oldutf8.Buffer);
6902 fileref->oldutf8.Buffer = NULL;
6903 }
6904 }
6905
6906 fileref->dirty = false;
6907
6908 return STATUS_SUCCESS;
6909 }
6910
// Sends an IOCTL_ATA_PASS_THROUGH request carrying IDE_COMMAND_FLUSH_CACHE to
// every device of the volume that has a device object, is not read-only and
// has can_flush set, then waits on context.Event before freeing the IRPs.
//
// All requests are issued before the wait, with ioctl_completion as their
// completion routine. Failures are only logged; nothing is reported to the
// caller.
//
// NOTE(review): context.left is set to the number of eligible devices, and the
// wait presumably ends once ioctl_completion has run that many times. If
// IoAllocateIrp fails for a device no request is sent for it, so the wait may
// never be satisfied - confirm against ioctl_completion.
static void flush_disk_caches(device_extension* Vcb) {
    ioctl_context context;
    LIST_ENTRY* entry;
    ULONG slot = 0;

    // First pass: count the devices that will be flushed.
    context.left = 0;

    for (entry = Vcb->devices.Flink; entry != &Vcb->devices; entry = entry->Flink) {
        device* dev = CONTAINING_RECORD(entry, device, list_entry);

        if (dev->devobj && !dev->readonly && dev->can_flush)
            context.left++;
    }

    if (context.left == 0)
        return;

    KeInitializeEvent(&context.Event, NotificationEvent, false);

    context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
    if (!context.stripes) {
        ERR("out of memory\n");
        return;
    }

    RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);

    // Second pass: build and send one request per eligible device.
    for (entry = Vcb->devices.Flink; entry != &Vcb->devices; entry = entry->Flink) {
        device* dev = CONTAINING_RECORD(entry, device, list_entry);
        ioctl_context_stripe* stripe;

        if (!dev->devobj || dev->readonly || !dev->can_flush)
            continue;

        stripe = &context.stripes[slot];

        RtlZeroMemory(&stripe->apte, sizeof(ATA_PASS_THROUGH_EX));

        stripe->apte.Length = sizeof(ATA_PASS_THROUGH_EX);
        stripe->apte.TimeOutValue = 5;
        stripe->apte.CurrentTaskFile[6] = IDE_COMMAND_FLUSH_CACHE;

        stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);

        if (stripe->Irp) {
            PIO_STACK_LOCATION IrpSp = IoGetNextIrpStackLocation(stripe->Irp);

            IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
            IrpSp->FileObject = dev->fileobj;

            IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_ATA_PASS_THROUGH;
            IrpSp->Parameters.DeviceIoControl.InputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
            IrpSp->Parameters.DeviceIoControl.OutputBufferLength = sizeof(ATA_PASS_THROUGH_EX);

            // The same ATA_PASS_THROUGH_EX serves as input and output buffer.
            stripe->Irp->AssociatedIrp.SystemBuffer = &stripe->apte;
            stripe->Irp->Flags |= IRP_BUFFERED_IO | IRP_INPUT_OPERATION;
            stripe->Irp->UserBuffer = &stripe->apte;
            stripe->Irp->UserIosb = &stripe->iosb;

            IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);

            IoCallDriver(dev->devobj, stripe->Irp);
        } else {
            ERR("IoAllocateIrp failed\n");
        }

        // The slot is used up even when no IRP could be allocated for it.
        slot++;
    }

    KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);

    for (unsigned int i = 0; i < slot; i++) {
        if (context.stripes[i].Irp)
            IoFreeIrp(context.stripes[i].Irp);
    }

    ExFreePool(context.stripes);
}
6999
flush_changed_dev_stats(device_extension * Vcb,device * dev,PIRP Irp)7000 static NTSTATUS flush_changed_dev_stats(device_extension* Vcb, device* dev, PIRP Irp) {
7001 NTSTATUS Status;
7002 KEY searchkey;
7003 traverse_ptr tp;
7004 uint16_t statslen;
7005 uint64_t* stats;
7006
7007 searchkey.obj_id = 0;
7008 searchkey.obj_type = TYPE_DEV_STATS;
7009 searchkey.offset = dev->devitem.dev_id;
7010
7011 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp);
7012 if (!NT_SUCCESS(Status)) {
7013 ERR("find_item returned %08lx\n", Status);
7014 return Status;
7015 }
7016
7017 if (!keycmp(tp.item->key, searchkey)) {
7018 Status = delete_tree_item(Vcb, &tp);
7019 if (!NT_SUCCESS(Status)) {
7020 ERR("delete_tree_item returned %08lx\n", Status);
7021 return Status;
7022 }
7023 }
7024
7025 statslen = sizeof(uint64_t) * 5;
7026 stats = ExAllocatePoolWithTag(PagedPool, statslen, ALLOC_TAG);
7027 if (!stats) {
7028 ERR("out of memory\n");
7029 return STATUS_INSUFFICIENT_RESOURCES;
7030 }
7031
7032 RtlCopyMemory(stats, dev->stats, statslen);
7033
7034 Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, dev->devitem.dev_id, stats, statslen, NULL, Irp);
7035 if (!NT_SUCCESS(Status)) {
7036 ERR("insert_tree_item returned %08lx\n", Status);
7037 ExFreePool(stats);
7038 return Status;
7039 }
7040
7041 return STATUS_SUCCESS;
7042 }
7043
flush_subvol(device_extension * Vcb,root * r,PIRP Irp)7044 static NTSTATUS flush_subvol(device_extension* Vcb, root* r, PIRP Irp) {
7045 NTSTATUS Status;
7046
7047 if (r != Vcb->root_root && r != Vcb->chunk_root) {
7048 KEY searchkey;
7049 traverse_ptr tp;
7050 ROOT_ITEM* ri;
7051
7052 searchkey.obj_id = r->id;
7053 searchkey.obj_type = TYPE_ROOT_ITEM;
7054 searchkey.offset = 0xffffffffffffffff;
7055
7056 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
7057 if (!NT_SUCCESS(Status)) {
7058 ERR("error - find_item returned %08lx\n", Status);
7059 return Status;
7060 }
7061
7062 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7063 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
7064 return STATUS_INTERNAL_ERROR;
7065 }
7066
7067 ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
7068 if (!ri) {
7069 ERR("out of memory\n");
7070 return STATUS_INSUFFICIENT_RESOURCES;
7071 }
7072
7073 RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
7074
7075 Status = delete_tree_item(Vcb, &tp);
7076 if (!NT_SUCCESS(Status)) {
7077 ERR("delete_tree_item returned %08lx\n", Status);
7078 return Status;
7079 }
7080
7081 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
7082 if (!NT_SUCCESS(Status)) {
7083 ERR("insert_tree_item returned %08lx\n", Status);
7084 return Status;
7085 }
7086 }
7087
7088 if (r->received) {
7089 KEY searchkey;
7090 traverse_ptr tp;
7091
7092 if (!Vcb->uuid_root) {
7093 root* uuid_root;
7094
7095 TRACE("uuid root doesn't exist, creating it\n");
7096
7097 Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, false, 0, Irp);
7098
7099 if (!NT_SUCCESS(Status)) {
7100 ERR("create_root returned %08lx\n", Status);
7101 return Status;
7102 }
7103
7104 Vcb->uuid_root = uuid_root;
7105 }
7106
7107 RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid, sizeof(uint64_t));
7108 searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
7109 RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
7110
7111 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
7112 if (!NT_SUCCESS(Status)) {
7113 ERR("find_item returned %08lx\n", Status);
7114 return Status;
7115 }
7116
7117 if (!keycmp(tp.item->key, searchkey)) {
7118 if (tp.item->size + sizeof(uint64_t) <= Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)) {
7119 uint64_t* ids;
7120
7121 ids = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(uint64_t), ALLOC_TAG);
7122 if (!ids) {
7123 ERR("out of memory\n");
7124 return STATUS_INSUFFICIENT_RESOURCES;
7125 }
7126
7127 RtlCopyMemory(ids, tp.item->data, tp.item->size);
7128 RtlCopyMemory((uint8_t*)ids + tp.item->size, &r->id, sizeof(uint64_t));
7129
7130 Status = delete_tree_item(Vcb, &tp);
7131 if (!NT_SUCCESS(Status)) {
7132 ERR("delete_tree_item returned %08lx\n", Status);
7133 ExFreePool(ids);
7134 return Status;
7135 }
7136
7137 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ids, tp.item->size + sizeof(uint64_t), NULL, Irp);
7138 if (!NT_SUCCESS(Status)) {
7139 ERR("insert_tree_item returned %08lx\n", Status);
7140 ExFreePool(ids);
7141 return Status;
7142 }
7143 }
7144 } else {
7145 uint64_t* root_num;
7146
7147 root_num = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t), ALLOC_TAG);
7148 if (!root_num) {
7149 ERR("out of memory\n");
7150 return STATUS_INSUFFICIENT_RESOURCES;
7151 }
7152
7153 *root_num = r->id;
7154
7155 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(uint64_t), NULL, Irp);
7156 if (!NT_SUCCESS(Status)) {
7157 ERR("insert_tree_item returned %08lx\n", Status);
7158 ExFreePool(root_num);
7159 return Status;
7160 }
7161 }
7162
7163 r->received = false;
7164 }
7165
7166 r->dirty = false;
7167
7168 return STATUS_SUCCESS;
7169 }
7170
test_not_full(device_extension * Vcb)7171 static NTSTATUS test_not_full(device_extension* Vcb) {
7172 uint64_t reserve, could_alloc, free_space;
7173 LIST_ENTRY* le;
7174
7175 // This function ensures we drop into readonly mode if we're about to leave very little
7176 // space for metadata - this is similar to the "global reserve" of the Linux driver.
7177 // Otherwise we might completely fill our space, at which point due to COW we can't
7178 // delete anything in order to fix this.
7179
7180 reserve = Vcb->extent_root->root_item.bytes_used;
7181 reserve += Vcb->root_root->root_item.bytes_used;
7182 if (Vcb->checksum_root) reserve += Vcb->checksum_root->root_item.bytes_used;
7183
7184 reserve = max(reserve, 0x1000000); // 16 M
7185 reserve = min(reserve, 0x20000000); // 512 M
7186
7187 // Find out how much space would be available for new metadata chunks
7188
7189 could_alloc = 0;
7190
7191 if (Vcb->metadata_flags & BLOCK_FLAG_RAID5) {
7192 uint64_t s1 = 0, s2 = 0, s3 = 0;
7193
7194 le = Vcb->devices.Flink;
7195 while (le != &Vcb->devices) {
7196 device* dev = CONTAINING_RECORD(le, device, list_entry);
7197
7198 if (!dev->readonly) {
7199 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7200
7201 if (space >= s1) {
7202 s3 = s2;
7203 s2 = s1;
7204 s1 = space;
7205 } else if (space >= s2) {
7206 s3 = s2;
7207 s2 = space;
7208 } else if (space >= s3)
7209 s3 = space;
7210 }
7211
7212 le = le->Flink;
7213 }
7214
7215 could_alloc = s3 * 2;
7216 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID6)) {
7217 uint64_t s1 = 0, s2 = 0, s3 = 0, s4 = 0;
7218
7219 le = Vcb->devices.Flink;
7220 while (le != &Vcb->devices) {
7221 device* dev = CONTAINING_RECORD(le, device, list_entry);
7222
7223 if (!dev->readonly) {
7224 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7225
7226 if (space >= s1) {
7227 s4 = s3;
7228 s3 = s2;
7229 s2 = s1;
7230 s1 = space;
7231 } else if (space >= s2) {
7232 s4 = s3;
7233 s3 = s2;
7234 s2 = space;
7235 } else if (space >= s3) {
7236 s4 = s3;
7237 s3 = space;
7238 } else if (space >= s4)
7239 s4 = space;
7240 }
7241
7242 le = le->Flink;
7243 }
7244
7245 could_alloc = s4 * 2;
7246 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1)) {
7247 uint64_t s1 = 0, s2 = 0;
7248
7249 le = Vcb->devices.Flink;
7250 while (le != &Vcb->devices) {
7251 device* dev = CONTAINING_RECORD(le, device, list_entry);
7252
7253 if (!dev->readonly) {
7254 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7255
7256 if (space >= s1) {
7257 s2 = s1;
7258 s1 = space;
7259 } else if (space >= s2)
7260 s2 = space;
7261 }
7262
7263 le = le->Flink;
7264 }
7265
7266 if (Vcb->metadata_flags & BLOCK_FLAG_RAID1)
7267 could_alloc = s2;
7268 else // RAID0
7269 could_alloc = s2 * 2;
7270 } else if (Vcb->metadata_flags & BLOCK_FLAG_DUPLICATE) {
7271 le = Vcb->devices.Flink;
7272 while (le != &Vcb->devices) {
7273 device* dev = CONTAINING_RECORD(le, device, list_entry);
7274
7275 if (!dev->readonly) {
7276 uint64_t space = (dev->devitem.num_bytes - dev->devitem.bytes_used) / 2;
7277
7278 could_alloc = max(could_alloc, space);
7279 }
7280
7281 le = le->Flink;
7282 }
7283 } else if (Vcb->metadata_flags & BLOCK_FLAG_RAID1C3) {
7284 uint64_t s1 = 0, s2 = 0, s3 = 0;
7285
7286 le = Vcb->devices.Flink;
7287 while (le != &Vcb->devices) {
7288 device* dev = CONTAINING_RECORD(le, device, list_entry);
7289
7290 if (!dev->readonly) {
7291 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7292
7293 if (space >= s1) {
7294 s3 = s2;
7295 s2 = s1;
7296 s1 = space;
7297 } else if (space >= s2) {
7298 s3 = s2;
7299 s2 = space;
7300 } else if (space >= s3)
7301 s3 = space;
7302 }
7303
7304 le = le->Flink;
7305 }
7306
7307 could_alloc = s3;
7308 } else if (Vcb->metadata_flags & BLOCK_FLAG_RAID1C4) {
7309 uint64_t s1 = 0, s2 = 0, s3 = 0, s4 = 0;
7310
7311 le = Vcb->devices.Flink;
7312 while (le != &Vcb->devices) {
7313 device* dev = CONTAINING_RECORD(le, device, list_entry);
7314
7315 if (!dev->readonly) {
7316 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7317
7318 if (space >= s1) {
7319 s4 = s3;
7320 s3 = s2;
7321 s2 = s1;
7322 s1 = space;
7323 } else if (space >= s2) {
7324 s4 = s3;
7325 s3 = s2;
7326 s2 = space;
7327 } else if (space >= s3) {
7328 s4 = s3;
7329 s3 = space;
7330 } else if (space >= s4)
7331 s4 = space;
7332 }
7333
7334 le = le->Flink;
7335 }
7336
7337 could_alloc = s4;
7338 } else { // SINGLE
7339 le = Vcb->devices.Flink;
7340 while (le != &Vcb->devices) {
7341 device* dev = CONTAINING_RECORD(le, device, list_entry);
7342
7343 if (!dev->readonly) {
7344 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7345
7346 could_alloc = max(could_alloc, space);
7347 }
7348
7349 le = le->Flink;
7350 }
7351 }
7352
7353 if (could_alloc >= reserve)
7354 return STATUS_SUCCESS;
7355
7356 free_space = 0;
7357
7358 le = Vcb->chunks.Flink;
7359 while (le != &Vcb->chunks) {
7360 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7361
7362 if (!c->reloc && !c->readonly && c->chunk_item->type & BLOCK_FLAG_METADATA) {
7363 free_space += c->chunk_item->size - c->used;
7364
7365 if (free_space + could_alloc >= reserve)
7366 return STATUS_SUCCESS;
7367 }
7368
7369 le = le->Flink;
7370 }
7371
7372 return STATUS_DISK_FULL;
7373 }
7374
check_for_orphans_root(device_extension * Vcb,root * r,PIRP Irp)7375 static NTSTATUS check_for_orphans_root(device_extension* Vcb, root* r, PIRP Irp) {
7376 NTSTATUS Status;
7377 KEY searchkey;
7378 traverse_ptr tp;
7379 LIST_ENTRY rollback;
7380
7381 TRACE("(%p, %p)\n", Vcb, r);
7382
7383 InitializeListHead(&rollback);
7384
7385 searchkey.obj_id = BTRFS_ORPHAN_INODE_OBJID;
7386 searchkey.obj_type = TYPE_ORPHAN_INODE;
7387 searchkey.offset = 0;
7388
7389 Status = find_item(Vcb, r, &tp, &searchkey, false, Irp);
7390 if (!NT_SUCCESS(Status)) {
7391 ERR("find_item returned %08lx\n", Status);
7392 return Status;
7393 }
7394
7395 do {
7396 traverse_ptr next_tp;
7397
7398 if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
7399 break;
7400
7401 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
7402 fcb* fcb;
7403
7404 TRACE("removing orphaned inode %I64x\n", tp.item->key.offset);
7405
7406 Status = open_fcb(Vcb, r, tp.item->key.offset, 0, NULL, false, NULL, &fcb, PagedPool, Irp);
7407 if (!NT_SUCCESS(Status))
7408 ERR("open_fcb returned %08lx\n", Status);
7409 else {
7410 if (fcb->inode_item.st_nlink == 0) {
7411 if (fcb->type != BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0) {
7412 Status = excise_extents(Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size), Irp, &rollback);
7413 if (!NT_SUCCESS(Status)) {
7414 ERR("excise_extents returned %08lx\n", Status);
7415 goto end;
7416 }
7417 }
7418
7419 fcb->deleted = true;
7420
7421 mark_fcb_dirty(fcb);
7422 }
7423
7424 free_fcb(fcb);
7425
7426 Status = delete_tree_item(Vcb, &tp);
7427 if (!NT_SUCCESS(Status)) {
7428 ERR("delete_tree_item returned %08lx\n", Status);
7429 goto end;
7430 }
7431 }
7432 }
7433
7434 if (find_next_item(Vcb, &tp, &next_tp, false, Irp))
7435 tp = next_tp;
7436 else
7437 break;
7438 } while (true);
7439
7440 Status = STATUS_SUCCESS;
7441
7442 clear_rollback(&rollback);
7443
7444 end:
7445 do_rollback(Vcb, &rollback);
7446
7447 return Status;
7448 }
7449
check_for_orphans(device_extension * Vcb,PIRP Irp)7450 static NTSTATUS check_for_orphans(device_extension* Vcb, PIRP Irp) {
7451 NTSTATUS Status;
7452 LIST_ENTRY* le;
7453
7454 if (IsListEmpty(&Vcb->dirty_filerefs))
7455 return STATUS_SUCCESS;
7456
7457 le = Vcb->dirty_filerefs.Flink;
7458 while (le != &Vcb->dirty_filerefs) {
7459 file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry_dirty);
7460
7461 if (!fr->fcb->subvol->checked_for_orphans) {
7462 Status = check_for_orphans_root(Vcb, fr->fcb->subvol, Irp);
7463 if (!NT_SUCCESS(Status)) {
7464 ERR("check_for_orphans_root returned %08lx\n", Status);
7465 return Status;
7466 }
7467
7468 fr->fcb->subvol->checked_for_orphans = true;
7469 }
7470
7471 le = le->Flink;
7472 }
7473
7474 return STATUS_SUCCESS;
7475 }
7476
do_write2(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)7477 static NTSTATUS do_write2(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
7478 NTSTATUS Status;
7479 LIST_ENTRY *le, batchlist;
7480 bool cache_changed = false;
7481 volume_device_extension* vde;
7482 bool no_cache = false;
7483 #ifdef DEBUG_FLUSH_TIMES
7484 uint64_t filerefs = 0, fcbs = 0;
7485 LARGE_INTEGER freq, time1, time2;
7486 #endif
7487 #ifdef DEBUG_WRITE_LOOPS
7488 UINT loops = 0;
7489 #endif
7490
7491 TRACE("(%p)\n", Vcb);
7492
7493 InitializeListHead(&batchlist);
7494
7495 #ifdef DEBUG_FLUSH_TIMES
7496 time1 = KeQueryPerformanceCounter(&freq);
7497 #endif
7498
7499 Status = check_for_orphans(Vcb, Irp);
7500 if (!NT_SUCCESS(Status)) {
7501 ERR("check_for_orphans returned %08lx\n", Status);
7502 return Status;
7503 }
7504
7505 ExAcquireResourceExclusiveLite(&Vcb->dirty_filerefs_lock, true);
7506
7507 while (!IsListEmpty(&Vcb->dirty_filerefs)) {
7508 file_ref* fr = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_filerefs), file_ref, list_entry_dirty);
7509
7510 flush_fileref(fr, &batchlist, Irp);
7511 free_fileref(fr);
7512
7513 #ifdef DEBUG_FLUSH_TIMES
7514 filerefs++;
7515 #endif
7516 }
7517
7518 ExReleaseResourceLite(&Vcb->dirty_filerefs_lock);
7519
7520 Status = commit_batch_list(Vcb, &batchlist, Irp);
7521 if (!NT_SUCCESS(Status)) {
7522 ERR("commit_batch_list returned %08lx\n", Status);
7523 return Status;
7524 }
7525
7526 #ifdef DEBUG_FLUSH_TIMES
7527 time2 = KeQueryPerformanceCounter(NULL);
7528
7529 ERR("flushed %I64u filerefs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7530
7531 time1 = KeQueryPerformanceCounter(&freq);
7532 #endif
7533
7534 // We process deleted streams first, so we don't run over our xattr
7535 // limit unless we absolutely have to.
7536 // We also process deleted normal files, to avoid any problems
7537 // caused by inode collisions.
7538
7539 ExAcquireResourceExclusiveLite(&Vcb->dirty_fcbs_lock, true);
7540
7541 le = Vcb->dirty_fcbs.Flink;
7542 while (le != &Vcb->dirty_fcbs) {
7543 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7544 LIST_ENTRY* le2 = le->Flink;
7545
7546 if (fcb->deleted) {
7547 ExAcquireResourceExclusiveLite(fcb->Header.Resource, true);
7548 Status = flush_fcb(fcb, false, &batchlist, Irp);
7549 ExReleaseResourceLite(fcb->Header.Resource);
7550
7551 free_fcb(fcb);
7552
7553 if (!NT_SUCCESS(Status)) {
7554 ERR("flush_fcb returned %08lx\n", Status);
7555 clear_batch_list(Vcb, &batchlist);
7556 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7557 return Status;
7558 }
7559
7560 #ifdef DEBUG_FLUSH_TIMES
7561 fcbs++;
7562 #endif
7563 }
7564
7565 le = le2;
7566 }
7567
7568 Status = commit_batch_list(Vcb, &batchlist, Irp);
7569 if (!NT_SUCCESS(Status)) {
7570 ERR("commit_batch_list returned %08lx\n", Status);
7571 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7572 return Status;
7573 }
7574
7575 le = Vcb->dirty_fcbs.Flink;
7576 while (le != &Vcb->dirty_fcbs) {
7577 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7578 LIST_ENTRY* le2 = le->Flink;
7579
7580 if (fcb->subvol != Vcb->root_root) {
7581 ExAcquireResourceExclusiveLite(fcb->Header.Resource, true);
7582 Status = flush_fcb(fcb, false, &batchlist, Irp);
7583 ExReleaseResourceLite(fcb->Header.Resource);
7584 free_fcb(fcb);
7585
7586 if (!NT_SUCCESS(Status)) {
7587 ERR("flush_fcb returned %08lx\n", Status);
7588 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7589 return Status;
7590 }
7591
7592 #ifdef DEBUG_FLUSH_TIMES
7593 fcbs++;
7594 #endif
7595 }
7596
7597 le = le2;
7598 }
7599
7600 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7601
7602 Status = commit_batch_list(Vcb, &batchlist, Irp);
7603 if (!NT_SUCCESS(Status)) {
7604 ERR("commit_batch_list returned %08lx\n", Status);
7605 return Status;
7606 }
7607
7608 #ifdef DEBUG_FLUSH_TIMES
7609 time2 = KeQueryPerformanceCounter(NULL);
7610
7611 ERR("flushed %I64u fcbs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7612 #endif
7613
7614 // no need to get dirty_subvols_lock here, as we have tree_lock exclusively
7615 while (!IsListEmpty(&Vcb->dirty_subvols)) {
7616 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_subvols), root, list_entry_dirty);
7617
7618 Status = flush_subvol(Vcb, r, Irp);
7619 if (!NT_SUCCESS(Status)) {
7620 ERR("flush_subvol returned %08lx\n", Status);
7621 return Status;
7622 }
7623 }
7624
7625 if (!IsListEmpty(&Vcb->drop_roots)) {
7626 Status = drop_roots(Vcb, Irp, rollback);
7627
7628 if (!NT_SUCCESS(Status)) {
7629 ERR("drop_roots returned %08lx\n", Status);
7630 return Status;
7631 }
7632 }
7633
7634 Status = update_chunks(Vcb, &batchlist, Irp, rollback);
7635
7636 if (!NT_SUCCESS(Status)) {
7637 ERR("update_chunks returned %08lx\n", Status);
7638 return Status;
7639 }
7640
7641 Status = commit_batch_list(Vcb, &batchlist, Irp);
7642
7643 // If only changing superblock, e.g. changing label, we still need to rewrite
7644 // the root tree so the generations match, otherwise you won't be able to mount on Linux.
7645 if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
7646 KEY searchkey;
7647
7648 traverse_ptr tp;
7649
7650 searchkey.obj_id = 0;
7651 searchkey.obj_type = 0;
7652 searchkey.offset = 0;
7653
7654 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
7655 if (!NT_SUCCESS(Status)) {
7656 ERR("error - find_item returned %08lx\n", Status);
7657 return Status;
7658 }
7659
7660 Vcb->root_root->treeholder.tree->write = true;
7661 }
7662
7663 // make sure we always update the extent tree
7664 Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp);
7665 if (!NT_SUCCESS(Status)) {
7666 ERR("add_root_item_to_cache returned %08lx\n", Status);
7667 return Status;
7668 }
7669
7670 if (Vcb->stats_changed) {
7671 le = Vcb->devices.Flink;
7672 while (le != &Vcb->devices) {
7673 device* dev = CONTAINING_RECORD(le, device, list_entry);
7674
7675 if (dev->stats_changed) {
7676 Status = flush_changed_dev_stats(Vcb, dev, Irp);
7677 if (!NT_SUCCESS(Status)) {
7678 ERR("flush_changed_dev_stats returned %08lx\n", Status);
7679 return Status;
7680 }
7681 dev->stats_changed = false;
7682 }
7683
7684 le = le->Flink;
7685 }
7686
7687 Vcb->stats_changed = false;
7688 }
7689
7690 do {
7691 Status = add_parents(Vcb, Irp);
7692 if (!NT_SUCCESS(Status)) {
7693 ERR("add_parents returned %08lx\n", Status);
7694 goto end;
7695 }
7696
7697 Status = allocate_tree_extents(Vcb, Irp, rollback);
7698 if (!NT_SUCCESS(Status)) {
7699 ERR("allocate_tree_extents returned %08lx\n", Status);
7700 goto end;
7701 }
7702
7703 Status = do_splits(Vcb, Irp, rollback);
7704 if (!NT_SUCCESS(Status)) {
7705 ERR("do_splits returned %08lx\n", Status);
7706 goto end;
7707 }
7708
7709 Status = update_chunk_usage(Vcb, Irp, rollback);
7710 if (!NT_SUCCESS(Status)) {
7711 ERR("update_chunk_usage returned %08lx\n", Status);
7712 goto end;
7713 }
7714
7715 if (!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
7716 if (!no_cache) {
7717 Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
7718 if (!NT_SUCCESS(Status)) {
7719 WARN("allocate_cache returned %08lx\n", Status);
7720 no_cache = true;
7721 cache_changed = false;
7722 }
7723 }
7724 } else {
7725 Status = update_chunk_caches_tree(Vcb, Irp);
7726 if (!NT_SUCCESS(Status)) {
7727 ERR("update_chunk_caches_tree returned %08lx\n", Status);
7728 goto end;
7729 }
7730 }
7731
7732 #ifdef DEBUG_WRITE_LOOPS
7733 loops++;
7734
7735 if (cache_changed)
7736 ERR("cache has changed, looping again\n");
7737 #endif
7738 } while (cache_changed || !trees_consistent(Vcb));
7739
7740 #ifdef DEBUG_WRITE_LOOPS
7741 ERR("%u loops\n", loops);
7742 #endif
7743
7744 TRACE("trees consistent\n");
7745
7746 Status = update_root_root(Vcb, no_cache, Irp, rollback);
7747 if (!NT_SUCCESS(Status)) {
7748 ERR("update_root_root returned %08lx\n", Status);
7749 goto end;
7750 }
7751
7752 Status = write_trees(Vcb, Irp);
7753 if (!NT_SUCCESS(Status)) {
7754 ERR("write_trees returned %08lx\n", Status);
7755 goto end;
7756 }
7757
7758 Status = test_not_full(Vcb);
7759 if (!NT_SUCCESS(Status)) {
7760 ERR("test_not_full returned %08lx\n", Status);
7761 goto end;
7762 }
7763
7764 #ifdef DEBUG_PARANOID
7765 le = Vcb->trees.Flink;
7766 while (le != &Vcb->trees) {
7767 tree* t = CONTAINING_RECORD(le, tree, list_entry);
7768 KEY searchkey;
7769 traverse_ptr tp;
7770
7771 searchkey.obj_id = t->header.address;
7772 searchkey.obj_type = TYPE_METADATA_ITEM;
7773 searchkey.offset = 0xffffffffffffffff;
7774
7775 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
7776 if (!NT_SUCCESS(Status)) {
7777 ERR("error - find_item returned %08lx\n", Status);
7778 goto end;
7779 }
7780
7781 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7782 searchkey.obj_id = t->header.address;
7783 searchkey.obj_type = TYPE_EXTENT_ITEM;
7784 searchkey.offset = 0xffffffffffffffff;
7785
7786 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
7787 if (!NT_SUCCESS(Status)) {
7788 ERR("error - find_item returned %08lx\n", Status);
7789 goto end;
7790 }
7791
7792 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7793 ERR("error - could not find entry in extent tree for tree at %I64x\n", t->header.address);
7794 Status = STATUS_INTERNAL_ERROR;
7795 goto end;
7796 }
7797 }
7798
7799 le = le->Flink;
7800 }
7801 #endif
7802
7803 Vcb->superblock.cache_generation = Vcb->superblock.generation;
7804
7805 if (!Vcb->options.no_barrier)
7806 flush_disk_caches(Vcb);
7807
7808 Status = write_superblocks(Vcb, Irp);
7809 if (!NT_SUCCESS(Status)) {
7810 ERR("write_superblocks returned %08lx\n", Status);
7811 goto end;
7812 }
7813
7814 vde = Vcb->vde;
7815
7816 if (vde) {
7817 pdo_device_extension* pdode = vde->pdode;
7818
7819 ExAcquireResourceSharedLite(&pdode->child_lock, true);
7820
7821 le = pdode->children.Flink;
7822
7823 while (le != &pdode->children) {
7824 volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
7825
7826 vc->generation = Vcb->superblock.generation;
7827 le = le->Flink;
7828 }
7829
7830 ExReleaseResourceLite(&pdode->child_lock);
7831 }
7832
7833 clean_space_cache(Vcb);
7834
7835 le = Vcb->chunks.Flink;
7836 while (le != &Vcb->chunks) {
7837 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7838
7839 c->changed = false;
7840 c->space_changed = false;
7841
7842 le = le->Flink;
7843 }
7844
7845 Vcb->superblock.generation++;
7846
7847 Status = STATUS_SUCCESS;
7848
7849 le = Vcb->trees.Flink;
7850 while (le != &Vcb->trees) {
7851 tree* t = CONTAINING_RECORD(le, tree, list_entry);
7852
7853 t->write = false;
7854
7855 le = le->Flink;
7856 }
7857
7858 Vcb->need_write = false;
7859
7860 while (!IsListEmpty(&Vcb->drop_roots)) {
7861 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->drop_roots), root, list_entry);
7862
7863 if (IsListEmpty(&r->fcbs)) {
7864 ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
7865 ExFreePool(r->nonpaged);
7866 ExFreePool(r);
7867 } else
7868 r->dropped = true;
7869 }
7870
7871 end:
7872 TRACE("do_write returning %08lx\n", Status);
7873
7874 return Status;
7875 }
7876
do_write(device_extension * Vcb,PIRP Irp)7877 NTSTATUS do_write(device_extension* Vcb, PIRP Irp) {
7878 LIST_ENTRY rollback;
7879 NTSTATUS Status;
7880
7881 InitializeListHead(&rollback);
7882
7883 Status = do_write2(Vcb, Irp, &rollback);
7884
7885 if (!NT_SUCCESS(Status)) {
7886 ERR("do_write2 returned %08lx, dropping into readonly mode\n", Status);
7887 Vcb->readonly = true;
7888 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_FORCED_CLOSED);
7889 do_rollback(Vcb, &rollback);
7890 } else
7891 clear_rollback(&rollback);
7892
7893 return Status;
7894 }
7895
/* Takes tree_lock exclusively, commits the volume if it has pending writes
 * and is not readonly, then frees the loaded trees. A failed commit is only
 * logged; nothing is returned to the caller. */
static void do_flush(device_extension* Vcb) {
    NTSTATUS Status = STATUS_SUCCESS;

    ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true);

    if (Vcb->need_write && !Vcb->readonly)
        Status = do_write(Vcb, NULL);

    // trees are freed whether or not a write happened or succeeded
    free_trees(Vcb);

    if (!NT_SUCCESS(Status))
        ERR("do_write returned %08lx\n", Status);

    ExReleaseResourceLite(&Vcb->tree_lock);
}
7913
_Function_class_(KSTART_ROUTINE)7914 _Function_class_(KSTART_ROUTINE)
7915 void __stdcall flush_thread(void* context) {
7916 DEVICE_OBJECT* devobj = context;
7917 device_extension* Vcb = devobj->DeviceExtension;
7918 LARGE_INTEGER due_time;
7919
7920 ObReferenceObject(devobj);
7921
7922 KeInitializeTimer(&Vcb->flush_thread_timer);
7923
7924 due_time.QuadPart = (uint64_t)Vcb->options.flush_interval * -10000000;
7925
7926 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7927
7928 while (true) {
7929 KeWaitForSingleObject(&Vcb->flush_thread_timer, Executive, KernelMode, false, NULL);
7930
7931 if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing)
7932 break;
7933
7934 if (!Vcb->locked)
7935 do_flush(Vcb);
7936
7937 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7938 }
7939
7940 ObDereferenceObject(devobj);
7941 KeCancelTimer(&Vcb->flush_thread_timer);
7942
7943 KeSetEvent(&Vcb->flush_thread_finished, 0, false);
7944
7945 PsTerminateSystemThread(STATUS_SUCCESS);
7946 }
7947