1 /* Copyright (c) Mark Harmstone 2016-17 2 * 3 * This file is part of WinBtrfs. 4 * 5 * WinBtrfs is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU Lesser General Public Licence as published by 7 * the Free Software Foundation, either version 3 of the Licence, or 8 * (at your option) any later version. 9 * 10 * WinBtrfs is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public Licence for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public Licence 16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ 17 18 #include "btrfs_drv.h" 19 #include "xxhash.h" 20 #include "crc32c.h" 21 #include <ata.h> 22 #include <ntddscsi.h> 23 #include <ntddstor.h> 24 25 /* cf. __MAX_CSUM_ITEMS in Linux - it needs sizeof(leaf_node) bytes free 26 * so it can do a split. Linux tries to get it so a run will fit in a 27 * sector, but the MAX_CSUM_ITEMS logic is wrong... 
*/ 28 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - (2 * sizeof(leaf_node))) 29 30 // #define DEBUG_WRITE_LOOPS 31 32 #define BATCH_ITEM_LIMIT 1000 33 34 typedef struct { 35 KEVENT Event; 36 IO_STATUS_BLOCK iosb; 37 } write_context; 38 39 typedef struct { 40 EXTENT_ITEM_TREE eit; 41 uint8_t type; 42 TREE_BLOCK_REF tbr; 43 } EXTENT_ITEM_TREE2; 44 45 typedef struct { 46 EXTENT_ITEM ei; 47 uint8_t type; 48 TREE_BLOCK_REF tbr; 49 } EXTENT_ITEM_SKINNY_METADATA; 50 51 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp); 52 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback); 53 54 static NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, uint64_t objid, 55 uint8_t objtype, uint64_t offset, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data, 56 uint16_t datalen, enum batch_operation operation); 57 58 _Function_class_(IO_COMPLETION_ROUTINE) 59 static NTSTATUS __stdcall write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 60 write_context* context = conptr; 61 62 UNUSED(DeviceObject); 63 64 context->iosb = Irp->IoStatus; 65 KeSetEvent(&context->Event, 0, false); 66 67 return STATUS_MORE_PROCESSING_REQUIRED; 68 } 69 70 NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ PFILE_OBJECT fileobj, _In_ uint64_t address, 71 _In_reads_bytes_(length) void* data, _In_ uint32_t length) { 72 NTSTATUS Status; 73 LARGE_INTEGER offset; 74 PIRP Irp; 75 PIO_STACK_LOCATION IrpSp; 76 write_context context; 77 78 TRACE("(%p, %I64x, %p, %x)\n", device, address, data, length); 79 80 RtlZeroMemory(&context, sizeof(write_context)); 81 82 KeInitializeEvent(&context.Event, NotificationEvent, false); 83 84 offset.QuadPart = address; 85 86 Irp = IoAllocateIrp(device->StackSize, false); 87 88 if (!Irp) { 89 ERR("IoAllocateIrp failed\n"); 90 return STATUS_INSUFFICIENT_RESOURCES; 91 } 92 93 IrpSp = IoGetNextIrpStackLocation(Irp); 94 IrpSp->MajorFunction = 
IRP_MJ_WRITE; 95 IrpSp->FileObject = fileobj; 96 97 if (device->Flags & DO_BUFFERED_IO) { 98 Irp->AssociatedIrp.SystemBuffer = data; 99 100 Irp->Flags = IRP_BUFFERED_IO; 101 } else if (device->Flags & DO_DIRECT_IO) { 102 Irp->MdlAddress = IoAllocateMdl(data, length, false, false, NULL); 103 if (!Irp->MdlAddress) { 104 DbgPrint("IoAllocateMdl failed\n"); 105 Status = STATUS_INSUFFICIENT_RESOURCES; 106 goto exit; 107 } 108 109 Status = STATUS_SUCCESS; 110 111 _SEH2_TRY { 112 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess); 113 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 114 Status = _SEH2_GetExceptionCode(); 115 } _SEH2_END; 116 117 if (!NT_SUCCESS(Status)) { 118 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 119 IoFreeMdl(Irp->MdlAddress); 120 goto exit; 121 } 122 } else { 123 Irp->UserBuffer = data; 124 } 125 126 IrpSp->Parameters.Write.Length = length; 127 IrpSp->Parameters.Write.ByteOffset = offset; 128 129 Irp->UserIosb = &context.iosb; 130 131 Irp->UserEvent = &context.Event; 132 133 IoSetCompletionRoutine(Irp, write_completion, &context, true, true, true); 134 135 Status = IoCallDriver(device, Irp); 136 137 if (Status == STATUS_PENDING) { 138 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 139 Status = context.iosb.Status; 140 } 141 142 if (!NT_SUCCESS(Status)) { 143 ERR("IoCallDriver returned %08lx\n", Status); 144 } 145 146 if (device->Flags & DO_DIRECT_IO) { 147 MmUnlockPages(Irp->MdlAddress); 148 IoFreeMdl(Irp->MdlAddress); 149 } 150 151 exit: 152 IoFreeIrp(Irp); 153 154 return Status; 155 } 156 157 static void add_trim_entry(device* dev, uint64_t address, uint64_t size) { 158 space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); 159 if (!s) { 160 ERR("out of memory\n"); 161 return; 162 } 163 164 s->address = address; 165 s->size = size; 166 dev->num_trim_entries++; 167 168 InsertTailList(&dev->trim_list, &s->list_entry); 169 } 170 171 static void 
clean_space_cache_chunk(device_extension* Vcb, chunk* c) { 172 LIST_ENTRY* le; 173 ULONG type; 174 175 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) 176 type = BLOCK_FLAG_DUPLICATE; 177 else if (c->chunk_item->type & BLOCK_FLAG_RAID0) 178 type = BLOCK_FLAG_RAID0; 179 else if (c->chunk_item->type & BLOCK_FLAG_RAID1) 180 type = BLOCK_FLAG_DUPLICATE; 181 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 182 type = BLOCK_FLAG_RAID10; 183 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) 184 type = BLOCK_FLAG_RAID5; 185 else if (c->chunk_item->type & BLOCK_FLAG_RAID6) 186 type = BLOCK_FLAG_RAID6; 187 else if (c->chunk_item->type & BLOCK_FLAG_RAID1C3) 188 type = BLOCK_FLAG_DUPLICATE; 189 else if (c->chunk_item->type & BLOCK_FLAG_RAID1C4) 190 type = BLOCK_FLAG_DUPLICATE; 191 else // SINGLE 192 type = BLOCK_FLAG_DUPLICATE; 193 194 le = c->deleting.Flink; 195 while (le != &c->deleting) { 196 space* s = CONTAINING_RECORD(le, space, list_entry); 197 198 if (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA)) { 199 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 200 201 if (type == BLOCK_FLAG_DUPLICATE) { 202 uint16_t i; 203 204 for (i = 0; i < c->chunk_item->num_stripes; i++) { 205 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) 206 add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size); 207 } 208 } else if (type == BLOCK_FLAG_RAID0) { 209 uint64_t startoff, endoff; 210 uint16_t startoffstripe, endoffstripe, i; 211 212 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe); 213 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe); 214 215 for (i = 0; i < c->chunk_item->num_stripes; i++) { 216 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) { 217 uint64_t stripestart, 
stripeend; 218 219 if (startoffstripe > i) 220 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 221 else if (startoffstripe == i) 222 stripestart = startoff; 223 else 224 stripestart = startoff - (startoff % c->chunk_item->stripe_length); 225 226 if (endoffstripe > i) 227 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 228 else if (endoffstripe == i) 229 stripeend = endoff + 1; 230 else 231 stripeend = endoff - (endoff % c->chunk_item->stripe_length); 232 233 if (stripestart != stripeend) 234 add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart); 235 } 236 } 237 } else if (type == BLOCK_FLAG_RAID10) { 238 uint64_t startoff, endoff; 239 uint16_t sub_stripes, startoffstripe, endoffstripe, i; 240 241 sub_stripes = max(1, c->chunk_item->sub_stripes); 242 243 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe); 244 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe); 245 246 startoffstripe *= sub_stripes; 247 endoffstripe *= sub_stripes; 248 249 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) { 250 ULONG j; 251 uint64_t stripestart, stripeend; 252 253 if (startoffstripe > i) 254 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 255 else if (startoffstripe == i) 256 stripestart = startoff; 257 else 258 stripestart = startoff - (startoff % c->chunk_item->stripe_length); 259 260 if (endoffstripe > i) 261 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 262 else if (endoffstripe == i) 263 stripeend = endoff + 1; 264 else 265 stripeend = endoff - (endoff % c->chunk_item->stripe_length); 266 267 if (stripestart != stripeend) { 268 for (j = 0; j < sub_stripes; j++) 
{ 269 if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim) 270 add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart); 271 } 272 } 273 } 274 } 275 // FIXME - RAID5(?), RAID6(?) 276 } 277 278 le = le->Flink; 279 } 280 } 281 282 typedef struct { 283 DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa; 284 ATA_PASS_THROUGH_EX apte; 285 PIRP Irp; 286 IO_STATUS_BLOCK iosb; 287 #ifdef DEBUG_TRIM_EMULATION 288 PMDL mdl; 289 void* buf; 290 #endif 291 } ioctl_context_stripe; 292 293 typedef struct { 294 KEVENT Event; 295 LONG left; 296 ioctl_context_stripe* stripes; 297 } ioctl_context; 298 299 _Function_class_(IO_COMPLETION_ROUTINE) 300 static NTSTATUS __stdcall ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 301 ioctl_context* context = (ioctl_context*)conptr; 302 LONG left2 = InterlockedDecrement(&context->left); 303 304 UNUSED(DeviceObject); 305 UNUSED(Irp); 306 307 if (left2 == 0) 308 KeSetEvent(&context->Event, 0, false); 309 310 return STATUS_MORE_PROCESSING_REQUIRED; 311 } 312 313 #ifdef DEBUG_TRIM_EMULATION 314 static void trim_emulation(device* dev) { 315 LIST_ENTRY* le; 316 ioctl_context context; 317 unsigned int i = 0, count = 0; 318 319 le = dev->trim_list.Flink; 320 while (le != &dev->trim_list) { 321 count++; 322 le = le->Flink; 323 } 324 325 context.left = count; 326 327 KeInitializeEvent(&context.Event, NotificationEvent, false); 328 329 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG); 330 if (!context.stripes) { 331 ERR("out of memory\n"); 332 return; 333 } 334 335 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left); 336 337 i = 0; 338 le = dev->trim_list.Flink; 339 while (le != &dev->trim_list) { 340 ioctl_context_stripe* stripe = &context.stripes[i]; 341 space* s = CONTAINING_RECORD(le, space, list_entry); 342 343 WARN("(%I64x, %I64x)\n", s->address, s->size); 344 345 
stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false); 346 347 if (!stripe->Irp) { 348 ERR("IoAllocateIrp failed\n"); 349 } else { 350 PIO_STACK_LOCATION IrpSp = IoGetNextIrpStackLocation(stripe->Irp); 351 IrpSp->MajorFunction = IRP_MJ_WRITE; 352 IrpSp->FileObject = dev->fileobj; 353 354 stripe->buf = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)s->size, ALLOC_TAG); 355 356 if (!stripe->buf) { 357 ERR("out of memory\n"); 358 } else { 359 RtlZeroMemory(stripe->buf, (uint32_t)s->size); // FIXME - randomize instead? 360 361 stripe->mdl = IoAllocateMdl(stripe->buf, (uint32_t)s->size, false, false, NULL); 362 363 if (!stripe->mdl) { 364 ERR("IoAllocateMdl failed\n"); 365 } else { 366 MmBuildMdlForNonPagedPool(stripe->mdl); 367 368 stripe->Irp->MdlAddress = stripe->mdl; 369 370 IrpSp->Parameters.Write.ByteOffset.QuadPart = s->address; 371 IrpSp->Parameters.Write.Length = s->size; 372 373 stripe->Irp->UserIosb = &stripe->iosb; 374 375 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true); 376 377 IoCallDriver(dev->devobj, stripe->Irp); 378 } 379 } 380 } 381 382 i++; 383 384 le = le->Flink; 385 } 386 387 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 388 389 for (i = 0; i < count; i++) { 390 ioctl_context_stripe* stripe = &context.stripes[i]; 391 392 if (stripe->mdl) 393 IoFreeMdl(stripe->mdl); 394 395 if (stripe->buf) 396 ExFreePool(stripe->buf); 397 } 398 399 ExFreePool(context.stripes); 400 } 401 #endif 402 403 static void clean_space_cache(device_extension* Vcb) { 404 LIST_ENTRY* le; 405 chunk* c; 406 #ifndef DEBUG_TRIM_EMULATION 407 ULONG num; 408 #endif 409 410 TRACE("(%p)\n", Vcb); 411 412 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true); 413 414 le = Vcb->chunks.Flink; 415 while (le != &Vcb->chunks) { 416 c = CONTAINING_RECORD(le, chunk, list_entry); 417 418 if (c->space_changed) { 419 acquire_chunk_lock(c, Vcb); 420 421 if (c->space_changed) { 422 if (Vcb->trim && !Vcb->options.no_trim) 423 
clean_space_cache_chunk(Vcb, c); 424 425 space_list_merge(&c->space, &c->space_size, &c->deleting); 426 427 while (!IsListEmpty(&c->deleting)) { 428 space* s = CONTAINING_RECORD(RemoveHeadList(&c->deleting), space, list_entry); 429 430 ExFreePool(s); 431 } 432 } 433 434 c->space_changed = false; 435 436 release_chunk_lock(c, Vcb); 437 } 438 439 le = le->Flink; 440 } 441 442 ExReleaseResourceLite(&Vcb->chunk_lock); 443 444 if (Vcb->trim && !Vcb->options.no_trim) { 445 #ifndef DEBUG_TRIM_EMULATION 446 ioctl_context context; 447 ULONG total_num; 448 449 context.left = 0; 450 451 le = Vcb->devices.Flink; 452 while (le != &Vcb->devices) { 453 device* dev = CONTAINING_RECORD(le, device, list_entry); 454 455 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) 456 context.left++; 457 458 le = le->Flink; 459 } 460 461 if (context.left == 0) 462 return; 463 464 total_num = context.left; 465 num = 0; 466 467 KeInitializeEvent(&context.Event, NotificationEvent, false); 468 469 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG); 470 if (!context.stripes) { 471 ERR("out of memory\n"); 472 return; 473 } 474 475 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left); 476 #endif 477 478 le = Vcb->devices.Flink; 479 while (le != &Vcb->devices) { 480 device* dev = CONTAINING_RECORD(le, device, list_entry); 481 482 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) { 483 #ifdef DEBUG_TRIM_EMULATION 484 trim_emulation(dev); 485 #else 486 LIST_ENTRY* le2; 487 ioctl_context_stripe* stripe = &context.stripes[num]; 488 DEVICE_DATA_SET_RANGE* ranges; 489 ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i; 490 PIO_STACK_LOCATION IrpSp; 491 492 stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); 493 if (!stripe->dmdsa) { 494 ERR("out of 
memory\n"); 495 goto nextdev; 496 } 497 498 stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES); 499 stripe->dmdsa->Action = DeviceDsmAction_Trim; 500 stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED; 501 stripe->dmdsa->ParameterBlockOffset = 0; 502 stripe->dmdsa->ParameterBlockLength = 0; 503 stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t)); 504 stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE); 505 506 ranges = (DEVICE_DATA_SET_RANGE*)((uint8_t*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset); 507 508 i = 0; 509 510 le2 = dev->trim_list.Flink; 511 while (le2 != &dev->trim_list) { 512 space* s = CONTAINING_RECORD(le2, space, list_entry); 513 514 ranges[i].StartingOffset = s->address; 515 ranges[i].LengthInBytes = s->size; 516 i++; 517 518 le2 = le2->Flink; 519 } 520 521 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false); 522 523 if (!stripe->Irp) { 524 ERR("IoAllocateIrp failed\n"); 525 goto nextdev; 526 } 527 528 IrpSp = IoGetNextIrpStackLocation(stripe->Irp); 529 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL; 530 IrpSp->FileObject = dev->fileobj; 531 532 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES; 533 IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen; 534 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0; 535 536 stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa; 537 stripe->Irp->Flags |= IRP_BUFFERED_IO; 538 stripe->Irp->UserBuffer = NULL; 539 stripe->Irp->UserIosb = &stripe->iosb; 540 541 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true); 542 543 IoCallDriver(dev->devobj, stripe->Irp); 544 545 nextdev: 546 #endif 547 while (!IsListEmpty(&dev->trim_list)) { 548 space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry); 549 ExFreePool(s); 550 } 551 552 dev->num_trim_entries = 0; 553 554 #ifndef 
DEBUG_TRIM_EMULATION 555 num++; 556 #endif 557 } 558 559 le = le->Flink; 560 } 561 562 #ifndef DEBUG_TRIM_EMULATION 563 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 564 565 for (num = 0; num < total_num; num++) { 566 if (context.stripes[num].dmdsa) 567 ExFreePool(context.stripes[num].dmdsa); 568 569 if (context.stripes[num].Irp) 570 IoFreeIrp(context.stripes[num].Irp); 571 } 572 573 ExFreePool(context.stripes); 574 #endif 575 } 576 } 577 578 static bool trees_consistent(device_extension* Vcb) { 579 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header); 580 LIST_ENTRY* le; 581 582 le = Vcb->trees.Flink; 583 while (le != &Vcb->trees) { 584 tree* t = CONTAINING_RECORD(le, tree, list_entry); 585 586 if (t->write) { 587 if (t->header.num_items == 0 && t->parent) { 588 #ifdef DEBUG_WRITE_LOOPS 589 ERR("empty tree found, looping again\n"); 590 #endif 591 return false; 592 } 593 594 if (t->size > maxsize) { 595 #ifdef DEBUG_WRITE_LOOPS 596 ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize); 597 #endif 598 return false; 599 } 600 601 if (!t->has_new_address) { 602 #ifdef DEBUG_WRITE_LOOPS 603 ERR("tree found without new address, looping again\n"); 604 #endif 605 return false; 606 } 607 } 608 609 le = le->Flink; 610 } 611 612 return true; 613 } 614 615 static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) { 616 ULONG level; 617 LIST_ENTRY* le; 618 619 for (level = 0; level <= 255; level++) { 620 bool nothing_found = true; 621 622 TRACE("level = %lu\n", level); 623 624 le = Vcb->trees.Flink; 625 while (le != &Vcb->trees) { 626 tree* t = CONTAINING_RECORD(le, tree, list_entry); 627 628 if (t->write && t->header.level == level) { 629 TRACE("tree %p: root = %I64x, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent); 630 631 nothing_found = false; 632 633 if (t->parent) { 634 if (!t->parent->write) 635 TRACE("adding tree %p (level %x)\n", t->parent, t->header.level); 636 637 
t->parent->write = true; 638 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) { 639 KEY searchkey; 640 traverse_ptr tp; 641 NTSTATUS Status; 642 643 searchkey.obj_id = t->root->id; 644 searchkey.obj_type = TYPE_ROOT_ITEM; 645 searchkey.offset = 0xffffffffffffffff; 646 647 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 648 if (!NT_SUCCESS(Status)) { 649 ERR("error - find_item returned %08lx\n", Status); 650 return Status; 651 } 652 653 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 654 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id); 655 return STATUS_INTERNAL_ERROR; 656 } 657 658 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry 659 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); 660 661 if (!ri) { 662 ERR("out of memory\n"); 663 return STATUS_INSUFFICIENT_RESOURCES; 664 } 665 666 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM)); 667 668 Status = delete_tree_item(Vcb, &tp); 669 if (!NT_SUCCESS(Status)) { 670 ERR("delete_tree_item returned %08lx\n", Status); 671 ExFreePool(ri); 672 return Status; 673 } 674 675 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp); 676 if (!NT_SUCCESS(Status)) { 677 ERR("insert_tree_item returned %08lx\n", Status); 678 ExFreePool(ri); 679 return Status; 680 } 681 } 682 683 tree* t2 = tp.tree; 684 while (t2) { 685 t2->write = true; 686 687 t2 = t2->parent; 688 } 689 } 690 } 691 692 le = le->Flink; 693 } 694 695 if (nothing_found) 696 break; 697 } 698 699 return STATUS_SUCCESS; 700 } 701 702 static void add_parents_to_cache(tree* t) { 703 while (t->parent) { 704 t = t->parent; 705 t->write = true; 706 } 707 } 708 709 static bool insert_tree_extent_skinny(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t address, PIRP Irp, LIST_ENTRY* 
rollback) { 710 NTSTATUS Status; 711 EXTENT_ITEM_SKINNY_METADATA* eism; 712 traverse_ptr insert_tp; 713 714 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG); 715 if (!eism) { 716 ERR("out of memory\n"); 717 return false; 718 } 719 720 eism->ei.refcount = 1; 721 eism->ei.generation = Vcb->superblock.generation; 722 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK; 723 eism->type = TYPE_TREE_BLOCK_REF; 724 eism->tbr.offset = root_id; 725 726 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp); 727 if (!NT_SUCCESS(Status)) { 728 ERR("insert_tree_item returned %08lx\n", Status); 729 ExFreePool(eism); 730 return false; 731 } 732 733 acquire_chunk_lock(c, Vcb); 734 735 space_list_subtract(c, address, Vcb->superblock.node_size, rollback); 736 737 release_chunk_lock(c, Vcb); 738 739 add_parents_to_cache(insert_tp.tree); 740 741 return true; 742 } 743 744 bool find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t* address) { 745 LIST_ENTRY* le; 746 space* s; 747 748 TRACE("(%p, %I64x, %p)\n", Vcb, c->offset, address); 749 750 if (Vcb->superblock.node_size > c->chunk_item->size - c->used) 751 return false; 752 753 if (!c->cache_loaded) { 754 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL); 755 756 if (!NT_SUCCESS(Status)) { 757 ERR("load_cache_chunk returned %08lx\n", Status); 758 return false; 759 } 760 } 761 762 if (IsListEmpty(&c->space_size)) 763 return false; 764 765 if (!c->last_alloc_set) { 766 s = CONTAINING_RECORD(c->space.Blink, space, list_entry); 767 768 c->last_alloc = s->address; 769 c->last_alloc_set = true; 770 771 if (s->size >= Vcb->superblock.node_size) { 772 *address = s->address; 773 c->last_alloc += Vcb->superblock.node_size; 774 return true; 775 } 776 } 777 778 le = c->space.Flink; 779 while (le != &c->space) { 780 s = CONTAINING_RECORD(le, space, list_entry); 781 782 if (s->address <= c->last_alloc && 
s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) { 783 *address = c->last_alloc; 784 c->last_alloc += Vcb->superblock.node_size; 785 return true; 786 } 787 788 le = le->Flink; 789 } 790 791 le = c->space_size.Flink; 792 while (le != &c->space_size) { 793 s = CONTAINING_RECORD(le, space, list_entry_size); 794 795 if (s->size == Vcb->superblock.node_size) { 796 *address = s->address; 797 c->last_alloc = s->address + Vcb->superblock.node_size; 798 return true; 799 } else if (s->size < Vcb->superblock.node_size) { 800 if (le == c->space_size.Flink) 801 return false; 802 803 s = CONTAINING_RECORD(le->Blink, space, list_entry_size); 804 805 *address = s->address; 806 c->last_alloc = s->address + Vcb->superblock.node_size; 807 808 return true; 809 } 810 811 le = le->Flink; 812 } 813 814 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size); 815 816 if (s->size > Vcb->superblock.node_size) { 817 *address = s->address; 818 c->last_alloc = s->address + Vcb->superblock.node_size; 819 return true; 820 } 821 822 return false; 823 } 824 825 static bool insert_tree_extent(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t* new_address, PIRP Irp, LIST_ENTRY* rollback) { 826 NTSTATUS Status; 827 uint64_t address; 828 EXTENT_ITEM_TREE2* eit2; 829 traverse_ptr insert_tp; 830 831 TRACE("(%p, %x, %I64x, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, Irp, rollback); 832 833 if (!find_metadata_address_in_chunk(Vcb, c, &address)) 834 return false; 835 836 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { 837 bool b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback); 838 839 if (b) 840 *new_address = address; 841 842 return b; 843 } 844 845 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG); 846 if (!eit2) { 847 ERR("out of memory\n"); 848 return false; 849 } 850 851 eit2->eit.extent_item.refcount = 1; 852 eit2->eit.extent_item.generation = 
Vcb->superblock.generation; 853 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK; 854 eit2->eit.level = level; 855 eit2->type = TYPE_TREE_BLOCK_REF; 856 eit2->tbr.offset = root_id; 857 858 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp); 859 if (!NT_SUCCESS(Status)) { 860 ERR("insert_tree_item returned %08lx\n", Status); 861 ExFreePool(eit2); 862 return false; 863 } 864 865 acquire_chunk_lock(c, Vcb); 866 867 space_list_subtract(c, address, Vcb->superblock.node_size, rollback); 868 869 release_chunk_lock(c, Vcb); 870 871 add_parents_to_cache(insert_tp.tree); 872 873 *new_address = address; 874 875 return true; 876 } 877 878 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { 879 NTSTATUS Status; 880 chunk *origchunk = NULL, *c; 881 LIST_ENTRY* le; 882 uint64_t flags, addr; 883 884 if (t->root->id == BTRFS_ROOT_CHUNK) 885 flags = Vcb->system_flags; 886 else 887 flags = Vcb->metadata_flags; 888 889 if (t->has_address) { 890 origchunk = get_chunk_from_address(Vcb, t->header.address); 891 892 if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags && 893 insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) { 894 t->new_address = addr; 895 t->has_new_address = true; 896 return STATUS_SUCCESS; 897 } 898 } 899 900 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true); 901 902 le = Vcb->chunks.Flink; 903 while (le != &Vcb->chunks) { 904 c = CONTAINING_RECORD(le, chunk, list_entry); 905 906 if (!c->readonly && !c->reloc) { 907 acquire_chunk_lock(c, Vcb); 908 909 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) { 910 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) { 911 release_chunk_lock(c, Vcb); 912 ExReleaseResourceLite(&Vcb->chunk_lock); 913 
t->new_address = addr; 914 t->has_new_address = true; 915 return STATUS_SUCCESS; 916 } 917 } 918 919 release_chunk_lock(c, Vcb); 920 } 921 922 le = le->Flink; 923 } 924 925 // allocate new chunk if necessary 926 927 Status = alloc_chunk(Vcb, flags, &c, false); 928 929 if (!NT_SUCCESS(Status)) { 930 ERR("alloc_chunk returned %08lx\n", Status); 931 ExReleaseResourceLite(&Vcb->chunk_lock); 932 return Status; 933 } 934 935 acquire_chunk_lock(c, Vcb); 936 937 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) { 938 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) { 939 release_chunk_lock(c, Vcb); 940 ExReleaseResourceLite(&Vcb->chunk_lock); 941 t->new_address = addr; 942 t->has_new_address = true; 943 return STATUS_SUCCESS; 944 } 945 } 946 947 release_chunk_lock(c, Vcb); 948 949 ExReleaseResourceLite(&Vcb->chunk_lock); 950 951 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size); 952 953 return STATUS_DISK_FULL; 954 } 955 956 static NTSTATUS reduce_tree_extent(device_extension* Vcb, uint64_t address, tree* t, uint64_t parent_root, uint8_t level, PIRP Irp, LIST_ENTRY* rollback) { 957 NTSTATUS Status; 958 uint64_t rc, root; 959 960 TRACE("(%p, %I64x, %p)\n", Vcb, address, t); 961 962 rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp); 963 if (rc == 0) { 964 ERR("error - refcount for extent %I64x was 0\n", address); 965 return STATUS_INTERNAL_ERROR; 966 } 967 968 if (!t || t->parent) 969 root = parent_root; 970 else 971 root = t->header.tree_id; 972 973 Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp); 974 if (!NT_SUCCESS(Status)) { 975 ERR("decrease_extent_refcount_tree returned %08lx\n", Status); 976 return Status; 977 } 978 979 if (rc == 1) { 980 chunk* c = get_chunk_from_address(Vcb, address); 981 982 if (c) { 983 acquire_chunk_lock(c, Vcb); 984 985 if (!c->cache_loaded) { 986 Status = load_cache_chunk(Vcb, c, NULL); 
987 988 if (!NT_SUCCESS(Status)) { 989 ERR("load_cache_chunk returned %08lx\n", Status); 990 release_chunk_lock(c, Vcb); 991 return Status; 992 } 993 } 994 995 c->used -= Vcb->superblock.node_size; 996 997 space_list_add(c, address, Vcb->superblock.node_size, rollback); 998 999 release_chunk_lock(c, Vcb); 1000 } else 1001 ERR("could not find chunk for address %I64x\n", address); 1002 } 1003 1004 return STATUS_SUCCESS; 1005 } 1006 1007 static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, bool old) { 1008 LIST_ENTRY *le2, *list; 1009 changed_extent_ref* cer; 1010 1011 list = old ? &ce->old_refs : &ce->refs; 1012 1013 le2 = list->Flink; 1014 while (le2 != list) { 1015 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 1016 1017 if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) { 1018 cer->edr.count += edr->count; 1019 goto end; 1020 } 1021 1022 le2 = le2->Flink; 1023 } 1024 1025 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG); 1026 if (!cer) { 1027 ERR("out of memory\n"); 1028 return STATUS_INSUFFICIENT_RESOURCES; 1029 } 1030 1031 cer->type = TYPE_EXTENT_DATA_REF; 1032 RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF)); 1033 InsertTailList(list, &cer->list_entry); 1034 1035 end: 1036 if (old) 1037 ce->old_count += edr->count; 1038 else 1039 ce->count += edr->count; 1040 1041 return STATUS_SUCCESS; 1042 } 1043 1044 static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, bool old) { 1045 LIST_ENTRY *le2, *list; 1046 changed_extent_ref* cer; 1047 1048 list = old ? 
&ce->old_refs : &ce->refs; 1049 1050 le2 = list->Flink; 1051 while (le2 != list) { 1052 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 1053 1054 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) { 1055 cer->sdr.count += sdr->count; 1056 goto end; 1057 } 1058 1059 le2 = le2->Flink; 1060 } 1061 1062 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG); 1063 if (!cer) { 1064 ERR("out of memory\n"); 1065 return STATUS_INSUFFICIENT_RESOURCES; 1066 } 1067 1068 cer->type = TYPE_SHARED_DATA_REF; 1069 RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF)); 1070 InsertTailList(list, &cer->list_entry); 1071 1072 end: 1073 if (old) 1074 ce->old_count += sdr->count; 1075 else 1076 ce->count += sdr->count; 1077 1078 return STATUS_SUCCESS; 1079 } 1080 1081 static bool shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { 1082 KEY searchkey; 1083 traverse_ptr tp; 1084 NTSTATUS Status; 1085 1086 if (!t->updated_extents && t->has_address) { 1087 Status = update_tree_extents(Vcb, t, Irp, rollback); 1088 if (!NT_SUCCESS(Status)) { 1089 ERR("update_tree_extents returned %08lx\n", Status); 1090 return false; 1091 } 1092 } 1093 1094 searchkey.obj_id = t->header.address; 1095 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? 
TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; 1096 searchkey.offset = 0xffffffffffffffff; 1097 1098 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 1099 if (!NT_SUCCESS(Status)) { 1100 ERR("error - find_item returned %08lx\n", Status); 1101 return false; 1102 } 1103 1104 if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM)) 1105 return false; 1106 else 1107 return true; 1108 } 1109 1110 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { 1111 NTSTATUS Status; 1112 uint64_t rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp); 1113 uint64_t flags = get_extent_flags(Vcb, t->header.address, Irp); 1114 1115 if (rc == 0) { 1116 ERR("refcount for extent %I64x was 0\n", t->header.address); 1117 return STATUS_INTERNAL_ERROR; 1118 } 1119 1120 if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { 1121 TREE_BLOCK_REF tbr; 1122 bool unique = rc > 1 ? false : (t->parent ? 
shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : false); 1123 1124 if (t->header.level == 0) { 1125 LIST_ENTRY* le; 1126 1127 le = t->itemlist.Flink; 1128 while (le != &t->itemlist) { 1129 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 1130 1131 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 1132 EXTENT_DATA* ed = (EXTENT_DATA*)td->data; 1133 1134 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { 1135 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 1136 1137 if (ed2->size > 0) { 1138 EXTENT_DATA_REF edr; 1139 changed_extent* ce = NULL; 1140 chunk* c = get_chunk_from_address(Vcb, ed2->address); 1141 1142 if (c) { 1143 LIST_ENTRY* le2; 1144 1145 le2 = c->changed_extents.Flink; 1146 while (le2 != &c->changed_extents) { 1147 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry); 1148 1149 if (ce2->address == ed2->address) { 1150 ce = ce2; 1151 break; 1152 } 1153 1154 le2 = le2->Flink; 1155 } 1156 } 1157 1158 edr.root = t->root->id; 1159 edr.objid = td->key.obj_id; 1160 edr.offset = td->key.offset - ed2->offset; 1161 edr.count = 1; 1162 1163 if (ce) { 1164 Status = add_changed_extent_ref_edr(ce, &edr, true); 1165 if (!NT_SUCCESS(Status)) { 1166 ERR("add_changed_extent_ref_edr returned %08lx\n", Status); 1167 return Status; 1168 } 1169 1170 Status = add_changed_extent_ref_edr(ce, &edr, false); 1171 if (!NT_SUCCESS(Status)) { 1172 ERR("add_changed_extent_ref_edr returned %08lx\n", Status); 1173 return Status; 1174 } 1175 } 1176 1177 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp); 1178 if (!NT_SUCCESS(Status)) { 1179 ERR("increase_extent_refcount returned %08lx\n", Status); 1180 return Status; 1181 } 1182 1183 if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { 1184 uint64_t sdrrc = find_extent_shared_data_refcount(Vcb, 
ed2->address, t->header.address, Irp); 1185 1186 if (sdrrc > 0) { 1187 SHARED_DATA_REF sdr; 1188 1189 sdr.offset = t->header.address; 1190 sdr.count = 1; 1191 1192 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, 1193 t->header.address, ce ? ce->superseded : false, Irp); 1194 if (!NT_SUCCESS(Status)) { 1195 ERR("decrease_extent_refcount returned %08lx\n", Status); 1196 return Status; 1197 } 1198 1199 if (ce) { 1200 LIST_ENTRY* le2; 1201 1202 le2 = ce->refs.Flink; 1203 while (le2 != &ce->refs) { 1204 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 1205 1206 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) { 1207 ce->count--; 1208 cer->sdr.count--; 1209 break; 1210 } 1211 1212 le2 = le2->Flink; 1213 } 1214 1215 le2 = ce->old_refs.Flink; 1216 while (le2 != &ce->old_refs) { 1217 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 1218 1219 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) { 1220 ce->old_count--; 1221 1222 if (cer->sdr.count > 1) 1223 cer->sdr.count--; 1224 else { 1225 RemoveEntryList(&cer->list_entry); 1226 ExFreePool(cer); 1227 } 1228 1229 break; 1230 } 1231 1232 le2 = le2->Flink; 1233 } 1234 } 1235 } 1236 } 1237 1238 // FIXME - clear shared flag if unique? 
1239 } 1240 } 1241 } 1242 1243 le = le->Flink; 1244 } 1245 } else { 1246 LIST_ENTRY* le; 1247 1248 le = t->itemlist.Flink; 1249 while (le != &t->itemlist) { 1250 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 1251 1252 if (!td->inserted) { 1253 tbr.offset = t->root->id; 1254 1255 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, 1256 &tbr, &td->key, t->header.level - 1, Irp); 1257 if (!NT_SUCCESS(Status)) { 1258 ERR("increase_extent_refcount returned %08lx\n", Status); 1259 return Status; 1260 } 1261 1262 if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { 1263 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp); 1264 1265 if (sbrrc > 0) { 1266 SHARED_BLOCK_REF sbr; 1267 1268 sbr.offset = t->header.address; 1269 1270 Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, 1271 t->header.address, false, Irp); 1272 if (!NT_SUCCESS(Status)) { 1273 ERR("decrease_extent_refcount returned %08lx\n", Status); 1274 return Status; 1275 } 1276 } 1277 } 1278 1279 // FIXME - clear shared flag if unique? 
1280 } 1281 1282 le = le->Flink; 1283 } 1284 } 1285 1286 if (unique) { 1287 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp); 1288 1289 if (sbrrc == 1) { 1290 SHARED_BLOCK_REF sbr; 1291 1292 sbr.offset = t->parent->header.address; 1293 1294 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, 1295 t->parent->header.address, false, Irp); 1296 if (!NT_SUCCESS(Status)) { 1297 ERR("decrease_extent_refcount returned %08lx\n", Status); 1298 return Status; 1299 } 1300 } 1301 } 1302 1303 if (t->parent) 1304 tbr.offset = t->parent->header.tree_id; 1305 else 1306 tbr.offset = t->header.tree_id; 1307 1308 Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, 1309 t->parent ? &t->paritem->key : NULL, t->header.level, Irp); 1310 if (!NT_SUCCESS(Status)) { 1311 ERR("increase_extent_refcount returned %08lx\n", Status); 1312 return Status; 1313 } 1314 1315 // FIXME - clear shared flag if unique? 1316 1317 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF; 1318 } 1319 1320 if (rc > 1 || t->header.tree_id == t->root->id) { 1321 Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? 
t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback); 1322 1323 if (!NT_SUCCESS(Status)) { 1324 ERR("reduce_tree_extent returned %08lx\n", Status); 1325 return Status; 1326 } 1327 } 1328 1329 t->has_address = false; 1330 1331 if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) { 1332 if (t->header.tree_id == t->root->id) { 1333 flags |= EXTENT_ITEM_SHARED_BACKREFS; 1334 update_extent_flags(Vcb, t->header.address, flags, Irp); 1335 } 1336 1337 if (t->header.level > 0) { 1338 LIST_ENTRY* le; 1339 1340 le = t->itemlist.Flink; 1341 while (le != &t->itemlist) { 1342 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 1343 1344 if (!td->inserted) { 1345 if (t->header.tree_id == t->root->id) { 1346 SHARED_BLOCK_REF sbr; 1347 1348 sbr.offset = t->header.address; 1349 1350 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp); 1351 } else { 1352 TREE_BLOCK_REF tbr; 1353 1354 tbr.offset = t->root->id; 1355 1356 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp); 1357 } 1358 1359 if (!NT_SUCCESS(Status)) { 1360 ERR("increase_extent_refcount returned %08lx\n", Status); 1361 return Status; 1362 } 1363 } 1364 1365 le = le->Flink; 1366 } 1367 } else { 1368 LIST_ENTRY* le; 1369 1370 le = t->itemlist.Flink; 1371 while (le != &t->itemlist) { 1372 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 1373 1374 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 1375 EXTENT_DATA* ed = (EXTENT_DATA*)td->data; 1376 1377 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { 1378 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 1379 1380 if (ed2->size > 0) { 1381 changed_extent* ce = NULL; 1382 chunk* c = 
get_chunk_from_address(Vcb, ed2->address); 1383 1384 if (c) { 1385 LIST_ENTRY* le2; 1386 1387 le2 = c->changed_extents.Flink; 1388 while (le2 != &c->changed_extents) { 1389 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry); 1390 1391 if (ce2->address == ed2->address) { 1392 ce = ce2; 1393 break; 1394 } 1395 1396 le2 = le2->Flink; 1397 } 1398 } 1399 1400 if (t->header.tree_id == t->root->id) { 1401 SHARED_DATA_REF sdr; 1402 1403 sdr.offset = t->header.address; 1404 sdr.count = 1; 1405 1406 if (ce) { 1407 Status = add_changed_extent_ref_sdr(ce, &sdr, true); 1408 if (!NT_SUCCESS(Status)) { 1409 ERR("add_changed_extent_ref_edr returned %08lx\n", Status); 1410 return Status; 1411 } 1412 1413 Status = add_changed_extent_ref_sdr(ce, &sdr, false); 1414 if (!NT_SUCCESS(Status)) { 1415 ERR("add_changed_extent_ref_edr returned %08lx\n", Status); 1416 return Status; 1417 } 1418 } 1419 1420 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp); 1421 } else { 1422 EXTENT_DATA_REF edr; 1423 1424 edr.root = t->root->id; 1425 edr.objid = td->key.obj_id; 1426 edr.offset = td->key.offset - ed2->offset; 1427 edr.count = 1; 1428 1429 if (ce) { 1430 Status = add_changed_extent_ref_edr(ce, &edr, true); 1431 if (!NT_SUCCESS(Status)) { 1432 ERR("add_changed_extent_ref_edr returned %08lx\n", Status); 1433 return Status; 1434 } 1435 1436 Status = add_changed_extent_ref_edr(ce, &edr, false); 1437 if (!NT_SUCCESS(Status)) { 1438 ERR("add_changed_extent_ref_edr returned %08lx\n", Status); 1439 return Status; 1440 } 1441 } 1442 1443 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp); 1444 } 1445 1446 if (!NT_SUCCESS(Status)) { 1447 ERR("increase_extent_refcount returned %08lx\n", Status); 1448 return Status; 1449 } 1450 } 1451 } 1452 } 1453 1454 le = le->Flink; 1455 } 1456 } 1457 } 1458 1459 t->updated_extents = true; 1460 t->header.tree_id = t->root->id; 1461 1462 
return STATUS_SUCCESS; 1463 } 1464 1465 static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { 1466 LIST_ENTRY* le; 1467 NTSTATUS Status; 1468 bool changed = false; 1469 uint8_t max_level = 0, level; 1470 1471 TRACE("(%p)\n", Vcb); 1472 1473 le = Vcb->trees.Flink; 1474 while (le != &Vcb->trees) { 1475 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1476 1477 if (t->write && !t->has_new_address) { 1478 chunk* c; 1479 1480 if (t->has_address) { 1481 c = get_chunk_from_address(Vcb, t->header.address); 1482 1483 if (c) { 1484 if (!c->cache_loaded) { 1485 acquire_chunk_lock(c, Vcb); 1486 1487 if (!c->cache_loaded) { 1488 Status = load_cache_chunk(Vcb, c, NULL); 1489 1490 if (!NT_SUCCESS(Status)) { 1491 ERR("load_cache_chunk returned %08lx\n", Status); 1492 release_chunk_lock(c, Vcb); 1493 return Status; 1494 } 1495 } 1496 1497 release_chunk_lock(c, Vcb); 1498 } 1499 } 1500 } 1501 1502 Status = get_tree_new_address(Vcb, t, Irp, rollback); 1503 if (!NT_SUCCESS(Status)) { 1504 ERR("get_tree_new_address returned %08lx\n", Status); 1505 return Status; 1506 } 1507 1508 TRACE("allocated extent %I64x\n", t->new_address); 1509 1510 c = get_chunk_from_address(Vcb, t->new_address); 1511 1512 if (c) 1513 c->used += Vcb->superblock.node_size; 1514 else { 1515 ERR("could not find chunk for address %I64x\n", t->new_address); 1516 return STATUS_INTERNAL_ERROR; 1517 } 1518 1519 changed = true; 1520 1521 if (t->header.level > max_level) 1522 max_level = t->header.level; 1523 } 1524 1525 le = le->Flink; 1526 } 1527 1528 if (!changed) 1529 return STATUS_SUCCESS; 1530 1531 level = max_level; 1532 do { 1533 le = Vcb->trees.Flink; 1534 while (le != &Vcb->trees) { 1535 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1536 1537 if (t->write && !t->updated_extents && t->has_address && t->header.level == level) { 1538 Status = update_tree_extents(Vcb, t, Irp, rollback); 1539 if (!NT_SUCCESS(Status)) { 1540 ERR("update_tree_extents returned %08lx\n", 
Status); 1541 return Status; 1542 } 1543 } 1544 1545 le = le->Flink; 1546 } 1547 1548 if (level == 0) 1549 break; 1550 1551 level--; 1552 } while (true); 1553 1554 return STATUS_SUCCESS; 1555 } 1556 1557 static NTSTATUS update_root_root(device_extension* Vcb, bool no_cache, PIRP Irp, LIST_ENTRY* rollback) { 1558 LIST_ENTRY* le; 1559 NTSTATUS Status; 1560 1561 TRACE("(%p)\n", Vcb); 1562 1563 le = Vcb->trees.Flink; 1564 while (le != &Vcb->trees) { 1565 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1566 1567 if (t->write && !t->parent) { 1568 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) { 1569 KEY searchkey; 1570 traverse_ptr tp; 1571 1572 searchkey.obj_id = t->root->id; 1573 searchkey.obj_type = TYPE_ROOT_ITEM; 1574 searchkey.offset = 0xffffffffffffffff; 1575 1576 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 1577 if (!NT_SUCCESS(Status)) { 1578 ERR("error - find_item returned %08lx\n", Status); 1579 return Status; 1580 } 1581 1582 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 1583 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id); 1584 return STATUS_INTERNAL_ERROR; 1585 } 1586 1587 TRACE("updating the address for root %I64x to %I64x\n", searchkey.obj_id, t->new_address); 1588 1589 t->root->root_item.block_number = t->new_address; 1590 t->root->root_item.root_level = t->header.level; 1591 t->root->root_item.generation = Vcb->superblock.generation; 1592 t->root->root_item.generation2 = Vcb->superblock.generation; 1593 1594 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents 1595 1596 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM)); 1597 } 1598 1599 t->root->treeholder.address = t->new_address; 1600 t->root->treeholder.generation = Vcb->superblock.generation; 1601 } 1602 1603 le = le->Flink; 1604 } 1605 1606 if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) { 1607 
ExAcquireResourceSharedLite(&Vcb->chunk_lock, true); 1608 Status = update_chunk_caches(Vcb, Irp, rollback); 1609 ExReleaseResourceLite(&Vcb->chunk_lock); 1610 1611 if (!NT_SUCCESS(Status)) { 1612 ERR("update_chunk_caches returned %08lx\n", Status); 1613 return Status; 1614 } 1615 } 1616 1617 return STATUS_SUCCESS; 1618 } 1619 1620 NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, bool no_free) { 1621 chunk* c; 1622 LIST_ENTRY* le; 1623 tree_write* tw; 1624 NTSTATUS Status; 1625 ULONG i, num_bits; 1626 write_data_context* wtc; 1627 ULONG bit_num = 0; 1628 bool raid56 = false; 1629 1630 // merge together runs 1631 c = NULL; 1632 le = tree_writes->Flink; 1633 while (le != tree_writes) { 1634 tw = CONTAINING_RECORD(le, tree_write, list_entry); 1635 1636 if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size) 1637 c = get_chunk_from_address(Vcb, tw->address); 1638 else { 1639 tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); 1640 1641 if (tw->address == tw2->address + tw2->length) { 1642 uint8_t* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG); 1643 1644 if (!data) { 1645 ERR("out of memory\n"); 1646 return STATUS_INSUFFICIENT_RESOURCES; 1647 } 1648 1649 RtlCopyMemory(data, tw2->data, tw2->length); 1650 RtlCopyMemory(&data[tw2->length], tw->data, tw->length); 1651 1652 if (!no_free || tw2->allocated) 1653 ExFreePool(tw2->data); 1654 1655 tw2->data = data; 1656 tw2->length += tw->length; 1657 tw2->allocated = true; 1658 1659 if (!no_free || tw->allocated) 1660 ExFreePool(tw->data); 1661 1662 RemoveEntryList(&tw->list_entry); 1663 ExFreePool(tw); 1664 1665 le = tw2->list_entry.Flink; 1666 continue; 1667 } 1668 } 1669 1670 tw->c = c; 1671 1672 if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6)) 1673 raid56 = true; 1674 1675 le = le->Flink; 1676 } 1677 1678 num_bits = 0; 1679 1680 le = tree_writes->Flink; 1681 while (le != tree_writes) { 1682 tw = 
CONTAINING_RECORD(le, tree_write, list_entry); 1683 1684 num_bits++; 1685 1686 le = le->Flink; 1687 } 1688 1689 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG); 1690 if (!wtc) { 1691 ERR("out of memory\n"); 1692 return STATUS_INSUFFICIENT_RESOURCES; 1693 } 1694 1695 le = tree_writes->Flink; 1696 1697 while (le != tree_writes) { 1698 tw = CONTAINING_RECORD(le, tree_write, list_entry); 1699 1700 TRACE("address: %I64x, size: %x\n", tw->address, tw->length); 1701 1702 KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, false); 1703 InitializeListHead(&wtc[bit_num].stripes); 1704 wtc[bit_num].need_wait = false; 1705 wtc[bit_num].stripes_left = 0; 1706 wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL; 1707 wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL; 1708 1709 Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, false, 0, HighPagePriority); 1710 if (!NT_SUCCESS(Status)) { 1711 ERR("write_data returned %08lx\n", Status); 1712 1713 for (i = 0; i < num_bits; i++) { 1714 free_write_data_stripes(&wtc[i]); 1715 } 1716 ExFreePool(wtc); 1717 1718 return Status; 1719 } 1720 1721 bit_num++; 1722 1723 le = le->Flink; 1724 } 1725 1726 for (i = 0; i < num_bits; i++) { 1727 if (wtc[i].stripes.Flink != &wtc[i].stripes) { 1728 // launch writes and wait 1729 le = wtc[i].stripes.Flink; 1730 while (le != &wtc[i].stripes) { 1731 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); 1732 1733 if (stripe->status != WriteDataStatus_Ignore) { 1734 wtc[i].need_wait = true; 1735 IoCallDriver(stripe->device->devobj, stripe->Irp); 1736 } 1737 1738 le = le->Flink; 1739 } 1740 } 1741 } 1742 1743 for (i = 0; i < num_bits; i++) { 1744 if (wtc[i].need_wait) 1745 KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, false, NULL); 1746 } 1747 1748 for (i = 0; i < num_bits; i++) { 1749 le = wtc[i].stripes.Flink; 1750 while (le != 
&wtc[i].stripes) { 1751 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); 1752 1753 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) { 1754 Status = stripe->iosb.Status; 1755 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS); 1756 break; 1757 } 1758 1759 le = le->Flink; 1760 } 1761 1762 free_write_data_stripes(&wtc[i]); 1763 } 1764 1765 ExFreePool(wtc); 1766 1767 if (raid56) { 1768 c = NULL; 1769 1770 le = tree_writes->Flink; 1771 while (le != tree_writes) { 1772 tw = CONTAINING_RECORD(le, tree_write, list_entry); 1773 1774 if (tw->c != c) { 1775 c = tw->c; 1776 1777 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true); 1778 1779 while (!IsListEmpty(&c->partial_stripes)) { 1780 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry); 1781 1782 Status = flush_partial_stripe(Vcb, c, ps); 1783 1784 if (ps->bmparr) 1785 ExFreePool(ps->bmparr); 1786 1787 ExFreePool(ps); 1788 1789 if (!NT_SUCCESS(Status)) { 1790 ERR("flush_partial_stripe returned %08lx\n", Status); 1791 ExReleaseResourceLite(&c->partial_stripes_lock); 1792 return Status; 1793 } 1794 } 1795 1796 ExReleaseResourceLite(&c->partial_stripes_lock); 1797 } 1798 1799 le = le->Flink; 1800 } 1801 } 1802 1803 return STATUS_SUCCESS; 1804 } 1805 1806 void calc_tree_checksum(device_extension* Vcb, tree_header* th) { 1807 switch (Vcb->superblock.csum_type) { 1808 case CSUM_TYPE_CRC32C: 1809 *((uint32_t*)th) = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1810 break; 1811 1812 case CSUM_TYPE_XXHASH: 1813 *((uint64_t*)th) = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0); 1814 break; 1815 1816 case CSUM_TYPE_SHA256: 1817 calc_sha256((uint8_t*)th, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1818 break; 1819 1820 case CSUM_TYPE_BLAKE2: 1821 blake2b((uint8_t*)th, BLAKE2_HASH_SIZE, 
&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1822 break; 1823 } 1824 } 1825 1826 static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { 1827 ULONG level; 1828 uint8_t *data, *body; 1829 NTSTATUS Status; 1830 LIST_ENTRY* le; 1831 LIST_ENTRY tree_writes; 1832 tree_write* tw; 1833 1834 TRACE("(%p)\n", Vcb); 1835 1836 InitializeListHead(&tree_writes); 1837 1838 for (level = 0; level <= 255; level++) { 1839 bool nothing_found = true; 1840 1841 TRACE("level = %lu\n", level); 1842 1843 le = Vcb->trees.Flink; 1844 while (le != &Vcb->trees) { 1845 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1846 1847 if (t->write && t->header.level == level) { 1848 KEY firstitem, searchkey; 1849 LIST_ENTRY* le2; 1850 traverse_ptr tp; 1851 1852 if (!t->has_new_address) { 1853 ERR("error - tried to write tree with no new address\n"); 1854 return STATUS_INTERNAL_ERROR; 1855 } 1856 1857 le2 = t->itemlist.Flink; 1858 while (le2 != &t->itemlist) { 1859 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 1860 if (!td->ignore) { 1861 firstitem = td->key; 1862 break; 1863 } 1864 le2 = le2->Flink; 1865 } 1866 1867 if (t->parent) { 1868 t->paritem->key = firstitem; 1869 t->paritem->treeholder.address = t->new_address; 1870 t->paritem->treeholder.generation = Vcb->superblock.generation; 1871 } 1872 1873 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) { 1874 EXTENT_ITEM_TREE* eit; 1875 1876 searchkey.obj_id = t->new_address; 1877 searchkey.obj_type = TYPE_EXTENT_ITEM; 1878 searchkey.offset = Vcb->superblock.node_size; 1879 1880 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 1881 if (!NT_SUCCESS(Status)) { 1882 ERR("error - find_item returned %08lx\n", Status); 1883 return Status; 1884 } 1885 1886 if (keycmp(searchkey, tp.item->key)) { 1887 ERR("could not find %I64x,%x,%I64x in extent_root (found %I64x,%x,%I64x instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, 
tp.item->key.obj_type, tp.item->key.offset); 1888 return STATUS_INTERNAL_ERROR; 1889 } 1890 1891 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) { 1892 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE)); 1893 return STATUS_INTERNAL_ERROR; 1894 } 1895 1896 eit = (EXTENT_ITEM_TREE*)tp.item->data; 1897 eit->firstitem = firstitem; 1898 } 1899 1900 nothing_found = false; 1901 } 1902 1903 le = le->Flink; 1904 } 1905 1906 if (nothing_found) 1907 break; 1908 } 1909 1910 TRACE("allocated tree extents\n"); 1911 1912 le = Vcb->trees.Flink; 1913 while (le != &Vcb->trees) { 1914 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1915 LIST_ENTRY* le2; 1916 #ifdef DEBUG_PARANOID 1917 uint32_t num_items = 0, size = 0; 1918 bool crash = false; 1919 #endif 1920 1921 if (t->write) { 1922 #ifdef DEBUG_PARANOID 1923 bool first = true; 1924 KEY lastkey; 1925 1926 le2 = t->itemlist.Flink; 1927 while (le2 != &t->itemlist) { 1928 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 1929 if (!td->ignore) { 1930 num_items++; 1931 1932 if (!first) { 1933 if (keycmp(td->key, lastkey) == 0) { 1934 ERR("(%I64x,%x,%I64x): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset); 1935 crash = true; 1936 } else if (keycmp(td->key, lastkey) == -1) { 1937 ERR("(%I64x,%x,%I64x): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset); 1938 crash = true; 1939 } 1940 } else 1941 first = false; 1942 1943 lastkey = td->key; 1944 1945 if (t->header.level == 0) 1946 size += td->size; 1947 } 1948 le2 = le2->Flink; 1949 } 1950 1951 if (t->header.level == 0) 1952 size += num_items * sizeof(leaf_node); 1953 else 1954 size += num_items * sizeof(internal_node); 1955 1956 if (num_items != t->header.num_items) { 1957 ERR("tree %I64x, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items); 1958 crash = true; 1959 } 
1960 1961 if (size != t->size) { 1962 ERR("tree %I64x, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size); 1963 crash = true; 1964 } 1965 1966 if (t->header.num_items == 0 && t->parent) { 1967 ERR("tree %I64x, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level); 1968 crash = true; 1969 } 1970 1971 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) { 1972 ERR("tree %I64x, level %x: tried to write overlarge tree (%x > %Ix)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header)); 1973 crash = true; 1974 } 1975 1976 if (crash) { 1977 ERR("tree %p\n", t); 1978 le2 = t->itemlist.Flink; 1979 while (le2 != &t->itemlist) { 1980 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 1981 if (!td->ignore) { 1982 ERR("%I64x,%x,%I64x inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted); 1983 } 1984 le2 = le2->Flink; 1985 } 1986 int3; 1987 } 1988 #endif 1989 t->header.address = t->new_address; 1990 t->header.generation = Vcb->superblock.generation; 1991 t->header.tree_id = t->root->id; 1992 t->header.flags |= HEADER_FLAG_MIXED_BACKREF; 1993 t->header.fs_uuid = Vcb->superblock.metadata_uuid; 1994 t->has_address = true; 1995 1996 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 1997 if (!data) { 1998 ERR("out of memory\n"); 1999 Status = STATUS_INSUFFICIENT_RESOURCES; 2000 goto end; 2001 } 2002 2003 body = data + sizeof(tree_header); 2004 2005 RtlCopyMemory(data, &t->header, sizeof(tree_header)); 2006 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header)); 2007 2008 if (t->header.level == 0) { 2009 leaf_node* itemptr = (leaf_node*)body; 2010 int i = 0; 2011 uint8_t* dataptr = data + Vcb->superblock.node_size; 2012 2013 le2 = t->itemlist.Flink; 2014 while (le2 != &t->itemlist) { 2015 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 2016 if (!td->ignore) { 2017 dataptr = dataptr 
- td->size; 2018 2019 itemptr[i].key = td->key; 2020 itemptr[i].offset = (uint32_t)((uint8_t*)dataptr - (uint8_t*)body); 2021 itemptr[i].size = td->size; 2022 i++; 2023 2024 if (td->size > 0) 2025 RtlCopyMemory(dataptr, td->data, td->size); 2026 } 2027 2028 le2 = le2->Flink; 2029 } 2030 } else { 2031 internal_node* itemptr = (internal_node*)body; 2032 int i = 0; 2033 2034 le2 = t->itemlist.Flink; 2035 while (le2 != &t->itemlist) { 2036 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 2037 if (!td->ignore) { 2038 itemptr[i].key = td->key; 2039 itemptr[i].address = td->treeholder.address; 2040 itemptr[i].generation = td->treeholder.generation; 2041 i++; 2042 } 2043 2044 le2 = le2->Flink; 2045 } 2046 } 2047 2048 calc_tree_checksum(Vcb, (tree_header*)data); 2049 2050 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG); 2051 if (!tw) { 2052 ERR("out of memory\n"); 2053 ExFreePool(data); 2054 Status = STATUS_INSUFFICIENT_RESOURCES; 2055 goto end; 2056 } 2057 2058 tw->address = t->new_address; 2059 tw->length = Vcb->superblock.node_size; 2060 tw->data = data; 2061 tw->allocated = false; 2062 2063 if (IsListEmpty(&tree_writes)) 2064 InsertTailList(&tree_writes, &tw->list_entry); 2065 else { 2066 bool inserted = false; 2067 2068 le2 = tree_writes.Flink; 2069 while (le2 != &tree_writes) { 2070 tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry); 2071 2072 if (tw2->address > tw->address) { 2073 InsertHeadList(le2->Blink, &tw->list_entry); 2074 inserted = true; 2075 break; 2076 } 2077 2078 le2 = le2->Flink; 2079 } 2080 2081 if (!inserted) 2082 InsertTailList(&tree_writes, &tw->list_entry); 2083 } 2084 } 2085 2086 le = le->Flink; 2087 } 2088 2089 Status = do_tree_writes(Vcb, &tree_writes, false); 2090 if (!NT_SUCCESS(Status)) { 2091 ERR("do_tree_writes returned %08lx\n", Status); 2092 goto end; 2093 } 2094 2095 Status = STATUS_SUCCESS; 2096 2097 end: 2098 while (!IsListEmpty(&tree_writes)) { 2099 le = RemoveHeadList(&tree_writes); 
2100 tw = CONTAINING_RECORD(le, tree_write, list_entry); 2101 2102 if (tw->data) 2103 ExFreePool(tw->data); 2104 2105 ExFreePool(tw); 2106 } 2107 2108 return Status; 2109 } 2110 2111 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) { 2112 KEY searchkey; 2113 traverse_ptr tp; 2114 2115 RtlZeroMemory(sb, sizeof(superblock_backup)); 2116 2117 sb->root_tree_addr = Vcb->superblock.root_tree_addr; 2118 sb->root_tree_generation = Vcb->superblock.generation; 2119 sb->root_level = Vcb->superblock.root_level; 2120 2121 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr; 2122 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation; 2123 sb->chunk_root_level = Vcb->superblock.chunk_root_level; 2124 2125 searchkey.obj_id = BTRFS_ROOT_EXTENT; 2126 searchkey.obj_type = TYPE_ROOT_ITEM; 2127 searchkey.offset = 0xffffffffffffffff; 2128 2129 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) { 2130 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { 2131 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; 2132 2133 sb->extent_tree_addr = ri->block_number; 2134 sb->extent_tree_generation = ri->generation; 2135 sb->extent_root_level = ri->root_level; 2136 } 2137 } 2138 2139 searchkey.obj_id = BTRFS_ROOT_FSTREE; 2140 2141 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) { 2142 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { 2143 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; 2144 2145 sb->fs_tree_addr = ri->block_number; 2146 sb->fs_tree_generation = ri->generation; 2147 sb->fs_root_level = ri->root_level; 2148 } 2149 } 2150 2151 searchkey.obj_id = BTRFS_ROOT_DEVTREE; 2152 2153 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) { 2154 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type 
== searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { 2155 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; 2156 2157 sb->dev_root_addr = ri->block_number; 2158 sb->dev_root_generation = ri->generation; 2159 sb->dev_root_level = ri->root_level; 2160 } 2161 } 2162 2163 searchkey.obj_id = BTRFS_ROOT_CHECKSUM; 2164 2165 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) { 2166 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { 2167 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; 2168 2169 sb->csum_root_addr = ri->block_number; 2170 sb->csum_root_generation = ri->generation; 2171 sb->csum_root_level = ri->root_level; 2172 } 2173 } 2174 2175 sb->total_bytes = Vcb->superblock.total_bytes; 2176 sb->bytes_used = Vcb->superblock.bytes_used; 2177 sb->num_devices = Vcb->superblock.num_devices; 2178 } 2179 2180 typedef struct { 2181 void* context; 2182 uint8_t* buf; 2183 PMDL mdl; 2184 device* device; 2185 NTSTATUS Status; 2186 PIRP Irp; 2187 LIST_ENTRY list_entry; 2188 } write_superblocks_stripe; 2189 2190 typedef struct _write_superblocks_context { 2191 KEVENT Event; 2192 LIST_ENTRY stripes; 2193 LONG left; 2194 } write_superblocks_context; 2195 2196 _Function_class_(IO_COMPLETION_ROUTINE) 2197 static NTSTATUS __stdcall write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 2198 write_superblocks_stripe* stripe = conptr; 2199 write_superblocks_context* context = stripe->context; 2200 2201 UNUSED(DeviceObject); 2202 2203 stripe->Status = Irp->IoStatus.Status; 2204 2205 if (InterlockedDecrement(&context->left) == 0) 2206 KeSetEvent(&context->Event, 0, false); 2207 2208 return STATUS_MORE_PROCESSING_REQUIRED; 2209 } 2210 2211 static void calc_superblock_checksum(superblock* sb) { 2212 switch (sb->csum_type) { 2213 case CSUM_TYPE_CRC32C: 2214 *(uint32_t*)sb = ~calc_crc32c(0xffffffff, (uint8_t*)&sb->uuid, (ULONG)sizeof(superblock) - 
sizeof(sb->checksum)); 2215 break; 2216 2217 case CSUM_TYPE_XXHASH: 2218 *(uint64_t*)sb = XXH64(&sb->uuid, sizeof(superblock) - sizeof(sb->checksum), 0); 2219 break; 2220 2221 case CSUM_TYPE_SHA256: 2222 calc_sha256((uint8_t*)sb, &sb->uuid, sizeof(superblock) - sizeof(sb->checksum)); 2223 break; 2224 2225 case CSUM_TYPE_BLAKE2: 2226 blake2b((uint8_t*)sb, BLAKE2_HASH_SIZE, &sb->uuid, sizeof(superblock) - sizeof(sb->checksum)); 2227 break; 2228 } 2229 } 2230 2231 static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) { 2232 unsigned int i = 0; 2233 2234 // All the documentation says that the Linux driver only writes one superblock 2235 // if it thinks a disk is an SSD, but this doesn't seem to be the case! 2236 2237 while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) { 2238 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size); 2239 superblock* sb; 2240 write_superblocks_stripe* stripe; 2241 PIO_STACK_LOCATION IrpSp; 2242 2243 sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG); 2244 if (!sb) { 2245 ERR("out of memory\n"); 2246 return STATUS_INSUFFICIENT_RESOURCES; 2247 } 2248 2249 RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock)); 2250 2251 if (sblen > sizeof(superblock)) 2252 RtlZeroMemory((uint8_t*)sb + sizeof(superblock), sblen - sizeof(superblock)); 2253 2254 RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM)); 2255 sb->sb_phys_addr = superblock_addrs[i]; 2256 2257 calc_superblock_checksum(sb); 2258 2259 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG); 2260 if (!stripe) { 2261 ERR("out of memory\n"); 2262 ExFreePool(sb); 2263 return STATUS_INSUFFICIENT_RESOURCES; 2264 } 2265 2266 stripe->buf = (uint8_t*)sb; 2267 2268 stripe->Irp = IoAllocateIrp(device->devobj->StackSize, false); 2269 if (!stripe->Irp) { 2270 ERR("IoAllocateIrp failed\n"); 2271 ExFreePool(stripe); 
2272 ExFreePool(sb); 2273 return STATUS_INSUFFICIENT_RESOURCES; 2274 } 2275 2276 IrpSp = IoGetNextIrpStackLocation(stripe->Irp); 2277 IrpSp->MajorFunction = IRP_MJ_WRITE; 2278 IrpSp->FileObject = device->fileobj; 2279 2280 if (i == 0) 2281 IrpSp->Flags |= SL_WRITE_THROUGH; 2282 2283 if (device->devobj->Flags & DO_BUFFERED_IO) { 2284 stripe->Irp->AssociatedIrp.SystemBuffer = sb; 2285 stripe->mdl = NULL; 2286 2287 stripe->Irp->Flags = IRP_BUFFERED_IO; 2288 } else if (device->devobj->Flags & DO_DIRECT_IO) { 2289 stripe->mdl = IoAllocateMdl(sb, sblen, false, false, NULL); 2290 if (!stripe->mdl) { 2291 ERR("IoAllocateMdl failed\n"); 2292 IoFreeIrp(stripe->Irp); 2293 ExFreePool(stripe); 2294 ExFreePool(sb); 2295 return STATUS_INSUFFICIENT_RESOURCES; 2296 } 2297 2298 stripe->Irp->MdlAddress = stripe->mdl; 2299 2300 MmBuildMdlForNonPagedPool(stripe->mdl); 2301 } else { 2302 stripe->Irp->UserBuffer = sb; 2303 stripe->mdl = NULL; 2304 } 2305 2306 IrpSp->Parameters.Write.Length = sblen; 2307 IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i]; 2308 2309 IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, true, true, true); 2310 2311 stripe->context = context; 2312 stripe->device = device; 2313 InsertTailList(&context->stripes, &stripe->list_entry); 2314 2315 context->left++; 2316 2317 i++; 2318 } 2319 2320 if (i == 0) 2321 ERR("no superblocks written!\n"); 2322 2323 return STATUS_SUCCESS; 2324 } 2325 2326 static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) { 2327 uint64_t i; 2328 NTSTATUS Status; 2329 LIST_ENTRY* le; 2330 write_superblocks_context context; 2331 2332 TRACE("(%p)\n", Vcb); 2333 2334 le = Vcb->trees.Flink; 2335 while (le != &Vcb->trees) { 2336 tree* t = CONTAINING_RECORD(le, tree, list_entry); 2337 2338 if (t->write && !t->parent) { 2339 if (t->root == Vcb->root_root) { 2340 Vcb->superblock.root_tree_addr = t->new_address; 2341 Vcb->superblock.root_level = t->header.level; 2342 } else if (t->root == 
Vcb->chunk_root) { 2343 Vcb->superblock.chunk_tree_addr = t->new_address; 2344 Vcb->superblock.chunk_root_generation = t->header.generation; 2345 Vcb->superblock.chunk_root_level = t->header.level; 2346 } 2347 } 2348 2349 le = le->Flink; 2350 } 2351 2352 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) { 2353 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup)); 2354 } 2355 2356 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp); 2357 2358 KeInitializeEvent(&context.Event, NotificationEvent, false); 2359 InitializeListHead(&context.stripes); 2360 context.left = 0; 2361 2362 le = Vcb->devices.Flink; 2363 while (le != &Vcb->devices) { 2364 device* dev = CONTAINING_RECORD(le, device, list_entry); 2365 2366 if (dev->devobj && !dev->readonly) { 2367 Status = write_superblock(Vcb, dev, &context); 2368 if (!NT_SUCCESS(Status)) { 2369 ERR("write_superblock returned %08lx\n", Status); 2370 goto end; 2371 } 2372 } 2373 2374 le = le->Flink; 2375 } 2376 2377 if (IsListEmpty(&context.stripes)) { 2378 ERR("error - not writing any superblocks\n"); 2379 Status = STATUS_INTERNAL_ERROR; 2380 goto end; 2381 } 2382 2383 le = context.stripes.Flink; 2384 while (le != &context.stripes) { 2385 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry); 2386 2387 IoCallDriver(stripe->device->devobj, stripe->Irp); 2388 2389 le = le->Flink; 2390 } 2391 2392 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 2393 2394 le = context.stripes.Flink; 2395 while (le != &context.stripes) { 2396 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry); 2397 2398 if (!NT_SUCCESS(stripe->Status)) { 2399 ERR("device %I64x returned %08lx\n", stripe->device->devitem.dev_id, stripe->Status); 2400 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS); 2401 Status = stripe->Status; 2402 goto end; 2403 } 2404 2405 le = 
/* Applies the pending reference-count changes recorded in *ce to the extent
 * tree, then unlinks and frees ce.
 *
 * For every EXTENT_DATA_REF in ce->refs the new count is compared with the
 * count of the matching entry in ce->old_refs (0 if there is none): increases
 * are applied in a first pass, decreases in a second pass. If the extent's
 * size changed (and it had references before), its EXTENT_ITEM is re-keyed
 * from old_size to size.
 *
 * If the extent ends up with no references and is not superseded, its space
 * is subtracted from c->used and handed to space_list_add.
 *
 * Returns STATUS_SUCCESS, or the failing status of a helper. On failure the
 * function returns immediately, leaving ce (and any refs not yet processed)
 * in place. */
static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
    LIST_ENTRY *le, *le2;
    NTSTATUS Status;
    uint64_t old_size;

    // Never referenced before and not referenced now: nothing to write to the
    // extent tree, just discard both ref lists.
    if (ce->count == 0 && ce->old_count == 0) {
        while (!IsListEmpty(&ce->refs)) {
            changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry);
            ExFreePool(cer);
        }

        while (!IsListEmpty(&ce->old_refs)) {
            changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry);
            ExFreePool(cer);
        }

        goto end;
    }

    // First pass: apply refcount increases. The lists are left intact for
    // EXTENT_DATA_REFs, as the second pass needs the old counts again.
    le = ce->refs.Flink;
    while (le != &ce->refs) {
        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
        uint32_t old_count = 0;

        if (cer->type == TYPE_EXTENT_DATA_REF) {
            // find the previous count for the same (root, objid, offset), if any
            le2 = ce->old_refs.Flink;
            while (le2 != &ce->old_refs) {
                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);

                if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
                    old_count = cer2->edr.count;
                    break;
                }

                le2 = le2->Flink;
            }

            // the size passed here is the old one if the extent had references before,
            // as the re-keying to the new size only happens in the second pass
            old_size = ce->old_count > 0 ? ce->old_size : ce->size;

            if (cer->edr.count > old_count) {
                Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp);

                if (!NT_SUCCESS(Status)) {
                    ERR("increase_extent_refcount_data returned %08lx\n", Status);
                    return Status;
                }
            }
        } else if (cer->type == TYPE_SHARED_DATA_REF) {
            // a shared ref present in both lists is unchanged: drop the old entry
            le2 = ce->old_refs.Flink;
            while (le2 != &ce->old_refs) {
                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);

                if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
                    RemoveEntryList(&cer2->list_entry);
                    ExFreePool(cer2);
                    break;
                }

                le2 = le2->Flink;
            }
        }

        le = le->Flink;
    }

    // Second pass: apply refcount decreases, re-key the extent item if its size
    // changed, and free every entry of ce->refs as it is processed.
    le = ce->refs.Flink;
    while (le != &ce->refs) {
        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
        LIST_ENTRY* le3 = le->Flink; // saved now, as cer is freed at the bottom of the loop
        uint32_t old_count = 0;

        if (cer->type == TYPE_EXTENT_DATA_REF) {
            le2 = ce->old_refs.Flink;
            while (le2 != &ce->old_refs) {
                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);

                if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
                    old_count = cer2->edr.count;

                    // the old entry is consumed this time round
                    RemoveEntryList(&cer2->list_entry);
                    ExFreePool(cer2);
                    break;
                }

                le2 = le2->Flink;
            }

            old_size = ce->old_count > 0 ? ce->old_size : ce->size;

            if (cer->edr.count < old_count) {
                Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
                                                       old_count - cer->edr.count, ce->superseded, Irp);

                if (!NT_SUCCESS(Status)) {
                    ERR("decrease_extent_refcount_data returned %08lx\n", Status);
                    return Status;
                }
            }

            // Size changed: copy the EXTENT_ITEM found under (address, old_size) to
            // a new item keyed (address, size), then delete the old one.
            if (ce->size != ce->old_size && ce->old_count > 0) {
                KEY searchkey;
                traverse_ptr tp;
                void* data;

                searchkey.obj_id = ce->address;
                searchkey.obj_type = TYPE_EXTENT_ITEM;
                searchkey.offset = ce->old_size;

                Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("error - find_item returned %08lx\n", Status);
                    return Status;
                }

                if (keycmp(searchkey, tp.item->key)) {
                    ERR("could not find (%I64x,%x,%I64x) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
                    return STATUS_INTERNAL_ERROR;
                }

                if (tp.item->size > 0) {
                    data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, tp.item->data, tp.item->size);
                } else
                    data = NULL;

                // data is only freed here if the insert fails
                Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("insert_tree_item returned %08lx\n", Status);
                    if (data) ExFreePool(data);
                    return Status;
                }

                Status = delete_tree_item(Vcb, &tp);
                if (!NT_SUCCESS(Status)) {
                    ERR("delete_tree_item returned %08lx\n", Status);
                    return Status;
                }
            }
        }

        RemoveEntryList(&cer->list_entry);
        ExFreePool(cer);

        le = le3;
    }

#ifdef DEBUG_PARANOID
    if (!IsListEmpty(&ce->old_refs))
        WARN("old_refs not empty\n");
#endif

end:
    // no references left and nothing has superseded the extent: give the space back to the chunk
    if (ce->count == 0 && !ce->superseded) {
        c->used -= ce->size;
        space_list_add(c, ce->address, ce->size, rollback);
    }

    RemoveEntryList(&ce->list_entry);
    ExFreePool(ce);

    return STATUS_SUCCESS;
}
#endif 2589 2590 end: 2591 if (ce->count == 0 && !ce->superseded) { 2592 c->used -= ce->size; 2593 space_list_add(c, ce->address, ce->size, rollback); 2594 } 2595 2596 RemoveEntryList(&ce->list_entry); 2597 ExFreePool(ce); 2598 2599 return STATUS_SUCCESS; 2600 } 2601 2602 void add_checksum_entry(device_extension* Vcb, uint64_t address, ULONG length, void* csum, PIRP Irp) { 2603 KEY searchkey; 2604 traverse_ptr tp, next_tp; 2605 NTSTATUS Status; 2606 uint64_t startaddr, endaddr; 2607 ULONG len; 2608 RTL_BITMAP bmp; 2609 ULONG* bmparr; 2610 ULONG runlength, index; 2611 2612 TRACE("(%p, %I64x, %lx, %p, %p)\n", Vcb, address, length, csum, Irp); 2613 2614 searchkey.obj_id = EXTENT_CSUM_ID; 2615 searchkey.obj_type = TYPE_EXTENT_CSUM; 2616 searchkey.offset = address; 2617 2618 // FIXME - create checksum_root if it doesn't exist at all 2619 2620 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp); 2621 if (Status == STATUS_NOT_FOUND) { // tree is completely empty 2622 if (csum) { // not deleted 2623 ULONG length2 = length; 2624 uint64_t off = address; 2625 void* data = csum; 2626 2627 do { 2628 uint16_t il = (uint16_t)min(length2, MAX_CSUM_SIZE / Vcb->csum_size); 2629 2630 void* checksums = ExAllocatePoolWithTag(PagedPool, il * Vcb->csum_size, ALLOC_TAG); 2631 if (!checksums) { 2632 ERR("out of memory\n"); 2633 return; 2634 } 2635 2636 RtlCopyMemory(checksums, data, il * Vcb->csum_size); 2637 2638 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums, 2639 il * Vcb->csum_size, NULL, Irp); 2640 if (!NT_SUCCESS(Status)) { 2641 ERR("insert_tree_item returned %08lx\n", Status); 2642 ExFreePool(checksums); 2643 return; 2644 } 2645 2646 length2 -= il; 2647 2648 if (length2 > 0) { 2649 off += (uint64_t)il << Vcb->sector_shift; 2650 data = (uint8_t*)data + (il * Vcb->csum_size); 2651 } 2652 } while (length2 > 0); 2653 } 2654 } else if (!NT_SUCCESS(Status)) { 2655 ERR("find_item returned %08lx\n", Status); 2656 
return; 2657 } else { 2658 uint32_t tplen; 2659 void* checksums; 2660 2661 // FIXME - check entry is TYPE_EXTENT_CSUM? 2662 2663 if (tp.item->key.offset < address && tp.item->key.offset + (((uint64_t)tp.item->size << Vcb->sector_shift) / Vcb->csum_size) >= address) 2664 startaddr = tp.item->key.offset; 2665 else 2666 startaddr = address; 2667 2668 searchkey.obj_id = EXTENT_CSUM_ID; 2669 searchkey.obj_type = TYPE_EXTENT_CSUM; 2670 searchkey.offset = address + (length << Vcb->sector_shift); 2671 2672 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp); 2673 if (!NT_SUCCESS(Status)) { 2674 ERR("find_item returned %08lx\n", Status); 2675 return; 2676 } 2677 2678 tplen = tp.item->size / Vcb->csum_size; 2679 2680 if (tp.item->key.offset + (tplen << Vcb->sector_shift) >= address + (length << Vcb->sector_shift)) 2681 endaddr = tp.item->key.offset + (tplen << Vcb->sector_shift); 2682 else 2683 endaddr = address + (length << Vcb->sector_shift); 2684 2685 TRACE("cs starts at %I64x (%lx sectors)\n", address, length); 2686 TRACE("startaddr = %I64x\n", startaddr); 2687 TRACE("endaddr = %I64x\n", endaddr); 2688 2689 len = (ULONG)((endaddr - startaddr) >> Vcb->sector_shift); 2690 2691 checksums = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * len, ALLOC_TAG); 2692 if (!checksums) { 2693 ERR("out of memory\n"); 2694 return; 2695 } 2696 2697 bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG); 2698 if (!bmparr) { 2699 ERR("out of memory\n"); 2700 ExFreePool(checksums); 2701 return; 2702 } 2703 2704 RtlInitializeBitMap(&bmp, bmparr, len); 2705 RtlSetAllBits(&bmp); 2706 2707 searchkey.obj_id = EXTENT_CSUM_ID; 2708 searchkey.obj_type = TYPE_EXTENT_CSUM; 2709 searchkey.offset = address; 2710 2711 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp); 2712 if (!NT_SUCCESS(Status)) { 2713 ERR("find_item returned %08lx\n", Status); 2714 ExFreePool(checksums); 2715 ExFreePool(bmparr); 2716 return; 2717 } 2718 2719 // 
set bit = free space, cleared bit = allocated sector 2720 2721 while (tp.item->key.offset < endaddr) { 2722 if (tp.item->key.offset >= startaddr) { 2723 if (tp.item->size > 0) { 2724 ULONG itemlen = (ULONG)min((len - ((tp.item->key.offset - startaddr) >> Vcb->sector_shift)) * Vcb->csum_size, tp.item->size); 2725 2726 RtlCopyMemory((uint8_t*)checksums + (((tp.item->key.offset - startaddr) * Vcb->csum_size) >> Vcb->sector_shift), 2727 tp.item->data, itemlen); 2728 RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) >> Vcb->sector_shift), itemlen / Vcb->csum_size); 2729 } 2730 2731 Status = delete_tree_item(Vcb, &tp); 2732 if (!NT_SUCCESS(Status)) { 2733 ERR("delete_tree_item returned %08lx\n", Status); 2734 ExFreePool(checksums); 2735 ExFreePool(bmparr); 2736 return; 2737 } 2738 } 2739 2740 if (find_next_item(Vcb, &tp, &next_tp, false, Irp)) { 2741 tp = next_tp; 2742 } else 2743 break; 2744 } 2745 2746 if (!csum) { // deleted 2747 RtlSetBits(&bmp, (ULONG)((address - startaddr) >> Vcb->sector_shift), length); 2748 } else { 2749 RtlCopyMemory((uint8_t*)checksums + (((address - startaddr) * Vcb->csum_size) >> Vcb->sector_shift), 2750 csum, length * Vcb->csum_size); 2751 RtlClearBits(&bmp, (ULONG)((address - startaddr) >> Vcb->sector_shift), length); 2752 } 2753 2754 runlength = RtlFindFirstRunClear(&bmp, &index); 2755 2756 while (runlength != 0) { 2757 if (index >= len) 2758 break; 2759 2760 if (index + runlength >= len) { 2761 runlength = len - index; 2762 2763 if (runlength == 0) 2764 break; 2765 } 2766 2767 do { 2768 uint16_t rl; 2769 uint64_t off; 2770 void* data; 2771 2772 if (runlength * Vcb->csum_size > MAX_CSUM_SIZE) 2773 rl = (uint16_t)(MAX_CSUM_SIZE / Vcb->csum_size); 2774 else 2775 rl = (uint16_t)runlength; 2776 2777 data = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * rl, ALLOC_TAG); 2778 if (!data) { 2779 ERR("out of memory\n"); 2780 ExFreePool(bmparr); 2781 ExFreePool(checksums); 2782 return; 2783 } 2784 2785 RtlCopyMemory(data, 
/* Walks every chunk in Vcb->chunks and brings its on-disk accounting up to date.
 *
 * For each chunk, under that chunk's lock:
 *  - its cache is loaded via load_cache_chunk if it is not loaded yet and the
 *    chunk has changed extents or a changed usage figure;
 *  - every pending changed_extent is flushed via flush_changed_extent;
 *  - create_chunk is called if the chunk is marked as created;
 *  - a leftover old_cache fcb is flushed if dirty, then released;
 *  - if `used` differs from `oldused`, the chunk's BLOCK_GROUP_ITEM in the
 *    extent tree is replaced with a copy carrying the new value, and
 *    Vcb->superblock.bytes_used is adjusted by the difference.
 *
 * Vcb->chunk_lock is held shared for the whole walk. Returns STATUS_SUCCESS,
 * or the first failing status; on failure the walk stops at that chunk. */
static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
    LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
    chunk* c;
    KEY searchkey;
    traverse_ptr tp;
    BLOCK_GROUP_ITEM* bgi;
    NTSTATUS Status;

    TRACE("(%p)\n", Vcb);

    ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);

    while (le != &Vcb->chunks) {
        c = CONTAINING_RECORD(le, chunk, list_entry);

        // every error path below must release this lock before jumping to end
        acquire_chunk_lock(c, Vcb);

        if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) {
            Status = load_cache_chunk(Vcb, c, NULL);

            if (!NT_SUCCESS(Status)) {
                ERR("load_cache_chunk returned %08lx\n", Status);
                release_chunk_lock(c, Vcb);
                goto end;
            }
        }

        le2 = c->changed_extents.Flink;
        while (le2 != &c->changed_extents) {
            LIST_ENTRY* le3 = le2->Flink; // flush_changed_extent frees ce on success
            changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);

            Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
            if (!NT_SUCCESS(Status)) {
                ERR("flush_changed_extent returned %08lx\n", Status);
                release_chunk_lock(c, Vcb);
                goto end;
            }

            le2 = le3;
        }

        // This is usually done by update_chunks, but we have to check again in case any new chunks
        // have been allocated since.
        if (c->created) {
            Status = create_chunk(Vcb, c, Irp);
            if (!NT_SUCCESS(Status)) {
                ERR("create_chunk returned %08lx\n", Status);
                release_chunk_lock(c, Vcb);
                goto end;
            }
        }

        if (c->old_cache) {
            if (c->old_cache->dirty) {
                LIST_ENTRY batchlist;

                InitializeListHead(&batchlist);

                Status = flush_fcb(c->old_cache, false, &batchlist, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("flush_fcb returned %08lx\n", Status);
                    release_chunk_lock(c, Vcb);
                    clear_batch_list(Vcb, &batchlist);
                    goto end;
                }

                // NOTE(review): unlike the flush_fcb failure path, batchlist is not
                // cleared here on failure - presumably commit_batch_list releases
                // the list itself; confirm.
                Status = commit_batch_list(Vcb, &batchlist, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("commit_batch_list returned %08lx\n", Status);
                    release_chunk_lock(c, Vcb);
                    goto end;
                }
            }

            free_fcb(c->old_cache);

            if (c->old_cache->refcount == 0)
                reap_fcb(c->old_cache);

            c->old_cache = NULL;
        }

        if (c->used != c->oldused) {
            searchkey.obj_id = c->offset;
            searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
            searchkey.offset = c->chunk_item->size;

            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
            if (!NT_SUCCESS(Status)) {
                ERR("error - find_item returned %08lx\n", Status);
                release_chunk_lock(c, Vcb);
                goto end;
            }

            if (keycmp(searchkey, tp.item->key)) {
                ERR("could not find (%I64x,%x,%I64x) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
                Status = STATUS_INTERNAL_ERROR;
                release_chunk_lock(c, Vcb);
                goto end;
            }

            if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
                ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
                Status = STATUS_INTERNAL_ERROR;
                release_chunk_lock(c, Vcb);
                goto end;
            }

            // The item is replaced rather than edited in place: take a copy of the
            // whole item, patch `used`, delete the old item, insert the copy.
            bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
            if (!bgi) {
                ERR("out of memory\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                release_chunk_lock(c, Vcb);
                goto end;
            }

            RtlCopyMemory(bgi, tp.item->data, tp.item->size);
            bgi->used = c->used;

#ifdef DEBUG_PARANOID
            if (bgi->used & 0x8000000000000000) {
                ERR("refusing to write BLOCK_GROUP_ITEM with negative usage value (%I64x)\n", bgi->used);
                int3;
            }
#endif

            TRACE("adjusting usage of chunk %I64x to %I64x\n", c->offset, c->used);

            Status = delete_tree_item(Vcb, &tp);
            if (!NT_SUCCESS(Status)) {
                ERR("delete_tree_item returned %08lx\n", Status);
                ExFreePool(bgi);
                release_chunk_lock(c, Vcb);
                goto end;
            }

            // tp.item->size is still read after the delete above, to size the new item
            Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp);
            if (!NT_SUCCESS(Status)) {
                ERR("insert_tree_item returned %08lx\n", Status);
                ExFreePool(bgi);
                release_chunk_lock(c, Vcb);
                goto end;
            }

            Vcb->superblock.bytes_used += c->used - c->oldused;
            c->oldused = c->used;
        }

        release_chunk_lock(c, Vcb);

        le = le->Flink;
    }

    Status = STATUS_SUCCESS;

end:
    ExReleaseResourceLite(&Vcb->chunk_lock);

    return Status;
}
/* Copies the key of the first entry in t's item list into *key.
 * If the list is empty, *key is left untouched. */
static void get_first_item(tree* t, KEY* key) {
    LIST_ENTRY* le;

    le = t->itemlist.Flink;
    // the body returns on its first iteration, so this only ever looks at the head entry
    while (le != &t->itemlist) {
        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

        *key = td->key;
        return;
    }
}

/* Splits tree t in two: the items from newfirstitem to the end of the list
 * move to a newly allocated sibling tree.
 *
 *   newfirstitem - first item that goes into the new tree
 *   numitems     - number of items that stay in t
 *   size         - number of bytes of item data that stay in t
 *
 * If t has a parent, an entry for the new tree is inserted into the parent
 * straight after t's own entry. If t was the top of its root, a new parent one
 * level higher is created, holding entries for t and the new tree, and it
 * becomes the root's top-level tree.
 *
 * On success the root's bytes_used grows by one node size.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation failure,
 * or STATUS_INTERNAL_ERROR if a new parent would be needed above level 255.
 *
 * NOTE(review): most of the error returns happen after the new tree has been
 * linked into Vcb->trees and the item list has already been divided; nothing
 * here undoes that - confirm that callers treat a failure as fatal. */
static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, uint32_t numitems, uint32_t size) {
    tree *nt, *pt;
    tree_data* td;
    tree_data* oldlastitem;

    TRACE("splitting tree in %I64x at (%I64x,%x,%I64x)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);

    nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
    if (!nt) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // only internal nodes get the non-paged part (which holds a mutex)
    if (t->header.level > 0) {
        nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
        if (!nt->nonpaged) {
            ERR("out of memory\n");
            ExFreePool(nt);
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        ExInitializeFastMutex(&nt->nonpaged->mutex);
    } else
        nt->nonpaged = NULL;

    // the new tree starts from a copy of t's header, with no on-disk address yet
    RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
    nt->header.address = 0;
    nt->header.generation = Vcb->superblock.generation;
    nt->header.num_items = t->header.num_items - numitems;
    nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;

    nt->has_address = false;
    nt->Vcb = Vcb;
    nt->parent = t->parent;

#ifdef DEBUG_PARANOID
    if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
#endif

    nt->root = t->root;
    nt->new_address = 0;
    nt->has_new_address = false;
    nt->updated_extents = false;
    nt->uniqueness_determined = true;
    nt->is_unique = true;
    nt->list_entry_hash.Flink = NULL;
    nt->buf = NULL;
    InitializeListHead(&nt->itemlist);

    // last item that stays in t
    oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);

    // hand the tail of t's list, from newfirstitem onwards, to nt ...
    nt->itemlist.Flink = &newfirstitem->list_entry;
    nt->itemlist.Blink = t->itemlist.Blink;
    nt->itemlist.Flink->Blink = &nt->itemlist;
    nt->itemlist.Blink->Flink = &nt->itemlist;

    // ... and close t's list after oldlastitem
    t->itemlist.Blink = &oldlastitem->list_entry;
    t->itemlist.Blink->Flink = &t->itemlist;

    nt->size = t->size - size;
    t->size = size;
    t->header.num_items = numitems;
    nt->write = true;

    InsertTailList(&Vcb->trees, &nt->list_entry);

    if (nt->header.level > 0) {
        // internal node: re-parent any loaded child trees that moved across
        LIST_ENTRY* le = nt->itemlist.Flink;

        while (le != &nt->itemlist) {
            tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

            if (td2->treeholder.tree) {
                td2->treeholder.tree->parent = nt;
#ifdef DEBUG_PARANOID
                if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
#endif
            }

            le = le->Flink;
        }
    } else {
        // leaf: give every moved item that is not already marked as inserted
        // its own pool copy of its data, and mark it as inserted
        LIST_ENTRY* le = nt->itemlist.Flink;

        while (le != &nt->itemlist) {
            tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

            if (!td2->inserted && td2->data) {
                uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);

                if (!data) {
                    ERR("out of memory\n");
                    return STATUS_INSUFFICIENT_RESOURCES;
                }

                RtlCopyMemory(data, td2->data, td2->size);
                td2->data = data;
                td2->inserted = true;
            }

            le = le->Flink;
        }
    }

    if (nt->parent) {
        // existing parent: add an entry for nt immediately after t's entry
        td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
        if (!td) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        td->key = newfirstitem->key;

        InsertHeadList(&t->paritem->list_entry, &td->list_entry);

        td->ignore = false;
        td->inserted = true;
        td->treeholder.tree = nt;
        nt->paritem = td;

        nt->parent->header.num_items++;
        nt->parent->size += sizeof(internal_node);

        goto end;
    }

    TRACE("adding new tree parent\n");

    if (nt->header.level == 255) {
        ERR("cannot add parent to tree at level 255\n");
        return STATUS_INTERNAL_ERROR;
    }

    // t was the top of its root: build a new top-level node above t and nt
    pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
    if (!pt) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
    if (!pt->nonpaged) {
        ERR("out of memory\n");
        ExFreePool(pt);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    ExInitializeFastMutex(&pt->nonpaged->mutex);

    RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
    pt->header.address = 0;
    pt->header.num_items = 2;
    pt->header.level = nt->header.level + 1;
    pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;

    pt->has_address = false;
    pt->Vcb = Vcb;
    pt->parent = NULL;
    pt->paritem = NULL;
    pt->root = t->root;
    pt->new_address = 0;
    pt->has_new_address = false;
    pt->updated_extents = false;
    pt->size = pt->header.num_items * sizeof(internal_node);
    pt->uniqueness_determined = true;
    pt->is_unique = true;
    pt->list_entry_hash.Flink = NULL;
    pt->buf = NULL;
    InitializeListHead(&pt->itemlist);

    InsertTailList(&Vcb->trees, &pt->list_entry);

    // first entry of the new parent: points at t
    td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
    if (!td) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    get_first_item(t, &td->key);
    td->ignore = false;
    td->inserted = false;
    td->treeholder.address = 0;
    td->treeholder.generation = Vcb->superblock.generation;
    td->treeholder.tree = t;
    InsertTailList(&pt->itemlist, &td->list_entry);
    t->paritem = td;

    // second entry: points at nt
    td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
    if (!td) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    td->key = newfirstitem->key;
    td->ignore = false;
    td->inserted = false;
    td->treeholder.address = 0;
    td->treeholder.generation = Vcb->superblock.generation;
    td->treeholder.tree = nt;
    InsertTailList(&pt->itemlist, &td->list_entry);
    nt->paritem = td;

    pt->write = true;

    t->root->treeholder.tree = pt;

    t->parent = pt;
    nt->parent = pt;

#ifdef DEBUG_PARANOID
    if (t->parent && t->parent->header.level <= t->header.level) int3;
    if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
#endif

end:
    // account for the additional node created by the split
    t->root->root_item.bytes_used += Vcb->superblock.node_size;

    return STATUS_SUCCESS;
}
nt->header.level) int3; 3195 #endif 3196 3197 end: 3198 t->root->root_item.bytes_used += Vcb->superblock.node_size; 3199 3200 return STATUS_SUCCESS; 3201 } 3202 3203 static NTSTATUS split_tree(device_extension* Vcb, tree* t) { 3204 LIST_ENTRY* le; 3205 uint32_t size, ds, numitems; 3206 3207 size = 0; 3208 numitems = 0; 3209 3210 // FIXME - naïve implementation: maximizes number of filled trees 3211 3212 le = t->itemlist.Flink; 3213 while (le != &t->itemlist) { 3214 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 3215 3216 if (!td->ignore) { 3217 if (t->header.level == 0) 3218 ds = sizeof(leaf_node) + td->size; 3219 else 3220 ds = sizeof(internal_node); 3221 3222 if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) { 3223 ERR("(%I64x,%x,%I64x) in tree %I64x is too large (%x > %Ix)\n", 3224 td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id, 3225 ds, Vcb->superblock.node_size - sizeof(tree_header)); 3226 return STATUS_INTERNAL_ERROR; 3227 } 3228 3229 // FIXME - move back if previous item was deleted item with same key 3230 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header)) 3231 return split_tree_at(Vcb, t, td, numitems, size); 3232 3233 size += ds; 3234 numitems++; 3235 } 3236 3237 le = le->Flink; 3238 } 3239 3240 return STATUS_SUCCESS; 3241 } 3242 3243 bool is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) { 3244 KEY searchkey; 3245 traverse_ptr tp; 3246 NTSTATUS Status; 3247 bool ret = false; 3248 EXTENT_ITEM* ei; 3249 uint8_t* type; 3250 3251 if (t->uniqueness_determined) 3252 return t->is_unique; 3253 3254 if (t->parent && !is_tree_unique(Vcb, t->parent, Irp)) 3255 goto end; 3256 3257 if (t->has_address) { 3258 searchkey.obj_id = t->header.address; 3259 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? 
TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; 3260 searchkey.offset = 0xffffffffffffffff; 3261 3262 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 3263 if (!NT_SUCCESS(Status)) { 3264 ERR("error - find_item returned %08lx\n", Status); 3265 goto end; 3266 } 3267 3268 if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM)) 3269 goto end; 3270 3271 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0)) 3272 goto end; 3273 3274 if (tp.item->size < sizeof(EXTENT_ITEM)) 3275 goto end; 3276 3277 ei = (EXTENT_ITEM*)tp.item->data; 3278 3279 if (ei->refcount > 1) 3280 goto end; 3281 3282 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { 3283 EXTENT_ITEM2* ei2; 3284 3285 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) 3286 goto end; 3287 3288 ei2 = (EXTENT_ITEM2*)&ei[1]; 3289 type = (uint8_t*)&ei2[1]; 3290 } else 3291 type = (uint8_t*)&ei[1]; 3292 3293 if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF) 3294 goto end; 3295 } 3296 3297 ret = true; 3298 3299 end: 3300 t->is_unique = ret; 3301 t->uniqueness_determined = true; 3302 3303 return ret; 3304 } 3305 3306 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, bool* done, bool* done_deletions, PIRP Irp, LIST_ENTRY* rollback) { 3307 LIST_ENTRY* le; 3308 tree_data* nextparitem = NULL; 3309 NTSTATUS Status; 3310 tree *next_tree, *par; 3311 3312 *done = false; 3313 3314 TRACE("trying to amalgamate tree in root %I64x, level %x (size %u)\n", t->root->id, t->header.level, t->size); 3315 3316 // FIXME - doesn't capture everything, as it doesn't ascend 3317 le = t->paritem->list_entry.Flink; 3318 while (le != &t->parent->itemlist) { 3319 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 3320 3321 if (!td->ignore) { 3322 nextparitem = td; 3323 break; 3324 } 3325 3326 le = le->Flink; 3327 } 3328 
3329 if (!nextparitem) 3330 return STATUS_SUCCESS; 3331 3332 TRACE("nextparitem: key = %I64x,%x,%I64x\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset); 3333 3334 if (!nextparitem->treeholder.tree) { 3335 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, NULL); 3336 if (!NT_SUCCESS(Status)) { 3337 ERR("do_load_tree returned %08lx\n", Status); 3338 return Status; 3339 } 3340 } 3341 3342 if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp)) 3343 return STATUS_SUCCESS; 3344 3345 next_tree = nextparitem->treeholder.tree; 3346 3347 if (!next_tree->updated_extents && next_tree->has_address) { 3348 Status = update_tree_extents(Vcb, next_tree, Irp, rollback); 3349 if (!NT_SUCCESS(Status)) { 3350 ERR("update_tree_extents returned %08lx\n", Status); 3351 return Status; 3352 } 3353 } 3354 3355 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) { 3356 // merge two trees into one 3357 3358 t->header.num_items += next_tree->header.num_items; 3359 t->size += next_tree->size; 3360 3361 if (next_tree->header.level > 0) { 3362 le = next_tree->itemlist.Flink; 3363 3364 while (le != &next_tree->itemlist) { 3365 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); 3366 3367 if (td2->treeholder.tree) { 3368 td2->treeholder.tree->parent = t; 3369 #ifdef DEBUG_PARANOID 3370 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3; 3371 #endif 3372 } 3373 3374 td2->inserted = true; 3375 le = le->Flink; 3376 } 3377 } else { 3378 le = next_tree->itemlist.Flink; 3379 3380 while (le != &next_tree->itemlist) { 3381 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); 3382 3383 if (!td2->inserted && td2->data) { 3384 uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG); 3385 3386 if (!data) { 3387 ERR("out of memory\n"); 3388 return STATUS_INSUFFICIENT_RESOURCES; 3389 } 3390 3391 
RtlCopyMemory(data, td2->data, td2->size); 3392 td2->data = data; 3393 td2->inserted = true; 3394 } 3395 3396 le = le->Flink; 3397 } 3398 } 3399 3400 t->itemlist.Blink->Flink = next_tree->itemlist.Flink; 3401 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink; 3402 t->itemlist.Blink = next_tree->itemlist.Blink; 3403 t->itemlist.Blink->Flink = &t->itemlist; 3404 3405 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist; 3406 3407 next_tree->header.num_items = 0; 3408 next_tree->size = 0; 3409 3410 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM 3411 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback); 3412 3413 if (!NT_SUCCESS(Status)) { 3414 ERR("reduce_tree_extent returned %08lx\n", Status); 3415 return Status; 3416 } 3417 } else if (next_tree->has_address) { 3418 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback); 3419 3420 if (!NT_SUCCESS(Status)) { 3421 ERR("reduce_tree_extent returned %08lx\n", Status); 3422 return Status; 3423 } 3424 } 3425 3426 if (!nextparitem->ignore) { 3427 nextparitem->ignore = true; 3428 next_tree->parent->header.num_items--; 3429 next_tree->parent->size -= sizeof(internal_node); 3430 3431 *done_deletions = true; 3432 } 3433 3434 par = next_tree->parent; 3435 while (par) { 3436 par->write = true; 3437 par = par->parent; 3438 } 3439 3440 RemoveEntryList(&nextparitem->list_entry); 3441 ExFreePool(next_tree->paritem); 3442 next_tree->paritem = NULL; 3443 3444 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size; 3445 3446 free_tree(next_tree); 3447 3448 *done = true; 3449 } else { 3450 // rebalance by moving items from second tree into first 3451 ULONG avg_size = (t->size + next_tree->size) / 2; 3452 KEY firstitem = {0, 0, 0}; 3453 bool changed = false; 3454 3455 TRACE("attempting rebalance\n"); 3456 
3457 le = next_tree->itemlist.Flink; 3458 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) { 3459 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 3460 ULONG size; 3461 3462 if (!td->ignore) { 3463 if (next_tree->header.level == 0) 3464 size = sizeof(leaf_node) + td->size; 3465 else 3466 size = sizeof(internal_node); 3467 } else 3468 size = 0; 3469 3470 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) { 3471 RemoveEntryList(&td->list_entry); 3472 InsertTailList(&t->itemlist, &td->list_entry); 3473 3474 if (next_tree->header.level > 0 && td->treeholder.tree) { 3475 td->treeholder.tree->parent = t; 3476 #ifdef DEBUG_PARANOID 3477 if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3; 3478 #endif 3479 } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) { 3480 uint8_t* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG); 3481 3482 if (!data) { 3483 ERR("out of memory\n"); 3484 return STATUS_INSUFFICIENT_RESOURCES; 3485 } 3486 3487 RtlCopyMemory(data, td->data, td->size); 3488 td->data = data; 3489 } 3490 3491 td->inserted = true; 3492 3493 if (!td->ignore) { 3494 next_tree->size -= size; 3495 t->size += size; 3496 next_tree->header.num_items--; 3497 t->header.num_items++; 3498 } 3499 3500 changed = true; 3501 } else 3502 break; 3503 3504 le = next_tree->itemlist.Flink; 3505 } 3506 3507 le = next_tree->itemlist.Flink; 3508 while (le != &next_tree->itemlist) { 3509 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 3510 3511 if (!td->ignore) { 3512 firstitem = td->key; 3513 break; 3514 } 3515 3516 le = le->Flink; 3517 } 3518 3519 // FIXME - once ascension is working, make this work with parent's parent, etc. 
3520 if (next_tree->paritem) 3521 next_tree->paritem->key = firstitem; 3522 3523 par = next_tree; 3524 while (par) { 3525 par->write = true; 3526 par = par->parent; 3527 } 3528 3529 if (changed) 3530 *done = true; 3531 } 3532 3533 return STATUS_SUCCESS; 3534 } 3535 3536 static NTSTATUS update_extent_level(device_extension* Vcb, uint64_t address, tree* t, uint8_t level, PIRP Irp) { 3537 KEY searchkey; 3538 traverse_ptr tp; 3539 NTSTATUS Status; 3540 3541 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { 3542 searchkey.obj_id = address; 3543 searchkey.obj_type = TYPE_METADATA_ITEM; 3544 searchkey.offset = t->header.level; 3545 3546 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 3547 if (!NT_SUCCESS(Status)) { 3548 ERR("error - find_item returned %08lx\n", Status); 3549 return Status; 3550 } 3551 3552 if (!keycmp(tp.item->key, searchkey)) { 3553 EXTENT_ITEM_SKINNY_METADATA* eism; 3554 3555 if (tp.item->size > 0) { 3556 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); 3557 3558 if (!eism) { 3559 ERR("out of memory\n"); 3560 return STATUS_INSUFFICIENT_RESOURCES; 3561 } 3562 3563 RtlCopyMemory(eism, tp.item->data, tp.item->size); 3564 } else 3565 eism = NULL; 3566 3567 Status = delete_tree_item(Vcb, &tp); 3568 if (!NT_SUCCESS(Status)) { 3569 ERR("delete_tree_item returned %08lx\n", Status); 3570 if (eism) ExFreePool(eism); 3571 return Status; 3572 } 3573 3574 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp); 3575 if (!NT_SUCCESS(Status)) { 3576 ERR("insert_tree_item returned %08lx\n", Status); 3577 if (eism) ExFreePool(eism); 3578 return Status; 3579 } 3580 3581 return STATUS_SUCCESS; 3582 } 3583 } 3584 3585 searchkey.obj_id = address; 3586 searchkey.obj_type = TYPE_EXTENT_ITEM; 3587 searchkey.offset = 0xffffffffffffffff; 3588 3589 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 3590 if (!NT_SUCCESS(Status)) { 
/* Brings every tree flagged for writing back to a valid size, in three passes over
 * Vcb->trees:
 *
 *  1. Level by level from 0 upwards (stopping at the first level with no written
 *     trees): empty trees with a parent are deleted, an empty parentless tree above
 *     level 0 is turned into a level-0 tree, and trees larger than a node's capacity
 *     are handed to split_tree().
 *  2. For each level up to the highest one seen in pass 1: non-empty trees with a
 *     parent that are below the minimum fill (half a node, or a quarter for trees in
 *     BTRFS_ROOT_FREE_SPACE) and for which is_tree_unique() is true are repeatedly
 *     passed to try_tree_amalgamate().
 *  3. Only if anything was deleted: from the highest level down to 1, a parentless
 *     tree with exactly one item is dropped and its single child becomes the root's
 *     top tree.
 *
 * Returns STATUS_SUCCESS, or the first failure status from a helper. */
static NTSTATUS do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
    ULONG level, max_level;
    uint32_t min_size, min_size_fst;
    bool empty, done_deletions = false;
    NTSTATUS Status;
    tree* t;

    TRACE("(%p)\n", Vcb);

    max_level = 0;

    for (level = 0; level <= 255; level++) {
        LIST_ENTRY *le, *nextle;

        empty = true;

        TRACE("doing level %lu\n", level);

        le = Vcb->trees.Flink;

        while (le != &Vcb->trees) {
            t = CONTAINING_RECORD(le, tree, list_entry);

            // fetched up front, because t may be freed below
            nextle = le->Flink;

            if (t->write && t->header.level == level) {
                empty = false;

                if (t->header.num_items == 0) {
                    if (t->parent) {
                        // empty non-top tree: release its extent and unhook it from its parent
                        done_deletions = true;

                        TRACE("deleting tree in root %I64x\n", t->root->id);

                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        if (t->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08lx\n", Status);
                                return Status;
                            }

                            t->has_new_address = false;
                        } else if (t->has_address) {
                            Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08lx\n", Status);
                                return Status;
                            }

                            t->has_address = false;
                        }

                        // the parent loses one internal_node entry, unless it was already discounted
                        if (!t->paritem->ignore) {
                            t->paritem->ignore = true;
                            t->parent->header.num_items--;
                            t->parent->size -= sizeof(internal_node);
                        }

                        RemoveEntryList(&t->paritem->list_entry);
                        ExFreePool(t->paritem);
                        t->paritem = NULL;

                        free_tree(t);
                    } else if (t->header.level != 0) {
                        // empty top tree above level 0: keep it, but as a level-0 tree
                        if (t->has_new_address) {
                            Status = update_extent_level(Vcb, t->new_address, t, 0, Irp);

                            if (!NT_SUCCESS(Status)) {
                                ERR("update_extent_level returned %08lx\n", Status);
                                return Status;
                            }
                        }

                        t->header.level = 0;
                    }
                } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
                    TRACE("splitting overlarge tree (%x > %Ix)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));

                    // extents of this tree and of its ancestors are brought up to date before splitting
                    if (!t->updated_extents && t->has_address) {
                        Status = update_tree_extents_recursive(Vcb, t, Irp, rollback);
                        if (!NT_SUCCESS(Status)) {
                            ERR("update_tree_extents_recursive returned %08lx\n", Status);
                            return Status;
                        }
                    }

                    Status = split_tree(Vcb, t);

                    if (!NT_SUCCESS(Status)) {
                        ERR("split_tree returned %08lx\n", Status);
                        return Status;
                    }
                }
            }

            le = nextle;
        }

        if (!empty) {
            max_level = level;
        } else {
            // no written trees at this level, so higher levels are not examined
            TRACE("nothing found for level %lu\n", level);
            break;
        }
    }

    // minimum fill: half of a node's capacity, or a quarter for the free-space root
    min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
    min_size_fst = (Vcb->superblock.node_size - sizeof(tree_header)) / 4;

    for (level = 0; level <= max_level; level++) {
        LIST_ENTRY* le;

        le = Vcb->trees.Flink;

        while (le != &Vcb->trees) {
            t = CONTAINING_RECORD(le, tree, list_entry);

            if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent &&
                ((t->size < min_size && t->root->id != BTRFS_ROOT_FREE_SPACE) || (t->size < min_size_fst && t->root->id == BTRFS_ROOT_FREE_SPACE)) &&
                is_tree_unique(Vcb, t, Irp)) {
                bool done;

                // keep pulling from the next sibling for as long as that achieves something
                // NOTE(review): the loop condition uses min_size even for the free-space root,
                // which was admitted using min_size_fst - confirm this is intended
                do {
                    Status = try_tree_amalgamate(Vcb, t, &done, &done_deletions, Irp, rollback);
                    if (!NT_SUCCESS(Status)) {
                        ERR("try_tree_amalgamate returned %08lx\n", Status);
                        return Status;
                    }
                } while (done && t->size < min_size);
            }

            // read after the call: try_tree_amalgamate may have freed t's sibling
            le = le->Flink;
        }
    }

    // simplify trees if top tree only has one entry

    if (done_deletions) {
        for (level = max_level; level > 0; level--) {
            LIST_ENTRY *le, *nextle;

            le = Vcb->trees.Flink;
            while (le != &Vcb->trees) {
                // fetched up front, because t may be freed below
                nextle = le->Flink;
                t = CONTAINING_RECORD(le, tree, list_entry);

                if (t->write && t->header.level == level) {
                    if (!t->parent && t->header.num_items == 1) {
                        LIST_ENTRY* le2 = t->itemlist.Flink;
                        tree_data* td = NULL;
                        tree* child_tree = NULL;

                        // find the one item which isn't ignored
                        // NOTE(review): td stays NULL (or points at an ignored item) if no live
                        // item is found; the code below relies on num_items == 1 being accurate
                        while (le2 != &t->itemlist) {
                            td = CONTAINING_RECORD(le2, tree_data, list_entry);
                            if (!td->ignore)
                                break;
                            le2 = le2->Flink;
                        }

                        TRACE("deleting top-level tree in root %I64x with one item\n", t->root->id);

                        if (t->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08lx\n", Status);
                                return Status;
                            }

                            t->has_new_address = false;
                        } else if (t->has_address) {
                            Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08lx\n", Status);
                                return Status;
                            }

                            t->has_address = false;
                        }

                        if (!td->treeholder.tree) { // load first item if not already loaded
                            KEY searchkey = {0,0,0};
                            traverse_ptr tp;

                            // the result in tp is not used; presumably the search loads the
                            // child tree as a side effect - verify against find_item
                            Status = find_item(Vcb, t->root, &tp, &searchkey, false, Irp);
                            if (!NT_SUCCESS(Status)) {
                                ERR("error - find_item returned %08lx\n", Status);
                                return Status;
                            }
                        }

                        child_tree = td->treeholder.tree;

                        // detach the child before its old parent goes away
                        if (child_tree) {
                            child_tree->parent = NULL;
                            child_tree->paritem = NULL;
                        }

                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        free_tree(t);

                        // the child is now the top tree of its root
                        if (child_tree)
                            child_tree->root->treeholder.tree = child_tree;
                    }
                }

                le = nextle;
            }
        }
    }

    return STATUS_SUCCESS;
}
parent->header.tree_id : r->id, level, Irp, rollback); 3935 3936 if (!NT_SUCCESS(Status)) { 3937 ERR("reduce_tree_extent(%I64x) returned %08lx\n", th->address, Status); 3938 return Status; 3939 } 3940 } 3941 3942 return STATUS_SUCCESS; 3943 } 3944 3945 static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) { 3946 NTSTATUS Status; 3947 KEY searchkey; 3948 traverse_ptr tp; 3949 3950 Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback); 3951 if (!NT_SUCCESS(Status)) { 3952 ERR("remove_root_extents returned %08lx\n", Status); 3953 return Status; 3954 } 3955 3956 // remove entries in uuid root (tree 9) 3957 if (Vcb->uuid_root) { 3958 RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(uint64_t)); 3959 searchkey.obj_type = TYPE_SUBVOL_UUID; 3960 RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t)); 3961 3962 if (searchkey.obj_id != 0 || searchkey.offset != 0) { 3963 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp); 3964 if (!NT_SUCCESS(Status)) { 3965 WARN("find_item returned %08lx\n", Status); 3966 } else { 3967 if (!keycmp(tp.item->key, searchkey)) { 3968 Status = delete_tree_item(Vcb, &tp); 3969 if (!NT_SUCCESS(Status)) { 3970 ERR("delete_tree_item returned %08lx\n", Status); 3971 return Status; 3972 } 3973 } else 3974 WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 3975 } 3976 } 3977 3978 if (r->root_item.rtransid > 0) { 3979 RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid.uuid[0], sizeof(uint64_t)); 3980 searchkey.obj_type = TYPE_SUBVOL_REC_UUID; 3981 RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t)); 3982 3983 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp); 3984 if (!NT_SUCCESS(Status)) 3985 WARN("find_item returned %08lx\n", Status); 3986 else { 3987 if 
(!keycmp(tp.item->key, searchkey)) { 3988 if (tp.item->size == sizeof(uint64_t)) { 3989 uint64_t* id = (uint64_t*)tp.item->data; 3990 3991 if (*id == r->id) { 3992 Status = delete_tree_item(Vcb, &tp); 3993 if (!NT_SUCCESS(Status)) { 3994 ERR("delete_tree_item returned %08lx\n", Status); 3995 return Status; 3996 } 3997 } 3998 } else if (tp.item->size > sizeof(uint64_t)) { 3999 ULONG i; 4000 uint64_t* ids = (uint64_t*)tp.item->data; 4001 4002 for (i = 0; i < tp.item->size / sizeof(uint64_t); i++) { 4003 if (ids[i] == r->id) { 4004 uint64_t* ne; 4005 4006 ne = ExAllocatePoolWithTag(PagedPool, tp.item->size - sizeof(uint64_t), ALLOC_TAG); 4007 if (!ne) { 4008 ERR("out of memory\n"); 4009 return STATUS_INSUFFICIENT_RESOURCES; 4010 } 4011 4012 if (i > 0) 4013 RtlCopyMemory(ne, ids, sizeof(uint64_t) * i); 4014 4015 if ((i + 1) * sizeof(uint64_t) < tp.item->size) 4016 RtlCopyMemory(&ne[i], &ids[i + 1], tp.item->size - ((i + 1) * sizeof(uint64_t))); 4017 4018 Status = delete_tree_item(Vcb, &tp); 4019 if (!NT_SUCCESS(Status)) { 4020 ERR("delete_tree_item returned %08lx\n", Status); 4021 ExFreePool(ne); 4022 return Status; 4023 } 4024 4025 Status = insert_tree_item(Vcb, Vcb->uuid_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 4026 ne, tp.item->size - sizeof(uint64_t), NULL, Irp); 4027 if (!NT_SUCCESS(Status)) { 4028 ERR("insert_tree_item returned %08lx\n", Status); 4029 ExFreePool(ne); 4030 return Status; 4031 } 4032 4033 break; 4034 } 4035 } 4036 } 4037 } else 4038 WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 4039 } 4040 } 4041 } 4042 4043 // delete ROOT_ITEM 4044 4045 searchkey.obj_id = r->id; 4046 searchkey.obj_type = TYPE_ROOT_ITEM; 4047 searchkey.offset = 0xffffffffffffffff; 4048 4049 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 4050 if (!NT_SUCCESS(Status)) { 4051 ERR("find_item returned %08lx\n", Status); 4052 return Status; 4053 } 4054 4055 if 
(tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 4056 Status = delete_tree_item(Vcb, &tp); 4057 4058 if (!NT_SUCCESS(Status)) { 4059 ERR("delete_tree_item returned %08lx\n", Status); 4060 return Status; 4061 } 4062 } else 4063 WARN("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 4064 4065 // delete items in tree cache 4066 4067 free_trees_root(Vcb, r); 4068 4069 return STATUS_SUCCESS; 4070 } 4071 4072 static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { 4073 LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2; 4074 NTSTATUS Status; 4075 4076 while (le != &Vcb->drop_roots) { 4077 root* r = CONTAINING_RECORD(le, root, list_entry); 4078 4079 le2 = le->Flink; 4080 4081 Status = drop_root(Vcb, r, Irp, rollback); 4082 if (!NT_SUCCESS(Status)) { 4083 ERR("drop_root(%I64x) returned %08lx\n", r->id, Status); 4084 return Status; 4085 } 4086 4087 le = le2; 4088 } 4089 4090 return STATUS_SUCCESS; 4091 } 4092 4093 NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp) { 4094 KEY searchkey; 4095 traverse_ptr tp; 4096 DEV_ITEM* di; 4097 NTSTATUS Status; 4098 4099 searchkey.obj_id = 1; 4100 searchkey.obj_type = TYPE_DEV_ITEM; 4101 searchkey.offset = device->devitem.dev_id; 4102 4103 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp); 4104 if (!NT_SUCCESS(Status)) { 4105 ERR("error - find_item returned %08lx\n", Status); 4106 return Status; 4107 } 4108 4109 if (keycmp(tp.item->key, searchkey)) { 4110 ERR("error - could not find DEV_ITEM for device %I64x\n", device->devitem.dev_id); 4111 return STATUS_INTERNAL_ERROR; 4112 } 4113 4114 Status = delete_tree_item(Vcb, &tp); 4115 if (!NT_SUCCESS(Status)) { 4116 ERR("delete_tree_item returned %08lx\n", Status); 4117 return Status; 4118 } 4119 4120 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG); 4121 if (!di) { 4122 ERR("out of memory\n"); 4123 return 
STATUS_INSUFFICIENT_RESOURCES; 4124 } 4125 4126 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM)); 4127 4128 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp); 4129 if (!NT_SUCCESS(Status)) { 4130 ERR("insert_tree_item returned %08lx\n", Status); 4131 ExFreePool(di); 4132 return Status; 4133 } 4134 4135 return STATUS_SUCCESS; 4136 } 4137 4138 static void regen_bootstrap(device_extension* Vcb) { 4139 sys_chunk* sc2; 4140 USHORT i = 0; 4141 LIST_ENTRY* le; 4142 4143 i = 0; 4144 le = Vcb->sys_chunks.Flink; 4145 while (le != &Vcb->sys_chunks) { 4146 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); 4147 4148 TRACE("%I64x,%x,%I64x\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset); 4149 4150 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY)); 4151 i += sizeof(KEY); 4152 4153 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size); 4154 i += sc2->size; 4155 4156 le = le->Flink; 4157 } 4158 } 4159 4160 static NTSTATUS add_to_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset, void* data, uint16_t size) { 4161 sys_chunk* sc; 4162 LIST_ENTRY* le; 4163 4164 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) { 4165 ERR("error - bootstrap is full\n"); 4166 return STATUS_INTERNAL_ERROR; 4167 } 4168 4169 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG); 4170 if (!sc) { 4171 ERR("out of memory\n"); 4172 return STATUS_INSUFFICIENT_RESOURCES; 4173 } 4174 4175 sc->key.obj_id = obj_id; 4176 sc->key.obj_type = obj_type; 4177 sc->key.offset = offset; 4178 sc->size = size; 4179 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG); 4180 if (!sc->data) { 4181 ERR("out of memory\n"); 4182 ExFreePool(sc); 4183 return STATUS_INSUFFICIENT_RESOURCES; 4184 } 4185 4186 RtlCopyMemory(sc->data, data, sc->size); 4187 4188 le = Vcb->sys_chunks.Flink; 4189 while (le != &Vcb->sys_chunks) { 4190 
sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); 4191 4192 if (keycmp(sc2->key, sc->key) == 1) 4193 break; 4194 4195 le = le->Flink; 4196 } 4197 InsertTailList(le, &sc->list_entry); 4198 4199 Vcb->superblock.n += sizeof(KEY) + size; 4200 4201 regen_bootstrap(Vcb); 4202 4203 return STATUS_SUCCESS; 4204 } 4205 4206 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp) { 4207 CHUNK_ITEM* ci; 4208 CHUNK_ITEM_STRIPE* cis; 4209 BLOCK_GROUP_ITEM* bgi; 4210 uint16_t i, factor; 4211 NTSTATUS Status; 4212 4213 ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG); 4214 if (!ci) { 4215 ERR("out of memory\n"); 4216 return STATUS_INSUFFICIENT_RESOURCES; 4217 } 4218 4219 RtlCopyMemory(ci, c->chunk_item, c->size); 4220 4221 Status = insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp); 4222 if (!NT_SUCCESS(Status)) { 4223 ERR("insert_tree_item failed\n"); 4224 ExFreePool(ci); 4225 return Status; 4226 } 4227 4228 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) { 4229 Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size); 4230 if (!NT_SUCCESS(Status)) { 4231 ERR("add_to_bootstrap returned %08lx\n", Status); 4232 return Status; 4233 } 4234 } 4235 4236 // add BLOCK_GROUP_ITEM to tree 2 4237 4238 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG); 4239 if (!bgi) { 4240 ERR("out of memory\n"); 4241 return STATUS_INSUFFICIENT_RESOURCES; 4242 } 4243 4244 bgi->used = c->used; 4245 bgi->chunk_tree = 0x100; 4246 bgi->flags = c->chunk_item->type; 4247 4248 Status = insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp); 4249 if (!NT_SUCCESS(Status)) { 4250 ERR("insert_tree_item failed\n"); 4251 ExFreePool(bgi); 4252 return Status; 4253 } 4254 4255 if (c->chunk_item->type & BLOCK_FLAG_RAID0) 4256 factor = c->chunk_item->num_stripes; 4257 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 
4258 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; 4259 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) 4260 factor = c->chunk_item->num_stripes - 1; 4261 else if (c->chunk_item->type & BLOCK_FLAG_RAID6) 4262 factor = c->chunk_item->num_stripes - 2; 4263 else // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4 4264 factor = 1; 4265 4266 // add DEV_EXTENTs to tree 4 4267 4268 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 4269 4270 for (i = 0; i < c->chunk_item->num_stripes; i++) { 4271 DEV_EXTENT* de; 4272 4273 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG); 4274 if (!de) { 4275 ERR("out of memory\n"); 4276 return STATUS_INSUFFICIENT_RESOURCES; 4277 } 4278 4279 de->chunktree = Vcb->chunk_root->id; 4280 de->objid = 0x100; 4281 de->address = c->offset; 4282 de->length = c->chunk_item->size / factor; 4283 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid; 4284 4285 Status = insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp); 4286 if (!NT_SUCCESS(Status)) { 4287 ERR("insert_tree_item returned %08lx\n", Status); 4288 ExFreePool(de); 4289 return Status; 4290 } 4291 4292 // FIXME - no point in calling this twice for the same device 4293 Status = update_dev_item(Vcb, c->devices[i], Irp); 4294 if (!NT_SUCCESS(Status)) { 4295 ERR("update_dev_item returned %08lx\n", Status); 4296 return Status; 4297 } 4298 } 4299 4300 c->created = false; 4301 c->oldused = c->used; 4302 4303 Vcb->superblock.bytes_used += c->used; 4304 4305 return STATUS_SUCCESS; 4306 } 4307 4308 static void remove_from_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset) { 4309 sys_chunk* sc2; 4310 LIST_ENTRY* le; 4311 4312 le = Vcb->sys_chunks.Flink; 4313 while (le != &Vcb->sys_chunks) { 4314 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); 4315 4316 if (sc2->key.obj_id == obj_id && sc2->key.obj_type == obj_type && sc2->key.offset 
== offset) { 4317 RemoveEntryList(&sc2->list_entry); 4318 4319 Vcb->superblock.n -= sizeof(KEY) + sc2->size; 4320 4321 ExFreePool(sc2->data); 4322 ExFreePool(sc2); 4323 regen_bootstrap(Vcb); 4324 return; 4325 } 4326 4327 le = le->Flink; 4328 } 4329 } 4330 4331 static NTSTATUS set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name, uint16_t namelen, 4332 uint32_t crc32, uint8_t* data, uint16_t datalen) { 4333 NTSTATUS Status; 4334 uint16_t xasize; 4335 DIR_ITEM* xa; 4336 4337 TRACE("(%p, %I64x, %I64x, %.*s, %08x, %p, %u)\n", Vcb, subvol->id, inode, namelen, name, crc32, data, datalen); 4338 4339 xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen + datalen; 4340 4341 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG); 4342 if (!xa) { 4343 ERR("out of memory\n"); 4344 return STATUS_INSUFFICIENT_RESOURCES; 4345 } 4346 4347 xa->key.obj_id = 0; 4348 xa->key.obj_type = 0; 4349 xa->key.offset = 0; 4350 xa->transid = Vcb->superblock.generation; 4351 xa->m = datalen; 4352 xa->n = namelen; 4353 xa->type = BTRFS_TYPE_EA; 4354 RtlCopyMemory(xa->name, name, namelen); 4355 RtlCopyMemory(xa->name + namelen, data, datalen); 4356 4357 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr); 4358 if (!NT_SUCCESS(Status)) { 4359 ERR("insert_tree_item_batch returned %08lx\n", Status); 4360 ExFreePool(xa); 4361 return Status; 4362 } 4363 4364 return STATUS_SUCCESS; 4365 } 4366 4367 static NTSTATUS delete_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name, 4368 uint16_t namelen, uint32_t crc32) { 4369 NTSTATUS Status; 4370 uint16_t xasize; 4371 DIR_ITEM* xa; 4372 4373 TRACE("(%p, %I64x, %I64x, %.*s, %08x)\n", Vcb, subvol->id, inode, namelen, name, crc32); 4374 4375 xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen; 4376 4377 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG); 4378 if (!xa) { 4379 ERR("out of memory\n"); 
4380 return STATUS_INSUFFICIENT_RESOURCES; 4381 } 4382 4383 xa->key.obj_id = 0; 4384 xa->key.obj_type = 0; 4385 xa->key.offset = 0; 4386 xa->transid = Vcb->superblock.generation; 4387 xa->m = 0; 4388 xa->n = namelen; 4389 xa->type = BTRFS_TYPE_EA; 4390 RtlCopyMemory(xa->name, name, namelen); 4391 4392 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_DeleteXattr); 4393 if (!NT_SUCCESS(Status)) { 4394 ERR("insert_tree_item_batch returned %08lx\n", Status); 4395 ExFreePool(xa); 4396 return Status; 4397 } 4398 4399 return STATUS_SUCCESS; 4400 } 4401 4402 static NTSTATUS insert_sparse_extent(fcb* fcb, LIST_ENTRY* batchlist, uint64_t start, uint64_t length) { 4403 NTSTATUS Status; 4404 EXTENT_DATA* ed; 4405 EXTENT_DATA2* ed2; 4406 4407 TRACE("((%I64x, %I64x), %I64x, %I64x)\n", fcb->subvol->id, fcb->inode, start, length); 4408 4409 ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); 4410 if (!ed) { 4411 ERR("out of memory\n"); 4412 return STATUS_INSUFFICIENT_RESOURCES; 4413 } 4414 4415 ed->generation = fcb->Vcb->superblock.generation; 4416 ed->decoded_size = length; 4417 ed->compression = BTRFS_COMPRESSION_NONE; 4418 ed->encryption = BTRFS_ENCRYPTION_NONE; 4419 ed->encoding = BTRFS_ENCODING_NONE; 4420 ed->type = EXTENT_TYPE_REGULAR; 4421 4422 ed2 = (EXTENT_DATA2*)ed->data; 4423 ed2->address = 0; 4424 ed2->size = 0; 4425 ed2->offset = 0; 4426 ed2->num_bytes = length; 4427 4428 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), Batch_Insert); 4429 if (!NT_SUCCESS(Status)) { 4430 ERR("insert_tree_item_batch returned %08lx\n", Status); 4431 ExFreePool(ed); 4432 return Status; 4433 } 4434 4435 return STATUS_SUCCESS; 4436 } 4437 4438 static NTSTATUS split_batch_item_list(batch_item_ind* bii) { 4439 LIST_ENTRY* le; 4440 unsigned int i = 0; 4441 LIST_ENTRY* midpoint = NULL; 
4442 batch_item_ind* bii2; 4443 batch_item* midpoint_item; 4444 LIST_ENTRY* before_midpoint; 4445 4446 le = bii->items.Flink; 4447 while (le != &bii->items) { 4448 if (i >= bii->num_items / 2) { 4449 midpoint = le; 4450 break; 4451 } 4452 4453 i++; 4454 4455 le = le->Flink; 4456 } 4457 4458 if (!midpoint) 4459 return STATUS_SUCCESS; 4460 4461 // make sure items on either side of split don't have same key 4462 4463 while (midpoint->Blink != &bii->items) { 4464 batch_item* item = CONTAINING_RECORD(midpoint, batch_item, list_entry); 4465 batch_item* prev = CONTAINING_RECORD(midpoint->Blink, batch_item, list_entry); 4466 4467 if (item->key.obj_id != prev->key.obj_id) 4468 break; 4469 4470 if (item->key.obj_type != prev->key.obj_type) 4471 break; 4472 4473 if (item->key.offset != prev->key.offset) 4474 break; 4475 4476 midpoint = midpoint->Blink; 4477 i--; 4478 } 4479 4480 if (midpoint->Blink == &bii->items) 4481 return STATUS_SUCCESS; 4482 4483 bii2 = ExAllocatePoolWithTag(PagedPool, sizeof(batch_item_ind), ALLOC_TAG); 4484 if (!bii2) { 4485 ERR("out of memory\n"); 4486 return STATUS_INSUFFICIENT_RESOURCES; 4487 } 4488 4489 midpoint_item = CONTAINING_RECORD(midpoint, batch_item, list_entry); 4490 4491 bii2->key.obj_id = midpoint_item->key.obj_id; 4492 bii2->key.obj_type = midpoint_item->key.obj_type; 4493 bii2->key.offset = midpoint_item->key.offset; 4494 4495 bii2->num_items = bii->num_items - i; 4496 bii->num_items = i; 4497 4498 before_midpoint = midpoint->Blink; 4499 4500 bii2->items.Flink = midpoint; 4501 midpoint->Blink = &bii2->items; 4502 bii2->items.Blink = bii->items.Blink; 4503 bii->items.Blink->Flink = &bii2->items; 4504 4505 bii->items.Blink = before_midpoint; 4506 before_midpoint->Flink = &bii->items; 4507 4508 InsertHeadList(&bii->list_entry, &bii2->list_entry); 4509 4510 return STATUS_SUCCESS; 4511 } 4512 4513 #ifdef _MSC_VER 4514 #pragma warning(push) 4515 #pragma warning(suppress: 28194) 4516 #endif 4517 static NTSTATUS 
insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, uint64_t objid, 4518 uint8_t objtype, uint64_t offset, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data, 4519 uint16_t datalen, enum batch_operation operation) { 4520 LIST_ENTRY* le; 4521 batch_root* br = NULL; 4522 batch_item* bi; 4523 4524 le = batchlist->Flink; 4525 while (le != batchlist) { 4526 batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry); 4527 4528 if (br2->r == r) { 4529 br = br2; 4530 break; 4531 } 4532 4533 le = le->Flink; 4534 } 4535 4536 if (!br) { 4537 br = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG); 4538 if (!br) { 4539 ERR("out of memory\n"); 4540 return STATUS_INSUFFICIENT_RESOURCES; 4541 } 4542 4543 br->r = r; 4544 InitializeListHead(&br->items_ind); 4545 InsertTailList(batchlist, &br->list_entry); 4546 } 4547 4548 if (IsListEmpty(&br->items_ind)) { 4549 batch_item_ind* bii; 4550 4551 bii = ExAllocatePoolWithTag(PagedPool, sizeof(batch_item_ind), ALLOC_TAG); 4552 if (!bii) { 4553 ERR("out of memory\n"); 4554 return STATUS_INSUFFICIENT_RESOURCES; 4555 } 4556 4557 bii->key.obj_id = 0; 4558 bii->key.obj_type = 0; 4559 bii->key.offset = 0; 4560 InitializeListHead(&bii->items); 4561 bii->num_items = 0; 4562 InsertTailList(&br->items_ind, &bii->list_entry); 4563 } 4564 4565 bi = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside); 4566 if (!bi) { 4567 ERR("out of memory\n"); 4568 return STATUS_INSUFFICIENT_RESOURCES; 4569 } 4570 4571 bi->key.obj_id = objid; 4572 bi->key.obj_type = objtype; 4573 bi->key.offset = offset; 4574 bi->data = data; 4575 bi->datalen = datalen; 4576 bi->operation = operation; 4577 4578 le = br->items_ind.Blink; 4579 while (le != &br->items_ind) { 4580 LIST_ENTRY* le2; 4581 batch_item_ind* bii = CONTAINING_RECORD(le, batch_item_ind, list_entry); 4582 4583 if (keycmp(bii->key, bi->key) == 1) { 4584 le = le->Blink; 4585 continue; 4586 } 4587 4588 le2 = bii->items.Blink; 4589 while (le2 != &bii->items) 
{ 4590 batch_item* bi2 = CONTAINING_RECORD(le2, batch_item, list_entry); 4591 int cmp = keycmp(bi2->key, bi->key); 4592 4593 if (cmp == -1 || (cmp == 0 && bi->operation >= bi2->operation)) { 4594 InsertHeadList(&bi2->list_entry, &bi->list_entry); 4595 bii->num_items++; 4596 goto end; 4597 } 4598 4599 le2 = le2->Blink; 4600 } 4601 4602 InsertHeadList(&bii->items, &bi->list_entry); 4603 bii->num_items++; 4604 4605 end: 4606 if (bii->num_items > BATCH_ITEM_LIMIT) 4607 return split_batch_item_list(bii); 4608 4609 return STATUS_SUCCESS; 4610 } 4611 4612 return STATUS_INTERNAL_ERROR; 4613 } 4614 #ifdef _MSC_VER 4615 #pragma warning(pop) 4616 #endif 4617 4618 typedef struct { 4619 uint64_t address; 4620 uint64_t length; 4621 uint64_t offset; 4622 bool changed; 4623 chunk* chunk; 4624 uint64_t skip_start; 4625 uint64_t skip_end; 4626 LIST_ENTRY list_entry; 4627 } extent_range; 4628 4629 static void rationalize_extents(fcb* fcb, PIRP Irp) { 4630 LIST_ENTRY* le; 4631 LIST_ENTRY extent_ranges; 4632 extent_range* er; 4633 bool changed = false, truncating = false; 4634 uint32_t num_extents = 0; 4635 4636 InitializeListHead(&extent_ranges); 4637 4638 le = fcb->extents.Flink; 4639 while (le != &fcb->extents) { 4640 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 4641 4642 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) { 4643 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 4644 4645 if (ed2->size != 0) { 4646 LIST_ENTRY* le2; 4647 4648 le2 = extent_ranges.Flink; 4649 while (le2 != &extent_ranges) { 4650 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry); 4651 4652 if (er2->address == ed2->address) { 4653 er2->skip_start = min(er2->skip_start, ed2->offset); 4654 er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes); 4655 goto cont; 4656 } else if (er2->address > ed2->address) 4657 break; 4658 4659 
le2 = le2->Flink;
                }

                // first reference to this extent: record it, keeping the list sorted by address
                er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside?
                if (!er) {
                    ERR("out of memory\n");
                    goto end;
                }

                er->address = ed2->address;
                er->length = ed2->size;
                er->offset = ext->offset - ed2->offset;
                er->changed = false;
                er->chunk = NULL;
                er->skip_start = ed2->offset;
                er->skip_end = ed2->size - ed2->offset - ed2->num_bytes;

                if (er->skip_start != 0 || er->skip_end != 0)
                    truncating = true;

                InsertHeadList(le2->Blink, &er->list_entry);
                num_extents++;
            }
        }

cont:
        le = le->Flink;
    }

    // a single extent with nothing to trim cannot be improved
    if (num_extents == 0 || (num_extents == 1 && !truncating))
        goto end;

    // resolve the chunk of every range, reusing a lookup for later ranges in the same chunk
    le = extent_ranges.Flink;
    while (le != &extent_ranges) {
        er = CONTAINING_RECORD(le, extent_range, list_entry);

        if (!er->chunk) {
            LIST_ENTRY* le2;

            er->chunk = get_chunk_from_address(fcb->Vcb, er->address);

            if (!er->chunk) {
                ERR("get_chunk_from_address(%I64x) failed\n", er->address);
                goto end;
            }

            le2 = le->Flink;
            while (le2 != &extent_ranges) {
                extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);

                if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size)
                    er2->chunk = er->chunk;

                le2 = le2->Flink;
            }
        }

        le = le->Flink;
    }

    if (truncating) {
        // truncate beginning or end of extent if unused

        le = extent_ranges.Flink;
        while (le != &extent_ranges) {
            er = CONTAINING_RECORD(le, extent_range, list_entry);

            if (er->skip_start > 0) {
                // re-point every EXTENT_DATA using this extent at the shortened extent
                LIST_ENTRY* le2 = fcb->extents.Flink;
                while (le2 != &fcb->extents) {
                    extent* ext = CONTAINING_RECORD(le2, extent, list_entry);

                    if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;

                        if (ed2->size != 0 && ed2->address == er->address) {
                            NTSTATUS Status;

                            // drop the reference to the old extent...
                            Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                               -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
                            if (!NT_SUCCESS(Status)) {
                                ERR("update_changed_extent_ref returned %08lx\n", Status);
                                goto end;
                            }

                            ext->extent_data.decoded_size -= er->skip_start;
                            ed2->size -= er->skip_start;
                            ed2->address += er->skip_start;
                            ed2->offset -= er->skip_start;

                            // ...and add one to the extent as it now stands
                            add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                   1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
                        }
                    }

                    le2 = le2->Flink;
                }

                // NULL csum - presumably removes the checksums of the trimmed sectors; confirm against add_checksum_entry
                if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
                    add_checksum_entry(fcb->Vcb, er->address, (ULONG)(er->skip_start >> fcb->Vcb->sector_shift), NULL, NULL);

                acquire_chunk_lock(er->chunk, fcb->Vcb);

                if (!er->chunk->cache_loaded) {
                    NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);

                    if (!NT_SUCCESS(Status)) {
                        ERR("load_cache_chunk returned %08lx\n", Status);
                        release_chunk_lock(er->chunk, fcb->Vcb);
                        goto end;
                    }
                }

                // return the trimmed head to the chunk's space list
                er->chunk->used -= er->skip_start;

                space_list_add(er->chunk, er->address, er->skip_start, NULL);

                release_chunk_lock(er->chunk, fcb->Vcb);

                er->address += er->skip_start;
                er->length -= er->skip_start;
            }

            if (er->skip_end > 0) {
                // same procedure for the unused tail; the extent address is unaffected
                LIST_ENTRY* le2 = fcb->extents.Flink;
                while (le2 != &fcb->extents) {
                    extent* ext = CONTAINING_RECORD(le2, extent, list_entry);

                    if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;

                        if (ed2->size != 0 && ed2->address == er->address) {
                            NTSTATUS Status;

                            Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                               -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
                            if (!NT_SUCCESS(Status)) {
                                ERR("update_changed_extent_ref returned %08lx\n", Status);
                                goto end;
                            }

                            ext->extent_data.decoded_size -= er->skip_end;
                            ed2->size -= er->skip_end;

                            add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                   1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
                        }
                    }

                    le2 = le2->Flink;
                }

                if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
                    add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, (ULONG)(er->skip_end >> fcb->Vcb->sector_shift), NULL, NULL);

                acquire_chunk_lock(er->chunk, fcb->Vcb);

                if (!er->chunk->cache_loaded) {
                    NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);

                    if (!NT_SUCCESS(Status)) {
                        ERR("load_cache_chunk returned %08lx\n", Status);
                        release_chunk_lock(er->chunk, fcb->Vcb);
                        goto end;
                    }
                }

                er->chunk->used -= er->skip_end;

                space_list_add(er->chunk, er->address + er->length - er->skip_end, er->skip_end, NULL);

                release_chunk_lock(er->chunk, fcb->Vcb);

                er->length -= er->skip_end;
            }

            le = le->Flink;
        }
    }

    if (num_extents < 2)
        goto end;

    // merge together adjacent extents
    le = extent_ranges.Flink;
    while (le != &extent_ranges) {
        er = CONTAINING_RECORD(le, extent_range, list_entry);

        if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) {
            extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry);

            if (er->chunk == er2->chunk) {
                // contiguous on disk, and the next range does not start earlier in the file than this one ends
                if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) {
                    if (er->length + er2->length <= MAX_EXTENT_SIZE) {
                        er->length += er2->length;
                        er->changed = true;

                        RemoveEntryList(&er2->list_entry);
                        ExFreePool(er2);

                        changed = true;
                        continue; // stay on er: it may absorb the following range too
                    }
                }
            }
        }

        le = le->Flink;
    }

    if (!changed)
        goto end;

    // re-point each EXTENT_DATA lying inside a merged range at the merged extent
    le = fcb->extents.Flink;
    while (le != &fcb->extents) {
        extent* ext = CONTAINING_RECORD(le, extent, list_entry);

        if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;

            if (ed2->size != 0) {
                LIST_ENTRY* le2;

                le2 = extent_ranges.Flink;
                while (le2 != &extent_ranges) {
                    extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);

                    if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) {
                        NTSTATUS Status;

                        Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                                           -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
                        if (!NT_SUCCESS(Status)) {
                            ERR("update_changed_extent_ref returned %08lx\n", Status);
                            goto end;
                        }

                        // offset is now measured from the start of the merged extent
                        ed2->offset += ed2->address - er2->address;
                        ed2->address = er2->address;
                        ed2->size = er2->length;
                        ext->extent_data.decoded_size = ed2->size;

                        add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
                                               1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);

                        break;
                    }

                    le2 = le2->Flink;
                }
            }
        }

        le = le->Flink;
    }

end:
    // the range list is scratch data - free it on every exit path
    while (!IsListEmpty(&extent_ranges)) {
        le = RemoveHeadList(&extent_ranges);
        er = CONTAINING_RECORD(le, extent_range, list_entry);

        ExFreePool(er);
    }
}

/* Queues on batchlist the tree operations needed to bring the on-disk state of
 * fcb up to date (extents, INODE_ITEM, xattrs, orphan item), then removes the
 * fcb from the dirty list if it was marked dirty.
 *
 * When cache is true the INODE_ITEM is written into the tree directly instead
 * of through the batch list.
 *
 * Returns STATUS_SUCCESS or the status of the first step that failed. */
NTSTATUS flush_fcb(fcb* fcb, bool cache, LIST_ENTRY* batchlist, PIRP Irp) {
    traverse_ptr tp;
    KEY searchkey;
    NTSTATUS Status;
    INODE_ITEM* ii;
    uint64_t ii_offset;
#ifdef DEBUG_PARANOID
    uint64_t old_size = 0;
    bool extents_changed;
#endif

    // an ADS fcb consists of a single xattr on its inode
    if (fcb->ads) {
        if (fcb->deleted) {
            Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, fcb->adshash);
            if (!NT_SUCCESS(Status)) {
                ERR("delete_xattr returned %08lx\n", Status);
                goto end;
            }
        } else {
            Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length,
                               fcb->adshash, (uint8_t*)fcb->adsdata.Buffer, fcb->adsdata.Length);
            if (!NT_SUCCESS(Status)) {
                ERR("set_xattr returned %08lx\n", Status);
                goto end;
            }
        }

        Status = STATUS_SUCCESS;
        goto end;
    }

    // deleted inode: queue its removal, plus that of its orphan item if it has one
    if (fcb->deleted) {
        Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode);
        if (!NT_SUCCESS(Status)) {
            ERR("insert_tree_item_batch returned %08lx\n", Status);
            goto end;
        }

        if (fcb->marked_as_orphan) {
            Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE,
                                            fcb->inode, NULL, 0, Batch_Delete);
            if (!NT_SUCCESS(Status)) {
                ERR("insert_tree_item_batch returned %08lx\n", Status);
                goto end;
            }
        }

        Status = STATUS_SUCCESS;
        goto end;
    }

#ifdef DEBUG_PARANOID
    extents_changed = fcb->extents_changed;
#endif

    if (fcb->extents_changed) {
LIST_ENTRY* le; 4981 bool prealloc = false, extents_inline = false; 4982 uint64_t last_end; 4983 4984 // delete ignored extent items 4985 le = fcb->extents.Flink; 4986 while (le != &fcb->extents) { 4987 LIST_ENTRY* le2 = le->Flink; 4988 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 4989 4990 if (ext->ignore) { 4991 RemoveEntryList(&ext->list_entry); 4992 4993 if (ext->csum) 4994 ExFreePool(ext->csum); 4995 4996 ExFreePool(ext); 4997 } 4998 4999 le = le2; 5000 } 5001 5002 le = fcb->extents.Flink; 5003 while (le != &fcb->extents) { 5004 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 5005 5006 if (ext->inserted && ext->csum && ext->extent_data.type == EXTENT_TYPE_REGULAR) { 5007 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 5008 5009 if (ed2->size > 0) { // not sparse 5010 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE) 5011 add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, (ULONG)(ed2->num_bytes >> fcb->Vcb->sector_shift), ext->csum, Irp); 5012 else 5013 add_checksum_entry(fcb->Vcb, ed2->address, (ULONG)(ed2->size >> fcb->Vcb->sector_shift), ext->csum, Irp); 5014 } 5015 } 5016 5017 le = le->Flink; 5018 } 5019 5020 if (!IsListEmpty(&fcb->extents)) { 5021 rationalize_extents(fcb, Irp); 5022 5023 // merge together adjacent EXTENT_DATAs pointing to same extent 5024 5025 le = fcb->extents.Flink; 5026 while (le != &fcb->extents) { 5027 LIST_ENTRY* le2 = le->Flink; 5028 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 5029 5030 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) { 5031 extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry); 5032 5033 if (ext->extent_data.type == nextext->extent_data.type) { 5034 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 5035 EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->extent_data.data; 5036 5037 if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size && 
5038 nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) { 5039 chunk* c; 5040 5041 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->csum) { 5042 ULONG len = (ULONG)((ed2->num_bytes + ned2->num_bytes) >> fcb->Vcb->sector_shift); 5043 void* csum; 5044 5045 csum = ExAllocatePoolWithTag(NonPagedPool, len * fcb->Vcb->csum_size, ALLOC_TAG); 5046 if (!csum) { 5047 ERR("out of memory\n"); 5048 Status = STATUS_INSUFFICIENT_RESOURCES; 5049 goto end; 5050 } 5051 5052 RtlCopyMemory(csum, ext->csum, (ULONG)((ed2->num_bytes * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift)); 5053 RtlCopyMemory((uint8_t*)csum + ((ed2->num_bytes * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift), nextext->csum, 5054 (ULONG)((ned2->num_bytes * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift)); 5055 5056 ExFreePool(ext->csum); 5057 ext->csum = csum; 5058 } 5059 5060 ext->extent_data.generation = fcb->Vcb->superblock.generation; 5061 ed2->num_bytes += ned2->num_bytes; 5062 5063 RemoveEntryList(&nextext->list_entry); 5064 5065 if (nextext->csum) 5066 ExFreePool(nextext->csum); 5067 5068 ExFreePool(nextext); 5069 5070 c = get_chunk_from_address(fcb->Vcb, ed2->address); 5071 5072 if (!c) { 5073 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address); 5074 } else { 5075 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1, 5076 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp); 5077 if (!NT_SUCCESS(Status)) { 5078 ERR("update_changed_extent_ref returned %08lx\n", Status); 5079 goto end; 5080 } 5081 } 5082 5083 le2 = le; 5084 } 5085 } 5086 } 5087 5088 le = le2; 5089 } 5090 } 5091 5092 if (!fcb->created) { 5093 // delete existing EXTENT_DATA items 5094 5095 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, NULL, 0, Batch_DeleteExtentData); 5096 if (!NT_SUCCESS(Status)) { 5097 ERR("insert_tree_item_batch 
returned %08lx\n", Status); 5098 goto end; 5099 } 5100 } 5101 5102 // add new EXTENT_DATAs 5103 5104 last_end = 0; 5105 5106 le = fcb->extents.Flink; 5107 while (le != &fcb->extents) { 5108 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 5109 EXTENT_DATA* ed; 5110 5111 ext->inserted = false; 5112 5113 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) { 5114 Status = insert_sparse_extent(fcb, batchlist, last_end, ext->offset - last_end); 5115 if (!NT_SUCCESS(Status)) { 5116 ERR("insert_sparse_extent returned %08lx\n", Status); 5117 goto end; 5118 } 5119 } 5120 5121 ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); 5122 if (!ed) { 5123 ERR("out of memory\n"); 5124 Status = STATUS_INSUFFICIENT_RESOURCES; 5125 goto end; 5126 } 5127 5128 RtlCopyMemory(ed, &ext->extent_data, ext->datalen); 5129 5130 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset, 5131 ed, ext->datalen, Batch_Insert); 5132 if (!NT_SUCCESS(Status)) { 5133 ERR("insert_tree_item_batch returned %08lx\n", Status); 5134 goto end; 5135 } 5136 5137 if (ed->type == EXTENT_TYPE_PREALLOC) 5138 prealloc = true; 5139 5140 if (ed->type == EXTENT_TYPE_INLINE) 5141 extents_inline = true; 5142 5143 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) { 5144 if (ed->type == EXTENT_TYPE_INLINE) 5145 last_end = ext->offset + ed->decoded_size; 5146 else { 5147 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 5148 5149 last_end = ext->offset + ed2->num_bytes; 5150 } 5151 } 5152 5153 le = le->Flink; 5154 } 5155 5156 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline && 5157 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) { 5158 Status = insert_sparse_extent(fcb, batchlist, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end); 5159 if (!NT_SUCCESS(Status)) { 
5160 ERR("insert_sparse_extent returned %08lx\n", Status); 5161 goto end; 5162 } 5163 } 5164 5165 // update prealloc flag in INODE_ITEM 5166 5167 if (!prealloc) 5168 fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC; 5169 else 5170 fcb->inode_item.flags |= BTRFS_INODE_PREALLOC; 5171 5172 fcb->inode_item_changed = true; 5173 5174 fcb->extents_changed = false; 5175 } 5176 5177 if ((!fcb->created && fcb->inode_item_changed) || cache) { 5178 searchkey.obj_id = fcb->inode; 5179 searchkey.obj_type = TYPE_INODE_ITEM; 5180 searchkey.offset = 0xffffffffffffffff; 5181 5182 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp); 5183 if (!NT_SUCCESS(Status)) { 5184 ERR("error - find_item returned %08lx\n", Status); 5185 goto end; 5186 } 5187 5188 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 5189 if (cache) { 5190 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG); 5191 if (!ii) { 5192 ERR("out of memory\n"); 5193 Status = STATUS_INSUFFICIENT_RESOURCES; 5194 goto end; 5195 } 5196 5197 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM)); 5198 5199 Status = insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp); 5200 if (!NT_SUCCESS(Status)) { 5201 ERR("insert_tree_item returned %08lx\n", Status); 5202 goto end; 5203 } 5204 5205 ii_offset = 0; 5206 } else { 5207 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id); 5208 Status = STATUS_INTERNAL_ERROR; 5209 goto end; 5210 } 5211 } else { 5212 #ifdef DEBUG_PARANOID 5213 INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data; 5214 5215 old_size = ii2->st_size; 5216 #endif 5217 5218 ii_offset = tp.item->key.offset; 5219 } 5220 5221 if (!cache) { 5222 Status = delete_tree_item(fcb->Vcb, &tp); 5223 if (!NT_SUCCESS(Status)) { 5224 ERR("delete_tree_item returned %08lx\n", Status); 5225 goto end; 5226 } 5227 } else { 5228 searchkey.obj_id = fcb->inode; 5229 
searchkey.obj_type = TYPE_INODE_ITEM; 5230 searchkey.offset = ii_offset; 5231 5232 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp); 5233 if (!NT_SUCCESS(Status)) { 5234 ERR("error - find_item returned %08lx\n", Status); 5235 goto end; 5236 } 5237 5238 if (keycmp(tp.item->key, searchkey)) { 5239 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id); 5240 Status = STATUS_INTERNAL_ERROR; 5241 goto end; 5242 } else 5243 RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM))); 5244 } 5245 5246 #ifdef DEBUG_PARANOID 5247 if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) { 5248 ERR("error - size has changed but extents not marked as changed\n"); 5249 int3; 5250 } 5251 #endif 5252 } else 5253 ii_offset = 0; 5254 5255 fcb->created = false; 5256 5257 if (!cache && fcb->inode_item_changed) { 5258 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG); 5259 if (!ii) { 5260 ERR("out of memory\n"); 5261 Status = STATUS_INSUFFICIENT_RESOURCES; 5262 goto end; 5263 } 5264 5265 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM)); 5266 5267 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM), 5268 Batch_Insert); 5269 if (!NT_SUCCESS(Status)) { 5270 ERR("insert_tree_item_batch returned %08lx\n", Status); 5271 goto end; 5272 } 5273 5274 fcb->inode_item_changed = false; 5275 } 5276 5277 if (fcb->sd_dirty) { 5278 if (!fcb->sd_deleted) { 5279 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1, 5280 EA_NTACL_HASH, (uint8_t*)fcb->sd, (uint16_t)RtlLengthSecurityDescriptor(fcb->sd)); 5281 if (!NT_SUCCESS(Status)) { 5282 ERR("set_xattr returned %08lx\n", Status); 5283 goto end; 5284 } 5285 } else { 5286 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1, 
EA_NTACL_HASH); 5287 if (!NT_SUCCESS(Status)) { 5288 ERR("delete_xattr returned %08lx\n", Status); 5289 goto end; 5290 } 5291 } 5292 5293 fcb->sd_deleted = false; 5294 fcb->sd_dirty = false; 5295 } 5296 5297 if (fcb->atts_changed) { 5298 if (!fcb->atts_deleted) { 5299 uint8_t val[16], *val2; 5300 ULONG atts = fcb->atts; 5301 5302 TRACE("inserting new DOSATTRIB xattr\n"); 5303 5304 if (fcb->inode == SUBVOL_ROOT_INODE) 5305 atts &= ~FILE_ATTRIBUTE_READONLY; 5306 5307 val2 = &val[sizeof(val) - 1]; 5308 5309 do { 5310 uint8_t c = atts % 16; 5311 *val2 = c <= 9 ? (c + '0') : (c - 0xa + 'a'); 5312 5313 val2--; 5314 atts >>= 4; 5315 } while (atts != 0); 5316 5317 *val2 = 'x'; 5318 val2--; 5319 *val2 = '0'; 5320 5321 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1, 5322 EA_DOSATTRIB_HASH, val2, (uint16_t)(val + sizeof(val) - val2)); 5323 if (!NT_SUCCESS(Status)) { 5324 ERR("set_xattr returned %08lx\n", Status); 5325 goto end; 5326 } 5327 } else { 5328 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1, EA_DOSATTRIB_HASH); 5329 if (!NT_SUCCESS(Status)) { 5330 ERR("delete_xattr returned %08lx\n", Status); 5331 goto end; 5332 } 5333 } 5334 5335 fcb->atts_changed = false; 5336 fcb->atts_deleted = false; 5337 } 5338 5339 if (fcb->reparse_xattr_changed) { 5340 if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) { 5341 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1, 5342 EA_REPARSE_HASH, (uint8_t*)fcb->reparse_xattr.Buffer, (uint16_t)fcb->reparse_xattr.Length); 5343 if (!NT_SUCCESS(Status)) { 5344 ERR("set_xattr returned %08lx\n", Status); 5345 goto end; 5346 } 5347 } else { 5348 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1, EA_REPARSE_HASH); 5349 if (!NT_SUCCESS(Status)) { 5350 ERR("delete_xattr returned %08lx\n", Status); 5351 goto end; 5352 } 
5353 } 5354 5355 fcb->reparse_xattr_changed = false; 5356 } 5357 5358 if (fcb->ea_changed) { 5359 if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) { 5360 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1, 5361 EA_EA_HASH, (uint8_t*)fcb->ea_xattr.Buffer, (uint16_t)fcb->ea_xattr.Length); 5362 if (!NT_SUCCESS(Status)) { 5363 ERR("set_xattr returned %08lx\n", Status); 5364 goto end; 5365 } 5366 } else { 5367 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1, EA_EA_HASH); 5368 if (!NT_SUCCESS(Status)) { 5369 ERR("delete_xattr returned %08lx\n", Status); 5370 goto end; 5371 } 5372 } 5373 5374 fcb->ea_changed = false; 5375 } 5376 5377 if (fcb->prop_compression_changed) { 5378 if (fcb->prop_compression == PropCompression_None) { 5379 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, EA_PROP_COMPRESSION_HASH); 5380 if (!NT_SUCCESS(Status)) { 5381 ERR("delete_xattr returned %08lx\n", Status); 5382 goto end; 5383 } 5384 } else if (fcb->prop_compression == PropCompression_Zlib) { 5385 static const char zlib[] = "zlib"; 5386 5387 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, 5388 EA_PROP_COMPRESSION_HASH, (uint8_t*)zlib, sizeof(zlib) - 1); 5389 if (!NT_SUCCESS(Status)) { 5390 ERR("set_xattr returned %08lx\n", Status); 5391 goto end; 5392 } 5393 } else if (fcb->prop_compression == PropCompression_LZO) { 5394 static const char lzo[] = "lzo"; 5395 5396 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, 5397 EA_PROP_COMPRESSION_HASH, (uint8_t*)lzo, sizeof(lzo) - 1); 5398 if (!NT_SUCCESS(Status)) { 5399 ERR("set_xattr returned %08lx\n", Status); 5400 goto end; 5401 } 5402 } else if (fcb->prop_compression == PropCompression_ZSTD) { 5403 static const char zstd[] = "zstd"; 5404 5405 Status 
= set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, 5406 EA_PROP_COMPRESSION_HASH, (uint8_t*)zstd, sizeof(zstd) - 1); 5407 if (!NT_SUCCESS(Status)) { 5408 ERR("set_xattr returned %08lx\n", Status); 5409 goto end; 5410 } 5411 } 5412 5413 fcb->prop_compression_changed = false; 5414 } 5415 5416 if (fcb->xattrs_changed) { 5417 LIST_ENTRY* le; 5418 5419 le = fcb->xattrs.Flink; 5420 while (le != &fcb->xattrs) { 5421 xattr* xa = CONTAINING_RECORD(le, xattr, list_entry); 5422 LIST_ENTRY* le2 = le->Flink; 5423 5424 if (xa->dirty) { 5425 uint32_t hash = calc_crc32c(0xfffffffe, (uint8_t*)xa->data, xa->namelen); 5426 5427 if (xa->valuelen == 0) { 5428 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, hash); 5429 if (!NT_SUCCESS(Status)) { 5430 ERR("delete_xattr returned %08lx\n", Status); 5431 goto end; 5432 } 5433 5434 RemoveEntryList(&xa->list_entry); 5435 ExFreePool(xa); 5436 } else { 5437 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, 5438 hash, (uint8_t*)&xa->data[xa->namelen], xa->valuelen); 5439 if (!NT_SUCCESS(Status)) { 5440 ERR("set_xattr returned %08lx\n", Status); 5441 goto end; 5442 } 5443 5444 xa->dirty = false; 5445 } 5446 } 5447 5448 le = le2; 5449 } 5450 5451 fcb->xattrs_changed = false; 5452 } 5453 5454 if ((fcb->case_sensitive_set && !fcb->case_sensitive)) { 5455 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE, 5456 sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH); 5457 if (!NT_SUCCESS(Status)) { 5458 ERR("delete_xattr returned %08lx\n", Status); 5459 goto end; 5460 } 5461 5462 fcb->case_sensitive_set = false; 5463 } else if ((!fcb->case_sensitive_set && fcb->case_sensitive)) { 5464 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE, 5465 sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH, (uint8_t*)"1", 1); 5466 if (!NT_SUCCESS(Status)) { 
5467 ERR("set_xattr returned %08lx\n", Status); 5468 goto end; 5469 } 5470 5471 fcb->case_sensitive_set = true; 5472 } 5473 5474 if (fcb->inode_item.st_nlink == 0 && !fcb->marked_as_orphan) { // mark as orphan 5475 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE, 5476 fcb->inode, NULL, 0, Batch_Insert); 5477 if (!NT_SUCCESS(Status)) { 5478 ERR("insert_tree_item_batch returned %08lx\n", Status); 5479 goto end; 5480 } 5481 5482 fcb->marked_as_orphan = true; 5483 } 5484 5485 Status = STATUS_SUCCESS; 5486 5487 end: 5488 if (fcb->dirty) { 5489 bool lock = false; 5490 5491 fcb->dirty = false; 5492 5493 if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->dirty_fcbs_lock)) { 5494 ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, true); 5495 lock = true; 5496 } 5497 5498 RemoveEntryList(&fcb->list_entry_dirty); 5499 5500 if (lock) 5501 ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock); 5502 } 5503 5504 return Status; 5505 } 5506 5507 void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, uint64_t address, uint64_t size) { 5508 int i; 5509 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size); 5510 5511 i = 0; 5512 while (superblock_addrs[i] != 0) { 5513 if (superblock_addrs[i] + sblen >= address && superblock_addrs[i] < address + size) { 5514 if (superblock_addrs[i] > address) 5515 add_trim_entry(dev, address, superblock_addrs[i] - address); 5516 5517 if (size <= superblock_addrs[i] + sblen - address) 5518 return; 5519 5520 size -= superblock_addrs[i] + sblen - address; 5521 address = superblock_addrs[i] + sblen; 5522 } else if (superblock_addrs[i] > address + size) 5523 break; 5524 5525 i++; 5526 } 5527 5528 add_trim_entry(dev, address, size); 5529 } 5530 5531 static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { 5532 NTSTATUS Status; 5533 KEY searchkey; 5534 traverse_ptr tp; 5535 uint64_t i, factor; 5536 
CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];; 5537 5538 TRACE("dropping chunk %I64x\n", c->offset); 5539 5540 if (c->chunk_item->type & BLOCK_FLAG_RAID0) 5541 factor = c->chunk_item->num_stripes; 5542 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 5543 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; 5544 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) 5545 factor = c->chunk_item->num_stripes - 1; 5546 else if (c->chunk_item->type & BLOCK_FLAG_RAID6) 5547 factor = c->chunk_item->num_stripes - 2; 5548 else // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4 5549 factor = 1; 5550 5551 // do TRIM 5552 if (Vcb->trim && !Vcb->options.no_trim) { 5553 uint64_t len = c->chunk_item->size / factor; 5554 5555 for (i = 0; i < c->chunk_item->num_stripes; i++) { 5556 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) 5557 add_trim_entry_avoid_sb(Vcb, c->devices[i], cis[i].offset, len); 5558 } 5559 } 5560 5561 if (!c->cache) { 5562 Status = load_stored_free_space_cache(Vcb, c, true, Irp); 5563 5564 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) 5565 WARN("load_stored_free_space_cache returned %08lx\n", Status); 5566 } 5567 5568 // remove free space cache 5569 if (c->cache) { 5570 c->cache->deleted = true; 5571 5572 Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback); 5573 if (!NT_SUCCESS(Status)) { 5574 ERR("excise_extents returned %08lx\n", Status); 5575 return Status; 5576 } 5577 5578 Status = flush_fcb(c->cache, true, batchlist, Irp); 5579 5580 free_fcb(c->cache); 5581 5582 if (c->cache->refcount == 0) 5583 reap_fcb(c->cache); 5584 5585 if (!NT_SUCCESS(Status)) { 5586 ERR("flush_fcb returned %08lx\n", Status); 5587 return Status; 5588 } 5589 5590 searchkey.obj_id = FREE_SPACE_CACHE_ID; 5591 searchkey.obj_type = 0; 5592 searchkey.offset = c->offset; 5593 5594 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 5595 if (!NT_SUCCESS(Status)) { 
5596 ERR("error - find_item returned %08lx\n", Status); 5597 return Status; 5598 } 5599 5600 if (!keycmp(tp.item->key, searchkey)) { 5601 Status = delete_tree_item(Vcb, &tp); 5602 if (!NT_SUCCESS(Status)) { 5603 ERR("delete_tree_item returned %08lx\n", Status); 5604 return Status; 5605 } 5606 } 5607 } 5608 5609 if (Vcb->space_root) { 5610 Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size, 5611 NULL, 0, Batch_DeleteFreeSpace); 5612 if (!NT_SUCCESS(Status)) { 5613 ERR("insert_tree_item_batch returned %08lx\n", Status); 5614 return Status; 5615 } 5616 } 5617 5618 for (i = 0; i < c->chunk_item->num_stripes; i++) { 5619 if (!c->created) { 5620 // remove DEV_EXTENTs from tree 4 5621 searchkey.obj_id = cis[i].dev_id; 5622 searchkey.obj_type = TYPE_DEV_EXTENT; 5623 searchkey.offset = cis[i].offset; 5624 5625 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp); 5626 if (!NT_SUCCESS(Status)) { 5627 ERR("error - find_item returned %08lx\n", Status); 5628 return Status; 5629 } 5630 5631 if (!keycmp(tp.item->key, searchkey)) { 5632 Status = delete_tree_item(Vcb, &tp); 5633 if (!NT_SUCCESS(Status)) { 5634 ERR("delete_tree_item returned %08lx\n", Status); 5635 return Status; 5636 } 5637 5638 if (tp.item->size >= sizeof(DEV_EXTENT)) { 5639 DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data; 5640 5641 c->devices[i]->devitem.bytes_used -= de->length; 5642 5643 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) { 5644 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + de->length > Vcb->balance.opts[0].drange_start) 5645 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback); 5646 } else 5647 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, NULL, rollback); 5648 } 5649 } else 5650 WARN("could not find (%I64x,%x,%I64x) in dev 
tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 5651 } else { 5652 uint64_t len = c->chunk_item->size / factor; 5653 5654 c->devices[i]->devitem.bytes_used -= len; 5655 5656 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) { 5657 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + len > Vcb->balance.opts[0].drange_start) 5658 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback); 5659 } else 5660 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, NULL, rollback); 5661 } 5662 } 5663 5664 // modify DEV_ITEMs in chunk tree 5665 for (i = 0; i < c->chunk_item->num_stripes; i++) { 5666 if (c->devices[i]) { 5667 uint64_t j; 5668 DEV_ITEM* di; 5669 5670 searchkey.obj_id = 1; 5671 searchkey.obj_type = TYPE_DEV_ITEM; 5672 searchkey.offset = c->devices[i]->devitem.dev_id; 5673 5674 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp); 5675 if (!NT_SUCCESS(Status)) { 5676 ERR("error - find_item returned %08lx\n", Status); 5677 return Status; 5678 } 5679 5680 if (!keycmp(tp.item->key, searchkey)) { 5681 Status = delete_tree_item(Vcb, &tp); 5682 if (!NT_SUCCESS(Status)) { 5683 ERR("delete_tree_item returned %08lx\n", Status); 5684 return Status; 5685 } 5686 5687 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG); 5688 if (!di) { 5689 ERR("out of memory\n"); 5690 return STATUS_INSUFFICIENT_RESOURCES; 5691 } 5692 5693 RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM)); 5694 5695 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp); 5696 if (!NT_SUCCESS(Status)) { 5697 ERR("insert_tree_item returned %08lx\n", Status); 5698 return Status; 5699 } 5700 } 5701 5702 for (j = i + 1; j < c->chunk_item->num_stripes; j++) { 5703 if (c->devices[j] == c->devices[i]) 5704 c->devices[j] = 
NULL; 5705 } 5706 } 5707 } 5708 5709 if (!c->created) { 5710 // remove CHUNK_ITEM from chunk tree 5711 searchkey.obj_id = 0x100; 5712 searchkey.obj_type = TYPE_CHUNK_ITEM; 5713 searchkey.offset = c->offset; 5714 5715 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp); 5716 if (!NT_SUCCESS(Status)) { 5717 ERR("error - find_item returned %08lx\n", Status); 5718 return Status; 5719 } 5720 5721 if (!keycmp(tp.item->key, searchkey)) { 5722 Status = delete_tree_item(Vcb, &tp); 5723 5724 if (!NT_SUCCESS(Status)) { 5725 ERR("delete_tree_item returned %08lx\n", Status); 5726 return Status; 5727 } 5728 } else 5729 WARN("could not find CHUNK_ITEM for chunk %I64x\n", c->offset); 5730 5731 // remove BLOCK_GROUP_ITEM from extent tree 5732 searchkey.obj_id = c->offset; 5733 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM; 5734 searchkey.offset = 0xffffffffffffffff; 5735 5736 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 5737 if (!NT_SUCCESS(Status)) { 5738 ERR("error - find_item returned %08lx\n", Status); 5739 return Status; 5740 } 5741 5742 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 5743 Status = delete_tree_item(Vcb, &tp); 5744 5745 if (!NT_SUCCESS(Status)) { 5746 ERR("delete_tree_item returned %08lx\n", Status); 5747 return Status; 5748 } 5749 } else 5750 WARN("could not find BLOCK_GROUP_ITEM for chunk %I64x\n", c->offset); 5751 } 5752 5753 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) 5754 remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset); 5755 5756 RemoveEntryList(&c->list_entry); 5757 5758 // clear raid56 incompat flag if dropping last RAID5/6 chunk 5759 5760 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) { 5761 LIST_ENTRY* le; 5762 bool clear_flag = true; 5763 5764 le = Vcb->chunks.Flink; 5765 while (le != &Vcb->chunks) { 5766 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry); 5767 5768 if (c2->chunk_item->type & BLOCK_FLAG_RAID5 
/* Fills part of a partial stripe's buffer (ps->data) from disk, rebuilding
 * the data from the other stripes when the device that should hold it has
 * no device object.
 *
 * Vcb      - volume
 * c        - chunk the partial stripe belongs to
 * ps       - partial stripe whose data buffer is filled
 * startoff - offset added to each stripe's cis[].offset to get the device address
 * parity   - stripe index used as the base of the rotation when mapping data onto devices
 * offset   - first sector to read, relative to the start of ps->data
 *            (sector units: it is shifted by Vcb->sector_shift to get bytes)
 * len      - number of sectors to read
 *
 * Returns STATUS_SUCCESS, the failing status of sync_read_phys,
 * STATUS_INSUFFICIENT_RESOURCES if a scratch buffer cannot be allocated, or
 * STATUS_UNEXPECTED_IO_ERROR if too many stripes are unavailable to rebuild.
 */
static NTSTATUS partial_stripe_read(device_extension* Vcb, chunk* c, partial_stripe* ps, uint64_t startoff, uint16_t parity, ULONG offset, ULONG len) {
    NTSTATUS Status;
    ULONG sl = (ULONG)(c->chunk_item->stripe_length >> Vcb->sector_shift); // stripe length in sectors
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; // stripe array follows the CHUNK_ITEM header

    while (len > 0) {
        // never read across a stripe boundary in one go
        ULONG readlen = min(offset + len, offset + (sl - (offset % sl))) - offset;
        uint16_t stripe;

        // physical stripe holding this part of the data
        stripe = (parity + (offset / sl) + 1) % c->chunk_item->num_stripes;

        if (c->devices[stripe]->devobj) {
            // device is present - read straight into the partial stripe buffer
            Status = sync_read_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff + ((offset % sl) << Vcb->sector_shift),
                                    readlen << Vcb->sector_shift, ps->data + (offset << Vcb->sector_shift), false);
            if (!NT_SUCCESS(Status)) {
                ERR("sync_read_phys returned %08lx\n", Status);
                return Status;
            }
        } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
            // RAID5 with the wanted device missing: XOR together the same range of every other stripe
            uint16_t i;
            uint8_t* scratch;

            scratch = ExAllocatePoolWithTag(NonPagedPool, readlen << Vcb->sector_shift, ALLOC_TAG);
            if (!scratch) {
                ERR("out of memory\n");
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            for (i = 0; i < c->chunk_item->num_stripes; i++) {
                if (i != stripe) {
                    // a second missing device cannot be recovered from
                    if (!c->devices[i]->devobj) {
                        ExFreePool(scratch);
                        return STATUS_UNEXPECTED_IO_ERROR;
                    }

                    if (i == 0 || (stripe == 0 && i == 1)) {
                        // first stripe visited - read directly into the destination to seed the XOR
                        Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) << Vcb->sector_shift),
                                                readlen << Vcb->sector_shift, ps->data + (offset << Vcb->sector_shift), false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08lx\n", Status);
                            ExFreePool(scratch);
                            return Status;
                        }
                    } else {
                        // later stripes go via the scratch buffer and are XORed in
                        Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) << Vcb->sector_shift),
                                                readlen << Vcb->sector_shift, scratch, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08lx\n", Status);
                            ExFreePool(scratch);
                            return Status;
                        }

                        do_xor(ps->data + (offset << Vcb->sector_shift), scratch, readlen << Vcb->sector_shift);
                    }
                }
            }

            ExFreePool(scratch);
        } else {
            // not RAID5 (the raid6_recover2 helper is used below): one further unreadable stripe is tolerated
            uint8_t* scratch;
            uint16_t k, i, logstripe, error_stripe, num_errors = 0;

            // one slot per stripe, plus two extra slots - the slot after the last stripe receives raid6_recover2's output
            scratch = ExAllocatePoolWithTag(NonPagedPool, (c->chunk_item->num_stripes + 2) * readlen << Vcb->sector_shift, ALLOC_TAG);
            if (!scratch) {
                ERR("out of memory\n");
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            // i walks the physical stripes starting just after `parity`; k is the matching slot in scratch
            i = (parity + 1) % c->chunk_item->num_stripes;
            // slot index that the wanted (missing) stripe would occupy in that ordering
            logstripe = (c->chunk_item->num_stripes + c->chunk_item->num_stripes - 1 - parity + stripe) % c->chunk_item->num_stripes;

            for (k = 0; k < c->chunk_item->num_stripes; k++) {
                if (i != stripe) {
                    if (c->devices[i]->devobj) {
                        Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) << Vcb->sector_shift),
                                                readlen << Vcb->sector_shift, scratch + (k * readlen << Vcb->sector_shift), false);
                        if (!NT_SUCCESS(Status)) {
                            // a failed read is counted like a missing device rather than aborting immediately
                            ERR("sync_read_phys returned %08lx\n", Status);
                            num_errors++;
                            error_stripe = k;
                        }
                    } else {
                        num_errors++;
                        error_stripe = k;
                    }

                    if (num_errors > 1) {
                        ExFreePool(scratch);
                        return STATUS_UNEXPECTED_IO_ERROR;
                    }
                }

                i = (i + 1) % c->chunk_item->num_stripes;
            }

            // error_stripe is only examined when num_errors != 0, i.e. after it has been assigned
            if (num_errors == 0 || error_stripe == c->chunk_item->num_stripes - 1) {
                // the last slot is not needed: XOR every slot except the last one and the missing one
                for (k = 0; k < c->chunk_item->num_stripes - 1; k++) {
                    if (k != logstripe) {
                        if (k == 0 || (k == 1 && logstripe == 0)) {
                            RtlCopyMemory(ps->data + (offset << Vcb->sector_shift), scratch + (k * readlen << Vcb->sector_shift),
                                          readlen << Vcb->sector_shift);
                        } else {
                            do_xor(ps->data + (offset << Vcb->sector_shift), scratch + (k * readlen << Vcb->sector_shift),
                                   readlen << Vcb->sector_shift);
                        }
                    }
                }
            } else {
                // two slots are unavailable - let raid6_recover2 rebuild into the slot after the last stripe
                raid6_recover2(scratch, c->chunk_item->num_stripes, readlen << Vcb->sector_shift, logstripe,
                               error_stripe, scratch + (c->chunk_item->num_stripes * readlen << Vcb->sector_shift));

                RtlCopyMemory(ps->data + (offset << Vcb->sector_shift), scratch + (c->chunk_item->num_stripes * readlen << Vcb->sector_shift),
                              readlen << Vcb->sector_shift);
            }

            ExFreePool(scratch);
        }

        offset += readlen;
        len -= readlen;
    }

    return STATUS_SUCCESS;
}
/* Writes a partial stripe out as a full stripe.
 *
 * The sector ranges of ps that lie outside the clear runs of ps->bmp are
 * first read (or reconstructed) from disk via partial_stripe_read, ranges
 * that fall in the chunk's free space or deleting lists are zeroed, and then
 * every data stripe plus the parity stripe(s) is written to each device that
 * has a device object.
 *
 * Vcb - volume
 * c   - chunk the partial stripe belongs to; its type is tested for
 *       BLOCK_FLAG_RAID5, anything else takes the two-parity path
 * ps  - partial stripe to flush; ps->data is modified in place in the
 *       RAID5 case when parity is computed
 *
 * Returns STATUS_SUCCESS, or the first failing status from
 * partial_stripe_read / write_data_phys, or STATUS_INSUFFICIENT_RESOURCES.
 */
NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps) {
    NTSTATUS Status;
    uint16_t parity2, stripe, startoffstripe;
    uint8_t* data;
    uint64_t startoff;
    ULONG runlength, index, last1;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; // stripe array follows the CHUNK_ITEM header
    LIST_ENTRY* le;
    uint16_t k, num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
    uint64_t ps_length = num_data_stripes * c->chunk_item->stripe_length; // bytes of data in one full stripe
    ULONG stripe_length = (ULONG)c->chunk_item->stripe_length;

    // FIXME - do writes asynchronously?

    get_raid0_offset(ps->address - c->offset, stripe_length, num_data_stripes, &startoff, &startoffstripe);

    // index of the last parity stripe for this full stripe (the only parity stripe for RAID5)
    parity2 = (((ps->address - c->offset) / ps_length) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;

    // read data (or reconstruct if degraded)

    runlength = RtlFindFirstRunClear(&ps->bmp, &index);
    last1 = 0; // first sector after the previous clear run

    while (runlength != 0) {
        if (index >= ps->bmplen)
            break;

        // clamp a run that extends past the end of the bitmap
        if (index + runlength >= ps->bmplen) {
            runlength = ps->bmplen - index;

            if (runlength == 0)
                break;
        }

        // sectors between the previous clear run and this one have to come from disk
        if (index > last1) {
            Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, index - last1);
            if (!NT_SUCCESS(Status)) {
                ERR("partial_stripe_read returned %08lx\n", Status);
                return Status;
            }
        }

        last1 = index + runlength;

        runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
    }

    // whatever follows the last clear run also has to come from disk
    if (last1 < ps_length >> Vcb->sector_shift) {
        Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, (ULONG)((ps_length >> Vcb->sector_shift) - last1));
        if (!NT_SUCCESS(Status)) {
            ERR("partial_stripe_read returned %08lx\n", Status);
            return Status;
        }
    }

    // set unallocated data to 0
    le = c->space.Flink;
    while (le != &c->space) {
        space* s = CONTAINING_RECORD(le, space, list_entry);

        if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
            uint64_t start = max(ps->address, s->address);
            uint64_t end = min(ps->address + ps_length, s->address + s->size);

            RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
        } else if (s->address >= ps->address + ps_length) // stops at the first entry beyond the stripe
            break;

        le = le->Flink;
    }

    // same again for space on the chunk's deleting list
    le = c->deleting.Flink;
    while (le != &c->deleting) {
        space* s = CONTAINING_RECORD(le, space, list_entry);

        if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
            uint64_t start = max(ps->address, s->address);
            uint64_t end = min(ps->address + ps_length, s->address + s->size);

            RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
        } else if (s->address >= ps->address + ps_length)
            break;

        le = le->Flink;
    }

    // data stripes start on the device after the last parity stripe and wrap round
    stripe = (parity2 + 1) % c->chunk_item->num_stripes;

    data = ps->data;
    for (k = 0; k < num_data_stripes; k++) {
        // devices without a device object are skipped
        if (c->devices[stripe]->devobj) {
            Status = write_data_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff, data, stripe_length);
            if (!NT_SUCCESS(Status)) {
                ERR("write_data_phys returned %08lx\n", Status);
                return Status;
            }
        }

        data += stripe_length;
        stripe = (stripe + 1) % c->chunk_item->num_stripes;
    }

    // write parity
    if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
        if (c->devices[parity2]->devobj) {
            uint16_t i;

            // XOR all data stripes into the first one - this overwrites the start of ps->data
            for (i = 1; i < c->chunk_item->num_stripes - 1; i++) {
                do_xor(ps->data, ps->data + (i * stripe_length), stripe_length);
            }

            Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff, ps->data, stripe_length);
            if (!NT_SUCCESS(Status)) {
                ERR("write_data_phys returned %08lx\n", Status);
                return Status;
            }
        }
    } else {
        // two parity stripes: parity1 sits immediately before parity2
        uint16_t parity1 = (parity2 + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;

        if (c->devices[parity1]->devobj || c->devices[parity2]->devobj) {
            uint8_t* scratch;
            uint16_t i;

            // first half accumulates the plain XOR, second half the galois_double-based sum
            scratch = ExAllocatePoolWithTag(NonPagedPool, stripe_length * 2, ALLOC_TAG);
            if (!scratch) {
                ERR("out of memory\n");
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            // walk the data stripes from last to first
            i = c->chunk_item->num_stripes - 3;

            while (true) {
                if (i == c->chunk_item->num_stripes - 3) {
                    // last data stripe seeds both accumulators
                    RtlCopyMemory(scratch, ps->data + (i * stripe_length), stripe_length);
                    RtlCopyMemory(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
                } else {
                    do_xor(scratch, ps->data + (i * stripe_length), stripe_length);

                    galois_double(scratch + stripe_length, stripe_length);
                    do_xor(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
                }

                if (i == 0)
                    break;

                i--;
            }

            if (c->devices[parity1]->devobj) {
                Status = write_data_phys(c->devices[parity1]->devobj, c->devices[parity1]->fileobj, cis[parity1].offset + startoff, scratch, stripe_length);
                if (!NT_SUCCESS(Status)) {
                    ERR("write_data_phys returned %08lx\n", Status);
                    ExFreePool(scratch);
                    return Status;
                }
            }

            if (c->devices[parity2]->devobj) {
                Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff,
                                         scratch + stripe_length, stripe_length);
                if (!NT_SUCCESS(Status)) {
                    ERR("write_data_phys returned %08lx\n", Status);
                    ExFreePool(scratch);
                    return Status;
                }
            }

            ExFreePool(scratch);
        }
    }

    return STATUS_SUCCESS;
}
/* Walks every chunk in Vcb->chunks and processes those marked as changed:
 * flushes pending partial stripes of RAID5/6 chunks, drops chunks whose used
 * space (not counting a free-space cache stored inside the chunk itself) has
 * fallen to zero, and calls create_chunk for chunks flagged as created.
 *
 * Vcb->chunk_lock is held exclusively for the whole walk and released on
 * every return path.
 *
 * Vcb       - volume
 * batchlist - batch list passed on to drop_chunk
 * Irp       - passed through to drop_chunk / create_chunk
 * rollback  - rollback list passed on to drop_chunk
 *
 * Returns STATUS_SUCCESS, or the first failing status from
 * flush_partial_stripe, drop_chunk or create_chunk.
 */
static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
    LIST_ENTRY *le, *le2;
    NTSTATUS Status;
    uint64_t used_minus_cache;

    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);

    // FIXME - do tree chunks before data chunks

    le = Vcb->chunks.Flink;
    while (le != &Vcb->chunks) {
        chunk* c = CONTAINING_RECORD(le, chunk, list_entry);

        // remember the successor now - c may be freed by drop_chunk below
        le2 = le->Flink;

        if (c->changed) {
            acquire_chunk_lock(c, Vcb);

            // flush partial stripes
            if (!Vcb->readonly && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) {
                ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);

                while (!IsListEmpty(&c->partial_stripes)) {
                    partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);

                    Status = flush_partial_stripe(Vcb, c, ps);

                    // the partial stripe is freed whether or not the flush succeeded
                    if (ps->bmparr)
                        ExFreePool(ps->bmparr);

                    ExFreePool(ps);

                    if (!NT_SUCCESS(Status)) {
                        ERR("flush_partial_stripe returned %08lx\n", Status);
                        ExReleaseResourceLite(&c->partial_stripes_lock);
                        release_chunk_lock(c, Vcb);
                        ExReleaseResourceLite(&Vcb->chunk_lock);
                        return Status;
                    }
                }

                ExReleaseResourceLite(&c->partial_stripes_lock);
            }

            // chunk is linked into a balance list - nothing more is done with it here
            if (c->list_entry_balance.Flink) {
                release_chunk_lock(c, Vcb);
                le = le2;
                continue;
            }

            if (c->space_changed || c->created) {
                bool created = c->created;

                used_minus_cache = c->used;

                // subtract self-hosted cache
                if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
                    LIST_ENTRY* le3;

                    le3 = c->cache->extents.Flink;
                    while (le3 != &c->cache->extents) {
                        extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
                        EXTENT_DATA* ed = &ext->extent_data;

                        if (!ext->ignore) {
                            if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
                                EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

                                // only extents lying entirely within this chunk are discounted
                                if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
                                    used_minus_cache -= ed2->size;
                            }
                        }

                        le3 = le3->Flink;
                    }
                }

                if (used_minus_cache == 0) {
                    Status = drop_chunk(Vcb, c, batchlist, Irp, rollback);
                    if (!NT_SUCCESS(Status)) {
                        ERR("drop_chunk returned %08lx\n", Status);
                        release_chunk_lock(c, Vcb);
                        ExReleaseResourceLite(&Vcb->chunk_lock);
                        return Status;
                    }

                    // c is now freed, so avoid releasing non-existent lock
                    le = le2;
                    continue;
                } else if (c->created) {
                    Status = create_chunk(Vcb, c, Irp);
                    if (!NT_SUCCESS(Status)) {
                        ERR("create_chunk returned %08lx\n", Status);
                        release_chunk_lock(c, Vcb);
                        ExReleaseResourceLite(&Vcb->chunk_lock);
                        return Status;
                    }
                }

                // `created` was sampled before create_chunk ran
                if (used_minus_cache > 0 || created)
                    release_chunk_lock(c, Vcb);
            } else
                release_chunk_lock(c, Vcb);
        }

        le = le2;
    }

    ExReleaseResourceLite(&Vcb->chunk_lock);

    return STATUS_SUCCESS;
}
extent* ext = CONTAINING_RECORD(le3, extent, list_entry); 6195 EXTENT_DATA* ed = &ext->extent_data; 6196 6197 if (!ext->ignore) { 6198 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { 6199 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 6200 6201 if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size) 6202 used_minus_cache -= ed2->size; 6203 } 6204 } 6205 6206 le3 = le3->Flink; 6207 } 6208 } 6209 6210 if (used_minus_cache == 0) { 6211 Status = drop_chunk(Vcb, c, batchlist, Irp, rollback); 6212 if (!NT_SUCCESS(Status)) { 6213 ERR("drop_chunk returned %08lx\n", Status); 6214 release_chunk_lock(c, Vcb); 6215 ExReleaseResourceLite(&Vcb->chunk_lock); 6216 return Status; 6217 } 6218 6219 // c is now freed, so avoid releasing non-existent lock 6220 le = le2; 6221 continue; 6222 } else if (c->created) { 6223 Status = create_chunk(Vcb, c, Irp); 6224 if (!NT_SUCCESS(Status)) { 6225 ERR("create_chunk returned %08lx\n", Status); 6226 release_chunk_lock(c, Vcb); 6227 ExReleaseResourceLite(&Vcb->chunk_lock); 6228 return Status; 6229 } 6230 } 6231 6232 if (used_minus_cache > 0 || created) 6233 release_chunk_lock(c, Vcb); 6234 } else 6235 release_chunk_lock(c, Vcb); 6236 } 6237 6238 le = le2; 6239 } 6240 6241 ExReleaseResourceLite(&Vcb->chunk_lock); 6242 6243 return STATUS_SUCCESS; 6244 } 6245 6246 static NTSTATUS delete_root_ref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, uint64_t parinode, PANSI_STRING utf8, PIRP Irp) { 6247 KEY searchkey; 6248 traverse_ptr tp; 6249 NTSTATUS Status; 6250 6251 searchkey.obj_id = parsubvolid; 6252 searchkey.obj_type = TYPE_ROOT_REF; 6253 searchkey.offset = subvolid; 6254 6255 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6256 if (!NT_SUCCESS(Status)) { 6257 ERR("error - find_item returned %08lx\n", Status); 6258 return Status; 6259 } 6260 6261 if (!keycmp(searchkey, tp.item->key)) { 6262 if (tp.item->size < sizeof(ROOT_REF)) { 
6263 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF)); 6264 return STATUS_INTERNAL_ERROR; 6265 } else { 6266 ROOT_REF* rr; 6267 ULONG len; 6268 6269 rr = (ROOT_REF*)tp.item->data; 6270 len = tp.item->size; 6271 6272 do { 6273 uint16_t itemlen; 6274 6275 if (len < sizeof(ROOT_REF) || len < offsetof(ROOT_REF, name[0]) + rr->n) { 6276 ERR("(%I64x,%x,%I64x) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); 6277 break; 6278 } 6279 6280 itemlen = (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n; 6281 6282 if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) { 6283 uint16_t newlen = tp.item->size - itemlen; 6284 6285 Status = delete_tree_item(Vcb, &tp); 6286 if (!NT_SUCCESS(Status)) { 6287 ERR("delete_tree_item returned %08lx\n", Status); 6288 return Status; 6289 } 6290 6291 if (newlen == 0) { 6292 TRACE("deleting (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); 6293 } else { 6294 uint8_t *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff; 6295 6296 if (!newrr) { 6297 ERR("out of memory\n"); 6298 return STATUS_INSUFFICIENT_RESOURCES; 6299 } 6300 6301 TRACE("modifying (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); 6302 6303 if ((uint8_t*)rr > tp.item->data) { 6304 RtlCopyMemory(newrr, tp.item->data, (uint8_t*)rr - tp.item->data); 6305 rroff = newrr + ((uint8_t*)rr - tp.item->data); 6306 } else { 6307 rroff = newrr; 6308 } 6309 6310 if ((uint8_t*)&rr->name[rr->n] < tp.item->data + tp.item->size) 6311 RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((uint8_t*)&rr->name[rr->n] - tp.item->data)); 6312 6313 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp); 6314 if (!NT_SUCCESS(Status)) { 6315 
ERR("insert_tree_item returned %08lx\n", Status); 6316 ExFreePool(newrr); 6317 return Status; 6318 } 6319 } 6320 6321 break; 6322 } 6323 6324 if (len > itemlen) { 6325 len -= itemlen; 6326 rr = (ROOT_REF*)&rr->name[rr->n]; 6327 } else 6328 break; 6329 } while (len > 0); 6330 } 6331 } else { 6332 WARN("could not find ROOT_REF entry for subvol %I64x in %I64x\n", searchkey.offset, searchkey.obj_id); 6333 return STATUS_NOT_FOUND; 6334 } 6335 6336 return STATUS_SUCCESS; 6337 } 6338 6339 #ifdef _MSC_VER 6340 #pragma warning(push) 6341 #pragma warning(suppress: 28194) 6342 #endif 6343 static NTSTATUS add_root_ref(_In_ device_extension* Vcb, _In_ uint64_t subvolid, _In_ uint64_t parsubvolid, _In_ __drv_aliasesMem ROOT_REF* rr, _In_opt_ PIRP Irp) { 6344 KEY searchkey; 6345 traverse_ptr tp; 6346 NTSTATUS Status; 6347 6348 searchkey.obj_id = parsubvolid; 6349 searchkey.obj_type = TYPE_ROOT_REF; 6350 searchkey.offset = subvolid; 6351 6352 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6353 if (!NT_SUCCESS(Status)) { 6354 ERR("error - find_item returned %08lx\n", Status); 6355 return Status; 6356 } 6357 6358 if (!keycmp(searchkey, tp.item->key)) { 6359 uint16_t rrsize = tp.item->size + (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n; 6360 uint8_t* rr2; 6361 6362 rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG); 6363 if (!rr2) { 6364 ERR("out of memory\n"); 6365 return STATUS_INSUFFICIENT_RESOURCES; 6366 } 6367 6368 if (tp.item->size > 0) 6369 RtlCopyMemory(rr2, tp.item->data, tp.item->size); 6370 6371 RtlCopyMemory(rr2 + tp.item->size, rr, offsetof(ROOT_REF, name[0]) + rr->n); 6372 ExFreePool(rr); 6373 6374 Status = delete_tree_item(Vcb, &tp); 6375 if (!NT_SUCCESS(Status)) { 6376 ERR("delete_tree_item returned %08lx\n", Status); 6377 ExFreePool(rr2); 6378 return Status; 6379 } 6380 6381 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp); 6382 if (!NT_SUCCESS(Status)) { 6383 
ERR("insert_tree_item returned %08lx\n", Status); 6384 ExFreePool(rr2); 6385 return Status; 6386 } 6387 } else { 6388 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n, NULL, Irp); 6389 if (!NT_SUCCESS(Status)) { 6390 ERR("insert_tree_item returned %08lx\n", Status); 6391 ExFreePool(rr); 6392 return Status; 6393 } 6394 } 6395 6396 return STATUS_SUCCESS; 6397 } 6398 #ifdef _MSC_VER 6399 #pragma warning(pop) 6400 #endif 6401 6402 static NTSTATUS update_root_backref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, PIRP Irp) { 6403 KEY searchkey; 6404 traverse_ptr tp; 6405 uint8_t* data; 6406 uint16_t datalen; 6407 NTSTATUS Status; 6408 6409 searchkey.obj_id = parsubvolid; 6410 searchkey.obj_type = TYPE_ROOT_REF; 6411 searchkey.offset = subvolid; 6412 6413 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6414 if (!NT_SUCCESS(Status)) { 6415 ERR("error - find_item returned %08lx\n", Status); 6416 return Status; 6417 } 6418 6419 if (!keycmp(tp.item->key, searchkey) && tp.item->size > 0) { 6420 datalen = tp.item->size; 6421 6422 data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); 6423 if (!data) { 6424 ERR("out of memory\n"); 6425 return STATUS_INSUFFICIENT_RESOURCES; 6426 } 6427 6428 RtlCopyMemory(data, tp.item->data, datalen); 6429 } else { 6430 datalen = 0; 6431 data = NULL; 6432 } 6433 6434 searchkey.obj_id = subvolid; 6435 searchkey.obj_type = TYPE_ROOT_BACKREF; 6436 searchkey.offset = parsubvolid; 6437 6438 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6439 if (!NT_SUCCESS(Status)) { 6440 ERR("error - find_item returned %08lx\n", Status); 6441 6442 if (datalen > 0) 6443 ExFreePool(data); 6444 6445 return Status; 6446 } 6447 6448 if (!keycmp(tp.item->key, searchkey)) { 6449 Status = delete_tree_item(Vcb, &tp); 6450 if (!NT_SUCCESS(Status)) { 6451 ERR("delete_tree_item returned %08lx\n", Status); 
6452 6453 if (datalen > 0) 6454 ExFreePool(data); 6455 6456 return Status; 6457 } 6458 } 6459 6460 if (datalen > 0) { 6461 Status = insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp); 6462 if (!NT_SUCCESS(Status)) { 6463 ERR("insert_tree_item returned %08lx\n", Status); 6464 ExFreePool(data); 6465 return Status; 6466 } 6467 } 6468 6469 return STATUS_SUCCESS; 6470 } 6471 6472 static NTSTATUS add_root_item_to_cache(device_extension* Vcb, uint64_t root, PIRP Irp) { 6473 KEY searchkey; 6474 traverse_ptr tp; 6475 NTSTATUS Status; 6476 6477 searchkey.obj_id = root; 6478 searchkey.obj_type = TYPE_ROOT_ITEM; 6479 searchkey.offset = 0xffffffffffffffff; 6480 6481 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6482 if (!NT_SUCCESS(Status)) { 6483 ERR("error - find_item returned %08lx\n", Status); 6484 return Status; 6485 } 6486 6487 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 6488 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id); 6489 return STATUS_INTERNAL_ERROR; 6490 } 6491 6492 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed 6493 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); 6494 if (!ri) { 6495 ERR("out of memory\n"); 6496 return STATUS_INSUFFICIENT_RESOURCES; 6497 } 6498 6499 if (tp.item->size > 0) 6500 RtlCopyMemory(ri, tp.item->data, tp.item->size); 6501 6502 RtlZeroMemory(((uint8_t*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size); 6503 6504 Status = delete_tree_item(Vcb, &tp); 6505 if (!NT_SUCCESS(Status)) { 6506 ERR("delete_tree_item returned %08lx\n", Status); 6507 ExFreePool(ri); 6508 return Status; 6509 } 6510 6511 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp); 6512 if (!NT_SUCCESS(Status)) { 6513 ERR("insert_tree_item returned 
%08lx\n", Status); 6514 ExFreePool(ri); 6515 return Status; 6516 } 6517 } else { 6518 tp.tree->write = true; 6519 } 6520 6521 return STATUS_SUCCESS; 6522 } 6523 6524 static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp) { 6525 NTSTATUS Status; 6526 6527 // if fileref created and then immediately deleted, do nothing 6528 if (fileref->created && fileref->deleted) { 6529 fileref->dirty = false; 6530 return STATUS_SUCCESS; 6531 } 6532 6533 if (fileref->fcb->ads) { 6534 fileref->dirty = false; 6535 return STATUS_SUCCESS; 6536 } 6537 6538 if (fileref->created) { 6539 uint16_t disize; 6540 DIR_ITEM *di, *di2; 6541 uint32_t crc32; 6542 6543 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6544 6545 disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length); 6546 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); 6547 if (!di) { 6548 ERR("out of memory\n"); 6549 return STATUS_INSUFFICIENT_RESOURCES; 6550 } 6551 6552 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6553 di->key.obj_id = fileref->fcb->inode; 6554 di->key.obj_type = TYPE_INODE_ITEM; 6555 di->key.offset = 0; 6556 } else { // subvolume 6557 di->key.obj_id = fileref->fcb->subvol->id; 6558 di->key.obj_type = TYPE_ROOT_ITEM; 6559 di->key.offset = 0xffffffffffffffff; 6560 } 6561 6562 di->transid = fileref->fcb->Vcb->superblock.generation; 6563 di->m = 0; 6564 di->n = (uint16_t)fileref->dc->utf8.Length; 6565 di->type = fileref->fcb->type; 6566 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6567 6568 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); 6569 if (!di2) { 6570 ERR("out of memory\n"); 6571 return STATUS_INSUFFICIENT_RESOURCES; 6572 } 6573 6574 RtlCopyMemory(di2, di, disize); 6575 6576 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, 6577 fileref->dc->index, di, disize, 
Batch_Insert); 6578 if (!NT_SUCCESS(Status)) { 6579 ERR("insert_tree_item_batch returned %08lx\n", Status); 6580 return Status; 6581 } 6582 6583 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32, 6584 di2, disize, Batch_DirItem); 6585 if (!NT_SUCCESS(Status)) { 6586 ERR("insert_tree_item_batch returned %08lx\n", Status); 6587 return Status; 6588 } 6589 6590 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6591 INODE_REF* ir; 6592 6593 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG); 6594 if (!ir) { 6595 ERR("out of memory\n"); 6596 return STATUS_INSUFFICIENT_RESOURCES; 6597 } 6598 6599 ir->index = fileref->dc->index; 6600 ir->n = fileref->dc->utf8.Length; 6601 RtlCopyMemory(ir->name, fileref->dc->utf8.Buffer, ir->n); 6602 6603 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, 6604 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef); 6605 if (!NT_SUCCESS(Status)) { 6606 ERR("insert_tree_item_batch returned %08lx\n", Status); 6607 return Status; 6608 } 6609 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { 6610 ULONG rrlen; 6611 ROOT_REF* rr; 6612 6613 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length; 6614 6615 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG); 6616 if (!rr) { 6617 ERR("out of memory\n"); 6618 return STATUS_INSUFFICIENT_RESOURCES; 6619 } 6620 6621 rr->dir = fileref->parent->fcb->inode; 6622 rr->index = fileref->dc->index; 6623 rr->n = fileref->dc->utf8.Length; 6624 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6625 6626 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp); 6627 if (!NT_SUCCESS(Status)) { 6628 ERR("add_root_ref returned %08lx\n", Status); 6629 return Status; 6630 } 6631 6632 
Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp); 6633 if (!NT_SUCCESS(Status)) { 6634 ERR("update_root_backref returned %08lx\n", Status); 6635 return Status; 6636 } 6637 } 6638 6639 fileref->created = false; 6640 } else if (fileref->deleted) { 6641 uint32_t crc32; 6642 ANSI_STRING* name; 6643 DIR_ITEM* di; 6644 6645 name = &fileref->oldutf8; 6646 6647 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)name->Buffer, name->Length); 6648 6649 di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG); 6650 if (!di) { 6651 ERR("out of memory\n"); 6652 return STATUS_INSUFFICIENT_RESOURCES; 6653 } 6654 6655 di->m = 0; 6656 di->n = name->Length; 6657 RtlCopyMemory(di->name, name->Buffer, name->Length); 6658 6659 // delete DIR_ITEM (0x54) 6660 6661 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, 6662 crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem); 6663 if (!NT_SUCCESS(Status)) { 6664 ERR("insert_tree_item_batch returned %08lx\n", Status); 6665 return Status; 6666 } 6667 6668 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6669 INODE_REF* ir; 6670 6671 // delete INODE_REF (0xc) 6672 6673 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG); 6674 if (!ir) { 6675 ERR("out of memory\n"); 6676 return STATUS_INSUFFICIENT_RESOURCES; 6677 } 6678 6679 ir->index = fileref->oldindex; 6680 ir->n = name->Length; 6681 RtlCopyMemory(ir->name, name->Buffer, name->Length); 6682 6683 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, 6684 fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef); 6685 if (!NT_SUCCESS(Status)) { 6686 ERR("insert_tree_item_batch returned %08lx\n", Status); 6687 return Status; 6688 } 6689 } else if (fileref->fcb 
!= fileref->fcb->Vcb->dummy_fcb) { // subvolume 6690 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp); 6691 if (!NT_SUCCESS(Status)) { 6692 ERR("delete_root_ref returned %08lx\n", Status); 6693 return Status; 6694 } 6695 6696 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp); 6697 if (!NT_SUCCESS(Status)) { 6698 ERR("update_root_backref returned %08lx\n", Status); 6699 return Status; 6700 } 6701 } 6702 6703 // delete DIR_INDEX (0x60) 6704 6705 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, 6706 fileref->oldindex, NULL, 0, Batch_Delete); 6707 if (!NT_SUCCESS(Status)) { 6708 ERR("insert_tree_item_batch returned %08lx\n", Status); 6709 return Status; 6710 } 6711 6712 if (fileref->oldutf8.Buffer) { 6713 ExFreePool(fileref->oldutf8.Buffer); 6714 fileref->oldutf8.Buffer = NULL; 6715 } 6716 } else { // rename or change type 6717 PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? 
&fileref->oldutf8 : &fileref->dc->utf8; 6718 uint32_t crc32, oldcrc32; 6719 uint16_t disize; 6720 DIR_ITEM *olddi, *di, *di2; 6721 6722 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6723 6724 if (!fileref->oldutf8.Buffer) 6725 oldcrc32 = crc32; 6726 else 6727 oldcrc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->oldutf8.Buffer, fileref->oldutf8.Length); 6728 6729 olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG); 6730 if (!olddi) { 6731 ERR("out of memory\n"); 6732 return STATUS_INSUFFICIENT_RESOURCES; 6733 } 6734 6735 olddi->m = 0; 6736 olddi->n = (uint16_t)oldutf8->Length; 6737 RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length); 6738 6739 // delete DIR_ITEM (0x54) 6740 6741 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, 6742 oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem); 6743 if (!NT_SUCCESS(Status)) { 6744 ERR("insert_tree_item_batch returned %08lx\n", Status); 6745 ExFreePool(olddi); 6746 return Status; 6747 } 6748 6749 // add DIR_ITEM (0x54) 6750 6751 disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length); 6752 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); 6753 if (!di) { 6754 ERR("out of memory\n"); 6755 return STATUS_INSUFFICIENT_RESOURCES; 6756 } 6757 6758 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); 6759 if (!di2) { 6760 ERR("out of memory\n"); 6761 ExFreePool(di); 6762 return STATUS_INSUFFICIENT_RESOURCES; 6763 } 6764 6765 if (fileref->dc) 6766 di->key = fileref->dc->key; 6767 else if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6768 di->key.obj_id = fileref->fcb->inode; 6769 di->key.obj_type = TYPE_INODE_ITEM; 6770 di->key.offset = 0; 6771 } else { // subvolume 6772 di->key.obj_id = fileref->fcb->subvol->id; 6773 di->key.obj_type = TYPE_ROOT_ITEM; 6774 di->key.offset = 
0xffffffffffffffff; 6775 } 6776 6777 di->transid = fileref->fcb->Vcb->superblock.generation; 6778 di->m = 0; 6779 di->n = (uint16_t)fileref->dc->utf8.Length; 6780 di->type = fileref->fcb->type; 6781 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6782 6783 RtlCopyMemory(di2, di, disize); 6784 6785 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32, 6786 di, disize, Batch_DirItem); 6787 if (!NT_SUCCESS(Status)) { 6788 ERR("insert_tree_item_batch returned %08lx\n", Status); 6789 ExFreePool(di2); 6790 ExFreePool(di); 6791 return Status; 6792 } 6793 6794 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6795 INODE_REF *ir, *ir2; 6796 6797 // delete INODE_REF (0xc) 6798 6799 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG); 6800 if (!ir) { 6801 ERR("out of memory\n"); 6802 ExFreePool(di2); 6803 return STATUS_INSUFFICIENT_RESOURCES; 6804 } 6805 6806 ir->index = fileref->dc->index; 6807 ir->n = oldutf8->Length; 6808 RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n); 6809 6810 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, 6811 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef); 6812 if (!NT_SUCCESS(Status)) { 6813 ERR("insert_tree_item_batch returned %08lx\n", Status); 6814 ExFreePool(ir); 6815 ExFreePool(di2); 6816 return Status; 6817 } 6818 6819 // add INODE_REF (0xc) 6820 6821 ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG); 6822 if (!ir2) { 6823 ERR("out of memory\n"); 6824 ExFreePool(di2); 6825 return STATUS_INSUFFICIENT_RESOURCES; 6826 } 6827 6828 ir2->index = fileref->dc->index; 6829 ir2->n = fileref->dc->utf8.Length; 6830 RtlCopyMemory(ir2->name, fileref->dc->utf8.Buffer, ir2->n); 6831 6832 Status = insert_tree_item_batch(batchlist, 
fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, 6833 ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef); 6834 if (!NT_SUCCESS(Status)) { 6835 ERR("insert_tree_item_batch returned %08lx\n", Status); 6836 ExFreePool(ir2); 6837 ExFreePool(di2); 6838 return Status; 6839 } 6840 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume 6841 ULONG rrlen; 6842 ROOT_REF* rr; 6843 6844 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp); 6845 if (!NT_SUCCESS(Status)) { 6846 ERR("delete_root_ref returned %08lx\n", Status); 6847 ExFreePool(di2); 6848 return Status; 6849 } 6850 6851 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length; 6852 6853 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG); 6854 if (!rr) { 6855 ERR("out of memory\n"); 6856 ExFreePool(di2); 6857 return STATUS_INSUFFICIENT_RESOURCES; 6858 } 6859 6860 rr->dir = fileref->parent->fcb->inode; 6861 rr->index = fileref->dc->index; 6862 rr->n = fileref->dc->utf8.Length; 6863 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6864 6865 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp); 6866 if (!NT_SUCCESS(Status)) { 6867 ERR("add_root_ref returned %08lx\n", Status); 6868 ExFreePool(di2); 6869 return Status; 6870 } 6871 6872 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp); 6873 if (!NT_SUCCESS(Status)) { 6874 ERR("update_root_backref returned %08lx\n", Status); 6875 ExFreePool(di2); 6876 return Status; 6877 } 6878 } 6879 6880 // delete DIR_INDEX (0x60) 6881 6882 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, 6883 fileref->dc->index, NULL, 0, Batch_Delete); 6884 if (!NT_SUCCESS(Status)) { 6885 
ERR("insert_tree_item_batch returned %08lx\n", Status); 6886 ExFreePool(di2); 6887 return Status; 6888 } 6889 6890 // add DIR_INDEX (0x60) 6891 6892 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, 6893 fileref->dc->index, di2, disize, Batch_Insert); 6894 if (!NT_SUCCESS(Status)) { 6895 ERR("insert_tree_item_batch returned %08lx\n", Status); 6896 ExFreePool(di2); 6897 return Status; 6898 } 6899 6900 if (fileref->oldutf8.Buffer) { 6901 ExFreePool(fileref->oldutf8.Buffer); 6902 fileref->oldutf8.Buffer = NULL; 6903 } 6904 } 6905 6906 fileref->dirty = false; 6907 6908 return STATUS_SUCCESS; 6909 } 6910 6911 static void flush_disk_caches(device_extension* Vcb) { 6912 LIST_ENTRY* le; 6913 ioctl_context context; 6914 ULONG num; 6915 6916 context.left = 0; 6917 6918 le = Vcb->devices.Flink; 6919 6920 while (le != &Vcb->devices) { 6921 device* dev = CONTAINING_RECORD(le, device, list_entry); 6922 6923 if (dev->devobj && !dev->readonly && dev->can_flush) 6924 context.left++; 6925 6926 le = le->Flink; 6927 } 6928 6929 if (context.left == 0) 6930 return; 6931 6932 num = 0; 6933 6934 KeInitializeEvent(&context.Event, NotificationEvent, false); 6935 6936 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG); 6937 if (!context.stripes) { 6938 ERR("out of memory\n"); 6939 return; 6940 } 6941 6942 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left); 6943 6944 le = Vcb->devices.Flink; 6945 6946 while (le != &Vcb->devices) { 6947 device* dev = CONTAINING_RECORD(le, device, list_entry); 6948 6949 if (dev->devobj && !dev->readonly && dev->can_flush) { 6950 PIO_STACK_LOCATION IrpSp; 6951 ioctl_context_stripe* stripe = &context.stripes[num]; 6952 6953 RtlZeroMemory(&stripe->apte, sizeof(ATA_PASS_THROUGH_EX)); 6954 6955 stripe->apte.Length = sizeof(ATA_PASS_THROUGH_EX); 6956 stripe->apte.TimeOutValue = 5; 6957 
stripe->apte.CurrentTaskFile[6] = IDE_COMMAND_FLUSH_CACHE; 6958 6959 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false); 6960 6961 if (!stripe->Irp) { 6962 ERR("IoAllocateIrp failed\n"); 6963 goto nextdev; 6964 } 6965 6966 IrpSp = IoGetNextIrpStackLocation(stripe->Irp); 6967 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL; 6968 IrpSp->FileObject = dev->fileobj; 6969 6970 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_ATA_PASS_THROUGH; 6971 IrpSp->Parameters.DeviceIoControl.InputBufferLength = sizeof(ATA_PASS_THROUGH_EX); 6972 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = sizeof(ATA_PASS_THROUGH_EX); 6973 6974 stripe->Irp->AssociatedIrp.SystemBuffer = &stripe->apte; 6975 stripe->Irp->Flags |= IRP_BUFFERED_IO | IRP_INPUT_OPERATION; 6976 stripe->Irp->UserBuffer = &stripe->apte; 6977 stripe->Irp->UserIosb = &stripe->iosb; 6978 6979 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true); 6980 6981 IoCallDriver(dev->devobj, stripe->Irp); 6982 6983 nextdev: 6984 num++; 6985 } 6986 6987 le = le->Flink; 6988 } 6989 6990 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 6991 6992 for (unsigned int i = 0; i < num; i++) { 6993 if (context.stripes[i].Irp) 6994 IoFreeIrp(context.stripes[i].Irp); 6995 } 6996 6997 ExFreePool(context.stripes); 6998 } 6999 7000 static NTSTATUS flush_changed_dev_stats(device_extension* Vcb, device* dev, PIRP Irp) { 7001 NTSTATUS Status; 7002 KEY searchkey; 7003 traverse_ptr tp; 7004 uint16_t statslen; 7005 uint64_t* stats; 7006 7007 searchkey.obj_id = 0; 7008 searchkey.obj_type = TYPE_DEV_STATS; 7009 searchkey.offset = dev->devitem.dev_id; 7010 7011 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp); 7012 if (!NT_SUCCESS(Status)) { 7013 ERR("find_item returned %08lx\n", Status); 7014 return Status; 7015 } 7016 7017 if (!keycmp(tp.item->key, searchkey)) { 7018 Status = delete_tree_item(Vcb, &tp); 7019 if (!NT_SUCCESS(Status)) { 7020 ERR("delete_tree_item 
returned %08lx\n", Status); 7021 return Status; 7022 } 7023 } 7024 7025 statslen = sizeof(uint64_t) * 5; 7026 stats = ExAllocatePoolWithTag(PagedPool, statslen, ALLOC_TAG); 7027 if (!stats) { 7028 ERR("out of memory\n"); 7029 return STATUS_INSUFFICIENT_RESOURCES; 7030 } 7031 7032 RtlCopyMemory(stats, dev->stats, statslen); 7033 7034 Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, dev->devitem.dev_id, stats, statslen, NULL, Irp); 7035 if (!NT_SUCCESS(Status)) { 7036 ERR("insert_tree_item returned %08lx\n", Status); 7037 ExFreePool(stats); 7038 return Status; 7039 } 7040 7041 return STATUS_SUCCESS; 7042 } 7043 7044 static NTSTATUS flush_subvol(device_extension* Vcb, root* r, PIRP Irp) { 7045 NTSTATUS Status; 7046 7047 if (r != Vcb->root_root && r != Vcb->chunk_root) { 7048 KEY searchkey; 7049 traverse_ptr tp; 7050 ROOT_ITEM* ri; 7051 7052 searchkey.obj_id = r->id; 7053 searchkey.obj_type = TYPE_ROOT_ITEM; 7054 searchkey.offset = 0xffffffffffffffff; 7055 7056 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 7057 if (!NT_SUCCESS(Status)) { 7058 ERR("error - find_item returned %08lx\n", Status); 7059 return Status; 7060 } 7061 7062 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 7063 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id); 7064 return STATUS_INTERNAL_ERROR; 7065 } 7066 7067 ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); 7068 if (!ri) { 7069 ERR("out of memory\n"); 7070 return STATUS_INSUFFICIENT_RESOURCES; 7071 } 7072 7073 RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM)); 7074 7075 Status = delete_tree_item(Vcb, &tp); 7076 if (!NT_SUCCESS(Status)) { 7077 ERR("delete_tree_item returned %08lx\n", Status); 7078 return Status; 7079 } 7080 7081 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp); 7082 if (!NT_SUCCESS(Status)) { 7083 
ERR("insert_tree_item returned %08lx\n", Status); 7084 return Status; 7085 } 7086 } 7087 7088 if (r->received) { 7089 KEY searchkey; 7090 traverse_ptr tp; 7091 7092 if (!Vcb->uuid_root) { 7093 root* uuid_root; 7094 7095 TRACE("uuid root doesn't exist, creating it\n"); 7096 7097 Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, false, 0, Irp); 7098 7099 if (!NT_SUCCESS(Status)) { 7100 ERR("create_root returned %08lx\n", Status); 7101 return Status; 7102 } 7103 7104 Vcb->uuid_root = uuid_root; 7105 } 7106 7107 RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid, sizeof(uint64_t)); 7108 searchkey.obj_type = TYPE_SUBVOL_REC_UUID; 7109 RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t)); 7110 7111 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp); 7112 if (!NT_SUCCESS(Status)) { 7113 ERR("find_item returned %08lx\n", Status); 7114 return Status; 7115 } 7116 7117 if (!keycmp(tp.item->key, searchkey)) { 7118 if (tp.item->size + sizeof(uint64_t) <= Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)) { 7119 uint64_t* ids; 7120 7121 ids = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(uint64_t), ALLOC_TAG); 7122 if (!ids) { 7123 ERR("out of memory\n"); 7124 return STATUS_INSUFFICIENT_RESOURCES; 7125 } 7126 7127 RtlCopyMemory(ids, tp.item->data, tp.item->size); 7128 RtlCopyMemory((uint8_t*)ids + tp.item->size, &r->id, sizeof(uint64_t)); 7129 7130 Status = delete_tree_item(Vcb, &tp); 7131 if (!NT_SUCCESS(Status)) { 7132 ERR("delete_tree_item returned %08lx\n", Status); 7133 ExFreePool(ids); 7134 return Status; 7135 } 7136 7137 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ids, tp.item->size + sizeof(uint64_t), NULL, Irp); 7138 if (!NT_SUCCESS(Status)) { 7139 ERR("insert_tree_item returned %08lx\n", Status); 7140 ExFreePool(ids); 7141 return Status; 7142 } 7143 } 7144 } else { 7145 uint64_t* root_num; 7146 7147 
root_num = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t), ALLOC_TAG); 7148 if (!root_num) { 7149 ERR("out of memory\n"); 7150 return STATUS_INSUFFICIENT_RESOURCES; 7151 } 7152 7153 *root_num = r->id; 7154 7155 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(uint64_t), NULL, Irp); 7156 if (!NT_SUCCESS(Status)) { 7157 ERR("insert_tree_item returned %08lx\n", Status); 7158 ExFreePool(root_num); 7159 return Status; 7160 } 7161 } 7162 7163 r->received = false; 7164 } 7165 7166 r->dirty = false; 7167 7168 return STATUS_SUCCESS; 7169 } 7170 7171 static NTSTATUS test_not_full(device_extension* Vcb) { 7172 uint64_t reserve, could_alloc, free_space; 7173 LIST_ENTRY* le; 7174 7175 // This function ensures we drop into readonly mode if we're about to leave very little 7176 // space for metadata - this is similar to the "global reserve" of the Linux driver. 7177 // Otherwise we might completely fill our space, at which point due to COW we can't 7178 // delete anything in order to fix this. 
7179 7180 reserve = Vcb->extent_root->root_item.bytes_used; 7181 reserve += Vcb->root_root->root_item.bytes_used; 7182 if (Vcb->checksum_root) reserve += Vcb->checksum_root->root_item.bytes_used; 7183 7184 reserve = max(reserve, 0x1000000); // 16 M 7185 reserve = min(reserve, 0x20000000); // 512 M 7186 7187 // Find out how much space would be available for new metadata chunks 7188 7189 could_alloc = 0; 7190 7191 if (Vcb->metadata_flags & BLOCK_FLAG_RAID5) { 7192 uint64_t s1 = 0, s2 = 0, s3 = 0; 7193 7194 le = Vcb->devices.Flink; 7195 while (le != &Vcb->devices) { 7196 device* dev = CONTAINING_RECORD(le, device, list_entry); 7197 7198 if (!dev->readonly) { 7199 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used; 7200 7201 if (space >= s1) { 7202 s3 = s2; 7203 s2 = s1; 7204 s1 = space; 7205 } else if (space >= s2) { 7206 s3 = s2; 7207 s2 = space; 7208 } else if (space >= s3) 7209 s3 = space; 7210 } 7211 7212 le = le->Flink; 7213 } 7214 7215 could_alloc = s3 * 2; 7216 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID6)) { 7217 uint64_t s1 = 0, s2 = 0, s3 = 0, s4 = 0; 7218 7219 le = Vcb->devices.Flink; 7220 while (le != &Vcb->devices) { 7221 device* dev = CONTAINING_RECORD(le, device, list_entry); 7222 7223 if (!dev->readonly) { 7224 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used; 7225 7226 if (space >= s1) { 7227 s4 = s3; 7228 s3 = s2; 7229 s2 = s1; 7230 s1 = space; 7231 } else if (space >= s2) { 7232 s4 = s3; 7233 s3 = s2; 7234 s2 = space; 7235 } else if (space >= s3) { 7236 s4 = s3; 7237 s3 = space; 7238 } else if (space >= s4) 7239 s4 = space; 7240 } 7241 7242 le = le->Flink; 7243 } 7244 7245 could_alloc = s4 * 2; 7246 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1)) { 7247 uint64_t s1 = 0, s2 = 0; 7248 7249 le = Vcb->devices.Flink; 7250 while (le != &Vcb->devices) { 7251 device* dev = CONTAINING_RECORD(le, device, list_entry); 7252 7253 if (!dev->readonly) { 7254 uint64_t space = 
dev->devitem.num_bytes - dev->devitem.bytes_used; 7255 7256 if (space >= s1) { 7257 s2 = s1; 7258 s1 = space; 7259 } else if (space >= s2) 7260 s2 = space; 7261 } 7262 7263 le = le->Flink; 7264 } 7265 7266 if (Vcb->metadata_flags & BLOCK_FLAG_RAID1) 7267 could_alloc = s2; 7268 else // RAID0 7269 could_alloc = s2 * 2; 7270 } else if (Vcb->metadata_flags & BLOCK_FLAG_DUPLICATE) { 7271 le = Vcb->devices.Flink; 7272 while (le != &Vcb->devices) { 7273 device* dev = CONTAINING_RECORD(le, device, list_entry); 7274 7275 if (!dev->readonly) { 7276 uint64_t space = (dev->devitem.num_bytes - dev->devitem.bytes_used) / 2; 7277 7278 could_alloc = max(could_alloc, space); 7279 } 7280 7281 le = le->Flink; 7282 } 7283 } else if (Vcb->metadata_flags & BLOCK_FLAG_RAID1C3) { 7284 uint64_t s1 = 0, s2 = 0, s3 = 0; 7285 7286 le = Vcb->devices.Flink; 7287 while (le != &Vcb->devices) { 7288 device* dev = CONTAINING_RECORD(le, device, list_entry); 7289 7290 if (!dev->readonly) { 7291 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used; 7292 7293 if (space >= s1) { 7294 s3 = s2; 7295 s2 = s1; 7296 s1 = space; 7297 } else if (space >= s2) { 7298 s3 = s2; 7299 s2 = space; 7300 } else if (space >= s3) 7301 s3 = space; 7302 } 7303 7304 le = le->Flink; 7305 } 7306 7307 could_alloc = s3; 7308 } else if (Vcb->metadata_flags & BLOCK_FLAG_RAID1C4) { 7309 uint64_t s1 = 0, s2 = 0, s3 = 0, s4 = 0; 7310 7311 le = Vcb->devices.Flink; 7312 while (le != &Vcb->devices) { 7313 device* dev = CONTAINING_RECORD(le, device, list_entry); 7314 7315 if (!dev->readonly) { 7316 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used; 7317 7318 if (space >= s1) { 7319 s4 = s3; 7320 s3 = s2; 7321 s2 = s1; 7322 s1 = space; 7323 } else if (space >= s2) { 7324 s4 = s3; 7325 s3 = s2; 7326 s2 = space; 7327 } else if (space >= s3) { 7328 s4 = s3; 7329 s3 = space; 7330 } else if (space >= s4) 7331 s4 = space; 7332 } 7333 7334 le = le->Flink; 7335 } 7336 7337 could_alloc = s4; 7338 } else { // SINGLE 
7339 le = Vcb->devices.Flink; 7340 while (le != &Vcb->devices) { 7341 device* dev = CONTAINING_RECORD(le, device, list_entry); 7342 7343 if (!dev->readonly) { 7344 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used; 7345 7346 could_alloc = max(could_alloc, space); 7347 } 7348 7349 le = le->Flink; 7350 } 7351 } 7352 7353 if (could_alloc >= reserve) 7354 return STATUS_SUCCESS; 7355 7356 free_space = 0; 7357 7358 le = Vcb->chunks.Flink; 7359 while (le != &Vcb->chunks) { 7360 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 7361 7362 if (!c->reloc && !c->readonly && c->chunk_item->type & BLOCK_FLAG_METADATA) { 7363 free_space += c->chunk_item->size - c->used; 7364 7365 if (free_space + could_alloc >= reserve) 7366 return STATUS_SUCCESS; 7367 } 7368 7369 le = le->Flink; 7370 } 7371 7372 return STATUS_DISK_FULL; 7373 } 7374 7375 static NTSTATUS check_for_orphans_root(device_extension* Vcb, root* r, PIRP Irp) { 7376 NTSTATUS Status; 7377 KEY searchkey; 7378 traverse_ptr tp; 7379 LIST_ENTRY rollback; 7380 7381 TRACE("(%p, %p)\n", Vcb, r); 7382 7383 InitializeListHead(&rollback); 7384 7385 searchkey.obj_id = BTRFS_ORPHAN_INODE_OBJID; 7386 searchkey.obj_type = TYPE_ORPHAN_INODE; 7387 searchkey.offset = 0; 7388 7389 Status = find_item(Vcb, r, &tp, &searchkey, false, Irp); 7390 if (!NT_SUCCESS(Status)) { 7391 ERR("find_item returned %08lx\n", Status); 7392 return Status; 7393 } 7394 7395 do { 7396 traverse_ptr next_tp; 7397 7398 if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type)) 7399 break; 7400 7401 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 7402 fcb* fcb; 7403 7404 TRACE("removing orphaned inode %I64x\n", tp.item->key.offset); 7405 7406 Status = open_fcb(Vcb, r, tp.item->key.offset, 0, NULL, false, NULL, &fcb, PagedPool, Irp); 7407 if (!NT_SUCCESS(Status)) 7408 ERR("open_fcb returned %08lx\n", Status); 7409 else { 7410 if 
(fcb->inode_item.st_nlink == 0) { 7411 if (fcb->type != BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0) { 7412 Status = excise_extents(Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size), Irp, &rollback); 7413 if (!NT_SUCCESS(Status)) { 7414 ERR("excise_extents returned %08lx\n", Status); 7415 goto end; 7416 } 7417 } 7418 7419 fcb->deleted = true; 7420 7421 mark_fcb_dirty(fcb); 7422 } 7423 7424 free_fcb(fcb); 7425 7426 Status = delete_tree_item(Vcb, &tp); 7427 if (!NT_SUCCESS(Status)) { 7428 ERR("delete_tree_item returned %08lx\n", Status); 7429 goto end; 7430 } 7431 } 7432 } 7433 7434 if (find_next_item(Vcb, &tp, &next_tp, false, Irp)) 7435 tp = next_tp; 7436 else 7437 break; 7438 } while (true); 7439 7440 Status = STATUS_SUCCESS; 7441 7442 clear_rollback(&rollback); 7443 7444 end: 7445 do_rollback(Vcb, &rollback); 7446 7447 return Status; 7448 } 7449 7450 static NTSTATUS check_for_orphans(device_extension* Vcb, PIRP Irp) { 7451 NTSTATUS Status; 7452 LIST_ENTRY* le; 7453 7454 if (IsListEmpty(&Vcb->dirty_filerefs)) 7455 return STATUS_SUCCESS; 7456 7457 le = Vcb->dirty_filerefs.Flink; 7458 while (le != &Vcb->dirty_filerefs) { 7459 file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry_dirty); 7460 7461 if (!fr->fcb->subvol->checked_for_orphans) { 7462 Status = check_for_orphans_root(Vcb, fr->fcb->subvol, Irp); 7463 if (!NT_SUCCESS(Status)) { 7464 ERR("check_for_orphans_root returned %08lx\n", Status); 7465 return Status; 7466 } 7467 7468 fr->fcb->subvol->checked_for_orphans = true; 7469 } 7470 7471 le = le->Flink; 7472 } 7473 7474 return STATUS_SUCCESS; 7475 } 7476 7477 static NTSTATUS do_write2(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { 7478 NTSTATUS Status; 7479 LIST_ENTRY *le, batchlist; 7480 bool cache_changed = false; 7481 volume_device_extension* vde; 7482 bool no_cache = false; 7483 #ifdef DEBUG_FLUSH_TIMES 7484 uint64_t filerefs = 0, fcbs = 0; 7485 LARGE_INTEGER freq, time1, time2; 7486 #endif 7487 #ifdef 
DEBUG_WRITE_LOOPS 7488 UINT loops = 0; 7489 #endif 7490 7491 TRACE("(%p)\n", Vcb); 7492 7493 InitializeListHead(&batchlist); 7494 7495 #ifdef DEBUG_FLUSH_TIMES 7496 time1 = KeQueryPerformanceCounter(&freq); 7497 #endif 7498 7499 Status = check_for_orphans(Vcb, Irp); 7500 if (!NT_SUCCESS(Status)) { 7501 ERR("check_for_orphans returned %08lx\n", Status); 7502 return Status; 7503 } 7504 7505 ExAcquireResourceExclusiveLite(&Vcb->dirty_filerefs_lock, true); 7506 7507 while (!IsListEmpty(&Vcb->dirty_filerefs)) { 7508 file_ref* fr = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_filerefs), file_ref, list_entry_dirty); 7509 7510 flush_fileref(fr, &batchlist, Irp); 7511 free_fileref(fr); 7512 7513 #ifdef DEBUG_FLUSH_TIMES 7514 filerefs++; 7515 #endif 7516 } 7517 7518 ExReleaseResourceLite(&Vcb->dirty_filerefs_lock); 7519 7520 Status = commit_batch_list(Vcb, &batchlist, Irp); 7521 if (!NT_SUCCESS(Status)) { 7522 ERR("commit_batch_list returned %08lx\n", Status); 7523 return Status; 7524 } 7525 7526 #ifdef DEBUG_FLUSH_TIMES 7527 time2 = KeQueryPerformanceCounter(NULL); 7528 7529 ERR("flushed %I64u filerefs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart); 7530 7531 time1 = KeQueryPerformanceCounter(&freq); 7532 #endif 7533 7534 // We process deleted streams first, so we don't run over our xattr 7535 // limit unless we absolutely have to. 7536 // We also process deleted normal files, to avoid any problems 7537 // caused by inode collisions. 
7538 7539 ExAcquireResourceExclusiveLite(&Vcb->dirty_fcbs_lock, true); 7540 7541 le = Vcb->dirty_fcbs.Flink; 7542 while (le != &Vcb->dirty_fcbs) { 7543 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty); 7544 LIST_ENTRY* le2 = le->Flink; 7545 7546 if (fcb->deleted) { 7547 ExAcquireResourceExclusiveLite(fcb->Header.Resource, true); 7548 Status = flush_fcb(fcb, false, &batchlist, Irp); 7549 ExReleaseResourceLite(fcb->Header.Resource); 7550 7551 free_fcb(fcb); 7552 7553 if (!NT_SUCCESS(Status)) { 7554 ERR("flush_fcb returned %08lx\n", Status); 7555 clear_batch_list(Vcb, &batchlist); 7556 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); 7557 return Status; 7558 } 7559 7560 #ifdef DEBUG_FLUSH_TIMES 7561 fcbs++; 7562 #endif 7563 } 7564 7565 le = le2; 7566 } 7567 7568 Status = commit_batch_list(Vcb, &batchlist, Irp); 7569 if (!NT_SUCCESS(Status)) { 7570 ERR("commit_batch_list returned %08lx\n", Status); 7571 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); 7572 return Status; 7573 } 7574 7575 le = Vcb->dirty_fcbs.Flink; 7576 while (le != &Vcb->dirty_fcbs) { 7577 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty); 7578 LIST_ENTRY* le2 = le->Flink; 7579 7580 if (fcb->subvol != Vcb->root_root) { 7581 ExAcquireResourceExclusiveLite(fcb->Header.Resource, true); 7582 Status = flush_fcb(fcb, false, &batchlist, Irp); 7583 ExReleaseResourceLite(fcb->Header.Resource); 7584 free_fcb(fcb); 7585 7586 if (!NT_SUCCESS(Status)) { 7587 ERR("flush_fcb returned %08lx\n", Status); 7588 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); 7589 return Status; 7590 } 7591 7592 #ifdef DEBUG_FLUSH_TIMES 7593 fcbs++; 7594 #endif 7595 } 7596 7597 le = le2; 7598 } 7599 7600 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); 7601 7602 Status = commit_batch_list(Vcb, &batchlist, Irp); 7603 if (!NT_SUCCESS(Status)) { 7604 ERR("commit_batch_list returned %08lx\n", Status); 7605 return Status; 7606 } 7607 7608 #ifdef DEBUG_FLUSH_TIMES 7609 time2 = KeQueryPerformanceCounter(NULL); 7610 7611 
ERR("flushed %I64u fcbs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart); 7612 #endif 7613 7614 // no need to get dirty_subvols_lock here, as we have tree_lock exclusively 7615 while (!IsListEmpty(&Vcb->dirty_subvols)) { 7616 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_subvols), root, list_entry_dirty); 7617 7618 Status = flush_subvol(Vcb, r, Irp); 7619 if (!NT_SUCCESS(Status)) { 7620 ERR("flush_subvol returned %08lx\n", Status); 7621 return Status; 7622 } 7623 } 7624 7625 if (!IsListEmpty(&Vcb->drop_roots)) { 7626 Status = drop_roots(Vcb, Irp, rollback); 7627 7628 if (!NT_SUCCESS(Status)) { 7629 ERR("drop_roots returned %08lx\n", Status); 7630 return Status; 7631 } 7632 } 7633 7634 Status = update_chunks(Vcb, &batchlist, Irp, rollback); 7635 7636 if (!NT_SUCCESS(Status)) { 7637 ERR("update_chunks returned %08lx\n", Status); 7638 return Status; 7639 } 7640 7641 Status = commit_batch_list(Vcb, &batchlist, Irp); 7642 7643 // If only changing superblock, e.g. changing label, we still need to rewrite 7644 // the root tree so the generations match, otherwise you won't be able to mount on Linux. 
7645 if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) { 7646 KEY searchkey; 7647 7648 traverse_ptr tp; 7649 7650 searchkey.obj_id = 0; 7651 searchkey.obj_type = 0; 7652 searchkey.offset = 0; 7653 7654 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 7655 if (!NT_SUCCESS(Status)) { 7656 ERR("error - find_item returned %08lx\n", Status); 7657 return Status; 7658 } 7659 7660 Vcb->root_root->treeholder.tree->write = true; 7661 } 7662 7663 // make sure we always update the extent tree 7664 Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp); 7665 if (!NT_SUCCESS(Status)) { 7666 ERR("add_root_item_to_cache returned %08lx\n", Status); 7667 return Status; 7668 } 7669 7670 if (Vcb->stats_changed) { 7671 le = Vcb->devices.Flink; 7672 while (le != &Vcb->devices) { 7673 device* dev = CONTAINING_RECORD(le, device, list_entry); 7674 7675 if (dev->stats_changed) { 7676 Status = flush_changed_dev_stats(Vcb, dev, Irp); 7677 if (!NT_SUCCESS(Status)) { 7678 ERR("flush_changed_dev_stats returned %08lx\n", Status); 7679 return Status; 7680 } 7681 dev->stats_changed = false; 7682 } 7683 7684 le = le->Flink; 7685 } 7686 7687 Vcb->stats_changed = false; 7688 } 7689 7690 do { 7691 Status = add_parents(Vcb, Irp); 7692 if (!NT_SUCCESS(Status)) { 7693 ERR("add_parents returned %08lx\n", Status); 7694 goto end; 7695 } 7696 7697 Status = allocate_tree_extents(Vcb, Irp, rollback); 7698 if (!NT_SUCCESS(Status)) { 7699 ERR("allocate_tree_extents returned %08lx\n", Status); 7700 goto end; 7701 } 7702 7703 Status = do_splits(Vcb, Irp, rollback); 7704 if (!NT_SUCCESS(Status)) { 7705 ERR("do_splits returned %08lx\n", Status); 7706 goto end; 7707 } 7708 7709 Status = update_chunk_usage(Vcb, Irp, rollback); 7710 if (!NT_SUCCESS(Status)) { 7711 ERR("update_chunk_usage returned %08lx\n", Status); 7712 goto end; 7713 } 7714 7715 if (!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) { 7716 if (!no_cache) { 7717 Status = 
allocate_cache(Vcb, &cache_changed, Irp, rollback); 7718 if (!NT_SUCCESS(Status)) { 7719 WARN("allocate_cache returned %08lx\n", Status); 7720 no_cache = true; 7721 cache_changed = false; 7722 } 7723 } 7724 } else { 7725 Status = update_chunk_caches_tree(Vcb, Irp); 7726 if (!NT_SUCCESS(Status)) { 7727 ERR("update_chunk_caches_tree returned %08lx\n", Status); 7728 goto end; 7729 } 7730 } 7731 7732 #ifdef DEBUG_WRITE_LOOPS 7733 loops++; 7734 7735 if (cache_changed) 7736 ERR("cache has changed, looping again\n"); 7737 #endif 7738 } while (cache_changed || !trees_consistent(Vcb)); 7739 7740 #ifdef DEBUG_WRITE_LOOPS 7741 ERR("%u loops\n", loops); 7742 #endif 7743 7744 TRACE("trees consistent\n"); 7745 7746 Status = update_root_root(Vcb, no_cache, Irp, rollback); 7747 if (!NT_SUCCESS(Status)) { 7748 ERR("update_root_root returned %08lx\n", Status); 7749 goto end; 7750 } 7751 7752 Status = write_trees(Vcb, Irp); 7753 if (!NT_SUCCESS(Status)) { 7754 ERR("write_trees returned %08lx\n", Status); 7755 goto end; 7756 } 7757 7758 Status = test_not_full(Vcb); 7759 if (!NT_SUCCESS(Status)) { 7760 ERR("test_not_full returned %08lx\n", Status); 7761 goto end; 7762 } 7763 7764 #ifdef DEBUG_PARANOID 7765 le = Vcb->trees.Flink; 7766 while (le != &Vcb->trees) { 7767 tree* t = CONTAINING_RECORD(le, tree, list_entry); 7768 KEY searchkey; 7769 traverse_ptr tp; 7770 7771 searchkey.obj_id = t->header.address; 7772 searchkey.obj_type = TYPE_METADATA_ITEM; 7773 searchkey.offset = 0xffffffffffffffff; 7774 7775 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 7776 if (!NT_SUCCESS(Status)) { 7777 ERR("error - find_item returned %08lx\n", Status); 7778 goto end; 7779 } 7780 7781 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 7782 searchkey.obj_id = t->header.address; 7783 searchkey.obj_type = TYPE_EXTENT_ITEM; 7784 searchkey.offset = 0xffffffffffffffff; 7785 7786 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, 
Irp); 7787 if (!NT_SUCCESS(Status)) { 7788 ERR("error - find_item returned %08lx\n", Status); 7789 goto end; 7790 } 7791 7792 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 7793 ERR("error - could not find entry in extent tree for tree at %I64x\n", t->header.address); 7794 Status = STATUS_INTERNAL_ERROR; 7795 goto end; 7796 } 7797 } 7798 7799 le = le->Flink; 7800 } 7801 #endif 7802 7803 Vcb->superblock.cache_generation = Vcb->superblock.generation; 7804 7805 if (!Vcb->options.no_barrier) 7806 flush_disk_caches(Vcb); 7807 7808 Status = write_superblocks(Vcb, Irp); 7809 if (!NT_SUCCESS(Status)) { 7810 ERR("write_superblocks returned %08lx\n", Status); 7811 goto end; 7812 } 7813 7814 vde = Vcb->vde; 7815 7816 if (vde) { 7817 pdo_device_extension* pdode = vde->pdode; 7818 7819 ExAcquireResourceSharedLite(&pdode->child_lock, true); 7820 7821 le = pdode->children.Flink; 7822 7823 while (le != &pdode->children) { 7824 volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); 7825 7826 vc->generation = Vcb->superblock.generation; 7827 le = le->Flink; 7828 } 7829 7830 ExReleaseResourceLite(&pdode->child_lock); 7831 } 7832 7833 clean_space_cache(Vcb); 7834 7835 le = Vcb->chunks.Flink; 7836 while (le != &Vcb->chunks) { 7837 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 7838 7839 c->changed = false; 7840 c->space_changed = false; 7841 7842 le = le->Flink; 7843 } 7844 7845 Vcb->superblock.generation++; 7846 7847 Status = STATUS_SUCCESS; 7848 7849 le = Vcb->trees.Flink; 7850 while (le != &Vcb->trees) { 7851 tree* t = CONTAINING_RECORD(le, tree, list_entry); 7852 7853 t->write = false; 7854 7855 le = le->Flink; 7856 } 7857 7858 Vcb->need_write = false; 7859 7860 while (!IsListEmpty(&Vcb->drop_roots)) { 7861 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->drop_roots), root, list_entry); 7862 7863 if (IsListEmpty(&r->fcbs)) { 7864 ExDeleteResourceLite(&r->nonpaged->load_tree_lock); 7865 ExFreePool(r->nonpaged); 7866 
ExFreePool(r); 7867 } else 7868 r->dropped = true; 7869 } 7870 7871 end: 7872 TRACE("do_write returning %08lx\n", Status); 7873 7874 return Status; 7875 } 7876 7877 NTSTATUS do_write(device_extension* Vcb, PIRP Irp) { 7878 LIST_ENTRY rollback; 7879 NTSTATUS Status; 7880 7881 InitializeListHead(&rollback); 7882 7883 Status = do_write2(Vcb, Irp, &rollback); 7884 7885 if (!NT_SUCCESS(Status)) { 7886 ERR("do_write2 returned %08lx, dropping into readonly mode\n", Status); 7887 Vcb->readonly = true; 7888 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_FORCED_CLOSED); 7889 do_rollback(Vcb, &rollback); 7890 } else 7891 clear_rollback(&rollback); 7892 7893 return Status; 7894 } 7895 7896 static void do_flush(device_extension* Vcb) { 7897 NTSTATUS Status; 7898 7899 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true); 7900 7901 if (Vcb->need_write && !Vcb->readonly) 7902 Status = do_write(Vcb, NULL); 7903 else 7904 Status = STATUS_SUCCESS; 7905 7906 free_trees(Vcb); 7907 7908 if (!NT_SUCCESS(Status)) 7909 ERR("do_write returned %08lx\n", Status); 7910 7911 ExReleaseResourceLite(&Vcb->tree_lock); 7912 } 7913 7914 _Function_class_(KSTART_ROUTINE) 7915 void __stdcall flush_thread(void* context) { 7916 DEVICE_OBJECT* devobj = context; 7917 device_extension* Vcb = devobj->DeviceExtension; 7918 LARGE_INTEGER due_time; 7919 7920 ObReferenceObject(devobj); 7921 7922 KeInitializeTimer(&Vcb->flush_thread_timer); 7923 7924 due_time.QuadPart = (uint64_t)Vcb->options.flush_interval * -10000000; 7925 7926 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL); 7927 7928 while (true) { 7929 KeWaitForSingleObject(&Vcb->flush_thread_timer, Executive, KernelMode, false, NULL); 7930 7931 if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing) 7932 break; 7933 7934 if (!Vcb->locked) 7935 do_flush(Vcb); 7936 7937 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL); 7938 } 7939 7940 ObDereferenceObject(devobj); 7941 KeCancelTimer(&Vcb->flush_thread_timer); 7942 7943 
KeSetEvent(&Vcb->flush_thread_finished, 0, false); 7944 7945 PsTerminateSystemThread(STATUS_SUCCESS); 7946 } 7947