1 /* Copyright (c) Mark Harmstone 2016-17 2 * 3 * This file is part of WinBtrfs. 4 * 5 * WinBtrfs is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU Lesser General Public Licence as published by 7 * the Free Software Foundation, either version 3 of the Licence, or 8 * (at your option) any later version. 9 * 10 * WinBtrfs is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public Licence for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public Licence 16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ 17 18 #include "btrfs_drv.h" 19 #include <ata.h> 20 #include <ntddscsi.h> 21 #include <ntddstor.h> 22 23 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node)) 24 25 // #define DEBUG_WRITE_LOOPS 26 27 typedef struct { 28 KEVENT Event; 29 IO_STATUS_BLOCK iosb; 30 } write_context; 31 32 typedef struct { 33 EXTENT_ITEM_TREE eit; 34 uint8_t type; 35 TREE_BLOCK_REF tbr; 36 } EXTENT_ITEM_TREE2; 37 38 typedef struct { 39 EXTENT_ITEM ei; 40 uint8_t type; 41 TREE_BLOCK_REF tbr; 42 } EXTENT_ITEM_SKINNY_METADATA; 43 44 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp); 45 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback); 46 47 #ifndef _MSC_VER // not in mingw yet 48 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000 49 #endif 50 51 _Function_class_(IO_COMPLETION_ROUTINE) 52 static NTSTATUS __stdcall write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 53 write_context* context = conptr; 54 55 UNUSED(DeviceObject); 56 57 context->iosb = Irp->IoStatus; 58 KeSetEvent(&context->Event, 0, false); 59 60 return STATUS_MORE_PROCESSING_REQUIRED; 61 } 62 63 NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ PFILE_OBJECT fileobj, _In_ uint64_t address, 64 _In_reads_bytes_(length) void* data, _In_ uint32_t length) { 65 NTSTATUS Status; 66 LARGE_INTEGER offset; 67 PIRP Irp; 68 PIO_STACK_LOCATION IrpSp; 69 write_context context; 70 71 TRACE("(%p, %I64x, %p, %x)\n", device, address, data, length); 72 73 RtlZeroMemory(&context, sizeof(write_context)); 74 75 KeInitializeEvent(&context.Event, NotificationEvent, false); 76 77 offset.QuadPart = address; 78 79 Irp = IoAllocateIrp(device->StackSize, false); 80 81 if (!Irp) { 82 ERR("IoAllocateIrp failed\n"); 83 return STATUS_INSUFFICIENT_RESOURCES; 84 } 85 86 IrpSp = IoGetNextIrpStackLocation(Irp); 87 IrpSp->MajorFunction = IRP_MJ_WRITE; 88 IrpSp->FileObject = fileobj; 89 90 if (device->Flags & DO_BUFFERED_IO) { 91 Irp->AssociatedIrp.SystemBuffer = data; 92 93 Irp->Flags = IRP_BUFFERED_IO; 94 } else if (device->Flags & DO_DIRECT_IO) { 95 Irp->MdlAddress = IoAllocateMdl(data, length, false, false, NULL); 96 if (!Irp->MdlAddress) { 97 DbgPrint("IoAllocateMdl failed\n"); 98 Status = STATUS_INSUFFICIENT_RESOURCES; 99 goto exit; 100 } 101 102 Status = STATUS_SUCCESS; 103 104 _SEH2_TRY { 105 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess); 106 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 107 Status = _SEH2_GetExceptionCode(); 108 } _SEH2_END; 109 110 if (!NT_SUCCESS(Status)) { 111 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 112 IoFreeMdl(Irp->MdlAddress); 113 goto exit; 114 } 115 } else { 116 Irp->UserBuffer = data; 117 } 118 119 IrpSp->Parameters.Write.Length = length; 120 IrpSp->Parameters.Write.ByteOffset = offset; 121 122 Irp->UserIosb = &context.iosb; 123 124 Irp->UserEvent = &context.Event; 125 126 IoSetCompletionRoutine(Irp, write_completion, &context, true, true, true); 127 128 Status = IoCallDriver(device, Irp); 129 130 if (Status == STATUS_PENDING) { 131 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 132 Status = context.iosb.Status; 133 } 134 135 if (!NT_SUCCESS(Status)) { 136 ERR("IoCallDriver returned %08x\n", Status); 137 } 138 139 if (device->Flags & DO_DIRECT_IO) { 140 MmUnlockPages(Irp->MdlAddress); 141 IoFreeMdl(Irp->MdlAddress); 142 } 143 144 exit: 145 IoFreeIrp(Irp); 146 147 return Status; 148 } 149 150 static void add_trim_entry(device* dev, uint64_t address, uint64_t size) { 151 space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); 152 if (!s) { 153 ERR("out of memory\n"); 154 return; 155 } 156 157 s->address = address; 158 s->size = size; 159 dev->num_trim_entries++; 160 161 InsertTailList(&dev->trim_list, &s->list_entry); 162 } 163 164 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) { 165 ULONG type; 166 167 if (Vcb->trim && !Vcb->options.no_trim) { 168 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) 169 type = BLOCK_FLAG_DUPLICATE; 170 else if (c->chunk_item->type & BLOCK_FLAG_RAID0) 171 type = BLOCK_FLAG_RAID0; 172 else if (c->chunk_item->type & BLOCK_FLAG_RAID1) 173 type = BLOCK_FLAG_DUPLICATE; 174 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 175 type = BLOCK_FLAG_RAID10; 176 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) 177 type = BLOCK_FLAG_RAID5; 178 else if (c->chunk_item->type & BLOCK_FLAG_RAID6) 179 type = BLOCK_FLAG_RAID6; 180 else // SINGLE 181 type = BLOCK_FLAG_DUPLICATE; 182 } 183 184 while (!IsListEmpty(&c->deleting)) { 185 space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry); 186 187 if (Vcb->trim && !Vcb->options.no_trim && (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) { 188 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 189 190 if (type == BLOCK_FLAG_DUPLICATE) { 191 uint16_t i; 192 193 for (i = 0; i < c->chunk_item->num_stripes; i++) { 194 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) 195 add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size); 196 } 197 } else if (type == BLOCK_FLAG_RAID0) { 198 uint64_t startoff, endoff; 199 uint16_t startoffstripe, endoffstripe, i; 200 201 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe); 202 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe); 203 204 for (i = 0; i < c->chunk_item->num_stripes; i++) { 205 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) { 206 uint64_t stripestart, stripeend; 207 208 if (startoffstripe > i) 209 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 210 else if (startoffstripe == i) 211 stripestart = startoff; 212 else 213 stripestart = startoff - (startoff % c->chunk_item->stripe_length); 214 215 if (endoffstripe > i) 216 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 217 else if (endoffstripe == i) 218 stripeend = endoff + 1; 219 else 220 stripeend = endoff - (endoff % c->chunk_item->stripe_length); 221 222 if (stripestart != stripeend) 223 add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart); 224 } 225 } 226 } else if (type == BLOCK_FLAG_RAID10) { 227 uint64_t startoff, endoff; 228 uint16_t sub_stripes, startoffstripe, endoffstripe, i; 229 230 sub_stripes = max(1, c->chunk_item->sub_stripes); 231 232 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe); 233 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe); 234 235 startoffstripe *= sub_stripes; 236 endoffstripe *= sub_stripes; 237 238 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) { 239 ULONG j; 240 uint64_t stripestart, stripeend; 241 242 if (startoffstripe > i) 243 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 244 else if (startoffstripe == i) 245 stripestart = startoff; 246 else 247 stripestart = startoff - (startoff % c->chunk_item->stripe_length); 248 249 if (endoffstripe > i) 250 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 251 else if (endoffstripe == i) 252 stripeend = endoff + 1; 253 else 254 stripeend = endoff - (endoff % c->chunk_item->stripe_length); 255 256 if (stripestart != stripeend) { 257 for (j = 0; j < sub_stripes; j++) { 258 if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim) 259 add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart); 260 } 261 } 262 } 263 } 264 // FIXME - RAID5(?), RAID6(?) 265 } 266 267 RemoveEntryList(&s->list_entry); 268 ExFreePool(s); 269 } 270 } 271 272 typedef struct { 273 DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa; 274 ATA_PASS_THROUGH_EX apte; 275 PIRP Irp; 276 IO_STATUS_BLOCK iosb; 277 #ifdef DEBUG_TRIM_EMULATION 278 PMDL mdl; 279 void* buf; 280 #endif 281 } ioctl_context_stripe; 282 283 typedef struct { 284 KEVENT Event; 285 LONG left; 286 ioctl_context_stripe* stripes; 287 } ioctl_context; 288 289 _Function_class_(IO_COMPLETION_ROUTINE) 290 static NTSTATUS __stdcall ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 291 ioctl_context* context = (ioctl_context*)conptr; 292 LONG left2 = InterlockedDecrement(&context->left); 293 294 UNUSED(DeviceObject); 295 UNUSED(Irp); 296 297 if (left2 == 0) 298 KeSetEvent(&context->Event, 0, false); 299 300 return STATUS_MORE_PROCESSING_REQUIRED; 301 } 302 303 #ifdef DEBUG_TRIM_EMULATION 304 static void trim_emulation(device* dev) { 305 LIST_ENTRY* le; 306 ioctl_context context; 307 unsigned int i = 0, count = 0; 308 309 le = dev->trim_list.Flink; 310 while (le != &dev->trim_list) { 311 count++; 312 le = le->Flink; 313 } 314 315 context.left = count; 316 317 KeInitializeEvent(&context.Event, NotificationEvent, false); 318 319 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG); 320 if (!context.stripes) { 321 ERR("out of memory\n"); 322 return; 323 } 324 325 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left); 326 327 i = 0; 328 le = dev->trim_list.Flink; 329 while (le != &dev->trim_list) { 330 ioctl_context_stripe* stripe = &context.stripes[i]; 331 space* s = CONTAINING_RECORD(le, space, list_entry); 332 333 WARN("(%I64x, %I64x)\n", s->address, s->size); 334 335 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false); 336 337 if (!stripe->Irp) { 338 ERR("IoAllocateIrp failed\n"); 339 } else { 340 PIO_STACK_LOCATION IrpSp = IoGetNextIrpStackLocation(stripe->Irp); 341 IrpSp->MajorFunction = IRP_MJ_WRITE; 342 IrpSp->FileObject = dev->fileobj; 343 344 stripe->buf = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)s->size, ALLOC_TAG); 345 346 if (!stripe->buf) { 347 ERR("out of memory\n"); 348 } else { 349 RtlZeroMemory(stripe->buf, (uint32_t)s->size); // FIXME - randomize instead? 350 351 stripe->mdl = IoAllocateMdl(stripe->buf, (uint32_t)s->size, false, false, NULL); 352 353 if (!stripe->mdl) { 354 ERR("IoAllocateMdl failed\n"); 355 } else { 356 MmBuildMdlForNonPagedPool(stripe->mdl); 357 358 stripe->Irp->MdlAddress = stripe->mdl; 359 360 IrpSp->Parameters.Write.ByteOffset.QuadPart = s->address; 361 IrpSp->Parameters.Write.Length = s->size; 362 363 stripe->Irp->UserIosb = &stripe->iosb; 364 365 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true); 366 367 IoCallDriver(dev->devobj, stripe->Irp); 368 } 369 } 370 } 371 372 i++; 373 374 le = le->Flink; 375 } 376 377 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 378 379 for (i = 0; i < count; i++) { 380 ioctl_context_stripe* stripe = &context.stripes[i]; 381 382 if (stripe->mdl) 383 IoFreeMdl(stripe->mdl); 384 385 if (stripe->buf) 386 ExFreePool(stripe->buf); 387 } 388 389 ExFreePool(context.stripes); 390 } 391 #endif 392 393 static void clean_space_cache(device_extension* Vcb) { 394 LIST_ENTRY* le; 395 chunk* c; 396 #ifndef DEBUG_TRIM_EMULATION 397 ULONG num; 398 #endif 399 400 TRACE("(%p)\n", Vcb); 401 402 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true); 403 404 le = Vcb->chunks.Flink; 405 while (le != &Vcb->chunks) { 406 c = CONTAINING_RECORD(le, chunk, list_entry); 407 408 if (c->space_changed) { 409 acquire_chunk_lock(c, Vcb); 410 411 if (c->space_changed) 412 clean_space_cache_chunk(Vcb, c); 413 414 c->space_changed = false; 415 416 release_chunk_lock(c, Vcb); 417 } 418 419 le = le->Flink; 420 } 421 422 ExReleaseResourceLite(&Vcb->chunk_lock); 423 424 if (Vcb->trim && !Vcb->options.no_trim) { 425 #ifndef DEBUG_TRIM_EMULATION 426 ioctl_context context; 427 ULONG total_num; 428 429 context.left = 0; 430 431 le = Vcb->devices.Flink; 432 while (le != &Vcb->devices) { 433 device* dev = CONTAINING_RECORD(le, device, list_entry); 434 435 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) 436 context.left++; 437 438 le = le->Flink; 439 } 440 441 if (context.left == 0) 442 return; 443 444 total_num = context.left; 445 num = 0; 446 447 KeInitializeEvent(&context.Event, NotificationEvent, false); 448 449 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG); 450 if (!context.stripes) { 451 ERR("out of memory\n"); 452 return; 453 } 454 455 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left); 456 #endif 457 458 le = Vcb->devices.Flink; 459 while (le != &Vcb->devices) { 460 device* dev = CONTAINING_RECORD(le, device, list_entry); 461 462 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) { 463 #ifdef DEBUG_TRIM_EMULATION 464 trim_emulation(dev); 465 #else 466 LIST_ENTRY* le2; 467 ioctl_context_stripe* stripe = &context.stripes[num]; 468 DEVICE_DATA_SET_RANGE* ranges; 469 ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i; 470 PIO_STACK_LOCATION IrpSp; 471 472 stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); 473 if (!stripe->dmdsa) { 474 ERR("out of memory\n"); 475 goto nextdev; 476 } 477 478 stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES); 479 stripe->dmdsa->Action = DeviceDsmAction_Trim; 480 stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED; 481 stripe->dmdsa->ParameterBlockOffset = 0; 482 stripe->dmdsa->ParameterBlockLength = 0; 483 stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t)); 484 stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE); 485 486 ranges = (DEVICE_DATA_SET_RANGE*)((uint8_t*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset); 487 488 i = 0; 489 490 le2 = dev->trim_list.Flink; 491 while (le2 != &dev->trim_list) { 492 space* s = CONTAINING_RECORD(le2, space, list_entry); 493 494 ranges[i].StartingOffset = s->address; 495 ranges[i].LengthInBytes = s->size; 496 i++; 497 498 le2 = le2->Flink; 499 } 500 501 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false); 502 503 if (!stripe->Irp) { 504 ERR("IoAllocateIrp failed\n"); 505 goto nextdev; 506 } 507 508 IrpSp = IoGetNextIrpStackLocation(stripe->Irp); 509 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL; 510 IrpSp->FileObject = dev->fileobj; 511 512 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES; 513 IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen; 514 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0; 515 516 stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa; 517 stripe->Irp->Flags |= IRP_BUFFERED_IO; 518 stripe->Irp->UserBuffer = NULL; 519 stripe->Irp->UserIosb = &stripe->iosb; 520 521 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true); 522 523 IoCallDriver(dev->devobj, stripe->Irp); 524 525 nextdev: 526 #endif 527 while (!IsListEmpty(&dev->trim_list)) { 528 space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry); 529 ExFreePool(s); 530 } 531 532 dev->num_trim_entries = 0; 533 534 #ifndef DEBUG_TRIM_EMULATION 535 num++; 536 #endif 537 } 538 539 le = le->Flink; 540 } 541 542 #ifndef DEBUG_TRIM_EMULATION 543 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 544 545 for (num = 0; num < total_num; num++) { 546 if (context.stripes[num].dmdsa) 547 ExFreePool(context.stripes[num].dmdsa); 548 549 if (context.stripes[num].Irp) 550 IoFreeIrp(context.stripes[num].Irp); 551 } 552 553 ExFreePool(context.stripes); 554 #endif 555 } 556 } 557 558 static bool trees_consistent(device_extension* Vcb) { 559 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header); 560 LIST_ENTRY* le; 561 562 le = Vcb->trees.Flink; 563 while (le != &Vcb->trees) { 564 tree* t = CONTAINING_RECORD(le, tree, list_entry); 565 566 if (t->write) { 567 if (t->header.num_items == 0 && t->parent) { 568 #ifdef DEBUG_WRITE_LOOPS 569 ERR("empty tree found, looping again\n"); 570 #endif 571 return false; 572 } 573 574 if (t->size > maxsize) { 575 #ifdef DEBUG_WRITE_LOOPS 576 ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize); 577 #endif 578 return false; 579 } 580 581 if (!t->has_new_address) { 582 #ifdef DEBUG_WRITE_LOOPS 583 ERR("tree found without new address, looping again\n"); 584 #endif 585 return false; 586 } 587 } 588 589 le = le->Flink; 590 } 591 592 return true; 593 } 594 595 static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) { 596 ULONG level; 597 LIST_ENTRY* le; 598 599 for (level = 0; level <= 255; level++) { 600 bool nothing_found = true; 601 602 TRACE("level = %u\n", level); 603 604 le = Vcb->trees.Flink; 605 while (le != &Vcb->trees) { 606 tree* t = CONTAINING_RECORD(le, tree, list_entry); 607 608 if (t->write && t->header.level == level) { 609 TRACE("tree %p: root = %I64x, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent); 610 611 nothing_found = false; 612 613 if (t->parent) { 614 if (!t->parent->write) 615 TRACE("adding tree %p (level %x)\n", t->parent, t->header.level); 616 617 t->parent->write = true; 618 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) { 619 KEY searchkey; 620 traverse_ptr tp; 621 NTSTATUS Status; 622 #ifdef __REACTOS__ 623 tree* t2; 624 #endif 625 626 searchkey.obj_id = t->root->id; 627 searchkey.obj_type = TYPE_ROOT_ITEM; 628 searchkey.offset = 0xffffffffffffffff; 629 630 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 631 if (!NT_SUCCESS(Status)) { 632 ERR("error - find_item returned %08x\n", Status); 633 return Status; 634 } 635 636 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 637 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id); 638 return STATUS_INTERNAL_ERROR; 639 } 640 641 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry 642 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); 643 644 if (!ri) { 645 ERR("out of memory\n"); 646 return STATUS_INSUFFICIENT_RESOURCES; 647 } 648 649 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM)); 650 651 Status = delete_tree_item(Vcb, &tp); 652 if (!NT_SUCCESS(Status)) { 653 ERR("delete_tree_item returned %08x\n", Status); 654 ExFreePool(ri); 655 return Status; 656 } 657 658 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp); 659 if (!NT_SUCCESS(Status)) { 660 ERR("insert_tree_item returned %08x\n", Status); 661 ExFreePool(ri); 662 return Status; 663 } 664 } 665 666 #ifndef __REACTOS__ 667 tree* t2 = tp.tree; 668 #else 669 t2 = tp.tree; 670 #endif 671 while (t2) { 672 t2->write = true; 673 674 t2 = t2->parent; 675 } 676 } 677 } 678 679 le = le->Flink; 680 } 681 682 if (nothing_found) 683 break; 684 } 685 686 return STATUS_SUCCESS; 687 } 688 689 static void add_parents_to_cache(tree* t) { 690 while (t->parent) { 691 t = t->parent; 692 t->write = true; 693 } 694 } 695 696 static bool insert_tree_extent_skinny(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t address, PIRP Irp, LIST_ENTRY* rollback) { 697 NTSTATUS Status; 698 EXTENT_ITEM_SKINNY_METADATA* eism; 699 traverse_ptr insert_tp; 700 701 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG); 702 if (!eism) { 703 ERR("out of memory\n"); 704 return false; 705 } 706 707 eism->ei.refcount = 1; 708 eism->ei.generation = Vcb->superblock.generation; 709 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK; 710 eism->type = TYPE_TREE_BLOCK_REF; 711 eism->tbr.offset = root_id; 712 713 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp); 714 if (!NT_SUCCESS(Status)) { 715 ERR("insert_tree_item returned %08x\n", Status); 716 ExFreePool(eism); 717 return false; 718 } 719 720 acquire_chunk_lock(c, Vcb); 721 722 space_list_subtract(c, false, address, Vcb->superblock.node_size, rollback); 723 724 release_chunk_lock(c, Vcb); 725 726 add_parents_to_cache(insert_tp.tree); 727 728 return true; 729 } 730 731 bool find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t* address) { 732 LIST_ENTRY* le; 733 space* s; 734 735 TRACE("(%p, %I64x, %p)\n", Vcb, c->offset, address); 736 737 if (Vcb->superblock.node_size > c->chunk_item->size - c->used) 738 return false; 739 740 if (!c->cache_loaded) { 741 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL); 742 743 if (!NT_SUCCESS(Status)) { 744 ERR("load_cache_chunk returned %08x\n", Status); 745 return false; 746 } 747 } 748 749 if (IsListEmpty(&c->space_size)) 750 return false; 751 752 if (!c->last_alloc_set) { 753 s = CONTAINING_RECORD(c->space.Blink, space, list_entry); 754 755 c->last_alloc = s->address; 756 c->last_alloc_set = true; 757 758 if (s->size >= Vcb->superblock.node_size) { 759 *address = s->address; 760 c->last_alloc += Vcb->superblock.node_size; 761 return true; 762 } 763 } 764 765 le = c->space.Flink; 766 while (le != &c->space) { 767 s = CONTAINING_RECORD(le, space, list_entry); 768 769 if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) { 770 *address = c->last_alloc; 771 c->last_alloc += Vcb->superblock.node_size; 772 return true; 773 } 774 775 le = le->Flink; 776 } 777 778 le = c->space_size.Flink; 779 while (le != &c->space_size) { 780 s = CONTAINING_RECORD(le, space, list_entry_size); 781 782 if (s->size == Vcb->superblock.node_size) { 783 *address = s->address; 784 c->last_alloc = s->address + Vcb->superblock.node_size; 785 return true; 786 } else if (s->size < Vcb->superblock.node_size) { 787 if (le == c->space_size.Flink) 788 return false; 789 790 s = CONTAINING_RECORD(le->Blink, space, list_entry_size); 791 792 *address = s->address; 793 c->last_alloc = s->address + Vcb->superblock.node_size; 794 795 return true; 796 } 797 798 le = le->Flink; 799 } 800 801 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size); 802 803 if (s->size > Vcb->superblock.node_size) { 804 *address = s->address; 805 c->last_alloc = s->address + Vcb->superblock.node_size; 806 return true; 807 } 808 809 return false; 810 } 811 812 static bool insert_tree_extent(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t* new_address, PIRP Irp, LIST_ENTRY* rollback) { 813 NTSTATUS Status; 814 uint64_t address; 815 EXTENT_ITEM_TREE2* eit2; 816 traverse_ptr insert_tp; 817 818 TRACE("(%p, %x, %I64x, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback); 819 820 if (!find_metadata_address_in_chunk(Vcb, c, &address)) 821 return false; 822 823 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { 824 bool b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback); 825 826 if (b) 827 *new_address = address; 828 829 return b; 830 } 831 832 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG); 833 if (!eit2) { 834 ERR("out of memory\n"); 835 return false; 836 } 837 838 eit2->eit.extent_item.refcount = 1; 839 eit2->eit.extent_item.generation = Vcb->superblock.generation; 840 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK; 841 eit2->eit.level = level; 842 eit2->type = TYPE_TREE_BLOCK_REF; 843 eit2->tbr.offset = root_id; 844 845 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp); 846 if (!NT_SUCCESS(Status)) { 847 ERR("insert_tree_item returned %08x\n", Status); 848 ExFreePool(eit2); 849 return false; 850 } 851 852 acquire_chunk_lock(c, Vcb); 853 854 space_list_subtract(c, false, address, Vcb->superblock.node_size, rollback); 855 856 release_chunk_lock(c, Vcb); 857 858 add_parents_to_cache(insert_tp.tree); 859 860 *new_address = address; 861 862 return true; 863 } 864 865 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { 866 NTSTATUS Status; 867 chunk *origchunk = NULL, *c; 868 LIST_ENTRY* le; 869 uint64_t flags, addr; 870 871 if (t->root->id == BTRFS_ROOT_CHUNK) 872 flags = Vcb->system_flags; 873 else 874 flags = Vcb->metadata_flags; 875 876 if (t->has_address) { 877 origchunk = get_chunk_from_address(Vcb, t->header.address); 878 879 if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags && 880 insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) { 881 t->new_address = addr; 882 t->has_new_address = true; 883 return STATUS_SUCCESS; 884 } 885 } 886 887 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true); 888 889 le = Vcb->chunks.Flink; 890 while (le != &Vcb->chunks) { 891 c = CONTAINING_RECORD(le, chunk, list_entry); 892 893 if (!c->readonly && !c->reloc) { 894 acquire_chunk_lock(c, Vcb); 895 896 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) { 897 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) { 898 release_chunk_lock(c, Vcb); 899 ExReleaseResourceLite(&Vcb->chunk_lock); 900 t->new_address = addr; 901 t->has_new_address = true; 902 return STATUS_SUCCESS; 903 } 904 } 905 906 release_chunk_lock(c, Vcb); 907 } 908 909 le = le->Flink; 910 } 911 912 // allocate new chunk if necessary 913 914 Status = alloc_chunk(Vcb, flags, &c, false); 915 916 if (!NT_SUCCESS(Status)) { 917 ERR("alloc_chunk returned %08x\n", Status); 918 ExReleaseResourceLite(&Vcb->chunk_lock); 919 return Status; 920 } 921 922 acquire_chunk_lock(c, Vcb); 923 924 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) { 925 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) { 926 release_chunk_lock(c, Vcb); 927 ExReleaseResourceLite(&Vcb->chunk_lock); 928 t->new_address = addr; 929 t->has_new_address = true; 930 return STATUS_SUCCESS; 931 } 932 } 933 934 release_chunk_lock(c, Vcb); 935 936 ExReleaseResourceLite(&Vcb->chunk_lock); 937 938 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size); 939 940 return STATUS_DISK_FULL; 941 } 942 943 static NTSTATUS reduce_tree_extent(device_extension* Vcb, uint64_t address, tree* t, uint64_t parent_root, uint8_t level, PIRP Irp, LIST_ENTRY* rollback) { 944 NTSTATUS Status; 945 uint64_t rc, root; 946 947 TRACE("(%p, %I64x, %p)\n", Vcb, address, t); 948 949 rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp); 950 if (rc == 0) { 951 ERR("error - refcount for extent %I64x was 0\n", address); 952 return STATUS_INTERNAL_ERROR; 953 } 954 955 if (!t || t->parent) 956 root = parent_root; 957 else 958 root = t->header.tree_id; 959 960 Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp); 961 if (!NT_SUCCESS(Status)) { 962 ERR("decrease_extent_refcount_tree returned %08x\n", Status); 963 return Status; 964 } 965 966 if (rc == 1) { 967 chunk* c = get_chunk_from_address(Vcb, address); 968 969 if (c) { 970 acquire_chunk_lock(c, Vcb); 971 972 if (!c->cache_loaded) { 973 Status = load_cache_chunk(Vcb, c, NULL); 974 975 if (!NT_SUCCESS(Status)) { 976 ERR("load_cache_chunk returned %08x\n", Status); 977 release_chunk_lock(c, Vcb); 978 return Status; 979 } 980 } 981 982 c->used -= Vcb->superblock.node_size; 983 984 space_list_add(c, address, Vcb->superblock.node_size, rollback); 985 986 release_chunk_lock(c, Vcb); 987 } else 988 ERR("could not find chunk for address %I64x\n", address); 989 } 990 991 return STATUS_SUCCESS; 992 } 993 994 static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, bool old) { 995 LIST_ENTRY *le2, *list; 996 changed_extent_ref* cer; 997 998 list = old ? &ce->old_refs : &ce->refs; 999 1000 le2 = list->Flink; 1001 while (le2 != list) { 1002 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 1003 1004 if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) { 1005 cer->edr.count += edr->count; 1006 goto end; 1007 } 1008 1009 le2 = le2->Flink; 1010 } 1011 1012 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG); 1013 if (!cer) { 1014 ERR("out of memory\n"); 1015 return STATUS_INSUFFICIENT_RESOURCES; 1016 } 1017 1018 cer->type = TYPE_EXTENT_DATA_REF; 1019 RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF)); 1020 InsertTailList(list, &cer->list_entry); 1021 1022 end: 1023 if (old) 1024 ce->old_count += edr->count; 1025 else 1026 ce->count += edr->count; 1027 1028 return STATUS_SUCCESS; 1029 } 1030 1031 static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, bool old) { 1032 LIST_ENTRY *le2, *list; 1033 changed_extent_ref* cer; 1034 1035 list = old ? &ce->old_refs : &ce->refs; 1036 1037 le2 = list->Flink; 1038 while (le2 != list) { 1039 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 1040 1041 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) { 1042 cer->sdr.count += sdr->count; 1043 goto end; 1044 } 1045 1046 le2 = le2->Flink; 1047 } 1048 1049 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG); 1050 if (!cer) { 1051 ERR("out of memory\n"); 1052 return STATUS_INSUFFICIENT_RESOURCES; 1053 } 1054 1055 cer->type = TYPE_SHARED_DATA_REF; 1056 RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF)); 1057 InsertTailList(list, &cer->list_entry); 1058 1059 end: 1060 if (old) 1061 ce->old_count += sdr->count; 1062 else 1063 ce->count += sdr->count; 1064 1065 return STATUS_SUCCESS; 1066 } 1067 1068 static bool shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { 1069 KEY searchkey; 1070 traverse_ptr tp; 1071 NTSTATUS Status; 1072 1073 if (!t->updated_extents && t->has_address) { 1074 Status = update_tree_extents(Vcb, t, Irp, rollback); 1075 if (!NT_SUCCESS(Status)) { 1076 ERR("update_tree_extents returned %08x\n", Status); 1077 return false; 1078 } 1079 } 1080 1081 searchkey.obj_id = t->header.address; 1082 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; 1083 searchkey.offset = 0xffffffffffffffff; 1084 1085 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 1086 if (!NT_SUCCESS(Status)) { 1087 ERR("error - find_item returned %08x\n", Status); 1088 return false; 1089 } 1090 1091 if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM)) 1092 return false; 1093 else 1094 return true; 1095 } 1096 1097 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { 1098 NTSTATUS Status; 1099 uint64_t rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp); 1100 uint64_t flags = get_extent_flags(Vcb, t->header.address, Irp); 1101 1102 if (rc == 0) { 1103 ERR("refcount for extent %I64x was 0\n", t->header.address); 1104 return STATUS_INTERNAL_ERROR; 1105 } 1106 1107 if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { 1108 TREE_BLOCK_REF tbr; 1109 bool unique = rc > 1 ? false : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : false); 1110 1111 if (t->header.level == 0) { 1112 LIST_ENTRY* le; 1113 1114 le = t->itemlist.Flink; 1115 while (le != &t->itemlist) { 1116 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 1117 1118 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 1119 EXTENT_DATA* ed = (EXTENT_DATA*)td->data; 1120 1121 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { 1122 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 1123 1124 if (ed2->size > 0) { 1125 EXTENT_DATA_REF edr; 1126 changed_extent* ce = NULL; 1127 chunk* c = get_chunk_from_address(Vcb, ed2->address); 1128 1129 if (c) { 1130 LIST_ENTRY* le2; 1131 1132 le2 = c->changed_extents.Flink; 1133 while (le2 != &c->changed_extents) { 1134 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry); 1135 1136 if (ce2->address == ed2->address) { 1137 ce = ce2; 1138 break; 1139 } 1140 1141 le2 = le2->Flink; 1142 } 1143 } 1144 1145 edr.root = t->root->id; 1146 edr.objid = td->key.obj_id; 1147 edr.offset = td->key.offset - ed2->offset; 1148 edr.count = 1; 1149 1150 if (ce) { 1151 Status = add_changed_extent_ref_edr(ce, &edr, true); 1152 if (!NT_SUCCESS(Status)) { 1153 ERR("add_changed_extent_ref_edr returned %08x\n", Status); 1154 return Status; 1155 } 1156 1157 Status = add_changed_extent_ref_edr(ce, &edr, false); 1158 if (!NT_SUCCESS(Status)) { 1159 ERR("add_changed_extent_ref_edr returned %08x\n", Status); 1160 return Status; 1161 } 1162 } 1163 1164 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp); 1165 if (!NT_SUCCESS(Status)) { 1166 ERR("increase_extent_refcount returned %08x\n", Status); 1167 return Status; 1168 } 1169 1170 if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { 1171 uint64_t sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp); 1172 1173 if (sdrrc > 0) { 1174 SHARED_DATA_REF sdr; 1175 1176 sdr.offset = t->header.address; 1177 sdr.count = 1; 1178 1179 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, 1180 t->header.address, ce ? ce->superseded : false, Irp); 1181 if (!NT_SUCCESS(Status)) { 1182 ERR("decrease_extent_refcount returned %08x\n", Status); 1183 return Status; 1184 } 1185 1186 if (ce) { 1187 LIST_ENTRY* le2; 1188 1189 le2 = ce->refs.Flink; 1190 while (le2 != &ce->refs) { 1191 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 1192 1193 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) { 1194 ce->count--; 1195 cer->sdr.count--; 1196 break; 1197 } 1198 1199 le2 = le2->Flink; 1200 } 1201 1202 le2 = ce->old_refs.Flink; 1203 while (le2 != &ce->old_refs) { 1204 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 1205 1206 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) { 1207 ce->old_count--; 1208 1209 if (cer->sdr.count > 1) 1210 cer->sdr.count--; 1211 else { 1212 RemoveEntryList(&cer->list_entry); 1213 ExFreePool(cer); 1214 } 1215 1216 break; 1217 } 1218 1219 le2 = le2->Flink; 1220 } 1221 } 1222 } 1223 } 1224 1225 // FIXME - clear shared flag if unique? 1226 } 1227 } 1228 } 1229 1230 le = le->Flink; 1231 } 1232 } else { 1233 LIST_ENTRY* le; 1234 1235 le = t->itemlist.Flink; 1236 while (le != &t->itemlist) { 1237 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 1238 1239 if (!td->inserted) { 1240 tbr.offset = t->root->id; 1241 1242 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, 1243 &tbr, &td->key, t->header.level - 1, Irp); 1244 if (!NT_SUCCESS(Status)) { 1245 ERR("increase_extent_refcount returned %08x\n", Status); 1246 return Status; 1247 } 1248 1249 if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { 1250 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp); 1251 1252 if (sbrrc > 0) { 1253 SHARED_BLOCK_REF sbr; 1254 1255 sbr.offset = t->header.address; 1256 1257 Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, 1258 t->header.address, false, Irp); 1259 if (!NT_SUCCESS(Status)) { 1260 ERR("decrease_extent_refcount returned %08x\n", Status); 1261 return Status; 1262 } 1263 } 1264 } 1265 1266 // FIXME - clear shared flag if unique? 1267 } 1268 1269 le = le->Flink; 1270 } 1271 } 1272 1273 if (unique) { 1274 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp); 1275 1276 if (sbrrc == 1) { 1277 SHARED_BLOCK_REF sbr; 1278 1279 sbr.offset = t->parent->header.address; 1280 1281 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, 1282 t->parent->header.address, false, Irp); 1283 if (!NT_SUCCESS(Status)) { 1284 ERR("decrease_extent_refcount returned %08x\n", Status); 1285 return Status; 1286 } 1287 } 1288 } 1289 1290 if (t->parent) 1291 tbr.offset = t->parent->header.tree_id; 1292 else 1293 tbr.offset = t->header.tree_id; 1294 1295 Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, 1296 t->parent ? &t->paritem->key : NULL, t->header.level, Irp); 1297 if (!NT_SUCCESS(Status)) { 1298 ERR("increase_extent_refcount returned %08x\n", Status); 1299 return Status; 1300 } 1301 1302 // FIXME - clear shared flag if unique? 1303 1304 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF; 1305 } 1306 1307 if (rc > 1 || t->header.tree_id == t->root->id) { 1308 Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback); 1309 1310 if (!NT_SUCCESS(Status)) { 1311 ERR("reduce_tree_extent returned %08x\n", Status); 1312 return Status; 1313 } 1314 } 1315 1316 t->has_address = false; 1317 1318 if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) { 1319 if (t->header.tree_id == t->root->id) { 1320 flags |= EXTENT_ITEM_SHARED_BACKREFS; 1321 update_extent_flags(Vcb, t->header.address, flags, Irp); 1322 } 1323 1324 if (t->header.level > 0) { 1325 LIST_ENTRY* le; 1326 1327 le = t->itemlist.Flink; 1328 while (le != &t->itemlist) { 1329 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 1330 1331 if (!td->inserted) { 1332 if (t->header.tree_id == t->root->id) { 1333 SHARED_BLOCK_REF sbr; 1334 1335 sbr.offset = t->header.address; 1336 1337 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp); 1338 } else { 1339 TREE_BLOCK_REF tbr; 1340 1341 tbr.offset = t->root->id; 1342 1343 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp); 1344 } 1345 1346 if (!NT_SUCCESS(Status)) { 1347 ERR("increase_extent_refcount returned %08x\n", Status); 1348 return Status; 1349 } 1350 } 1351 1352 le = le->Flink; 1353 } 1354 } else { 1355 LIST_ENTRY* le; 1356 1357 le = t->itemlist.Flink; 1358 while (le != &t->itemlist) { 1359 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 1360 1361 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 1362 EXTENT_DATA* ed = (EXTENT_DATA*)td->data; 1363 1364 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { 1365 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 1366 1367 if (ed2->size > 0) { 1368 changed_extent* ce = NULL; 1369 chunk* c = get_chunk_from_address(Vcb, ed2->address); 1370 1371 if (c) { 1372 LIST_ENTRY* le2; 1373 1374 le2 = c->changed_extents.Flink; 1375 while (le2 != &c->changed_extents) { 1376 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry); 1377 1378 if (ce2->address == ed2->address) { 1379 ce = ce2; 1380 break; 1381 } 1382 1383 le2 = le2->Flink; 1384 } 1385 } 1386 1387 if (t->header.tree_id == t->root->id) { 1388 SHARED_DATA_REF sdr; 1389 1390 sdr.offset = t->header.address; 1391 sdr.count = 1; 1392 1393 if (ce) { 1394 Status = add_changed_extent_ref_sdr(ce, &sdr, true); 1395 if (!NT_SUCCESS(Status)) { 1396 ERR("add_changed_extent_ref_edr returned %08x\n", Status); 1397 return Status; 1398 } 1399 1400 Status = add_changed_extent_ref_sdr(ce, &sdr, false); 1401 if (!NT_SUCCESS(Status)) { 1402 ERR("add_changed_extent_ref_edr returned %08x\n", Status); 1403 return Status; 1404 } 1405 } 1406 1407 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp); 1408 } else { 1409 EXTENT_DATA_REF edr; 1410 1411 edr.root = t->root->id; 1412 edr.objid = td->key.obj_id; 1413 edr.offset = td->key.offset - ed2->offset; 1414 edr.count = 1; 1415 1416 if (ce) { 1417 Status = add_changed_extent_ref_edr(ce, &edr, true); 1418 if (!NT_SUCCESS(Status)) { 1419 ERR("add_changed_extent_ref_edr returned %08x\n", Status); 1420 return Status; 1421 } 1422 1423 Status = add_changed_extent_ref_edr(ce, &edr, false); 1424 if (!NT_SUCCESS(Status)) { 1425 ERR("add_changed_extent_ref_edr returned %08x\n", Status); 1426 return Status; 1427 } 1428 } 1429 1430 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp); 1431 } 1432 1433 if (!NT_SUCCESS(Status)) { 1434 ERR("increase_extent_refcount returned %08x\n", Status); 1435 return Status; 1436 } 1437 } 1438 } 1439 } 1440 1441 le = le->Flink; 1442 } 1443 } 1444 } 1445 1446 t->updated_extents = true; 1447 t->header.tree_id = t->root->id; 1448 1449 return STATUS_SUCCESS; 1450 } 1451 1452 static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { 1453 LIST_ENTRY* le; 1454 NTSTATUS Status; 1455 bool changed = false; 1456 uint8_t max_level = 0, level; 1457 1458 TRACE("(%p)\n", Vcb); 1459 1460 le = Vcb->trees.Flink; 1461 while (le != &Vcb->trees) { 1462 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1463 1464 if (t->write && !t->has_new_address) { 1465 chunk* c; 1466 1467 if (t->has_address) { 1468 c = get_chunk_from_address(Vcb, t->header.address); 1469 1470 if (c) { 1471 if (!c->cache_loaded) { 1472 acquire_chunk_lock(c, Vcb); 1473 1474 if (!c->cache_loaded) { 1475 Status = load_cache_chunk(Vcb, c, NULL); 1476 1477 if (!NT_SUCCESS(Status)) { 1478 ERR("load_cache_chunk returned %08x\n", Status); 1479 release_chunk_lock(c, Vcb); 1480 return Status; 1481 } 1482 } 1483 1484 release_chunk_lock(c, Vcb); 1485 } 1486 } 1487 } 1488 1489 Status = get_tree_new_address(Vcb, t, Irp, rollback); 1490 if (!NT_SUCCESS(Status)) { 1491 ERR("get_tree_new_address returned %08x\n", Status); 1492 return Status; 1493 } 1494 1495 TRACE("allocated extent %I64x\n", t->new_address); 1496 1497 c = get_chunk_from_address(Vcb, t->new_address); 1498 1499 if (c) 1500 c->used += Vcb->superblock.node_size; 1501 else { 1502 ERR("could not find chunk for address %I64x\n", t->new_address); 1503 return STATUS_INTERNAL_ERROR; 1504 } 1505 1506 changed = true; 1507 1508 if (t->header.level > max_level) 1509 max_level = t->header.level; 1510 } 1511 1512 le = le->Flink; 1513 } 1514 1515 if (!changed) 1516 return STATUS_SUCCESS; 1517 1518 level = max_level; 1519 do { 1520 le = Vcb->trees.Flink; 1521 while (le != &Vcb->trees) { 1522 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1523 1524 if (t->write && !t->updated_extents && t->has_address && t->header.level == level) { 1525 Status = update_tree_extents(Vcb, t, Irp, rollback); 1526 if (!NT_SUCCESS(Status)) { 1527 ERR("update_tree_extents returned %08x\n", Status); 1528 return Status; 1529 } 1530 } 1531 1532 le = le->Flink; 1533 } 1534 1535 if (level == 0) 1536 break; 1537 1538 level--; 1539 } while (true); 1540 1541 return STATUS_SUCCESS; 1542 } 1543 1544 static NTSTATUS update_root_root(device_extension* Vcb, bool no_cache, PIRP Irp, LIST_ENTRY* rollback) { 1545 LIST_ENTRY* le; 1546 NTSTATUS Status; 1547 1548 TRACE("(%p)\n", Vcb); 1549 1550 le = Vcb->trees.Flink; 1551 while (le != &Vcb->trees) { 1552 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1553 1554 if (t->write && !t->parent) { 1555 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) { 1556 KEY searchkey; 1557 traverse_ptr tp; 1558 1559 searchkey.obj_id = t->root->id; 1560 searchkey.obj_type = TYPE_ROOT_ITEM; 1561 searchkey.offset = 0xffffffffffffffff; 1562 1563 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 1564 if (!NT_SUCCESS(Status)) { 1565 ERR("error - find_item returned %08x\n", Status); 1566 return Status; 1567 } 1568 1569 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 1570 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id); 1571 return STATUS_INTERNAL_ERROR; 1572 } 1573 1574 TRACE("updating the address for root %I64x to %I64x\n", searchkey.obj_id, t->new_address); 1575 1576 t->root->root_item.block_number = t->new_address; 1577 t->root->root_item.root_level = t->header.level; 1578 t->root->root_item.generation = Vcb->superblock.generation; 1579 t->root->root_item.generation2 = Vcb->superblock.generation; 1580 1581 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents 1582 1583 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM)); 1584 } 1585 1586 t->root->treeholder.address = t->new_address; 1587 t->root->treeholder.generation = Vcb->superblock.generation; 1588 } 1589 1590 le = le->Flink; 1591 } 1592 1593 if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) { 1594 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true); 1595 Status = update_chunk_caches(Vcb, Irp, rollback); 1596 ExReleaseResourceLite(&Vcb->chunk_lock); 1597 1598 if (!NT_SUCCESS(Status)) { 1599 ERR("update_chunk_caches returned %08x\n", Status); 1600 return Status; 1601 } 1602 } 1603 1604 return STATUS_SUCCESS; 1605 } 1606 1607 NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, bool no_free) { 1608 chunk* c; 1609 LIST_ENTRY* le; 1610 tree_write* tw; 1611 NTSTATUS Status; 1612 ULONG i, num_bits; 1613 write_data_context* wtc; 1614 ULONG bit_num = 0; 1615 bool raid56 = false; 1616 1617 // merge together runs 1618 c = NULL; 1619 le = tree_writes->Flink; 1620 while (le != tree_writes) { 1621 tw = CONTAINING_RECORD(le, tree_write, list_entry); 1622 1623 if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size) 1624 c = get_chunk_from_address(Vcb, tw->address); 1625 else { 1626 tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); 1627 1628 if (tw->address == tw2->address + tw2->length) { 1629 uint8_t* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG); 1630 1631 if (!data) { 1632 ERR("out of memory\n"); 1633 return STATUS_INSUFFICIENT_RESOURCES; 1634 } 1635 1636 RtlCopyMemory(data, tw2->data, tw2->length); 1637 RtlCopyMemory(&data[tw2->length], tw->data, tw->length); 1638 1639 if (!no_free || tw2->allocated) 1640 ExFreePool(tw2->data); 1641 1642 tw2->data = data; 1643 tw2->length += tw->length; 1644 tw2->allocated = true; 1645 1646 if (!no_free || tw->allocated) 1647 ExFreePool(tw->data); 1648 1649 RemoveEntryList(&tw->list_entry); 1650 ExFreePool(tw); 1651 1652 le = tw2->list_entry.Flink; 1653 continue; 1654 } 1655 } 1656 1657 tw->c = c; 1658 1659 if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6)) 1660 raid56 = true; 1661 1662 le = le->Flink; 1663 } 1664 1665 num_bits = 0; 1666 1667 le = tree_writes->Flink; 1668 while (le != tree_writes) { 1669 tw = CONTAINING_RECORD(le, tree_write, list_entry); 1670 1671 num_bits++; 1672 1673 le = le->Flink; 1674 } 1675 1676 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG); 1677 if (!wtc) { 1678 ERR("out of memory\n"); 1679 return STATUS_INSUFFICIENT_RESOURCES; 1680 } 1681 1682 le = tree_writes->Flink; 1683 1684 while (le != tree_writes) { 1685 tw = CONTAINING_RECORD(le, tree_write, list_entry); 1686 1687 TRACE("address: %I64x, size: %x\n", tw->address, tw->length); 1688 1689 KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, false); 1690 InitializeListHead(&wtc[bit_num].stripes); 1691 wtc[bit_num].need_wait = false; 1692 wtc[bit_num].stripes_left = 0; 1693 wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL; 1694 wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL; 1695 1696 Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, false, 0, HighPagePriority); 1697 if (!NT_SUCCESS(Status)) { 1698 ERR("write_data returned %08x\n", Status); 1699 1700 for (i = 0; i < num_bits; i++) { 1701 free_write_data_stripes(&wtc[i]); 1702 } 1703 ExFreePool(wtc); 1704 1705 return Status; 1706 } 1707 1708 bit_num++; 1709 1710 le = le->Flink; 1711 } 1712 1713 for (i = 0; i < num_bits; i++) { 1714 if (wtc[i].stripes.Flink != &wtc[i].stripes) { 1715 // launch writes and wait 1716 le = wtc[i].stripes.Flink; 1717 while (le != &wtc[i].stripes) { 1718 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); 1719 1720 if (stripe->status != WriteDataStatus_Ignore) { 1721 wtc[i].need_wait = true; 1722 IoCallDriver(stripe->device->devobj, stripe->Irp); 1723 } 1724 1725 le = le->Flink; 1726 } 1727 } 1728 } 1729 1730 for (i = 0; i < num_bits; i++) { 1731 if (wtc[i].need_wait) 1732 KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, false, NULL); 1733 } 1734 1735 for (i = 0; i < num_bits; i++) { 1736 le = wtc[i].stripes.Flink; 1737 while (le != &wtc[i].stripes) { 1738 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); 1739 1740 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) { 1741 Status = stripe->iosb.Status; 1742 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS); 1743 break; 1744 } 1745 1746 le = le->Flink; 1747 } 1748 1749 free_write_data_stripes(&wtc[i]); 1750 } 1751 1752 ExFreePool(wtc); 1753 1754 if (raid56) { 1755 c = NULL; 1756 1757 le = tree_writes->Flink; 1758 while (le != tree_writes) { 1759 tw = CONTAINING_RECORD(le, tree_write, list_entry); 1760 1761 if (tw->c != c) { 1762 c = tw->c; 1763 1764 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true); 1765 1766 while (!IsListEmpty(&c->partial_stripes)) { 1767 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry); 1768 1769 Status = flush_partial_stripe(Vcb, c, ps); 1770 1771 if (ps->bmparr) 1772 ExFreePool(ps->bmparr); 1773 1774 ExFreePool(ps); 1775 1776 if (!NT_SUCCESS(Status)) { 1777 ERR("flush_partial_stripe returned %08x\n", Status); 1778 ExReleaseResourceLite(&c->partial_stripes_lock); 1779 return Status; 1780 } 1781 } 1782 1783 ExReleaseResourceLite(&c->partial_stripes_lock); 1784 } 1785 1786 le = le->Flink; 1787 } 1788 } 1789 1790 return STATUS_SUCCESS; 1791 } 1792 1793 static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { 1794 ULONG level; 1795 uint8_t *data, *body; 1796 uint32_t crc32; 1797 NTSTATUS Status; 1798 LIST_ENTRY* le; 1799 LIST_ENTRY tree_writes; 1800 tree_write* tw; 1801 1802 TRACE("(%p)\n", Vcb); 1803 1804 InitializeListHead(&tree_writes); 1805 1806 for (level = 0; level <= 255; level++) { 1807 bool nothing_found = true; 1808 1809 TRACE("level = %u\n", level); 1810 1811 le = Vcb->trees.Flink; 1812 while (le != &Vcb->trees) { 1813 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1814 1815 if (t->write && t->header.level == level) { 1816 KEY firstitem, searchkey; 1817 LIST_ENTRY* le2; 1818 traverse_ptr tp; 1819 1820 if (!t->has_new_address) { 1821 ERR("error - tried to write tree with no new address\n"); 1822 return STATUS_INTERNAL_ERROR; 1823 } 1824 1825 le2 = t->itemlist.Flink; 1826 while (le2 != &t->itemlist) { 1827 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 1828 if (!td->ignore) { 1829 firstitem = td->key; 1830 break; 1831 } 1832 le2 = le2->Flink; 1833 } 1834 1835 if (t->parent) { 1836 t->paritem->key = firstitem; 1837 t->paritem->treeholder.address = t->new_address; 1838 t->paritem->treeholder.generation = Vcb->superblock.generation; 1839 } 1840 1841 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) { 1842 EXTENT_ITEM_TREE* eit; 1843 1844 searchkey.obj_id = t->new_address; 1845 searchkey.obj_type = TYPE_EXTENT_ITEM; 1846 searchkey.offset = Vcb->superblock.node_size; 1847 1848 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 1849 if (!NT_SUCCESS(Status)) { 1850 ERR("error - find_item returned %08x\n", Status); 1851 return Status; 1852 } 1853 1854 if (keycmp(searchkey, tp.item->key)) { 1855 ERR("could not find %I64x,%x,%I64x in extent_root (found %I64x,%x,%I64x instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); 1856 return STATUS_INTERNAL_ERROR; 1857 } 1858 1859 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) { 1860 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE)); 1861 return STATUS_INTERNAL_ERROR; 1862 } 1863 1864 eit = (EXTENT_ITEM_TREE*)tp.item->data; 1865 eit->firstitem = firstitem; 1866 } 1867 1868 nothing_found = false; 1869 } 1870 1871 le = le->Flink; 1872 } 1873 1874 if (nothing_found) 1875 break; 1876 } 1877 1878 TRACE("allocated tree extents\n"); 1879 1880 le = Vcb->trees.Flink; 1881 while (le != &Vcb->trees) { 1882 tree* t = CONTAINING_RECORD(le, tree, list_entry); 1883 LIST_ENTRY* le2; 1884 #ifdef DEBUG_PARANOID 1885 uint32_t num_items = 0, size = 0; 1886 bool crash = false; 1887 #endif 1888 1889 if (t->write) { 1890 #ifdef DEBUG_PARANOID 1891 bool first = true; 1892 KEY lastkey; 1893 1894 le2 = t->itemlist.Flink; 1895 while (le2 != &t->itemlist) { 1896 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 1897 if (!td->ignore) { 1898 num_items++; 1899 1900 if (!first) { 1901 if (keycmp(td->key, lastkey) == 0) { 1902 ERR("(%I64x,%x,%I64x): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset); 1903 crash = true; 1904 } else if (keycmp(td->key, lastkey) == -1) { 1905 ERR("(%I64x,%x,%I64x): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset); 1906 crash = true; 1907 } 1908 } else 1909 first = false; 1910 1911 lastkey = td->key; 1912 1913 if (t->header.level == 0) 1914 size += td->size; 1915 } 1916 le2 = le2->Flink; 1917 } 1918 1919 if (t->header.level == 0) 1920 size += num_items * sizeof(leaf_node); 1921 else 1922 size += num_items * sizeof(internal_node); 1923 1924 if (num_items != t->header.num_items) { 1925 ERR("tree %I64x, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items); 1926 crash = true; 1927 } 1928 1929 if (size != t->size) { 1930 ERR("tree %I64x, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size); 1931 crash = true; 1932 } 1933 1934 if (t->header.num_items == 0 && t->parent) { 1935 ERR("tree %I64x, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level); 1936 crash = true; 1937 } 1938 1939 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) { 1940 ERR("tree %I64x, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header)); 1941 crash = true; 1942 } 1943 1944 if (crash) { 1945 ERR("tree %p\n", t); 1946 le2 = t->itemlist.Flink; 1947 while (le2 != &t->itemlist) { 1948 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 1949 if (!td->ignore) { 1950 ERR("%I64x,%x,%I64x inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted); 1951 } 1952 le2 = le2->Flink; 1953 } 1954 int3; 1955 } 1956 #endif 1957 t->header.address = t->new_address; 1958 t->header.generation = Vcb->superblock.generation; 1959 t->header.tree_id = t->root->id; 1960 t->header.flags |= HEADER_FLAG_MIXED_BACKREF; 1961 t->header.fs_uuid = Vcb->superblock.uuid; 1962 t->has_address = true; 1963 1964 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 1965 if (!data) { 1966 ERR("out of memory\n"); 1967 Status = STATUS_INSUFFICIENT_RESOURCES; 1968 goto end; 1969 } 1970 1971 body = data + sizeof(tree_header); 1972 1973 RtlCopyMemory(data, &t->header, sizeof(tree_header)); 1974 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header)); 1975 1976 if (t->header.level == 0) { 1977 leaf_node* itemptr = (leaf_node*)body; 1978 int i = 0; 1979 uint8_t* dataptr = data + Vcb->superblock.node_size; 1980 1981 le2 = t->itemlist.Flink; 1982 while (le2 != &t->itemlist) { 1983 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 1984 if (!td->ignore) { 1985 dataptr = dataptr - td->size; 1986 1987 itemptr[i].key = td->key; 1988 itemptr[i].offset = (uint32_t)((uint8_t*)dataptr - (uint8_t*)body); 1989 itemptr[i].size = td->size; 1990 i++; 1991 1992 if (td->size > 0) 1993 RtlCopyMemory(dataptr, td->data, td->size); 1994 } 1995 1996 le2 = le2->Flink; 1997 } 1998 } else { 1999 internal_node* itemptr = (internal_node*)body; 2000 int i = 0; 2001 2002 le2 = t->itemlist.Flink; 2003 while (le2 != &t->itemlist) { 2004 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); 2005 if (!td->ignore) { 2006 itemptr[i].key = td->key; 2007 itemptr[i].address = td->treeholder.address; 2008 itemptr[i].generation = td->treeholder.generation; 2009 i++; 2010 } 2011 2012 le2 = le2->Flink; 2013 } 2014 } 2015 2016 crc32 = calc_crc32c(0xffffffff, (uint8_t*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum)); 2017 crc32 = ~crc32; 2018 *((uint32_t*)data) = crc32; 2019 TRACE("setting crc32 to %08x\n", crc32); 2020 2021 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG); 2022 if (!tw) { 2023 ERR("out of memory\n"); 2024 ExFreePool(data); 2025 Status = STATUS_INSUFFICIENT_RESOURCES; 2026 goto end; 2027 } 2028 2029 tw->address = t->new_address; 2030 tw->length = Vcb->superblock.node_size; 2031 tw->data = data; 2032 tw->allocated = false; 2033 2034 if (IsListEmpty(&tree_writes)) 2035 InsertTailList(&tree_writes, &tw->list_entry); 2036 else { 2037 bool inserted = false; 2038 2039 le2 = tree_writes.Flink; 2040 while (le2 != &tree_writes) { 2041 tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry); 2042 2043 if (tw2->address > tw->address) { 2044 InsertHeadList(le2->Blink, &tw->list_entry); 2045 inserted = true; 2046 break; 2047 } 2048 2049 le2 = le2->Flink; 2050 } 2051 2052 if (!inserted) 2053 InsertTailList(&tree_writes, &tw->list_entry); 2054 } 2055 } 2056 2057 le = le->Flink; 2058 } 2059 2060 Status = do_tree_writes(Vcb, &tree_writes, false); 2061 if (!NT_SUCCESS(Status)) { 2062 ERR("do_tree_writes returned %08x\n", Status); 2063 goto end; 2064 } 2065 2066 Status = STATUS_SUCCESS; 2067 2068 end: 2069 while (!IsListEmpty(&tree_writes)) { 2070 le = RemoveHeadList(&tree_writes); 2071 tw = CONTAINING_RECORD(le, tree_write, list_entry); 2072 2073 if (tw->data) 2074 ExFreePool(tw->data); 2075 2076 ExFreePool(tw); 2077 } 2078 2079 return Status; 2080 } 2081 2082 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) { 2083 KEY searchkey; 2084 traverse_ptr tp; 2085 2086 RtlZeroMemory(sb, sizeof(superblock_backup)); 2087 2088 sb->root_tree_addr = Vcb->superblock.root_tree_addr; 2089 sb->root_tree_generation = Vcb->superblock.generation; 2090 sb->root_level = Vcb->superblock.root_level; 2091 2092 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr; 2093 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation; 2094 sb->chunk_root_level = Vcb->superblock.chunk_root_level; 2095 2096 searchkey.obj_id = BTRFS_ROOT_EXTENT; 2097 searchkey.obj_type = TYPE_ROOT_ITEM; 2098 searchkey.offset = 0xffffffffffffffff; 2099 2100 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) { 2101 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { 2102 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; 2103 2104 sb->extent_tree_addr = ri->block_number; 2105 sb->extent_tree_generation = ri->generation; 2106 sb->extent_root_level = ri->root_level; 2107 } 2108 } 2109 2110 searchkey.obj_id = BTRFS_ROOT_FSTREE; 2111 2112 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) { 2113 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { 2114 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; 2115 2116 sb->fs_tree_addr = ri->block_number; 2117 sb->fs_tree_generation = ri->generation; 2118 sb->fs_root_level = ri->root_level; 2119 } 2120 } 2121 2122 searchkey.obj_id = BTRFS_ROOT_DEVTREE; 2123 2124 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) { 2125 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { 2126 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; 2127 2128 sb->dev_root_addr = ri->block_number; 2129 sb->dev_root_generation = ri->generation; 2130 sb->dev_root_level = ri->root_level; 2131 } 2132 } 2133 2134 searchkey.obj_id = BTRFS_ROOT_CHECKSUM; 2135 2136 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) { 2137 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { 2138 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; 2139 2140 sb->csum_root_addr = ri->block_number; 2141 sb->csum_root_generation = ri->generation; 2142 sb->csum_root_level = ri->root_level; 2143 } 2144 } 2145 2146 sb->total_bytes = Vcb->superblock.total_bytes; 2147 sb->bytes_used = Vcb->superblock.bytes_used; 2148 sb->num_devices = Vcb->superblock.num_devices; 2149 } 2150 2151 typedef struct { 2152 void* context; 2153 uint8_t* buf; 2154 PMDL mdl; 2155 device* device; 2156 NTSTATUS Status; 2157 PIRP Irp; 2158 LIST_ENTRY list_entry; 2159 } write_superblocks_stripe; 2160 2161 typedef struct _write_superblocks_context { 2162 KEVENT Event; 2163 LIST_ENTRY stripes; 2164 LONG left; 2165 } write_superblocks_context; 2166 2167 _Function_class_(IO_COMPLETION_ROUTINE) 2168 static NTSTATUS __stdcall write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 2169 write_superblocks_stripe* stripe = conptr; 2170 write_superblocks_context* context = stripe->context; 2171 2172 UNUSED(DeviceObject); 2173 2174 stripe->Status = Irp->IoStatus.Status; 2175 2176 if (InterlockedDecrement(&context->left) == 0) 2177 KeSetEvent(&context->Event, 0, false); 2178 2179 return STATUS_MORE_PROCESSING_REQUIRED; 2180 } 2181 2182 static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) { 2183 unsigned int i = 0; 2184 2185 // All the documentation says that the Linux driver only writes one superblock 2186 // if it thinks a disk is an SSD, but this doesn't seem to be the case! 2187 2188 while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) { 2189 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size); 2190 superblock* sb; 2191 uint32_t crc32; 2192 write_superblocks_stripe* stripe; 2193 PIO_STACK_LOCATION IrpSp; 2194 2195 sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG); 2196 if (!sb) { 2197 ERR("out of memory\n"); 2198 return STATUS_INSUFFICIENT_RESOURCES; 2199 } 2200 2201 RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock)); 2202 2203 if (sblen > sizeof(superblock)) 2204 RtlZeroMemory((uint8_t*)sb + sizeof(superblock), sblen - sizeof(superblock)); 2205 2206 RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM)); 2207 sb->sb_phys_addr = superblock_addrs[i]; 2208 2209 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); 2210 RtlCopyMemory(&sb->checksum, &crc32, sizeof(uint32_t)); 2211 2212 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG); 2213 if (!stripe) { 2214 ERR("out of memory\n"); 2215 ExFreePool(sb); 2216 return STATUS_INSUFFICIENT_RESOURCES; 2217 } 2218 2219 stripe->buf = (uint8_t*)sb; 2220 2221 stripe->Irp = IoAllocateIrp(device->devobj->StackSize, false); 2222 if (!stripe->Irp) { 2223 ERR("IoAllocateIrp failed\n"); 2224 ExFreePool(stripe); 2225 ExFreePool(sb); 2226 return STATUS_INSUFFICIENT_RESOURCES; 2227 } 2228 2229 IrpSp = IoGetNextIrpStackLocation(stripe->Irp); 2230 IrpSp->MajorFunction = IRP_MJ_WRITE; 2231 IrpSp->FileObject = device->fileobj; 2232 2233 if (i == 0) 2234 IrpSp->Flags |= SL_WRITE_THROUGH; 2235 2236 if (device->devobj->Flags & DO_BUFFERED_IO) { 2237 stripe->Irp->AssociatedIrp.SystemBuffer = sb; 2238 stripe->mdl = NULL; 2239 2240 stripe->Irp->Flags = IRP_BUFFERED_IO; 2241 } else if (device->devobj->Flags & DO_DIRECT_IO) { 2242 stripe->mdl = IoAllocateMdl(sb, sblen, false, false, NULL); 2243 if (!stripe->mdl) { 2244 ERR("IoAllocateMdl failed\n"); 2245 IoFreeIrp(stripe->Irp); 2246 ExFreePool(stripe); 2247 ExFreePool(sb); 2248 return STATUS_INSUFFICIENT_RESOURCES; 2249 } 2250 2251 stripe->Irp->MdlAddress = stripe->mdl; 2252 2253 MmBuildMdlForNonPagedPool(stripe->mdl); 2254 } else { 2255 stripe->Irp->UserBuffer = sb; 2256 stripe->mdl = NULL; 2257 } 2258 2259 IrpSp->Parameters.Write.Length = sblen; 2260 IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i]; 2261 2262 IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, true, true, true); 2263 2264 stripe->context = context; 2265 stripe->device = device; 2266 InsertTailList(&context->stripes, &stripe->list_entry); 2267 2268 context->left++; 2269 2270 i++; 2271 } 2272 2273 if (i == 0) 2274 ERR("no superblocks written!\n"); 2275 2276 return STATUS_SUCCESS; 2277 } 2278 2279 static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) { 2280 uint64_t i; 2281 NTSTATUS Status; 2282 LIST_ENTRY* le; 2283 write_superblocks_context context; 2284 2285 TRACE("(%p)\n", Vcb); 2286 2287 le = Vcb->trees.Flink; 2288 while (le != &Vcb->trees) { 2289 tree* t = CONTAINING_RECORD(le, tree, list_entry); 2290 2291 if (t->write && !t->parent) { 2292 if (t->root == Vcb->root_root) { 2293 Vcb->superblock.root_tree_addr = t->new_address; 2294 Vcb->superblock.root_level = t->header.level; 2295 } else if (t->root == Vcb->chunk_root) { 2296 Vcb->superblock.chunk_tree_addr = t->new_address; 2297 Vcb->superblock.chunk_root_generation = t->header.generation; 2298 Vcb->superblock.chunk_root_level = t->header.level; 2299 } 2300 } 2301 2302 le = le->Flink; 2303 } 2304 2305 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) { 2306 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup)); 2307 } 2308 2309 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp); 2310 2311 KeInitializeEvent(&context.Event, NotificationEvent, false); 2312 InitializeListHead(&context.stripes); 2313 context.left = 0; 2314 2315 le = Vcb->devices.Flink; 2316 while (le != &Vcb->devices) { 2317 device* dev = CONTAINING_RECORD(le, device, list_entry); 2318 2319 if (dev->devobj && !dev->readonly) { 2320 Status = write_superblock(Vcb, dev, &context); 2321 if (!NT_SUCCESS(Status)) { 2322 ERR("write_superblock returned %08x\n", Status); 2323 goto end; 2324 } 2325 } 2326 2327 le = le->Flink; 2328 } 2329 2330 if (IsListEmpty(&context.stripes)) { 2331 ERR("error - not writing any superblocks\n"); 2332 Status = STATUS_INTERNAL_ERROR; 2333 goto end; 2334 } 2335 2336 le = context.stripes.Flink; 2337 while (le != &context.stripes) { 2338 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry); 2339 2340 IoCallDriver(stripe->device->devobj, stripe->Irp); 2341 2342 le = le->Flink; 2343 } 2344 2345 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 2346 2347 le = context.stripes.Flink; 2348 while (le != &context.stripes) { 2349 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry); 2350 2351 if (!NT_SUCCESS(stripe->Status)) { 2352 ERR("device %I64x returned %08x\n", stripe->device->devitem.dev_id, stripe->Status); 2353 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS); 2354 Status = stripe->Status; 2355 goto end; 2356 } 2357 2358 le = le->Flink; 2359 } 2360 2361 Status = STATUS_SUCCESS; 2362 2363 end: 2364 while (!IsListEmpty(&context.stripes)) { 2365 write_superblocks_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&context.stripes), write_superblocks_stripe, list_entry); 2366 2367 if (stripe->mdl) { 2368 if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED) 2369 MmUnlockPages(stripe->mdl); 2370 2371 IoFreeMdl(stripe->mdl); 2372 } 2373 2374 if (stripe->Irp) 2375 IoFreeIrp(stripe->Irp); 2376 2377 if (stripe->buf) 2378 ExFreePool(stripe->buf); 2379 2380 ExFreePool(stripe); 2381 } 2382 2383 return Status; 2384 } 2385 2386 static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) { 2387 LIST_ENTRY *le, *le2; 2388 NTSTATUS Status; 2389 uint64_t old_size; 2390 2391 if (ce->count == 0 && ce->old_count == 0) { 2392 while (!IsListEmpty(&ce->refs)) { 2393 changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry); 2394 ExFreePool(cer); 2395 } 2396 2397 while (!IsListEmpty(&ce->old_refs)) { 2398 changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry); 2399 ExFreePool(cer); 2400 } 2401 2402 goto end; 2403 } 2404 2405 le = ce->refs.Flink; 2406 while (le != &ce->refs) { 2407 changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry); 2408 uint32_t old_count = 0; 2409 2410 if (cer->type == TYPE_EXTENT_DATA_REF) { 2411 le2 = ce->old_refs.Flink; 2412 while (le2 != &ce->old_refs) { 2413 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 2414 2415 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) { 2416 old_count = cer2->edr.count; 2417 break; 2418 } 2419 2420 le2 = le2->Flink; 2421 } 2422 2423 old_size = ce->old_count > 0 ? ce->old_size : ce->size; 2424 2425 if (cer->edr.count > old_count) { 2426 Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp); 2427 2428 if (!NT_SUCCESS(Status)) { 2429 ERR("increase_extent_refcount_data returned %08x\n", Status); 2430 return Status; 2431 } 2432 } 2433 } else if (cer->type == TYPE_SHARED_DATA_REF) { 2434 le2 = ce->old_refs.Flink; 2435 while (le2 != &ce->old_refs) { 2436 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 2437 2438 if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) { 2439 RemoveEntryList(&cer2->list_entry); 2440 ExFreePool(cer2); 2441 break; 2442 } 2443 2444 le2 = le2->Flink; 2445 } 2446 } 2447 2448 le = le->Flink; 2449 } 2450 2451 le = ce->refs.Flink; 2452 while (le != &ce->refs) { 2453 changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry); 2454 LIST_ENTRY* le3 = le->Flink; 2455 uint32_t old_count = 0; 2456 2457 if (cer->type == TYPE_EXTENT_DATA_REF) { 2458 le2 = ce->old_refs.Flink; 2459 while (le2 != &ce->old_refs) { 2460 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 2461 2462 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) { 2463 old_count = cer2->edr.count; 2464 2465 RemoveEntryList(&cer2->list_entry); 2466 ExFreePool(cer2); 2467 break; 2468 } 2469 2470 le2 = le2->Flink; 2471 } 2472 2473 old_size = ce->old_count > 0 ? ce->old_size : ce->size; 2474 2475 if (cer->edr.count < old_count) { 2476 Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, 2477 old_count - cer->edr.count, ce->superseded, Irp); 2478 2479 if (!NT_SUCCESS(Status)) { 2480 ERR("decrease_extent_refcount_data returned %08x\n", Status); 2481 return Status; 2482 } 2483 } 2484 2485 if (ce->size != ce->old_size && ce->old_count > 0) { 2486 KEY searchkey; 2487 traverse_ptr tp; 2488 void* data; 2489 2490 searchkey.obj_id = ce->address; 2491 searchkey.obj_type = TYPE_EXTENT_ITEM; 2492 searchkey.offset = ce->old_size; 2493 2494 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 2495 if (!NT_SUCCESS(Status)) { 2496 ERR("error - find_item returned %08x\n", Status); 2497 return Status; 2498 } 2499 2500 if (keycmp(searchkey, tp.item->key)) { 2501 ERR("could not find (%I64x,%x,%I64x) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 2502 return STATUS_INTERNAL_ERROR; 2503 } 2504 2505 if (tp.item->size > 0) { 2506 data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); 2507 2508 if (!data) { 2509 ERR("out of memory\n"); 2510 return STATUS_INSUFFICIENT_RESOURCES; 2511 } 2512 2513 RtlCopyMemory(data, tp.item->data, tp.item->size); 2514 } else 2515 data = NULL; 2516 2517 Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp); 2518 if (!NT_SUCCESS(Status)) { 2519 ERR("insert_tree_item returned %08x\n", Status); 2520 if (data) ExFreePool(data); 2521 return Status; 2522 } 2523 2524 Status = delete_tree_item(Vcb, &tp); 2525 if (!NT_SUCCESS(Status)) { 2526 ERR("delete_tree_item returned %08x\n", Status); 2527 return Status; 2528 } 2529 } 2530 } 2531 2532 RemoveEntryList(&cer->list_entry); 2533 ExFreePool(cer); 2534 2535 le = le3; 2536 } 2537 2538 #ifdef DEBUG_PARANOID 2539 if (!IsListEmpty(&ce->old_refs)) 2540 WARN("old_refs not empty\n"); 2541 #endif 2542 2543 end: 2544 if (ce->count == 0 && !ce->superseded) { 2545 c->used -= ce->size; 2546 space_list_add(c, ce->address, ce->size, rollback); 2547 } 2548 2549 RemoveEntryList(&ce->list_entry); 2550 ExFreePool(ce); 2551 2552 return STATUS_SUCCESS; 2553 } 2554 2555 void add_checksum_entry(device_extension* Vcb, uint64_t address, ULONG length, uint32_t* csum, PIRP Irp) { 2556 KEY searchkey; 2557 traverse_ptr tp, next_tp; 2558 NTSTATUS Status; 2559 uint64_t startaddr, endaddr; 2560 ULONG len; 2561 uint32_t* checksums; 2562 RTL_BITMAP bmp; 2563 ULONG* bmparr; 2564 ULONG runlength, index; 2565 2566 TRACE("(%p, %I64x, %x, %p, %p)\n", Vcb, address, length, csum, Irp); 2567 2568 searchkey.obj_id = EXTENT_CSUM_ID; 2569 searchkey.obj_type = TYPE_EXTENT_CSUM; 2570 searchkey.offset = address; 2571 2572 // FIXME - create checksum_root if it doesn't exist at all 2573 2574 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp); 2575 if (Status == STATUS_NOT_FOUND) { // tree is completely empty 2576 if (csum) { // not deleted 2577 ULONG length2 = length; 2578 uint64_t off = address; 2579 uint32_t* data = csum; 2580 2581 do { 2582 uint16_t il = (uint16_t)min(length2, MAX_CSUM_SIZE / sizeof(uint32_t)); 2583 2584 checksums = ExAllocatePoolWithTag(PagedPool, il * sizeof(uint32_t), ALLOC_TAG); 2585 if (!checksums) { 2586 ERR("out of memory\n"); 2587 return; 2588 } 2589 2590 RtlCopyMemory(checksums, data, il * sizeof(uint32_t)); 2591 2592 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums, 2593 il * sizeof(uint32_t), NULL, Irp); 2594 if (!NT_SUCCESS(Status)) { 2595 ERR("insert_tree_item returned %08x\n", Status); 2596 ExFreePool(checksums); 2597 return; 2598 } 2599 2600 length2 -= il; 2601 2602 if (length2 > 0) { 2603 off += il * Vcb->superblock.sector_size; 2604 data += il; 2605 } 2606 } while (length2 > 0); 2607 } 2608 } else if (!NT_SUCCESS(Status)) { 2609 ERR("find_item returned %08x\n", Status); 2610 return; 2611 } else { 2612 uint32_t tplen; 2613 2614 // FIXME - check entry is TYPE_EXTENT_CSUM? 2615 2616 if (tp.item->key.offset < address && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(uint32_t)) >= address) 2617 startaddr = tp.item->key.offset; 2618 else 2619 startaddr = address; 2620 2621 searchkey.obj_id = EXTENT_CSUM_ID; 2622 searchkey.obj_type = TYPE_EXTENT_CSUM; 2623 searchkey.offset = address + (length * Vcb->superblock.sector_size); 2624 2625 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp); 2626 if (!NT_SUCCESS(Status)) { 2627 ERR("find_item returned %08x\n", Status); 2628 return; 2629 } 2630 2631 tplen = tp.item->size / sizeof(uint32_t); 2632 2633 if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= address + (length * Vcb->superblock.sector_size)) 2634 endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size); 2635 else 2636 endaddr = address + (length * Vcb->superblock.sector_size); 2637 2638 TRACE("cs starts at %I64x (%x sectors)\n", address, length); 2639 TRACE("startaddr = %I64x\n", startaddr); 2640 TRACE("endaddr = %I64x\n", endaddr); 2641 2642 len = (ULONG)((endaddr - startaddr) / Vcb->superblock.sector_size); 2643 2644 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(uint32_t) * len, ALLOC_TAG); 2645 if (!checksums) { 2646 ERR("out of memory\n"); 2647 return; 2648 } 2649 2650 bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG); 2651 if (!bmparr) { 2652 ERR("out of memory\n"); 2653 ExFreePool(checksums); 2654 return; 2655 } 2656 2657 RtlInitializeBitMap(&bmp, bmparr, len); 2658 RtlSetAllBits(&bmp); 2659 2660 searchkey.obj_id = EXTENT_CSUM_ID; 2661 searchkey.obj_type = TYPE_EXTENT_CSUM; 2662 searchkey.offset = address; 2663 2664 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp); 2665 if (!NT_SUCCESS(Status)) { 2666 ERR("find_item returned %08x\n", Status); 2667 ExFreePool(checksums); 2668 ExFreePool(bmparr); 2669 return; 2670 } 2671 2672 // set bit = free space, cleared bit = allocated sector 2673 2674 while (tp.item->key.offset < endaddr) { 2675 if (tp.item->key.offset >= startaddr) { 2676 if (tp.item->size > 0) { 2677 ULONG itemlen = (ULONG)min((len - (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size) * sizeof(uint32_t), tp.item->size); 2678 2679 RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, itemlen); 2680 RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) / Vcb->superblock.sector_size), itemlen / sizeof(uint32_t)); 2681 } 2682 2683 Status = delete_tree_item(Vcb, &tp); 2684 if (!NT_SUCCESS(Status)) { 2685 ERR("delete_tree_item returned %08x\n", Status); 2686 ExFreePool(checksums); 2687 ExFreePool(bmparr); 2688 return; 2689 } 2690 } 2691 2692 if (find_next_item(Vcb, &tp, &next_tp, false, Irp)) { 2693 tp = next_tp; 2694 } else 2695 break; 2696 } 2697 2698 if (!csum) { // deleted 2699 RtlSetBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length); 2700 } else { 2701 RtlCopyMemory(&checksums[(address - startaddr) / Vcb->superblock.sector_size], csum, length * sizeof(uint32_t)); 2702 RtlClearBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length); 2703 } 2704 2705 runlength = RtlFindFirstRunClear(&bmp, &index); 2706 2707 while (runlength != 0) { 2708 if (index >= len) 2709 break; 2710 2711 if (index + runlength >= len) { 2712 runlength = len - index; 2713 2714 if (runlength == 0) 2715 break; 2716 } 2717 2718 do { 2719 uint16_t rl; 2720 uint64_t off; 2721 uint32_t* data; 2722 2723 if (runlength * sizeof(uint32_t) > MAX_CSUM_SIZE) 2724 rl = MAX_CSUM_SIZE / sizeof(uint32_t); 2725 else 2726 rl = (uint16_t)runlength; 2727 2728 data = ExAllocatePoolWithTag(PagedPool, sizeof(uint32_t) * rl, ALLOC_TAG); 2729 if (!data) { 2730 ERR("out of memory\n"); 2731 ExFreePool(bmparr); 2732 ExFreePool(checksums); 2733 return; 2734 } 2735 2736 RtlCopyMemory(data, &checksums[index], sizeof(uint32_t) * rl); 2737 2738 off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size); 2739 2740 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(uint32_t) * rl, NULL, Irp); 2741 if (!NT_SUCCESS(Status)) { 2742 ERR("insert_tree_item returned %08x\n", Status); 2743 ExFreePool(data); 2744 ExFreePool(bmparr); 2745 ExFreePool(checksums); 2746 return; 2747 } 2748 2749 runlength -= rl; 2750 index += rl; 2751 } while (runlength > 0); 2752 2753 runlength = RtlFindNextForwardRunClear(&bmp, index, &index); 2754 } 2755 2756 ExFreePool(bmparr); 2757 ExFreePool(checksums); 2758 } 2759 } 2760 2761 static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { 2762 LIST_ENTRY *le = Vcb->chunks.Flink, *le2; 2763 chunk* c; 2764 KEY searchkey; 2765 traverse_ptr tp; 2766 BLOCK_GROUP_ITEM* bgi; 2767 NTSTATUS Status; 2768 2769 TRACE("(%p)\n", Vcb); 2770 2771 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true); 2772 2773 while (le != &Vcb->chunks) { 2774 c = CONTAINING_RECORD(le, chunk, list_entry); 2775 2776 acquire_chunk_lock(c, Vcb); 2777 2778 if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) { 2779 Status = load_cache_chunk(Vcb, c, NULL); 2780 2781 if (!NT_SUCCESS(Status)) { 2782 ERR("load_cache_chunk returned %08x\n", Status); 2783 release_chunk_lock(c, Vcb); 2784 goto end; 2785 } 2786 } 2787 2788 le2 = c->changed_extents.Flink; 2789 while (le2 != &c->changed_extents) { 2790 LIST_ENTRY* le3 = le2->Flink; 2791 changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry); 2792 2793 Status = flush_changed_extent(Vcb, c, ce, Irp, rollback); 2794 if (!NT_SUCCESS(Status)) { 2795 ERR("flush_changed_extent returned %08x\n", Status); 2796 release_chunk_lock(c, Vcb); 2797 goto end; 2798 } 2799 2800 le2 = le3; 2801 } 2802 2803 // This is usually done by update_chunks, but we have to check again in case any new chunks 2804 // have been allocated since. 2805 if (c->created) { 2806 Status = create_chunk(Vcb, c, Irp); 2807 if (!NT_SUCCESS(Status)) { 2808 ERR("create_chunk returned %08x\n", Status); 2809 release_chunk_lock(c, Vcb); 2810 goto end; 2811 } 2812 } 2813 2814 if (c->old_cache) { 2815 if (c->old_cache->dirty) { 2816 LIST_ENTRY batchlist; 2817 2818 InitializeListHead(&batchlist); 2819 2820 Status = flush_fcb(c->old_cache, false, &batchlist, Irp); 2821 if (!NT_SUCCESS(Status)) { 2822 ERR("flush_fcb returned %08x\n", Status); 2823 release_chunk_lock(c, Vcb); 2824 clear_batch_list(Vcb, &batchlist); 2825 goto end; 2826 } 2827 2828 Status = commit_batch_list(Vcb, &batchlist, Irp); 2829 if (!NT_SUCCESS(Status)) { 2830 ERR("commit_batch_list returned %08x\n", Status); 2831 release_chunk_lock(c, Vcb); 2832 goto end; 2833 } 2834 } 2835 2836 free_fcb(c->old_cache); 2837 2838 if (c->old_cache->refcount == 0) 2839 reap_fcb(c->old_cache); 2840 2841 c->old_cache = NULL; 2842 } 2843 2844 if (c->used != c->oldused) { 2845 #ifdef __REACTOS__ 2846 uint64_t old_phys_used, phys_used; 2847 #endif 2848 searchkey.obj_id = c->offset; 2849 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM; 2850 searchkey.offset = c->chunk_item->size; 2851 2852 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 2853 if (!NT_SUCCESS(Status)) { 2854 ERR("error - find_item returned %08x\n", Status); 2855 release_chunk_lock(c, Vcb); 2856 goto end; 2857 } 2858 2859 if (keycmp(searchkey, tp.item->key)) { 2860 ERR("could not find (%I64x,%x,%I64x) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 2861 Status = STATUS_INTERNAL_ERROR; 2862 release_chunk_lock(c, Vcb); 2863 goto end; 2864 } 2865 2866 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) { 2867 ERR("(%I64x,%x,%I64x) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM)); 2868 Status = STATUS_INTERNAL_ERROR; 2869 release_chunk_lock(c, Vcb); 2870 goto end; 2871 } 2872 2873 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); 2874 if (!bgi) { 2875 ERR("out of memory\n"); 2876 Status = STATUS_INSUFFICIENT_RESOURCES; 2877 release_chunk_lock(c, Vcb); 2878 goto end; 2879 } 2880 2881 RtlCopyMemory(bgi, tp.item->data, tp.item->size); 2882 bgi->used = c->used; 2883 2884 #ifdef DEBUG_PARANOID 2885 if (bgi->used & 0x8000000000000000) { 2886 ERR("refusing to write BLOCK_GROUP_ITEM with negative usage value (%I64x)", bgi->used); 2887 int3; 2888 } 2889 #endif 2890 2891 TRACE("adjusting usage of chunk %I64x to %I64x\n", c->offset, c->used); 2892 2893 Status = delete_tree_item(Vcb, &tp); 2894 if (!NT_SUCCESS(Status)) { 2895 ERR("delete_tree_item returned %08x\n", Status); 2896 ExFreePool(bgi); 2897 release_chunk_lock(c, Vcb); 2898 goto end; 2899 } 2900 2901 Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp); 2902 if (!NT_SUCCESS(Status)) { 2903 ERR("insert_tree_item returned %08x\n", Status); 2904 ExFreePool(bgi); 2905 release_chunk_lock(c, Vcb); 2906 goto end; 2907 } 2908 2909 #ifndef __REACTOS__ 2910 uint64_t old_phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused); 2911 uint64_t phys_used = chunk_estimate_phys_size(Vcb, c, c->used); 2912 #else 2913 old_phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused); 2914 phys_used = chunk_estimate_phys_size(Vcb, c, c->used); 2915 #endif 2916 2917 if (Vcb->superblock.bytes_used + phys_used > old_phys_used) 2918 Vcb->superblock.bytes_used += phys_used - old_phys_used; 2919 else 2920 Vcb->superblock.bytes_used = 0; 2921 2922 c->oldused = c->used; 2923 } 2924 2925 release_chunk_lock(c, Vcb); 2926 2927 le = le->Flink; 2928 } 2929 2930 Status = STATUS_SUCCESS; 2931 2932 end: 2933 ExReleaseResourceLite(&Vcb->chunk_lock); 2934 2935 return Status; 2936 } 2937 2938 static void get_first_item(tree* t, KEY* key) { 2939 LIST_ENTRY* le; 2940 2941 le = t->itemlist.Flink; 2942 while (le != &t->itemlist) { 2943 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 2944 2945 *key = td->key; 2946 return; 2947 } 2948 } 2949 2950 static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, uint32_t numitems, uint32_t size) { 2951 tree *nt, *pt; 2952 tree_data* td; 2953 tree_data* oldlastitem; 2954 2955 TRACE("splitting tree in %I64x at (%I64x,%x,%I64x)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset); 2956 2957 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG); 2958 if (!nt) { 2959 ERR("out of memory\n"); 2960 return STATUS_INSUFFICIENT_RESOURCES; 2961 } 2962 2963 if (t->header.level > 0) { 2964 nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG); 2965 if (!nt->nonpaged) { 2966 ERR("out of memory\n"); 2967 ExFreePool(nt); 2968 return STATUS_INSUFFICIENT_RESOURCES; 2969 } 2970 2971 ExInitializeFastMutex(&nt->nonpaged->mutex); 2972 } else 2973 nt->nonpaged = NULL; 2974 2975 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header)); 2976 nt->header.address = 0; 2977 nt->header.generation = Vcb->superblock.generation; 2978 nt->header.num_items = t->header.num_items - numitems; 2979 nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN; 2980 2981 nt->has_address = false; 2982 nt->Vcb = Vcb; 2983 nt->parent = t->parent; 2984 2985 #ifdef DEBUG_PARANOID 2986 if (nt->parent && nt->parent->header.level <= nt->header.level) int3; 2987 #endif 2988 2989 nt->root = t->root; 2990 nt->new_address = 0; 2991 nt->has_new_address = false; 2992 nt->updated_extents = false; 2993 nt->uniqueness_determined = true; 2994 nt->is_unique = true; 2995 nt->list_entry_hash.Flink = NULL; 2996 nt->buf = NULL; 2997 InitializeListHead(&nt->itemlist); 2998 2999 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry); 3000 3001 nt->itemlist.Flink = &newfirstitem->list_entry; 3002 nt->itemlist.Blink = t->itemlist.Blink; 3003 nt->itemlist.Flink->Blink = &nt->itemlist; 3004 nt->itemlist.Blink->Flink = &nt->itemlist; 3005 3006 t->itemlist.Blink = &oldlastitem->list_entry; 3007 t->itemlist.Blink->Flink = &t->itemlist; 3008 3009 nt->size = t->size - size; 3010 t->size = size; 3011 t->header.num_items = numitems; 3012 nt->write = true; 3013 3014 InsertTailList(&Vcb->trees, &nt->list_entry); 3015 3016 if (nt->header.level > 0) { 3017 LIST_ENTRY* le = nt->itemlist.Flink; 3018 3019 while (le != &nt->itemlist) { 3020 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); 3021 3022 if (td2->treeholder.tree) { 3023 td2->treeholder.tree->parent = nt; 3024 #ifdef DEBUG_PARANOID 3025 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3; 3026 #endif 3027 } 3028 3029 le = le->Flink; 3030 } 3031 } else { 3032 LIST_ENTRY* le = nt->itemlist.Flink; 3033 3034 while (le != &nt->itemlist) { 3035 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); 3036 3037 if (!td2->inserted && td2->data) { 3038 uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG); 3039 3040 if (!data) { 3041 ERR("out of memory\n"); 3042 return STATUS_INSUFFICIENT_RESOURCES; 3043 } 3044 3045 RtlCopyMemory(data, td2->data, td2->size); 3046 td2->data = data; 3047 td2->inserted = true; 3048 } 3049 3050 le = le->Flink; 3051 } 3052 } 3053 3054 if (nt->parent) { 3055 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); 3056 if (!td) { 3057 ERR("out of memory\n"); 3058 return STATUS_INSUFFICIENT_RESOURCES; 3059 } 3060 3061 td->key = newfirstitem->key; 3062 3063 InsertHeadList(&t->paritem->list_entry, &td->list_entry); 3064 3065 td->ignore = false; 3066 td->inserted = true; 3067 td->treeholder.tree = nt; 3068 nt->paritem = td; 3069 3070 nt->parent->header.num_items++; 3071 nt->parent->size += sizeof(internal_node); 3072 3073 goto end; 3074 } 3075 3076 TRACE("adding new tree parent\n"); 3077 3078 if (nt->header.level == 255) { 3079 ERR("cannot add parent to tree at level 255\n"); 3080 return STATUS_INTERNAL_ERROR; 3081 } 3082 3083 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG); 3084 if (!pt) { 3085 ERR("out of memory\n"); 3086 return STATUS_INSUFFICIENT_RESOURCES; 3087 } 3088 3089 pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG); 3090 if (!pt->nonpaged) { 3091 ERR("out of memory\n"); 3092 ExFreePool(pt); 3093 return STATUS_INSUFFICIENT_RESOURCES; 3094 } 3095 3096 ExInitializeFastMutex(&pt->nonpaged->mutex); 3097 3098 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header)); 3099 pt->header.address = 0; 3100 pt->header.num_items = 2; 3101 pt->header.level = nt->header.level + 1; 3102 pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN; 3103 3104 pt->has_address = false; 3105 pt->Vcb = Vcb; 3106 pt->parent = NULL; 3107 pt->paritem = NULL; 3108 pt->root = t->root; 3109 pt->new_address = 0; 3110 pt->has_new_address = false; 3111 pt->updated_extents = false; 3112 pt->size = pt->header.num_items * sizeof(internal_node); 3113 pt->uniqueness_determined = true; 3114 pt->is_unique = true; 3115 pt->list_entry_hash.Flink = NULL; 3116 pt->buf = NULL; 3117 InitializeListHead(&pt->itemlist); 3118 3119 InsertTailList(&Vcb->trees, &pt->list_entry); 3120 3121 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); 3122 if (!td) { 3123 ERR("out of memory\n"); 3124 return STATUS_INSUFFICIENT_RESOURCES; 3125 } 3126 3127 get_first_item(t, &td->key); 3128 td->ignore = false; 3129 td->inserted = false; 3130 td->treeholder.address = 0; 3131 td->treeholder.generation = Vcb->superblock.generation; 3132 td->treeholder.tree = t; 3133 InsertTailList(&pt->itemlist, &td->list_entry); 3134 t->paritem = td; 3135 3136 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); 3137 if (!td) { 3138 ERR("out of memory\n"); 3139 return STATUS_INSUFFICIENT_RESOURCES; 3140 } 3141 3142 td->key = newfirstitem->key; 3143 td->ignore = false; 3144 td->inserted = false; 3145 td->treeholder.address = 0; 3146 td->treeholder.generation = Vcb->superblock.generation; 3147 td->treeholder.tree = nt; 3148 InsertTailList(&pt->itemlist, &td->list_entry); 3149 nt->paritem = td; 3150 3151 pt->write = true; 3152 3153 t->root->treeholder.tree = pt; 3154 3155 t->parent = pt; 3156 nt->parent = pt; 3157 3158 #ifdef DEBUG_PARANOID 3159 if (t->parent && t->parent->header.level <= t->header.level) int3; 3160 if (nt->parent && nt->parent->header.level <= nt->header.level) int3; 3161 #endif 3162 3163 end: 3164 t->root->root_item.bytes_used += Vcb->superblock.node_size; 3165 3166 return STATUS_SUCCESS; 3167 } 3168 3169 static NTSTATUS split_tree(device_extension* Vcb, tree* t) { 3170 LIST_ENTRY* le; 3171 uint32_t size, ds, numitems; 3172 3173 size = 0; 3174 numitems = 0; 3175 3176 // FIXME - naïve implementation: maximizes number of filled trees 3177 3178 le = t->itemlist.Flink; 3179 while (le != &t->itemlist) { 3180 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 3181 3182 if (!td->ignore) { 3183 if (t->header.level == 0) 3184 ds = sizeof(leaf_node) + td->size; 3185 else 3186 ds = sizeof(internal_node); 3187 3188 if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) { 3189 ERR("(%I64x,%x,%I64x) in tree %I64x is too large (%x > %x)\n", 3190 td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id, 3191 ds, Vcb->superblock.node_size - sizeof(tree_header)); 3192 return STATUS_INTERNAL_ERROR; 3193 } 3194 3195 // FIXME - move back if previous item was deleted item with same key 3196 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header)) 3197 return split_tree_at(Vcb, t, td, numitems, size); 3198 3199 size += ds; 3200 numitems++; 3201 } 3202 3203 le = le->Flink; 3204 } 3205 3206 return STATUS_SUCCESS; 3207 } 3208 3209 bool is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) { 3210 KEY searchkey; 3211 traverse_ptr tp; 3212 NTSTATUS Status; 3213 bool ret = false; 3214 EXTENT_ITEM* ei; 3215 uint8_t* type; 3216 3217 if (t->uniqueness_determined) 3218 return t->is_unique; 3219 3220 if (t->parent && !is_tree_unique(Vcb, t->parent, Irp)) 3221 goto end; 3222 3223 if (t->has_address) { 3224 searchkey.obj_id = t->header.address; 3225 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; 3226 searchkey.offset = 0xffffffffffffffff; 3227 3228 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 3229 if (!NT_SUCCESS(Status)) { 3230 ERR("error - find_item returned %08x\n", Status); 3231 goto end; 3232 } 3233 3234 if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM)) 3235 goto end; 3236 3237 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0)) 3238 goto end; 3239 3240 if (tp.item->size < sizeof(EXTENT_ITEM)) 3241 goto end; 3242 3243 ei = (EXTENT_ITEM*)tp.item->data; 3244 3245 if (ei->refcount > 1) 3246 goto end; 3247 3248 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { 3249 EXTENT_ITEM2* ei2; 3250 3251 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) 3252 goto end; 3253 3254 ei2 = (EXTENT_ITEM2*)&ei[1]; 3255 type = (uint8_t*)&ei2[1]; 3256 } else 3257 type = (uint8_t*)&ei[1]; 3258 3259 if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF) 3260 goto end; 3261 } 3262 3263 ret = true; 3264 3265 end: 3266 t->is_unique = ret; 3267 t->uniqueness_determined = true; 3268 3269 return ret; 3270 } 3271 3272 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, bool* done, bool* done_deletions, PIRP Irp, LIST_ENTRY* rollback) { 3273 LIST_ENTRY* le; 3274 tree_data* nextparitem = NULL; 3275 NTSTATUS Status; 3276 tree *next_tree, *par; 3277 3278 *done = false; 3279 3280 TRACE("trying to amalgamate tree in root %I64x, level %x (size %u)\n", t->root->id, t->header.level, t->size); 3281 3282 // FIXME - doesn't capture everything, as it doesn't ascend 3283 le = t->paritem->list_entry.Flink; 3284 while (le != &t->parent->itemlist) { 3285 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 3286 3287 if (!td->ignore) { 3288 nextparitem = td; 3289 break; 3290 } 3291 3292 le = le->Flink; 3293 } 3294 3295 if (!nextparitem) 3296 return STATUS_SUCCESS; 3297 3298 TRACE("nextparitem: key = %I64x,%x,%I64x\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset); 3299 3300 if (!nextparitem->treeholder.tree) { 3301 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, NULL); 3302 if (!NT_SUCCESS(Status)) { 3303 ERR("do_load_tree returned %08x\n", Status); 3304 return Status; 3305 } 3306 } 3307 3308 if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp)) 3309 return STATUS_SUCCESS; 3310 3311 next_tree = nextparitem->treeholder.tree; 3312 3313 if (!next_tree->updated_extents && next_tree->has_address) { 3314 Status = update_tree_extents(Vcb, next_tree, Irp, rollback); 3315 if (!NT_SUCCESS(Status)) { 3316 ERR("update_tree_extents returned %08x\n", Status); 3317 return Status; 3318 } 3319 } 3320 3321 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) { 3322 // merge two trees into one 3323 3324 t->header.num_items += next_tree->header.num_items; 3325 t->size += next_tree->size; 3326 3327 if (next_tree->header.level > 0) { 3328 le = next_tree->itemlist.Flink; 3329 3330 while (le != &next_tree->itemlist) { 3331 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); 3332 3333 if (td2->treeholder.tree) { 3334 td2->treeholder.tree->parent = t; 3335 #ifdef DEBUG_PARANOID 3336 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3; 3337 #endif 3338 } 3339 3340 td2->inserted = true; 3341 le = le->Flink; 3342 } 3343 } else { 3344 le = next_tree->itemlist.Flink; 3345 3346 while (le != &next_tree->itemlist) { 3347 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); 3348 3349 if (!td2->inserted && td2->data) { 3350 uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG); 3351 3352 if (!data) { 3353 ERR("out of memory\n"); 3354 return STATUS_INSUFFICIENT_RESOURCES; 3355 } 3356 3357 RtlCopyMemory(data, td2->data, td2->size); 3358 td2->data = data; 3359 td2->inserted = true; 3360 } 3361 3362 le = le->Flink; 3363 } 3364 } 3365 3366 t->itemlist.Blink->Flink = next_tree->itemlist.Flink; 3367 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink; 3368 t->itemlist.Blink = next_tree->itemlist.Blink; 3369 t->itemlist.Blink->Flink = &t->itemlist; 3370 3371 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist; 3372 3373 next_tree->header.num_items = 0; 3374 next_tree->size = 0; 3375 3376 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM 3377 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback); 3378 3379 if (!NT_SUCCESS(Status)) { 3380 ERR("reduce_tree_extent returned %08x\n", Status); 3381 return Status; 3382 } 3383 } else if (next_tree->has_address) { 3384 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback); 3385 3386 if (!NT_SUCCESS(Status)) { 3387 ERR("reduce_tree_extent returned %08x\n", Status); 3388 return Status; 3389 } 3390 } 3391 3392 if (!nextparitem->ignore) { 3393 nextparitem->ignore = true; 3394 next_tree->parent->header.num_items--; 3395 next_tree->parent->size -= sizeof(internal_node); 3396 3397 *done_deletions = true; 3398 } 3399 3400 par = next_tree->parent; 3401 while (par) { 3402 par->write = true; 3403 par = par->parent; 3404 } 3405 3406 RemoveEntryList(&nextparitem->list_entry); 3407 ExFreePool(next_tree->paritem); 3408 next_tree->paritem = NULL; 3409 3410 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size; 3411 3412 free_tree(next_tree); 3413 3414 *done = true; 3415 } else { 3416 // rebalance by moving items from second tree into first 3417 ULONG avg_size = (t->size + next_tree->size) / 2; 3418 KEY firstitem = {0, 0, 0}; 3419 bool changed = false; 3420 3421 TRACE("attempting rebalance\n"); 3422 3423 le = next_tree->itemlist.Flink; 3424 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) { 3425 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 3426 ULONG size; 3427 3428 if (!td->ignore) { 3429 if (next_tree->header.level == 0) 3430 size = sizeof(leaf_node) + td->size; 3431 else 3432 size = sizeof(internal_node); 3433 } else 3434 size = 0; 3435 3436 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) { 3437 RemoveEntryList(&td->list_entry); 3438 InsertTailList(&t->itemlist, &td->list_entry); 3439 3440 if (next_tree->header.level > 0 && td->treeholder.tree) { 3441 td->treeholder.tree->parent = t; 3442 #ifdef DEBUG_PARANOID 3443 if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3; 3444 #endif 3445 } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) { 3446 uint8_t* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG); 3447 3448 if (!data) { 3449 ERR("out of memory\n"); 3450 return STATUS_INSUFFICIENT_RESOURCES; 3451 } 3452 3453 RtlCopyMemory(data, td->data, td->size); 3454 td->data = data; 3455 } 3456 3457 td->inserted = true; 3458 3459 if (!td->ignore) { 3460 next_tree->size -= size; 3461 t->size += size; 3462 next_tree->header.num_items--; 3463 t->header.num_items++; 3464 } 3465 3466 changed = true; 3467 } else 3468 break; 3469 3470 le = next_tree->itemlist.Flink; 3471 } 3472 3473 le = next_tree->itemlist.Flink; 3474 while (le != &next_tree->itemlist) { 3475 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 3476 3477 if (!td->ignore) { 3478 firstitem = td->key; 3479 break; 3480 } 3481 3482 le = le->Flink; 3483 } 3484 3485 // FIXME - once ascension is working, make this work with parent's parent, etc. 3486 if (next_tree->paritem) 3487 next_tree->paritem->key = firstitem; 3488 3489 par = next_tree; 3490 while (par) { 3491 par->write = true; 3492 par = par->parent; 3493 } 3494 3495 if (changed) 3496 *done = true; 3497 } 3498 3499 return STATUS_SUCCESS; 3500 } 3501 3502 static NTSTATUS update_extent_level(device_extension* Vcb, uint64_t address, tree* t, uint8_t level, PIRP Irp) { 3503 KEY searchkey; 3504 traverse_ptr tp; 3505 NTSTATUS Status; 3506 3507 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { 3508 searchkey.obj_id = address; 3509 searchkey.obj_type = TYPE_METADATA_ITEM; 3510 searchkey.offset = t->header.level; 3511 3512 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 3513 if (!NT_SUCCESS(Status)) { 3514 ERR("error - find_item returned %08x\n", Status); 3515 return Status; 3516 } 3517 3518 if (!keycmp(tp.item->key, searchkey)) { 3519 EXTENT_ITEM_SKINNY_METADATA* eism; 3520 3521 if (tp.item->size > 0) { 3522 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); 3523 3524 if (!eism) { 3525 ERR("out of memory\n"); 3526 return STATUS_INSUFFICIENT_RESOURCES; 3527 } 3528 3529 RtlCopyMemory(eism, tp.item->data, tp.item->size); 3530 } else 3531 eism = NULL; 3532 3533 Status = delete_tree_item(Vcb, &tp); 3534 if (!NT_SUCCESS(Status)) { 3535 ERR("delete_tree_item returned %08x\n", Status); 3536 if (eism) ExFreePool(eism); 3537 return Status; 3538 } 3539 3540 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp); 3541 if (!NT_SUCCESS(Status)) { 3542 ERR("insert_tree_item returned %08x\n", Status); 3543 if (eism) ExFreePool(eism); 3544 return Status; 3545 } 3546 3547 return STATUS_SUCCESS; 3548 } 3549 } 3550 3551 searchkey.obj_id = address; 3552 searchkey.obj_type = TYPE_EXTENT_ITEM; 3553 searchkey.offset = 0xffffffffffffffff; 3554 3555 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 3556 if (!NT_SUCCESS(Status)) { 3557 ERR("error - find_item returned %08x\n", Status); 3558 return Status; 3559 } 3560 3561 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 3562 EXTENT_ITEM_TREE* eit; 3563 3564 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) { 3565 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE)); 3566 return STATUS_INTERNAL_ERROR; 3567 } 3568 3569 eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); 3570 3571 if (!eit) { 3572 ERR("out of memory\n"); 3573 return STATUS_INSUFFICIENT_RESOURCES; 3574 } 3575 3576 RtlCopyMemory(eit, tp.item->data, tp.item->size); 3577 3578 Status = delete_tree_item(Vcb, &tp); 3579 if (!NT_SUCCESS(Status)) { 3580 ERR("delete_tree_item returned %08x\n", Status); 3581 ExFreePool(eit); 3582 return Status; 3583 } 3584 3585 eit->level = level; 3586 3587 Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp); 3588 if (!NT_SUCCESS(Status)) { 3589 ERR("insert_tree_item returned %08x\n", Status); 3590 ExFreePool(eit); 3591 return Status; 3592 } 3593 3594 return STATUS_SUCCESS; 3595 } 3596 3597 ERR("could not find EXTENT_ITEM for address %I64x\n", address); 3598 3599 return STATUS_INTERNAL_ERROR; 3600 } 3601 3602 static NTSTATUS update_tree_extents_recursive(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { 3603 NTSTATUS Status; 3604 3605 if (t->parent && !t->parent->updated_extents && t->parent->has_address) { 3606 Status = update_tree_extents_recursive(Vcb, t->parent, Irp, rollback); 3607 if (!NT_SUCCESS(Status)) 3608 return Status; 3609 } 3610 3611 Status = update_tree_extents(Vcb, t, Irp, rollback); 3612 if (!NT_SUCCESS(Status)) { 3613 ERR("update_tree_extents returned %08x\n", Status); 3614 return Status; 3615 } 3616 3617 return STATUS_SUCCESS; 3618 } 3619 3620 static NTSTATUS do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { 3621 ULONG level, max_level; 3622 uint32_t min_size; 3623 bool empty, done_deletions = false; 3624 NTSTATUS Status; 3625 tree* t; 3626 3627 TRACE("(%p)\n", Vcb); 3628 3629 max_level = 0; 3630 3631 for (level = 0; level <= 255; level++) { 3632 LIST_ENTRY *le, *nextle; 3633 3634 empty = true; 3635 3636 TRACE("doing level %u\n", level); 3637 3638 le = Vcb->trees.Flink; 3639 3640 while (le != &Vcb->trees) { 3641 t = CONTAINING_RECORD(le, tree, list_entry); 3642 3643 nextle = le->Flink; 3644 3645 if (t->write && t->header.level == level) { 3646 empty = false; 3647 3648 if (t->header.num_items == 0) { 3649 if (t->parent) { 3650 done_deletions = true; 3651 3652 TRACE("deleting tree in root %I64x\n", t->root->id); 3653 3654 t->root->root_item.bytes_used -= Vcb->superblock.node_size; 3655 3656 if (t->has_new_address) { // delete associated EXTENT_ITEM 3657 Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback); 3658 3659 if (!NT_SUCCESS(Status)) { 3660 ERR("reduce_tree_extent returned %08x\n", Status); 3661 return Status; 3662 } 3663 3664 t->has_new_address = false; 3665 } else if (t->has_address) { 3666 Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback); 3667 3668 if (!NT_SUCCESS(Status)) { 3669 ERR("reduce_tree_extent returned %08x\n", Status); 3670 return Status; 3671 } 3672 3673 t->has_address = false; 3674 } 3675 3676 if (!t->paritem->ignore) { 3677 t->paritem->ignore = true; 3678 t->parent->header.num_items--; 3679 t->parent->size -= sizeof(internal_node); 3680 } 3681 3682 RemoveEntryList(&t->paritem->list_entry); 3683 ExFreePool(t->paritem); 3684 t->paritem = NULL; 3685 3686 free_tree(t); 3687 } else if (t->header.level != 0) { 3688 if (t->has_new_address) { 3689 Status = update_extent_level(Vcb, t->new_address, t, 0, Irp); 3690 3691 if (!NT_SUCCESS(Status)) { 3692 ERR("update_extent_level returned %08x\n", Status); 3693 return Status; 3694 } 3695 } 3696 3697 t->header.level = 0; 3698 } 3699 } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) { 3700 TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header)); 3701 3702 if (!t->updated_extents && t->has_address) { 3703 Status = update_tree_extents_recursive(Vcb, t, Irp, rollback); 3704 if (!NT_SUCCESS(Status)) { 3705 ERR("update_tree_extents_recursive returned %08x\n", Status); 3706 return Status; 3707 } 3708 } 3709 3710 Status = split_tree(Vcb, t); 3711 3712 if (!NT_SUCCESS(Status)) { 3713 ERR("split_tree returned %08x\n", Status); 3714 return Status; 3715 } 3716 } 3717 } 3718 3719 le = nextle; 3720 } 3721 3722 if (!empty) { 3723 max_level = level; 3724 } else { 3725 TRACE("nothing found for level %u\n", level); 3726 break; 3727 } 3728 } 3729 3730 min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2; 3731 3732 for (level = 0; level <= max_level; level++) { 3733 LIST_ENTRY* le; 3734 3735 le = Vcb->trees.Flink; 3736 3737 while (le != &Vcb->trees) { 3738 t = CONTAINING_RECORD(le, tree, list_entry); 3739 3740 if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size && 3741 t->root->id != BTRFS_ROOT_FREE_SPACE && is_tree_unique(Vcb, t, Irp)) { 3742 bool done; 3743 3744 do { 3745 Status = try_tree_amalgamate(Vcb, t, &done, &done_deletions, Irp, rollback); 3746 if (!NT_SUCCESS(Status)) { 3747 ERR("try_tree_amalgamate returned %08x\n", Status); 3748 return Status; 3749 } 3750 } while (done && t->size < min_size); 3751 } 3752 3753 le = le->Flink; 3754 } 3755 } 3756 3757 // simplify trees if top tree only has one entry 3758 3759 if (done_deletions) { 3760 for (level = max_level; level > 0; level--) { 3761 LIST_ENTRY *le, *nextle; 3762 3763 le = Vcb->trees.Flink; 3764 while (le != &Vcb->trees) { 3765 nextle = le->Flink; 3766 t = CONTAINING_RECORD(le, tree, list_entry); 3767 3768 if (t->write && t->header.level == level) { 3769 if (!t->parent && t->header.num_items == 1) { 3770 LIST_ENTRY* le2 = t->itemlist.Flink; 3771 tree_data* td = NULL; 3772 tree* child_tree = NULL; 3773 3774 while (le2 != &t->itemlist) { 3775 td = CONTAINING_RECORD(le2, tree_data, list_entry); 3776 if (!td->ignore) 3777 break; 3778 le2 = le2->Flink; 3779 } 3780 3781 TRACE("deleting top-level tree in root %I64x with one item\n", t->root->id); 3782 3783 if (t->has_new_address) { // delete associated EXTENT_ITEM 3784 Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback); 3785 3786 if (!NT_SUCCESS(Status)) { 3787 ERR("reduce_tree_extent returned %08x\n", Status); 3788 return Status; 3789 } 3790 3791 t->has_new_address = false; 3792 } else if (t->has_address) { 3793 Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback); 3794 3795 if (!NT_SUCCESS(Status)) { 3796 ERR("reduce_tree_extent returned %08x\n", Status); 3797 return Status; 3798 } 3799 3800 t->has_address = false; 3801 } 3802 3803 if (!td->treeholder.tree) { // load first item if not already loaded 3804 KEY searchkey = {0,0,0}; 3805 traverse_ptr tp; 3806 3807 Status = find_item(Vcb, t->root, &tp, &searchkey, false, Irp); 3808 if (!NT_SUCCESS(Status)) { 3809 ERR("error - find_item returned %08x\n", Status); 3810 return Status; 3811 } 3812 } 3813 3814 child_tree = td->treeholder.tree; 3815 3816 if (child_tree) { 3817 child_tree->parent = NULL; 3818 child_tree->paritem = NULL; 3819 } 3820 3821 t->root->root_item.bytes_used -= Vcb->superblock.node_size; 3822 3823 free_tree(t); 3824 3825 if (child_tree) 3826 child_tree->root->treeholder.tree = child_tree; 3827 } 3828 } 3829 3830 le = nextle; 3831 } 3832 } 3833 } 3834 3835 return STATUS_SUCCESS; 3836 } 3837 3838 static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, uint8_t level, tree* parent, PIRP Irp, LIST_ENTRY* rollback) { 3839 NTSTATUS Status; 3840 3841 if (!th->tree) { 3842 uint8_t* buf; 3843 chunk* c; 3844 3845 buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); 3846 if (!buf) { 3847 ERR("out of memory\n"); 3848 return STATUS_INSUFFICIENT_RESOURCES; 3849 } 3850 3851 Status = read_data(Vcb, th->address, Vcb->superblock.node_size, NULL, true, buf, NULL, 3852 &c, Irp, th->generation, false, NormalPagePriority); 3853 if (!NT_SUCCESS(Status)) { 3854 ERR("read_data returned 0x%08x\n", Status); 3855 ExFreePool(buf); 3856 return Status; 3857 } 3858 3859 Status = load_tree(Vcb, th->address, buf, r, &th->tree); 3860 3861 if (!th->tree || th->tree->buf != buf) 3862 ExFreePool(buf); 3863 3864 if (!NT_SUCCESS(Status)) { 3865 ERR("load_tree(%I64x) returned %08x\n", th->address, Status); 3866 return Status; 3867 } 3868 } 3869 3870 if (level > 0) { 3871 LIST_ENTRY* le = th->tree->itemlist.Flink; 3872 3873 while (le != &th->tree->itemlist) { 3874 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); 3875 3876 if (!td->ignore) { 3877 Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, th->tree, Irp, rollback); 3878 3879 if (!NT_SUCCESS(Status)) { 3880 ERR("remove_root_extents returned %08x\n", Status); 3881 return Status; 3882 } 3883 } 3884 3885 le = le->Flink; 3886 } 3887 } 3888 3889 if (th->tree && !th->tree->updated_extents && th->tree->has_address) { 3890 Status = update_tree_extents(Vcb, th->tree, Irp, rollback); 3891 if (!NT_SUCCESS(Status)) { 3892 ERR("update_tree_extents returned %08x\n", Status); 3893 return Status; 3894 } 3895 } 3896 3897 if (!th->tree || th->tree->has_address) { 3898 Status = reduce_tree_extent(Vcb, th->address, NULL, parent ? parent->header.tree_id : r->id, level, Irp, rollback); 3899 3900 if (!NT_SUCCESS(Status)) { 3901 ERR("reduce_tree_extent(%I64x) returned %08x\n", th->address, Status); 3902 return Status; 3903 } 3904 } 3905 3906 return STATUS_SUCCESS; 3907 } 3908 3909 static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) { 3910 NTSTATUS Status; 3911 KEY searchkey; 3912 traverse_ptr tp; 3913 3914 Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback); 3915 if (!NT_SUCCESS(Status)) { 3916 ERR("remove_root_extents returned %08x\n", Status); 3917 return Status; 3918 } 3919 3920 // remove entries in uuid root (tree 9) 3921 if (Vcb->uuid_root) { 3922 RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(uint64_t)); 3923 searchkey.obj_type = TYPE_SUBVOL_UUID; 3924 RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t)); 3925 3926 if (searchkey.obj_id != 0 || searchkey.offset != 0) { 3927 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp); 3928 if (!NT_SUCCESS(Status)) { 3929 WARN("find_item returned %08x\n", Status); 3930 } else { 3931 if (!keycmp(tp.item->key, searchkey)) { 3932 Status = delete_tree_item(Vcb, &tp); 3933 if (!NT_SUCCESS(Status)) { 3934 ERR("delete_tree_item returned %08x\n", Status); 3935 return Status; 3936 } 3937 } else 3938 WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 3939 } 3940 } 3941 3942 if (r->root_item.rtransid > 0) { 3943 RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid.uuid[0], sizeof(uint64_t)); 3944 searchkey.obj_type = TYPE_SUBVOL_REC_UUID; 3945 RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t)); 3946 3947 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp); 3948 if (!NT_SUCCESS(Status)) 3949 WARN("find_item returned %08x\n", Status); 3950 else { 3951 if (!keycmp(tp.item->key, searchkey)) { 3952 if (tp.item->size == sizeof(uint64_t)) { 3953 uint64_t* id = (uint64_t*)tp.item->data; 3954 3955 if (*id == r->id) { 3956 Status = delete_tree_item(Vcb, &tp); 3957 if (!NT_SUCCESS(Status)) { 3958 ERR("delete_tree_item returned %08x\n", Status); 3959 return Status; 3960 } 3961 } 3962 } else if (tp.item->size > sizeof(uint64_t)) { 3963 ULONG i; 3964 uint64_t* ids = (uint64_t*)tp.item->data; 3965 3966 for (i = 0; i < tp.item->size / sizeof(uint64_t); i++) { 3967 if (ids[i] == r->id) { 3968 uint64_t* ne; 3969 3970 ne = ExAllocatePoolWithTag(PagedPool, tp.item->size - sizeof(uint64_t), ALLOC_TAG); 3971 if (!ne) { 3972 ERR("out of memory\n"); 3973 return STATUS_INSUFFICIENT_RESOURCES; 3974 } 3975 3976 if (i > 0) 3977 RtlCopyMemory(ne, ids, sizeof(uint64_t) * i); 3978 3979 if ((i + 1) * sizeof(uint64_t) < tp.item->size) 3980 RtlCopyMemory(&ne[i], &ids[i + 1], tp.item->size - ((i + 1) * sizeof(uint64_t))); 3981 3982 Status = delete_tree_item(Vcb, &tp); 3983 if (!NT_SUCCESS(Status)) { 3984 ERR("delete_tree_item returned %08x\n", Status); 3985 ExFreePool(ne); 3986 return Status; 3987 } 3988 3989 Status = insert_tree_item(Vcb, Vcb->uuid_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 3990 ne, tp.item->size - sizeof(uint64_t), NULL, Irp); 3991 if (!NT_SUCCESS(Status)) { 3992 ERR("insert_tree_item returned %08x\n", Status); 3993 ExFreePool(ne); 3994 return Status; 3995 } 3996 3997 break; 3998 } 3999 } 4000 } 4001 } else 4002 WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 4003 } 4004 } 4005 } 4006 4007 // delete ROOT_ITEM 4008 4009 searchkey.obj_id = r->id; 4010 searchkey.obj_type = TYPE_ROOT_ITEM; 4011 searchkey.offset = 0xffffffffffffffff; 4012 4013 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 4014 if (!NT_SUCCESS(Status)) { 4015 ERR("find_item returned %08x\n", Status); 4016 return Status; 4017 } 4018 4019 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 4020 Status = delete_tree_item(Vcb, &tp); 4021 4022 if (!NT_SUCCESS(Status)) { 4023 ERR("delete_tree_item returned %08x\n", Status); 4024 return Status; 4025 } 4026 } else 4027 WARN("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 4028 4029 // delete items in tree cache 4030 4031 free_trees_root(Vcb, r); 4032 4033 return STATUS_SUCCESS; 4034 } 4035 4036 static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { 4037 LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2; 4038 NTSTATUS Status; 4039 4040 while (le != &Vcb->drop_roots) { 4041 root* r = CONTAINING_RECORD(le, root, list_entry); 4042 4043 le2 = le->Flink; 4044 4045 Status = drop_root(Vcb, r, Irp, rollback); 4046 if (!NT_SUCCESS(Status)) { 4047 ERR("drop_root(%I64x) returned %08x\n", r->id, Status); 4048 return Status; 4049 } 4050 4051 le = le2; 4052 } 4053 4054 return STATUS_SUCCESS; 4055 } 4056 4057 NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp) { 4058 KEY searchkey; 4059 traverse_ptr tp; 4060 DEV_ITEM* di; 4061 NTSTATUS Status; 4062 4063 searchkey.obj_id = 1; 4064 searchkey.obj_type = TYPE_DEV_ITEM; 4065 searchkey.offset = device->devitem.dev_id; 4066 4067 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp); 4068 if (!NT_SUCCESS(Status)) { 4069 ERR("error - find_item returned %08x\n", Status); 4070 return Status; 4071 } 4072 4073 if (keycmp(tp.item->key, searchkey)) { 4074 ERR("error - could not find DEV_ITEM for device %I64x\n", device->devitem.dev_id); 4075 return STATUS_INTERNAL_ERROR; 4076 } 4077 4078 Status = delete_tree_item(Vcb, &tp); 4079 if (!NT_SUCCESS(Status)) { 4080 ERR("delete_tree_item returned %08x\n", Status); 4081 return Status; 4082 } 4083 4084 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG); 4085 if (!di) { 4086 ERR("out of memory\n"); 4087 return STATUS_INSUFFICIENT_RESOURCES; 4088 } 4089 4090 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM)); 4091 4092 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp); 4093 if (!NT_SUCCESS(Status)) { 4094 ERR("insert_tree_item returned %08x\n", Status); 4095 ExFreePool(di); 4096 return Status; 4097 } 4098 4099 return STATUS_SUCCESS; 4100 } 4101 4102 static void regen_bootstrap(device_extension* Vcb) { 4103 sys_chunk* sc2; 4104 USHORT i = 0; 4105 LIST_ENTRY* le; 4106 4107 i = 0; 4108 le = Vcb->sys_chunks.Flink; 4109 while (le != &Vcb->sys_chunks) { 4110 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); 4111 4112 TRACE("%I64x,%x,%I64x\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset); 4113 4114 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY)); 4115 i += sizeof(KEY); 4116 4117 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size); 4118 i += sc2->size; 4119 4120 le = le->Flink; 4121 } 4122 } 4123 4124 static NTSTATUS add_to_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset, void* data, uint16_t size) { 4125 sys_chunk* sc; 4126 LIST_ENTRY* le; 4127 4128 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) { 4129 ERR("error - bootstrap is full\n"); 4130 return STATUS_INTERNAL_ERROR; 4131 } 4132 4133 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG); 4134 if (!sc) { 4135 ERR("out of memory\n"); 4136 return STATUS_INSUFFICIENT_RESOURCES; 4137 } 4138 4139 sc->key.obj_id = obj_id; 4140 sc->key.obj_type = obj_type; 4141 sc->key.offset = offset; 4142 sc->size = size; 4143 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG); 4144 if (!sc->data) { 4145 ERR("out of memory\n"); 4146 ExFreePool(sc); 4147 return STATUS_INSUFFICIENT_RESOURCES; 4148 } 4149 4150 RtlCopyMemory(sc->data, data, sc->size); 4151 4152 le = Vcb->sys_chunks.Flink; 4153 while (le != &Vcb->sys_chunks) { 4154 sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); 4155 4156 if (keycmp(sc2->key, sc->key) == 1) 4157 break; 4158 4159 le = le->Flink; 4160 } 4161 InsertTailList(le, &sc->list_entry); 4162 4163 Vcb->superblock.n += sizeof(KEY) + size; 4164 4165 regen_bootstrap(Vcb); 4166 4167 return STATUS_SUCCESS; 4168 } 4169 4170 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp) { 4171 CHUNK_ITEM* ci; 4172 CHUNK_ITEM_STRIPE* cis; 4173 BLOCK_GROUP_ITEM* bgi; 4174 uint16_t i, factor; 4175 NTSTATUS Status; 4176 4177 ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG); 4178 if (!ci) { 4179 ERR("out of memory\n"); 4180 return STATUS_INSUFFICIENT_RESOURCES; 4181 } 4182 4183 RtlCopyMemory(ci, c->chunk_item, c->size); 4184 4185 Status = insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp); 4186 if (!NT_SUCCESS(Status)) { 4187 ERR("insert_tree_item failed\n"); 4188 ExFreePool(ci); 4189 return Status; 4190 } 4191 4192 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) { 4193 Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size); 4194 if (!NT_SUCCESS(Status)) { 4195 ERR("add_to_bootstrap returned %08x\n", Status); 4196 return Status; 4197 } 4198 } 4199 4200 // add BLOCK_GROUP_ITEM to tree 2 4201 4202 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG); 4203 if (!bgi) { 4204 ERR("out of memory\n"); 4205 return STATUS_INSUFFICIENT_RESOURCES; 4206 } 4207 4208 bgi->used = c->used; 4209 bgi->chunk_tree = 0x100; 4210 bgi->flags = c->chunk_item->type; 4211 4212 Status = insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp); 4213 if (!NT_SUCCESS(Status)) { 4214 ERR("insert_tree_item failed\n"); 4215 ExFreePool(bgi); 4216 return Status; 4217 } 4218 4219 if (c->chunk_item->type & BLOCK_FLAG_RAID0) 4220 factor = c->chunk_item->num_stripes; 4221 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 4222 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; 4223 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) 4224 factor = c->chunk_item->num_stripes - 1; 4225 else if (c->chunk_item->type & BLOCK_FLAG_RAID6) 4226 factor = c->chunk_item->num_stripes - 2; 4227 else // SINGLE, DUPLICATE, RAID1 4228 factor = 1; 4229 4230 // add DEV_EXTENTs to tree 4 4231 4232 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 4233 4234 for (i = 0; i < c->chunk_item->num_stripes; i++) { 4235 DEV_EXTENT* de; 4236 4237 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG); 4238 if (!de) { 4239 ERR("out of memory\n"); 4240 return STATUS_INSUFFICIENT_RESOURCES; 4241 } 4242 4243 de->chunktree = Vcb->chunk_root->id; 4244 de->objid = 0x100; 4245 de->address = c->offset; 4246 de->length = c->chunk_item->size / factor; 4247 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid; 4248 4249 Status = insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp); 4250 if (!NT_SUCCESS(Status)) { 4251 ERR("insert_tree_item returned %08x\n", Status); 4252 ExFreePool(de); 4253 return Status; 4254 } 4255 4256 // FIXME - no point in calling this twice for the same device 4257 Status = update_dev_item(Vcb, c->devices[i], Irp); 4258 if (!NT_SUCCESS(Status)) { 4259 ERR("update_dev_item returned %08x\n", Status); 4260 return Status; 4261 } 4262 } 4263 4264 c->created = false; 4265 c->oldused = c->used; 4266 4267 Vcb->superblock.bytes_used += chunk_estimate_phys_size(Vcb, c, c->used); 4268 4269 return STATUS_SUCCESS; 4270 } 4271 4272 static void remove_from_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset) { 4273 sys_chunk* sc2; 4274 LIST_ENTRY* le; 4275 4276 le = Vcb->sys_chunks.Flink; 4277 while (le != &Vcb->sys_chunks) { 4278 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); 4279 4280 if (sc2->key.obj_id == obj_id && sc2->key.obj_type == obj_type && sc2->key.offset == offset) { 4281 RemoveEntryList(&sc2->list_entry); 4282 4283 Vcb->superblock.n -= sizeof(KEY) + sc2->size; 4284 4285 ExFreePool(sc2->data); 4286 ExFreePool(sc2); 4287 regen_bootstrap(Vcb); 4288 return; 4289 } 4290 4291 le = le->Flink; 4292 } 4293 } 4294 4295 static NTSTATUS set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name, uint16_t namelen, 4296 uint32_t crc32, uint8_t* data, uint16_t datalen) { 4297 NTSTATUS Status; 4298 uint16_t xasize; 4299 DIR_ITEM* xa; 4300 4301 TRACE("(%p, %I64x, %I64x, %.*s, %08x, %p, %u)\n", Vcb, subvol->id, inode, namelen, name, crc32, data, datalen); 4302 4303 xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen + datalen; 4304 4305 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG); 4306 if (!xa) { 4307 ERR("out of memory\n"); 4308 return STATUS_INSUFFICIENT_RESOURCES; 4309 } 4310 4311 xa->key.obj_id = 0; 4312 xa->key.obj_type = 0; 4313 xa->key.offset = 0; 4314 xa->transid = Vcb->superblock.generation; 4315 xa->m = datalen; 4316 xa->n = namelen; 4317 xa->type = BTRFS_TYPE_EA; 4318 RtlCopyMemory(xa->name, name, namelen); 4319 RtlCopyMemory(xa->name + namelen, data, datalen); 4320 4321 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr); 4322 if (!NT_SUCCESS(Status)) { 4323 ERR("insert_tree_item_batch returned %08x\n", Status); 4324 ExFreePool(xa); 4325 return Status; 4326 } 4327 4328 return STATUS_SUCCESS; 4329 } 4330 4331 static NTSTATUS delete_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name, 4332 uint16_t namelen, uint32_t crc32) { 4333 NTSTATUS Status; 4334 uint16_t xasize; 4335 DIR_ITEM* xa; 4336 4337 TRACE("(%p, %I64x, %I64x, %.*s, %08x)\n", Vcb, subvol->id, inode, namelen, name, crc32); 4338 4339 xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen; 4340 4341 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG); 4342 if (!xa) { 4343 ERR("out of memory\n"); 4344 return STATUS_INSUFFICIENT_RESOURCES; 4345 } 4346 4347 xa->key.obj_id = 0; 4348 xa->key.obj_type = 0; 4349 xa->key.offset = 0; 4350 xa->transid = Vcb->superblock.generation; 4351 xa->m = 0; 4352 xa->n = namelen; 4353 xa->type = BTRFS_TYPE_EA; 4354 RtlCopyMemory(xa->name, name, namelen); 4355 4356 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_DeleteXattr); 4357 if (!NT_SUCCESS(Status)) { 4358 ERR("insert_tree_item_batch returned %08x\n", Status); 4359 ExFreePool(xa); 4360 return Status; 4361 } 4362 4363 return STATUS_SUCCESS; 4364 } 4365 4366 static NTSTATUS insert_sparse_extent(fcb* fcb, LIST_ENTRY* batchlist, uint64_t start, uint64_t length) { 4367 NTSTATUS Status; 4368 EXTENT_DATA* ed; 4369 EXTENT_DATA2* ed2; 4370 4371 TRACE("((%I64x, %I64x), %I64x, %I64x)\n", fcb->subvol->id, fcb->inode, start, length); 4372 4373 ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); 4374 if (!ed) { 4375 ERR("out of memory\n"); 4376 return STATUS_INSUFFICIENT_RESOURCES; 4377 } 4378 4379 ed->generation = fcb->Vcb->superblock.generation; 4380 ed->decoded_size = length; 4381 ed->compression = BTRFS_COMPRESSION_NONE; 4382 ed->encryption = BTRFS_ENCRYPTION_NONE; 4383 ed->encoding = BTRFS_ENCODING_NONE; 4384 ed->type = EXTENT_TYPE_REGULAR; 4385 4386 ed2 = (EXTENT_DATA2*)ed->data; 4387 ed2->address = 0; 4388 ed2->size = 0; 4389 ed2->offset = 0; 4390 ed2->num_bytes = length; 4391 4392 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), Batch_Insert); 4393 if (!NT_SUCCESS(Status)) { 4394 ERR("insert_tree_item_batch returned %08x\n", Status); 4395 ExFreePool(ed); 4396 return Status; 4397 } 4398 4399 return STATUS_SUCCESS; 4400 } 4401 4402 #ifdef _MSC_VER 4403 #pragma warning(push) 4404 #pragma warning(suppress: 28194) 4405 #endif 4406 NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, uint64_t objid, uint8_t objtype, uint64_t offset, 4407 _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data, uint16_t datalen, enum batch_operation operation) { 4408 LIST_ENTRY* le; 4409 batch_root* br = NULL; 4410 batch_item* bi; 4411 4412 le = batchlist->Flink; 4413 while (le != batchlist) { 4414 batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry); 4415 4416 if (br2->r == r) { 4417 br = br2; 4418 break; 4419 } 4420 4421 le = le->Flink; 4422 } 4423 4424 if (!br) { 4425 br = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG); 4426 if (!br) { 4427 ERR("out of memory\n"); 4428 return STATUS_INSUFFICIENT_RESOURCES; 4429 } 4430 4431 br->r = r; 4432 InitializeListHead(&br->items); 4433 InsertTailList(batchlist, &br->list_entry); 4434 } 4435 4436 bi = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside); 4437 if (!bi) { 4438 ERR("out of memory\n"); 4439 return STATUS_INSUFFICIENT_RESOURCES; 4440 } 4441 4442 bi->key.obj_id = objid; 4443 bi->key.obj_type = objtype; 4444 bi->key.offset = offset; 4445 bi->data = data; 4446 bi->datalen = datalen; 4447 bi->operation = operation; 4448 4449 le = br->items.Blink; 4450 while (le != &br->items) { 4451 batch_item* bi2 = CONTAINING_RECORD(le, batch_item, list_entry); 4452 int cmp = keycmp(bi2->key, bi->key); 4453 4454 if (cmp == -1 || (cmp == 0 && bi->operation >= bi2->operation)) { 4455 InsertHeadList(&bi2->list_entry, &bi->list_entry); 4456 return STATUS_SUCCESS; 4457 } 4458 4459 le = le->Blink; 4460 } 4461 4462 InsertHeadList(&br->items, &bi->list_entry); 4463 4464 return STATUS_SUCCESS; 4465 } 4466 #ifdef _MSC_VER 4467 #pragma warning(pop) 4468 #endif 4469 4470 typedef struct { 4471 uint64_t address; 4472 uint64_t length; 4473 uint64_t offset; 4474 bool changed; 4475 chunk* chunk; 4476 uint64_t skip_start; 4477 uint64_t skip_end; 4478 LIST_ENTRY list_entry; 4479 } extent_range; 4480 4481 static void rationalize_extents(fcb* fcb, PIRP Irp) { 4482 LIST_ENTRY* le; 4483 LIST_ENTRY extent_ranges; 4484 extent_range* er; 4485 bool changed = false, truncating = false; 4486 uint32_t num_extents = 0; 4487 4488 InitializeListHead(&extent_ranges); 4489 4490 le = fcb->extents.Flink; 4491 while (le != &fcb->extents) { 4492 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 4493 4494 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) { 4495 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 4496 4497 if (ed2->size != 0) { 4498 LIST_ENTRY* le2; 4499 4500 le2 = extent_ranges.Flink; 4501 while (le2 != &extent_ranges) { 4502 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry); 4503 4504 if (er2->address == ed2->address) { 4505 er2->skip_start = min(er2->skip_start, ed2->offset); 4506 er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes); 4507 goto cont; 4508 } else if (er2->address > ed2->address) 4509 break; 4510 4511 le2 = le2->Flink; 4512 } 4513 4514 er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside? 4515 if (!er) { 4516 ERR("out of memory\n"); 4517 goto end; 4518 } 4519 4520 er->address = ed2->address; 4521 er->length = ed2->size; 4522 er->offset = ext->offset - ed2->offset; 4523 er->changed = false; 4524 er->chunk = NULL; 4525 er->skip_start = ed2->offset; 4526 er->skip_end = ed2->size - ed2->offset - ed2->num_bytes; 4527 4528 if (er->skip_start != 0 || er->skip_end != 0) 4529 truncating = true; 4530 4531 InsertHeadList(le2->Blink, &er->list_entry); 4532 num_extents++; 4533 } 4534 } 4535 4536 cont: 4537 le = le->Flink; 4538 } 4539 4540 if (num_extents == 0 || (num_extents == 1 && !truncating)) 4541 goto end; 4542 4543 le = extent_ranges.Flink; 4544 while (le != &extent_ranges) { 4545 er = CONTAINING_RECORD(le, extent_range, list_entry); 4546 4547 if (!er->chunk) { 4548 LIST_ENTRY* le2; 4549 4550 er->chunk = get_chunk_from_address(fcb->Vcb, er->address); 4551 4552 if (!er->chunk) { 4553 ERR("get_chunk_from_address(%I64x) failed\n", er->address); 4554 goto end; 4555 } 4556 4557 le2 = le->Flink; 4558 while (le2 != &extent_ranges) { 4559 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry); 4560 4561 if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size) 4562 er2->chunk = er->chunk; 4563 4564 le2 = le2->Flink; 4565 } 4566 } 4567 4568 le = le->Flink; 4569 } 4570 4571 if (truncating) { 4572 // truncate beginning or end of extent if unused 4573 4574 le = extent_ranges.Flink; 4575 while (le != &extent_ranges) { 4576 er = CONTAINING_RECORD(le, extent_range, list_entry); 4577 4578 if (er->skip_start > 0) { 4579 LIST_ENTRY* le2 = fcb->extents.Flink; 4580 while (le2 != &fcb->extents) { 4581 extent* ext = CONTAINING_RECORD(le2, extent, list_entry); 4582 4583 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) { 4584 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 4585 4586 if (ed2->size != 0 && ed2->address == er->address) { 4587 NTSTATUS Status; 4588 4589 Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 4590 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp); 4591 if (!NT_SUCCESS(Status)) { 4592 ERR("update_changed_extent_ref returned %08x\n", Status); 4593 goto end; 4594 } 4595 4596 ext->extent_data.decoded_size -= er->skip_start; 4597 ed2->size -= er->skip_start; 4598 ed2->address += er->skip_start; 4599 ed2->offset -= er->skip_start; 4600 4601 add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 4602 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM); 4603 } 4604 } 4605 4606 le2 = le2->Flink; 4607 } 4608 4609 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) 4610 add_checksum_entry(fcb->Vcb, er->address, (ULONG)(er->skip_start / fcb->Vcb->superblock.sector_size), NULL, NULL); 4611 4612 acquire_chunk_lock(er->chunk, fcb->Vcb); 4613 4614 if (!er->chunk->cache_loaded) { 4615 NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL); 4616 4617 if (!NT_SUCCESS(Status)) { 4618 ERR("load_cache_chunk returned %08x\n", Status); 4619 release_chunk_lock(er->chunk, fcb->Vcb); 4620 goto end; 4621 } 4622 } 4623 4624 er->chunk->used -= er->skip_start; 4625 4626 space_list_add(er->chunk, er->address, er->skip_start, NULL); 4627 4628 release_chunk_lock(er->chunk, fcb->Vcb); 4629 4630 er->address += er->skip_start; 4631 er->length -= er->skip_start; 4632 } 4633 4634 if (er->skip_end > 0) { 4635 LIST_ENTRY* le2 = fcb->extents.Flink; 4636 while (le2 != &fcb->extents) { 4637 extent* ext = CONTAINING_RECORD(le2, extent, list_entry); 4638 4639 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) { 4640 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 4641 4642 if (ed2->size != 0 && ed2->address == er->address) { 4643 NTSTATUS Status; 4644 4645 Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 4646 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp); 4647 if (!NT_SUCCESS(Status)) { 4648 ERR("update_changed_extent_ref returned %08x\n", Status); 4649 goto end; 4650 } 4651 4652 ext->extent_data.decoded_size -= er->skip_end; 4653 ed2->size -= er->skip_end; 4654 4655 add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 4656 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM); 4657 } 4658 } 4659 4660 le2 = le2->Flink; 4661 } 4662 4663 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) 4664 add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, (ULONG)(er->skip_end / fcb->Vcb->superblock.sector_size), NULL, NULL); 4665 4666 acquire_chunk_lock(er->chunk, fcb->Vcb); 4667 4668 if (!er->chunk->cache_loaded) { 4669 NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL); 4670 4671 if (!NT_SUCCESS(Status)) { 4672 ERR("load_cache_chunk returned %08x\n", Status); 4673 release_chunk_lock(er->chunk, fcb->Vcb); 4674 goto end; 4675 } 4676 } 4677 4678 er->chunk->used -= er->skip_end; 4679 4680 space_list_add(er->chunk, er->address + er->length - er->skip_end, er->skip_end, NULL); 4681 4682 release_chunk_lock(er->chunk, fcb->Vcb); 4683 4684 er->length -= er->skip_end; 4685 } 4686 4687 le = le->Flink; 4688 } 4689 } 4690 4691 if (num_extents < 2) 4692 goto end; 4693 4694 // merge together adjacent extents 4695 le = extent_ranges.Flink; 4696 while (le != &extent_ranges) { 4697 er = CONTAINING_RECORD(le, extent_range, list_entry); 4698 4699 if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) { 4700 extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry); 4701 4702 if (er->chunk == er2->chunk) { 4703 if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) { 4704 if (er->length + er2->length <= MAX_EXTENT_SIZE) { 4705 er->length += er2->length; 4706 er->changed = true; 4707 4708 RemoveEntryList(&er2->list_entry); 4709 ExFreePool(er2); 4710 4711 changed = true; 4712 continue; 4713 } 4714 } 4715 } 4716 } 4717 4718 le = le->Flink; 4719 } 4720 4721 if (!changed) 4722 goto end; 4723 4724 le = fcb->extents.Flink; 4725 while (le != &fcb->extents) { 4726 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 4727 4728 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) { 4729 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 4730 4731 if (ed2->size != 0) { 4732 LIST_ENTRY* le2; 4733 4734 le2 = extent_ranges.Flink; 4735 while (le2 != &extent_ranges) { 4736 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry); 4737 4738 if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) { 4739 NTSTATUS Status; 4740 4741 Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 4742 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp); 4743 if (!NT_SUCCESS(Status)) { 4744 ERR("update_changed_extent_ref returned %08x\n", Status); 4745 goto end; 4746 } 4747 4748 ed2->offset += ed2->address - er2->address; 4749 ed2->address = er2->address; 4750 ed2->size = er2->length; 4751 ext->extent_data.decoded_size = ed2->size; 4752 4753 add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 4754 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM); 4755 4756 break; 4757 } 4758 4759 le2 = le2->Flink; 4760 } 4761 } 4762 } 4763 4764 le = le->Flink; 4765 } 4766 4767 end: 4768 while (!IsListEmpty(&extent_ranges)) { 4769 le = RemoveHeadList(&extent_ranges); 4770 er = CONTAINING_RECORD(le, extent_range, list_entry); 4771 4772 ExFreePool(er); 4773 } 4774 } 4775 4776 NTSTATUS flush_fcb(fcb* fcb, bool cache, LIST_ENTRY* batchlist, PIRP Irp) { 4777 traverse_ptr tp; 4778 KEY searchkey; 4779 NTSTATUS Status; 4780 INODE_ITEM* ii; 4781 uint64_t ii_offset; 4782 #ifdef DEBUG_PARANOID 4783 uint64_t old_size = 0; 4784 bool extents_changed; 4785 #endif 4786 4787 if (fcb->ads) { 4788 if (fcb->deleted) { 4789 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, fcb->adshash); 4790 if (!NT_SUCCESS(Status)) { 4791 ERR("delete_xattr returned %08x\n", Status); 4792 goto end; 4793 } 4794 } else { 4795 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, 4796 fcb->adshash, (uint8_t*)fcb->adsdata.Buffer, fcb->adsdata.Length); 4797 if (!NT_SUCCESS(Status)) { 4798 ERR("set_xattr returned %08x\n", Status); 4799 goto end; 4800 } 4801 } 4802 4803 Status = STATUS_SUCCESS; 4804 goto end; 4805 } 4806 4807 if (fcb->deleted) { 4808 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode); 4809 if (!NT_SUCCESS(Status)) { 4810 ERR("insert_tree_item_batch returned %08x\n", Status); 4811 goto end; 4812 } 4813 4814 if (fcb->marked_as_orphan) { 4815 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE, 4816 fcb->inode, NULL, 0, Batch_Delete); 4817 if (!NT_SUCCESS(Status)) { 4818 ERR("insert_tree_item_batch returned %08x\n", Status); 4819 goto end; 4820 } 4821 } 4822 4823 Status = STATUS_SUCCESS; 4824 goto end; 4825 } 4826 4827 #ifdef DEBUG_PARANOID 4828 extents_changed = fcb->extents_changed; 4829 #endif 4830 4831 if (fcb->extents_changed) { 4832 LIST_ENTRY* le; 4833 bool prealloc = false, extents_inline = false; 4834 uint64_t last_end; 4835 4836 // delete ignored extent items 4837 le = fcb->extents.Flink; 4838 while (le != &fcb->extents) { 4839 LIST_ENTRY* le2 = le->Flink; 4840 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 4841 4842 if (ext->ignore) { 4843 RemoveEntryList(&ext->list_entry); 4844 4845 if (ext->csum) 4846 ExFreePool(ext->csum); 4847 4848 ExFreePool(ext); 4849 } 4850 4851 le = le2; 4852 } 4853 4854 le = fcb->extents.Flink; 4855 while (le != &fcb->extents) { 4856 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 4857 4858 if (ext->inserted && ext->csum && ext->extent_data.type == EXTENT_TYPE_REGULAR) { 4859 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 4860 4861 if (ed2->size > 0) { // not sparse 4862 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE) 4863 add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size), ext->csum, Irp); 4864 else 4865 add_checksum_entry(fcb->Vcb, ed2->address, (ULONG)(ed2->size / fcb->Vcb->superblock.sector_size), ext->csum, Irp); 4866 } 4867 } 4868 4869 le = le->Flink; 4870 } 4871 4872 if (!IsListEmpty(&fcb->extents)) { 4873 rationalize_extents(fcb, Irp); 4874 4875 // merge together adjacent EXTENT_DATAs pointing to same extent 4876 4877 le = fcb->extents.Flink; 4878 while (le != &fcb->extents) { 4879 LIST_ENTRY* le2 = le->Flink; 4880 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 4881 4882 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) { 4883 extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry); 4884 4885 if (ext->extent_data.type == nextext->extent_data.type) { 4886 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 4887 EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->extent_data.data; 4888 4889 if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size && 4890 nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) { 4891 chunk* c; 4892 4893 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->csum) { 4894 ULONG len = (ULONG)((ed2->num_bytes + ned2->num_bytes) / fcb->Vcb->superblock.sector_size); 4895 uint32_t* csum; 4896 4897 csum = ExAllocatePoolWithTag(NonPagedPool, len * sizeof(uint32_t), ALLOC_TAG); 4898 if (!csum) { 4899 ERR("out of memory\n"); 4900 Status = STATUS_INSUFFICIENT_RESOURCES; 4901 goto end; 4902 } 4903 4904 RtlCopyMemory(csum, ext->csum, (ULONG)(ed2->num_bytes * sizeof(uint32_t) / fcb->Vcb->superblock.sector_size)); 4905 RtlCopyMemory(&csum[ed2->num_bytes / fcb->Vcb->superblock.sector_size], nextext->csum, 4906 (ULONG)(ned2->num_bytes * sizeof(uint32_t) / fcb->Vcb->superblock.sector_size)); 4907 4908 ExFreePool(ext->csum); 4909 ext->csum = csum; 4910 } 4911 4912 ext->extent_data.generation = fcb->Vcb->superblock.generation; 4913 ed2->num_bytes += ned2->num_bytes; 4914 4915 RemoveEntryList(&nextext->list_entry); 4916 4917 if (nextext->csum) 4918 ExFreePool(nextext->csum); 4919 4920 ExFreePool(nextext); 4921 4922 c = get_chunk_from_address(fcb->Vcb, ed2->address); 4923 4924 if (!c) { 4925 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address); 4926 } else { 4927 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1, 4928 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp); 4929 if (!NT_SUCCESS(Status)) { 4930 ERR("update_changed_extent_ref returned %08x\n", Status); 4931 goto end; 4932 } 4933 } 4934 4935 le2 = le; 4936 } 4937 } 4938 } 4939 4940 le = le2; 4941 } 4942 } 4943 4944 if (!fcb->created) { 4945 // delete existing EXTENT_DATA items 4946 4947 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, NULL, 0, Batch_DeleteExtentData); 4948 if (!NT_SUCCESS(Status)) { 4949 ERR("insert_tree_item_batch returned %08x\n", Status); 4950 goto end; 4951 } 4952 } 4953 4954 // add new EXTENT_DATAs 4955 4956 last_end = 0; 4957 4958 le = fcb->extents.Flink; 4959 while (le != &fcb->extents) { 4960 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 4961 EXTENT_DATA* ed; 4962 4963 ext->inserted = false; 4964 4965 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) { 4966 Status = insert_sparse_extent(fcb, batchlist, last_end, ext->offset - last_end); 4967 if (!NT_SUCCESS(Status)) { 4968 ERR("insert_sparse_extent returned %08x\n", Status); 4969 goto end; 4970 } 4971 } 4972 4973 ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); 4974 if (!ed) { 4975 ERR("out of memory\n"); 4976 Status = STATUS_INSUFFICIENT_RESOURCES; 4977 goto end; 4978 } 4979 4980 RtlCopyMemory(ed, &ext->extent_data, ext->datalen); 4981 4982 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset, 4983 ed, ext->datalen, Batch_Insert); 4984 if (!NT_SUCCESS(Status)) { 4985 ERR("insert_tree_item_batch returned %08x\n", Status); 4986 goto end; 4987 } 4988 4989 if (ed->type == EXTENT_TYPE_PREALLOC) 4990 prealloc = true; 4991 4992 if (ed->type == EXTENT_TYPE_INLINE) 4993 extents_inline = true; 4994 4995 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) { 4996 if (ed->type == EXTENT_TYPE_INLINE) 4997 last_end = ext->offset + ed->decoded_size; 4998 else { 4999 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 5000 5001 last_end = ext->offset + ed2->num_bytes; 5002 } 5003 } 5004 5005 le = le->Flink; 5006 } 5007 5008 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline && 5009 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) { 5010 Status = insert_sparse_extent(fcb, batchlist, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end); 5011 if (!NT_SUCCESS(Status)) { 5012 ERR("insert_sparse_extent returned %08x\n", Status); 5013 goto end; 5014 } 5015 } 5016 5017 // update prealloc flag in INODE_ITEM 5018 5019 if (!prealloc) 5020 fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC; 5021 else 5022 fcb->inode_item.flags |= BTRFS_INODE_PREALLOC; 5023 5024 fcb->inode_item_changed = true; 5025 5026 fcb->extents_changed = false; 5027 } 5028 5029 if ((!fcb->created && fcb->inode_item_changed) || cache) { 5030 searchkey.obj_id = fcb->inode; 5031 searchkey.obj_type = TYPE_INODE_ITEM; 5032 searchkey.offset = 0xffffffffffffffff; 5033 5034 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp); 5035 if (!NT_SUCCESS(Status)) { 5036 ERR("error - find_item returned %08x\n", Status); 5037 goto end; 5038 } 5039 5040 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 5041 if (cache) { 5042 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG); 5043 if (!ii) { 5044 ERR("out of memory\n"); 5045 Status = STATUS_INSUFFICIENT_RESOURCES; 5046 goto end; 5047 } 5048 5049 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM)); 5050 5051 Status = insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp); 5052 if (!NT_SUCCESS(Status)) { 5053 ERR("insert_tree_item returned %08x\n", Status); 5054 goto end; 5055 } 5056 5057 ii_offset = 0; 5058 } else { 5059 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id); 5060 Status = STATUS_INTERNAL_ERROR; 5061 goto end; 5062 } 5063 } else { 5064 #ifdef DEBUG_PARANOID 5065 INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data; 5066 5067 old_size = ii2->st_size; 5068 #endif 5069 5070 ii_offset = tp.item->key.offset; 5071 } 5072 5073 if (!cache) { 5074 Status = delete_tree_item(fcb->Vcb, &tp); 5075 if (!NT_SUCCESS(Status)) { 5076 ERR("delete_tree_item returned %08x\n", Status); 5077 goto end; 5078 } 5079 } else { 5080 searchkey.obj_id = fcb->inode; 5081 searchkey.obj_type = TYPE_INODE_ITEM; 5082 searchkey.offset = ii_offset; 5083 5084 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp); 5085 if (!NT_SUCCESS(Status)) { 5086 ERR("error - find_item returned %08x\n", Status); 5087 goto end; 5088 } 5089 5090 if (keycmp(tp.item->key, searchkey)) { 5091 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id); 5092 Status = STATUS_INTERNAL_ERROR; 5093 goto end; 5094 } else 5095 RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM))); 5096 } 5097 5098 #ifdef DEBUG_PARANOID 5099 if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) { 5100 ERR("error - size has changed but extents not marked as changed\n"); 5101 int3; 5102 } 5103 #endif 5104 } else 5105 ii_offset = 0; 5106 5107 fcb->created = false; 5108 5109 if (!cache && fcb->inode_item_changed) { 5110 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG); 5111 if (!ii) { 5112 ERR("out of memory\n"); 5113 Status = STATUS_INSUFFICIENT_RESOURCES; 5114 goto end; 5115 } 5116 5117 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM)); 5118 5119 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM), 5120 Batch_Insert); 5121 if (!NT_SUCCESS(Status)) { 5122 ERR("insert_tree_item_batch returned %08x\n", Status); 5123 goto end; 5124 } 5125 5126 fcb->inode_item_changed = false; 5127 } 5128 5129 if (fcb->sd_dirty) { 5130 if (!fcb->sd_deleted) { 5131 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1, 5132 EA_NTACL_HASH, (uint8_t*)fcb->sd, (uint16_t)RtlLengthSecurityDescriptor(fcb->sd)); 5133 if (!NT_SUCCESS(Status)) { 5134 ERR("set_xattr returned %08x\n", Status); 5135 goto end; 5136 } 5137 } else { 5138 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1, EA_NTACL_HASH); 5139 if (!NT_SUCCESS(Status)) { 5140 ERR("delete_xattr returned %08x\n", Status); 5141 goto end; 5142 } 5143 } 5144 5145 fcb->sd_deleted = false; 5146 fcb->sd_dirty = false; 5147 } 5148 5149 if (fcb->atts_changed) { 5150 if (!fcb->atts_deleted) { 5151 uint8_t val[16], *val2; 5152 ULONG atts = fcb->atts; 5153 5154 TRACE("inserting new DOSATTRIB xattr\n"); 5155 5156 if (fcb->inode == SUBVOL_ROOT_INODE) 5157 atts &= ~FILE_ATTRIBUTE_READONLY; 5158 5159 val2 = &val[sizeof(val) - 1]; 5160 5161 do { 5162 uint8_t c = atts % 16; 5163 *val2 = c <= 9 ? (c + '0') : (c - 0xa + 'a'); 5164 5165 val2--; 5166 atts >>= 4; 5167 } while (atts != 0); 5168 5169 *val2 = 'x'; 5170 val2--; 5171 *val2 = '0'; 5172 5173 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1, 5174 EA_DOSATTRIB_HASH, val2, (uint16_t)(val + sizeof(val) - val2)); 5175 if (!NT_SUCCESS(Status)) { 5176 ERR("set_xattr returned %08x\n", Status); 5177 goto end; 5178 } 5179 } else { 5180 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1, EA_DOSATTRIB_HASH); 5181 if (!NT_SUCCESS(Status)) { 5182 ERR("delete_xattr returned %08x\n", Status); 5183 goto end; 5184 } 5185 } 5186 5187 fcb->atts_changed = false; 5188 fcb->atts_deleted = false; 5189 } 5190 5191 if (fcb->reparse_xattr_changed) { 5192 if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) { 5193 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1, 5194 EA_REPARSE_HASH, (uint8_t*)fcb->reparse_xattr.Buffer, (uint16_t)fcb->reparse_xattr.Length); 5195 if (!NT_SUCCESS(Status)) { 5196 ERR("set_xattr returned %08x\n", Status); 5197 goto end; 5198 } 5199 } else { 5200 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1, EA_REPARSE_HASH); 5201 if (!NT_SUCCESS(Status)) { 5202 ERR("delete_xattr returned %08x\n", Status); 5203 goto end; 5204 } 5205 } 5206 5207 fcb->reparse_xattr_changed = false; 5208 } 5209 5210 if (fcb->ea_changed) { 5211 if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) { 5212 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1, 5213 EA_EA_HASH, (uint8_t*)fcb->ea_xattr.Buffer, (uint16_t)fcb->ea_xattr.Length); 5214 if (!NT_SUCCESS(Status)) { 5215 ERR("set_xattr returned %08x\n", Status); 5216 goto end; 5217 } 5218 } else { 5219 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1, EA_EA_HASH); 5220 if (!NT_SUCCESS(Status)) { 5221 ERR("delete_xattr returned %08x\n", Status); 5222 goto end; 5223 } 5224 } 5225 5226 fcb->ea_changed = false; 5227 } 5228 5229 if (fcb->prop_compression_changed) { 5230 if (fcb->prop_compression == PropCompression_None) { 5231 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, EA_PROP_COMPRESSION_HASH); 5232 if (!NT_SUCCESS(Status)) { 5233 ERR("delete_xattr returned %08x\n", Status); 5234 goto end; 5235 } 5236 } else if (fcb->prop_compression == PropCompression_Zlib) { 5237 static const char zlib[] = "zlib"; 5238 5239 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, 5240 EA_PROP_COMPRESSION_HASH, (uint8_t*)zlib, sizeof(zlib) - 1); 5241 if (!NT_SUCCESS(Status)) { 5242 ERR("set_xattr returned %08x\n", Status); 5243 goto end; 5244 } 5245 } else if (fcb->prop_compression == PropCompression_LZO) { 5246 static const char lzo[] = "lzo"; 5247 5248 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, 5249 EA_PROP_COMPRESSION_HASH, (uint8_t*)lzo, sizeof(lzo) - 1); 5250 if (!NT_SUCCESS(Status)) { 5251 ERR("set_xattr returned %08x\n", Status); 5252 goto end; 5253 } 5254 } else if (fcb->prop_compression == PropCompression_ZSTD) { 5255 static const char zstd[] = "zstd"; 5256 5257 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, 5258 EA_PROP_COMPRESSION_HASH, (uint8_t*)zstd, sizeof(zstd) - 1); 5259 if (!NT_SUCCESS(Status)) { 5260 ERR("set_xattr returned %08x\n", Status); 5261 goto end; 5262 } 5263 } 5264 5265 fcb->prop_compression_changed = false; 5266 } 5267 5268 if (fcb->xattrs_changed) { 5269 LIST_ENTRY* le; 5270 5271 le = fcb->xattrs.Flink; 5272 while (le != &fcb->xattrs) { 5273 xattr* xa = CONTAINING_RECORD(le, xattr, list_entry); 5274 LIST_ENTRY* le2 = le->Flink; 5275 5276 if (xa->dirty) { 5277 uint32_t hash = calc_crc32c(0xfffffffe, (uint8_t*)xa->data, xa->namelen); 5278 5279 if (xa->valuelen == 0) { 5280 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, hash); 5281 if (!NT_SUCCESS(Status)) { 5282 ERR("delete_xattr returned %08x\n", Status); 5283 goto end; 5284 } 5285 5286 RemoveEntryList(&xa->list_entry); 5287 ExFreePool(xa); 5288 } else { 5289 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, 5290 hash, (uint8_t*)&xa->data[xa->namelen], xa->valuelen); 5291 if (!NT_SUCCESS(Status)) { 5292 ERR("set_xattr returned %08x\n", Status); 5293 goto end; 5294 } 5295 5296 xa->dirty = false; 5297 } 5298 } 5299 5300 le = le2; 5301 } 5302 5303 fcb->xattrs_changed = false; 5304 } 5305 5306 if ((fcb->case_sensitive_set && !fcb->case_sensitive)) { 5307 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE, 5308 sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH); 5309 if (!NT_SUCCESS(Status)) { 5310 ERR("delete_xattr returned %08x\n", Status); 5311 goto end; 5312 } 5313 5314 fcb->case_sensitive_set = false; 5315 } else if ((!fcb->case_sensitive_set && fcb->case_sensitive)) { 5316 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE, 5317 sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH, (uint8_t*)"1", 1); 5318 if (!NT_SUCCESS(Status)) { 5319 ERR("set_xattr returned %08x\n", Status); 5320 goto end; 5321 } 5322 5323 fcb->case_sensitive_set = true; 5324 } 5325 5326 if (fcb->inode_item.st_nlink == 0 && !fcb->marked_as_orphan) { // mark as orphan 5327 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE, 5328 fcb->inode, NULL, 0, Batch_Insert); 5329 if (!NT_SUCCESS(Status)) { 5330 ERR("insert_tree_item_batch returned %08x\n", Status); 5331 goto end; 5332 } 5333 5334 fcb->marked_as_orphan = true; 5335 } 5336 5337 Status = STATUS_SUCCESS; 5338 5339 end: 5340 if (fcb->dirty) { 5341 bool lock = false; 5342 5343 fcb->dirty = false; 5344 5345 if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->dirty_fcbs_lock)) { 5346 ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, true); 5347 lock = true; 5348 } 5349 5350 RemoveEntryList(&fcb->list_entry_dirty); 5351 5352 if (lock) 5353 ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock); 5354 } 5355 5356 return Status; 5357 } 5358 5359 void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, uint64_t address, uint64_t size) { 5360 int i; 5361 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size); 5362 5363 i = 0; 5364 while (superblock_addrs[i] != 0) { 5365 if (superblock_addrs[i] + sblen >= address && superblock_addrs[i] < address + size) { 5366 if (superblock_addrs[i] > address) 5367 add_trim_entry(dev, address, superblock_addrs[i] - address); 5368 5369 if (size <= superblock_addrs[i] + sblen - address) 5370 return; 5371 5372 size -= superblock_addrs[i] + sblen - address; 5373 address = superblock_addrs[i] + sblen; 5374 } else if (superblock_addrs[i] > address + size) 5375 break; 5376 5377 i++; 5378 } 5379 5380 add_trim_entry(dev, address, size); 5381 } 5382 5383 static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { 5384 NTSTATUS Status; 5385 KEY searchkey; 5386 traverse_ptr tp; 5387 uint64_t i, factor; 5388 #ifdef __REACTOS__ 5389 uint64_t phys_used; 5390 #endif 5391 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];; 5392 5393 TRACE("dropping chunk %I64x\n", c->offset); 5394 5395 if (c->chunk_item->type & BLOCK_FLAG_RAID0) 5396 factor = c->chunk_item->num_stripes; 5397 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 5398 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; 5399 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) 5400 factor = c->chunk_item->num_stripes - 1; 5401 else if (c->chunk_item->type & BLOCK_FLAG_RAID6) 5402 factor = c->chunk_item->num_stripes - 2; 5403 else // SINGLE, DUPLICATE, RAID1 5404 factor = 1; 5405 5406 // do TRIM 5407 if (Vcb->trim && !Vcb->options.no_trim) { 5408 uint64_t len = c->chunk_item->size / factor; 5409 5410 for (i = 0; i < c->chunk_item->num_stripes; i++) { 5411 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) 5412 add_trim_entry_avoid_sb(Vcb, c->devices[i], cis[i].offset, len); 5413 } 5414 } 5415 5416 if (!c->cache) { 5417 Status = load_stored_free_space_cache(Vcb, c, true, Irp); 5418 5419 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) 5420 WARN("load_stored_free_space_cache returned %08x\n", Status); 5421 } 5422 5423 // remove free space cache 5424 if (c->cache) { 5425 c->cache->deleted = true; 5426 5427 Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback); 5428 if (!NT_SUCCESS(Status)) { 5429 ERR("excise_extents returned %08x\n", Status); 5430 return Status; 5431 } 5432 5433 Status = flush_fcb(c->cache, true, batchlist, Irp); 5434 5435 free_fcb(c->cache); 5436 5437 if (c->cache->refcount == 0) 5438 reap_fcb(c->cache); 5439 5440 if (!NT_SUCCESS(Status)) { 5441 ERR("flush_fcb returned %08x\n", Status); 5442 return Status; 5443 } 5444 5445 searchkey.obj_id = FREE_SPACE_CACHE_ID; 5446 searchkey.obj_type = 0; 5447 searchkey.offset = c->offset; 5448 5449 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 5450 if (!NT_SUCCESS(Status)) { 5451 ERR("error - find_item returned %08x\n", Status); 5452 return Status; 5453 } 5454 5455 if (!keycmp(tp.item->key, searchkey)) { 5456 Status = delete_tree_item(Vcb, &tp); 5457 if (!NT_SUCCESS(Status)) { 5458 ERR("delete_tree_item returned %08x\n", Status); 5459 return Status; 5460 } 5461 } 5462 } 5463 5464 if (Vcb->space_root) { 5465 Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size, 5466 NULL, 0, Batch_DeleteFreeSpace); 5467 if (!NT_SUCCESS(Status)) { 5468 ERR("insert_tree_item_batch returned %08x\n", Status); 5469 return Status; 5470 } 5471 } 5472 5473 for (i = 0; i < c->chunk_item->num_stripes; i++) { 5474 if (!c->created) { 5475 // remove DEV_EXTENTs from tree 4 5476 searchkey.obj_id = cis[i].dev_id; 5477 searchkey.obj_type = TYPE_DEV_EXTENT; 5478 searchkey.offset = cis[i].offset; 5479 5480 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp); 5481 if (!NT_SUCCESS(Status)) { 5482 ERR("error - find_item returned %08x\n", Status); 5483 return Status; 5484 } 5485 5486 if (!keycmp(tp.item->key, searchkey)) { 5487 Status = delete_tree_item(Vcb, &tp); 5488 if (!NT_SUCCESS(Status)) { 5489 ERR("delete_tree_item returned %08x\n", Status); 5490 return Status; 5491 } 5492 5493 if (tp.item->size >= sizeof(DEV_EXTENT)) { 5494 DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data; 5495 5496 c->devices[i]->devitem.bytes_used -= de->length; 5497 5498 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) { 5499 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + de->length > Vcb->balance.opts[0].drange_start) 5500 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback); 5501 } else 5502 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, NULL, rollback); 5503 } 5504 } else 5505 WARN("could not find (%I64x,%x,%I64x) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); 5506 } else { 5507 uint64_t len = c->chunk_item->size / factor; 5508 5509 c->devices[i]->devitem.bytes_used -= len; 5510 5511 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) { 5512 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + len > Vcb->balance.opts[0].drange_start) 5513 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback); 5514 } else 5515 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, NULL, rollback); 5516 } 5517 } 5518 5519 // modify DEV_ITEMs in chunk tree 5520 for (i = 0; i < c->chunk_item->num_stripes; i++) { 5521 if (c->devices[i]) { 5522 uint64_t j; 5523 DEV_ITEM* di; 5524 5525 searchkey.obj_id = 1; 5526 searchkey.obj_type = TYPE_DEV_ITEM; 5527 searchkey.offset = c->devices[i]->devitem.dev_id; 5528 5529 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp); 5530 if (!NT_SUCCESS(Status)) { 5531 ERR("error - find_item returned %08x\n", Status); 5532 return Status; 5533 } 5534 5535 if (!keycmp(tp.item->key, searchkey)) { 5536 Status = delete_tree_item(Vcb, &tp); 5537 if (!NT_SUCCESS(Status)) { 5538 ERR("delete_tree_item returned %08x\n", Status); 5539 return Status; 5540 } 5541 5542 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG); 5543 if (!di) { 5544 ERR("out of memory\n"); 5545 return STATUS_INSUFFICIENT_RESOURCES; 5546 } 5547 5548 RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM)); 5549 5550 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp); 5551 if (!NT_SUCCESS(Status)) { 5552 ERR("insert_tree_item returned %08x\n", Status); 5553 return Status; 5554 } 5555 } 5556 5557 for (j = i + 1; j < c->chunk_item->num_stripes; j++) { 5558 if (c->devices[j] == c->devices[i]) 5559 c->devices[j] = NULL; 5560 } 5561 } 5562 } 5563 5564 if (!c->created) { 5565 // remove CHUNK_ITEM from chunk tree 5566 searchkey.obj_id = 0x100; 5567 searchkey.obj_type = TYPE_CHUNK_ITEM; 5568 searchkey.offset = c->offset; 5569 5570 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp); 5571 if (!NT_SUCCESS(Status)) { 5572 ERR("error - find_item returned %08x\n", Status); 5573 return Status; 5574 } 5575 5576 if (!keycmp(tp.item->key, searchkey)) { 5577 Status = delete_tree_item(Vcb, &tp); 5578 5579 if (!NT_SUCCESS(Status)) { 5580 ERR("delete_tree_item returned %08x\n", Status); 5581 return Status; 5582 } 5583 } else 5584 WARN("could not find CHUNK_ITEM for chunk %I64x\n", c->offset); 5585 5586 // remove BLOCK_GROUP_ITEM from extent tree 5587 searchkey.obj_id = c->offset; 5588 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM; 5589 searchkey.offset = 0xffffffffffffffff; 5590 5591 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 5592 if (!NT_SUCCESS(Status)) { 5593 ERR("error - find_item returned %08x\n", Status); 5594 return Status; 5595 } 5596 5597 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 5598 Status = delete_tree_item(Vcb, &tp); 5599 5600 if (!NT_SUCCESS(Status)) { 5601 ERR("delete_tree_item returned %08x\n", Status); 5602 return Status; 5603 } 5604 } else 5605 WARN("could not find BLOCK_GROUP_ITEM for chunk %I64x\n", c->offset); 5606 } 5607 5608 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) 5609 remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset); 5610 5611 RemoveEntryList(&c->list_entry); 5612 5613 // clear raid56 incompat flag if dropping last RAID5/6 chunk 5614 5615 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) { 5616 LIST_ENTRY* le; 5617 bool clear_flag = true; 5618 5619 le = Vcb->chunks.Flink; 5620 while (le != &Vcb->chunks) { 5621 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry); 5622 5623 if (c2->chunk_item->type & BLOCK_FLAG_RAID5 || c2->chunk_item->type & BLOCK_FLAG_RAID6) { 5624 clear_flag = false; 5625 break; 5626 } 5627 5628 le = le->Flink; 5629 } 5630 5631 if (clear_flag) 5632 Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID56; 5633 } 5634 5635 #ifndef __REACTOS__ 5636 uint64_t phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused); 5637 #else 5638 phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused); 5639 #endif 5640 5641 if (phys_used < Vcb->superblock.bytes_used) 5642 Vcb->superblock.bytes_used -= phys_used; 5643 else 5644 Vcb->superblock.bytes_used = 0; 5645 5646 ExFreePool(c->chunk_item); 5647 ExFreePool(c->devices); 5648 5649 while (!IsListEmpty(&c->space)) { 5650 space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry); 5651 5652 RemoveEntryList(&s->list_entry); 5653 ExFreePool(s); 5654 } 5655 5656 while (!IsListEmpty(&c->deleting)) { 5657 space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry); 5658 5659 RemoveEntryList(&s->list_entry); 5660 ExFreePool(s); 5661 } 5662 5663 release_chunk_lock(c, Vcb); 5664 5665 ExDeleteResourceLite(&c->partial_stripes_lock); 5666 ExDeleteResourceLite(&c->range_locks_lock); 5667 ExDeleteResourceLite(&c->lock); 5668 ExDeleteResourceLite(&c->changed_extents_lock); 5669 5670 ExFreePool(c); 5671 5672 return STATUS_SUCCESS; 5673 } 5674 5675 static NTSTATUS partial_stripe_read(device_extension* Vcb, chunk* c, partial_stripe* ps, uint64_t startoff, uint16_t parity, ULONG offset, ULONG len) { 5676 NTSTATUS Status; 5677 ULONG sl = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size); 5678 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 5679 5680 while (len > 0) { 5681 ULONG readlen = min(offset + len, offset + (sl - (offset % sl))) - offset; 5682 uint16_t stripe; 5683 5684 stripe = (parity + (offset / sl) + 1) % c->chunk_item->num_stripes; 5685 5686 if (c->devices[stripe]->devobj) { 5687 Status = sync_read_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size), 5688 readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), false); 5689 if (!NT_SUCCESS(Status)) { 5690 ERR("sync_read_phys returned %08x\n", Status); 5691 return Status; 5692 } 5693 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { 5694 uint16_t i; 5695 uint8_t* scratch; 5696 5697 scratch = ExAllocatePoolWithTag(NonPagedPool, readlen * Vcb->superblock.sector_size, ALLOC_TAG); 5698 if (!scratch) { 5699 ERR("out of memory\n"); 5700 return STATUS_INSUFFICIENT_RESOURCES; 5701 } 5702 5703 for (i = 0; i < c->chunk_item->num_stripes; i++) { 5704 if (i != stripe) { 5705 if (!c->devices[i]->devobj) { 5706 ExFreePool(scratch); 5707 return STATUS_UNEXPECTED_IO_ERROR; 5708 } 5709 5710 if (i == 0 || (stripe == 0 && i == 1)) { 5711 Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size), 5712 readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), false); 5713 if (!NT_SUCCESS(Status)) { 5714 ERR("sync_read_phys returned %08x\n", Status); 5715 ExFreePool(scratch); 5716 return Status; 5717 } 5718 } else { 5719 Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size), 5720 readlen * Vcb->superblock.sector_size, scratch, false); 5721 if (!NT_SUCCESS(Status)) { 5722 ERR("sync_read_phys returned %08x\n", Status); 5723 ExFreePool(scratch); 5724 return Status; 5725 } 5726 5727 do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch, readlen * Vcb->superblock.sector_size); 5728 } 5729 } 5730 } 5731 5732 ExFreePool(scratch); 5733 } else { 5734 uint8_t* scratch; 5735 uint16_t k, i, logstripe, error_stripe, num_errors = 0; 5736 5737 scratch = ExAllocatePoolWithTag(NonPagedPool, (c->chunk_item->num_stripes + 2) * readlen * Vcb->superblock.sector_size, ALLOC_TAG); 5738 if (!scratch) { 5739 ERR("out of memory\n"); 5740 return STATUS_INSUFFICIENT_RESOURCES; 5741 } 5742 5743 i = (parity + 1) % c->chunk_item->num_stripes; 5744 for (k = 0; k < c->chunk_item->num_stripes; k++) { 5745 if (i != stripe) { 5746 if (c->devices[i]->devobj) { 5747 Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size), 5748 readlen * Vcb->superblock.sector_size, scratch + (k * readlen * Vcb->superblock.sector_size), false); 5749 if (!NT_SUCCESS(Status)) { 5750 ERR("sync_read_phys returned %08x\n", Status); 5751 num_errors++; 5752 error_stripe = k; 5753 } 5754 } else { 5755 num_errors++; 5756 error_stripe = k; 5757 } 5758 5759 if (num_errors > 1) { 5760 ExFreePool(scratch); 5761 return STATUS_UNEXPECTED_IO_ERROR; 5762 } 5763 } else 5764 logstripe = k; 5765 5766 i = (i + 1) % c->chunk_item->num_stripes; 5767 } 5768 5769 if (num_errors == 0 || error_stripe == c->chunk_item->num_stripes - 1) { 5770 for (k = 0; k < c->chunk_item->num_stripes - 1; k++) { 5771 if (k != logstripe) { 5772 if (k == 0 || (k == 1 && logstripe == 0)) { 5773 RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size), 5774 readlen * Vcb->superblock.sector_size); 5775 } else { 5776 do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size), 5777 readlen * Vcb->superblock.sector_size); 5778 } 5779 } 5780 } 5781 } else { 5782 raid6_recover2(scratch, c->chunk_item->num_stripes, readlen * Vcb->superblock.sector_size, logstripe, 5783 error_stripe, scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size)); 5784 5785 RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size), 5786 readlen * Vcb->superblock.sector_size); 5787 } 5788 5789 ExFreePool(scratch); 5790 } 5791 5792 offset += readlen; 5793 len -= readlen; 5794 } 5795 5796 return STATUS_SUCCESS; 5797 } 5798 5799 NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps) { 5800 NTSTATUS Status; 5801 uint16_t parity2, stripe, startoffstripe; 5802 uint8_t* data; 5803 uint64_t startoff; 5804 ULONG runlength, index, last1; 5805 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 5806 LIST_ENTRY* le; 5807 uint16_t k, num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2); 5808 uint64_t ps_length = num_data_stripes * c->chunk_item->stripe_length; 5809 ULONG stripe_length = (ULONG)c->chunk_item->stripe_length; 5810 5811 // FIXME - do writes asynchronously? 5812 5813 get_raid0_offset(ps->address - c->offset, stripe_length, num_data_stripes, &startoff, &startoffstripe); 5814 5815 parity2 = (((ps->address - c->offset) / ps_length) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; 5816 5817 // read data (or reconstruct if degraded) 5818 5819 runlength = RtlFindFirstRunClear(&ps->bmp, &index); 5820 last1 = 0; 5821 5822 while (runlength != 0) { 5823 if (index >= ps->bmplen) 5824 break; 5825 5826 if (index + runlength >= ps->bmplen) { 5827 runlength = ps->bmplen - index; 5828 5829 if (runlength == 0) 5830 break; 5831 } 5832 5833 if (index > last1) { 5834 Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, index - last1); 5835 if (!NT_SUCCESS(Status)) { 5836 ERR("partial_stripe_read returned %08x\n", Status); 5837 return Status; 5838 } 5839 } 5840 5841 last1 = index + runlength; 5842 5843 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); 5844 } 5845 5846 if (last1 < ps_length / Vcb->superblock.sector_size) { 5847 Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, (ULONG)((ps_length / Vcb->superblock.sector_size) - last1)); 5848 if (!NT_SUCCESS(Status)) { 5849 ERR("partial_stripe_read returned %08x\n", Status); 5850 return Status; 5851 } 5852 } 5853 5854 // set unallocated data to 0 5855 le = c->space.Flink; 5856 while (le != &c->space) { 5857 space* s = CONTAINING_RECORD(le, space, list_entry); 5858 5859 if (s->address + s->size > ps->address && s->address < ps->address + ps_length) { 5860 uint64_t start = max(ps->address, s->address); 5861 uint64_t end = min(ps->address + ps_length, s->address + s->size); 5862 5863 RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start)); 5864 } else if (s->address >= ps->address + ps_length) 5865 break; 5866 5867 le = le->Flink; 5868 } 5869 5870 le = c->deleting.Flink; 5871 while (le != &c->deleting) { 5872 space* s = CONTAINING_RECORD(le, space, list_entry); 5873 5874 if (s->address + s->size > ps->address && s->address < ps->address + ps_length) { 5875 uint64_t start = max(ps->address, s->address); 5876 uint64_t end = min(ps->address + ps_length, s->address + s->size); 5877 5878 RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start)); 5879 } else if (s->address >= ps->address + ps_length) 5880 break; 5881 5882 le = le->Flink; 5883 } 5884 5885 stripe = (parity2 + 1) % c->chunk_item->num_stripes; 5886 5887 data = ps->data; 5888 for (k = 0; k < num_data_stripes; k++) { 5889 if (c->devices[stripe]->devobj) { 5890 Status = write_data_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff, data, stripe_length); 5891 if (!NT_SUCCESS(Status)) { 5892 ERR("write_data_phys returned %08x\n", Status); 5893 return Status; 5894 } 5895 } 5896 5897 data += stripe_length; 5898 stripe = (stripe + 1) % c->chunk_item->num_stripes; 5899 } 5900 5901 // write parity 5902 if (c->chunk_item->type & BLOCK_FLAG_RAID5) { 5903 if (c->devices[parity2]->devobj) { 5904 uint16_t i; 5905 5906 for (i = 1; i < c->chunk_item->num_stripes - 1; i++) { 5907 do_xor(ps->data, ps->data + (i * stripe_length), stripe_length); 5908 } 5909 5910 Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff, ps->data, stripe_length); 5911 if (!NT_SUCCESS(Status)) { 5912 ERR("write_data_phys returned %08x\n", Status); 5913 return Status; 5914 } 5915 } 5916 } else { 5917 uint16_t parity1 = (parity2 + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; 5918 5919 if (c->devices[parity1]->devobj || c->devices[parity2]->devobj) { 5920 uint8_t* scratch; 5921 uint16_t i; 5922 5923 scratch = ExAllocatePoolWithTag(NonPagedPool, stripe_length * 2, ALLOC_TAG); 5924 if (!scratch) { 5925 ERR("out of memory\n"); 5926 return STATUS_INSUFFICIENT_RESOURCES; 5927 } 5928 5929 i = c->chunk_item->num_stripes - 3; 5930 5931 while (true) { 5932 if (i == c->chunk_item->num_stripes - 3) { 5933 RtlCopyMemory(scratch, ps->data + (i * stripe_length), stripe_length); 5934 RtlCopyMemory(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length); 5935 } else { 5936 do_xor(scratch, ps->data + (i * stripe_length), stripe_length); 5937 5938 galois_double(scratch + stripe_length, stripe_length); 5939 do_xor(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length); 5940 } 5941 5942 if (i == 0) 5943 break; 5944 5945 i--; 5946 } 5947 5948 if (c->devices[parity1]->devobj) { 5949 Status = write_data_phys(c->devices[parity1]->devobj, c->devices[parity1]->fileobj, cis[parity1].offset + startoff, scratch, stripe_length); 5950 if (!NT_SUCCESS(Status)) { 5951 ERR("write_data_phys returned %08x\n", Status); 5952 ExFreePool(scratch); 5953 return Status; 5954 } 5955 } 5956 5957 if (c->devices[parity2]->devobj) { 5958 Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff, 5959 scratch + stripe_length, stripe_length); 5960 if (!NT_SUCCESS(Status)) { 5961 ERR("write_data_phys returned %08x\n", Status); 5962 ExFreePool(scratch); 5963 return Status; 5964 } 5965 } 5966 5967 ExFreePool(scratch); 5968 } 5969 } 5970 5971 return STATUS_SUCCESS; 5972 } 5973 5974 static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { 5975 LIST_ENTRY *le, *le2; 5976 NTSTATUS Status; 5977 uint64_t used_minus_cache; 5978 5979 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true); 5980 5981 // FIXME - do tree chunks before data chunks 5982 5983 le = Vcb->chunks.Flink; 5984 while (le != &Vcb->chunks) { 5985 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 5986 5987 le2 = le->Flink; 5988 5989 if (c->changed) { 5990 acquire_chunk_lock(c, Vcb); 5991 5992 // flush partial stripes 5993 if (!Vcb->readonly && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) { 5994 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true); 5995 5996 while (!IsListEmpty(&c->partial_stripes)) { 5997 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry); 5998 5999 Status = flush_partial_stripe(Vcb, c, ps); 6000 6001 if (ps->bmparr) 6002 ExFreePool(ps->bmparr); 6003 6004 ExFreePool(ps); 6005 6006 if (!NT_SUCCESS(Status)) { 6007 ERR("flush_partial_stripe returned %08x\n", Status); 6008 ExReleaseResourceLite(&c->partial_stripes_lock); 6009 release_chunk_lock(c, Vcb); 6010 ExReleaseResourceLite(&Vcb->chunk_lock); 6011 return Status; 6012 } 6013 } 6014 6015 ExReleaseResourceLite(&c->partial_stripes_lock); 6016 } 6017 6018 if (c->list_entry_balance.Flink) { 6019 release_chunk_lock(c, Vcb); 6020 le = le2; 6021 continue; 6022 } 6023 6024 if (c->space_changed || c->created) { 6025 bool created = c->created; 6026 6027 used_minus_cache = c->used; 6028 6029 // subtract self-hosted cache 6030 if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) { 6031 LIST_ENTRY* le3; 6032 6033 le3 = c->cache->extents.Flink; 6034 while (le3 != &c->cache->extents) { 6035 extent* ext = CONTAINING_RECORD(le3, extent, list_entry); 6036 EXTENT_DATA* ed = &ext->extent_data; 6037 6038 if (!ext->ignore) { 6039 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { 6040 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 6041 6042 if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size) 6043 used_minus_cache -= ed2->size; 6044 } 6045 } 6046 6047 le3 = le3->Flink; 6048 } 6049 } 6050 6051 if (used_minus_cache == 0) { 6052 Status = drop_chunk(Vcb, c, batchlist, Irp, rollback); 6053 if (!NT_SUCCESS(Status)) { 6054 ERR("drop_chunk returned %08x\n", Status); 6055 release_chunk_lock(c, Vcb); 6056 ExReleaseResourceLite(&Vcb->chunk_lock); 6057 return Status; 6058 } 6059 6060 // c is now freed, so avoid releasing non-existent lock 6061 le = le2; 6062 continue; 6063 } else if (c->created) { 6064 Status = create_chunk(Vcb, c, Irp); 6065 if (!NT_SUCCESS(Status)) { 6066 ERR("create_chunk returned %08x\n", Status); 6067 release_chunk_lock(c, Vcb); 6068 ExReleaseResourceLite(&Vcb->chunk_lock); 6069 return Status; 6070 } 6071 } 6072 6073 if (used_minus_cache > 0 || created) 6074 release_chunk_lock(c, Vcb); 6075 } else 6076 release_chunk_lock(c, Vcb); 6077 } 6078 6079 le = le2; 6080 } 6081 6082 ExReleaseResourceLite(&Vcb->chunk_lock); 6083 6084 return STATUS_SUCCESS; 6085 } 6086 6087 static NTSTATUS delete_root_ref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, uint64_t parinode, PANSI_STRING utf8, PIRP Irp) { 6088 KEY searchkey; 6089 traverse_ptr tp; 6090 NTSTATUS Status; 6091 6092 searchkey.obj_id = parsubvolid; 6093 searchkey.obj_type = TYPE_ROOT_REF; 6094 searchkey.offset = subvolid; 6095 6096 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6097 if (!NT_SUCCESS(Status)) { 6098 ERR("error - find_item returned %08x\n", Status); 6099 return Status; 6100 } 6101 6102 if (!keycmp(searchkey, tp.item->key)) { 6103 if (tp.item->size < sizeof(ROOT_REF)) { 6104 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF)); 6105 return STATUS_INTERNAL_ERROR; 6106 } else { 6107 ROOT_REF* rr; 6108 ULONG len; 6109 6110 rr = (ROOT_REF*)tp.item->data; 6111 len = tp.item->size; 6112 6113 do { 6114 uint16_t itemlen; 6115 6116 if (len < sizeof(ROOT_REF) || len < offsetof(ROOT_REF, name[0]) + rr->n) { 6117 ERR("(%I64x,%x,%I64x) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); 6118 break; 6119 } 6120 6121 itemlen = (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n; 6122 6123 if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) { 6124 uint16_t newlen = tp.item->size - itemlen; 6125 6126 Status = delete_tree_item(Vcb, &tp); 6127 if (!NT_SUCCESS(Status)) { 6128 ERR("delete_tree_item returned %08x\n", Status); 6129 return Status; 6130 } 6131 6132 if (newlen == 0) { 6133 TRACE("deleting (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); 6134 } else { 6135 uint8_t *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff; 6136 6137 if (!newrr) { 6138 ERR("out of memory\n"); 6139 return STATUS_INSUFFICIENT_RESOURCES; 6140 } 6141 6142 TRACE("modifying (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); 6143 6144 if ((uint8_t*)rr > tp.item->data) { 6145 RtlCopyMemory(newrr, tp.item->data, (uint8_t*)rr - tp.item->data); 6146 rroff = newrr + ((uint8_t*)rr - tp.item->data); 6147 } else { 6148 rroff = newrr; 6149 } 6150 6151 if ((uint8_t*)&rr->name[rr->n] < tp.item->data + tp.item->size) 6152 RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((uint8_t*)&rr->name[rr->n] - tp.item->data)); 6153 6154 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp); 6155 if (!NT_SUCCESS(Status)) { 6156 ERR("insert_tree_item returned %08x\n", Status); 6157 ExFreePool(newrr); 6158 return Status; 6159 } 6160 } 6161 6162 break; 6163 } 6164 6165 if (len > itemlen) { 6166 len -= itemlen; 6167 rr = (ROOT_REF*)&rr->name[rr->n]; 6168 } else 6169 break; 6170 } while (len > 0); 6171 } 6172 } else { 6173 WARN("could not find ROOT_REF entry for subvol %I64x in %I64x\n", searchkey.offset, searchkey.obj_id); 6174 return STATUS_NOT_FOUND; 6175 } 6176 6177 return STATUS_SUCCESS; 6178 } 6179 6180 #ifdef _MSC_VER 6181 #pragma warning(push) 6182 #pragma warning(suppress: 28194) 6183 #endif 6184 static NTSTATUS add_root_ref(_In_ device_extension* Vcb, _In_ uint64_t subvolid, _In_ uint64_t parsubvolid, _In_ __drv_aliasesMem ROOT_REF* rr, _In_opt_ PIRP Irp) { 6185 KEY searchkey; 6186 traverse_ptr tp; 6187 NTSTATUS Status; 6188 6189 searchkey.obj_id = parsubvolid; 6190 searchkey.obj_type = TYPE_ROOT_REF; 6191 searchkey.offset = subvolid; 6192 6193 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6194 if (!NT_SUCCESS(Status)) { 6195 ERR("error - find_item returned %08x\n", Status); 6196 return Status; 6197 } 6198 6199 if (!keycmp(searchkey, tp.item->key)) { 6200 uint16_t rrsize = tp.item->size + (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n; 6201 uint8_t* rr2; 6202 6203 rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG); 6204 if (!rr2) { 6205 ERR("out of memory\n"); 6206 return STATUS_INSUFFICIENT_RESOURCES; 6207 } 6208 6209 if (tp.item->size > 0) 6210 RtlCopyMemory(rr2, tp.item->data, tp.item->size); 6211 6212 RtlCopyMemory(rr2 + tp.item->size, rr, offsetof(ROOT_REF, name[0]) + rr->n); 6213 ExFreePool(rr); 6214 6215 Status = delete_tree_item(Vcb, &tp); 6216 if (!NT_SUCCESS(Status)) { 6217 ERR("delete_tree_item returned %08x\n", Status); 6218 ExFreePool(rr2); 6219 return Status; 6220 } 6221 6222 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp); 6223 if (!NT_SUCCESS(Status)) { 6224 ERR("insert_tree_item returned %08x\n", Status); 6225 ExFreePool(rr2); 6226 return Status; 6227 } 6228 } else { 6229 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n, NULL, Irp); 6230 if (!NT_SUCCESS(Status)) { 6231 ERR("insert_tree_item returned %08x\n", Status); 6232 ExFreePool(rr); 6233 return Status; 6234 } 6235 } 6236 6237 return STATUS_SUCCESS; 6238 } 6239 #ifdef _MSC_VER 6240 #pragma warning(pop) 6241 #endif 6242 6243 static NTSTATUS update_root_backref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, PIRP Irp) { 6244 KEY searchkey; 6245 traverse_ptr tp; 6246 uint8_t* data; 6247 uint16_t datalen; 6248 NTSTATUS Status; 6249 6250 searchkey.obj_id = parsubvolid; 6251 searchkey.obj_type = TYPE_ROOT_REF; 6252 searchkey.offset = subvolid; 6253 6254 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6255 if (!NT_SUCCESS(Status)) { 6256 ERR("error - find_item returned %08x\n", Status); 6257 return Status; 6258 } 6259 6260 if (!keycmp(tp.item->key, searchkey) && tp.item->size > 0) { 6261 datalen = tp.item->size; 6262 6263 data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); 6264 if (!data) { 6265 ERR("out of memory\n"); 6266 return STATUS_INSUFFICIENT_RESOURCES; 6267 } 6268 6269 RtlCopyMemory(data, tp.item->data, datalen); 6270 } else { 6271 datalen = 0; 6272 data = NULL; 6273 } 6274 6275 searchkey.obj_id = subvolid; 6276 searchkey.obj_type = TYPE_ROOT_BACKREF; 6277 searchkey.offset = parsubvolid; 6278 6279 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6280 if (!NT_SUCCESS(Status)) { 6281 ERR("error - find_item returned %08x\n", Status); 6282 6283 if (datalen > 0) 6284 ExFreePool(data); 6285 6286 return Status; 6287 } 6288 6289 if (!keycmp(tp.item->key, searchkey)) { 6290 Status = delete_tree_item(Vcb, &tp); 6291 if (!NT_SUCCESS(Status)) { 6292 ERR("delete_tree_item returned %08x\n", Status); 6293 6294 if (datalen > 0) 6295 ExFreePool(data); 6296 6297 return Status; 6298 } 6299 } 6300 6301 if (datalen > 0) { 6302 Status = insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp); 6303 if (!NT_SUCCESS(Status)) { 6304 ERR("insert_tree_item returned %08x\n", Status); 6305 ExFreePool(data); 6306 return Status; 6307 } 6308 } 6309 6310 return STATUS_SUCCESS; 6311 } 6312 6313 static NTSTATUS add_root_item_to_cache(device_extension* Vcb, uint64_t root, PIRP Irp) { 6314 KEY searchkey; 6315 traverse_ptr tp; 6316 NTSTATUS Status; 6317 6318 searchkey.obj_id = root; 6319 searchkey.obj_type = TYPE_ROOT_ITEM; 6320 searchkey.offset = 0xffffffffffffffff; 6321 6322 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6323 if (!NT_SUCCESS(Status)) { 6324 ERR("error - find_item returned %08x\n", Status); 6325 return Status; 6326 } 6327 6328 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 6329 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id); 6330 return STATUS_INTERNAL_ERROR; 6331 } 6332 6333 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed 6334 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); 6335 if (!ri) { 6336 ERR("out of memory\n"); 6337 return STATUS_INSUFFICIENT_RESOURCES; 6338 } 6339 6340 if (tp.item->size > 0) 6341 RtlCopyMemory(ri, tp.item->data, tp.item->size); 6342 6343 RtlZeroMemory(((uint8_t*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size); 6344 6345 Status = delete_tree_item(Vcb, &tp); 6346 if (!NT_SUCCESS(Status)) { 6347 ERR("delete_tree_item returned %08x\n", Status); 6348 ExFreePool(ri); 6349 return Status; 6350 } 6351 6352 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp); 6353 if (!NT_SUCCESS(Status)) { 6354 ERR("insert_tree_item returned %08x\n", Status); 6355 ExFreePool(ri); 6356 return Status; 6357 } 6358 } else { 6359 tp.tree->write = true; 6360 } 6361 6362 return STATUS_SUCCESS; 6363 } 6364 6365 static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp) { 6366 NTSTATUS Status; 6367 6368 // if fileref created and then immediately deleted, do nothing 6369 if (fileref->created && fileref->deleted) { 6370 fileref->dirty = false; 6371 return STATUS_SUCCESS; 6372 } 6373 6374 if (fileref->fcb->ads) { 6375 fileref->dirty = false; 6376 return STATUS_SUCCESS; 6377 } 6378 6379 if (fileref->created) { 6380 uint16_t disize; 6381 DIR_ITEM *di, *di2; 6382 uint32_t crc32; 6383 6384 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6385 6386 disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length); 6387 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); 6388 if (!di) { 6389 ERR("out of memory\n"); 6390 return STATUS_INSUFFICIENT_RESOURCES; 6391 } 6392 6393 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6394 di->key.obj_id = fileref->fcb->inode; 6395 di->key.obj_type = TYPE_INODE_ITEM; 6396 di->key.offset = 0; 6397 } else { // subvolume 6398 di->key.obj_id = fileref->fcb->subvol->id; 6399 di->key.obj_type = TYPE_ROOT_ITEM; 6400 di->key.offset = 0xffffffffffffffff; 6401 } 6402 6403 di->transid = fileref->fcb->Vcb->superblock.generation; 6404 di->m = 0; 6405 di->n = (uint16_t)fileref->dc->utf8.Length; 6406 di->type = fileref->fcb->type; 6407 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6408 6409 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); 6410 if (!di2) { 6411 ERR("out of memory\n"); 6412 return STATUS_INSUFFICIENT_RESOURCES; 6413 } 6414 6415 RtlCopyMemory(di2, di, disize); 6416 6417 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, 6418 fileref->dc->index, di, disize, Batch_Insert); 6419 if (!NT_SUCCESS(Status)) { 6420 ERR("insert_tree_item_batch returned %08x\n", Status); 6421 return Status; 6422 } 6423 6424 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32, 6425 di2, disize, Batch_DirItem); 6426 if (!NT_SUCCESS(Status)) { 6427 ERR("insert_tree_item_batch returned %08x\n", Status); 6428 return Status; 6429 } 6430 6431 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6432 INODE_REF* ir; 6433 6434 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG); 6435 if (!ir) { 6436 ERR("out of memory\n"); 6437 return STATUS_INSUFFICIENT_RESOURCES; 6438 } 6439 6440 ir->index = fileref->dc->index; 6441 ir->n = fileref->dc->utf8.Length; 6442 RtlCopyMemory(ir->name, fileref->dc->utf8.Buffer, ir->n); 6443 6444 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, 6445 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef); 6446 if (!NT_SUCCESS(Status)) { 6447 ERR("insert_tree_item_batch returned %08x\n", Status); 6448 return Status; 6449 } 6450 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { 6451 ULONG rrlen; 6452 ROOT_REF* rr; 6453 6454 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length; 6455 6456 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG); 6457 if (!rr) { 6458 ERR("out of memory\n"); 6459 return STATUS_INSUFFICIENT_RESOURCES; 6460 } 6461 6462 rr->dir = fileref->parent->fcb->inode; 6463 rr->index = fileref->dc->index; 6464 rr->n = fileref->dc->utf8.Length; 6465 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6466 6467 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp); 6468 if (!NT_SUCCESS(Status)) { 6469 ERR("add_root_ref returned %08x\n", Status); 6470 return Status; 6471 } 6472 6473 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp); 6474 if (!NT_SUCCESS(Status)) { 6475 ERR("update_root_backref returned %08x\n", Status); 6476 return Status; 6477 } 6478 } 6479 6480 fileref->created = false; 6481 } else if (fileref->deleted) { 6482 uint32_t crc32; 6483 ANSI_STRING* name; 6484 DIR_ITEM* di; 6485 6486 name = &fileref->oldutf8; 6487 6488 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)name->Buffer, name->Length); 6489 6490 di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG); 6491 if (!di) { 6492 ERR("out of memory\n"); 6493 return STATUS_INSUFFICIENT_RESOURCES; 6494 } 6495 6496 di->m = 0; 6497 di->n = name->Length; 6498 RtlCopyMemory(di->name, name->Buffer, name->Length); 6499 6500 // delete DIR_ITEM (0x54) 6501 6502 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, 6503 crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem); 6504 if (!NT_SUCCESS(Status)) { 6505 ERR("insert_tree_item_batch returned %08x\n", Status); 6506 return Status; 6507 } 6508 6509 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6510 INODE_REF* ir; 6511 6512 // delete INODE_REF (0xc) 6513 6514 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG); 6515 if (!ir) { 6516 ERR("out of memory\n"); 6517 return STATUS_INSUFFICIENT_RESOURCES; 6518 } 6519 6520 ir->index = fileref->oldindex; 6521 ir->n = name->Length; 6522 RtlCopyMemory(ir->name, name->Buffer, name->Length); 6523 6524 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, 6525 fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef); 6526 if (!NT_SUCCESS(Status)) { 6527 ERR("insert_tree_item_batch returned %08x\n", Status); 6528 return Status; 6529 } 6530 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume 6531 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp); 6532 if (!NT_SUCCESS(Status)) { 6533 ERR("delete_root_ref returned %08x\n", Status); 6534 return Status; 6535 } 6536 6537 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp); 6538 if (!NT_SUCCESS(Status)) { 6539 ERR("update_root_backref returned %08x\n", Status); 6540 return Status; 6541 } 6542 } 6543 6544 // delete DIR_INDEX (0x60) 6545 6546 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, 6547 fileref->oldindex, NULL, 0, Batch_Delete); 6548 if (!NT_SUCCESS(Status)) { 6549 ERR("insert_tree_item_batch returned %08x\n", Status); 6550 return Status; 6551 } 6552 6553 if (fileref->oldutf8.Buffer) { 6554 ExFreePool(fileref->oldutf8.Buffer); 6555 fileref->oldutf8.Buffer = NULL; 6556 } 6557 } else { // rename or change type 6558 PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->dc->utf8; 6559 uint32_t crc32, oldcrc32; 6560 uint16_t disize; 6561 DIR_ITEM *olddi, *di, *di2; 6562 6563 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6564 6565 if (!fileref->oldutf8.Buffer) 6566 oldcrc32 = crc32; 6567 else 6568 oldcrc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->oldutf8.Buffer, fileref->oldutf8.Length); 6569 6570 olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG); 6571 if (!olddi) { 6572 ERR("out of memory\n"); 6573 return STATUS_INSUFFICIENT_RESOURCES; 6574 } 6575 6576 olddi->m = 0; 6577 olddi->n = (uint16_t)oldutf8->Length; 6578 RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length); 6579 6580 // delete DIR_ITEM (0x54) 6581 6582 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, 6583 oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem); 6584 if (!NT_SUCCESS(Status)) { 6585 ERR("insert_tree_item_batch returned %08x\n", Status); 6586 ExFreePool(olddi); 6587 return Status; 6588 } 6589 6590 // add DIR_ITEM (0x54) 6591 6592 disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length); 6593 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); 6594 if (!di) { 6595 ERR("out of memory\n"); 6596 return STATUS_INSUFFICIENT_RESOURCES; 6597 } 6598 6599 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); 6600 if (!di2) { 6601 ERR("out of memory\n"); 6602 ExFreePool(di); 6603 return STATUS_INSUFFICIENT_RESOURCES; 6604 } 6605 6606 if (fileref->dc) 6607 di->key = fileref->dc->key; 6608 else if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6609 di->key.obj_id = fileref->fcb->inode; 6610 di->key.obj_type = TYPE_INODE_ITEM; 6611 di->key.offset = 0; 6612 } else { // subvolume 6613 di->key.obj_id = fileref->fcb->subvol->id; 6614 di->key.obj_type = TYPE_ROOT_ITEM; 6615 di->key.offset = 0xffffffffffffffff; 6616 } 6617 6618 di->transid = fileref->fcb->Vcb->superblock.generation; 6619 di->m = 0; 6620 di->n = (uint16_t)fileref->dc->utf8.Length; 6621 di->type = fileref->fcb->type; 6622 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6623 6624 RtlCopyMemory(di2, di, disize); 6625 6626 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32, 6627 di, disize, Batch_DirItem); 6628 if (!NT_SUCCESS(Status)) { 6629 ERR("insert_tree_item_batch returned %08x\n", Status); 6630 ExFreePool(di2); 6631 ExFreePool(di); 6632 return Status; 6633 } 6634 6635 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { 6636 INODE_REF *ir, *ir2; 6637 6638 // delete INODE_REF (0xc) 6639 6640 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG); 6641 if (!ir) { 6642 ERR("out of memory\n"); 6643 ExFreePool(di2); 6644 return STATUS_INSUFFICIENT_RESOURCES; 6645 } 6646 6647 ir->index = fileref->dc->index; 6648 ir->n = oldutf8->Length; 6649 RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n); 6650 6651 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, 6652 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef); 6653 if (!NT_SUCCESS(Status)) { 6654 ERR("insert_tree_item_batch returned %08x\n", Status); 6655 ExFreePool(ir); 6656 ExFreePool(di2); 6657 return Status; 6658 } 6659 6660 // add INODE_REF (0xc) 6661 6662 ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG); 6663 if (!ir2) { 6664 ERR("out of memory\n"); 6665 ExFreePool(di2); 6666 return STATUS_INSUFFICIENT_RESOURCES; 6667 } 6668 6669 ir2->index = fileref->dc->index; 6670 ir2->n = fileref->dc->utf8.Length; 6671 RtlCopyMemory(ir2->name, fileref->dc->utf8.Buffer, ir2->n); 6672 6673 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, 6674 ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef); 6675 if (!NT_SUCCESS(Status)) { 6676 ERR("insert_tree_item_batch returned %08x\n", Status); 6677 ExFreePool(ir2); 6678 ExFreePool(di2); 6679 return Status; 6680 } 6681 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume 6682 ULONG rrlen; 6683 ROOT_REF* rr; 6684 6685 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp); 6686 if (!NT_SUCCESS(Status)) { 6687 ERR("delete_root_ref returned %08x\n", Status); 6688 ExFreePool(di2); 6689 return Status; 6690 } 6691 6692 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length; 6693 6694 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG); 6695 if (!rr) { 6696 ERR("out of memory\n"); 6697 ExFreePool(di2); 6698 return STATUS_INSUFFICIENT_RESOURCES; 6699 } 6700 6701 rr->dir = fileref->parent->fcb->inode; 6702 rr->index = fileref->dc->index; 6703 rr->n = fileref->dc->utf8.Length; 6704 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); 6705 6706 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp); 6707 if (!NT_SUCCESS(Status)) { 6708 ERR("add_root_ref returned %08x\n", Status); 6709 ExFreePool(di2); 6710 return Status; 6711 } 6712 6713 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp); 6714 if (!NT_SUCCESS(Status)) { 6715 ERR("update_root_backref returned %08x\n", Status); 6716 ExFreePool(di2); 6717 return Status; 6718 } 6719 } 6720 6721 // delete DIR_INDEX (0x60) 6722 6723 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, 6724 fileref->dc->index, NULL, 0, Batch_Delete); 6725 if (!NT_SUCCESS(Status)) { 6726 ERR("insert_tree_item_batch returned %08x\n", Status); 6727 ExFreePool(di2); 6728 return Status; 6729 } 6730 6731 // add DIR_INDEX (0x60) 6732 6733 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, 6734 fileref->dc->index, di2, disize, Batch_Insert); 6735 if (!NT_SUCCESS(Status)) { 6736 ERR("insert_tree_item_batch returned %08x\n", Status); 6737 ExFreePool(di2); 6738 return Status; 6739 } 6740 6741 if (fileref->oldutf8.Buffer) { 6742 ExFreePool(fileref->oldutf8.Buffer); 6743 fileref->oldutf8.Buffer = NULL; 6744 } 6745 } 6746 6747 fileref->dirty = false; 6748 6749 return STATUS_SUCCESS; 6750 } 6751 6752 static void flush_disk_caches(device_extension* Vcb) { 6753 LIST_ENTRY* le; 6754 ioctl_context context; 6755 ULONG num; 6756 #ifdef __REACTOS__ 6757 unsigned int i; 6758 #endif 6759 6760 context.left = 0; 6761 6762 le = Vcb->devices.Flink; 6763 6764 while (le != &Vcb->devices) { 6765 device* dev = CONTAINING_RECORD(le, device, list_entry); 6766 6767 if (dev->devobj && !dev->readonly && dev->can_flush) 6768 context.left++; 6769 6770 le = le->Flink; 6771 } 6772 6773 if (context.left == 0) 6774 return; 6775 6776 num = 0; 6777 6778 KeInitializeEvent(&context.Event, NotificationEvent, false); 6779 6780 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG); 6781 if (!context.stripes) { 6782 ERR("out of memory\n"); 6783 return; 6784 } 6785 6786 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left); 6787 6788 le = Vcb->devices.Flink; 6789 6790 while (le != &Vcb->devices) { 6791 device* dev = CONTAINING_RECORD(le, device, list_entry); 6792 6793 if (dev->devobj && !dev->readonly && dev->can_flush) { 6794 PIO_STACK_LOCATION IrpSp; 6795 ioctl_context_stripe* stripe = &context.stripes[num]; 6796 6797 RtlZeroMemory(&stripe->apte, sizeof(ATA_PASS_THROUGH_EX)); 6798 6799 stripe->apte.Length = sizeof(ATA_PASS_THROUGH_EX); 6800 stripe->apte.TimeOutValue = 5; 6801 stripe->apte.CurrentTaskFile[6] = IDE_COMMAND_FLUSH_CACHE; 6802 6803 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false); 6804 6805 if (!stripe->Irp) { 6806 ERR("IoAllocateIrp failed\n"); 6807 goto nextdev; 6808 } 6809 6810 IrpSp = IoGetNextIrpStackLocation(stripe->Irp); 6811 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL; 6812 IrpSp->FileObject = dev->fileobj; 6813 6814 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_ATA_PASS_THROUGH; 6815 IrpSp->Parameters.DeviceIoControl.InputBufferLength = sizeof(ATA_PASS_THROUGH_EX); 6816 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = sizeof(ATA_PASS_THROUGH_EX); 6817 6818 stripe->Irp->AssociatedIrp.SystemBuffer = &stripe->apte; 6819 stripe->Irp->Flags |= IRP_BUFFERED_IO | IRP_INPUT_OPERATION; 6820 stripe->Irp->UserBuffer = &stripe->apte; 6821 stripe->Irp->UserIosb = &stripe->iosb; 6822 6823 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true); 6824 6825 IoCallDriver(dev->devobj, stripe->Irp); 6826 6827 nextdev: 6828 num++; 6829 } 6830 6831 le = le->Flink; 6832 } 6833 6834 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 6835 6836 #ifndef __REACTOS__ 6837 for (unsigned int i = 0; i < num; i++) { 6838 #else 6839 for (i = 0; i < num; i++) { 6840 #endif 6841 if (context.stripes[i].Irp) 6842 IoFreeIrp(context.stripes[i].Irp); 6843 } 6844 6845 ExFreePool(context.stripes); 6846 } 6847 6848 static NTSTATUS flush_changed_dev_stats(device_extension* Vcb, device* dev, PIRP Irp) { 6849 NTSTATUS Status; 6850 KEY searchkey; 6851 traverse_ptr tp; 6852 uint16_t statslen; 6853 uint64_t* stats; 6854 6855 searchkey.obj_id = 0; 6856 searchkey.obj_type = TYPE_DEV_STATS; 6857 searchkey.offset = dev->devitem.dev_id; 6858 6859 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp); 6860 if (!NT_SUCCESS(Status)) { 6861 ERR("find_item returned %08x\n", Status); 6862 return Status; 6863 } 6864 6865 if (!keycmp(tp.item->key, searchkey)) { 6866 Status = delete_tree_item(Vcb, &tp); 6867 if (!NT_SUCCESS(Status)) { 6868 ERR("delete_tree_item returned %08x\n", Status); 6869 return Status; 6870 } 6871 } 6872 6873 statslen = sizeof(uint64_t) * 5; 6874 stats = ExAllocatePoolWithTag(PagedPool, statslen, ALLOC_TAG); 6875 if (!stats) { 6876 ERR("out of memory\n"); 6877 return STATUS_INSUFFICIENT_RESOURCES; 6878 } 6879 6880 RtlCopyMemory(stats, dev->stats, statslen); 6881 6882 Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, dev->devitem.dev_id, stats, statslen, NULL, Irp); 6883 if (!NT_SUCCESS(Status)) { 6884 ERR("insert_tree_item returned %08x\n", Status); 6885 ExFreePool(stats); 6886 return Status; 6887 } 6888 6889 return STATUS_SUCCESS; 6890 } 6891 6892 static NTSTATUS flush_subvol(device_extension* Vcb, root* r, PIRP Irp) { 6893 NTSTATUS Status; 6894 6895 if (r != Vcb->root_root && r != Vcb->chunk_root) { 6896 KEY searchkey; 6897 traverse_ptr tp; 6898 ROOT_ITEM* ri; 6899 6900 searchkey.obj_id = r->id; 6901 searchkey.obj_type = TYPE_ROOT_ITEM; 6902 searchkey.offset = 0xffffffffffffffff; 6903 6904 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 6905 if (!NT_SUCCESS(Status)) { 6906 ERR("error - find_item returned %08x\n", Status); 6907 return Status; 6908 } 6909 6910 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 6911 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id); 6912 return STATUS_INTERNAL_ERROR; 6913 } 6914 6915 ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); 6916 if (!ri) { 6917 ERR("out of memory\n"); 6918 return STATUS_INSUFFICIENT_RESOURCES; 6919 } 6920 6921 RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM)); 6922 6923 Status = delete_tree_item(Vcb, &tp); 6924 if (!NT_SUCCESS(Status)) { 6925 ERR("delete_tree_item returned %08x\n", Status); 6926 return Status; 6927 } 6928 6929 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp); 6930 if (!NT_SUCCESS(Status)) { 6931 ERR("insert_tree_item returned %08x\n", Status); 6932 return Status; 6933 } 6934 } 6935 6936 if (r->received) { 6937 KEY searchkey; 6938 traverse_ptr tp; 6939 6940 if (!Vcb->uuid_root) { 6941 root* uuid_root; 6942 6943 TRACE("uuid root doesn't exist, creating it\n"); 6944 6945 Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, false, 0, Irp); 6946 6947 if (!NT_SUCCESS(Status)) { 6948 ERR("create_root returned %08x\n", Status); 6949 return Status; 6950 } 6951 6952 Vcb->uuid_root = uuid_root; 6953 } 6954 6955 RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid, sizeof(uint64_t)); 6956 searchkey.obj_type = TYPE_SUBVOL_REC_UUID; 6957 RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t)); 6958 6959 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp); 6960 if (!NT_SUCCESS(Status)) { 6961 ERR("find_item returned %08x\n", Status); 6962 return Status; 6963 } 6964 6965 if (!keycmp(tp.item->key, searchkey)) { 6966 if (tp.item->size + sizeof(uint64_t) <= Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)) { 6967 uint64_t* ids; 6968 6969 ids = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(uint64_t), ALLOC_TAG); 6970 if (!ids) { 6971 ERR("out of memory\n"); 6972 return STATUS_INSUFFICIENT_RESOURCES; 6973 } 6974 6975 RtlCopyMemory(ids, tp.item->data, tp.item->size); 6976 RtlCopyMemory((uint8_t*)ids + tp.item->size, &r->id, sizeof(uint64_t)); 6977 6978 Status = delete_tree_item(Vcb, &tp); 6979 if (!NT_SUCCESS(Status)) { 6980 ERR("delete_tree_item returned %08x\n", Status); 6981 ExFreePool(ids); 6982 return Status; 6983 } 6984 6985 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ids, tp.item->size + sizeof(uint64_t), NULL, Irp); 6986 if (!NT_SUCCESS(Status)) { 6987 ERR("insert_tree_item returned %08x\n", Status); 6988 ExFreePool(ids); 6989 return Status; 6990 } 6991 } 6992 } else { 6993 uint64_t* root_num; 6994 6995 root_num = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t), ALLOC_TAG); 6996 if (!root_num) { 6997 ERR("out of memory\n"); 6998 return STATUS_INSUFFICIENT_RESOURCES; 6999 } 7000 7001 *root_num = r->id; 7002 7003 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(uint64_t), NULL, Irp); 7004 if (!NT_SUCCESS(Status)) { 7005 ERR("insert_tree_item returned %08x\n", Status); 7006 ExFreePool(root_num); 7007 return Status; 7008 } 7009 } 7010 7011 r->received = false; 7012 } 7013 7014 r->dirty = false; 7015 7016 return STATUS_SUCCESS; 7017 } 7018 7019 static NTSTATUS test_not_full(device_extension* Vcb) { 7020 uint64_t reserve, could_alloc, free_space; 7021 LIST_ENTRY* le; 7022 7023 // This function ensures we drop into readonly mode if we're about to leave very little 7024 // space for metadata - this is similar to the "global reserve" of the Linux driver. 7025 // Otherwise we might completely fill our space, at which point due to COW we can't 7026 // delete anything in order to fix this. 7027 7028 reserve = Vcb->extent_root->root_item.bytes_used; 7029 reserve += Vcb->root_root->root_item.bytes_used; 7030 if (Vcb->checksum_root) reserve += Vcb->checksum_root->root_item.bytes_used; 7031 7032 reserve = max(reserve, 0x1000000); // 16 M 7033 reserve = min(reserve, 0x20000000); // 512 M 7034 7035 // Find out how much space would be available for new metadata chunks 7036 7037 could_alloc = 0; 7038 7039 if (Vcb->metadata_flags & BLOCK_FLAG_RAID5) { 7040 uint64_t s1 = 0, s2 = 0, s3 = 0; 7041 7042 le = Vcb->devices.Flink; 7043 while (le != &Vcb->devices) { 7044 device* dev = CONTAINING_RECORD(le, device, list_entry); 7045 7046 if (!dev->readonly) { 7047 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used; 7048 7049 if (space >= s1) { 7050 s3 = s2; 7051 s2 = s1; 7052 s1 = space; 7053 } else if (space >= s2) { 7054 s3 = s2; 7055 s2 = space; 7056 } else if (space >= s3) 7057 s3 = space; 7058 } 7059 7060 le = le->Flink; 7061 } 7062 7063 could_alloc = s3 * 2; 7064 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID6)) { 7065 uint64_t s1 = 0, s2 = 0, s3 = 0, s4 = 0; 7066 7067 le = Vcb->devices.Flink; 7068 while (le != &Vcb->devices) { 7069 device* dev = CONTAINING_RECORD(le, device, list_entry); 7070 7071 if (!dev->readonly) { 7072 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used; 7073 7074 if (space >= s1) { 7075 s4 = s3; 7076 s3 = s2; 7077 s2 = s1; 7078 s1 = space; 7079 } else if (space >= s2) { 7080 s4 = s3; 7081 s3 = s2; 7082 s2 = space; 7083 } else if (space >= s3) { 7084 s4 = s3; 7085 s3 = space; 7086 } else if (space >= s4) 7087 s4 = space; 7088 } 7089 7090 le = le->Flink; 7091 } 7092 7093 could_alloc = s4 * 2; 7094 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1)) { 7095 uint64_t s1 = 0, s2 = 0; 7096 7097 le = Vcb->devices.Flink; 7098 while (le != &Vcb->devices) { 7099 device* dev = CONTAINING_RECORD(le, device, list_entry); 7100 7101 if (!dev->readonly) { 7102 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used; 7103 7104 if (space >= s1) { 7105 s2 = s1; 7106 s1 = space; 7107 } else if (space >= s2) 7108 s2 = space; 7109 } 7110 7111 le = le->Flink; 7112 } 7113 7114 if (Vcb->metadata_flags & BLOCK_FLAG_RAID1) 7115 could_alloc = s2; 7116 else // RAID0 7117 could_alloc = s2 * 2; 7118 } else if (Vcb->metadata_flags & BLOCK_FLAG_DUPLICATE) { 7119 le = Vcb->devices.Flink; 7120 while (le != &Vcb->devices) { 7121 device* dev = CONTAINING_RECORD(le, device, list_entry); 7122 7123 if (!dev->readonly) { 7124 uint64_t space = (dev->devitem.num_bytes - dev->devitem.bytes_used) / 2; 7125 7126 could_alloc = max(could_alloc, space); 7127 } 7128 7129 le = le->Flink; 7130 } 7131 } else { // SINGLE 7132 le = Vcb->devices.Flink; 7133 while (le != &Vcb->devices) { 7134 device* dev = CONTAINING_RECORD(le, device, list_entry); 7135 7136 if (!dev->readonly) { 7137 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used; 7138 7139 could_alloc = max(could_alloc, space); 7140 } 7141 7142 le = le->Flink; 7143 } 7144 } 7145 7146 if (could_alloc >= reserve) 7147 return STATUS_SUCCESS; 7148 7149 free_space = 0; 7150 7151 le = Vcb->chunks.Flink; 7152 while (le != &Vcb->chunks) { 7153 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 7154 7155 if (!c->reloc && !c->readonly && c->chunk_item->type & BLOCK_FLAG_METADATA) { 7156 free_space += c->chunk_item->size - c->used; 7157 7158 if (free_space + could_alloc >= reserve) 7159 return STATUS_SUCCESS; 7160 } 7161 7162 le = le->Flink; 7163 } 7164 7165 return STATUS_DISK_FULL; 7166 } 7167 7168 static NTSTATUS check_for_orphans_root(device_extension* Vcb, root* r, PIRP Irp) { 7169 NTSTATUS Status; 7170 KEY searchkey; 7171 traverse_ptr tp; 7172 LIST_ENTRY rollback; 7173 7174 TRACE("(%p, %p)\n", Vcb, r); 7175 7176 InitializeListHead(&rollback); 7177 7178 searchkey.obj_id = BTRFS_ORPHAN_INODE_OBJID; 7179 searchkey.obj_type = TYPE_ORPHAN_INODE; 7180 searchkey.offset = 0; 7181 7182 Status = find_item(Vcb, r, &tp, &searchkey, false, Irp); 7183 if (!NT_SUCCESS(Status)) { 7184 ERR("find_item returned %08x\n", Status); 7185 return Status; 7186 } 7187 7188 do { 7189 traverse_ptr next_tp; 7190 7191 if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type)) 7192 break; 7193 7194 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 7195 fcb* fcb; 7196 7197 TRACE("removing orphaned inode %I64x\n", tp.item->key.offset); 7198 7199 Status = open_fcb(Vcb, r, tp.item->key.offset, 0, NULL, false, NULL, &fcb, PagedPool, Irp); 7200 if (!NT_SUCCESS(Status)) 7201 ERR("open_fcb returned %08x\n", Status); 7202 else { 7203 if (fcb->inode_item.st_nlink == 0) { 7204 if (fcb->type != BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0) { 7205 Status = excise_extents(Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size), Irp, &rollback); 7206 if (!NT_SUCCESS(Status)) { 7207 ERR("excise_extents returned %08x\n", Status); 7208 goto end; 7209 } 7210 } 7211 7212 fcb->deleted = true; 7213 7214 mark_fcb_dirty(fcb); 7215 } 7216 7217 free_fcb(fcb); 7218 7219 Status = delete_tree_item(Vcb, &tp); 7220 if (!NT_SUCCESS(Status)) { 7221 ERR("delete_tree_item returned %08x\n", Status); 7222 goto end; 7223 } 7224 } 7225 } 7226 7227 if (find_next_item(Vcb, &tp, &next_tp, false, Irp)) 7228 tp = next_tp; 7229 else 7230 break; 7231 } while (true); 7232 7233 Status = STATUS_SUCCESS; 7234 7235 clear_rollback(&rollback); 7236 7237 end: 7238 do_rollback(Vcb, &rollback); 7239 7240 return Status; 7241 } 7242 7243 static NTSTATUS check_for_orphans(device_extension* Vcb, PIRP Irp) { 7244 NTSTATUS Status; 7245 LIST_ENTRY* le; 7246 7247 if (IsListEmpty(&Vcb->dirty_filerefs)) 7248 return STATUS_SUCCESS; 7249 7250 le = Vcb->dirty_filerefs.Flink; 7251 while (le != &Vcb->dirty_filerefs) { 7252 file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry_dirty); 7253 7254 if (!fr->fcb->subvol->checked_for_orphans) { 7255 Status = check_for_orphans_root(Vcb, fr->fcb->subvol, Irp); 7256 if (!NT_SUCCESS(Status)) { 7257 ERR("check_for_orphans_root returned %08x\n", Status); 7258 return Status; 7259 } 7260 7261 fr->fcb->subvol->checked_for_orphans = true; 7262 } 7263 7264 le = le->Flink; 7265 } 7266 7267 return STATUS_SUCCESS; 7268 } 7269 7270 static NTSTATUS do_write2(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { 7271 NTSTATUS Status; 7272 LIST_ENTRY *le, batchlist; 7273 bool cache_changed = false; 7274 volume_device_extension* vde; 7275 bool no_cache = false; 7276 #ifdef DEBUG_FLUSH_TIMES 7277 uint64_t filerefs = 0, fcbs = 0; 7278 LARGE_INTEGER freq, time1, time2; 7279 #endif 7280 #ifdef DEBUG_WRITE_LOOPS 7281 UINT loops = 0; 7282 #endif 7283 7284 TRACE("(%p)\n", Vcb); 7285 7286 InitializeListHead(&batchlist); 7287 7288 #ifdef DEBUG_FLUSH_TIMES 7289 time1 = KeQueryPerformanceCounter(&freq); 7290 #endif 7291 7292 Status = check_for_orphans(Vcb, Irp); 7293 if (!NT_SUCCESS(Status)) { 7294 ERR("check_for_orphans returned %08x\n", Status); 7295 return Status; 7296 } 7297 7298 ExAcquireResourceExclusiveLite(&Vcb->dirty_filerefs_lock, true); 7299 7300 while (!IsListEmpty(&Vcb->dirty_filerefs)) { 7301 file_ref* fr = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_filerefs), file_ref, list_entry_dirty); 7302 7303 flush_fileref(fr, &batchlist, Irp); 7304 free_fileref(fr); 7305 7306 #ifdef DEBUG_FLUSH_TIMES 7307 filerefs++; 7308 #endif 7309 } 7310 7311 ExReleaseResourceLite(&Vcb->dirty_filerefs_lock); 7312 7313 Status = commit_batch_list(Vcb, &batchlist, Irp); 7314 if (!NT_SUCCESS(Status)) { 7315 ERR("commit_batch_list returned %08x\n", Status); 7316 return Status; 7317 } 7318 7319 #ifdef DEBUG_FLUSH_TIMES 7320 time2 = KeQueryPerformanceCounter(NULL); 7321 7322 ERR("flushed %I64u filerefs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart); 7323 7324 time1 = KeQueryPerformanceCounter(&freq); 7325 #endif 7326 7327 // We process deleted streams first, so we don't run over our xattr 7328 // limit unless we absolutely have to. 7329 // We also process deleted normal files, to avoid any problems 7330 // caused by inode collisions. 7331 7332 ExAcquireResourceExclusiveLite(&Vcb->dirty_fcbs_lock, true); 7333 7334 le = Vcb->dirty_fcbs.Flink; 7335 while (le != &Vcb->dirty_fcbs) { 7336 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty); 7337 LIST_ENTRY* le2 = le->Flink; 7338 7339 if (fcb->deleted) { 7340 ExAcquireResourceExclusiveLite(fcb->Header.Resource, true); 7341 Status = flush_fcb(fcb, false, &batchlist, Irp); 7342 ExReleaseResourceLite(fcb->Header.Resource); 7343 7344 free_fcb(fcb); 7345 7346 if (!NT_SUCCESS(Status)) { 7347 ERR("flush_fcb returned %08x\n", Status); 7348 clear_batch_list(Vcb, &batchlist); 7349 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); 7350 return Status; 7351 } 7352 7353 #ifdef DEBUG_FLUSH_TIMES 7354 fcbs++; 7355 #endif 7356 } 7357 7358 le = le2; 7359 } 7360 7361 Status = commit_batch_list(Vcb, &batchlist, Irp); 7362 if (!NT_SUCCESS(Status)) { 7363 ERR("commit_batch_list returned %08x\n", Status); 7364 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); 7365 return Status; 7366 } 7367 7368 le = Vcb->dirty_fcbs.Flink; 7369 while (le != &Vcb->dirty_fcbs) { 7370 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty); 7371 LIST_ENTRY* le2 = le->Flink; 7372 7373 if (fcb->subvol != Vcb->root_root) { 7374 ExAcquireResourceExclusiveLite(fcb->Header.Resource, true); 7375 Status = flush_fcb(fcb, false, &batchlist, Irp); 7376 ExReleaseResourceLite(fcb->Header.Resource); 7377 free_fcb(fcb); 7378 7379 if (!NT_SUCCESS(Status)) { 7380 ERR("flush_fcb returned %08x\n", Status); 7381 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); 7382 return Status; 7383 } 7384 7385 #ifdef DEBUG_FLUSH_TIMES 7386 fcbs++; 7387 #endif 7388 } 7389 7390 le = le2; 7391 } 7392 7393 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); 7394 7395 Status = commit_batch_list(Vcb, &batchlist, Irp); 7396 if (!NT_SUCCESS(Status)) { 7397 ERR("commit_batch_list returned %08x\n", Status); 7398 return Status; 7399 } 7400 7401 #ifdef DEBUG_FLUSH_TIMES 7402 time2 = KeQueryPerformanceCounter(NULL); 7403 7404 ERR("flushed %I64u fcbs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart); 7405 #endif 7406 7407 // no need to get dirty_subvols_lock here, as we have tree_lock exclusively 7408 while (!IsListEmpty(&Vcb->dirty_subvols)) { 7409 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_subvols), root, list_entry_dirty); 7410 7411 Status = flush_subvol(Vcb, r, Irp); 7412 if (!NT_SUCCESS(Status)) { 7413 ERR("flush_subvol returned %08x\n", Status); 7414 return Status; 7415 } 7416 } 7417 7418 if (!IsListEmpty(&Vcb->drop_roots)) { 7419 Status = drop_roots(Vcb, Irp, rollback); 7420 7421 if (!NT_SUCCESS(Status)) { 7422 ERR("drop_roots returned %08x\n", Status); 7423 return Status; 7424 } 7425 } 7426 7427 Status = update_chunks(Vcb, &batchlist, Irp, rollback); 7428 7429 if (!NT_SUCCESS(Status)) { 7430 ERR("update_chunks returned %08x\n", Status); 7431 return Status; 7432 } 7433 7434 Status = commit_batch_list(Vcb, &batchlist, Irp); 7435 7436 // If only changing superblock, e.g. changing label, we still need to rewrite 7437 // the root tree so the generations match, otherwise you won't be able to mount on Linux. 7438 if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) { 7439 KEY searchkey; 7440 7441 traverse_ptr tp; 7442 7443 searchkey.obj_id = 0; 7444 searchkey.obj_type = 0; 7445 searchkey.offset = 0; 7446 7447 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp); 7448 if (!NT_SUCCESS(Status)) { 7449 ERR("error - find_item returned %08x\n", Status); 7450 return Status; 7451 } 7452 7453 Vcb->root_root->treeholder.tree->write = true; 7454 } 7455 7456 // make sure we always update the extent tree 7457 Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp); 7458 if (!NT_SUCCESS(Status)) { 7459 ERR("add_root_item_to_cache returned %08x\n", Status); 7460 return Status; 7461 } 7462 7463 if (Vcb->stats_changed) { 7464 le = Vcb->devices.Flink; 7465 while (le != &Vcb->devices) { 7466 device* dev = CONTAINING_RECORD(le, device, list_entry); 7467 7468 if (dev->stats_changed) { 7469 Status = flush_changed_dev_stats(Vcb, dev, Irp); 7470 if (!NT_SUCCESS(Status)) { 7471 ERR("flush_changed_dev_stats returned %08x\n", Status); 7472 return Status; 7473 } 7474 dev->stats_changed = false; 7475 } 7476 7477 le = le->Flink; 7478 } 7479 7480 Vcb->stats_changed = false; 7481 } 7482 7483 do { 7484 Status = add_parents(Vcb, Irp); 7485 if (!NT_SUCCESS(Status)) { 7486 ERR("add_parents returned %08x\n", Status); 7487 goto end; 7488 } 7489 7490 Status = allocate_tree_extents(Vcb, Irp, rollback); 7491 if (!NT_SUCCESS(Status)) { 7492 ERR("allocate_tree_extents returned %08x\n", Status); 7493 goto end; 7494 } 7495 7496 Status = do_splits(Vcb, Irp, rollback); 7497 if (!NT_SUCCESS(Status)) { 7498 ERR("do_splits returned %08x\n", Status); 7499 goto end; 7500 } 7501 7502 Status = update_chunk_usage(Vcb, Irp, rollback); 7503 if (!NT_SUCCESS(Status)) { 7504 ERR("update_chunk_usage returned %08x\n", Status); 7505 goto end; 7506 } 7507 7508 if (!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) { 7509 if (!no_cache) { 7510 Status = allocate_cache(Vcb, &cache_changed, Irp, rollback); 7511 if (!NT_SUCCESS(Status)) { 7512 WARN("allocate_cache returned %08x\n", Status); 7513 no_cache = true; 7514 cache_changed = false; 7515 } 7516 } 7517 } else { 7518 Status = update_chunk_caches_tree(Vcb, Irp); 7519 if (!NT_SUCCESS(Status)) { 7520 ERR("update_chunk_caches_tree returned %08x\n", Status); 7521 goto end; 7522 } 7523 } 7524 7525 #ifdef DEBUG_WRITE_LOOPS 7526 loops++; 7527 7528 if (cache_changed) 7529 ERR("cache has changed, looping again\n"); 7530 #endif 7531 } while (cache_changed || !trees_consistent(Vcb)); 7532 7533 #ifdef DEBUG_WRITE_LOOPS 7534 ERR("%u loops\n", loops); 7535 #endif 7536 7537 TRACE("trees consistent\n"); 7538 7539 Status = update_root_root(Vcb, no_cache, Irp, rollback); 7540 if (!NT_SUCCESS(Status)) { 7541 ERR("update_root_root returned %08x\n", Status); 7542 goto end; 7543 } 7544 7545 Status = write_trees(Vcb, Irp); 7546 if (!NT_SUCCESS(Status)) { 7547 ERR("write_trees returned %08x\n", Status); 7548 goto end; 7549 } 7550 7551 Status = test_not_full(Vcb); 7552 if (!NT_SUCCESS(Status)) { 7553 ERR("test_not_full returned %08x\n", Status); 7554 goto end; 7555 } 7556 7557 #ifdef DEBUG_PARANOID 7558 le = Vcb->trees.Flink; 7559 while (le != &Vcb->trees) { 7560 tree* t = CONTAINING_RECORD(le, tree, list_entry); 7561 KEY searchkey; 7562 traverse_ptr tp; 7563 7564 searchkey.obj_id = t->header.address; 7565 searchkey.obj_type = TYPE_METADATA_ITEM; 7566 searchkey.offset = 0xffffffffffffffff; 7567 7568 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 7569 if (!NT_SUCCESS(Status)) { 7570 ERR("error - find_item returned %08x\n", Status); 7571 goto end; 7572 } 7573 7574 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 7575 searchkey.obj_id = t->header.address; 7576 searchkey.obj_type = TYPE_EXTENT_ITEM; 7577 searchkey.offset = 0xffffffffffffffff; 7578 7579 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp); 7580 if (!NT_SUCCESS(Status)) { 7581 ERR("error - find_item returned %08x\n", Status); 7582 goto end; 7583 } 7584 7585 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 7586 ERR("error - could not find entry in extent tree for tree at %I64x\n", t->header.address); 7587 Status = STATUS_INTERNAL_ERROR; 7588 goto end; 7589 } 7590 } 7591 7592 le = le->Flink; 7593 } 7594 #endif 7595 7596 Vcb->superblock.cache_generation = Vcb->superblock.generation; 7597 7598 if (!Vcb->options.no_barrier) 7599 flush_disk_caches(Vcb); 7600 7601 Status = write_superblocks(Vcb, Irp); 7602 if (!NT_SUCCESS(Status)) { 7603 ERR("write_superblocks returned %08x\n", Status); 7604 goto end; 7605 } 7606 7607 vde = Vcb->vde; 7608 7609 if (vde) { 7610 pdo_device_extension* pdode = vde->pdode; 7611 7612 ExAcquireResourceSharedLite(&pdode->child_lock, true); 7613 7614 le = pdode->children.Flink; 7615 7616 while (le != &pdode->children) { 7617 volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); 7618 7619 vc->generation = Vcb->superblock.generation; 7620 le = le->Flink; 7621 } 7622 7623 ExReleaseResourceLite(&pdode->child_lock); 7624 } 7625 7626 clean_space_cache(Vcb); 7627 7628 le = Vcb->chunks.Flink; 7629 while (le != &Vcb->chunks) { 7630 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 7631 7632 c->changed = false; 7633 c->space_changed = false; 7634 7635 le = le->Flink; 7636 } 7637 7638 Vcb->superblock.generation++; 7639 7640 Status = STATUS_SUCCESS; 7641 7642 le = Vcb->trees.Flink; 7643 while (le != &Vcb->trees) { 7644 tree* t = CONTAINING_RECORD(le, tree, list_entry); 7645 7646 t->write = false; 7647 7648 le = le->Flink; 7649 } 7650 7651 Vcb->need_write = false; 7652 7653 while (!IsListEmpty(&Vcb->drop_roots)) { 7654 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->drop_roots), root, list_entry); 7655 7656 ExDeleteResourceLite(&r->nonpaged->load_tree_lock); 7657 ExFreePool(r->nonpaged); 7658 ExFreePool(r); 7659 } 7660 7661 end: 7662 TRACE("do_write returning %08x\n", Status); 7663 7664 return Status; 7665 } 7666 7667 NTSTATUS do_write(device_extension* Vcb, PIRP Irp) { 7668 LIST_ENTRY rollback; 7669 NTSTATUS Status; 7670 7671 InitializeListHead(&rollback); 7672 7673 Status = do_write2(Vcb, Irp, &rollback); 7674 7675 if (!NT_SUCCESS(Status)) { 7676 ERR("do_write2 returned %08x, dropping into readonly mode\n", Status); 7677 Vcb->readonly = true; 7678 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_FORCED_CLOSED); 7679 do_rollback(Vcb, &rollback); 7680 } else 7681 clear_rollback(&rollback); 7682 7683 return Status; 7684 } 7685 7686 static void do_flush(device_extension* Vcb) { 7687 NTSTATUS Status; 7688 7689 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true); 7690 7691 if (Vcb->need_write && !Vcb->readonly) 7692 Status = do_write(Vcb, NULL); 7693 else 7694 Status = STATUS_SUCCESS; 7695 7696 free_trees(Vcb); 7697 7698 if (!NT_SUCCESS(Status)) 7699 ERR("do_write returned %08x\n", Status); 7700 7701 ExReleaseResourceLite(&Vcb->tree_lock); 7702 } 7703 7704 _Function_class_(KSTART_ROUTINE) 7705 void __stdcall flush_thread(void* context) { 7706 DEVICE_OBJECT* devobj = context; 7707 device_extension* Vcb = devobj->DeviceExtension; 7708 LARGE_INTEGER due_time; 7709 7710 ObReferenceObject(devobj); 7711 7712 KeInitializeTimer(&Vcb->flush_thread_timer); 7713 7714 due_time.QuadPart = (uint64_t)Vcb->options.flush_interval * -10000000; 7715 7716 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL); 7717 7718 while (true) { 7719 KeWaitForSingleObject(&Vcb->flush_thread_timer, Executive, KernelMode, false, NULL); 7720 7721 if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing) 7722 break; 7723 7724 if (!Vcb->locked) 7725 do_flush(Vcb); 7726 7727 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL); 7728 } 7729 7730 ObDereferenceObject(devobj); 7731 KeCancelTimer(&Vcb->flush_thread_timer); 7732 7733 KeSetEvent(&Vcb->flush_thread_finished, 0, false); 7734 7735 PsTerminateSystemThread(STATUS_SUCCESS); 7736 } 7737