1 /* Copyright (c) Mark Harmstone 2016-17 2 * 3 * This file is part of WinBtrfs. 4 * 5 * WinBtrfs is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU Lesser General Public Licence as published by 7 * the Free Software Foundation, either version 3 of the Licence, or 8 * (at your option) any later version. 9 * 10 * WinBtrfs is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public Licence for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public Licence 16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ 17 18 #include "btrfs_drv.h" 19 #include "btrfsioctl.h" 20 #include <ntddstor.h> 21 22 typedef struct { 23 UINT64 address; 24 UINT64 new_address; 25 tree_header* data; 26 EXTENT_ITEM* ei; 27 tree* t; 28 BOOL system; 29 LIST_ENTRY refs; 30 LIST_ENTRY list_entry; 31 } metadata_reloc; 32 33 typedef struct { 34 UINT8 type; 35 UINT64 hash; 36 37 union { 38 TREE_BLOCK_REF tbr; 39 SHARED_BLOCK_REF sbr; 40 }; 41 42 metadata_reloc* parent; 43 BOOL top; 44 LIST_ENTRY list_entry; 45 } metadata_reloc_ref; 46 47 typedef struct { 48 UINT64 address; 49 UINT64 size; 50 UINT64 new_address; 51 chunk* newchunk; 52 EXTENT_ITEM* ei; 53 LIST_ENTRY refs; 54 LIST_ENTRY list_entry; 55 } data_reloc; 56 57 typedef struct { 58 UINT8 type; 59 UINT64 hash; 60 61 union { 62 EXTENT_DATA_REF edr; 63 SHARED_DATA_REF sdr; 64 }; 65 66 metadata_reloc* parent; 67 LIST_ENTRY list_entry; 68 } data_reloc_ref; 69 70 #ifndef __REACTOS__ 71 #ifndef _MSC_VER // not in mingw yet 72 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000 73 #endif 74 #endif 75 76 #define BALANCE_UNIT 0x100000 // only read 1 MB at a time 77 78 static NTSTATUS add_metadata_reloc(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, traverse_ptr* tp, 79 BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) { 80 NTSTATUS Status; 81 metadata_reloc* mr; 82 EXTENT_ITEM* ei; 83 UINT16 len; 84 UINT64 inline_rc; 85 UINT8* ptr; 86 87 mr = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc), ALLOC_TAG); 88 if (!mr) { 89 ERR("out of memory\n"); 90 return STATUS_INSUFFICIENT_RESOURCES; 91 } 92 93 mr->address = tp->item->key.obj_id; 94 mr->data = NULL; 95 mr->ei = (EXTENT_ITEM*)tp->item->data; 96 mr->system = FALSE; 97 InitializeListHead(&mr->refs); 98 99 Status = delete_tree_item(Vcb, tp); 100 if (!NT_SUCCESS(Status)) { 101 ERR("delete_tree_item returned %08x\n", Status); 102 ExFreePool(mr); 103 return Status; 104 } 105 106 if (!c) 107 c = get_chunk_from_address(Vcb, tp->item->key.obj_id); 108 109 if (c) { 110 ExAcquireResourceExclusiveLite(&c->lock, TRUE); 111 112 c->used -= Vcb->superblock.node_size; 113 114 space_list_add(c, tp->item->key.obj_id, Vcb->superblock.node_size, rollback); 115 116 ExReleaseResourceLite(&c->lock); 117 } 118 119 ei = (EXTENT_ITEM*)tp->item->data; 120 inline_rc = 0; 121 122 len = tp->item->size - sizeof(EXTENT_ITEM); 123 ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM); 124 if (!skinny) { 125 len -= sizeof(EXTENT_ITEM2); 126 ptr += sizeof(EXTENT_ITEM2); 127 } 128 129 while (len > 0) { 130 UINT8 secttype = *ptr; 131 UINT16 sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0); 132 metadata_reloc_ref* ref; 133 134 len--; 135 136 if (sectlen > len) { 137 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen); 138 return STATUS_INTERNAL_ERROR; 139 } 140 141 if (sectlen == 0) { 142 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype); 143 return STATUS_INTERNAL_ERROR; 144 } 145 146 ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG); 147 if (!ref) { 148 ERR("out of memory\n"); 149 return STATUS_INSUFFICIENT_RESOURCES; 150 } 151 152 if (secttype == TYPE_TREE_BLOCK_REF) { 153 ref->type = TYPE_TREE_BLOCK_REF; 154 RtlCopyMemory(&ref->tbr, ptr + sizeof(UINT8), sizeof(TREE_BLOCK_REF)); 155 inline_rc++; 156 } else if (secttype == TYPE_SHARED_BLOCK_REF) { 157 ref->type = TYPE_SHARED_BLOCK_REF; 158 RtlCopyMemory(&ref->sbr, ptr + sizeof(UINT8), sizeof(SHARED_BLOCK_REF)); 159 inline_rc++; 160 } else { 161 ERR("unexpected tree type %x\n", secttype); 162 ExFreePool(ref); 163 return STATUS_INTERNAL_ERROR; 164 } 165 166 ref->parent = NULL; 167 ref->top = FALSE; 168 InsertTailList(&mr->refs, &ref->list_entry); 169 170 len -= sectlen; 171 ptr += sizeof(UINT8) + sectlen; 172 } 173 174 if (inline_rc < ei->refcount) { // look for non-inline entries 175 traverse_ptr tp2 = *tp, next_tp; 176 177 while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) { 178 tp2 = next_tp; 179 180 if (tp2.item->key.obj_id == tp->item->key.obj_id) { 181 if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF) { 182 metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG); 183 if (!ref) { 184 ERR("out of memory\n"); 185 return STATUS_INSUFFICIENT_RESOURCES; 186 } 187 188 ref->type = TYPE_TREE_BLOCK_REF; 189 ref->tbr.offset = tp2.item->key.offset; 190 ref->parent = NULL; 191 ref->top = FALSE; 192 InsertTailList(&mr->refs, &ref->list_entry); 193 194 Status = delete_tree_item(Vcb, &tp2); 195 if (!NT_SUCCESS(Status)) { 196 ERR("delete_tree_item returned %08x\n", Status); 197 return Status; 198 } 199 } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF) { 200 metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG); 201 if (!ref) { 202 ERR("out of memory\n"); 203 return STATUS_INSUFFICIENT_RESOURCES; 204 } 205 206 ref->type = TYPE_SHARED_BLOCK_REF; 207 ref->sbr.offset = tp2.item->key.offset; 208 ref->parent = NULL; 209 ref->top = FALSE; 210 InsertTailList(&mr->refs, &ref->list_entry); 211 212 Status = delete_tree_item(Vcb, &tp2); 213 if (!NT_SUCCESS(Status)) { 214 ERR("delete_tree_item returned %08x\n", Status); 215 return Status; 216 } 217 } 218 } else 219 break; 220 } 221 } 222 223 InsertTailList(items, &mr->list_entry); 224 225 if (mr2) 226 *mr2 = mr; 227 228 return STATUS_SUCCESS; 229 } 230 231 static NTSTATUS add_metadata_reloc_parent(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, 232 UINT64 address, metadata_reloc** mr2, LIST_ENTRY* rollback) { 233 LIST_ENTRY* le; 234 KEY searchkey; 235 traverse_ptr tp; 236 BOOL skinny = FALSE; 237 NTSTATUS Status; 238 239 le = items->Flink; 240 while (le != items) { 241 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); 242 243 if (mr->address == address) { 244 *mr2 = mr; 245 return STATUS_SUCCESS; 246 } 247 248 le = le->Flink; 249 } 250 251 searchkey.obj_id = address; 252 searchkey.obj_type = TYPE_METADATA_ITEM; 253 searchkey.offset = 0xffffffffffffffff; 254 255 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); 256 if (!NT_SUCCESS(Status)) { 257 ERR("find_item returned %08x\n", Status); 258 return Status; 259 } 260 261 if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) 262 skinny = TRUE; 263 else if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size && 264 tp.item->size >= sizeof(EXTENT_ITEM)) { 265 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; 266 267 if (!(ei->flags & EXTENT_ITEM_TREE_BLOCK)) { 268 ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address); 269 return STATUS_INTERNAL_ERROR; 270 } 271 } else { 272 ERR("could not find valid EXTENT_ITEM for address %llx\n", address); 273 return STATUS_INTERNAL_ERROR; 274 } 275 276 Status = add_metadata_reloc(Vcb, items, &tp, skinny, mr2, NULL, rollback); 277 if (!NT_SUCCESS(Status)) { 278 ERR("add_metadata_reloc returned %08x\n", Status); 279 return Status; 280 } 281 282 return STATUS_SUCCESS; 283 } 284 285 static void sort_metadata_reloc_refs(metadata_reloc* mr) { 286 LIST_ENTRY newlist, *le; 287 288 if (mr->refs.Flink == mr->refs.Blink) // 0 or 1 items 289 return; 290 291 // insertion sort 292 293 InitializeListHead(&newlist); 294 295 while (!IsListEmpty(&mr->refs)) { 296 metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry); 297 BOOL inserted = FALSE; 298 299 if (ref->type == TYPE_TREE_BLOCK_REF) 300 ref->hash = ref->tbr.offset; 301 else if (ref->type == TYPE_SHARED_BLOCK_REF) 302 ref->hash = ref->parent->new_address; 303 304 le = newlist.Flink; 305 while (le != &newlist) { 306 metadata_reloc_ref* ref2 = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); 307 308 if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) { 309 InsertHeadList(le->Blink, &ref->list_entry); 310 inserted = TRUE; 311 break; 312 } 313 314 le = le->Flink; 315 } 316 317 if (!inserted) 318 InsertTailList(&newlist, &ref->list_entry); 319 } 320 321 newlist.Flink->Blink = &mr->refs; 322 newlist.Blink->Flink = &mr->refs; 323 mr->refs.Flink = newlist.Flink; 324 mr->refs.Blink = newlist.Blink; 325 } 326 327 static NTSTATUS add_metadata_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, metadata_reloc* mr) { 328 NTSTATUS Status; 329 LIST_ENTRY* le; 330 UINT64 rc = 0; 331 UINT16 inline_len; 332 BOOL all_inline = TRUE; 333 metadata_reloc_ref* first_noninline = NULL; 334 EXTENT_ITEM* ei; 335 UINT8* ptr; 336 337 inline_len = sizeof(EXTENT_ITEM); 338 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) 339 inline_len += sizeof(EXTENT_ITEM2); 340 341 sort_metadata_reloc_refs(mr); 342 343 le = mr->refs.Flink; 344 while (le != &mr->refs) { 345 metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); 346 UINT16 extlen = 0; 347 348 rc++; 349 350 if (ref->type == TYPE_TREE_BLOCK_REF) 351 extlen += sizeof(TREE_BLOCK_REF); 352 else if (ref->type == TYPE_SHARED_BLOCK_REF) 353 extlen += sizeof(SHARED_BLOCK_REF); 354 355 if (all_inline) { 356 if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) { 357 all_inline = FALSE; 358 first_noninline = ref; 359 } else 360 inline_len += extlen + 1; 361 } 362 363 le = le->Flink; 364 } 365 366 ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG); 367 if (!ei) { 368 ERR("out of memory\n"); 369 return STATUS_INSUFFICIENT_RESOURCES; 370 } 371 372 ei->refcount = rc; 373 ei->generation = mr->ei->generation; 374 ei->flags = mr->ei->flags; 375 ptr = (UINT8*)&ei[1]; 376 377 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) { 378 EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr; 379 380 ei2->firstitem = *(KEY*)&mr->data[1]; 381 ei2->level = mr->data->level; 382 383 ptr += sizeof(EXTENT_ITEM2); 384 } 385 386 le = mr->refs.Flink; 387 while (le != &mr->refs) { 388 metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); 389 390 if (ref == first_noninline) 391 break; 392 393 *ptr = ref->type; 394 ptr++; 395 396 if (ref->type == TYPE_TREE_BLOCK_REF) { 397 TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)ptr; 398 399 tbr->offset = ref->tbr.offset; 400 401 ptr += sizeof(TREE_BLOCK_REF); 402 } else if (ref->type == TYPE_SHARED_BLOCK_REF) { 403 SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)ptr; 404 405 sbr->offset = ref->parent->new_address; 406 407 ptr += sizeof(SHARED_BLOCK_REF); 408 } 409 410 le = le->Flink; 411 } 412 413 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) 414 Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL); 415 else 416 Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, NULL, NULL); 417 418 if (!NT_SUCCESS(Status)) { 419 ERR("insert_tree_item returned %08x\n", Status); 420 ExFreePool(ei); 421 return Status; 422 } 423 424 if (!all_inline) { 425 le = &first_noninline->list_entry; 426 427 while (le != &mr->refs) { 428 metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); 429 430 if (ref->type == TYPE_TREE_BLOCK_REF) { 431 Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, ref->tbr.offset, NULL, 0, NULL, NULL); 432 if (!NT_SUCCESS(Status)) { 433 ERR("insert_tree_item returned %08x\n", Status); 434 return Status; 435 } 436 } else if (ref->type == TYPE_SHARED_BLOCK_REF) { 437 Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, ref->parent->new_address, NULL, 0, NULL, NULL); 438 if (!NT_SUCCESS(Status)) { 439 ERR("insert_tree_item returned %08x\n", Status); 440 return Status; 441 } 442 } 443 444 le = le->Flink; 445 } 446 } 447 448 if (ei->flags & EXTENT_ITEM_SHARED_BACKREFS || mr->data->flags & HEADER_FLAG_SHARED_BACKREF || !(mr->data->flags & HEADER_FLAG_MIXED_BACKREF)) { 449 if (mr->data->level > 0) { 450 UINT16 i; 451 internal_node* in = (internal_node*)&mr->data[1]; 452 453 for (i = 0; i < mr->data->num_items; i++) { 454 UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, in[i].address, mr->address, NULL); 455 456 if (sbrrc > 0) { 457 SHARED_BLOCK_REF sbr; 458 459 sbr.offset = mr->new_address; 460 461 Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, NULL); 462 if (!NT_SUCCESS(Status)) { 463 ERR("increase_extent_refcount returned %08x\n", Status); 464 return Status; 465 } 466 467 sbr.offset = mr->address; 468 469 Status = decrease_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, 470 sbr.offset, FALSE, NULL); 471 if (!NT_SUCCESS(Status)) { 472 ERR("decrease_extent_refcount returned %08x\n", Status); 473 return Status; 474 } 475 } 476 } 477 } else { 478 UINT16 i; 479 leaf_node* ln = (leaf_node*)&mr->data[1]; 480 481 for (i = 0; i < mr->data->num_items; i++) { 482 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 483 EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset); 484 485 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { 486 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 487 488 if (ed2->size > 0) { // not sparse 489 UINT32 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, mr->address, NULL); 490 491 if (sdrrc > 0) { 492 SHARED_DATA_REF sdr; 493 chunk* c; 494 495 sdr.offset = mr->new_address; 496 sdr.count = sdrrc; 497 498 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, NULL); 499 if (!NT_SUCCESS(Status)) { 500 ERR("increase_extent_refcount returned %08x\n", Status); 501 return Status; 502 } 503 504 sdr.offset = mr->address; 505 506 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, 507 sdr.offset, FALSE, NULL); 508 if (!NT_SUCCESS(Status)) { 509 ERR("decrease_extent_refcount returned %08x\n", Status); 510 return Status; 511 } 512 513 c = get_chunk_from_address(Vcb, ed2->address); 514 515 if (c) { 516 // check changed_extents 517 518 ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE); 519 520 le = c->changed_extents.Flink; 521 522 while (le != &c->changed_extents) { 523 changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry); 524 525 if (ce->address == ed2->address) { 526 LIST_ENTRY* le2; 527 528 le2 = ce->refs.Flink; 529 while (le2 != &ce->refs) { 530 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 531 532 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) { 533 cer->sdr.offset = mr->new_address; 534 break; 535 } 536 537 le2 = le2->Flink; 538 } 539 540 le2 = ce->old_refs.Flink; 541 while (le2 != &ce->old_refs) { 542 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); 543 544 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) { 545 cer->sdr.offset = mr->new_address; 546 break; 547 } 548 549 le2 = le2->Flink; 550 } 551 552 break; 553 } 554 555 le = le->Flink; 556 } 557 558 ExReleaseResourceLite(&c->changed_extents_lock); 559 } 560 } 561 } 562 } 563 } 564 } 565 } 566 } 567 568 return STATUS_SUCCESS; 569 } 570 571 static NTSTATUS write_metadata_items(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, 572 LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) { 573 LIST_ENTRY tree_writes, *le; 574 NTSTATUS Status; 575 traverse_ptr tp; 576 UINT8 level, max_level = 0; 577 chunk* newchunk = NULL; 578 579 InitializeListHead(&tree_writes); 580 581 le = items->Flink; 582 while (le != items) { 583 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); 584 LIST_ENTRY* le2; 585 chunk* pc; 586 587 mr->data = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); 588 if (!mr->data) { 589 ERR("out of memory\n"); 590 return STATUS_INSUFFICIENT_RESOURCES; 591 } 592 593 Status = read_data(Vcb, mr->address, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)mr->data, 594 c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, 0, FALSE, NormalPagePriority); 595 if (!NT_SUCCESS(Status)) { 596 ERR("read_data returned %08x\n", Status); 597 return Status; 598 } 599 600 if (pc->chunk_item->type & BLOCK_FLAG_SYSTEM) 601 mr->system = TRUE; 602 603 if (data_items && mr->data->level == 0) { 604 le2 = data_items->Flink; 605 while (le2 != data_items) { 606 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry); 607 leaf_node* ln = (leaf_node*)&mr->data[1]; 608 UINT16 i; 609 610 for (i = 0; i < mr->data->num_items; i++) { 611 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 612 EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset); 613 614 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { 615 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 616 617 if (ed2->address == dr->address) 618 ed2->address = dr->new_address; 619 } 620 } 621 } 622 623 le2 = le2->Flink; 624 } 625 } 626 627 if (mr->data->level > max_level) 628 max_level = mr->data->level; 629 630 le2 = mr->refs.Flink; 631 while (le2 != &mr->refs) { 632 metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry); 633 634 if (ref->type == TYPE_TREE_BLOCK_REF) { 635 KEY* firstitem; 636 root* r = NULL; 637 LIST_ENTRY* le3; 638 tree* t; 639 640 firstitem = (KEY*)&mr->data[1]; 641 642 le3 = Vcb->roots.Flink; 643 while (le3 != &Vcb->roots) { 644 root* r2 = CONTAINING_RECORD(le3, root, list_entry); 645 646 if (r2->id == ref->tbr.offset) { 647 r = r2; 648 break; 649 } 650 651 le3 = le3->Flink; 652 } 653 654 if (!r) { 655 ERR("could not find subvol with id %llx\n", ref->tbr.offset); 656 return STATUS_INTERNAL_ERROR; 657 } 658 659 Status = find_item_to_level(Vcb, r, &tp, firstitem, FALSE, mr->data->level + 1, NULL); 660 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { 661 ERR("find_item_to_level returned %08x\n", Status); 662 return Status; 663 } 664 665 t = tp.tree; 666 while (t && t->header.level < mr->data->level + 1) { 667 t = t->parent; 668 } 669 670 if (!t) 671 ref->top = TRUE; 672 else { 673 metadata_reloc* mr2; 674 675 Status = add_metadata_reloc_parent(Vcb, items, t->header.address, &mr2, rollback); 676 if (!NT_SUCCESS(Status)) { 677 ERR("add_metadata_reloc_parent returned %08x\n", Status); 678 return Status; 679 } 680 681 ref->parent = mr2; 682 } 683 } else if (ref->type == TYPE_SHARED_BLOCK_REF) { 684 metadata_reloc* mr2; 685 686 Status = add_metadata_reloc_parent(Vcb, items, ref->sbr.offset, &mr2, rollback); 687 if (!NT_SUCCESS(Status)) { 688 ERR("add_metadata_reloc_parent returned %08x\n", Status); 689 return Status; 690 } 691 692 ref->parent = mr2; 693 } 694 695 le2 = le2->Flink; 696 } 697 698 le = le->Flink; 699 } 700 701 le = items->Flink; 702 while (le != items) { 703 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); 704 LIST_ENTRY* le2; 705 UINT32 hash; 706 707 mr->t = NULL; 708 709 hash = calc_crc32c(0xffffffff, (UINT8*)&mr->address, sizeof(UINT64)); 710 711 le2 = Vcb->trees_ptrs[hash >> 24]; 712 713 if (le2) { 714 while (le2 != &Vcb->trees_hash) { 715 tree* t = CONTAINING_RECORD(le2, tree, list_entry_hash); 716 717 if (t->header.address == mr->address) { 718 mr->t = t; 719 break; 720 } else if (t->hash > hash) 721 break; 722 723 le2 = le2->Flink; 724 } 725 } 726 727 le = le->Flink; 728 } 729 730 for (level = 0; level <= max_level; level++) { 731 le = items->Flink; 732 while (le != items) { 733 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); 734 735 if (mr->data->level == level) { 736 BOOL done = FALSE; 737 LIST_ENTRY* le2; 738 tree_write* tw; 739 UINT64 flags; 740 tree* t3; 741 742 if (mr->system) 743 flags = Vcb->system_flags; 744 else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) 745 flags = Vcb->data_flags; 746 else 747 flags = Vcb->metadata_flags; 748 749 if (newchunk) { 750 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); 751 752 if (newchunk->chunk_item->type == flags && find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) { 753 newchunk->used += Vcb->superblock.node_size; 754 space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); 755 done = TRUE; 756 } 757 758 ExReleaseResourceLite(&newchunk->lock); 759 } 760 761 if (!done) { 762 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); 763 764 le2 = Vcb->chunks.Flink; 765 while (le2 != &Vcb->chunks) { 766 chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry); 767 768 if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == flags) { 769 ExAcquireResourceExclusiveLite(&c2->lock, TRUE); 770 771 if ((c2->chunk_item->size - c2->used) >= Vcb->superblock.node_size) { 772 if (find_metadata_address_in_chunk(Vcb, c2, &mr->new_address)) { 773 c2->used += Vcb->superblock.node_size; 774 space_list_subtract(c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); 775 ExReleaseResourceLite(&c2->lock); 776 newchunk = c2; 777 done = TRUE; 778 break; 779 } 780 } 781 782 ExReleaseResourceLite(&c2->lock); 783 } 784 785 le2 = le2->Flink; 786 } 787 788 // allocate new chunk if necessary 789 if (!done) { 790 Status = alloc_chunk(Vcb, flags, &newchunk, FALSE); 791 792 if (!NT_SUCCESS(Status)) { 793 ERR("alloc_chunk returned %08x\n", Status); 794 ExReleaseResourceLite(&Vcb->chunk_lock); 795 goto end; 796 } 797 798 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); 799 800 newchunk->balance_num = Vcb->balance.balance_num; 801 802 if (!find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) { 803 ExReleaseResourceLite(&newchunk->lock); 804 ExReleaseResourceLite(&Vcb->chunk_lock); 805 ERR("could not find address in new chunk\n"); 806 Status = STATUS_DISK_FULL; 807 goto end; 808 } else { 809 newchunk->used += Vcb->superblock.node_size; 810 space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); 811 } 812 813 ExReleaseResourceLite(&newchunk->lock); 814 } 815 816 ExReleaseResourceLite(&Vcb->chunk_lock); 817 } 818 819 // update parents 820 le2 = mr->refs.Flink; 821 while (le2 != &mr->refs) { 822 metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry); 823 824 if (ref->parent) { 825 UINT16 i; 826 internal_node* in = (internal_node*)&ref->parent->data[1]; 827 828 for (i = 0; i < ref->parent->data->num_items; i++) { 829 if (in[i].address == mr->address) { 830 in[i].address = mr->new_address; 831 break; 832 } 833 } 834 835 if (ref->parent->t) { 836 LIST_ENTRY* le3; 837 838 le3 = ref->parent->t->itemlist.Flink; 839 while (le3 != &ref->parent->t->itemlist) { 840 tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry); 841 842 if (!td->inserted && td->treeholder.address == mr->address) 843 td->treeholder.address = mr->new_address; 844 845 le3 = le3->Flink; 846 } 847 } 848 } else if (ref->top && ref->type == TYPE_TREE_BLOCK_REF) { 849 LIST_ENTRY* le3; 850 root* r = NULL; 851 852 // alter ROOT_ITEM 853 854 le3 = Vcb->roots.Flink; 855 while (le3 != &Vcb->roots) { 856 root* r2 = CONTAINING_RECORD(le3, root, list_entry); 857 858 if (r2->id == ref->tbr.offset) { 859 r = r2; 860 break; 861 } 862 863 le3 = le3->Flink; 864 } 865 866 if (r) { 867 r->treeholder.address = mr->new_address; 868 869 if (r == Vcb->root_root) 870 Vcb->superblock.root_tree_addr = mr->new_address; 871 else if (r == Vcb->chunk_root) 872 Vcb->superblock.chunk_tree_addr = mr->new_address; 873 else if (r->root_item.block_number == mr->address) { 874 KEY searchkey; 875 ROOT_ITEM* ri; 876 877 r->root_item.block_number = mr->new_address; 878 879 searchkey.obj_id = r->id; 880 searchkey.obj_type = TYPE_ROOT_ITEM; 881 searchkey.offset = 0xffffffffffffffff; 882 883 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); 884 if (!NT_SUCCESS(Status)) { 885 ERR("find_item returned %08x\n", Status); 886 goto end; 887 } 888 889 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { 890 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id); 891 Status = STATUS_INTERNAL_ERROR; 892 goto end; 893 } 894 895 ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); 896 if (!ri) { 897 ERR("out of memory\n"); 898 Status = STATUS_INSUFFICIENT_RESOURCES; 899 goto end; 900 } 901 902 RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM)); 903 904 Status = delete_tree_item(Vcb, &tp); 905 if (!NT_SUCCESS(Status)) { 906 ERR("delete_tree_item returned %08x\n", Status); 907 goto end; 908 } 909 910 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL); 911 if (!NT_SUCCESS(Status)) { 912 ERR("insert_tree_item returned %08x\n", Status); 913 goto end; 914 } 915 } 916 } 917 } 918 919 le2 = le2->Flink; 920 } 921 922 mr->data->address = mr->new_address; 923 924 t3 = mr->t; 925 926 while (t3) { 927 UINT8 h; 928 BOOL inserted; 929 tree* t4 = NULL; 930 931 // check if tree loaded more than once 932 if (t3->list_entry.Flink != &Vcb->trees_hash) { 933 tree* nt = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash); 934 935 if (nt->header.address == t3->header.address) 936 t4 = nt; 937 } 938 939 t3->header.address = mr->new_address; 940 941 h = t3->hash >> 24; 942 943 if (Vcb->trees_ptrs[h] == &t3->list_entry_hash) { 944 if (t3->list_entry_hash.Flink == &Vcb->trees_hash) 945 Vcb->trees_ptrs[h] = NULL; 946 else { 947 tree* t2 = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash); 948 949 if (t2->hash >> 24 == h) 950 Vcb->trees_ptrs[h] = &t2->list_entry_hash; 951 else 952 Vcb->trees_ptrs[h] = NULL; 953 } 954 } 955 956 RemoveEntryList(&t3->list_entry_hash); 957 958 t3->hash = calc_crc32c(0xffffffff, (UINT8*)&t3->header.address, sizeof(UINT64)); 959 h = t3->hash >> 24; 960 961 if (!Vcb->trees_ptrs[h]) { 962 UINT8 h2 = h; 963 964 le2 = Vcb->trees_hash.Flink; 965 966 if (h2 > 0) { 967 h2--; 968 do { 969 if (Vcb->trees_ptrs[h2]) { 970 le2 = Vcb->trees_ptrs[h2]; 971 break; 972 } 973 974 h2--; 975 } while (h2 > 0); 976 } 977 } else 978 le2 = Vcb->trees_ptrs[h]; 979 980 inserted = FALSE; 981 while (le2 != &Vcb->trees_hash) { 982 tree* t2 = CONTAINING_RECORD(le2, tree, list_entry_hash); 983 984 if (t2->hash >= t3->hash) { 985 InsertHeadList(le2->Blink, &t3->list_entry_hash); 986 inserted = TRUE; 987 break; 988 } 989 990 le2 = le2->Flink; 991 } 992 993 if (!inserted) 994 InsertTailList(&Vcb->trees_hash, &t3->list_entry_hash); 995 996 if (!Vcb->trees_ptrs[h] || t3->list_entry_hash.Flink == Vcb->trees_ptrs[h]) 997 Vcb->trees_ptrs[h] = &t3->list_entry_hash; 998 999 if (data_items && level == 0) { 1000 le2 = data_items->Flink; 1001 1002 while (le2 != data_items) { 1003 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry); 1004 LIST_ENTRY* le3 = t3->itemlist.Flink; 1005 1006 while (le3 != &t3->itemlist) { 1007 tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry); 1008 1009 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 1010 EXTENT_DATA* ed = (EXTENT_DATA*)td->data; 1011 1012 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { 1013 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 1014 1015 if (ed2->address == dr->address) 1016 ed2->address = dr->new_address; 1017 } 1018 } 1019 1020 le3 = le3->Flink; 1021 } 1022 1023 le2 = le2->Flink; 1024 } 1025 } 1026 1027 t3 = t4; 1028 } 1029 1030 *((UINT32*)mr->data) = ~calc_crc32c(0xffffffff, (UINT8*)&mr->data->fs_uuid, Vcb->superblock.node_size - sizeof(mr->data->csum)); 1031 1032 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG); 1033 if (!tw) { 1034 ERR("out of memory\n"); 1035 Status = STATUS_INSUFFICIENT_RESOURCES; 1036 goto end; 1037 } 1038 1039 tw->address = mr->new_address; 1040 tw->length = Vcb->superblock.node_size; 1041 tw->data = (UINT8*)mr->data; 1042 1043 if (IsListEmpty(&tree_writes)) 1044 InsertTailList(&tree_writes, &tw->list_entry); 1045 else { 1046 BOOL inserted = FALSE; 1047 1048 le2 = tree_writes.Flink; 1049 while (le2 != &tree_writes) { 1050 tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry); 1051 1052 if (tw2->address > tw->address) { 1053 InsertHeadList(le2->Blink, &tw->list_entry); 1054 inserted = TRUE; 1055 break; 1056 } 1057 1058 le2 = le2->Flink; 1059 } 1060 1061 if (!inserted) 1062 InsertTailList(&tree_writes, &tw->list_entry); 1063 } 1064 } 1065 1066 le = le->Flink; 1067 } 1068 } 1069 1070 Status = do_tree_writes(Vcb, &tree_writes, TRUE); 1071 if (!NT_SUCCESS(Status)) { 1072 ERR("do_tree_writes returned %08x\n", Status); 1073 goto end; 1074 } 1075 1076 le = items->Flink; 1077 while (le != items) { 1078 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); 1079 1080 Status = add_metadata_reloc_extent_item(Vcb, mr); 1081 if (!NT_SUCCESS(Status)) { 1082 ERR("add_metadata_reloc_extent_item returned %08x\n", Status); 1083 goto end; 1084 } 1085 1086 le = le->Flink; 1087 } 1088 1089 Status = STATUS_SUCCESS; 1090 1091 end: 1092 while (!IsListEmpty(&tree_writes)) { 1093 tree_write* tw = CONTAINING_RECORD(RemoveHeadList(&tree_writes), tree_write, list_entry); 1094 ExFreePool(tw); 1095 } 1096 1097 return Status; 1098 } 1099 1100 static NTSTATUS balance_metadata_chunk(device_extension* Vcb, chunk* c, BOOL* changed) { 1101 KEY searchkey; 1102 traverse_ptr tp; 1103 NTSTATUS Status; 1104 BOOL b; 1105 LIST_ENTRY items, rollback; 1106 UINT32 loaded = 0; 1107 1108 TRACE("chunk %llx\n", c->offset); 1109 1110 InitializeListHead(&rollback); 1111 InitializeListHead(&items); 1112 1113 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); 1114 1115 searchkey.obj_id = c->offset; 1116 searchkey.obj_type = TYPE_METADATA_ITEM; 1117 searchkey.offset = 0xffffffffffffffff; 1118 1119 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); 1120 if (!NT_SUCCESS(Status)) { 1121 ERR("find_item returned %08x\n", Status); 1122 goto end; 1123 } 1124 1125 do { 1126 traverse_ptr next_tp; 1127 1128 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) 1129 break; 1130 1131 if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { 1132 BOOL tree = FALSE, skinny = FALSE; 1133 1134 if (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) { 1135 tree = TRUE; 1136 skinny = TRUE; 1137 } else if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size && 1138 tp.item->size >= sizeof(EXTENT_ITEM)) { 1139 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; 1140 1141 if (ei->flags & EXTENT_ITEM_TREE_BLOCK) 1142 tree = TRUE; 1143 } 1144 1145 if (tree) { 1146 Status = add_metadata_reloc(Vcb, &items, &tp, skinny, NULL, c, &rollback); 1147 1148 if (!NT_SUCCESS(Status)) { 1149 ERR("add_metadata_reloc returned %08x\n", Status); 1150 goto end; 1151 } 1152 1153 loaded++; 1154 1155 if (loaded >= 64) // only do 64 at a time 1156 break; 1157 } 1158 } 1159 1160 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); 1161 1162 if (b) 1163 tp = next_tp; 1164 } while (b); 1165 1166 if (IsListEmpty(&items)) { 1167 *changed = FALSE; 1168 Status = STATUS_SUCCESS; 1169 goto end; 1170 } else 1171 *changed = TRUE; 1172 1173 Status = write_metadata_items(Vcb, &items, NULL, c, &rollback); 1174 if (!NT_SUCCESS(Status)) { 1175 ERR("write_metadata_items returned %08x\n", Status); 1176 goto end; 1177 } 1178 1179 Status = STATUS_SUCCESS; 1180 1181 Vcb->need_write = TRUE; 1182 1183 end: 1184 if (NT_SUCCESS(Status)) { 1185 Status = do_write(Vcb, NULL); 1186 if (!NT_SUCCESS(Status)) 1187 ERR("do_write returned %08x\n", Status); 1188 } 1189 1190 if (NT_SUCCESS(Status)) 1191 clear_rollback(&rollback); 1192 else 1193 do_rollback(Vcb, &rollback); 1194 1195 free_trees(Vcb); 1196 1197 ExReleaseResourceLite(&Vcb->tree_lock); 1198 1199 while (!IsListEmpty(&items)) { 1200 metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&items), metadata_reloc, list_entry); 1201 1202 while (!IsListEmpty(&mr->refs)) { 1203 metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry); 1204 1205 ExFreePool(ref); 1206 } 1207 1208 ExFreePool(mr); 1209 } 1210 1211 return Status; 1212 } 1213 1214 static NTSTATUS data_reloc_add_tree_edr(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* metadata_items, 1215 data_reloc* dr, EXTENT_DATA_REF* edr, LIST_ENTRY* rollback) { 1216 NTSTATUS Status; 1217 LIST_ENTRY* le; 1218 KEY searchkey; 1219 traverse_ptr tp; 1220 root* r = NULL; 1221 metadata_reloc* mr; 1222 UINT64 last_tree = 0; 1223 data_reloc_ref* ref; 1224 1225 le = Vcb->roots.Flink; 1226 while (le != &Vcb->roots) { 1227 root* r2 = CONTAINING_RECORD(le, root, list_entry); 1228 1229 if (r2->id == edr->root) { 1230 r = r2; 1231 break; 1232 } 1233 1234 le = le->Flink; 1235 } 1236 1237 if (!r) { 1238 ERR("could not find subvol %llx\n", edr->count); 1239 return STATUS_INTERNAL_ERROR; 1240 } 1241 1242 searchkey.obj_id = edr->objid; 1243 searchkey.obj_type = TYPE_EXTENT_DATA; 1244 searchkey.offset = 0; 1245 1246 Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL); 1247 if (!NT_SUCCESS(Status)) { 1248 ERR("find_item returned %08x\n", Status); 1249 return Status; 1250 } 1251 1252 if (tp.item->key.obj_id < searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type < searchkey.obj_type)) { 1253 traverse_ptr tp2; 1254 1255 if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL)) 1256 tp = tp2; 1257 else { 1258 ERR("could not find EXTENT_DATA for inode %llx in root %llx\n", searchkey.obj_id, r->id); 1259 return STATUS_INTERNAL_ERROR; 1260 } 1261 } 1262 1263 ref = NULL; 1264 1265 while (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 1266 traverse_ptr tp2; 1267 1268 if (tp.item->size >= sizeof(EXTENT_DATA)) { 1269 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data; 1270 1271 if ((ed->type == EXTENT_TYPE_PREALLOC || ed->type == EXTENT_TYPE_REGULAR) && tp.item->size >= offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)) { 1272 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 1273 1274 if (ed2->address == dr->address && ed2->size == dr->size && tp.item->key.offset - ed2->offset == edr->offset) { 1275 if (ref && last_tree == tp.tree->header.address) 1276 ref->edr.count++; 1277 else { 1278 ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); 1279 if (!ref) { 1280 ERR("out of memory\n"); 1281 return STATUS_INSUFFICIENT_RESOURCES; 1282 } 1283 1284 ref->type = TYPE_EXTENT_DATA_REF; 1285 RtlCopyMemory(&ref->edr, edr, sizeof(EXTENT_DATA_REF)); 1286 ref->edr.count = 1; 1287 1288 Status = add_metadata_reloc_parent(Vcb, metadata_items, tp.tree->header.address, &mr, rollback); 1289 if (!NT_SUCCESS(Status)) { 1290 ERR("add_metadata_reloc_parent returned %08x\n", Status); 1291 ExFreePool(ref); 1292 return Status; 1293 } 1294 1295 last_tree = tp.tree->header.address; 1296 ref->parent = mr; 1297 1298 InsertTailList(&dr->refs, &ref->list_entry); 1299 } 1300 } 1301 } 1302 } 1303 1304 if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL)) 1305 tp = tp2; 1306 else 1307 break; 1308 } 1309 1310 return STATUS_SUCCESS; 1311 } 1312 1313 static NTSTATUS add_data_reloc(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* metadata_items, 1314 traverse_ptr* tp, chunk* c, LIST_ENTRY* rollback) { 1315 NTSTATUS Status; 1316 data_reloc* dr; 1317 EXTENT_ITEM* ei; 1318 UINT16 len; 1319 UINT64 inline_rc; 1320 UINT8* ptr; 1321 1322 dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc), ALLOC_TAG); 1323 if (!dr) { 1324 ERR("out of memory\n"); 1325 return STATUS_INSUFFICIENT_RESOURCES; 1326 } 1327 1328 dr->address = tp->item->key.obj_id; 1329 dr->size = tp->item->key.offset; 1330 dr->ei = (EXTENT_ITEM*)tp->item->data; 1331 InitializeListHead(&dr->refs); 1332 1333 Status = delete_tree_item(Vcb, tp); 1334 if (!NT_SUCCESS(Status)) { 1335 ERR("delete_tree_item returned %08x\n", Status); 1336 return Status; 1337 } 1338 1339 if (!c) 1340 c = get_chunk_from_address(Vcb, tp->item->key.obj_id); 1341 1342 if (c) { 1343 ExAcquireResourceExclusiveLite(&c->lock, TRUE); 1344 1345 c->used -= tp->item->key.offset; 1346 1347 space_list_add(c, tp->item->key.obj_id, tp->item->key.offset, rollback); 1348 1349 ExReleaseResourceLite(&c->lock); 1350 } 1351 1352 ei = (EXTENT_ITEM*)tp->item->data; 1353 inline_rc = 0; 1354 1355 len = tp->item->size - sizeof(EXTENT_ITEM); 1356 ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM); 1357 1358 while (len > 0) { 1359 UINT8 secttype = *ptr; 1360 UINT16 sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0); 1361 1362 len--; 1363 1364 if (sectlen > len) { 1365 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen); 1366 return STATUS_INTERNAL_ERROR; 1367 } 1368 1369 if (sectlen == 0) { 1370 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype); 1371 return STATUS_INTERNAL_ERROR; 1372 } 1373 1374 if (secttype == TYPE_EXTENT_DATA_REF) { 1375 EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8)); 1376 1377 inline_rc += edr->count; 1378 1379 Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, edr, rollback); 1380 if (!NT_SUCCESS(Status)) { 1381 ERR("data_reloc_add_tree_edr returned %08x\n", Status); 1382 return Status; 1383 } 1384 } else if (secttype == TYPE_SHARED_DATA_REF) { 1385 metadata_reloc* mr; 1386 data_reloc_ref* ref; 1387 1388 ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); 1389 if (!ref) { 1390 ERR("out of memory\n"); 1391 return STATUS_INSUFFICIENT_RESOURCES; 1392 } 1393 1394 ref->type = TYPE_SHARED_DATA_REF; 1395 RtlCopyMemory(&ref->sdr, ptr + sizeof(UINT8), sizeof(SHARED_DATA_REF)); 1396 inline_rc += ref->sdr.count; 1397 1398 Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback); 1399 if (!NT_SUCCESS(Status)) { 1400 ERR("add_metadata_reloc_parent returned %08x\n", Status); 1401 ExFreePool(ref); 1402 return Status; 1403 } 1404 1405 ref->parent = mr; 1406 1407 InsertTailList(&dr->refs, &ref->list_entry); 1408 } else { 1409 ERR("unexpected tree type %x\n", secttype); 1410 return STATUS_INTERNAL_ERROR; 1411 } 1412 1413 1414 len -= sectlen; 1415 ptr += sizeof(UINT8) + sectlen; 1416 } 1417 1418 if (inline_rc < ei->refcount) { // look for non-inline entries 1419 traverse_ptr tp2 = *tp, next_tp; 1420 1421 while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) { 1422 tp2 = next_tp; 1423 1424 if (tp2.item->key.obj_id == tp->item->key.obj_id) { 1425 if (tp2.item->key.obj_type == TYPE_EXTENT_DATA_REF && tp2.item->size >= sizeof(EXTENT_DATA_REF)) { 1426 Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, (EXTENT_DATA_REF*)tp2.item->data, rollback); 1427 if (!NT_SUCCESS(Status)) { 1428 ERR("data_reloc_add_tree_edr returned %08x\n", Status); 1429 return Status; 1430 } 1431 1432 Status = delete_tree_item(Vcb, &tp2); 1433 if (!NT_SUCCESS(Status)) { 1434 ERR("delete_tree_item returned %08x\n", Status); 1435 return Status; 1436 } 1437 } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(UINT32)) { 1438 metadata_reloc* mr; 1439 data_reloc_ref* ref; 1440 1441 ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); 1442 if (!ref) { 1443 ERR("out of memory\n"); 1444 return STATUS_INSUFFICIENT_RESOURCES; 1445 } 1446 1447 ref->type = TYPE_SHARED_DATA_REF; 1448 ref->sdr.offset = tp2.item->key.offset; 1449 ref->sdr.count = *((UINT32*)tp2.item->data); 1450 1451 Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback); 1452 if (!NT_SUCCESS(Status)) { 1453 ERR("add_metadata_reloc_parent returned %08x\n", Status); 1454 ExFreePool(ref); 1455 return Status; 1456 } 1457 1458 ref->parent = mr; 1459 InsertTailList(&dr->refs, &ref->list_entry); 1460 1461 Status = delete_tree_item(Vcb, &tp2); 1462 if (!NT_SUCCESS(Status)) { 1463 ERR("delete_tree_item returned %08x\n", Status); 1464 return Status; 1465 } 1466 } 1467 } else 1468 break; 1469 } 1470 } 1471 1472 InsertTailList(items, &dr->list_entry); 1473 1474 return STATUS_SUCCESS; 1475 } 1476 1477 static void sort_data_reloc_refs(data_reloc* dr) { 1478 LIST_ENTRY newlist, *le; 1479 1480 if (IsListEmpty(&dr->refs)) 1481 return; 1482 1483 // insertion sort 1484 1485 InitializeListHead(&newlist); 1486 1487 while (!IsListEmpty(&dr->refs)) { 1488 data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry); 1489 BOOL inserted = FALSE; 1490 1491 if (ref->type == TYPE_EXTENT_DATA_REF) 1492 ref->hash = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset); 1493 else if (ref->type == TYPE_SHARED_DATA_REF) 1494 ref->hash = ref->parent->new_address; 1495 1496 le = newlist.Flink; 1497 while (le != &newlist) { 1498 data_reloc_ref* ref2 = CONTAINING_RECORD(le, data_reloc_ref, list_entry); 1499 1500 if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) { 1501 InsertHeadList(le->Blink, &ref->list_entry); 1502 inserted = TRUE; 1503 break; 1504 } 1505 1506 le = le->Flink; 1507 } 1508 1509 if (!inserted) 1510 InsertTailList(&newlist, &ref->list_entry); 1511 } 1512 1513 le = newlist.Flink; 1514 while (le != &newlist) { 1515 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); 1516 1517 if (le->Flink != &newlist) { 1518 data_reloc_ref* ref2 = CONTAINING_RECORD(le->Flink, data_reloc_ref, list_entry); 1519 1520 if (ref->type == TYPE_EXTENT_DATA_REF && ref2->type == TYPE_EXTENT_DATA_REF && ref->edr.root == ref2->edr.root && 1521 ref->edr.objid == ref2->edr.objid && ref->edr.offset == ref2->edr.offset) { 1522 RemoveEntryList(&ref2->list_entry); 1523 ref->edr.count += ref2->edr.count; 1524 ExFreePool(ref2); 1525 continue; 1526 } 1527 } 1528 1529 le = le->Flink; 1530 } 1531 1532 newlist.Flink->Blink = &dr->refs; 1533 newlist.Blink->Flink = &dr->refs; 1534 dr->refs.Flink = newlist.Flink; 1535 dr->refs.Blink = newlist.Blink; 1536 } 1537 1538 static NTSTATUS add_data_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, data_reloc* dr) { 1539 NTSTATUS Status; 1540 LIST_ENTRY* le; 1541 UINT64 rc = 0; 1542 UINT16 inline_len; 1543 BOOL all_inline = TRUE; 1544 data_reloc_ref* first_noninline = NULL; 1545 EXTENT_ITEM* ei; 1546 UINT8* ptr; 1547 1548 inline_len = sizeof(EXTENT_ITEM); 1549 1550 sort_data_reloc_refs(dr); 1551 1552 le = dr->refs.Flink; 1553 while (le != &dr->refs) { 1554 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); 1555 UINT16 extlen = 0; 1556 1557 if (ref->type == TYPE_EXTENT_DATA_REF) { 1558 extlen += sizeof(EXTENT_DATA_REF); 1559 rc += ref->edr.count; 1560 } else if (ref->type == TYPE_SHARED_DATA_REF) { 1561 extlen += sizeof(SHARED_DATA_REF); 1562 rc++; 1563 } 1564 1565 if (all_inline) { 1566 if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) { 1567 all_inline = FALSE; 1568 first_noninline = ref; 1569 } else 1570 inline_len += extlen + 1; 1571 } 1572 1573 le = le->Flink; 1574 } 1575 1576 ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG); 1577 if (!ei) { 1578 ERR("out of memory\n"); 1579 return STATUS_INSUFFICIENT_RESOURCES; 1580 } 1581 1582 ei->refcount = rc; 1583 ei->generation = dr->ei->generation; 1584 ei->flags = dr->ei->flags; 1585 ptr = (UINT8*)&ei[1]; 1586 1587 le = dr->refs.Flink; 1588 while (le != &dr->refs) { 1589 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); 1590 1591 if (ref == first_noninline) 1592 break; 1593 1594 *ptr = ref->type; 1595 ptr++; 1596 1597 if (ref->type == TYPE_EXTENT_DATA_REF) { 1598 EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)ptr; 1599 1600 RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF)); 1601 1602 ptr += sizeof(EXTENT_DATA_REF); 1603 } else if (ref->type == TYPE_SHARED_DATA_REF) { 1604 SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)ptr; 1605 1606 sdr->offset = ref->parent->new_address; 1607 sdr->count = ref->sdr.count; 1608 1609 ptr += sizeof(SHARED_DATA_REF); 1610 } 1611 1612 le = le->Flink; 1613 } 1614 1615 Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL); 1616 if (!NT_SUCCESS(Status)) { 1617 ERR("insert_tree_item returned %08x\n", Status); 1618 return Status; 1619 } 1620 1621 if (!all_inline) { 1622 le = &first_noninline->list_entry; 1623 1624 while (le != &dr->refs) { 1625 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); 1626 1627 if (ref->type == TYPE_EXTENT_DATA_REF) { 1628 EXTENT_DATA_REF* edr; 1629 1630 edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG); 1631 if (!edr) { 1632 ERR("out of memory\n"); 1633 return STATUS_INSUFFICIENT_RESOURCES; 1634 } 1635 1636 RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF)); 1637 1638 Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, ref->hash, edr, sizeof(EXTENT_DATA_REF), NULL, NULL); 1639 if (!NT_SUCCESS(Status)) { 1640 ERR("insert_tree_item returned %08x\n", Status); 1641 return Status; 1642 } 1643 } else if (ref->type == TYPE_SHARED_DATA_REF) { 1644 UINT32* sdr; 1645 1646 sdr = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32), ALLOC_TAG); 1647 if (!sdr) { 1648 ERR("out of memory\n"); 1649 return STATUS_INSUFFICIENT_RESOURCES; 1650 } 1651 1652 *sdr = ref->sdr.count; 1653 1654 Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, ref->parent->new_address, sdr, sizeof(UINT32), NULL, NULL); 1655 if (!NT_SUCCESS(Status)) { 1656 ERR("insert_tree_item returned %08x\n", Status); 1657 return Status; 1658 } 1659 } 1660 1661 le = le->Flink; 1662 } 1663 } 1664 1665 return STATUS_SUCCESS; 1666 } 1667 1668 static NTSTATUS balance_data_chunk(device_extension* Vcb, chunk* c, BOOL* changed) { 1669 KEY searchkey; 1670 traverse_ptr tp; 1671 NTSTATUS Status; 1672 BOOL b; 1673 LIST_ENTRY items, metadata_items, rollback, *le; 1674 UINT64 loaded = 0, num_loaded = 0; 1675 chunk* newchunk = NULL; 1676 UINT8* data = NULL; 1677 1678 TRACE("chunk %llx\n", c->offset); 1679 1680 InitializeListHead(&rollback); 1681 InitializeListHead(&items); 1682 InitializeListHead(&metadata_items); 1683 1684 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); 1685 1686 searchkey.obj_id = c->offset; 1687 searchkey.obj_type = TYPE_EXTENT_ITEM; 1688 searchkey.offset = 0xffffffffffffffff; 1689 1690 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); 1691 if (!NT_SUCCESS(Status)) { 1692 ERR("find_item returned %08x\n", Status); 1693 goto end; 1694 } 1695 1696 do { 1697 traverse_ptr next_tp; 1698 1699 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) 1700 break; 1701 1702 if (tp.item->key.obj_id >= c->offset && tp.item->key.obj_type == TYPE_EXTENT_ITEM) { 1703 BOOL tree = FALSE; 1704 1705 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) { 1706 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; 1707 1708 if (ei->flags & EXTENT_ITEM_TREE_BLOCK) 1709 tree = TRUE; 1710 } 1711 1712 if (!tree) { 1713 Status = add_data_reloc(Vcb, &items, &metadata_items, &tp, c, &rollback); 1714 1715 if (!NT_SUCCESS(Status)) { 1716 ERR("add_data_reloc returned %08x\n", Status); 1717 goto end; 1718 } 1719 1720 loaded += tp.item->key.offset; 1721 num_loaded++; 1722 1723 if (loaded >= 0x1000000 || num_loaded >= 100) // only do so much at a time, so we don't block too obnoxiously 1724 break; 1725 } 1726 } 1727 1728 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); 1729 1730 if (b) 1731 tp = next_tp; 1732 } while (b); 1733 1734 if (IsListEmpty(&items)) { 1735 *changed = FALSE; 1736 Status = STATUS_SUCCESS; 1737 goto end; 1738 } else 1739 *changed = TRUE; 1740 1741 data = ExAllocatePoolWithTag(PagedPool, BALANCE_UNIT, ALLOC_TAG); 1742 if (!data) { 1743 ERR("out of memory\n"); 1744 Status = STATUS_INSUFFICIENT_RESOURCES; 1745 goto end; 1746 } 1747 1748 le = items.Flink; 1749 while (le != &items) { 1750 data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry); 1751 BOOL done = FALSE; 1752 LIST_ENTRY* le2; 1753 UINT32* csum; 1754 RTL_BITMAP bmp; 1755 ULONG* bmparr; 1756 ULONG runlength, index, lastoff; 1757 1758 if (newchunk) { 1759 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); 1760 1761 if (find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) { 1762 newchunk->used += dr->size; 1763 space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback); 1764 done = TRUE; 1765 } 1766 1767 ExReleaseResourceLite(&newchunk->lock); 1768 } 1769 1770 if (!done) { 1771 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); 1772 1773 le2 = Vcb->chunks.Flink; 1774 while (le2 != &Vcb->chunks) { 1775 chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry); 1776 1777 if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == Vcb->data_flags) { 1778 ExAcquireResourceExclusiveLite(&c2->lock, TRUE); 1779 1780 if ((c2->chunk_item->size - c2->used) >= dr->size) { 1781 if (find_data_address_in_chunk(Vcb, c2, dr->size, &dr->new_address)) { 1782 c2->used += dr->size; 1783 space_list_subtract(c2, FALSE, dr->new_address, dr->size, &rollback); 1784 ExReleaseResourceLite(&c2->lock); 1785 newchunk = c2; 1786 done = TRUE; 1787 break; 1788 } 1789 } 1790 1791 ExReleaseResourceLite(&c2->lock); 1792 } 1793 1794 le2 = le2->Flink; 1795 } 1796 1797 // allocate new chunk if necessary 1798 if (!done) { 1799 Status = alloc_chunk(Vcb, Vcb->data_flags, &newchunk, FALSE); 1800 1801 if (!NT_SUCCESS(Status)) { 1802 ERR("alloc_chunk returned %08x\n", Status); 1803 ExReleaseResourceLite(&Vcb->chunk_lock); 1804 goto end; 1805 } 1806 1807 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); 1808 1809 newchunk->balance_num = Vcb->balance.balance_num; 1810 1811 if (!find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) { 1812 ExReleaseResourceLite(&newchunk->lock); 1813 ExReleaseResourceLite(&Vcb->chunk_lock); 1814 ERR("could not find address in new chunk\n"); 1815 Status = STATUS_DISK_FULL; 1816 goto end; 1817 } else { 1818 newchunk->used += dr->size; 1819 space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback); 1820 } 1821 1822 ExReleaseResourceLite(&newchunk->lock); 1823 } 1824 1825 ExReleaseResourceLite(&Vcb->chunk_lock); 1826 } 1827 1828 dr->newchunk = newchunk; 1829 1830 bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align((dr->size / Vcb->superblock.sector_size) + 1, sizeof(ULONG)), ALLOC_TAG); 1831 if (!bmparr) { 1832 ERR("out of memory\n"); 1833 Status = STATUS_INSUFFICIENT_RESOURCES; 1834 goto end; 1835 } 1836 1837 csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(dr->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); 1838 if (!csum) { 1839 ERR("out of memory\n"); 1840 ExFreePool(bmparr); 1841 Status = STATUS_INSUFFICIENT_RESOURCES; 1842 goto end; 1843 } 1844 1845 RtlInitializeBitMap(&bmp, bmparr, (ULONG)(dr->size / Vcb->superblock.sector_size)); 1846 RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum 1847 1848 searchkey.obj_id = EXTENT_CSUM_ID; 1849 searchkey.obj_type = TYPE_EXTENT_CSUM; 1850 searchkey.offset = dr->address; 1851 1852 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, NULL); 1853 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { 1854 ERR("find_item returned %08x\n", Status); 1855 ExFreePool(csum); 1856 ExFreePool(bmparr); 1857 goto end; 1858 } 1859 1860 if (Status != STATUS_NOT_FOUND) { 1861 do { 1862 traverse_ptr next_tp; 1863 1864 if (tp.item->key.obj_type == TYPE_EXTENT_CSUM) { 1865 if (tp.item->key.offset >= dr->address + dr->size) 1866 break; 1867 else if (tp.item->size >= sizeof(UINT32) && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= dr->address) { 1868 UINT64 cs = max(dr->address, tp.item->key.offset); 1869 UINT64 ce = min(dr->address + dr->size, tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32))); 1870 1871 RtlCopyMemory(csum + ((cs - dr->address) / Vcb->superblock.sector_size), 1872 tp.item->data + ((cs - tp.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size), 1873 (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size)); 1874 1875 RtlClearBits(&bmp, (ULONG)((cs - dr->address) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size)); 1876 1877 if (ce == dr->address + dr->size) 1878 break; 1879 } 1880 } 1881 1882 if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL)) 1883 tp = next_tp; 1884 else 1885 break; 1886 } while (TRUE); 1887 } 1888 1889 lastoff = 0; 1890 runlength = RtlFindFirstRunClear(&bmp, &index); 1891 1892 while (runlength != 0) { 1893 if (index > lastoff) { 1894 ULONG off = lastoff; 1895 ULONG size = index - lastoff; 1896 1897 // handle no csum run 1898 do { 1899 ULONG rl; 1900 1901 if (size * Vcb->superblock.sector_size > BALANCE_UNIT) 1902 rl = BALANCE_UNIT / Vcb->superblock.sector_size; 1903 else 1904 rl = size; 1905 1906 Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data, 1907 c, NULL, NULL, 0, FALSE, NormalPagePriority); 1908 if (!NT_SUCCESS(Status)) { 1909 ERR("read_data returned %08x\n", Status); 1910 ExFreePool(csum); 1911 ExFreePool(bmparr); 1912 goto end; 1913 } 1914 1915 Status = write_data_complete(Vcb, dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size, 1916 NULL, newchunk, FALSE, 0, NormalPagePriority); 1917 if (!NT_SUCCESS(Status)) { 1918 ERR("write_data_complete returned %08x\n", Status); 1919 ExFreePool(csum); 1920 ExFreePool(bmparr); 1921 goto end; 1922 } 1923 1924 size -= rl; 1925 off += rl; 1926 } while (size > 0); 1927 } 1928 1929 add_checksum_entry(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), runlength, &csum[index], NULL); 1930 add_checksum_entry(Vcb, dr->address + (index * Vcb->superblock.sector_size), runlength, NULL, NULL); 1931 1932 // handle csum run 1933 do { 1934 ULONG rl; 1935 1936 if (runlength * Vcb->superblock.sector_size > BALANCE_UNIT) 1937 rl = BALANCE_UNIT / Vcb->superblock.sector_size; 1938 else 1939 rl = runlength; 1940 1941 Status = read_data(Vcb, dr->address + (index * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index], FALSE, data, 1942 c, NULL, NULL, 0, FALSE, NormalPagePriority); 1943 if (!NT_SUCCESS(Status)) { 1944 ERR("read_data returned %08x\n", Status); 1945 ExFreePool(csum); 1946 ExFreePool(bmparr); 1947 goto end; 1948 } 1949 1950 Status = write_data_complete(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size, 1951 NULL, newchunk, FALSE, 0, NormalPagePriority); 1952 if (!NT_SUCCESS(Status)) { 1953 ERR("write_data_complete returned %08x\n", Status); 1954 ExFreePool(csum); 1955 ExFreePool(bmparr); 1956 goto end; 1957 } 1958 1959 runlength -= rl; 1960 index += rl; 1961 } while (runlength > 0); 1962 1963 lastoff = index; 1964 runlength = RtlFindNextForwardRunClear(&bmp, index, &index); 1965 } 1966 1967 ExFreePool(csum); 1968 ExFreePool(bmparr); 1969 1970 // handle final nocsum run 1971 if (lastoff < dr->size / Vcb->superblock.sector_size) { 1972 ULONG off = lastoff; 1973 ULONG size = (ULONG)((dr->size / Vcb->superblock.sector_size) - lastoff); 1974 1975 do { 1976 ULONG rl; 1977 1978 if (size * Vcb->superblock.sector_size > BALANCE_UNIT) 1979 rl = BALANCE_UNIT / Vcb->superblock.sector_size; 1980 else 1981 rl = size; 1982 1983 Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data, 1984 c, NULL, NULL, 0, FALSE, NormalPagePriority); 1985 if (!NT_SUCCESS(Status)) { 1986 ERR("read_data returned %08x\n", Status); 1987 ExFreePool(csum); 1988 ExFreePool(bmparr); 1989 goto end; 1990 } 1991 1992 Status = write_data_complete(Vcb, dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size, 1993 NULL, newchunk, FALSE, 0, NormalPagePriority); 1994 if (!NT_SUCCESS(Status)) { 1995 ERR("write_data_complete returned %08x\n", Status); 1996 ExFreePool(csum); 1997 ExFreePool(bmparr); 1998 goto end; 1999 } 2000 2001 size -= rl; 2002 off += rl; 2003 } while (size > 0); 2004 } 2005 2006 le = le->Flink; 2007 } 2008 2009 ExFreePool(data); 2010 data = NULL; 2011 2012 Status = write_metadata_items(Vcb, &metadata_items, &items, NULL, &rollback); 2013 if (!NT_SUCCESS(Status)) { 2014 ERR("write_metadata_items returned %08x\n", Status); 2015 goto end; 2016 } 2017 2018 le = items.Flink; 2019 while (le != &items) { 2020 data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry); 2021 2022 Status = add_data_reloc_extent_item(Vcb, dr); 2023 if (!NT_SUCCESS(Status)) { 2024 ERR("add_data_reloc_extent_item returned %08x\n", Status); 2025 goto end; 2026 } 2027 2028 le = le->Flink; 2029 } 2030 2031 le = c->changed_extents.Flink; 2032 while (le != &c->changed_extents) { 2033 LIST_ENTRY *le2, *le3; 2034 changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry); 2035 2036 le3 = le->Flink; 2037 2038 le2 = items.Flink; 2039 while (le2 != &items) { 2040 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry); 2041 2042 if (ce->address == dr->address) { 2043 ce->address = dr->new_address; 2044 RemoveEntryList(&ce->list_entry); 2045 InsertTailList(&dr->newchunk->changed_extents, &ce->list_entry); 2046 break; 2047 } 2048 2049 le2 = le2->Flink; 2050 } 2051 2052 le = le3; 2053 } 2054 2055 Status = STATUS_SUCCESS; 2056 2057 Vcb->need_write = TRUE; 2058 2059 end: 2060 if (NT_SUCCESS(Status)) { 2061 // update extents in cache inodes before we flush 2062 le = Vcb->chunks.Flink; 2063 while (le != &Vcb->chunks) { 2064 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry); 2065 2066 if (c2->cache) { 2067 LIST_ENTRY* le2; 2068 2069 ExAcquireResourceExclusiveLite(c2->cache->Header.Resource, TRUE); 2070 2071 le2 = c2->cache->extents.Flink; 2072 while (le2 != &c2->cache->extents) { 2073 extent* ext = CONTAINING_RECORD(le2, extent, list_entry); 2074 2075 if (!ext->ignore) { 2076 if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) { 2077 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 2078 2079 if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) { 2080 LIST_ENTRY* le3 = items.Flink; 2081 while (le3 != &items) { 2082 data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry); 2083 2084 if (ed2->address == dr->address) { 2085 ed2->address = dr->new_address; 2086 break; 2087 } 2088 2089 le3 = le3->Flink; 2090 } 2091 } 2092 } 2093 } 2094 2095 le2 = le2->Flink; 2096 } 2097 2098 ExReleaseResourceLite(c2->cache->Header.Resource); 2099 } 2100 2101 le = le->Flink; 2102 } 2103 2104 Status = do_write(Vcb, NULL); 2105 if (!NT_SUCCESS(Status)) 2106 ERR("do_write returned %08x\n", Status); 2107 } 2108 2109 if (NT_SUCCESS(Status)) { 2110 clear_rollback(&rollback); 2111 2112 // update open FCBs 2113 // FIXME - speed this up(?) 2114 2115 acquire_fcb_lock_shared(Vcb); 2116 2117 le = Vcb->all_fcbs.Flink; 2118 while (le != &Vcb->all_fcbs) { 2119 struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all); 2120 LIST_ENTRY* le2; 2121 2122 ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); 2123 2124 le2 = fcb->extents.Flink; 2125 while (le2 != &fcb->extents) { 2126 extent* ext = CONTAINING_RECORD(le2, extent, list_entry); 2127 2128 if (!ext->ignore) { 2129 if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) { 2130 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; 2131 2132 if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) { 2133 LIST_ENTRY* le3 = items.Flink; 2134 while (le3 != &items) { 2135 data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry); 2136 2137 if (ed2->address == dr->address) { 2138 ed2->address = dr->new_address; 2139 break; 2140 } 2141 2142 le3 = le3->Flink; 2143 } 2144 } 2145 } 2146 } 2147 2148 le2 = le2->Flink; 2149 } 2150 2151 ExReleaseResourceLite(fcb->Header.Resource); 2152 2153 le = le->Flink; 2154 } 2155 2156 release_fcb_lock(Vcb); 2157 } else 2158 do_rollback(Vcb, &rollback); 2159 2160 free_trees(Vcb); 2161 2162 ExReleaseResourceLite(&Vcb->tree_lock); 2163 2164 if (data) 2165 ExFreePool(data); 2166 2167 while (!IsListEmpty(&items)) { 2168 data_reloc* dr = CONTAINING_RECORD(RemoveHeadList(&items), data_reloc, list_entry); 2169 2170 while (!IsListEmpty(&dr->refs)) { 2171 data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry); 2172 2173 ExFreePool(ref); 2174 } 2175 2176 ExFreePool(dr); 2177 } 2178 2179 while (!IsListEmpty(&metadata_items)) { 2180 metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&metadata_items), metadata_reloc, list_entry); 2181 2182 while (!IsListEmpty(&mr->refs)) { 2183 metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry); 2184 2185 ExFreePool(ref); 2186 } 2187 2188 ExFreePool(mr); 2189 } 2190 2191 return Status; 2192 } 2193 2194 static __inline UINT64 get_chunk_dup_type(chunk* c) { 2195 if (c->chunk_item->type & BLOCK_FLAG_RAID0) 2196 return BLOCK_FLAG_RAID0; 2197 else if (c->chunk_item->type & BLOCK_FLAG_RAID1) 2198 return BLOCK_FLAG_RAID1; 2199 else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) 2200 return BLOCK_FLAG_DUPLICATE; 2201 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 2202 return BLOCK_FLAG_RAID10; 2203 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) 2204 return BLOCK_FLAG_RAID5; 2205 else if (c->chunk_item->type & BLOCK_FLAG_RAID6) 2206 return BLOCK_FLAG_RAID6; 2207 else 2208 return BLOCK_FLAG_SINGLE; 2209 } 2210 2211 static BOOL should_balance_chunk(device_extension* Vcb, UINT8 sort, chunk* c) { 2212 btrfs_balance_opts* opts; 2213 2214 opts = &Vcb->balance.opts[sort]; 2215 2216 if (!(opts->flags & BTRFS_BALANCE_OPTS_ENABLED)) 2217 return FALSE; 2218 2219 if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) { 2220 UINT64 type = get_chunk_dup_type(c); 2221 2222 if (!(type & opts->profiles)) 2223 return FALSE; 2224 } 2225 2226 if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) { 2227 UINT16 i; 2228 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 2229 BOOL b = FALSE; 2230 2231 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2232 if (cis[i].dev_id == opts->devid) { 2233 b = TRUE; 2234 break; 2235 } 2236 } 2237 2238 if (!b) 2239 return FALSE; 2240 } 2241 2242 if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) { 2243 UINT16 i, factor; 2244 UINT64 physsize; 2245 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 2246 BOOL b = FALSE; 2247 2248 if (c->chunk_item->type & BLOCK_FLAG_RAID0) 2249 factor = c->chunk_item->num_stripes; 2250 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 2251 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; 2252 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) 2253 factor = c->chunk_item->num_stripes - 1; 2254 else if (c->chunk_item->type & BLOCK_FLAG_RAID6) 2255 factor = c->chunk_item->num_stripes - 2; 2256 else // SINGLE, DUPLICATE, RAID1 2257 factor = 1; 2258 2259 physsize = c->chunk_item->size / factor; 2260 2261 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2262 if (cis[i].offset < opts->drange_end && cis[i].offset + physsize >= opts->drange_start && 2263 (!(opts->flags & BTRFS_BALANCE_OPTS_DEVID) || cis[i].dev_id == opts->devid)) { 2264 b = TRUE; 2265 break; 2266 } 2267 } 2268 2269 if (!b) 2270 return FALSE; 2271 } 2272 2273 if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) { 2274 if (c->offset + c->chunk_item->size <= opts->vrange_start || c->offset > opts->vrange_end) 2275 return FALSE; 2276 } 2277 2278 if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) { 2279 if (c->chunk_item->num_stripes < opts->stripes_start || c->chunk_item->num_stripes < opts->stripes_end) 2280 return FALSE; 2281 } 2282 2283 if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) { 2284 UINT64 usage = c->used * 100 / c->chunk_item->size; 2285 2286 // usage == 0 should mean completely empty, not just that usage rounds to 0% 2287 if (c->used > 0 && usage == 0) 2288 usage = 1; 2289 2290 if (usage < opts->usage_start || usage > opts->usage_end) 2291 return FALSE; 2292 } 2293 2294 if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT && opts->flags & BTRFS_BALANCE_OPTS_SOFT) { 2295 UINT64 type = get_chunk_dup_type(c); 2296 2297 if (type == opts->convert) 2298 return FALSE; 2299 } 2300 2301 return TRUE; 2302 } 2303 2304 static void copy_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) { 2305 if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) { 2306 args->profiles = opts->profiles; 2307 args->flags |= BALANCE_ARGS_FLAGS_PROFILES; 2308 } 2309 2310 if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) { 2311 if (args->usage_start == 0) { 2312 args->flags |= BALANCE_ARGS_FLAGS_USAGE_RANGE; 2313 args->usage_start = opts->usage_start; 2314 args->usage_end = opts->usage_end; 2315 } else { 2316 args->flags |= BALANCE_ARGS_FLAGS_USAGE; 2317 args->usage = opts->usage_end; 2318 } 2319 } 2320 2321 if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) { 2322 args->devid = opts->devid; 2323 args->flags |= BALANCE_ARGS_FLAGS_DEVID; 2324 } 2325 2326 if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) { 2327 args->drange_start = opts->drange_start; 2328 args->drange_end = opts->drange_end; 2329 args->flags |= BALANCE_ARGS_FLAGS_DRANGE; 2330 } 2331 2332 if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) { 2333 args->vrange_start = opts->vrange_start; 2334 args->vrange_end = opts->vrange_end; 2335 args->flags |= BALANCE_ARGS_FLAGS_VRANGE; 2336 } 2337 2338 if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT) { 2339 args->convert = opts->convert; 2340 args->flags |= BALANCE_ARGS_FLAGS_CONVERT; 2341 2342 if (opts->flags & BTRFS_BALANCE_OPTS_SOFT) 2343 args->flags |= BALANCE_ARGS_FLAGS_SOFT; 2344 } 2345 2346 if (opts->flags & BTRFS_BALANCE_OPTS_LIMIT) { 2347 if (args->limit_start == 0) { 2348 args->flags |= BALANCE_ARGS_FLAGS_LIMIT_RANGE; 2349 args->limit_start = (UINT32)opts->limit_start; 2350 args->limit_end = (UINT32)opts->limit_end; 2351 } else { 2352 args->flags |= BALANCE_ARGS_FLAGS_LIMIT; 2353 args->limit = opts->limit_end; 2354 } 2355 } 2356 2357 if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) { 2358 args->stripes_start = opts->stripes_start; 2359 args->stripes_end = opts->stripes_end; 2360 args->flags |= BALANCE_ARGS_FLAGS_STRIPES_RANGE; 2361 } 2362 } 2363 2364 static NTSTATUS add_balance_item(device_extension* Vcb) { 2365 KEY searchkey; 2366 traverse_ptr tp; 2367 NTSTATUS Status; 2368 BALANCE_ITEM* bi; 2369 2370 searchkey.obj_id = BALANCE_ITEM_ID; 2371 searchkey.obj_type = TYPE_TEMP_ITEM; 2372 searchkey.offset = 0; 2373 2374 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); 2375 2376 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); 2377 if (!NT_SUCCESS(Status)) { 2378 ERR("find_item returned %08x\n", Status); 2379 goto end; 2380 } 2381 2382 if (!keycmp(tp.item->key, searchkey)) { 2383 Status = delete_tree_item(Vcb, &tp); 2384 if (!NT_SUCCESS(Status)) { 2385 ERR("delete_tree_item returned %08x\n", Status); 2386 goto end; 2387 } 2388 } 2389 2390 bi = ExAllocatePoolWithTag(PagedPool, sizeof(BALANCE_ITEM), ALLOC_TAG); 2391 if (!bi) { 2392 ERR("out of memory\n"); 2393 Status = STATUS_INSUFFICIENT_RESOURCES; 2394 goto end; 2395 } 2396 2397 RtlZeroMemory(bi, sizeof(BALANCE_ITEM)); 2398 2399 if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) { 2400 bi->flags |= BALANCE_FLAGS_DATA; 2401 copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data); 2402 } 2403 2404 if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) { 2405 bi->flags |= BALANCE_FLAGS_METADATA; 2406 copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata); 2407 } 2408 2409 if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED) { 2410 bi->flags |= BALANCE_FLAGS_SYSTEM; 2411 copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system); 2412 } 2413 2414 Status = insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL); 2415 if (!NT_SUCCESS(Status)) { 2416 ERR("insert_tree_item returned %08x\n", Status); 2417 ExFreePool(bi); 2418 goto end; 2419 } 2420 2421 Status = STATUS_SUCCESS; 2422 2423 end: 2424 if (NT_SUCCESS(Status)) { 2425 Status = do_write(Vcb, NULL); 2426 if (!NT_SUCCESS(Status)) 2427 ERR("do_write returned %08x\n", Status); 2428 } 2429 2430 free_trees(Vcb); 2431 2432 ExReleaseResourceLite(&Vcb->tree_lock); 2433 2434 return Status; 2435 } 2436 2437 static NTSTATUS remove_balance_item(device_extension* Vcb) { 2438 KEY searchkey; 2439 traverse_ptr tp; 2440 NTSTATUS Status; 2441 2442 searchkey.obj_id = BALANCE_ITEM_ID; 2443 searchkey.obj_type = TYPE_TEMP_ITEM; 2444 searchkey.offset = 0; 2445 2446 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); 2447 2448 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); 2449 if (!NT_SUCCESS(Status)) { 2450 ERR("find_item returned %08x\n", Status); 2451 goto end; 2452 } 2453 2454 if (!keycmp(tp.item->key, searchkey)) { 2455 Status = delete_tree_item(Vcb, &tp); 2456 if (!NT_SUCCESS(Status)) { 2457 ERR("delete_tree_item returned %08x\n", Status); 2458 goto end; 2459 } 2460 2461 Status = do_write(Vcb, NULL); 2462 if (!NT_SUCCESS(Status)) { 2463 ERR("do_write returned %08x\n", Status); 2464 goto end; 2465 } 2466 2467 free_trees(Vcb); 2468 } 2469 2470 Status = STATUS_SUCCESS; 2471 2472 end: 2473 ExReleaseResourceLite(&Vcb->tree_lock); 2474 2475 return Status; 2476 } 2477 2478 static void load_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) { 2479 opts->flags = BTRFS_BALANCE_OPTS_ENABLED; 2480 2481 if (args->flags & BALANCE_ARGS_FLAGS_PROFILES) { 2482 opts->flags |= BTRFS_BALANCE_OPTS_PROFILES; 2483 opts->profiles = args->profiles; 2484 } 2485 2486 if (args->flags & BALANCE_ARGS_FLAGS_USAGE) { 2487 opts->flags |= BTRFS_BALANCE_OPTS_USAGE; 2488 2489 opts->usage_start = 0; 2490 opts->usage_end = (UINT8)args->usage; 2491 } else if (args->flags & BALANCE_ARGS_FLAGS_USAGE_RANGE) { 2492 opts->flags |= BTRFS_BALANCE_OPTS_USAGE; 2493 2494 opts->usage_start = (UINT8)args->usage_start; 2495 opts->usage_end = (UINT8)args->usage_end; 2496 } 2497 2498 if (args->flags & BALANCE_ARGS_FLAGS_DEVID) { 2499 opts->flags |= BTRFS_BALANCE_OPTS_DEVID; 2500 opts->devid = args->devid; 2501 } 2502 2503 if (args->flags & BALANCE_ARGS_FLAGS_DRANGE) { 2504 opts->flags |= BTRFS_BALANCE_OPTS_DRANGE; 2505 opts->drange_start = args->drange_start; 2506 opts->drange_end = args->drange_end; 2507 } 2508 2509 if (args->flags & BALANCE_ARGS_FLAGS_VRANGE) { 2510 opts->flags |= BTRFS_BALANCE_OPTS_VRANGE; 2511 opts->vrange_start = args->vrange_start; 2512 opts->vrange_end = args->vrange_end; 2513 } 2514 2515 if (args->flags & BALANCE_ARGS_FLAGS_LIMIT) { 2516 opts->flags |= BTRFS_BALANCE_OPTS_LIMIT; 2517 2518 opts->limit_start = 0; 2519 opts->limit_end = args->limit; 2520 } else if (args->flags & BALANCE_ARGS_FLAGS_LIMIT_RANGE) { 2521 opts->flags |= BTRFS_BALANCE_OPTS_LIMIT; 2522 2523 opts->limit_start = args->limit_start; 2524 opts->limit_end = args->limit_end; 2525 } 2526 2527 if (args->flags & BALANCE_ARGS_FLAGS_STRIPES_RANGE) { 2528 opts->flags |= BTRFS_BALANCE_OPTS_STRIPES; 2529 2530 opts->stripes_start = (UINT16)args->stripes_start; 2531 opts->stripes_end = (UINT16)args->stripes_end; 2532 } 2533 2534 if (args->flags & BALANCE_ARGS_FLAGS_CONVERT) { 2535 opts->flags |= BTRFS_BALANCE_OPTS_CONVERT; 2536 opts->convert = args->convert; 2537 2538 if (args->flags & BALANCE_ARGS_FLAGS_SOFT) 2539 opts->flags |= BTRFS_BALANCE_OPTS_SOFT; 2540 } 2541 } 2542 2543 static NTSTATUS remove_superblocks(device* dev) { 2544 NTSTATUS Status; 2545 superblock* sb; 2546 int i = 0; 2547 2548 sb = ExAllocatePoolWithTag(PagedPool, sizeof(superblock), ALLOC_TAG); 2549 if (!sb) { 2550 ERR("out of memory\n"); 2551 return STATUS_INSUFFICIENT_RESOURCES; 2552 } 2553 2554 RtlZeroMemory(sb, sizeof(superblock)); 2555 2556 while (superblock_addrs[i] > 0 && dev->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) { 2557 Status = write_data_phys(dev->devobj, superblock_addrs[i], sb, sizeof(superblock)); 2558 2559 if (!NT_SUCCESS(Status)) { 2560 ExFreePool(sb); 2561 return Status; 2562 } 2563 2564 i++; 2565 } 2566 2567 ExFreePool(sb); 2568 2569 return STATUS_SUCCESS; 2570 } 2571 2572 static NTSTATUS finish_removing_device(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, device* dev) { 2573 KEY searchkey; 2574 traverse_ptr tp; 2575 NTSTATUS Status; 2576 LIST_ENTRY* le; 2577 volume_device_extension* vde; 2578 2579 if (Vcb->need_write) { 2580 Status = do_write(Vcb, NULL); 2581 2582 if (!NT_SUCCESS(Status)) 2583 ERR("do_write returned %08x\n", Status); 2584 } else 2585 Status = STATUS_SUCCESS; 2586 2587 free_trees(Vcb); 2588 2589 if (!NT_SUCCESS(Status)) 2590 return Status; 2591 2592 // remove entry in chunk tree 2593 2594 searchkey.obj_id = 1; 2595 searchkey.obj_type = TYPE_DEV_ITEM; 2596 searchkey.offset = dev->devitem.dev_id; 2597 2598 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, NULL); 2599 if (!NT_SUCCESS(Status)) { 2600 ERR("find_item returned %08x\n", Status); 2601 return Status; 2602 } 2603 2604 if (!keycmp(searchkey, tp.item->key)) { 2605 Status = delete_tree_item(Vcb, &tp); 2606 2607 if (!NT_SUCCESS(Status)) { 2608 ERR("delete_tree_item returned %08x\n", Status); 2609 return Status; 2610 } 2611 } 2612 2613 // remove stats entry in device tree 2614 2615 searchkey.obj_id = 0; 2616 searchkey.obj_type = TYPE_DEV_STATS; 2617 searchkey.offset = dev->devitem.dev_id; 2618 2619 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL); 2620 if (!NT_SUCCESS(Status)) { 2621 ERR("find_item returned %08x\n", Status); 2622 return Status; 2623 } 2624 2625 if (!keycmp(searchkey, tp.item->key)) { 2626 Status = delete_tree_item(Vcb, &tp); 2627 2628 if (!NT_SUCCESS(Status)) { 2629 ERR("delete_tree_item returned %08x\n", Status); 2630 return Status; 2631 } 2632 } 2633 2634 // update superblock 2635 2636 Vcb->superblock.num_devices--; 2637 Vcb->superblock.total_bytes -= dev->devitem.num_bytes; 2638 Vcb->devices_loaded--; 2639 2640 RemoveEntryList(&dev->list_entry); 2641 2642 // flush 2643 2644 Status = do_write(Vcb, NULL); 2645 if (!NT_SUCCESS(Status)) 2646 ERR("do_write returned %08x\n", Status); 2647 2648 free_trees(Vcb); 2649 2650 if (!NT_SUCCESS(Status)) 2651 return Status; 2652 2653 if (!dev->readonly && dev->devobj) { 2654 Status = remove_superblocks(dev); 2655 if (!NT_SUCCESS(Status)) 2656 WARN("remove_superblocks returned %08x\n", Status); 2657 } 2658 2659 // remove entry in volume list 2660 2661 vde = Vcb->vde; 2662 2663 if (dev->devobj) { 2664 pdo_device_extension* pdode = vde->pdode; 2665 2666 ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); 2667 2668 le = pdode->children.Flink; 2669 while (le != &pdode->children) { 2670 volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); 2671 2672 if (RtlCompareMemory(&dev->devitem.device_uuid, &vc->uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { 2673 PFILE_OBJECT FileObject; 2674 PDEVICE_OBJECT mountmgr; 2675 UNICODE_STRING mmdevpath; 2676 2677 pdode->children_loaded--; 2678 2679 if (vc->had_drive_letter) { // re-add entry to mountmgr 2680 RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); 2681 Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr); 2682 if (!NT_SUCCESS(Status)) 2683 ERR("IoGetDeviceObjectPointer returned %08x\n", Status); 2684 else { 2685 MOUNTDEV_NAME mdn; 2686 2687 Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); 2688 if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) 2689 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); 2690 else { 2691 MOUNTDEV_NAME* mdn2; 2692 ULONG mdnsize = (ULONG)offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength; 2693 2694 mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG); 2695 if (!mdn2) 2696 ERR("out of memory\n"); 2697 else { 2698 Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL); 2699 if (!NT_SUCCESS(Status)) 2700 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); 2701 else { 2702 UNICODE_STRING name; 2703 2704 name.Buffer = mdn2->Name; 2705 name.Length = name.MaximumLength = mdn2->NameLength; 2706 2707 Status = mountmgr_add_drive_letter(mountmgr, &name); 2708 if (!NT_SUCCESS(Status)) 2709 WARN("mountmgr_add_drive_letter returned %08x\n", Status); 2710 } 2711 2712 ExFreePool(mdn2); 2713 } 2714 } 2715 2716 ObDereferenceObject(FileObject); 2717 } 2718 } 2719 2720 ExFreePool(vc->pnp_name.Buffer); 2721 RemoveEntryList(&vc->list_entry); 2722 ExFreePool(vc); 2723 2724 ObDereferenceObject(vc->fileobj); 2725 2726 break; 2727 } 2728 2729 le = le->Flink; 2730 } 2731 2732 if (pdode->children_loaded > 0 && vde->device->Characteristics & FILE_REMOVABLE_MEDIA) { 2733 vde->device->Characteristics &= ~FILE_REMOVABLE_MEDIA; 2734 2735 le = pdode->children.Flink; 2736 while (le != &pdode->children) { 2737 volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); 2738 2739 if (vc->devobj->Characteristics & FILE_REMOVABLE_MEDIA) { 2740 vde->device->Characteristics |= FILE_REMOVABLE_MEDIA; 2741 break; 2742 } 2743 2744 le = le->Flink; 2745 } 2746 } 2747 2748 pdode->num_children = Vcb->superblock.num_devices; 2749 2750 ExReleaseResourceLite(&pdode->child_lock); 2751 2752 // free dev 2753 2754 if (dev->trim && !dev->readonly && !Vcb->options.no_trim) 2755 trim_whole_device(dev); 2756 } 2757 2758 while (!IsListEmpty(&dev->space)) { 2759 LIST_ENTRY* le2 = RemoveHeadList(&dev->space); 2760 space* s = CONTAINING_RECORD(le2, space, list_entry); 2761 2762 ExFreePool(s); 2763 } 2764 2765 ExFreePool(dev); 2766 2767 if (Vcb->trim) { 2768 Vcb->trim = FALSE; 2769 2770 le = Vcb->devices.Flink; 2771 while (le != &Vcb->devices) { 2772 device* dev2 = CONTAINING_RECORD(le, device, list_entry); 2773 2774 if (dev2->trim) { 2775 Vcb->trim = TRUE; 2776 break; 2777 } 2778 2779 le = le->Flink; 2780 } 2781 } 2782 2783 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE); 2784 2785 return STATUS_SUCCESS; 2786 } 2787 2788 static void trim_unalloc_space(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, device* dev) { 2789 DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa; 2790 DEVICE_DATA_SET_RANGE* ranges; 2791 ULONG datalen, i; 2792 KEY searchkey; 2793 traverse_ptr tp; 2794 NTSTATUS Status; 2795 BOOL b; 2796 UINT64 lastoff = 0x100000; // don't TRIM the first megabyte, in case someone has been daft enough to install GRUB there 2797 LIST_ENTRY* le; 2798 2799 dev->num_trim_entries = 0; 2800 2801 searchkey.obj_id = dev->devitem.dev_id; 2802 searchkey.obj_type = TYPE_DEV_EXTENT; 2803 searchkey.offset = 0; 2804 2805 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL); 2806 if (!NT_SUCCESS(Status)) { 2807 ERR("find_item returned %08x\n", Status); 2808 return; 2809 } 2810 2811 do { 2812 traverse_ptr next_tp; 2813 2814 if (tp.item->key.obj_id == dev->devitem.dev_id && tp.item->key.obj_type == TYPE_DEV_EXTENT) { 2815 if (tp.item->size >= sizeof(DEV_EXTENT)) { 2816 DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data; 2817 2818 if (tp.item->key.offset > lastoff) 2819 add_trim_entry_avoid_sb(Vcb, dev, lastoff, tp.item->key.offset - lastoff); 2820 2821 lastoff = tp.item->key.offset + de->length; 2822 } else { 2823 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DEV_EXTENT)); 2824 return; 2825 } 2826 } 2827 2828 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); 2829 2830 if (b) { 2831 tp = next_tp; 2832 if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type)) 2833 break; 2834 } 2835 } while (b); 2836 2837 if (lastoff < dev->devitem.num_bytes) 2838 add_trim_entry_avoid_sb(Vcb, dev, lastoff, dev->devitem.num_bytes - lastoff); 2839 2840 if (dev->num_trim_entries == 0) 2841 return; 2842 2843 datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)); 2844 2845 dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); 2846 if (!dmdsa) { 2847 ERR("out of memory\n"); 2848 goto end; 2849 } 2850 2851 dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES); 2852 dmdsa->Action = DeviceDsmAction_Trim; 2853 dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED; 2854 dmdsa->ParameterBlockOffset = 0; 2855 dmdsa->ParameterBlockLength = 0; 2856 dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)); 2857 dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE); 2858 2859 ranges = (DEVICE_DATA_SET_RANGE*)((UINT8*)dmdsa + dmdsa->DataSetRangesOffset); 2860 2861 i = 0; 2862 le = dev->trim_list.Flink; 2863 while (le != &dev->trim_list) { 2864 space* s = CONTAINING_RECORD(le, space, list_entry); 2865 2866 ranges[i].StartingOffset = s->address; 2867 ranges[i].LengthInBytes = s->size; 2868 i++; 2869 2870 le = le->Flink; 2871 } 2872 2873 Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES, dmdsa, datalen, NULL, 0, TRUE, NULL); 2874 if (!NT_SUCCESS(Status)) 2875 WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08x\n", Status); 2876 2877 ExFreePool(dmdsa); 2878 2879 end: 2880 while (!IsListEmpty(&dev->trim_list)) { 2881 space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry); 2882 ExFreePool(s); 2883 } 2884 2885 dev->num_trim_entries = 0; 2886 } 2887 2888 static NTSTATUS try_consolidation(device_extension* Vcb, UINT64 flags, chunk** newchunk) { 2889 NTSTATUS Status; 2890 BOOL changed; 2891 LIST_ENTRY* le; 2892 chunk* rc; 2893 2894 // FIXME - allow with metadata chunks? 2895 2896 while (TRUE) { 2897 rc = NULL; 2898 2899 ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); 2900 2901 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); 2902 2903 // choose the least-used chunk we haven't looked at yet 2904 le = Vcb->chunks.Flink; 2905 while (le != &Vcb->chunks) { 2906 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 2907 2908 // FIXME - skip full-size chunks over e.g. 90% full? 2909 if (c->chunk_item->type & BLOCK_FLAG_DATA && !c->readonly && c->balance_num != Vcb->balance.balance_num && (!rc || c->used < rc->used)) 2910 rc = c; 2911 2912 le = le->Flink; 2913 } 2914 2915 ExReleaseResourceLite(&Vcb->chunk_lock); 2916 2917 if (!rc) { 2918 ExReleaseResourceLite(&Vcb->tree_lock); 2919 break; 2920 } 2921 2922 if (rc->list_entry_balance.Flink) { 2923 RemoveEntryList(&rc->list_entry_balance); 2924 Vcb->balance.chunks_left--; 2925 } 2926 2927 rc->list_entry_balance.Flink = (LIST_ENTRY*)1; // so it doesn't get dropped 2928 rc->reloc = TRUE; 2929 2930 ExReleaseResourceLite(&Vcb->tree_lock); 2931 2932 do { 2933 changed = FALSE; 2934 2935 Status = balance_data_chunk(Vcb, rc, &changed); 2936 if (!NT_SUCCESS(Status)) { 2937 ERR("balance_data_chunk returned %08x\n", Status); 2938 Vcb->balance.status = Status; 2939 rc->list_entry_balance.Flink = NULL; 2940 rc->reloc = FALSE; 2941 return Status; 2942 } 2943 2944 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); 2945 2946 if (Vcb->readonly) 2947 Vcb->balance.stopping = TRUE; 2948 2949 if (Vcb->balance.stopping) 2950 return STATUS_SUCCESS; 2951 } while (changed); 2952 2953 rc->list_entry_balance.Flink = NULL; 2954 2955 rc->changed = TRUE; 2956 rc->space_changed = TRUE; 2957 rc->balance_num = Vcb->balance.balance_num; 2958 2959 Status = do_write(Vcb, NULL); 2960 if (!NT_SUCCESS(Status)) { 2961 ERR("do_write returned %08x\n", Status); 2962 return Status; 2963 } 2964 } 2965 2966 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); 2967 2968 Status = alloc_chunk(Vcb, flags, &rc, TRUE); 2969 2970 ExReleaseResourceLite(&Vcb->chunk_lock); 2971 2972 if (NT_SUCCESS(Status)) { 2973 *newchunk = rc; 2974 return Status; 2975 } else { 2976 ERR("alloc_chunk returned %08x\n", Status); 2977 return Status; 2978 } 2979 } 2980 2981 static NTSTATUS regenerate_space_list(device_extension* Vcb, device* dev) { 2982 LIST_ENTRY* le; 2983 2984 while (!IsListEmpty(&dev->space)) { 2985 space* s = CONTAINING_RECORD(RemoveHeadList(&dev->space), space, list_entry); 2986 2987 ExFreePool(s); 2988 } 2989 2990 // The Linux driver doesn't like to allocate chunks within the first megabyte of a device. 2991 2992 space_list_add2(&dev->space, NULL, 0x100000, dev->devitem.num_bytes - 0x100000, NULL, NULL); 2993 2994 le = Vcb->chunks.Flink; 2995 while (le != &Vcb->chunks) { 2996 UINT16 n; 2997 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 2998 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 2999 3000 for (n = 0; n < c->chunk_item->num_stripes; n++) { 3001 UINT64 stripe_size = 0; 3002 3003 if (cis[n].dev_id == dev->devitem.dev_id) { 3004 if (stripe_size == 0) { 3005 UINT16 factor; 3006 3007 if (c->chunk_item->type & BLOCK_FLAG_RAID0) 3008 factor = c->chunk_item->num_stripes; 3009 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 3010 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; 3011 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) 3012 factor = c->chunk_item->num_stripes - 1; 3013 else if (c->chunk_item->type & BLOCK_FLAG_RAID6) 3014 factor = c->chunk_item->num_stripes - 2; 3015 else // SINGLE, DUP, RAID1 3016 factor = 1; 3017 3018 stripe_size = c->chunk_item->size / factor; 3019 } 3020 3021 space_list_subtract2(&dev->space, NULL, cis[n].offset, stripe_size, NULL, NULL); 3022 } 3023 } 3024 3025 le = le->Flink; 3026 } 3027 3028 return STATUS_SUCCESS; 3029 } 3030 3031 _Function_class_(KSTART_ROUTINE) 3032 #ifndef __REACTOS__ 3033 void balance_thread(void* context) { 3034 #else 3035 void NTAPI balance_thread(void* context) { 3036 #endif 3037 device_extension* Vcb = (device_extension*)context; 3038 LIST_ENTRY chunks; 3039 LIST_ENTRY* le; 3040 UINT64 num_chunks[3], okay_metadata_chunks = 0, okay_data_chunks = 0, okay_system_chunks = 0; 3041 UINT64 old_data_flags = 0, old_metadata_flags = 0, old_system_flags = 0; 3042 NTSTATUS Status; 3043 3044 Vcb->balance.balance_num++; 3045 3046 Vcb->balance.stopping = FALSE; 3047 KeInitializeEvent(&Vcb->balance.finished, NotificationEvent, FALSE); 3048 3049 if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT) { 3050 old_data_flags = Vcb->data_flags; 3051 Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->balance.opts[BALANCE_OPTS_DATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_DATA].convert); 3052 3053 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE); 3054 } 3055 3056 if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT) { 3057 old_metadata_flags = Vcb->metadata_flags; 3058 Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->balance.opts[BALANCE_OPTS_METADATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_METADATA].convert); 3059 } 3060 3061 if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT) { 3062 old_system_flags = Vcb->system_flags; 3063 Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert); 3064 } 3065 3066 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) { 3067 if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) 3068 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts)); 3069 else if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) 3070 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts)); 3071 } 3072 3073 num_chunks[0] = num_chunks[1] = num_chunks[2] = 0; 3074 Vcb->balance.total_chunks = Vcb->balance.chunks_left = 0; 3075 3076 InitializeListHead(&chunks); 3077 3078 // FIXME - what are we supposed to do with limit_start? 3079 3080 if (!Vcb->readonly) { 3081 if (!Vcb->balance.removing && !Vcb->balance.shrinking) { 3082 Status = add_balance_item(Vcb); 3083 if (!NT_SUCCESS(Status)) { 3084 ERR("add_balance_item returned %08x\n", Status); 3085 Vcb->balance.status = Status; 3086 goto end; 3087 } 3088 } else { 3089 if (Vcb->need_write) { 3090 Status = do_write(Vcb, NULL); 3091 3092 free_trees(Vcb); 3093 3094 if (!NT_SUCCESS(Status)) { 3095 ERR("do_write returned %08x\n", Status); 3096 Vcb->balance.status = Status; 3097 goto end; 3098 } 3099 } 3100 } 3101 } 3102 3103 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); 3104 3105 if (Vcb->balance.stopping) 3106 goto end; 3107 3108 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); 3109 3110 le = Vcb->chunks.Flink; 3111 while (le != &Vcb->chunks) { 3112 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 3113 UINT8 sort; 3114 3115 ExAcquireResourceExclusiveLite(&c->lock, TRUE); 3116 3117 if (c->chunk_item->type & BLOCK_FLAG_DATA) 3118 sort = BALANCE_OPTS_DATA; 3119 else if (c->chunk_item->type & BLOCK_FLAG_METADATA) 3120 sort = BALANCE_OPTS_METADATA; 3121 else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) 3122 sort = BALANCE_OPTS_SYSTEM; 3123 else { 3124 ERR("unexpected chunk type %llx\n", c->chunk_item->type); 3125 ExReleaseResourceLite(&c->lock); 3126 break; 3127 } 3128 3129 if ((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) && 3130 should_balance_chunk(Vcb, sort, c)) { 3131 InsertTailList(&chunks, &c->list_entry_balance); 3132 3133 num_chunks[sort]++; 3134 Vcb->balance.total_chunks++; 3135 Vcb->balance.chunks_left++; 3136 } else if (sort == BALANCE_OPTS_METADATA) 3137 okay_metadata_chunks++; 3138 else if (sort == BALANCE_OPTS_DATA) 3139 okay_data_chunks++; 3140 else if (sort == BALANCE_OPTS_SYSTEM) 3141 okay_system_chunks++; 3142 3143 if (!c->cache_loaded) { 3144 Status = load_cache_chunk(Vcb, c, NULL); 3145 3146 if (!NT_SUCCESS(Status)) { 3147 ERR("load_cache_chunk returned %08x\n", Status); 3148 Vcb->balance.status = Status; 3149 ExReleaseResourceLite(&c->lock); 3150 ExReleaseResourceLite(&Vcb->chunk_lock); 3151 goto end; 3152 } 3153 } 3154 3155 ExReleaseResourceLite(&c->lock); 3156 3157 le = le->Flink; 3158 } 3159 3160 ExReleaseResourceLite(&Vcb->chunk_lock); 3161 3162 // If we're doing a full balance, try and allocate a new chunk now, before we mess things up 3163 if (okay_metadata_chunks == 0 || okay_data_chunks == 0 || okay_system_chunks == 0) { 3164 BOOL consolidated = FALSE; 3165 chunk* c; 3166 3167 if (okay_metadata_chunks == 0) { 3168 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); 3169 3170 Status = alloc_chunk(Vcb, Vcb->metadata_flags, &c, TRUE); 3171 if (NT_SUCCESS(Status)) 3172 c->balance_num = Vcb->balance.balance_num; 3173 else if (Status != STATUS_DISK_FULL || consolidated) { 3174 ERR("alloc_chunk returned %08x\n", Status); 3175 ExReleaseResourceLite(&Vcb->chunk_lock); 3176 Vcb->balance.status = Status; 3177 goto end; 3178 } 3179 3180 ExReleaseResourceLite(&Vcb->chunk_lock); 3181 3182 if (Status == STATUS_DISK_FULL) { 3183 Status = try_consolidation(Vcb, Vcb->metadata_flags, &c); 3184 if (!NT_SUCCESS(Status)) { 3185 ERR("try_consolidation returned %08x\n", Status); 3186 Vcb->balance.status = Status; 3187 goto end; 3188 } else 3189 c->balance_num = Vcb->balance.balance_num; 3190 3191 consolidated = TRUE; 3192 3193 if (Vcb->balance.stopping) 3194 goto end; 3195 } 3196 } 3197 3198 if (okay_data_chunks == 0) { 3199 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); 3200 3201 Status = alloc_chunk(Vcb, Vcb->data_flags, &c, TRUE); 3202 if (NT_SUCCESS(Status)) 3203 c->balance_num = Vcb->balance.balance_num; 3204 else if (Status != STATUS_DISK_FULL || consolidated) { 3205 ERR("alloc_chunk returned %08x\n", Status); 3206 ExReleaseResourceLite(&Vcb->chunk_lock); 3207 Vcb->balance.status = Status; 3208 goto end; 3209 } 3210 3211 ExReleaseResourceLite(&Vcb->chunk_lock); 3212 3213 if (Status == STATUS_DISK_FULL) { 3214 Status = try_consolidation(Vcb, Vcb->data_flags, &c); 3215 if (!NT_SUCCESS(Status)) { 3216 ERR("try_consolidation returned %08x\n", Status); 3217 Vcb->balance.status = Status; 3218 goto end; 3219 } else 3220 c->balance_num = Vcb->balance.balance_num; 3221 3222 consolidated = TRUE; 3223 3224 if (Vcb->balance.stopping) 3225 goto end; 3226 } 3227 } 3228 3229 if (okay_system_chunks == 0) { 3230 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); 3231 3232 Status = alloc_chunk(Vcb, Vcb->system_flags, &c, TRUE); 3233 if (NT_SUCCESS(Status)) 3234 c->balance_num = Vcb->balance.balance_num; 3235 else if (Status != STATUS_DISK_FULL || consolidated) { 3236 ERR("alloc_chunk returned %08x\n", Status); 3237 ExReleaseResourceLite(&Vcb->chunk_lock); 3238 Vcb->balance.status = Status; 3239 goto end; 3240 } 3241 3242 ExReleaseResourceLite(&Vcb->chunk_lock); 3243 3244 if (Status == STATUS_DISK_FULL) { 3245 Status = try_consolidation(Vcb, Vcb->system_flags, &c); 3246 if (!NT_SUCCESS(Status)) { 3247 ERR("try_consolidation returned %08x\n", Status); 3248 Vcb->balance.status = Status; 3249 goto end; 3250 } else 3251 c->balance_num = Vcb->balance.balance_num; 3252 3253 consolidated = TRUE; 3254 3255 if (Vcb->balance.stopping) 3256 goto end; 3257 } 3258 } 3259 } 3260 3261 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); 3262 3263 le = chunks.Flink; 3264 while (le != &chunks) { 3265 chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance); 3266 3267 c->reloc = TRUE; 3268 3269 le = le->Flink; 3270 } 3271 3272 ExReleaseResourceLite(&Vcb->chunk_lock); 3273 3274 // do data chunks before metadata 3275 le = chunks.Flink; 3276 while (le != &chunks) { 3277 chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance); 3278 LIST_ENTRY* le2 = le->Flink; 3279 3280 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 3281 BOOL changed; 3282 3283 do { 3284 changed = FALSE; 3285 3286 Status = balance_data_chunk(Vcb, c, &changed); 3287 if (!NT_SUCCESS(Status)) { 3288 ERR("balance_data_chunk returned %08x\n", Status); 3289 Vcb->balance.status = Status; 3290 goto end; 3291 } 3292 3293 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); 3294 3295 if (Vcb->readonly) 3296 Vcb->balance.stopping = TRUE; 3297 3298 if (Vcb->balance.stopping) 3299 break; 3300 } while (changed); 3301 3302 c->changed = TRUE; 3303 c->space_changed = TRUE; 3304 } 3305 3306 if (Vcb->balance.stopping) 3307 goto end; 3308 3309 if (c->chunk_item->type & BLOCK_FLAG_DATA && 3310 (!(Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) { 3311 RemoveEntryList(&c->list_entry_balance); 3312 c->list_entry_balance.Flink = NULL; 3313 3314 Vcb->balance.chunks_left--; 3315 } 3316 3317 le = le2; 3318 } 3319 3320 // do metadata chunks 3321 while (!IsListEmpty(&chunks)) { 3322 chunk* c; 3323 BOOL changed; 3324 3325 le = RemoveHeadList(&chunks); 3326 c = CONTAINING_RECORD(le, chunk, list_entry_balance); 3327 3328 if (c->chunk_item->type & BLOCK_FLAG_METADATA || c->chunk_item->type & BLOCK_FLAG_SYSTEM) { 3329 do { 3330 Status = balance_metadata_chunk(Vcb, c, &changed); 3331 if (!NT_SUCCESS(Status)) { 3332 ERR("balance_metadata_chunk returned %08x\n", Status); 3333 Vcb->balance.status = Status; 3334 goto end; 3335 } 3336 3337 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); 3338 3339 if (Vcb->readonly) 3340 Vcb->balance.stopping = TRUE; 3341 3342 if (Vcb->balance.stopping) 3343 break; 3344 } while (changed); 3345 3346 c->changed = TRUE; 3347 c->space_changed = TRUE; 3348 } 3349 3350 if (Vcb->balance.stopping) 3351 break; 3352 3353 c->list_entry_balance.Flink = NULL; 3354 3355 Vcb->balance.chunks_left--; 3356 } 3357 3358 end: 3359 if (!Vcb->readonly) { 3360 if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) { 3361 le = chunks.Flink; 3362 while (le != &chunks) { 3363 chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance); 3364 c->reloc = FALSE; 3365 3366 le = le->Flink; 3367 c->list_entry_balance.Flink = NULL; 3368 } 3369 3370 if (old_data_flags != 0) 3371 Vcb->data_flags = old_data_flags; 3372 3373 if (old_metadata_flags != 0) 3374 Vcb->metadata_flags = old_metadata_flags; 3375 3376 if (old_system_flags != 0) 3377 Vcb->system_flags = old_system_flags; 3378 } 3379 3380 if (Vcb->balance.removing) { 3381 device* dev = NULL; 3382 3383 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); 3384 3385 le = Vcb->devices.Flink; 3386 while (le != &Vcb->devices) { 3387 device* dev2 = CONTAINING_RECORD(le, device, list_entry); 3388 3389 if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) { 3390 dev = dev2; 3391 break; 3392 } 3393 3394 le = le->Flink; 3395 } 3396 3397 if (dev) { 3398 if (Vcb->balance.chunks_left == 0) { 3399 Status = finish_removing_device(Vcb, dev); 3400 3401 if (!NT_SUCCESS(Status)) { 3402 ERR("finish_removing_device returned %08x\n", Status); 3403 dev->reloc = FALSE; 3404 } 3405 } else 3406 dev->reloc = FALSE; 3407 } 3408 3409 ExReleaseResourceLite(&Vcb->tree_lock); 3410 } else if (Vcb->balance.shrinking) { 3411 device* dev = NULL; 3412 3413 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); 3414 3415 le = Vcb->devices.Flink; 3416 while (le != &Vcb->devices) { 3417 device* dev2 = CONTAINING_RECORD(le, device, list_entry); 3418 3419 if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) { 3420 dev = dev2; 3421 break; 3422 } 3423 3424 le = le->Flink; 3425 } 3426 3427 if (!dev) { 3428 ERR("could not find device %llx\n", Vcb->balance.opts[0].devid); 3429 Vcb->balance.status = STATUS_INTERNAL_ERROR; 3430 } 3431 3432 if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) { 3433 if (dev) { 3434 Status = regenerate_space_list(Vcb, dev); 3435 if (!NT_SUCCESS(Status)) 3436 WARN("regenerate_space_list returned %08x\n", Status); 3437 } 3438 } else { 3439 UINT64 old_size; 3440 3441 old_size = dev->devitem.num_bytes; 3442 dev->devitem.num_bytes = Vcb->balance.opts[0].drange_start; 3443 3444 Status = update_dev_item(Vcb, dev, NULL); 3445 if (!NT_SUCCESS(Status)) { 3446 ERR("update_dev_item returned %08x\n", Status); 3447 dev->devitem.num_bytes = old_size; 3448 Vcb->balance.status = Status; 3449 3450 Status = regenerate_space_list(Vcb, dev); 3451 if (!NT_SUCCESS(Status)) 3452 WARN("regenerate_space_list returned %08x\n", Status); 3453 } else { 3454 Vcb->superblock.total_bytes -= old_size - dev->devitem.num_bytes; 3455 3456 Status = do_write(Vcb, NULL); 3457 if (!NT_SUCCESS(Status)) 3458 ERR("do_write returned %08x\n", Status); 3459 3460 free_trees(Vcb); 3461 } 3462 } 3463 3464 ExReleaseResourceLite(&Vcb->tree_lock); 3465 3466 if (!Vcb->balance.stopping && NT_SUCCESS(Vcb->balance.status)) 3467 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE); 3468 } else { 3469 Status = remove_balance_item(Vcb); 3470 if (!NT_SUCCESS(Status)) { 3471 ERR("remove_balance_item returned %08x\n", Status); 3472 goto end; 3473 } 3474 } 3475 3476 if (Vcb->trim && !Vcb->options.no_trim) { 3477 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); 3478 3479 le = Vcb->devices.Flink; 3480 while (le != &Vcb->devices) { 3481 device* dev2 = CONTAINING_RECORD(le, device, list_entry); 3482 3483 if (dev2->devobj && !dev2->readonly && dev2->trim) 3484 trim_unalloc_space(Vcb, dev2); 3485 3486 le = le->Flink; 3487 } 3488 3489 ExReleaseResourceLite(&Vcb->tree_lock); 3490 } 3491 } 3492 3493 ZwClose(Vcb->balance.thread); 3494 Vcb->balance.thread = NULL; 3495 3496 KeSetEvent(&Vcb->balance.finished, 0, FALSE); 3497 } 3498 3499 NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode) { 3500 NTSTATUS Status; 3501 btrfs_start_balance* bsb = (btrfs_start_balance*)data; 3502 UINT8 i; 3503 3504 if (length < sizeof(btrfs_start_balance) || !data) 3505 return STATUS_INVALID_PARAMETER; 3506 3507 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3508 return STATUS_PRIVILEGE_NOT_HELD; 3509 3510 if (Vcb->locked) { 3511 WARN("cannot start balance while locked\n"); 3512 return STATUS_DEVICE_NOT_READY; 3513 } 3514 3515 if (Vcb->scrub.thread) { 3516 WARN("cannot start balance while scrub running\n"); 3517 return STATUS_DEVICE_NOT_READY; 3518 } 3519 3520 if (Vcb->balance.thread) { 3521 WARN("balance already running\n"); 3522 return STATUS_DEVICE_NOT_READY; 3523 } 3524 3525 if (Vcb->readonly) 3526 return STATUS_MEDIA_WRITE_PROTECTED; 3527 3528 if (!(bsb->opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) && 3529 !(bsb->opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) && 3530 !(bsb->opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED)) 3531 return STATUS_SUCCESS; 3532 3533 for (i = 0; i < 3; i++) { 3534 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) { 3535 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_PROFILES) { 3536 bsb->opts[i].profiles &= BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1 | BLOCK_FLAG_DUPLICATE | BLOCK_FLAG_RAID10 | 3537 BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6 | BLOCK_FLAG_SINGLE; 3538 3539 if (bsb->opts[i].profiles == 0) 3540 return STATUS_INVALID_PARAMETER; 3541 } 3542 3543 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DEVID) { 3544 if (bsb->opts[i].devid == 0) 3545 return STATUS_INVALID_PARAMETER; 3546 } 3547 3548 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DRANGE) { 3549 if (bsb->opts[i].drange_start > bsb->opts[i].drange_end) 3550 return STATUS_INVALID_PARAMETER; 3551 } 3552 3553 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_VRANGE) { 3554 if (bsb->opts[i].vrange_start > bsb->opts[i].vrange_end) 3555 return STATUS_INVALID_PARAMETER; 3556 } 3557 3558 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_LIMIT) { 3559 bsb->opts[i].limit_start = max(1, bsb->opts[i].limit_start); 3560 bsb->opts[i].limit_end = max(1, bsb->opts[i].limit_end); 3561 3562 if (bsb->opts[i].limit_start > bsb->opts[i].limit_end) 3563 return STATUS_INVALID_PARAMETER; 3564 } 3565 3566 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_STRIPES) { 3567 bsb->opts[i].stripes_start = max(1, bsb->opts[i].stripes_start); 3568 bsb->opts[i].stripes_end = max(1, bsb->opts[i].stripes_end); 3569 3570 if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end) 3571 return STATUS_INVALID_PARAMETER; 3572 } 3573 3574 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) { 3575 bsb->opts[i].usage_start = min(100, bsb->opts[i].stripes_start); 3576 bsb->opts[i].usage_end = min(100, bsb->opts[i].stripes_end); 3577 3578 if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end) 3579 return STATUS_INVALID_PARAMETER; 3580 } 3581 3582 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) { 3583 if (bsb->opts[i].convert != BLOCK_FLAG_RAID0 && bsb->opts[i].convert != BLOCK_FLAG_RAID1 && 3584 bsb->opts[i].convert != BLOCK_FLAG_DUPLICATE && bsb->opts[i].convert != BLOCK_FLAG_RAID10 && 3585 bsb->opts[i].convert != BLOCK_FLAG_RAID5 && bsb->opts[i].convert != BLOCK_FLAG_RAID6 && 3586 bsb->opts[i].convert != BLOCK_FLAG_SINGLE) 3587 return STATUS_INVALID_PARAMETER; 3588 } 3589 } 3590 } 3591 3592 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bsb->opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts)); 3593 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bsb->opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts)); 3594 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bsb->opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts)); 3595 3596 Vcb->balance.paused = FALSE; 3597 Vcb->balance.removing = FALSE; 3598 Vcb->balance.shrinking = FALSE; 3599 Vcb->balance.status = STATUS_SUCCESS; 3600 KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); 3601 3602 Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); 3603 if (!NT_SUCCESS(Status)) { 3604 ERR("PsCreateSystemThread returned %08x\n", Status); 3605 return Status; 3606 } 3607 3608 return STATUS_SUCCESS; 3609 } 3610 3611 NTSTATUS look_for_balance_item(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb) { 3612 KEY searchkey; 3613 traverse_ptr tp; 3614 NTSTATUS Status; 3615 BALANCE_ITEM* bi; 3616 int i; 3617 3618 searchkey.obj_id = BALANCE_ITEM_ID; 3619 searchkey.obj_type = TYPE_TEMP_ITEM; 3620 searchkey.offset = 0; 3621 3622 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); 3623 if (!NT_SUCCESS(Status)) { 3624 ERR("find_item returned %08x\n", Status); 3625 return Status; 3626 } 3627 3628 if (keycmp(tp.item->key, searchkey)) { 3629 TRACE("no balance item found\n"); 3630 return STATUS_NOT_FOUND; 3631 } 3632 3633 if (tp.item->size < sizeof(BALANCE_ITEM)) { 3634 WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 3635 tp.item->size, sizeof(BALANCE_ITEM)); 3636 return STATUS_INTERNAL_ERROR; 3637 } 3638 3639 bi = (BALANCE_ITEM*)tp.item->data; 3640 3641 if (bi->flags & BALANCE_FLAGS_DATA) 3642 load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data); 3643 3644 if (bi->flags & BALANCE_FLAGS_METADATA) 3645 load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata); 3646 3647 if (bi->flags & BALANCE_FLAGS_SYSTEM) 3648 load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system); 3649 3650 // do the heuristics that Linux driver does 3651 3652 for (i = 0; i < 3; i++) { 3653 if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) { 3654 // if converting, don't redo chunks already done 3655 3656 if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) 3657 Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_SOFT; 3658 3659 // don't balance chunks more than 90% filled - presumably these 3660 // have already been done 3661 3662 if (!(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) && 3663 !(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) 3664 ) { 3665 Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_USAGE; 3666 Vcb->balance.opts[i].usage_start = 0; 3667 Vcb->balance.opts[i].usage_end = 90; 3668 } 3669 } 3670 } 3671 3672 if (Vcb->readonly || Vcb->options.skip_balance) 3673 Vcb->balance.paused = TRUE; 3674 else 3675 Vcb->balance.paused = FALSE; 3676 3677 Vcb->balance.removing = FALSE; 3678 Vcb->balance.shrinking = FALSE; 3679 Vcb->balance.status = STATUS_SUCCESS; 3680 KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); 3681 3682 Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); 3683 if (!NT_SUCCESS(Status)) { 3684 ERR("PsCreateSystemThread returned %08x\n", Status); 3685 return Status; 3686 } 3687 3688 return STATUS_SUCCESS; 3689 } 3690 3691 NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length) { 3692 btrfs_query_balance* bqb = (btrfs_query_balance*)data; 3693 3694 if (length < sizeof(btrfs_query_balance) || !data) 3695 return STATUS_INVALID_PARAMETER; 3696 3697 if (!Vcb->balance.thread) { 3698 bqb->status = BTRFS_BALANCE_STOPPED; 3699 3700 if (!NT_SUCCESS(Vcb->balance.status)) { 3701 bqb->status |= BTRFS_BALANCE_ERROR; 3702 bqb->error = Vcb->balance.status; 3703 } 3704 3705 return STATUS_SUCCESS; 3706 } 3707 3708 bqb->status = Vcb->balance.paused ? BTRFS_BALANCE_PAUSED : BTRFS_BALANCE_RUNNING; 3709 3710 if (Vcb->balance.removing) 3711 bqb->status |= BTRFS_BALANCE_REMOVAL; 3712 3713 if (Vcb->balance.shrinking) 3714 bqb->status |= BTRFS_BALANCE_SHRINKING; 3715 3716 if (!NT_SUCCESS(Vcb->balance.status)) 3717 bqb->status |= BTRFS_BALANCE_ERROR; 3718 3719 bqb->chunks_left = Vcb->balance.chunks_left; 3720 bqb->total_chunks = Vcb->balance.total_chunks; 3721 bqb->error = Vcb->balance.status; 3722 RtlCopyMemory(&bqb->data_opts, &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts)); 3723 RtlCopyMemory(&bqb->metadata_opts, &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts)); 3724 RtlCopyMemory(&bqb->system_opts, &Vcb->balance.opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts)); 3725 3726 return STATUS_SUCCESS; 3727 } 3728 3729 NTSTATUS pause_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3730 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3731 return STATUS_PRIVILEGE_NOT_HELD; 3732 3733 if (!Vcb->balance.thread) 3734 return STATUS_DEVICE_NOT_READY; 3735 3736 if (Vcb->balance.paused) 3737 return STATUS_DEVICE_NOT_READY; 3738 3739 Vcb->balance.paused = TRUE; 3740 KeClearEvent(&Vcb->balance.event); 3741 3742 return STATUS_SUCCESS; 3743 } 3744 3745 NTSTATUS resume_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3746 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3747 return STATUS_PRIVILEGE_NOT_HELD; 3748 3749 if (!Vcb->balance.thread) 3750 return STATUS_DEVICE_NOT_READY; 3751 3752 if (!Vcb->balance.paused) 3753 return STATUS_DEVICE_NOT_READY; 3754 3755 if (Vcb->readonly) 3756 return STATUS_MEDIA_WRITE_PROTECTED; 3757 3758 Vcb->balance.paused = FALSE; 3759 KeSetEvent(&Vcb->balance.event, 0, FALSE); 3760 3761 return STATUS_SUCCESS; 3762 } 3763 3764 NTSTATUS stop_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3765 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3766 return STATUS_PRIVILEGE_NOT_HELD; 3767 3768 if (!Vcb->balance.thread) 3769 return STATUS_DEVICE_NOT_READY; 3770 3771 Vcb->balance.paused = FALSE; 3772 Vcb->balance.stopping = TRUE; 3773 Vcb->balance.status = STATUS_SUCCESS; 3774 KeSetEvent(&Vcb->balance.event, 0, FALSE); 3775 3776 return STATUS_SUCCESS; 3777 } 3778 3779 NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode) { 3780 UINT64 devid; 3781 LIST_ENTRY* le; 3782 device* dev = NULL; 3783 NTSTATUS Status; 3784 int i; 3785 UINT64 num_rw_devices; 3786 3787 TRACE("(%p, %p, %x)\n", Vcb, data, length); 3788 3789 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3790 return STATUS_PRIVILEGE_NOT_HELD; 3791 3792 if (length < sizeof(UINT64)) 3793 return STATUS_INVALID_PARAMETER; 3794 3795 devid = *(UINT64*)data; 3796 3797 ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); 3798 3799 if (Vcb->readonly) { 3800 ExReleaseResourceLite(&Vcb->tree_lock); 3801 return STATUS_MEDIA_WRITE_PROTECTED; 3802 } 3803 3804 num_rw_devices = 0; 3805 3806 le = Vcb->devices.Flink; 3807 while (le != &Vcb->devices) { 3808 device* dev2 = CONTAINING_RECORD(le, device, list_entry); 3809 3810 if (dev2->devitem.dev_id == devid) 3811 dev = dev2; 3812 3813 if (!dev2->readonly) 3814 num_rw_devices++; 3815 3816 le = le->Flink; 3817 } 3818 3819 if (!dev) { 3820 ExReleaseResourceLite(&Vcb->tree_lock); 3821 WARN("device %llx not found\n", devid); 3822 return STATUS_NOT_FOUND; 3823 } 3824 3825 if (!dev->readonly) { 3826 if (num_rw_devices == 1) { 3827 ExReleaseResourceLite(&Vcb->tree_lock); 3828 WARN("not removing last non-readonly device\n"); 3829 return STATUS_INVALID_PARAMETER; 3830 } 3831 3832 if (num_rw_devices == 4 && 3833 ((Vcb->data_flags & BLOCK_FLAG_RAID10 || Vcb->metadata_flags & BLOCK_FLAG_RAID10 || Vcb->system_flags & BLOCK_FLAG_RAID10) || 3834 (Vcb->data_flags & BLOCK_FLAG_RAID6 || Vcb->metadata_flags & BLOCK_FLAG_RAID6 || Vcb->system_flags & BLOCK_FLAG_RAID6)) 3835 ) { 3836 ExReleaseResourceLite(&Vcb->tree_lock); 3837 ERR("would not be enough devices to satisfy RAID requirement (RAID6/10)\n"); 3838 return STATUS_CANNOT_DELETE; 3839 } 3840 3841 if (num_rw_devices == 3 && (Vcb->data_flags & BLOCK_FLAG_RAID5 || Vcb->metadata_flags & BLOCK_FLAG_RAID5 || Vcb->system_flags & BLOCK_FLAG_RAID5)) { 3842 ExReleaseResourceLite(&Vcb->tree_lock); 3843 ERR("would not be enough devices to satisfy RAID requirement (RAID5)\n"); 3844 return STATUS_CANNOT_DELETE; 3845 } 3846 3847 if (num_rw_devices == 2 && 3848 ((Vcb->data_flags & BLOCK_FLAG_RAID0 || Vcb->metadata_flags & BLOCK_FLAG_RAID0 || Vcb->system_flags & BLOCK_FLAG_RAID0) || 3849 (Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->metadata_flags & BLOCK_FLAG_RAID1 || Vcb->system_flags & BLOCK_FLAG_RAID1)) 3850 ) { 3851 ExReleaseResourceLite(&Vcb->tree_lock); 3852 ERR("would not be enough devices to satisfy RAID requirement (RAID0/1)\n"); 3853 return STATUS_CANNOT_DELETE; 3854 } 3855 } 3856 3857 ExReleaseResourceLite(&Vcb->tree_lock); 3858 3859 if (Vcb->balance.thread) { 3860 WARN("balance already running\n"); 3861 return STATUS_DEVICE_NOT_READY; 3862 } 3863 3864 dev->reloc = TRUE; 3865 3866 RtlZeroMemory(Vcb->balance.opts, sizeof(btrfs_balance_opts) * 3); 3867 3868 for (i = 0; i < 3; i++) { 3869 Vcb->balance.opts[i].flags = BTRFS_BALANCE_OPTS_ENABLED | BTRFS_BALANCE_OPTS_DEVID; 3870 Vcb->balance.opts[i].devid = devid; 3871 } 3872 3873 Vcb->balance.paused = FALSE; 3874 Vcb->balance.removing = TRUE; 3875 Vcb->balance.shrinking = FALSE; 3876 Vcb->balance.status = STATUS_SUCCESS; 3877 KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); 3878 3879 Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); 3880 if (!NT_SUCCESS(Status)) { 3881 ERR("PsCreateSystemThread returned %08x\n", Status); 3882 dev->reloc = FALSE; 3883 return Status; 3884 } 3885 3886 return STATUS_SUCCESS; 3887 } 3888