1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19 #include "crc32c.h"
20
21 // Number of increments in the size of each cache inode, in sectors. Should
22 // this be a constant number of sectors, a constant 256 KB, or what?
23 #define CACHE_INCREMENTS 64
24
remove_free_space_inode(device_extension * Vcb,uint64_t inode,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)25 static NTSTATUS remove_free_space_inode(device_extension* Vcb, uint64_t inode, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
26 NTSTATUS Status;
27 fcb* fcb;
28
29 Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, false, NULL, &fcb, PagedPool, Irp);
30 if (!NT_SUCCESS(Status)) {
31 ERR("open_fcb returned %08lx\n", Status);
32 return Status;
33 }
34
35 mark_fcb_dirty(fcb);
36
37 if (fcb->inode_item.st_size > 0) {
38 Status = excise_extents(fcb->Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
39 if (!NT_SUCCESS(Status)) {
40 ERR("excise_extents returned %08lx\n", Status);
41 return Status;
42 }
43 }
44
45 fcb->deleted = true;
46
47 Status = flush_fcb(fcb, false, batchlist, Irp);
48 if (!NT_SUCCESS(Status)) {
49 ERR("flush_fcb returned %08lx\n", Status);
50 free_fcb(fcb);
51 return Status;
52 }
53
54 free_fcb(fcb);
55
56 return STATUS_SUCCESS;
57 }
58
clear_free_space_cache(device_extension * Vcb,LIST_ENTRY * batchlist,PIRP Irp)59 NTSTATUS clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp) {
60 KEY searchkey;
61 traverse_ptr tp, next_tp;
62 NTSTATUS Status;
63 bool b;
64 LIST_ENTRY rollback;
65
66 InitializeListHead(&rollback);
67
68 searchkey.obj_id = FREE_SPACE_CACHE_ID;
69 searchkey.obj_type = 0;
70 searchkey.offset = 0;
71
72 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
73 if (!NT_SUCCESS(Status)) {
74 ERR("error - find_item returned %08lx\n", Status);
75 return Status;
76 }
77
78 do {
79 if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
80 break;
81
82 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
83 Status = delete_tree_item(Vcb, &tp);
84 if (!NT_SUCCESS(Status)) {
85 ERR("delete_tree_item returned %08lx\n", Status);
86 return Status;
87 }
88
89 if (tp.item->size >= sizeof(FREE_SPACE_ITEM)) {
90 FREE_SPACE_ITEM* fsi = (FREE_SPACE_ITEM*)tp.item->data;
91
92 if (fsi->key.obj_type != TYPE_INODE_ITEM)
93 WARN("key (%I64x,%x,%I64x) does not point to an INODE_ITEM\n", fsi->key.obj_id, fsi->key.obj_type, fsi->key.offset);
94 else {
95 LIST_ENTRY* le;
96
97 Status = remove_free_space_inode(Vcb, fsi->key.obj_id, batchlist, Irp, &rollback);
98
99 if (!NT_SUCCESS(Status))
100 ERR("remove_free_space_inode for (%I64x,%x,%I64x) returned %08lx\n", fsi->key.obj_id, fsi->key.obj_type, fsi->key.offset, Status);
101
102 le = Vcb->chunks.Flink;
103 while (le != &Vcb->chunks) {
104 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
105
106 if (c->offset == tp.item->key.offset && c->cache) {
107 reap_fcb(c->cache);
108 c->cache = NULL;
109 }
110
111 le = le->Flink;
112 }
113 }
114 } else
115 WARN("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
116 }
117
118 b = find_next_item(Vcb, &tp, &next_tp, false, Irp);
119 if (b)
120 tp = next_tp;
121 } while (b);
122
123 Status = STATUS_SUCCESS;
124
125 if (NT_SUCCESS(Status))
126 clear_rollback(&rollback);
127 else
128 do_rollback(Vcb, &rollback);
129
130 if (Vcb->space_root) {
131 searchkey.obj_id = 0;
132 searchkey.obj_type = 0;
133 searchkey.offset = 0;
134
135 Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, false, Irp);
136 if (!NT_SUCCESS(Status)) {
137 ERR("find_item returned %08lx\n", Status);
138 return Status;
139 }
140
141 do {
142 Status = delete_tree_item(Vcb, &tp);
143 if (!NT_SUCCESS(Status)) {
144 ERR("delete_tree_item returned %08lx\n", Status);
145 return Status;
146 }
147
148 b = find_next_item(Vcb, &tp, &next_tp, false, Irp);
149 if (b)
150 tp = next_tp;
151 } while (b);
152 }
153
154 // regenerate free space tree
155 if (Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE) {
156 LIST_ENTRY* le;
157
158 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
159
160 le = Vcb->chunks.Flink;
161 while (le != &Vcb->chunks) {
162 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
163
164 if (!c->cache_loaded) {
165 acquire_chunk_lock(c, Vcb);
166
167 Status = load_cache_chunk(Vcb, c, NULL);
168 if (!NT_SUCCESS(Status)) {
169 ERR("load_cache_chunk(%I64x) returned %08lx\n", c->offset, Status);
170 release_chunk_lock(c, Vcb);
171 ExReleaseResourceLite(&Vcb->chunk_lock);
172 return Status;
173 }
174
175 c->changed = true;
176 c->space_changed = true;
177
178 release_chunk_lock(c, Vcb);
179 }
180
181 le = le->Flink;
182 }
183
184 ExReleaseResourceLite(&Vcb->chunk_lock);
185 }
186
187 return Status;
188 }
189
add_space_entry(LIST_ENTRY * list,LIST_ENTRY * list_size,uint64_t offset,uint64_t size)190 NTSTATUS add_space_entry(LIST_ENTRY* list, LIST_ENTRY* list_size, uint64_t offset, uint64_t size) {
191 space* s;
192
193 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
194
195 if (!s) {
196 ERR("out of memory\n");
197 return STATUS_INSUFFICIENT_RESOURCES;
198 }
199
200 s->address = offset;
201 s->size = size;
202
203 if (IsListEmpty(list))
204 InsertTailList(list, &s->list_entry);
205 else {
206 space* s2 = CONTAINING_RECORD(list->Blink, space, list_entry);
207
208 if (s2->address < offset)
209 InsertTailList(list, &s->list_entry);
210 else {
211 LIST_ENTRY* le;
212
213 le = list->Flink;
214 while (le != list) {
215 s2 = CONTAINING_RECORD(le, space, list_entry);
216
217 if (s2->address > offset) {
218 InsertTailList(le, &s->list_entry);
219 goto size;
220 }
221
222 le = le->Flink;
223 }
224 }
225 }
226
227 size:
228 if (!list_size)
229 return STATUS_SUCCESS;
230
231 if (IsListEmpty(list_size))
232 InsertTailList(list_size, &s->list_entry_size);
233 else {
234 space* s2 = CONTAINING_RECORD(list_size->Blink, space, list_entry_size);
235
236 if (s2->size >= size)
237 InsertTailList(list_size, &s->list_entry_size);
238 else {
239 LIST_ENTRY* le;
240
241 le = list_size->Flink;
242 while (le != list_size) {
243 s2 = CONTAINING_RECORD(le, space, list_entry_size);
244
245 if (s2->size <= size) {
246 InsertHeadList(le->Blink, &s->list_entry_size);
247 return STATUS_SUCCESS;
248 }
249
250 le = le->Flink;
251 }
252 }
253 }
254
255 return STATUS_SUCCESS;
256 }
257
load_free_space_bitmap(device_extension * Vcb,chunk * c,uint64_t offset,void * data,uint64_t * total_space)258 static void load_free_space_bitmap(device_extension* Vcb, chunk* c, uint64_t offset, void* data, uint64_t* total_space) {
259 RTL_BITMAP bmph;
260 uint32_t i, len, *dwords = data;
261 ULONG runlength, index;
262
263 // flip bits
264 for (i = 0; i < Vcb->superblock.sector_size / sizeof(uint32_t); i++) {
265 dwords[i] = ~dwords[i];
266 }
267
268 len = Vcb->superblock.sector_size * 8;
269
270 RtlInitializeBitMap(&bmph, data, len);
271
272 index = 0;
273 runlength = RtlFindFirstRunClear(&bmph, &index);
274
275 while (runlength != 0) {
276 uint64_t addr, length;
277
278 if (index >= len)
279 break;
280
281 if (index + runlength >= len) {
282 runlength = len - index;
283
284 if (runlength == 0)
285 break;
286 }
287
288 addr = offset + (index << Vcb->sector_shift);
289 length = runlength << Vcb->sector_shift;
290
291 add_space_entry(&c->space, &c->space_size, addr, length);
292 index += runlength;
293 *total_space += length;
294
295 runlength = RtlFindNextForwardRunClear(&bmph, index, &index);
296 }
297 }
298
order_space_entry(space * s,LIST_ENTRY * list_size)299 static void order_space_entry(space* s, LIST_ENTRY* list_size) {
300 LIST_ENTRY* le;
301
302 if (IsListEmpty(list_size)) {
303 InsertHeadList(list_size, &s->list_entry_size);
304 return;
305 }
306
307 le = list_size->Flink;
308
309 while (le != list_size) {
310 space* s2 = CONTAINING_RECORD(le, space, list_entry_size);
311
312 if (s2->size <= s->size) {
313 InsertHeadList(le->Blink, &s->list_entry_size);
314 return;
315 }
316
317 le = le->Flink;
318 }
319
320 InsertTailList(list_size, &s->list_entry_size);
321 }
322
323 typedef struct {
324 uint64_t stripe;
325 LIST_ENTRY list_entry;
326 } superblock_stripe;
327
add_superblock_stripe(LIST_ENTRY * stripes,uint64_t off,uint64_t len)328 static NTSTATUS add_superblock_stripe(LIST_ENTRY* stripes, uint64_t off, uint64_t len) {
329 uint64_t i;
330
331 for (i = 0; i < len; i++) {
332 LIST_ENTRY* le;
333 superblock_stripe* ss;
334 bool ignore = false;
335
336 le = stripes->Flink;
337 while (le != stripes) {
338 ss = CONTAINING_RECORD(le, superblock_stripe, list_entry);
339
340 if (ss->stripe == off + i) {
341 ignore = true;
342 break;
343 }
344
345 le = le->Flink;
346 }
347
348 if (ignore)
349 continue;
350
351 ss = ExAllocatePoolWithTag(PagedPool, sizeof(superblock_stripe), ALLOC_TAG);
352 if (!ss) {
353 ERR("out of memory\n");
354 return STATUS_INSUFFICIENT_RESOURCES;
355 }
356
357 ss->stripe = off + i;
358 InsertTailList(stripes, &ss->list_entry);
359 }
360
361 return STATUS_SUCCESS;
362 }
363
get_superblock_size(chunk * c,uint64_t * size)364 static NTSTATUS get_superblock_size(chunk* c, uint64_t* size) {
365 NTSTATUS Status;
366 CHUNK_ITEM* ci = c->chunk_item;
367 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
368 uint64_t off_start, off_end, space = 0;
369 uint16_t i = 0, j;
370 LIST_ENTRY stripes;
371
372 InitializeListHead(&stripes);
373
374 while (superblock_addrs[i] != 0) {
375 if (ci->type & BLOCK_FLAG_RAID0 || ci->type & BLOCK_FLAG_RAID10) {
376 for (j = 0; j < ci->num_stripes; j++) {
377 ULONG sub_stripes = max(ci->sub_stripes, 1);
378
379 if (cis[j].offset + (ci->size * ci->num_stripes / sub_stripes) > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
380 off_start = superblock_addrs[i] - cis[j].offset;
381 off_start -= off_start % ci->stripe_length;
382 off_start *= ci->num_stripes / sub_stripes;
383 off_start += (j / sub_stripes) * ci->stripe_length;
384
385 off_end = off_start + ci->stripe_length;
386
387 Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, 1);
388 if (!NT_SUCCESS(Status)) {
389 ERR("add_superblock_stripe returned %08lx\n", Status);
390 goto end;
391 }
392 }
393 }
394 } else if (ci->type & BLOCK_FLAG_RAID5) {
395 for (j = 0; j < ci->num_stripes; j++) {
396 uint64_t stripe_size = ci->size / (ci->num_stripes - 1);
397
398 if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
399 off_start = superblock_addrs[i] - cis[j].offset;
400 off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 1));
401 off_start *= ci->num_stripes - 1;
402
403 off_end = off_start + (ci->stripe_length * (ci->num_stripes - 1));
404
405 Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length);
406 if (!NT_SUCCESS(Status)) {
407 ERR("add_superblock_stripe returned %08lx\n", Status);
408 goto end;
409 }
410 }
411 }
412 } else if (ci->type & BLOCK_FLAG_RAID6) {
413 for (j = 0; j < ci->num_stripes; j++) {
414 uint64_t stripe_size = ci->size / (ci->num_stripes - 2);
415
416 if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
417 off_start = superblock_addrs[i] - cis[j].offset;
418 off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 2));
419 off_start *= ci->num_stripes - 2;
420
421 off_end = off_start + (ci->stripe_length * (ci->num_stripes - 2));
422
423 Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length);
424 if (!NT_SUCCESS(Status)) {
425 ERR("add_superblock_stripe returned %08lx\n", Status);
426 goto end;
427 }
428 }
429 }
430 } else { // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
431 for (j = 0; j < ci->num_stripes; j++) {
432 if (cis[j].offset + ci->size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
433 off_start = ((superblock_addrs[i] - cis[j].offset) / c->chunk_item->stripe_length) * c->chunk_item->stripe_length;
434 off_end = sector_align(superblock_addrs[i] - cis[j].offset + sizeof(superblock), c->chunk_item->stripe_length);
435
436 Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length);
437 if (!NT_SUCCESS(Status)) {
438 ERR("add_superblock_stripe returned %08lx\n", Status);
439 goto end;
440 }
441 }
442 }
443 }
444
445 i++;
446 }
447
448 Status = STATUS_SUCCESS;
449
450 end:
451 while (!IsListEmpty(&stripes)) {
452 LIST_ENTRY* le = RemoveHeadList(&stripes);
453 superblock_stripe* ss = CONTAINING_RECORD(le, superblock_stripe, list_entry);
454
455 space++;
456
457 ExFreePool(ss);
458 }
459
460 if (NT_SUCCESS(Status))
461 *size = space * ci->stripe_length;
462
463 return Status;
464 }
465
load_stored_free_space_cache(device_extension * Vcb,chunk * c,bool load_only,PIRP Irp)466 NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, bool load_only, PIRP Irp) {
467 KEY searchkey;
468 traverse_ptr tp;
469 FREE_SPACE_ITEM* fsi;
470 uint64_t inode, *generation;
471 uint8_t* data;
472 NTSTATUS Status;
473 uint32_t *checksums, crc32, num_sectors, num_valid_sectors, size;
474 FREE_SPACE_ENTRY* fse;
475 uint64_t num_entries, num_bitmaps, extent_length, bmpnum, off, total_space = 0, superblock_size;
476 LIST_ENTRY *le, rollback;
477
478 // FIXME - does this break if Vcb->superblock.sector_size is not 4096?
479
480 TRACE("(%p, %I64x)\n", Vcb, c->offset);
481
482 searchkey.obj_id = FREE_SPACE_CACHE_ID;
483 searchkey.obj_type = 0;
484 searchkey.offset = c->offset;
485
486 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
487 if (!NT_SUCCESS(Status)) {
488 ERR("error - find_item returned %08lx\n", Status);
489 return Status;
490 }
491
492 if (keycmp(tp.item->key, searchkey)) {
493 TRACE("(%I64x,%x,%I64x) not found\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
494 return STATUS_NOT_FOUND;
495 }
496
497 if (tp.item->size < sizeof(FREE_SPACE_ITEM)) {
498 WARN("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
499 return STATUS_NOT_FOUND;
500 }
501
502 fsi = (FREE_SPACE_ITEM*)tp.item->data;
503
504 if (fsi->key.obj_type != TYPE_INODE_ITEM) {
505 WARN("cache pointed to something other than an INODE_ITEM\n");
506 return STATUS_NOT_FOUND;
507 }
508
509 inode = fsi->key.obj_id;
510 num_entries = fsi->num_entries;
511 num_bitmaps = fsi->num_bitmaps;
512
513 Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, false, NULL, &c->cache, PagedPool, Irp);
514 if (!NT_SUCCESS(Status)) {
515 ERR("open_fcb returned %08lx\n", Status);
516 return STATUS_NOT_FOUND;
517 }
518
519 if (load_only)
520 return STATUS_SUCCESS;
521
522 if (c->cache->inode_item.st_size == 0) {
523 WARN("cache had zero length\n");
524 free_fcb(c->cache);
525 c->cache = NULL;
526 return STATUS_NOT_FOUND;
527 }
528
529 c->cache->inode_item.flags |= BTRFS_INODE_NODATACOW;
530
531 if (num_entries == 0 && num_bitmaps == 0)
532 return STATUS_SUCCESS;
533
534 size = (uint32_t)sector_align(c->cache->inode_item.st_size, Vcb->superblock.sector_size);
535
536 data = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG);
537
538 if (!data) {
539 ERR("out of memory\n");
540 free_fcb(c->cache);
541 c->cache = NULL;
542 return STATUS_INSUFFICIENT_RESOURCES;
543 }
544
545 if (c->chunk_item->size < 0x6400000) { // 100 MB
546 WARN("deleting free space cache for chunk smaller than 100MB\n");
547 goto clearcache;
548 }
549
550 Status = read_file(c->cache, data, 0, c->cache->inode_item.st_size, NULL, NULL);
551 if (!NT_SUCCESS(Status)) {
552 ERR("read_file returned %08lx\n", Status);
553 ExFreePool(data);
554
555 c->cache->deleted = true;
556 mark_fcb_dirty(c->cache);
557
558 free_fcb(c->cache);
559 c->cache = NULL;
560 return STATUS_NOT_FOUND;
561 }
562
563 if (size > c->cache->inode_item.st_size)
564 RtlZeroMemory(&data[c->cache->inode_item.st_size], (ULONG)(size - c->cache->inode_item.st_size));
565
566 num_sectors = size >> Vcb->sector_shift;
567
568 generation = (uint64_t*)(data + (num_sectors * sizeof(uint32_t)));
569
570 if (*generation != fsi->generation) {
571 WARN("free space cache generation for %I64x was %I64x, expected %I64x\n", c->offset, *generation, fsi->generation);
572 goto clearcache;
573 }
574
575 extent_length = (num_sectors * sizeof(uint32_t)) + sizeof(uint64_t) + (num_entries * sizeof(FREE_SPACE_ENTRY));
576
577 num_valid_sectors = (ULONG)((sector_align(extent_length, Vcb->superblock.sector_size) >> Vcb->sector_shift) + num_bitmaps);
578
579 if (num_valid_sectors > num_sectors) {
580 ERR("free space cache for %I64x was %u sectors, expected at least %u\n", c->offset, num_sectors, num_valid_sectors);
581 goto clearcache;
582 }
583
584 checksums = (uint32_t*)data;
585
586 for (uint32_t i = 0; i < num_valid_sectors; i++) {
587 if (i << Vcb->sector_shift > sizeof(uint32_t) * num_sectors)
588 crc32 = ~calc_crc32c(0xffffffff, &data[i << Vcb->sector_shift], Vcb->superblock.sector_size);
589 else if ((i + 1) << Vcb->sector_shift < sizeof(uint32_t) * num_sectors)
590 crc32 = 0; // FIXME - test this
591 else
592 crc32 = ~calc_crc32c(0xffffffff, &data[sizeof(uint32_t) * num_sectors], ((i + 1) << Vcb->sector_shift) - (sizeof(uint32_t) * num_sectors));
593
594 if (crc32 != checksums[i]) {
595 WARN("checksum %u was %08x, expected %08x\n", i, crc32, checksums[i]);
596 goto clearcache;
597 }
598 }
599
600 off = (sizeof(uint32_t) * num_sectors) + sizeof(uint64_t);
601
602 bmpnum = 0;
603 for (uint32_t i = 0; i < num_entries; i++) {
604 if ((off + sizeof(FREE_SPACE_ENTRY)) >> Vcb->sector_shift != off >> Vcb->sector_shift)
605 off = sector_align(off, Vcb->superblock.sector_size);
606
607 fse = (FREE_SPACE_ENTRY*)&data[off];
608
609 if (fse->type == FREE_SPACE_EXTENT) {
610 Status = add_space_entry(&c->space, &c->space_size, fse->offset, fse->size);
611 if (!NT_SUCCESS(Status)) {
612 ERR("add_space_entry returned %08lx\n", Status);
613 ExFreePool(data);
614 return Status;
615 }
616
617 total_space += fse->size;
618 } else if (fse->type != FREE_SPACE_BITMAP) {
619 ERR("unknown free-space type %x\n", fse->type);
620 }
621
622 off += sizeof(FREE_SPACE_ENTRY);
623 }
624
625 if (num_bitmaps > 0) {
626 bmpnum = sector_align(off, Vcb->superblock.sector_size) >> Vcb->sector_shift;
627 off = (sizeof(uint32_t) * num_sectors) + sizeof(uint64_t);
628
629 for (uint32_t i = 0; i < num_entries; i++) {
630 if ((off + sizeof(FREE_SPACE_ENTRY)) >> Vcb->sector_shift != off >> Vcb->sector_shift)
631 off = sector_align(off, Vcb->superblock.sector_size);
632
633 fse = (FREE_SPACE_ENTRY*)&data[off];
634
635 if (fse->type == FREE_SPACE_BITMAP) {
636 // FIXME - make sure we don't overflow the buffer here
637 load_free_space_bitmap(Vcb, c, fse->offset, &data[bmpnum << Vcb->sector_shift], &total_space);
638 bmpnum++;
639 }
640
641 off += sizeof(FREE_SPACE_ENTRY);
642 }
643 }
644
645 // do sanity check
646
647 Status = get_superblock_size(c, &superblock_size);
648 if (!NT_SUCCESS(Status)) {
649 ERR("get_superblock_size returned %08lx\n", Status);
650 ExFreePool(data);
651 return Status;
652 }
653
654 if (c->chunk_item->size - c->used != total_space + superblock_size) {
655 WARN("invalidating cache for chunk %I64x: space was %I64x, expected %I64x\n", c->offset, total_space + superblock_size, c->chunk_item->size - c->used);
656 goto clearcache;
657 }
658
659 le = c->space.Flink;
660 while (le != &c->space) {
661 space* s = CONTAINING_RECORD(le, space, list_entry);
662 LIST_ENTRY* le2 = le->Flink;
663
664 if (le2 != &c->space) {
665 space* s2 = CONTAINING_RECORD(le2, space, list_entry);
666
667 if (s2->address == s->address + s->size) {
668 s->size += s2->size;
669
670 RemoveEntryList(&s2->list_entry);
671 RemoveEntryList(&s2->list_entry_size);
672 ExFreePool(s2);
673
674 RemoveEntryList(&s->list_entry_size);
675 order_space_entry(s, &c->space_size);
676
677 le2 = le;
678 }
679 }
680
681 le = le2;
682 }
683
684 ExFreePool(data);
685
686 return STATUS_SUCCESS;
687
688 clearcache:
689 ExFreePool(data);
690
691 InitializeListHead(&rollback);
692
693 Status = delete_tree_item(Vcb, &tp);
694 if (!NT_SUCCESS(Status)) {
695 ERR("delete_tree_item returned %08lx\n", Status);
696 return Status;
697 }
698
699 Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, &rollback);
700 if (!NT_SUCCESS(Status)) {
701 ERR("excise_extents returned %08lx\n", Status);
702 do_rollback(Vcb, &rollback);
703 return Status;
704 }
705
706 clear_rollback(&rollback);
707
708 c->cache->deleted = true;
709 mark_fcb_dirty(c->cache);
710
711 c->old_cache = c->cache;
712 c->cache = NULL;
713
714 le = c->space.Flink;
715 while (le != &c->space) {
716 space* s = CONTAINING_RECORD(le, space, list_entry);
717 LIST_ENTRY* le2 = le->Flink;
718
719 RemoveEntryList(&s->list_entry);
720 RemoveEntryList(&s->list_entry_size);
721 ExFreePool(s);
722
723 le = le2;
724 }
725
726 return STATUS_NOT_FOUND;
727 }
728
load_stored_free_space_tree(device_extension * Vcb,chunk * c,PIRP Irp)729 static NTSTATUS load_stored_free_space_tree(device_extension* Vcb, chunk* c, PIRP Irp) {
730 KEY searchkey;
731 traverse_ptr tp, next_tp;
732 NTSTATUS Status;
733 ULONG* bmparr = NULL;
734 ULONG bmplen = 0;
735 LIST_ENTRY* le;
736
737 TRACE("(%p, %I64x)\n", Vcb, c->offset);
738
739 if (!Vcb->space_root)
740 return STATUS_NOT_FOUND;
741
742 searchkey.obj_id = c->offset;
743 searchkey.obj_type = TYPE_FREE_SPACE_INFO;
744 searchkey.offset = c->chunk_item->size;
745
746 Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, false, Irp);
747 if (!NT_SUCCESS(Status)) {
748 ERR("find_item returned %08lx\n", Status);
749 return Status;
750 }
751
752 if (keycmp(tp.item->key, searchkey)) {
753 TRACE("(%I64x,%x,%I64x) not found\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
754 return STATUS_NOT_FOUND;
755 }
756
757 if (tp.item->size < sizeof(FREE_SPACE_INFO)) {
758 WARN("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_INFO));
759 return STATUS_NOT_FOUND;
760 }
761
762 while (find_next_item(Vcb, &tp, &next_tp, false, Irp)) {
763 tp = next_tp;
764
765 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
766 break;
767
768 if (tp.item->key.obj_type == TYPE_FREE_SPACE_EXTENT) {
769 Status = add_space_entry(&c->space, &c->space_size, tp.item->key.obj_id, tp.item->key.offset);
770 if (!NT_SUCCESS(Status)) {
771 ERR("add_space_entry returned %08lx\n", Status);
772 if (bmparr) ExFreePool(bmparr);
773 return Status;
774 }
775 } else if (tp.item->key.obj_type == TYPE_FREE_SPACE_BITMAP) {
776 ULONG explen, index, runlength;
777 RTL_BITMAP bmp;
778 uint64_t lastoff;
779 ULONG bmpl;
780
781 explen = (ULONG)(tp.item->key.offset >> Vcb->sector_shift) / 8;
782
783 if (tp.item->size < explen) {
784 WARN("(%I64x,%x,%I64x) was %u bytes, expected %lu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, explen);
785 return STATUS_NOT_FOUND;
786 } else if (tp.item->size == 0) {
787 WARN("(%I64x,%x,%I64x) has size of 0\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
788 return STATUS_NOT_FOUND;
789 }
790
791 if (bmplen < tp.item->size) {
792 if (bmparr)
793 ExFreePool(bmparr);
794
795 bmplen = (ULONG)sector_align(tp.item->size, sizeof(ULONG));
796 bmparr = ExAllocatePoolWithTag(PagedPool, bmplen, ALLOC_TAG);
797 if (!bmparr) {
798 ERR("out of memory\n");
799 return STATUS_INSUFFICIENT_RESOURCES;
800 }
801 }
802
803 // We copy the bitmap because it supposedly has to be ULONG-aligned
804 RtlCopyMemory(bmparr, tp.item->data, tp.item->size);
805
806 bmpl = (ULONG)tp.item->key.offset >> Vcb->sector_shift;
807
808 RtlInitializeBitMap(&bmp, bmparr, bmpl);
809
810 lastoff = tp.item->key.obj_id;
811
812 runlength = RtlFindFirstRunClear(&bmp, &index);
813
814 while (runlength != 0) {
815 uint64_t runstart, runend;
816
817 if (index >= bmpl)
818 break;
819
820 if (index + runlength >= bmpl) {
821 runlength = bmpl - index;
822
823 if (runlength == 0)
824 break;
825 }
826
827 runstart = tp.item->key.obj_id + (index << Vcb->sector_shift);
828 runend = runstart + (runlength << Vcb->sector_shift);
829
830 if (runstart > lastoff) {
831 Status = add_space_entry(&c->space, &c->space_size, lastoff, runstart - lastoff);
832 if (!NT_SUCCESS(Status)) {
833 ERR("add_space_entry returned %08lx\n", Status);
834 if (bmparr) ExFreePool(bmparr);
835 return Status;
836 }
837 }
838
839 lastoff = runend;
840
841 runlength = RtlFindNextForwardRunClear(&bmp, index + runlength, &index);
842 }
843
844 if (lastoff < tp.item->key.obj_id + tp.item->key.offset) {
845 Status = add_space_entry(&c->space, &c->space_size, lastoff, tp.item->key.obj_id + tp.item->key.offset - lastoff);
846 if (!NT_SUCCESS(Status)) {
847 ERR("add_space_entry returned %08lx\n", Status);
848 if (bmparr) ExFreePool(bmparr);
849 return Status;
850 }
851 }
852 }
853 }
854
855 if (bmparr)
856 ExFreePool(bmparr);
857
858 le = c->space.Flink;
859 while (le != &c->space) {
860 space* s = CONTAINING_RECORD(le, space, list_entry);
861 LIST_ENTRY* le2 = le->Flink;
862
863 if (le2 != &c->space) {
864 space* s2 = CONTAINING_RECORD(le2, space, list_entry);
865
866 if (s2->address == s->address + s->size) {
867 s->size += s2->size;
868
869 RemoveEntryList(&s2->list_entry);
870 RemoveEntryList(&s2->list_entry_size);
871 ExFreePool(s2);
872
873 RemoveEntryList(&s->list_entry_size);
874 order_space_entry(s, &c->space_size);
875
876 le2 = le;
877 }
878 }
879
880 le = le2;
881 }
882
883 return STATUS_SUCCESS;
884 }
885
load_free_space_cache(device_extension * Vcb,chunk * c,PIRP Irp)886 static NTSTATUS load_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp) {
887 traverse_ptr tp, next_tp;
888 KEY searchkey;
889 uint64_t lastaddr;
890 bool b;
891 space* s;
892 NTSTATUS Status;
893
894 if (Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE && Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID) {
895 Status = load_stored_free_space_tree(Vcb, c, Irp);
896
897 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
898 ERR("load_stored_free_space_tree returned %08lx\n", Status);
899 return Status;
900 }
901 } else if (Vcb->superblock.generation - 1 == Vcb->superblock.cache_generation) {
902 Status = load_stored_free_space_cache(Vcb, c, false, Irp);
903
904 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
905 ERR("load_stored_free_space_cache returned %08lx\n", Status);
906 return Status;
907 }
908 } else
909 Status = STATUS_NOT_FOUND;
910
911 if (Status == STATUS_NOT_FOUND) {
912 TRACE("generating free space cache for chunk %I64x\n", c->offset);
913
914 searchkey.obj_id = c->offset;
915 searchkey.obj_type = TYPE_EXTENT_ITEM;
916 searchkey.offset = 0;
917
918 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
919 if (!NT_SUCCESS(Status)) {
920 ERR("error - find_item returned %08lx\n", Status);
921 return Status;
922 }
923
924 lastaddr = c->offset;
925
926 do {
927 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
928 break;
929
930 if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
931 if (tp.item->key.obj_id > lastaddr) {
932 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
933
934 if (!s) {
935 ERR("out of memory\n");
936 return STATUS_INSUFFICIENT_RESOURCES;
937 }
938
939 s->address = lastaddr;
940 s->size = tp.item->key.obj_id - lastaddr;
941 InsertTailList(&c->space, &s->list_entry);
942
943 order_space_entry(s, &c->space_size);
944
945 TRACE("(%I64x,%I64x)\n", s->address, s->size);
946 }
947
948 if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
949 lastaddr = tp.item->key.obj_id + Vcb->superblock.node_size;
950 else
951 lastaddr = tp.item->key.obj_id + tp.item->key.offset;
952 }
953
954 b = find_next_item(Vcb, &tp, &next_tp, false, Irp);
955 if (b)
956 tp = next_tp;
957 } while (b);
958
959 if (lastaddr < c->offset + c->chunk_item->size) {
960 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
961
962 if (!s) {
963 ERR("out of memory\n");
964 return STATUS_INSUFFICIENT_RESOURCES;
965 }
966
967 s->address = lastaddr;
968 s->size = c->offset + c->chunk_item->size - lastaddr;
969 InsertTailList(&c->space, &s->list_entry);
970
971 order_space_entry(s, &c->space_size);
972
973 TRACE("(%I64x,%I64x)\n", s->address, s->size);
974 }
975 }
976
977 return STATUS_SUCCESS;
978 }
979
load_cache_chunk(device_extension * Vcb,chunk * c,PIRP Irp)980 NTSTATUS load_cache_chunk(device_extension* Vcb, chunk* c, PIRP Irp) {
981 NTSTATUS Status;
982
983 if (c->cache_loaded)
984 return STATUS_SUCCESS;
985
986 Status = load_free_space_cache(Vcb, c, Irp);
987 if (!NT_SUCCESS(Status)) {
988 ERR("load_free_space_cache returned %08lx\n", Status);
989 return Status;
990 }
991
992 protect_superblocks(c);
993
994 c->cache_loaded = true;
995
996 return STATUS_SUCCESS;
997 }
998
insert_cache_extent(fcb * fcb,uint64_t start,uint64_t length,LIST_ENTRY * rollback)999 static NTSTATUS insert_cache_extent(fcb* fcb, uint64_t start, uint64_t length, LIST_ENTRY* rollback) {
1000 NTSTATUS Status;
1001 LIST_ENTRY* le = fcb->Vcb->chunks.Flink;
1002 chunk* c;
1003 uint64_t flags;
1004
1005 flags = fcb->Vcb->data_flags;
1006
1007 while (le != &fcb->Vcb->chunks) {
1008 c = CONTAINING_RECORD(le, chunk, list_entry);
1009
1010 if (!c->readonly && !c->reloc) {
1011 acquire_chunk_lock(c, fcb->Vcb);
1012
1013 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
1014 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, false, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length, false, 0))
1015 return STATUS_SUCCESS;
1016 }
1017
1018 release_chunk_lock(c, fcb->Vcb);
1019 }
1020
1021 le = le->Flink;
1022 }
1023
1024 Status = alloc_chunk(fcb->Vcb, flags, &c, false);
1025
1026 if (!NT_SUCCESS(Status)) {
1027 ERR("alloc_chunk returned %08lx\n", Status);
1028 return Status;
1029 }
1030
1031 acquire_chunk_lock(c, fcb->Vcb);
1032
1033 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
1034 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, false, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length, false, 0))
1035 return STATUS_SUCCESS;
1036 }
1037
1038 release_chunk_lock(c, fcb->Vcb);
1039
1040 return STATUS_DISK_FULL;
1041 }
1042
allocate_cache_chunk(device_extension * Vcb,chunk * c,bool * changed,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)1043 static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, bool* changed, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
1044 LIST_ENTRY* le;
1045 NTSTATUS Status;
1046 uint64_t num_entries, new_cache_size, i;
1047 uint32_t num_sectors;
1048 bool realloc_extents = false;
1049
1050 // FIXME - also do bitmaps
1051 // FIXME - make sure this works when sector_size is not 4096
1052
1053 *changed = false;
1054
1055 num_entries = 0;
1056
1057 // num_entries is the number of entries in c->space and c->deleting - it might
1058 // be slightly higher then what we end up writing, but doing it this way is much
1059 // quicker and simpler.
1060 if (!IsListEmpty(&c->space)) {
1061 le = c->space.Flink;
1062 while (le != &c->space) {
1063 num_entries++;
1064
1065 le = le->Flink;
1066 }
1067 }
1068
1069 if (!IsListEmpty(&c->deleting)) {
1070 le = c->deleting.Flink;
1071 while (le != &c->deleting) {
1072 num_entries++;
1073
1074 le = le->Flink;
1075 }
1076 }
1077
1078 new_cache_size = sizeof(uint64_t) + (num_entries * sizeof(FREE_SPACE_ENTRY));
1079
1080 num_sectors = (uint32_t)sector_align(new_cache_size, Vcb->superblock.sector_size) >> Vcb->sector_shift;
1081 num_sectors = (uint32_t)sector_align(num_sectors, CACHE_INCREMENTS);
1082
1083 // adjust for padding
1084 // FIXME - there must be a more efficient way of doing this
1085 new_cache_size = sizeof(uint64_t) + (sizeof(uint32_t) * num_sectors);
1086 for (i = 0; i < num_entries; i++) {
1087 if ((new_cache_size >> Vcb->sector_shift) != ((new_cache_size + sizeof(FREE_SPACE_ENTRY)) >> Vcb->sector_shift))
1088 new_cache_size = sector_align(new_cache_size, Vcb->superblock.sector_size);
1089
1090 new_cache_size += sizeof(FREE_SPACE_ENTRY);
1091 }
1092
1093 new_cache_size = sector_align(new_cache_size, CACHE_INCREMENTS << Vcb->sector_shift);
1094
1095 TRACE("chunk %I64x: cache_size = %I64x, new_cache_size = %I64x\n", c->offset, c->cache ? c->cache->inode_item.st_size : 0, new_cache_size);
1096
1097 if (c->cache) {
1098 if (new_cache_size > c->cache->inode_item.st_size)
1099 realloc_extents = true;
1100 else {
1101 le = c->cache->extents.Flink;
1102
1103 while (le != &c->cache->extents) {
1104 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
1105
1106 if (!ext->ignore && (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC)) {
1107 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->extent_data.data[0];
1108
1109 if (ed2->size != 0) {
1110 chunk* c2 = get_chunk_from_address(Vcb, ed2->address);
1111
1112 if (c2 && (c2->readonly || c2->reloc)) {
1113 realloc_extents = true;
1114 break;
1115 }
1116 }
1117 }
1118
1119 le = le->Flink;
1120 }
1121 }
1122 }
1123
1124 if (!c->cache) {
1125 FREE_SPACE_ITEM* fsi;
1126 KEY searchkey;
1127 traverse_ptr tp;
1128
1129 // create new inode
1130
1131 c->cache = create_fcb(Vcb, PagedPool);
1132 if (!c->cache) {
1133 ERR("out of memory\n");
1134 return STATUS_INSUFFICIENT_RESOURCES;
1135 }
1136
1137 c->cache->Vcb = Vcb;
1138
1139 c->cache->inode_item.st_size = new_cache_size;
1140 c->cache->inode_item.st_blocks = new_cache_size;
1141 c->cache->inode_item.st_nlink = 1;
1142 c->cache->inode_item.st_mode = S_IRUSR | S_IWUSR | __S_IFREG;
1143 c->cache->inode_item.flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW | BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
1144
1145 c->cache->Header.IsFastIoPossible = fast_io_possible(c->cache);
1146 c->cache->Header.AllocationSize.QuadPart = 0;
1147 c->cache->Header.FileSize.QuadPart = 0;
1148 c->cache->Header.ValidDataLength.QuadPart = 0;
1149
1150 c->cache->subvol = Vcb->root_root;
1151
1152 c->cache->inode = InterlockedIncrement64(&Vcb->root_root->lastinode);
1153 c->cache->hash = calc_crc32c(0xffffffff, (uint8_t*)&c->cache->inode, sizeof(uint64_t));
1154
1155 c->cache->type = BTRFS_TYPE_FILE;
1156 c->cache->created = true;
1157
1158 // create new free space entry
1159
1160 fsi = ExAllocatePoolWithTag(PagedPool, sizeof(FREE_SPACE_ITEM), ALLOC_TAG);
1161 if (!fsi) {
1162 ERR("out of memory\n");
1163 reap_fcb(c->cache);
1164 c->cache = NULL;
1165 return STATUS_INSUFFICIENT_RESOURCES;
1166 }
1167
1168 searchkey.obj_id = FREE_SPACE_CACHE_ID;
1169 searchkey.obj_type = 0;
1170 searchkey.offset = c->offset;
1171
1172 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1173 if (!NT_SUCCESS(Status)) {
1174 ERR("error - find_item returned %08lx\n", Status);
1175 ExFreePool(fsi);
1176 reap_fcb(c->cache);
1177 c->cache = NULL;
1178 return Status;
1179 }
1180
1181 if (!keycmp(searchkey, tp.item->key)) {
1182 Status = delete_tree_item(Vcb, &tp);
1183 if (!NT_SUCCESS(Status)) {
1184 ERR("delete_tree_item returned %08lx\n", Status);
1185 ExFreePool(fsi);
1186 reap_fcb(c->cache);
1187 c->cache = NULL;
1188 return Status;
1189 }
1190 }
1191
1192 fsi->key.obj_id = c->cache->inode;
1193 fsi->key.obj_type = TYPE_INODE_ITEM;
1194 fsi->key.offset = 0;
1195
1196 Status = insert_tree_item(Vcb, Vcb->root_root, FREE_SPACE_CACHE_ID, 0, c->offset, fsi, sizeof(FREE_SPACE_ITEM), NULL, Irp);
1197 if (!NT_SUCCESS(Status)) {
1198 ERR("insert_tree_item returned %08lx\n", Status);
1199 ExFreePool(fsi);
1200 reap_fcb(c->cache);
1201 c->cache = NULL;
1202 return Status;
1203 }
1204
1205 // allocate space
1206
1207 Status = insert_cache_extent(c->cache, 0, new_cache_size, rollback);
1208 if (!NT_SUCCESS(Status)) {
1209 ERR("insert_cache_extent returned %08lx\n", Status);
1210 reap_fcb(c->cache);
1211 c->cache = NULL;
1212 return Status;
1213 }
1214
1215 c->cache->extents_changed = true;
1216 InsertTailList(&Vcb->all_fcbs, &c->cache->list_entry_all);
1217
1218 add_fcb_to_subvol(c->cache);
1219
1220 Status = flush_fcb(c->cache, true, batchlist, Irp);
1221 if (!NT_SUCCESS(Status)) {
1222 ERR("flush_fcb returned %08lx\n", Status);
1223 free_fcb(c->cache);
1224 c->cache = NULL;
1225 return Status;
1226 }
1227
1228 *changed = true;
1229 } else if (realloc_extents) {
1230 KEY searchkey;
1231 traverse_ptr tp;
1232
1233 TRACE("reallocating extents\n");
1234
1235 // add free_space entry to tree cache
1236
1237 searchkey.obj_id = FREE_SPACE_CACHE_ID;
1238 searchkey.obj_type = 0;
1239 searchkey.offset = c->offset;
1240
1241 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1242 if (!NT_SUCCESS(Status)) {
1243 ERR("error - find_item returned %08lx\n", Status);
1244 return Status;
1245 }
1246
1247 if (keycmp(searchkey, tp.item->key)) {
1248 ERR("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1249 return STATUS_INTERNAL_ERROR;
1250 }
1251
1252 if (tp.item->size < sizeof(FREE_SPACE_ITEM)) {
1253 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
1254 return STATUS_INTERNAL_ERROR;
1255 }
1256
1257 tp.tree->write = true;
1258
1259 // remove existing extents
1260
1261 if (c->cache->inode_item.st_size > 0) {
1262 le = c->cache->extents.Flink;
1263
1264 while (le != &c->cache->extents) {
1265 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
1266
1267 if (!ext->ignore && (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC)) {
1268 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->extent_data.data[0];
1269
1270 if (ed2->size != 0) {
1271 chunk* c2 = get_chunk_from_address(Vcb, ed2->address);
1272
1273 if (c2) {
1274 c2->changed = true;
1275 c2->space_changed = true;
1276 }
1277 }
1278 }
1279
1280 le = le->Flink;
1281 }
1282
1283 Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback);
1284 if (!NT_SUCCESS(Status)) {
1285 ERR("excise_extents returned %08lx\n", Status);
1286 return Status;
1287 }
1288 }
1289
1290 // add new extent
1291
1292 Status = insert_cache_extent(c->cache, 0, new_cache_size, rollback);
1293 if (!NT_SUCCESS(Status)) {
1294 ERR("insert_cache_extent returned %08lx\n", Status);
1295 return Status;
1296 }
1297
1298 // modify INODE_ITEM
1299
1300 c->cache->inode_item.st_size = new_cache_size;
1301 c->cache->inode_item.st_blocks = new_cache_size;
1302
1303 Status = flush_fcb(c->cache, true, batchlist, Irp);
1304 if (!NT_SUCCESS(Status)) {
1305 ERR("flush_fcb returned %08lx\n", Status);
1306 return Status;
1307 }
1308
1309 *changed = true;
1310 } else {
1311 KEY searchkey;
1312 traverse_ptr tp;
1313
1314 // add INODE_ITEM and free_space entry to tree cache, for writing later
1315
1316 searchkey.obj_id = c->cache->inode;
1317 searchkey.obj_type = TYPE_INODE_ITEM;
1318 searchkey.offset = 0;
1319
1320 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1321 if (!NT_SUCCESS(Status)) {
1322 ERR("error - find_item returned %08lx\n", Status);
1323 return Status;
1324 }
1325
1326 if (keycmp(searchkey, tp.item->key)) {
1327 INODE_ITEM* ii;
1328
1329 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
1330 if (!ii) {
1331 ERR("out of memory\n");
1332 return STATUS_INSUFFICIENT_RESOURCES;
1333 }
1334
1335 RtlCopyMemory(ii, &c->cache->inode_item, sizeof(INODE_ITEM));
1336
1337 Status = insert_tree_item(Vcb, Vcb->root_root, c->cache->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp);
1338 if (!NT_SUCCESS(Status)) {
1339 ERR("insert_tree_item returned %08lx\n", Status);
1340 ExFreePool(ii);
1341 return Status;
1342 }
1343
1344 *changed = true;
1345 } else {
1346 if (tp.item->size < sizeof(INODE_ITEM)) {
1347 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_ITEM));
1348 return STATUS_INTERNAL_ERROR;
1349 }
1350
1351 tp.tree->write = true;
1352 }
1353
1354 searchkey.obj_id = FREE_SPACE_CACHE_ID;
1355 searchkey.obj_type = 0;
1356 searchkey.offset = c->offset;
1357
1358 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1359 if (!NT_SUCCESS(Status)) {
1360 ERR("error - find_item returned %08lx\n", Status);
1361 return Status;
1362 }
1363
1364 if (keycmp(searchkey, tp.item->key)) {
1365 ERR("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1366 return STATUS_INTERNAL_ERROR;
1367 }
1368
1369 if (tp.item->size < sizeof(FREE_SPACE_ITEM)) {
1370 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
1371 return STATUS_INTERNAL_ERROR;
1372 }
1373
1374 tp.tree->write = true;
1375 }
1376
1377 // FIXME - reduce inode allocation if cache is shrinking. Make sure to avoid infinite write loops
1378
1379 return STATUS_SUCCESS;
1380 }
1381
allocate_cache(device_extension * Vcb,bool * changed,PIRP Irp,LIST_ENTRY * rollback)1382 NTSTATUS allocate_cache(device_extension* Vcb, bool* changed, PIRP Irp, LIST_ENTRY* rollback) {
1383 LIST_ENTRY *le, batchlist;
1384 NTSTATUS Status;
1385
1386 *changed = false;
1387
1388 InitializeListHead(&batchlist);
1389
1390 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
1391
1392 le = Vcb->chunks.Flink;
1393 while (le != &Vcb->chunks) {
1394 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
1395
1396 if (c->space_changed && c->chunk_item->size >= 0x6400000) { // 100MB
1397 bool b;
1398
1399 acquire_chunk_lock(c, Vcb);
1400 Status = allocate_cache_chunk(Vcb, c, &b, &batchlist, Irp, rollback);
1401 release_chunk_lock(c, Vcb);
1402
1403 if (b)
1404 *changed = true;
1405
1406 if (!NT_SUCCESS(Status)) {
1407 ERR("allocate_cache_chunk(%I64x) returned %08lx\n", c->offset, Status);
1408 ExReleaseResourceLite(&Vcb->chunk_lock);
1409 clear_batch_list(Vcb, &batchlist);
1410 return Status;
1411 }
1412 }
1413
1414 le = le->Flink;
1415 }
1416
1417 ExReleaseResourceLite(&Vcb->chunk_lock);
1418
1419 Status = commit_batch_list(Vcb, &batchlist, Irp);
1420 if (!NT_SUCCESS(Status)) {
1421 ERR("commit_batch_list returned %08lx\n", Status);
1422 return Status;
1423 }
1424
1425 return STATUS_SUCCESS;
1426 }
1427
add_rollback_space(LIST_ENTRY * rollback,bool add,LIST_ENTRY * list,LIST_ENTRY * list_size,uint64_t address,uint64_t length,chunk * c)1428 static void add_rollback_space(LIST_ENTRY* rollback, bool add, LIST_ENTRY* list, LIST_ENTRY* list_size, uint64_t address, uint64_t length, chunk* c) {
1429 rollback_space* rs;
1430
1431 rs = ExAllocatePoolWithTag(PagedPool, sizeof(rollback_space), ALLOC_TAG);
1432 if (!rs) {
1433 ERR("out of memory\n");
1434 return;
1435 }
1436
1437 rs->list = list;
1438 rs->list_size = list_size;
1439 rs->address = address;
1440 rs->length = length;
1441 rs->chunk = c;
1442
1443 add_rollback(rollback, add ? ROLLBACK_ADD_SPACE : ROLLBACK_SUBTRACT_SPACE, rs);
1444 }
1445
space_list_add2(LIST_ENTRY * list,LIST_ENTRY * list_size,uint64_t address,uint64_t length,chunk * c,LIST_ENTRY * rollback)1446 void space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, uint64_t address, uint64_t length, chunk* c, LIST_ENTRY* rollback) {
1447 LIST_ENTRY* le;
1448 space *s, *s2;
1449
1450 if (IsListEmpty(list)) {
1451 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
1452
1453 if (!s) {
1454 ERR("out of memory\n");
1455 return;
1456 }
1457
1458 s->address = address;
1459 s->size = length;
1460 InsertTailList(list, &s->list_entry);
1461
1462 if (list_size)
1463 InsertTailList(list_size, &s->list_entry_size);
1464
1465 if (rollback)
1466 add_rollback_space(rollback, true, list, list_size, address, length, c);
1467
1468 return;
1469 }
1470
1471 le = list->Flink;
1472 do {
1473 s2 = CONTAINING_RECORD(le, space, list_entry);
1474
1475 // old entry envelops new one completely
1476 if (s2->address <= address && s2->address + s2->size >= address + length)
1477 return;
1478
1479 // new entry envelops old one completely
1480 if (address <= s2->address && address + length >= s2->address + s2->size) {
1481 if (address < s2->address) {
1482 if (rollback)
1483 add_rollback_space(rollback, true, list, list_size, address, s2->address - address, c);
1484
1485 s2->size += s2->address - address;
1486 s2->address = address;
1487
1488 while (s2->list_entry.Blink != list) {
1489 space* s3 = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
1490
1491 if (s3->address + s3->size == s2->address) {
1492 s2->address = s3->address;
1493 s2->size += s3->size;
1494
1495 RemoveEntryList(&s3->list_entry);
1496
1497 if (list_size)
1498 RemoveEntryList(&s3->list_entry_size);
1499
1500 ExFreePool(s3);
1501 } else
1502 break;
1503 }
1504 }
1505
1506 if (length > s2->size) {
1507 if (rollback)
1508 add_rollback_space(rollback, true, list, list_size, s2->address + s2->size, address + length - s2->address - s2->size, c);
1509
1510 s2->size = length;
1511
1512 while (s2->list_entry.Flink != list) {
1513 space* s3 = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
1514
1515 if (s3->address <= s2->address + s2->size) {
1516 s2->size = max(s2->size, s3->address + s3->size - s2->address);
1517
1518 RemoveEntryList(&s3->list_entry);
1519
1520 if (list_size)
1521 RemoveEntryList(&s3->list_entry_size);
1522
1523 ExFreePool(s3);
1524 } else
1525 break;
1526 }
1527 }
1528
1529 if (list_size) {
1530 RemoveEntryList(&s2->list_entry_size);
1531 order_space_entry(s2, list_size);
1532 }
1533
1534 return;
1535 }
1536
1537 // new entry overlaps start of old one
1538 if (address < s2->address && address + length >= s2->address) {
1539 if (rollback)
1540 add_rollback_space(rollback, true, list, list_size, address, s2->address - address, c);
1541
1542 s2->size += s2->address - address;
1543 s2->address = address;
1544
1545 while (s2->list_entry.Blink != list) {
1546 space* s3 = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
1547
1548 if (s3->address + s3->size == s2->address) {
1549 s2->address = s3->address;
1550 s2->size += s3->size;
1551
1552 RemoveEntryList(&s3->list_entry);
1553
1554 if (list_size)
1555 RemoveEntryList(&s3->list_entry_size);
1556
1557 ExFreePool(s3);
1558 } else
1559 break;
1560 }
1561
1562 if (list_size) {
1563 RemoveEntryList(&s2->list_entry_size);
1564 order_space_entry(s2, list_size);
1565 }
1566
1567 return;
1568 }
1569
1570 // new entry overlaps end of old one
1571 if (address <= s2->address + s2->size && address + length > s2->address + s2->size) {
1572 if (rollback)
1573 add_rollback_space(rollback, true, list, list_size, address, s2->address + s2->size - address, c);
1574
1575 s2->size = address + length - s2->address;
1576
1577 while (s2->list_entry.Flink != list) {
1578 space* s3 = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
1579
1580 if (s3->address <= s2->address + s2->size) {
1581 s2->size = max(s2->size, s3->address + s3->size - s2->address);
1582
1583 RemoveEntryList(&s3->list_entry);
1584
1585 if (list_size)
1586 RemoveEntryList(&s3->list_entry_size);
1587
1588 ExFreePool(s3);
1589 } else
1590 break;
1591 }
1592
1593 if (list_size) {
1594 RemoveEntryList(&s2->list_entry_size);
1595 order_space_entry(s2, list_size);
1596 }
1597
1598 return;
1599 }
1600
1601 // add completely separate entry
1602 if (s2->address > address + length) {
1603 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
1604
1605 if (!s) {
1606 ERR("out of memory\n");
1607 return;
1608 }
1609
1610 if (rollback)
1611 add_rollback_space(rollback, true, list, list_size, address, length, c);
1612
1613 s->address = address;
1614 s->size = length;
1615 InsertHeadList(s2->list_entry.Blink, &s->list_entry);
1616
1617 if (list_size)
1618 order_space_entry(s, list_size);
1619
1620 return;
1621 }
1622
1623 le = le->Flink;
1624 } while (le != list);
1625
1626 // check if contiguous with last entry
1627 if (s2->address + s2->size == address) {
1628 s2->size += length;
1629
1630 if (list_size) {
1631 RemoveEntryList(&s2->list_entry_size);
1632 order_space_entry(s2, list_size);
1633 }
1634
1635 return;
1636 }
1637
1638 // otherwise, insert at end
1639 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
1640
1641 if (!s) {
1642 ERR("out of memory\n");
1643 return;
1644 }
1645
1646 s->address = address;
1647 s->size = length;
1648 InsertTailList(list, &s->list_entry);
1649
1650 if (list_size)
1651 order_space_entry(s, list_size);
1652
1653 if (rollback)
1654 add_rollback_space(rollback, true, list, list_size, address, length, c);
1655 }
1656
space_list_merge(LIST_ENTRY * spacelist,LIST_ENTRY * spacelist_size,LIST_ENTRY * deleting)1657 void space_list_merge(LIST_ENTRY* spacelist, LIST_ENTRY* spacelist_size, LIST_ENTRY* deleting) {
1658 LIST_ENTRY* le = deleting->Flink;
1659
1660 while (le != deleting) {
1661 space* s = CONTAINING_RECORD(le, space, list_entry);
1662
1663 space_list_add2(spacelist, spacelist_size, s->address, s->size, NULL, NULL);
1664
1665 le = le->Flink;
1666 }
1667 }
1668
copy_space_list(LIST_ENTRY * old_list,LIST_ENTRY * new_list)1669 static NTSTATUS copy_space_list(LIST_ENTRY* old_list, LIST_ENTRY* new_list) {
1670 LIST_ENTRY* le;
1671
1672 le = old_list->Flink;
1673 while (le != old_list) {
1674 space* s = CONTAINING_RECORD(le, space, list_entry);
1675 space* s2;
1676
1677 s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
1678 if (!s2) {
1679 ERR("out of memory\n");
1680 return STATUS_INSUFFICIENT_RESOURCES;
1681 }
1682
1683 s2->address = s->address;
1684 s2->size = s->size;
1685
1686 InsertTailList(new_list, &s2->list_entry);
1687
1688 le = le->Flink;
1689 }
1690
1691 return STATUS_SUCCESS;
1692 }
1693
update_chunk_cache(device_extension * Vcb,chunk * c,BTRFS_TIME * now,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)1694 static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME* now, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
1695 NTSTATUS Status;
1696 KEY searchkey;
1697 traverse_ptr tp;
1698 FREE_SPACE_ITEM* fsi;
1699 void* data;
1700 uint64_t num_entries, *cachegen, off;
1701 uint32_t *checksums, num_sectors;
1702 LIST_ENTRY space_list, deleting;
1703
1704 data = ExAllocatePoolWithTag(NonPagedPool, (ULONG)c->cache->inode_item.st_size, ALLOC_TAG);
1705 if (!data) {
1706 ERR("out of memory\n");
1707 return STATUS_INSUFFICIENT_RESOURCES;
1708 }
1709
1710 RtlZeroMemory(data, (ULONG)c->cache->inode_item.st_size);
1711
1712 InitializeListHead(&space_list);
1713 InitializeListHead(&deleting);
1714
1715 Status = copy_space_list(&c->space, &space_list);
1716 if (!NT_SUCCESS(Status)) {
1717 ERR("copy_space_list returned %08lx\n", Status);
1718 goto end;
1719 }
1720
1721 Status = copy_space_list(&c->deleting, &deleting);
1722 if (!NT_SUCCESS(Status)) {
1723 ERR("copy_space_list returned %08lx\n", Status);
1724
1725 while (!IsListEmpty(&space_list)) {
1726 ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1727 }
1728
1729 goto end;
1730 }
1731
1732 space_list_merge(&space_list, NULL, &deleting);
1733
1734 num_entries = 0;
1735 num_sectors = (uint32_t)(c->cache->inode_item.st_size >> Vcb->sector_shift);
1736 off = (sizeof(uint32_t) * num_sectors) + sizeof(uint64_t);
1737
1738 while (!IsListEmpty(&space_list)) {
1739 FREE_SPACE_ENTRY* fse;
1740
1741 space* s = CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry);
1742
1743 if ((off + sizeof(FREE_SPACE_ENTRY)) >> Vcb->sector_shift != off >> Vcb->sector_shift)
1744 off = sector_align(off, Vcb->superblock.sector_size);
1745
1746 fse = (FREE_SPACE_ENTRY*)((uint8_t*)data + off);
1747
1748 fse->offset = s->address;
1749 fse->size = s->size;
1750 fse->type = FREE_SPACE_EXTENT;
1751 num_entries++;
1752
1753 off += sizeof(FREE_SPACE_ENTRY);
1754 }
1755
1756 // update INODE_ITEM
1757
1758 c->cache->inode_item.generation = Vcb->superblock.generation;
1759 c->cache->inode_item.transid = Vcb->superblock.generation;
1760 c->cache->inode_item.sequence++;
1761 c->cache->inode_item.st_ctime = *now;
1762
1763 Status = flush_fcb(c->cache, true, batchlist, Irp);
1764 if (!NT_SUCCESS(Status)) {
1765 ERR("flush_fcb returned %08lx\n", Status);
1766 goto end;
1767 }
1768
1769 // update free_space item
1770
1771 searchkey.obj_id = FREE_SPACE_CACHE_ID;
1772 searchkey.obj_type = 0;
1773 searchkey.offset = c->offset;
1774
1775 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1776 if (!NT_SUCCESS(Status)) {
1777 ERR("error - find_item returned %08lx\n", Status);
1778 goto end;
1779 }
1780
1781 if (keycmp(searchkey, tp.item->key)) {
1782 ERR("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1783 Status = STATUS_INTERNAL_ERROR;
1784 goto end;
1785 }
1786
1787 if (tp.item->size < sizeof(FREE_SPACE_ITEM)) {
1788 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
1789 Status = STATUS_INTERNAL_ERROR;
1790 goto end;
1791 }
1792
1793 fsi = (FREE_SPACE_ITEM*)tp.item->data;
1794
1795 fsi->generation = Vcb->superblock.generation;
1796 fsi->num_entries = num_entries;
1797 fsi->num_bitmaps = 0;
1798
1799 // set cache generation
1800
1801 cachegen = (uint64_t*)((uint8_t*)data + (sizeof(uint32_t) * num_sectors));
1802 *cachegen = Vcb->superblock.generation;
1803
1804 // calculate cache checksums
1805
1806 checksums = (uint32_t*)data;
1807
1808 // FIXME - if we know sector is fully zeroed, use cached checksum
1809
1810 for (uint32_t i = 0; i < num_sectors; i++) {
1811 if (i << Vcb->sector_shift > sizeof(uint32_t) * num_sectors)
1812 checksums[i] = ~calc_crc32c(0xffffffff, (uint8_t*)data + (i << Vcb->sector_shift), Vcb->superblock.sector_size);
1813 else if ((i + 1) << Vcb->sector_shift < sizeof(uint32_t) * num_sectors)
1814 checksums[i] = 0; // FIXME - test this
1815 else
1816 checksums[i] = ~calc_crc32c(0xffffffff, (uint8_t*)data + (sizeof(uint32_t) * num_sectors), ((i + 1) << Vcb->sector_shift) - (sizeof(uint32_t) * num_sectors));
1817 }
1818
1819 // write cache
1820
1821 Status = do_write_file(c->cache, 0, c->cache->inode_item.st_size, data, NULL, false, 0, rollback);
1822 if (!NT_SUCCESS(Status)) {
1823 ERR("do_write_file returned %08lx\n", Status);
1824
1825 // Writing the cache isn't critical, so we don't return an error if writing fails. This means
1826 // we can still flush on a degraded mount if metadata is RAID1 but data is RAID0.
1827 }
1828
1829 Status = STATUS_SUCCESS;
1830
1831 end:
1832 ExFreePool(data);
1833
1834 return Status;
1835 }
1836
update_chunk_cache_tree(device_extension * Vcb,chunk * c,PIRP Irp)1837 static NTSTATUS update_chunk_cache_tree(device_extension* Vcb, chunk* c, PIRP Irp) {
1838 NTSTATUS Status;
1839 LIST_ENTRY space_list;
1840 FREE_SPACE_INFO* fsi;
1841 KEY searchkey;
1842 traverse_ptr tp;
1843 uint32_t fsi_count = 0;
1844
1845 InitializeListHead(&space_list);
1846
1847 Status = copy_space_list(&c->space, &space_list);
1848 if (!NT_SUCCESS(Status)) {
1849 ERR("copy_space_list returned %08lx\n", Status);
1850 return Status;
1851 }
1852
1853 space_list_merge(&space_list, NULL, &c->deleting);
1854
1855 searchkey.obj_id = c->offset;
1856 searchkey.obj_type = TYPE_FREE_SPACE_EXTENT;
1857 searchkey.offset = 0xffffffffffffffff;
1858
1859 Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, false, Irp);
1860 if (Status == STATUS_NOT_FOUND)
1861 goto after_tree_walk;
1862 else if (!NT_SUCCESS(Status)) {
1863 ERR("find_item returned %08lx\n", Status);
1864
1865 while (!IsListEmpty(&space_list)) {
1866 ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1867 }
1868
1869 return Status;
1870 }
1871
1872 while (!IsListEmpty(&space_list) && tp.item->key.obj_id < c->offset + c->chunk_item->size) {
1873 traverse_ptr next_tp;
1874
1875 if (tp.item->key.obj_type == TYPE_FREE_SPACE_EXTENT) {
1876 space* s = CONTAINING_RECORD(space_list.Flink, space, list_entry);
1877
1878 if (s->address < tp.item->key.obj_id || (s->address == tp.item->key.obj_id && s->size < tp.item->key.offset)) {
1879 // add entry
1880
1881 Status = insert_tree_item(Vcb, Vcb->space_root, s->address, TYPE_FREE_SPACE_EXTENT, s->size, NULL, 0,
1882 NULL, Irp);
1883 if (!NT_SUCCESS(Status)) {
1884 ERR("insert_tree_item returned %08lx\n", Status);
1885
1886 while (!IsListEmpty(&space_list)) {
1887 ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1888 }
1889
1890 return Status;
1891 }
1892
1893 fsi_count++;
1894
1895 RemoveHeadList(&space_list);
1896 ExFreePool(s);
1897 continue;
1898 } else if (s->address == tp.item->key.obj_id && s->size == tp.item->key.offset) {
1899 // unchanged entry
1900
1901 fsi_count++;
1902
1903 RemoveHeadList(&space_list);
1904 ExFreePool(s);
1905 } else {
1906 // remove entry
1907
1908 Status = delete_tree_item(Vcb, &tp);
1909 if (!NT_SUCCESS(Status)) {
1910 ERR("delete_tree_item returned %08lx\n", Status);
1911
1912 while (!IsListEmpty(&space_list)) {
1913 ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1914 }
1915
1916 return Status;
1917 }
1918 }
1919 } else if (tp.item->key.obj_type == TYPE_FREE_SPACE_BITMAP) {
1920 Status = delete_tree_item(Vcb, &tp);
1921 if (!NT_SUCCESS(Status)) {
1922 ERR("delete_tree_item returned %08lx\n", Status);
1923
1924 while (!IsListEmpty(&space_list)) {
1925 ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1926 }
1927
1928 return Status;
1929 }
1930 }
1931
1932 if (!find_next_item(Vcb, &tp, &next_tp, false, Irp))
1933 goto after_tree_walk;
1934
1935 tp = next_tp;
1936 }
1937
1938 // after loop, delete remaining tree items for this chunk
1939
1940 while (tp.item->key.obj_id < c->offset + c->chunk_item->size) {
1941 traverse_ptr next_tp;
1942
1943 if (tp.item->key.obj_type == TYPE_FREE_SPACE_EXTENT || tp.item->key.obj_type == TYPE_FREE_SPACE_BITMAP) {
1944 Status = delete_tree_item(Vcb, &tp);
1945 if (!NT_SUCCESS(Status)) {
1946 ERR("delete_tree_item returned %08lx\n", Status);
1947 return Status;
1948 }
1949 }
1950
1951 if (!find_next_item(Vcb, &tp, &next_tp, false, NULL))
1952 break;
1953
1954 tp = next_tp;
1955 }
1956
1957 after_tree_walk:
1958 // after loop, insert remaining space_list entries
1959
1960 while (!IsListEmpty(&space_list)) {
1961 space* s = CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry);
1962
1963 Status = insert_tree_item(Vcb, Vcb->space_root, s->address, TYPE_FREE_SPACE_EXTENT, s->size, NULL, 0,
1964 NULL, Irp);
1965 if (!NT_SUCCESS(Status)) {
1966 ERR("insert_tree_item returned %08lx\n", Status);
1967
1968 while (!IsListEmpty(&space_list)) {
1969 ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1970 }
1971
1972 return Status;
1973 }
1974
1975 fsi_count++;
1976
1977 ExFreePool(s);
1978 }
1979
1980 // change TYPE_FREE_SPACE_INFO in place if present, and insert otherwise
1981
1982 searchkey.obj_id = c->offset;
1983 searchkey.obj_type = TYPE_FREE_SPACE_INFO;
1984 searchkey.offset = c->chunk_item->size;
1985
1986 Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, false, Irp);
1987 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
1988 ERR("find_item returned %08lx\n", Status);
1989 return Status;
1990 }
1991
1992 if (NT_SUCCESS(Status) && !keycmp(tp.item->key, searchkey)) {
1993 if (tp.item->size == sizeof(FREE_SPACE_INFO)) {
1994 tree* t;
1995
1996 // change in place if possible
1997
1998 fsi = (FREE_SPACE_INFO*)tp.item->data;
1999
2000 fsi->count = fsi_count;
2001 fsi->flags = 0;
2002
2003 tp.tree->write = true;
2004
2005 t = tp.tree;
2006 while (t) {
2007 if (t->paritem && t->paritem->ignore) {
2008 t->paritem->ignore = false;
2009 t->parent->header.num_items++;
2010 t->parent->size += sizeof(internal_node);
2011 }
2012
2013 t->header.generation = Vcb->superblock.generation;
2014 t = t->parent;
2015 }
2016
2017 return STATUS_SUCCESS;
2018 } else
2019 delete_tree_item(Vcb, &tp);
2020 }
2021
2022 // insert FREE_SPACE_INFO
2023
2024 fsi = ExAllocatePoolWithTag(PagedPool, sizeof(FREE_SPACE_INFO), ALLOC_TAG);
2025 if (!fsi) {
2026 ERR("out of memory\n");
2027 return STATUS_INSUFFICIENT_RESOURCES;
2028 }
2029
2030 fsi->count = fsi_count;
2031 fsi->flags = 0;
2032
2033 Status = insert_tree_item(Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size, fsi, sizeof(fsi),
2034 NULL, Irp);
2035 if (!NT_SUCCESS(Status)) {
2036 ERR("insert_tree_item returned %08lx\n", Status);
2037 return Status;
2038 }
2039
2040 return STATUS_SUCCESS;
2041 }
2042
update_chunk_caches(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)2043 NTSTATUS update_chunk_caches(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
2044 LIST_ENTRY *le, batchlist;
2045 NTSTATUS Status;
2046 chunk* c;
2047 LARGE_INTEGER time;
2048 BTRFS_TIME now;
2049
2050 KeQuerySystemTime(&time);
2051 win_time_to_unix(time, &now);
2052
2053 InitializeListHead(&batchlist);
2054
2055 le = Vcb->chunks.Flink;
2056 while (le != &Vcb->chunks) {
2057 c = CONTAINING_RECORD(le, chunk, list_entry);
2058
2059 if (c->space_changed && c->chunk_item->size >= 0x6400000) { // 100MB
2060 acquire_chunk_lock(c, Vcb);
2061 Status = update_chunk_cache(Vcb, c, &now, &batchlist, Irp, rollback);
2062 release_chunk_lock(c, Vcb);
2063
2064 if (!NT_SUCCESS(Status)) {
2065 ERR("update_chunk_cache(%I64x) returned %08lx\n", c->offset, Status);
2066 clear_batch_list(Vcb, &batchlist);
2067 return Status;
2068 }
2069 }
2070
2071 le = le->Flink;
2072 }
2073
2074 Status = commit_batch_list(Vcb, &batchlist, Irp);
2075 if (!NT_SUCCESS(Status)) {
2076 ERR("commit_batch_list returned %08lx\n", Status);
2077 return Status;
2078 }
2079
2080 le = Vcb->chunks.Flink;
2081 while (le != &Vcb->chunks) {
2082 c = CONTAINING_RECORD(le, chunk, list_entry);
2083
2084 if (c->changed && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) {
2085 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
2086
2087 while (!IsListEmpty(&c->partial_stripes)) {
2088 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
2089
2090 Status = flush_partial_stripe(Vcb, c, ps);
2091
2092 if (ps->bmparr)
2093 ExFreePool(ps->bmparr);
2094
2095 ExFreePool(ps);
2096
2097 if (!NT_SUCCESS(Status)) {
2098 ERR("flush_partial_stripe returned %08lx\n", Status);
2099 ExReleaseResourceLite(&c->partial_stripes_lock);
2100 return Status;
2101 }
2102 }
2103
2104 ExReleaseResourceLite(&c->partial_stripes_lock);
2105 }
2106
2107 le = le->Flink;
2108 }
2109
2110 return STATUS_SUCCESS;
2111 }
2112
update_chunk_caches_tree(device_extension * Vcb,PIRP Irp)2113 NTSTATUS update_chunk_caches_tree(device_extension* Vcb, PIRP Irp) {
2114 LIST_ENTRY *le;
2115 NTSTATUS Status;
2116 chunk* c;
2117
2118 Vcb->superblock.compat_ro_flags |= BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID;
2119
2120 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
2121
2122 le = Vcb->chunks.Flink;
2123 while (le != &Vcb->chunks) {
2124 c = CONTAINING_RECORD(le, chunk, list_entry);
2125
2126 if (c->space_changed) {
2127 acquire_chunk_lock(c, Vcb);
2128 Status = update_chunk_cache_tree(Vcb, c, Irp);
2129 release_chunk_lock(c, Vcb);
2130
2131 if (!NT_SUCCESS(Status)) {
2132 ERR("update_chunk_cache_tree(%I64x) returned %08lx\n", c->offset, Status);
2133 ExReleaseResourceLite(&Vcb->chunk_lock);
2134 return Status;
2135 }
2136 }
2137
2138 le = le->Flink;
2139 }
2140
2141 ExReleaseResourceLite(&Vcb->chunk_lock);
2142
2143 return STATUS_SUCCESS;
2144 }
2145
space_list_add(chunk * c,uint64_t address,uint64_t length,LIST_ENTRY * rollback)2146 void space_list_add(chunk* c, uint64_t address, uint64_t length, LIST_ENTRY* rollback) {
2147 TRACE("(%p, %I64x, %I64x, %p)\n", c, address, length, rollback);
2148
2149 c->changed = true;
2150 c->space_changed = true;
2151
2152 space_list_add2(&c->deleting, NULL, address, length, c, rollback);
2153 }
2154
space_list_subtract2(LIST_ENTRY * list,LIST_ENTRY * list_size,uint64_t address,uint64_t length,chunk * c,LIST_ENTRY * rollback)2155 void space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, uint64_t address, uint64_t length, chunk* c, LIST_ENTRY* rollback) {
2156 LIST_ENTRY *le, *le2;
2157 space *s, *s2;
2158
2159 if (IsListEmpty(list))
2160 return;
2161
2162 le = list->Flink;
2163 while (le != list) {
2164 s2 = CONTAINING_RECORD(le, space, list_entry);
2165 le2 = le->Flink;
2166
2167 if (s2->address >= address + length)
2168 return;
2169
2170 if (s2->address >= address && s2->address + s2->size <= address + length) { // remove entry entirely
2171 if (rollback)
2172 add_rollback_space(rollback, false, list, list_size, s2->address, s2->size, c);
2173
2174 RemoveEntryList(&s2->list_entry);
2175
2176 if (list_size)
2177 RemoveEntryList(&s2->list_entry_size);
2178
2179 ExFreePool(s2);
2180 } else if (address + length > s2->address && address + length < s2->address + s2->size) {
2181 if (address > s2->address) { // cut out hole
2182 if (rollback)
2183 add_rollback_space(rollback, false, list, list_size, address, length, c);
2184
2185 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
2186
2187 if (!s) {
2188 ERR("out of memory\n");
2189 return;
2190 }
2191
2192 s->address = s2->address;
2193 s->size = address - s2->address;
2194 InsertHeadList(s2->list_entry.Blink, &s->list_entry);
2195
2196 s2->size = s2->address + s2->size - address - length;
2197 s2->address = address + length;
2198
2199 if (list_size) {
2200 RemoveEntryList(&s2->list_entry_size);
2201 order_space_entry(s2, list_size);
2202 order_space_entry(s, list_size);
2203 }
2204
2205 return;
2206 } else { // remove start of entry
2207 if (rollback)
2208 add_rollback_space(rollback, false, list, list_size, s2->address, address + length - s2->address, c);
2209
2210 s2->size -= address + length - s2->address;
2211 s2->address = address + length;
2212
2213 if (list_size) {
2214 RemoveEntryList(&s2->list_entry_size);
2215 order_space_entry(s2, list_size);
2216 }
2217 }
2218 } else if (address > s2->address && address < s2->address + s2->size) { // remove end of entry
2219 if (rollback)
2220 add_rollback_space(rollback, false, list, list_size, address, s2->address + s2->size - address, c);
2221
2222 s2->size = address - s2->address;
2223
2224 if (list_size) {
2225 RemoveEntryList(&s2->list_entry_size);
2226 order_space_entry(s2, list_size);
2227 }
2228 }
2229
2230 le = le2;
2231 }
2232 }
2233
space_list_subtract(chunk * c,uint64_t address,uint64_t length,LIST_ENTRY * rollback)2234 void space_list_subtract(chunk* c, uint64_t address, uint64_t length, LIST_ENTRY* rollback) {
2235 c->changed = true;
2236 c->space_changed = true;
2237
2238 space_list_subtract2(&c->space, &c->space_size, address, length, c, rollback);
2239
2240 space_list_subtract2(&c->deleting, NULL, address, length, c, rollback);
2241 }
2242