1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 #include "crc32c.h"
20 
21 // Number of increments in the size of each cache inode, in sectors. Should
22 // this be a constant number of sectors, a constant 256 KB, or what?
23 #define CACHE_INCREMENTS    64
24 
remove_free_space_inode(device_extension * Vcb,uint64_t inode,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)25 static NTSTATUS remove_free_space_inode(device_extension* Vcb, uint64_t inode, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
26     NTSTATUS Status;
27     fcb* fcb;
28 
29     Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, false, NULL, &fcb, PagedPool, Irp);
30     if (!NT_SUCCESS(Status)) {
31         ERR("open_fcb returned %08lx\n", Status);
32         return Status;
33     }
34 
35     mark_fcb_dirty(fcb);
36 
37     if (fcb->inode_item.st_size > 0) {
38         Status = excise_extents(fcb->Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
39         if (!NT_SUCCESS(Status)) {
40             ERR("excise_extents returned %08lx\n", Status);
41             return Status;
42         }
43     }
44 
45     fcb->deleted = true;
46 
47     Status = flush_fcb(fcb, false, batchlist, Irp);
48     if (!NT_SUCCESS(Status)) {
49         ERR("flush_fcb returned %08lx\n", Status);
50         free_fcb(fcb);
51         return Status;
52     }
53 
54     free_fcb(fcb);
55 
56     return STATUS_SUCCESS;
57 }
58 
clear_free_space_cache(device_extension * Vcb,LIST_ENTRY * batchlist,PIRP Irp)59 NTSTATUS clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp) {
60     KEY searchkey;
61     traverse_ptr tp, next_tp;
62     NTSTATUS Status;
63     bool b;
64     LIST_ENTRY rollback;
65 
66     InitializeListHead(&rollback);
67 
68     searchkey.obj_id = FREE_SPACE_CACHE_ID;
69     searchkey.obj_type = 0;
70     searchkey.offset = 0;
71 
72     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
73     if (!NT_SUCCESS(Status)) {
74         ERR("error - find_item returned %08lx\n", Status);
75         return Status;
76     }
77 
78     do {
79         if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
80             break;
81 
82         if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
83             Status = delete_tree_item(Vcb, &tp);
84             if (!NT_SUCCESS(Status)) {
85                 ERR("delete_tree_item returned %08lx\n", Status);
86                 return Status;
87             }
88 
89             if (tp.item->size >= sizeof(FREE_SPACE_ITEM)) {
90                 FREE_SPACE_ITEM* fsi = (FREE_SPACE_ITEM*)tp.item->data;
91 
92                 if (fsi->key.obj_type != TYPE_INODE_ITEM)
93                     WARN("key (%I64x,%x,%I64x) does not point to an INODE_ITEM\n", fsi->key.obj_id, fsi->key.obj_type, fsi->key.offset);
94                 else {
95                     LIST_ENTRY* le;
96 
97                     Status = remove_free_space_inode(Vcb, fsi->key.obj_id, batchlist, Irp, &rollback);
98 
99                     if (!NT_SUCCESS(Status))
100                         ERR("remove_free_space_inode for (%I64x,%x,%I64x) returned %08lx\n", fsi->key.obj_id, fsi->key.obj_type, fsi->key.offset, Status);
101 
102                     le = Vcb->chunks.Flink;
103                     while (le != &Vcb->chunks) {
104                         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
105 
106                         if (c->offset == tp.item->key.offset && c->cache) {
107                             reap_fcb(c->cache);
108                             c->cache = NULL;
109                         }
110 
111                         le = le->Flink;
112                     }
113                 }
114             } else
115                 WARN("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
116         }
117 
118         b = find_next_item(Vcb, &tp, &next_tp, false, Irp);
119         if (b)
120             tp = next_tp;
121     } while (b);
122 
123     Status = STATUS_SUCCESS;
124 
125     if (NT_SUCCESS(Status))
126         clear_rollback(&rollback);
127     else
128         do_rollback(Vcb, &rollback);
129 
130     if (Vcb->space_root) {
131         searchkey.obj_id = 0;
132         searchkey.obj_type = 0;
133         searchkey.offset = 0;
134 
135         Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, false, Irp);
136         if (!NT_SUCCESS(Status)) {
137             ERR("find_item returned %08lx\n", Status);
138             return Status;
139         }
140 
141         do {
142             Status = delete_tree_item(Vcb, &tp);
143             if (!NT_SUCCESS(Status)) {
144                 ERR("delete_tree_item returned %08lx\n", Status);
145                 return Status;
146             }
147 
148             b = find_next_item(Vcb, &tp, &next_tp, false, Irp);
149             if (b)
150                 tp = next_tp;
151         } while (b);
152     }
153 
154     // regenerate free space tree
155     if (Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE) {
156         LIST_ENTRY* le;
157 
158         ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
159 
160         le = Vcb->chunks.Flink;
161         while (le != &Vcb->chunks) {
162             chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
163 
164             if (!c->cache_loaded) {
165                 acquire_chunk_lock(c, Vcb);
166 
167                 Status = load_cache_chunk(Vcb, c, NULL);
168                 if (!NT_SUCCESS(Status)) {
169                     ERR("load_cache_chunk(%I64x) returned %08lx\n", c->offset, Status);
170                     release_chunk_lock(c, Vcb);
171                     ExReleaseResourceLite(&Vcb->chunk_lock);
172                     return Status;
173                 }
174 
175                 c->changed = true;
176                 c->space_changed = true;
177 
178                 release_chunk_lock(c, Vcb);
179             }
180 
181             le = le->Flink;
182         }
183 
184         ExReleaseResourceLite(&Vcb->chunk_lock);
185     }
186 
187     return Status;
188 }
189 
add_space_entry(LIST_ENTRY * list,LIST_ENTRY * list_size,uint64_t offset,uint64_t size)190 NTSTATUS add_space_entry(LIST_ENTRY* list, LIST_ENTRY* list_size, uint64_t offset, uint64_t size) {
191     space* s;
192 
193     s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
194 
195     if (!s) {
196         ERR("out of memory\n");
197         return STATUS_INSUFFICIENT_RESOURCES;
198     }
199 
200     s->address = offset;
201     s->size = size;
202 
203     if (IsListEmpty(list))
204         InsertTailList(list, &s->list_entry);
205     else {
206         space* s2 = CONTAINING_RECORD(list->Blink, space, list_entry);
207 
208         if (s2->address < offset)
209             InsertTailList(list, &s->list_entry);
210         else {
211             LIST_ENTRY* le;
212 
213             le = list->Flink;
214             while (le != list) {
215                 s2 = CONTAINING_RECORD(le, space, list_entry);
216 
217                 if (s2->address > offset) {
218                     InsertTailList(le, &s->list_entry);
219                     goto size;
220                 }
221 
222                 le = le->Flink;
223             }
224         }
225     }
226 
227 size:
228     if (!list_size)
229         return STATUS_SUCCESS;
230 
231     if (IsListEmpty(list_size))
232         InsertTailList(list_size, &s->list_entry_size);
233     else {
234         space* s2 = CONTAINING_RECORD(list_size->Blink, space, list_entry_size);
235 
236         if (s2->size >= size)
237             InsertTailList(list_size, &s->list_entry_size);
238         else {
239             LIST_ENTRY* le;
240 
241             le = list_size->Flink;
242             while (le != list_size) {
243                 s2 = CONTAINING_RECORD(le, space, list_entry_size);
244 
245                 if (s2->size <= size) {
246                     InsertHeadList(le->Blink, &s->list_entry_size);
247                     return STATUS_SUCCESS;
248                 }
249 
250                 le = le->Flink;
251             }
252         }
253     }
254 
255     return STATUS_SUCCESS;
256 }
257 
load_free_space_bitmap(device_extension * Vcb,chunk * c,uint64_t offset,void * data,uint64_t * total_space)258 static void load_free_space_bitmap(device_extension* Vcb, chunk* c, uint64_t offset, void* data, uint64_t* total_space) {
259     RTL_BITMAP bmph;
260     uint32_t i, len, *dwords = data;
261     ULONG runlength, index;
262 
263     // flip bits
264     for (i = 0; i < Vcb->superblock.sector_size / sizeof(uint32_t); i++) {
265         dwords[i] = ~dwords[i];
266     }
267 
268     len = Vcb->superblock.sector_size * 8;
269 
270     RtlInitializeBitMap(&bmph, data, len);
271 
272     index = 0;
273     runlength = RtlFindFirstRunClear(&bmph, &index);
274 
275     while (runlength != 0) {
276         uint64_t addr, length;
277 
278         if (index >= len)
279             break;
280 
281         if (index + runlength >= len) {
282             runlength = len - index;
283 
284             if (runlength == 0)
285                 break;
286         }
287 
288         addr = offset + (index << Vcb->sector_shift);
289         length = runlength << Vcb->sector_shift;
290 
291         add_space_entry(&c->space, &c->space_size, addr, length);
292         index += runlength;
293         *total_space += length;
294 
295         runlength = RtlFindNextForwardRunClear(&bmph, index, &index);
296     }
297 }
298 
order_space_entry(space * s,LIST_ENTRY * list_size)299 static void order_space_entry(space* s, LIST_ENTRY* list_size) {
300     LIST_ENTRY* le;
301 
302     if (IsListEmpty(list_size)) {
303         InsertHeadList(list_size, &s->list_entry_size);
304         return;
305     }
306 
307     le = list_size->Flink;
308 
309     while (le != list_size) {
310         space* s2 = CONTAINING_RECORD(le, space, list_entry_size);
311 
312         if (s2->size <= s->size) {
313             InsertHeadList(le->Blink, &s->list_entry_size);
314             return;
315         }
316 
317         le = le->Flink;
318     }
319 
320     InsertTailList(list_size, &s->list_entry_size);
321 }
322 
323 typedef struct {
324     uint64_t stripe;
325     LIST_ENTRY list_entry;
326 } superblock_stripe;
327 
add_superblock_stripe(LIST_ENTRY * stripes,uint64_t off,uint64_t len)328 static NTSTATUS add_superblock_stripe(LIST_ENTRY* stripes, uint64_t off, uint64_t len) {
329     uint64_t i;
330 
331     for (i = 0; i < len; i++) {
332         LIST_ENTRY* le;
333         superblock_stripe* ss;
334         bool ignore = false;
335 
336         le = stripes->Flink;
337         while (le != stripes) {
338             ss = CONTAINING_RECORD(le, superblock_stripe, list_entry);
339 
340             if (ss->stripe == off + i) {
341                 ignore = true;
342                 break;
343             }
344 
345             le = le->Flink;
346         }
347 
348         if (ignore)
349             continue;
350 
351         ss = ExAllocatePoolWithTag(PagedPool, sizeof(superblock_stripe), ALLOC_TAG);
352         if (!ss) {
353             ERR("out of memory\n");
354             return STATUS_INSUFFICIENT_RESOURCES;
355         }
356 
357         ss->stripe = off + i;
358         InsertTailList(stripes, &ss->list_entry);
359     }
360 
361     return STATUS_SUCCESS;
362 }
363 
get_superblock_size(chunk * c,uint64_t * size)364 static NTSTATUS get_superblock_size(chunk* c, uint64_t* size) {
365     NTSTATUS Status;
366     CHUNK_ITEM* ci = c->chunk_item;
367     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
368     uint64_t off_start, off_end, space = 0;
369     uint16_t i = 0, j;
370     LIST_ENTRY stripes;
371 
372     InitializeListHead(&stripes);
373 
374     while (superblock_addrs[i] != 0) {
375         if (ci->type & BLOCK_FLAG_RAID0 || ci->type & BLOCK_FLAG_RAID10) {
376             for (j = 0; j < ci->num_stripes; j++) {
377                 ULONG sub_stripes = max(ci->sub_stripes, 1);
378 
379                 if (cis[j].offset + (ci->size * ci->num_stripes / sub_stripes) > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
380                     off_start = superblock_addrs[i] - cis[j].offset;
381                     off_start -= off_start % ci->stripe_length;
382                     off_start *= ci->num_stripes / sub_stripes;
383                     off_start += (j / sub_stripes) * ci->stripe_length;
384 
385                     off_end = off_start + ci->stripe_length;
386 
387                     Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, 1);
388                     if (!NT_SUCCESS(Status)) {
389                         ERR("add_superblock_stripe returned %08lx\n", Status);
390                         goto end;
391                     }
392                 }
393             }
394         } else if (ci->type & BLOCK_FLAG_RAID5) {
395             for (j = 0; j < ci->num_stripes; j++) {
396                 uint64_t stripe_size = ci->size / (ci->num_stripes - 1);
397 
398                 if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
399                     off_start = superblock_addrs[i] - cis[j].offset;
400                     off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 1));
401                     off_start *= ci->num_stripes - 1;
402 
403                     off_end = off_start + (ci->stripe_length * (ci->num_stripes - 1));
404 
405                     Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length);
406                     if (!NT_SUCCESS(Status)) {
407                         ERR("add_superblock_stripe returned %08lx\n", Status);
408                         goto end;
409                     }
410                 }
411             }
412         } else if (ci->type & BLOCK_FLAG_RAID6) {
413             for (j = 0; j < ci->num_stripes; j++) {
414                 uint64_t stripe_size = ci->size / (ci->num_stripes - 2);
415 
416                 if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
417                     off_start = superblock_addrs[i] - cis[j].offset;
418                     off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 2));
419                     off_start *= ci->num_stripes - 2;
420 
421                     off_end = off_start + (ci->stripe_length * (ci->num_stripes - 2));
422 
423                     Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length);
424                     if (!NT_SUCCESS(Status)) {
425                         ERR("add_superblock_stripe returned %08lx\n", Status);
426                         goto end;
427                     }
428                 }
429             }
430         } else { // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
431             for (j = 0; j < ci->num_stripes; j++) {
432                 if (cis[j].offset + ci->size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
433                     off_start = ((superblock_addrs[i] - cis[j].offset) / c->chunk_item->stripe_length) * c->chunk_item->stripe_length;
434                     off_end = sector_align(superblock_addrs[i] - cis[j].offset + sizeof(superblock), c->chunk_item->stripe_length);
435 
436                     Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length);
437                     if (!NT_SUCCESS(Status)) {
438                         ERR("add_superblock_stripe returned %08lx\n", Status);
439                         goto end;
440                     }
441                 }
442             }
443         }
444 
445         i++;
446     }
447 
448     Status = STATUS_SUCCESS;
449 
450 end:
451     while (!IsListEmpty(&stripes)) {
452         LIST_ENTRY* le = RemoveHeadList(&stripes);
453         superblock_stripe* ss = CONTAINING_RECORD(le, superblock_stripe, list_entry);
454 
455         space++;
456 
457         ExFreePool(ss);
458     }
459 
460     if (NT_SUCCESS(Status))
461         *size = space * ci->stripe_length;
462 
463     return Status;
464 }
465 
load_stored_free_space_cache(device_extension * Vcb,chunk * c,bool load_only,PIRP Irp)466 NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, bool load_only, PIRP Irp) {
467     KEY searchkey;
468     traverse_ptr tp;
469     FREE_SPACE_ITEM* fsi;
470     uint64_t inode, *generation;
471     uint8_t* data;
472     NTSTATUS Status;
473     uint32_t *checksums, crc32, num_sectors, num_valid_sectors, size;
474     FREE_SPACE_ENTRY* fse;
475     uint64_t num_entries, num_bitmaps, extent_length, bmpnum, off, total_space = 0, superblock_size;
476     LIST_ENTRY *le, rollback;
477 
478     // FIXME - does this break if Vcb->superblock.sector_size is not 4096?
479 
480     TRACE("(%p, %I64x)\n", Vcb, c->offset);
481 
482     searchkey.obj_id = FREE_SPACE_CACHE_ID;
483     searchkey.obj_type = 0;
484     searchkey.offset = c->offset;
485 
486     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
487     if (!NT_SUCCESS(Status)) {
488         ERR("error - find_item returned %08lx\n", Status);
489         return Status;
490     }
491 
492     if (keycmp(tp.item->key, searchkey)) {
493         TRACE("(%I64x,%x,%I64x) not found\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
494         return STATUS_NOT_FOUND;
495     }
496 
497     if (tp.item->size < sizeof(FREE_SPACE_ITEM)) {
498         WARN("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
499         return STATUS_NOT_FOUND;
500     }
501 
502     fsi = (FREE_SPACE_ITEM*)tp.item->data;
503 
504     if (fsi->key.obj_type != TYPE_INODE_ITEM) {
505         WARN("cache pointed to something other than an INODE_ITEM\n");
506         return STATUS_NOT_FOUND;
507     }
508 
509     inode = fsi->key.obj_id;
510     num_entries = fsi->num_entries;
511     num_bitmaps = fsi->num_bitmaps;
512 
513     Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, false, NULL, &c->cache, PagedPool, Irp);
514     if (!NT_SUCCESS(Status)) {
515         ERR("open_fcb returned %08lx\n", Status);
516         return STATUS_NOT_FOUND;
517     }
518 
519     if (load_only)
520         return STATUS_SUCCESS;
521 
522     if (c->cache->inode_item.st_size == 0) {
523         WARN("cache had zero length\n");
524         free_fcb(c->cache);
525         c->cache = NULL;
526         return STATUS_NOT_FOUND;
527     }
528 
529     c->cache->inode_item.flags |= BTRFS_INODE_NODATACOW;
530 
531     if (num_entries == 0 && num_bitmaps == 0)
532         return STATUS_SUCCESS;
533 
534     size = (uint32_t)sector_align(c->cache->inode_item.st_size, Vcb->superblock.sector_size);
535 
536     data = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG);
537 
538     if (!data) {
539         ERR("out of memory\n");
540         free_fcb(c->cache);
541         c->cache = NULL;
542         return STATUS_INSUFFICIENT_RESOURCES;
543     }
544 
545     if (c->chunk_item->size < 0x6400000) { // 100 MB
546         WARN("deleting free space cache for chunk smaller than 100MB\n");
547         goto clearcache;
548     }
549 
550     Status = read_file(c->cache, data, 0, c->cache->inode_item.st_size, NULL, NULL);
551     if (!NT_SUCCESS(Status)) {
552         ERR("read_file returned %08lx\n", Status);
553         ExFreePool(data);
554 
555         c->cache->deleted = true;
556         mark_fcb_dirty(c->cache);
557 
558         free_fcb(c->cache);
559         c->cache = NULL;
560         return STATUS_NOT_FOUND;
561     }
562 
563     if (size > c->cache->inode_item.st_size)
564         RtlZeroMemory(&data[c->cache->inode_item.st_size], (ULONG)(size - c->cache->inode_item.st_size));
565 
566     num_sectors = size >> Vcb->sector_shift;
567 
568     generation = (uint64_t*)(data + (num_sectors * sizeof(uint32_t)));
569 
570     if (*generation != fsi->generation) {
571         WARN("free space cache generation for %I64x was %I64x, expected %I64x\n", c->offset, *generation, fsi->generation);
572         goto clearcache;
573     }
574 
575     extent_length = (num_sectors * sizeof(uint32_t)) + sizeof(uint64_t) + (num_entries * sizeof(FREE_SPACE_ENTRY));
576 
577     num_valid_sectors = (ULONG)((sector_align(extent_length, Vcb->superblock.sector_size) >> Vcb->sector_shift) + num_bitmaps);
578 
579     if (num_valid_sectors > num_sectors) {
580         ERR("free space cache for %I64x was %u sectors, expected at least %u\n", c->offset, num_sectors, num_valid_sectors);
581         goto clearcache;
582     }
583 
584     checksums = (uint32_t*)data;
585 
586     for (uint32_t i = 0; i < num_valid_sectors; i++) {
587         if (i << Vcb->sector_shift > sizeof(uint32_t) * num_sectors)
588             crc32 = ~calc_crc32c(0xffffffff, &data[i << Vcb->sector_shift], Vcb->superblock.sector_size);
589         else if ((i + 1) << Vcb->sector_shift < sizeof(uint32_t) * num_sectors)
590             crc32 = 0; // FIXME - test this
591         else
592             crc32 = ~calc_crc32c(0xffffffff, &data[sizeof(uint32_t) * num_sectors], ((i + 1) << Vcb->sector_shift) - (sizeof(uint32_t) * num_sectors));
593 
594         if (crc32 != checksums[i]) {
595             WARN("checksum %u was %08x, expected %08x\n", i, crc32, checksums[i]);
596             goto clearcache;
597         }
598     }
599 
600     off = (sizeof(uint32_t) * num_sectors) + sizeof(uint64_t);
601 
602     bmpnum = 0;
603     for (uint32_t i = 0; i < num_entries; i++) {
604         if ((off + sizeof(FREE_SPACE_ENTRY)) >> Vcb->sector_shift != off >> Vcb->sector_shift)
605             off = sector_align(off, Vcb->superblock.sector_size);
606 
607         fse = (FREE_SPACE_ENTRY*)&data[off];
608 
609         if (fse->type == FREE_SPACE_EXTENT) {
610             Status = add_space_entry(&c->space, &c->space_size, fse->offset, fse->size);
611             if (!NT_SUCCESS(Status)) {
612                 ERR("add_space_entry returned %08lx\n", Status);
613                 ExFreePool(data);
614                 return Status;
615             }
616 
617             total_space += fse->size;
618         } else if (fse->type != FREE_SPACE_BITMAP) {
619             ERR("unknown free-space type %x\n", fse->type);
620         }
621 
622         off += sizeof(FREE_SPACE_ENTRY);
623     }
624 
625     if (num_bitmaps > 0) {
626         bmpnum = sector_align(off, Vcb->superblock.sector_size) >> Vcb->sector_shift;
627         off = (sizeof(uint32_t) * num_sectors) + sizeof(uint64_t);
628 
629         for (uint32_t i = 0; i < num_entries; i++) {
630             if ((off + sizeof(FREE_SPACE_ENTRY)) >> Vcb->sector_shift != off >> Vcb->sector_shift)
631                 off = sector_align(off, Vcb->superblock.sector_size);
632 
633             fse = (FREE_SPACE_ENTRY*)&data[off];
634 
635             if (fse->type == FREE_SPACE_BITMAP) {
636                 // FIXME - make sure we don't overflow the buffer here
637                 load_free_space_bitmap(Vcb, c, fse->offset, &data[bmpnum << Vcb->sector_shift], &total_space);
638                 bmpnum++;
639             }
640 
641             off += sizeof(FREE_SPACE_ENTRY);
642         }
643     }
644 
645     // do sanity check
646 
647     Status = get_superblock_size(c, &superblock_size);
648     if (!NT_SUCCESS(Status)) {
649         ERR("get_superblock_size returned %08lx\n", Status);
650         ExFreePool(data);
651         return Status;
652     }
653 
654     if (c->chunk_item->size - c->used != total_space + superblock_size) {
655         WARN("invalidating cache for chunk %I64x: space was %I64x, expected %I64x\n", c->offset, total_space + superblock_size, c->chunk_item->size - c->used);
656         goto clearcache;
657     }
658 
659     le = c->space.Flink;
660     while (le != &c->space) {
661         space* s = CONTAINING_RECORD(le, space, list_entry);
662         LIST_ENTRY* le2 = le->Flink;
663 
664         if (le2 != &c->space) {
665             space* s2 = CONTAINING_RECORD(le2, space, list_entry);
666 
667             if (s2->address == s->address + s->size) {
668                 s->size += s2->size;
669 
670                 RemoveEntryList(&s2->list_entry);
671                 RemoveEntryList(&s2->list_entry_size);
672                 ExFreePool(s2);
673 
674                 RemoveEntryList(&s->list_entry_size);
675                 order_space_entry(s, &c->space_size);
676 
677                 le2 = le;
678             }
679         }
680 
681         le = le2;
682     }
683 
684     ExFreePool(data);
685 
686     return STATUS_SUCCESS;
687 
688 clearcache:
689     ExFreePool(data);
690 
691     InitializeListHead(&rollback);
692 
693     Status = delete_tree_item(Vcb, &tp);
694     if (!NT_SUCCESS(Status)) {
695         ERR("delete_tree_item returned %08lx\n", Status);
696         return Status;
697     }
698 
699     Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, &rollback);
700     if (!NT_SUCCESS(Status)) {
701         ERR("excise_extents returned %08lx\n", Status);
702         do_rollback(Vcb, &rollback);
703         return Status;
704     }
705 
706     clear_rollback(&rollback);
707 
708     c->cache->deleted = true;
709     mark_fcb_dirty(c->cache);
710 
711     c->old_cache = c->cache;
712     c->cache = NULL;
713 
714     le = c->space.Flink;
715     while (le != &c->space) {
716         space* s = CONTAINING_RECORD(le, space, list_entry);
717         LIST_ENTRY* le2 = le->Flink;
718 
719         RemoveEntryList(&s->list_entry);
720         RemoveEntryList(&s->list_entry_size);
721         ExFreePool(s);
722 
723         le = le2;
724     }
725 
726     return STATUS_NOT_FOUND;
727 }
728 
load_stored_free_space_tree(device_extension * Vcb,chunk * c,PIRP Irp)729 static NTSTATUS load_stored_free_space_tree(device_extension* Vcb, chunk* c, PIRP Irp) {
730     KEY searchkey;
731     traverse_ptr tp, next_tp;
732     NTSTATUS Status;
733     ULONG* bmparr = NULL;
734     ULONG bmplen = 0;
735     LIST_ENTRY* le;
736 
737     TRACE("(%p, %I64x)\n", Vcb, c->offset);
738 
739     if (!Vcb->space_root)
740         return STATUS_NOT_FOUND;
741 
742     searchkey.obj_id = c->offset;
743     searchkey.obj_type = TYPE_FREE_SPACE_INFO;
744     searchkey.offset = c->chunk_item->size;
745 
746     Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, false, Irp);
747     if (!NT_SUCCESS(Status)) {
748         ERR("find_item returned %08lx\n", Status);
749         return Status;
750     }
751 
752     if (keycmp(tp.item->key, searchkey)) {
753         TRACE("(%I64x,%x,%I64x) not found\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
754         return STATUS_NOT_FOUND;
755     }
756 
757     if (tp.item->size < sizeof(FREE_SPACE_INFO)) {
758         WARN("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_INFO));
759         return STATUS_NOT_FOUND;
760     }
761 
762     while (find_next_item(Vcb, &tp, &next_tp, false, Irp)) {
763         tp = next_tp;
764 
765         if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
766             break;
767 
768         if (tp.item->key.obj_type == TYPE_FREE_SPACE_EXTENT) {
769             Status = add_space_entry(&c->space, &c->space_size, tp.item->key.obj_id, tp.item->key.offset);
770             if (!NT_SUCCESS(Status)) {
771                 ERR("add_space_entry returned %08lx\n", Status);
772                 if (bmparr) ExFreePool(bmparr);
773                 return Status;
774             }
775         } else if (tp.item->key.obj_type == TYPE_FREE_SPACE_BITMAP) {
776             ULONG explen, index, runlength;
777             RTL_BITMAP bmp;
778             uint64_t lastoff;
779             ULONG bmpl;
780 
781             explen = (ULONG)(tp.item->key.offset >> Vcb->sector_shift) / 8;
782 
783             if (tp.item->size < explen) {
784                 WARN("(%I64x,%x,%I64x) was %u bytes, expected %lu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, explen);
785                 return STATUS_NOT_FOUND;
786             } else if (tp.item->size == 0) {
787                 WARN("(%I64x,%x,%I64x) has size of 0\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
788                 return STATUS_NOT_FOUND;
789             }
790 
791             if (bmplen < tp.item->size) {
792                 if (bmparr)
793                     ExFreePool(bmparr);
794 
795                 bmplen = (ULONG)sector_align(tp.item->size, sizeof(ULONG));
796                 bmparr = ExAllocatePoolWithTag(PagedPool, bmplen, ALLOC_TAG);
797                 if (!bmparr) {
798                     ERR("out of memory\n");
799                     return STATUS_INSUFFICIENT_RESOURCES;
800                 }
801             }
802 
803             // We copy the bitmap because it supposedly has to be ULONG-aligned
804             RtlCopyMemory(bmparr, tp.item->data, tp.item->size);
805 
806             bmpl = (ULONG)tp.item->key.offset >> Vcb->sector_shift;
807 
808             RtlInitializeBitMap(&bmp, bmparr, bmpl);
809 
810             lastoff = tp.item->key.obj_id;
811 
812             runlength = RtlFindFirstRunClear(&bmp, &index);
813 
814             while (runlength != 0) {
815                 uint64_t runstart, runend;
816 
817                 if (index >= bmpl)
818                     break;
819 
820                 if (index + runlength >= bmpl) {
821                     runlength = bmpl - index;
822 
823                     if (runlength == 0)
824                         break;
825                 }
826 
827                 runstart = tp.item->key.obj_id + (index << Vcb->sector_shift);
828                 runend = runstart + (runlength << Vcb->sector_shift);
829 
830                 if (runstart > lastoff) {
831                     Status = add_space_entry(&c->space, &c->space_size, lastoff, runstart - lastoff);
832                     if (!NT_SUCCESS(Status)) {
833                         ERR("add_space_entry returned %08lx\n", Status);
834                         if (bmparr) ExFreePool(bmparr);
835                         return Status;
836                     }
837                 }
838 
839                 lastoff = runend;
840 
841                 runlength = RtlFindNextForwardRunClear(&bmp, index + runlength, &index);
842             }
843 
844             if (lastoff < tp.item->key.obj_id + tp.item->key.offset) {
845                 Status = add_space_entry(&c->space, &c->space_size, lastoff, tp.item->key.obj_id + tp.item->key.offset - lastoff);
846                 if (!NT_SUCCESS(Status)) {
847                     ERR("add_space_entry returned %08lx\n", Status);
848                     if (bmparr) ExFreePool(bmparr);
849                     return Status;
850                 }
851             }
852         }
853     }
854 
855     if (bmparr)
856         ExFreePool(bmparr);
857 
858     le = c->space.Flink;
859     while (le != &c->space) {
860         space* s = CONTAINING_RECORD(le, space, list_entry);
861         LIST_ENTRY* le2 = le->Flink;
862 
863         if (le2 != &c->space) {
864             space* s2 = CONTAINING_RECORD(le2, space, list_entry);
865 
866             if (s2->address == s->address + s->size) {
867                 s->size += s2->size;
868 
869                 RemoveEntryList(&s2->list_entry);
870                 RemoveEntryList(&s2->list_entry_size);
871                 ExFreePool(s2);
872 
873                 RemoveEntryList(&s->list_entry_size);
874                 order_space_entry(s, &c->space_size);
875 
876                 le2 = le;
877             }
878         }
879 
880         le = le2;
881     }
882 
883     return STATUS_SUCCESS;
884 }
885 
load_free_space_cache(device_extension * Vcb,chunk * c,PIRP Irp)886 static NTSTATUS load_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp) {
887     traverse_ptr tp, next_tp;
888     KEY searchkey;
889     uint64_t lastaddr;
890     bool b;
891     space* s;
892     NTSTATUS Status;
893 
894     if (Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE && Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID) {
895         Status = load_stored_free_space_tree(Vcb, c, Irp);
896 
897         if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
898             ERR("load_stored_free_space_tree returned %08lx\n", Status);
899             return Status;
900         }
901     } else if (Vcb->superblock.generation - 1 == Vcb->superblock.cache_generation) {
902         Status = load_stored_free_space_cache(Vcb, c, false, Irp);
903 
904         if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
905             ERR("load_stored_free_space_cache returned %08lx\n", Status);
906             return Status;
907         }
908     } else
909         Status = STATUS_NOT_FOUND;
910 
911     if (Status == STATUS_NOT_FOUND) {
912         TRACE("generating free space cache for chunk %I64x\n", c->offset);
913 
914         searchkey.obj_id = c->offset;
915         searchkey.obj_type = TYPE_EXTENT_ITEM;
916         searchkey.offset = 0;
917 
918         Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
919         if (!NT_SUCCESS(Status)) {
920             ERR("error - find_item returned %08lx\n", Status);
921             return Status;
922         }
923 
924         lastaddr = c->offset;
925 
926         do {
927             if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
928                 break;
929 
930             if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
931                 if (tp.item->key.obj_id > lastaddr) {
932                     s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
933 
934                     if (!s) {
935                         ERR("out of memory\n");
936                         return STATUS_INSUFFICIENT_RESOURCES;
937                     }
938 
939                     s->address = lastaddr;
940                     s->size = tp.item->key.obj_id - lastaddr;
941                     InsertTailList(&c->space, &s->list_entry);
942 
943                     order_space_entry(s, &c->space_size);
944 
945                     TRACE("(%I64x,%I64x)\n", s->address, s->size);
946                 }
947 
948                 if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
949                     lastaddr = tp.item->key.obj_id + Vcb->superblock.node_size;
950                 else
951                     lastaddr = tp.item->key.obj_id + tp.item->key.offset;
952             }
953 
954             b = find_next_item(Vcb, &tp, &next_tp, false, Irp);
955             if (b)
956                 tp = next_tp;
957         } while (b);
958 
959         if (lastaddr < c->offset + c->chunk_item->size) {
960             s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
961 
962             if (!s) {
963                 ERR("out of memory\n");
964                 return STATUS_INSUFFICIENT_RESOURCES;
965             }
966 
967             s->address = lastaddr;
968             s->size = c->offset + c->chunk_item->size - lastaddr;
969             InsertTailList(&c->space, &s->list_entry);
970 
971             order_space_entry(s, &c->space_size);
972 
973             TRACE("(%I64x,%I64x)\n", s->address, s->size);
974         }
975     }
976 
977     return STATUS_SUCCESS;
978 }
979 
load_cache_chunk(device_extension * Vcb,chunk * c,PIRP Irp)980 NTSTATUS load_cache_chunk(device_extension* Vcb, chunk* c, PIRP Irp) {
981     NTSTATUS Status;
982 
983     if (c->cache_loaded)
984         return STATUS_SUCCESS;
985 
986     Status = load_free_space_cache(Vcb, c, Irp);
987     if (!NT_SUCCESS(Status)) {
988         ERR("load_free_space_cache returned %08lx\n", Status);
989         return Status;
990     }
991 
992     protect_superblocks(c);
993 
994     c->cache_loaded = true;
995 
996     return STATUS_SUCCESS;
997 }
998 
insert_cache_extent(fcb * fcb,uint64_t start,uint64_t length,LIST_ENTRY * rollback)999 static NTSTATUS insert_cache_extent(fcb* fcb, uint64_t start, uint64_t length, LIST_ENTRY* rollback) {
1000     NTSTATUS Status;
1001     LIST_ENTRY* le = fcb->Vcb->chunks.Flink;
1002     chunk* c;
1003     uint64_t flags;
1004 
1005     flags = fcb->Vcb->data_flags;
1006 
1007     while (le != &fcb->Vcb->chunks) {
1008         c = CONTAINING_RECORD(le, chunk, list_entry);
1009 
1010         if (!c->readonly && !c->reloc) {
1011             acquire_chunk_lock(c, fcb->Vcb);
1012 
1013             if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
1014                 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, false, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length, false, 0))
1015                     return STATUS_SUCCESS;
1016             }
1017 
1018             release_chunk_lock(c, fcb->Vcb);
1019         }
1020 
1021         le = le->Flink;
1022     }
1023 
1024     Status = alloc_chunk(fcb->Vcb, flags, &c, false);
1025 
1026     if (!NT_SUCCESS(Status)) {
1027         ERR("alloc_chunk returned %08lx\n", Status);
1028         return Status;
1029     }
1030 
1031     acquire_chunk_lock(c, fcb->Vcb);
1032 
1033     if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
1034         if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, false, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length, false, 0))
1035             return STATUS_SUCCESS;
1036     }
1037 
1038     release_chunk_lock(c, fcb->Vcb);
1039 
1040     return STATUS_DISK_FULL;
1041 }
1042 
allocate_cache_chunk(device_extension * Vcb,chunk * c,bool * changed,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)1043 static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, bool* changed, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
1044     LIST_ENTRY* le;
1045     NTSTATUS Status;
1046     uint64_t num_entries, new_cache_size, i;
1047     uint32_t num_sectors;
1048     bool realloc_extents = false;
1049 
1050     // FIXME - also do bitmaps
1051     // FIXME - make sure this works when sector_size is not 4096
1052 
1053     *changed = false;
1054 
1055     num_entries = 0;
1056 
1057     // num_entries is the number of entries in c->space and c->deleting - it might
1058     // be slightly higher then what we end up writing, but doing it this way is much
1059     // quicker and simpler.
1060     if (!IsListEmpty(&c->space)) {
1061         le = c->space.Flink;
1062         while (le != &c->space) {
1063             num_entries++;
1064 
1065             le = le->Flink;
1066         }
1067     }
1068 
1069     if (!IsListEmpty(&c->deleting)) {
1070         le = c->deleting.Flink;
1071         while (le != &c->deleting) {
1072             num_entries++;
1073 
1074             le = le->Flink;
1075         }
1076     }
1077 
1078     new_cache_size = sizeof(uint64_t) + (num_entries * sizeof(FREE_SPACE_ENTRY));
1079 
1080     num_sectors = (uint32_t)sector_align(new_cache_size, Vcb->superblock.sector_size) >> Vcb->sector_shift;
1081     num_sectors = (uint32_t)sector_align(num_sectors, CACHE_INCREMENTS);
1082 
1083     // adjust for padding
1084     // FIXME - there must be a more efficient way of doing this
1085     new_cache_size = sizeof(uint64_t) + (sizeof(uint32_t) * num_sectors);
1086     for (i = 0; i < num_entries; i++) {
1087         if ((new_cache_size >> Vcb->sector_shift) != ((new_cache_size + sizeof(FREE_SPACE_ENTRY)) >> Vcb->sector_shift))
1088             new_cache_size = sector_align(new_cache_size, Vcb->superblock.sector_size);
1089 
1090         new_cache_size += sizeof(FREE_SPACE_ENTRY);
1091     }
1092 
1093     new_cache_size = sector_align(new_cache_size, CACHE_INCREMENTS << Vcb->sector_shift);
1094 
1095     TRACE("chunk %I64x: cache_size = %I64x, new_cache_size = %I64x\n", c->offset, c->cache ? c->cache->inode_item.st_size : 0, new_cache_size);
1096 
1097     if (c->cache) {
1098         if (new_cache_size > c->cache->inode_item.st_size)
1099             realloc_extents = true;
1100         else {
1101             le = c->cache->extents.Flink;
1102 
1103             while (le != &c->cache->extents) {
1104                 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
1105 
1106                 if (!ext->ignore && (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC)) {
1107                     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->extent_data.data[0];
1108 
1109                     if (ed2->size != 0) {
1110                         chunk* c2 = get_chunk_from_address(Vcb, ed2->address);
1111 
1112                         if (c2 && (c2->readonly || c2->reloc)) {
1113                             realloc_extents = true;
1114                             break;
1115                         }
1116                     }
1117                 }
1118 
1119                 le = le->Flink;
1120             }
1121         }
1122     }
1123 
1124     if (!c->cache) {
1125         FREE_SPACE_ITEM* fsi;
1126         KEY searchkey;
1127         traverse_ptr tp;
1128 
1129         // create new inode
1130 
1131         c->cache = create_fcb(Vcb, PagedPool);
1132         if (!c->cache) {
1133             ERR("out of memory\n");
1134             return STATUS_INSUFFICIENT_RESOURCES;
1135         }
1136 
1137         c->cache->Vcb = Vcb;
1138 
1139         c->cache->inode_item.st_size = new_cache_size;
1140         c->cache->inode_item.st_blocks = new_cache_size;
1141         c->cache->inode_item.st_nlink = 1;
1142         c->cache->inode_item.st_mode = S_IRUSR | S_IWUSR | __S_IFREG;
1143         c->cache->inode_item.flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW | BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
1144 
1145         c->cache->Header.IsFastIoPossible = fast_io_possible(c->cache);
1146         c->cache->Header.AllocationSize.QuadPart = 0;
1147         c->cache->Header.FileSize.QuadPart = 0;
1148         c->cache->Header.ValidDataLength.QuadPart = 0;
1149 
1150         c->cache->subvol = Vcb->root_root;
1151 
1152         c->cache->inode = InterlockedIncrement64(&Vcb->root_root->lastinode);
1153         c->cache->hash = calc_crc32c(0xffffffff, (uint8_t*)&c->cache->inode, sizeof(uint64_t));
1154 
1155         c->cache->type = BTRFS_TYPE_FILE;
1156         c->cache->created = true;
1157 
1158         // create new free space entry
1159 
1160         fsi = ExAllocatePoolWithTag(PagedPool, sizeof(FREE_SPACE_ITEM), ALLOC_TAG);
1161         if (!fsi) {
1162             ERR("out of memory\n");
1163             reap_fcb(c->cache);
1164             c->cache = NULL;
1165             return STATUS_INSUFFICIENT_RESOURCES;
1166         }
1167 
1168         searchkey.obj_id = FREE_SPACE_CACHE_ID;
1169         searchkey.obj_type = 0;
1170         searchkey.offset = c->offset;
1171 
1172         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1173         if (!NT_SUCCESS(Status)) {
1174             ERR("error - find_item returned %08lx\n", Status);
1175             ExFreePool(fsi);
1176             reap_fcb(c->cache);
1177             c->cache = NULL;
1178             return Status;
1179         }
1180 
1181         if (!keycmp(searchkey, tp.item->key)) {
1182             Status = delete_tree_item(Vcb, &tp);
1183             if (!NT_SUCCESS(Status)) {
1184                 ERR("delete_tree_item returned %08lx\n", Status);
1185                 ExFreePool(fsi);
1186                 reap_fcb(c->cache);
1187                 c->cache = NULL;
1188                 return Status;
1189             }
1190         }
1191 
1192         fsi->key.obj_id = c->cache->inode;
1193         fsi->key.obj_type = TYPE_INODE_ITEM;
1194         fsi->key.offset = 0;
1195 
1196         Status = insert_tree_item(Vcb, Vcb->root_root, FREE_SPACE_CACHE_ID, 0, c->offset, fsi, sizeof(FREE_SPACE_ITEM), NULL, Irp);
1197         if (!NT_SUCCESS(Status)) {
1198             ERR("insert_tree_item returned %08lx\n", Status);
1199             ExFreePool(fsi);
1200             reap_fcb(c->cache);
1201             c->cache = NULL;
1202             return Status;
1203         }
1204 
1205         // allocate space
1206 
1207         Status = insert_cache_extent(c->cache, 0, new_cache_size, rollback);
1208         if (!NT_SUCCESS(Status)) {
1209             ERR("insert_cache_extent returned %08lx\n", Status);
1210             reap_fcb(c->cache);
1211             c->cache = NULL;
1212             return Status;
1213         }
1214 
1215         c->cache->extents_changed = true;
1216         InsertTailList(&Vcb->all_fcbs, &c->cache->list_entry_all);
1217 
1218         add_fcb_to_subvol(c->cache);
1219 
1220         Status = flush_fcb(c->cache, true, batchlist, Irp);
1221         if (!NT_SUCCESS(Status)) {
1222             ERR("flush_fcb returned %08lx\n", Status);
1223             free_fcb(c->cache);
1224             c->cache = NULL;
1225             return Status;
1226         }
1227 
1228         *changed = true;
1229     } else if (realloc_extents) {
1230         KEY searchkey;
1231         traverse_ptr tp;
1232 
1233         TRACE("reallocating extents\n");
1234 
1235         // add free_space entry to tree cache
1236 
1237         searchkey.obj_id = FREE_SPACE_CACHE_ID;
1238         searchkey.obj_type = 0;
1239         searchkey.offset = c->offset;
1240 
1241         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1242         if (!NT_SUCCESS(Status)) {
1243             ERR("error - find_item returned %08lx\n", Status);
1244             return Status;
1245         }
1246 
1247         if (keycmp(searchkey, tp.item->key)) {
1248             ERR("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1249             return STATUS_INTERNAL_ERROR;
1250         }
1251 
1252         if (tp.item->size < sizeof(FREE_SPACE_ITEM)) {
1253             ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
1254             return STATUS_INTERNAL_ERROR;
1255         }
1256 
1257         tp.tree->write = true;
1258 
1259         // remove existing extents
1260 
1261         if (c->cache->inode_item.st_size > 0) {
1262             le = c->cache->extents.Flink;
1263 
1264             while (le != &c->cache->extents) {
1265                 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
1266 
1267                 if (!ext->ignore && (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC)) {
1268                     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->extent_data.data[0];
1269 
1270                     if (ed2->size != 0) {
1271                         chunk* c2 = get_chunk_from_address(Vcb, ed2->address);
1272 
1273                         if (c2) {
1274                             c2->changed = true;
1275                             c2->space_changed = true;
1276                         }
1277                     }
1278                 }
1279 
1280                 le = le->Flink;
1281             }
1282 
1283             Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback);
1284             if (!NT_SUCCESS(Status)) {
1285                 ERR("excise_extents returned %08lx\n", Status);
1286                 return Status;
1287             }
1288         }
1289 
1290         // add new extent
1291 
1292         Status = insert_cache_extent(c->cache, 0, new_cache_size, rollback);
1293         if (!NT_SUCCESS(Status)) {
1294             ERR("insert_cache_extent returned %08lx\n", Status);
1295             return Status;
1296         }
1297 
1298         // modify INODE_ITEM
1299 
1300         c->cache->inode_item.st_size = new_cache_size;
1301         c->cache->inode_item.st_blocks = new_cache_size;
1302 
1303         Status = flush_fcb(c->cache, true, batchlist, Irp);
1304         if (!NT_SUCCESS(Status)) {
1305             ERR("flush_fcb returned %08lx\n", Status);
1306             return Status;
1307         }
1308 
1309         *changed = true;
1310     } else {
1311         KEY searchkey;
1312         traverse_ptr tp;
1313 
1314         // add INODE_ITEM and free_space entry to tree cache, for writing later
1315 
1316         searchkey.obj_id = c->cache->inode;
1317         searchkey.obj_type = TYPE_INODE_ITEM;
1318         searchkey.offset = 0;
1319 
1320         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1321         if (!NT_SUCCESS(Status)) {
1322             ERR("error - find_item returned %08lx\n", Status);
1323             return Status;
1324         }
1325 
1326         if (keycmp(searchkey, tp.item->key)) {
1327             INODE_ITEM* ii;
1328 
1329             ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
1330             if (!ii) {
1331                 ERR("out of memory\n");
1332                 return STATUS_INSUFFICIENT_RESOURCES;
1333             }
1334 
1335             RtlCopyMemory(ii, &c->cache->inode_item, sizeof(INODE_ITEM));
1336 
1337             Status = insert_tree_item(Vcb, Vcb->root_root, c->cache->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp);
1338             if (!NT_SUCCESS(Status)) {
1339                 ERR("insert_tree_item returned %08lx\n", Status);
1340                 ExFreePool(ii);
1341                 return Status;
1342             }
1343 
1344             *changed = true;
1345         } else {
1346             if (tp.item->size < sizeof(INODE_ITEM)) {
1347                 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_ITEM));
1348                 return STATUS_INTERNAL_ERROR;
1349             }
1350 
1351             tp.tree->write = true;
1352         }
1353 
1354         searchkey.obj_id = FREE_SPACE_CACHE_ID;
1355         searchkey.obj_type = 0;
1356         searchkey.offset = c->offset;
1357 
1358         Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1359         if (!NT_SUCCESS(Status)) {
1360             ERR("error - find_item returned %08lx\n", Status);
1361             return Status;
1362         }
1363 
1364         if (keycmp(searchkey, tp.item->key)) {
1365             ERR("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1366             return STATUS_INTERNAL_ERROR;
1367         }
1368 
1369         if (tp.item->size < sizeof(FREE_SPACE_ITEM)) {
1370             ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
1371             return STATUS_INTERNAL_ERROR;
1372         }
1373 
1374         tp.tree->write = true;
1375     }
1376 
1377     // FIXME - reduce inode allocation if cache is shrinking. Make sure to avoid infinite write loops
1378 
1379     return STATUS_SUCCESS;
1380 }
1381 
allocate_cache(device_extension * Vcb,bool * changed,PIRP Irp,LIST_ENTRY * rollback)1382 NTSTATUS allocate_cache(device_extension* Vcb, bool* changed, PIRP Irp, LIST_ENTRY* rollback) {
1383     LIST_ENTRY *le, batchlist;
1384     NTSTATUS Status;
1385 
1386     *changed = false;
1387 
1388     InitializeListHead(&batchlist);
1389 
1390     ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
1391 
1392     le = Vcb->chunks.Flink;
1393     while (le != &Vcb->chunks) {
1394         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
1395 
1396         if (c->space_changed && c->chunk_item->size >= 0x6400000) { // 100MB
1397             bool b;
1398 
1399             acquire_chunk_lock(c, Vcb);
1400             Status = allocate_cache_chunk(Vcb, c, &b, &batchlist, Irp, rollback);
1401             release_chunk_lock(c, Vcb);
1402 
1403             if (b)
1404                 *changed = true;
1405 
1406             if (!NT_SUCCESS(Status)) {
1407                 ERR("allocate_cache_chunk(%I64x) returned %08lx\n", c->offset, Status);
1408                 ExReleaseResourceLite(&Vcb->chunk_lock);
1409                 clear_batch_list(Vcb, &batchlist);
1410                 return Status;
1411             }
1412         }
1413 
1414         le = le->Flink;
1415     }
1416 
1417     ExReleaseResourceLite(&Vcb->chunk_lock);
1418 
1419     Status = commit_batch_list(Vcb, &batchlist, Irp);
1420     if (!NT_SUCCESS(Status)) {
1421         ERR("commit_batch_list returned %08lx\n", Status);
1422         return Status;
1423     }
1424 
1425     return STATUS_SUCCESS;
1426 }
1427 
add_rollback_space(LIST_ENTRY * rollback,bool add,LIST_ENTRY * list,LIST_ENTRY * list_size,uint64_t address,uint64_t length,chunk * c)1428 static void add_rollback_space(LIST_ENTRY* rollback, bool add, LIST_ENTRY* list, LIST_ENTRY* list_size, uint64_t address, uint64_t length, chunk* c) {
1429     rollback_space* rs;
1430 
1431     rs = ExAllocatePoolWithTag(PagedPool, sizeof(rollback_space), ALLOC_TAG);
1432     if (!rs) {
1433         ERR("out of memory\n");
1434         return;
1435     }
1436 
1437     rs->list = list;
1438     rs->list_size = list_size;
1439     rs->address = address;
1440     rs->length = length;
1441     rs->chunk = c;
1442 
1443     add_rollback(rollback, add ? ROLLBACK_ADD_SPACE : ROLLBACK_SUBTRACT_SPACE, rs);
1444 }
1445 
space_list_add2(LIST_ENTRY * list,LIST_ENTRY * list_size,uint64_t address,uint64_t length,chunk * c,LIST_ENTRY * rollback)1446 void space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, uint64_t address, uint64_t length, chunk* c, LIST_ENTRY* rollback) {
1447     LIST_ENTRY* le;
1448     space *s, *s2;
1449 
1450     if (IsListEmpty(list)) {
1451         s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
1452 
1453         if (!s) {
1454             ERR("out of memory\n");
1455             return;
1456         }
1457 
1458         s->address = address;
1459         s->size = length;
1460         InsertTailList(list, &s->list_entry);
1461 
1462         if (list_size)
1463             InsertTailList(list_size, &s->list_entry_size);
1464 
1465         if (rollback)
1466             add_rollback_space(rollback, true, list, list_size, address, length, c);
1467 
1468         return;
1469     }
1470 
1471     le = list->Flink;
1472     do {
1473         s2 = CONTAINING_RECORD(le, space, list_entry);
1474 
1475         // old entry envelops new one completely
1476         if (s2->address <= address && s2->address + s2->size >= address + length)
1477             return;
1478 
1479         // new entry envelops old one completely
1480         if (address <= s2->address && address + length >= s2->address + s2->size) {
1481             if (address < s2->address) {
1482                 if (rollback)
1483                     add_rollback_space(rollback, true, list, list_size, address, s2->address - address, c);
1484 
1485                 s2->size += s2->address - address;
1486                 s2->address = address;
1487 
1488                 while (s2->list_entry.Blink != list) {
1489                     space* s3 = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
1490 
1491                     if (s3->address + s3->size == s2->address) {
1492                         s2->address = s3->address;
1493                         s2->size += s3->size;
1494 
1495                         RemoveEntryList(&s3->list_entry);
1496 
1497                         if (list_size)
1498                             RemoveEntryList(&s3->list_entry_size);
1499 
1500                         ExFreePool(s3);
1501                     } else
1502                         break;
1503                 }
1504             }
1505 
1506             if (length > s2->size) {
1507                 if (rollback)
1508                     add_rollback_space(rollback, true, list, list_size, s2->address + s2->size, address + length - s2->address - s2->size, c);
1509 
1510                 s2->size = length;
1511 
1512                 while (s2->list_entry.Flink != list) {
1513                     space* s3 = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
1514 
1515                     if (s3->address <= s2->address + s2->size) {
1516                         s2->size = max(s2->size, s3->address + s3->size - s2->address);
1517 
1518                         RemoveEntryList(&s3->list_entry);
1519 
1520                         if (list_size)
1521                             RemoveEntryList(&s3->list_entry_size);
1522 
1523                         ExFreePool(s3);
1524                     } else
1525                         break;
1526                 }
1527             }
1528 
1529             if (list_size) {
1530                 RemoveEntryList(&s2->list_entry_size);
1531                 order_space_entry(s2, list_size);
1532             }
1533 
1534             return;
1535         }
1536 
1537         // new entry overlaps start of old one
1538         if (address < s2->address && address + length >= s2->address) {
1539             if (rollback)
1540                 add_rollback_space(rollback, true, list, list_size, address, s2->address - address, c);
1541 
1542             s2->size += s2->address - address;
1543             s2->address = address;
1544 
1545             while (s2->list_entry.Blink != list) {
1546                 space* s3 = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
1547 
1548                 if (s3->address + s3->size == s2->address) {
1549                     s2->address = s3->address;
1550                     s2->size += s3->size;
1551 
1552                     RemoveEntryList(&s3->list_entry);
1553 
1554                     if (list_size)
1555                         RemoveEntryList(&s3->list_entry_size);
1556 
1557                     ExFreePool(s3);
1558                 } else
1559                     break;
1560             }
1561 
1562             if (list_size) {
1563                 RemoveEntryList(&s2->list_entry_size);
1564                 order_space_entry(s2, list_size);
1565             }
1566 
1567             return;
1568         }
1569 
1570         // new entry overlaps end of old one
1571         if (address <= s2->address + s2->size && address + length > s2->address + s2->size) {
1572             if (rollback)
1573                 add_rollback_space(rollback, true, list, list_size, address, s2->address + s2->size - address, c);
1574 
1575             s2->size = address + length - s2->address;
1576 
1577             while (s2->list_entry.Flink != list) {
1578                 space* s3 = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
1579 
1580                 if (s3->address <= s2->address + s2->size) {
1581                     s2->size = max(s2->size, s3->address + s3->size - s2->address);
1582 
1583                     RemoveEntryList(&s3->list_entry);
1584 
1585                     if (list_size)
1586                         RemoveEntryList(&s3->list_entry_size);
1587 
1588                     ExFreePool(s3);
1589                 } else
1590                     break;
1591             }
1592 
1593             if (list_size) {
1594                 RemoveEntryList(&s2->list_entry_size);
1595                 order_space_entry(s2, list_size);
1596             }
1597 
1598             return;
1599         }
1600 
1601         // add completely separate entry
1602         if (s2->address > address + length) {
1603             s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
1604 
1605             if (!s) {
1606                 ERR("out of memory\n");
1607                 return;
1608             }
1609 
1610             if (rollback)
1611                 add_rollback_space(rollback, true, list, list_size, address, length, c);
1612 
1613             s->address = address;
1614             s->size = length;
1615             InsertHeadList(s2->list_entry.Blink, &s->list_entry);
1616 
1617             if (list_size)
1618                 order_space_entry(s, list_size);
1619 
1620             return;
1621         }
1622 
1623         le = le->Flink;
1624     } while (le != list);
1625 
1626     // check if contiguous with last entry
1627     if (s2->address + s2->size == address) {
1628         s2->size += length;
1629 
1630         if (list_size) {
1631             RemoveEntryList(&s2->list_entry_size);
1632             order_space_entry(s2, list_size);
1633         }
1634 
1635         return;
1636     }
1637 
1638     // otherwise, insert at end
1639     s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
1640 
1641     if (!s) {
1642         ERR("out of memory\n");
1643         return;
1644     }
1645 
1646     s->address = address;
1647     s->size = length;
1648     InsertTailList(list, &s->list_entry);
1649 
1650     if (list_size)
1651         order_space_entry(s, list_size);
1652 
1653     if (rollback)
1654         add_rollback_space(rollback, true, list, list_size, address, length, c);
1655 }
1656 
space_list_merge(LIST_ENTRY * spacelist,LIST_ENTRY * spacelist_size,LIST_ENTRY * deleting)1657 void space_list_merge(LIST_ENTRY* spacelist, LIST_ENTRY* spacelist_size, LIST_ENTRY* deleting) {
1658     LIST_ENTRY* le = deleting->Flink;
1659 
1660     while (le != deleting) {
1661         space* s = CONTAINING_RECORD(le, space, list_entry);
1662 
1663         space_list_add2(spacelist, spacelist_size, s->address, s->size, NULL, NULL);
1664 
1665         le = le->Flink;
1666     }
1667 }
1668 
copy_space_list(LIST_ENTRY * old_list,LIST_ENTRY * new_list)1669 static NTSTATUS copy_space_list(LIST_ENTRY* old_list, LIST_ENTRY* new_list) {
1670     LIST_ENTRY* le;
1671 
1672     le = old_list->Flink;
1673     while (le != old_list) {
1674         space* s = CONTAINING_RECORD(le, space, list_entry);
1675         space* s2;
1676 
1677         s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
1678         if (!s2) {
1679             ERR("out of memory\n");
1680             return STATUS_INSUFFICIENT_RESOURCES;
1681         }
1682 
1683         s2->address = s->address;
1684         s2->size = s->size;
1685 
1686         InsertTailList(new_list, &s2->list_entry);
1687 
1688         le = le->Flink;
1689     }
1690 
1691     return STATUS_SUCCESS;
1692 }
1693 
update_chunk_cache(device_extension * Vcb,chunk * c,BTRFS_TIME * now,LIST_ENTRY * batchlist,PIRP Irp,LIST_ENTRY * rollback)1694 static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME* now, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
1695     NTSTATUS Status;
1696     KEY searchkey;
1697     traverse_ptr tp;
1698     FREE_SPACE_ITEM* fsi;
1699     void* data;
1700     uint64_t num_entries, *cachegen, off;
1701     uint32_t *checksums, num_sectors;
1702     LIST_ENTRY space_list, deleting;
1703 
1704     data = ExAllocatePoolWithTag(NonPagedPool, (ULONG)c->cache->inode_item.st_size, ALLOC_TAG);
1705     if (!data) {
1706         ERR("out of memory\n");
1707         return STATUS_INSUFFICIENT_RESOURCES;
1708     }
1709 
1710     RtlZeroMemory(data, (ULONG)c->cache->inode_item.st_size);
1711 
1712     InitializeListHead(&space_list);
1713     InitializeListHead(&deleting);
1714 
1715     Status = copy_space_list(&c->space, &space_list);
1716     if (!NT_SUCCESS(Status)) {
1717         ERR("copy_space_list returned %08lx\n", Status);
1718         goto end;
1719     }
1720 
1721     Status = copy_space_list(&c->deleting, &deleting);
1722     if (!NT_SUCCESS(Status)) {
1723         ERR("copy_space_list returned %08lx\n", Status);
1724 
1725         while (!IsListEmpty(&space_list)) {
1726             ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1727         }
1728 
1729         goto end;
1730     }
1731 
1732     space_list_merge(&space_list, NULL, &deleting);
1733 
1734     num_entries = 0;
1735     num_sectors = (uint32_t)(c->cache->inode_item.st_size >> Vcb->sector_shift);
1736     off = (sizeof(uint32_t) * num_sectors) + sizeof(uint64_t);
1737 
1738     while (!IsListEmpty(&space_list)) {
1739         FREE_SPACE_ENTRY* fse;
1740 
1741         space* s = CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry);
1742 
1743         if ((off + sizeof(FREE_SPACE_ENTRY)) >> Vcb->sector_shift != off >> Vcb->sector_shift)
1744             off = sector_align(off, Vcb->superblock.sector_size);
1745 
1746         fse = (FREE_SPACE_ENTRY*)((uint8_t*)data + off);
1747 
1748         fse->offset = s->address;
1749         fse->size = s->size;
1750         fse->type = FREE_SPACE_EXTENT;
1751         num_entries++;
1752 
1753         off += sizeof(FREE_SPACE_ENTRY);
1754     }
1755 
1756     // update INODE_ITEM
1757 
1758     c->cache->inode_item.generation = Vcb->superblock.generation;
1759     c->cache->inode_item.transid = Vcb->superblock.generation;
1760     c->cache->inode_item.sequence++;
1761     c->cache->inode_item.st_ctime = *now;
1762 
1763     Status = flush_fcb(c->cache, true, batchlist, Irp);
1764     if (!NT_SUCCESS(Status)) {
1765         ERR("flush_fcb returned %08lx\n", Status);
1766         goto end;
1767     }
1768 
1769     // update free_space item
1770 
1771     searchkey.obj_id = FREE_SPACE_CACHE_ID;
1772     searchkey.obj_type = 0;
1773     searchkey.offset = c->offset;
1774 
1775     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1776     if (!NT_SUCCESS(Status)) {
1777         ERR("error - find_item returned %08lx\n", Status);
1778         goto end;
1779     }
1780 
1781     if (keycmp(searchkey, tp.item->key)) {
1782         ERR("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1783         Status = STATUS_INTERNAL_ERROR;
1784         goto end;
1785     }
1786 
1787     if (tp.item->size < sizeof(FREE_SPACE_ITEM)) {
1788         ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM));
1789         Status = STATUS_INTERNAL_ERROR;
1790         goto end;
1791     }
1792 
1793     fsi = (FREE_SPACE_ITEM*)tp.item->data;
1794 
1795     fsi->generation = Vcb->superblock.generation;
1796     fsi->num_entries = num_entries;
1797     fsi->num_bitmaps = 0;
1798 
1799     // set cache generation
1800 
1801     cachegen = (uint64_t*)((uint8_t*)data + (sizeof(uint32_t) * num_sectors));
1802     *cachegen = Vcb->superblock.generation;
1803 
1804     // calculate cache checksums
1805 
1806     checksums = (uint32_t*)data;
1807 
1808     // FIXME - if we know sector is fully zeroed, use cached checksum
1809 
1810     for (uint32_t i = 0; i < num_sectors; i++) {
1811         if (i << Vcb->sector_shift > sizeof(uint32_t) * num_sectors)
1812             checksums[i] = ~calc_crc32c(0xffffffff, (uint8_t*)data + (i << Vcb->sector_shift), Vcb->superblock.sector_size);
1813         else if ((i + 1) << Vcb->sector_shift < sizeof(uint32_t) * num_sectors)
1814             checksums[i] = 0; // FIXME - test this
1815         else
1816             checksums[i] = ~calc_crc32c(0xffffffff, (uint8_t*)data + (sizeof(uint32_t) * num_sectors), ((i + 1) << Vcb->sector_shift) - (sizeof(uint32_t) * num_sectors));
1817     }
1818 
1819     // write cache
1820 
1821     Status = do_write_file(c->cache, 0, c->cache->inode_item.st_size, data, NULL, false, 0, rollback);
1822     if (!NT_SUCCESS(Status)) {
1823         ERR("do_write_file returned %08lx\n", Status);
1824 
1825         // Writing the cache isn't critical, so we don't return an error if writing fails. This means
1826         // we can still flush on a degraded mount if metadata is RAID1 but data is RAID0.
1827     }
1828 
1829     Status = STATUS_SUCCESS;
1830 
1831 end:
1832     ExFreePool(data);
1833 
1834     return Status;
1835 }
1836 
update_chunk_cache_tree(device_extension * Vcb,chunk * c,PIRP Irp)1837 static NTSTATUS update_chunk_cache_tree(device_extension* Vcb, chunk* c, PIRP Irp) {
1838     NTSTATUS Status;
1839     LIST_ENTRY space_list;
1840     FREE_SPACE_INFO* fsi;
1841     KEY searchkey;
1842     traverse_ptr tp;
1843     uint32_t fsi_count = 0;
1844 
1845     InitializeListHead(&space_list);
1846 
1847     Status = copy_space_list(&c->space, &space_list);
1848     if (!NT_SUCCESS(Status)) {
1849         ERR("copy_space_list returned %08lx\n", Status);
1850         return Status;
1851     }
1852 
1853     space_list_merge(&space_list, NULL, &c->deleting);
1854 
1855     searchkey.obj_id = c->offset;
1856     searchkey.obj_type = TYPE_FREE_SPACE_EXTENT;
1857     searchkey.offset = 0xffffffffffffffff;
1858 
1859     Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, false, Irp);
1860     if (Status == STATUS_NOT_FOUND)
1861         goto after_tree_walk;
1862     else if (!NT_SUCCESS(Status)) {
1863         ERR("find_item returned %08lx\n", Status);
1864 
1865         while (!IsListEmpty(&space_list)) {
1866             ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1867         }
1868 
1869         return Status;
1870     }
1871 
1872     while (!IsListEmpty(&space_list) && tp.item->key.obj_id < c->offset + c->chunk_item->size) {
1873         traverse_ptr next_tp;
1874 
1875         if (tp.item->key.obj_type == TYPE_FREE_SPACE_EXTENT) {
1876             space* s = CONTAINING_RECORD(space_list.Flink, space, list_entry);
1877 
1878             if (s->address < tp.item->key.obj_id || (s->address == tp.item->key.obj_id && s->size < tp.item->key.offset)) {
1879                 // add entry
1880 
1881                 Status = insert_tree_item(Vcb, Vcb->space_root, s->address, TYPE_FREE_SPACE_EXTENT, s->size, NULL, 0,
1882                                           NULL, Irp);
1883                 if (!NT_SUCCESS(Status)) {
1884                     ERR("insert_tree_item returned %08lx\n", Status);
1885 
1886                     while (!IsListEmpty(&space_list)) {
1887                         ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1888                     }
1889 
1890                     return Status;
1891                 }
1892 
1893                 fsi_count++;
1894 
1895                 RemoveHeadList(&space_list);
1896                 ExFreePool(s);
1897                 continue;
1898             } else if (s->address == tp.item->key.obj_id && s->size == tp.item->key.offset) {
1899                 // unchanged entry
1900 
1901                 fsi_count++;
1902 
1903                 RemoveHeadList(&space_list);
1904                 ExFreePool(s);
1905             } else {
1906                 // remove entry
1907 
1908                 Status = delete_tree_item(Vcb, &tp);
1909                 if (!NT_SUCCESS(Status)) {
1910                     ERR("delete_tree_item returned %08lx\n", Status);
1911 
1912                     while (!IsListEmpty(&space_list)) {
1913                         ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1914                     }
1915 
1916                     return Status;
1917                 }
1918             }
1919         } else if (tp.item->key.obj_type == TYPE_FREE_SPACE_BITMAP) {
1920             Status = delete_tree_item(Vcb, &tp);
1921             if (!NT_SUCCESS(Status)) {
1922                 ERR("delete_tree_item returned %08lx\n", Status);
1923 
1924                 while (!IsListEmpty(&space_list)) {
1925                     ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1926                 }
1927 
1928                 return Status;
1929             }
1930         }
1931 
1932         if (!find_next_item(Vcb, &tp, &next_tp, false, Irp))
1933             goto after_tree_walk;
1934 
1935         tp = next_tp;
1936     }
1937 
1938     // after loop, delete remaining tree items for this chunk
1939 
1940     while (tp.item->key.obj_id < c->offset + c->chunk_item->size) {
1941         traverse_ptr next_tp;
1942 
1943         if (tp.item->key.obj_type == TYPE_FREE_SPACE_EXTENT || tp.item->key.obj_type == TYPE_FREE_SPACE_BITMAP) {
1944             Status = delete_tree_item(Vcb, &tp);
1945             if (!NT_SUCCESS(Status)) {
1946                 ERR("delete_tree_item returned %08lx\n", Status);
1947                 return Status;
1948             }
1949         }
1950 
1951         if (!find_next_item(Vcb, &tp, &next_tp, false, NULL))
1952             break;
1953 
1954         tp = next_tp;
1955     }
1956 
1957 after_tree_walk:
1958     // after loop, insert remaining space_list entries
1959 
1960     while (!IsListEmpty(&space_list)) {
1961         space* s = CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry);
1962 
1963         Status = insert_tree_item(Vcb, Vcb->space_root, s->address, TYPE_FREE_SPACE_EXTENT, s->size, NULL, 0,
1964                                   NULL, Irp);
1965         if (!NT_SUCCESS(Status)) {
1966             ERR("insert_tree_item returned %08lx\n", Status);
1967 
1968             while (!IsListEmpty(&space_list)) {
1969                 ExFreePool(CONTAINING_RECORD(RemoveHeadList(&space_list), space, list_entry));
1970             }
1971 
1972             return Status;
1973         }
1974 
1975         fsi_count++;
1976 
1977         ExFreePool(s);
1978     }
1979 
1980     // change TYPE_FREE_SPACE_INFO in place if present, and insert otherwise
1981 
1982     searchkey.obj_id = c->offset;
1983     searchkey.obj_type = TYPE_FREE_SPACE_INFO;
1984     searchkey.offset = c->chunk_item->size;
1985 
1986     Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, false, Irp);
1987     if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
1988         ERR("find_item returned %08lx\n", Status);
1989         return Status;
1990     }
1991 
1992     if (NT_SUCCESS(Status) && !keycmp(tp.item->key, searchkey)) {
1993         if (tp.item->size == sizeof(FREE_SPACE_INFO)) {
1994             tree* t;
1995 
1996             // change in place if possible
1997 
1998             fsi = (FREE_SPACE_INFO*)tp.item->data;
1999 
2000             fsi->count = fsi_count;
2001             fsi->flags = 0;
2002 
2003             tp.tree->write = true;
2004 
2005             t = tp.tree;
2006             while (t) {
2007                 if (t->paritem && t->paritem->ignore) {
2008                     t->paritem->ignore = false;
2009                     t->parent->header.num_items++;
2010                     t->parent->size += sizeof(internal_node);
2011                 }
2012 
2013                 t->header.generation = Vcb->superblock.generation;
2014                 t = t->parent;
2015             }
2016 
2017             return STATUS_SUCCESS;
2018         } else
2019             delete_tree_item(Vcb, &tp);
2020     }
2021 
2022     // insert FREE_SPACE_INFO
2023 
2024     fsi = ExAllocatePoolWithTag(PagedPool, sizeof(FREE_SPACE_INFO), ALLOC_TAG);
2025     if (!fsi) {
2026         ERR("out of memory\n");
2027         return STATUS_INSUFFICIENT_RESOURCES;
2028     }
2029 
2030     fsi->count = fsi_count;
2031     fsi->flags = 0;
2032 
2033     Status = insert_tree_item(Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size, fsi, sizeof(fsi),
2034                               NULL, Irp);
2035     if (!NT_SUCCESS(Status)) {
2036         ERR("insert_tree_item returned %08lx\n", Status);
2037         return Status;
2038     }
2039 
2040     return STATUS_SUCCESS;
2041 }
2042 
update_chunk_caches(device_extension * Vcb,PIRP Irp,LIST_ENTRY * rollback)2043 NTSTATUS update_chunk_caches(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
2044     LIST_ENTRY *le, batchlist;
2045     NTSTATUS Status;
2046     chunk* c;
2047     LARGE_INTEGER time;
2048     BTRFS_TIME now;
2049 
2050     KeQuerySystemTime(&time);
2051     win_time_to_unix(time, &now);
2052 
2053     InitializeListHead(&batchlist);
2054 
2055     le = Vcb->chunks.Flink;
2056     while (le != &Vcb->chunks) {
2057         c = CONTAINING_RECORD(le, chunk, list_entry);
2058 
2059         if (c->space_changed && c->chunk_item->size >= 0x6400000) { // 100MB
2060             acquire_chunk_lock(c, Vcb);
2061             Status = update_chunk_cache(Vcb, c, &now, &batchlist, Irp, rollback);
2062             release_chunk_lock(c, Vcb);
2063 
2064             if (!NT_SUCCESS(Status)) {
2065                 ERR("update_chunk_cache(%I64x) returned %08lx\n", c->offset, Status);
2066                 clear_batch_list(Vcb, &batchlist);
2067                 return Status;
2068             }
2069         }
2070 
2071         le = le->Flink;
2072     }
2073 
2074     Status = commit_batch_list(Vcb, &batchlist, Irp);
2075     if (!NT_SUCCESS(Status)) {
2076         ERR("commit_batch_list returned %08lx\n", Status);
2077         return Status;
2078     }
2079 
2080     le = Vcb->chunks.Flink;
2081     while (le != &Vcb->chunks) {
2082         c = CONTAINING_RECORD(le, chunk, list_entry);
2083 
2084         if (c->changed && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) {
2085             ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
2086 
2087             while (!IsListEmpty(&c->partial_stripes)) {
2088                 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
2089 
2090                 Status = flush_partial_stripe(Vcb, c, ps);
2091 
2092                 if (ps->bmparr)
2093                     ExFreePool(ps->bmparr);
2094 
2095                 ExFreePool(ps);
2096 
2097                 if (!NT_SUCCESS(Status)) {
2098                     ERR("flush_partial_stripe returned %08lx\n", Status);
2099                     ExReleaseResourceLite(&c->partial_stripes_lock);
2100                     return Status;
2101                 }
2102             }
2103 
2104             ExReleaseResourceLite(&c->partial_stripes_lock);
2105         }
2106 
2107         le = le->Flink;
2108     }
2109 
2110     return STATUS_SUCCESS;
2111 }
2112 
update_chunk_caches_tree(device_extension * Vcb,PIRP Irp)2113 NTSTATUS update_chunk_caches_tree(device_extension* Vcb, PIRP Irp) {
2114     LIST_ENTRY *le;
2115     NTSTATUS Status;
2116     chunk* c;
2117 
2118     Vcb->superblock.compat_ro_flags |= BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID;
2119 
2120     ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
2121 
2122     le = Vcb->chunks.Flink;
2123     while (le != &Vcb->chunks) {
2124         c = CONTAINING_RECORD(le, chunk, list_entry);
2125 
2126         if (c->space_changed) {
2127             acquire_chunk_lock(c, Vcb);
2128             Status = update_chunk_cache_tree(Vcb, c, Irp);
2129             release_chunk_lock(c, Vcb);
2130 
2131             if (!NT_SUCCESS(Status)) {
2132                 ERR("update_chunk_cache_tree(%I64x) returned %08lx\n", c->offset, Status);
2133                 ExReleaseResourceLite(&Vcb->chunk_lock);
2134                 return Status;
2135             }
2136         }
2137 
2138         le = le->Flink;
2139     }
2140 
2141     ExReleaseResourceLite(&Vcb->chunk_lock);
2142 
2143     return STATUS_SUCCESS;
2144 }
2145 
space_list_add(chunk * c,uint64_t address,uint64_t length,LIST_ENTRY * rollback)2146 void space_list_add(chunk* c, uint64_t address, uint64_t length, LIST_ENTRY* rollback) {
2147     TRACE("(%p, %I64x, %I64x, %p)\n", c, address, length, rollback);
2148 
2149     c->changed = true;
2150     c->space_changed = true;
2151 
2152     space_list_add2(&c->deleting, NULL, address, length, c, rollback);
2153 }
2154 
space_list_subtract2(LIST_ENTRY * list,LIST_ENTRY * list_size,uint64_t address,uint64_t length,chunk * c,LIST_ENTRY * rollback)2155 void space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, uint64_t address, uint64_t length, chunk* c, LIST_ENTRY* rollback) {
2156     LIST_ENTRY *le, *le2;
2157     space *s, *s2;
2158 
2159     if (IsListEmpty(list))
2160         return;
2161 
2162     le = list->Flink;
2163     while (le != list) {
2164         s2 = CONTAINING_RECORD(le, space, list_entry);
2165         le2 = le->Flink;
2166 
2167         if (s2->address >= address + length)
2168             return;
2169 
2170         if (s2->address >= address && s2->address + s2->size <= address + length) { // remove entry entirely
2171             if (rollback)
2172                 add_rollback_space(rollback, false, list, list_size, s2->address, s2->size, c);
2173 
2174             RemoveEntryList(&s2->list_entry);
2175 
2176             if (list_size)
2177                 RemoveEntryList(&s2->list_entry_size);
2178 
2179             ExFreePool(s2);
2180         } else if (address + length > s2->address && address + length < s2->address + s2->size) {
2181             if (address > s2->address) { // cut out hole
2182                 if (rollback)
2183                     add_rollback_space(rollback, false, list, list_size, address, length, c);
2184 
2185                 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
2186 
2187                 if (!s) {
2188                     ERR("out of memory\n");
2189                     return;
2190                 }
2191 
2192                 s->address = s2->address;
2193                 s->size = address - s2->address;
2194                 InsertHeadList(s2->list_entry.Blink, &s->list_entry);
2195 
2196                 s2->size = s2->address + s2->size - address - length;
2197                 s2->address = address + length;
2198 
2199                 if (list_size) {
2200                     RemoveEntryList(&s2->list_entry_size);
2201                     order_space_entry(s2, list_size);
2202                     order_space_entry(s, list_size);
2203                 }
2204 
2205                 return;
2206             } else { // remove start of entry
2207                 if (rollback)
2208                     add_rollback_space(rollback, false, list, list_size, s2->address, address + length - s2->address, c);
2209 
2210                 s2->size -= address + length - s2->address;
2211                 s2->address = address + length;
2212 
2213                 if (list_size) {
2214                     RemoveEntryList(&s2->list_entry_size);
2215                     order_space_entry(s2, list_size);
2216                 }
2217             }
2218         } else if (address > s2->address && address < s2->address + s2->size) { // remove end of entry
2219             if (rollback)
2220                 add_rollback_space(rollback, false, list, list_size, address, s2->address + s2->size - address, c);
2221 
2222             s2->size = address - s2->address;
2223 
2224             if (list_size) {
2225                 RemoveEntryList(&s2->list_entry_size);
2226                 order_space_entry(s2, list_size);
2227             }
2228         }
2229 
2230         le = le2;
2231     }
2232 }
2233 
space_list_subtract(chunk * c,uint64_t address,uint64_t length,LIST_ENTRY * rollback)2234 void space_list_subtract(chunk* c, uint64_t address, uint64_t length, LIST_ENTRY* rollback) {
2235     c->changed = true;
2236     c->space_changed = true;
2237 
2238     space_list_subtract2(&c->space, &c->space_size, address, length, c, rollback);
2239 
2240     space_list_subtract2(&c->deleting, NULL, address, length, c, rollback);
2241 }
2242