1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
/* Per-stripe write state filled in by the prepare_*_write helpers in this file. */
typedef struct {
    uint64_t start;      // start offset of the range to write on this stripe
    uint64_t end;        // end offset of that range; the helpers only build an MDL in RAID0 when start != end
    uint8_t* data;
    PMDL mdl;            // MDL whose PFN array is populated from the master MDL
    uint64_t irp_offset;
} write_stripe;
27
// Forward declaration of the I/O completion routine (static, so it is defined in this translation unit).
_Function_class_(IO_COMPLETION_ROUTINE)
static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);

// Forward declaration; all three pointer arguments must be non-NULL.
static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) __attribute__((nonnull(1, 2, 3)));

// Globals defined in another translation unit.
extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
extern tCcCopyWriteEx fCcCopyWriteEx;
extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
extern bool diskacc;
37
38 __attribute__((nonnull(1, 2, 4)))
find_data_address_in_chunk(device_extension * Vcb,chunk * c,uint64_t length,uint64_t * address)39 bool find_data_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t length, uint64_t* address) {
40 LIST_ENTRY* le;
41 space* s;
42
43 TRACE("(%p, %I64x, %I64x, %p)\n", Vcb, c->offset, length, address);
44
45 if (length > c->chunk_item->size - c->used)
46 return false;
47
48 if (!c->cache_loaded) {
49 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
50
51 if (!NT_SUCCESS(Status)) {
52 ERR("load_cache_chunk returned %08lx\n", Status);
53 return false;
54 }
55 }
56
57 if (IsListEmpty(&c->space_size))
58 return false;
59
60 le = c->space_size.Flink;
61 while (le != &c->space_size) {
62 s = CONTAINING_RECORD(le, space, list_entry_size);
63
64 if (s->size == length) {
65 *address = s->address;
66 return true;
67 } else if (s->size < length) {
68 if (le == c->space_size.Flink)
69 return false;
70
71 s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
72
73 *address = s->address;
74 return true;
75 }
76
77 le = le->Flink;
78 }
79
80 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
81
82 if (s->size > length) {
83 *address = s->address;
84 return true;
85 }
86
87 return false;
88 }
89
90 __attribute__((nonnull(1)))
get_chunk_from_address(device_extension * Vcb,uint64_t address)91 chunk* get_chunk_from_address(device_extension* Vcb, uint64_t address) {
92 LIST_ENTRY* le2;
93
94 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
95
96 le2 = Vcb->chunks.Flink;
97 while (le2 != &Vcb->chunks) {
98 chunk* c = CONTAINING_RECORD(le2, chunk, list_entry);
99
100 if (address >= c->offset && address < c->offset + c->chunk_item->size) {
101 ExReleaseResourceLite(&Vcb->chunk_lock);
102 return c;
103 }
104
105 le2 = le2->Flink;
106 }
107
108 ExReleaseResourceLite(&Vcb->chunk_lock);
109
110 return NULL;
111 }
112
/* A candidate stripe picked while allocating a new chunk: a device and the free hole on it. */
typedef struct {
    space* dh;       // free-space entry on the device that the stripe will be carved from
    device* device;  // device the stripe lives on
} stripe;
117
118 __attribute__((nonnull(1)))
find_new_chunk_address(device_extension * Vcb,uint64_t size)119 static uint64_t find_new_chunk_address(device_extension* Vcb, uint64_t size) {
120 uint64_t lastaddr;
121 LIST_ENTRY* le;
122
123 lastaddr = 0xc00000;
124
125 le = Vcb->chunks.Flink;
126 while (le != &Vcb->chunks) {
127 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
128
129 if (c->offset >= lastaddr + size)
130 return lastaddr;
131
132 lastaddr = c->offset + c->chunk_item->size;
133
134 le = le->Flink;
135 }
136
137 return lastaddr;
138 }
139
140 __attribute__((nonnull(1,2)))
find_new_dup_stripes(device_extension * Vcb,stripe * stripes,uint64_t max_stripe_size,bool full_size)141 static bool find_new_dup_stripes(device_extension* Vcb, stripe* stripes, uint64_t max_stripe_size, bool full_size) {
142 uint64_t devusage = 0xffffffffffffffff;
143 space *devdh1 = NULL, *devdh2 = NULL;
144 LIST_ENTRY* le;
145 device* dev2 = NULL;
146
147 le = Vcb->devices.Flink;
148
149 while (le != &Vcb->devices) {
150 device* dev = CONTAINING_RECORD(le, device, list_entry);
151
152 if (!dev->readonly && !dev->reloc && dev->devobj) {
153 uint64_t usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
154
155 // favour devices which have been used the least
156 if (usage < devusage) {
157 if (!IsListEmpty(&dev->space)) {
158 LIST_ENTRY* le2;
159 space *dh1 = NULL, *dh2 = NULL;
160
161 le2 = dev->space.Flink;
162 while (le2 != &dev->space) {
163 space* dh = CONTAINING_RECORD(le2, space, list_entry);
164
165 if (dh->size >= max_stripe_size && (!dh1 || !dh2 || dh->size < dh1->size)) {
166 dh2 = dh1;
167 dh1 = dh;
168 }
169
170 le2 = le2->Flink;
171 }
172
173 if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
174 dev2 = dev;
175 devusage = usage;
176 devdh1 = dh1;
177 devdh2 = dh2 ? dh2 : dh1;
178 }
179 }
180 }
181 }
182
183 le = le->Flink;
184 }
185
186 if (!devdh1) {
187 uint64_t size = 0;
188
189 // Can't find hole of at least max_stripe_size; look for the largest one we can find
190
191 if (full_size)
192 return false;
193
194 le = Vcb->devices.Flink;
195 while (le != &Vcb->devices) {
196 device* dev = CONTAINING_RECORD(le, device, list_entry);
197
198 if (!dev->readonly && !dev->reloc) {
199 if (!IsListEmpty(&dev->space)) {
200 LIST_ENTRY* le2;
201 space *dh1 = NULL, *dh2 = NULL;
202
203 le2 = dev->space.Flink;
204 while (le2 != &dev->space) {
205 space* dh = CONTAINING_RECORD(le2, space, list_entry);
206
207 if (!dh1 || !dh2 || dh->size < dh1->size) {
208 dh2 = dh1;
209 dh1 = dh;
210 }
211
212 le2 = le2->Flink;
213 }
214
215 if (dh1) {
216 uint64_t devsize;
217
218 if (dh2)
219 devsize = max(dh1->size / 2, min(dh1->size, dh2->size));
220 else
221 devsize = dh1->size / 2;
222
223 if (devsize > size) {
224 dev2 = dev;
225 devdh1 = dh1;
226
227 if (dh2 && min(dh1->size, dh2->size) > dh1->size / 2)
228 devdh2 = dh2;
229 else
230 devdh2 = dh1;
231
232 size = devsize;
233 }
234 }
235 }
236 }
237
238 le = le->Flink;
239 }
240
241 if (!devdh1)
242 return false;
243 }
244
245 stripes[0].device = stripes[1].device = dev2;
246 stripes[0].dh = devdh1;
247 stripes[1].dh = devdh2;
248
249 return true;
250 }
251
252 __attribute__((nonnull(1,2)))
find_new_stripe(device_extension * Vcb,stripe * stripes,uint16_t i,uint64_t max_stripe_size,bool allow_missing,bool full_size)253 static bool find_new_stripe(device_extension* Vcb, stripe* stripes, uint16_t i, uint64_t max_stripe_size, bool allow_missing, bool full_size) {
254 uint64_t k, devusage = 0xffffffffffffffff;
255 space* devdh = NULL;
256 LIST_ENTRY* le;
257 device* dev2 = NULL;
258
259 le = Vcb->devices.Flink;
260 while (le != &Vcb->devices) {
261 device* dev = CONTAINING_RECORD(le, device, list_entry);
262 uint64_t usage;
263 bool skip = false;
264
265 if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
266 le = le->Flink;
267 continue;
268 }
269
270 // skip this device if it already has a stripe
271 if (i > 0) {
272 for (k = 0; k < i; k++) {
273 if (stripes[k].device == dev) {
274 skip = true;
275 break;
276 }
277 }
278 }
279
280 if (!skip) {
281 usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
282
283 // favour devices which have been used the least
284 if (usage < devusage) {
285 if (!IsListEmpty(&dev->space)) {
286 LIST_ENTRY* le2;
287
288 le2 = dev->space.Flink;
289 while (le2 != &dev->space) {
290 space* dh = CONTAINING_RECORD(le2, space, list_entry);
291
292 if ((dev2 != dev && dh->size >= max_stripe_size) ||
293 (dev2 == dev && dh->size >= max_stripe_size && dh->size < devdh->size)
294 ) {
295 devdh = dh;
296 dev2 = dev;
297 devusage = usage;
298 }
299
300 le2 = le2->Flink;
301 }
302 }
303 }
304 }
305
306 le = le->Flink;
307 }
308
309 if (!devdh) {
310 // Can't find hole of at least max_stripe_size; look for the largest one we can find
311
312 if (full_size)
313 return false;
314
315 le = Vcb->devices.Flink;
316 while (le != &Vcb->devices) {
317 device* dev = CONTAINING_RECORD(le, device, list_entry);
318 bool skip = false;
319
320 if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
321 le = le->Flink;
322 continue;
323 }
324
325 // skip this device if it already has a stripe
326 if (i > 0) {
327 for (k = 0; k < i; k++) {
328 if (stripes[k].device == dev) {
329 skip = true;
330 break;
331 }
332 }
333 }
334
335 if (!skip) {
336 if (!IsListEmpty(&dev->space)) {
337 LIST_ENTRY* le2;
338
339 le2 = dev->space.Flink;
340 while (le2 != &dev->space) {
341 space* dh = CONTAINING_RECORD(le2, space, list_entry);
342
343 if (!devdh || devdh->size < dh->size) {
344 devdh = dh;
345 dev2 = dev;
346 }
347
348 le2 = le2->Flink;
349 }
350 }
351 }
352
353 le = le->Flink;
354 }
355
356 if (!devdh)
357 return false;
358 }
359
360 stripes[i].dh = devdh;
361 stripes[i].device = dev2;
362
363 return true;
364 }
365
366 __attribute__((nonnull(1,3)))
alloc_chunk(device_extension * Vcb,uint64_t flags,chunk ** pc,bool full_size)367 NTSTATUS alloc_chunk(device_extension* Vcb, uint64_t flags, chunk** pc, bool full_size) {
368 NTSTATUS Status;
369 uint64_t max_stripe_size, max_chunk_size, stripe_size, stripe_length, factor;
370 uint64_t total_size = 0, logaddr;
371 uint16_t i, type, num_stripes, sub_stripes, max_stripes, min_stripes, allowed_missing;
372 stripe* stripes = NULL;
373 uint16_t cisize;
374 CHUNK_ITEM_STRIPE* cis;
375 chunk* c = NULL;
376 space* s = NULL;
377 LIST_ENTRY* le;
378
379 le = Vcb->devices.Flink;
380 while (le != &Vcb->devices) {
381 device* dev = CONTAINING_RECORD(le, device, list_entry);
382 total_size += dev->devitem.num_bytes;
383
384 le = le->Flink;
385 }
386
387 TRACE("total_size = %I64x\n", total_size);
388
389 // We purposely check for DATA first - mixed blocks have the same size
390 // as DATA ones.
391 if (flags & BLOCK_FLAG_DATA) {
392 max_stripe_size = 0x40000000; // 1 GB
393 max_chunk_size = 10 * max_stripe_size;
394 } else if (flags & BLOCK_FLAG_METADATA) {
395 if (total_size > 0xC80000000) // 50 GB
396 max_stripe_size = 0x40000000; // 1 GB
397 else
398 max_stripe_size = 0x10000000; // 256 MB
399
400 max_chunk_size = max_stripe_size;
401 } else if (flags & BLOCK_FLAG_SYSTEM) {
402 max_stripe_size = 0x2000000; // 32 MB
403 max_chunk_size = 2 * max_stripe_size;
404 } else {
405 ERR("unknown chunk type\n");
406 return STATUS_INTERNAL_ERROR;
407 }
408
409 if (flags & BLOCK_FLAG_DUPLICATE) {
410 min_stripes = 2;
411 max_stripes = 2;
412 sub_stripes = 0;
413 type = BLOCK_FLAG_DUPLICATE;
414 allowed_missing = 0;
415 } else if (flags & BLOCK_FLAG_RAID0) {
416 min_stripes = 2;
417 max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
418 sub_stripes = 0;
419 type = BLOCK_FLAG_RAID0;
420 allowed_missing = 0;
421 } else if (flags & BLOCK_FLAG_RAID1) {
422 min_stripes = 2;
423 max_stripes = 2;
424 sub_stripes = 1;
425 type = BLOCK_FLAG_RAID1;
426 allowed_missing = 1;
427 } else if (flags & BLOCK_FLAG_RAID10) {
428 min_stripes = 4;
429 max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
430 sub_stripes = 2;
431 type = BLOCK_FLAG_RAID10;
432 allowed_missing = 1;
433 } else if (flags & BLOCK_FLAG_RAID5) {
434 min_stripes = 3;
435 max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
436 sub_stripes = 1;
437 type = BLOCK_FLAG_RAID5;
438 allowed_missing = 1;
439 } else if (flags & BLOCK_FLAG_RAID6) {
440 min_stripes = 4;
441 max_stripes = 257;
442 sub_stripes = 1;
443 type = BLOCK_FLAG_RAID6;
444 allowed_missing = 2;
445 } else if (flags & BLOCK_FLAG_RAID1C3) {
446 min_stripes = 3;
447 max_stripes = 3;
448 sub_stripes = 1;
449 type = BLOCK_FLAG_RAID1C3;
450 allowed_missing = 2;
451 } else if (flags & BLOCK_FLAG_RAID1C4) {
452 min_stripes = 4;
453 max_stripes = 4;
454 sub_stripes = 1;
455 type = BLOCK_FLAG_RAID1C4;
456 allowed_missing = 3;
457 } else { // SINGLE
458 min_stripes = 1;
459 max_stripes = 1;
460 sub_stripes = 1;
461 type = 0;
462 allowed_missing = 0;
463 }
464
465 if (max_chunk_size > total_size / 10) { // cap at 10%
466 max_chunk_size = total_size / 10;
467 max_stripe_size = max_chunk_size / min_stripes;
468 }
469
470 if (max_stripe_size > total_size / (10 * min_stripes))
471 max_stripe_size = total_size / (10 * min_stripes);
472
473 TRACE("would allocate a new chunk of %I64x bytes and stripe %I64x\n", max_chunk_size, max_stripe_size);
474
475 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * max_stripes, ALLOC_TAG);
476 if (!stripes) {
477 ERR("out of memory\n");
478 Status = STATUS_INSUFFICIENT_RESOURCES;
479 goto end;
480 }
481
482 num_stripes = 0;
483
484 if (type == BLOCK_FLAG_DUPLICATE) {
485 if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size, full_size)) {
486 Status = STATUS_DISK_FULL;
487 goto end;
488 } else
489 num_stripes = max_stripes;
490 } else {
491 for (i = 0; i < max_stripes; i++) {
492 if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, false, full_size))
493 break;
494 else
495 num_stripes++;
496 }
497 }
498
499 if (num_stripes < min_stripes && Vcb->options.allow_degraded && allowed_missing > 0) {
500 uint16_t added_missing = 0;
501
502 for (i = num_stripes; i < max_stripes; i++) {
503 if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, true, full_size))
504 break;
505 else {
506 added_missing++;
507 if (added_missing >= allowed_missing)
508 break;
509 }
510 }
511
512 num_stripes += added_missing;
513 }
514
515 // for RAID10, round down to an even number of stripes
516 if (type == BLOCK_FLAG_RAID10 && (num_stripes % sub_stripes) != 0) {
517 num_stripes -= num_stripes % sub_stripes;
518 }
519
520 if (num_stripes < min_stripes) {
521 WARN("found %u stripes, needed at least %u\n", num_stripes, min_stripes);
522 Status = STATUS_DISK_FULL;
523 goto end;
524 }
525
526 c = ExAllocatePoolWithTag(NonPagedPool, sizeof(chunk), ALLOC_TAG);
527 if (!c) {
528 ERR("out of memory\n");
529 Status = STATUS_INSUFFICIENT_RESOURCES;
530 goto end;
531 }
532
533 c->devices = NULL;
534
535 cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
536 c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, cisize, ALLOC_TAG);
537 if (!c->chunk_item) {
538 ERR("out of memory\n");
539 Status = STATUS_INSUFFICIENT_RESOURCES;
540 goto end;
541 }
542
543 stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
544
545 if (type == BLOCK_FLAG_DUPLICATE && stripes[1].dh == stripes[0].dh)
546 stripe_size = min(stripes[0].dh->size / 2, max_stripe_size);
547 else {
548 stripe_size = max_stripe_size;
549 for (i = 0; i < num_stripes; i++) {
550 if (stripes[i].dh->size < stripe_size)
551 stripe_size = stripes[i].dh->size;
552 }
553 }
554
555 if (type == BLOCK_FLAG_RAID0)
556 factor = num_stripes;
557 else if (type == BLOCK_FLAG_RAID10)
558 factor = num_stripes / sub_stripes;
559 else if (type == BLOCK_FLAG_RAID5)
560 factor = num_stripes - 1;
561 else if (type == BLOCK_FLAG_RAID6)
562 factor = num_stripes - 2;
563 else
564 factor = 1; // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
565
566 if (stripe_size * factor > max_chunk_size)
567 stripe_size = max_chunk_size / factor;
568
569 if (stripe_size % stripe_length > 0)
570 stripe_size -= stripe_size % stripe_length;
571
572 if (stripe_size == 0) {
573 ERR("not enough free space found (stripe_size == 0)\n");
574 Status = STATUS_DISK_FULL;
575 goto end;
576 }
577
578 c->chunk_item->size = stripe_size * factor;
579 c->chunk_item->root_id = Vcb->extent_root->id;
580 c->chunk_item->stripe_length = stripe_length;
581 c->chunk_item->type = flags;
582 c->chunk_item->opt_io_alignment = (uint32_t)c->chunk_item->stripe_length;
583 c->chunk_item->opt_io_width = (uint32_t)c->chunk_item->stripe_length;
584 c->chunk_item->sector_size = stripes[0].device->devitem.minimal_io_size;
585 c->chunk_item->num_stripes = num_stripes;
586 c->chunk_item->sub_stripes = sub_stripes;
587
588 c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
589 if (!c->devices) {
590 ERR("out of memory\n");
591 Status = STATUS_INSUFFICIENT_RESOURCES;
592 goto end;
593 }
594
595 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
596 for (i = 0; i < num_stripes; i++) {
597 cis[i].dev_id = stripes[i].device->devitem.dev_id;
598
599 if (type == BLOCK_FLAG_DUPLICATE && i == 1 && stripes[i].dh == stripes[0].dh)
600 cis[i].offset = stripes[0].dh->address + stripe_size;
601 else
602 cis[i].offset = stripes[i].dh->address;
603
604 cis[i].dev_uuid = stripes[i].device->devitem.device_uuid;
605
606 c->devices[i] = stripes[i].device;
607 }
608
609 logaddr = find_new_chunk_address(Vcb, c->chunk_item->size);
610
611 Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
612
613 c->size = cisize;
614 c->offset = logaddr;
615 c->used = c->oldused = 0;
616 c->cache = c->old_cache = NULL;
617 c->readonly = false;
618 c->reloc = false;
619 c->last_alloc_set = false;
620 c->last_stripe = 0;
621 c->cache_loaded = true;
622 c->changed = false;
623 c->space_changed = false;
624 c->balance_num = 0;
625
626 InitializeListHead(&c->space);
627 InitializeListHead(&c->space_size);
628 InitializeListHead(&c->deleting);
629 InitializeListHead(&c->changed_extents);
630
631 InitializeListHead(&c->range_locks);
632 ExInitializeResourceLite(&c->range_locks_lock);
633 KeInitializeEvent(&c->range_locks_event, NotificationEvent, false);
634
635 InitializeListHead(&c->partial_stripes);
636 ExInitializeResourceLite(&c->partial_stripes_lock);
637
638 ExInitializeResourceLite(&c->lock);
639 ExInitializeResourceLite(&c->changed_extents_lock);
640
641 s = ExAllocatePoolWithTag(NonPagedPool, sizeof(space), ALLOC_TAG);
642 if (!s) {
643 ERR("out of memory\n");
644 Status = STATUS_INSUFFICIENT_RESOURCES;
645 goto end;
646 }
647
648 s->address = c->offset;
649 s->size = c->chunk_item->size;
650 InsertTailList(&c->space, &s->list_entry);
651 InsertTailList(&c->space_size, &s->list_entry_size);
652
653 protect_superblocks(c);
654
655 for (i = 0; i < num_stripes; i++) {
656 stripes[i].device->devitem.bytes_used += stripe_size;
657
658 space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL, NULL);
659 }
660
661 Status = STATUS_SUCCESS;
662
663 if (flags & BLOCK_FLAG_RAID5 || flags & BLOCK_FLAG_RAID6)
664 Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_RAID56;
665
666 end:
667 if (stripes)
668 ExFreePool(stripes);
669
670 if (!NT_SUCCESS(Status)) {
671 if (c) {
672 if (c->devices)
673 ExFreePool(c->devices);
674
675 if (c->chunk_item)
676 ExFreePool(c->chunk_item);
677
678 ExFreePool(c);
679 }
680
681 if (s) ExFreePool(s);
682 } else {
683 bool done = false;
684
685 le = Vcb->chunks.Flink;
686 while (le != &Vcb->chunks) {
687 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
688
689 if (c2->offset > c->offset) {
690 InsertHeadList(le->Blink, &c->list_entry);
691 done = true;
692 break;
693 }
694
695 le = le->Flink;
696 }
697
698 if (!done)
699 InsertTailList(&Vcb->chunks, &c->list_entry);
700
701 c->created = true;
702 c->changed = true;
703 c->space_changed = true;
704 c->list_entry_balance.Flink = NULL;
705
706 *pc = c;
707 }
708
709 return Status;
710 }
711
712 __attribute__((nonnull(1,3,5,8)))
713 static NTSTATUS prepare_raid0_write(_Pre_satisfies_(_Curr_->chunk_item->num_stripes>0) _In_ chunk* c, _In_ uint64_t address, _In_reads_bytes_(length) void* data,
714 _In_ uint32_t length, _In_ write_stripe* stripes, _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
715 uint64_t startoff, endoff;
716 uint16_t startoffstripe, endoffstripe, stripenum;
717 uint64_t pos, *stripeoff;
718 uint32_t i;
719 bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
720 PMDL master_mdl;
721 PFN_NUMBER* pfns;
722
723 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
724 if (!stripeoff) {
725 ERR("out of memory\n");
726 return STATUS_INSUFFICIENT_RESOURCES;
727 }
728
729 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
730 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
731
732 if (file_write) {
733 master_mdl = Irp->MdlAddress;
734
735 pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
736 pfns = &pfns[irp_offset >> PAGE_SHIFT];
737 } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
738 wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
739 if (!wtc->scratch) {
740 ERR("out of memory\n");
741 return STATUS_INSUFFICIENT_RESOURCES;
742 }
743
744 RtlCopyMemory(wtc->scratch, data, length);
745
746 master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
747 if (!master_mdl) {
748 ERR("out of memory\n");
749 return STATUS_INSUFFICIENT_RESOURCES;
750 }
751
752 MmBuildMdlForNonPagedPool(master_mdl);
753
754 wtc->mdl = master_mdl;
755
756 pfns = (PFN_NUMBER*)(master_mdl + 1);
757 } else {
758 NTSTATUS Status = STATUS_SUCCESS;
759
760 master_mdl = IoAllocateMdl(data, length, false, false, NULL);
761 if (!master_mdl) {
762 ERR("out of memory\n");
763 return STATUS_INSUFFICIENT_RESOURCES;
764 }
765
766 _SEH2_TRY {
767 MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)768 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
769 Status = _SEH2_GetExceptionCode();
770 } _SEH2_END;
771
772 if (!NT_SUCCESS(Status)) {
773 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
774 IoFreeMdl(master_mdl);
775 return Status;
776 }
777
778 wtc->mdl = master_mdl;
779
780 pfns = (PFN_NUMBER*)(master_mdl + 1);
781 }
782
783 for (i = 0; i < c->chunk_item->num_stripes; i++) {
784 if (startoffstripe > i)
785 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
786 else if (startoffstripe == i)
787 stripes[i].start = startoff;
788 else
789 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
790
791 if (endoffstripe > i)
792 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
793 else if (endoffstripe == i)
794 stripes[i].end = endoff + 1;
795 else
796 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
797
798 if (stripes[i].start != stripes[i].end) {
799 stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
800 if (!stripes[i].mdl) {
801 ERR("IoAllocateMdl failed\n");
802 ExFreePool(stripeoff);
803 return STATUS_INSUFFICIENT_RESOURCES;
804 }
805 }
806 }
807
808 pos = 0;
809 RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
810
811 stripenum = startoffstripe;
812
813 while (pos < length) {
814 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum].mdl + 1);
815
816 if (pos == 0) {
817 uint32_t writelen = (uint32_t)min(stripes[stripenum].end - stripes[stripenum].start,
818 c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));
819
820 RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
821
822 stripeoff[stripenum] += writelen;
823 pos += writelen;
824 } else if (length - pos < c->chunk_item->stripe_length) {
825 RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
826 break;
827 } else {
828 RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
829
830 stripeoff[stripenum] += c->chunk_item->stripe_length;
831 pos += c->chunk_item->stripe_length;
832 }
833
834 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
835 }
836
837 ExFreePool(stripeoff);
838
839 return STATUS_SUCCESS;
840 }
841
842 __attribute__((nonnull(1,3,5,8)))
843 static NTSTATUS prepare_raid10_write(_Pre_satisfies_(_Curr_->chunk_item->sub_stripes>0&&_Curr_->chunk_item->num_stripes>=_Curr_->chunk_item->sub_stripes) _In_ chunk* c,
844 _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_stripe* stripes,
845 _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
846 uint64_t startoff, endoff;
847 uint16_t startoffstripe, endoffstripe, stripenum;
848 uint64_t pos, *stripeoff;
849 uint32_t i;
850 bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
851 PMDL master_mdl;
852 PFN_NUMBER* pfns;
853
854 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
855 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
856
857 stripenum = startoffstripe;
858 startoffstripe *= c->chunk_item->sub_stripes;
859 endoffstripe *= c->chunk_item->sub_stripes;
860
861 if (file_write) {
862 master_mdl = Irp->MdlAddress;
863
864 pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
865 pfns = &pfns[irp_offset >> PAGE_SHIFT];
866 } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
867 wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
868 if (!wtc->scratch) {
869 ERR("out of memory\n");
870 return STATUS_INSUFFICIENT_RESOURCES;
871 }
872
873 RtlCopyMemory(wtc->scratch, data, length);
874
875 master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
876 if (!master_mdl) {
877 ERR("out of memory\n");
878 return STATUS_INSUFFICIENT_RESOURCES;
879 }
880
881 MmBuildMdlForNonPagedPool(master_mdl);
882
883 wtc->mdl = master_mdl;
884
885 pfns = (PFN_NUMBER*)(master_mdl + 1);
886 } else {
887 NTSTATUS Status = STATUS_SUCCESS;
888
889 master_mdl = IoAllocateMdl(data, length, false, false, NULL);
890 if (!master_mdl) {
891 ERR("out of memory\n");
892 return STATUS_INSUFFICIENT_RESOURCES;
893 }
894
895 _SEH2_TRY {
896 MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)897 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
898 Status = _SEH2_GetExceptionCode();
899 } _SEH2_END;
900
901 if (!NT_SUCCESS(Status)) {
902 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
903 IoFreeMdl(master_mdl);
904 return Status;
905 }
906
907 wtc->mdl = master_mdl;
908
909 pfns = (PFN_NUMBER*)(master_mdl + 1);
910 }
911
912 for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
913 uint16_t j;
914
915 if (startoffstripe > i)
916 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
917 else if (startoffstripe == i)
918 stripes[i].start = startoff;
919 else
920 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
921
922 if (endoffstripe > i)
923 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
924 else if (endoffstripe == i)
925 stripes[i].end = endoff + 1;
926 else
927 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
928
929 stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
930 if (!stripes[i].mdl) {
931 ERR("IoAllocateMdl failed\n");
932 return STATUS_INSUFFICIENT_RESOURCES;
933 }
934
935 for (j = 1; j < c->chunk_item->sub_stripes; j++) {
936 stripes[i+j].start = stripes[i].start;
937 stripes[i+j].end = stripes[i].end;
938 stripes[i+j].data = stripes[i].data;
939 stripes[i+j].mdl = stripes[i].mdl;
940 }
941 }
942
943 pos = 0;
944
945 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
946 if (!stripeoff) {
947 ERR("out of memory\n");
948 return STATUS_INSUFFICIENT_RESOURCES;
949 }
950
951 RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
952
953 while (pos < length) {
954 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum * c->chunk_item->sub_stripes].mdl + 1);
955
956 if (pos == 0) {
957 uint32_t writelen = (uint32_t)min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
958 c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
959
960 RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
961
962 stripeoff[stripenum] += writelen;
963 pos += writelen;
964 } else if (length - pos < c->chunk_item->stripe_length) {
965 RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
966 break;
967 } else {
968 RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
969
970 stripeoff[stripenum] += c->chunk_item->stripe_length;
971 pos += c->chunk_item->stripe_length;
972 }
973
974 stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
975 }
976
977 ExFreePool(stripeoff);
978
979 return STATUS_SUCCESS;
980 }
981
982 __attribute__((nonnull(1,2,5)))
add_partial_stripe(device_extension * Vcb,chunk * c,uint64_t address,uint32_t length,void * data)983 static NTSTATUS add_partial_stripe(device_extension* Vcb, chunk* c, uint64_t address, uint32_t length, void* data) {
984 NTSTATUS Status;
985 LIST_ENTRY* le;
986 partial_stripe* ps;
987 uint64_t stripe_addr;
988 uint16_t num_data_stripes;
989
990 num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
991 stripe_addr = address - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
992
993 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
994
995 le = c->partial_stripes.Flink;
996 while (le != &c->partial_stripes) {
997 ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
998
999 if (ps->address == stripe_addr) {
1000 // update existing entry
1001
1002 RtlCopyMemory(ps->data + address - stripe_addr, data, length);
1003 RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) >> Vcb->sector_shift), length >> Vcb->sector_shift);
1004
1005 // if now filled, flush
1006 if (RtlAreBitsClear(&ps->bmp, 0, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) >> Vcb->sector_shift))) {
1007 Status = flush_partial_stripe(Vcb, c, ps);
1008 if (!NT_SUCCESS(Status)) {
1009 ERR("flush_partial_stripe returned %08lx\n", Status);
1010 goto end;
1011 }
1012
1013 RemoveEntryList(&ps->list_entry);
1014
1015 if (ps->bmparr)
1016 ExFreePool(ps->bmparr);
1017
1018 ExFreePool(ps);
1019 }
1020
1021 Status = STATUS_SUCCESS;
1022 goto end;
1023 } else if (ps->address > stripe_addr)
1024 break;
1025
1026 le = le->Flink;
1027 }
1028
1029 // add new entry
1030
1031 ps = ExAllocatePoolWithTag(NonPagedPool, offsetof(partial_stripe, data[0]) + (ULONG)(num_data_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
1032 if (!ps) {
1033 ERR("out of memory\n");
1034 Status = STATUS_INSUFFICIENT_RESOURCES;
1035 goto end;
1036 }
1037
1038 ps->bmplen = (ULONG)(num_data_stripes * c->chunk_item->stripe_length) >> Vcb->sector_shift;
1039
1040 ps->address = stripe_addr;
1041 ps->bmparr = ExAllocatePoolWithTag(NonPagedPool, (size_t)sector_align(((ps->bmplen / 8) + 1), sizeof(ULONG)), ALLOC_TAG);
1042 if (!ps->bmparr) {
1043 ERR("out of memory\n");
1044 ExFreePool(ps);
1045 Status = STATUS_INSUFFICIENT_RESOURCES;
1046 goto end;
1047 }
1048
1049 RtlInitializeBitMap(&ps->bmp, ps->bmparr, ps->bmplen);
1050 RtlSetAllBits(&ps->bmp);
1051
1052 RtlCopyMemory(ps->data + address - stripe_addr, data, length);
1053 RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) >> Vcb->sector_shift), length >> Vcb->sector_shift);
1054
1055 InsertHeadList(le->Blink, &ps->list_entry);
1056
1057 Status = STATUS_SUCCESS;
1058
1059 end:
1060 ExReleaseResourceLite(&c->partial_stripes_lock);
1061
1062 return Status;
1063 }
1064
1065 typedef struct {
1066 PMDL mdl;
1067 PFN_NUMBER* pfns;
1068 } log_stripe;
1069
1070 __attribute__((nonnull(1,2,4,6,10)))
prepare_raid5_write(device_extension * Vcb,chunk * c,uint64_t address,void * data,uint32_t length,write_stripe * stripes,PIRP Irp,uint64_t irp_offset,ULONG priority,write_data_context * wtc)1071 static NTSTATUS prepare_raid5_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1072 uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1073 uint64_t startoff, endoff, parity_start, parity_end;
1074 uint16_t startoffstripe, endoffstripe, parity, num_data_stripes = c->chunk_item->num_stripes - 1;
1075 uint64_t pos, parity_pos, *stripeoff = NULL;
1076 uint32_t i;
1077 bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1078 PMDL master_mdl;
1079 NTSTATUS Status;
1080 PFN_NUMBER *pfns, *parity_pfns;
1081 log_stripe* log_stripes = NULL;
1082
1083 if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1084 uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1085
1086 delta = min(length, delta);
1087 Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + length - delta);
1088 if (!NT_SUCCESS(Status)) {
1089 ERR("add_partial_stripe returned %08lx\n", Status);
1090 goto exit;
1091 }
1092
1093 length -= (uint32_t)delta;
1094 }
1095
1096 if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1097 uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1098
1099 Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, data);
1100 if (!NT_SUCCESS(Status)) {
1101 ERR("add_partial_stripe returned %08lx\n", Status);
1102 goto exit;
1103 }
1104
1105 address += delta;
1106 length -= (uint32_t)delta;
1107 irp_offset += delta;
1108 data = (uint8_t*)data + delta;
1109 }
1110
1111 if (length == 0) {
1112 Status = STATUS_SUCCESS;
1113 goto exit;
1114 }
1115
1116 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1117 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1118
1119 pos = 0;
1120 while (pos < length) {
1121 parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1122
1123 if (pos == 0) {
1124 uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1125 ULONG skip, writelen;
1126
1127 i = startoffstripe;
1128 while (stripe != parity) {
1129 if (i == startoffstripe) {
1130 writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1131
1132 stripes[stripe].start = startoff;
1133 stripes[stripe].end = startoff + writelen;
1134
1135 pos += writelen;
1136
1137 if (pos == length)
1138 break;
1139 } else {
1140 writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1141
1142 stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1143 stripes[stripe].end = stripes[stripe].start + writelen;
1144
1145 pos += writelen;
1146
1147 if (pos == length)
1148 break;
1149 }
1150
1151 i++;
1152 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1153 }
1154
1155 if (pos == length)
1156 break;
1157
1158 for (i = 0; i < startoffstripe; i++) {
1159 stripe = (parity + i + 1) % c->chunk_item->num_stripes;
1160
1161 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1162 }
1163
1164 stripes[parity].start = stripes[parity].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1165
1166 if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1167 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1168
1169 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1170 stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1171 }
1172
1173 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1174 }
1175 } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1176 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1177 stripes[i].end += c->chunk_item->stripe_length;
1178 }
1179
1180 pos += c->chunk_item->stripe_length * num_data_stripes;
1181 } else {
1182 uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1183
1184 i = 0;
1185 while (stripe != parity) {
1186 if (endoffstripe == i) {
1187 stripes[stripe].end = endoff + 1;
1188 break;
1189 } else if (endoffstripe > i)
1190 stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1191
1192 i++;
1193 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1194 }
1195
1196 break;
1197 }
1198 }
1199
1200 parity_start = 0xffffffffffffffff;
1201 parity_end = 0;
1202
1203 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1204 if (stripes[i].start != 0 || stripes[i].end != 0) {
1205 parity_start = min(stripes[i].start, parity_start);
1206 parity_end = max(stripes[i].end, parity_end);
1207 }
1208 }
1209
1210 if (parity_end == parity_start) {
1211 Status = STATUS_SUCCESS;
1212 goto exit;
1213 }
1214
1215 parity = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1216 stripes[parity].start = parity_start;
1217
1218 parity = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1219 stripes[parity].end = parity_end;
1220
1221 log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1222 if (!log_stripes) {
1223 ERR("out of memory\n");
1224 Status = STATUS_INSUFFICIENT_RESOURCES;
1225 goto exit;
1226 }
1227
1228 RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1229
1230 for (i = 0; i < num_data_stripes; i++) {
1231 log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1232 if (!log_stripes[i].mdl) {
1233 ERR("out of memory\n");
1234 Status = STATUS_INSUFFICIENT_RESOURCES;
1235 goto exit;
1236 }
1237
1238 log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1239 log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1240 }
1241
1242 wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1243 if (!wtc->parity1) {
1244 ERR("out of memory\n");
1245 Status = STATUS_INSUFFICIENT_RESOURCES;
1246 goto exit;
1247 }
1248
1249 wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1250 if (!wtc->parity1_mdl) {
1251 ERR("out of memory\n");
1252 Status = STATUS_INSUFFICIENT_RESOURCES;
1253 goto exit;
1254 }
1255
1256 MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1257
1258 if (file_write)
1259 master_mdl = Irp->MdlAddress;
1260 else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1261 wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1262 if (!wtc->scratch) {
1263 ERR("out of memory\n");
1264 Status = STATUS_INSUFFICIENT_RESOURCES;
1265 goto exit;
1266 }
1267
1268 RtlCopyMemory(wtc->scratch, data, length);
1269
1270 master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1271 if (!master_mdl) {
1272 ERR("out of memory\n");
1273 Status = STATUS_INSUFFICIENT_RESOURCES;
1274 goto exit;
1275 }
1276
1277 MmBuildMdlForNonPagedPool(master_mdl);
1278
1279 wtc->mdl = master_mdl;
1280 } else {
1281 master_mdl = IoAllocateMdl(data, length, false, false, NULL);
1282 if (!master_mdl) {
1283 ERR("out of memory\n");
1284 Status = STATUS_INSUFFICIENT_RESOURCES;
1285 goto exit;
1286 }
1287
1288 Status = STATUS_SUCCESS;
1289
1290 _SEH2_TRY {
1291 MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1292 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1293 Status = _SEH2_GetExceptionCode();
1294 } _SEH2_END;
1295
1296 if (!NT_SUCCESS(Status)) {
1297 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1298 IoFreeMdl(master_mdl);
1299 return Status;
1300 }
1301
1302 wtc->mdl = master_mdl;
1303 }
1304
1305 pfns = (PFN_NUMBER*)(master_mdl + 1);
1306 parity_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1307
1308 if (file_write)
1309 pfns = &pfns[irp_offset >> PAGE_SHIFT];
1310
1311 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1312 if (stripes[i].start != stripes[i].end) {
1313 stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1314 if (!stripes[i].mdl) {
1315 ERR("IoAllocateMdl failed\n");
1316 Status = STATUS_INSUFFICIENT_RESOURCES;
1317 goto exit;
1318 }
1319 }
1320 }
1321
1322 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1323 if (!stripeoff) {
1324 ERR("out of memory\n");
1325 Status = STATUS_INSUFFICIENT_RESOURCES;
1326 goto exit;
1327 }
1328
1329 RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1330
1331 pos = 0;
1332 parity_pos = 0;
1333
1334 while (pos < length) {
1335 PFN_NUMBER* stripe_pfns;
1336
1337 parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1338
1339 if (pos == 0) {
1340 uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1341 uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1342 c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1343 uint32_t maxwritelen = writelen;
1344
1345 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1346
1347 RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1348
1349 RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1350 log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1351
1352 stripeoff[stripe] = writelen;
1353 pos += writelen;
1354
1355 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1356 i = startoffstripe + 1;
1357
1358 while (stripe != parity) {
1359 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1360 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1361
1362 if (writelen == 0)
1363 break;
1364
1365 if (writelen > maxwritelen)
1366 maxwritelen = writelen;
1367
1368 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1369
1370 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1371 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1372
1373 stripeoff[stripe] = writelen;
1374 pos += writelen;
1375
1376 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1377 i++;
1378 }
1379
1380 stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1381
1382 RtlCopyMemory(stripe_pfns, parity_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1383 stripeoff[parity] = maxwritelen;
1384 parity_pos = maxwritelen;
1385 } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1386 uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1387
1388 i = 0;
1389 while (stripe != parity) {
1390 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1391
1392 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1393
1394 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1395 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1396
1397 stripeoff[stripe] += c->chunk_item->stripe_length;
1398 pos += c->chunk_item->stripe_length;
1399
1400 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1401 i++;
1402 }
1403
1404 stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1405
1406 RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1407 stripeoff[parity] += c->chunk_item->stripe_length;
1408 parity_pos += c->chunk_item->stripe_length;
1409 } else {
1410 uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1411 uint32_t writelen, maxwritelen = 0;
1412
1413 i = 0;
1414 while (pos < length) {
1415 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1416 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1417
1418 if (writelen == 0)
1419 break;
1420
1421 if (writelen > maxwritelen)
1422 maxwritelen = writelen;
1423
1424 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1425
1426 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1427 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1428
1429 stripeoff[stripe] += writelen;
1430 pos += writelen;
1431
1432 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1433 i++;
1434 }
1435
1436 stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1437
1438 RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1439 }
1440 }
1441
1442 for (i = 0; i < num_data_stripes; i++) {
1443 uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[i].mdl, priority);
1444
1445 if (i == 0)
1446 RtlCopyMemory(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1447 else
1448 do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1449 }
1450
1451 Status = STATUS_SUCCESS;
1452
1453 exit:
1454 if (log_stripes) {
1455 for (i = 0; i < num_data_stripes; i++) {
1456 if (log_stripes[i].mdl)
1457 IoFreeMdl(log_stripes[i].mdl);
1458 }
1459
1460 ExFreePool(log_stripes);
1461 }
1462
1463 if (stripeoff)
1464 ExFreePool(stripeoff);
1465
1466 return Status;
1467 }
1468
1469 __attribute__((nonnull(1,2,4,6,10)))
prepare_raid6_write(device_extension * Vcb,chunk * c,uint64_t address,void * data,uint32_t length,write_stripe * stripes,PIRP Irp,uint64_t irp_offset,ULONG priority,write_data_context * wtc)1470 static NTSTATUS prepare_raid6_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1471 uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1472 uint64_t startoff, endoff, parity_start, parity_end;
1473 uint16_t startoffstripe, endoffstripe, parity1, num_data_stripes = c->chunk_item->num_stripes - 2;
1474 uint64_t pos, parity_pos, *stripeoff = NULL;
1475 uint32_t i;
1476 bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1477 PMDL master_mdl;
1478 NTSTATUS Status;
1479 PFN_NUMBER *pfns, *parity1_pfns, *parity2_pfns;
1480 log_stripe* log_stripes = NULL;
1481
1482 if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1483 uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1484
1485 delta = min(length, delta);
1486 Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + length - delta);
1487 if (!NT_SUCCESS(Status)) {
1488 ERR("add_partial_stripe returned %08lx\n", Status);
1489 goto exit;
1490 }
1491
1492 length -= (uint32_t)delta;
1493 }
1494
1495 if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1496 uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1497
1498 Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, data);
1499 if (!NT_SUCCESS(Status)) {
1500 ERR("add_partial_stripe returned %08lx\n", Status);
1501 goto exit;
1502 }
1503
1504 address += delta;
1505 length -= (uint32_t)delta;
1506 irp_offset += delta;
1507 data = (uint8_t*)data + delta;
1508 }
1509
1510 if (length == 0) {
1511 Status = STATUS_SUCCESS;
1512 goto exit;
1513 }
1514
1515 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1516 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1517
1518 pos = 0;
1519 while (pos < length) {
1520 parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1521
1522 if (pos == 0) {
1523 uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes;
1524 uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1525 ULONG skip, writelen;
1526
1527 i = startoffstripe;
1528 while (stripe != parity1) {
1529 if (i == startoffstripe) {
1530 writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1531
1532 stripes[stripe].start = startoff;
1533 stripes[stripe].end = startoff + writelen;
1534
1535 pos += writelen;
1536
1537 if (pos == length)
1538 break;
1539 } else {
1540 writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1541
1542 stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1543 stripes[stripe].end = stripes[stripe].start + writelen;
1544
1545 pos += writelen;
1546
1547 if (pos == length)
1548 break;
1549 }
1550
1551 i++;
1552 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1553 }
1554
1555 if (pos == length)
1556 break;
1557
1558 for (i = 0; i < startoffstripe; i++) {
1559 stripe = (parity1 + i + 2) % c->chunk_item->num_stripes;
1560
1561 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1562 }
1563
1564 stripes[parity1].start = stripes[parity1].end = stripes[parity2].start = stripes[parity2].end =
1565 startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1566
1567 if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1568 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1569
1570 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1571 stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1572 }
1573
1574 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1575 }
1576 } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1577 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1578 stripes[i].end += c->chunk_item->stripe_length;
1579 }
1580
1581 pos += c->chunk_item->stripe_length * num_data_stripes;
1582 } else {
1583 uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1584
1585 i = 0;
1586 while (stripe != parity1) {
1587 if (endoffstripe == i) {
1588 stripes[stripe].end = endoff + 1;
1589 break;
1590 } else if (endoffstripe > i)
1591 stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1592
1593 i++;
1594 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1595 }
1596
1597 break;
1598 }
1599 }
1600
1601 parity_start = 0xffffffffffffffff;
1602 parity_end = 0;
1603
1604 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1605 if (stripes[i].start != 0 || stripes[i].end != 0) {
1606 parity_start = min(stripes[i].start, parity_start);
1607 parity_end = max(stripes[i].end, parity_end);
1608 }
1609 }
1610
1611 if (parity_end == parity_start) {
1612 Status = STATUS_SUCCESS;
1613 goto exit;
1614 }
1615
1616 parity1 = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1617 stripes[parity1].start = stripes[(parity1 + 1) % c->chunk_item->num_stripes].start = parity_start;
1618
1619 parity1 = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1620 stripes[parity1].end = stripes[(parity1 + 1) % c->chunk_item->num_stripes].end = parity_end;
1621
1622 log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1623 if (!log_stripes) {
1624 ERR("out of memory\n");
1625 Status = STATUS_INSUFFICIENT_RESOURCES;
1626 goto exit;
1627 }
1628
1629 RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1630
1631 for (i = 0; i < num_data_stripes; i++) {
1632 log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1633 if (!log_stripes[i].mdl) {
1634 ERR("out of memory\n");
1635 Status = STATUS_INSUFFICIENT_RESOURCES;
1636 goto exit;
1637 }
1638
1639 log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1640 log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1641 }
1642
1643 wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1644 if (!wtc->parity1) {
1645 ERR("out of memory\n");
1646 Status = STATUS_INSUFFICIENT_RESOURCES;
1647 goto exit;
1648 }
1649
1650 wtc->parity2 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1651 if (!wtc->parity2) {
1652 ERR("out of memory\n");
1653 Status = STATUS_INSUFFICIENT_RESOURCES;
1654 goto exit;
1655 }
1656
1657 wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1658 if (!wtc->parity1_mdl) {
1659 ERR("out of memory\n");
1660 Status = STATUS_INSUFFICIENT_RESOURCES;
1661 goto exit;
1662 }
1663
1664 MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1665
1666 wtc->parity2_mdl = IoAllocateMdl(wtc->parity2, (ULONG)(parity_end - parity_start), false, false, NULL);
1667 if (!wtc->parity2_mdl) {
1668 ERR("out of memory\n");
1669 Status = STATUS_INSUFFICIENT_RESOURCES;
1670 goto exit;
1671 }
1672
1673 MmBuildMdlForNonPagedPool(wtc->parity2_mdl);
1674
1675 if (file_write)
1676 master_mdl = Irp->MdlAddress;
1677 else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1678 wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1679 if (!wtc->scratch) {
1680 ERR("out of memory\n");
1681 Status = STATUS_INSUFFICIENT_RESOURCES;
1682 goto exit;
1683 }
1684
1685 RtlCopyMemory(wtc->scratch, data, length);
1686
1687 master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1688 if (!master_mdl) {
1689 ERR("out of memory\n");
1690 Status = STATUS_INSUFFICIENT_RESOURCES;
1691 goto exit;
1692 }
1693
1694 MmBuildMdlForNonPagedPool(master_mdl);
1695
1696 wtc->mdl = master_mdl;
1697 } else {
1698 master_mdl = IoAllocateMdl(data, length, false, false, NULL);
1699 if (!master_mdl) {
1700 ERR("out of memory\n");
1701 Status = STATUS_INSUFFICIENT_RESOURCES;
1702 goto exit;
1703 }
1704
1705 Status = STATUS_SUCCESS;
1706
1707 _SEH2_TRY {
1708 MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1709 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1710 Status = _SEH2_GetExceptionCode();
1711 } _SEH2_END;
1712
1713 if (!NT_SUCCESS(Status)) {
1714 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1715 IoFreeMdl(master_mdl);
1716 goto exit;
1717 }
1718
1719 wtc->mdl = master_mdl;
1720 }
1721
1722 pfns = (PFN_NUMBER*)(master_mdl + 1);
1723 parity1_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1724 parity2_pfns = (PFN_NUMBER*)(wtc->parity2_mdl + 1);
1725
1726 if (file_write)
1727 pfns = &pfns[irp_offset >> PAGE_SHIFT];
1728
1729 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1730 if (stripes[i].start != stripes[i].end) {
1731 stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1732 if (!stripes[i].mdl) {
1733 ERR("IoAllocateMdl failed\n");
1734 Status = STATUS_INSUFFICIENT_RESOURCES;
1735 goto exit;
1736 }
1737 }
1738 }
1739
1740 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1741 if (!stripeoff) {
1742 ERR("out of memory\n");
1743 Status = STATUS_INSUFFICIENT_RESOURCES;
1744 goto exit;
1745 }
1746
1747 RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1748
1749 pos = 0;
1750 parity_pos = 0;
1751
1752 while (pos < length) {
1753 PFN_NUMBER* stripe_pfns;
1754
1755 parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1756
1757 if (pos == 0) {
1758 uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes, parity2;
1759 uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1760 c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1761 uint32_t maxwritelen = writelen;
1762
1763 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1764
1765 RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1766
1767 RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1768 log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1769
1770 stripeoff[stripe] = writelen;
1771 pos += writelen;
1772
1773 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1774 i = startoffstripe + 1;
1775
1776 while (stripe != parity1) {
1777 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1778 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1779
1780 if (writelen == 0)
1781 break;
1782
1783 if (writelen > maxwritelen)
1784 maxwritelen = writelen;
1785
1786 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1787
1788 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1789 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1790
1791 stripeoff[stripe] = writelen;
1792 pos += writelen;
1793
1794 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1795 i++;
1796 }
1797
1798 stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1799 RtlCopyMemory(stripe_pfns, parity1_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1800 stripeoff[parity1] = maxwritelen;
1801
1802 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1803
1804 stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1805 RtlCopyMemory(stripe_pfns, parity2_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1806 stripeoff[parity2] = maxwritelen;
1807
1808 parity_pos = maxwritelen;
1809 } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1810 uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1811
1812 i = 0;
1813 while (stripe != parity1) {
1814 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1815
1816 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1817
1818 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1819 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1820
1821 stripeoff[stripe] += c->chunk_item->stripe_length;
1822 pos += c->chunk_item->stripe_length;
1823
1824 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1825 i++;
1826 }
1827
1828 stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1829 RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1830 stripeoff[parity1] += c->chunk_item->stripe_length;
1831
1832 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1833
1834 stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1835 RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1836 stripeoff[parity2] += c->chunk_item->stripe_length;
1837
1838 parity_pos += c->chunk_item->stripe_length;
1839 } else {
1840 uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1841 uint32_t writelen, maxwritelen = 0;
1842
1843 i = 0;
1844 while (pos < length) {
1845 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1846 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1847
1848 if (writelen == 0)
1849 break;
1850
1851 if (writelen > maxwritelen)
1852 maxwritelen = writelen;
1853
1854 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1855
1856 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1857 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1858
1859 stripeoff[stripe] += writelen;
1860 pos += writelen;
1861
1862 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1863 i++;
1864 }
1865
1866 stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1867 RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1868
1869 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1870
1871 stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1872 RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1873 }
1874 }
1875
1876 for (i = 0; i < num_data_stripes; i++) {
1877 uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[c->chunk_item->num_stripes - 3 - i].mdl, priority);
1878
1879 if (i == 0) {
1880 RtlCopyMemory(wtc->parity1, ss, (ULONG)(parity_end - parity_start));
1881 RtlCopyMemory(wtc->parity2, ss, (ULONG)(parity_end - parity_start));
1882 } else {
1883 do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1884
1885 galois_double(wtc->parity2, (uint32_t)(parity_end - parity_start));
1886 do_xor(wtc->parity2, ss, (uint32_t)(parity_end - parity_start));
1887 }
1888 }
1889
1890 Status = STATUS_SUCCESS;
1891
1892 exit:
1893 if (log_stripes) {
1894 for (i = 0; i < num_data_stripes; i++) {
1895 if (log_stripes[i].mdl)
1896 IoFreeMdl(log_stripes[i].mdl);
1897 }
1898
1899 ExFreePool(log_stripes);
1900 }
1901
1902 if (stripeoff)
1903 ExFreePool(stripeoff);
1904
1905 return Status;
1906 }
1907
1908 __attribute__((nonnull(1,3,5)))
write_data(_In_ device_extension * Vcb,_In_ uint64_t address,_In_reads_bytes_ (length)void * data,_In_ uint32_t length,_In_ write_data_context * wtc,_In_opt_ PIRP Irp,_In_opt_ chunk * c,_In_ bool file_write,_In_ uint64_t irp_offset,_In_ ULONG priority)1909 NTSTATUS write_data(_In_ device_extension* Vcb, _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_data_context* wtc,
1910 _In_opt_ PIRP Irp, _In_opt_ chunk* c, _In_ bool file_write, _In_ uint64_t irp_offset, _In_ ULONG priority) {
1911 NTSTATUS Status;
1912 uint32_t i;
1913 CHUNK_ITEM_STRIPE* cis;
1914 write_stripe* stripes = NULL;
1915 uint64_t total_writing = 0;
1916 ULONG allowed_missing, missing;
1917
1918 TRACE("(%p, %I64x, %p, %x)\n", Vcb, address, data, length);
1919
1920 if (!c) {
1921 c = get_chunk_from_address(Vcb, address);
1922 if (!c) {
1923 ERR("could not get chunk for address %I64x\n", address);
1924 return STATUS_INTERNAL_ERROR;
1925 }
1926 }
1927
1928 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1929 if (!stripes) {
1930 ERR("out of memory\n");
1931 return STATUS_INSUFFICIENT_RESOURCES;
1932 }
1933
1934 RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
1935
1936 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1937
1938 if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
1939 Status = prepare_raid0_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1940 if (!NT_SUCCESS(Status)) {
1941 ERR("prepare_raid0_write returned %08lx\n", Status);
1942 goto prepare_failed;
1943 }
1944
1945 allowed_missing = 0;
1946 } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
1947 Status = prepare_raid10_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1948 if (!NT_SUCCESS(Status)) {
1949 ERR("prepare_raid10_write returned %08lx\n", Status);
1950 goto prepare_failed;
1951 }
1952
1953 allowed_missing = 1;
1954 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
1955 Status = prepare_raid5_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1956 if (!NT_SUCCESS(Status)) {
1957 ERR("prepare_raid5_write returned %08lx\n", Status);
1958 goto prepare_failed;
1959 }
1960
1961 allowed_missing = 1;
1962 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
1963 Status = prepare_raid6_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1964 if (!NT_SUCCESS(Status)) {
1965 ERR("prepare_raid6_write returned %08lx\n", Status);
1966 goto prepare_failed;
1967 }
1968
1969 allowed_missing = 2;
1970 } else { // write same data to every location - SINGLE, DUP, RAID1, RAID1C3, RAID1C4
1971 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1972 stripes[i].start = address - c->offset;
1973 stripes[i].end = stripes[i].start + length;
1974 stripes[i].data = data;
1975 stripes[i].irp_offset = irp_offset;
1976
1977 if (c->devices[i]->devobj) {
1978 if (file_write) {
1979 uint8_t* va;
1980 ULONG writelen = (ULONG)(stripes[i].end - stripes[i].start);
1981
1982 va = (uint8_t*)MmGetMdlVirtualAddress(Irp->MdlAddress) + stripes[i].irp_offset;
1983
1984 stripes[i].mdl = IoAllocateMdl(va, writelen, false, false, NULL);
1985 if (!stripes[i].mdl) {
1986 ERR("IoAllocateMdl failed\n");
1987 Status = STATUS_INSUFFICIENT_RESOURCES;
1988 goto prepare_failed;
1989 }
1990
1991 IoBuildPartialMdl(Irp->MdlAddress, stripes[i].mdl, va, writelen);
1992 } else {
1993 stripes[i].mdl = IoAllocateMdl(stripes[i].data, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1994 if (!stripes[i].mdl) {
1995 ERR("IoAllocateMdl failed\n");
1996 Status = STATUS_INSUFFICIENT_RESOURCES;
1997 goto prepare_failed;
1998 }
1999
2000 Status = STATUS_SUCCESS;
2001
2002 _SEH2_TRY {
2003 MmProbeAndLockPages(stripes[i].mdl, KernelMode, IoReadAccess);
2004 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2005 Status = _SEH2_GetExceptionCode();
2006 } _SEH2_END;
2007
2008 if (!NT_SUCCESS(Status)) {
2009 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2010 IoFreeMdl(stripes[i].mdl);
2011 stripes[i].mdl = NULL;
2012 goto prepare_failed;
2013 }
2014 }
2015 }
2016 }
2017
2018 allowed_missing = c->chunk_item->num_stripes - 1;
2019 }
2020
2021 missing = 0;
2022 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2023 if (!c->devices[i]->devobj)
2024 missing++;
2025 }
2026
2027 if (missing > allowed_missing) {
2028 ERR("cannot write as %lu missing devices (maximum %lu)\n", missing, allowed_missing);
2029 Status = STATUS_DEVICE_NOT_READY;
2030 goto prepare_failed;
2031 }
2032
2033 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2034 write_data_stripe* stripe;
2035 PIO_STACK_LOCATION IrpSp;
2036
2037 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
2038 if (!stripe) {
2039 ERR("out of memory\n");
2040 Status = STATUS_INSUFFICIENT_RESOURCES;
2041 goto end;
2042 }
2043
2044 if (stripes[i].start == stripes[i].end || !c->devices[i]->devobj) {
2045 stripe->status = WriteDataStatus_Ignore;
2046 stripe->Irp = NULL;
2047 stripe->buf = stripes[i].data;
2048 stripe->mdl = NULL;
2049 } else {
2050 stripe->context = (struct _write_data_context*)wtc;
2051 stripe->buf = stripes[i].data;
2052 stripe->device = c->devices[i];
2053 RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
2054 stripe->status = WriteDataStatus_Pending;
2055 stripe->mdl = stripes[i].mdl;
2056
2057 if (!Irp) {
2058 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, false);
2059
2060 if (!stripe->Irp) {
2061 ERR("IoAllocateIrp failed\n");
2062 ExFreePool(stripe);
2063 Status = STATUS_INSUFFICIENT_RESOURCES;
2064 goto end;
2065 }
2066 } else {
2067 stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
2068
2069 if (!stripe->Irp) {
2070 ERR("IoMakeAssociatedIrp failed\n");
2071 ExFreePool(stripe);
2072 Status = STATUS_INSUFFICIENT_RESOURCES;
2073 goto end;
2074 }
2075 }
2076
2077 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2078 IrpSp->MajorFunction = IRP_MJ_WRITE;
2079 IrpSp->FileObject = stripe->device->fileobj;
2080
2081 if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
2082 stripe->Irp->AssociatedIrp.SystemBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2083
2084 stripe->Irp->Flags = IRP_BUFFERED_IO;
2085 } else if (stripe->device->devobj->Flags & DO_DIRECT_IO)
2086 stripe->Irp->MdlAddress = stripe->mdl;
2087 else
2088 stripe->Irp->UserBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2089
2090 #ifdef DEBUG_PARANOID
2091 if (stripes[i].end < stripes[i].start) {
2092 ERR("trying to write stripe with negative length (%I64x < %I64x)\n", stripes[i].end, stripes[i].start);
2093 int3;
2094 }
2095 #endif
2096
2097 IrpSp->Parameters.Write.Length = (ULONG)(stripes[i].end - stripes[i].start);
2098 IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset;
2099
2100 total_writing += IrpSp->Parameters.Write.Length;
2101
2102 stripe->Irp->UserIosb = &stripe->iosb;
2103 wtc->stripes_left++;
2104
2105 IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, true, true, true);
2106 }
2107
2108 InsertTailList(&wtc->stripes, &stripe->list_entry);
2109 }
2110
2111 if (diskacc)
2112 fFsRtlUpdateDiskCounters(0, total_writing);
2113
2114 Status = STATUS_SUCCESS;
2115
2116 end:
2117
2118 if (stripes) ExFreePool(stripes);
2119
2120 if (!NT_SUCCESS(Status))
2121 free_write_data_stripes(wtc);
2122
2123 return Status;
2124
2125 prepare_failed:
2126 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2127 if (stripes[i].mdl && (i == 0 || stripes[i].mdl != stripes[i-1].mdl)) {
2128 if (stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2129 MmUnlockPages(stripes[i].mdl);
2130
2131 IoFreeMdl(stripes[i].mdl);
2132 }
2133 }
2134
2135 if (wtc->parity1_mdl) {
2136 if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2137 MmUnlockPages(wtc->parity1_mdl);
2138
2139 IoFreeMdl(wtc->parity1_mdl);
2140 wtc->parity1_mdl = NULL;
2141 }
2142
2143 if (wtc->parity2_mdl) {
2144 if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2145 MmUnlockPages(wtc->parity2_mdl);
2146
2147 IoFreeMdl(wtc->parity2_mdl);
2148 wtc->parity2_mdl = NULL;
2149 }
2150
2151 if (wtc->mdl) {
2152 if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2153 MmUnlockPages(wtc->mdl);
2154
2155 IoFreeMdl(wtc->mdl);
2156 wtc->mdl = NULL;
2157 }
2158
2159 if (wtc->parity1) {
2160 ExFreePool(wtc->parity1);
2161 wtc->parity1 = NULL;
2162 }
2163
2164 if (wtc->parity2) {
2165 ExFreePool(wtc->parity2);
2166 wtc->parity2 = NULL;
2167 }
2168
2169 if (wtc->scratch) {
2170 ExFreePool(wtc->scratch);
2171 wtc->scratch = NULL;
2172 }
2173
2174 ExFreePool(stripes);
2175 return Status;
2176 }
2177
2178 __attribute__((nonnull(1,4,5)))
get_raid56_lock_range(chunk * c,uint64_t address,uint64_t length,uint64_t * lockaddr,uint64_t * locklen)2179 void get_raid56_lock_range(chunk* c, uint64_t address, uint64_t length, uint64_t* lockaddr, uint64_t* locklen) {
2180 uint64_t startoff, endoff;
2181 uint16_t startoffstripe, endoffstripe, datastripes;
2182
2183 datastripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
2184
2185 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, datastripes, &startoff, &startoffstripe);
2186 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, datastripes, &endoff, &endoffstripe);
2187
2188 startoff -= startoff % c->chunk_item->stripe_length;
2189 endoff = sector_align(endoff, c->chunk_item->stripe_length);
2190
2191 *lockaddr = c->offset + (startoff * datastripes);
2192 *locklen = (endoff - startoff) * datastripes;
2193 }
2194
2195 __attribute__((nonnull(1,3)))
write_data_complete(device_extension * Vcb,uint64_t address,void * data,uint32_t length,PIRP Irp,chunk * c,bool file_write,uint64_t irp_offset,ULONG priority)2196 NTSTATUS write_data_complete(device_extension* Vcb, uint64_t address, void* data, uint32_t length, PIRP Irp, chunk* c, bool file_write, uint64_t irp_offset, ULONG priority) {
2197 write_data_context wtc;
2198 NTSTATUS Status;
2199 uint64_t lockaddr, locklen;
2200
2201 KeInitializeEvent(&wtc.Event, NotificationEvent, false);
2202 InitializeListHead(&wtc.stripes);
2203 wtc.stripes_left = 0;
2204 wtc.parity1 = wtc.parity2 = wtc.scratch = NULL;
2205 wtc.mdl = wtc.parity1_mdl = wtc.parity2_mdl = NULL;
2206
2207 if (!c) {
2208 c = get_chunk_from_address(Vcb, address);
2209 if (!c) {
2210 ERR("could not get chunk for address %I64x\n", address);
2211 return STATUS_INTERNAL_ERROR;
2212 }
2213 }
2214
2215 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
2216 get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
2217 chunk_lock_range(Vcb, c, lockaddr, locklen);
2218 }
2219
2220 _SEH2_TRY {
2221 Status = write_data(Vcb, address, data, length, &wtc, Irp, c, file_write, irp_offset, priority);
2222 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2223 Status = _SEH2_GetExceptionCode();
2224 } _SEH2_END;
2225
2226 if (!NT_SUCCESS(Status)) {
2227 ERR("write_data returned %08lx\n", Status);
2228
2229 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2230 chunk_unlock_range(Vcb, c, lockaddr, locklen);
2231
2232 free_write_data_stripes(&wtc);
2233 return Status;
2234 }
2235
2236 if (wtc.stripes.Flink != &wtc.stripes) {
2237 // launch writes and wait
2238 LIST_ENTRY* le = wtc.stripes.Flink;
2239 bool no_wait = true;
2240
2241 while (le != &wtc.stripes) {
2242 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2243
2244 if (stripe->status != WriteDataStatus_Ignore) {
2245 IoCallDriver(stripe->device->devobj, stripe->Irp);
2246 no_wait = false;
2247 }
2248
2249 le = le->Flink;
2250 }
2251
2252 if (!no_wait)
2253 KeWaitForSingleObject(&wtc.Event, Executive, KernelMode, false, NULL);
2254
2255 le = wtc.stripes.Flink;
2256 while (le != &wtc.stripes) {
2257 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2258
2259 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
2260 Status = stripe->iosb.Status;
2261
2262 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2263 break;
2264 }
2265
2266 le = le->Flink;
2267 }
2268
2269 free_write_data_stripes(&wtc);
2270 }
2271
2272 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2273 chunk_unlock_range(Vcb, c, lockaddr, locklen);
2274
2275 return Status;
2276 }
2277
2278 __attribute__((nonnull(2,3)))
_Function_class_(IO_COMPLETION_ROUTINE)2279 _Function_class_(IO_COMPLETION_ROUTINE)
2280 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2281 write_data_stripe* stripe = conptr;
2282 write_data_context* context = (write_data_context*)stripe->context;
2283 LIST_ENTRY* le;
2284
2285 UNUSED(DeviceObject);
2286
2287 // FIXME - we need a lock here
2288
2289 if (stripe->status == WriteDataStatus_Cancelling) {
2290 stripe->status = WriteDataStatus_Cancelled;
2291 goto end;
2292 }
2293
2294 stripe->iosb = Irp->IoStatus;
2295
2296 if (NT_SUCCESS(Irp->IoStatus.Status)) {
2297 stripe->status = WriteDataStatus_Success;
2298 } else {
2299 le = context->stripes.Flink;
2300
2301 stripe->status = WriteDataStatus_Error;
2302
2303 while (le != &context->stripes) {
2304 write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2305
2306 if (s2->status == WriteDataStatus_Pending) {
2307 s2->status = WriteDataStatus_Cancelling;
2308 IoCancelIrp(s2->Irp);
2309 }
2310
2311 le = le->Flink;
2312 }
2313 }
2314
2315 end:
2316 if (InterlockedDecrement(&context->stripes_left) == 0)
2317 KeSetEvent(&context->Event, 0, false);
2318
2319 return STATUS_MORE_PROCESSING_REQUIRED;
2320 }
2321
2322 __attribute__((nonnull(1)))
free_write_data_stripes(write_data_context * wtc)2323 void free_write_data_stripes(write_data_context* wtc) {
2324 LIST_ENTRY* le;
2325 PMDL last_mdl = NULL;
2326
2327 if (wtc->parity1_mdl) {
2328 if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2329 MmUnlockPages(wtc->parity1_mdl);
2330
2331 IoFreeMdl(wtc->parity1_mdl);
2332 }
2333
2334 if (wtc->parity2_mdl) {
2335 if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2336 MmUnlockPages(wtc->parity2_mdl);
2337
2338 IoFreeMdl(wtc->parity2_mdl);
2339 }
2340
2341 if (wtc->mdl) {
2342 if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2343 MmUnlockPages(wtc->mdl);
2344
2345 IoFreeMdl(wtc->mdl);
2346 }
2347
2348 if (wtc->parity1)
2349 ExFreePool(wtc->parity1);
2350
2351 if (wtc->parity2)
2352 ExFreePool(wtc->parity2);
2353
2354 if (wtc->scratch)
2355 ExFreePool(wtc->scratch);
2356
2357 le = wtc->stripes.Flink;
2358 while (le != &wtc->stripes) {
2359 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2360
2361 if (stripe->mdl && stripe->mdl != last_mdl) {
2362 if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2363 MmUnlockPages(stripe->mdl);
2364
2365 IoFreeMdl(stripe->mdl);
2366 }
2367
2368 last_mdl = stripe->mdl;
2369
2370 if (stripe->Irp)
2371 IoFreeIrp(stripe->Irp);
2372
2373 le = le->Flink;
2374 }
2375
2376 while (!IsListEmpty(&wtc->stripes)) {
2377 write_data_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&wtc->stripes), write_data_stripe, list_entry);
2378
2379 ExFreePool(stripe);
2380 }
2381 }
2382
2383 __attribute__((nonnull(1,2,3)))
add_extent(_In_ fcb * fcb,_In_ LIST_ENTRY * prevextle,_In_ __drv_aliasesMem extent * newext)2384 void add_extent(_In_ fcb* fcb, _In_ LIST_ENTRY* prevextle, _In_ __drv_aliasesMem extent* newext) {
2385 LIST_ENTRY* le = prevextle->Flink;
2386
2387 while (le != &fcb->extents) {
2388 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2389
2390 if (ext->offset >= newext->offset) {
2391 InsertHeadList(ext->list_entry.Blink, &newext->list_entry);
2392 return;
2393 }
2394
2395 le = le->Flink;
2396 }
2397
2398 InsertTailList(&fcb->extents, &newext->list_entry);
2399 }
2400
/*
 * Cuts the byte range [start_data, end_data) out of fcb's in-memory extent
 * list.
 *
 * Every non-ignored extent that overlaps the range is retired through
 * remove_fcb_extent. When only part of an extent is covered, one or two
 * replacement extents describing the part(s) that survive are allocated and
 * linked into the list next to it. fcb->inode_item.st_blocks is reduced by
 * the number of bytes removed from inline extents and from extents whose
 * EXTENT_DATA2 size is non-zero.
 *
 * An inline extent can only be removed whole; a partial overlap with one
 * makes the function return STATUS_INTERNAL_ERROR immediately.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES when an allocation
 * fails, STATUS_INTERNAL_ERROR as described above, or the failure status of
 * update_changed_extent_ref. Apart from the inline early return, the fcb is
 * flagged extents_changed and marked dirty on every exit path, including
 * failures.
 */
__attribute__((nonnull(1,2,6)))
NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t end_data, PIRP Irp, LIST_ENTRY* rollback) {
    NTSTATUS Status;
    LIST_ENTRY* le;

    le = fcb->extents.Flink;

    while (le != &fcb->extents) {
        // the successor is captured before ext is processed, because new
        // entries may be linked in after ext below
        LIST_ENTRY* le2 = le->Flink;
        extent* ext = CONTAINING_RECORD(le, extent, list_entry);

        if (!ext->ignore) {
            EXTENT_DATA* ed = &ext->extent_data;
            uint64_t len;

            // length of file data this extent covers
            if (ed->type == EXTENT_TYPE_INLINE)
                len = ed->decoded_size;
            else
                len = ((EXTENT_DATA2*)ed->data)->num_bytes;

            // does [ext->offset, ext->offset + len) overlap [start_data, end_data)?
            if (ext->offset < end_data && ext->offset + len > start_data) {
                if (ed->type == EXTENT_TYPE_INLINE) {
                    if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
                        remove_fcb_extent(fcb, ext, rollback);

                        fcb->inode_item.st_blocks -= len;
                        fcb->inode_item_changed = true;
                    } else {
                        ERR("trying to split inline extent\n");
#ifdef DEBUG_PARANOID
                        int3;
#endif
                        // NOTE: returns directly, bypassing the end: label
                        return STATUS_INTERNAL_ERROR;
                    }
                } else {
                    EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

                    if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
                        // a size of 0 means there is nothing on disk to account for
                        if (ed2->size != 0) {
                            chunk* c;

                            fcb->inode_item.st_blocks -= len;
                            fcb->inode_item_changed = true;

                            c = get_chunk_from_address(Vcb, ed2->address);

                            if (!c) {
                                // logged only; the extent is still removed below
                                ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
                            } else {
                                // called with a count of -1: presumably drops this file's
                                // reference to the on-disk extent - confirm against
                                // update_changed_extent_ref
                                Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
                                                                   fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("update_changed_extent_ref returned %08lx\n", Status);
                                    goto end;
                                }
                            }
                        }

                        remove_fcb_extent(fcb, ext, rollback);
                    } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
                        EXTENT_DATA2* ned2;
                        extent* newext;

                        if (ed2->size != 0) {
                            fcb->inode_item.st_blocks -= end_data - ext->offset;
                            fcb->inode_item_changed = true;
                        }

                        // replacement extent describing the surviving tail [end_data, ext->offset + len)
                        newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                        if (!newext) {
                            ERR("out of memory\n");
                            Status = STATUS_INSUFFICIENT_RESOURCES;
                            goto end;
                        }

                        ned2 = (EXTENT_DATA2*)newext->extent_data.data;

                        newext->extent_data.generation = Vcb->superblock.generation;
                        newext->extent_data.decoded_size = ed->decoded_size;
                        newext->extent_data.compression = ed->compression;
                        newext->extent_data.encryption = ed->encryption;
                        newext->extent_data.encoding = ed->encoding;
                        newext->extent_data.type = ed->type;
                        // same on-disk extent, entered further in and shorter by the removed bytes
                        ned2->address = ed2->address;
                        ned2->size = ed2->size;
                        ned2->offset = ed2->offset + (end_data - ext->offset);
                        ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);

                        newext->offset = end_data;
                        newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
                        newext->unique = ext->unique;
                        newext->ignore = false;
                        newext->inserted = true;

                        if (ext->csum) {
                            if (ed->compression == BTRFS_COMPRESSION_NONE) {
                                // uncompressed: csum_size bytes of checksum per (1 << sector_shift)
                                // bytes of data, so take only the part matching the surviving tail
                                newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
                                if (!newext->csum) {
                                    ERR("out of memory\n");
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    ExFreePool(newext);
                                    goto end;
                                }

                                RtlCopyMemory(newext->csum, (uint8_t*)ext->csum + (((end_data - ext->offset) * Vcb->csum_size) >> Vcb->sector_shift),
                                              (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
                            } else {
                                // compressed: the checksum array is sized by ed2->size and is
                                // copied whole
                                newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
                                if (!newext->csum) {
                                    ERR("out of memory\n");
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    ExFreePool(newext);
                                    goto end;
                                }

                                RtlCopyMemory(newext->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
                            }
                        } else
                            newext->csum = NULL;

                        add_extent(fcb, &ext->list_entry, newext);

                        remove_fcb_extent(fcb, ext, rollback);
                    } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
                        EXTENT_DATA2* ned2;
                        extent* newext;

                        if (ed2->size != 0) {
                            fcb->inode_item.st_blocks -= ext->offset + len - start_data;
                            fcb->inode_item_changed = true;
                        }

                        // replacement extent describing the surviving head [ext->offset, start_data)
                        newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                        if (!newext) {
                            ERR("out of memory\n");
                            Status = STATUS_INSUFFICIENT_RESOURCES;
                            goto end;
                        }

                        ned2 = (EXTENT_DATA2*)newext->extent_data.data;

                        newext->extent_data.generation = Vcb->superblock.generation;
                        newext->extent_data.decoded_size = ed->decoded_size;
                        newext->extent_data.compression = ed->compression;
                        newext->extent_data.encryption = ed->encryption;
                        newext->extent_data.encoding = ed->encoding;
                        newext->extent_data.type = ed->type;
                        ned2->address = ed2->address;
                        ned2->size = ed2->size;
                        ned2->offset = ed2->offset;
                        ned2->num_bytes = start_data - ext->offset;

                        newext->offset = ext->offset;
                        newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
                        newext->unique = ext->unique;
                        newext->ignore = false;
                        newext->inserted = true;

                        if (ext->csum) {
                            if (ed->compression == BTRFS_COMPRESSION_NONE) {
                                // uncompressed: keep the leading checksums only
                                newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
                                if (!newext->csum) {
                                    ERR("out of memory\n");
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    ExFreePool(newext);
                                    goto end;
                                }

                                RtlCopyMemory(newext->csum, ext->csum, (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
                            } else {
                                // compressed: copy the whole checksum array
                                newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
                                if (!newext->csum) {
                                    ERR("out of memory\n");
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    ExFreePool(newext);
                                    goto end;
                                }

                                RtlCopyMemory(newext->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
                            }
                        } else
                            newext->csum = NULL;

                        // newext has the same file offset as ext; link it directly after ext
                        InsertHeadList(&ext->list_entry, &newext->list_entry);

                        remove_fcb_extent(fcb, ext, rollback);
                    } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
                        EXTENT_DATA2 *neda2, *nedb2;
                        extent *newext1, *newext2;

                        if (ed2->size != 0) {
                            chunk* c;

                            fcb->inode_item.st_blocks -= end_data - start_data;
                            fcb->inode_item_changed = true;

                            c = get_chunk_from_address(Vcb, ed2->address);

                            if (!c) {
                                // logged only; the split still goes ahead
                                ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
                            } else {
                                // called with a count of 1: one extent is about to become two
                                // that point at the same on-disk extent, so presumably this
                                // adds a reference - confirm against update_changed_extent_ref
                                Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
                                                                   fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("update_changed_extent_ref returned %08lx\n", Status);
                                    goto end;
                                }
                            }
                        }

                        // newext1 takes the head [ext->offset, start_data),
                        // newext2 takes the tail [end_data, ext->offset + len)
                        newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                        if (!newext1) {
                            ERR("out of memory\n");
                            Status = STATUS_INSUFFICIENT_RESOURCES;
                            goto end;
                        }

                        newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                        if (!newext2) {
                            ERR("out of memory\n");
                            Status = STATUS_INSUFFICIENT_RESOURCES;
                            ExFreePool(newext1);
                            goto end;
                        }

                        neda2 = (EXTENT_DATA2*)newext1->extent_data.data;

                        newext1->extent_data.generation = Vcb->superblock.generation;
                        newext1->extent_data.decoded_size = ed->decoded_size;
                        newext1->extent_data.compression = ed->compression;
                        newext1->extent_data.encryption = ed->encryption;
                        newext1->extent_data.encoding = ed->encoding;
                        newext1->extent_data.type = ed->type;
                        neda2->address = ed2->address;
                        neda2->size = ed2->size;
                        neda2->offset = ed2->offset;
                        neda2->num_bytes = start_data - ext->offset;

                        nedb2 = (EXTENT_DATA2*)newext2->extent_data.data;

                        newext2->extent_data.generation = Vcb->superblock.generation;
                        newext2->extent_data.decoded_size = ed->decoded_size;
                        newext2->extent_data.compression = ed->compression;
                        newext2->extent_data.encryption = ed->encryption;
                        newext2->extent_data.encoding = ed->encoding;
                        newext2->extent_data.type = ed->type;
                        nedb2->address = ed2->address;
                        nedb2->size = ed2->size;
                        nedb2->offset = ed2->offset + (end_data - ext->offset);
                        nedb2->num_bytes = ext->offset + len - end_data;

                        newext1->offset = ext->offset;
                        newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
                        newext1->unique = ext->unique;
                        newext1->ignore = false;
                        newext1->inserted = true;

                        newext2->offset = end_data;
                        newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
                        newext2->unique = ext->unique;
                        newext2->ignore = false;
                        newext2->inserted = true;

                        if (ext->csum) {
                            if (ed->compression == BTRFS_COMPRESSION_NONE) {
                                // uncompressed: each half gets the checksums for its own bytes
                                newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((neda2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
                                if (!newext1->csum) {
                                    ERR("out of memory\n");
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    ExFreePool(newext1);
                                    ExFreePool(newext2);
                                    goto end;
                                }

                                newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((nedb2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
                                if (!newext2->csum) {
                                    ERR("out of memory\n");
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    ExFreePool(newext1->csum);
                                    ExFreePool(newext1);
                                    ExFreePool(newext2);
                                    goto end;
                                }

                                RtlCopyMemory(newext1->csum, ext->csum, (ULONG)((neda2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
                                RtlCopyMemory(newext2->csum, (uint8_t*)ext->csum + (((end_data - ext->offset) * Vcb->csum_size) >> Vcb->sector_shift),
                                              (ULONG)((nedb2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
                            } else {
                                // compressed: both halves get a full copy of the checksum array
                                newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
                                if (!newext1->csum) {
                                    ERR("out of memory\n");
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    ExFreePool(newext1);
                                    ExFreePool(newext2);
                                    goto end;
                                }

                                newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
                                if (!newext2->csum) {
                                    ERR("out of memory\n");
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    ExFreePool(newext1->csum);
                                    ExFreePool(newext1);
                                    ExFreePool(newext2);
                                    goto end;
                                }

                                RtlCopyMemory(newext1->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
                                RtlCopyMemory(newext2->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
                            }
                        } else {
                            newext1->csum = NULL;
                            newext2->csum = NULL;
                        }

                        // head goes directly after ext; tail is placed by offset after the head
                        InsertHeadList(&ext->list_entry, &newext1->list_entry);
                        add_extent(fcb, &newext1->list_entry, newext2);

                        remove_fcb_extent(fcb, ext, rollback);
                    }
                }
            }
        }

        le = le2;
    }

    Status = STATUS_SUCCESS;

end:
    // reached on success and on every "goto end" failure alike
    fcb->extents_changed = true;
    mark_fcb_dirty(fcb);

    return Status;
}
2736
2737 __attribute__((nonnull(1,2,3)))
add_insert_extent_rollback(LIST_ENTRY * rollback,fcb * fcb,extent * ext)2738 static void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) {
2739 rollback_extent* re;
2740
2741 re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2742 if (!re) {
2743 ERR("out of memory\n");
2744 return;
2745 }
2746
2747 re->fcb = fcb;
2748 re->ext = ext;
2749
2750 add_rollback(rollback, ROLLBACK_INSERT_EXTENT, re);
2751 }
2752
2753 #ifdef _MSC_VER
2754 #pragma warning(push)
2755 #pragma warning(suppress: 28194)
2756 #endif
2757 __attribute__((nonnull(1,3,7)))
2758 NTSTATUS add_extent_to_fcb(_In_ fcb* fcb, _In_ uint64_t offset, _In_reads_bytes_(edsize) EXTENT_DATA* ed, _In_ uint16_t edsize,
2759 _In_ bool unique, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* csum, _In_ LIST_ENTRY* rollback) {
2760 extent* ext;
2761 LIST_ENTRY* le;
2762
2763 ext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + edsize, ALLOC_TAG);
2764 if (!ext) {
2765 ERR("out of memory\n");
2766 return STATUS_INSUFFICIENT_RESOURCES;
2767 }
2768
2769 ext->offset = offset;
2770 ext->datalen = edsize;
2771 ext->unique = unique;
2772 ext->ignore = false;
2773 ext->inserted = true;
2774 ext->csum = csum;
2775
2776 RtlCopyMemory(&ext->extent_data, ed, edsize);
2777
2778 le = fcb->extents.Flink;
2779 while (le != &fcb->extents) {
2780 extent* oldext = CONTAINING_RECORD(le, extent, list_entry);
2781
2782 if (oldext->offset >= offset) {
2783 InsertHeadList(le->Blink, &ext->list_entry);
2784 goto end;
2785 }
2786
2787 le = le->Flink;
2788 }
2789
2790 InsertTailList(&fcb->extents, &ext->list_entry);
2791
2792 end:
2793 add_insert_extent_rollback(rollback, fcb, ext);
2794
2795 return STATUS_SUCCESS;
2796 }
2797 #ifdef _MSC_VER
2798 #pragma warning(pop)
2799 #endif
2800
2801 __attribute__((nonnull(1, 2, 3)))
remove_fcb_extent(fcb * fcb,extent * ext,LIST_ENTRY * rollback)2802 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) {
2803 if (!ext->ignore) {
2804 rollback_extent* re;
2805
2806 ext->ignore = true;
2807
2808 re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2809 if (!re) {
2810 ERR("out of memory\n");
2811 return;
2812 }
2813
2814 re->fcb = fcb;
2815 re->ext = ext;
2816
2817 add_rollback(rollback, ROLLBACK_DELETE_EXTENT, re);
2818 }
2819 }
2820
2821 _Requires_lock_held_(c->lock)
2822 _When_(return != 0, _Releases_lock_(c->lock))
2823 __attribute__((nonnull(1,2,3,9)))
insert_extent_chunk(_In_ device_extension * Vcb,_In_ fcb * fcb,_In_ chunk * c,_In_ uint64_t start_data,_In_ uint64_t length,_In_ bool prealloc,_In_opt_ void * data,_In_opt_ PIRP Irp,_In_ LIST_ENTRY * rollback,_In_ uint8_t compression,_In_ uint64_t decoded_size,_In_ bool file_write,_In_ uint64_t irp_offset)2824 bool insert_extent_chunk(_In_ device_extension* Vcb, _In_ fcb* fcb, _In_ chunk* c, _In_ uint64_t start_data, _In_ uint64_t length, _In_ bool prealloc, _In_opt_ void* data,
2825 _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback, _In_ uint8_t compression, _In_ uint64_t decoded_size, _In_ bool file_write, _In_ uint64_t irp_offset) {
2826 uint64_t address;
2827 NTSTATUS Status;
2828 EXTENT_DATA* ed;
2829 EXTENT_DATA2* ed2;
2830 uint16_t edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
2831 void* csum = NULL;
2832
2833 TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %I64x, %u, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, rollback);
2834
2835 if (!find_data_address_in_chunk(Vcb, c, length, &address))
2836 return false;
2837
2838 // add extent data to inode
2839 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
2840 if (!ed) {
2841 ERR("out of memory\n");
2842 return false;
2843 }
2844
2845 ed->generation = Vcb->superblock.generation;
2846 ed->decoded_size = decoded_size;
2847 ed->compression = compression;
2848 ed->encryption = BTRFS_ENCRYPTION_NONE;
2849 ed->encoding = BTRFS_ENCODING_NONE;
2850 ed->type = prealloc ? EXTENT_TYPE_PREALLOC : EXTENT_TYPE_REGULAR;
2851
2852 ed2 = (EXTENT_DATA2*)ed->data;
2853 ed2->address = address;
2854 ed2->size = length;
2855 ed2->offset = 0;
2856 ed2->num_bytes = decoded_size;
2857
2858 if (!prealloc && data && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
2859 ULONG sl = (ULONG)(length >> Vcb->sector_shift);
2860
2861 csum = ExAllocatePoolWithTag(PagedPool, sl * Vcb->csum_size, ALLOC_TAG);
2862 if (!csum) {
2863 ERR("out of memory\n");
2864 ExFreePool(ed);
2865 return false;
2866 }
2867
2868 do_calc_job(Vcb, data, sl, csum);
2869 }
2870
2871 Status = add_extent_to_fcb(fcb, start_data, ed, edsize, true, csum, rollback);
2872 if (!NT_SUCCESS(Status)) {
2873 ERR("add_extent_to_fcb returned %08lx\n", Status);
2874 if (csum) ExFreePool(csum);
2875 ExFreePool(ed);
2876 return false;
2877 }
2878
2879 ExFreePool(ed);
2880
2881 c->used += length;
2882 space_list_subtract(c, address, length, rollback);
2883
2884 fcb->inode_item.st_blocks += decoded_size;
2885
2886 fcb->extents_changed = true;
2887 fcb->inode_item_changed = true;
2888 mark_fcb_dirty(fcb);
2889
2890 ExAcquireResourceExclusiveLite(&c->changed_extents_lock, true);
2891
2892 add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
2893
2894 ExReleaseResourceLite(&c->changed_extents_lock);
2895
2896 release_chunk_lock(c, Vcb);
2897
2898 if (data) {
2899 Status = write_data_complete(Vcb, address, data, (uint32_t)length, Irp, NULL, file_write, irp_offset,
2900 fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2901 if (!NT_SUCCESS(Status))
2902 ERR("write_data_complete returned %08lx\n", Status);
2903 }
2904
2905 return true;
2906 }
2907
2908 __attribute__((nonnull(1,2,5,7,10)))
try_extend_data(device_extension * Vcb,fcb * fcb,uint64_t start_data,uint64_t length,void * data,PIRP Irp,uint64_t * written,bool file_write,uint64_t irp_offset,LIST_ENTRY * rollback)2909 static bool try_extend_data(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
2910 PIRP Irp, uint64_t* written, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
2911 bool success = false;
2912 EXTENT_DATA* ed;
2913 EXTENT_DATA2* ed2;
2914 chunk* c;
2915 LIST_ENTRY* le;
2916 extent* ext = NULL;
2917
2918 le = fcb->extents.Flink;
2919
2920 while (le != &fcb->extents) {
2921 extent* nextext = CONTAINING_RECORD(le, extent, list_entry);
2922
2923 if (!nextext->ignore) {
2924 if (nextext->offset == start_data) {
2925 ext = nextext;
2926 break;
2927 } else if (nextext->offset > start_data)
2928 break;
2929
2930 ext = nextext;
2931 }
2932
2933 le = le->Flink;
2934 }
2935
2936 if (!ext)
2937 return false;
2938
2939 ed = &ext->extent_data;
2940
2941 if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
2942 TRACE("not extending extent which is not regular or prealloc\n");
2943 return false;
2944 }
2945
2946 ed2 = (EXTENT_DATA2*)ed->data;
2947
2948 if (ext->offset + ed2->num_bytes != start_data) {
2949 TRACE("last EXTENT_DATA does not run up to start_data (%I64x + %I64x != %I64x)\n", ext->offset, ed2->num_bytes, start_data);
2950 return false;
2951 }
2952
2953 c = get_chunk_from_address(Vcb, ed2->address);
2954
2955 if (c->reloc || c->readonly || c->chunk_item->type != Vcb->data_flags)
2956 return false;
2957
2958 acquire_chunk_lock(c, Vcb);
2959
2960 if (length > c->chunk_item->size - c->used) {
2961 release_chunk_lock(c, Vcb);
2962 return false;
2963 }
2964
2965 if (!c->cache_loaded) {
2966 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
2967
2968 if (!NT_SUCCESS(Status)) {
2969 ERR("load_cache_chunk returned %08lx\n", Status);
2970 release_chunk_lock(c, Vcb);
2971 return false;
2972 }
2973 }
2974
2975 le = c->space.Flink;
2976 while (le != &c->space) {
2977 space* s = CONTAINING_RECORD(le, space, list_entry);
2978
2979 if (s->address == ed2->address + ed2->size) {
2980 uint64_t newlen = min(min(s->size, length), MAX_EXTENT_SIZE);
2981
2982 success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset);
2983
2984 if (success)
2985 *written += newlen;
2986 else
2987 release_chunk_lock(c, Vcb);
2988
2989 return success;
2990 } else if (s->address > ed2->address + ed2->size)
2991 break;
2992
2993 le = le->Flink;
2994 }
2995
2996 release_chunk_lock(c, Vcb);
2997
2998 return false;
2999 }
3000
3001 __attribute__((nonnull(1)))
insert_chunk_fragmented(fcb * fcb,uint64_t start,uint64_t length,uint8_t * data,bool prealloc,LIST_ENTRY * rollback)3002 static NTSTATUS insert_chunk_fragmented(fcb* fcb, uint64_t start, uint64_t length, uint8_t* data, bool prealloc, LIST_ENTRY* rollback) {
3003 LIST_ENTRY* le;
3004 uint64_t flags = fcb->Vcb->data_flags;
3005 bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3006 NTSTATUS Status;
3007 chunk* c;
3008
3009 ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3010
3011 // first create as many chunks as we can
3012 do {
3013 Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3014 } while (NT_SUCCESS(Status));
3015
3016 if (Status != STATUS_DISK_FULL) {
3017 ERR("alloc_chunk returned %08lx\n", Status);
3018 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3019 return Status;
3020 }
3021
3022 le = fcb->Vcb->chunks.Flink;
3023 while (le != &fcb->Vcb->chunks) {
3024 c = CONTAINING_RECORD(le, chunk, list_entry);
3025
3026 if (!c->readonly && !c->reloc) {
3027 acquire_chunk_lock(c, fcb->Vcb);
3028
3029 if (c->chunk_item->type == flags) {
3030 while (!IsListEmpty(&c->space_size) && length > 0) {
3031 space* s = CONTAINING_RECORD(c->space_size.Flink, space, list_entry_size);
3032 uint64_t extlen = min(length, s->size);
3033
3034 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, prealloc && !page_file, data, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3035 start += extlen;
3036 length -= extlen;
3037 if (data) data += extlen;
3038
3039 acquire_chunk_lock(c, fcb->Vcb);
3040 }
3041 }
3042 }
3043
3044 release_chunk_lock(c, fcb->Vcb);
3045
3046 if (length == 0)
3047 break;
3048 }
3049
3050 le = le->Flink;
3051 }
3052
3053 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3054
3055 return length == 0 ? STATUS_SUCCESS : STATUS_DISK_FULL;
3056 }
3057
3058 __attribute__((nonnull(1,4)))
insert_prealloc_extent(fcb * fcb,uint64_t start,uint64_t length,LIST_ENTRY * rollback)3059 static NTSTATUS insert_prealloc_extent(fcb* fcb, uint64_t start, uint64_t length, LIST_ENTRY* rollback) {
3060 LIST_ENTRY* le;
3061 chunk* c;
3062 uint64_t flags;
3063 NTSTATUS Status;
3064 bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3065
3066 flags = fcb->Vcb->data_flags;
3067
3068 do {
3069 uint64_t extlen = min(MAX_EXTENT_SIZE, length);
3070
3071 ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3072
3073 le = fcb->Vcb->chunks.Flink;
3074 while (le != &fcb->Vcb->chunks) {
3075 c = CONTAINING_RECORD(le, chunk, list_entry);
3076
3077 if (!c->readonly && !c->reloc) {
3078 acquire_chunk_lock(c, fcb->Vcb);
3079
3080 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3081 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3082 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3083 goto cont;
3084 }
3085 }
3086
3087 release_chunk_lock(c, fcb->Vcb);
3088 }
3089
3090 le = le->Flink;
3091 }
3092
3093 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3094
3095 ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, true);
3096
3097 Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3098
3099 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3100
3101 if (!NT_SUCCESS(Status)) {
3102 ERR("alloc_chunk returned %08lx\n", Status);
3103 goto end;
3104 }
3105
3106 acquire_chunk_lock(c, fcb->Vcb);
3107
3108 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3109 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0))
3110 goto cont;
3111 }
3112
3113 release_chunk_lock(c, fcb->Vcb);
3114
3115 Status = insert_chunk_fragmented(fcb, start, length, NULL, true, rollback);
3116 if (!NT_SUCCESS(Status))
3117 ERR("insert_chunk_fragmented returned %08lx\n", Status);
3118
3119 goto end;
3120
3121 cont:
3122 length -= extlen;
3123 start += extlen;
3124 } while (length > 0);
3125
3126 Status = STATUS_SUCCESS;
3127
3128 end:
3129 return Status;
3130 }
3131
3132 __attribute__((nonnull(1,2,5,9)))
insert_extent(device_extension * Vcb,fcb * fcb,uint64_t start_data,uint64_t length,void * data,PIRP Irp,bool file_write,uint64_t irp_offset,LIST_ENTRY * rollback)3133 static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
3134 PIRP Irp, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
3135 NTSTATUS Status;
3136 LIST_ENTRY* le;
3137 chunk* c;
3138 uint64_t flags, orig_length = length, written = 0;
3139
3140 TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data);
3141
3142 if (start_data > 0) {
3143 try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, file_write, irp_offset, rollback);
3144
3145 if (written == length)
3146 return STATUS_SUCCESS;
3147 else if (written > 0) {
3148 start_data += written;
3149 irp_offset += written;
3150 length -= written;
3151 data = &((uint8_t*)data)[written];
3152 }
3153 }
3154
3155 flags = Vcb->data_flags;
3156
3157 while (written < orig_length) {
3158 uint64_t newlen = min(length, MAX_EXTENT_SIZE);
3159 bool done = false;
3160
3161 // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB.
3162 // First, see if we can write the extent part to an existing chunk.
3163
3164 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
3165
3166 le = Vcb->chunks.Flink;
3167 while (le != &Vcb->chunks) {
3168 c = CONTAINING_RECORD(le, chunk, list_entry);
3169
3170 if (!c->readonly && !c->reloc) {
3171 acquire_chunk_lock(c, Vcb);
3172
3173 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3174 insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3175 written += newlen;
3176
3177 if (written == orig_length) {
3178 ExReleaseResourceLite(&Vcb->chunk_lock);
3179 return STATUS_SUCCESS;
3180 } else {
3181 done = true;
3182 start_data += newlen;
3183 irp_offset += newlen;
3184 length -= newlen;
3185 data = &((uint8_t*)data)[newlen];
3186 break;
3187 }
3188 } else
3189 release_chunk_lock(c, Vcb);
3190 }
3191
3192 le = le->Flink;
3193 }
3194
3195 ExReleaseResourceLite(&Vcb->chunk_lock);
3196
3197 if (done) continue;
3198
3199 // Otherwise, see if we can put it in a new chunk.
3200
3201 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
3202
3203 Status = alloc_chunk(Vcb, flags, &c, false);
3204
3205 ExReleaseResourceLite(&Vcb->chunk_lock);
3206
3207 if (!NT_SUCCESS(Status)) {
3208 ERR("alloc_chunk returned %08lx\n", Status);
3209 return Status;
3210 }
3211
3212 if (c) {
3213 acquire_chunk_lock(c, Vcb);
3214
3215 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3216 insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3217 written += newlen;
3218
3219 if (written == orig_length)
3220 return STATUS_SUCCESS;
3221 else {
3222 done = true;
3223 start_data += newlen;
3224 irp_offset += newlen;
3225 length -= newlen;
3226 data = &((uint8_t*)data)[newlen];
3227 }
3228 } else
3229 release_chunk_lock(c, Vcb);
3230 }
3231
3232 if (!done) {
3233 Status = insert_chunk_fragmented(fcb, start_data, length, data, false, rollback);
3234 if (!NT_SUCCESS(Status))
3235 ERR("insert_chunk_fragmented returned %08lx\n", Status);
3236
3237 return Status;
3238 }
3239 }
3240
3241 return STATUS_DISK_FULL;
3242 }
3243
3244 __attribute__((nonnull(1,4)))
truncate_file(fcb * fcb,uint64_t end,PIRP Irp,LIST_ENTRY * rollback)3245 NTSTATUS truncate_file(fcb* fcb, uint64_t end, PIRP Irp, LIST_ENTRY* rollback) {
3246 NTSTATUS Status;
3247
3248 // FIXME - convert into inline extent if short enough
3249
3250 if (end > 0 && fcb_is_inline(fcb)) {
3251 uint8_t* buf;
3252 bool make_inline = end <= fcb->Vcb->options.max_inline;
3253
3254 buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(make_inline ? (offsetof(EXTENT_DATA, data[0]) + end) : sector_align(end, fcb->Vcb->superblock.sector_size)), ALLOC_TAG);
3255 if (!buf) {
3256 ERR("out of memory\n");
3257 return STATUS_INSUFFICIENT_RESOURCES;
3258 }
3259
3260 Status = read_file(fcb, make_inline ? (buf + offsetof(EXTENT_DATA, data[0])) : buf, 0, end, NULL, Irp);
3261 if (!NT_SUCCESS(Status)) {
3262 ERR("read_file returned %08lx\n", Status);
3263 ExFreePool(buf);
3264 return Status;
3265 }
3266
3267 Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3268 if (!NT_SUCCESS(Status)) {
3269 ERR("excise_extents returned %08lx\n", Status);
3270 ExFreePool(buf);
3271 return Status;
3272 }
3273
3274 if (!make_inline) {
3275 RtlZeroMemory(buf + end, (ULONG)(sector_align(end, fcb->Vcb->superblock.sector_size) - end));
3276
3277 Status = do_write_file(fcb, 0, sector_align(end, fcb->Vcb->superblock.sector_size), buf, Irp, false, 0, rollback);
3278 if (!NT_SUCCESS(Status)) {
3279 ERR("do_write_file returned %08lx\n", Status);
3280 ExFreePool(buf);
3281 return Status;
3282 }
3283 } else {
3284 EXTENT_DATA* ed = (EXTENT_DATA*)buf;
3285
3286 ed->generation = fcb->Vcb->superblock.generation;
3287 ed->decoded_size = end;
3288 ed->compression = BTRFS_COMPRESSION_NONE;
3289 ed->encryption = BTRFS_ENCRYPTION_NONE;
3290 ed->encoding = BTRFS_ENCODING_NONE;
3291 ed->type = EXTENT_TYPE_INLINE;
3292
3293 Status = add_extent_to_fcb(fcb, 0, ed, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end), false, NULL, rollback);
3294 if (!NT_SUCCESS(Status)) {
3295 ERR("add_extent_to_fcb returned %08lx\n", Status);
3296 ExFreePool(buf);
3297 return Status;
3298 }
3299
3300 fcb->inode_item.st_blocks += end;
3301
3302 fcb->inode_item.st_size = end;
3303 fcb->inode_item_changed = true;
3304 TRACE("setting st_size to %I64x\n", end);
3305
3306 fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
3307 fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
3308 fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
3309 }
3310
3311 ExFreePool(buf);
3312 return STATUS_SUCCESS;
3313 }
3314
3315 Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
3316 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
3317 if (!NT_SUCCESS(Status)) {
3318 ERR("excise_extents returned %08lx\n", Status);
3319 return Status;
3320 }
3321
3322 fcb->inode_item.st_size = end;
3323 fcb->inode_item_changed = true;
3324 TRACE("setting st_size to %I64x\n", end);
3325
3326 fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
3327 fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
3328 fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
3329 // FIXME - inform cache manager of this
3330
3331 TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
3332
3333 return STATUS_SUCCESS;
3334 }
3335
3336 __attribute__((nonnull(1,6)))
extend_file(fcb * fcb,file_ref * fileref,uint64_t end,bool prealloc,PIRP Irp,LIST_ENTRY * rollback)3337 NTSTATUS extend_file(fcb* fcb, file_ref* fileref, uint64_t end, bool prealloc, PIRP Irp, LIST_ENTRY* rollback) {
3338 uint64_t oldalloc, newalloc;
3339 bool cur_inline;
3340 NTSTATUS Status;
3341
3342 TRACE("(%p, %p, %I64x, %u)\n", fcb, fileref, end, prealloc);
3343
3344 if (fcb->ads) {
3345 if (end > 0xffff)
3346 return STATUS_DISK_FULL;
3347
3348 return stream_set_end_of_file_information(fcb->Vcb, (uint16_t)end, fcb, fileref, false);
3349 } else {
3350 extent* ext = NULL;
3351 LIST_ENTRY* le;
3352
3353 le = fcb->extents.Blink;
3354 while (le != &fcb->extents) {
3355 extent* ext2 = CONTAINING_RECORD(le, extent, list_entry);
3356
3357 if (!ext2->ignore) {
3358 ext = ext2;
3359 break;
3360 }
3361
3362 le = le->Blink;
3363 }
3364
3365 oldalloc = 0;
3366 if (ext) {
3367 EXTENT_DATA* ed = &ext->extent_data;
3368 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3369
3370 oldalloc = ext->offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
3371 cur_inline = ed->type == EXTENT_TYPE_INLINE;
3372
3373 if (cur_inline && end > fcb->Vcb->options.max_inline) {
3374 uint64_t origlength, length;
3375 uint8_t* data;
3376
3377 TRACE("giving inline file proper extents\n");
3378
3379 origlength = ed->decoded_size;
3380
3381 cur_inline = false;
3382
3383 length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
3384
3385 data = ExAllocatePoolWithTag(PagedPool, (ULONG)length, ALLOC_TAG);
3386 if (!data) {
3387 ERR("could not allocate %I64x bytes for data\n", length);
3388 return STATUS_INSUFFICIENT_RESOURCES;
3389 }
3390
3391 Status = read_file(fcb, data, 0, origlength, NULL, Irp);
3392 if (!NT_SUCCESS(Status)) {
3393 ERR("read_file returned %08lx\n", Status);
3394 ExFreePool(data);
3395 return Status;
3396 }
3397
3398 RtlZeroMemory(data + origlength, (ULONG)(length - origlength));
3399
3400 Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3401 if (!NT_SUCCESS(Status)) {
3402 ERR("excise_extents returned %08lx\n", Status);
3403 ExFreePool(data);
3404 return Status;
3405 }
3406
3407 Status = do_write_file(fcb, 0, length, data, Irp, false, 0, rollback);
3408 if (!NT_SUCCESS(Status)) {
3409 ERR("do_write_file returned %08lx\n", Status);
3410 ExFreePool(data);
3411 return Status;
3412 }
3413
3414 oldalloc = ext->offset + length;
3415
3416 ExFreePool(data);
3417 }
3418
3419 if (cur_inline) {
3420 uint16_t edsize;
3421
3422 if (end > oldalloc) {
3423 edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end - ext->offset);
3424 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3425
3426 if (!ed) {
3427 ERR("out of memory\n");
3428 return STATUS_INSUFFICIENT_RESOURCES;
3429 }
3430
3431 ed->generation = fcb->Vcb->superblock.generation;
3432 ed->decoded_size = end - ext->offset;
3433 ed->compression = BTRFS_COMPRESSION_NONE;
3434 ed->encryption = BTRFS_ENCRYPTION_NONE;
3435 ed->encoding = BTRFS_ENCODING_NONE;
3436 ed->type = EXTENT_TYPE_INLINE;
3437
3438 Status = read_file(fcb, ed->data, ext->offset, oldalloc, NULL, Irp);
3439 if (!NT_SUCCESS(Status)) {
3440 ERR("read_file returned %08lx\n", Status);
3441 ExFreePool(ed);
3442 return Status;
3443 }
3444
3445 RtlZeroMemory(ed->data + oldalloc - ext->offset, (ULONG)(end - oldalloc));
3446
3447 remove_fcb_extent(fcb, ext, rollback);
3448
3449 Status = add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback);
3450 if (!NT_SUCCESS(Status)) {
3451 ERR("add_extent_to_fcb returned %08lx\n", Status);
3452 ExFreePool(ed);
3453 return Status;
3454 }
3455
3456 ExFreePool(ed);
3457
3458 fcb->extents_changed = true;
3459 mark_fcb_dirty(fcb);
3460 }
3461
3462 TRACE("extending inline file (oldalloc = %I64x, end = %I64x)\n", oldalloc, end);
3463
3464 fcb->inode_item.st_size = end;
3465 TRACE("setting st_size to %I64x\n", end);
3466
3467 fcb->inode_item.st_blocks = end;
3468
3469 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3470 } else {
3471 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3472
3473 if (newalloc > oldalloc) {
3474 if (prealloc) {
3475 // FIXME - try and extend previous extent first
3476
3477 Status = insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback);
3478
3479 if (!NT_SUCCESS(Status) && Status != STATUS_DISK_FULL) {
3480 ERR("insert_prealloc_extent returned %08lx\n", Status);
3481 return Status;
3482 }
3483 }
3484
3485 fcb->extents_changed = true;
3486 }
3487
3488 fcb->inode_item.st_size = end;
3489 fcb->inode_item_changed = true;
3490 mark_fcb_dirty(fcb);
3491
3492 TRACE("setting st_size to %I64x\n", end);
3493
3494 TRACE("newalloc = %I64x\n", newalloc);
3495
3496 fcb->Header.AllocationSize.QuadPart = newalloc;
3497 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3498 }
3499 } else {
3500 if (end > fcb->Vcb->options.max_inline) {
3501 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3502
3503 if (prealloc) {
3504 Status = insert_prealloc_extent(fcb, 0, newalloc, rollback);
3505
3506 if (!NT_SUCCESS(Status) && Status != STATUS_DISK_FULL) {
3507 ERR("insert_prealloc_extent returned %08lx\n", Status);
3508 return Status;
3509 }
3510 }
3511
3512 fcb->extents_changed = true;
3513 fcb->inode_item_changed = true;
3514 mark_fcb_dirty(fcb);
3515
3516 fcb->inode_item.st_size = end;
3517 TRACE("setting st_size to %I64x\n", end);
3518
3519 TRACE("newalloc = %I64x\n", newalloc);
3520
3521 fcb->Header.AllocationSize.QuadPart = newalloc;
3522 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3523 } else {
3524 EXTENT_DATA* ed;
3525 uint16_t edsize;
3526
3527 edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end);
3528 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3529
3530 if (!ed) {
3531 ERR("out of memory\n");
3532 return STATUS_INSUFFICIENT_RESOURCES;
3533 }
3534
3535 ed->generation = fcb->Vcb->superblock.generation;
3536 ed->decoded_size = end;
3537 ed->compression = BTRFS_COMPRESSION_NONE;
3538 ed->encryption = BTRFS_ENCRYPTION_NONE;
3539 ed->encoding = BTRFS_ENCODING_NONE;
3540 ed->type = EXTENT_TYPE_INLINE;
3541
3542 RtlZeroMemory(ed->data, (ULONG)end);
3543
3544 Status = add_extent_to_fcb(fcb, 0, ed, edsize, false, NULL, rollback);
3545 if (!NT_SUCCESS(Status)) {
3546 ERR("add_extent_to_fcb returned %08lx\n", Status);
3547 ExFreePool(ed);
3548 return Status;
3549 }
3550
3551 ExFreePool(ed);
3552
3553 fcb->extents_changed = true;
3554 fcb->inode_item_changed = true;
3555 mark_fcb_dirty(fcb);
3556
3557 fcb->inode_item.st_size = end;
3558 TRACE("setting st_size to %I64x\n", end);
3559
3560 fcb->inode_item.st_blocks = end;
3561
3562 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3563 }
3564 }
3565 }
3566
3567 return STATUS_SUCCESS;
3568 }
3569
3570 __attribute__((nonnull(1,2,5,6,11)))
do_write_file_prealloc(fcb * fcb,extent * ext,uint64_t start_data,uint64_t end_data,void * data,uint64_t * written,PIRP Irp,bool file_write,uint64_t irp_offset,ULONG priority,LIST_ENTRY * rollback)3571 static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, uint64_t start_data, uint64_t end_data, void* data, uint64_t* written,
3572 PIRP Irp, bool file_write, uint64_t irp_offset, ULONG priority, LIST_ENTRY* rollback) {
3573 EXTENT_DATA* ed = &ext->extent_data;
3574 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3575 NTSTATUS Status;
3576 chunk* c = NULL;
3577
3578 if (start_data <= ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace all
3579 extent* newext;
3580
3581 newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3582 if (!newext) {
3583 ERR("out of memory\n");
3584 return STATUS_INSUFFICIENT_RESOURCES;
3585 }
3586
3587 RtlCopyMemory(&newext->extent_data, &ext->extent_data, ext->datalen);
3588
3589 newext->extent_data.type = EXTENT_TYPE_REGULAR;
3590
3591 Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)ed2->num_bytes, Irp,
3592 NULL, file_write, irp_offset + ext->offset - start_data, priority);
3593 if (!NT_SUCCESS(Status)) {
3594 ERR("write_data_complete returned %08lx\n", Status);
3595 return Status;
3596 }
3597
3598 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3599 ULONG sl = (ULONG)(ed2->num_bytes >> fcb->Vcb->sector_shift);
3600 void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3601
3602 if (!csum) {
3603 ERR("out of memory\n");
3604 ExFreePool(newext);
3605 return STATUS_INSUFFICIENT_RESOURCES;
3606 }
3607
3608 do_calc_job(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3609
3610 newext->csum = csum;
3611 } else
3612 newext->csum = NULL;
3613
3614 *written = ed2->num_bytes;
3615
3616 newext->offset = ext->offset;
3617 newext->datalen = ext->datalen;
3618 newext->unique = ext->unique;
3619 newext->ignore = false;
3620 newext->inserted = true;
3621 InsertHeadList(&ext->list_entry, &newext->list_entry);
3622
3623 add_insert_extent_rollback(rollback, fcb, newext);
3624
3625 remove_fcb_extent(fcb, ext, rollback);
3626
3627 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3628 } else if (start_data <= ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace beginning
3629 EXTENT_DATA2* ned2;
3630 extent *newext1, *newext2;
3631
3632 newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3633 if (!newext1) {
3634 ERR("out of memory\n");
3635 return STATUS_INSUFFICIENT_RESOURCES;
3636 }
3637
3638 newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3639 if (!newext2) {
3640 ERR("out of memory\n");
3641 ExFreePool(newext1);
3642 return STATUS_INSUFFICIENT_RESOURCES;
3643 }
3644
3645 RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3646 newext1->extent_data.type = EXTENT_TYPE_REGULAR;
3647 ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3648 ned2->num_bytes = end_data - ext->offset;
3649
3650 RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3651 ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3652 ned2->offset += end_data - ext->offset;
3653 ned2->num_bytes -= end_data - ext->offset;
3654
3655 Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)(end_data - ext->offset),
3656 Irp, NULL, file_write, irp_offset + ext->offset - start_data, priority);
3657 if (!NT_SUCCESS(Status)) {
3658 ERR("write_data_complete returned %08lx\n", Status);
3659 ExFreePool(newext1);
3660 ExFreePool(newext2);
3661 return Status;
3662 }
3663
3664 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3665 ULONG sl = (ULONG)((end_data - ext->offset) >> fcb->Vcb->sector_shift);
3666 void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3667
3668 if (!csum) {
3669 ERR("out of memory\n");
3670 ExFreePool(newext1);
3671 ExFreePool(newext2);
3672 return STATUS_INSUFFICIENT_RESOURCES;
3673 }
3674
3675 do_calc_job(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3676
3677 newext1->csum = csum;
3678 } else
3679 newext1->csum = NULL;
3680
3681 *written = end_data - ext->offset;
3682
3683 newext1->offset = ext->offset;
3684 newext1->datalen = ext->datalen;
3685 newext1->unique = ext->unique;
3686 newext1->ignore = false;
3687 newext1->inserted = true;
3688 InsertHeadList(&ext->list_entry, &newext1->list_entry);
3689
3690 add_insert_extent_rollback(rollback, fcb, newext1);
3691
3692 newext2->offset = end_data;
3693 newext2->datalen = ext->datalen;
3694 newext2->unique = ext->unique;
3695 newext2->ignore = false;
3696 newext2->inserted = true;
3697 newext2->csum = NULL;
3698 add_extent(fcb, &newext1->list_entry, newext2);
3699
3700 add_insert_extent_rollback(rollback, fcb, newext2);
3701
3702 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3703
3704 if (!c)
3705 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3706 else {
3707 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3708 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3709
3710 if (!NT_SUCCESS(Status)) {
3711 ERR("update_changed_extent_ref returned %08lx\n", Status);
3712 return Status;
3713 }
3714 }
3715
3716 remove_fcb_extent(fcb, ext, rollback);
3717 } else if (start_data > ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace end
3718 EXTENT_DATA2* ned2;
3719 extent *newext1, *newext2;
3720
3721 newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3722 if (!newext1) {
3723 ERR("out of memory\n");
3724 return STATUS_INSUFFICIENT_RESOURCES;
3725 }
3726
3727 newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3728 if (!newext2) {
3729 ERR("out of memory\n");
3730 ExFreePool(newext1);
3731 return STATUS_INSUFFICIENT_RESOURCES;
3732 }
3733
3734 RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3735
3736 ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3737 ned2->num_bytes = start_data - ext->offset;
3738
3739 RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3740
3741 newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3742 ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3743 ned2->offset += start_data - ext->offset;
3744 ned2->num_bytes = ext->offset + ed2->num_bytes - start_data;
3745
3746 Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)ned2->num_bytes, Irp, NULL, file_write, irp_offset, priority);
3747 if (!NT_SUCCESS(Status)) {
3748 ERR("write_data_complete returned %08lx\n", Status);
3749 ExFreePool(newext1);
3750 ExFreePool(newext2);
3751 return Status;
3752 }
3753
3754 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3755 ULONG sl = (ULONG)(ned2->num_bytes >> fcb->Vcb->sector_shift);
3756 void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3757
3758 if (!csum) {
3759 ERR("out of memory\n");
3760 ExFreePool(newext1);
3761 ExFreePool(newext2);
3762 return STATUS_INSUFFICIENT_RESOURCES;
3763 }
3764
3765 do_calc_job(fcb->Vcb, data, sl, csum);
3766
3767 newext2->csum = csum;
3768 } else
3769 newext2->csum = NULL;
3770
3771 *written = ned2->num_bytes;
3772
3773 newext1->offset = ext->offset;
3774 newext1->datalen = ext->datalen;
3775 newext1->unique = ext->unique;
3776 newext1->ignore = false;
3777 newext1->inserted = true;
3778 newext1->csum = NULL;
3779 InsertHeadList(&ext->list_entry, &newext1->list_entry);
3780
3781 add_insert_extent_rollback(rollback, fcb, newext1);
3782
3783 newext2->offset = start_data;
3784 newext2->datalen = ext->datalen;
3785 newext2->unique = ext->unique;
3786 newext2->ignore = false;
3787 newext2->inserted = true;
3788 add_extent(fcb, &newext1->list_entry, newext2);
3789
3790 add_insert_extent_rollback(rollback, fcb, newext2);
3791
3792 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3793
3794 if (!c)
3795 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3796 else {
3797 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3798 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3799
3800 if (!NT_SUCCESS(Status)) {
3801 ERR("update_changed_extent_ref returned %08lx\n", Status);
3802 return Status;
3803 }
3804 }
3805
3806 remove_fcb_extent(fcb, ext, rollback);
3807 } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle
3808 EXTENT_DATA2* ned2;
3809 extent *newext1, *newext2, *newext3;
3810
3811 newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3812 if (!newext1) {
3813 ERR("out of memory\n");
3814 return STATUS_INSUFFICIENT_RESOURCES;
3815 }
3816
3817 newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3818 if (!newext2) {
3819 ERR("out of memory\n");
3820 ExFreePool(newext1);
3821 return STATUS_INSUFFICIENT_RESOURCES;
3822 }
3823
3824 newext3 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3825 if (!newext3) {
3826 ERR("out of memory\n");
3827 ExFreePool(newext1);
3828 ExFreePool(newext2);
3829 return STATUS_INSUFFICIENT_RESOURCES;
3830 }
3831
3832 RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3833 RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3834 RtlCopyMemory(&newext3->extent_data, &ext->extent_data, ext->datalen);
3835
3836 ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3837 ned2->num_bytes = start_data - ext->offset;
3838
3839 newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3840 ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3841 ned2->offset += start_data - ext->offset;
3842 ned2->num_bytes = end_data - start_data;
3843
3844 ned2 = (EXTENT_DATA2*)newext3->extent_data.data;
3845 ned2->offset += end_data - ext->offset;
3846 ned2->num_bytes -= end_data - ext->offset;
3847
3848 ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3849 Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)(end_data - start_data), Irp, NULL, file_write, irp_offset, priority);
3850 if (!NT_SUCCESS(Status)) {
3851 ERR("write_data_complete returned %08lx\n", Status);
3852 ExFreePool(newext1);
3853 ExFreePool(newext2);
3854 ExFreePool(newext3);
3855 return Status;
3856 }
3857
3858 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3859 ULONG sl = (ULONG)((end_data - start_data) >> fcb->Vcb->sector_shift);
3860 void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3861
3862 if (!csum) {
3863 ERR("out of memory\n");
3864 ExFreePool(newext1);
3865 ExFreePool(newext2);
3866 ExFreePool(newext3);
3867 return STATUS_INSUFFICIENT_RESOURCES;
3868 }
3869
3870 do_calc_job(fcb->Vcb, data, sl, csum);
3871
3872 newext2->csum = csum;
3873 } else
3874 newext2->csum = NULL;
3875
3876 *written = end_data - start_data;
3877
3878 newext1->offset = ext->offset;
3879 newext1->datalen = ext->datalen;
3880 newext1->unique = ext->unique;
3881 newext1->ignore = false;
3882 newext1->inserted = true;
3883 newext1->csum = NULL;
3884 InsertHeadList(&ext->list_entry, &newext1->list_entry);
3885
3886 add_insert_extent_rollback(rollback, fcb, newext1);
3887
3888 newext2->offset = start_data;
3889 newext2->datalen = ext->datalen;
3890 newext2->unique = ext->unique;
3891 newext2->ignore = false;
3892 newext2->inserted = true;
3893 add_extent(fcb, &newext1->list_entry, newext2);
3894
3895 add_insert_extent_rollback(rollback, fcb, newext2);
3896
3897 newext3->offset = end_data;
3898 newext3->datalen = ext->datalen;
3899 newext3->unique = ext->unique;
3900 newext3->ignore = false;
3901 newext3->inserted = true;
3902 newext3->csum = NULL;
3903 add_extent(fcb, &newext2->list_entry, newext3);
3904
3905 add_insert_extent_rollback(rollback, fcb, newext3);
3906
3907 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3908
3909 if (!c)
3910 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3911 else {
3912 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2,
3913 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3914
3915 if (!NT_SUCCESS(Status)) {
3916 ERR("update_changed_extent_ref returned %08lx\n", Status);
3917 return Status;
3918 }
3919 }
3920
3921 remove_fcb_extent(fcb, ext, rollback);
3922 }
3923
3924 if (c)
3925 c->changed = true;
3926
3927 return STATUS_SUCCESS;
3928 }
3929
3930 __attribute__((nonnull(1, 4)))
do_write_file(fcb * fcb,uint64_t start,uint64_t end_data,void * data,PIRP Irp,bool file_write,uint32_t irp_offset,LIST_ENTRY * rollback)3931 NTSTATUS do_write_file(fcb* fcb, uint64_t start, uint64_t end_data, void* data, PIRP Irp, bool file_write, uint32_t irp_offset, LIST_ENTRY* rollback) {
3932 NTSTATUS Status;
3933 LIST_ENTRY *le, *le2;
3934 uint64_t written = 0, length = end_data - start;
3935 uint64_t last_cow_start;
3936 ULONG priority = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority;
3937 #ifdef DEBUG_PARANOID
3938 uint64_t last_off;
3939 #endif
3940 bool extents_changed = false;
3941
3942 last_cow_start = 0;
3943
3944 le = fcb->extents.Flink;
3945 while (le != &fcb->extents) {
3946 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
3947
3948 le2 = le->Flink;
3949
3950 if (!ext->ignore) {
3951 EXTENT_DATA* ed = &ext->extent_data;
3952 uint64_t len;
3953
3954 if (ed->type == EXTENT_TYPE_INLINE)
3955 len = ed->decoded_size;
3956 else
3957 len = ((EXTENT_DATA2*)ed->data)->num_bytes;
3958
3959 if (ext->offset + len <= start)
3960 goto nextitem;
3961
3962 if (ext->offset > start + written + length)
3963 break;
3964
3965 if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique && ed->compression == BTRFS_COMPRESSION_NONE) {
3966 if (max(last_cow_start, start + written) < ext->offset) {
3967 uint64_t start_write = max(last_cow_start, start + written);
3968
3969 extents_changed = true;
3970
3971 Status = excise_extents(fcb->Vcb, fcb, start_write, ext->offset, Irp, rollback);
3972 if (!NT_SUCCESS(Status)) {
3973 ERR("excise_extents returned %08lx\n", Status);
3974 return Status;
3975 }
3976
3977 Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
3978 if (!NT_SUCCESS(Status)) {
3979 ERR("insert_extent returned %08lx\n", Status);
3980 return Status;
3981 }
3982
3983 written += ext->offset - start_write;
3984 length -= ext->offset - start_write;
3985
3986 if (length == 0)
3987 break;
3988 }
3989
3990 if (ed->type == EXTENT_TYPE_REGULAR) {
3991 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3992 uint64_t writeaddr = ed2->address + ed2->offset + start + written - ext->offset;
3993 uint64_t write_len = min(len, length);
3994 chunk* c;
3995
3996 TRACE("doing non-COW write to %I64x\n", writeaddr);
3997
3998 Status = write_data_complete(fcb->Vcb, writeaddr, (uint8_t*)data + written, (uint32_t)write_len, Irp, NULL, file_write, irp_offset + written, priority);
3999 if (!NT_SUCCESS(Status)) {
4000 ERR("write_data_complete returned %08lx\n", Status);
4001 return Status;
4002 }
4003
4004 c = get_chunk_from_address(fcb->Vcb, writeaddr);
4005 if (c)
4006 c->changed = true;
4007
4008 // This shouldn't ever get called - nocow files should always also be nosum.
4009 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
4010 do_calc_job(fcb->Vcb, (uint8_t*)data + written, (uint32_t)(write_len >> fcb->Vcb->sector_shift),
4011 (uint8_t*)ext->csum + (((start + written - ext->offset) * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift));
4012
4013 ext->inserted = true;
4014 extents_changed = true;
4015 }
4016
4017 written += write_len;
4018 length -= write_len;
4019
4020 if (length == 0)
4021 break;
4022 } else if (ed->type == EXTENT_TYPE_PREALLOC) {
4023 uint64_t write_len;
4024
4025 Status = do_write_file_prealloc(fcb, ext, start + written, end_data, (uint8_t*)data + written, &write_len,
4026 Irp, file_write, irp_offset + written, priority, rollback);
4027 if (!NT_SUCCESS(Status)) {
4028 ERR("do_write_file_prealloc returned %08lx\n", Status);
4029 return Status;
4030 }
4031
4032 extents_changed = true;
4033
4034 written += write_len;
4035 length -= write_len;
4036
4037 if (length == 0)
4038 break;
4039 }
4040
4041 last_cow_start = ext->offset + len;
4042 }
4043 }
4044
4045 nextitem:
4046 le = le2;
4047 }
4048
4049 if (length > 0) {
4050 uint64_t start_write = max(last_cow_start, start + written);
4051
4052 extents_changed = true;
4053
4054 Status = excise_extents(fcb->Vcb, fcb, start_write, end_data, Irp, rollback);
4055 if (!NT_SUCCESS(Status)) {
4056 ERR("excise_extents returned %08lx\n", Status);
4057 return Status;
4058 }
4059
4060 Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
4061 if (!NT_SUCCESS(Status)) {
4062 ERR("insert_extent returned %08lx\n", Status);
4063 return Status;
4064 }
4065 }
4066
4067 #ifdef DEBUG_PARANOID
4068 last_off = 0xffffffffffffffff;
4069
4070 le = fcb->extents.Flink;
4071 while (le != &fcb->extents) {
4072 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4073
4074 if (!ext->ignore) {
4075 if (ext->offset == last_off) {
4076 ERR("offset %I64x duplicated\n", ext->offset);
4077 int3;
4078 } else if (ext->offset < last_off && last_off != 0xffffffffffffffff) {
4079 ERR("offsets out of order\n");
4080 int3;
4081 }
4082
4083 last_off = ext->offset;
4084 }
4085
4086 le = le->Flink;
4087 }
4088 #endif
4089
4090 if (extents_changed) {
4091 fcb->extents_changed = true;
4092 mark_fcb_dirty(fcb);
4093 }
4094
4095 return STATUS_SUCCESS;
4096 }
4097
4098 __attribute__((nonnull(1,2,4,5,11)))
write_file2(device_extension * Vcb,PIRP Irp,LARGE_INTEGER offset,void * buf,ULONG * length,bool paging_io,bool no_cache,bool wait,bool deferred_write,bool write_irp,LIST_ENTRY * rollback)4099 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, bool paging_io, bool no_cache,
4100 bool wait, bool deferred_write, bool write_irp, LIST_ENTRY* rollback) {
4101 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4102 PFILE_OBJECT FileObject = IrpSp->FileObject;
4103 EXTENT_DATA* ed2;
4104 uint64_t off64, newlength, start_data, end_data;
4105 uint32_t bufhead;
4106 bool make_inline;
4107 INODE_ITEM* origii;
4108 bool changed_length = false;
4109 NTSTATUS Status;
4110 LARGE_INTEGER time;
4111 BTRFS_TIME now;
4112 fcb* fcb;
4113 ccb* ccb;
4114 file_ref* fileref;
4115 bool paging_lock = false, acquired_fcb_lock = false, acquired_tree_lock = false, pagefile;
4116 ULONG filter = 0;
4117
4118 TRACE("(%p, %p, %I64x, %p, %lx, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
4119
4120 if (*length == 0) {
4121 TRACE("returning success for zero-length write\n");
4122 return STATUS_SUCCESS;
4123 }
4124
4125 if (!FileObject) {
4126 ERR("error - FileObject was NULL\n");
4127 return STATUS_ACCESS_DENIED;
4128 }
4129
4130 fcb = FileObject->FsContext;
4131 ccb = FileObject->FsContext2;
4132 fileref = ccb ? ccb->fileref : NULL;
4133
4134 if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
4135 WARN("tried to write to something other than a file or symlink (inode %I64x, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
4136 return STATUS_INVALID_DEVICE_REQUEST;
4137 }
4138
4139 if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1)
4140 offset = fcb->Header.FileSize;
4141
4142 off64 = offset.QuadPart;
4143
4144 TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
4145
4146 if (!no_cache && !CcCanIWrite(FileObject, *length, wait, deferred_write))
4147 return STATUS_PENDING;
4148
4149 if (!wait && no_cache)
4150 return STATUS_PENDING;
4151
4152 if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
4153 IO_STATUS_BLOCK iosb;
4154
4155 ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, true);
4156
4157 CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
4158
4159 if (!NT_SUCCESS(iosb.Status)) {
4160 ExReleaseResourceLite(fcb->Header.PagingIoResource);
4161 ERR("CcFlushCache returned %08lx\n", iosb.Status);
4162 return iosb.Status;
4163 }
4164
4165 paging_lock = true;
4166
4167 CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, false);
4168 }
4169
4170 if (paging_io) {
4171 if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, wait)) {
4172 Status = STATUS_PENDING;
4173 goto end;
4174 } else
4175 paging_lock = true;
4176 }
4177
4178 pagefile = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE && paging_io;
4179
4180 if (!pagefile && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
4181 if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4182 Status = STATUS_PENDING;
4183 goto end;
4184 } else
4185 acquired_tree_lock = true;
4186 }
4187
4188 if (pagefile) {
4189 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
4190 Status = STATUS_PENDING;
4191 goto end;
4192 } else
4193 acquired_fcb_lock = true;
4194 } else if (!ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
4195 if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
4196 Status = STATUS_PENDING;
4197 goto end;
4198 } else
4199 acquired_fcb_lock = true;
4200 }
4201
4202 newlength = fcb->ads ? fcb->adsdata.Length : fcb->inode_item.st_size;
4203
4204 if (fcb->deleted)
4205 newlength = 0;
4206
4207 TRACE("newlength = %I64x\n", newlength);
4208
4209 if (off64 + *length > newlength) {
4210 if (paging_io) {
4211 if (off64 >= newlength) {
4212 TRACE("paging IO tried to write beyond end of file (file size = %I64x, offset = %I64x, length = %lx)\n", newlength, off64, *length);
4213 TRACE("FileObject: AllocationSize = %I64x, FileSize = %I64x, ValidDataLength = %I64x\n",
4214 fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
4215 Irp->IoStatus.Information = 0;
4216 Status = STATUS_SUCCESS;
4217 goto end;
4218 }
4219
4220 *length = (ULONG)(newlength - off64);
4221 } else {
4222 newlength = off64 + *length;
4223 changed_length = true;
4224
4225 TRACE("extending length to %I64x\n", newlength);
4226 }
4227 }
4228
4229 if (fcb->ads)
4230 make_inline = false;
4231 else
4232 make_inline = newlength <= fcb->Vcb->options.max_inline;
4233
4234 if (changed_length) {
4235 if (newlength > (uint64_t)fcb->Header.AllocationSize.QuadPart) {
4236 if (!acquired_tree_lock) {
4237 // We need to acquire the tree lock if we don't have it already -
4238 // we can't give an inline file proper extents at the same time as we're
4239 // doing a flush.
4240 if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4241 Status = STATUS_PENDING;
4242 goto end;
4243 } else
4244 acquired_tree_lock = true;
4245 }
4246
4247 Status = extend_file(fcb, fileref, newlength, false, Irp, rollback);
4248 if (!NT_SUCCESS(Status)) {
4249 ERR("extend_file returned %08lx\n", Status);
4250 goto end;
4251 }
4252 } else if (!fcb->ads)
4253 fcb->inode_item.st_size = newlength;
4254
4255 fcb->Header.FileSize.QuadPart = newlength;
4256 fcb->Header.ValidDataLength.QuadPart = newlength;
4257
4258 TRACE("AllocationSize = %I64x\n", fcb->Header.AllocationSize.QuadPart);
4259 TRACE("FileSize = %I64x\n", fcb->Header.FileSize.QuadPart);
4260 TRACE("ValidDataLength = %I64x\n", fcb->Header.ValidDataLength.QuadPart);
4261 }
4262
4263 if (!no_cache) {
4264 Status = STATUS_SUCCESS;
4265
4266 _SEH2_TRY {
4267 if (!FileObject->PrivateCacheMap || changed_length) {
4268 CC_FILE_SIZES ccfs;
4269
4270 ccfs.AllocationSize = fcb->Header.AllocationSize;
4271 ccfs.FileSize = fcb->Header.FileSize;
4272 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4273
4274 if (!FileObject->PrivateCacheMap)
4275 init_file_cache(FileObject, &ccfs);
4276
4277 CcSetFileSizes(FileObject, &ccfs);
4278 }
4279
4280 if (IrpSp->MinorFunction & IRP_MN_MDL) {
4281 CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus);
4282
4283 Status = Irp->IoStatus.Status;
4284 goto end;
4285 } else {
4286 /* We have to wait in CcCopyWrite - if we return STATUS_PENDING and add this to the work queue,
4287 * it can result in CcFlushCache being called before the job has run. See ifstest ReadWriteTest. */
4288
4289 if (fCcCopyWriteEx) {
4290 TRACE("CcCopyWriteEx(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, off64, *length, true, buf, Irp->Tail.Overlay.Thread);
4291 if (!fCcCopyWriteEx(FileObject, &offset, *length, true, buf, Irp->Tail.Overlay.Thread)) {
4292 Status = STATUS_PENDING;
4293 goto end;
4294 }
4295 TRACE("CcCopyWriteEx finished\n");
4296 } else {
4297 TRACE("CcCopyWrite(%p, %I64x, %lx, %u, %p)\n", FileObject, off64, *length, true, buf);
4298 if (!CcCopyWrite(FileObject, &offset, *length, true, buf)) {
4299 Status = STATUS_PENDING;
4300 goto end;
4301 }
4302 TRACE("CcCopyWrite finished\n");
4303 }
4304
4305 Irp->IoStatus.Information = *length;
4306 }
4307 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4308 Status = _SEH2_GetExceptionCode();
4309 } _SEH2_END;
4310
4311 if (changed_length) {
4312 queue_notification_fcb(fcb->ads ? fileref->parent : fileref, fcb->ads ? FILE_NOTIFY_CHANGE_STREAM_SIZE : FILE_NOTIFY_CHANGE_SIZE,
4313 fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED, fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4314 }
4315
4316 goto end;
4317 }
4318
4319 if (fcb->ads) {
4320 if (changed_length) {
4321 char* data2;
4322
4323 if (newlength > fcb->adsmaxlen) {
4324 ERR("error - xattr too long (%I64u > %lu)\n", newlength, fcb->adsmaxlen);
4325 Status = STATUS_DISK_FULL;
4326 goto end;
4327 }
4328
4329 data2 = ExAllocatePoolWithTag(PagedPool, (ULONG)newlength, ALLOC_TAG);
4330 if (!data2) {
4331 ERR("out of memory\n");
4332 Status = STATUS_INSUFFICIENT_RESOURCES;
4333 goto end;
4334 }
4335
4336 if (fcb->adsdata.Buffer) {
4337 RtlCopyMemory(data2, fcb->adsdata.Buffer, fcb->adsdata.Length);
4338 ExFreePool(fcb->adsdata.Buffer);
4339 }
4340
4341 if (newlength > fcb->adsdata.Length)
4342 RtlZeroMemory(&data2[fcb->adsdata.Length], (ULONG)(newlength - fcb->adsdata.Length));
4343
4344
4345 fcb->adsdata.Buffer = data2;
4346 fcb->adsdata.Length = fcb->adsdata.MaximumLength = (USHORT)newlength;
4347
4348 fcb->Header.AllocationSize.QuadPart = newlength;
4349 fcb->Header.FileSize.QuadPart = newlength;
4350 fcb->Header.ValidDataLength.QuadPart = newlength;
4351 }
4352
4353 if (*length > 0)
4354 RtlCopyMemory(&fcb->adsdata.Buffer[off64], buf, *length);
4355
4356 fcb->Header.ValidDataLength.QuadPart = newlength;
4357
4358 mark_fcb_dirty(fcb);
4359
4360 if (fileref)
4361 mark_fileref_dirty(fileref);
4362 } else {
4363 bool compress = write_fcb_compressed(fcb), no_buf = false;
4364 uint8_t* data;
4365
4366 if (make_inline) {
4367 start_data = 0;
4368 end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
4369 bufhead = sizeof(EXTENT_DATA) - 1;
4370 } else if (compress) {
4371 start_data = off64 & ~(uint64_t)(COMPRESSED_EXTENT_SIZE - 1);
4372 end_data = min(sector_align(off64 + *length, COMPRESSED_EXTENT_SIZE),
4373 sector_align(newlength, fcb->Vcb->superblock.sector_size));
4374 bufhead = 0;
4375 } else {
4376 start_data = off64 & ~(uint64_t)(fcb->Vcb->superblock.sector_size - 1);
4377 end_data = sector_align(off64 + *length, fcb->Vcb->superblock.sector_size);
4378 bufhead = 0;
4379 }
4380
4381 if (fcb_is_inline(fcb))
4382 end_data = max(end_data, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size));
4383
4384 fcb->Header.ValidDataLength.QuadPart = newlength;
4385 TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
4386
4387 if (!make_inline && !compress && off64 == start_data && off64 + *length == end_data) {
4388 data = buf;
4389 no_buf = true;
4390 } else {
4391 data = ExAllocatePoolWithTag(PagedPool, (ULONG)(end_data - start_data + bufhead), ALLOC_TAG);
4392 if (!data) {
4393 ERR("out of memory\n");
4394 Status = STATUS_INSUFFICIENT_RESOURCES;
4395 goto end;
4396 }
4397
4398 RtlZeroMemory(data + bufhead, (ULONG)(end_data - start_data));
4399
4400 TRACE("start_data = %I64x\n", start_data);
4401 TRACE("end_data = %I64x\n", end_data);
4402
4403 if (off64 > start_data || off64 + *length < end_data) {
4404 if (changed_length) {
4405 if (fcb->inode_item.st_size > start_data)
4406 Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp);
4407 else
4408 Status = STATUS_SUCCESS;
4409 } else
4410 Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp);
4411
4412 if (!NT_SUCCESS(Status)) {
4413 ERR("read_file returned %08lx\n", Status);
4414 ExFreePool(data);
4415 goto end;
4416 }
4417 }
4418
4419 RtlCopyMemory(data + bufhead + off64 - start_data, buf, *length);
4420 }
4421
4422 if (make_inline) {
4423 Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback);
4424 if (!NT_SUCCESS(Status)) {
4425 ERR("error - excise_extents returned %08lx\n", Status);
4426 ExFreePool(data);
4427 goto end;
4428 }
4429
4430 ed2 = (EXTENT_DATA*)data;
4431 ed2->generation = fcb->Vcb->superblock.generation;
4432 ed2->decoded_size = newlength;
4433 ed2->compression = BTRFS_COMPRESSION_NONE;
4434 ed2->encryption = BTRFS_ENCRYPTION_NONE;
4435 ed2->encoding = BTRFS_ENCODING_NONE;
4436 ed2->type = EXTENT_TYPE_INLINE;
4437
4438 Status = add_extent_to_fcb(fcb, 0, ed2, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + newlength), false, NULL, rollback);
4439 if (!NT_SUCCESS(Status)) {
4440 ERR("add_extent_to_fcb returned %08lx\n", Status);
4441 ExFreePool(data);
4442 goto end;
4443 }
4444
4445 fcb->inode_item.st_blocks += newlength;
4446 } else if (compress) {
4447 Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback);
4448
4449 if (!NT_SUCCESS(Status)) {
4450 ERR("write_compressed returned %08lx\n", Status);
4451 ExFreePool(data);
4452 goto end;
4453 }
4454 } else {
4455 if (write_irp && Irp->MdlAddress && no_buf) {
4456 bool locked = Irp->MdlAddress->MdlFlags & (MDL_PAGES_LOCKED | MDL_PARTIAL);
4457
4458 if (!locked) {
4459 Status = STATUS_SUCCESS;
4460
4461 _SEH2_TRY {
4462 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
4463 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4464 Status = _SEH2_GetExceptionCode();
4465 } _SEH2_END;
4466
4467 if (!NT_SUCCESS(Status)) {
4468 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
4469 goto end;
4470 }
4471 }
4472
4473 _SEH2_TRY {
4474 Status = do_write_file(fcb, start_data, end_data, data, Irp, true, 0, rollback);
4475 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4476 Status = _SEH2_GetExceptionCode();
4477 } _SEH2_END;
4478
4479 if (!locked)
4480 MmUnlockPages(Irp->MdlAddress);
4481 } else {
4482 _SEH2_TRY {
4483 Status = do_write_file(fcb, start_data, end_data, data, Irp, false, 0, rollback);
4484 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4485 Status = _SEH2_GetExceptionCode();
4486 } _SEH2_END;
4487 }
4488
4489 if (!NT_SUCCESS(Status)) {
4490 ERR("do_write_file returned %08lx\n", Status);
4491 if (!no_buf) ExFreePool(data);
4492 goto end;
4493 }
4494 }
4495
4496 if (!no_buf)
4497 ExFreePool(data);
4498 }
4499
4500 KeQuerySystemTime(&time);
4501 win_time_to_unix(time, &now);
4502
4503 if (!pagefile) {
4504 if (fcb->ads) {
4505 if (fileref && fileref->parent)
4506 origii = &fileref->parent->fcb->inode_item;
4507 else {
4508 ERR("no parent fcb found for stream\n");
4509 Status = STATUS_INTERNAL_ERROR;
4510 goto end;
4511 }
4512 } else
4513 origii = &fcb->inode_item;
4514
4515 origii->transid = Vcb->superblock.generation;
4516 origii->sequence++;
4517
4518 if (!ccb->user_set_change_time)
4519 origii->st_ctime = now;
4520
4521 if (!fcb->ads) {
4522 if (changed_length) {
4523 TRACE("setting st_size to %I64x\n", newlength);
4524 origii->st_size = newlength;
4525 filter |= FILE_NOTIFY_CHANGE_SIZE;
4526 }
4527
4528 fcb->inode_item_changed = true;
4529 } else {
4530 fileref->parent->fcb->inode_item_changed = true;
4531
4532 if (changed_length)
4533 filter |= FILE_NOTIFY_CHANGE_STREAM_SIZE;
4534
4535 filter |= FILE_NOTIFY_CHANGE_STREAM_WRITE;
4536 }
4537
4538 if (!ccb->user_set_write_time) {
4539 origii->st_mtime = now;
4540 filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
4541 }
4542
4543 mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb);
4544 }
4545
4546 if (changed_length) {
4547 CC_FILE_SIZES ccfs;
4548
4549 ccfs.AllocationSize = fcb->Header.AllocationSize;
4550 ccfs.FileSize = fcb->Header.FileSize;
4551 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4552
4553 _SEH2_TRY {
4554 CcSetFileSizes(FileObject, &ccfs);
4555 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4556 Status = _SEH2_GetExceptionCode();
4557 goto end;
4558 } _SEH2_END;
4559 }
4560
4561 fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
4562 fcb->subvol->root_item.ctime = now;
4563
4564 Status = STATUS_SUCCESS;
4565 Irp->IoStatus.Information = *length;
4566
4567 if (filter != 0)
4568 queue_notification_fcb(fcb->ads ? fileref->parent : fileref, filter, fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED,
4569 fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4570
4571 end:
4572 if (NT_SUCCESS(Status) && FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
4573 TRACE("CurrentByteOffset was: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4574 FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
4575 TRACE("CurrentByteOffset now: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4576 }
4577
4578 if (acquired_fcb_lock)
4579 ExReleaseResourceLite(fcb->Header.Resource);
4580
4581 if (acquired_tree_lock)
4582 ExReleaseResourceLite(&Vcb->tree_lock);
4583
4584 if (paging_lock)
4585 ExReleaseResourceLite(fcb->Header.PagingIoResource);
4586
4587 return Status;
4588 }
4589
4590 __attribute__((nonnull(1,2)))
write_file(device_extension * Vcb,PIRP Irp,bool wait,bool deferred_write)4591 NTSTATUS write_file(device_extension* Vcb, PIRP Irp, bool wait, bool deferred_write) {
4592 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4593 void* buf;
4594 NTSTATUS Status;
4595 LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
4596 PFILE_OBJECT FileObject = IrpSp->FileObject;
4597 fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4598 LIST_ENTRY rollback;
4599
4600 InitializeListHead(&rollback);
4601
4602 TRACE("write\n");
4603
4604 Irp->IoStatus.Information = 0;
4605
4606 TRACE("offset = %I64x\n", offset.QuadPart);
4607 TRACE("length = %lx\n", IrpSp->Parameters.Write.Length);
4608
4609 if (!Irp->AssociatedIrp.SystemBuffer) {
4610 buf = map_user_buffer(Irp, fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
4611
4612 if (Irp->MdlAddress && !buf) {
4613 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
4614 Status = STATUS_INSUFFICIENT_RESOURCES;
4615 goto exit;
4616 }
4617 } else
4618 buf = Irp->AssociatedIrp.SystemBuffer;
4619
4620 TRACE("buf = %p\n", buf);
4621
4622 if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
4623 WARN("tried to write to locked region\n");
4624 Status = STATUS_FILE_LOCK_CONFLICT;
4625 goto exit;
4626 }
4627
4628 Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE,
4629 wait, deferred_write, true, &rollback);
4630
4631 if (Status == STATUS_PENDING)
4632 goto exit;
4633 else if (!NT_SUCCESS(Status)) {
4634 ERR("write_file2 returned %08lx\n", Status);
4635 goto exit;
4636 }
4637
4638 if (NT_SUCCESS(Status)) {
4639 if (diskacc && Status != STATUS_PENDING && Irp->Flags & IRP_NOCACHE) {
4640 PETHREAD thread = NULL;
4641
4642 if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
4643 thread = Irp->Tail.Overlay.Thread;
4644 else if (!IoIsSystemThread(PsGetCurrentThread()))
4645 thread = PsGetCurrentThread();
4646 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
4647 thread = PsGetCurrentThread();
4648
4649 if (thread)
4650 fPsUpdateDiskCounters(PsGetThreadProcess(thread), 0, IrpSp->Parameters.Write.Length, 0, 1, 0);
4651 }
4652 }
4653
4654 exit:
4655 if (NT_SUCCESS(Status))
4656 clear_rollback(&rollback);
4657 else
4658 do_rollback(Vcb, &rollback);
4659
4660 return Status;
4661 }
4662
4663 _Dispatch_type_(IRP_MJ_WRITE)
_Function_class_(DRIVER_DISPATCH)4664 _Function_class_(DRIVER_DISPATCH)
4665 __attribute__((nonnull(1,2)))
4666 NTSTATUS __stdcall drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
4667 NTSTATUS Status;
4668 bool top_level;
4669 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4670 device_extension* Vcb = DeviceObject->DeviceExtension;
4671 PFILE_OBJECT FileObject = IrpSp->FileObject;
4672 fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4673 ccb* ccb = FileObject ? FileObject->FsContext2 : NULL;
4674 bool wait = FileObject ? IoIsOperationSynchronous(Irp) : true;
4675
4676 FsRtlEnterFileSystem();
4677
4678 top_level = is_top_level(Irp);
4679
4680 if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
4681 Status = vol_write(DeviceObject, Irp);
4682 goto exit;
4683 } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
4684 Status = STATUS_INVALID_PARAMETER;
4685 goto end;
4686 }
4687
4688 if (!fcb) {
4689 ERR("fcb was NULL\n");
4690 Status = STATUS_INVALID_PARAMETER;
4691 goto end;
4692 }
4693
4694 if (!ccb) {
4695 ERR("ccb was NULL\n");
4696 Status = STATUS_INVALID_PARAMETER;
4697 goto end;
4698 }
4699
4700 if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) {
4701 WARN("insufficient permissions\n");
4702 Status = STATUS_ACCESS_DENIED;
4703 goto end;
4704 }
4705
4706 if (fcb == Vcb->volume_fcb) {
4707 if (!Vcb->locked || Vcb->locked_fileobj != FileObject) {
4708 ERR("trying to write to volume when not locked, or locked with another FileObject\n");
4709 Status = STATUS_ACCESS_DENIED;
4710 goto end;
4711 }
4712
4713 TRACE("writing directly to volume\n");
4714
4715 IoSkipCurrentIrpStackLocation(Irp);
4716
4717 Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
4718 goto exit;
4719 }
4720
4721 if (is_subvol_readonly(fcb->subvol, Irp)) {
4722 Status = STATUS_ACCESS_DENIED;
4723 goto end;
4724 }
4725
4726 if (Vcb->readonly) {
4727 Status = STATUS_MEDIA_WRITE_PROTECTED;
4728 goto end;
4729 }
4730
4731 _SEH2_TRY {
4732 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
4733 CcMdlWriteComplete(IrpSp->FileObject, &IrpSp->Parameters.Write.ByteOffset, Irp->MdlAddress);
4734
4735 Irp->MdlAddress = NULL;
4736 Status = STATUS_SUCCESS;
4737 } else {
4738 if (!(Irp->Flags & IRP_PAGING_IO))
4739 FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);
4740
4741 // Don't offload jobs when doing paging IO - otherwise this can lead to
4742 // deadlocks in CcCopyWrite.
4743 if (Irp->Flags & IRP_PAGING_IO)
4744 wait = true;
4745
4746 Status = write_file(Vcb, Irp, wait, false);
4747 }
4748 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4749 Status = _SEH2_GetExceptionCode();
4750 } _SEH2_END;
4751
4752 end:
4753 Irp->IoStatus.Status = Status;
4754
4755 TRACE("wrote %Iu bytes\n", Irp->IoStatus.Information);
4756
4757 if (Status != STATUS_PENDING)
4758 IoCompleteRequest(Irp, IO_NO_INCREMENT);
4759 else {
4760 IoMarkIrpPending(Irp);
4761
4762 if (!add_thread_job(Vcb, Irp))
4763 Status = do_write_job(Vcb, Irp);
4764 }
4765
4766 exit:
4767 if (top_level)
4768 IoSetTopLevelIrp(NULL);
4769
4770 TRACE("returning %08lx\n", Status);
4771
4772 FsRtlExitFileSystem();
4773
4774 return Status;
4775 }
4776