1 ///
2 module std.experimental.allocator.building_blocks.bitmapped_block;
3 
4 import std.experimental.allocator.building_blocks.null_allocator;
5 import std.experimental.allocator.common;
6 
7 /**
8 
9 $(D BitmappedBlock) implements a simple heap consisting of one contiguous area
10 of memory organized in blocks, each of size $(D theBlockSize). A block is a unit
11 of allocation. A bitmap serves as bookkeeping data, more precisely one bit per
12 block indicating whether that block is currently allocated or not.
13 
14 Passing $(D NullAllocator) as $(D ParentAllocator) (the default) means user code
15 manages allocation of the memory block from the outside; in that case
16 $(D BitmappedBlock) must be constructed with a preallocated $(D ubyte[]) block and
17 has no responsibility regarding the lifetime of its underlying storage.
18 If another allocator type is passed, $(D BitmappedBlock) defines a destructor that
19 uses the parent allocator to release the memory block. That makes the combination of $(D AllocatorList), $(D BitmappedBlock), and a back-end allocator such as $(D MmapAllocator) a simple and scalable solution for memory allocation.
20 
21 There are advantages to storing bookkeeping data separated from the payload
22 (as opposed to e.g. using $(D AffixAllocator) to store metadata together with
23 each allocation). The layout is more compact (overhead is one bit per block),
24 searching for a free block during allocation enjoys better cache locality, and
25 deallocation does not touch memory around the payload being deallocated (which
26 is often cold).
27 
28 Allocation requests are handled on a first-fit basis. Although linear in
29 complexity, allocation is in practice fast because of the compact bookkeeping
30 representation, use of simple and fast bitwise routines, and caching of the
31 first available block position. A known issue with this general approach is
32 fragmentation, partially mitigated by coalescing. Since $(D BitmappedBlock) does
33 not need to maintain the allocated size, freeing memory implicitly coalesces
34 free blocks together. Also, tuning $(D blockSize) has a considerable impact on
35 both internal and external fragmentation.
36 
37 The size of each block can be selected either during compilation or at run
38 time. Statically-known block sizes are frequent in practice and yield slightly
39 better performance. To choose a block size statically, pass it as the $(D
40 blockSize) parameter, as in $(D BitmappedBlock!(4096)). To choose the block
41 size at run time, use $(D BitmappedBlock!(chooseAtRuntime)) and pass the
42 block size to the constructor.
43 
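As a quick orientation, here is a minimal sketch of the statically sized case.
The GC-allocated buffer is used purely for brevity; any suitably aligned
$(D ubyte[]) works, and the sizes are illustrative:

---
// 64-byte blocks carved out of a 64 KB buffer.
auto heap = BitmappedBlock!(64)(new ubyte[1024 * 64]);
auto buf = heap.allocate(100); // occupies two 64-byte blocks internally
assert(buf.length == 100);
heap.deallocate(buf);
---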
44 */
45 struct BitmappedBlock(size_t theBlockSize, uint theAlignment = platformAlignment,
46     ParentAllocator = NullAllocator)
47 {
48     import std.conv : text;
49     import std.traits : hasMember;
50     import std.typecons : Ternary;
51     import std.typecons : tuple, Tuple;
52 
53     @system unittest
54     {
55         import std.algorithm.comparison : max;
56         import std.experimental.allocator.mallocator : AlignedMallocator;
57         auto m = cast(ubyte[])(AlignedMallocator.instance.alignedAllocate(1024 * 64,
58                                 max(theAlignment, cast(uint) size_t.sizeof)));
59         scope(exit) AlignedMallocator.instance.deallocate(m);
60         testAllocator!(() => BitmappedBlock(m));
61     }
62     static assert(theBlockSize > 0 && theAlignment.isGoodStaticAlignment);
63     static assert(theBlockSize == chooseAtRuntime
64         || theBlockSize % theAlignment == 0,
65         "Block size must be a multiple of the alignment");
66 
67     /**
68     If $(D blockSize == chooseAtRuntime), $(D BitmappedBlock) offers a read/write
69     property $(D blockSize). It must be set before any use of the allocator.
70     Otherwise (i.e. $(D theBlockSize) is a statically known constant), $(D
71     blockSize) is an alias for $(D theBlockSize). Whether constant or variable,
72     the block size must also be a multiple of $(D alignment). This constraint
73     is $(D assert)ed statically and dynamically.
74     */
75     static if (theBlockSize != chooseAtRuntime)
76     {
77         alias blockSize = theBlockSize;
78     }
79     else
80     {
81         @property uint blockSize() { return _blockSize; }
82         @property void blockSize(uint s)
83         {
84             assert(!_control && s % alignment == 0);
85             _blockSize = s;
86         }
87         private uint _blockSize;
88     }
89 
90     static if (is(ParentAllocator == NullAllocator))
91     {
92         private enum parentAlignment = platformAlignment;
93     }
94     else
95     {
96         private alias parentAlignment = ParentAllocator.alignment;
97         static assert(parentAlignment >= ulong.alignof);
98     }
99 
100     /**
101     The _alignment offered is user-configurable statically through parameter
102     $(D theAlignment), defaulted to $(D platformAlignment).
103     */
104     alias alignment = theAlignment;
105 
106     // state {
107     /**
108     The _parent allocator. Depending on whether $(D ParentAllocator) holds state
109     or not, this is a member variable or an alias for
110     `ParentAllocator.instance`.
111     */
112     static if (stateSize!ParentAllocator)
113     {
114         ParentAllocator parent;
115     }
116     else
117     {
118         alias parent = ParentAllocator.instance;
119     }
120     private uint _blocks;
121     private BitVector _control;
122     private void[] _payload;
123     private size_t _startIdx;
124     // }
125 
    // Computes how many bytes to request from the parent allocator so that,
    // after carving out the control bitmap and alignment slack, at least
    // `capacity` bytes of payload are available.
126     private size_t totalAllocation(size_t capacity)
127     {
128         auto blocks = capacity.divideRoundUp(blockSize);
129         auto leadingUlongs = blocks.divideRoundUp(64);
130         import std.algorithm.comparison : min;
131         immutable initialAlignment = min(parentAlignment,
132             1U << trailingZeros(leadingUlongs * 8));
133         auto maxSlack = alignment <= initialAlignment
134             ? 0
135             : alignment - initialAlignment;
136         //writeln(maxSlack);
137         return leadingUlongs * 8 + maxSlack + blockSize * blocks;
138     }
139 
140     /**
141     Constructs a block allocator given a hunk of memory, or a desired capacity
142     in bytes.
143 
144     $(UL
145     $(LI If $(D ParentAllocator) is $(D NullAllocator), only the constructor
146     taking $(D data) is defined and the user is responsible for freeing $(D
147     data) if desired.)
148     $(LI Otherwise, both constructors are defined. The $(D data)-based
149     constructor assumes memory has been allocated with the parent allocator.
150     The $(D capacity)-based constructor uses $(D ParentAllocator) to allocate
151     an appropriate contiguous hunk of memory. Regardless of the constructor
152     used, the destructor releases the memory by using $(D
153     ParentAllocator.deallocate).)
154     )
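
    For instance, with $(D GCAllocator) as the parent, the capacity-based
    constructor can be used roughly as follows (a sketch; the figures are
    illustrative):

    ---
    import std.experimental.allocator.gc_allocator : GCAllocator;
    // 64-byte blocks, 8-byte alignment, backing memory obtained from the GC.
    auto a = BitmappedBlock!(64, 8, GCAllocator)(1024 * 64);
    ---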
155     */
156     this(ubyte[] data)
157     {
158         immutable a = data.ptr.effectiveAlignment;
159         assert(a >= size_t.alignof || !data.ptr,
160             "Data must be aligned properly");
161 
162         immutable ulong totalBits = data.length * 8;
163         immutable ulong bitsPerBlock = blockSize * 8 + 1;
164         // Get a first estimate
165         import std.conv : to;
166         _blocks = to!uint(totalBits / bitsPerBlock);
167 
168         // Reality is a bit more complicated, iterate until a good number of
169         // blocks found.
170         for (; _blocks; --_blocks)
171         {
172             immutable controlWords = _blocks.divideRoundUp(64);
173             auto payload = data[controlWords * 8 .. $].roundStartToMultipleOf(
174                 alignment);
175             if (payload.length < _blocks * blockSize)
176             {
177                 // Overestimated
178                 continue;
179             }
180             _control = BitVector((cast(ulong*) data.ptr)[0 .. controlWords]);
181             _control[] = 0;
182             _payload = payload;
183             break;
184         }
185     }
186 
187     /// Ditto
188     static if (!is(ParentAllocator == NullAllocator))
189     this(size_t capacity)
190     {
191         size_t toAllocate = totalAllocation(capacity);
192         auto data = cast(ubyte[])(parent.allocate(toAllocate));
193         this(data);
194         assert(_blocks * blockSize >= capacity);
195     }
196 
197     /**
198     If $(D ParentAllocator) is not $(D NullAllocator) and defines $(D
199     deallocate), the destructor is defined to deallocate the block held.
200     */
201     static if (!is(ParentAllocator == NullAllocator)
202         && hasMember!(ParentAllocator, "deallocate"))
203     ~this()
204     {
205         auto start = _control.rep.ptr, end = _payload.ptr + _payload.length;
206         parent.deallocate(start[0 .. end - start]);
207     }
208 
209     /*
210     Adjusts the memoized _startIdx to the leftmost control word that has at
211     least one zero bit. Assumes all control words to the left of $(D
212     _control[_startIdx]) are already occupied.
213     */
214     private void adjustStartIdx()
215     {
216         while (_startIdx < _control.rep.length
217             && _control.rep[_startIdx] == ulong.max)
218         {
219             ++_startIdx;
220         }
221     }
222 
223     /*
224     Returns the blocks corresponding to the control bits starting at word index
225     wordIdx and bit index msbIdx (MSB=0) for a total of howManyBlocks.
226     */
227     private void[] blocksFor(size_t wordIdx, uint msbIdx, size_t howManyBlocks)
228     {
229         assert(msbIdx <= 63);
230         const start = (wordIdx * 64 + msbIdx) * blockSize;
231         const end = start + blockSize * howManyBlocks;
232         if (end <= _payload.length) return _payload[start .. end];
233         // This could happen if we have more control bits than available memory.
234         // That's possible because the control bits are rounded up to fit in
235         // 64-bit words.
236         return null;
237     }
238 
239     /**
240     Returns the actual bytes allocated when $(D n) bytes are requested, i.e.
241     $(D n.roundUpToMultipleOf(blockSize)).
242     */
243     size_t goodAllocSize(size_t n)
244     {
245         return n.roundUpToMultipleOf(blockSize);
246     }
247 
248     /**
249     Allocates $(D s) bytes of memory and returns it, or $(D null) if memory
250     could not be allocated.
251 
252     The following information might be of help with choosing the appropriate
253     block size. Actual allocation occurs in sizes multiple of the block size.
254     Allocating one block is the fastest because only one 0 bit needs to be
255     found in the metadata. Allocating 2 through 64 blocks is the next cheapest
256     because it affects a maximum of two $(D ulong)s in the metadata.
257     Allocations greater than 64 blocks require a multiword search through the
258     metadata.
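
    A sketch illustrating the three cost tiers above (assuming 64-byte blocks
    and an amply sized heap):

    ---
    auto a = BitmappedBlock!(64)(new ubyte[1024 * 64]);
    auto one  = a.allocate(60);       // one block: single-bit search
    auto few  = a.allocate(64 * 10);  // 10 blocks: touches at most two ulongs
    auto many = a.allocate(64 * 100); // 100 blocks: multiword search
    ---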
259     */
260     @trusted void[] allocate(const size_t s)
261     {
262         const blocks = s.divideRoundUp(blockSize);
263         void[] result = void;
264 
265     switcharoo:
266         switch (blocks)
267         {
268         case 1:
269             // inline code here for speed
270             // find the next available block
271             foreach (i; _startIdx .. _control.rep.length)
272             {
273                 const w = _control.rep[i];
274                 if (w == ulong.max) continue;
275                 uint j = leadingOnes(w);
276                 assert(j < 64);
277                 assert((_control.rep[i] & ((1UL << 63) >> j)) == 0);
278                 _control.rep[i] |= (1UL << 63) >> j;
279                 if (i == _startIdx)
280                 {
281                     adjustStartIdx();
282                 }
283                 result = blocksFor(i, j, 1);
284                 break switcharoo;
285             }
286             goto case 0; // fall through
287         case 0:
288             return null;
289         case 2: .. case 64:
290             result = smallAlloc(cast(uint) blocks);
291             break;
292         default:
293             result = hugeAlloc(blocks);
294             break;
295         }
296         return result.ptr ? result.ptr[0 .. s] : null;
297     }
298 
299     /**
300     Allocates a block with specified alignment $(D a). The alignment must be a
301 power of 2. If $(D a <= alignment), the function forwards to $(D allocate).
302     Otherwise, it attempts to overallocate and then adjust the result for
303     proper alignment. In the worst case the slack memory is around two blocks.
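
    A sketch (the block size and requested alignment are illustrative):

    ---
    auto a = BitmappedBlock!(64)(new ubyte[1024 * 64]);
    auto b = a.alignedAllocate(100, 4096);
    // On success the result honors the requested alignment.
    assert(b is null || (cast(size_t) b.ptr) % 4096 == 0);
    ---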
304     */
305     void[] alignedAllocate(size_t n, uint a)
306     {
307         import std.math : isPowerOf2;
308         assert(a.isPowerOf2);
309         if (a <= alignment) return allocate(n);
310 
311         // Overallocate to make sure we can get an aligned block
312         auto b = allocate((n + a - alignment).roundUpToMultipleOf(blockSize));
313         if (!b.ptr) return null;
314         auto result = b.roundStartToMultipleOf(a);
315         assert(result.length >= n);
316         result = result.ptr[0 .. n]; // final result
317 
318         // Free any blocks that might be slack at the beginning
319         auto slackHeadingBlocks = (result.ptr - b.ptr) / blockSize;
320         if (slackHeadingBlocks)
321         {
322             deallocate(b[0 .. slackHeadingBlocks * blockSize]);
323         }
324 
325         // Free any blocks that might be slack at the end
326         auto slackTrailingBlocks = ((b.ptr + b.length)
327             - (result.ptr + result.length)) / blockSize;
328         if (slackTrailingBlocks)
329         {
330             deallocate(b[$ - slackTrailingBlocks * blockSize .. $]);
331         }
332 
333         return result;
334     }
335 
336     /**
337     If the $(D BitmappedBlock) object is empty (has no active allocation), allocates
338     all memory within and returns a slice to it. Otherwise, returns $(D null)
339     (i.e. no attempt is made to allocate the largest available block).
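
    A sketch:

    ---
    auto a = BitmappedBlock!(64)(new ubyte[1024]);
    auto everything = a.allocateAll();
    assert(everything.length > 0);
    assert(a.allocateAll() is null); // the allocator is no longer empty
    ---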
340     */
341     void[] allocateAll()
342     {
343         if (empty != Ternary.yes) return null;
344         _control[] = 1;
345         return _payload;
346     }
347 
348     /**
349     Returns `Ternary.yes` if `b` belongs to the `BitmappedBlock` object,
350     `Ternary.no` otherwise. Never returns `Ternary.unknown`. (This
351     method is somewhat tolerant in that it accepts an interior slice.)
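
    A sketch:

    ---
    auto a = BitmappedBlock!(64)(new ubyte[1024]);
    auto b = a.allocate(100);
    assert(a.owns(b) == Ternary.yes);
    assert(a.owns(b[10 .. 20]) == Ternary.yes); // interior slices are accepted
    assert(a.owns(new ubyte[10]) == Ternary.no);
    ---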
352     */
353     Ternary owns(void[] b) const
354     {
355         //if (!b.ptr) return Ternary.no;
356         assert(b.ptr !is null || b.length == 0, "Corrupt block.");
357         return Ternary(b.ptr >= _payload.ptr
358             && b.ptr + b.length <= _payload.ptr + _payload.length);
359     }
360 
361     /*
362     Tries to allocate "blocks" blocks at the exact position indicated by the
363     position wordIdx/msbIdx (msbIdx counts from MSB, i.e. MSB has index 0). If
364     it succeeds, fills "result" with the result and returns tuple(size_t.max,
365     0). Otherwise, returns a tuple with the next position to search.
366     */
367     private Tuple!(size_t, uint) allocateAt(size_t wordIdx, uint msbIdx,
368             size_t blocks, ref void[] result)
369     {
370         assert(blocks > 0);
371         assert(wordIdx < _control.rep.length);
372         assert(msbIdx <= 63);
373         if (msbIdx + blocks <= 64)
374         {
375             // Allocation should fit this control word
376             if (setBitsIfZero(_control.rep[wordIdx],
377                     cast(uint) (64 - msbIdx - blocks), 63 - msbIdx))
378             {
379                 // Success
380                 result = blocksFor(wordIdx, msbIdx, blocks);
381                 return tuple(size_t.max, 0u);
382             }
383             // Can't allocate, make a suggestion
384             return msbIdx + blocks == 64
385                 ? tuple(wordIdx + 1, 0u)
386                 : tuple(wordIdx, cast(uint) (msbIdx + blocks));
387         }
388         // Allocation spans two control words or more
389         immutable mask = ulong.max >> msbIdx;
390         if (_control.rep[wordIdx] & mask)
391         {
392             // We can't allocate the rest of this control word,
393             // return a suggestion.
394             return tuple(wordIdx + 1, 0u);
395         }
396         // We can allocate the rest of this control word, but we first need to
397         // make sure we can allocate the tail.
398         if (wordIdx + 1 == _control.rep.length)
399         {
400             // No more memory
401             return tuple(_control.rep.length, 0u);
402         }
403         auto hint = allocateAt(wordIdx + 1, 0, blocks - 64 + msbIdx, result);
404         if (hint[0] == size_t.max)
405         {
406             // We did it!
407             _control.rep[wordIdx] |= mask;
408             result = blocksFor(wordIdx, msbIdx, blocks);
409             return tuple(size_t.max, 0u);
410         }
411         // Failed, return a suggestion that skips this whole run.
412         return hint;
413     }
414 
415     /* Allocates as many blocks as possible at the end of the control word
416     indicated by wordIdx. Returns the number of blocks allocated. */
417     private uint allocateAtTail(size_t wordIdx)
418     {
419         assert(wordIdx < _control.rep.length);
420         const available = trailingZeros(_control.rep[wordIdx]);
421         _control.rep[wordIdx] |= ulong.max >> available;
422         return available;
423     }
424 
425     private void[] smallAlloc(uint blocks)
426     {
427         assert(blocks >= 2 && blocks <= 64, text(blocks));
428         foreach (i; _startIdx .. _control.rep.length)
429         {
430             // Test within the current 64-bit word
431             const v = _control.rep[i];
432             if (v == ulong.max) continue;
433             auto j = findContigOnes(~v, blocks);
434             if (j < 64)
435             {
436                 // yay, found stuff
437                 setBits(_control.rep[i], 64 - j - blocks, 63 - j);
438                 return blocksFor(i, j, blocks);
439             }
440             // Next, try allocations that cross a word
441             auto available = trailingZeros(v);
442             if (available == 0) continue;
443             if (i + 1 >= _control.rep.length) break;
444             assert(available < blocks); // otherwise we should have found it
445             auto needed = blocks - available;
446             assert(needed > 0 && needed < 64);
447             if (allocateAtFront(i + 1, needed))
448             {
449                 // yay, found a block crossing two words
450                 _control.rep[i] |= (1UL << available) - 1;
451                 return blocksFor(i, 64 - available, blocks);
452             }
453         }
454         return null;
455     }
456 
457     private void[] hugeAlloc(size_t blocks)
458     {
459         assert(blocks > 64);
460         if (_startIdx == _control._rep.length)
461         {
462             assert(_control.allAre1);
463             return null;
464         }
465         auto i = _control.findZeros(blocks, _startIdx * 64);
466         if (i == i.max) return null;
467         // Allocate those bits
468         _control[i .. i + blocks] = 1;
469         return _payload[cast(size_t) (i * blockSize)
470             .. cast(size_t) ((i + blocks) * blockSize)];
471     }
472 
473     // Returns the number of blocks needed to hold sizeInBytes bytes (rounding up).
474     private size_t bytes2blocks(size_t sizeInBytes)
475     {
476         return (sizeInBytes + blockSize - 1) / blockSize;
477     }
478 
479     /* Allocates the given number of blocks at the beginning of the control
480     word indicated by wordIdx. Returns true on success, false otherwise. */
481     private bool allocateAtFront(size_t wordIdx, uint blocks)
482     {
483         assert(wordIdx < _control.rep.length && blocks >= 1 && blocks <= 64);
484         const mask = (1UL << (64 - blocks)) - 1;
485         if (_control.rep[wordIdx] > mask) return false;
486         // yay, works
487         _control.rep[wordIdx] |= ~mask;
488         return true;
489     }
490 
491     /**
492     Expands an allocated block in place.
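
    A sketch (assuming 64-byte blocks):

    ---
    auto a = BitmappedBlock!(64)(new ubyte[1024 * 64]);
    auto b = a.allocate(100);
    // Grows within the slack of the last block, or by claiming free blocks
    // immediately after it; fails if those blocks are taken.
    if (a.expand(b, 28))
        assert(b.length == 128);
    ---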
493     */
494     @trusted bool expand(ref void[] b, immutable size_t delta)
495     {
496         // Dispose with trivial corner cases
497         if (delta == 0) return true;
498         if (b is null) return false;
499 
500         /* To simplify matters, refuse to expand buffers that don't start at a block start (this may be the case for blocks allocated with alignedAllocate).
501         */
502         if ((b.ptr - _payload.ptr) % blockSize) return false;
503 
504         const blocksOld = bytes2blocks(b.length);
505         const blocksNew = bytes2blocks(b.length + delta);
506         assert(blocksOld <= blocksNew);
507 
508         // Possibly we have enough slack at the end of the block!
509         if (blocksOld == blocksNew)
510         {
511             b = b.ptr[0 .. b.length + delta];
512             return true;
513         }
514 
515         assert((b.ptr - _payload.ptr) % blockSize == 0);
516         const blockIdx = (b.ptr - _payload.ptr) / blockSize;
517         const blockIdxAfter = blockIdx + blocksOld;
518 
519         // Try the maximum
520         const wordIdx = blockIdxAfter / 64,
521             msbIdx = cast(uint) (blockIdxAfter % 64);
522         void[] p;
523         auto hint = allocateAt(wordIdx, msbIdx,  blocksNew - blocksOld, p);
524         if (hint[0] != size_t.max)
525         {
526             return false;
527         }
528         // Expansion successful
529         assert(p.ptr == b.ptr + blocksOld * blockSize,
530             text(p.ptr, " != ", b.ptr + blocksOld * blockSize));
531         b = b.ptr[0 .. b.length + delta];
532         return true;
533     }
534 
535     /**
536     Reallocates a previously-allocated block. Contractions occur in place.
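
    A sketch (assuming 64-byte blocks):

    ---
    auto a = BitmappedBlock!(64)(new ubyte[1024 * 64]);
    auto b = a.allocate(256);
    a.reallocate(b, 128) || assert(0); // shrinks in place
    a.reallocate(b, 512) || assert(0); // may move the block to grow it
    assert(b.length == 512);
    ---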
537     */
538     @system bool reallocate(ref void[] b, size_t newSize)
539     {
540         if (!b.ptr)
541         {
542             b = allocate(newSize);
543             return b.length == newSize;
544         }
545         if (newSize == 0)
546         {
547             deallocate(b);
548             b = null;
549             return true;
550         }
551         if (newSize < b.length)
552         {
553             // Shrink. Will shrink in place by deallocating the trailing part.
554             auto newCapacity = bytes2blocks(newSize) * blockSize;
555             deallocate(b[newCapacity .. $]);
556             b = b[0 .. newSize];
557             return true;
558         }
559         // Go the slow route
560         return .reallocate(this, b, newSize);
561     }
562 
563     /**
564     Reallocates a block previously allocated with $(D alignedAllocate). Contractions do not occur in place.
565     */
566     @system bool alignedReallocate(ref void[] b, size_t newSize, uint a)
567     {
568         if (newSize == 0)
569         {
570             deallocate(b);
571             b = null;
572             return true;
573         }
574         // Go the slow route
575         return .alignedReallocate(this, b, newSize, a);
576     }
577 
578     /**
579     Deallocates a block previously allocated with this allocator.
580     */
581     bool deallocate(void[] b)
582     {
583         if (b is null) return true;
584 
585         // Locate position
586         immutable pos = b.ptr - _payload.ptr;
587         immutable blockIdx = pos / blockSize;
588 
589         // Adjust pointer, might be inside a block due to alignedAllocate
590         auto begin = _payload.ptr + blockIdx * blockSize,
591             end = b.ptr + b.length;
592         b = begin[0 .. end - begin];
593         // Round up size to multiple of block size
594         auto blocks = b.length.divideRoundUp(blockSize);
595 
596         // Get into details
597         auto wordIdx = blockIdx / 64, msbIdx = cast(uint) (blockIdx % 64);
598         if (_startIdx > wordIdx) _startIdx = wordIdx;
599 
600         // Three stages: heading bits, full words, leftover bits
601         if (msbIdx)
602         {
603             if (blocks + msbIdx <= 64)
604             {
605                 resetBits(_control.rep[wordIdx],
606                     cast(uint) (64 - msbIdx - blocks),
607                     63 - msbIdx);
608                 return true;
609             }
610             else
611             {
612                 _control.rep[wordIdx] &= ulong.max << 64 - msbIdx;
613                 blocks -= 64 - msbIdx;
614                 ++wordIdx;
615                 msbIdx = 0;
616             }
617         }
618 
619         // Stage 2: reset one word at a time
620         for (; blocks >= 64; blocks -= 64)
621         {
622             _control.rep[wordIdx++] = 0;
623         }
624 
625         // Stage 3: deal with leftover bits, if any
626         assert(wordIdx <= _control.rep.length);
627         if (blocks)
628         {
629             _control.rep[wordIdx] &= ulong.max >> blocks;
630         }
631         return true;
632     }
633 
634     /**
635     Forcibly deallocates all memory allocated by this allocator, making it
636     available for further allocations. Does not return memory to $(D
637     ParentAllocator).
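
    A sketch:

    ---
    auto a = BitmappedBlock!(64)(new ubyte[1024]);
    auto b = a.allocate(100);
    a.deallocateAll();
    assert(a.empty == Ternary.yes); // b is now dangling
    ---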
638     */
639     bool deallocateAll()
640     {
641         _control[] = 0;
642         _startIdx = 0;
643         return true;
644     }
645 
646     /**
647     Returns `Ternary.yes` if no memory is currently allocated with this
648     allocator, otherwise `Ternary.no`. This method never returns
649     `Ternary.unknown`.
650     */
651     Ternary empty()
652     {
653         return Ternary(_control.allAre0());
654     }
655 
656     void dump()
657     {
658         import std.stdio : writefln, writeln;
659         writefln("%s @ %s {", typeid(this), cast(void*) _control._rep.ptr);
660         scope(exit) writeln("}");
661         assert(_payload.length == blockSize * _blocks);
662         assert(_control.length >= _blocks);
663         writefln("  _startIdx=%s; blockSize=%s; blocks=%s",
664             _startIdx, blockSize, _blocks);
665         if (!_control.length) return;
666         uint blockCount = 1;
667         bool inAllocatedStore = _control[0];
668         void* start = _payload.ptr;
669         for (size_t i = 1;; ++i)
670         {
671             if (i >= _blocks || _control[i] != inAllocatedStore)
672             {
673                 writefln("  %s block at 0x%s, length: %s (%s*%s)",
674                     inAllocatedStore ? "Busy" : "Free",
675                     cast(void*) start,
676                     blockCount * blockSize,
677                     blockCount, blockSize);
678                 if (i >= _blocks) break;
679                 assert(i < _control.length);
680                 inAllocatedStore = _control[i];
681                 start = _payload.ptr + blockCount * blockSize;
682                 blockCount = 1;
683             }
684             else
685             {
686                 ++blockCount;
687             }
688         }
689     }
690 }
691 
692 ///
693 @system unittest
694 {
695     // Create a block allocator on top of a 10KB stack region.
696     import std.experimental.allocator.building_blocks.region : InSituRegion;
697     import std.traits : hasMember;
698     InSituRegion!(10_240, 64) r;
699     auto a = BitmappedBlock!(64, 64)(cast(ubyte[])(r.allocateAll()));
700     static assert(hasMember!(InSituRegion!(10_240, 64), "allocateAll"));
701     const b = a.allocate(100);
702     assert(b.length == 100);
703 }
704 
705 @system unittest
706 {
707     import std.experimental.allocator.gc_allocator : GCAllocator;
708     testAllocator!(() => BitmappedBlock!(64, 8, GCAllocator)(1024 * 64));
709 }
710 
711 @system unittest
712 {
713     static void testAllocateAll(size_t bs)(uint blocks, uint blocksAtATime)
714     {
715         import std.algorithm.comparison : min;
716         assert(bs);
717         import std.experimental.allocator.gc_allocator : GCAllocator;
718         auto a = BitmappedBlock!(bs, min(bs, platformAlignment))(
719             cast(ubyte[])(GCAllocator.instance.allocate((blocks * bs * 8 +
720                         blocks) / 8))
721         );
722         import std.conv : text;
723         assert(blocks >= a._blocks, text(blocks, " < ", a._blocks));
724         blocks = a._blocks;
725 
726         // test allocation of 0 bytes
727         auto x = a.allocate(0);
728         assert(x is null);
729         // test allocation of 1 byte
730         x = a.allocate(1);
731         assert(x.length == 1 || blocks == 0,
732             text(x.ptr, " ", x.length, " ", a));
733         a.deallocateAll();
734 
735         bool twice = true;
736 
737     begin:
738         foreach (i; 0 .. blocks / blocksAtATime)
739         {
740             auto b = a.allocate(bs * blocksAtATime);
741             assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
742         }
743         assert(a.allocate(bs * blocksAtATime) is null);
744         assert(a.allocate(1) is null);
745 
746         // Now deallocate all and do it again!
747         a.deallocateAll();
748 
749         // Test deallocation
750 
751         auto v = new void[][blocks / blocksAtATime];
752         foreach (i; 0 .. blocks / blocksAtATime)
753         {
754             auto b = a.allocate(bs * blocksAtATime);
755             assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
756             v[i] = b;
757         }
758         assert(a.allocate(bs * blocksAtATime) is null);
759         assert(a.allocate(1) is null);
760 
761         foreach (i; 0 .. blocks / blocksAtATime)
762         {
763             a.deallocate(v[i]);
764         }
765 
766         foreach (i; 0 .. blocks / blocksAtATime)
767         {
768             auto b = a.allocate(bs * blocksAtATime);
769             assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
770             v[i] = b;
771         }
772 
773         foreach (i; 0 .. v.length)
774         {
775             a.deallocate(v[i]);
776         }
777 
778         if (twice)
779         {
780             twice = false;
781             goto begin;
782         }
783 
784         a.deallocateAll;
785 
786         // test expansion
787         if (blocks >= blocksAtATime)
788         {
789             foreach (i; 0 .. blocks / blocksAtATime - 1)
790             {
791                 auto b = a.allocate(bs * blocksAtATime);
792                 assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
793                 (cast(ubyte[]) b)[] = 0xff;
794                 a.expand(b, blocksAtATime * bs)
795                     || assert(0, text(i));
796                 (cast(ubyte[]) b)[] = 0xfe;
797                 assert(b.length == bs * blocksAtATime * 2, text(i, ": ", b.length));
798                 a.reallocate(b, blocksAtATime * bs) || assert(0);
799                 assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
800             }
801         }
802     }
803 
804     testAllocateAll!(1)(0, 1);
805     testAllocateAll!(1)(8, 1);
806     testAllocateAll!(4096)(128, 1);
807 
808     testAllocateAll!(1)(0, 2);
809     testAllocateAll!(1)(128, 2);
810     testAllocateAll!(4096)(128, 2);
811 
812     testAllocateAll!(1)(0, 4);
813     testAllocateAll!(1)(128, 4);
814     testAllocateAll!(4096)(128, 4);
815 
816     testAllocateAll!(1)(0, 3);
817     testAllocateAll!(1)(24, 3);
818     testAllocateAll!(3008)(100, 1);
819     testAllocateAll!(3008)(100, 3);
820 
821     testAllocateAll!(1)(0, 128);
822     testAllocateAll!(1)(128 * 1, 128);
823     testAllocateAll!(128 * 20)(13 * 128, 128);
824 }
825 
826 // Test totalAllocation
827 @safe unittest
828 {
829     BitmappedBlock!(8, 8, NullAllocator) h1;
830     assert(h1.totalAllocation(1) >= 8);
831     assert(h1.totalAllocation(64) >= 64);
832     assert(h1.totalAllocation(8 * 64) >= 8 * 64);
833     assert(h1.totalAllocation(8 * 63) >= 8 * 63);
834     assert(h1.totalAllocation(8 * 64 + 1) >= 8 * 65);
835 
836     BitmappedBlock!(64, 8, NullAllocator) h2;
837     assert(h2.totalAllocation(1) >= 64);
838     assert(h2.totalAllocation(64 * 64) >= 64 * 64);
839 
840     BitmappedBlock!(4096, 4096, NullAllocator) h3;
841     assert(h3.totalAllocation(1) >= 4096);
842     assert(h3.totalAllocation(64 * 4096) >= 64 * 4096);
843     assert(h3.totalAllocation(64 * 4096 + 1) >= 65 * 4096);
844 }
845 
846 // BitmappedBlockWithInternalPointers
847 /**
848 
849 A $(D BitmappedBlock) with additional structure for supporting $(D
850 resolveInternalPointer). To that end, $(D BitmappedBlockWithInternalPointers) adds a
851 bitmap (one bit per block) that marks object starts. The bitmap itself has
852 variable size and is allocated together with regular allocations.
853 
854 The time complexity of $(D resolveInternalPointer) is $(BIGOH k), where $(D k)
855 is the size of the object within which the internal pointer is looked up.
856 
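A sketch of resolving an interior pointer back to its enclosing allocation
(mirroring the unittest further below):

---
import std.typecons : Ternary;
auto h = BitmappedBlockWithInternalPointers!(4096)(new ubyte[4096 * 1024]);
auto b = h.allocate(123);
void[] full;
assert(h.resolveInternalPointer(b.ptr + 17, full) == Ternary.yes);
assert(full.ptr is b.ptr && full.length >= b.length);
---
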
857 */
858 struct BitmappedBlockWithInternalPointers(
859     size_t theBlockSize, uint theAlignment = platformAlignment,
860     ParentAllocator = NullAllocator)
861 {
862     import std.conv : text;
863     import std.typecons : Ternary;
864     @system unittest
865     {
866         import std.experimental.allocator.mallocator : AlignedMallocator;
867         auto m = cast(ubyte[])(AlignedMallocator.instance.alignedAllocate(1024 * 64,
868             theAlignment));
869         scope(exit) AlignedMallocator.instance.deallocate(m);
870         testAllocator!(() => BitmappedBlockWithInternalPointers(m));
871     }
872 
873     // state {
874     private BitmappedBlock!(theBlockSize, theAlignment, NullAllocator) _heap;
875     private BitVector _allocStart;
876     // }
877 
878     /**
879     Constructors accepting desired capacity or a preallocated buffer, similar
880     in semantics to those of $(D BitmappedBlock).
881     */
882     this(ubyte[] data)
883     {
884         _heap = BitmappedBlock!(theBlockSize, theAlignment, ParentAllocator)(data);
885     }
886 
887     /// Ditto
888     static if (!is(ParentAllocator == NullAllocator))
889     this(size_t capacity)
890     {
891         // Add room for the _allocStart vector
892         _heap = BitmappedBlock!(theBlockSize, theAlignment, ParentAllocator)
893             (capacity + capacity.divideRoundUp(64));
894     }
895 
896     // Makes sure there's enough room for _allocStart
897     private bool ensureRoomForAllocStart(size_t len)
898     {
899         if (_allocStart.length >= len) return true;
900         // Must ensure there's room
901         immutable oldLength = _allocStart.rep.length;
902         immutable bits = len.roundUpToMultipleOf(64);
903         void[] b = _allocStart.rep;
904         if (!_heap.reallocate(b, bits / 8)) return false;
905         assert(b.length * 8 == bits, text(b.length * 8, " != ", bits));
906         _allocStart = BitVector(cast(ulong[]) b);
907         assert(_allocStart.rep.length * 64 == bits);
908         _allocStart.rep[oldLength .. $] = ulong.max;
909         return true;
910     }
911 
912     /**
913     Allocator primitives.
914     */
915     alias alignment = theAlignment;
916 
917     /// Ditto
918     size_t goodAllocSize(size_t n)
919     {
920         return n.roundUpToMultipleOf(_heap.blockSize);
921     }
922 
923     /// Ditto
924     void[] allocate(size_t bytes)
925     {
926         auto r = _heap.allocate(bytes);
927         if (!r.ptr) return r;
928         immutable block = (r.ptr - _heap._payload.ptr) / _heap.blockSize;
929         immutable blocks =
930             (r.length + _heap.blockSize - 1) / _heap.blockSize;
931         if (!ensureRoomForAllocStart(block + blocks))
932         {
933             // Failed, free r and bailout
934             _heap.deallocate(r);
935             return null;
936         }
937         assert(block < _allocStart.length);
938         assert(block + blocks <= _allocStart.length);
939         // Mark the _allocStart bits
940         assert(blocks > 0);
941         _allocStart[block] = 1;
942         _allocStart[block + 1 .. block + blocks] = 0;
943         assert(block + blocks == _allocStart.length
944             || _allocStart[block + blocks] == 1);
945         return r;
946     }
947 
948     /// Ditto
949     void[] allocateAll()
950     {
951         auto r = _heap.allocateAll();
952         if (!r.ptr) return r;
953         // Carve space at the end for _allocStart
954         auto p = alignDownTo(r.ptr + r.length - 8, ulong.alignof);
955         r = r[0 .. p - r.ptr];
956         // Initialize _allocStart
957         _allocStart = BitVector(cast(ulong[]) p[0 .. 8]);
958         _allocStart[] = 0;
959         immutable block = (r.ptr - _heap._payload.ptr) / _heap.blockSize;
960         assert(block < _allocStart.length);
961         _allocStart[block] = 1;
962         return r;
963     }
964 
965     /// Ditto
966     bool expand(ref void[] b, size_t bytes)
967     {
968         if (!bytes) return true;
969         if (b is null) return false;
970         immutable oldBlocks =
971             (b.length + _heap.blockSize - 1) / _heap.blockSize;
972         assert(oldBlocks);
973         immutable newBlocks =
974             (b.length + bytes + _heap.blockSize - 1) / _heap.blockSize;
975         assert(newBlocks >= oldBlocks);
976         immutable block = (b.ptr - _heap._payload.ptr) / _heap.blockSize;
977         assert(_allocStart[block]);
978         if (!ensureRoomForAllocStart(block + newBlocks)
979                 || !_heap.expand(b, bytes))
980         {
981             return false;
982         }
983         // Zero only the expanded bits
984         _allocStart[block + oldBlocks .. block + newBlocks] = 0;
985         assert(_allocStart[block]);
986         return true;
987     }
988 
989     /// Ditto
990     bool deallocate(void[] b)
991     {
992         // No need to touch _allocStart here - except for the first bit, it's
993         // meaningless in freed memory. The first bit is already 1.
994         return _heap.deallocate(b);
995         // TODO: one smart thing to do is reduce memory occupied by
996         // _allocStart if we're freeing the rightmost block.
997     }
998 
999     /// Ditto
1000     Ternary resolveInternalPointer(const void* p, ref void[] result)
1001     {
1002         if (p < _heap._payload.ptr
1003             || p >= _heap._payload.ptr + _heap._payload.length)
1004         {
1005             return Ternary.no;
1006         }
1007         // Find block start
1008         auto block = (p - _heap._payload.ptr) / _heap.blockSize;
1009         if (block >= _allocStart.length) return Ternary.no;
1010         // Within an allocation, must find the 1 just to the left of it
1011         auto i = _allocStart.find1Backward(block);
1012         if (i == i.max) return Ternary.no;
1013         auto j = _allocStart.find1(i + 1);
1014         result = _heap._payload.ptr[cast(size_t) (_heap.blockSize * i)
1015                                     .. cast(size_t) (_heap.blockSize * j)];
1016         return Ternary.yes;
1017     }
1018 
1019     /// Ditto
1020     Ternary empty()
1021     {
1022         return _heap.empty;
1023     }
1024 
1025     // Currently unused
1026     private void markAllAsUnused()
1027     {
1028         // Mark all deallocated memory with 1 so we minimize damage created by
1029         // false pointers. TODO: improve speed.
1030         foreach (i, ref e; _allocStart.rep)
1031         {
1032             // Set to 1 all bits in _allocStart[i] that were 0 in control, and
1033             // leave the others unchanged.
1034             // (0, 0) => 1; (0, 1) => 0; (1, 0) => 1; (1, 1) => 1
1035             e |= ~_heap._control.rep[i];
1036         }
1037         // Now zero all control bits
1038         _heap._control[] = 0;
1039         // EXCEPT for the _allocStart block itself
1040         markAsUsed(_allocStart.rep);
1041     }
1042 
1043     // Currently unused
1044     private bool markAsUsed(void[] b)
1045     {
1046         // Locate position
1047         immutable pos = b.ptr - _heap._payload.ptr;
1048         assert(pos % _heap.blockSize == 0);
1049         auto blockIdx = pos / _heap.blockSize;
1050         if (_heap._control[blockIdx]) return false;
1051         // Round up size to multiple of block size
1052         auto blocks = b.length.divideRoundUp(_heap.blockSize);
1053         _heap._control[blockIdx .. blockIdx + blocks] = 1;
1054         return true;
1055     }
1056 
1057     // Currently unused
1058     private void doneMarking()
1059     {
1060         // Nothing to do, what's free stays free.
1061     }
1062 }
1063 
1064 @system unittest
1065 {
1066     import std.typecons : Ternary;
1067 
1068     auto h = BitmappedBlockWithInternalPointers!(4096)(new ubyte[4096 * 1024]);
1069     auto b = h.allocate(123);
1070     assert(b.length == 123);
1071 
1072     void[] p;
1073     Ternary r = h.resolveInternalPointer(b.ptr + 17, p);
1074     assert(p.ptr is b.ptr);
1075     assert(p.length >= b.length);
1076     b = h.allocate(4096);
1077 
1078     h.resolveInternalPointer(b.ptr, p);
1079     assert(p is b);
1080 
1081     h.resolveInternalPointer(b.ptr + 11, p);
1082     assert(p is b);
1083 
1084     void[] unchanged = p;
1085     h.resolveInternalPointer(b.ptr - 40_970, p);
1086     assert(p is unchanged);
1087 
1088     assert(h.expand(b, 1));
1089     assert(b.length == 4097);
1090     h.resolveInternalPointer(b.ptr + 4096, p);
1091     assert(p.ptr is b.ptr);
1092 }
1093 
1094 /**
1095 Returns the number of most significant ones before a zero can be found in $(D
1096 x). If $(D x) contains no zeros (i.e. is equal to $(D ulong.max)), returns 64.
1097 */
1098 private uint leadingOnes(ulong x)
1099 {
1100     uint result = 0;
1101     while (cast(long) x < 0)
1102     {
1103         ++result;
1104         x <<= 1;
1105     }
1106     return result;
1107 }
1108 
1109 @system unittest
1110 {
1111     assert(leadingOnes(0) == 0);
1112     assert(leadingOnes(~0UL) == 64);
1113     assert(leadingOnes(0xF000_0000_0000_0000) == 4);
1114     assert(leadingOnes(0xE400_0000_0000_0000) == 3);
1115     assert(leadingOnes(0xC700_0200_0000_0000) == 2);
1116     assert(leadingOnes(0x8000_0030_0000_0000) == 1);
1117     assert(leadingOnes(0x2000_0000_0000_0000) == 0);
1118 }
1119 
1120 /**
1121 Finds a run of at least $(D n) contiguous ones in $(D x). Returns the index
of the run's first bit (MSB = 0), or 64 if no such run exists.
1122 */
1123 private uint findContigOnes(ulong x, uint n)
1124 {
1125     while (n > 1)
1126     {
1127         immutable s = n >> 1;
1128         x &= x << s;
1129         n -= s;
1130     }
1131     return leadingOnes(~x);
1132 }
1133 
1134 @system unittest
1135 {
1136     assert(findContigOnes(0x0000_0000_0000_0300, 2) == 54);
1137 
1138     assert(findContigOnes(~0UL, 1) == 0);
1139     assert(findContigOnes(~0UL, 2) == 0);
1140     assert(findContigOnes(~0UL, 32) == 0);
1141     assert(findContigOnes(~0UL, 64) == 0);
1142     assert(findContigOnes(0UL, 1) == 64);
1143 
1144     assert(findContigOnes(0x4000_0000_0000_0000, 1) == 1);
1145     assert(findContigOnes(0x0000_0F00_0000_0000, 4) == 20);
1146 }
1147 
1148 /*
1149 Unconditionally sets the bits from lsb through msb in w to one.
1150 */
1151 private void setBits(ref ulong w, uint lsb, uint msb)
1152 {
1153     assert(lsb <= msb && msb < 64);
1154     const mask = (ulong.max << lsb) & (ulong.max >> (63 - msb));
1155     w |= mask;
1156 }
1157 
1158 @system unittest
1159 {
1160     ulong w;
1161     w = 0; setBits(w, 0, 63); assert(w == ulong.max);
1162     w = 0; setBits(w, 1, 63); assert(w == ulong.max - 1);
1163     w = 6; setBits(w, 0, 1); assert(w == 7);
1164     w = 6; setBits(w, 3, 3); assert(w == 14);
1165 }
1166 
1167 /* Are the bits from lsb through msb in w all zero? If so, set them to 1
1168 and return true. Otherwise, return false.
1169 */
1170 private bool setBitsIfZero(ref ulong w, uint lsb, uint msb)
1171 {
1172     assert(lsb <= msb && msb < 64);
1173     const mask = (ulong.max << lsb) & (ulong.max >> (63 - msb));
1174     if (w & mask) return false;
1175     w |= mask;
1176     return true;
1177 }
1178 
1179 // Assigns bits in w from lsb through msb to zero.
1180 private void resetBits(ref ulong w, uint lsb, uint msb)
1181 {
1182     assert(lsb <= msb && msb < 64);
1183     const mask = (ulong.max << lsb) & (ulong.max >> (63 - msb));
1184     w &= ~mask;
1185 }
1186 
1187 /*
1188 Bit disposition is MSB=0 (leftmost, big endian).
1189 */
1190 private struct BitVector
1191 {
1192     ulong[] _rep;
1193 
1194     auto rep() { return _rep; }
1195 
1196     this(ulong[] data) { _rep = data; }
1197 
1198     void opSliceAssign(bool b) { _rep[] = b ? ulong.max : 0; }
1199 
1200     void opSliceAssign(bool b, ulong x, ulong y)
1201     {
1202         assert(x <= y && y <= _rep.length * 64);
1203         if (x == y) return;
1204         --y;
1205         assert(x / 64 <= size_t.max);
1206         immutable i1 = cast(size_t) (x / 64);
1207         immutable uint b1 = 63 - x % 64;
1208         assert(y / 64 <= size_t.max);
1209         immutable i2 = cast(size_t) (y / 64);
1210         immutable uint b2 = 63 - y % 64;
1211         assert(i1 <= i2 && i2 < _rep.length);
1212         if (i1 == i2)
1213         {
1214             // Inside the same word
1215             assert(b1 >= b2);
1216             if (b) setBits(_rep[i1], b2, b1);
1217             else resetBits(_rep[i1], b2, b1);
1218         }
1219         else
1220         {
1221             // Spans multiple words
1222             assert(i1 < i2);
1223             if (b) setBits(_rep[i1], 0, b1);
1224             else resetBits(_rep[i1], 0, b1);
1225             _rep[i1 + 1 .. i2] = b;
1226             if (b) setBits(_rep[i2], b2, 63);
1227             else resetBits(_rep[i2], b2, 63);
1228         }
1229     }
1230 
1231     bool opIndex(ulong x)
1232     {
1233         assert(x < length);
1234         return (_rep[cast(size_t) (x / 64)]
1235             & (0x8000_0000_0000_0000UL >> (x % 64))) != 0;
1236     }
1237 
1238     void opIndexAssign(bool b, ulong x)
1239     {
1240         assert(x / 64 <= size_t.max);
1241         immutable i = cast(size_t) (x / 64);
1242         immutable j = 0x8000_0000_0000_0000UL >> (x % 64);
1243         if (b) _rep[i] |= j;
1244         else _rep[i] &= ~j;
1245     }
1246 
1247     ulong length() const
1248     {
1249         return _rep.length * 64;
1250     }
1251 
1252     /* Returns the index of the first 1 to the right of i (including i itself),
1253     or length if not found.
1254     */
1255     ulong find1(ulong i)
1256     {
1257         assert(i < length);
1258         assert(i / 64 <= size_t.max);
1259         auto w = cast(size_t) (i / 64);
1260         immutable b = i % 64; // 0 through 63, 0 when i == 0
1261         immutable mask = ulong.max >> b;
1262         if (auto current = _rep[w] & mask)
1263         {
1264             // Great, found
1265             return w * 64 + leadingOnes(~current);
1266         }
1267         // The current word doesn't have the solution, find the leftmost 1
1268         // going to the right.
1269         for (++w; w < _rep.length; ++w)
1270         {
1271             if (auto current = _rep[w])
1272             {
1273                 return w * 64 + leadingOnes(~current);
1274             }
1275         }
1276         return length;
1277     }
1278 
1279     /* Returns the index of the first 1 to the left of i (including i itself),
1280     or ulong.max if not found.
1281     */
1282     ulong find1Backward(ulong i)
1283     {
1284         assert(i < length);
1285         auto w = cast(size_t) (i / 64);
1286         immutable b = 63 - (i % 64); // 0 through 63, 63 when i == 0
1287         immutable mask = ~((1UL << b) - 1);
1288         assert(mask != 0);
1289         // First, let's see if the current word has a bit larger than ours.
1290         if (auto currentWord = _rep[w] & mask)
1291         {
1292             // Great, this word contains the result.
1293             return w * 64 + 63 - currentWord.trailingZeros;
1294         }
1295         // The current word doesn't have the solution, find the rightmost 1
1296         // going to the left.
1297         while (w >= 1)
1298         {
1299             --w;
1300             if (auto currentWord = _rep[w])
1301                 return w * 64 + (63 - currentWord.trailingZeros);
1302         }
1303         return ulong.max;
1304     }
1305 
1306     /// Are all bits zero?
1307     bool allAre0() const
1308     {
1309         foreach (w; _rep) if (w) return false;
1310         return true;
1311     }
1312 
1313     /// Are all bits one?
1314     bool allAre1() const
1315     {
1316         foreach (w; _rep) if (w != ulong.max) return false;
1317         return true;
1318     }
1319 
1320     ulong findZeros(immutable size_t howMany, ulong start)
1321     {
1322         assert(start < length);
1323         assert(howMany > 64);
1324         auto i = cast(size_t) (start / 64);
1325         while (_rep[i] & 1)
1326         {
1327             // No trailing zeros in this word, try the next one
1328             if (++i == _rep.length) return ulong.max;
1329             start = i * 64;
1330         }
1331         // Adjust start to have only trailing zeros after it
1332         auto prefixLength = 64;
1333         while (_rep[i] & (ulong.max >> (64 - prefixLength)))
1334         {
1335             assert(prefixLength > 0);
1336             --prefixLength;
1337             ++start;
1338         }
1339 
1340         assert(howMany > prefixLength);
1341         auto needed = howMany - prefixLength;
1342         for (++i; needed >= 64; needed -= 64, ++i)
1343         {
1344             if (i >= _rep.length) return ulong.max;
1345             if (_rep[i] != 0) return findZeros(howMany, i * 64);
1346         }
1347         // Leftover < 64 bits
1348         assert(needed < 64);
1349         if (!needed) return start;
1350         if (i >= _rep.length) return ulong.max;
1351         if (leadingOnes(~_rep[i]) >= needed) return start;
1352         return findZeros(howMany, i * 64);
1353     }
1354 }
1355 
1356 @system unittest
1357 {
1358     auto v = BitVector(new ulong[10]);
1359     assert(v.length == 640);
1360 
1361     v[] = 0;
1362     v[53] = 1;
1363     assert(v[52] == 0);
1364     assert(v[53] == 1);
1365     assert(v[54] == 0);
1366 
1367     v[] = 0;
1368     v[53 .. 55] = 1;
1369     assert(v[52] == 0);
1370     assert(v[53] == 1);
1371     assert(v[54] == 1);
1372     assert(v[55] == 0);
1373 
1374     v[] = 0;
1375     v[2 .. 65] = 1;
1376     assert(v.rep[0] == 0x3FFF_FFFF_FFFF_FFFF);
1377     assert(v.rep[1] == 0x8000_0000_0000_0000);
1378     assert(v.rep[2] == 0);
1379 
1380     v[] = 0;
1381     assert(v.find1Backward(0) == ulong.max);
1382     assert(v.find1Backward(43) == ulong.max);
1383     assert(v.find1Backward(83) == ulong.max);
1384 
1385     v[0] = 1;
1386     assert(v.find1Backward(0) == 0);
1387     assert(v.find1Backward(43) == 0);
1388     import std.conv : text;
1389     assert(v.find1Backward(83) == 0, text(v.find1Backward(83)));
1390 
1391     v[0] = 0;
1392     v[101] = 1;
1393     assert(v.find1Backward(0) == ulong.max);
1394     assert(v.find1Backward(43) == ulong.max);
1395     assert(v.find1Backward(83) == ulong.max);
1396     assert(v.find1Backward(100) == ulong.max);
1397     assert(v.find1Backward(101) == 101);
1398     assert(v.find1Backward(553) == 101);
1399 
1400     v[0 .. v.length] = 0;
1401     v[v.length .. v.length] = 0;
1402     v[0 .. 0] = 0;
1403 
1404     v[] = 0;
1405     assert(v.find1(0) == v.length);
1406     v[139] = 1;
1407     assert(v.find1(0) == 139);
1408     assert(v.find1(100) == 139);
1409     assert(v.find1(138) == 139);
1410     assert(v.find1(139) == 139);
1411     assert(v.find1(140) == v.length);
1412 
1413     v[] = 0;
1414     assert(v.findZeros(100, 0) == 0);
1415     foreach (i; 0 .. 500)
1416         assert(v.findZeros(100, i) == i, text(v.findZeros(100, i), " != ", i));
1417     assert(v.findZeros(540, 99) == 99);
1418     assert(v.findZeros(99, 540) == 540);
1419     assert(v.findZeros(540, 100) == 100);
1420     assert(v.findZeros(640, 0) == 0);
1421     assert(v.findZeros(641, 1) == ulong.max);
1422     assert(v.findZeros(641, 100) == ulong.max);
1423 }
1424