1 /**
2  * This module contains a collection of bit-level operations.
3  *
4  * Copyright: Copyright Don Clugston 2005 - 2013.
5  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  * Authors:   Don Clugston, Sean Kelly, Walter Bright, Alex Rønne Petersen, Thomas Stuart Bockman
7  * Source:    $(DRUNTIMESRC core/_bitop.d)
8  */
9 
10 module core.bitop;
11 
12 nothrow:
13 @safe:
14 @nogc:
15 
// Collapse both 32- and 64-bit x86 inline-asm support into a single
// version identifier so the asm code below needs only one check.
version (D_InlineAsm_X86_64)
    version = AsmX86;
else version (D_InlineAsm_X86)
    version = AsmX86;

// Likewise collapse X86 and X86_64 into one "any x86" identifier.
version (X86_64)
    version = AnyX86;
else version (X86)
    version = AnyX86;
25 
// Helper for implementing the 64-bit bit operations on 32-bit
// architectures: splits a ulong into its two 32-bit halves.
private union Split64
{
    ulong u64;
    struct
    {
        version (LittleEndian)
        {
            uint lo;
            uint hi;
        }
        else
        {
            uint hi;
            uint lo;
        }
    }

    pragma(inline, true)
    this(ulong u64) @safe pure nothrow @nogc
    {
        if (__ctfe)
        {
            // CTFE cannot reinterpret union storage, so populate the
            // halves explicitly with shifts instead of overlaying u64.
            lo = cast(uint) u64;
            hi = cast(uint) (u64 >>> 32);
        }
        else
            this.u64 = u64;
    }
}

unittest
{
    // Run-time and compile-time construction must agree on the halves.
    const runTime = Split64(1);
    assert((runTime.lo == 1) && (runTime.hi == 0));

    enum compileTime = Split64(1);
    assert((compileTime.lo == runTime.lo) && (compileTime.hi == runTime.hi));
}
65 
/**
 * Scans the bits in v starting with bit 0, looking
 * for the first set bit.
 * Returns:
 *      The bit number of the first bit set.
 *      The return value is undefined if v is zero.
 */
int bsf(uint v) pure
{
    pragma(inline, false);  // so intrinsic detection will work
    return softBsf!uint(v);
}

/// ditto
int bsf(ulong v) pure
{
    static if (size_t.sizeof == ulong.sizeof)  // 64 bit code gen
    {
        pragma(inline, false);   // so intrinsic detection will work
        return softBsf!ulong(v);
    }
    else
    {
        // The intrinsic is not available for 32-bit code; combine two
        // 32-bit scans. The low half wins whenever it has a set bit,
        // otherwise scan the high half and offset the result by 32.
        const halves = Split64(v);
        if (halves.lo == 0)
            return bsf(halves.hi) + 32;
        return bsf(halves.lo);
    }
}

///
unittest
{
    assert(bsf(0x21) == 0);
    assert(bsf(ulong.max << 39) == 39);
}

unittest
{
    // bsf() must also be usable in CTFE.
    enum test_ctfe = bsf(ulong.max);
    assert(test_ctfe == 0);
}
112 
/**
 * Scans the bits in v from the most significant bit
 * to the least significant bit, looking
 * for the first set bit.
 * Returns:
 *      The bit number of the first bit set.
 *      The return value is undefined if v is zero.
 */
int bsr(uint v) pure
{
    pragma(inline, false);  // so intrinsic detection will work
    return softBsr!uint(v);
}

/// ditto
int bsr(ulong v) pure
{
    static if (size_t.sizeof == ulong.sizeof)  // 64 bit code gen
    {
        pragma(inline, false);   // so intrinsic detection will work
        return softBsr!ulong(v);
    }
    else
    {
        // The intrinsic is not available for 32-bit code; scan the
        // high half first since bsr wants the most significant set
        // bit, and fall back to the low half when it is empty.
        const halves = Split64(v);
        if (halves.hi == 0)
            return bsr(halves.lo);
        return bsr(halves.hi) + 32;
    }
}

///
unittest
{
    assert(bsr(0x21) == 5);
    assert(bsr((ulong.max >> 15) - 1) == 48);
}

unittest
{
    // bsr() must also be usable in CTFE.
    enum test_ctfe = bsr(ulong.max);
    assert(test_ctfe == 63);
}
160 
// Software fallbacks used by bsf()/bsr(), instantiated per bit width.
private alias softBsf(N) = softScan!(N, true);
private alias softBsr(N) = softScan!(N, false);

/* Shared software fallback implementation for bit scan forward and reverse.

If forward is true, bsf is computed (the index of the first set bit).
If forward is false, bsr is computed (the index of the last set bit).

-1 is returned if no bits are set (v == 0).
*/
private int softScan(N, bool forward)(N v) pure
    if (is(N == uint) || is(N == ulong))
{
    // bsf() and bsr() are officially undefined for v == 0.
    if (!v)
        return -1;

    // This is essentially an unrolled binary search: each step keeps
    // the half of the remaining bits containing the sought bit (low
    // half for bsf, high half for bsr) and adjusts `ret` accordingly.
    enum mask(ulong lo) = forward ? cast(N) lo : cast(N)~lo;
    enum inc(int up) = forward ? up : -up;

    N x;
    int ret;
    static if (is(N == ulong))
    {
        // First split: 64 -> 32 bits.
        x = v & mask!0x0000_0000_FFFF_FFFFL;
        if (x)
        {
            v = x;
            ret = forward ? 0 : 63;
        }
        else
            ret = forward ? 32 : 31;

        // Second split: 32 -> 16 bits (applied to both 32-bit halves).
        x = v & mask!0x0000_FFFF_0000_FFFFL;
        if (x)
            v = x;
        else
            ret += inc!16;
    }
    else static if (is(N == uint))
    {
        // First split for the 32-bit instantiation: 32 -> 16 bits.
        x = v & mask!0x0000_FFFF;
        if (x)
        {
            v = x;
            ret = forward ? 0 : 31;
        }
        else
            ret = forward ? 16 : 15;
    }
    else
        static assert(false);

    // Remaining splits are width-independent: 16 -> 8 -> 4 -> 2 -> 1.
    x = v & mask!0x00FF_00FF_00FF_00FFL;
    if (x)
        v = x;
    else
        ret += inc!8;

    x = v & mask!0x0F0F_0F0F_0F0F_0F0FL;
    if (x)
        v = x;
    else
        ret += inc!4;

    x = v & mask!0x3333_3333_3333_3333L;
    if (x)
        v = x;
    else
        ret += inc!2;

    x = v & mask!0x5555_5555_5555_5555L;
    if (!x)
        ret += inc!1;

    return ret;
}
239 
unittest
{
    // Zero input reports "no bit found" in every instantiation.
    assert(softBsf!uint(0u) == -1);
    assert(softBsr!uint(0u) == -1);
    assert(softBsf!ulong(0uL) == -1);
    assert(softBsr!ulong(0uL) == -1);

    // Multi-bit patterns: first and last set bit differ.
    assert(softBsf!uint(0x0031_A000) == 13);
    assert(softBsr!uint(0x0031_A000) == 21);
    assert(softBsf!ulong(0x0000_0001_8000_0000L) == 31);
    assert(softBsr!ulong(0x0000_0001_8000_0000L) == 32);

    // For single-bit values both scans must report that bit's position.
    foreach (pos; 0 .. 64)
    {
        if (pos < 32)
        {
            assert(softBsf!uint(1u << pos) == pos);
            assert(softBsr!uint(1u << pos) == pos);
        }

        assert(softBsf!ulong(1uL << pos) == pos);
        assert(softBsr!ulong(1uL << pos) == pos);
    }
}
264 
/**
 * Tests the bit.
 * (No longer an intrinsic - the compiler recognizes the patterns
 * in the body.)
 */
int bt(in size_t* p, size_t bitnum) pure @system
{
    // Word index is bitnum / bits-per-word and bit index is the
    // remainder, written as shifts/masks so the compiler can match
    // the pattern and emit a `bt` instruction.
    static if (size_t.sizeof == 8)
        return ((p[bitnum >> 6] & (1L << (bitnum & 63)))) != 0;
    else static if (size_t.sizeof == 4)
        return ((p[bitnum >> 5] & (1  << (bitnum & 31)))) != 0;
    else
        static assert(0);
}
///
@system pure unittest
{
    size_t[2] words;

    words[0] = 2;
    words[1] = 0x100;

    assert(bt(words.ptr, 1));
    // bt() is read-only: the array must be unchanged afterwards.
    assert(words[0] == 2);
    assert(words[1] == 0x100);
}
291 
/**
 * Tests and complements (flips) the bit.
 * Declaration only: the implementation is supplied by the compiler.
 * Returns:
 *      A non-zero value if the bit was set, and a zero
 *      if it was clear.
 */
int btc(size_t* p, size_t bitnum) pure @system;


/**
 * Tests and resets (sets to 0) the bit.
 * Returns:
 *      A non-zero value if the bit was set, and a zero
 *      if it was clear.
 */
int btr(size_t* p, size_t bitnum) pure @system;


/**
 * Tests and sets the bit.
 * Params:
 * p = a non-NULL pointer to an array of size_ts.
 * bitnum = a bit number, starting with bit 0 of p[0],
 * and progressing. It addresses bits like the expression:
---
p[index / (size_t.sizeof*8)] & (1 << (index & ((size_t.sizeof*8) - 1)))
---
 * Returns:
 *      A non-zero value if the bit was set, and a zero
 *      if it was clear.
 */
int bts(size_t* p, size_t bitnum) pure @system;
318 
///
@system pure unittest
{
    size_t[2] array;

    array[0] = 2;
    array[1] = 0x100;

    // Bit 35 lives in array[0] when size_t is 64 bits,
    // and in array[1] (bit 3) when size_t is 32 bits.
    assert(btc(array.ptr, 35) == 0);
    if (size_t.sizeof == 8)
    {
        assert(array[0] == 0x8_0000_0002);
        assert(array[1] == 0x100);
    }
    else
    {
        assert(array[0] == 2);
        assert(array[1] == 0x108);
    }

    // Complementing again clears the bit and restores the array.
    assert(btc(array.ptr, 35));
    assert(array[0] == 2);
    assert(array[1] == 0x100);

    // bts() on a clear bit returns zero and sets the bit.
    assert(bts(array.ptr, 35) == 0);
    if (size_t.sizeof == 8)
    {
        assert(array[0] == 0x8_0000_0002);
        assert(array[1] == 0x100);
    }
    else
    {
        assert(array[0] == 2);
        assert(array[1] == 0x108);
    }

    // btr() reports the bit was set and clears it.
    assert(btr(array.ptr, 35));
    assert(array[0] == 2);
    assert(array[1] == 0x100);
}
359 
/**
 * Range over bit set. Each element is the bit number that is set.
 *
 * This is more efficient than testing each bit in a sparsely populated bit
 * set. Note that the first bit in the bit set would be bit 0.
 */
struct BitRange
{
    /// Number of bits in each size_t
    enum bitsPerWord = size_t.sizeof * 8;

    private
    {
        const(size_t)*bits; // points at next word of bits to check
        size_t cur; // needed to allow searching bits using bsf
        size_t idx; // index of current set bit
        size_t len; // number of bits in the bit set.
    }
    @nogc nothrow pure:

    /**
     * Construct a BitRange.
     *
     * Params:
     *   bitarr = The array of bits to iterate over
     *   numBits = The total number of valid bits in the given bit array
     */
    this(const(size_t)* bitarr, size_t numBits) @system
    {
        bits = bitarr;
        len = numBits;
        if (len)
        {
            // prime the first bit: popFront() starts by toggling the
            // current bit (idx == 0 here), so pre-toggle bit 0 of the
            // first word; the popFront() below then restores the word
            // and searches it for the first set bit.
            cur = *bits++ ^ 1;
            popFront();
        }
    }

    /// Range functions
    size_t front()
    {
        assert(!empty);
        return idx;
    }

    /// ditto
    bool empty() const
    {
        return idx >= len;
    }

    /// ditto
    void popFront() @system
    {
        // clear the current bit
        auto curbit = idx % bitsPerWord;
        cur ^= size_t(1) << curbit;
        if (!cur)
        {
            // current word is exhausted; find next size_t with set bit
            idx -= curbit; // rewind idx to the start of this word
            while (!cur)
            {
                if ((idx += bitsPerWord) >= len)
                    // now empty
                    return;
                cur = *bits++;
            }
            idx += bsf(cur);
        }
        else
        {
            // another set bit remains in the current word
            idx += bsf(cur) - curbit;
        }
    }
}
437 
///
@system unittest
{
    import core.stdc.stdlib : malloc, free;
    import core.stdc.string : memset;

    // allocate and zero a 100-bit array
    enum nBytes = (100 + BitRange.bitsPerWord - 1) / 8;
    size_t* bitArr = cast(size_t*) malloc(nBytes);
    scope(exit) free(bitArr);
    memset(bitArr, 0, nBytes);

    // set some bits
    bts(bitArr, 48);
    bts(bitArr, 24);
    bts(bitArr, 95);
    bts(bitArr, 78);

    enum expectedSum = 48 + 24 + 95 + 78;

    // iterate, verifying both which bits are seen and how many
    size_t seenSum;
    size_t seenCount;
    foreach (bitPos; BitRange(bitArr, 100))
    {
        seenSum += bitPos;
        ++seenCount;
    }

    assert(seenSum == expectedSum);
    assert(seenCount == 4);
}

@system unittest
{
    // BitRange must yield exactly the bits set via bts(), in order.
    static void checkBits(size_t numBits, size_t[] bitsToTest...)
    {
        import core.stdc.stdlib : malloc, free;
        import core.stdc.string : memset;
        immutable numBytes = (numBits + size_t.sizeof * 8 - 1) / 8;
        size_t* bitArr = cast(size_t*) malloc(numBytes);
        scope(exit) free(bitArr);
        memset(bitArr, 0, numBytes);
        foreach (b; bitsToTest)
            bts(bitArr, b);
        auto range = BitRange(bitArr, numBits);
        foreach (b; bitsToTest)
        {
            assert(!range.empty);
            assert(b == range.front);
            range.popFront();
        }
        assert(range.empty);
    }

    checkBits(100, 0, 1, 31, 63, 85);
    checkBits(100, 6, 45, 89, 92, 99);
}
496 
/**
 * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes
 * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3
 * becomes byte 0.
 */
uint bswap(uint v) pure;

/**
 * Swaps bytes in an 8 byte ulong end-to-end, i.e. byte 0 becomes
 * byte 7, byte 1 becomes byte 6, etc.
 */
ulong bswap(ulong v) pure
{
    // Byte-reverse each 32-bit half, then exchange the halves.
    const halves = Split64(v);

    const newLo = bswap(halves.hi);
    const newHi = bswap(halves.lo);

    return (cast(ulong) newHi << 32) | newLo;
}
518 
// x86 port I/O. Declarations only: the implementations are supplied by
// the DigitalMars compiler. These are @system and not pure because port
// I/O has externally visible side effects.
version (DigitalMars) version (AnyX86) @system // not pure
{
    /**
     * Reads I/O port at port_address.
     */
    ubyte inp(uint port_address);


    /**
     * ditto
     */
    ushort inpw(uint port_address);


    /**
     * ditto
     */
    uint inpl(uint port_address);


    /**
     * Writes and returns value to I/O port at port_address.
     */
    ubyte outp(uint port_address, ubyte value);


    /**
     * ditto
     */
    ushort outpw(uint port_address, ushort value);


    /**
     * ditto
     */
    uint outpl(uint port_address, uint value);
}
556 
557 
/**
 *  Calculates the number of set bits in an integer.
 */
int popcnt(uint x) pure
{
    // Prefer the hardware POPCNT instruction when the compiler exposes
    // the intrinsic and the CPU reports support; CTFE and all other
    // configurations use the portable software fallback.
    version (DigitalMars)
    {
        static if (is(typeof(_popcnt(uint.max))))
        {
            import core.cpuid;
            if (!__ctfe && hasPopcnt)
                return _popcnt(x);
        }
    }

    return softPopcnt!uint(x);
}

unittest
{
    assert(popcnt(0) == 0);
    assert(popcnt(7) == 3);
    assert(popcnt(0xAA) == 4);
    assert(popcnt(0x8421_1248) == 8);
    assert(popcnt(0xFFFF_FFFF) == 32);
    assert(popcnt(0xCCCC_CCCC) == 16);
    assert(popcnt(0x7777_7777) == 24);

    // popcnt() must also be usable in CTFE.
    enum test_ctfe = popcnt(uint.max);
    assert(test_ctfe == 32);
}
591 
/// ditto
int popcnt(ulong x) pure
{
    // Select the fastest method depending on the compiler and CPU architecture
    import core.cpuid;

    static if (size_t.sizeof == uint.sizeof)
    {
        // 32-bit code gen: count each 32-bit half separately and sum.
        const halves = Split64(x);
        version (DigitalMars)
        {
            static if (is(typeof(_popcnt(uint.max))))
            {
                if (!__ctfe && hasPopcnt)
                    return _popcnt(halves.lo) + _popcnt(halves.hi);
            }
        }

        return softPopcnt!uint(halves.lo) + softPopcnt!uint(halves.hi);
    }
    else static if (size_t.sizeof == ulong.sizeof)
    {
        // 64-bit code gen: a single hardware popcnt when available.
        version (DigitalMars)
        {
            static if (is(typeof(_popcnt(ulong.max))))
            {
                if (!__ctfe && hasPopcnt)
                    return _popcnt(x);
            }
        }

        return softPopcnt!ulong(x);
    }
    else
        static assert(false);
}

unittest
{
    assert(popcnt(0uL) == 0);
    assert(popcnt(1uL) == 1);
    assert(popcnt((1uL << 32) - 1) == 32);
    assert(popcnt(0x48_65_6C_6C_6F_3F_21_00uL) == 28);
    assert(popcnt(ulong.max) == 64);

    // popcnt() must also be usable in CTFE.
    enum test_ctfe = popcnt(ulong.max);
    assert(test_ctfe == 64);
}
641 
// Branch-free SWAR population count; avoids both branches and the
// potential cache misses a table lookup could incur.
private int softPopcnt(N)(N x) pure
    if (is(N == uint) || is(N == ulong))
{
    enum mask1   = cast(N) 0x5555_5555_5555_5555L; // every other bit
    enum mask2a  = cast(N) 0xCCCC_CCCC_CCCC_CCCCL; // high bits of 2-bit pairs
    enum mask2b  = cast(N) 0x3333_3333_3333_3333L; // low bits of 2-bit pairs
    enum mask4   = cast(N) 0x0F0F_0F0F_0F0F_0F0FL; // low nibbles
    enum maskMul = cast(N) 0x0101_0101_0101_0101L; // per-byte sum multiplier
    enum shiftbits = is(N == uint) ? 24 : 56;      // bring top byte down

    // Count within each 2-bit pair; masking alternate bits prevents
    // the per-pair sums from overflowing into their neighbors.
    x = x - ((x >> 1) & mask1);
    // Sum adjacent pairs into 4-bit fields (each now 0000-0100).
    x = ((x & mask2a) >> 2) + (x & mask2b);
    // Sum adjacent nibbles into bytes; no further masking is needed
    // between additions since the byte sums cannot overflow.
    x = (x + (x >> 4)) & mask4;
    // The multiply accumulates all byte counts into the top byte;
    // shift it down to produce the final count.
    x = (x * maskMul) >> shiftbits;

    return cast(int) x;
}
670 
// SSE4 POPCNT intrinsics. Declarations only: the implementation is
// supplied by the DigitalMars compiler. Callers are expected to check
// core.cpuid.hasPopcnt first (as popcnt() above does).
version (DigitalMars) version (AnyX86)
{
    /**
     * Calculates the number of set bits in an integer
     * using the X86 SSE4 POPCNT instruction.
     * POPCNT is not available on all X86 CPUs.
     */
    ushort _popcnt( ushort x ) pure;
    /// ditto
    int _popcnt( uint x ) pure;
    version (X86_64)
    {
        /// ditto
        int _popcnt( ulong x ) pure;
    }

    unittest
    {
        // Not everyone has SSE4 instructions
        import core.cpuid;
        if (!hasPopcnt)
            return;

        // Reference implementation: count set bits one at a time.
        static int popcnt_x(ulong u) nothrow @nogc
        {
            int c;
            while (u)
            {
                c += u & 1;
                u >>= 1;
            }
            return c;
        }

        // Exhaustively check all 16-bit values, plus derived wider ones.
        for (uint u = 0; u < 0x1_0000; ++u)
        {
            //writefln("%x %x %x", u,   _popcnt(cast(ushort)u), popcnt_x(cast(ushort)u))
            assert(_popcnt(cast(ushort)u) == popcnt_x(cast(ushort)u));

            assert(_popcnt(cast(uint)u) == popcnt_x(cast(uint)u));
            uint ui = u * 0x3_0001;
            assert(_popcnt(ui) == popcnt_x(ui));

            version (X86_64)
            {
                assert(_popcnt(cast(ulong)u) == popcnt_x(cast(ulong)u));
                ulong ul = u * 0x3_0003_0001;
                assert(_popcnt(ul) == popcnt_x(ul));
            }
        }
    }
}
723 
724 
/*************************************
 * Read/write value from/to the memory location indicated by ptr.
 *
 * These functions are recognized by the compiler, and calls to them are guaranteed
 * to not be removed (as dead assignment elimination or presumed to have no effect)
 * or reordered in the same thread.
 *
 * These reordering guarantees are only made with regards to other
 * operations done through these functions; the compiler is free to reorder regular
 * loads/stores with regards to loads/stores done through these functions.
 *
 * This is useful when dealing with memory-mapped I/O (MMIO) where a store can
 * have an effect other than just writing a value, or where sequential loads
 * with no intervening stores can retrieve
 * different values from the same location due to external stores to the location.
 *
 * These functions will, when possible, do the load/store as a single operation. In
 * general, this is possible when the size of the operation is less than or equal to
 * $(D (void*).sizeof), although some targets may support larger operations. If the
 * load/store cannot be done as a single operation, multiple smaller operations will be used.
 *
 * These are not to be conflated with atomic operations. They do not guarantee any
 * atomicity. This may be provided by coincidence as a result of the instructions
 * used on the target, but this should not be relied on for portable programs.
 * Further, no memory fences are implied by these functions.
 * They should not be used for communication between threads.
 * They may be used to guarantee a write or read cycle occurs at a specified address.
 */

// Declarations only: the compiler provides the implementations.
ubyte  volatileLoad(ubyte * ptr);
ushort volatileLoad(ushort* ptr);  /// ditto
uint   volatileLoad(uint  * ptr);  /// ditto
ulong  volatileLoad(ulong * ptr);  /// ditto

void volatileStore(ubyte * ptr, ubyte  value);   /// ditto
void volatileStore(ushort* ptr, ushort value);   /// ditto
void volatileStore(uint  * ptr, uint   value);   /// ditto
void volatileStore(ulong * ptr, ulong  value);   /// ditto
763 
@system unittest
{
    alias TT(T...) = T;

    // A volatile round-trip must observe the stored value for every width.
    foreach (T; TT!(ubyte, ushort, uint, ulong))
    {
        T value;
        T* addr = &value;
        volatileStore(addr, 1);
        T readBack = volatileLoad(addr);
        assert(readBack == value);
    }
}
777 
778 
/**
 * Reverses the order of bits in a 32-bit integer.
 */
pragma(inline, true)
uint bitswap( uint x ) pure
{
    // Use the hand-written asm version when one exists for this
    // target; CTFE always takes the portable software path.
    static if (is(typeof(asmBitswap32(x))))
    {
        if (!__ctfe)
            return asmBitswap32(x);
    }

    return softBitswap!uint(x);
}

unittest
{
    static void verify(alias impl)()
    {
        assert (impl( 0x8000_0100 ) == 0x0080_0001);
        foreach (i; 0 .. 32)
            assert (impl(1 << i) == 1 << 32 - i - 1);
    }

    verify!(bitswap)();
    verify!(softBitswap!uint)();
    static if (is(typeof(asmBitswap32(0u))))
        verify!(asmBitswap32)();

    // bitswap() must also be usable in CTFE.
    enum test_ctfe = bitswap(1U);
    assert(test_ctfe == (1U << 31));
}
812 
/**
 * Reverses the order of bits in a 64-bit integer.
 */
pragma(inline, true)
ulong bitswap ( ulong x ) pure
{
    // Use the hand-written asm version when one exists for this
    // target; CTFE always takes the portable software path.
    static if (is(typeof(asmBitswap64(x))))
    {
        if (!__ctfe)
            return asmBitswap64(x);
    }

    return softBitswap!ulong(x);
}

unittest
{
    static void verify(alias impl)()
    {
        assert (impl( 0b1000000000000000000000010000000000000000100000000000000000000001)
            == 0b1000000000000000000000010000000000000000100000000000000000000001);
        assert (impl( 0b1110000000000000000000010000000000000000100000000000000000000001)
            == 0b1000000000000000000000010000000000000000100000000000000000000111);
        foreach (i; 0 .. 64)
            assert (impl(1UL << i) == 1UL << 64 - i - 1);
    }

    verify!(bitswap)();
    verify!(softBitswap!ulong)();
    static if (is(typeof(asmBitswap64(0uL))))
        verify!(asmBitswap64)();

    // bitswap() must also be usable in CTFE.
    enum test_ctfe = bitswap(1UL);
    assert(test_ctfe == (1UL << 63));
}
849 
// Portable bit reversal: swap ever-larger bit groups in place, then
// let bswap() take care of reversing the byte order.
private N softBitswap(N)(N x) pure
    if (is(N == uint) || is(N == ulong))
{
    enum oddBits    = cast(N) 0x5555_5555_5555_5555L;
    enum pairBits   = cast(N) 0x3333_3333_3333_3333L;
    enum nibbleBits = cast(N) 0x0F0F_0F0F_0F0F_0F0FL;

    // swap adjacent single bits
    x = ((x >> 1) & oddBits) | ((x & oddBits) << 1);
    // swap adjacent 2-bit groups
    x = ((x >> 2) & pairBits) | ((x & pairBits) << 2);
    // swap adjacent nibbles
    x = ((x >> 4) & nibbleBits) | ((x & nibbleBits) << 4);
    // finally reverse the order of all bytes
    return bswap(x);
}
868 
version (AsmX86)
{
    // Hand-written x86 bit reversal used by bitswap(uint) at run time.
    private uint asmBitswap32(uint x) @trusted pure
    {
        asm pure nothrow @nogc { naked; }

        // naked function: load the argument into EAX per the target's
        // calling convention (first integer arg is ECX on Win64, EDI
        // on the SysV x86-64 ABI).
        version (D_InlineAsm_X86_64)
        {
            version (Win64)
                asm pure nothrow @nogc { mov EAX, ECX; }
            else
                asm pure nothrow @nogc { mov EAX, EDI; }
        }

        asm pure nothrow @nogc
        {
            // Author: Tiago Gasiba.
            // Swap adjacent bits, then 2-bit pairs, then nibbles, then
            // byte-reverse the word; the result is returned in EAX.
            mov EDX, EAX;
            shr EAX, 1;
            and EDX, 0x5555_5555;
            and EAX, 0x5555_5555;
            shl EDX, 1;
            or  EAX, EDX;
            mov EDX, EAX;
            shr EAX, 2;
            and EDX, 0x3333_3333;
            and EAX, 0x3333_3333;
            shl EDX, 2;
            or  EAX, EDX;
            mov EDX, EAX;
            shr EAX, 4;
            and EDX, 0x0f0f_0f0f;
            and EAX, 0x0f0f_0f0f;
            shl EDX, 4;
            or  EAX, EDX;
            bswap EAX;
            ret;
        }
    }
}
909 
version (D_InlineAsm_X86_64)
{
    // Hand-written x86-64 bit reversal used by bitswap(ulong) at run time.
    private ulong asmBitswap64(ulong x) @trusted pure
    {
        asm pure nothrow @nogc { naked; }

        // naked function: load the argument into RAX per the target's
        // calling convention (RCX on Win64, RDI on the SysV ABI).
        version (Win64)
            asm pure nothrow @nogc { mov RAX, RCX; }
        else
            asm pure nothrow @nogc { mov RAX, RDI; }

        asm pure nothrow @nogc
        {
            // Author: Tiago Gasiba.
            // Same structure as the 32-bit version; the 64-bit masks do
            // not fit in immediates, so they go through RCX.
            mov RDX, RAX;
            shr RAX, 1;
            mov RCX, 0x5555_5555_5555_5555L;
            and RDX, RCX;
            and RAX, RCX;
            shl RDX, 1;
            or  RAX, RDX;

            mov RDX, RAX;
            shr RAX, 2;
            mov RCX, 0x3333_3333_3333_3333L;
            and RDX, RCX;
            and RAX, RCX;
            shl RDX, 2;
            or  RAX, RDX;

            mov RDX, RAX;
            shr RAX, 4;
            mov RCX, 0x0f0f_0f0f_0f0f_0f0fL;
            and RDX, RCX;
            and RAX, RCX;
            shl RDX, 4;
            or  RAX, RDX;
            bswap RAX;
            ret;
        }
    }
}
952 
/**
 *  Bitwise rotate `value` left (`rol`) or right (`ror`) by
 *  `count` bit positions.
 */
pure T rol(T)(in T value, in uint count)
    if (__traits(isIntegral, T) && __traits(isUnsigned, T))
{
    assert(count < 8 * T.sizeof);
    // `-count & (bits - 1)` is the complementary shift; the mask also
    // keeps a count of 0 from shifting by the full word width (UB).
    return cast(T) ((value << count) | (value >> (-count & (T.sizeof * 8 - 1))));
}
/// ditto
pure T ror(T)(in T value, in uint count)
    if (__traits(isIntegral, T) && __traits(isUnsigned, T))
{
    assert(count < 8 * T.sizeof);
    return cast(T) ((value >> count) | (value << (-count & (T.sizeof * 8 - 1))));
}
/// ditto
pure T rol(uint count, T)(in T value)
    if (__traits(isIntegral, T) && __traits(isUnsigned, T))
{
    // Compile-time count overload: the rotation amount is checked statically.
    static assert(count < 8 * T.sizeof);
    return cast(T) ((value << count) | (value >> (-count & (T.sizeof * 8 - 1))));
}
/// ditto
pure T ror(uint count, T)(in T value)
    if (__traits(isIntegral, T) && __traits(isUnsigned, T))
{
    static assert(count < 8 * T.sizeof);
    return cast(T) ((value >> count) | (value << (-count & (T.sizeof * 8 - 1))));
}

///
unittest
{
    ubyte a = 0b10101010U;
    ulong b = ulong.max;

    assert(rol(a, 1) == 0b01010101);
    assert(ror(a, 1) == 0b01010101);
    assert(rol(a, 3) == 0b01010101);
    assert(ror(a, 3) == 0b01010101);

    // rotating by zero is the identity
    assert(rol(a, 0) == a);
    assert(ror(a, 0) == a);

    // all-ones is invariant under any rotation
    assert(rol(b, 63) == ulong.max);
    assert(ror(b, 63) == ulong.max);

    assert(rol!3(a) == 0b01010101);
    assert(ror!3(a) == 0b01010101);
}
1005