1 /**
2 * This module contains a collection of bit-level operations.
3 *
4 * Copyright: Copyright Don Clugston 2005 - 2013.
5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 * Authors: Don Clugston, Sean Kelly, Walter Bright, Alex Rønne Petersen, Thomas Stuart Bockman
7 * Source: $(DRUNTIMESRC core/_bitop.d)
8 */
9
module core.bitop;

// Module-wide defaults: everything below is nothrow, @safe and @nogc
// unless a declaration explicitly opts out (e.g. the @system pointer
// based functions such as bt/bts/btr/btc).
nothrow:
@safe:
@nogc:
15
// Collapse the two x86 inline-assembler versions into a single AsmX86
// identifier so later code can test one condition.
version (D_InlineAsm_X86_64)
    version = AsmX86;
else version (D_InlineAsm_X86)
    version = AsmX86;

// Likewise collapse 32- and 64-bit x86 targets into AnyX86.
version (X86_64)
    version = AnyX86;
else version (X86)
    version = AnyX86;
25
26 // Use to implement 64-bit bitops on 32-bit arch.
// Use to implement 64-bit bitops on 32-bit arch.
private union Split64
{
    ulong u64;
    struct
    {
        // The 32-bit halves overlay u64; field order depends on the
        // target's byte order so that `lo`/`hi` always name the
        // numerically low/high halves.
        version (LittleEndian)
        {
            uint lo;
            uint hi;
        }
        else
        {
            uint hi;
            uint lo;
        }
    }

    pragma(inline, true)
    this(ulong u64) @safe pure nothrow @nogc
    {
        if (__ctfe)
        {
            // CTFE cannot reinterpret one union member through another,
            // so compute the halves arithmetically at compile time.
            lo = cast(uint) u64;
            hi = cast(uint) (u64 >>> 32);
        }
        else
            this.u64 = u64;
    }
}
56
unittest
{
    // Run-time construction fills the halves through the union overlay...
    const rt = Split64(1);
    assert((rt.lo == 1) && (rt.hi == 0));

    // ...and the CTFE path must produce the same halves.
    enum ct = Split64(1);
    assert((ct.lo == rt.lo) && (ct.hi == rt.hi));
}
65
66 /**
67 * Scans the bits in v starting with bit 0, looking
68 * for the first set bit.
69 * Returns:
70 * The bit number of the first bit set.
71 * The return value is undefined if v is zero.
72 */
int bsf(uint v) pure
{
    // Must stay non-inlined: the compiler recognizes this function by
    // name and substitutes the hardware bit-scan intrinsic when it can.
    pragma(inline, false); // so intrinsic detection will work
    return softBsf!uint(v);
}
78
79 /// ditto
/// ditto
int bsf(ulong v) pure
{
    static if (size_t.sizeof == ulong.sizeof) // 64 bit code gen
    {
        pragma(inline, false); // so intrinsic detection will work
        return softBsf!ulong(v);
    }
    else
    {
        /* intrinsic not available for 32 bit code,
         * make do with 32 bit bsf
         */
        // If the low half has any set bit, the answer lies there;
        // otherwise scan the high half and offset by 32.
        const sv = Split64(v);
        return (sv.lo == 0)?
            bsf(sv.hi) + 32 :
            bsf(sv.lo);
    }
}
98
///
unittest
{
    assert(bsf(0x21) == 0);
    assert(bsf(ulong.max << 39) == 39);
}

unittest
{
    // Make sure bsf() is available at CTFE
    enum test_ctfe = bsf(ulong.max);
    assert(test_ctfe == 0);
}
112
113 /**
114 * Scans the bits in v from the most significant bit
115 * to the least significant bit, looking
116 * for the first set bit.
117 * Returns:
118 * The bit number of the first bit set.
119 * The return value is undefined if v is zero.
120 */
int bsr(uint v) pure
{
    // Must stay non-inlined: the compiler recognizes this function by
    // name and substitutes the hardware bit-scan intrinsic when it can.
    pragma(inline, false); // so intrinsic detection will work
    return softBsr!uint(v);
}
126
127 /// ditto
/// ditto
int bsr(ulong v) pure
{
    static if (size_t.sizeof == ulong.sizeof) // 64 bit code gen
    {
        pragma(inline, false); // so intrinsic detection will work
        return softBsr!ulong(v);
    }
    else
    {
        /* intrinsic not available for 32 bit code,
         * make do with 32 bit bsr
         */
        // The most significant set bit lives in the high half unless
        // that half is zero, in which case scan the low half.
        const sv = Split64(v);
        return (sv.hi == 0)?
            bsr(sv.lo) :
            bsr(sv.hi) + 32;
    }
}
146
///
unittest
{
    assert(bsr(0x21) == 5);
    assert(bsr((ulong.max >> 15) - 1) == 48);
}

unittest
{
    // Make sure bsr() is available at CTFE
    enum test_ctfe = bsr(ulong.max);
    assert(test_ctfe == 63);
}
160
// Software fallbacks: scan forward (lowest set bit) or reverse (highest).
private alias softBsf(N) = softScan!(N, true);
private alias softBsr(N) = softScan!(N, false);
163
/* Shared software fallback implementation for bit scan forward and reverse.

   If forward is true, bsf is computed (the index of the first set bit).
   If forward is false, bsr is computed (the index of the last set bit).

   -1 is returned if no bits are set (v == 0).
*/
private int softScan(N, bool forward)(N v) pure
    if (is(N == uint) || is(N == ulong))
{
    // bsf() and bsr() are officially undefined for v == 0.
    if (!v)
        return -1;

    // This is essentially an unrolled binary search:
    // each step keeps only the half of the remaining bit span that can
    // contain the answer and adjusts the result index accordingly.
    // For the reverse scan the masks are complemented and the index
    // adjustments are negated.
    enum mask(ulong lo) = forward ? cast(N) lo : cast(N)~lo;
    enum inc(int up) = forward ? up : -up;

    N x;
    int ret;
    static if (is(N == ulong))
    {
        // Narrow the search to the relevant 32-bit half.
        x = v & mask!0x0000_0000_FFFF_FFFFL;
        if (x)
        {
            v = x;
            ret = forward ? 0 : 63;
        }
        else
            ret = forward ? 32 : 31;

        // Narrow to the relevant 16-bit span within each half.
        x = v & mask!0x0000_FFFF_0000_FFFFL;
        if (x)
            v = x;
        else
            ret += inc!16;
    }
    else static if (is(N == uint))
    {
        x = v & mask!0x0000_FFFF;
        if (x)
        {
            v = x;
            ret = forward ? 0 : 31;
        }
        else
            ret = forward ? 16 : 15;
    }
    else
        static assert(false);

    // Continue halving at 8-, 4-, 2- and finally 1-bit granularity.
    x = v & mask!0x00FF_00FF_00FF_00FFL;
    if (x)
        v = x;
    else
        ret += inc!8;

    x = v & mask!0x0F0F_0F0F_0F0F_0F0FL;
    if (x)
        v = x;
    else
        ret += inc!4;

    x = v & mask!0x3333_3333_3333_3333L;
    if (x)
        v = x;
    else
        ret += inc!2;

    x = v & mask!0x5555_5555_5555_5555L;
    if (!x)
        ret += inc!1;

    return ret;
}
239
unittest
{
    // The software scanners report -1 for zero input (the public bsf/bsr
    // leave that case undefined).
    assert(softBsf!uint(0u) == -1);
    assert(softBsr!uint(0u) == -1);
    assert(softBsf!ulong(0uL) == -1);
    assert(softBsr!ulong(0uL) == -1);

    assert(softBsf!uint(0x0031_A000) == 13);
    assert(softBsr!uint(0x0031_A000) == 21);
    assert(softBsf!ulong(0x0000_0001_8000_0000L) == 31);
    assert(softBsr!ulong(0x0000_0001_8000_0000L) == 32);

    // Exhaustively check every single-bit value.
    foreach (b; 0 .. 64)
    {
        if (b < 32)
        {
            assert(softBsf!uint(1u << b) == b);
            assert(softBsr!uint(1u << b) == b);
        }

        assert(softBsf!ulong(1uL << b) == b);
        assert(softBsr!ulong(1uL << b) == b);
    }
}
264
/**
 * Tests the bit.
 * (No longer an intrinsic - the compiler recognizes the patterns
 * in the body.)
 */
int bt(in size_t* p, size_t bitnum) pure @system
{
    // Word index is bitnum / bits-per-word, bit index is bitnum %
    // bits-per-word; do not restructure, the backend pattern-matches this.
    static if (size_t.sizeof == 8)
        return ((p[bitnum >> 6] & (1L << (bitnum & 63)))) != 0;
    else static if (size_t.sizeof == 4)
        return ((p[bitnum >> 5] & (1 << (bitnum & 31)))) != 0;
    else
        static assert(0);
}
279 ///
@system pure unittest
{
    size_t[2] array;

    array[0] = 2;
    array[1] = 0x100;

    // bt() only reads; the array must be unchanged afterwards.
    assert(bt(array.ptr, 1));
    assert(array[0] == 2);
    assert(array[1] == 0x100);
}
291
/**
 * Tests and complements the bit.
 * Parameters and bit addressing are the same as for bts().
 * Returns: non-zero if the bit was set before the flip, zero otherwise.
 */
int btc(size_t* p, size_t bitnum) pure @system;


/**
 * Tests and resets (sets to 0) the bit.
 * Parameters and bit addressing are the same as for bts().
 * Returns: non-zero if the bit was set before the reset, zero otherwise.
 */
int btr(size_t* p, size_t bitnum) pure @system;


/**
 * Tests and sets the bit.
 * Params:
 *  p = a non-NULL pointer to an array of size_ts.
 *  bitnum = a bit number, starting with bit 0 of p[0],
 *  and progressing. It addresses bits like the expression:
---
p[index / (size_t.sizeof*8)] & (1 << (index & ((size_t.sizeof*8) - 1)))
---
 * Returns:
 *  A non-zero value if the bit was set, and a zero
 *  if it was clear.
 */
int bts(size_t* p, size_t bitnum) pure @system;
318
///
@system pure unittest
{
    size_t[2] array;

    array[0] = 2;
    array[1] = 0x100;

    // Bit 35 lands in word 0 (bit 35) on 64-bit targets and in
    // word 1 (bit 3) on 32-bit targets.
    assert(btc(array.ptr, 35) == 0);
    if (size_t.sizeof == 8)
    {
        assert(array[0] == 0x8_0000_0002);
        assert(array[1] == 0x100);
    }
    else
    {
        assert(array[0] == 2);
        assert(array[1] == 0x108);
    }

    // Complementing again restores the original contents.
    assert(btc(array.ptr, 35));
    assert(array[0] == 2);
    assert(array[1] == 0x100);

    assert(bts(array.ptr, 35) == 0);
    if (size_t.sizeof == 8)
    {
        assert(array[0] == 0x8_0000_0002);
        assert(array[1] == 0x100);
    }
    else
    {
        assert(array[0] == 2);
        assert(array[1] == 0x108);
    }

    // btr() reports the bit was set and clears it.
    assert(btr(array.ptr, 35));
    assert(array[0] == 2);
    assert(array[1] == 0x100);
}
359
/**
 * Range over bit set. Each element is the bit number that is set.
 *
 * This is more efficient than testing each bit in a sparsely populated bit
 * set. Note that the first bit in the bit set would be bit 0.
 */
struct BitRange
{
    /// Number of bits in each size_t
    enum bitsPerWord = size_t.sizeof * 8;

    private
    {
        const(size_t)*bits; // points at next word of bits to check
        size_t cur; // needed to allow searching bits using bsf
        size_t idx; // index of current set bit
        size_t len; // number of bits in the bit set.
    }
    @nogc nothrow pure:

    /**
     * Construct a BitRange.
     *
     * Params:
     *   bitarr = The array of bits to iterate over
     *   numBits = The total number of valid bits in the given bit array
     */
    this(const(size_t)* bitarr, size_t numBits) @system
    {
        bits = bitarr;
        len = numBits;
        if (len)
        {
            // prime the first bit: XOR-ing bit 0 pre-compensates for the
            // "clear current bit" step popFront() performs with idx == 0,
            // so the first word is effectively examined intact.
            cur = *bits++ ^ 1;
            popFront();
        }
    }

    /// Range functions
    size_t front()
    {
        assert(!empty);
        return idx;
    }

    /// ditto
    bool empty() const
    {
        return idx >= len;
    }

    /// ditto
    void popFront() @system
    {
        // clear the current bit
        auto curbit = idx % bitsPerWord;
        cur ^= size_t(1) << curbit;
        if (!cur)
        {
            // find next size_t with set bit
            idx -= curbit; // rewind idx to the start of the current word
            while (!cur)
            {
                if ((idx += bitsPerWord) >= len)
                    // now empty
                    return;
                cur = *bits++;
            }
            idx += bsf(cur);
        }
        else
        {
            // another bit is set in the current word; jump straight to it
            idx += bsf(cur) - curbit;
        }
    }
}
437
///
@system unittest
{
    import core.stdc.stdlib : malloc, free;
    import core.stdc.string : memset;

    // initialize a bit array (rounded up so whole words are addressable)
    enum nBytes = (100 + BitRange.bitsPerWord - 1) / 8;
    size_t *bitArr = cast(size_t *)malloc(nBytes);
    scope(exit) free(bitArr);
    memset(bitArr, 0, nBytes);

    // set some bits
    bts(bitArr, 48);
    bts(bitArr, 24);
    bts(bitArr, 95);
    bts(bitArr, 78);

    enum sum = 48 + 24 + 95 + 78;

    // iterate and verify every set bit is visited exactly once
    size_t testSum;
    size_t nBits;
    foreach (b; BitRange(bitArr, 100))
    {
        testSum += b;
        ++nBits;
    }

    assert(testSum == sum);
    assert(nBits == 4);
}
470
@system unittest
{
    // Verifies that BitRange yields exactly the set bits, in ascending
    // order (bitsToTest must therefore be passed in ascending order).
    void testIt(size_t numBits, size_t[] bitsToTest...)
    {
        import core.stdc.stdlib : malloc, free;
        import core.stdc.string : memset;
        immutable numBytes = (numBits + size_t.sizeof * 8 - 1) / 8;
        size_t* bitArr = cast(size_t *)malloc(numBytes);
        scope(exit) free(bitArr);
        memset(bitArr, 0, numBytes);
        foreach (b; bitsToTest)
            bts(bitArr, b);
        auto br = BitRange(bitArr, numBits);
        foreach (b; bitsToTest)
        {
            assert(!br.empty);
            assert(b == br.front);
            br.popFront();
        }
        assert(br.empty);
    }

    testIt(100, 0, 1, 31, 63, 85);
    testIt(100, 6, 45, 89, 92, 99);
}
496
/**
 * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes
 * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3
 * becomes byte 0.
 */
uint bswap(uint v) pure; // compiler intrinsic; declaration only
503
/**
 * Swaps bytes in an 8 byte ulong end-to-end, i.e. byte 0 becomes
 * byte 7, byte 1 becomes byte 6, etc.
 */
ulong bswap(ulong v) pure
{
    // Split the value into its 32-bit halves, byte-swap each half with
    // the uint overload, then recombine the halves in swapped order.
    const parts = Split64(v);
    const newHi = bswap(parts.lo);
    const newLo = bswap(parts.hi);
    return (cast(ulong) newHi << 32) | newLo;
}
518
version (DigitalMars) version (AnyX86) @system // not pure
{
    // Port I/O intrinsics: declarations only, implemented by the DMD
    // backend; only meaningful on x86 targets.

    /**
     * Reads I/O port at port_address.
     */
    ubyte inp(uint port_address);


    /**
     * ditto
     */
    ushort inpw(uint port_address);


    /**
     * ditto
     */
    uint inpl(uint port_address);


    /**
     * Writes and returns value to I/O port at port_address.
     */
    ubyte outp(uint port_address, ubyte value);


    /**
     * ditto
     */
    ushort outpw(uint port_address, ushort value);


    /**
     * ditto
     */
    uint outpl(uint port_address, uint value);
}
556
557
/**
 * Calculates the number of set bits in an integer.
 */
int popcnt(uint x) pure
{
    // Select the fastest method depending on the compiler and CPU architecture
    version (DigitalMars)
    {
        // _popcnt() is only declared for x86 DMD builds; elsewhere, and
        // at CTFE, fall through to the software implementation.
        static if (is(typeof(_popcnt(uint.max))))
        {
            import core.cpuid;
            if (!__ctfe && hasPopcnt)
                return _popcnt(x);
        }
    }

    return softPopcnt!uint(x);
}
576
unittest
{
    assert( popcnt( 0 ) == 0 );
    assert( popcnt( 7 ) == 3 );
    assert( popcnt( 0xAA )== 4 );
    assert( popcnt( 0x8421_1248 ) == 8 );
    assert( popcnt( 0xFFFF_FFFF ) == 32 );
    assert( popcnt( 0xCCCC_CCCC ) == 16 );
    assert( popcnt( 0x7777_7777 ) == 24 );

    // Make sure popcnt() is available at CTFE
    enum test_ctfe = popcnt(uint.max);
    assert(test_ctfe == 32);
}
591
/// ditto
int popcnt(ulong x) pure
{
    // Select the fastest method depending on the compiler and CPU architecture
    import core.cpuid;

    static if (size_t.sizeof == uint.sizeof)
    {
        // 32-bit target: count each half separately and sum.
        const sx = Split64(x);
        version (DigitalMars)
        {
            static if (is(typeof(_popcnt(uint.max))))
            {
                if (!__ctfe && hasPopcnt)
                    return _popcnt(sx.lo) + _popcnt(sx.hi);
            }
        }

        return softPopcnt!uint(sx.lo) + softPopcnt!uint(sx.hi);
    }
    else static if (size_t.sizeof == ulong.sizeof)
    {
        // 64-bit target: a single count suffices.
        version (DigitalMars)
        {
            static if (is(typeof(_popcnt(ulong.max))))
            {
                if (!__ctfe && hasPopcnt)
                    return _popcnt(x);
            }
        }

        return softPopcnt!ulong(x);
    }
    else
        static assert(false);
}
628
unittest
{
    assert(popcnt(0uL) == 0);
    assert(popcnt(1uL) == 1);
    assert(popcnt((1uL << 32) - 1) == 32);
    assert(popcnt(0x48_65_6C_6C_6F_3F_21_00uL) == 28);
    assert(popcnt(ulong.max) == 64);

    // Make sure popcnt() is available at CTFE
    enum test_ctfe = popcnt(ulong.max);
    assert(test_ctfe == 64);
}
641
private int softPopcnt(N)(N x) pure
    if (is(N == uint) || is(N == ulong))
{
    // Branch-free parallel bit count. Avoids both branches and the
    // potential cache misses a table lookup could incur.

    // Step 1: each 2-bit field becomes the count of its two bits (0-2).
    // Masking alternate bits prevents the partial sums from overflowing.
    enum N oddBits = cast(N) 0x5555_5555_5555_5555L;
    x -= (x >> 1) & oddBits;

    // Step 2: each 4-bit field becomes the sum of its two 2-bit counts (0-4).
    enum N pairHi = cast(N) 0xCCCC_CCCC_CCCC_CCCCL;
    enum N pairLo = cast(N) 0x3333_3333_3333_3333L;
    x = ((x & pairHi) >> 2) + (x & pairLo);

    // Step 3: fold nibble pairs into bytes. Each byte now holds at most
    // 8, so the following additions cannot overflow between bytes and no
    // more intermediate masking is needed.
    enum N nibbles = cast(N) 0x0F0F_0F0F_0F0F_0F0FL;
    x = (x + (x >> 4)) & nibbles;

    // Step 4: multiplying by 0x01 repeated in every byte sums all the
    // byte counts into the most significant byte; shift it down.
    enum N byteOnes = cast(N) 0x0101_0101_0101_0101L;
    enum topByteShift = is(N == uint) ? 24 : 56;
    x = (x * byteOnes) >> topByteShift;

    return cast(int) x;
}
670
version (DigitalMars) version (AnyX86)
{
    /**
     * Calculates the number of set bits in an integer
     * using the X86 SSE4 POPCNT instruction.
     * POPCNT is not available on all X86 CPUs.
     */
    ushort _popcnt( ushort x ) pure;
    /// ditto
    int _popcnt( uint x ) pure;
    version (X86_64)
    {
        /// ditto
        int _popcnt( ulong x ) pure;
    }

    unittest
    {
        // Not everyone has SSE4 instructions
        import core.cpuid;
        if (!hasPopcnt)
            return;

        // Naive reference implementation to validate the intrinsic.
        static int popcnt_x(ulong u) nothrow @nogc
        {
            int c;
            while (u)
            {
                c += u & 1;
                u >>= 1;
            }
            return c;
        }

        for (uint u = 0; u < 0x1_0000; ++u)
        {
            //writefln("%x %x %x", u, _popcnt(cast(ushort)u), popcnt_x(cast(ushort)u));
            assert(_popcnt(cast(ushort)u) == popcnt_x(cast(ushort)u));

            assert(_popcnt(cast(uint)u) == popcnt_x(cast(uint)u));
            // Multipliers replicate the 16-bit pattern into higher bits
            // to exercise the wider operand widths.
            uint ui = u * 0x3_0001;
            assert(_popcnt(ui) == popcnt_x(ui));

            version (X86_64)
            {
                assert(_popcnt(cast(ulong)u) == popcnt_x(cast(ulong)u));
                ulong ul = u * 0x3_0003_0001;
                assert(_popcnt(ul) == popcnt_x(ul));
            }
        }
    }
}
723
724
/*************************************
 * Read/write value from/to the memory location indicated by ptr.
 *
 * These functions are recognized by the compiler, and calls to them are guaranteed
 * to not be removed (as dead assignment elimination or presumed to have no effect)
 * or reordered in the same thread.
 *
 * These reordering guarantees are only made with regards to other
 * operations done through these functions; the compiler is free to reorder regular
 * loads/stores with regards to loads/stores done through these functions.
 *
 * This is useful when dealing with memory-mapped I/O (MMIO) where a store can
 * have an effect other than just writing a value, or where sequential loads
 * with no intervening stores can retrieve
 * different values from the same location due to external stores to the location.
 *
 * These functions will, when possible, do the load/store as a single operation. In
 * general, this is possible when the size of the operation is less than or equal to
 * $(D (void*).sizeof), although some targets may support larger operations. If the
 * load/store cannot be done as a single operation, multiple smaller operations will be used.
 *
 * These are not to be conflated with atomic operations. They do not guarantee any
 * atomicity. This may be provided by coincidence as a result of the instructions
 * used on the target, but this should not be relied on for portable programs.
 * Further, no memory fences are implied by these functions.
 * They should not be used for communication between threads.
 * They may be used to guarantee a write or read cycle occurs at a specified address.
 */

// Declarations only: bodies are supplied by the compiler (see above).
ubyte  volatileLoad(ubyte * ptr);
ushort volatileLoad(ushort* ptr);    /// ditto
uint   volatileLoad(uint  * ptr);    /// ditto
ulong  volatileLoad(ulong * ptr);    /// ditto

void volatileStore(ubyte * ptr, ubyte  value);    /// ditto
void volatileStore(ushort* ptr, ushort value);    /// ditto
void volatileStore(uint  * ptr, uint   value);    /// ditto
void volatileStore(ulong * ptr, ulong  value);    /// ditto
763
@system unittest
{
    alias TT(T...) = T;

    // Smoke-test every supported width: store 1, read it back.
    foreach (T; TT!(ubyte, ushort, uint, ulong))
    {
        T u;
        T* p = &u;
        volatileStore(p, 1);
        T r = volatileLoad(p);
        assert(r == u);
    }
}
777
778
/**
 * Reverses the order of bits in a 32-bit integer.
 */
pragma(inline, true)
uint bitswap( uint x ) pure
{
    if (!__ctfe)
    {
        // Prefer the hand-written assembler version when inline asm
        // for this target is compiled in.
        static if (is(typeof(asmBitswap32(x))))
            return asmBitswap32(x);
    }

    // Portable fallback; also the only path usable at CTFE.
    return softBitswap!uint(x);
}
793
unittest
{
    // Shared checks run against the public function and each backend.
    static void test(alias impl)()
    {
        assert (impl( 0x8000_0100 ) == 0x0080_0001);
        foreach (i; 0 .. 32)
            assert (impl(1 << i) == 1 << 32 - i - 1);
    }

    test!(bitswap)();
    test!(softBitswap!uint)();
    static if (is(typeof(asmBitswap32(0u))))
        test!(asmBitswap32)();

    // Make sure bitswap() is available at CTFE
    enum test_ctfe = bitswap(1U);
    assert(test_ctfe == (1U << 31));
}
812
/**
 * Reverses the order of bits in a 64-bit integer.
 */
pragma(inline, true)
ulong bitswap ( ulong x ) pure
{
    if (!__ctfe)
    {
        // Prefer the hand-written assembler version when inline asm
        // for this target is compiled in.
        static if (is(typeof(asmBitswap64(x))))
            return asmBitswap64(x);
    }

    // Portable fallback; also the only path usable at CTFE.
    return softBitswap!ulong(x);
}
827
unittest
{
    // Shared checks run against the public function and each backend.
    static void test(alias impl)()
    {
        assert (impl( 0b1000000000000000000000010000000000000000100000000000000000000001)
            == 0b1000000000000000000000010000000000000000100000000000000000000001);
        assert (impl( 0b1110000000000000000000010000000000000000100000000000000000000001)
            == 0b1000000000000000000000010000000000000000100000000000000000000111);
        foreach (i; 0 .. 64)
            assert (impl(1UL << i) == 1UL << 64 - i - 1);
    }

    test!(bitswap)();
    test!(softBitswap!ulong)();
    static if (is(typeof(asmBitswap64(0uL))))
        test!(asmBitswap64)();

    // Make sure bitswap() is available at CTFE
    enum test_ctfe = bitswap(1UL);
    assert(test_ctfe == (1UL << 63));
}
849
private N softBitswap(N)(N x) pure
    if (is(N == uint) || is(N == ulong))
{
    // Exchange adjacent groups of 1, 2, and then 4 bits; together these
    // reverse the bit order inside every byte. A final bswap() then
    // reverses the byte order, completing the full bit reversal.
    enum N oddBits = cast(N) 0x5555_5555_5555_5555L;
    x = ((x >> 1) & oddBits) | ((x & oddBits) << 1);

    enum N bitPairs = cast(N) 0x3333_3333_3333_3333L;
    x = ((x >> 2) & bitPairs) | ((x & bitPairs) << 2);

    enum N nibbleBits = cast(N) 0x0F0F_0F0F_0F0F_0F0FL;
    x = ((x >> 4) & nibbleBits) | ((x & nibbleBits) << 4);

    return bswap(x);
}
868
version (AsmX86)
{
    // Naked-function bit reversal; the instruction sequence must not be
    // altered. The argument arrives per the platform ABI and the result
    // is returned in EAX.
    private uint asmBitswap32(uint x) @trusted pure
    {
        asm pure nothrow @nogc { naked; }

        version (D_InlineAsm_X86_64)
        {
            // Move the argument register (ECX on Win64, EDI on SysV)
            // into EAX, where the swap sequence below operates.
            version (Win64)
                asm pure nothrow @nogc { mov EAX, ECX; }
            else
                asm pure nothrow @nogc { mov EAX, EDI; }
        }

        asm pure nothrow @nogc
        {
            // Author: Tiago Gasiba.
            // Swap adjacent 1-bit, 2-bit and 4-bit groups, then reverse
            // the bytes with BSWAP (mirrors softBitswap).
            mov EDX, EAX;
            shr EAX, 1;
            and EDX, 0x5555_5555;
            and EAX, 0x5555_5555;
            shl EDX, 1;
            or  EAX, EDX;
            mov EDX, EAX;
            shr EAX, 2;
            and EDX, 0x3333_3333;
            and EAX, 0x3333_3333;
            shl EDX, 2;
            or  EAX, EDX;
            mov EDX, EAX;
            shr EAX, 4;
            and EDX, 0x0f0f_0f0f;
            and EAX, 0x0f0f_0f0f;
            shl EDX, 4;
            or  EAX, EDX;
            bswap EAX;
            ret;
        }
    }
}
909
version (D_InlineAsm_X86_64)
{
    // 64-bit naked-function bit reversal; instruction sequence must not
    // be altered. Result is returned in RAX.
    private ulong asmBitswap64(ulong x) @trusted pure
    {
        asm pure nothrow @nogc { naked; }

        // Move the argument register (RCX on Win64, RDI on SysV) into RAX.
        version (Win64)
            asm pure nothrow @nogc { mov RAX, RCX; }
        else
            asm pure nothrow @nogc { mov RAX, RDI; }

        asm pure nothrow @nogc
        {
            // Author: Tiago Gasiba.
            // Same mask-and-shift cascade as the 32-bit version, with the
            // 64-bit masks materialized in RCX, finished with BSWAP.
            mov RDX, RAX;
            shr RAX, 1;
            mov RCX, 0x5555_5555_5555_5555L;
            and RDX, RCX;
            and RAX, RCX;
            shl RDX, 1;
            or  RAX, RDX;

            mov RDX, RAX;
            shr RAX, 2;
            mov RCX, 0x3333_3333_3333_3333L;
            and RDX, RCX;
            and RAX, RCX;
            shl RDX, 2;
            or  RAX, RDX;

            mov RDX, RAX;
            shr RAX, 4;
            mov RCX, 0x0f0f_0f0f_0f0f_0f0fL;
            and RDX, RCX;
            and RAX, RCX;
            shl RDX, 4;
            or  RAX, RDX;
            bswap RAX;
            ret;
        }
    }
}
952
/**
 * Bitwise rotate `value` left (`rol`) or right (`ror`) by
 * `count` bit positions.
 */
pure T rol(T)(in T value, in uint count)
    if (__traits(isIntegral, T) && __traits(isUnsigned, T))
{
    assert(count < 8 * T.sizeof);
    // `-count & (bits - 1)` yields the complementary shift and, unlike
    // `bits - count`, stays in range when count == 0.
    return cast(T) ((value << count) | (value >> (-count & (T.sizeof * 8 - 1))));
}
/// ditto
pure T ror(T)(in T value, in uint count)
    if (__traits(isIntegral, T) && __traits(isUnsigned, T))
{
    assert(count < 8 * T.sizeof);
    return cast(T) ((value >> count) | (value << (-count & (T.sizeof * 8 - 1))));
}
/// ditto
pure T rol(uint count, T)(in T value)
    if (__traits(isIntegral, T) && __traits(isUnsigned, T))
{
    // Compile-time count variant: the range check becomes a static assert.
    static assert(count < 8 * T.sizeof);
    return cast(T) ((value << count) | (value >> (-count & (T.sizeof * 8 - 1))));
}
/// ditto
pure T ror(uint count, T)(in T value)
    if (__traits(isIntegral, T) && __traits(isUnsigned, T))
{
    static assert(count < 8 * T.sizeof);
    return cast(T) ((value >> count) | (value << (-count & (T.sizeof * 8 - 1))));
}
984
///
unittest
{
    ubyte a = 0b10101010U;
    ulong b = ulong.max;

    assert(rol(a, 1) == 0b01010101);
    assert(ror(a, 1) == 0b01010101);
    assert(rol(a, 3) == 0b01010101);
    assert(ror(a, 3) == 0b01010101);

    // A zero-count rotate must be the identity.
    assert(rol(a, 0) == a);
    assert(ror(a, 0) == a);

    assert(rol(b, 63) == ulong.max);
    assert(ror(b, 63) == ulong.max);

    // Compile-time-count overloads.
    assert(rol!3(a) == 0b01010101);
    assert(ror!3(a) == 0b01010101);
}
1005