#
#
#            Nim's Runtime Library
#        (c) Copyright 2017 Nim Authors
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a series of low-level methods for bit manipulation.
##
## By default, compiler intrinsics are used where possible to improve performance
## on supported compilers: `GCC`, `LLVM_GCC`, `CLANG`, `VCC`, `ICC`.
##
## The module falls back to pure Nim procs when the backend is not supported.
## You can also use the flag `noIntrinsicsBitOpts` to disable compiler intrinsics.
##
## This module is also compatible with other backends: `JavaScript`, `NimScript`,
## as well as the `compiletime VM`.
##
## As a result of using optimized functions/intrinsics, some functions can return
## undefined results if the input is invalid. You can use the flag `noUndefinedBitOpts`
## to force predictable behaviour for all inputs, at a small performance cost.
##
## At this time only `fastLog2`, `firstSetBit`, `countLeadingZeroBits` and `countTrailingZeroBits`
## may return undefined and/or platform-dependent values if given invalid input.
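##
## Both flags are ordinary compile-time defines; for example, a build that
## needs fully defined behaviour can be compiled with
## `nim c -d:noUndefinedBitOpts app.nim` (where `app.nim` stands for your
## own main module).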

import macros
import std/private/since
from std/private/bitops_utils import forwardImpl, toUnsigned

func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".}
  ## Computes the `bitwise complement` of the integer `x`.
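  ##
  ## For example, `bitnot(0b1111_0000'u8)` is `0b0000_1111'u8`.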

func internalBitand[T: SomeInteger](x, y: T): T {.magic: "BitandI".}

func internalBitor[T: SomeInteger](x, y: T): T {.magic: "BitorI".}

func internalBitxor[T: SomeInteger](x, y: T): T {.magic: "BitxorI".}

macro bitand*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  ## Computes the `bitwise and` of all arguments collectively.
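  runnableExamples:
    # Illustrative values: the macro folds `and` over every argument.
    doAssert bitand(0b1011'u8, 0b0110'u8, 0b0011'u8) == 0b0010'u8
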
  let fn = bindSym("internalBitand")
  result = newCall(fn, x, y)
  for extra in z:
    result = newCall(fn, result, extra)

macro bitor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  ## Computes the `bitwise or` of all arguments collectively.
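  runnableExamples:
    # Illustrative values: the macro folds `or` over every argument.
    doAssert bitor(0b0100'u8, 0b0010'u8, 0b0001'u8) == 0b0111'u8
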
  let fn = bindSym("internalBitor")
  result = newCall(fn, x, y)
  for extra in z:
    result = newCall(fn, result, extra)

macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  ## Computes the `bitwise xor` of all arguments collectively.
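  runnableExamples:
    # Illustrative values: the macro folds `xor` over every argument.
    doAssert bitxor(0b1100'u8, 0b1010'u8, 0b0001'u8) == 0b0111'u8
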
  let fn = bindSym("internalBitxor")
  result = newCall(fn, x, y)
  for extra in z:
    result = newCall(fn, result, extra)


type BitsRange*[T] = range[0..sizeof(T)*8-1]
  ## A range with all bit positions for type `T`.
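  ## For example, `BitsRange[uint8]` is `range[0..7]`.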

func bitsliced*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns an extracted (and shifted) slice of bits from `v`.
  runnableExamples:
    doAssert 0b10111.bitsliced(2 .. 4) == 0b101
    doAssert 0b11100.bitsliced(0 .. 2) == 0b100
    doAssert 0b11100.bitsliced(0 ..< 3) == 0b100

  let
    upmost = sizeof(T) * 8 - 1
    uv     = when v is SomeUnsignedInt: v else: v.toUnsigned
  (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T

proc bitslice*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v` into an extracted (and shifted) slice of bits from `v`.
  runnableExamples:
    var x = 0b101110
    x.bitslice(2 .. 4)
    doAssert x == 0b011

  let
    upmost = sizeof(T) * 8 - 1
    uv     = when v is SomeUnsignedInt: v else: v.toUnsigned
  v = (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T

func toMask*[T: SomeInteger](slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Creates a bitmask based on a slice of bits.
  runnableExamples:
    doAssert toMask[int32](1 .. 3) == 0b1110'i32
    doAssert toMask[int32](0 .. 3) == 0b1111'i32

  let
    upmost = sizeof(T) * 8 - 1
    bitmask = when T is SomeUnsignedInt:
                bitnot(0.T)
              else:
                bitnot(0.T).toUnsigned
  (bitmask shl (upmost - slice.b + slice.a) shr (upmost - slice.b)).T

proc masked*[T: SomeInteger](v, mask: T): T {.inline, since: (1, 3).} =
  ## Returns `v`, with only the `1` bits from `mask` matching those of
  ## `v` set to 1.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.masked(0b0000_1010'u8) == 0b0000_0010'u8

  bitand(v, mask)

func masked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns `v`, with only the `1` bits in the range of `slice`
  ## matching those of `v` set to 1.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_1011'u8
    doAssert v.masked(1 .. 3) == 0b0000_1010'u8

  bitand(v, toMask[T](slice))

proc mask*[T: SomeInteger](v: var T; mask: T) {.inline, since: (1, 3).} =
  ## Mutates `v`, with only the `1` bits from `mask` matching those of
  ## `v` set to 1.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.mask(0b0000_1010'u8)
    doAssert v == 0b0000_0010'u8

  v = bitand(v, mask)

proc mask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v`, with only the `1` bits in the range of `slice`
  ## matching those of `v` set to 1.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_1011'u8
    v.mask(1 .. 3)
    doAssert v == 0b0000_1010'u8

  v = bitand(v, toMask[T](slice))

func setMasked*[T: SomeInteger](v, mask: T): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits from `mask` set to 1.
  ##
  ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.setMasked(0b0000_1010'u8) == 0b0000_1011'u8

  bitor(v, mask)

func setMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits in the range of `slice` set to 1.
  ##
  ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.setMasked(2 .. 3) == 0b0000_1111'u8

  bitor(v, toMask[T](slice))

proc setMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  ## Mutates `v`, with all the `1` bits from `mask` set to 1.
  ##
  ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.setMask(0b0000_1010'u8)
    doAssert v == 0b0000_1011'u8

  v = bitor(v, mask)

proc setMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v`, with all the `1` bits in the range of `slice` set to 1.
  ##
  ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.setMask(2 .. 3)
    doAssert v == 0b0000_1111'u8

  v = bitor(v, toMask[T](slice))

func clearMasked*[T: SomeInteger](v, mask: T): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits from `mask` set to 0.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  ## with an *inverted mask*.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.clearMasked(0b0000_1010'u8) == 0b0000_0001'u8

  bitand(v, bitnot(mask))

func clearMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits in the range of `slice` set to 0.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  ## with an *inverted mask*.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.clearMasked(1 .. 3) == 0b0000_0001'u8

  bitand(v, bitnot(toMask[T](slice)))

proc clearMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  ## Mutates `v`, with all the `1` bits from `mask` set to 0.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  ## with an *inverted mask*.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.clearMask(0b0000_1010'u8)
    doAssert v == 0b0000_0001'u8

  v = bitand(v, bitnot(mask))

proc clearMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v`, with all the `1` bits in the range of `slice` set to 0.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  ## with an *inverted mask*.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.clearMask(1 .. 3)
    doAssert v == 0b0000_0001'u8

  v = bitand(v, bitnot(toMask[T](slice)))

func flipMasked*[T: SomeInteger](v, mask: T): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits from `mask` flipped.
  ##
  ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.flipMasked(0b0000_1010'u8) == 0b0000_1001'u8

  bitxor(v, mask)

func flipMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits in the range of `slice` flipped.
  ##
  ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.flipMasked(1 .. 3) == 0b0000_1101'u8

  bitxor(v, toMask[T](slice))

proc flipMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  ## Mutates `v`, with all the `1` bits from `mask` flipped.
  ##
  ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.flipMask(0b0000_1010'u8)
    doAssert v == 0b0000_1001'u8

  v = bitxor(v, mask)

proc flipMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v`, with all the `1` bits in the range of `slice` flipped.
  ##
  ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.flipMask(1 .. 3)
    doAssert v == 0b0000_1101'u8

  v = bitxor(v, toMask[T](slice))

proc setBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  ## Mutates `v`, with the bit at position `bit` set to 1.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.setBit(5'u8)
    doAssert v == 0b0010_0011'u8

  v.setMask(1.T shl bit)

proc clearBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  ## Mutates `v`, with the bit at position `bit` set to 0.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.clearBit(1'u8)
    doAssert v == 0b0000_0001'u8

  v.clearMask(1.T shl bit)

proc flipBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  ## Mutates `v`, with the bit at position `bit` flipped.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.flipBit(1'u8)
    doAssert v == 0b0000_0001'u8

    v = 0b0000_0011'u8
    v.flipBit(2'u8)
    doAssert v == 0b0000_0111'u8

  v.flipMask(1.T shl bit)

macro setBits*(v: typed; bits: varargs[typed]): untyped =
  ## Mutates `v`, with the bits at positions `bits` set to 1.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.setBits(3, 5, 7)
    doAssert v == 0b1010_1011'u8

  bits.expectKind(nnkBracket)
  result = newStmtList()
  for bit in bits:
    result.add newCall("setBit", v, bit)

macro clearBits*(v: typed; bits: varargs[typed]): untyped =
  ## Mutates `v`, with the bits at positions `bits` set to 0.
  runnableExamples:
    var v = 0b1111_1111'u8
    v.clearBits(1, 3, 5, 7)
    doAssert v == 0b0101_0101'u8

  bits.expectKind(nnkBracket)
  result = newStmtList()
  for bit in bits:
    result.add newCall("clearBit", v, bit)

macro flipBits*(v: typed; bits: varargs[typed]): untyped =
  ## Mutates `v`, with the bits at positions `bits` flipped.
  runnableExamples:
    var v = 0b0000_1111'u8
    v.flipBits(1, 3, 5, 7)
    doAssert v == 0b1010_0101'u8

  bits.expectKind(nnkBracket)
  result = newStmtList()
  for bit in bits:
    result.add newCall("flipBit", v, bit)


proc testBit*[T: SomeInteger](v: T; bit: BitsRange[T]): bool {.inline.} =
  ## Returns true if the bit in `v` at position `bit` is set to 1.
  runnableExamples:
    let v = 0b0000_1111'u8
    doAssert v.testBit(0)
    doAssert not v.testBit(7)

  let mask = 1.T shl bit
  return (v and mask) == mask

# #### Pure Nim version ####

func firstSetBitNim(x: uint32): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of `x`.
  ## The result is not meaningful for `x == 0`; callers handle that case.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  const lookup: array[32, uint8] = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15,
    25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]
  let v = x.uint32
  let k = not v + 1 # get two's complement # cast[uint32](-cast[int32](v))
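  # `v and k` keeps only the lowest set bit; multiplying by the De Bruijn
  # constant then places a unique 5-bit pattern in the top bits, which the
  # shift by 27 turns into an index into `lookup`.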
  result = 1 + lookup[uint32((v and k) * 0x077CB531'u32) shr 27].int

func firstSetBitNim(x: uint64): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of `x`.
  ## The result is not meaningful for `x == 0`; callers handle that case.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  let v = uint64(x)
  var k = uint32(v and 0xFFFFFFFF'u32)
  if k == 0:
    k = uint32(v shr 32'u32) and 0xFFFFFFFF'u32
    result = 32
  else:
    result = 0
  result += firstSetBitNim(k)

func fastlog2Nim(x: uint32): int {.inline.} =
  ## Quickly find the log base 2 of a 32-bit or smaller integer.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18,
    22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31]
  var v = x.uint32
  v = v or v shr 1 # first round down to one less than a power of 2
  v = v or v shr 2
  v = v or v shr 4
  v = v or v shr 8
  v = v or v shr 16
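  # All bits below the most significant set bit are now set; the De Bruijn
  # multiply-and-shift maps each such value to a distinct `lookup` index.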
  result = lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int

func fastlog2Nim(x: uint64): int {.inline.} =
  ## Quickly find the log base 2 of a 64-bit integer.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54,
    33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62,
    57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31,
    35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63]
  var v = x.uint64
  v = v or v shr 1 # first round down to one less than a power of 2
  v = v or v shr 2
  v = v or v shr 4
  v = v or v shr 8
  v = v or v shr 16
  v = v or v shr 32
  result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int

import system/countbits_impl

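# Compiler rotate intrinsics are only used on x86 targets with a supported
# compiler; everywhere else the pure-Nim `rotl`/`rotr` fallbacks below apply.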
const useBuiltinsRotate = (defined(amd64) or defined(i386)) and
                          (defined(gcc) or defined(clang) or defined(vcc) or
                           (defined(icl) and not defined(cpp))) and useBuiltins

template parityImpl[T](value: T): int =
  # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
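  # XOR-fold the upper halves onto the low nibble, then use the constant
  # 0x6996 as a 16-entry lookup table: bit `v` of 0x6996 is the parity of `v`.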
  var v = value
  when sizeof(T) == 8:
    v = v xor (v shr 32)
  when sizeof(T) >= 4:
    v = v xor (v shr 16)
  when sizeof(T) >= 2:
    v = v xor (v shr 8)
  v = v xor (v shr 4)
  v = v and 0xf
  ((0x6996'u shr v) and 1).int


when useGCC_builtins:
  # Returns the bit parity in value
  proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.}
  proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.}

  # Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero.
  proc builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", cdecl.}
  proc builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", cdecl.}

  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  proc builtin_clz(x: cuint): cint {.importc: "__builtin_clz", cdecl.}
  proc builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", cdecl.}

  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  proc builtin_ctz(x: cuint): cint {.importc: "__builtin_ctz", cdecl.}
  proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.}

elif useVCC_builtins:
  # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1).
  func bitScanReverse(index: ptr culong, mask: culong): uint8 {.
      importc: "_BitScanReverse", header: "<intrin.h>".}
  func bitScanReverse64(index: ptr culong, mask: uint64): uint8 {.
      importc: "_BitScanReverse64", header: "<intrin.h>".}

  # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1).
  func bitScanForward(index: ptr culong, mask: culong): uint8 {.
      importc: "_BitScanForward", header: "<intrin.h>".}
  func bitScanForward64(index: ptr culong, mask: uint64): uint8 {.
      importc: "_BitScanForward64", header: "<intrin.h>".}

  template vcc_scan_impl(fnc: untyped; v: untyped): int =
    var index: culong
    discard fnc(index.addr, v)
    index.int

elif useICC_builtins:
  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  func bitScanForward(p: ptr uint32, b: uint32): uint8 {.
      importc: "_BitScanForward", header: "<immintrin.h>".}
  func bitScanForward64(p: ptr uint32, b: uint64): uint8 {.
      importc: "_BitScanForward64", header: "<immintrin.h>".}

  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  func bitScanReverse(p: ptr uint32, b: uint32): uint8 {.
      importc: "_BitScanReverse", header: "<immintrin.h>".}
  func bitScanReverse64(p: ptr uint32, b: uint64): uint8 {.
      importc: "_BitScanReverse64", header: "<immintrin.h>".}

  template icc_scan_impl(fnc: untyped; v: untyped): int =
    var index: uint32
    discard fnc(index.addr, v)
    index.int

func countSetBits*(x: SomeInteger): int {.inline.} =
  ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
  runnableExamples:
    doAssert countSetBits(0b0000_0011'u8) == 2
    doAssert countSetBits(0b1010_1010'u8) == 4

  result = countSetBitsImpl(x)

func popcount*(x: SomeInteger): int {.inline.} =
  ## Alias for `countSetBits <#countSetBits,SomeInteger>`_ (Hamming weight).
  result = countSetBits(x)

func parityBits*(x: SomeInteger): int {.inline.} =
  ## Calculate the bit parity in an integer. If the number of 1-bits
  ## is odd, the parity is 1, otherwise 0.
  runnableExamples:
    doAssert parityBits(0b0000_0000'u8) == 0
    doAssert parityBits(0b0101_0001'u8) == 1
    doAssert parityBits(0b0110_1001'u8) == 0
    doAssert parityBits(0b0111_1111'u8) == 1

  # Can be used as a base when creating an ASM version.
  # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
  when x is SomeSignedInt:
    let x = x.toUnsigned
  when nimvm:
    result = forwardImpl(parityImpl, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_parity(x.uint32).int
      else: result = builtin_parityll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = parityImpl(x.uint32)
      else: result = parityImpl(x.uint64)

func firstSetBit*(x: SomeInteger): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of `x`.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert firstSetBit(0b0000_0001'u8) == 1
    doAssert firstSetBit(0b0000_0010'u8) == 2
    doAssert firstSetBit(0b0000_0100'u8) == 3
    doAssert firstSetBit(0b0000_1000'u8) == 4
    doAssert firstSetBit(0b0000_1111'u8) == 1

  # The GCC builtin 'builtin_ffs' already handles zero input.
  when x is SomeSignedInt:
    let x = x.toUnsigned
  when nimvm:
    when noUndefined:
      if x == 0:
        return 0
    result = forwardImpl(firstSetBitNim, x)
  else:
    when noUndefined and not useGCC_builtins:
      if x == 0:
        return 0
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ffs(cast[cint](x.cuint)).int
      else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int
    elif useVCC_builtins:
      when sizeof(x) <= 4:
        result = 1 + vcc_scan_impl(bitScanForward, x.culong)
      elif arch64:
        result = 1 + vcc_scan_impl(bitScanForward64, x.uint64)
      else:
        result = firstSetBitNim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = 1 + icc_scan_impl(bitScanForward, x.uint32)
      elif arch64:
        result = 1 + icc_scan_impl(bitScanForward64, x.uint64)
      else:
        result = firstSetBitNim(x.uint64)
    else:
      when sizeof(x) <= 4: result = firstSetBitNim(x.uint32)
      else: result = firstSetBitNim(x.uint64)

func fastLog2*(x: SomeInteger): int {.inline.} =
  ## Quickly find the log base 2 of an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is -1,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert fastLog2(0b0000_0001'u8) == 0
    doAssert fastLog2(0b0000_0010'u8) == 1
    doAssert fastLog2(0b0000_0100'u8) == 2
    doAssert fastLog2(0b0000_1000'u8) == 3
    doAssert fastLog2(0b0000_1111'u8) == 3

  when x is SomeSignedInt:
    let x = x.toUnsigned
  when noUndefined:
    if x == 0:
      return -1
  when nimvm:
    result = forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int
      else: result = 63 - builtin_clzll(x.uint64).int
    elif useVCC_builtins:
      when sizeof(x) <= 4:
        result = vcc_scan_impl(bitScanReverse, x.culong)
      elif arch64:
        result = vcc_scan_impl(bitScanReverse64, x.uint64)
      else:
        result = fastlog2Nim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = icc_scan_impl(bitScanReverse, x.uint32)
      elif arch64:
        result = icc_scan_impl(bitScanReverse64, x.uint64)
      else:
        result = fastlog2Nim(x.uint64)
    else:
      when sizeof(x) <= 4: result = fastlog2Nim(x.uint32)
      else: result = fastlog2Nim(x.uint64)

func countLeadingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of leading zero bits in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countTrailingZeroBits proc <#countTrailingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countLeadingZeroBits(0b0000_0001'u8) == 7
    doAssert countLeadingZeroBits(0b0000_0010'u8) == 6
    doAssert countLeadingZeroBits(0b0000_0100'u8) == 5
    doAssert countLeadingZeroBits(0b0000_1000'u8) == 4
    doAssert countLeadingZeroBits(0b0000_1111'u8) == 4

  when x is SomeSignedInt:
    let x = x.toUnsigned
  when noUndefined:
    if x == 0:
      return 0
  when nimvm:
    result = sizeof(x)*8 - 1 - forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8)
      else: result = builtin_clzll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint32)
      else: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint64)

func countTrailingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of trailing zeros in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countLeadingZeroBits proc <#countLeadingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countTrailingZeroBits(0b0000_0001'u8) == 0
    doAssert countTrailingZeroBits(0b0000_0010'u8) == 1
    doAssert countTrailingZeroBits(0b0000_0100'u8) == 2
    doAssert countTrailingZeroBits(0b0000_1000'u8) == 3
    doAssert countTrailingZeroBits(0b0000_1111'u8) == 0

  when x is SomeSignedInt:
    let x = x.toUnsigned
  when noUndefined:
    if x == 0:
      return 0
  when nimvm:
    result = firstSetBit(x) - 1
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ctz(x.uint32).int
      else: result = builtin_ctzll(x.uint64).int
    else:
      result = firstSetBit(x) - 1

when useBuiltinsRotate:
  when defined(gcc):
    # GCC was tested down to version 4.8.1, where the intrinsics are present;
    # earlier versions were not tested.
    func builtin_rotl8(value: uint8, shift: cint): uint8
                      {.importc: "__rolb", header: "<x86intrin.h>".}
    func builtin_rotl16(value: cushort, shift: cint): cushort
                       {.importc: "__rolw", header: "<x86intrin.h>".}
    func builtin_rotl32(value: cuint, shift: cint): cuint
                       {.importc: "__rold", header: "<x86intrin.h>".}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: cint): culonglong
                         {.importc: "__rolq", header: "<x86intrin.h>".}

    func builtin_rotr8(value: uint8, shift: cint): uint8
                      {.importc: "__rorb", header: "<x86intrin.h>".}
    func builtin_rotr16(value: cushort, shift: cint): cushort
                       {.importc: "__rorw", header: "<x86intrin.h>".}
    func builtin_rotr32(value: cuint, shift: cint): cuint
                       {.importc: "__rord", header: "<x86intrin.h>".}
    when defined(amd64):
      func builtin_rotr64(value: culonglong, shift: cint): culonglong
                         {.importc: "__rorq", header: "<x86intrin.h>".}
  elif defined(clang):
    # In CLANG, builtins have been present since version 8.0.0 and intrinsics
    # since version 9.0.0. This implementation chose the builtins, as they have
    # been around for longer.
    # https://releases.llvm.org/8.0.0/tools/clang/docs/ReleaseNotes.html#non-comprehensive-list-of-changes-in-this-release
    # https://releases.llvm.org/8.0.0/tools/clang/docs/LanguageExtensions.html#builtin-rotateleft
    # source for correct declarations: https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Basic/Builtins.def
    func builtin_rotl8(value: uint8, shift: uint8): uint8
                      {.importc: "__builtin_rotateleft8", nodecl.}
    func builtin_rotl16(value: cushort, shift: cushort): cushort
                       {.importc: "__builtin_rotateleft16", nodecl.}
    func builtin_rotl32(value: cuint, shift: cuint): cuint
                       {.importc: "__builtin_rotateleft32", nodecl.}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: culonglong): culonglong
                         {.importc: "__builtin_rotateleft64", nodecl.}

    func builtin_rotr8(value: uint8, shift: uint8): uint8
                      {.importc: "__builtin_rotateright8", nodecl.}
    func builtin_rotr16(value: cushort, shift: cushort): cushort
                       {.importc: "__builtin_rotateright16", nodecl.}
    func builtin_rotr32(value: cuint, shift: cuint): cuint
                       {.importc: "__builtin_rotateright32", nodecl.}
    when defined(amd64):
      # shift is unsigned, refs https://github.com/llvm-mirror/clang/commit/892de415b7fde609dafc4e6c1643b7eaa0150a4d
      func builtin_rotr64(value: culonglong, shift: culonglong): culonglong
                         {.importc: "__builtin_rotateright64", nodecl.}
  elif defined(vcc):
    # Tested on Microsoft (R) C/C++ Optimizing Compiler 19.28.29335 x64 and x86.
    # Not tested in previous versions.
    # https://docs.microsoft.com/en-us/cpp/intrinsics/rotl8-rotl16?view=msvc-160
    # https://docs.microsoft.com/en-us/cpp/intrinsics/rotr8-rotr16?view=msvc-160
    # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64?view=msvc-160
    func builtin_rotl8(value: uint8, shift: uint8): uint8
                      {.importc: "_rotl8", header: "<intrin.h>".}
    func builtin_rotl16(value: cushort, shift: uint8): cushort
                       {.importc: "_rotl16", header: "<intrin.h>".}
    func builtin_rotl32(value: cuint, shift: cint): cuint
                       {.importc: "_rotl", header: "<stdlib.h>".}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: cint): culonglong
                         {.importc: "_rotl64", header: "<stdlib.h>".}

    func builtin_rotr8(value: uint8, shift: uint8): uint8
                      {.importc: "_rotr8", header: "<intrin.h>".}
    func builtin_rotr16(value: cushort, shift: uint8): cushort
                       {.importc: "_rotr16", header: "<intrin.h>".}
    func builtin_rotr32(value: cuint, shift: cint): cuint
                       {.importc: "_rotr", header: "<stdlib.h>".}
    when defined(amd64):
      func builtin_rotr64(value: culonglong, shift: cint): culonglong
                         {.importc: "_rotr64", header: "<stdlib.h>".}
  elif defined(icl):
    # Tested on Intel(R) C++ Intel(R) 64 Compiler Classic Version 2021.1.2 Build
    # 20201208_000000 x64 and x86. Not tested in previous versions.
    func builtin_rotl8(value: uint8, shift: cint): uint8
                      {.importc: "__rolb", header: "<immintrin.h>".}
    func builtin_rotl16(value: cushort, shift: cint): cushort
                       {.importc: "__rolw", header: "<immintrin.h>".}
    func builtin_rotl32(value: cuint, shift: cint): cuint
                       {.importc: "__rold", header: "<immintrin.h>".}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: cint): culonglong
                         {.importc: "__rolq", header: "<immintrin.h>".}

    func builtin_rotr8(value: uint8, shift: cint): uint8
                      {.importc: "__rorb", header: "<immintrin.h>".}
    func builtin_rotr16(value: cushort, shift: cint): cushort
                       {.importc: "__rorw", header: "<immintrin.h>".}
    func builtin_rotr32(value: cuint, shift: cint): cuint
                       {.importc: "__rord", header: "<immintrin.h>".}
    when defined(amd64):
      func builtin_rotr64(value: culonglong, shift: cint): culonglong
                         {.importc: "__rorq", header: "<immintrin.h>".}

func rotl[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  ## Left-rotate bits in a `value`.
  # https://stackoverflow.com/a/776523
  const mask = 8 * sizeof(value) - 1
  let rot = rot and mask
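  # `(-rot) and mask` equals `(bits - rot) mod bits`, so a rotation by 0
  # never produces a shift by the full word width (which would be undefined).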
  (value shl rot) or (value shr ((-rot) and mask))

func rotr[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  ## Right-rotate bits in a `value`.
  const mask = 8 * sizeof(value) - 1
  let rot = rot and mask
  (value shr rot) or (value shl ((-rot) and mask))

func shiftTypeTo(size: static int, shift: int): auto {.inline.} =
  ## Returns the `shift` for the rotation according to the compiler and the
  ## `size`.
  when (defined(vcc) and (size in [4, 8])) or defined(gcc) or defined(icl):
    cint(shift)
  elif (defined(vcc) and (size in [1, 2])) or (defined(clang) and size == 1):
    uint8(shift)
  elif defined(clang):
    when size == 2:
      cushort(shift)
    elif size == 4:
      cuint(shift)
    elif size == 8:
      culonglong(shift)

func rotateLeftBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  ## Left-rotate bits in a `value`.
  runnableExamples:
    doAssert rotateLeftBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
    doAssert rotateLeftBits(0b00111100_11000011'u16, 8) ==
      0b11000011_00111100'u16
    doAssert rotateLeftBits(0b0000111111110000_1111000000001111'u32, 16) ==
      0b1111000000001111_0000111111110000'u32
    doAssert rotateLeftBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
      0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  when nimvm:
    rotl(value, shift.int32)
  else:
    when useBuiltinsRotate:
      const size = sizeof(T)
      when size == 1:
        builtin_rotl8(value.uint8, shiftTypeTo(size, shift)).T
      elif size == 2:
        builtin_rotl16(value.cushort, shiftTypeTo(size, shift)).T
      elif size == 4:
        builtin_rotl32(value.cuint, shiftTypeTo(size, shift)).T
      elif size == 8 and arch64:
        builtin_rotl64(value.culonglong, shiftTypeTo(size, shift)).T
      else:
        rotl(value, shift.int32)
    else:
      rotl(value, shift.int32)

func rotateRightBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  ## Right-rotate bits in a `value`.
  runnableExamples:
    doAssert rotateRightBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
    doAssert rotateRightBits(0b00111100_11000011'u16, 8) ==
      0b11000011_00111100'u16
    doAssert rotateRightBits(0b0000111111110000_1111000000001111'u32, 16) ==
      0b1111000000001111_0000111111110000'u32
    doAssert rotateRightBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
      0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  when nimvm:
    rotr(value, shift.int32)
  else:
    when useBuiltinsRotate:
      const size = sizeof(T)
      when size == 1:
        builtin_rotr8(value.uint8, shiftTypeTo(size, shift)).T
      elif size == 2:
        builtin_rotr16(value.cushort, shiftTypeTo(size, shift)).T
      elif size == 4:
        builtin_rotr32(value.cuint, shiftTypeTo(size, shift)).T
      elif size == 8 and arch64:
        builtin_rotr64(value.culonglong, shiftTypeTo(size, shift)).T
      else:
        rotr(value, shift.int32)
    else:
      rotr(value, shift.int32)

func repeatBits[T: SomeUnsignedInt](x: SomeUnsignedInt; retType: type[T]): T =
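  ## Replicates the bit pattern of `x` until it fills the full width of the
  ## target type `T`, e.g. `repeatBits(0x55'u8, uint32)` yields `0x55555555'u32`.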
  result = x
  var i = 1
  while i != (sizeof(T) div sizeof(x)):
    result = (result shl (sizeof(x)*8*i)) or result
    i *= 2

func reverseBits*[T: SomeUnsignedInt](x: T): T =
  ## Returns the bit reversal of `x`.
  runnableExamples:
    doAssert reverseBits(0b10100100'u8) == 0b00100101'u8
    doAssert reverseBits(0xdd'u8) == 0xbb'u8
    doAssert reverseBits(0xddbb'u16) == 0xddbb'u16
    doAssert reverseBits(0xdeadbeef'u32) == 0xf77db57b'u32

  template repeat(x: SomeUnsignedInt): T = repeatBits(x, T)

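  # Classic divide-and-conquer bit reversal: swap adjacent bit groups of
  # doubling size (single bits, pairs, nibbles, bytes, ...), using masks
  # replicated across the full width of T by `repeat`.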
  result = x
  result =
    ((repeat(0x55u8) and result) shl 1) or
    ((repeat(0xaau8) and result) shr 1)
  result =
    ((repeat(0x33u8) and result) shl 2) or
    ((repeat(0xccu8) and result) shr 2)
  when sizeof(T) == 1:
    result = (result shl 4) or (result shr 4)
  when sizeof(T) >= 2:
    result =
      ((repeat(0x0fu8) and result) shl 4) or
      ((repeat(0xf0u8) and result) shr 4)
  when sizeof(T) == 2:
    result = (result shl 8) or (result shr 8)
  when sizeof(T) >= 4:
    result =
      ((repeat(0x00ffu16) and result) shl 8) or
      ((repeat(0xff00u16) and result) shr 8)
  when sizeof(T) == 4:
    result = (result shl 16) or (result shr 16)
  when sizeof(T) == 8:
    result =
      ((repeat(0x0000ffffu32) and result) shl 16) or
      ((repeat(0xffff0000u32) and result) shr 16)
    result = (result shl 32) or (result shr 32)