#
#
#            Nim's Runtime Library
#        (c) Copyright 2017 Nim Authors
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a series of low level methods for bit manipulation.
##
## By default, compiler intrinsics are used where possible to improve performance
## on supported compilers: `GCC`, `LLVM_GCC`, `CLANG`, `VCC`, `ICC`.
##
## The module will fallback to pure nim procs in case the backend is not supported.
## You can also use the flag `noIntrinsicsBitOpts` to disable compiler intrinsics.
##
## This module is also compatible with other backends: `JavaScript`, `NimScript`
## as well as the `compiletime VM`.
##
## As a result of using optimized functions/intrinsics, some functions can return
## undefined results if the input is invalid. You can use the flag `noUndefinedBitOpts`
## to force predictable behaviour for all input, causing a small performance hit.
##
## At this time only `fastLog2`, `firstSetBit`, `countLeadingZeroBits` and `countTrailingZeroBits`
## may return undefined and/or platform dependent values if given invalid input.

import macros
import std/private/since
from std/private/bitops_utils import forwardImpl, toUnsigned

func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".}
  ## Computes the `bitwise complement` of the integer `x`.

func internalBitand[T: SomeInteger](x, y: T): T {.magic: "BitandI".}

func internalBitor[T: SomeInteger](x, y: T): T {.magic: "BitorI".}

func internalBitxor[T: SomeInteger](x, y: T): T {.magic: "BitxorI".}

macro bitand*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  ## Computes the `bitwise and` of all arguments collectively.
  let fn = bindSym("internalBitand")
  result = newCall(fn, x, y)
  for extra in z:
    result = newCall(fn, result, extra)

macro bitor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  ## Computes the `bitwise or` of all arguments collectively.
  let fn = bindSym("internalBitor")
  result = newCall(fn, x, y)
  for extra in z:
    result = newCall(fn, result, extra)

macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
  ## Computes the `bitwise xor` of all arguments collectively.
  let fn = bindSym("internalBitxor")
  result = newCall(fn, x, y)
  for extra in z:
    result = newCall(fn, result, extra)


type BitsRange*[T] = range[0..sizeof(T)*8-1]
  ## A range with all bit positions for type `T`.

func bitsliced*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns an extracted (and shifted) slice of bits from `v`.
  runnableExamples:
    doAssert 0b10111.bitsliced(2 .. 4) == 0b101
    doAssert 0b11100.bitsliced(0 .. 2) == 0b100
    doAssert 0b11100.bitsliced(0 ..< 3) == 0b100

  let
    upmost = sizeof(T) * 8 - 1
    uv = when v is SomeUnsignedInt: v else: v.toUnsigned
  (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T

proc bitslice*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v` into an extracted (and shifted) slice of bits from `v`.
  runnableExamples:
    var x = 0b101110
    x.bitslice(2 .. 4)
    doAssert x == 0b011

  let
    upmost = sizeof(T) * 8 - 1
    uv = when v is SomeUnsignedInt: v else: v.toUnsigned
  v = (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T

func toMask*[T: SomeInteger](slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Creates a bitmask based on a slice of bits.
  runnableExamples:
    doAssert toMask[int32](1 .. 3) == 0b1110'i32
    doAssert toMask[int32](0 .. 3) == 0b1111'i32

  let
    upmost = sizeof(T) * 8 - 1
    bitmask = when T is SomeUnsignedInt:
                bitnot(0.T)
              else:
                bitnot(0.T).toUnsigned
  (bitmask shl (upmost - slice.b + slice.a) shr (upmost - slice.b)).T

proc masked*[T: SomeInteger](v, mask: T): T {.inline, since: (1, 3).} =
  ## Returns `v`, with only the `1` bits from `mask` matching those of
  ## `v` set to 1.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.masked(0b0000_1010'u8) == 0b0000_0010'u8

  bitand(v, mask)

func masked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns `v`, with only the `1` bits in the range of `slice`
  ## matching those of `v` set to 1.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_1011'u8
    doAssert v.masked(1 .. 3) == 0b0000_1010'u8

  bitand(v, toMask[T](slice))

proc mask*[T: SomeInteger](v: var T; mask: T) {.inline, since: (1, 3).} =
  ## Mutates `v`, with only the `1` bits from `mask` matching those of
  ## `v` set to 1.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.mask(0b0000_1010'u8)
    doAssert v == 0b0000_0010'u8

  v = bitand(v, mask)

proc mask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v`, with only the `1` bits in the range of `slice`
  ## matching those of `v` set to 1.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_1011'u8
    v.mask(1 .. 3)
    doAssert v == 0b0000_1010'u8

  v = bitand(v, toMask[T](slice))

func setMasked*[T: SomeInteger](v, mask: T): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits from `mask` set to 1.
  ##
  ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.setMasked(0b0000_1010'u8) == 0b0000_1011'u8

  bitor(v, mask)

func setMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits in the range of `slice` set to 1.
  ##
  ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.setMasked(2 .. 3) == 0b0000_1111'u8

  bitor(v, toMask[T](slice))

proc setMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  ## Mutates `v`, with all the `1` bits from `mask` set to 1.
  ##
  ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.setMask(0b0000_1010'u8)
    doAssert v == 0b0000_1011'u8

  v = bitor(v, mask)

proc setMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v`, with all the `1` bits in the range of `slice` set to 1.
  ##
  ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.setMask(2 .. 3)
    doAssert v == 0b0000_1111'u8

  v = bitor(v, toMask[T](slice))

func clearMasked*[T: SomeInteger](v, mask: T): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits from `mask` set to 0.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  ## with an *inverted mask*.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.clearMasked(0b0000_1010'u8) == 0b0000_0001'u8

  bitand(v, bitnot(mask))

func clearMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits in the range of `slice` set to 0.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  ## with an *inverted mask*.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.clearMasked(1 .. 3) == 0b0000_0001'u8

  bitand(v, bitnot(toMask[T](slice)))

proc clearMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  ## Mutates `v`, with all the `1` bits from `mask` set to 0.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  ## with an *inverted mask*.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.clearMask(0b0000_1010'u8)
    doAssert v == 0b0000_0001'u8

  v = bitand(v, bitnot(mask))

proc clearMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v`, with all the `1` bits in the range of `slice` set to 0.
  ##
  ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation
  ## with an *inverted mask*.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.clearMask(1 .. 3)
    doAssert v == 0b0000_0001'u8

  v = bitand(v, bitnot(toMask[T](slice)))

func flipMasked*[T: SomeInteger](v, mask: T): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits from `mask` flipped.
  ##
  ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.flipMasked(0b0000_1010'u8) == 0b0000_1001'u8

  bitxor(v, mask)

func flipMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
  ## Returns `v`, with all the `1` bits in the range of `slice` flipped.
  ##
  ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    let v = 0b0000_0011'u8
    doAssert v.flipMasked(1 .. 3) == 0b0000_1101'u8

  bitxor(v, toMask[T](slice))

proc flipMask*[T: SomeInteger](v: var T; mask: T) {.inline.} =
  ## Mutates `v`, with all the `1` bits from `mask` flipped.
  ##
  ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.flipMask(0b0000_1010'u8)
    doAssert v == 0b0000_1001'u8

  v = bitxor(v, mask)

proc flipMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
  ## Mutates `v`, with all the `1` bits in the range of `slice` flipped.
  ##
  ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.flipMask(1 .. 3)
    doAssert v == 0b0000_1101'u8

  v = bitxor(v, toMask[T](slice))

proc setBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  ## Mutates `v`, with the bit at position `bit` set to 1.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.setBit(5'u8)
    doAssert v == 0b0010_0011'u8

  v.setMask(1.T shl bit)

proc clearBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  ## Mutates `v`, with the bit at position `bit` set to 0.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.clearBit(1'u8)
    doAssert v == 0b0000_0001'u8

  v.clearMask(1.T shl bit)

proc flipBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} =
  ## Mutates `v`, with the bit at position `bit` flipped.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.flipBit(1'u8)
    doAssert v == 0b0000_0001'u8

    v = 0b0000_0011'u8
    v.flipBit(2'u8)
    doAssert v == 0b0000_0111'u8

  v.flipMask(1.T shl bit)

macro setBits*(v: typed; bits: varargs[typed]): untyped =
  ## Mutates `v`, with the bits at positions `bits` set to 1.
  runnableExamples:
    var v = 0b0000_0011'u8
    v.setBits(3, 5, 7)
    doAssert v == 0b1010_1011'u8

  bits.expectKind(nnkBracket)
  result = newStmtList()
  for bit in bits:
    result.add newCall("setBit", v, bit)

macro clearBits*(v: typed; bits: varargs[typed]): untyped =
  ## Mutates `v`, with the bits at positions `bits` set to 0.
  runnableExamples:
    var v = 0b1111_1111'u8
    v.clearBits(1, 3, 5, 7)
    doAssert v == 0b0101_0101'u8

  bits.expectKind(nnkBracket)
  result = newStmtList()
  for bit in bits:
    result.add newCall("clearBit", v, bit)

macro flipBits*(v: typed; bits: varargs[typed]): untyped =
  ## Mutates `v`, with the bits at positions `bits` flipped.
  runnableExamples:
    var v = 0b0000_1111'u8
    v.flipBits(1, 3, 5, 7)
    doAssert v == 0b1010_0101'u8

  bits.expectKind(nnkBracket)
  result = newStmtList()
  for bit in bits:
    result.add newCall("flipBit", v, bit)


proc testBit*[T: SomeInteger](v: T; bit: BitsRange[T]): bool {.inline.} =
  ## Returns true if the bit in `v` at positions `bit` is set to 1.
  runnableExamples:
    let v = 0b0000_1111'u8
    doAssert v.testBit(0)
    doAssert not v.testBit(7)

  let mask = 1.T shl bit
  return (v and mask) == mask

# #### Pure Nim version ####

func firstSetBitNim(x: uint32): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  const lookup: array[32, uint8] = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15,
    25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]
  let v = x.uint32
  let k = not v + 1 # get two's complement # cast[uint32](-cast[int32](v))
  result = 1 + lookup[uint32((v and k) * 0x077CB531'u32) shr 27].int

func firstSetBitNim(x: uint64): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
  let v = uint64(x)
  var k = uint32(v and 0xFFFFFFFF'u32)
  if k == 0:
    k = uint32(v shr 32'u32) and 0xFFFFFFFF'u32
    result = 32
  else:
    result = 0
  result += firstSetBitNim(k)

func fastlog2Nim(x: uint32): int {.inline.} =
  ## Quickly find the log base 2 of a 32-bit or less integer.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18,
    22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31]
  var v = x.uint32
  v = v or v shr 1 # first round down to one less than a power of 2
  v = v or v shr 2
  v = v or v shr 4
  v = v or v shr 8
  v = v or v shr 16
  result = lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int

func fastlog2Nim(x: uint64): int {.inline.} =
  ## Quickly find the log base 2 of a 64-bit integer.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54,
    33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62,
    57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31,
    35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63]
  var v = x.uint64
  v = v or v shr 1 # first round down to one less than a power of 2
  v = v or v shr 2
  v = v or v shr 4
  v = v or v shr 8
  v = v or v shr 16
  v = v or v shr 32
  result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int

import system/countbits_impl

const useBuiltinsRotate = (defined(amd64) or defined(i386)) and
                          (defined(gcc) or defined(clang) or defined(vcc) or
                           (defined(icl) and not defined(cpp))) and useBuiltins

template parityImpl[T](value: T): int =
  # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
  var v = value
  when sizeof(T) == 8:
    v = v xor (v shr 32)
  when sizeof(T) >= 4:
    v = v xor (v shr 16)
  when sizeof(T) >= 2:
    v = v xor (v shr 8)
  v = v xor (v shr 4)
  v = v and 0xf
  ((0x6996'u shr v) and 1).int


when useGCC_builtins:
  # Returns the bit parity in value
  proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.}
  proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.}

  # Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero.
  proc builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", cdecl.}
  proc builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", cdecl.}

  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  proc builtin_clz(x: cuint): cint {.importc: "__builtin_clz", cdecl.}
  proc builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", cdecl.}

  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  proc builtin_ctz(x: cuint): cint {.importc: "__builtin_ctz", cdecl.}
  proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.}

elif useVCC_builtins:
  # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1).
  func bitScanReverse(index: ptr culong, mask: culong): uint8 {.
      importc: "_BitScanReverse", header: "<intrin.h>".}
  func bitScanReverse64(index: ptr culong, mask: uint64): uint8 {.
      importc: "_BitScanReverse64", header: "<intrin.h>".}

  # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1).
  func bitScanForward(index: ptr culong, mask: culong): uint8 {.
      importc: "_BitScanForward", header: "<intrin.h>".}
  func bitScanForward64(index: ptr culong, mask: uint64): uint8 {.
      importc: "_BitScanForward64", header: "<intrin.h>".}

  template vcc_scan_impl(fnc: untyped; v: untyped): int =
    var index: culong
    discard fnc(index.addr, v)
    index.int

elif useICC_builtins:
  # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
  func bitScanForward(p: ptr uint32, b: uint32): uint8 {.
      importc: "_BitScanForward", header: "<immintrin.h>".}
  func bitScanForward64(p: ptr uint32, b: uint64): uint8 {.
      importc: "_BitScanForward64", header: "<immintrin.h>".}

  # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
  func bitScanReverse(p: ptr uint32, b: uint32): uint8 {.
      importc: "_BitScanReverse", header: "<immintrin.h>".}
  func bitScanReverse64(p: ptr uint32, b: uint64): uint8 {.
      importc: "_BitScanReverse64", header: "<immintrin.h>".}

  template icc_scan_impl(fnc: untyped; v: untyped): int =
    var index: uint32
    discard fnc(index.addr, v)
    index.int

func countSetBits*(x: SomeInteger): int {.inline.} =
  ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
  runnableExamples:
    doAssert countSetBits(0b0000_0011'u8) == 2
    doAssert countSetBits(0b1010_1010'u8) == 4

  result = countSetBitsImpl(x)

func popcount*(x: SomeInteger): int {.inline.} =
  ## Alias for `countSetBits <#countSetBits,SomeInteger>`_ (Hamming weight).
  result = countSetBits(x)

func parityBits*(x: SomeInteger): int {.inline.} =
  ## Calculate the bit parity in an integer. If the number of 1-bits
  ## is odd, the parity is 1, otherwise 0.
  runnableExamples:
    doAssert parityBits(0b0000_0000'u8) == 0
    doAssert parityBits(0b0101_0001'u8) == 1
    doAssert parityBits(0b0110_1001'u8) == 0
    doAssert parityBits(0b0111_1111'u8) == 1

  # Can be used a base if creating ASM version.
  # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
  when x is SomeSignedInt:
    let x = x.toUnsigned
  when nimvm:
    result = forwardImpl(parityImpl, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_parity(x.uint32).int
      else: result = builtin_parityll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = parityImpl(x.uint32)
      else: result = parityImpl(x.uint64)

func firstSetBit*(x: SomeInteger): int {.inline.} =
  ## Returns the 1-based index of the least significant set bit of `x`.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert firstSetBit(0b0000_0001'u8) == 1
    doAssert firstSetBit(0b0000_0010'u8) == 2
    doAssert firstSetBit(0b0000_0100'u8) == 3
    doAssert firstSetBit(0b0000_1000'u8) == 4
    doAssert firstSetBit(0b0000_1111'u8) == 1

  # GCC builtin 'builtin_ffs' already handle zero input.
  when x is SomeSignedInt:
    let x = x.toUnsigned
  when nimvm:
    when noUndefined:
      if x == 0:
        return 0
    result = forwardImpl(firstSetBitNim, x)
  else:
    when noUndefined and not useGCC_builtins:
      if x == 0:
        return 0
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ffs(cast[cint](x.cuint)).int
      else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int
    elif useVCC_builtins:
      when sizeof(x) <= 4:
        result = 1 + vcc_scan_impl(bitScanForward, x.culong)
      elif arch64:
        result = 1 + vcc_scan_impl(bitScanForward64, x.uint64)
      else:
        result = firstSetBitNim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = 1 + icc_scan_impl(bitScanForward, x.uint32)
      elif arch64:
        result = 1 + icc_scan_impl(bitScanForward64, x.uint64)
      else:
        result = firstSetBitNim(x.uint64)
    else:
      when sizeof(x) <= 4: result = firstSetBitNim(x.uint32)
      else: result = firstSetBitNim(x.uint64)

func fastLog2*(x: SomeInteger): int {.inline.} =
  ## Quickly find the log base 2 of an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is -1,
  ## otherwise the result is undefined.
  runnableExamples:
    doAssert fastLog2(0b0000_0001'u8) == 0
    doAssert fastLog2(0b0000_0010'u8) == 1
    doAssert fastLog2(0b0000_0100'u8) == 2
    doAssert fastLog2(0b0000_1000'u8) == 3
    doAssert fastLog2(0b0000_1111'u8) == 3

  when x is SomeSignedInt:
    let x = x.toUnsigned
  when noUndefined:
    if x == 0:
      return -1
  when nimvm:
    result = forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int
      else: result = 63 - builtin_clzll(x.uint64).int
    elif useVCC_builtins:
      when sizeof(x) <= 4:
        result = vcc_scan_impl(bitScanReverse, x.culong)
      elif arch64:
        result = vcc_scan_impl(bitScanReverse64, x.uint64)
      else:
        result = fastlog2Nim(x.uint64)
    elif useICC_builtins:
      when sizeof(x) <= 4:
        result = icc_scan_impl(bitScanReverse, x.uint32)
      elif arch64:
        result = icc_scan_impl(bitScanReverse64, x.uint64)
      else:
        result = fastlog2Nim(x.uint64)
    else:
      when sizeof(x) <= 4: result = fastlog2Nim(x.uint32)
      else: result = fastlog2Nim(x.uint64)

func countLeadingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of leading zero bits in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countTrailingZeroBits proc <#countTrailingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countLeadingZeroBits(0b0000_0001'u8) == 7
    doAssert countLeadingZeroBits(0b0000_0010'u8) == 6
    doAssert countLeadingZeroBits(0b0000_0100'u8) == 5
    doAssert countLeadingZeroBits(0b0000_1000'u8) == 4
    doAssert countLeadingZeroBits(0b0000_1111'u8) == 4

  when x is SomeSignedInt:
    let x = x.toUnsigned
  when noUndefined:
    if x == 0:
      return 0
  when nimvm:
    result = sizeof(x)*8 - 1 - forwardImpl(fastlog2Nim, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8)
      else: result = builtin_clzll(x.uint64).int
    else:
      when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint32)
      else: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint64)

func countTrailingZeroBits*(x: SomeInteger): int {.inline.} =
  ## Returns the number of trailing zeros in an integer.
  ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0,
  ## otherwise the result is undefined.
  ##
  ## **See also:**
  ## * `countLeadingZeroBits proc <#countLeadingZeroBits,SomeInteger>`_
  runnableExamples:
    doAssert countTrailingZeroBits(0b0000_0001'u8) == 0
    doAssert countTrailingZeroBits(0b0000_0010'u8) == 1
    doAssert countTrailingZeroBits(0b0000_0100'u8) == 2
    doAssert countTrailingZeroBits(0b0000_1000'u8) == 3
    doAssert countTrailingZeroBits(0b0000_1111'u8) == 0

  when x is SomeSignedInt:
    let x = x.toUnsigned
  when noUndefined:
    if x == 0:
      return 0
  when nimvm:
    result = firstSetBit(x) - 1
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_ctz(x.uint32).int
      else: result = builtin_ctzll(x.uint64).int
    else:
      result = firstSetBit(x) - 1

when useBuiltinsRotate:
  when defined(gcc):
    # GCC was tested until version 4.8.1 and intrinsics were present. Not tested
    # in previous versions.
    func builtin_rotl8(value: uint8, shift: cint): uint8
                      {.importc: "__rolb", header: "<x86intrin.h>".}
    func builtin_rotl16(value: cushort, shift: cint): cushort
                       {.importc: "__rolw", header: "<x86intrin.h>".}
    func builtin_rotl32(value: cuint, shift: cint): cuint
                       {.importc: "__rold", header: "<x86intrin.h>".}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: cint): culonglong
                         {.importc: "__rolq", header: "<x86intrin.h>".}

    func builtin_rotr8(value: uint8, shift: cint): uint8
                      {.importc: "__rorb", header: "<x86intrin.h>".}
    func builtin_rotr16(value: cushort, shift: cint): cushort
                       {.importc: "__rorw", header: "<x86intrin.h>".}
    func builtin_rotr32(value: cuint, shift: cint): cuint
                       {.importc: "__rord", header: "<x86intrin.h>".}
    when defined(amd64):
      func builtin_rotr64(value: culonglong, shift: cint): culonglong
                         {.importc: "__rorq", header: "<x86intrin.h>".}
  elif defined(clang):
    # In CLANG, builtins have been present since version 8.0.0 and intrinsics
    # since version 9.0.0. This implementation chose the builtins, as they have
    # been around for longer.
    # https://releases.llvm.org/8.0.0/tools/clang/docs/ReleaseNotes.html#non-comprehensive-list-of-changes-in-this-release
    # https://releases.llvm.org/8.0.0/tools/clang/docs/LanguageExtensions.html#builtin-rotateleft
    # source for correct declarations: https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Basic/Builtins.def
    func builtin_rotl8(value: uint8, shift: uint8): uint8
                      {.importc: "__builtin_rotateleft8", nodecl.}
    func builtin_rotl16(value: cushort, shift: cushort): cushort
                       {.importc: "__builtin_rotateleft16", nodecl.}
    func builtin_rotl32(value: cuint, shift: cuint): cuint
                       {.importc: "__builtin_rotateleft32", nodecl.}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: culonglong): culonglong
                         {.importc: "__builtin_rotateleft64", nodecl.}

    func builtin_rotr8(value: uint8, shift: uint8): uint8
                      {.importc: "__builtin_rotateright8", nodecl.}
    func builtin_rotr16(value: cushort, shift: cushort): cushort
                       {.importc: "__builtin_rotateright16", nodecl.}
    func builtin_rotr32(value: cuint, shift: cuint): cuint
                       {.importc: "__builtin_rotateright32", nodecl.}
    when defined(amd64):
      # shift is unsigned, refs https://github.com/llvm-mirror/clang/commit/892de415b7fde609dafc4e6c1643b7eaa0150a4d
      func builtin_rotr64(value: culonglong, shift: culonglong): culonglong
                         {.importc: "__builtin_rotateright64", nodecl.}
  elif defined(vcc):
    # Tested on Microsoft (R) C/C++ Optimizing Compiler 19.28.29335 x64 and x86.
    # Not tested in previous versions.
    # https://docs.microsoft.com/en-us/cpp/intrinsics/rotl8-rotl16?view=msvc-160
    # https://docs.microsoft.com/en-us/cpp/intrinsics/rotr8-rotr16?view=msvc-160
    # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64?view=msvc-160
    func builtin_rotl8(value: uint8, shift: uint8): uint8
                      {.importc: "_rotl8", header: "<intrin.h>".}
    func builtin_rotl16(value: cushort, shift: uint8): cushort
                       {.importc: "_rotl16", header: "<intrin.h>".}
    func builtin_rotl32(value: cuint, shift: cint): cuint
                       {.importc: "_rotl", header: "<stdlib.h>".}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: cint): culonglong
                         {.importc: "_rotl64", header: "<stdlib.h>".}

    func builtin_rotr8(value: uint8, shift: uint8): uint8
                      {.importc: "_rotr8", header: "<intrin.h>".}
    func builtin_rotr16(value: cushort, shift: uint8): cushort
                       {.importc: "_rotr16", header: "<intrin.h>".}
    func builtin_rotr32(value: cuint, shift: cint): cuint
                       {.importc: "_rotr", header: "<stdlib.h>".}
    when defined(amd64):
      func builtin_rotr64(value: culonglong, shift: cint): culonglong
                         {.importc: "_rotr64", header: "<stdlib.h>".}
  elif defined(icl):
    # Tested on Intel(R) C++ Intel(R) 64 Compiler Classic Version 2021.1.2 Build
    # 20201208_000000 x64 and x86. Not tested in previous versions.
    func builtin_rotl8(value: uint8, shift: cint): uint8
                      {.importc: "__rolb", header: "<immintrin.h>".}
    func builtin_rotl16(value: cushort, shift: cint): cushort
                       {.importc: "__rolw", header: "<immintrin.h>".}
    func builtin_rotl32(value: cuint, shift: cint): cuint
                       {.importc: "__rold", header: "<immintrin.h>".}
    when defined(amd64):
      func builtin_rotl64(value: culonglong, shift: cint): culonglong
                         {.importc: "__rolq", header: "<immintrin.h>".}

    func builtin_rotr8(value: uint8, shift: cint): uint8
                      {.importc: "__rorb", header: "<immintrin.h>".}
    func builtin_rotr16(value: cushort, shift: cint): cushort
                       {.importc: "__rorw", header: "<immintrin.h>".}
    func builtin_rotr32(value: cuint, shift: cint): cuint
                       {.importc: "__rord", header: "<immintrin.h>".}
    when defined(amd64):
      func builtin_rotr64(value: culonglong, shift: cint): culonglong
                         {.importc: "__rorq", header: "<immintrin.h>".}

func rotl[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  ## Left-rotate bits in a `value`.
  # https://stackoverflow.com/a/776523
  const mask = 8 * sizeof(value) - 1
  let rot = rot and mask
  (value shl rot) or (value shr ((-rot) and mask))

func rotr[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
  ## Right-rotate bits in a `value`.
  const mask = 8 * sizeof(value) - 1
  let rot = rot and mask
  (value shr rot) or (value shl ((-rot) and mask))

func shiftTypeTo(size: static int, shift: int): auto {.inline.} =
  ## Returns the `shift` for the rotation according to the compiler and the
  ## `size`.
  when (defined(vcc) and (size in [4, 8])) or defined(gcc) or defined(icl):
    cint(shift)
  elif (defined(vcc) and (size in [1, 2])) or (defined(clang) and size == 1):
    uint8(shift)
  elif defined(clang):
    when size == 2:
      cushort(shift)
    elif size == 4:
      cuint(shift)
    elif size == 8:
      culonglong(shift)

func rotateLeftBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  ## Left-rotate bits in a `value`.
  runnableExamples:
    doAssert rotateLeftBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
    doAssert rotateLeftBits(0b00111100_11000011'u16, 8) ==
      0b11000011_00111100'u16
    doAssert rotateLeftBits(0b0000111111110000_1111000000001111'u32, 16) ==
      0b1111000000001111_0000111111110000'u32
    doAssert rotateLeftBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
      0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  when nimvm:
    rotl(value, shift.int32)
  else:
    when useBuiltinsRotate:
      const size = sizeof(T)
      when size == 1:
        builtin_rotl8(value.uint8, shiftTypeTo(size, shift)).T
      elif size == 2:
        builtin_rotl16(value.cushort, shiftTypeTo(size, shift)).T
      elif size == 4:
        builtin_rotl32(value.cuint, shiftTypeTo(size, shift)).T
      elif size == 8 and arch64:
        builtin_rotl64(value.culonglong, shiftTypeTo(size, shift)).T
      else:
        rotl(value, shift.int32)
    else:
      rotl(value, shift.int32)

func rotateRightBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
  ## Right-rotate bits in a `value`.
  runnableExamples:
    doAssert rotateRightBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
    doAssert rotateRightBits(0b00111100_11000011'u16, 8) ==
      0b11000011_00111100'u16
    doAssert rotateRightBits(0b0000111111110000_1111000000001111'u32, 16) ==
      0b1111000000001111_0000111111110000'u32
    doAssert rotateRightBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
      0b11111111000000000000000011111111_00000000111111111111111100000000'u64
  when nimvm:
    rotr(value, shift.int32)
  else:
    when useBuiltinsRotate:
      const size = sizeof(T)
      when size == 1:
        builtin_rotr8(value.uint8, shiftTypeTo(size, shift)).T
      elif size == 2:
        builtin_rotr16(value.cushort, shiftTypeTo(size, shift)).T
      elif size == 4:
        builtin_rotr32(value.cuint, shiftTypeTo(size, shift)).T
      elif size == 8 and arch64:
        builtin_rotr64(value.culonglong, shiftTypeTo(size, shift)).T
      else:
        rotr(value, shift.int32)
    else:
      rotr(value, shift.int32)

func repeatBits[T: SomeUnsignedInt](x: SomeUnsignedInt; retType: type[T]): T =
  ## Tiles the bit pattern of `x` across the (wider) type `T`,
  ## e.g. repeatBits(0x55'u8, uint32) == 0x55555555'u32.
  result = x
  var i = 1
  while i != (sizeof(T) div sizeof(x)):
    result = (result shl (sizeof(x)*8*i)) or result
    i *= 2

func reverseBits*[T: SomeUnsignedInt](x: T): T =
  ## Return the bit reversal of x.
  runnableExamples:
    doAssert reverseBits(0b10100100'u8) == 0b00100101'u8
    doAssert reverseBits(0xdd'u8) == 0xbb'u8
    doAssert reverseBits(0xddbb'u16) == 0xddbb'u16
    doAssert reverseBits(0xdeadbeef'u32) == 0xf77db57b'u32

  template repeat(x: SomeUnsignedInt): T = repeatBits(x, T)

  result = x
  result =
    ((repeat(0x55u8) and result) shl 1) or
    ((repeat(0xaau8) and result) shr 1)
  result =
    ((repeat(0x33u8) and result) shl 2) or
    ((repeat(0xccu8) and result) shr 2)
  when sizeof(T) == 1:
    result = (result shl 4) or (result shr 4)
  when sizeof(T) >= 2:
    result =
      ((repeat(0x0fu8) and result) shl 4) or
      ((repeat(0xf0u8) and result) shr 4)
  when sizeof(T) == 2:
    result = (result shl 8) or (result shr 8)
  when sizeof(T) >= 4:
    result =
      ((repeat(0x00ffu16) and result) shl 8) or
      ((repeat(0xff00u16) and result) shr 8)
  when sizeof(T) == 4:
    result = (result shl 16) or (result shr 16)
  when sizeof(T) == 8:
    result =
      ((repeat(0x0000ffffu32) and result) shl 16) or
      ((repeat(0xffff0000u32) and result) shr 16)
    result = (result shl 32) or (result shr 32)