/*
 * Utility compute operations used by translated code.
 *
 * Copyright (c) 2007 Thiemo Seufer
 * Copyright (c) 2007 Jocelyn Mayer
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

#ifndef HOST_UTILS_H
#define HOST_UTILS_H

#include "qemu/compiler.h"
#include "qemu/bswap.h"
#include "qemu/int128.h"

#ifdef CONFIG_INT128
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                          uint64_t a, uint64_t b)
{
    __uint128_t r = (__uint128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

static inline void muls64(uint64_t *plow, uint64_t *phigh,
                          int64_t a, int64_t b)
{
    __int128_t r = (__int128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

/* compute with 96 bit intermediate result: (a*b)/c */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return (__int128_t)a * b / c;
}

/*
 * Divide the 128-bit value *phigh:*plow by @divisor; the quotient is
 * written back via @plow and @phigh, and the remainder is returned.
 */
static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
                               uint64_t divisor)
{
    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
    __uint128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}

/* Signed counterpart of divu128(); likewise returns the remainder. */
static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
                              int64_t divisor)
{
    __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
    __int128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}
#else
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);

/* compute with 96 bit intermediate result: (a*b)/c */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    union {
        uint64_t ll;
        struct {
#if HOST_BIG_ENDIAN
            uint32_t high, low;
#else
            uint32_t low, high;
#endif
        } l;
    } u, res;
    uint64_t rl, rh;

    u.ll = a;
    rl = (uint64_t)u.l.low * (uint64_t)b;
    rh = (uint64_t)u.l.high * (uint64_t)b;
    rh += (rl >> 32);
    res.l.high = rh / c;
    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
    return res.ll;
}
#endif
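/*
 * Illustrative sketch, not part of the original API: scale a 64-bit
 * value by num/den with a full 128-bit intermediate, composing mulu64()
 * and divu128() above.  The helper name is hypothetical, and the result
 * is assumed to fit back into 64 bits.
 */
static inline uint64_t example_scale64(uint64_t val, uint64_t num,
                                       uint64_t den)
{
    uint64_t lo, hi;

    mulu64(&lo, &hi, val, num);   /* hi:lo = full 128-bit product */
    divu128(&lo, &hi, den);       /* hi:lo = 128-bit quotient */
    return lo;                    /* caller guarantees hi == 0 */
}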
/**
 * clz32 - count leading zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz32(uint32_t val)
{
    return val ? __builtin_clz(val) : 32;
}

/**
 * clo32 - count leading ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int clo32(uint32_t val)
{
    return clz32(~val);
}

/**
 * clz64 - count leading zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz64(uint64_t val)
{
    return val ? __builtin_clzll(val) : 64;
}

/**
 * clo64 - count leading ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int clo64(uint64_t val)
{
    return clz64(~val);
}

/**
 * ctz32 - count trailing zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz32(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

/**
 * cto32 - count trailing ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int cto32(uint32_t val)
{
    return ctz32(~val);
}

/**
 * ctz64 - count trailing zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz64(uint64_t val)
{
    return val ? __builtin_ctzll(val) : 64;
}

/**
 * cto64 - count trailing ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int cto64(uint64_t val)
{
    return ctz64(~val);
}

/**
 * clrsb32 - count leading redundant sign bits in a 32-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-31].
 */
static inline int clrsb32(uint32_t val)
{
#if __has_builtin(__builtin_clrsb) || !defined(__clang__)
    return __builtin_clrsb(val);
#else
    return clz32(val ^ ((int32_t)val >> 1)) - 1;
#endif
}

/**
 * clrsb64 - count leading redundant sign bits in a 64-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-63].
 */
static inline int clrsb64(uint64_t val)
{
#if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
    return __builtin_clrsbll(val);
#else
    return clz64(val ^ ((int64_t)val >> 1)) - 1;
#endif
}

/**
 * ctpop8 - count the population of one bits in an 8-bit value.
 * @val: The value to search
 */
static inline int ctpop8(uint8_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop16 - count the population of one bits in a 16-bit value.
 * @val: The value to search
 */
static inline int ctpop16(uint16_t val)
{
    return __builtin_popcount(val);
}
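/*
 * Illustrative sketch (hypothetical helper, not in the original header):
 * floor(log2(val)) via clz32() above.  Because clz32(0) is defined as 32,
 * this returns -1 for a zero input instead of invoking the undefined
 * behaviour of the raw builtin.
 */
static inline int example_log2_floor32(uint32_t val)
{
    return 31 - clz32(val);
}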
/**
 * ctpop32 - count the population of one bits in a 32-bit value.
 * @val: The value to search
 */
static inline int ctpop32(uint32_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop64 - count the population of one bits in a 64-bit value.
 * @val: The value to search
 */
static inline int ctpop64(uint64_t val)
{
    return __builtin_popcountll(val);
}

/**
 * revbit8 - reverse the bits in an 8-bit value.
 * @x: The value to modify.
 */
static inline uint8_t revbit8(uint8_t x)
{
#if __has_builtin(__builtin_bitreverse8)
    return __builtin_bitreverse8(x);
#else
    /* Assign the correct nibble position. */
    x = ((x & 0xf0) >> 4)
      | ((x & 0x0f) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x88) >> 3)
      | ((x & 0x44) >> 1)
      | ((x & 0x22) << 1)
      | ((x & 0x11) << 3);
    return x;
#endif
}

/**
 * revbit16 - reverse the bits in a 16-bit value.
 * @x: The value to modify.
 */
static inline uint16_t revbit16(uint16_t x)
{
#if __has_builtin(__builtin_bitreverse16)
    return __builtin_bitreverse16(x);
#else
    /* Assign the correct byte position. */
    x = bswap16(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0) >> 4)
      | ((x & 0x0f0f) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x8888) >> 3)
      | ((x & 0x4444) >> 1)
      | ((x & 0x2222) << 1)
      | ((x & 0x1111) << 3);
    return x;
#endif
}

/**
 * revbit32 - reverse the bits in a 32-bit value.
 * @x: The value to modify.
 */
static inline uint32_t revbit32(uint32_t x)
{
#if __has_builtin(__builtin_bitreverse32)
    return __builtin_bitreverse32(x);
#else
    /* Assign the correct byte position. */
    x = bswap32(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0f0f0u) >> 4)
      | ((x & 0x0f0f0f0fu) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x88888888u) >> 3)
      | ((x & 0x44444444u) >> 1)
      | ((x & 0x22222222u) << 1)
      | ((x & 0x11111111u) << 3);
    return x;
#endif
}

/**
 * revbit64 - reverse the bits in a 64-bit value.
 * @x: The value to modify.
 */
static inline uint64_t revbit64(uint64_t x)
{
#if __has_builtin(__builtin_bitreverse64)
    return __builtin_bitreverse64(x);
#else
    /* Assign the correct byte position. */
    x = bswap64(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
      | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x8888888888888888ull) >> 3)
      | ((x & 0x4444444444444444ull) >> 1)
      | ((x & 0x2222222222222222ull) << 1)
      | ((x & 0x1111111111111111ull) << 3);
    return x;
#endif
}

/**
 * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
 */
static inline uint64_t uabs64(int64_t v)
{
    /* Negate as unsigned so that v == INT64_MIN does not overflow. */
    return v < 0 ? -(uint64_t)v : v;
}
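/*
 * Illustrative sketch (hypothetical helper): even parity of a 64-bit
 * word, derived from ctpop64() above.  Returns true when the number of
 * set bits is even.
 */
static inline bool example_parity_even64(uint64_t val)
{
    return (ctpop64(val) & 1) == 0;
}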
/**
 * sadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return ((*ret ^ x) & ~(x ^ y)) < 0;
#endif
}

/**
 * sadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return ((*ret ^ x) & ~(x ^ y)) < 0;
#endif
}

/**
 * uadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return *ret < x;
#endif
}

/**
 * uadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return *ret < x;
#endif
}

/**
 * ssub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return ((*ret ^ x) & (x ^ y)) < 0;
#endif
}

/**
 * ssub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return ((*ret ^ x) & (x ^ y)) < 0;
#endif
}
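/*
 * Illustrative sketch (hypothetical helper): saturating unsigned 32-bit
 * addition built on uadd32_overflow() above, clamping to UINT32_MAX
 * instead of wrapping.
 */
static inline uint32_t example_uadd32_sat(uint32_t x, uint32_t y)
{
    uint32_t sum;

    return uadd32_overflow(x, y, &sum) ? UINT32_MAX : sum;
}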
/**
 * usub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return x < y;
#endif
}

/**
 * usub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return x < y;
#endif
}

/**
 * smul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    int64_t z = (int64_t)x * y;
    *ret = z;
    return *ret != z;
#endif
}

/**
 * smul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t hi, lo;
    muls64(&lo, &hi, x, y);
    *ret = lo;
    return hi != ((int64_t)lo >> 63);
#endif
}

/**
 * umul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t z = (uint64_t)x * y;
    *ret = z;
    return z > UINT32_MAX;
#endif
}

/**
 * umul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t hi;
    mulu64(ret, &hi, x, y);
    return hi != 0;
#endif
}
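/*
 * Illustrative sketch (hypothetical helper): x * y + z with a single
 * combined overflow indication, composed from umul64_overflow() and
 * uadd64_overflow() above.
 */
static inline bool example_umuladd64_overflow(uint64_t x, uint64_t y,
                                              uint64_t z, uint64_t *ret)
{
    uint64_t prod;
    bool ovf;

    ovf = umul64_overflow(x, y, &prod);
    ovf |= uadd64_overflow(prod, z, ret);
    return ovf;
}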
/*
 * Unsigned 128x64 multiplication.
 * The low 128 bits of the product are written back via @plow and @phigh.
 * Returns true if the result was truncated to 128 bits, false otherwise.
 */
static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
{
#if defined(CONFIG_INT128) && \
    (__has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5)
    bool res;
    __uint128_t r;
    __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
    res = __builtin_mul_overflow(f, factor, &r);

    *plow = r;
    *phigh = r >> 64;

    return res;
#else
    uint64_t dhi = *phigh;
    uint64_t dlo = *plow;
    uint64_t ahi;
    uint64_t blo, bhi;

    if (dhi == 0) {
        mulu64(plow, phigh, dlo, factor);
        return false;
    }

    mulu64(plow, &ahi, dlo, factor);
    mulu64(&blo, &bhi, dhi, factor);

    return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
#endif
}

/**
 * uadd64_carry - addition with carry-in and carry-out
 * @x, @y: addends
 * @pcarry: in-out carry value
 *
 * Computes @x + @y + *@pcarry, placing the carry-out back
 * into *@pcarry and returning the 64-bit sum.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long c = *pcarry;
    x = __builtin_addcll(x, y, c, &c);
    *pcarry = c & 1;
    return x;
#else
    bool c = *pcarry;
    /* This is clang's internal expansion of __builtin_addc. */
    c = uadd64_overflow(x, c, &x);
    c |= uadd64_overflow(x, y, &x);
    *pcarry = c;
    return x;
#endif
}

/**
 * usub64_borrow - subtraction with borrow-in and borrow-out
 * @x: Minuend
 * @y: Subtrahend
 * @pborrow: in-out borrow value
 *
 * Computes @x - @y - *@pborrow, placing the borrow-out back
 * into *@pborrow and returning the 64-bit difference.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll)
    unsigned long long b = *pborrow;
    x = __builtin_subcll(x, y, b, &b);
    *pborrow = b & 1;
    return x;
#else
    bool b = *pborrow;
    b = usub64_overflow(x, b, &x);
    b |= usub64_overflow(x, y, &x);
    *pborrow = b;
    return x;
#endif
}
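/*
 * Illustrative sketch (hypothetical helper): 128-bit addition written
 * as two 64-bit limbs chained through uadd64_carry() above.  The carry
 * flag propagates from the low limb into the high limb.
 */
static inline void example_add128(uint64_t alo, uint64_t ahi,
                                  uint64_t blo, uint64_t bhi,
                                  uint64_t *rlo, uint64_t *rhi)
{
    bool carry = false;

    *rlo = uadd64_carry(alo, blo, &carry);
    *rhi = uadd64_carry(ahi, bhi, &carry);
}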
/* Host type specific sizes of these routines. */

#if ULONG_MAX == UINT32_MAX
# define clzl   clz32
# define ctzl   ctz32
# define clol   clo32
# define ctol   cto32
# define ctpopl ctpop32
# define revbitl revbit32
#elif ULONG_MAX == UINT64_MAX
# define clzl   clz64
# define ctzl   ctz64
# define clol   clo64
# define ctol   cto64
# define ctpopl ctpop64
# define revbitl revbit64
#else
# error Unknown sizeof long
#endif

static inline bool is_power_of_2(uint64_t value)
{
    if (!value) {
        return false;
    }

    return !(value & (value - 1));
}

/**
 * Return @value rounded down to the nearest power of two or zero.
 */
static inline uint64_t pow2floor(uint64_t value)
{
    if (!value) {
        /* Avoid undefined shift by 64. */
        return 0;
    }
    return 0x8000000000000000ull >> clz64(value);
}

/*
 * Return @value rounded up to the nearest power of two modulo 2^64.
 * This is *zero* for @value > 2^63, so be careful.
 */
static inline uint64_t pow2ceil(uint64_t value)
{
    int n = clz64(value - 1);

    if (!n) {
        /*
         * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63.
         * Therefore, either @value == 0 or @value > 2^63.
         * If it's 0, return 1, else return 0.
         */
        return !value;
    }
    return 0x8000000000000000ull >> (n - 1);
}

/**
 * pow2roundup32 - round up to the next power of two, as a 32-bit op.
 * @x: The value to round up.
 *
 * Returns the smallest power of two strictly greater than @x, modulo
 * 2^32: the result is 1 for @x == 0 and wraps to 0 for @x >= 0x80000000.
 */
static inline uint32_t pow2roundup32(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    return x + 1;
}

/**
 * urshift - 128-bit Unsigned Right Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 *
 * The result is zero-extended and stored back in plow/phigh, which are
 * input/output variables. Shift values outside the range are taken
 * modulo 128; in other words, the caller is responsible for validating
 * both the shift range and the plow/phigh pointers.
 */
void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);

/**
 * ulshift - 128-bit Unsigned Left Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 * @overflow: out - true if any 1-bit is shifted out.
 *
 * The result is zero-extended and stored back in plow/phigh, which are
 * input/output variables. Shift values outside the range are taken
 * modulo 128; in other words, the caller is responsible for validating
 * both the shift range and the plow/phigh pointers.
 */
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);

/*
 * From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 *
 * Licensed under the GPLv2/LGPLv3
 *
 * Divides the 128-bit value @n1:@n0 by @d, returning the quotient and
 * storing the remainder in *@r.  As with GMP's generic __udiv_qrnnd_c,
 * the C fallback expects a normalized divisor (top bit of @d set) and
 * @n1 < @d, so that the quotient fits in 64 bits.
 */
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
                                  uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
    uint64_t q;
    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
    return q;
#elif defined(__s390x__) && !defined(__clang__)
    /* Need to use a TImode type to get an even register pair for DLGR. */
    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
    *r = n >> 64;
    return n;
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
    /* From Power ISA 2.06, programming note for divdeu. */
    uint64_t q1, q2, Q, r1, r2, R;
    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
        : "=&r"(q1), "=r"(q2)
        : "r"(n1), "r"(n0), "r"(d));
    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
    r2 = n0 - (q2 * d);
    Q = q1 + q2;
    R = r1 + r2;
    if (R >= d || R < r2) { /* overflow implies R > d */
        Q += 1;
        R -= d;
    }
    *r = R;
    return Q;
#else
    uint64_t d0, d1, q0, q1, r1, r0, m;

    d0 = (uint32_t)d;
    d1 = d >> 32;

    r1 = n1 % d1;
    q1 = n1 / d1;
    m = q1 * d0;
    r1 = (r1 << 32) | (n0 >> 32);
    if (r1 < m) {
        q1 -= 1;
        r1 += d;
        if (r1 >= d) {
            if (r1 < m) {
                q1 -= 1;
                r1 += d;
            }
        }
    }
    r1 -= m;

    r0 = r1 % d1;
    q0 = r1 / d1;
    m = q0 * d0;
    r0 = (r0 << 32) | (uint32_t)n0;
    if (r0 < m) {
        q0 -= 1;
        r0 += d;
        if (r0 >= d) {
            if (r0 < m) {
                q0 -= 1;
                r0 += d;
            }
        }
    }
    r0 -= m;

    *r = r0;
    return (q1 << 32) | q0;
#endif
}

Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);

#endif /* HOST_UTILS_H */