/*
 * Utility compute operations used by translated code.
 *
 * Copyright (c) 2007 Thiemo Seufer
 * Copyright (c) 2007 Jocelyn Mayer
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

#ifndef HOST_UTILS_H
#define HOST_UTILS_H

#include "qemu/compiler.h"
#include "qemu/bswap.h"
#include "qemu/int128.h"

#ifdef CONFIG_INT128
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                          uint64_t a, uint64_t b)
{
    __uint128_t r = (__uint128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

static inline void muls64(uint64_t *plow, uint64_t *phigh,
                          int64_t a, int64_t b)
{
    __int128_t r = (__int128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

/* compute with 96 bit intermediate result: (a*b)/c */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return (__int128_t)a * b / c;
}

static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
                               uint64_t divisor)
{
    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
    __uint128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}

static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
                              int64_t divisor)
{
    __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
    __int128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}
#else
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);

static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    union {
        uint64_t ll;
        struct {
#if HOST_BIG_ENDIAN
            uint32_t high, low;
#else
            uint32_t low, high;
#endif
        } l;
    } u, res;
    uint64_t rl, rh;

    /* Split a into 32-bit halves and multiply each half by b. */
    u.ll = a;
    rl = (uint64_t)u.l.low * (uint64_t)b;
    rh = (uint64_t)u.l.high * (uint64_t)b;
    rh += (rl >> 32);
    /* Two-digit schoolbook division by c in base 2^32. */
    res.l.high = rh / c;
    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
    return res.ll;
}
#endif
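
/*
 * Illustrative use of muldiv64 (the names here are made up for the
 * example): rescale a 64-bit tick count from one clock rate to another
 * without losing the 96-bit intermediate product:
 *
 *     uint64_t ns = muldiv64(ticks, 1000000000, clock_hz);
 *
 * A plain (ticks * 1000000000) / clock_hz computed in 64 bits would
 * overflow once ticks exceeds about 1.8e10.
 */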

/**
 * clz32 - count leading zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz32(uint32_t val)
{
    return val ? __builtin_clz(val) : 32;
}

/**
 * clo32 - count leading ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int clo32(uint32_t val)
{
    return clz32(~val);
}

/**
 * clz64 - count leading zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz64(uint64_t val)
{
    return val ? __builtin_clzll(val) : 64;
}

/**
 * clo64 - count leading ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int clo64(uint64_t val)
{
    return clz64(~val);
}

/**
 * ctz32 - count trailing zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz32(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

/**
 * cto32 - count trailing ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int cto32(uint32_t val)
{
    return ctz32(~val);
}

/**
 * ctz64 - count trailing zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz64(uint64_t val)
{
    return val ? __builtin_ctzll(val) : 64;
}

/**
 * cto64 - count trailing ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int cto64(uint64_t val)
{
    return ctz64(~val);
}

/**
 * clrsb32 - count leading redundant sign bits in a 32-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-31].
 */
static inline int clrsb32(uint32_t val)
{
#if __has_builtin(__builtin_clrsb) || !defined(__clang__)
    return __builtin_clrsb(val);
#else
    return clz32(val ^ ((int32_t)val >> 1)) - 1;
#endif
}

/**
 * clrsb64 - count leading redundant sign bits in a 64-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-63].
 */
static inline int clrsb64(uint64_t val)
{
#if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
    return __builtin_clrsbll(val);
#else
    return clz64(val ^ ((int64_t)val >> 1)) - 1;
#endif
}
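
/*
 * A few worked examples of the counting routines above (illustrative,
 * not exhaustive):
 *
 *     clz32(0x00ffffff) == 8      eight leading zeros
 *     ctz32(0x00000008) == 3      three trailing zeros
 *     cto32(0x0000ffff) == 16     sixteen trailing ones
 *     clrsb32(0)  == 31           sign bit 0, followed by 31 more zeros
 *     clrsb32(-1) == 31           sign bit 1, followed by 31 more ones
 *     clrsb32(1)  == 30           30 copies of the sign bit, then a one
 */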

/**
 * ctpop8 - count the population of one bits in an 8-bit value.
 * @val: The value to search
 */
static inline int ctpop8(uint8_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop16 - count the population of one bits in a 16-bit value.
 * @val: The value to search
 */
static inline int ctpop16(uint16_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop32 - count the population of one bits in a 32-bit value.
 * @val: The value to search
 */
static inline int ctpop32(uint32_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop64 - count the population of one bits in a 64-bit value.
 * @val: The value to search
 */
static inline int ctpop64(uint64_t val)
{
    return __builtin_popcountll(val);
}

/**
 * revbit8 - reverse the bits in an 8-bit value.
 * @x: The value to modify.
 */
static inline uint8_t revbit8(uint8_t x)
{
#if __has_builtin(__builtin_bitreverse8)
    return __builtin_bitreverse8(x);
#else
    /* Assign the correct nibble position. */
    x = ((x & 0xf0) >> 4)
      | ((x & 0x0f) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x88) >> 3)
      | ((x & 0x44) >> 1)
      | ((x & 0x22) << 1)
      | ((x & 0x11) << 3);
    return x;
#endif
}

/**
 * revbit16 - reverse the bits in a 16-bit value.
 * @x: The value to modify.
 */
static inline uint16_t revbit16(uint16_t x)
{
#if __has_builtin(__builtin_bitreverse16)
    return __builtin_bitreverse16(x);
#else
    /* Assign the correct byte position. */
    x = bswap16(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0) >> 4)
      | ((x & 0x0f0f) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x8888) >> 3)
      | ((x & 0x4444) >> 1)
      | ((x & 0x2222) << 1)
      | ((x & 0x1111) << 3);
    return x;
#endif
}

/**
 * revbit32 - reverse the bits in a 32-bit value.
 * @x: The value to modify.
 */
static inline uint32_t revbit32(uint32_t x)
{
#if __has_builtin(__builtin_bitreverse32)
    return __builtin_bitreverse32(x);
#else
    /* Assign the correct byte position. */
    x = bswap32(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0f0f0u) >> 4)
      | ((x & 0x0f0f0f0fu) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x88888888u) >> 3)
      | ((x & 0x44444444u) >> 1)
      | ((x & 0x22222222u) << 1)
      | ((x & 0x11111111u) << 3);
    return x;
#endif
}

/**
 * revbit64 - reverse the bits in a 64-bit value.
 * @x: The value to modify.
 */
static inline uint64_t revbit64(uint64_t x)
{
#if __has_builtin(__builtin_bitreverse64)
    return __builtin_bitreverse64(x);
#else
    /* Assign the correct byte position. */
    x = bswap64(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
      | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x8888888888888888ull) >> 3)
      | ((x & 0x4444444444444444ull) >> 1)
      | ((x & 0x2222222222222222ull) << 1)
      | ((x & 0x1111111111111111ull) << 3);
    return x;
#endif
}
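
/*
 * For illustration, one concrete reversal:
 *
 *     revbit8(0x12) == 0x48      00010010 -> 01001000
 *
 * The fallback implementations above swap progressively smaller fields
 * (bytes, then nibbles, then single bits), pairing each right shift
 * with the mirrored left shift.
 */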

/**
 * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
 */
static inline uint64_t uabs64(int64_t v)
{
    return v < 0 ? -v : v;
}

/**
 * sadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * sadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * uadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * uadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * ssub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * ssub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * usub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * usub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * smul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

/**
 * smul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}
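
/*
 * Typical use of the *_overflow helpers (illustrative; the names and
 * the handling here are made up for the example):
 *
 *     uint32_t sum;
 *     if (uadd32_overflow(a, b, &sum)) {
 *         // wrapped: the true sum does not fit in 32 bits
 *     }
 *
 * The truncated result is still stored in *ret on overflow, so the
 * returned flag can also be used to build wider arithmetic rather
 * than to reject the operation.
 */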

/**
 * umul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

/**
 * umul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

/*
 * Unsigned 128x64 multiplication.
 * Returns true if the product does not fit in 128 bits, i.e. the stored
 * result has been truncated. The low 128 bits of the product are
 * returned via plow and phigh in either case.
 */
static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
{
#if defined(CONFIG_INT128) && \
    (__has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5)
    bool res;
    __uint128_t r;
    __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
    res = __builtin_mul_overflow(f, factor, &r);

    *plow = r;
    *phigh = r >> 64;

    return res;
#else
    uint64_t dhi = *phigh;
    uint64_t dlo = *plow;
    uint64_t ahi;
    uint64_t blo, bhi;

    if (dhi == 0) {
        mulu64(plow, phigh, dlo, factor);
        return false;
    }

    /* (dhi:dlo) * factor == dlo * factor + ((dhi * factor) << 64) */
    mulu64(plow, &ahi, dlo, factor);
    mulu64(&blo, &bhi, dhi, factor);

    return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
#endif
}

/**
 * uadd64_carry - addition with carry-in and carry-out
 * @x, @y: addends
 * @pcarry: in-out carry value
 *
 * Computes @x + @y + *@pcarry, placing the carry-out back
 * into *@pcarry and returning the 64-bit sum.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long c = *pcarry;
    x = __builtin_addcll(x, y, c, &c);
    *pcarry = c & 1;
    return x;
#else
    bool c = *pcarry;
    /* This is clang's internal expansion of __builtin_addc. */
    c = uadd64_overflow(x, c, &x);
    c |= uadd64_overflow(x, y, &x);
    *pcarry = c;
    return x;
#endif
}

/**
 * usub64_borrow - subtraction with borrow-in and borrow-out
 * @x: Minuend
 * @y: Subtrahend
 * @pborrow: in-out borrow value
 *
 * Computes @x - @y - *@pborrow, placing the borrow-out back
 * into *@pborrow and returning the 64-bit difference.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll)
    unsigned long long b = *pborrow;
    x = __builtin_subcll(x, y, b, &b);
    *pborrow = b & 1;
    return x;
#else
    bool b = *pborrow;
    b = usub64_overflow(x, b, &x);
    b |= usub64_overflow(x, y, &x);
    *pborrow = b;
    return x;
#endif
}
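
/*
 * Example (illustrative; the lo/hi names are made up): propagating a
 * carry through a 128-bit value held as two 64-bit halves:
 *
 *     bool carry = false;
 *     r_lo = uadd64_carry(a_lo, b_lo, &carry);
 *     r_hi = uadd64_carry(a_hi, b_hi, &carry);
 *     // carry now holds the carry-out of the full 128-bit sum
 */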

/* Host type specific sizes of these routines. */

#if ULONG_MAX == UINT32_MAX
# define clzl clz32
# define ctzl ctz32
# define clol clo32
# define ctol cto32
# define ctpopl ctpop32
# define revbitl revbit32
#elif ULONG_MAX == UINT64_MAX
# define clzl clz64
# define ctzl ctz64
# define clol clo64
# define ctol cto64
# define ctpopl ctpop64
# define revbitl revbit64
#else
# error Unknown sizeof long
#endif

static inline bool is_power_of_2(uint64_t value)
{
    if (!value) {
        return false;
    }

    return !(value & (value - 1));
}

/**
 * Return @value rounded down to the nearest power of two or zero.
 */
static inline uint64_t pow2floor(uint64_t value)
{
    if (!value) {
        /* Avoid undefined shift by 64 */
        return 0;
    }
    return 0x8000000000000000ull >> clz64(value);
}

/*
 * Return @value rounded up to the nearest power of two modulo 2^64.
 * This is *zero* for @value > 2^63, so be careful.
 */
static inline uint64_t pow2ceil(uint64_t value)
{
    int n = clz64(value - 1);

    if (!n) {
        /*
         * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63
         * Therefore, either @value == 0 or @value > 2^63.
         * If it's 0, return 1, else return 0.
         */
        return !value;
    }
    return 0x8000000000000000ull >> (n - 1);
}
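
/*
 * Worked examples for the power-of-two helpers (illustrative):
 *
 *     is_power_of_2(64)  == true
 *     is_power_of_2(100) == false
 *     pow2floor(100) == 64
 *     pow2ceil(100)  == 128
 *     pow2ceil(0)    == 1
 *     pow2ceil(x)    == 0 for any x > 2^63, per the caveat above
 */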

static inline uint32_t pow2roundup32(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    return x + 1;
}

/**
 * urshift - 128-bit Unsigned Right Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables. Shift values outside the range will
 * be reduced modulo 128. In other words, the caller is responsible
 * for verifying/asserting both the shift range and the plow/phigh
 * pointers.
 */
void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);

/**
 * ulshift - 128-bit Unsigned Left Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 * @overflow: out - true if any 1-bit is shifted out.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables. Shift values outside the range will
 * be reduced modulo 128. In other words, the caller is responsible
 * for verifying/asserting both the shift range and the plow/phigh
 * pointers.
 */
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);

/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 *
 * Licensed under the GPLv2/LGPLv3
 *
 * Divides the 128-bit value n1:n0 by d and returns the quotient,
 * storing the remainder in *r. The quotient must fit in 64 bits, and
 * the generic fallback additionally expects d to be normalized (most
 * significant bit set), as in the GMP original.
 */
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
                                  uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
    uint64_t q;
    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
    return q;
#elif defined(__s390x__) && !defined(__clang__)
    /* Need to use a TImode type to get an even register pair for DLGR. */
    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
    *r = n >> 64;
    return n;
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
    /* From Power ISA 2.06, programming note for divdeu. */
    uint64_t q1, q2, Q, r1, r2, R;
    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
        : "=&r"(q1), "=r"(q2)
        : "r"(n1), "r"(n0), "r"(d));
    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
    r2 = n0 - (q2 * d);
    Q = q1 + q2;
    R = r1 + r2;
    if (R >= d || R < r2) { /* overflow implies R > d */
        Q += 1;
        R -= d;
    }
    *r = R;
    return Q;
#else
    /* Generic fallback: two rounds of base-2^32 schoolbook division. */
    uint64_t d0, d1, q0, q1, r1, r0, m;

    d0 = (uint32_t)d;
    d1 = d >> 32;

    r1 = n1 % d1;
    q1 = n1 / d1;
    m = q1 * d0;
    r1 = (r1 << 32) | (n0 >> 32);
    if (r1 < m) {
        q1 -= 1;
        r1 += d;
        if (r1 >= d) {
            if (r1 < m) {
                q1 -= 1;
                r1 += d;
            }
        }
    }
    r1 -= m;

    r0 = r1 % d1;
    q0 = r1 / d1;
    m = q0 * d0;
    r0 = (r0 << 32) | (uint32_t)n0;
    if (r0 < m) {
        q0 -= 1;
        r0 += d;
        if (r0 >= d) {
            if (r0 < m) {
                q0 -= 1;
                r0 += d;
            }
        }
    }
    r0 -= m;

    *r = r0;
    return (q1 << 32) | q0;
#endif
}

Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);
#endif