10b57cec5SDimitry Andric //===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements __udivmodti4 for the compiler_rt library.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "int_lib.h"
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #ifdef CRT_HAS_128BIT
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric // Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
180b57cec5SDimitry Andric // Remainder stored in r.
190b57cec5SDimitry Andric // Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
200b57cec5SDimitry Andric // fallback. For a correctness proof see the reference for this algorithm
210b57cec5SDimitry Andric // in Knuth, Volume 2, section 4.3.1, Algorithm D.
220b57cec5SDimitry Andric UNUSED
udiv128by64to64default(du_int u1,du_int u0,du_int v,du_int * r)230b57cec5SDimitry Andric static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
240b57cec5SDimitry Andric                                             du_int *r) {
250b57cec5SDimitry Andric   const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
260b57cec5SDimitry Andric   const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
270b57cec5SDimitry Andric   du_int un1, un0;                                // Norm. dividend LSD's
280b57cec5SDimitry Andric   du_int vn1, vn0;                                // Norm. divisor digits
290b57cec5SDimitry Andric   du_int q1, q0;                                  // Quotient digits
300b57cec5SDimitry Andric   du_int un64, un21, un10;                        // Dividend digit pairs
310b57cec5SDimitry Andric   du_int rhat;                                    // A remainder
320b57cec5SDimitry Andric   si_int s;                                       // Shift amount for normalization
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric   s = __builtin_clzll(v);
350b57cec5SDimitry Andric   if (s > 0) {
360b57cec5SDimitry Andric     // Normalize the divisor.
370b57cec5SDimitry Andric     v = v << s;
380b57cec5SDimitry Andric     un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
390b57cec5SDimitry Andric     un10 = u0 << s; // Shift dividend left
400b57cec5SDimitry Andric   } else {
410b57cec5SDimitry Andric     // Avoid undefined behavior of (u0 >> 64).
420b57cec5SDimitry Andric     un64 = u1;
430b57cec5SDimitry Andric     un10 = u0;
440b57cec5SDimitry Andric   }
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric   // Break divisor up into two 32-bit digits.
470b57cec5SDimitry Andric   vn1 = v >> (n_udword_bits / 2);
480b57cec5SDimitry Andric   vn0 = v & 0xFFFFFFFF;
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   // Break right half of dividend into two digits.
510b57cec5SDimitry Andric   un1 = un10 >> (n_udword_bits / 2);
520b57cec5SDimitry Andric   un0 = un10 & 0xFFFFFFFF;
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric   // Compute the first quotient digit, q1.
550b57cec5SDimitry Andric   q1 = un64 / vn1;
560b57cec5SDimitry Andric   rhat = un64 - q1 * vn1;
570b57cec5SDimitry Andric 
580b57cec5SDimitry Andric   // q1 has at most error 2. No more than 2 iterations.
590b57cec5SDimitry Andric   while (q1 >= b || q1 * vn0 > b * rhat + un1) {
600b57cec5SDimitry Andric     q1 = q1 - 1;
610b57cec5SDimitry Andric     rhat = rhat + vn1;
620b57cec5SDimitry Andric     if (rhat >= b)
630b57cec5SDimitry Andric       break;
640b57cec5SDimitry Andric   }
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric   un21 = un64 * b + un1 - q1 * v;
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   // Compute the second quotient digit.
690b57cec5SDimitry Andric   q0 = un21 / vn1;
700b57cec5SDimitry Andric   rhat = un21 - q0 * vn1;
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric   // q0 has at most error 2. No more than 2 iterations.
730b57cec5SDimitry Andric   while (q0 >= b || q0 * vn0 > b * rhat + un0) {
740b57cec5SDimitry Andric     q0 = q0 - 1;
750b57cec5SDimitry Andric     rhat = rhat + vn1;
760b57cec5SDimitry Andric     if (rhat >= b)
770b57cec5SDimitry Andric       break;
780b57cec5SDimitry Andric   }
790b57cec5SDimitry Andric 
800b57cec5SDimitry Andric   *r = (un21 * b + un0 - q0 * v) >> s;
810b57cec5SDimitry Andric   return q1 * b + q0;
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric 
udiv128by64to64(du_int u1,du_int u0,du_int v,du_int * r)840b57cec5SDimitry Andric static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
850b57cec5SDimitry Andric                                      du_int *r) {
860b57cec5SDimitry Andric #if defined(__x86_64__)
870b57cec5SDimitry Andric   du_int result;
880b57cec5SDimitry Andric   __asm__("divq %[v]"
890b57cec5SDimitry Andric           : "=a"(result), "=d"(*r)
900b57cec5SDimitry Andric           : [ v ] "r"(v), "a"(u0), "d"(u1));
910b57cec5SDimitry Andric   return result;
920b57cec5SDimitry Andric #else
930b57cec5SDimitry Andric   return udiv128by64to64default(u1, u0, v, r);
940b57cec5SDimitry Andric #endif
950b57cec5SDimitry Andric }
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric // Effects: if rem != 0, *rem = a % b
980b57cec5SDimitry Andric // Returns: a / b
990b57cec5SDimitry Andric 
__udivmodti4(tu_int a,tu_int b,tu_int * rem)1000b57cec5SDimitry Andric COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
1010b57cec5SDimitry Andric   const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
1020b57cec5SDimitry Andric   utwords dividend;
1030b57cec5SDimitry Andric   dividend.all = a;
1040b57cec5SDimitry Andric   utwords divisor;
1050b57cec5SDimitry Andric   divisor.all = b;
1060b57cec5SDimitry Andric   utwords quotient;
1070b57cec5SDimitry Andric   utwords remainder;
1080b57cec5SDimitry Andric   if (divisor.all > dividend.all) {
1090b57cec5SDimitry Andric     if (rem)
1100b57cec5SDimitry Andric       *rem = dividend.all;
1110b57cec5SDimitry Andric     return 0;
1120b57cec5SDimitry Andric   }
1130b57cec5SDimitry Andric   // When the divisor fits in 64 bits, we can use an optimized path.
1140b57cec5SDimitry Andric   if (divisor.s.high == 0) {
1150b57cec5SDimitry Andric     remainder.s.high = 0;
1160b57cec5SDimitry Andric     if (dividend.s.high < divisor.s.low) {
1170b57cec5SDimitry Andric       // The result fits in 64 bits.
1180b57cec5SDimitry Andric       quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
1190b57cec5SDimitry Andric                                        divisor.s.low, &remainder.s.low);
1200b57cec5SDimitry Andric       quotient.s.high = 0;
1210b57cec5SDimitry Andric     } else {
1220b57cec5SDimitry Andric       // First, divide with the high part to get the remainder in dividend.s.high.
1230b57cec5SDimitry Andric       // After that dividend.s.high < divisor.s.low.
1240b57cec5SDimitry Andric       quotient.s.high = dividend.s.high / divisor.s.low;
1250b57cec5SDimitry Andric       dividend.s.high = dividend.s.high % divisor.s.low;
1260b57cec5SDimitry Andric       quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
1270b57cec5SDimitry Andric                                        divisor.s.low, &remainder.s.low);
1280b57cec5SDimitry Andric     }
1290b57cec5SDimitry Andric     if (rem)
1300b57cec5SDimitry Andric       *rem = remainder.all;
1310b57cec5SDimitry Andric     return quotient.all;
1320b57cec5SDimitry Andric   }
1330b57cec5SDimitry Andric   // 0 <= shift <= 63.
1340b57cec5SDimitry Andric   si_int shift =
1350b57cec5SDimitry Andric       __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
1360b57cec5SDimitry Andric   divisor.all <<= shift;
1370b57cec5SDimitry Andric   quotient.s.high = 0;
1380b57cec5SDimitry Andric   quotient.s.low = 0;
1390b57cec5SDimitry Andric   for (; shift >= 0; --shift) {
1400b57cec5SDimitry Andric     quotient.s.low <<= 1;
1410b57cec5SDimitry Andric     // Branch free version of.
1420b57cec5SDimitry Andric     // if (dividend.all >= divisor.all)
1430b57cec5SDimitry Andric     // {
1440b57cec5SDimitry Andric     //    dividend.all -= divisor.all;
1450b57cec5SDimitry Andric     //    carry = 1;
1460b57cec5SDimitry Andric     // }
1470b57cec5SDimitry Andric     const ti_int s =
1480b57cec5SDimitry Andric         (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
1490b57cec5SDimitry Andric     quotient.s.low |= s & 1;
1500b57cec5SDimitry Andric     dividend.all -= divisor.all & s;
1510b57cec5SDimitry Andric     divisor.all >>= 1;
1520b57cec5SDimitry Andric   }
1530b57cec5SDimitry Andric   if (rem)
1540b57cec5SDimitry Andric     *rem = dividend.all;
1550b57cec5SDimitry Andric   return quotient.all;
1560b57cec5SDimitry Andric }
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric #endif // CRT_HAS_128BIT
1590b57cec5SDimitry Andric