1 //! Optimized division algorithms for u128.
2 //!
3 //! The code in this module is derived off of `dtolnay/itoa`
4 //! and Rust's compiler-builtins crate. This copies a specific
5 //! path of LLVM's `__udivmodti4` intrinsic, which does division/
6 //! modulus for u128 in a single step. Rust implements both division
7 //! and modulus in terms of this intrinsic, but calls the intrinsic
8 //! twice for subsequent division and modulus operations on the same
9 //! dividend/divisor, leading to significant performance overhead.
10 //!
11 //! This module calculates the optimal divisors for each radix,
12 //! and exports a general-purpose division algorithm for u128 where
13 //! the divisor can fit in a u64.
14 //!
15 //! This implementation is derived from dtolnay/itoa, which can be found here:
16 //! https://github.com/dtolnay/itoa/blob/master/src/udiv128.rs
17 //!
18 //! This implementation is also derived from Rust's compiler-builtins crate,
19 //! which can be found here:
20 //! https://github.com/rust-lang-nursery/compiler-builtins/blob/master/src/int/udiv.rs
21 //!
22 //! Licensing for this module may be under the MIT or Illinois license
23 //! (a BSD-like license), and may be found here:
24 //! https://github.com/rust-lang-nursery/compiler-builtins/blob/master/LICENSE.TXT
25
26 // Get the divisor for optimized 128-bit division.
27 // Returns the divisor, the number of digits processed, and the
28 // number of leading zeros in the divisor.
29 //
30 // These values were calculated using the following script:
31 //
32 // ```text
33 // import math
34 //
35 // u64_max = 2**64 - 1
36 // u128_max = 2**128-1
37 //
38 // def is_valid(x):
39 // return (
40 // x <= u64_max
41 // and (u128_max / (x**2)) < x
42 // )
43 //
44 // def find_pow(radix):
45 // start_pow = int(math.floor(math.log(u64_max, radix))) - 1
46 // while is_valid(radix**start_pow):
47 // start_pow += 1
48 // return start_pow - 1
49 //
50 // for radix in range(2, 37):
51 // power = find_pow(radix)
52 // print(radix, radix**power, power)
53 // ```
54 #[cfg(feature = "radix")]
55 #[inline]
u128_divisor(radix: u32) -> (u64, usize, u32)56 pub(crate) fn u128_divisor(radix: u32) -> (u64, usize, u32) {
57 match radix {
58 2 => (9223372036854775808, 63, 0), // 2^63
59 3 => (12157665459056928801, 40, 0), // 3^40
60 4 => (4611686018427387904, 31, 1), // 4^31
61 5 => (7450580596923828125, 27, 1), // 5^27
62 6 => (4738381338321616896, 24, 1), // 6^24
63 7 => (3909821048582988049, 22, 2), // 7^22
64 8 => (9223372036854775808, 21, 0), // 8^21
65 9 => (12157665459056928801, 20, 0), // 9^20
66 10 => (10000000000000000000, 19, 0), // 10^19
67 11 => (5559917313492231481, 18, 1), // 11^18
68 12 => (2218611106740436992, 17, 3), // 12^17
69 13 => (8650415919381337933, 17, 1), // 13^17
70 14 => (2177953337809371136, 16, 3), // 14^16
71 15 => (6568408355712890625, 16, 1), // 15^16
72 16 => (1152921504606846976, 15, 3), // 16^15
73 17 => (2862423051509815793, 15, 2), // 17^15
74 18 => (6746640616477458432, 15, 1), // 18^15
75 19 => (15181127029874798299, 15, 0), // 19^15
76 20 => (1638400000000000000, 14, 3), // 20^14
77 21 => (3243919932521508681, 14, 2), // 21^14
78 22 => (6221821273427820544, 14, 1), // 22^14
79 23 => (11592836324538749809, 14, 0), // 23^14
80 24 => (876488338465357824, 13, 4), // 24^13
81 25 => (1490116119384765625, 13, 3), // 25^13
82 26 => (2481152873203736576, 13, 2), // 26^13
83 27 => (4052555153018976267, 13, 2), // 27^13
84 28 => (6502111422497947648, 13, 1), // 28^13
85 29 => (10260628712958602189, 13, 0), // 29^13
86 30 => (15943230000000000000, 13, 0), // 30^13
87 31 => (787662783788549761, 12, 4), // 31^12
88 32 => (1152921504606846976, 12, 3), // 32^12
89 33 => (1667889514952984961, 12, 3), // 33^12
90 34 => (2386420683693101056, 12, 2), // 34^12
91 35 => (3379220508056640625, 12, 2), // 35^12
92 36 => (4738381338321616896, 12, 1), // 36^12
93 _ => unreachable!(),
94 }
95 }
96
97 // Get the divisor for optimized 128-bit division.
98 // Returns the divisor, the number of digits processed, and the
99 // number of leading zeros in the divisor.
100 #[cfg(not(feature = "radix"))]
101 #[inline]
102 #[allow(dead_code)]
u128_divisor(_: u32) -> (u64, usize, u32)103 pub(crate) fn u128_divisor(_: u32) -> (u64, usize, u32) {
104 (10000000000000000000, 19, 0) // 10^19
105 }
106
107 // Optimized division/remainder algorithm for u128.
108 // This is because the codegen for u128 divrem is very inefficient in Rust,
109 // calling both `__udivmodti4` twice internally, rather than a single time.
110 #[inline]
u128_divrem(n: u128, d: u64, d_cltz: u32) -> (u128, u64)111 pub(crate) fn u128_divrem(n: u128, d: u64, d_cltz: u32) -> (u128, u64) {
112 // Ensure we have the correct number of leading zeros passed.
113 debug_assert_eq!(d_cltz, d.leading_zeros());
114
115 // Optimize if we can divide using u64 first.
116 let high = (n >> 64) as u64;
117 if high == 0 {
118 let low = n as u64;
119 return ((low / d) as u128, low % d);
120 }
121
122 // sr = 1 + u64::BITS + d.leading_zeros() - high.leading_zeros();
123 let sr = 65 + d_cltz - high.leading_zeros();
124
125 // 1 <= sr <= u64::BITS - 1
126 let mut q: u128 = n << (128 - sr);
127 let mut r: u128 = n >> sr;
128 let mut carry: u64 = 0;
129
130 // Don't use a range because they may generate references to memcpy in unoptimized code
131 // Loop invariants: r < d; carry is 0 or 1
132 let mut i = 0;
133 while i < sr {
134 i += 1;
135
136 // r:q = ((r:q) << 1) | carry
137 r = (r << 1) | (q >> 127);
138 q = (q << 1) | carry as u128;
139
140 // carry = 0
141 // if r >= d {
142 // r -= d;
143 // carry = 1;
144 // }
145 let s = (d as u128).wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
146 carry = (s & 1) as u64;
147 r -= (d as u128) & s as u128;
148 }
149
150 ((q << 1) | carry as u128, r as u64)
151 }
152
153 // Divide by 1e19 for base10 algorithms.
154 #[cfg(feature = "table")]
u128_divrem_1e19(n: u128) -> (u128, u64)155 pub(crate) fn u128_divrem_1e19(n: u128) -> (u128, u64) {
156 u128_divrem(n, 10000000000000000000, 0)
157 }
158
159 // TESTS
160 // -----
161
162 #[cfg(test)]
163 mod tests {
164 use super::*;
165
166 #[cfg(all(feature = "std", feature = "property_tests"))]
167 use proptest::{proptest, prop_assert_eq, prop_assert};
168
169 #[cfg(all(feature = "std", feature = "property_tests"))]
170 proptest! {
171 #[test]
172 fn u128_divrem_proptest(i in u128::min_value()..u128::max_value()) {
173 let (d, _, d_cltz) = u128_divisor(10);
174 let expected = (i / d as u128, (i % d as u128) as u64);
175 let actual = u128_divrem(i, d, d_cltz);
176 prop_assert_eq!(actual, expected);
177 }
178 }
179 }
180