// Adapted from https://github.com/Alexhuszagh/rust-lexical.

//! Building-blocks for arbitrary-precision math.
//!
//! These algorithms assume little-endian order for the large integer
//! buffers, so for a `vec![0, 1, 2, 3]`, `3` is the most significant limb,
//! and `0` is the least significant limb.
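//!
//! For example (a sketch, assuming 32-bit limbs; the actual limb width is
//! selected by the `limb_width_*` cfgs below):
//!
//! ```text
//! // vec![0, 1, 2, 3] represents
//! // 0 + 1*2^32 + 2*2^64 + 3*2^96 = 0x00000003_00000002_00000001_00000000
//! ```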

use super::large_powers;
use super::num::*;
use super::small_powers::*;
use crate::lib::{cmp, iter, mem, Vec};

// ALIASES
// -------

//  Type for a single limb of the big integer.
//
//  A limb is analogous to a digit in base10, except it stores 32-bit
//  or 64-bit numbers instead.
//
//  This should cover all known 64-bit platforms supported by Rust.
//      https://forge.rust-lang.org/platform-support.html
//
//  Platforms where native 128-bit multiplication is explicitly supported:
//      - x86_64 (supported via `MUL`).
//      - mips64 (supported via `DMULTU`, whose `HI` and `LO` registers can be read from).
//
//  Platforms where native 64-bit multiplication is supported and
//  the high and low halves of the product can be extracted:
//      - aarch64 (requires `UMULH` and `MUL` to capture high and low bits).
//      - powerpc64 (requires `MULHDU` and `MULLD` to capture high and low bits).
//
//  Platforms where native 128-bit multiplication is not supported,
//  requiring software emulation:
//      - sparc64 (`UMUL` only supports double-word arguments).

// 32-BIT LIMB
#[cfg(limb_width_32)]
pub type Limb = u32;

#[cfg(limb_width_32)]
pub const POW5_LIMB: &[Limb] = &POW5_32;

#[cfg(limb_width_32)]
pub const POW10_LIMB: &[Limb] = &POW10_32;

#[cfg(limb_width_32)]
type Wide = u64;

// 64-BIT LIMB
#[cfg(limb_width_64)]
pub type Limb = u64;

#[cfg(limb_width_64)]
pub const POW5_LIMB: &[Limb] = &POW5_64;

#[cfg(limb_width_64)]
pub const POW10_LIMB: &[Limb] = &POW10_64;

#[cfg(limb_width_64)]
type Wide = u128;

/// Cast to limb type.
#[inline]
pub(crate) fn as_limb<T: Integer>(t: T) -> Limb {
    Limb::as_cast(t)
}

/// Cast to wide type.
#[inline]
fn as_wide<T: Integer>(t: T) -> Wide {
    Wide::as_cast(t)
}

// SPLIT
// -----

/// Split u64 into limbs, in little-endian order.
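/// For example (a sketch of the intended behavior with 32-bit limbs):
///
/// ```text
/// assert_eq!(split_u64(0x0000_0001_0000_0002), [2, 1]);
/// ```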
#[inline]
#[cfg(limb_width_32)]
fn split_u64(x: u64) -> [Limb; 2] {
    [as_limb(x), as_limb(x >> 32)]
}

/// Split u64 into limbs, in little-endian order.
#[inline]
#[cfg(limb_width_64)]
fn split_u64(x: u64) -> [Limb; 1] {
    [as_limb(x)]
}

// HI64
// ----

// NONZERO

/// Check if any of the remaining bits are non-zero.
#[inline]
pub fn nonzero<T: Integer>(x: &[T], rindex: usize) -> bool {
    let len = x.len();
    let slc = &x[..len - rindex];
    slc.iter().rev().any(|&x| x != T::ZERO)
}

/// Shift 64-bit integer to high 64-bits.
#[inline]
fn u64_to_hi64_1(r0: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    (r0 << ls, false)
}

/// Shift 2 64-bit integers to high 64-bits.
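///
/// For example (a sketch: the 128-bit value `(r0, r1)` is normalized so its
/// top bit is set, and the bool reports whether any non-zero bits of `r1`
/// were truncated):
///
/// ```text
/// // (r0, r1) = (1, 3), i.e. the value 2^64 + 3.
/// assert_eq!(u64_to_hi64_2(1, 3), (0x8000_0000_0000_0001, true));
/// ```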
#[inline]
fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    let rs = 64 - ls;
    let v = match ls {
        0 => r0,
        _ => (r0 << ls) | (r1 >> rs),
    };
    let n = r1 << ls != 0;
    (v, n)
}

/// Trait to export the high 64-bits from a little-endian slice.
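///
/// For example (a sketch, for `[u32]` limbs):
///
/// ```text
/// // [0, 1] is 2^32; left-aligned into the high 64 bits it becomes 2^63.
/// assert_eq!([0u32, 1].hi64(), (1u64 << 63, false));
/// ```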
trait Hi64<T>: AsRef<[T]> {
    /// Get the hi64 bits from a 1-limb slice.
    fn hi64_1(&self) -> (u64, bool);

    /// Get the hi64 bits from a 2-limb slice.
    fn hi64_2(&self) -> (u64, bool);

    /// Get the hi64 bits from a 3-limb slice.
    fn hi64_3(&self) -> (u64, bool);

    /// High-level exporter to extract the high 64 bits from a little-endian slice.
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        match self.as_ref().len() {
            0 => (0, false),
            1 => self.hi64_1(),
            2 => self.hi64_2(),
            _ => self.hi64_3(),
        }
    }
}

impl Hi64<u32> for [u32] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0] as u64;
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() == 2);
        let r0 = (self[1] as u64) << 32;
        let r1 = self[0] as u64;
        u64_to_hi64_1(r0 | r1)
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 3);
        let r0 = self[self.len() - 1] as u64;
        let r1 = (self[self.len() - 2] as u64) << 32;
        let r2 = self[self.len() - 3] as u64;
        let (v, n) = u64_to_hi64_2(r0, r1 | r2);
        (v, n || nonzero(self, 3))
    }
}

impl Hi64<u64> for [u64] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0];
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 2);
        let r0 = self[self.len() - 1];
        let r1 = self[self.len() - 2];
        let (v, n) = u64_to_hi64_2(r0, r1);
        (v, n || nonzero(self, 2))
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        self.hi64_2()
    }
}

// SCALAR
// ------

// Scalar-to-scalar operations, as building-blocks for arbitrary-precision
// operations.

mod scalar {
    use super::*;

    // ADDITION

    /// Add two small integers and return the resulting value and if overflow happens.
    #[inline]
    pub fn add(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_add(y)
    }

    /// AddAssign two small integers and return if overflow happens.
    #[inline]
    pub fn iadd(x: &mut Limb, y: Limb) -> bool {
        let t = add(*x, y);
        *x = t.0;
        t.1
    }

    // SUBTRACTION

    /// Subtract two small integers and return the resulting value and if overflow happens.
    #[inline]
    pub fn sub(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_sub(y)
    }

    /// SubAssign two small integers and return if overflow happens.
    #[inline]
    pub fn isub(x: &mut Limb, y: Limb) -> bool {
        let t = sub(*x, y);
        *x = t.0;
        t.1
    }

    // MULTIPLICATION

    /// Multiply two small integers (with carry), returning the overflow contribution.
    ///
    /// Returns the (low, high) components.
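    ///
    /// For example (a sketch, assuming 32-bit limbs, showing that even the
    /// worst case fits in the wide type):
    ///
    /// ```text
    /// // 0xFFFF_FFFF * 0xFFFF_FFFF + 0xFFFF_FFFF = 0xFFFF_FFFF_0000_0000
    /// assert_eq!(mul(Limb::max_value(), Limb::max_value(), Limb::max_value()),
    ///            (0, Limb::max_value()));
    /// ```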
    #[inline]
    pub fn mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) {
        // Cannot overflow, as long as wide is 2x as wide. This is because
        // the following is always true:
        // `Wide::max_value() - (Narrow::max_value() * Narrow::max_value()) >= Narrow::max_value()`
        let z: Wide = as_wide(x) * as_wide(y) + as_wide(carry);
        let bits = mem::size_of::<Limb>() * 8;
        (as_limb(z), as_limb(z >> bits))
    }

    /// Multiply two small integers (with carry) and return the resulting carry.
    #[inline]
    pub fn imul(x: &mut Limb, y: Limb, carry: Limb) -> Limb {
        let t = mul(*x, y, carry);
        *x = t.0;
        t.1
    }
} // scalar

// SMALL
// -----

// Large-to-small operations, to modify a big integer from a native scalar.

mod small {
    use super::*;

    // ADDITION

    /// Implied AddAssign implementation for adding a small integer to bigint.
    ///
    /// Allows us to choose a start-index in x to store, to allow incrementing
    /// from a non-zero start.
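    ///
    /// For example (a sketch, assuming 32-bit limbs):
    ///
    /// ```text
    /// let mut x = vec![Limb::max_value(), Limb::max_value()];
    /// iadd_impl(&mut x, 1, 0);
    /// assert_eq!(x, vec![0, 0, 1]);
    /// ```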
    #[inline]
    pub fn iadd_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        if x.len() <= xstart {
            x.push(y);
        } else {
            // Initial add
            let mut carry = scalar::iadd(&mut x[xstart], y);

            // Increment until overflow stops occurring.
            let mut size = xstart + 1;
            while carry && size < x.len() {
                carry = scalar::iadd(&mut x[size], 1);
                size += 1;
            }

            // If we overflowed the buffer entirely, need to add 1 to the end
            // of the buffer.
            if carry {
                x.push(1);
            }
        }
    }

    /// AddAssign small integer to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: Limb) {
        iadd_impl(x, y, 0);
    }

    // SUBTRACTION

    /// SubAssign small integer to bigint.
    /// Does not do overflowing subtraction.
    #[inline]
    pub fn isub_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        debug_assert!(x.len() > xstart && (x[xstart] >= y || x.len() > xstart + 1));

        // Initial subtraction
        let mut carry = scalar::isub(&mut x[xstart], y);

        // Decrement until the borrow stops propagating.
        let mut size = xstart + 1;
        while carry && size < x.len() {
            carry = scalar::isub(&mut x[size], 1);
            size += 1;
        }
        normalize(x);
    }

    // MULTIPLICATION

    /// MulAssign small integer to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: Limb) {
        // Multiply iteratively over all elements, adding the carry each time.
        let mut carry: Limb = 0;
        for xi in x.iter_mut() {
            carry = scalar::imul(xi, y, carry);
        }

        // Overflow of value, add to end.
        if carry != 0 {
            x.push(carry);
        }
    }

    /// Mul small integer to bigint.
    #[inline]
    pub fn mul(x: &[Limb], y: Limb) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        imul(&mut z, y);
        z
    }

    /// MulAssign by a power.
    ///
    /// Theoretically...
    ///
    /// Use an exponentiation by squaring method, since it reduces the time
    /// complexity of the multiplication to ~`O(log(n))` for the squaring,
    /// and `O(n*m)` for the result. Since `m` is typically a lower-order
    /// factor, this significantly reduces the number of multiplications
    /// we need to do. Iteratively multiplying by small powers follows
    /// the nth triangular number series, which scales as `O(p^2)`, but
    /// where `p` is `n+m`. In short, it scales very poorly.
    ///
    /// Practically...
    ///
    /// Exponentiation by Squaring:
    ///     running 2 tests
    ///     test bigcomp_f32_lexical ... bench:       1,018 ns/iter (+/- 78)
    ///     test bigcomp_f64_lexical ... bench:       3,639 ns/iter (+/- 1,007)
    ///
    /// Exponentiation by Iterative Small Powers:
    ///     running 2 tests
    ///     test bigcomp_f32_lexical ... bench:         518 ns/iter (+/- 31)
    ///     test bigcomp_f64_lexical ... bench:         583 ns/iter (+/- 47)
    ///
    /// Exponentiation by Iterative Large Powers (of 2):
    ///     running 2 tests
    ///     test bigcomp_f32_lexical ... bench:         671 ns/iter (+/- 31)
    ///     test bigcomp_f64_lexical ... bench:       1,394 ns/iter (+/- 47)
    ///
    /// Even using worst-case scenarios, exponentiation by squaring is
    /// significantly slower for our workloads. Just multiply by small powers,
    /// in simple cases, and use precalculated large powers in other cases.
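    ///
    /// A usage sketch: multiplying a big integer in place by `5^3`.
    ///
    /// ```text
    /// let mut x = vec![1];
    /// imul_pow5(&mut x, 3);
    /// assert_eq!(x, vec![125]);
    /// ```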
    pub fn imul_pow5(x: &mut Vec<Limb>, n: u32) {
        use super::large::KARATSUBA_CUTOFF;

        let small_powers = POW5_LIMB;
        let large_powers = large_powers::POW5;

        if n == 0 {
            // No exponent, just return.
            // The 0-index of the large powers is `2^0`, which is 1, so we want
            // to make sure we don't take that path with a literal 0.
            return;
        }

        // We want to use the asymptotically faster algorithm if we're going
        // to be using Karatsuba multiplication sometime during the result,
        // otherwise, just use iterative multiplication by small powers.
        let bit_length = 32 - n.leading_zeros() as usize;
        debug_assert!(bit_length != 0 && bit_length <= large_powers.len());
        if x.len() + large_powers[bit_length - 1].len() < 2 * KARATSUBA_CUTOFF {
            // We can use iterative small powers to make this faster for the
            // easy cases.

            // Multiply by the largest small power until n < step.
            let step = small_powers.len() - 1;
            let power = small_powers[step];
            let mut n = n as usize;
            while n >= step {
                imul(x, power);
                n -= step;
            }

            // Multiply by the remainder.
            imul(x, small_powers[n]);
        } else {
            // In theory, this code should be asymptotically a lot faster;
            // in practice, our small::imul seems to be the limiting step,
            // and large imul is slow as well.

            // Multiply by higher order powers.
            let mut idx: usize = 0;
            let mut bit: usize = 1;
            let mut n = n as usize;
            while n != 0 {
                if n & bit != 0 {
                    debug_assert!(idx < large_powers.len());
                    large::imul(x, large_powers[idx]);
                    n ^= bit;
                }
                idx += 1;
                bit <<= 1;
            }
        }
    }

    // BIT LENGTH

    /// Get number of leading zero bits in the storage.
    #[inline]
    pub fn leading_zeros(x: &[Limb]) -> usize {
        x.last().map_or(0, |x| x.leading_zeros() as usize)
    }

    /// Calculate the bit-length of the big-integer.
    #[inline]
    pub fn bit_length(x: &[Limb]) -> usize {
        let bits = mem::size_of::<Limb>() * 8;
        // Avoid overflowing, calculate via total number of bits
        // minus leading zero bits.
        let nlz = leading_zeros(x);
        bits.checked_mul(x.len())
            .map_or_else(usize::max_value, |v| v - nlz)
    }

    // SHL

    /// Shift-left bits inside a buffer.
    ///
    /// Assumes `n < Limb::BITS`, IE, internally shifting bits.
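    ///
    /// For example (a sketch, assuming 32-bit limbs):
    ///
    /// ```text
    /// let mut x = vec![1 << 31];
    /// ishl_bits(&mut x, 1);
    /// assert_eq!(x, vec![0, 1]);   // the shifted-out bit becomes a new limb
    /// ```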
    #[inline]
    pub fn ishl_bits(x: &mut Vec<Limb>, n: usize) {
        // Need to shift by the number of `bits % Limb::BITS` bits.
        let bits = mem::size_of::<Limb>() * 8;
        debug_assert!(n < bits);
        if n == 0 {
            return;
        }

        // Internally, for each item, we shift left by n, and add the previous
        // right shifted limb-bits.
        // For example, we transform (for u8) shifted left 2, to:
        //      b10100100 b01000010
        //      b10 b10010001 b00001000
        let rshift = bits - n;
        let lshift = n;
        let mut prev: Limb = 0;
        for xi in x.iter_mut() {
            let tmp = *xi;
            *xi <<= lshift;
            *xi |= prev >> rshift;
            prev = tmp;
        }

        // Push the carry if non-zero, even if it creates a non-normal result.
        let carry = prev >> rshift;
        if carry != 0 {
            x.push(carry);
        }
    }

    /// Shift-left `n` limbs inside a buffer.
    ///
    /// Assumes `n` is not 0.
    #[inline]
    pub fn ishl_limbs(x: &mut Vec<Limb>, n: usize) {
        debug_assert!(n != 0);
        if !x.is_empty() {
            x.reserve(n);
            x.splice(..0, iter::repeat(0).take(n));
        }
    }

    /// Shift-left buffer by n bits.
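    ///
    /// For example (a sketch, assuming 32-bit limbs):
    ///
    /// ```text
    /// let mut x = vec![1];
    /// ishl(&mut x, 33);
    /// assert_eq!(x, vec![0, 2]);   // 1 << 33 == 2^33
    /// ```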
    #[inline]
    pub fn ishl(x: &mut Vec<Limb>, n: usize) {
        let bits = mem::size_of::<Limb>() * 8;
        // Need to pad with zeros for the whole limbs (`n / Limb::BITS`),
        // and shift-left with carry for the remaining bits (`n % Limb::BITS`).
        let rem = n % bits;
        let div = n / bits;
        ishl_bits(x, rem);
        if div != 0 {
            ishl_limbs(x, div);
        }
    }

    // NORMALIZE

    /// Normalize the container by popping any leading zeros.
    #[inline]
    pub fn normalize(x: &mut Vec<Limb>) {
        // Remove leading zero limbs (the most-significant end of the buffer).
        while x.last() == Some(&0) {
            x.pop();
        }
    }
} // small

// LARGE
// -----

// Large-to-large operations, to modify a big integer from another big integer.

mod large {
    use super::*;

    // RELATIVE OPERATORS

    /// Compare `x` to `y`, in little-endian order.
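    ///
    /// For example (a sketch, assuming 32-bit limbs):
    ///
    /// ```text
    /// // [2, 1] is 2 + 2^32 and [1, 1] is 1 + 2^32.
    /// assert_eq!(compare(&[2, 1], &[1, 1]), cmp::Ordering::Greater);
    /// ```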
    #[inline]
    pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering {
        if x.len() > y.len() {
            cmp::Ordering::Greater
        } else if x.len() < y.len() {
            cmp::Ordering::Less
        } else {
            let iter = x.iter().rev().zip(y.iter().rev());
            for (&xi, &yi) in iter {
                if xi > yi {
                    return cmp::Ordering::Greater;
                } else if xi < yi {
                    return cmp::Ordering::Less;
                }
            }
            // Equal case.
            cmp::Ordering::Equal
        }
    }

    /// Check if x is less than y.
    #[inline]
    pub fn less(x: &[Limb], y: &[Limb]) -> bool {
        compare(x, y) == cmp::Ordering::Less
    }

    /// Check if x is greater than or equal to y.
    #[inline]
    pub fn greater_equal(x: &[Limb], y: &[Limb]) -> bool {
        !less(x, y)
    }

    // ADDITION

    /// Implied AddAssign implementation for bigints.
    ///
    /// Allows us to choose a start-index in x to store, so we can avoid
    /// padding the buffer with zeros when not needed, optimized for vectors.
    pub fn iadd_impl(x: &mut Vec<Limb>, y: &[Limb], xstart: usize) {
        // The effective x buffer is from `xstart..x.len()`, so we need to treat
        // that as the current range. If the effective y buffer is longer, need
        // to resize to that, + the start index.
        if y.len() > x.len() - xstart {
            x.resize(y.len() + xstart, 0);
        }

        // Iteratively add elements from y to x.
        let mut carry = false;
        for (xi, yi) in (&mut x[xstart..]).iter_mut().zip(y.iter()) {
            // Only one op of the two can overflow, since we added at max
            // Limb::max_value() + Limb::max_value(). Add the previous carry,
            // and store the current carry for the next.
            let mut tmp = scalar::iadd(xi, *yi);
            if carry {
                tmp |= scalar::iadd(xi, 1);
            }
            carry = tmp;
        }

        // Overflow from the previous bit.
        if carry {
            small::iadd_impl(x, 1, y.len() + xstart);
        }
    }

    /// AddAssign bigint to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: &[Limb]) {
        iadd_impl(x, y, 0)
    }

    /// Add bigint to bigint.
    #[inline]
    pub fn add(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        iadd(&mut z, y);
        z
    }

    // SUBTRACTION

    /// SubAssign bigint to bigint.
    pub fn isub(x: &mut Vec<Limb>, y: &[Limb]) {
        // Basic underflow checks.
        debug_assert!(greater_equal(x, y));

        // Iteratively subtract elements of y from x.
        let mut carry = false;
        for (xi, yi) in x.iter_mut().zip(y.iter()) {
            // Only one op of the two can underflow, since we subtract at most
            // Limb::max_value() + 1. Apply the previous borrow, and store the
            // current borrow for the next.
            let mut tmp = scalar::isub(xi, *yi);
            if carry {
                tmp |= scalar::isub(xi, 1);
            }
            carry = tmp;
        }

        if carry {
            small::isub_impl(x, 1, y.len());
        } else {
            small::normalize(x);
        }
    }

    // MULTIPLICATION

    /// Number of limbs at which to bottom-out to asymptotically slower algorithms.
    ///
    /// Karatsuba tends to out-perform long-multiplication at ~320-640 bits,
    /// so we go halfway, while Newton division tends to out-perform
    /// Algorithm D at ~1024 bits. We can toggle this for optimal performance.
    pub const KARATSUBA_CUTOFF: usize = 32;

    /// Grade-school multiplication algorithm.
    ///
    /// Slow, naive algorithm, using limb-bit bases and just shifting left for
    /// each iteration. This could be optimized with numerous other algorithms,
    /// but it's extremely simple, and works in O(n*m) time, which is fine
    /// by me. Each iteration, of which there are `m` iterations, requires
    /// `n` multiplications, and `n` additions, or grade-school multiplication.
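    ///
    /// For example (a sketch, assuming 32-bit limbs):
    ///
    /// ```text
    /// // (1 + 2^32) * (2 + 3*2^32) = 2 + 5*2^32 + 3*2^64
    /// assert_eq!(long_mul(&[1, 1], &[2, 3]), vec![2, 5, 3]);
    /// ```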
    fn long_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        // Using the immutable value, multiply by all the scalars in y, using
        // the algorithm defined above. Use a single buffer to avoid
        // frequent reallocations. Handle the first case to avoid a redundant
        // addition, since we know y.len() >= 1.
        let mut z: Vec<Limb> = small::mul(x, y[0]);
        z.resize(x.len() + y.len(), 0);

        // Handle the iterative cases.
        for (i, &yi) in y[1..].iter().enumerate() {
            let zi: Vec<Limb> = small::mul(x, yi);
            iadd_impl(&mut z, &zi, i + 1);
        }

        small::normalize(&mut z);

        z
    }

    /// Split a buffer at index `m`, into (lo, hi) halves.
    #[inline]
    pub fn karatsuba_split(z: &[Limb], m: usize) -> (&[Limb], &[Limb]) {
        (&z[..m], &z[m..])
    }

    /// Karatsuba multiplication algorithm with roughly equal input sizes.
    ///
    /// Assumes `y.len() >= x.len()`.
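    ///
    /// The recombination used is (a sketch, writing `B` for `2^(limb bits)`
    /// and `m` for `y.len() / 2`):
    ///
    /// ```text
    /// x*y = z0 + (z1 - z0 - z2)*B^m + z2*B^(2m)
    ///     where z0 = xl*yl, z2 = xh*yh, z1 = (xl + xh)*(yl + yh)
    /// ```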
    fn karatsuba_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if y.len() <= KARATSUBA_CUTOFF {
            // Bottom-out to long multiplication for small cases.
            long_mul(x, y)
        } else if x.len() < y.len() / 2 {
            karatsuba_uneven_mul(x, y)
        } else {
            // Do our 3 multiplications.
            let m = y.len() / 2;
            let (xl, xh) = karatsuba_split(x, m);
            let (yl, yh) = karatsuba_split(y, m);
            let sumx = add(xl, xh);
            let sumy = add(yl, yh);
            let z0 = karatsuba_mul(xl, yl);
            let mut z1 = karatsuba_mul(&sumx, &sumy);
            let z2 = karatsuba_mul(xh, yh);
            // Properly scale z1, which is `z1 - z2 - z0`.
            isub(&mut z1, &z2);
            isub(&mut z1, &z0);

            // Create our result, which is equal to, in little-endian order:
            // [z0, z1 - z2 - z0, z2]
            //  z1 must be shifted m limbs (by `B^m`) over.
            //  z2 must be shifted 2*m limbs (by `B^(2m)`) over.
            let len = z0.len().max(m + z1.len()).max(2 * m + z2.len());
            let mut result = z0;
            result.reserve_exact(len - result.len());
            iadd_impl(&mut result, &z1, m);
            iadd_impl(&mut result, &z2, 2 * m);

            result
        }
    }

    /// Karatsuba multiplication algorithm where y is substantially larger than x.
    ///
    /// Assumes `y.len() >= x.len()`.
    fn karatsuba_uneven_mul(x: &[Limb], mut y: &[Limb]) -> Vec<Limb> {
        let mut result = Vec::<Limb>::default();
        result.resize(x.len() + y.len(), 0);

        // This effectively is like grade-school multiplication between
        // two numbers, except we're using splits on `y`, and the intermediate
        // step is a Karatsuba multiplication.
        let mut start = 0;
        while !y.is_empty() {
            let m = x.len().min(y.len());
            let (yl, yh) = karatsuba_split(y, m);
            let prod = karatsuba_mul(x, yl);
            iadd_impl(&mut result, &prod, start);
            y = yh;
            start += m;
        }
        small::normalize(&mut result);

        result
    }

    /// Forwarder to the proper Karatsuba algorithm.
    #[inline]
    fn karatsuba_mul_fwd(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if x.len() < y.len() {
            karatsuba_mul(x, y)
        } else {
            karatsuba_mul(y, x)
        }
    }

    /// MulAssign bigint to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: &[Limb]) {
        if y.len() == 1 {
            small::imul(x, y[0]);
        } else {
            // `y` has at least 2 limbs: forward to the Karatsuba
            // implementation, which itself bottoms out to long
            // multiplication below `KARATSUBA_CUTOFF`.
            *x = karatsuba_mul_fwd(x, y);
        }
    }
} // large

// TRAITS
// ------

/// Traits for shared operations for big integers.
///
/// None of these are implemented using normal traits, since these
/// are very expensive operations, and we want to deliberately
/// and explicitly use these functions.
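///
/// A minimal sketch of an implementor (hypothetical; the crate's real bigint
/// type lives elsewhere):
///
/// ```text
/// #[derive(Clone, Default)]
/// struct Bigint { data: Vec<Limb> }
///
/// impl Math for Bigint {
///     fn data(&self) -> &Vec<Limb> { &self.data }
///     fn data_mut(&mut self) -> &mut Vec<Limb> { &mut self.data }
/// }
///
/// let mut x = Bigint::from_u64(3);
/// x.imul_pow10(2);                   // 3 * 10^2
/// assert_eq!(x.data(), &vec![300]);
/// ```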
pub(crate) trait Math: Clone + Sized + Default {
    // DATA

    /// Get access to the underlying data
    fn data(&self) -> &Vec<Limb>;

    /// Get access to the underlying data
    fn data_mut(&mut self) -> &mut Vec<Limb>;

    // RELATIVE OPERATIONS

    /// Compare self to y.
    #[inline]
    fn compare(&self, y: &Self) -> cmp::Ordering {
        large::compare(self.data(), y.data())
    }

    // PROPERTIES

    /// Get the high 64-bits from the bigint and if there are remaining bits.
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        self.data().as_slice().hi64()
    }

    /// Calculate the bit-length of the big-integer.
    /// Returns usize::max_value() if the value overflows,
    /// IE, if `self.data().len() > usize::max_value() / 8`.
    #[inline]
    fn bit_length(&self) -> usize {
        small::bit_length(self.data())
    }

    // INTEGER CONVERSIONS

    /// Create new big integer from u64.
    #[inline]
    fn from_u64(x: u64) -> Self {
        let mut v = Self::default();
        let slc = split_u64(x);
        v.data_mut().extend_from_slice(&slc);
        v.normalize();
        v
    }

    // NORMALIZE

    /// Normalize the integer, so any leading zero values are removed.
    #[inline]
    fn normalize(&mut self) {
        small::normalize(self.data_mut());
    }

    // ADDITION

    /// AddAssign small integer.
    #[inline]
    fn iadd_small(&mut self, y: Limb) {
        small::iadd(self.data_mut(), y);
    }

    // MULTIPLICATION

    /// MulAssign small integer.
    #[inline]
    fn imul_small(&mut self, y: Limb) {
        small::imul(self.data_mut(), y);
    }

    /// Multiply by a power of 2.
    #[inline]
    fn imul_pow2(&mut self, n: u32) {
        self.ishl(n as usize)
    }

    /// Multiply by a power of 5.
    #[inline]
    fn imul_pow5(&mut self, n: u32) {
        small::imul_pow5(self.data_mut(), n)
    }

    /// MulAssign by a power of 10.
    #[inline]
    fn imul_pow10(&mut self, n: u32) {
        self.imul_pow5(n);
        self.imul_pow2(n);
    }

    // SHIFTS

    /// Shift-left the entire buffer n bits.
    #[inline]
    fn ishl(&mut self, n: usize) {
        small::ishl(self.data_mut(), n);
    }
}