// Adapted from https://github.com/Alexhuszagh/rust-lexical.

//! Building-blocks for arbitrary-precision math.
//!
//! These algorithms assume little-endian order for the large integer
//! buffers, so for a `vec![0, 1, 2, 3]`, `3` is the most significant limb,
//! and `0` is the least significant limb.

use super::large_powers;
use super::num::*;
use super::small_powers::*;
use crate::lib::{cmp, iter, mem, Vec};

// ALIASES
// -------

// Type for a single limb of the big integer.
//
// A limb is analogous to a digit in base10, except it stores 32-bit
// or 64-bit numbers instead.
//
// This list should cover all known 64-bit platforms supported by Rust.
// https://forge.rust-lang.org/platform-support.html
//
// Platforms where native 128-bit multiplication is explicitly supported:
// - x86_64 (Supported via `MUL`).
// - mips64 (Supported via `DMULTU`, whose result is read from `HI` and `LO`).
//
// Platforms where native 64-bit multiplication is supported and
// the hi-lo parts of a 64-bit multiplication can be extracted:
// - aarch64 (Requires `UMULH` and `MUL` to capture high and low bits).
// - powerpc64 (Requires `MULHDU` and `MULLD` to capture high and low bits).
//
// Platforms where native 128-bit multiplication is not supported,
// requiring software emulation:
// - sparc64 (`UMUL` only supports double-word arguments).
// 32-BIT LIMB
#[cfg(limb_width_32)]
pub type Limb = u32;

#[cfg(limb_width_32)]
pub const POW5_LIMB: &[Limb] = &POW5_32;

#[cfg(limb_width_32)]
pub const POW10_LIMB: &[Limb] = &POW10_32;

#[cfg(limb_width_32)]
type Wide = u64;

// 64-BIT LIMB
#[cfg(limb_width_64)]
pub type Limb = u64;

#[cfg(limb_width_64)]
pub const POW5_LIMB: &[Limb] = &POW5_64;

#[cfg(limb_width_64)]
pub const POW10_LIMB: &[Limb] = &POW10_64;

#[cfg(limb_width_64)]
type Wide = u128;

/// Cast to limb type.
#[inline]
pub(crate) fn as_limb<T: Integer>(t: T) -> Limb {
    Limb::as_cast(t)
}

/// Cast to wide type.
#[inline]
fn as_wide<T: Integer>(t: T) -> Wide {
    Wide::as_cast(t)
}

// SPLIT
// -----

/// Split u64 into limbs, in little-endian order.
#[inline]
#[cfg(limb_width_32)]
fn split_u64(x: u64) -> [Limb; 2] {
    [as_limb(x), as_limb(x >> 32)]
}

/// Split u64 into limbs, in little-endian order.
#[inline]
#[cfg(limb_width_64)]
fn split_u64(x: u64) -> [Limb; 1] {
    [as_limb(x)]
}

// HI64
// ----

// NONZERO

/// Check if any of the remaining bits are non-zero.
#[inline]
pub fn nonzero<T: Integer>(x: &[T], rindex: usize) -> bool {
    let len = x.len();
    let slc = &x[..len - rindex];
    slc.iter().rev().any(|&x| x != T::ZERO)
}

/// Shift 64-bit integer to high 64-bits.
#[inline]
fn u64_to_hi64_1(r0: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    (r0 << ls, false)
}

/// Shift 2 64-bit integers to high 64-bits.
#[inline]
fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    let rs = 64 - ls;
    let v = match ls {
        0 => r0,
        _ => (r0 << ls) | (r1 >> rs),
    };
    let n = r1 << ls != 0;
    (v, n)
}
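
// A minimal sanity-check sketch of the contract above: the high word is
// shifted left until its top bit is set, bits from the low word fill the
// gap, and the boolean reports whether any truncated bits were non-zero.
#[cfg(test)]
mod u64_to_hi64_tests {
    use super::*;

    #[test]
    fn normalizes_and_reports_truncation() {
        // r0 = 1 has 63 leading zeros, so the result is `1 << 63`, and
        // nothing is truncated from r1 = 0.
        assert_eq!(u64_to_hi64_2(1, 0), (1 << 63, false));

        // r0 is already normalized, so r1 is truncated entirely and its
        // non-zero bits set the flag.
        assert_eq!(u64_to_hi64_2(u64::max_value(), 1), (u64::max_value(), true));
    }
}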

/// Trait to export the high 64-bits from a little-endian slice.
trait Hi64<T>: AsRef<[T]> {
    /// Get the hi64 bits from a 1-limb slice.
    fn hi64_1(&self) -> (u64, bool);

    /// Get the hi64 bits from a 2-limb slice.
    fn hi64_2(&self) -> (u64, bool);

    /// Get the hi64 bits from a 3-limb slice.
    fn hi64_3(&self) -> (u64, bool);

    /// High-level exporter to extract the high 64 bits from a little-endian slice.
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        match self.as_ref().len() {
            0 => (0, false),
            1 => self.hi64_1(),
            2 => self.hi64_2(),
            _ => self.hi64_3(),
        }
    }
}

impl Hi64<u32> for [u32] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0] as u64;
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() == 2);
        let r0 = (self[1] as u64) << 32;
        let r1 = self[0] as u64;
        u64_to_hi64_1(r0 | r1)
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 3);
        let r0 = self[self.len() - 1] as u64;
        let r1 = (self[self.len() - 2] as u64) << 32;
        let r2 = self[self.len() - 3] as u64;
        let (v, n) = u64_to_hi64_2(r0, r1 | r2);
        (v, n || nonzero(self, 3))
    }
}

impl Hi64<u64> for [u64] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0];
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 2);
        let r0 = self[self.len() - 1];
        let r1 = self[self.len() - 2];
        let (v, n) = u64_to_hi64_2(r0, r1);
        (v, n || nonzero(self, 2))
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        self.hi64_2()
    }
}
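
// A short illustrative check of the little-endian contract: the *last*
// limb is the most significant, and `hi64` normalizes it into the top
// bits of the returned u64.
#[cfg(test)]
mod hi64_tests {
    use super::*;

    #[test]
    fn hi64_uses_most_significant_limbs() {
        // [0, 1] is 1 * 2^64; normalized, the high 64 bits are `1 << 63`.
        assert_eq!([0u64, 1].hi64(), (1 << 63, false));

        // The truncated limb (2) is non-zero, so the flag is set.
        assert_eq!([2u64, 0, 1].hi64(), (1 << 63, true));
    }
}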

// SCALAR
// ------

// Scalar-to-scalar operations, as building-blocks for arbitrary-precision
// operations.

mod scalar {
    use super::*;

    // ADDITION

    /// Add two small integers and return the resulting value and if overflow happens.
    #[inline]
    pub fn add(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_add(y)
    }

    /// AddAssign two small integers and return if overflow happens.
    #[inline]
    pub fn iadd(x: &mut Limb, y: Limb) -> bool {
        let t = add(*x, y);
        *x = t.0;
        t.1
    }

    // SUBTRACTION

    /// Subtract two small integers and return the resulting value and if overflow happens.
    #[inline]
    pub fn sub(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_sub(y)
    }

    /// SubAssign two small integers and return if overflow happens.
    #[inline]
    pub fn isub(x: &mut Limb, y: Limb) -> bool {
        let t = sub(*x, y);
        *x = t.0;
        t.1
    }

    // MULTIPLICATION

    /// Multiply two small integers (with carry), returning the
    /// (low, high) components.
    #[inline]
    pub fn mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) {
        // Cannot overflow, as long as wide is 2x as wide. This is because
        // the following is always true:
        // `Wide::max_value() - (Narrow::max_value() * Narrow::max_value()) >= Narrow::max_value()`
        let z: Wide = as_wide(x) * as_wide(y) + as_wide(carry);
        let bits = mem::size_of::<Limb>() * 8;
        (as_limb(z), as_limb(z >> bits))
    }

    /// MulAssign two small integers (with carry), returning the carry
    /// out of the multiplication.
    #[inline]
    pub fn imul(x: &mut Limb, y: Limb, carry: Limb) -> Limb {
        let t = mul(*x, y, carry);
        *x = t.0;
        t.1
    }
} // scalar
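
// A worked example of the no-overflow claim in `scalar::mul`: with maximal
// operands and a maximal carry, the wide product is
// `(B - 1)*(B - 1) + (B - 1) = B*(B - 1)`, which still fits in the wide
// type and splits into low = 0, high = B - 1. A sketch for illustration.
#[cfg(test)]
mod scalar_tests {
    use super::*;

    #[test]
    fn mul_with_maximal_carry_cannot_overflow() {
        let max = Limb::max_value();
        assert_eq!(scalar::mul(max, max, max), (0, max));
    }
}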

// SMALL
// -----

// Large-to-small operations, to modify a big integer from a native scalar.

mod small {
    use super::*;

    // ADDITION

    /// Implied AddAssign implementation for adding a small integer to bigint.
    ///
    /// Allows us to choose a start-index in x to store, to allow incrementing
    /// from a non-zero start.
    #[inline]
    pub fn iadd_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        if x.len() <= xstart {
            x.push(y);
        } else {
            // Initial add
            let mut carry = scalar::iadd(&mut x[xstart], y);

            // Increment until overflow stops occurring.
            let mut size = xstart + 1;
            while carry && size < x.len() {
                carry = scalar::iadd(&mut x[size], 1);
                size += 1;
            }

            // If we overflowed the buffer entirely, need to add 1 to the end
            // of the buffer.
            if carry {
                x.push(1);
            }
        }
    }

    /// AddAssign small integer to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: Limb) {
        iadd_impl(x, y, 0);
    }
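
    // A brief illustrative check of carry propagation in `iadd_impl`:
    // adding 1 to a buffer of all-ones limbs ripples a carry through every
    // limb and grows the buffer by one.
    #[cfg(test)]
    mod iadd_tests {
        use super::*;

        #[test]
        fn iadd_ripples_carry_and_grows() {
            let max = Limb::max_value();
            let mut x: Vec<Limb> = [max, max].to_vec();
            iadd(&mut x, 1);
            assert_eq!(x.as_slice(), &[0, 0, 1][..]);
        }
    }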

    // SUBTRACTION

    /// SubAssign small integer to bigint.
    /// Does not do overflowing subtraction.
    #[inline]
    pub fn isub_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        debug_assert!(x.len() > xstart && (x[xstart] >= y || x.len() > xstart + 1));

        // Initial subtraction
        let mut carry = scalar::isub(&mut x[xstart], y);

        // Propagate the borrow until it stops occurring.
        let mut size = xstart + 1;
        while carry && size < x.len() {
            carry = scalar::isub(&mut x[size], 1);
            size += 1;
        }
        normalize(x);
    }

    // MULTIPLICATION

    /// MulAssign small integer to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: Limb) {
        // Multiply iteratively over all elements, adding the carry each time.
        let mut carry: Limb = 0;
        for xi in x.iter_mut() {
            carry = scalar::imul(xi, y, carry);
        }

        // Overflow of value, add to end.
        if carry != 0 {
            x.push(carry);
        }
    }

    /// Mul small integer to bigint.
    #[inline]
    pub fn mul(x: &[Limb], y: Limb) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        imul(&mut z, y);
        z
    }

    /// MulAssign by a power.
    ///
    /// Theoretically...
    ///
    /// Use an exponentiation by squaring method, since it reduces the time
    /// complexity of the multiplication to ~`O(log(n))` for the squaring,
    /// and `O(n*m)` for the result. Since `m` is typically a lower-order
    /// factor, this significantly reduces the number of multiplications
    /// we need to do. Iteratively multiplying by small powers follows
    /// the nth triangular number series, which scales as `O(p^2)`, but
    /// where `p` is `n+m`. In short, it scales very poorly.
    ///
    /// Practically...
    ///
    /// Exponentiation by Squaring:
    /// running 2 tests
    /// test bigcomp_f32_lexical ... bench: 1,018 ns/iter (+/- 78)
    /// test bigcomp_f64_lexical ... bench: 3,639 ns/iter (+/- 1,007)
    ///
    /// Exponentiation by Iterative Small Powers:
    /// running 2 tests
    /// test bigcomp_f32_lexical ... bench: 518 ns/iter (+/- 31)
    /// test bigcomp_f64_lexical ... bench: 583 ns/iter (+/- 47)
    ///
    /// Exponentiation by Iterative Large Powers (of 2):
    /// running 2 tests
    /// test bigcomp_f32_lexical ... bench: 671 ns/iter (+/- 31)
    /// test bigcomp_f64_lexical ... bench: 1,394 ns/iter (+/- 47)
    ///
    /// Even using worst-case scenarios, exponentiation by squaring is
    /// significantly slower for our workloads. Just multiply by small powers,
    /// in simple cases, and use precalculated large powers in other cases.
    pub fn imul_pow5(x: &mut Vec<Limb>, n: u32) {
        use super::large::KARATSUBA_CUTOFF;

        let small_powers = POW5_LIMB;
        let large_powers = large_powers::POW5;

        if n == 0 {
            // No exponent, just return.
            // The 0-index of the large powers is `2^0`, which is 1, so we want
            // to make sure we don't take that path with a literal 0.
            return;
        }

        // We want to use the asymptotically faster algorithm if we're going
        // to be using Karatsuba multiplication sometime during the result,
        // otherwise, just use exponentiation by squaring.
        let bit_length = 32 - n.leading_zeros() as usize;
        debug_assert!(bit_length != 0 && bit_length <= large_powers.len());
        if x.len() + large_powers[bit_length - 1].len() < 2 * KARATSUBA_CUTOFF {
            // We can use iterative small powers to make this faster for the
            // easy cases.

            // Multiply by the largest small power until n < step.
            let step = small_powers.len() - 1;
            let power = small_powers[step];
            let mut n = n as usize;
            while n >= step {
                imul(x, power);
                n -= step;
            }

            // Multiply by the remainder.
            imul(x, small_powers[n]);
        } else {
            // In theory, this code should be asymptotically a lot faster;
            // in practice, our small::imul seems to be the limiting step,
            // and large imul is slow as well.

            // Multiply by higher order powers.
            let mut idx: usize = 0;
            let mut bit: usize = 1;
            let mut n = n as usize;
            while n != 0 {
                if n & bit != 0 {
                    debug_assert!(idx < large_powers.len());
                    large::imul(x, large_powers[idx]);
                    n ^= bit;
                }
                idx += 1;
                bit <<= 1;
            }
        }
    }
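
    // A small sanity check for the small-powers path above: a tiny
    // exponent stays on the iterative path, and the result matches plain
    // integer arithmetic (5^3 = 125). A sketch for illustration only.
    #[cfg(test)]
    mod imul_pow5_tests {
        use super::*;

        #[test]
        fn small_exponent_uses_small_powers() {
            let mut x: Vec<Limb> = [1].to_vec();
            imul_pow5(&mut x, 3);
            assert_eq!(x.as_slice(), &[125][..]);
        }
    }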

    // BIT LENGTH

    /// Get number of leading zero bits in the storage.
    #[inline]
    pub fn leading_zeros(x: &[Limb]) -> usize {
        x.last().map_or(0, |x| x.leading_zeros() as usize)
    }

    /// Calculate the bit-length of the big-integer.
    #[inline]
    pub fn bit_length(x: &[Limb]) -> usize {
        let bits = mem::size_of::<Limb>() * 8;
        // Avoid overflowing, calculate via total number of bits
        // minus leading zero bits.
        let nlz = leading_zeros(x);
        bits.checked_mul(x.len())
            .map_or_else(usize::max_value, |v| v - nlz)
    }
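
    // A limb-width-agnostic sketch of the bit-length rule: total stored
    // bits minus the leading zeros of the most significant limb.
    #[cfg(test)]
    mod bit_length_tests {
        use super::*;

        #[test]
        fn counts_significant_bits() {
            let bits = mem::size_of::<Limb>() * 8;
            assert_eq!(bit_length(&[1]), 1);
            // [0, 1] is 1 * 2^Limb::BITS, needing Limb::BITS + 1 bits.
            assert_eq!(bit_length(&[0, 1]), bits + 1);
        }
    }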

    // SHL

    /// Shift-left bits inside a buffer.
    ///
    /// Assumes `n < Limb::BITS`, i.e., internally shifting bits.
    #[inline]
    pub fn ishl_bits(x: &mut Vec<Limb>, n: usize) {
        // Need to shift by the number of `bits % Limb::BITS`.
        let bits = mem::size_of::<Limb>() * 8;
        debug_assert!(n < bits);
        if n == 0 {
            return;
        }

        // Internally, for each item, we shift left by n, and add the previous
        // right shifted limb-bits.
        // For example, for u8 limbs shifted left by 2, we transform:
        //      b10100100 b01000010
        // into:
        //      b10 b10010001 b00001000
        let rshift = bits - n;
        let lshift = n;
        let mut prev: Limb = 0;
        for xi in x.iter_mut() {
            let tmp = *xi;
            *xi <<= lshift;
            *xi |= prev >> rshift;
            prev = tmp;
        }

        // Push the carry, even if it creates a non-normal result.
        let carry = prev >> rshift;
        if carry != 0 {
            x.push(carry);
        }
    }
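
    // A short illustrative check of the carry push above: shifting an
    // all-ones limb left by one moves its top bit into a new carry limb.
    #[cfg(test)]
    mod ishl_bits_tests {
        use super::*;

        #[test]
        fn pushes_carry_limb() {
            let max = Limb::max_value();
            let mut x: Vec<Limb> = [max].to_vec();
            ishl_bits(&mut x, 1);
            assert_eq!(x.as_slice(), &[max << 1, 1][..]);
        }
    }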

    /// Shift-left `n` limbs inside a buffer.
    ///
    /// Assumes `n` is not 0.
    #[inline]
    pub fn ishl_limbs(x: &mut Vec<Limb>, n: usize) {
        debug_assert!(n != 0);
        if !x.is_empty() {
            x.reserve(n);
            x.splice(..0, iter::repeat(0).take(n));
        }
    }

    /// Shift-left buffer by n bits.
    #[inline]
    pub fn ishl(x: &mut Vec<Limb>, n: usize) {
        let bits = mem::size_of::<Limb>() * 8;
        // Need to pad with zeros for `n / Limb::BITS` limbs,
        // and shift-left with carry for `n % Limb::BITS` bits.
        let rem = n % bits;
        let div = n / bits;
        ishl_bits(x, rem);
        if div != 0 {
            ishl_limbs(x, div);
        }
    }

    // NORMALIZE

    /// Normalize the container by popping any leading zeros.
    #[inline]
    pub fn normalize(x: &mut Vec<Limb>) {
        // Remove leading (most-significant) zero limbs from the end of
        // the buffer, until the top limb is non-zero or the buffer is empty.
        while x.last() == Some(&0) {
            x.pop();
        }
    }
} // small

// LARGE
// -----

// Large-to-large operations, to modify a big integer from another big integer.

mod large {
    use super::*;

    // RELATIVE OPERATORS

    /// Compare `x` to `y`, in little-endian order.
    ///
    /// Assumes both buffers are normalized (no leading zero limbs), so a
    /// longer buffer always compares greater.
    #[inline]
    pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering {
        if x.len() > y.len() {
            cmp::Ordering::Greater
        } else if x.len() < y.len() {
            cmp::Ordering::Less
        } else {
            let iter = x.iter().rev().zip(y.iter().rev());
            for (&xi, &yi) in iter {
                if xi > yi {
                    return cmp::Ordering::Greater;
                } else if xi < yi {
                    return cmp::Ordering::Less;
                }
            }
            // Equal case.
            cmp::Ordering::Equal
        }
    }
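
    // A brief illustrative check of little-endian comparison: [0, 2] is
    // 2*B while [1, 1] is B + 1, so the former is greater even though its
    // first limb is smaller.
    #[cfg(test)]
    mod compare_tests {
        use super::*;

        #[test]
        fn compare_is_little_endian() {
            assert_eq!(compare(&[0, 2], &[1, 1]), cmp::Ordering::Greater);
            assert_eq!(compare(&[1, 1], &[1, 1]), cmp::Ordering::Equal);
        }
    }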

    /// Check if x is less than y.
    #[inline]
    pub fn less(x: &[Limb], y: &[Limb]) -> bool {
        compare(x, y) == cmp::Ordering::Less
    }

    /// Check if x is greater than or equal to y.
    #[inline]
    pub fn greater_equal(x: &[Limb], y: &[Limb]) -> bool {
        !less(x, y)
    }

    // ADDITION

    /// Implied AddAssign implementation for bigints.
    ///
    /// Allows us to choose a start-index in x to store, so we can avoid
    /// padding the buffer with zeros when not needed, optimized for vectors.
    pub fn iadd_impl(x: &mut Vec<Limb>, y: &[Limb], xstart: usize) {
        // The effective x buffer is from `xstart..x.len()`, so we need to treat
        // that as the current range. If the effective y buffer is longer, need
        // to resize to that, + the start index.
        if y.len() > x.len() - xstart {
            x.resize(y.len() + xstart, 0);
        }

        // Iteratively add elements from y to x.
        let mut carry = false;
        for (xi, yi) in (&mut x[xstart..]).iter_mut().zip(y.iter()) {
            // Only one op of the two can overflow, since we added at max
            // Limb::max_value() + Limb::max_value(). Add the previous carry,
            // and store the current carry for the next.
            let mut tmp = scalar::iadd(xi, *yi);
            if carry {
                tmp |= scalar::iadd(xi, 1);
            }
            carry = tmp;
        }

        // Overflow from the previous add.
        if carry {
            small::iadd_impl(x, 1, y.len() + xstart);
        }
    }

    /// AddAssign bigint to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: &[Limb]) {
        iadd_impl(x, y, 0);
    }

    /// Add bigint to bigint.
    #[inline]
    pub fn add(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        iadd(&mut z, y);
        z
    }

    // SUBTRACTION

    /// SubAssign bigint to bigint.
    pub fn isub(x: &mut Vec<Limb>, y: &[Limb]) {
        // Basic underflow checks.
        debug_assert!(greater_equal(x, y));

        // Iteratively subtract elements of y from x.
        let mut carry = false;
        for (xi, yi) in x.iter_mut().zip(y.iter()) {
            // Only one op of the two can underflow, since we subtracted at
            // most Limb::max_value() + 1. Subtract the previous borrow,
            // and store the current borrow for the next.
            let mut tmp = scalar::isub(xi, *yi);
            if carry {
                tmp |= scalar::isub(xi, 1);
            }
            carry = tmp;
        }

        if carry {
            small::isub_impl(x, 1, y.len());
        } else {
            small::normalize(x);
        }
    }
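
    // A short illustrative check of borrow propagation in `isub`:
    // subtracting 1 from B (stored as [0, 1]) borrows from the high limb
    // and normalizes the result to [B - 1].
    #[cfg(test)]
    mod isub_tests {
        use super::*;

        #[test]
        fn propagates_borrow_and_normalizes() {
            let mut x: Vec<Limb> = [0, 1].to_vec();
            isub(&mut x, &[1]);
            assert_eq!(x.as_slice(), &[Limb::max_value()][..]);
        }
    }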

    // MULTIPLICATION

    /// Number of digits to bottom-out to asymptotically slow algorithms.
    ///
    /// Karatsuba tends to out-perform long-multiplication at ~320-640 bits,
    /// so we go halfway, while Newton division tends to out-perform
    /// Algorithm D at ~1024 bits. We can toggle this for optimal performance.
    pub const KARATSUBA_CUTOFF: usize = 32;

    /// Grade-school multiplication algorithm.
    ///
    /// Slow, naive algorithm, using limb-bit bases and just shifting left for
    /// each iteration. This could be optimized with numerous other algorithms,
    /// but it's extremely simple, and works in O(n*m) time, which is fine
    /// by me. Each iteration, of which there are `m` iterations, requires
    /// `n` multiplications, and `n` additions, or grade-school multiplication.
    fn long_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        // Using the immutable value, multiply by all the scalars in y, using
        // the algorithm defined above. Use a single buffer to avoid
        // frequent reallocations. Handle the first case to avoid a redundant
        // addition, since we know y.len() >= 1.
        let mut z: Vec<Limb> = small::mul(x, y[0]);
        z.resize(x.len() + y.len(), 0);

        // Handle the iterative cases.
        for (i, &yi) in y[1..].iter().enumerate() {
            let zi: Vec<Limb> = small::mul(x, yi);
            iadd_impl(&mut z, &zi, i + 1);
        }

        small::normalize(&mut z);

        z
    }
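
    // A worked example of the grade-school algorithm: (B - 1)^2 equals
    // B^2 - 2B + 1, stored in little-endian limbs as [1, B - 2].
    #[cfg(test)]
    mod long_mul_tests {
        use super::*;

        #[test]
        fn max_times_max() {
            let max = Limb::max_value();
            let z = long_mul(&[max], &[max]);
            assert_eq!(z.as_slice(), &[1, max - 1][..]);
        }
    }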

    /// Split a buffer at the index `m`, into (lo, hi) halves.
    #[inline]
    pub fn karatsuba_split(z: &[Limb], m: usize) -> (&[Limb], &[Limb]) {
        (&z[..m], &z[m..])
    }

    /// Karatsuba multiplication algorithm with roughly equal input sizes.
    ///
    /// Assumes `y.len() >= x.len()`.
    fn karatsuba_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if y.len() <= KARATSUBA_CUTOFF {
            // Bottom-out to long multiplication for small cases.
            long_mul(x, y)
        } else if x.len() < y.len() / 2 {
            karatsuba_uneven_mul(x, y)
        } else {
            // Do our 3 multiplications.
            let m = y.len() / 2;
            let (xl, xh) = karatsuba_split(x, m);
            let (yl, yh) = karatsuba_split(y, m);
            let sumx = add(xl, xh);
            let sumy = add(yl, yh);
            let z0 = karatsuba_mul(xl, yl);
            let mut z1 = karatsuba_mul(&sumx, &sumy);
            let z2 = karatsuba_mul(xh, yh);
            // Properly scale z1, which is `z1 - z2 - z0`.
            isub(&mut z1, &z2);
            isub(&mut z1, &z0);

            // Create our result, which is equal to, in little-endian order:
            // [z0, z1 - z2 - z0, z2]
            // z1 must be shifted m limbs (`B^m`) over.
            // z2 must be shifted 2*m limbs (`B^(2m)`) over.
            let len = z0.len().max(m + z1.len()).max(2 * m + z2.len());
            let mut result = z0;
            result.reserve_exact(len - result.len());
            iadd_impl(&mut result, &z1, m);
            iadd_impl(&mut result, &z2, 2 * m);

            result
        }
    }

    /// Karatsuba multiplication algorithm where y is substantially larger than x.
    ///
    /// Assumes `y.len() >= x.len()`.
    fn karatsuba_uneven_mul(x: &[Limb], mut y: &[Limb]) -> Vec<Limb> {
        let mut result = Vec::<Limb>::default();
        result.resize(x.len() + y.len(), 0);

        // This effectively is like grade-school multiplication between
        // two numbers, except we're using splits on `y`, and the intermediate
        // step is a Karatsuba multiplication.
        let mut start = 0;
        while !y.is_empty() {
            let m = x.len().min(y.len());
            let (yl, yh) = karatsuba_split(y, m);
            let prod = karatsuba_mul(x, yl);
            iadd_impl(&mut result, &prod, start);
            y = yh;
            start += m;
        }
        small::normalize(&mut result);

        result
    }

    /// Forwarder to the proper Karatsuba algorithm.
    #[inline]
    fn karatsuba_mul_fwd(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if x.len() < y.len() {
            karatsuba_mul(x, y)
        } else {
            karatsuba_mul(y, x)
        }
    }

    /// MulAssign bigint to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: &[Limb]) {
        if y.len() == 1 {
            small::imul(x, y[0]);
        } else {
            // We're not really in a condition where using Karatsuba
            // multiplication makes sense, so we're just going to use long
            // multiplication. ~20% speedup compared to:
            //      *x = karatsuba_mul_fwd(x, y);
            *x = long_mul(y, x);
        }
    }
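
    // A property-style sketch: above the cutoff, the Karatsuba identity
    //      x*y = z0 + (z1 - z0 - z2)*B^m + z2*B^(2m)
    // must agree with grade-school multiplication on the same inputs.
    #[cfg(test)]
    mod karatsuba_tests {
        use super::*;

        #[test]
        fn karatsuba_matches_long_mul() {
            // 40 limbs per operand forces the Karatsuba branch
            // (y.len() > KARATSUBA_CUTOFF).
            let x: Vec<Limb> = (1..=40).map(|i| i as Limb).collect();
            let y: Vec<Limb> = (1..=40).map(|i| (i * i) as Limb).collect();
            assert_eq!(karatsuba_mul_fwd(&x, &y), long_mul(&x, &y));
        }
    }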
} // large

// TRAITS
// ------

/// Trait for shared operations on big integers.
///
/// None of these are implemented using normal traits, since these
/// are very expensive operations, and we want to deliberately
/// and explicitly use these functions.
pub(crate) trait Math: Clone + Sized + Default {
    // DATA

    /// Get access to the underlying data.
    fn data(&self) -> &Vec<Limb>;

    /// Get mutable access to the underlying data.
    fn data_mut(&mut self) -> &mut Vec<Limb>;

    // RELATIVE OPERATIONS

    /// Compare self to y.
    #[inline]
    fn compare(&self, y: &Self) -> cmp::Ordering {
        large::compare(self.data(), y.data())
    }

    // PROPERTIES

    /// Get the high 64-bits from the bigint and if there are remaining bits.
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        self.data().as_slice().hi64()
    }

    /// Calculate the bit-length of the big-integer.
    /// Returns usize::max_value() if the value overflows,
    /// i.e., if `self.data().len() > usize::max_value() / 8`.
    #[inline]
    fn bit_length(&self) -> usize {
        small::bit_length(self.data())
    }

    // INTEGER CONVERSIONS

    /// Create new big integer from u64.
    #[inline]
    fn from_u64(x: u64) -> Self {
        let mut v = Self::default();
        let slc = split_u64(x);
        v.data_mut().extend_from_slice(&slc);
        v.normalize();
        v
    }

    // NORMALIZE

    /// Normalize the integer, so any leading zero values are removed.
    #[inline]
    fn normalize(&mut self) {
        small::normalize(self.data_mut());
    }

    // ADDITION

    /// AddAssign small integer.
    #[inline]
    fn iadd_small(&mut self, y: Limb) {
        small::iadd(self.data_mut(), y);
    }

    // MULTIPLICATION

    /// MulAssign small integer.
    #[inline]
    fn imul_small(&mut self, y: Limb) {
        small::imul(self.data_mut(), y);
    }

    /// Multiply by a power of 2.
    #[inline]
    fn imul_pow2(&mut self, n: u32) {
        self.ishl(n as usize);
    }

    /// Multiply by a power of 5.
    #[inline]
    fn imul_pow5(&mut self, n: u32) {
        small::imul_pow5(self.data_mut(), n);
    }

    /// MulAssign by a power of 10.
    ///
    /// Since `10^n = 5^n * 2^n`, this is a power-of-5 multiply followed
    /// by a left shift.
    #[inline]
    fn imul_pow10(&mut self, n: u32) {
        self.imul_pow5(n);
        self.imul_pow2(n);
    }

    // SHIFTS

    /// Shift-left the entire buffer n bits.
    #[inline]
    fn ishl(&mut self, n: usize) {
        small::ishl(self.data_mut(), n);
    }
}
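
// A minimal sketch of how a type opts into the shared operations: anything
// exposing a `Vec<Limb>` can implement `Math` and inherit the defaults.
// `DemoBigint` is a hypothetical type used here only for illustration.
#[cfg(test)]
mod math_trait_tests {
    use super::*;

    #[derive(Clone, Default)]
    struct DemoBigint {
        data: Vec<Limb>,
    }

    impl Math for DemoBigint {
        fn data(&self) -> &Vec<Limb> {
            &self.data
        }

        fn data_mut(&mut self) -> &mut Vec<Limb> {
            &mut self.data
        }
    }

    #[test]
    fn imul_pow10_matches_u64_arithmetic() {
        // 123 * 10^4 = 1230000, which fits in a u64 on either limb width.
        let mut x = DemoBigint::from_u64(123);
        x.imul_pow10(4);
        let expected = DemoBigint::from_u64(1_230_000);
        assert_eq!(x.compare(&expected), cmp::Ordering::Equal);
    }
}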