1 use crate::std_alloc::Vec;
2 use core::mem;
3 use core::ops::Shl;
4 use num_traits::{One, Zero};
5
6 use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit};
7 use crate::biguint::BigUint;
8
9 struct MontyReducer {
10 n0inv: BigDigit,
11 }
12
13 // k0 = -m**-1 mod 2**BITS. Algorithm from: Dumas, J.G. "On Newton–Raphson
14 // Iteration for Multiplicative Inverses Modulo Prime Powers".
inv_mod_alt(b: BigDigit) -> BigDigit15 fn inv_mod_alt(b: BigDigit) -> BigDigit {
16 assert_ne!(b & 1, 0);
17
18 let mut k0 = 2 - b as SignedDoubleBigDigit;
19 let mut t = (b - 1) as SignedDoubleBigDigit;
20 let mut i = 1;
21 while i < big_digit::BITS {
22 t = t.wrapping_mul(t);
23 k0 = k0.wrapping_mul(t + 1);
24
25 i <<= 1;
26 }
27 -k0 as BigDigit
28 }
29
30 impl MontyReducer {
new(n: &BigUint) -> Self31 fn new(n: &BigUint) -> Self {
32 let n0inv = inv_mod_alt(n.data[0]);
33 MontyReducer { n0inv }
34 }
35 }
36
37 /// Computes z mod m = x * y * 2 ** (-n*_W) mod m
38 /// assuming k = -1/m mod 2**_W
39 /// See Gueron, "Efficient Software Implementations of Modular Exponentiation".
40 /// https://eprint.iacr.org/2011/239.pdf
41 /// In the terminology of that paper, this is an "Almost Montgomery Multiplication":
42 /// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result
43 /// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m.
montgomery(x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) -> BigUint44 fn montgomery(x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) -> BigUint {
45 // This code assumes x, y, m are all the same length, n.
46 // (required by addMulVVW and the for loop).
47 // It also assumes that x, y are already reduced mod m,
48 // or else the result will not be properly reduced.
49 assert!(
50 x.data.len() == n && y.data.len() == n && m.data.len() == n,
51 "{:?} {:?} {:?} {}",
52 x,
53 y,
54 m,
55 n
56 );
57
58 let mut z = BigUint::zero();
59 z.data.resize(n * 2, 0);
60
61 let mut c: BigDigit = 0;
62 for i in 0..n {
63 let c2 = add_mul_vvw(&mut z.data[i..n + i], &x.data, y.data[i]);
64 let t = z.data[i].wrapping_mul(k);
65 let c3 = add_mul_vvw(&mut z.data[i..n + i], &m.data, t);
66 let cx = c.wrapping_add(c2);
67 let cy = cx.wrapping_add(c3);
68 z.data[n + i] = cy;
69 if cx < c2 || cy < c3 {
70 c = 1;
71 } else {
72 c = 0;
73 }
74 }
75
76 if c == 0 {
77 z.data = z.data[n..].to_vec();
78 } else {
79 {
80 let (mut first, second) = z.data.split_at_mut(n);
81 sub_vv(&mut first, &second, &m.data);
82 }
83 z.data = z.data[..n].to_vec();
84 }
85
86 z
87 }
88
89 #[inline(always)]
add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit90 fn add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit {
91 let mut c = 0;
92 for (zi, xi) in z.iter_mut().zip(x.iter()) {
93 let (z1, z0) = mul_add_www(*xi, y, *zi);
94 let (c_, zi_) = add_ww(z0, c, 0);
95 *zi = zi_;
96 c = c_ + z1;
97 }
98
99 c
100 }
101
102 /// The resulting carry c is either 0 or 1.
103 #[inline(always)]
sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit104 fn sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit {
105 let mut c = 0;
106 for (i, (xi, yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) {
107 let zi = xi.wrapping_sub(*yi).wrapping_sub(c);
108 z[i] = zi;
109 // see "Hacker's Delight", section 2-12 (overflow detection)
110 c = ((yi & !xi) | ((yi | !xi) & zi)) >> (big_digit::BITS - 1)
111 }
112
113 c
114 }
115
116 /// z1<<_W + z0 = x+y+c, with c == 0 or 1
117 #[inline(always)]
add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit)118 fn add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
119 let yc = y.wrapping_add(c);
120 let z0 = x.wrapping_add(yc);
121 let z1 = if z0 < x || yc < y { 1 } else { 0 };
122
123 (z1, z0)
124 }
125
126 /// z1 << _W + z0 = x * y + c
127 #[inline(always)]
mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit)128 fn mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
129 let z = x as DoubleBigDigit * y as DoubleBigDigit + c as DoubleBigDigit;
130 ((z >> big_digit::BITS) as BigDigit, z as BigDigit)
131 }
132
133 /// Calculates x ** y mod m using a fixed, 4-bit window.
monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint134 pub(crate) fn monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint {
135 assert!(m.data[0] & 1 == 1);
136 let mr = MontyReducer::new(m);
137 let num_words = m.data.len();
138
139 let mut x = x.clone();
140
141 // We want the lengths of x and m to be equal.
142 // It is OK if x >= m as long as len(x) == len(m).
143 if x.data.len() > num_words {
144 x %= m;
145 // Note: now len(x) <= numWords, not guaranteed ==.
146 }
147 if x.data.len() < num_words {
148 x.data.resize(num_words, 0);
149 }
150
151 // rr = 2**(2*_W*len(m)) mod m
152 let mut rr = BigUint::one();
153 rr = (rr.shl(2 * num_words as u64 * u64::from(big_digit::BITS))) % m;
154 if rr.data.len() < num_words {
155 rr.data.resize(num_words, 0);
156 }
157 // one = 1, with equal length to that of m
158 let mut one = BigUint::one();
159 one.data.resize(num_words, 0);
160
161 let n = 4;
162 // powers[i] contains x^i
163 let mut powers = Vec::with_capacity(1 << n);
164 powers.push(montgomery(&one, &rr, m, mr.n0inv, num_words));
165 powers.push(montgomery(&x, &rr, m, mr.n0inv, num_words));
166 for i in 2..1 << n {
167 let r = montgomery(&powers[i - 1], &powers[1], m, mr.n0inv, num_words);
168 powers.push(r);
169 }
170
171 // initialize z = 1 (Montgomery 1)
172 let mut z = powers[0].clone();
173 z.data.resize(num_words, 0);
174 let mut zz = BigUint::zero();
175 zz.data.resize(num_words, 0);
176
177 // same windowed exponent, but with Montgomery multiplications
178 for i in (0..y.data.len()).rev() {
179 let mut yi = y.data[i];
180 let mut j = 0;
181 while j < big_digit::BITS {
182 if i != y.data.len() - 1 || j != 0 {
183 zz = montgomery(&z, &z, m, mr.n0inv, num_words);
184 z = montgomery(&zz, &zz, m, mr.n0inv, num_words);
185 zz = montgomery(&z, &z, m, mr.n0inv, num_words);
186 z = montgomery(&zz, &zz, m, mr.n0inv, num_words);
187 }
188 zz = montgomery(
189 &z,
190 &powers[(yi >> (big_digit::BITS - n)) as usize],
191 m,
192 mr.n0inv,
193 num_words,
194 );
195 mem::swap(&mut z, &mut zz);
196 yi <<= n;
197 j += n;
198 }
199 }
200
201 // convert to regular number
202 zz = montgomery(&z, &one, m, mr.n0inv, num_words);
203
204 zz.normalize();
205 // One last reduction, just in case.
206 // See golang.org/issue/13907.
207 if zz >= *m {
208 // Common case is m has high bit set; in that case,
209 // since zz is the same length as m, there can be just
210 // one multiple of m to remove. Just subtract.
211 // We think that the subtract should be sufficient in general,
212 // so do that unconditionally, but double-check,
213 // in case our beliefs are wrong.
214 // The div is not expected to be reached.
215 zz -= m;
216 if zz >= *m {
217 zz %= m;
218 }
219 }
220
221 zz.normalize();
222 zz
223 }
224