/* sqrmod_bnm1.c -- squaring mod B^n-1.

   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
37
38
39 #include "gmp-impl.h"
40 #include "longlong.h"
41
42 /* Input is {ap,rn}; output is {rp,rn}, computation is
43 mod B^rn - 1, and values are semi-normalised; zero is represented
44 as either 0 or B^n - 1. Needs a scratch of 2rn limbs at tp.
45 tp==rp is allowed. */
46 static void
mpn_bc_sqrmod_bnm1(mp_ptr rp,mp_srcptr ap,mp_size_t rn,mp_ptr tp)47 mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
48 {
49 mp_limb_t cy;
50
51 ASSERT (0 < rn);
52
53 mpn_sqr (tp, ap, rn);
54 cy = mpn_add_n (rp, tp, tp + rn, rn);
55 /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
56 * be no overflow when adding in the carry. */
57 MPN_INCR_U (rp, rn, cy);
58 }
59
60
61 /* Input is {ap,rn+1}; output is {rp,rn+1}, in
62 semi-normalised representation, computation is mod B^rn + 1. Needs
63 a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
64 Output is normalised. */
65 static void
mpn_bc_sqrmod_bnp1(mp_ptr rp,mp_srcptr ap,mp_size_t rn,mp_ptr tp)66 mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
67 {
68 mp_limb_t cy;
69
70 ASSERT (0 < rn);
71
72 mpn_sqr (tp, ap, rn + 1);
73 ASSERT (tp[2*rn+1] == 0);
74 ASSERT (tp[2*rn] < GMP_NUMB_MAX);
75 cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
76 rp[rn] = 0;
77 MPN_INCR_U (rp, rn+1, cy);
78 }
79
80
/* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
 *
 * The result is expected to be ZERO if and only if the operand
 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
 * B^rn-1.
 * It should not be a problem if sqrmod_bnm1 is used to
 * compute the full square with an <= 2*rn, because this condition
 * implies (B^an-1)^2 < (B^rn-1) .
 *
 * Requires rn/4 < an <= rn
 * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
 *
 * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
 */
void
mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
{
  ASSERT (0 < an);
  ASSERT (an <= rn);

  /* Odd rn cannot be split into two equal halves, and small rn is not
     worth the recursion; either way take the direct path.  */
  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
    {
      if (UNLIKELY (an < rn))
	{
	  if (UNLIKELY (2*an <= rn))
	    {
	      /* The full 2*an-limb square fits below B^rn, so no modular
		 reduction is needed at all.  */
	      mpn_sqr (rp, ap, an);
	    }
	  else
	    {
	      mp_limb_t cy;
	      /* Square into scratch, then wrap the 2*an - rn high limbs
		 around onto the low part, since B^rn == 1 (mod B^rn - 1).  */
	      mpn_sqr (tp, ap, an);
	      cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
	      MPN_INCR_U (rp, rn, cy);
	    }
	}
      else
	mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
    }
  else
    {
      mp_size_t n;
      mp_limb_t cy;
      mp_limb_t hi;

      n = rn >> 1;

      ASSERT (2*an > n);

      /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
	 and crt together as

	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
      */

#define a0 ap
#define a1 (ap + n)

#define xp  tp	/* 2n + 2 */
      /* am1  maybe in {xp, n} */
#define sp1 (tp + 2*n + 2)
      /* ap1  maybe in {sp1, n + 1} */

      {
	mp_srcptr am1;
	mp_size_t anm;
	mp_ptr so;

	/* Prepare am1 = a mod (B^n - 1): if the operand is longer than n
	   limbs, fold the high part onto the low part first.  */
	if (LIKELY (an > n))
	  {
	    so = xp + n;
	    am1 = xp;
	    cy = mpn_add (xp, a0, n, a1, an - n);
	    MPN_INCR_U (xp, n, cy);
	    anm = n;
	  }
	else
	  {
	    /* Operand already fits in n limbs; use it directly, and the
	       whole of xp is free as recursion scratch.  */
	    so = xp;
	    am1 = a0;
	    anm = an;
	  }

	/* xm = am1^2 mod (B^n - 1), computed recursively into rp.  */
	mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
      }

      {
	int		k;
	mp_srcptr	ap1;
	mp_size_t	anp;

	/* Prepare ap1 = a mod (B^n + 1): subtract the high part from the
	   low part (B^n == -1), normalising into n+1 limbs.  */
	if (LIKELY (an > n)) {
	  ap1 = sp1;
	  cy = mpn_sub (sp1, a0, n, a1, an - n);
	  sp1[n] = 0;
	  MPN_INCR_U (sp1, n + 1, cy);
	  anp = n + ap1[n];
	} else {
	  ap1 = a0;
	  anp = an;
	}

	/* Pick an FFT transform size k that divides n; fall back to k=0
	   (no FFT) below the modular-FFT threshold or when no suitable k
	   divides n.  */
	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
	  k=0;
	else
	  {
	    int mask;
	    k = mpn_fft_best_k (n, 1);
	    mask = (1<<k) -1;
	    while (n & mask) {k--; mask >>=1;};
	  }
	if (k >= FFT_FIRST_K)
	  /* mpn_mul_fft computes mod B^n + 1 directly.  */
	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
	else if (UNLIKELY (ap1 == a0))
	  {
	    /* an <= n here: square in full, then reduce mod B^n + 1 by
	       subtracting the high part.  */
	    ASSERT (anp <= n);
	    ASSERT (2*anp > n);
	    mpn_sqr (xp, a0, an);
	    anp = 2*an - n;
	    cy = mpn_sub (xp, xp, n, xp + n, anp);
	    xp[n] = 0;
	    MPN_INCR_U (xp, n+1, cy);
	  }
	else
	  mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
      }

      /* Here the CRT recomposition begins.

	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
	 Division by 2 is a bitwise rotation.

	 Assumes xp normalised mod (B^n+1).

	 The residue class [0] is represented by [B^n-1]; except when
	 both input are ZERO.
      */

#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
#if HAVE_NATIVE_mpn_rsh1add_nc
      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
      hi = cy << (GMP_NUMB_BITS - 1);
      cy = 0;
      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
	 overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
#endif
#if GMP_NAIL_BITS == 0
      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
#else
      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
      rp[n-1] ^= hi;
#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
#else /* ! _nc */
      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
#endif
      cy += (rp[0]&1);
      mpn_rshift(rp, rp, n, 1);
      ASSERT (cy <= 2);
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* We can have cy != 0 only if hi = 0... */
      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
      rp[n-1] |= hi;
      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
#endif
      ASSERT (cy <= 1);
      /* Next increment can not overflow, read the previous comments about cy. */
      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
      MPN_INCR_U(rp, n, cy);

      /* Compute the highest half:
	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
      */
      if (UNLIKELY (2*an < rn))
	{
	  /* Note that in this case, the only way the result can equal
	     zero mod B^{rn} - 1 is if the input is zero, and
	     then the output of both the recursive calls and this CRT
	     reconstruction is zero, not B^{rn} - 1. */
	  cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);

	  /* FIXME: This subtraction of the high parts is not really
	     necessary, we do it to get the carry out, and for sanity
	     checking. */
	  cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
				   xp + 2*an - n, rn - 2*an, cy);
	  ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
	  cy = mpn_sub_1 (rp, rp, 2*an, cy);
	  ASSERT (cy == (xp + 2*an - n)[0]);
	}
      else
	{
	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
	     DECR will affect _at most_ the lowest n limbs. */
	  MPN_DECR_U (rp, 2*n, cy);
	}
#undef a0
#undef a1
#undef xp
#undef sp1
    }
}
293
294 mp_size_t
mpn_sqrmod_bnm1_next_size(mp_size_t n)295 mpn_sqrmod_bnm1_next_size (mp_size_t n)
296 {
297 mp_size_t nh;
298
299 if (BELOW_THRESHOLD (n, SQRMOD_BNM1_THRESHOLD))
300 return n;
301 if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
302 return (n + (2-1)) & (-2);
303 if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
304 return (n + (4-1)) & (-4);
305
306 nh = (n + 1) >> 1;
307
308 if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
309 return (n + (8-1)) & (-8);
310
311 return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
312 }
313