/* sqrmod_bnm1.c -- squaring mod B^n-1.

   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
42 
43 /* Input is {ap,rn}; output is {rp,rn}, computation is
44    mod B^rn - 1, and values are semi-normalised; zero is represented
45    as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
46    tp==rp is allowed. */
47 static void
mpn_bc_sqrmod_bnm1(mp_ptr rp,mp_srcptr ap,mp_size_t rn,mp_ptr tp)48 mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
49 {
50   mp_limb_t cy;
51 
52   ASSERT (0 < rn);
53 
54   mpn_sqr (tp, ap, rn);
55   cy = mpn_add_n (rp, tp, tp + rn, rn);
56   /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
57    * be no overflow when adding in the carry. */
58   MPN_INCR_U (rp, rn, cy);
59 }
60 
61 
62 /* Input is {ap,rn+1}; output is {rp,rn+1}, in
63    semi-normalised representation, computation is mod B^rn + 1. Needs
64    a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
65    Output is normalised. */
66 static void
mpn_bc_sqrmod_bnp1(mp_ptr rp,mp_srcptr ap,mp_size_t rn,mp_ptr tp)67 mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
68 {
69   mp_limb_t cy;
70 
71   ASSERT (0 < rn);
72 
73   mpn_sqr (tp, ap, rn + 1);
74   ASSERT (tp[2*rn+1] == 0);
75   ASSERT (tp[2*rn] < GMP_NUMB_MAX);
76   cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
77   rp[rn] = 0;
78   MPN_INCR_U (rp, rn+1, cy );
79 }
80 
81 
82 /* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
83  *
84  * The result is expected to be ZERO if and only if the operand
85  * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
86  * B^rn-1.
87  * It should not be a problem if sqrmod_bnm1 is used to
88  * compute the full square with an <= 2*rn, because this condition
89  * implies (B^an-1)^2 < (B^rn-1) .
90  *
91  * Requires rn/4 < an <= rn
92  * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
93  *
94  * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
95  */
96 void
mpn_sqrmod_bnm1(mp_ptr rp,mp_size_t rn,mp_srcptr ap,mp_size_t an,mp_ptr tp)97 mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
98 {
99   ASSERT (0 < an);
100   ASSERT (an <= rn);
101 
102   if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
103     {
104       if (UNLIKELY (an < rn))
105 	{
106 	  if (UNLIKELY (2*an <= rn))
107 	    {
108 	      mpn_sqr (rp, ap, an);
109 	    }
110 	  else
111 	    {
112 	      mp_limb_t cy;
113 	      mpn_sqr (tp, ap, an);
114 	      cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
115 	      MPN_INCR_U (rp, rn, cy);
116 	    }
117 	}
118       else
119 	mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
120     }
121   else
122     {
123       mp_size_t n;
124       mp_limb_t cy;
125       mp_limb_t hi;
126 
127       n = rn >> 1;
128 
129       ASSERT (2*an > n);
130 
131       /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
132 	 and crt together as
133 
134 	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
135       */
136 
137 #define a0 ap
138 #define a1 (ap + n)
139 
140 #define xp  tp	/* 2n + 2 */
141       /* am1  maybe in {xp, n} */
142 #define sp1 (tp + 2*n + 2)
143       /* ap1  maybe in {sp1, n + 1} */
144 
145       {
146 	mp_srcptr am1;
147 	mp_size_t anm;
148 	mp_ptr so;
149 
150 	if (LIKELY (an > n))
151 	  {
152 	    so = xp + n;
153 	    am1 = xp;
154 	    cy = mpn_add (xp, a0, n, a1, an - n);
155 	    MPN_INCR_U (xp, n, cy);
156 	    anm = n;
157 	  }
158 	else
159 	  {
160 	    so = xp;
161 	    am1 = a0;
162 	    anm = an;
163 	  }
164 
165 	mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
166       }
167 
168       {
169 	int       k;
170 	mp_srcptr ap1;
171 	mp_size_t anp;
172 
173 	if (LIKELY (an > n)) {
174 	  ap1 = sp1;
175 	  cy = mpn_sub (sp1, a0, n, a1, an - n);
176 	  sp1[n] = 0;
177 	  MPN_INCR_U (sp1, n + 1, cy);
178 	  anp = n + ap1[n];
179 	} else {
180 	  ap1 = a0;
181 	  anp = an;
182 	}
183 
184 	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
185 	  k=0;
186 	else
187 	  {
188 	    int mask;
189 	    k = mpn_fft_best_k (n, 1);
190 	    mask = (1<<k) -1;
191 	    while (n & mask) {k--; mask >>=1;};
192 	  }
193 	if (k >= FFT_FIRST_K)
194 	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
195 	else if (UNLIKELY (ap1 == a0))
196 	  {
197 	    ASSERT (anp <= n);
198 	    ASSERT (2*anp > n);
199 	    mpn_sqr (xp, a0, an);
200 	    anp = 2*an - n;
201 	    cy = mpn_sub (xp, xp, n, xp + n, anp);
202 	    xp[n] = 0;
203 	    MPN_INCR_U (xp, n+1, cy);
204 	  }
205 	else
206 	  mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
207       }
208 
209       /* Here the CRT recomposition begins.
210 
211 	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
212 	 Division by 2 is a bitwise rotation.
213 
214 	 Assumes xp normalised mod (B^n+1).
215 
216 	 The residue class [0] is represented by [B^n-1]; except when
217 	 both input are ZERO.
218       */
219 
220 #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
221 #if HAVE_NATIVE_mpn_rsh1add_nc
222       cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
223       hi = cy << (GMP_NUMB_BITS - 1);
224       cy = 0;
225       /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
226 	 overflows, i.e. a further increment will not overflow again. */
227 #else /* ! _nc */
228       cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
229       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
230       cy >>= 1;
231       /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
232 	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
233 #endif
234 #if GMP_NAIL_BITS == 0
235       add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
236 #else
237       cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
238       rp[n-1] ^= hi;
239 #endif
240 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
241 #if HAVE_NATIVE_mpn_add_nc
242       cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
243 #else /* ! _nc */
244       cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
245 #endif
246       cy += (rp[0]&1);
247       mpn_rshift(rp, rp, n, 1);
248       ASSERT (cy <= 2);
249       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
250       cy >>= 1;
251       /* We can have cy != 0 only if hi = 0... */
252       ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
253       rp[n-1] |= hi;
254       /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
255 #endif
256       ASSERT (cy <= 1);
257       /* Next increment can not overflow, read the previous comments about cy. */
258       ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
259       MPN_INCR_U(rp, n, cy);
260 
261       /* Compute the highest half:
262 	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
263        */
264       if (UNLIKELY (2*an < rn))
265 	{
266 	  /* Note that in this case, the only way the result can equal
267 	     zero mod B^{rn} - 1 is if the input is zero, and
268 	     then the output of both the recursive calls and this CRT
269 	     reconstruction is zero, not B^{rn} - 1. */
270 	  cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
271 
272 	  /* FIXME: This subtraction of the high parts is not really
273 	     necessary, we do it to get the carry out, and for sanity
274 	     checking. */
275 	  cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
276 				   xp + 2*an - n, rn - 2*an, cy);
277 	  ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
278 	  cy = mpn_sub_1 (rp, rp, 2*an, cy);
279 	  ASSERT (cy == (xp + 2*an - n)[0]);
280 	}
281       else
282 	{
283 	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
284 	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
285 	     DECR will affect _at most_ the lowest n limbs. */
286 	  MPN_DECR_U (rp, 2*n, cy);
287 	}
288 #undef a0
289 #undef a1
290 #undef xp
291 #undef sp1
292     }
293 }
294 
295 mp_size_t
mpn_sqrmod_bnm1_next_size(mp_size_t n)296 mpn_sqrmod_bnm1_next_size (mp_size_t n)
297 {
298   mp_size_t nh;
299 
300   if (BELOW_THRESHOLD (n,     SQRMOD_BNM1_THRESHOLD))
301     return n;
302   if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
303     return (n + (2-1)) & (-2);
304   if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
305     return (n + (4-1)) & (-4);
306 
307   nh = (n + 1) >> 1;
308 
309   if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
310     return (n + (8-1)) & (-8);
311 
312   return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
313 }
314