/* sqrmod_bnm1.c -- squaring mod B^n-1.

   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
37 
38 
#include "gmp-impl.h"
#include "longlong.h"
41 
42 /* Input is {ap,rn}; output is {rp,rn}, computation is
43    mod B^rn - 1, and values are semi-normalised; zero is represented
44    as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
45    tp==rp is allowed. */
46 static void
mpn_bc_sqrmod_bnm1(mp_ptr rp,mp_srcptr ap,mp_size_t rn,mp_ptr tp)47 mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
48 {
49   mp_limb_t cy;
50 
51   ASSERT (0 < rn);
52 
53   mpn_sqr (tp, ap, rn);
54   cy = mpn_add_n (rp, tp, tp + rn, rn);
55   /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
56    * be no overflow when adding in the carry. */
57   MPN_INCR_U (rp, rn, cy);
58 }
59 
60 
61 /* Input is {ap,rn+1}; output is {rp,rn+1}, in
62    semi-normalised representation, computation is mod B^rn + 1. Needs
63    a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
64    Output is normalised. */
65 static void
mpn_bc_sqrmod_bnp1(mp_ptr rp,mp_srcptr ap,mp_size_t rn,mp_ptr tp)66 mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
67 {
68   mp_limb_t cy;
69 
70   ASSERT (0 < rn);
71 
72   mpn_sqr (tp, ap, rn + 1);
73   ASSERT (tp[2*rn+1] == 0);
74   ASSERT (tp[2*rn] < GMP_NUMB_MAX);
75   cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
76   rp[rn] = 0;
77   MPN_INCR_U (rp, rn+1, cy);
78 }
79 
80 
81 /* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
82  *
83  * The result is expected to be ZERO if and only if the operand
84  * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
85  * B^rn-1.
86  * It should not be a problem if sqrmod_bnm1 is used to
87  * compute the full square with an <= 2*rn, because this condition
88  * implies (B^an-1)^2 < (B^rn-1) .
89  *
90  * Requires rn/4 < an <= rn
91  * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
92  *
93  * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
94  */
95 void
mpn_sqrmod_bnm1(mp_ptr rp,mp_size_t rn,mp_srcptr ap,mp_size_t an,mp_ptr tp)96 mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
97 {
98   ASSERT (0 < an);
99   ASSERT (an <= rn);
100 
101   if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
102     {
103       if (UNLIKELY (an < rn))
104 	{
105 	  if (UNLIKELY (2*an <= rn))
106 	    {
107 	      mpn_sqr (rp, ap, an);
108 	    }
109 	  else
110 	    {
111 	      mp_limb_t cy;
112 	      mpn_sqr (tp, ap, an);
113 	      cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
114 	      MPN_INCR_U (rp, rn, cy);
115 	    }
116 	}
117       else
118 	mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
119     }
120   else
121     {
122       mp_size_t n;
123       mp_limb_t cy;
124       mp_limb_t hi;
125 
126       n = rn >> 1;
127 
128       ASSERT (2*an > n);
129 
130       /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
131 	 and crt together as
132 
133 	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
134       */
135 
136 #define a0 ap
137 #define a1 (ap + n)
138 
139 #define xp  tp	/* 2n + 2 */
140       /* am1  maybe in {xp, n} */
141 #define sp1 (tp + 2*n + 2)
142       /* ap1  maybe in {sp1, n + 1} */
143 
144       {
145 	mp_srcptr am1;
146 	mp_size_t anm;
147 	mp_ptr so;
148 
149 	if (LIKELY (an > n))
150 	  {
151 	    so = xp + n;
152 	    am1 = xp;
153 	    cy = mpn_add (xp, a0, n, a1, an - n);
154 	    MPN_INCR_U (xp, n, cy);
155 	    anm = n;
156 	  }
157 	else
158 	  {
159 	    so = xp;
160 	    am1 = a0;
161 	    anm = an;
162 	  }
163 
164 	mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
165       }
166 
167       {
168 	int       k;
169 	mp_srcptr ap1;
170 	mp_size_t anp;
171 
172 	if (LIKELY (an > n)) {
173 	  ap1 = sp1;
174 	  cy = mpn_sub (sp1, a0, n, a1, an - n);
175 	  sp1[n] = 0;
176 	  MPN_INCR_U (sp1, n + 1, cy);
177 	  anp = n + ap1[n];
178 	} else {
179 	  ap1 = a0;
180 	  anp = an;
181 	}
182 
183 	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
184 	  k=0;
185 	else
186 	  {
187 	    int mask;
188 	    k = mpn_fft_best_k (n, 1);
189 	    mask = (1<<k) -1;
190 	    while (n & mask) {k--; mask >>=1;};
191 	  }
192 	if (k >= FFT_FIRST_K)
193 	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
194 	else if (UNLIKELY (ap1 == a0))
195 	  {
196 	    ASSERT (anp <= n);
197 	    ASSERT (2*anp > n);
198 	    mpn_sqr (xp, a0, an);
199 	    anp = 2*an - n;
200 	    cy = mpn_sub (xp, xp, n, xp + n, anp);
201 	    xp[n] = 0;
202 	    MPN_INCR_U (xp, n+1, cy);
203 	  }
204 	else
205 	  mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
206       }
207 
208       /* Here the CRT recomposition begins.
209 
210 	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
211 	 Division by 2 is a bitwise rotation.
212 
213 	 Assumes xp normalised mod (B^n+1).
214 
215 	 The residue class [0] is represented by [B^n-1]; except when
216 	 both input are ZERO.
217       */
218 
219 #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
220 #if HAVE_NATIVE_mpn_rsh1add_nc
221       cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
222       hi = cy << (GMP_NUMB_BITS - 1);
223       cy = 0;
224       /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
225 	 overflows, i.e. a further increment will not overflow again. */
226 #else /* ! _nc */
227       cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
228       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
229       cy >>= 1;
230       /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
231 	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
232 #endif
233 #if GMP_NAIL_BITS == 0
234       add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
235 #else
236       cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
237       rp[n-1] ^= hi;
238 #endif
239 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
240 #if HAVE_NATIVE_mpn_add_nc
241       cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
242 #else /* ! _nc */
243       cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
244 #endif
245       cy += (rp[0]&1);
246       mpn_rshift(rp, rp, n, 1);
247       ASSERT (cy <= 2);
248       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
249       cy >>= 1;
250       /* We can have cy != 0 only if hi = 0... */
251       ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
252       rp[n-1] |= hi;
253       /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
254 #endif
255       ASSERT (cy <= 1);
256       /* Next increment can not overflow, read the previous comments about cy. */
257       ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
258       MPN_INCR_U(rp, n, cy);
259 
260       /* Compute the highest half:
261 	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
262        */
263       if (UNLIKELY (2*an < rn))
264 	{
265 	  /* Note that in this case, the only way the result can equal
266 	     zero mod B^{rn} - 1 is if the input is zero, and
267 	     then the output of both the recursive calls and this CRT
268 	     reconstruction is zero, not B^{rn} - 1. */
269 	  cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
270 
271 	  /* FIXME: This subtraction of the high parts is not really
272 	     necessary, we do it to get the carry out, and for sanity
273 	     checking. */
274 	  cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
275 				   xp + 2*an - n, rn - 2*an, cy);
276 	  ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
277 	  cy = mpn_sub_1 (rp, rp, 2*an, cy);
278 	  ASSERT (cy == (xp + 2*an - n)[0]);
279 	}
280       else
281 	{
282 	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
283 	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
284 	     DECR will affect _at most_ the lowest n limbs. */
285 	  MPN_DECR_U (rp, 2*n, cy);
286 	}
287 #undef a0
288 #undef a1
289 #undef xp
290 #undef sp1
291     }
292 }
293 
294 mp_size_t
mpn_sqrmod_bnm1_next_size(mp_size_t n)295 mpn_sqrmod_bnm1_next_size (mp_size_t n)
296 {
297   mp_size_t nh;
298 
299   if (BELOW_THRESHOLD (n,     SQRMOD_BNM1_THRESHOLD))
300     return n;
301   if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
302     return (n + (2-1)) & (-2);
303   if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
304     return (n + (4-1)) & (-4);
305 
306   nh = (n + 1) >> 1;
307 
308   if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
309     return (n + (8-1)) & (-8);
310 
311   return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
312 }
313