1 /* sqrmod_bnm1.c -- squaring mod B^n-1.
2 
3    Contributed to the GNU project by Niels M�ller, Torbjorn Granlund and
4    Marco Bodrato.
5 
6    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 
10 Copyright 2009, 2010 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
18 
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
22 License for more details.
23 
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
26 
27 
28 #include "gmp.h"
29 #include "gmp-impl.h"
30 #include "longlong.h"
31 
32 /* Input is {ap,rn}; output is {rp,rn}, computation is
33    mod B^rn - 1, and values are semi-normalised; zero is represented
34    as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
35    tp==rp is allowed. */
36 static void
37 mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
38 {
39   mp_limb_t cy;
40 
41   ASSERT (0 < rn);
42 
43   mpn_sqr (tp, ap, rn);
44   cy = mpn_add_n (rp, tp, tp + rn, rn);
45   /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
46    * be no overflow when adding in the carry. */
47   MPN_INCR_U (rp, rn, cy);
48 }
49 
50 
51 /* Input is {ap,rn+1}; output is {rp,rn+1}, in
52    semi-normalised representation, computation is mod B^rn + 1. Needs
53    a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
54    Output is normalised. */
55 static void
56 mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
57 {
58   mp_limb_t cy;
59 
60   ASSERT (0 < rn);
61 
62   mpn_sqr (tp, ap, rn + 1);
63   ASSERT (tp[2*rn+1] == 0);
64   ASSERT (tp[2*rn] < GMP_NUMB_MAX);
65   cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
66   rp[rn] = 0;
67   MPN_INCR_U (rp, rn+1, cy );
68 }
69 
70 
71 /* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
72  *
73  * The result is expected to be ZERO if and only if the operand
74  * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
75  * B^rn-1.
76  * It should not be a problem if sqrmod_bnm1 is used to
77  * compute the full square with an <= 2*rn, because this condition
78  * implies (B^an-1)^2 < (B^rn-1) .
79  *
80  * Requires rn/4 < an <= rn
81  * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
82  *
83  * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
84  */
85 void
86 mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
87 {
88   ASSERT (0 < an);
89   ASSERT (an <= rn);
90 
91   if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
92     {
93       if (UNLIKELY (an < rn))
94 	{
95 	  if (UNLIKELY (2*an <= rn))
96 	    {
97 	      mpn_sqr (rp, ap, an);
98 	    }
99 	  else
100 	    {
101 	      mp_limb_t cy;
102 	      mpn_sqr (tp, ap, an);
103 	      cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
104 	      MPN_INCR_U (rp, rn, cy);
105 	    }
106 	}
107       else
108 	mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
109     }
110   else
111     {
112       mp_size_t n;
113       mp_limb_t cy;
114       mp_limb_t hi;
115 
116       n = rn >> 1;
117 
118       ASSERT (2*an > n);
119 
120       /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
121 	 and crt together as
122 
123 	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
124       */
125 
126 #define a0 ap
127 #define a1 (ap + n)
128 
129 #define xp  tp	/* 2n + 2 */
130       /* am1  maybe in {xp, n} */
131 #define sp1 (tp + 2*n + 2)
132       /* ap1  maybe in {sp1, n + 1} */
133 
134       {
135 	mp_srcptr am1;
136 	mp_size_t anm;
137 	mp_ptr so;
138 
139 	if (LIKELY (an > n))
140 	  {
141 	    so = xp + n;
142 	    am1 = xp;
143 	    cy = mpn_add (xp, a0, n, a1, an - n);
144 	    MPN_INCR_U (xp, n, cy);
145 	    anm = n;
146 	  }
147 	else
148 	  {
149 	    so = xp;
150 	    am1 = a0;
151 	    anm = an;
152 	  }
153 
154 	mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
155       }
156 
157       {
158 	int       k;
159 	mp_srcptr ap1;
160 	mp_size_t anp;
161 
162 	if (LIKELY (an > n)) {
163 	  ap1 = sp1;
164 	  cy = mpn_sub (sp1, a0, n, a1, an - n);
165 	  sp1[n] = 0;
166 	  MPN_INCR_U (sp1, n + 1, cy);
167 	  anp = n + ap1[n];
168 	} else {
169 	  ap1 = a0;
170 	  anp = an;
171 	}
172 
173 	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
174 	  k=0;
175 	else
176 	  {
177 	    int mask;
178 	    k = mpn_fft_best_k (n, 1);
179 	    mask = (1<<k) -1;
180 	    while (n & mask) {k--; mask >>=1;};
181 	  }
182 	if (k >= FFT_FIRST_K)
183 	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
184 	else if (UNLIKELY (ap1 == a0))
185 	  {
186 	    ASSERT (anp <= n);
187 	    ASSERT (2*anp > n);
188 	    mpn_sqr (xp, a0, an);
189 	    anp = 2*an - n;
190 	    cy = mpn_sub (xp, xp, n, xp + n, anp);
191 	    xp[n] = 0;
192 	    MPN_INCR_U (xp, n+1, cy);
193 	  }
194 	else
195 	  mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
196       }
197 
198       /* Here the CRT recomposition begins.
199 
200 	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
201 	 Division by 2 is a bitwise rotation.
202 
203 	 Assumes xp normalised mod (B^n+1).
204 
205 	 The residue class [0] is represented by [B^n-1]; except when
206 	 both input are ZERO.
207       */
208 
209 #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
210 #if HAVE_NATIVE_mpn_rsh1add_nc
211       cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
212       hi = cy << (GMP_NUMB_BITS - 1);
213       cy = 0;
214       /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
215 	 overflows, i.e. a further increment will not overflow again. */
216 #else /* ! _nc */
217       cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
218       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
219       cy >>= 1;
220       /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
221 	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
222 #endif
223 #if GMP_NAIL_BITS == 0
224       add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
225 #else
226       cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
227       rp[n-1] ^= hi;
228 #endif
229 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
230 #if HAVE_NATIVE_mpn_add_nc
231       cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
232 #else /* ! _nc */
233       cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
234 #endif
235       cy += (rp[0]&1);
236       mpn_rshift(rp, rp, n, 1);
237       ASSERT (cy <= 2);
238       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
239       cy >>= 1;
240       /* We can have cy != 0 only if hi = 0... */
241       ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
242       rp[n-1] |= hi;
243       /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
244 #endif
245       ASSERT (cy <= 1);
246       /* Next increment can not overflow, read the previous comments about cy. */
247       ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
248       MPN_INCR_U(rp, n, cy);
249 
250       /* Compute the highest half:
251 	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
252        */
253       if (UNLIKELY (2*an < rn))
254 	{
255 	  /* Note that in this case, the only way the result can equal
256 	     zero mod B^{rn} - 1 is if the input is zero, and
257 	     then the output of both the recursive calls and this CRT
258 	     reconstruction is zero, not B^{rn} - 1. */
259 	  cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
260 
261 	  /* FIXME: This subtraction of the high parts is not really
262 	     necessary, we do it to get the carry out, and for sanity
263 	     checking. */
264 	  cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
265 				   xp + 2*an - n, rn - 2*an, cy);
266 	  ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
267 	  cy = mpn_sub_1 (rp, rp, 2*an, cy);
268 	  ASSERT (cy == (xp + 2*an - n)[0]);
269 	}
270       else
271 	{
272 	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
273 	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
274 	     DECR will affect _at most_ the lowest n limbs. */
275 	  MPN_DECR_U (rp, 2*n, cy);
276 	}
277 #undef a0
278 #undef a1
279 #undef xp
280 #undef sp1
281     }
282 }
283 
284 mp_size_t
285 mpn_sqrmod_bnm1_next_size (mp_size_t n)
286 {
287   mp_size_t nh;
288 
289   if (BELOW_THRESHOLD (n,     SQRMOD_BNM1_THRESHOLD))
290     return n;
291   if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
292     return (n + (2-1)) & (-2);
293   if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
294     return (n + (4-1)) & (-4);
295 
296   nh = (n + 1) >> 1;
297 
298   if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
299     return (n + (8-1)) & (-8);
300 
301   return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
302 }
303