1 /* hgcd_appr.c.
2 
3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6 
7 Copyright 2011, 2012 Free Software Foundation, Inc.
8 
9 This file is part of the GNU MP Library.
10 
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15 
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19 License for more details.
20 
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23 
24 #include "mpir.h"
25 #include "gmp-impl.h"
26 #include "longlong.h"
27 
28 /* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
29    HGCD_THRESHOLD at the end? */
30 mp_size_t
mpn_hgcd_appr_itch(mp_size_t n)31 mpn_hgcd_appr_itch (mp_size_t n)
32 {
33   if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
34     return n;
35   else
36     {
37       unsigned k;
38       int count;
39       mp_size_t nscaled;
40 
41       /* Get the recursion depth. */
42       nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
43       count_leading_zeros (count, nscaled);
44       k = GMP_LIMB_BITS - count;
45 
46       return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
47     }
48 }
49 
50 /* Destroys inputs. */
51 int
mpn_hgcd_appr(mp_ptr ap,mp_ptr bp,mp_size_t n,struct hgcd_matrix * M,mp_ptr tp)52 mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
53 	       struct hgcd_matrix *M, mp_ptr tp)
54 {
55   mp_size_t s;
56   int success = 0;
57 
58   ASSERT (n > 0);
59 
60   ASSERT ((ap[n-1] | bp[n-1]) != 0);
61 
62   if (n <= 2)
63     /* Implies s = n. A fairly uninteresting case but exercised by the
64        random inputs of the testsuite. */
65     return 0;
66 
67   ASSERT ((n+1)/2 - 1 < M->alloc);
68 
69   /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time
70      we discard some of the least significant limbs, we must keep one
71      additional bit to account for the truncation error. We maintain
72      the GMP_NUMB_BITS * s - extra_bits as the current target size. */
73 
74   s = n/2 + 1;
75   if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
76     {
77       unsigned extra_bits = 0;
78 
79       while (n > 2)
80 	{
81 	  mp_size_t nn;
82 
83 	  ASSERT (n > s);
84 	  ASSERT (n <= 2*s);
85 
86 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
87 	  if (!nn)
88 	    break;
89 
90 	  n = nn;
91 	  success = 1;
92 
93 	  /* We can truncate and discard the lower p bits whenever nbits <=
94 	     2*sbits - p. To account for the truncation error, we must
95 	     adjust
96 
97 	     sbits <-- sbits + 1 - p,
98 
99 	     rather than just sbits <-- sbits - p. This adjustment makes
100 	     the produced matrix sligthly smaller than it could be. */
101 
102 	  if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
103 	    {
104 	      mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
105 
106 	      if (extra_bits == 0)
107 		{
108 		  /* We cross a limb boundary and bump s. We can't do that
109 		     if the result is that it makes makes min(U, V)
110 		     smaller than 2^{GMP_NUMB_BITS} s. */
111 		  if (s + 1 == n
112 		      || mpn_zero_p (ap + s + 1, n - s - 1)
113 		      || mpn_zero_p (bp + s + 1, n - s - 1))
114 		    continue;
115 
116 		  extra_bits = GMP_NUMB_BITS - 1;
117 		  s++;
118 		}
119 	      else
120 		{
121 		  extra_bits--;
122 		}
123 
124 	      /* Drop the p least significant limbs */
125 	      ap += p; bp += p; n -= p; s -= p;
126 	    }
127 	}
128 
129       ASSERT (s > 0);
130 
131       if (extra_bits > 0)
132 	{
133 	  /* We can get here only of we have dropped at least one of the
134 	     least significant bits, so we can decrement ap and bp. We can
135 	     then shift left extra bits using mpn_shiftr. */
136 	  /* NOTE: In the unlikely case that n is large, it would be
137 	     preferable to do an initial subdiv step to reduce the size
138 	     before shifting, but that would mean daplicating
139 	     mpn_gcd_subdiv_step with a bit count rather than a limb
140 	     count. */
141 	  ap--; bp--;
142 	  ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
143 	  bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
144 	  n += (ap[n] | bp[n]) > 0;
145 
146 	  ASSERT (success);
147 
148 	  while (n > 2)
149 	    {
150 	      mp_size_t nn;
151 
152 	      ASSERT (n > s);
153 	      ASSERT (n <= 2*s);
154 
155 	      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
156 
157 	      if (!nn)
158 		return 1;
159 
160 	      n = nn;
161 	    }
162 	}
163 
164       if (n == 2)
165 	{
166 	  struct hgcd_matrix1 M1;
167 	  ASSERT (s == 1);
168 
169 	  if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
170 	    {
171 	      /* Multiply M <- M * M1 */
172 	      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
173 	      success = 1;
174 	    }
175 	}
176       return success;
177     }
178   else
179     {
180       mp_size_t n2 = (3*n)/4 + 1;
181       mp_size_t p = n/2;
182       mp_size_t nn;
183 
184       nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
185       if (nn)
186 	{
187 	  n = nn;
188 	  /* FIXME: Discard some of the low limbs immediately? */
189 	  success = 1;
190 	}
191 
192       while (n > n2)
193 	{
194 	  mp_size_t nn;
195 
196 	  /* Needs n + 1 storage */
197 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
198 	  if (!nn)
199 	    return success;
200 
201 	  n = nn;
202 	  success = 1;
203 	}
204       if (n > s + 2)
205 	{
206 	  struct hgcd_matrix M1;
207 	  mp_size_t scratch;
208 
209 	  p = 2*s - n + 1;
210 	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
211 
212 	  mpn_hgcd_matrix_init(&M1, n - p, tp);
213 	  if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
214 	    {
215 	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
216 	      ASSERT (M->n + 2 >= M1.n);
217 
218 	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
219 		 then either q or q + 1 is a correct quotient, and M1 will
220 		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
221 		 rules out the case that the size of M * M1 is much
222 		 smaller than the expected M->n + M1->n. */
223 
224 	      ASSERT (M->n + M1.n < M->alloc);
225 
226 	      /* We need a bound for of M->n + M1.n. Let n be the original
227 		 input size. Then
228 
229 		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
230 
231 		 and it follows that
232 
233 		 M.n + M1.n <= ceil(n/2) + 1
234 
235 		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
236 		 amount of needed scratch space. */
237 	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
238 	      return 1;
239 	    }
240 	}
241 
242       for(;;)
243 	{
244 	  mp_size_t nn;
245 
246 	  ASSERT (n > s);
247 	  ASSERT (n <= 2*s);
248 
249 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
250 
251 	  if (!nn)
252 	    return success;
253 
254 	  n = nn;
255 	  success = 1;
256 	}
257     }
258 }
259