1 /* hgcd_appr.c.
2 
3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6 
7 Copyright 2011, 2012 Free Software Foundation, Inc.
8 
9 This file is part of the GNU MP Library.
10 
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of either:
13 
14   * the GNU Lesser General Public License as published by the Free
15     Software Foundation; either version 3 of the License, or (at your
16     option) any later version.
17 
18 or
19 
20   * the GNU General Public License as published by the Free Software
21     Foundation; either version 2 of the License, or (at your option) any
22     later version.
23 
24 or both in parallel, as here.
25 
26 The GNU MP Library is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
29 for more details.
30 
31 You should have received copies of the GNU General Public License and the
32 GNU Lesser General Public License along with the GNU MP Library.  If not,
33 see https://www.gnu.org/licenses/.  */
34 
35 #include "gmp.h"
36 #include "gmp-impl.h"
37 #include "longlong.h"
38 
39 /* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
40    HGCD_THRESHOLD at the end? */
41 mp_size_t
mpn_hgcd_appr_itch(mp_size_t n)42 mpn_hgcd_appr_itch (mp_size_t n)
43 {
44   if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
45     return n;
46   else
47     {
48       unsigned k;
49       int count;
50       mp_size_t nscaled;
51 
52       /* Get the recursion depth. */
53       nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
54       count_leading_zeros (count, nscaled);
55       k = GMP_LIMB_BITS - count;
56 
57       return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
58     }
59 }
60 
61 /* Destroys inputs. */
62 int
mpn_hgcd_appr(mp_ptr ap,mp_ptr bp,mp_size_t n,struct hgcd_matrix * M,mp_ptr tp)63 mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
64 	       struct hgcd_matrix *M, mp_ptr tp)
65 {
66   mp_size_t s;
67   int success = 0;
68 
69   ASSERT (n > 0);
70 
71   ASSERT ((ap[n-1] | bp[n-1]) != 0);
72 
73   if (n <= 2)
74     /* Implies s = n. A fairly uninteresting case but exercised by the
75        random inputs of the testsuite. */
76     return 0;
77 
78   ASSERT ((n+1)/2 - 1 < M->alloc);
79 
80   /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time
81      we discard some of the least significant limbs, we must keep one
82      additional bit to account for the truncation error. We maintain
83      the GMP_NUMB_BITS * s - extra_bits as the current target size. */
84 
85   s = n/2 + 1;
86   if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
87     {
88       unsigned extra_bits = 0;
89 
90       while (n > 2)
91 	{
92 	  mp_size_t nn;
93 
94 	  ASSERT (n > s);
95 	  ASSERT (n <= 2*s);
96 
97 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
98 	  if (!nn)
99 	    break;
100 
101 	  n = nn;
102 	  success = 1;
103 
104 	  /* We can truncate and discard the lower p bits whenever nbits <=
105 	     2*sbits - p. To account for the truncation error, we must
106 	     adjust
107 
108 	     sbits <-- sbits + 1 - p,
109 
110 	     rather than just sbits <-- sbits - p. This adjustment makes
111 	     the produced matrix slightly smaller than it could be. */
112 
113 	  if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
114 	    {
115 	      mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
116 
117 	      if (extra_bits == 0)
118 		{
119 		  /* We cross a limb boundary and bump s. We can't do that
120 		     if the result is that it makes makes min(U, V)
121 		     smaller than 2^{GMP_NUMB_BITS} s. */
122 		  if (s + 1 == n
123 		      || mpn_zero_p (ap + s + 1, n - s - 1)
124 		      || mpn_zero_p (bp + s + 1, n - s - 1))
125 		    continue;
126 
127 		  extra_bits = GMP_NUMB_BITS - 1;
128 		  s++;
129 		}
130 	      else
131 		{
132 		  extra_bits--;
133 		}
134 
135 	      /* Drop the p least significant limbs */
136 	      ap += p; bp += p; n -= p; s -= p;
137 	    }
138 	}
139 
140       ASSERT (s > 0);
141 
142       if (extra_bits > 0)
143 	{
144 	  /* We can get here only of we have dropped at least one of the least
145 	     significant bits, so we can decrement ap and bp. We can then shift
146 	     left extra bits using mpn_rshift. */
147 	  /* NOTE: In the unlikely case that n is large, it would be preferable
148 	     to do an initial subdiv step to reduce the size before shifting,
149 	     but that would mean duplicating mpn_gcd_subdiv_step with a bit
150 	     count rather than a limb count. */
151 	  ap--; bp--;
152 	  ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
153 	  bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
154 	  n += (ap[n] | bp[n]) > 0;
155 
156 	  ASSERT (success);
157 
158 	  while (n > 2)
159 	    {
160 	      mp_size_t nn;
161 
162 	      ASSERT (n > s);
163 	      ASSERT (n <= 2*s);
164 
165 	      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
166 
167 	      if (!nn)
168 		return 1;
169 
170 	      n = nn;
171 	    }
172 	}
173 
174       if (n == 2)
175 	{
176 	  struct hgcd_matrix1 M1;
177 	  ASSERT (s == 1);
178 
179 	  if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
180 	    {
181 	      /* Multiply M <- M * M1 */
182 	      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
183 	      success = 1;
184 	    }
185 	}
186       return success;
187     }
188   else
189     {
190       mp_size_t n2 = (3*n)/4 + 1;
191       mp_size_t p = n/2;
192       mp_size_t nn;
193 
194       nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
195       if (nn)
196 	{
197 	  n = nn;
198 	  /* FIXME: Discard some of the low limbs immediately? */
199 	  success = 1;
200 	}
201 
202       while (n > n2)
203 	{
204 	  mp_size_t nn;
205 
206 	  /* Needs n + 1 storage */
207 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
208 	  if (!nn)
209 	    return success;
210 
211 	  n = nn;
212 	  success = 1;
213 	}
214       if (n > s + 2)
215 	{
216 	  struct hgcd_matrix M1;
217 	  mp_size_t scratch;
218 
219 	  p = 2*s - n + 1;
220 	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
221 
222 	  mpn_hgcd_matrix_init(&M1, n - p, tp);
223 	  if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
224 	    {
225 	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
226 	      ASSERT (M->n + 2 >= M1.n);
227 
228 	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
229 		 then either q or q + 1 is a correct quotient, and M1 will
230 		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
231 		 rules out the case that the size of M * M1 is much
232 		 smaller than the expected M->n + M1->n. */
233 
234 	      ASSERT (M->n + M1.n < M->alloc);
235 
236 	      /* We need a bound for of M->n + M1.n. Let n be the original
237 		 input size. Then
238 
239 		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
240 
241 		 and it follows that
242 
243 		 M.n + M1.n <= ceil(n/2) + 1
244 
245 		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
246 		 amount of needed scratch space. */
247 	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
248 	      return 1;
249 	    }
250 	}
251 
252       for(;;)
253 	{
254 	  mp_size_t nn;
255 
256 	  ASSERT (n > s);
257 	  ASSERT (n <= 2*s);
258 
259 	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
260 
261 	  if (!nn)
262 	    return success;
263 
264 	  n = nn;
265 	  success = 1;
266 	}
267     }
268 }
269