1 /* hgcd_appr.c.
2
3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7 Copyright 2011, 2012 Free Software Foundation, Inc.
8
9 This file is part of the GNU MP Library.
10
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of either:
13
14 * the GNU Lesser General Public License as published by the Free
15 Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 or
19
20 * the GNU General Public License as published by the Free Software
21 Foundation; either version 2 of the License, or (at your option) any
22 later version.
23
24 or both in parallel, as here.
25
26 The GNU MP Library is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
29 for more details.
30
31 You should have received copies of the GNU General Public License and the
32 GNU Lesser General Public License along with the GNU MP Library. If not,
33 see https://www.gnu.org/licenses/. */
34
35 #include "gmp.h"
36 #include "gmp-impl.h"
37 #include "longlong.h"
38
39 /* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
40 HGCD_THRESHOLD at the end? */
41 mp_size_t
mpn_hgcd_appr_itch(mp_size_t n)42 mpn_hgcd_appr_itch (mp_size_t n)
43 {
44 if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
45 return n;
46 else
47 {
48 unsigned k;
49 int count;
50 mp_size_t nscaled;
51
52 /* Get the recursion depth. */
53 nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
54 count_leading_zeros (count, nscaled);
55 k = GMP_LIMB_BITS - count;
56
57 return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
58 }
59 }
60
61 /* Destroys inputs. */
62 int
mpn_hgcd_appr(mp_ptr ap,mp_ptr bp,mp_size_t n,struct hgcd_matrix * M,mp_ptr tp)63 mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
64 struct hgcd_matrix *M, mp_ptr tp)
65 {
66 mp_size_t s;
67 int success = 0;
68
69 ASSERT (n > 0);
70
71 ASSERT ((ap[n-1] | bp[n-1]) != 0);
72
73 if (n <= 2)
74 /* Implies s = n. A fairly uninteresting case but exercised by the
75 random inputs of the testsuite. */
76 return 0;
77
78 ASSERT ((n+1)/2 - 1 < M->alloc);
79
80 /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time
81 we discard some of the least significant limbs, we must keep one
82 additional bit to account for the truncation error. We maintain
83 the GMP_NUMB_BITS * s - extra_bits as the current target size. */
84
85 s = n/2 + 1;
86 if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
87 {
88 unsigned extra_bits = 0;
89
90 while (n > 2)
91 {
92 mp_size_t nn;
93
94 ASSERT (n > s);
95 ASSERT (n <= 2*s);
96
97 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
98 if (!nn)
99 break;
100
101 n = nn;
102 success = 1;
103
104 /* We can truncate and discard the lower p bits whenever nbits <=
105 2*sbits - p. To account for the truncation error, we must
106 adjust
107
108 sbits <-- sbits + 1 - p,
109
110 rather than just sbits <-- sbits - p. This adjustment makes
111 the produced matrix slightly smaller than it could be. */
112
113 if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
114 {
115 mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
116
117 if (extra_bits == 0)
118 {
119 /* We cross a limb boundary and bump s. We can't do that
120 if the result is that it makes makes min(U, V)
121 smaller than 2^{GMP_NUMB_BITS} s. */
122 if (s + 1 == n
123 || mpn_zero_p (ap + s + 1, n - s - 1)
124 || mpn_zero_p (bp + s + 1, n - s - 1))
125 continue;
126
127 extra_bits = GMP_NUMB_BITS - 1;
128 s++;
129 }
130 else
131 {
132 extra_bits--;
133 }
134
135 /* Drop the p least significant limbs */
136 ap += p; bp += p; n -= p; s -= p;
137 }
138 }
139
140 ASSERT (s > 0);
141
142 if (extra_bits > 0)
143 {
144 /* We can get here only of we have dropped at least one of the least
145 significant bits, so we can decrement ap and bp. We can then shift
146 left extra bits using mpn_rshift. */
147 /* NOTE: In the unlikely case that n is large, it would be preferable
148 to do an initial subdiv step to reduce the size before shifting,
149 but that would mean duplicating mpn_gcd_subdiv_step with a bit
150 count rather than a limb count. */
151 ap--; bp--;
152 ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
153 bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
154 n += (ap[n] | bp[n]) > 0;
155
156 ASSERT (success);
157
158 while (n > 2)
159 {
160 mp_size_t nn;
161
162 ASSERT (n > s);
163 ASSERT (n <= 2*s);
164
165 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
166
167 if (!nn)
168 return 1;
169
170 n = nn;
171 }
172 }
173
174 if (n == 2)
175 {
176 struct hgcd_matrix1 M1;
177 ASSERT (s == 1);
178
179 if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
180 {
181 /* Multiply M <- M * M1 */
182 mpn_hgcd_matrix_mul_1 (M, &M1, tp);
183 success = 1;
184 }
185 }
186 return success;
187 }
188 else
189 {
190 mp_size_t n2 = (3*n)/4 + 1;
191 mp_size_t p = n/2;
192 mp_size_t nn;
193
194 nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
195 if (nn)
196 {
197 n = nn;
198 /* FIXME: Discard some of the low limbs immediately? */
199 success = 1;
200 }
201
202 while (n > n2)
203 {
204 mp_size_t nn;
205
206 /* Needs n + 1 storage */
207 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
208 if (!nn)
209 return success;
210
211 n = nn;
212 success = 1;
213 }
214 if (n > s + 2)
215 {
216 struct hgcd_matrix M1;
217 mp_size_t scratch;
218
219 p = 2*s - n + 1;
220 scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
221
222 mpn_hgcd_matrix_init(&M1, n - p, tp);
223 if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
224 {
225 /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
226 ASSERT (M->n + 2 >= M1.n);
227
228 /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
229 then either q or q + 1 is a correct quotient, and M1 will
230 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
231 rules out the case that the size of M * M1 is much
232 smaller than the expected M->n + M1->n. */
233
234 ASSERT (M->n + M1.n < M->alloc);
235
236 /* We need a bound for of M->n + M1.n. Let n be the original
237 input size. Then
238
239 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
240
241 and it follows that
242
243 M.n + M1.n <= ceil(n/2) + 1
244
245 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
246 amount of needed scratch space. */
247 mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
248 return 1;
249 }
250 }
251
252 for(;;)
253 {
254 mp_size_t nn;
255
256 ASSERT (n > s);
257 ASSERT (n <= 2*s);
258
259 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
260
261 if (!nn)
262 return success;
263
264 n = nn;
265 success = 1;
266 }
267 }
268 }
269