1 /* hgcd_appr.c.
2
3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7 Copyright 2011, 2012 Free Software Foundation, Inc.
8
9 This file is part of the GNU MP Library.
10
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of either:
13
14 * the GNU Lesser General Public License as published by the Free
15 Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 or
19
20 * the GNU General Public License as published by the Free Software
21 Foundation; either version 2 of the License, or (at your option) any
22 later version.
23
24 or both in parallel, as here.
25
26 The GNU MP Library is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
29 for more details.
30
31 You should have received copies of the GNU General Public License and the
32 GNU Lesser General Public License along with the GNU MP Library. If not,
33 see https://www.gnu.org/licenses/. */
34
35 #include "gmp-impl.h"
36 #include "longlong.h"
37
38 /* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
39 HGCD_THRESHOLD at the end? */
40 mp_size_t
mpn_hgcd_appr_itch(mp_size_t n)41 mpn_hgcd_appr_itch (mp_size_t n)
42 {
43 if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
44 return n;
45 else
46 {
47 unsigned k;
48 int count;
49 mp_size_t nscaled;
50
51 /* Get the recursion depth. */
52 nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
53 count_leading_zeros (count, nscaled);
54 k = GMP_LIMB_BITS - count;
55
56 return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
57 }
58 }
59
60 /* Destroys inputs. */
61 int
mpn_hgcd_appr(mp_ptr ap,mp_ptr bp,mp_size_t n,struct hgcd_matrix * M,mp_ptr tp)62 mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
63 struct hgcd_matrix *M, mp_ptr tp)
64 {
65 mp_size_t s;
66 int success = 0;
67
68 ASSERT (n > 0);
69
70 ASSERT ((ap[n-1] | bp[n-1]) != 0);
71
72 if (n <= 2)
73 /* Implies s = n. A fairly uninteresting case but exercised by the
74 random inputs of the testsuite. */
75 return 0;
76
77 ASSERT ((n+1)/2 - 1 < M->alloc);
78
79 /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time
80 we discard some of the least significant limbs, we must keep one
81 additional bit to account for the truncation error. We maintain
82 the GMP_NUMB_BITS * s - extra_bits as the current target size. */
83
84 s = n/2 + 1;
85 if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
86 {
87 unsigned extra_bits = 0;
88
89 while (n > 2)
90 {
91 mp_size_t nn;
92
93 ASSERT (n > s);
94 ASSERT (n <= 2*s);
95
96 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
97 if (!nn)
98 break;
99
100 n = nn;
101 success = 1;
102
103 /* We can truncate and discard the lower p bits whenever nbits <=
104 2*sbits - p. To account for the truncation error, we must
105 adjust
106
107 sbits <-- sbits + 1 - p,
108
109 rather than just sbits <-- sbits - p. This adjustment makes
110 the produced matrix slightly smaller than it could be. */
111
112 if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
113 {
114 mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
115
116 if (extra_bits == 0)
117 {
118 /* We cross a limb boundary and bump s. We can't do that
119 if the result is that it makes makes min(U, V)
120 smaller than 2^{GMP_NUMB_BITS} s. */
121 if (s + 1 == n
122 || mpn_zero_p (ap + s + 1, n - s - 1)
123 || mpn_zero_p (bp + s + 1, n - s - 1))
124 continue;
125
126 extra_bits = GMP_NUMB_BITS - 1;
127 s++;
128 }
129 else
130 {
131 extra_bits--;
132 }
133
134 /* Drop the p least significant limbs */
135 ap += p; bp += p; n -= p; s -= p;
136 }
137 }
138
139 ASSERT (s > 0);
140
141 if (extra_bits > 0)
142 {
143 /* We can get here only of we have dropped at least one of the least
144 significant bits, so we can decrement ap and bp. We can then shift
145 left extra bits using mpn_rshift. */
146 /* NOTE: In the unlikely case that n is large, it would be preferable
147 to do an initial subdiv step to reduce the size before shifting,
148 but that would mean duplicating mpn_gcd_subdiv_step with a bit
149 count rather than a limb count. */
150 ap--; bp--;
151 ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
152 bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
153 n += (ap[n] | bp[n]) > 0;
154
155 ASSERT (success);
156
157 while (n > 2)
158 {
159 mp_size_t nn;
160
161 ASSERT (n > s);
162 ASSERT (n <= 2*s);
163
164 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
165
166 if (!nn)
167 return 1;
168
169 n = nn;
170 }
171 }
172
173 if (n == 2)
174 {
175 struct hgcd_matrix1 M1;
176 ASSERT (s == 1);
177
178 if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
179 {
180 /* Multiply M <- M * M1 */
181 mpn_hgcd_matrix_mul_1 (M, &M1, tp);
182 success = 1;
183 }
184 }
185 return success;
186 }
187 else
188 {
189 mp_size_t n2 = (3*n)/4 + 1;
190 mp_size_t p = n/2;
191 mp_size_t nn;
192
193 nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
194 if (nn)
195 {
196 n = nn;
197 /* FIXME: Discard some of the low limbs immediately? */
198 success = 1;
199 }
200
201 while (n > n2)
202 {
203 mp_size_t nn;
204
205 /* Needs n + 1 storage */
206 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
207 if (!nn)
208 return success;
209
210 n = nn;
211 success = 1;
212 }
213 if (n > s + 2)
214 {
215 struct hgcd_matrix M1;
216 mp_size_t scratch;
217
218 p = 2*s - n + 1;
219 scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
220
221 mpn_hgcd_matrix_init(&M1, n - p, tp);
222 if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
223 {
224 /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
225 ASSERT (M->n + 2 >= M1.n);
226
227 /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
228 then either q or q + 1 is a correct quotient, and M1 will
229 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
230 rules out the case that the size of M * M1 is much
231 smaller than the expected M->n + M1->n. */
232
233 ASSERT (M->n + M1.n < M->alloc);
234
235 /* We need a bound for of M->n + M1.n. Let n be the original
236 input size. Then
237
238 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
239
240 and it follows that
241
242 M.n + M1.n <= ceil(n/2) + 1
243
244 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
245 amount of needed scratch space. */
246 mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
247 return 1;
248 }
249 }
250
251 for(;;)
252 {
253 mp_size_t nn;
254
255 ASSERT (n > s);
256 ASSERT (n <= 2*s);
257
258 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
259
260 if (!nn)
261 return success;
262
263 n = nn;
264 success = 1;
265 }
266 }
267 }
268