1 /* hgcd_appr.c.
2
3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7 Copyright 2011, 2012 Free Software Foundation, Inc.
8
9 This file is part of the GNU MP Library.
10
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
23
24 #include "mpir.h"
25 #include "gmp-impl.h"
26 #include "longlong.h"
27
28 /* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
29 HGCD_THRESHOLD at the end? */
30 mp_size_t
mpn_hgcd_appr_itch(mp_size_t n)31 mpn_hgcd_appr_itch (mp_size_t n)
32 {
33 if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
34 return n;
35 else
36 {
37 unsigned k;
38 int count;
39 mp_size_t nscaled;
40
41 /* Get the recursion depth. */
42 nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
43 count_leading_zeros (count, nscaled);
44 k = GMP_LIMB_BITS - count;
45
46 return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
47 }
48 }
49
50 /* Destroys inputs. */
51 int
mpn_hgcd_appr(mp_ptr ap,mp_ptr bp,mp_size_t n,struct hgcd_matrix * M,mp_ptr tp)52 mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
53 struct hgcd_matrix *M, mp_ptr tp)
54 {
55 mp_size_t s;
56 int success = 0;
57
58 ASSERT (n > 0);
59
60 ASSERT ((ap[n-1] | bp[n-1]) != 0);
61
62 if (n <= 2)
63 /* Implies s = n. A fairly uninteresting case but exercised by the
64 random inputs of the testsuite. */
65 return 0;
66
67 ASSERT ((n+1)/2 - 1 < M->alloc);
68
69 /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time
70 we discard some of the least significant limbs, we must keep one
71 additional bit to account for the truncation error. We maintain
72 the GMP_NUMB_BITS * s - extra_bits as the current target size. */
73
74 s = n/2 + 1;
75 if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
76 {
77 unsigned extra_bits = 0;
78
79 while (n > 2)
80 {
81 mp_size_t nn;
82
83 ASSERT (n > s);
84 ASSERT (n <= 2*s);
85
86 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
87 if (!nn)
88 break;
89
90 n = nn;
91 success = 1;
92
93 /* We can truncate and discard the lower p bits whenever nbits <=
94 2*sbits - p. To account for the truncation error, we must
95 adjust
96
97 sbits <-- sbits + 1 - p,
98
99 rather than just sbits <-- sbits - p. This adjustment makes
100 the produced matrix sligthly smaller than it could be. */
101
102 if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
103 {
104 mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
105
106 if (extra_bits == 0)
107 {
108 /* We cross a limb boundary and bump s. We can't do that
109 if the result is that it makes makes min(U, V)
110 smaller than 2^{GMP_NUMB_BITS} s. */
111 if (s + 1 == n
112 || mpn_zero_p (ap + s + 1, n - s - 1)
113 || mpn_zero_p (bp + s + 1, n - s - 1))
114 continue;
115
116 extra_bits = GMP_NUMB_BITS - 1;
117 s++;
118 }
119 else
120 {
121 extra_bits--;
122 }
123
124 /* Drop the p least significant limbs */
125 ap += p; bp += p; n -= p; s -= p;
126 }
127 }
128
129 ASSERT (s > 0);
130
131 if (extra_bits > 0)
132 {
133 /* We can get here only of we have dropped at least one of the
134 least significant bits, so we can decrement ap and bp. We can
135 then shift left extra bits using mpn_shiftr. */
136 /* NOTE: In the unlikely case that n is large, it would be
137 preferable to do an initial subdiv step to reduce the size
138 before shifting, but that would mean daplicating
139 mpn_gcd_subdiv_step with a bit count rather than a limb
140 count. */
141 ap--; bp--;
142 ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
143 bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
144 n += (ap[n] | bp[n]) > 0;
145
146 ASSERT (success);
147
148 while (n > 2)
149 {
150 mp_size_t nn;
151
152 ASSERT (n > s);
153 ASSERT (n <= 2*s);
154
155 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
156
157 if (!nn)
158 return 1;
159
160 n = nn;
161 }
162 }
163
164 if (n == 2)
165 {
166 struct hgcd_matrix1 M1;
167 ASSERT (s == 1);
168
169 if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
170 {
171 /* Multiply M <- M * M1 */
172 mpn_hgcd_matrix_mul_1 (M, &M1, tp);
173 success = 1;
174 }
175 }
176 return success;
177 }
178 else
179 {
180 mp_size_t n2 = (3*n)/4 + 1;
181 mp_size_t p = n/2;
182 mp_size_t nn;
183
184 nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
185 if (nn)
186 {
187 n = nn;
188 /* FIXME: Discard some of the low limbs immediately? */
189 success = 1;
190 }
191
192 while (n > n2)
193 {
194 mp_size_t nn;
195
196 /* Needs n + 1 storage */
197 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
198 if (!nn)
199 return success;
200
201 n = nn;
202 success = 1;
203 }
204 if (n > s + 2)
205 {
206 struct hgcd_matrix M1;
207 mp_size_t scratch;
208
209 p = 2*s - n + 1;
210 scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
211
212 mpn_hgcd_matrix_init(&M1, n - p, tp);
213 if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
214 {
215 /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
216 ASSERT (M->n + 2 >= M1.n);
217
218 /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
219 then either q or q + 1 is a correct quotient, and M1 will
220 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
221 rules out the case that the size of M * M1 is much
222 smaller than the expected M->n + M1->n. */
223
224 ASSERT (M->n + M1.n < M->alloc);
225
226 /* We need a bound for of M->n + M1.n. Let n be the original
227 input size. Then
228
229 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
230
231 and it follows that
232
233 M.n + M1.n <= ceil(n/2) + 1
234
235 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
236 amount of needed scratch space. */
237 mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
238 return 1;
239 }
240 }
241
242 for(;;)
243 {
244 mp_size_t nn;
245
246 ASSERT (n > s);
247 ASSERT (n <= 2*s);
248
249 nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
250
251 if (!nn)
252 return success;
253
254 n = nn;
255 success = 1;
256 }
257 }
258 }
259