1 /* hgcd2.c
2
3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008 Free Software
8 Foundation, Inc.
9
10 This file is part of the GNU MP Library.
11
12 The GNU MP Library is free software; you can redistribute it and/or modify
13 it under the terms of the GNU Lesser General Public License as published by
14 the Free Software Foundation; either version 3 of the License, or (at your
15 option) any later version.
16
17 The GNU MP Library is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
19 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
20 License for more details.
21
22 You should have received a copy of the GNU Lesser General Public License
23 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
24
25 #include "gmp.h"
26 #include "gmp-impl.h"
27 #include "longlong.h"
28
29 #if GMP_NAIL_BITS == 0
30
31 /* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
32 the remainder. */
33
34 /* Single-limb division optimized for small quotients. */
35 static inline mp_limb_t
div1(mp_ptr rp,mp_limb_t n0,mp_limb_t d0)36 div1 (mp_ptr rp,
37 mp_limb_t n0,
38 mp_limb_t d0)
39 {
40 mp_limb_t q = 0;
41
42 if ((mp_limb_signed_t) n0 < 0)
43 {
44 int cnt;
45 for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
46 {
47 d0 = d0 << 1;
48 }
49
50 q = 0;
51 while (cnt)
52 {
53 q <<= 1;
54 if (n0 >= d0)
55 {
56 n0 = n0 - d0;
57 q |= 1;
58 }
59 d0 = d0 >> 1;
60 cnt--;
61 }
62 }
63 else
64 {
65 int cnt;
66 for (cnt = 0; n0 >= d0; cnt++)
67 {
68 d0 = d0 << 1;
69 }
70
71 q = 0;
72 while (cnt)
73 {
74 d0 = d0 >> 1;
75 q <<= 1;
76 if (n0 >= d0)
77 {
78 n0 = n0 - d0;
79 q |= 1;
80 }
81 cnt--;
82 }
83 }
84 *rp = n0;
85 return q;
86 }
87
88 /* Two-limb division optimized for small quotients. */
89 static inline mp_limb_t
div2(mp_ptr rp,mp_limb_t nh,mp_limb_t nl,mp_limb_t dh,mp_limb_t dl)90 div2 (mp_ptr rp,
91 mp_limb_t nh, mp_limb_t nl,
92 mp_limb_t dh, mp_limb_t dl)
93 {
94 mp_limb_t q = 0;
95
96 if ((mp_limb_signed_t) nh < 0)
97 {
98 int cnt;
99 for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
100 {
101 dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
102 dl = dl << 1;
103 }
104
105 while (cnt)
106 {
107 q <<= 1;
108 if (nh > dh || (nh == dh && nl >= dl))
109 {
110 sub_ddmmss (nh, nl, nh, nl, dh, dl);
111 q |= 1;
112 }
113 dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
114 dh = dh >> 1;
115 cnt--;
116 }
117 }
118 else
119 {
120 int cnt;
121 for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
122 {
123 dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
124 dl = dl << 1;
125 }
126
127 while (cnt)
128 {
129 dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
130 dh = dh >> 1;
131 q <<= 1;
132 if (nh > dh || (nh == dh && nl >= dl))
133 {
134 sub_ddmmss (nh, nl, nh, nl, dh, dl);
135 q |= 1;
136 }
137 cnt--;
138 }
139 }
140
141 rp[0] = nl;
142 rp[1] = nh;
143
144 return q;
145 }
146
147 #if 0
148 /* This div2 uses less branches, but it seems to nevertheless be
149 slightly slower than the above code. */
/* Alternative div2 with the same contract as the enabled version
   above: normalizes the denominator with count_leading_zeros instead
   of a search loop, so the main loop carries fewer branches.  Kept
   disabled because it benchmarked slightly slower.  */
static inline mp_limb_t
div2 (mp_ptr rp,
      mp_limb_t nh, mp_limb_t nl,
      mp_limb_t dh, mp_limb_t dl)
{
  mp_limb_t q = 0;
  int ncnt;
  int dcnt;

  count_leading_zeros (ncnt, nh);
  count_leading_zeros (dcnt, dh);
  /* dcnt = how far d must be shifted left to line up with n.  */
  dcnt -= ncnt;

  /* NOTE(review): when dcnt == 0, "dl >> (GMP_LIMB_BITS - dcnt)"
     shifts by the full limb width, which is undefined behavior in C
     even though the -(dcnt > 0) mask discards the result -- confirm
     before re-enabling this code.  */
  dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));
  dl <<= dcnt;

  do
    {
      mp_limb_t bit;
      /* One quotient bit per iteration: set iff (nh,nl) >= (dh,dl).  */
      q <<= 1;
      if (UNLIKELY (nh == dh))
	bit = (nl >= dl);
      else
	bit = (nh > dh);

      q |= bit;

      /* Branch-free conditional subtraction: subtract d only when the
	 quotient bit was set (mask is all-ones or zero).  */
      sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);

      dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
      dh = dh >> 1;
    }
  while (dcnt--);

  rp[0] = nl;
  rp[1] = nh;

  return q;
}
189 #endif
190
191 #else /* GMP_NAIL_BITS != 0 */
192 /* Check all functions for nail support. */
193 /* hgcd2 should be defined to take inputs including nail bits, and
194 produce a matrix with elements also including nail bits. This is
195 necessary, for the matrix elements to be useful with mpn_mul_1,
196 mpn_addmul_1 and friends. */
197 #error Not implemented
198 #endif /* GMP_NAIL_BITS != 0 */
199
/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit.  Constructs
   the matrix M.  Returns 1 if we make progress, i.e. can perform at least
   one subtraction.  Otherwise returns zero.  */
203
/* FIXME: Possible optimizations:

   The div2 function starts with checking the most significant bit of
   the numerator.  We could maintain normalized operands here, and call
   hgcd with normalized operands only, which should make the code
   simpler and possibly faster.

   Experiment with table lookups on the most significant bits.

   This function is also a candidate for assembler implementation.
*/
int
mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
	   struct hgcd_matrix1 *M)
{
  mp_limb_t u00, u01, u10, u11;

  /* Both operands need a significant high limb, otherwise no
     reduction keeping |a-b| above roughly one limb is possible.  */
  if (ah < 2 || bh < 2)
    return 0;

  /* First subtraction; M starts as the identity and the subtraction
     is recorded by setting M to (1 1; 0 1) or (1 0; 1 1), depending
     on which operand was larger.  */
  if (ah > bh || (ah == bh && al > bl))
    {
      sub_ddmmss (ah, al, ah, al, bh, bl);
      if (ah < 2)
	return 0;

      u00 = u01 = u11 = 1;
      u10 = 0;
    }
  else
    {
      sub_ddmmss (bh, bl, bh, bl, ah, al);
      if (bh < 2)
	return 0;

      u00 = u10 = u11 = 1;
      u01 = 0;
    }

  if (ah < bh)
    goto subtract_a;

  /* Double precision loop: alternately reduce a by b and b by a,
     accumulating the quotients into M, until the high limbs fit in a
     half limb; then switch to the single precision loop below.  */
  for (;;)
    {
      ASSERT (ah >= bh);
      if (ah == bh)
	goto done;

      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
	{
	  /* Pack the top half limbs of a and b into single limbs and
	     continue in single precision.  The discarded low half
	     limb is why M is not truly maximal (see NOTE below).  */
	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));

	  break;
	}

      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
	 1), affecting the second column of M. */
      ASSERT (ah > bh);
      sub_ddmmss (ah, al, ah, al, bh, bl);

      if (ah < 2)
	goto done;

      if (ah <= bh)
	{
	  /* Use q = 1 */
	  u01 += u00;
	  u11 += u10;
	}
      else
	{
	  mp_limb_t r[2];
	  mp_limb_t q = div2 (r, ah, al, bh, bl);
	  al = r[0]; ah = r[1];
	  if (ah < 2)
	    {
	      /* A is too small, but q is correct. */
	      u01 += q * u00;
	      u11 += q * u10;
	      goto done;
	    }
	  /* One subtraction was already done above, so the effective
	     quotient is q + 1.  */
	  q++;
	  u01 += q * u00;
	  u11 += q * u10;
	}
    subtract_a:
      ASSERT (bh >= ah);
      if (ah == bh)
	goto done;

      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
	{
	  /* Same switch to single precision as above, entering the
	     single precision loop at its b-reduction half.  */
	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));

	  goto subtract_a1;
	}

      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
	 1), affecting the first column of M. */
      sub_ddmmss (bh, bl, bh, bl, ah, al);

      if (bh < 2)
	goto done;

      if (bh <= ah)
	{
	  /* Use q = 1 */
	  u00 += u01;
	  u10 += u11;
	}
      else
	{
	  mp_limb_t r[2];
	  mp_limb_t q = div2 (r, bh, bl, ah, al);
	  bl = r[0]; bh = r[1];
	  if (bh < 2)
	    {
	      /* B is too small, but q is correct. */
	      u00 += q * u01;
	      u10 += q * u11;
	      goto done;
	    }
	  /* As above: account for the subtraction already done.  */
	  q++;
	  u00 += q * u01;
	  u10 += q * u11;
	}
    }

  /* NOTE: Since we discard the least significant half limb, we don't
     get a truly maximal M (corresponding to |a - b| <
     2^{GMP_LIMB_BITS +1}). */
  /* Single precision loop */
  for (;;)
    {
      ASSERT (ah >= bh);
      if (ah == bh)
	break;

      ah -= bh;
      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
	break;

      if (ah <= bh)
	{
	  /* Use q = 1 */
	  u01 += u00;
	  u11 += u10;
	}
      else
	{
	  mp_limb_t r;
	  mp_limb_t q = div1 (&r, ah, bh);
	  ah = r;
	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
	    {
	      /* A is too small, but q is correct. */
	      u01 += q * u00;
	      u11 += q * u10;
	      break;
	    }
	  /* Effective quotient is q + 1 (one subtraction done above).  */
	  q++;
	  u01 += q * u00;
	  u11 += q * u10;
	}
    subtract_a1:
      ASSERT (bh >= ah);
      if (ah == bh)
	break;

      bh -= ah;
      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
	break;

      if (bh <= ah)
	{
	  /* Use q = 1 */
	  u00 += u01;
	  u10 += u11;
	}
      else
	{
	  mp_limb_t r;
	  mp_limb_t q = div1 (&r, bh, ah);
	  bh = r;
	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
	    {
	      /* B is too small, but q is correct. */
	      u00 += q * u01;
	      u10 += q * u11;
	      break;
	    }
	  /* Effective quotient is q + 1 (one subtraction done above).  */
	  q++;
	  u00 += q * u01;
	  u10 += q * u11;
	}
    }

 done:
  /* Store the accumulated matrix; at least one subtraction was
     recorded, so we return 1 (progress).  */
  M->u[0][0] = u00; M->u[0][1] = u01;
  M->u[1][0] = u10; M->u[1][1] = u11;

  return 1;
}
409
410 /* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
411 * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
412 mp_size_t
mpn_hgcd_mul_matrix1_vector(const struct hgcd_matrix1 * M,mp_ptr rp,mp_srcptr ap,mp_ptr bp,mp_size_t n)413 mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
414 mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
415 {
416 mp_limb_t ah, bh;
417
418 /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
419
420 r = u00 * a
421 r += u10 * b
422 b *= u11
423 b += u01 * a
424 */
425
426 #if HAVE_NATIVE_mpn_addaddmul_1msb0
427 ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
428 bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
429 #else
430 ah = mpn_mul_1 (rp, ap, n, M->u[0][0]);
431 ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
432
433 bh = mpn_mul_1 (bp, bp, n, M->u[1][1]);
434 bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
435 #endif
436 rp[n] = ah;
437 bp[n] = bh;
438
439 n += (ah | bh) > 0;
440 return n;
441 }
442
443 /* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
444 the left. Uses three buffers, to avoid a copy. */
445 mp_size_t
mpn_hgcd_mul_matrix1_inverse_vector(const struct hgcd_matrix1 * M,mp_ptr rp,mp_srcptr ap,mp_ptr bp,mp_size_t n)446 mpn_hgcd_mul_matrix1_inverse_vector (const struct hgcd_matrix1 *M,
447 mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
448 {
449 mp_limb_t h0, h1;
450
451 /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
452
453 r = u11 * a
454 r -= u01 * b
455 b *= u00
456 b -= u10 * a
457 */
458
459 h0 = mpn_mul_1 (rp, ap, n, M->u[1][1]);
460 h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
461 ASSERT (h0 == h1);
462
463 h0 = mpn_mul_1 (bp, bp, n, M->u[0][0]);
464 h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
465 ASSERT (h0 == h1);
466
467 n -= (rp[n-1] | bp[n-1]) == 0;
468 return n;
469 }
470