/* hgcd2.c

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 1996, 1998, 2000-2004, 2008, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
34
35 #include "gmp.h"
36 #include "gmp-impl.h"
37 #include "longlong.h"
38
39 #if GMP_NAIL_BITS == 0
40
41 /* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
42 the remainder. */
43
44 /* Single-limb division optimized for small quotients. */
45 static inline mp_limb_t
div1(mp_ptr rp,mp_limb_t n0,mp_limb_t d0)46 div1 (mp_ptr rp,
47 mp_limb_t n0,
48 mp_limb_t d0)
49 {
50 mp_limb_t q = 0;
51
52 if ((mp_limb_signed_t) n0 < 0)
53 {
54 int cnt;
55 for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
56 {
57 d0 = d0 << 1;
58 }
59
60 q = 0;
61 while (cnt)
62 {
63 q <<= 1;
64 if (n0 >= d0)
65 {
66 n0 = n0 - d0;
67 q |= 1;
68 }
69 d0 = d0 >> 1;
70 cnt--;
71 }
72 }
73 else
74 {
75 int cnt;
76 for (cnt = 0; n0 >= d0; cnt++)
77 {
78 d0 = d0 << 1;
79 }
80
81 q = 0;
82 while (cnt)
83 {
84 d0 = d0 >> 1;
85 q <<= 1;
86 if (n0 >= d0)
87 {
88 n0 = n0 - d0;
89 q |= 1;
90 }
91 cnt--;
92 }
93 }
94 *rp = n0;
95 return q;
96 }
97
98 /* Two-limb division optimized for small quotients. */
99 static inline mp_limb_t
div2(mp_ptr rp,mp_limb_t nh,mp_limb_t nl,mp_limb_t dh,mp_limb_t dl)100 div2 (mp_ptr rp,
101 mp_limb_t nh, mp_limb_t nl,
102 mp_limb_t dh, mp_limb_t dl)
103 {
104 mp_limb_t q = 0;
105
106 if ((mp_limb_signed_t) nh < 0)
107 {
108 int cnt;
109 for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
110 {
111 dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
112 dl = dl << 1;
113 }
114
115 while (cnt)
116 {
117 q <<= 1;
118 if (nh > dh || (nh == dh && nl >= dl))
119 {
120 sub_ddmmss (nh, nl, nh, nl, dh, dl);
121 q |= 1;
122 }
123 dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
124 dh = dh >> 1;
125 cnt--;
126 }
127 }
128 else
129 {
130 int cnt;
131 for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
132 {
133 dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
134 dl = dl << 1;
135 }
136
137 while (cnt)
138 {
139 dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
140 dh = dh >> 1;
141 q <<= 1;
142 if (nh > dh || (nh == dh && nl >= dl))
143 {
144 sub_ddmmss (nh, nl, nh, nl, dh, dl);
145 q |= 1;
146 }
147 cnt--;
148 }
149 }
150
151 rp[0] = nl;
152 rp[1] = nh;
153
154 return q;
155 }
156
#if 0
/* This div2 uses less branches, but it seems to nevertheless be
   slightly slower than the above code.

   NOTE(review): when dcnt == 0, the expression dl >> (GMP_LIMB_BITS - dcnt)
   shifts by the full limb width, which is undefined behavior in C even
   though the result is masked out by -(dcnt > 0).  Confirm before ever
   enabling this variant.  */
static inline mp_limb_t
div2 (mp_ptr rp,
      mp_limb_t nh, mp_limb_t nl,
      mp_limb_t dh, mp_limb_t dl)
{
  mp_limb_t q = 0;
  int ncnt;
  int dcnt;

  count_leading_zeros (ncnt, nh);
  count_leading_zeros (dcnt, dh);
  dcnt -= ncnt;

  /* Normalize the divisor so its high limb lines up with the numerator's. */
  dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));
  dl <<= dcnt;

  do
    {
      mp_limb_t bit;
      q <<= 1;
      if (UNLIKELY (nh == dh))
	bit = (nl >= dl);
      else
	bit = (nh > dh);

      q |= bit;

      /* Branch-free conditional subtract: subtracts 0 when bit == 0. */
      sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);

      dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
      dh = dh >> 1;
    }
  while (dcnt--);

  rp[0] = nl;
  rp[1] = nh;

  return q;
}
#endif
200
201 #else /* GMP_NAIL_BITS != 0 */
202 /* Check all functions for nail support. */
203 /* hgcd2 should be defined to take inputs including nail bits, and
204 produce a matrix with elements also including nail bits. This is
205 necessary, for the matrix elements to be useful with mpn_mul_1,
206 mpn_addmul_1 and friends. */
207 #error Not implemented
208 #endif /* GMP_NAIL_BITS != 0 */
209
210 /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
211 matrix M. Returns 1 if we make progress, i.e. can perform at least
212 one subtraction. Otherwise returns zero. */
213
214 /* FIXME: Possible optimizations:
215
216 The div2 function starts with checking the most significant bit of
217 the numerator. We can maintained normalized operands here, call
218 hgcd with normalized operands only, which should make the code
219 simpler and possibly faster.
220
221 Experiment with table lookups on the most significant bits.
222
223 This function is also a candidate for assembler implementation.
224 */
225 int
mpn_hgcd2(mp_limb_t ah,mp_limb_t al,mp_limb_t bh,mp_limb_t bl,struct hgcd_matrix1 * M)226 mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
227 struct hgcd_matrix1 *M)
228 {
229 mp_limb_t u00, u01, u10, u11;
230
231 if (ah < 2 || bh < 2)
232 return 0;
233
234 if (ah > bh || (ah == bh && al > bl))
235 {
236 sub_ddmmss (ah, al, ah, al, bh, bl);
237 if (ah < 2)
238 return 0;
239
240 u00 = u01 = u11 = 1;
241 u10 = 0;
242 }
243 else
244 {
245 sub_ddmmss (bh, bl, bh, bl, ah, al);
246 if (bh < 2)
247 return 0;
248
249 u00 = u10 = u11 = 1;
250 u01 = 0;
251 }
252
253 if (ah < bh)
254 goto subtract_a;
255
256 for (;;)
257 {
258 ASSERT (ah >= bh);
259 if (ah == bh)
260 goto done;
261
262 if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
263 {
264 ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
265 bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
266
267 break;
268 }
269
270 /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
271 1), affecting the second column of M. */
272 ASSERT (ah > bh);
273 sub_ddmmss (ah, al, ah, al, bh, bl);
274
275 if (ah < 2)
276 goto done;
277
278 if (ah <= bh)
279 {
280 /* Use q = 1 */
281 u01 += u00;
282 u11 += u10;
283 }
284 else
285 {
286 mp_limb_t r[2];
287 mp_limb_t q = div2 (r, ah, al, bh, bl);
288 al = r[0]; ah = r[1];
289 if (ah < 2)
290 {
291 /* A is too small, but q is correct. */
292 u01 += q * u00;
293 u11 += q * u10;
294 goto done;
295 }
296 q++;
297 u01 += q * u00;
298 u11 += q * u10;
299 }
300 subtract_a:
301 ASSERT (bh >= ah);
302 if (ah == bh)
303 goto done;
304
305 if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
306 {
307 ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
308 bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
309
310 goto subtract_a1;
311 }
312
313 /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
314 1), affecting the first column of M. */
315 sub_ddmmss (bh, bl, bh, bl, ah, al);
316
317 if (bh < 2)
318 goto done;
319
320 if (bh <= ah)
321 {
322 /* Use q = 1 */
323 u00 += u01;
324 u10 += u11;
325 }
326 else
327 {
328 mp_limb_t r[2];
329 mp_limb_t q = div2 (r, bh, bl, ah, al);
330 bl = r[0]; bh = r[1];
331 if (bh < 2)
332 {
333 /* B is too small, but q is correct. */
334 u00 += q * u01;
335 u10 += q * u11;
336 goto done;
337 }
338 q++;
339 u00 += q * u01;
340 u10 += q * u11;
341 }
342 }
343
344 /* NOTE: Since we discard the least significant half limb, we don't
345 get a truly maximal M (corresponding to |a - b| <
346 2^{GMP_LIMB_BITS +1}). */
347 /* Single precision loop */
348 for (;;)
349 {
350 ASSERT (ah >= bh);
351
352 ah -= bh;
353 if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
354 break;
355
356 if (ah <= bh)
357 {
358 /* Use q = 1 */
359 u01 += u00;
360 u11 += u10;
361 }
362 else
363 {
364 mp_limb_t r;
365 mp_limb_t q = div1 (&r, ah, bh);
366 ah = r;
367 if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
368 {
369 /* A is too small, but q is correct. */
370 u01 += q * u00;
371 u11 += q * u10;
372 break;
373 }
374 q++;
375 u01 += q * u00;
376 u11 += q * u10;
377 }
378 subtract_a1:
379 ASSERT (bh >= ah);
380
381 bh -= ah;
382 if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
383 break;
384
385 if (bh <= ah)
386 {
387 /* Use q = 1 */
388 u00 += u01;
389 u10 += u11;
390 }
391 else
392 {
393 mp_limb_t r;
394 mp_limb_t q = div1 (&r, bh, ah);
395 bh = r;
396 if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
397 {
398 /* B is too small, but q is correct. */
399 u00 += q * u01;
400 u10 += q * u11;
401 break;
402 }
403 q++;
404 u00 += q * u01;
405 u10 += q * u11;
406 }
407 }
408
409 done:
410 M->u[0][0] = u00; M->u[0][1] = u01;
411 M->u[1][0] = u10; M->u[1][1] = u11;
412
413 return 1;
414 }
415
416 /* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
417 * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
418 mp_size_t
mpn_hgcd_mul_matrix1_vector(const struct hgcd_matrix1 * M,mp_ptr rp,mp_srcptr ap,mp_ptr bp,mp_size_t n)419 mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
420 mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
421 {
422 mp_limb_t ah, bh;
423
424 /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
425
426 r = u00 * a
427 r += u10 * b
428 b *= u11
429 b += u01 * a
430 */
431
432 #if HAVE_NATIVE_mpn_addaddmul_1msb0
433 ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
434 bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
435 #else
436 ah = mpn_mul_1 (rp, ap, n, M->u[0][0]);
437 ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
438
439 bh = mpn_mul_1 (bp, bp, n, M->u[1][1]);
440 bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
441 #endif
442 rp[n] = ah;
443 bp[n] = bh;
444
445 n += (ah | bh) > 0;
446 return n;
447 }
448