1 /* Median/middle product.
2
3 Copyright 2003, 2004, 2005, 2006, 2007, 2008 Laurent Fousse, Paul Zimmermann,
4 Alexander Kruppa, Dave Newman.
5
6 This file is part of the ECM Library.
7
8 The ECM Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The ECM Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the ECM Library; see the file COPYING.LIB. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 /* Reference:
24 [1] Tellegen's Principle into Practice, by A. Bostan, G. Lecerf and E. Schost,
25 Proc. of ISSAC'03, Philadelphia, 2003.
26 */
27
28 #include <stdio.h>
29 #include "ecm-impl.h"
30
31 #ifndef MAX
32 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
33 #endif
34
35 #ifndef MIN
36 #define MIN(a,b) (((a) < (b)) ? (a) : (b))
37 #endif
38
39 extern unsigned int Fermat;
40
41 static void list_add_wrapper (listz_t, listz_t, listz_t, unsigned int,
42 unsigned int);
43 static void list_sub_wrapper (listz_t, listz_t, listz_t, unsigned int,
44 unsigned int);
45 static unsigned int TKarMul (listz_t, unsigned int, listz_t, unsigned int,
46 listz_t, unsigned int, listz_t);
47 static void list_sub_safe (listz_t, listz_t, listz_t, unsigned int,
48 unsigned int, unsigned int);
49 static void list_add_safe (listz_t, listz_t, listz_t, unsigned int,
50 unsigned int, unsigned int);
51 static unsigned int TToomCookMul (listz_t, unsigned int, listz_t, unsigned int,
52 listz_t, unsigned int, listz_t);
53 static unsigned int TToomCookMul_space (unsigned int, unsigned int,
54 unsigned int);
55
56 static void
list_add_wrapper(listz_t p,listz_t q,listz_t r,unsigned int n,unsigned int max_r)57 list_add_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n,
58 unsigned int max_r)
59 {
60 list_add (p, q, r, MIN (n, max_r));
61 if (n > max_r)
62 list_set (p + max_r, q + max_r, n - max_r);
63 }
64
65 static void
list_sub_wrapper(listz_t p,listz_t q,listz_t r,unsigned int n,unsigned int max_r)66 list_sub_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n,
67 unsigned int max_r)
68 {
69 list_sub (p, q, r, MIN (n, max_r));
70 if (n > max_r)
71 list_set (p + max_r, q + max_r, n - max_r);
72 }
73
74 /* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients
75 of degree m to n+m of rev(a)*c, i.e.
76 b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l)
77 ...
78 b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k)
79 ...
80 b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n].
81 Using auxiliary memory in t.
82 Implements algorithm TKarMul of [1].
83 Assumes deg(c) = l <= m+n.
84 */
85
86 static unsigned int
TKarMul(listz_t b,unsigned int n,listz_t a,unsigned int m,listz_t c,unsigned int l,listz_t t)87 TKarMul (listz_t b, unsigned int n,
88 listz_t a, unsigned int m, listz_t c, unsigned int l, listz_t t)
89 {
90 unsigned int k, mu, nu, h;
91 unsigned int s1;
92 unsigned tot_muls = 0;
93 #ifdef DEBUG
94 fprintf (ECM_STDOUT, "Enter TKarMul.\nm = %d\nn = %d\nl = %d\n", m, n, l);
95 fprintf (ECM_STDOUT, "a = ");
96 print_list (a, m + 1);
97 fprintf (ECM_STDOUT, "\nc = ");
98 print_list (c, l + 1);
99 fprintf (ECM_STDOUT, "\n");
100 #endif
101
102
103 if (n == 0)
104 {
105 #ifdef DEBUG
106 fprintf (ECM_STDOUT, "Case n = 0.\n");
107 #endif
108 mpz_mul (b[0], a[0], c[0]);
109 for (k = 1; (k <= m) && (k <= l); k++)
110 mpz_addmul (b[0], a[k], c[k]);
111 #ifdef DEBUG
112 fprintf (ECM_STDOUT, "Exit TKarMul.\n");
113 #endif
114 return MIN (m, l) + 1;
115 }
116
117 if (m == 0)
118 {
119 #ifdef DEBUG
120 fprintf (ECM_STDOUT, "Case m = 0.\n");
121 #endif
122 for (k = 0; (k <= l) && (k <= n); k++)
123 mpz_mul (b[k], a[0], c[k]);
124 for (k = l + 1; k <= n; k++)
125 mpz_set_ui (b[k], 0);
126 #ifdef DEBUG
127 fprintf (ECM_STDOUT, "Exit TKarMul.\n");
128 #endif
129 return MIN (n, l) + 1;
130 }
131
132 mu = (m / 2) + 1; /* 1 <= mu <= m */
133 nu = (n / 2) + 1; /* 1 <= nu <= n */
134 h = MAX (mu, nu); /* h >= 1 */
135
136 #ifdef DEBUG
137 fprintf (ECM_STDOUT, "mu = %d\nnu = %d\nh = %d\n", mu, nu, h);
138 #endif
139
140 if (mu > n)
141 {
142 #ifdef DEBUG
143 fprintf (ECM_STDOUT, "Case mu > n.\n");
144 #endif
145
146 tot_muls += TKarMul (b, n, a, mu - 1, c, l, t);
147 if (l >= mu)
148 {
149 /* we have to check l-mu <= n + (m-mu), i.e. l <= n+m */
150 tot_muls += TKarMul (t, n, a + mu, m - mu, c + mu, l - mu, t + n + 1);
151 list_add (b, b, t, n + 1);
152 }
153 #ifdef DEBUG
154 fprintf (ECM_STDOUT, "Exit TKarMul.\n");
155 #endif
156 return tot_muls;
157 }
158
159 if (nu > m)
160 {
161 #ifdef DEBUG
162 fprintf (ECM_STDOUT, "Case nu > m.\n");
163 #endif
164
165 /* we have to check MIN(l,m+nu-1) <= nu-1+m: trivial */
166 tot_muls += TKarMul (b, nu - 1, a, m, c, MIN (l, m + nu - 1), t);
167
168 /* Description broken in reference. Should be a list
169 * concatenation, not an addition.
170 * Fixed now.
171 */
172
173 if (l >= nu)
174 {
175 /* we have to check l-nu <= n-nu+m, i.e. l <= n+m: trivial */
176 tot_muls += TKarMul (b + nu, n - nu, a, m, c + nu, l - nu, t);
177 }
178 else
179 list_zero (b + nu, n - nu + 1);
180 #ifdef DEBUG
181 fprintf (ECM_STDOUT, "Exit TKarMul.\n");
182 #endif
183 return tot_muls;
184 }
185
186 /* We want nu = mu */
187
188 mu = nu = h;
189
190 #ifdef DEBUG
191 fprintf (ECM_STDOUT, "Base Case.\n");
192 #endif
193
194 s1 = MIN (l + 1, n + mu);
195 if (l + 1 > nu)
196 list_sub_wrapper (t, c, c + nu, s1, l - nu + 1);
197 else
198 list_set (t, c, s1);
199 #ifdef DEBUG
200 fprintf (ECM_STDOUT, "DEBUG c - c[nu].\n");
201 print_list (t, s1);
202 fprintf (ECM_STDOUT, "We compute (1) - (3)\n");
203 #endif
204 tot_muls += TKarMul (b, nu - 1, a, mu - 1, t, s1 - 1, t + s1);
205 /* (1) - (3) */
206 #ifdef DEBUG
207 print_list (b, nu);
208 fprintf (ECM_STDOUT, "We compute (2) - (4)\n");
209 #endif
210 if (s1 >= nu + 1) { /* nu - 1 */
211 tot_muls += TKarMul (b + nu, n - nu, a + mu, m - mu,
212 t + nu, s1 - nu - 1, t + s1);
213 /* (2) - (4) */
214 }
215 else {
216 list_zero (b + nu, n - nu + 1);
217 }
218 #ifdef DEBUG
219 print_list (b + nu, n - nu + 1);
220 #endif
221 list_add_wrapper (t, a, a + mu, mu, m + 1 - mu);
222 #ifdef DEBUG
223 fprintf (ECM_STDOUT, "We compute (2) + (3)\n");
224 #endif
225 if (l >= nu) {
226 tot_muls += TKarMul (t + mu, nu - 1, t, mu - 1, c + nu, l - nu,
227 t + mu + nu);
228 }
229 else
230 list_zero (t + mu, nu);
231 /* (2) + (3) */
232 #ifdef DEBUG
233 print_list (t + mu, nu);
234 #endif
235 list_add (b, b, t + mu, nu);
236 list_sub (b + nu, t + mu, b + nu, n - nu + 1);
237 return tot_muls;
238 }
239
240 /* Computes the space needed for TKarMul of b[0..n],
241 * a[0..m] and c[0..l]
242 */
243
244 static unsigned int
TKarMul_space(unsigned int n,unsigned int m,unsigned int l)245 TKarMul_space (unsigned int n, unsigned int m, unsigned int l)
246 {
247 unsigned int mu, nu, h;
248 unsigned int s1;
249
250 unsigned int r1, r2;
251
252 if (n == 0)
253 return 0;
254
255 if (m == 0)
256 return 0;
257
258 mu = (m / 2) + 1;
259 nu = (n / 2) + 1;
260 h = MAX (mu, nu);
261
262 if (mu > n)
263 {
264 r1 = TKarMul_space (n, mu - 1, l);
265 if (l >= mu)
266 {
267 r2 = TKarMul_space (n, m - mu, l - mu) + n + 1;
268 r1 = MAX (r1, r2);
269 }
270 return r1;
271 }
272
273 if (nu > m)
274 {
275 r1 = TKarMul_space (nu - 1, m, MIN (l, m + nu - 1));
276
277 if (l >= nu)
278 {
279 r2 = TKarMul_space (n - nu, m,l - nu);
280 r1 = MAX (r1, r2);
281 }
282 return r1;
283 }
284
285 mu = nu = h;
286
287 s1 = MIN (l + 1, n + mu);
288 r1 = TKarMul_space (nu - 1, mu - 1, s1 - 1) + s1;
289 if (s1 >= nu + 1) {
290 r2 = TKarMul_space (n - nu, m - mu, s1 - nu - 1) + s1;
291 r1 = MAX (r1, r2);
292 }
293 if (l >= nu) {
294 r2 = TKarMul_space (nu - 1, mu - 1, l - nu) + mu + nu;
295 r1 = MAX (r1, r2);
296 }
297 return r1;
298 }
299
300 /* list_sub with bound checking
301 */
302
303 static void
list_sub_safe(listz_t ret,listz_t a,listz_t b,unsigned int sizea,unsigned int sizeb,unsigned int needed)304 list_sub_safe (listz_t ret, listz_t a, listz_t b,
305 unsigned int sizea, unsigned int sizeb,
306 unsigned int needed)
307 {
308 unsigned int i;
309 unsigned int safe;
310 safe = MIN(sizea, sizeb);
311 safe = MIN(safe, needed);
312
313 list_sub (ret, a, b, safe);
314
315 i = safe;
316 while (i < needed)
317 {
318 if (i < sizea)
319 {
320 if (i < sizeb)
321 mpz_sub (ret[i], a[i], b[i]);
322 else
323 mpz_set (ret[i], a[i]);
324 }
325 else
326 {
327 if (i < sizeb)
328 mpz_neg (ret[i], b[i]);
329 else
330 mpz_set_ui (ret[i], 0);
331 }
332 i++;
333 }
334 }
335
336 /* list_add with bound checking
337 */
338
339 static void
list_add_safe(listz_t ret,listz_t a,listz_t b,unsigned int sizea,unsigned int sizeb,unsigned int needed)340 list_add_safe (listz_t ret, listz_t a, listz_t b,
341 unsigned int sizea, unsigned int sizeb,
342 unsigned int needed)
343 {
344 unsigned int i;
345 unsigned int safe;
346 safe = MIN(sizea, sizeb);
347 safe = MIN(safe, needed);
348
349 list_add (ret, a, b, i = safe);
350
351 while (i < needed)
352 {
353 if (i < sizea)
354 {
355 if (i < sizeb)
356 mpz_add (ret[i], a[i], b[i]);
357 else
358 mpz_set (ret[i], a[i]);
359 }
360 else
361 {
362 if (i < sizeb)
363 mpz_set (ret[i], b[i]);
364 else
365 mpz_set_ui (ret[i], 0);
366 }
367 i++;
368 }
369 }
370
371 static unsigned int
TToomCookMul(listz_t b,unsigned int n,listz_t a,unsigned int m,listz_t c,unsigned int l,listz_t tmp)372 TToomCookMul (listz_t b, unsigned int n,
373 listz_t a, unsigned int m, listz_t c, unsigned int l,
374 listz_t tmp)
375 {
376 unsigned int nu, mu, h;
377 unsigned int i;
378 unsigned int btmp;
379 unsigned int tot_muls = 0;
380
381 nu = n / 3 + 1;
382 mu = m / 3 + 1;
383
384 /* ensures n + 1 > 2 * nu */
385 if ((n < 2 * nu) || (m < 2 * mu))
386 {
387 #ifdef DEBUG
388 fprintf (ECM_STDOUT, "Too small operands, calling TKara.\n");
389 #endif
390 return TKarMul (b, n, a, m, c, l, tmp);
391 }
392
393 /* First strip unnecessary trailing coefficients of c:
394 */
395
396 l = MIN(l, n + m);
397
398 /* Now the degenerate cases. We want 2 * nu <= m.
399 *
400 */
401
402 if (m < 2 * nu)
403 {
404 #ifdef DEBUG
405 fprintf (ECM_STDOUT, "Degenerate Case 1.\n");
406 #endif
407 tot_muls += TToomCookMul (b, nu - 1, a, m, c, l, tmp);
408 if (l >= nu)
409 tot_muls += TToomCookMul (b + nu, nu - 1, a, m,
410 c + nu, l - nu, tmp);
411 else
412 list_zero (b + nu, nu);
413 if (l >= 2 * nu) /* n >= 2 * nu is assured. Hopefully */
414 tot_muls += TToomCookMul (b + 2 * nu, n - 2 * nu, a, m,
415 c + 2 * nu, l - 2 * nu, tmp);
416 else
417 list_zero (b + 2 * nu, n - 2 * nu + 1);
418 return tot_muls;
419 }
420
421 /* Second degenerate case. We want 2 * mu <= n.
422 */
423
424 if (n < 2 * mu)
425 {
426 #ifdef DEBUG
427 fprintf (ECM_STDOUT, "Degenerate Case 2.\n");
428 #endif
429 tot_muls += TToomCookMul (b, n, a, mu - 1, c, l, tmp);
430 if (l >= mu)
431 {
432 tot_muls += TToomCookMul (tmp, n, a + mu, mu - 1,
433 c + mu, l - mu, tmp + n + 1);
434 list_add (b, b, tmp, n + 1);
435 }
436 if (l >= 2 * mu)
437 {
438 tot_muls += TToomCookMul (tmp, n, a + 2 * mu, m - 2 * mu,
439 c + 2 * mu, l - 2 * mu, tmp + n + 1);
440 list_add (b, b, tmp, n + 1);
441 }
442 return tot_muls;
443 }
444
445 #ifdef DEBUG
446 fprintf (ECM_STDOUT, "Base Case.\n");
447 fprintf (ECM_STDOUT, "a = ");
448 print_list (a, m + 1);
449
450 fprintf (ECM_STDOUT, "\nc = ");
451 print_list (c, l + 1);
452 #endif
453 h = MAX(nu, mu);
454 nu = mu = h;
455
456 list_sub_safe (tmp, c + 3 * h, c + h,
457 (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
458 (l + 1 > h ? l + 1 - h : 0), 2 * h - 1);
459 list_sub_safe (tmp + 2 * h - 1, c, c + 2 * h,
460 l + 1, (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
461 2 * h - 1);
462 for (i = 0; i < 2 * h - 1; i++)
463 mpz_mul_2exp (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], 1);
464
465 #ifdef DEBUG
466 print_list (tmp, 4 * h - 2);
467 #endif
468
469 /* --------------------------------
470 * | 0 .. 2*h-2 | 2*h-1 .. 4*h-3 |
471 * --------------------------------
472 * | c3 - c1 | 2(c0 - c2) |
473 * --------------------------------
474 */
475
476 list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, tmp, 2 * h - 1);
477
478 tot_muls += TToomCookMul (b, h - 1, a, h - 1, tmp + 2 * h - 1,
479 2 * h - 2, tmp + 4 * h - 2);
480
481 /* b[0 .. h - 1] = 2 * m0 */
482
483 #ifdef DEBUG
484 fprintf (ECM_STDOUT, "2 * m0 = ");
485 print_list (b, h);
486 #endif
487
488 list_add (tmp + 2 * h - 1, a, a + h, h);
489
490 list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, a + 2 * h,
491 MIN(h, m + 1 - 2 * h));
492
493 /* tmp[2*h-1 .. 3*h-2] = a0 + a1 + a2 */
494
495 #ifdef DEBUG
496 fprintf (ECM_STDOUT, "\na0 + a1 + a2 = ");
497 print_list (tmp + 2 * h - 1, h);
498 #endif
499
500 list_sub_safe (tmp + 3 * h - 1, c + 2 * h, c + 3 * h,
501 (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
502 (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
503 2 * h - 1);
504
505 /* -------------------------------------------------
506 * | 0 .. 2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 |
507 * -------------------------------------------------
508 * | c3 - c1 | a0 + a1 + a2 | c2 - c3 |
509 * -------------------------------------------------
510 */
511
512 btmp = (l + 1 > h ? l + 1 - h : 0);
513 btmp = MIN(btmp, 2 * h - 1);
514 for (i = 0; i < btmp; i++)
515 {
516 mpz_mul_2exp (tmp[5 * h - 2 + i], c[h + i], 1);
517 mpz_add (tmp[5 * h - 2 + i], tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
518 }
519 while (i < 2 * h - 1)
520 {
521 mpz_set (tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
522 i++;
523 }
524
525 tot_muls += TToomCookMul (b + h, h - 1, tmp + 2 * h - 1, h - 1,
526 tmp + 5 * h - 2, 2 * h - 2,
527 tmp + 7 * h - 3);
528
529 /* b[h .. 2 * h - 1] = 2 * m1 */
530 #ifdef DEBUG
531 fprintf (ECM_STDOUT, "\n2 * m1 = ");
532 print_list (b + h, h);
533 #endif
534
535 /* ------------------------------------------------------------------
536 * | 0 .. 2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 | 5*h-2 .. 7*h-4 |
537 * ------------------------------------------------------------------
538 * | c3 - c1 | a0 + a1 + a2 | c2 - c3 | c2 - c3 + 2c1 |
539 * ------------------------------------------------------------------
540 */
541
542
543 for (i = 0; i < h; i++)
544 {
545 mpz_add (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], a[i + h]);
546 if (2 * h + i <= m)
547 mpz_addmul_ui (tmp[2 * h - 1 + i], a[2 * h + i], 3);
548 }
549 tot_muls += TToomCookMul (tmp + 5 * h - 2, h - 1,
550 tmp + 2 * h - 1, h - 1,
551 tmp, 2 * h - 2, tmp + 6 * h - 2);
552
553 /* tmp[5*h-2 .. 6*h - 3] = 6 * m2 */
554
555 #ifdef DEBUG
556 fprintf (ECM_STDOUT, "\n6 * m2 = ");
557 print_list (tmp + 5 * h - 2, h);
558 #endif
559 for (i = 0; i < h; i++)
560 {
561 mpz_sub (tmp[2 * h - 1 + i], a[i], a[h + i]);
562 if (i + 2 * h <= m)
563 mpz_add (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], a[2 * h + i]);
564 }
565
566 for (i = 0; i < 2 * h - 1; i++)
567 {
568 mpz_mul_ui (tmp[3 * h - 1 + i], tmp[3 * h - 1 + i], 3);
569 mpz_mul_2exp (tmp[i], tmp[i], 1);
570 }
571
572 list_add (tmp + 3 * h - 1, tmp + 3 * h - 1, tmp, 2 * h - 1);
573
574 tot_muls += TToomCookMul (tmp + 6 * h - 2, h - 1,
575 tmp + 2 * h - 1, h - 1,
576 tmp + 3 * h - 1, 2 * h - 2,
577 tmp + 7 * h - 2);
578
579 /* tmp[6h-2 .. 7h - 3] = 6 * mm1 */
580
581 #ifdef DEBUG
582 fprintf (ECM_STDOUT, "\n6 * mm1 = ");
583 print_list (tmp + 6 * h - 2, h);
584 #endif
585 list_add_safe (tmp, tmp, c + 2 * h,
586 2 * h,
587 (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
588 2 * h - 1);
589
590 list_sub_safe (tmp, c + 4 * h, tmp,
591 (l + 1 > 4 * h ? l + 1 - 4 * h : 0),
592 2 * h - 1, 2 * h - 1);
593
594 tot_muls += TToomCookMul (b + 2 * h, n - 2 * h, a + 2 * h, m - 2 * h,
595 tmp, 2 * h - 1, tmp + 7 * h - 2);
596
597 /* b[2 * h .. n] = minf */
598
599 #ifdef DEBUG
600 fprintf (ECM_STDOUT, "\nminf = ");
601 print_list (b + 2 * h, n + 1 - 2 * h);
602 #endif
603
604 /* Layout of b :
605 * ---------------------------------------
606 * | 0 ... h-1 | h ... 2*h-1 | 2*h ... n |
607 * ---------------------------------------
608 * | 2 * m0 | 2 * m1 | minf |
609 * ---------------------------------------
610 *
611 * Layout of tmp :
612 * ---------------------------------------------------
613 * | 0 ... 5*h-1 | 5*h-2 ... 6*h-3 | 6*h-2 ... 7*h-3 |
614 * ---------------------------------------------------
615 * | ?????? | 6 * m2 | 6 * mm1 |
616 * ---------------------------------------------------
617 */
618
619 list_add (tmp, tmp + 5 * h - 2, tmp + 6 * h - 2, h);
620 for (i = 0; i < h; i++)
621 mpz_divby3_1op (tmp[i]);
622
623 /* t1 = 2 (m2 + mm1)
624 * tmp[0 .. h - 1] = t1
625 */
626
627 list_add (b, b, b + h, h);
628 list_add (b, b, tmp, h);
629 for (i = 0; i < h; i++)
630 mpz_tdiv_q_2exp (b[i], b[i], 1);
631
632 /* b_{low} should be correct */
633
634 list_add (tmp + h, b + h, tmp, h);
635
636 /* t2 = t1 + 2 m1
637 * tmp[h .. 2h - 1] = t2
638 */
639
640 list_add (b + h, tmp, tmp + h, h);
641 list_sub (b + h, b + h, tmp + 6 * h - 2, h);
642 for (i = 0; i < h; i++)
643 mpz_tdiv_q_2exp (b[h + i], b[h + i], 1);
644
645 /* b_{mid} should be correct */
646
647 list_add (tmp + h, tmp + h, tmp + 5 * h - 2, n + 1 - 2 * h);
648 for (i = 0; i < n + 1 - 2 * h; i++)
649 mpz_tdiv_q_2exp (tmp[h + i], tmp[h + i], 1);
650
651 list_add (b + 2 * h, b + 2 * h, tmp + h, n + 1 - 2 * h);
652 /* b_{high} should be correct */
653
654 return tot_muls;
655 }
656
657 /* Returns space needed by TToomCookMul */
658
659 unsigned int
TToomCookMul_space(unsigned int n,unsigned int m,unsigned int l)660 TToomCookMul_space (unsigned int n, unsigned int m, unsigned int l)
661
662 {
663 unsigned int nu, mu, h;
664 unsigned int stmp1, stmp2;
665
666 nu = n / 3 + 1;
667 mu = m / 3 + 1;
668
669 stmp1 = stmp2 = 0;
670
671 /* ensures n + 1 > 2 * nu */
672 if ((n < 2 * nu) || (m < 2 * mu))
673 return TKarMul_space (n, m, l);
674
675 /* First strip unnecessary trailing coefficients of c:
676 */
677
678 l = MIN(l, n + m);
679
680 /* Now the degenerate cases. We want 2 * nu < m.
681 *
682 */
683
684 if (m <= 2 * nu)
685 {
686 stmp1 = TToomCookMul_space (nu - 1, m, l);
687 if (l >= 2 * nu)
688 stmp2 = TToomCookMul_space (n - 2 * nu, m, l - 2 * nu);
689 else if (l >= nu)
690 stmp2 = TToomCookMul_space (nu - 1, m, l - nu);
691 return MAX(stmp1, stmp2);
692 }
693
694 /* Second degenerate case. We want 2 * mu < n.
695 */
696
697 if (n <= 2 * mu)
698 {
699 stmp1 += TToomCookMul_space (n, mu - 1, l);
700 if (l >= 2 * mu)
701 stmp2 = TToomCookMul_space (n, m - 2 * mu, l - 2 * mu) + n + 1;
702 else if (l >= mu)
703 stmp2 = TToomCookMul_space (n, mu - 1, l - mu) + n + 1;
704 return MAX(stmp1, stmp2);
705 }
706
707 h = MAX(nu, mu);
708
709 stmp1 = TToomCookMul_space (h - 1, h - 1, 2 * h - 2);
710 stmp2 = stmp1 + 7 * h - 2;
711 stmp1 = stmp1 + 6 * h - 2;
712 stmp1 = MAX(stmp1, stmp2);
713 stmp2 = TToomCookMul_space (n - 2 * h, m - 2 * h, 2 * h - 1) + 7*h-2;
714 return MAX(stmp1, stmp2);
715 }
716
717 /* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients
718 of degree m to n+m of rev(a)*c, i.e.
719 b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l)
720 ...
721 b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k)
722 ...
723 b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n].
724 Using auxiliary memory in tmp.
725
726 Assumes n <= l.
727
728 Returns number of multiplications if known, 0 if not known,
729 and -1 for error.
730 */
731 int
TMulGen(listz_t b,unsigned int n,listz_t a,unsigned int m,listz_t c,unsigned int l,listz_t tmp,mpz_t modulus)732 TMulGen (listz_t b, unsigned int n, listz_t a, unsigned int m,
733 listz_t c, unsigned int l, listz_t tmp, mpz_t modulus)
734 {
735 ASSERT (n <= l);
736
737 if (Fermat)
738 {
739 unsigned int i;
740 for (i = l + 1; i > 1 && (i&1) == 0; i >>= 1);
741 ASSERT(i == 1);
742 ASSERT(n + 1 == (l + 1) / 2);
743 ASSERT(m == l - n || m + 1 == l - n);
744 return F_mul_trans (b, a, c, m + 1, l + 1, Fermat, tmp);
745 }
746
747 if ((double) n * (double) mpz_sizeinbase (modulus, 2) >= KS_TMUL_THRESHOLD)
748 {
749 if (TMulKS (b, n, a, m, c, l, modulus, 1)) /* Non-zero means error */
750 return -1;
751 return 0; /* We have no mul count so we return 0 */
752 }
753
754 return TToomCookMul (b, n, a, m, c, l, tmp);
755 }
756
757
758 unsigned int
TMulGen_space(unsigned int n,unsigned int m,unsigned int l)759 TMulGen_space (unsigned int n, unsigned int m, unsigned int l)
760 {
761 if (Fermat)
762 return 2 * (l + 1);
763 else
764 return TToomCookMul_space (n, m, l);
765 }
766