1 /* Median/middle product.
2 
3 Copyright 2003, 2004, 2005, 2006, 2007, 2008 Laurent Fousse, Paul Zimmermann,
4 Alexander Kruppa, Dave Newman.
5 
6 This file is part of the ECM Library.
7 
8 The ECM Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The ECM Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the ECM Library; see the file COPYING.LIB.  If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 /* Reference:
24 [1] Tellegen's Principle into Practice, by A. Bostan, G. Lecerf and E. Schost,
25 Proc. of ISSAC'03, Philadelphia, 2003.
26 */
27 
28 #include <stdio.h>
29 #include "ecm-impl.h"
30 
31 #ifndef MAX
32 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
33 #endif
34 
35 #ifndef MIN
36 #define MIN(a,b) (((a) < (b)) ? (a) : (b))
37 #endif
38 
39 extern unsigned int Fermat;
40 
41 static void list_add_wrapper (listz_t, listz_t, listz_t, unsigned int,
42                               unsigned int);
43 static void list_sub_wrapper (listz_t, listz_t, listz_t, unsigned int,
44                               unsigned int);
45 static unsigned int TKarMul  (listz_t, unsigned int, listz_t, unsigned int,
46                               listz_t, unsigned int, listz_t);
47 static void list_sub_safe    (listz_t, listz_t, listz_t, unsigned int,
48                               unsigned int, unsigned int);
49 static void list_add_safe    (listz_t, listz_t, listz_t, unsigned int,
50                               unsigned int, unsigned int);
51 static unsigned int TToomCookMul (listz_t, unsigned int, listz_t, unsigned int,
52                                   listz_t, unsigned int, listz_t);
53 static unsigned int TToomCookMul_space (unsigned int, unsigned int,
54                                         unsigned int);
55 
56 static void
list_add_wrapper(listz_t p,listz_t q,listz_t r,unsigned int n,unsigned int max_r)57 list_add_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n,
58                   unsigned int max_r)
59 {
60     list_add (p, q, r, MIN (n, max_r));
61     if (n > max_r)
62       list_set (p + max_r, q + max_r, n - max_r);
63 }
64 
65 static void
list_sub_wrapper(listz_t p,listz_t q,listz_t r,unsigned int n,unsigned int max_r)66 list_sub_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n,
67                   unsigned int max_r)
68 {
69     list_sub (p, q, r, MIN (n, max_r));
70     if (n > max_r)
71         list_set (p + max_r, q + max_r, n - max_r);
72 }
73 
74 /* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients
75    of degree m to n+m of rev(a)*c, i.e.
76    b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l)
77    ...
78    b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k)
79    ...
80    b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n].
81    Using auxiliary memory in t.
82    Implements algorithm TKarMul of [1].
83    Assumes deg(c) = l <= m+n.
84 */
85 
86 static unsigned int
TKarMul(listz_t b,unsigned int n,listz_t a,unsigned int m,listz_t c,unsigned int l,listz_t t)87 TKarMul (listz_t b, unsigned int n,
88 	 listz_t a, unsigned int m, listz_t c, unsigned int l, listz_t t)
89 {
90   unsigned int k, mu, nu, h;
91   unsigned int s1;
92   unsigned tot_muls = 0;
93 #ifdef DEBUG
94   fprintf (ECM_STDOUT, "Enter TKarMul.\nm = %d\nn = %d\nl = %d\n", m, n, l);
95   fprintf (ECM_STDOUT, "a = ");
96   print_list (a, m + 1);
97   fprintf (ECM_STDOUT, "\nc = ");
98   print_list (c, l + 1);
99   fprintf (ECM_STDOUT, "\n");
100 #endif
101 
102 
103   if (n == 0)
104     {
105 #ifdef DEBUG
106       fprintf (ECM_STDOUT, "Case n = 0.\n");
107 #endif
108       mpz_mul (b[0], a[0], c[0]);
109       for (k = 1; (k <= m) && (k <= l); k++)
110 	mpz_addmul (b[0], a[k], c[k]);
111 #ifdef DEBUG
112       fprintf (ECM_STDOUT, "Exit TKarMul.\n");
113 #endif
114       return MIN (m, l) + 1;
115     }
116 
117   if (m == 0)
118     {
119 #ifdef DEBUG
120       fprintf (ECM_STDOUT, "Case m = 0.\n");
121 #endif
122       for (k = 0; (k <= l) && (k <= n); k++)
123 	mpz_mul (b[k], a[0], c[k]);
124       for (k = l + 1; k <= n; k++)
125 	mpz_set_ui (b[k], 0);
126 #ifdef DEBUG
127       fprintf (ECM_STDOUT, "Exit TKarMul.\n");
128 #endif
129       return MIN (n, l) + 1;
130     }
131 
132   mu = (m / 2) + 1;		/* 1 <= mu <= m */
133   nu = (n / 2) + 1;		/* 1 <= nu <= n */
134   h = MAX (mu, nu);		/* h >= 1 */
135 
136 #ifdef DEBUG
137   fprintf (ECM_STDOUT, "mu = %d\nnu = %d\nh = %d\n", mu, nu, h);
138 #endif
139 
140   if (mu > n)
141     {
142 #ifdef DEBUG
143       fprintf (ECM_STDOUT, "Case mu > n.\n");
144 #endif
145 
146       tot_muls += TKarMul (b, n, a, mu - 1, c, l, t);
147       if (l >= mu)
148 	{
149 	  /* we have to check l-mu <= n + (m-mu), i.e. l <= n+m */
150 	  tot_muls += TKarMul (t, n, a + mu, m - mu, c + mu, l - mu, t + n + 1);
151 	  list_add (b, b, t, n + 1);
152 	}
153 #ifdef DEBUG
154       fprintf (ECM_STDOUT, "Exit TKarMul.\n");
155 #endif
156       return tot_muls;
157     }
158 
159   if (nu > m)
160     {
161 #ifdef DEBUG
162       fprintf (ECM_STDOUT, "Case nu > m.\n");
163 #endif
164 
165       /* we have to check MIN(l,m+nu-1) <= nu-1+m: trivial */
166       tot_muls += TKarMul (b, nu - 1, a, m, c, MIN (l, m + nu - 1), t);
167 
168       /* Description broken in reference. Should be a list
169        * concatenation, not an addition.
170        * Fixed now.
171        */
172 
173       if (l >= nu)
174 	{
175 	  /* we have to check l-nu <= n-nu+m, i.e. l <= n+m: trivial */
176 	  tot_muls += TKarMul (b + nu, n - nu, a, m, c + nu, l - nu, t);
177 	}
178       else
179         list_zero (b + nu, n - nu + 1);
180 #ifdef DEBUG
181       fprintf (ECM_STDOUT, "Exit TKarMul.\n");
182 #endif
183       return tot_muls;
184     }
185 
186   /* We want nu = mu */
187 
188   mu = nu = h;
189 
190 #ifdef DEBUG
191   fprintf (ECM_STDOUT, "Base Case.\n");
192 #endif
193 
194   s1 = MIN (l + 1, n + mu);
195   if (l + 1 > nu)
196     list_sub_wrapper (t, c, c + nu, s1, l - nu + 1);
197   else
198     list_set (t, c, s1);
199 #ifdef DEBUG
200       fprintf (ECM_STDOUT, "DEBUG c - c[nu].\n");
201       print_list (t, s1);
202       fprintf (ECM_STDOUT, "We compute (1) - (3)\n");
203 #endif
204       tot_muls += TKarMul (b, nu - 1, a, mu - 1, t, s1 - 1, t + s1);
205       /* (1) - (3) */
206 #ifdef DEBUG
207       print_list (b, nu);
208       fprintf (ECM_STDOUT, "We compute (2) - (4)\n");
209 #endif
210       if (s1 >= nu + 1) { /* nu - 1 */
211         tot_muls += TKarMul (b + nu, n - nu, a + mu, m - mu,
212                              t + nu, s1 - nu - 1, t + s1);
213         /* (2) - (4) */
214       }
215       else {
216           list_zero (b + nu, n - nu + 1);
217       }
218 #ifdef DEBUG
219       print_list (b + nu, n - nu + 1);
220 #endif
221       list_add_wrapper (t, a, a + mu, mu, m + 1 - mu);
222 #ifdef DEBUG
223       fprintf (ECM_STDOUT, "We compute (2) + (3)\n");
224 #endif
225       if (l >= nu) {
226           tot_muls += TKarMul (t + mu, nu - 1, t, mu - 1, c + nu, l - nu,
227                                t + mu + nu);
228       }
229       else
230           list_zero (t + mu, nu);
231       /* (2) + (3) */
232 #ifdef DEBUG
233       print_list (t + mu, nu);
234 #endif
235       list_add (b, b, t + mu, nu);
236       list_sub (b + nu, t + mu, b + nu, n - nu + 1);
237       return tot_muls;
238 }
239 
240 /* Computes the space needed for TKarMul of b[0..n],
241  * a[0..m] and c[0..l]
242  */
243 
244 static unsigned int
TKarMul_space(unsigned int n,unsigned int m,unsigned int l)245 TKarMul_space (unsigned int n, unsigned int m, unsigned int l)
246 {
247   unsigned int mu, nu, h;
248   unsigned int s1;
249 
250   unsigned int r1, r2;
251 
252   if (n == 0)
253       return 0;
254 
255   if (m == 0)
256       return 0;
257 
258   mu = (m / 2) + 1;
259   nu = (n / 2) + 1;
260   h = MAX (mu, nu);
261 
262   if (mu > n)
263     {
264       r1 = TKarMul_space (n, mu - 1, l);
265       if (l >= mu)
266 	{
267 	  r2 = TKarMul_space (n, m - mu, l - mu) + n + 1;
268           r1 = MAX (r1, r2);
269 	}
270       return r1;
271     }
272 
273   if (nu > m)
274     {
275       r1 = TKarMul_space (nu - 1, m, MIN (l, m + nu - 1));
276 
277       if (l >= nu)
278 	{
279 	  r2 = TKarMul_space (n - nu, m,l - nu);
280           r1 = MAX (r1, r2);
281 	}
282       return r1;
283     }
284 
285   mu = nu = h;
286 
287   s1 = MIN (l + 1, n + mu);
288   r1 = TKarMul_space (nu - 1, mu - 1, s1 - 1) + s1;
289   if (s1 >= nu + 1) {
290     r2 = TKarMul_space (n - nu, m - mu, s1 - nu - 1) + s1;
291     r1 = MAX (r1, r2);
292   }
293   if (l >= nu) {
294     r2 = TKarMul_space (nu - 1, mu - 1, l - nu) + mu + nu;
295     r1 = MAX (r1, r2);
296   }
297   return r1;
298 }
299 
300 /* list_sub with bound checking
301  */
302 
303 static void
list_sub_safe(listz_t ret,listz_t a,listz_t b,unsigned int sizea,unsigned int sizeb,unsigned int needed)304 list_sub_safe (listz_t ret, listz_t a, listz_t b,
305                unsigned int sizea, unsigned int sizeb,
306                unsigned int needed)
307 {
308     unsigned int i;
309     unsigned int safe;
310     safe = MIN(sizea, sizeb);
311     safe = MIN(safe, needed);
312 
313     list_sub (ret, a, b, safe);
314 
315     i = safe;
316     while (i < needed)
317     {
318         if (i < sizea)
319         {
320             if (i < sizeb)
321                 mpz_sub (ret[i], a[i], b[i]);
322             else
323                 mpz_set (ret[i], a[i]);
324         }
325         else
326         {
327             if (i < sizeb)
328                 mpz_neg (ret[i], b[i]);
329             else
330                 mpz_set_ui (ret[i], 0);
331         }
332         i++;
333     }
334 }
335 
336 /* list_add with bound checking
337  */
338 
339 static void
list_add_safe(listz_t ret,listz_t a,listz_t b,unsigned int sizea,unsigned int sizeb,unsigned int needed)340 list_add_safe (listz_t ret, listz_t a, listz_t b,
341                         unsigned int sizea, unsigned int sizeb,
342                         unsigned int needed)
343 {
344     unsigned int i;
345     unsigned int safe;
346     safe = MIN(sizea, sizeb);
347     safe = MIN(safe, needed);
348 
349     list_add (ret, a, b, i = safe);
350 
351     while (i < needed)
352     {
353         if (i < sizea)
354         {
355             if (i < sizeb)
356                 mpz_add (ret[i], a[i], b[i]);
357             else
358                 mpz_set (ret[i], a[i]);
359         }
360         else
361         {
362             if (i < sizeb)
363                 mpz_set (ret[i], b[i]);
364             else
365                 mpz_set_ui (ret[i], 0);
366         }
367         i++;
368     }
369 }
370 
371 static unsigned int
TToomCookMul(listz_t b,unsigned int n,listz_t a,unsigned int m,listz_t c,unsigned int l,listz_t tmp)372 TToomCookMul (listz_t b, unsigned int n,
373               listz_t a, unsigned int m, listz_t c, unsigned int l,
374               listz_t tmp)
375 {
376     unsigned int nu, mu, h;
377     unsigned int i;
378     unsigned int btmp;
379     unsigned int tot_muls = 0;
380 
381     nu = n / 3 + 1;
382     mu = m / 3 + 1;
383 
384     /* ensures n + 1 > 2 * nu */
385     if ((n < 2 * nu) || (m < 2 * mu))
386     {
387 #ifdef DEBUG
388         fprintf (ECM_STDOUT, "Too small operands, calling TKara.\n");
389 #endif
390         return TKarMul (b, n, a, m, c, l, tmp);
391     }
392 
393     /* First strip unnecessary trailing coefficients of c:
394      */
395 
396     l = MIN(l, n + m);
397 
398     /* Now the degenerate cases. We want 2 * nu <= m.
399      *
400      */
401 
402     if (m < 2 * nu)
403     {
404 #ifdef DEBUG
405         fprintf (ECM_STDOUT, "Degenerate Case 1.\n");
406 #endif
407         tot_muls += TToomCookMul (b, nu - 1, a, m, c, l, tmp);
408         if (l >= nu)
409             tot_muls += TToomCookMul (b + nu, nu - 1, a, m,
410                                       c + nu, l - nu, tmp);
411         else
412             list_zero (b + nu, nu);
413         if (l >= 2 * nu) /* n >= 2 * nu is assured. Hopefully */
414             tot_muls += TToomCookMul (b + 2 * nu, n - 2 * nu, a, m,
415                                       c + 2 * nu, l - 2 * nu, tmp);
416         else
417             list_zero (b + 2 * nu, n - 2 * nu + 1);
418         return tot_muls;
419     }
420 
421     /* Second degenerate case. We want 2 * mu <= n.
422      */
423 
424     if (n < 2 * mu)
425     {
426 #ifdef DEBUG
427         fprintf (ECM_STDOUT, "Degenerate Case 2.\n");
428 #endif
429         tot_muls += TToomCookMul (b, n, a, mu - 1, c, l, tmp);
430         if (l >= mu)
431         {
432             tot_muls += TToomCookMul (tmp, n, a + mu, mu - 1,
433                                       c + mu, l - mu, tmp + n + 1);
434             list_add (b, b, tmp, n + 1);
435         }
436         if (l >= 2 * mu)
437         {
438             tot_muls += TToomCookMul (tmp, n, a + 2 * mu, m - 2 * mu,
439                                       c + 2 * mu, l - 2 * mu, tmp + n + 1);
440             list_add (b, b, tmp, n + 1);
441         }
442         return tot_muls;
443     }
444 
445 #ifdef DEBUG
446     fprintf (ECM_STDOUT, "Base Case.\n");
447     fprintf (ECM_STDOUT, "a = ");
448     print_list (a, m + 1);
449 
450     fprintf (ECM_STDOUT, "\nc = ");
451     print_list (c, l + 1);
452 #endif
453     h = MAX(nu, mu);
454     nu = mu = h;
455 
456     list_sub_safe (tmp, c + 3 * h, c + h,
457                    (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
458                    (l + 1 > h ? l + 1 - h : 0), 2 * h - 1);
459     list_sub_safe (tmp + 2 * h - 1, c, c + 2 * h,
460                    l + 1, (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
461                    2 * h - 1);
462     for (i = 0; i < 2 * h - 1; i++)
463         mpz_mul_2exp (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], 1);
464 
465 #ifdef DEBUG
466     print_list (tmp, 4 * h - 2);
467 #endif
468 
469     /* --------------------------------
470      * | 0 ..  2*h-2 | 2*h-1 .. 4*h-3 |
471      * --------------------------------
472      * | c3 - c1     |   2(c0 - c2)   |
473      * --------------------------------
474      */
475 
476     list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, tmp, 2 * h - 1);
477 
478     tot_muls += TToomCookMul (b, h - 1, a, h - 1, tmp + 2 * h - 1,
479                               2 * h - 2, tmp + 4 * h - 2);
480 
481     /* b[0 .. h - 1] = 2 * m0 */
482 
483 #ifdef DEBUG
484     fprintf (ECM_STDOUT, "2 * m0 = ");
485     print_list (b, h);
486 #endif
487 
488     list_add (tmp + 2 * h - 1, a, a + h, h);
489 
490     list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, a + 2 * h,
491               MIN(h, m + 1 - 2 * h));
492 
493     /* tmp[2*h-1 .. 3*h-2] = a0 + a1 + a2 */
494 
495 #ifdef DEBUG
496     fprintf (ECM_STDOUT, "\na0 + a1 + a2 = ");
497     print_list (tmp + 2 * h - 1, h);
498 #endif
499 
500     list_sub_safe (tmp + 3 * h - 1, c + 2 * h, c + 3 * h,
501                    (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
502                    (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
503                    2 * h - 1);
504 
505     /* -------------------------------------------------
506      * | 0 ..  2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 |
507      * -------------------------------------------------
508      * | c3 - c1     |  a0 + a1 + a2  |   c2 - c3      |
509      * -------------------------------------------------
510      */
511 
512     btmp = (l + 1 > h ? l + 1 - h : 0);
513     btmp = MIN(btmp, 2 * h - 1);
514     for (i = 0; i < btmp; i++)
515       {
516         mpz_mul_2exp (tmp[5 * h - 2 + i], c[h + i], 1);
517         mpz_add (tmp[5 * h - 2 + i], tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
518       }
519     while (i < 2 * h - 1)
520       {
521         mpz_set (tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
522         i++;
523       }
524 
525     tot_muls += TToomCookMul (b + h, h - 1, tmp + 2 * h - 1, h - 1,
526                               tmp + 5 * h - 2, 2 * h - 2,
527                               tmp + 7 * h - 3);
528 
529     /* b[h .. 2 * h - 1] = 2 * m1 */
530 #ifdef DEBUG
531     fprintf (ECM_STDOUT, "\n2 * m1 = ");
532     print_list (b + h, h);
533 #endif
534 
535     /* ------------------------------------------------------------------
536      * | 0 ..  2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 | 5*h-2 .. 7*h-4 |
537      * ------------------------------------------------------------------
538      * | c3 - c1     |  a0 + a1 + a2  |   c2 - c3      | c2 - c3 + 2c1  |
539      * ------------------------------------------------------------------
540      */
541 
542 
543     for (i = 0; i < h; i++)
544     {
545         mpz_add (tmp[2 * h  - 1 + i], tmp[2 * h  - 1 + i], a[i + h]);
546         if (2 * h + i <= m)
547           mpz_addmul_ui (tmp[2 * h  - 1 + i], a[2 * h + i], 3);
548     }
549     tot_muls += TToomCookMul (tmp + 5 * h - 2, h - 1,
550                               tmp + 2 * h - 1, h - 1,
551                               tmp, 2 * h - 2, tmp + 6 * h - 2);
552 
553     /* tmp[5*h-2 .. 6*h - 3] = 6 * m2  */
554 
555 #ifdef DEBUG
556     fprintf (ECM_STDOUT, "\n6 * m2 = ");
557     print_list (tmp + 5 * h - 2, h);
558 #endif
559     for (i = 0; i < h; i++)
560     {
561         mpz_sub (tmp[2 * h - 1 + i], a[i], a[h + i]);
562         if (i + 2 * h <= m)
563             mpz_add (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], a[2 * h + i]);
564     }
565 
566     for (i = 0; i < 2 * h - 1; i++)
567     {
568         mpz_mul_ui (tmp[3 * h - 1 + i], tmp[3 * h - 1 + i], 3);
569         mpz_mul_2exp (tmp[i], tmp[i], 1);
570     }
571 
572     list_add (tmp + 3 * h - 1, tmp + 3 * h - 1, tmp, 2 * h - 1);
573 
574     tot_muls += TToomCookMul (tmp + 6 * h - 2, h - 1,
575                               tmp + 2 * h - 1, h - 1,
576                               tmp + 3 * h - 1, 2 * h - 2,
577                               tmp + 7 * h - 2);
578 
579     /* tmp[6h-2 .. 7h - 3] = 6 * mm1 */
580 
581 #ifdef DEBUG
582     fprintf (ECM_STDOUT, "\n6 * mm1 = ");
583     print_list (tmp + 6 * h - 2, h);
584 #endif
585     list_add_safe (tmp, tmp, c + 2 * h,
586                    2 * h,
587                    (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
588                    2 * h - 1);
589 
590     list_sub_safe (tmp, c + 4 * h, tmp,
591                    (l + 1 > 4 * h ? l + 1 - 4 * h : 0),
592                    2 * h - 1, 2 * h - 1);
593 
594     tot_muls += TToomCookMul (b + 2 * h, n - 2 * h, a + 2 * h, m - 2 * h,
595                   tmp, 2 * h - 1, tmp + 7 * h - 2);
596 
597     /* b[2 * h .. n] = minf */
598 
599 #ifdef DEBUG
600     fprintf (ECM_STDOUT, "\nminf = ");
601     print_list (b + 2 * h, n + 1 - 2 * h);
602 #endif
603 
604     /* Layout of b :
605      * ---------------------------------------
606      * | 0 ... h-1 | h ... 2*h-1 | 2*h ... n |
607      * ---------------------------------------
608      * |  2 * m0   |   2 * m1    |    minf   |
609      * ---------------------------------------
610      *
611      * Layout of tmp :
612      * ---------------------------------------------------
613      * | 0 ... 5*h-1 | 5*h-2 ... 6*h-3 | 6*h-2 ... 7*h-3 |
614      * ---------------------------------------------------
615      * |  ??????     |    6 * m2       |   6 * mm1       |
616      * ---------------------------------------------------
617      */
618 
619     list_add (tmp, tmp + 5 * h - 2, tmp + 6 * h - 2, h);
620     for (i = 0; i < h; i++)
621         mpz_divby3_1op (tmp[i]);
622 
623     /* t1 = 2 (m2 + mm1)
624      * tmp[0 .. h - 1] = t1
625      */
626 
627     list_add (b, b, b + h, h);
628     list_add (b, b, tmp, h);
629     for (i = 0; i < h; i++)
630       mpz_tdiv_q_2exp (b[i], b[i], 1);
631 
632     /* b_{low} should be correct */
633 
634     list_add (tmp + h, b + h, tmp, h);
635 
636     /* t2 = t1 + 2 m1
637      * tmp[h .. 2h - 1] = t2
638      */
639 
640     list_add (b + h, tmp, tmp + h, h);
641     list_sub (b + h, b + h, tmp + 6 * h - 2, h);
642     for (i = 0; i < h; i++)
643       mpz_tdiv_q_2exp (b[h + i], b[h + i], 1);
644 
645     /* b_{mid} should be correct */
646 
647     list_add (tmp + h, tmp + h, tmp + 5 * h - 2, n + 1 - 2 * h);
648     for (i = 0; i < n + 1 - 2 * h; i++)
649       mpz_tdiv_q_2exp (tmp[h + i], tmp[h + i], 1);
650 
651     list_add (b + 2 * h, b + 2 * h, tmp + h, n + 1 - 2 * h);
652     /* b_{high} should be correct */
653 
654     return tot_muls;
655 }
656 
657 /* Returns space needed by TToomCookMul */
658 
659 unsigned int
TToomCookMul_space(unsigned int n,unsigned int m,unsigned int l)660 TToomCookMul_space (unsigned int n, unsigned int m, unsigned int l)
661 
662 {
663     unsigned int nu, mu, h;
664     unsigned int stmp1, stmp2;
665 
666     nu = n / 3 + 1;
667     mu = m / 3 + 1;
668 
669     stmp1 = stmp2 = 0;
670 
671     /* ensures n + 1 > 2 * nu */
672     if ((n < 2 * nu) || (m < 2 * mu))
673       return TKarMul_space (n, m, l);
674 
675     /* First strip unnecessary trailing coefficients of c:
676      */
677 
678     l = MIN(l, n + m);
679 
680     /* Now the degenerate cases. We want 2 * nu < m.
681      *
682      */
683 
684     if (m <= 2 * nu)
685     {
686         stmp1 = TToomCookMul_space (nu - 1, m, l);
687         if (l >= 2 * nu)
688 	  stmp2 = TToomCookMul_space (n - 2 * nu, m, l - 2 * nu);
689         else if (l >= nu)
690 	  stmp2 = TToomCookMul_space (nu - 1, m, l - nu);
691         return MAX(stmp1, stmp2);
692     }
693 
694     /* Second degenerate case. We want 2 * mu < n.
695      */
696 
697     if (n <= 2 * mu)
698     {
699         stmp1 += TToomCookMul_space (n, mu - 1, l);
700         if (l >= 2 * mu)
701 	  stmp2 = TToomCookMul_space (n, m - 2 * mu, l - 2 * mu) + n + 1;
702         else if (l >= mu)
703 	  stmp2 = TToomCookMul_space (n, mu - 1, l - mu) + n + 1;
704         return MAX(stmp1, stmp2);
705     }
706 
707     h = MAX(nu, mu);
708 
709     stmp1 = TToomCookMul_space (h - 1, h - 1, 2 * h - 2);
710     stmp2 = stmp1 + 7 * h - 2;
711     stmp1 = stmp1 + 6 * h - 2;
712     stmp1 = MAX(stmp1, stmp2);
713     stmp2 = TToomCookMul_space (n - 2 * h, m - 2 * h, 2 * h - 1) + 7*h-2;
714     return MAX(stmp1, stmp2);
715 }
716 
717 /* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients
718    of degree m to n+m of rev(a)*c, i.e.
719    b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l)
720    ...
721    b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k)
722    ...
723    b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n].
724    Using auxiliary memory in tmp.
725 
726    Assumes n <= l.
727 
728    Returns number of multiplications if known, 0 if not known,
729    and -1 for error.
730 */
731 int
TMulGen(listz_t b,unsigned int n,listz_t a,unsigned int m,listz_t c,unsigned int l,listz_t tmp,mpz_t modulus)732 TMulGen (listz_t b, unsigned int n, listz_t a, unsigned int m,
733          listz_t c, unsigned int l, listz_t tmp, mpz_t modulus)
734 {
735   ASSERT (n <= l);
736 
737   if (Fermat)
738     {
739       unsigned int i;
740       for (i = l + 1; i > 1 && (i&1) == 0; i >>= 1);
741       ASSERT(i == 1);
742       ASSERT(n + 1 == (l + 1) / 2);
743       ASSERT(m == l - n || m + 1 == l - n);
744       return F_mul_trans (b, a, c, m + 1, l + 1, Fermat, tmp);
745     }
746 
747   if ((double) n * (double) mpz_sizeinbase (modulus, 2) >= KS_TMUL_THRESHOLD)
748     {
749       if (TMulKS (b, n, a, m, c, l, modulus, 1)) /* Non-zero means error */
750 	return -1;
751       return 0; /* We have no mul count so we return 0 */
752     }
753 
754   return TToomCookMul (b, n, a, m, c, l, tmp);
755 }
756 
757 
758 unsigned int
TMulGen_space(unsigned int n,unsigned int m,unsigned int l)759 TMulGen_space (unsigned int n, unsigned int m, unsigned int l)
760 {
761     if (Fermat)
762       return 2 * (l + 1);
763     else
764       return TToomCookMul_space (n, m, l);
765 }
766