gmp-ecm/ecm-7.0.4/median.c

/* Median/middle product.

Copyright 2003, 2004, 2005, 2006, 2007, 2008 Laurent Fousse, Paul Zimmermann,
Alexander Kruppa, Dave Newman.

This file is part of the ECM Library.

The ECM Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The ECM Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the ECM Library; see the file COPYING.LIB.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

/* Reference:
[1] Tellegen's Principle into Practice, by A. Bostan, G. Lecerf and E. Schost,
Proc. of ISSAC'03, Philadelphia, 2003.
*/

#include <stdio.h>
#include "ecm-impl.h"

#ifndef MAX
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif

#ifndef MIN
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif

extern unsigned int Fermat;

static void list_add_wrapper (listz_t, listz_t, listz_t, unsigned int,
                              unsigned int);
static void list_sub_wrapper (listz_t, listz_t, listz_t, unsigned int,
                              unsigned int);
static unsigned int TKarMul  (listz_t, unsigned int, listz_t, unsigned int,
                              listz_t, unsigned int, listz_t);
static void list_sub_safe    (listz_t, listz_t, listz_t, unsigned int,
                              unsigned int, unsigned int);
static void list_add_safe    (listz_t, listz_t, listz_t, unsigned int,
                              unsigned int, unsigned int);
static unsigned int TToomCookMul (listz_t, unsigned int, listz_t, unsigned int,
                                  listz_t, unsigned int, listz_t);
static unsigned int TToomCookMul_space (unsigned int, unsigned int,
                                        unsigned int);

static void
list_add_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n,
                  unsigned int max_r)
{
    list_add (p, q, r, MIN (n, max_r));
    if (n > max_r)
      list_set (p + max_r, q + max_r, n - max_r);
}

static void
list_sub_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n,
                  unsigned int max_r)
{
    list_sub (p, q, r, MIN (n, max_r));
    if (n > max_r)
        list_set (p + max_r, q + max_r, n - max_r);
}

/* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients
   of degree m to n+m of rev(a)*c, i.e.
   b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l)
   ...
   b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k)
   ...
   b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n].
   Using auxiliary memory in t.
   Implements algorithm TKarMul of [1].
   Assumes deg(c) = l <= m+n.
*/

static unsigned int
TKarMul (listz_t b, unsigned int n,
	 listz_t a, unsigned int m, listz_t c, unsigned int l, listz_t t)
{
  unsigned int k, mu, nu, h;
  unsigned int s1;
  unsigned tot_muls = 0;
#ifdef DEBUG
  fprintf (ECM_STDOUT, "Enter TKarMul.\nm = %d\nn = %d\nl = %d\n", m, n, l);
  fprintf (ECM_STDOUT, "a = ");
  print_list (a, m + 1);
  fprintf (ECM_STDOUT, "\nc = ");
  print_list (c, l + 1);
  fprintf (ECM_STDOUT, "\n");
#endif


  if (n == 0)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Case n = 0.\n");
#endif
      mpz_mul (b[0], a[0], c[0]);
      for (k = 1; (k <= m) && (k <= l); k++)
	mpz_addmul (b[0], a[k], c[k]);
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Exit TKarMul.\n");
#endif
      return MIN (m, l) + 1;
    }

  if (m == 0)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Case m = 0.\n");
#endif
      for (k = 0; (k <= l) && (k <= n); k++)
	mpz_mul (b[k], a[0], c[k]);
      for (k = l + 1; k <= n; k++)
	mpz_set_ui (b[k], 0);
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Exit TKarMul.\n");
#endif
      return MIN (n, l) + 1;
    }

  mu = (m / 2) + 1;		/* 1 <= mu <= m */
  nu = (n / 2) + 1;		/* 1 <= nu <= n */
  h = MAX (mu, nu);		/* h >= 1 */

#ifdef DEBUG
  fprintf (ECM_STDOUT, "mu = %d\nnu = %d\nh = %d\n", mu, nu, h);
#endif

  if (mu > n)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Case mu > n.\n");
#endif

      tot_muls += TKarMul (b, n, a, mu - 1, c, l, t);
      if (l >= mu)
	{
	  /* we have to check l-mu <= n + (m-mu), i.e. l <= n+m */
	  tot_muls += TKarMul (t, n, a + mu, m - mu, c + mu, l - mu, t + n + 1);
	  list_add (b, b, t, n + 1);
	}
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Exit TKarMul.\n");
#endif
      return tot_muls;
    }

  if (nu > m)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Case nu > m.\n");
#endif

      /* we have to check MIN(l,m+nu-1) <= nu-1+m: trivial */
      tot_muls += TKarMul (b, nu - 1, a, m, c, MIN (l, m + nu - 1), t);

      /* Description broken in reference. Should be a list
       * concatenation, not an addition.
       * Fixed now.
       */

      if (l >= nu)
	{
	  /* we have to check l-nu <= n-nu+m, i.e. l <= n+m: trivial */
	  tot_muls += TKarMul (b + nu, n - nu, a, m, c + nu, l - nu, t);
	}
      else
        list_zero (b + nu, n - nu + 1);
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Exit TKarMul.\n");
#endif
      return tot_muls;
    }

  /* We want nu = mu */

  mu = nu = h;

#ifdef DEBUG
  fprintf (ECM_STDOUT, "Base Case.\n");
#endif

  s1 = MIN (l + 1, n + mu);
  if (l + 1 > nu)
    list_sub_wrapper (t, c, c + nu, s1, l - nu + 1);
  else
    list_set (t, c, s1);
#ifdef DEBUG
      fprintf (ECM_STDOUT, "DEBUG c - c[nu].\n");
      print_list (t, s1);
      fprintf (ECM_STDOUT, "We compute (1) - (3)\n");
#endif
      tot_muls += TKarMul (b, nu - 1, a, mu - 1, t, s1 - 1, t + s1);
      /* (1) - (3) */
#ifdef DEBUG
      print_list (b, nu);
      fprintf (ECM_STDOUT, "We compute (2) - (4)\n");
#endif
      if (s1 >= nu + 1) { /* nu - 1 */
        tot_muls += TKarMul (b + nu, n - nu, a + mu, m - mu,
                             t + nu, s1 - nu - 1, t + s1);
        /* (2) - (4) */
      }
      else {
          list_zero (b + nu, n - nu + 1);
      }
#ifdef DEBUG
      print_list (b + nu, n - nu + 1);
#endif
      list_add_wrapper (t, a, a + mu, mu, m + 1 - mu);
#ifdef DEBUG
      fprintf (ECM_STDOUT, "We compute (2) + (3)\n");
#endif
      if (l >= nu) {
          tot_muls += TKarMul (t + mu, nu - 1, t, mu - 1, c + nu, l - nu,
                               t + mu + nu);
      }
      else
          list_zero (t + mu, nu);
      /* (2) + (3) */
#ifdef DEBUG
      print_list (t + mu, nu);
#endif
      list_add (b, b, t + mu, nu);
      list_sub (b + nu, t + mu, b + nu, n - nu + 1);
      return tot_muls;
}

/* Computes the space needed for TKarMul of b[0..n],
 * a[0..m] and c[0..l]
 */

static unsigned int
TKarMul_space (unsigned int n, unsigned int m, unsigned int l)
{
  unsigned int mu, nu, h;
  unsigned int s1;

  unsigned int r1, r2;

  if (n == 0)
      return 0;

  if (m == 0)
      return 0;

  mu = (m / 2) + 1;
  nu = (n / 2) + 1;
  h = MAX (mu, nu);

  if (mu > n)
    {
      r1 = TKarMul_space (n, mu - 1, l);
      if (l >= mu)
	{
	  r2 = TKarMul_space (n, m - mu, l - mu) + n + 1;
          r1 = MAX (r1, r2);
	}
      return r1;
    }

  if (nu > m)
    {
      r1 = TKarMul_space (nu - 1, m, MIN (l, m + nu - 1));

      if (l >= nu)
	{
	  r2 = TKarMul_space (n - nu, m,l - nu);
          r1 = MAX (r1, r2);
	}
      return r1;
    }

  mu = nu = h;

  s1 = MIN (l + 1, n + mu);
  r1 = TKarMul_space (nu - 1, mu - 1, s1 - 1) + s1;
  if (s1 >= nu + 1) {
    r2 = TKarMul_space (n - nu, m - mu, s1 - nu - 1) + s1;
    r1 = MAX (r1, r2);
  }
  if (l >= nu) {
    r2 = TKarMul_space (nu - 1, mu - 1, l - nu) + mu + nu;
    r1 = MAX (r1, r2);
  }
  return r1;
}

/* list_sub with bound checking
 */

static void
list_sub_safe (listz_t ret, listz_t a, listz_t b,
               unsigned int sizea, unsigned int sizeb,
               unsigned int needed)
{
    unsigned int i;
    unsigned int safe;
    safe = MIN(sizea, sizeb);
    safe = MIN(safe, needed);

    list_sub (ret, a, b, safe);

    i = safe;
    while (i < needed)
    {
        if (i < sizea)
        {
            if (i < sizeb)
                mpz_sub (ret[i], a[i], b[i]);
            else
                mpz_set (ret[i], a[i]);
        }
        else
        {
            if (i < sizeb)
                mpz_neg (ret[i], b[i]);
            else
                mpz_set_ui (ret[i], 0);
        }
        i++;
    }
}

/* list_add with bound checking
 */

static void
list_add_safe (listz_t ret, listz_t a, listz_t b,
                        unsigned int sizea, unsigned int sizeb,
                        unsigned int needed)
{
    unsigned int i;
    unsigned int safe;
    safe = MIN(sizea, sizeb);
    safe = MIN(safe, needed);

    list_add (ret, a, b, i = safe);

    while (i < needed)
    {
        if (i < sizea)
        {
            if (i < sizeb)
                mpz_add (ret[i], a[i], b[i]);
            else
                mpz_set (ret[i], a[i]);
        }
        else
        {
            if (i < sizeb)
                mpz_set (ret[i], b[i]);
            else
                mpz_set_ui (ret[i], 0);
        }
        i++;
    }
}

static unsigned int
TToomCookMul (listz_t b, unsigned int n,
              listz_t a, unsigned int m, listz_t c, unsigned int l,
              listz_t tmp)
{
    unsigned int nu, mu, h;
    unsigned int i;
    unsigned int btmp;
    unsigned int tot_muls = 0;

    nu = n / 3 + 1;
    mu = m / 3 + 1;

    /* ensures n + 1 > 2 * nu */
    if ((n < 2 * nu) || (m < 2 * mu))
    {
#ifdef DEBUG
        fprintf (ECM_STDOUT, "Too small operands, calling TKara.\n");
#endif
        return TKarMul (b, n, a, m, c, l, tmp);
    }

    /* First strip unnecessary trailing coefficients of c:
     */

    l = MIN(l, n + m);

    /* Now the degenerate cases. We want 2 * nu <= m.
     *
     */

    if (m < 2 * nu)
    {
#ifdef DEBUG
        fprintf (ECM_STDOUT, "Degenerate Case 1.\n");
#endif
        tot_muls += TToomCookMul (b, nu - 1, a, m, c, l, tmp);
        if (l >= nu)
            tot_muls += TToomCookMul (b + nu, nu - 1, a, m,
                                      c + nu, l - nu, tmp);
        else
            list_zero (b + nu, nu);
        if (l >= 2 * nu) /* n >= 2 * nu is assured. Hopefully */
            tot_muls += TToomCookMul (b + 2 * nu, n - 2 * nu, a, m,
                                      c + 2 * nu, l - 2 * nu, tmp);
        else
            list_zero (b + 2 * nu, n - 2 * nu + 1);
        return tot_muls;
    }

    /* Second degenerate case. We want 2 * mu <= n.
     */

    if (n < 2 * mu)
    {
#ifdef DEBUG
        fprintf (ECM_STDOUT, "Degenerate Case 2.\n");
#endif
        tot_muls += TToomCookMul (b, n, a, mu - 1, c, l, tmp);
        if (l >= mu)
        {
            tot_muls += TToomCookMul (tmp, n, a + mu, mu - 1,
                                      c + mu, l - mu, tmp + n + 1);
            list_add (b, b, tmp, n + 1);
        }
        if (l >= 2 * mu)
        {
            tot_muls += TToomCookMul (tmp, n, a + 2 * mu, m - 2 * mu,
                                      c + 2 * mu, l - 2 * mu, tmp + n + 1);
            list_add (b, b, tmp, n + 1);
        }
        return tot_muls;
    }

#ifdef DEBUG
    fprintf (ECM_STDOUT, "Base Case.\n");
    fprintf (ECM_STDOUT, "a = ");
    print_list (a, m + 1);

    fprintf (ECM_STDOUT, "\nc = ");
    print_list (c, l + 1);
#endif
    h = MAX(nu, mu);
    nu = mu = h;

    list_sub_safe (tmp, c + 3 * h, c + h,
                   (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
                   (l + 1 > h ? l + 1 - h : 0), 2 * h - 1);
    list_sub_safe (tmp + 2 * h - 1, c, c + 2 * h,
                   l + 1, (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
                   2 * h - 1);
    for (i = 0; i < 2 * h - 1; i++)
        mpz_mul_2exp (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], 1);

#ifdef DEBUG
    print_list (tmp, 4 * h - 2);
#endif

    /* --------------------------------
     * | 0 ..  2*h-2 | 2*h-1 .. 4*h-3 |
     * --------------------------------
     * | c3 - c1     |   2(c0 - c2)   |
     * --------------------------------
     */

    list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, tmp, 2 * h - 1);

    tot_muls += TToomCookMul (b, h - 1, a, h - 1, tmp + 2 * h - 1,
                              2 * h - 2, tmp + 4 * h - 2);

    /* b[0 .. h - 1] = 2 * m0 */

#ifdef DEBUG
    fprintf (ECM_STDOUT, "2 * m0 = ");
    print_list (b, h);
#endif

    list_add (tmp + 2 * h - 1, a, a + h, h);

    list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, a + 2 * h,
              MIN(h, m + 1 - 2 * h));

    /* tmp[2*h-1 .. 3*h-2] = a0 + a1 + a2 */

#ifdef DEBUG
    fprintf (ECM_STDOUT, "\na0 + a1 + a2 = ");
    print_list (tmp + 2 * h - 1, h);
#endif

    list_sub_safe (tmp + 3 * h - 1, c + 2 * h, c + 3 * h,
                   (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
                   (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
                   2 * h - 1);

    /* -------------------------------------------------
     * | 0 ..  2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 |
     * -------------------------------------------------
     * | c3 - c1     |  a0 + a1 + a2  |   c2 - c3      |
     * -------------------------------------------------
     */

    btmp = (l + 1 > h ? l + 1 - h : 0);
    btmp = MIN(btmp, 2 * h - 1);
    for (i = 0; i < btmp; i++)
      {
        mpz_mul_2exp (tmp[5 * h - 2 + i], c[h + i], 1);
        mpz_add (tmp[5 * h - 2 + i], tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
      }
    while (i < 2 * h - 1)
      {
        mpz_set (tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
        i++;
      }

    tot_muls += TToomCookMul (b + h, h - 1, tmp + 2 * h - 1, h - 1,
                              tmp + 5 * h - 2, 2 * h - 2,
                              tmp + 7 * h - 3);

    /* b[h .. 2 * h - 1] = 2 * m1 */
#ifdef DEBUG
    fprintf (ECM_STDOUT, "\n2 * m1 = ");
    print_list (b + h, h);
#endif

    /* ------------------------------------------------------------------
     * | 0 ..  2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 | 5*h-2 .. 7*h-4 |
     * ------------------------------------------------------------------
     * | c3 - c1     |  a0 + a1 + a2  |   c2 - c3      | c2 - c3 + 2c1  |
     * ------------------------------------------------------------------
     */


    for (i = 0; i < h; i++)
    {
        mpz_add (tmp[2 * h  - 1 + i], tmp[2 * h  - 1 + i], a[i + h]);
        if (2 * h + i <= m)
          mpz_addmul_ui (tmp[2 * h  - 1 + i], a[2 * h + i], 3);
    }
    tot_muls += TToomCookMul (tmp + 5 * h - 2, h - 1,
                              tmp + 2 * h - 1, h - 1,
                              tmp, 2 * h - 2, tmp + 6 * h - 2);

    /* tmp[5*h-2 .. 6*h - 3] = 6 * m2  */

#ifdef DEBUG
    fprintf (ECM_STDOUT, "\n6 * m2 = ");
    print_list (tmp + 5 * h - 2, h);
#endif
    for (i = 0; i < h; i++)
    {
        mpz_sub (tmp[2 * h - 1 + i], a[i], a[h + i]);
        if (i + 2 * h <= m)
            mpz_add (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], a[2 * h + i]);
    }

    for (i = 0; i < 2 * h - 1; i++)
    {
        mpz_mul_ui (tmp[3 * h - 1 + i], tmp[3 * h - 1 + i], 3);
        mpz_mul_2exp (tmp[i], tmp[i], 1);
    }

    list_add (tmp + 3 * h - 1, tmp + 3 * h - 1, tmp, 2 * h - 1);

    tot_muls += TToomCookMul (tmp + 6 * h - 2, h - 1,
                              tmp + 2 * h - 1, h - 1,
                              tmp + 3 * h - 1, 2 * h - 2,
                              tmp + 7 * h - 2);

    /* tmp[6h-2 .. 7h - 3] = 6 * mm1 */

#ifdef DEBUG
    fprintf (ECM_STDOUT, "\n6 * mm1 = ");
    print_list (tmp + 6 * h - 2, h);
#endif
    list_add_safe (tmp, tmp, c + 2 * h,
                   2 * h,
                   (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
                   2 * h - 1);

    list_sub_safe (tmp, c + 4 * h, tmp,
                   (l + 1 > 4 * h ? l + 1 - 4 * h : 0),
                   2 * h - 1, 2 * h - 1);

    tot_muls += TToomCookMul (b + 2 * h, n - 2 * h, a + 2 * h, m - 2 * h,
                  tmp, 2 * h - 1, tmp + 7 * h - 2);

    /* b[2 * h .. n] = minf */

#ifdef DEBUG
    fprintf (ECM_STDOUT, "\nminf = ");
    print_list (b + 2 * h, n + 1 - 2 * h);
#endif

    /* Layout of b :
     * ---------------------------------------
     * | 0 ... h-1 | h ... 2*h-1 | 2*h ... n |
     * ---------------------------------------
     * |  2 * m0   |   2 * m1    |    minf   |
     * ---------------------------------------
     *
     * Layout of tmp :
     * ---------------------------------------------------
     * | 0 ... 5*h-1 | 5*h-2 ... 6*h-3 | 6*h-2 ... 7*h-3 |
     * ---------------------------------------------------
     * |  ??????     |    6 * m2       |   6 * mm1       |
     * ---------------------------------------------------
     */

    list_add (tmp, tmp + 5 * h - 2, tmp + 6 * h - 2, h);
    for (i = 0; i < h; i++)
        mpz_divby3_1op (tmp[i]);

    /* t1 = 2 (m2 + mm1)
     * tmp[0 .. h - 1] = t1
     */

    list_add (b, b, b + h, h);
    list_add (b, b, tmp, h);
    for (i = 0; i < h; i++)
      mpz_tdiv_q_2exp (b[i], b[i], 1);

    /* b_{low} should be correct */

    list_add (tmp + h, b + h, tmp, h);

    /* t2 = t1 + 2 m1
     * tmp[h .. 2h - 1] = t2
     */

    list_add (b + h, tmp, tmp + h, h);
    list_sub (b + h, b + h, tmp + 6 * h - 2, h);
    for (i = 0; i < h; i++)
      mpz_tdiv_q_2exp (b[h + i], b[h + i], 1);

    /* b_{mid} should be correct */

    list_add (tmp + h, tmp + h, tmp + 5 * h - 2, n + 1 - 2 * h);
    for (i = 0; i < n + 1 - 2 * h; i++)
      mpz_tdiv_q_2exp (tmp[h + i], tmp[h + i], 1);

    list_add (b + 2 * h, b + 2 * h, tmp + h, n + 1 - 2 * h);
    /* b_{high} should be correct */

    return tot_muls;
}

/* Returns space needed by TToomCookMul */

unsigned int
TToomCookMul_space (unsigned int n, unsigned int m, unsigned int l)

{
    unsigned int nu, mu, h;
    unsigned int stmp1, stmp2;

    nu = n / 3 + 1;
    mu = m / 3 + 1;

    stmp1 = stmp2 = 0;

    /* ensures n + 1 > 2 * nu */
    if ((n < 2 * nu) || (m < 2 * mu))
      return TKarMul_space (n, m, l);

    /* First strip unnecessary trailing coefficients of c:
     */

    l = MIN(l, n + m);

    /* Now the degenerate cases. We want 2 * nu < m.
     *
     */

    if (m <= 2 * nu)
    {
        stmp1 = TToomCookMul_space (nu - 1, m, l);
        if (l >= 2 * nu)
	  stmp2 = TToomCookMul_space (n - 2 * nu, m, l - 2 * nu);
        else if (l >= nu)
	  stmp2 = TToomCookMul_space (nu - 1, m, l - nu);
        return MAX(stmp1, stmp2);
    }

    /* Second degenerate case. We want 2 * mu < n.
     */

    if (n <= 2 * mu)
    {
        stmp1 += TToomCookMul_space (n, mu - 1, l);
        if (l >= 2 * mu)
	  stmp2 = TToomCookMul_space (n, m - 2 * mu, l - 2 * mu) + n + 1;
        else if (l >= mu)
	  stmp2 = TToomCookMul_space (n, mu - 1, l - mu) + n + 1;
        return MAX(stmp1, stmp2);
    }

    h = MAX(nu, mu);

    stmp1 = TToomCookMul_space (h - 1, h - 1, 2 * h - 2);
    stmp2 = stmp1 + 7 * h - 2;
    stmp1 = stmp1 + 6 * h - 2;
    stmp1 = MAX(stmp1, stmp2);
    stmp2 = TToomCookMul_space (n - 2 * h, m - 2 * h, 2 * h - 1) + 7*h-2;
    return MAX(stmp1, stmp2);
}

/* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients
   of degree m to n+m of rev(a)*c, i.e.
   b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l)
   ...
   b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k)
   ...
   b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n].
   Using auxiliary memory in tmp.

   Assumes n <= l.

   Returns number of multiplications if known, 0 if not known,
   and -1 for error.
*/
int
TMulGen (listz_t b, unsigned int n, listz_t a, unsigned int m,
         listz_t c, unsigned int l, listz_t tmp, mpz_t modulus)
{
  ASSERT (n <= l);

  if (Fermat)
    {
      unsigned int i;
      for (i = l + 1; i > 1 && (i&1) == 0; i >>= 1);
      ASSERT(i == 1);
      ASSERT(n + 1 == (l + 1) / 2);
      ASSERT(m == l - n || m + 1 == l - n);
      return F_mul_trans (b, a, c, m + 1, l + 1, Fermat, tmp);
    }

  if ((double) n * (double) mpz_sizeinbase (modulus, 2) >= KS_TMUL_THRESHOLD)
    {
      if (TMulKS (b, n, a, m, c, l, modulus, 1)) /* Non-zero means error */
	return -1;
      return 0; /* We have no mul count so we return 0 */
    }

  return TToomCookMul (b, n, a, m, c, l, tmp);
}


unsigned int
TMulGen_space (unsigned int n, unsigned int m, unsigned int l)
{
    if (Fermat)
      return 2 * (l + 1);
    else
      return TToomCookMul_space (n, m, l);
}