mpfrcx-0.6.2/src/c_mul.c

/*
mpcx_mul

computes h = f * g

Copyright (C) 2009, 2011, 2012, 2013 Andreas Enge

This file is part of the MPFRCX Library.

The MPFRCX Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The MPFRCX Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the MPFRCX library; see the file COPYING.LESSER.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.
*/

#include "mpfrcx-impl.h"

static void mpcx_array_mul_karatsuba (mpc_t* h, mpc_t* f, mpc_t* g,
   const int m, const int n, const int offm, const int offn, mpc_t* buff);
static void mpcx_array_mul_toomcook (mpc_t* h, mpc_t* f, mpc_t* g,
   const int m, const int n, const int offm, const int offn, mpc_t* buff);
static void mpcx_array_mul (mpc_t* h, mpc_t* f, mpc_t* g,
                            const int m, const int n);

/**************************************************************************/

void mpcx_mul (mpcx_ptr h, mpcx_srcptr f, mpcx_srcptr g) {
   int    overlap;
   mpcx_t h_local;
   int    f_monic, g_monic, i;

   if (f->deg == -1 || g->deg == -1) {
      h->deg = -1;
      return;
   }

   f_monic = (mpc_cmp_si (f->coeff [f->deg], 1) == 0);
   g_monic = (mpc_cmp_si (g->coeff [g->deg], 1) == 0);

   if (f_monic && f->deg == 0) {
      mpcx_set (h, g);
      return;
   }
   if (g_monic && g->deg == 0) {
      mpcx_set (h, f);
      return;
   }

   overlap = (h == f) || (h == g);
   if (overlap)
      mpcx_init (h_local, f->deg + g->deg + 1, h->prec);
   else
      mpcx_mv (h_local, h);
   h_local->deg = f->deg + g->deg;
   if (h_local->size < h_local->deg + 1)
      mpcx_realloc (h_local, h_local->deg + 1);

   if (f_monic && g_monic) {
      mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg, g->deg);
      /* watch out: the coefficient of X^{f->deg+g->deg-1} has not been set */
      for (i = 0; i < f->deg - 1; i++)
         mpc_add (h_local->coeff [i + g->deg], h_local->coeff [i + g->deg],
            f->coeff [i], MPC_RNDNN);
      mpc_set (h_local->coeff [f->deg + g->deg - 1], f->coeff [f->deg - 1],
               MPC_RNDNN);
      for (i = 0; i < g->deg; i++)
         mpc_add (h_local->coeff [i + f->deg], h_local->coeff [i + f->deg],
            g->coeff [i], MPC_RNDNN);
      mpc_set_ui (h_local->coeff [h_local->deg], 1, MPC_RNDNN);
   }
   else if (f_monic) {
      mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg, g->deg+1);
      for (i = 0; i < g->deg; i++)
         mpc_add (h_local->coeff [i + f->deg], h_local->coeff [i + f->deg],
            g->coeff [i], MPC_RNDNN);
      mpc_set (h_local->coeff [f->deg + g->deg], g->coeff [g->deg], MPC_RNDNN);
   }
   else if (g_monic) {
      mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg+1, g->deg);
      for (i = 0; i < f->deg; i++)
         mpc_add (h_local->coeff [i + g->deg], h_local->coeff [i + g->deg],
            f->coeff [i], MPC_RNDNN);
      mpc_set (h_local->coeff [f->deg + g->deg], f->coeff [f->deg], MPC_RNDNN);
   }
   else
      mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg+1, g->deg+1);

   if (overlap)
      mpcx_clear (h);
   mpcx_mv (h, h_local);
}

/**************************************************************************/
/*                                                                        */
/* internal multiplication functions working directly on arrays of        */
/* coefficients; together with the array, the number of coefficients is   */
/* passed to the functions, and possibly an offset, indicating that the   */
/* coefficient of degree i of the polynomial is in fact stored at the     */
/* position i*offset. This facilitates the comb-like decomposition of     */
/* polynomials during Karatsuba and Toom-Cook multiplication.             */
/*                                                                        */
/**************************************************************************/

static void mpcx_array_mul_karatsuba (mpc_t* h, mpc_t* f, mpc_t* g,
   const int m, const int n, const int offm, const int offn, mpc_t* buff) {
   /* f is a polynomial of degree m-1, g a polynomial of degree n-1,      */
   /* represented by an array of coefficients. The coefficient of degree  */
   /* i of f is stored at the index i*offm, similarly for g. h is         */
   /* replaced by f times g, computed by Karatsuba multiplication, and    */
   /* using an offset of 1. In practice by recursion, the offsets will    */
   /* always be powers of 2.                                              */
   /* buff must point to a temporary space of m+n+1 coefficients, which   */
   /* means 2 coefficients more than the size of the result.              */
   /* h must be different from f and g and contain sufficiently much      */
   /* space to hold all the coefficients.                                 */

   int   i;
   mpc_t *f_local, *g_local, *h_local, *buff_local, *tmp;
   int   m_local, n_local;
   if (m == 1)
      for (i = 0; i < n; i++) {
         mpc_mul (h [0], g [0], f [0], MPC_RNDNN);
         h++;
         g += offn;
      }
   else if (n == 1)
      for (i = 0; i < m; i++) {
         mpc_mul (h [0], f [0], g [0], MPC_RNDNN);
         h++;
         f += offm;
      }
   else {
      /* recursion */
      /* Write f = f_0 (X^2) + X f_1 (X^2), g = g_0 (X^2) + X g_1 (X^2).  */
      /* copy f_0 + f_1 and g_0 + g_1 into the buffer                     */
      buff_local = buff;
      tmp = f;
      f_local = buff_local;
      m_local = m/2;
      for (i = 0; i < m_local; i++) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp += offm;
         mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
         tmp += offm;
         buff_local++;
      }
      if (m % 2 != 0) {
         m_local++;
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         buff_local++;
      }
      tmp = g;
      g_local = buff_local;
      n_local = n/2;
      for (i = 0; i < n_local; i++) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp += offn;
         mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
         tmp += offn;
         buff_local++;
      }
      if (n % 2 != 0) {
         n_local++;
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         buff_local++;
      }
      /* multiply them together into the following piece of the buffer,   */
      /* using the result as buffer                                       */
      mpcx_array_mul_karatsuba (buff_local, f_local, g_local, m_local,
         n_local, 1, 1, h);
      /* buff_local contains ((f_0 + f_1)*(g_0 + g1)) (X^2); this must b  */
      /* copied into h, multiplied by X                                   */
      tmp = h + 1;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_set (*tmp, *buff_local, MPC_RNDNN);
         tmp += 2;
         buff_local++;
      }

      /* multiply f0 and g0 into the beginning of the buffer, using the   */
      /* remaining piece of buffer                                        */
      /* m_local and n_local remain as before.                            */
      mpcx_array_mul_karatsuba (buff, f, g, m_local, n_local,
         2*offm, 2*offn, buff + m_local + n_local - 1);
      /* buff contains (f_0*g_0) (X^2); this must be put into h; and      */
      /* subtracted, multiplied by X                                      */
      tmp = h;
      h_local = buff;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_set (*tmp, *h_local, MPC_RNDNN);
         tmp++;
         mpc_sub (*tmp, *tmp, *h_local, MPC_RNDNN);
         tmp++;
         h_local++;
      }
      /* set the remaining even coefficient to 0 */
      if (m % 2 == 0 && n % 2 == 0)
         mpc_set_ui (h [2 * (m_local + n_local - 1)], 0, MPC_RNDNN);

      /* multiply f1 and g1 into the beginning of the buffer, using the   */
      /* remaining piece of buffer                                        */
      m_local = m/2;
      n_local = n/2;
      mpcx_array_mul_karatsuba (buff, f+offm, g+offn, m_local, n_local,
         2*offm, 2*offn, buff + m_local + n_local - 1);
      /* buff contains (f_1*g_1) (X^2); this must be added to h,          */
      /* multiplied by X^2; and subtracted, multiplied by X               */
      tmp = h + 1;
      h_local = buff;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_sub (*tmp, *tmp, *h_local, MPC_RNDNN);
         tmp++;
         mpc_add (*tmp, *tmp, *h_local, MPC_RNDNN);
         tmp++;
         h_local++;
      }
   }
}

/**************************************************************************/

static void mpcx_array_mul_toomcook (mpc_t* h, mpc_t* f, mpc_t* g,
   const int m, const int n, const int offm, const int offn, mpc_t* buff) {
   /* f is a polynomial of degree m-1, g a polynomial of degree n-1,      */
   /* represented by an array of coefficients. The coefficient of degree  */
   /* i of f is stored at the index i*offm, similarly for g. h is         */
   /* replaced by f times g, computed by Toom-Cook 3-way multiplication,  */
   /* and using an offset of 1. In practice by recursion, the offsets     */
   /* will always be powers of 2.                                         */
   /* buff must point to a sufficiently large temporary space;            */
   /* m + n + 117 coefficients are enough for m, n < 2^64.                */
   /* h must be different from f and g and contain sufficiently much      */
   /* space to hold all the coefficients.                                 */

   int    i;
   mpc_t *f_local, *g_local, *h_local, *buff_local, *tmp, tmpc;
   int    m_local, n_local, min1, min2, min3;

   if (m == 1)
      for (i = 0; i < n; i++) {
         mpc_mul (h [0], g [0], f [0], MPC_RNDNN);
         h++;
         g += offn;
      }
   else if (n == 1)
      for (i = 0; i < m; i++) {
         mpc_mul (h [0], f [0], g [0], MPC_RNDNN);
         h++;
         f += offm;
      }
   else if (m == 2 || n == 2 || m == 4 || n == 4)
      mpcx_array_mul_karatsuba (h, f, g, m, n, offm, offn, buff);
   else {
      /* recursion */
      mpc_init2 (tmpc, mpc_get_prec (h [0]));

      /* Write f = f_0 (X^3) + f_1 (X^3) X + f_2 (X^3) X^2, and likewise g */
      /* Copy f_0 - f_1 + f_2 and g_0 - g_1 + g_2 into the buffer          */
      buff_local = buff;
      tmp = f;
      f_local = buff_local;
      m_local = m/3;
      for (i = 0; i < m_local; i++) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp += offm;
         mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
         tmp += offm;
         mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
         tmp += offm;
         buff_local++;
      }
      if (m % 3 == 1) {
         m_local++;
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         buff_local++;
      }
      else if (m % 3 == 2) {
         m_local++;
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp+= offm;
         mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
         buff_local++;
      }
      tmp = g;
      g_local = buff_local;
      n_local = n/3;
      for (i = 0; i < n_local; i++) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp += offn;
         mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
         tmp += offn;
         mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
         tmp += offn;
         buff_local++;
      }
      if (n % 3 == 1) {
         n_local++;
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         buff_local++;
      }
      else if (n % 3 == 2) {
         n_local++;
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp+= offn;
         mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
         buff_local++;
      }
      /* multiply them together into the following piece of the buffer,   */
      /* using the remaining piece of buffer                              */
      h_local = buff_local;
      buff_local += m_local + n_local - 1;
      mpcx_array_mul_toomcook (h_local, f_local, g_local, m_local,
         n_local, 1, 1, buff_local);
      /* compute mini, the minimal value such that X*(X^3)^i still occurs */
      /* in the result                                                    */
      min1 = (  (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 3) / 3
              ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 3) / 3);
      min2 = (  (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 4) / 3
              ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 4) / 3);
      min3 = (  (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 5) / 3
              ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 5) / 3);
      /* h_local contains the value in -1; it must be multiplied by       */
      /* -1/3 X + 1/2 X^2 - 1/6 X^3 and put into the result.              */
      mpc_set_ui (h [0], 0, MPC_RNDNN);
      mpc_set_ui (h [m+n-2], 0, MPC_RNDNN);
      tmp = h + 2;
      buff_local = h_local;
      for (i = 0; i <= min2; i++) {
         mpc_div_2ui (*tmp, *buff_local, 1, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      tmp = h + 1;
      buff_local = h_local;
      for (i = 0; i <= min1; i++) {
         mpc_div_ui (*tmp, *buff_local, 3, MPC_RNDNN);
         mpc_neg (*tmp, *tmp, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      tmp = h + 3;
      for (i = 0; i <= min3; i++) {
         mpc_div_2ui (*tmp, *(tmp-2), 1, MPC_RNDNN);
         tmp += 3;
      }

      /* copy f_0 + f_1 + f_2 and g_0 + g_1 + g_2 into the buffer, by     */
      /* adding 2*f_1 and 2*g_1 to what is already there                  */
      buff_local = f_local;
      tmp = f + offm;
      for (i = 0; i < (m+1) / 3; i++) {
         mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
         mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
         tmp += 3*offm;
         buff_local++;
      }
      buff_local = g_local;
      tmp = g + offm;
      for (i = 0; i < (n+1) / 3; i++) {
         mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
         mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
         tmp += 3*offm;
         buff_local++;
      }
      /* multiply them together into the following piece of the buffer,   */
      /* using the remaining piece of buffer                              */
      h_local = buff + m_local + n_local;
      buff_local = h_local + m_local + n_local - 1;
      mpcx_array_mul_toomcook (h_local, f_local, g_local, m_local,
         n_local, 1, 1, buff_local);
      /* h_local contains the value in 1; it must be multiplied by        */
      /* X + 1/2 X^2 - 1/2 X^3 and added to the result.                   */
      tmp = h + 1;
      buff_local = h_local;
      for (i = 0; i <= min1; i++) {
         mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      /* divide h_local by 2 in place */
      buff_local = h_local;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_div_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
         buff_local++;
      }
      tmp = h + 2;
      buff_local = h_local;
      for (i = 0; i <= min2; i++) {
         mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      tmp = h + 3;
      buff_local = h_local;
      for (i = 0; i <= min3; i++) {
         mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }

      /* copy f_0 + 2*f_1 + 4*f_2 and g_0 + 2*g_1 + 4*g_2 into the buffer */
      buff_local = buff;
      tmp = f;
      for (i = 0; i < m/3; i++) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp += offm;
         mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
         mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
         tmp += offm;
         mpc_mul_2ui (tmpc, *tmp, 2, MPC_RNDNN);
         mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
         tmp += offm;
         buff_local++;
      }
      if (m % 3 == 1) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         buff_local++;
      }
      else if (m % 3 == 2) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp+= offm;
         mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
         mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
         buff_local++;
      }
      tmp = g;
      for (i = 0; i < n/3; i++) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp += offn;
         mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
         mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
         tmp += offn;
         mpc_mul_2ui (tmpc, *tmp, 2, MPC_RNDNN);
         mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
         tmp += offn;
         buff_local++;
      }
      if (n % 3 == 1) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         buff_local++;
      }
      else if (n % 3 == 2) {
         mpc_set (*buff_local, *tmp, MPC_RNDNN);
         tmp+= offn;
         mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
         mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
         buff_local++;
      }
      /* multiply them together into the following piece of the buffer,   */
      /* using the remaining piece of buffer                              */
      h_local = buff_local;
      buff_local += m_local + n_local - 1;
      mpcx_array_mul_toomcook (h_local, f_local, g_local, m_local,
         n_local, 1, 1, buff_local);
      /* h_local contains the value in 2; it must be multiplied by        */
      /* -1/6 X + 1/6 X^3 and added to the result.                        */
      /* divide h_local by 6 in place */
      buff_local = h_local;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_div_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
         mpc_div_ui (*buff_local, *buff_local, 3, MPC_RNDNN);
         buff_local++;
      }
      tmp = h + 1;
      buff_local = h_local;
      for (i = 0; i <= min1; i++) {
         mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      tmp = h + 3;
      buff_local = h_local;
      for (i = 0; i <= min3; i++) {
         mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }

      /* multiply f0 and g0 into the start of the buffer, using the       */
      /* remaining buffer piece                                           */
      h_local = buff;
      buff_local = buff + m_local + n_local - 1;
      mpcx_array_mul_toomcook (h_local, f, g, m_local, n_local, 3*offm,
         3*offn, buff_local);
      /* h_local contains the value in 0; it must be multiplied by        */
      /* 1 - 1/2 X - X^2 + 1/2 X^3 and added to the result.               */
      tmp = h;
      buff_local = h_local;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      tmp = h + 2;
      buff_local = h_local;
      for (i = 0; i <= min2; i++) {
         mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      /* divide h_local by 2 in place */
      buff_local = h_local;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_div_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
         buff_local++;
      }
      tmp = h + 1;
      buff_local = h_local;
      for (i = 0; i <= min1; i++) {
         mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      tmp = h + 3;
      buff_local = h_local;
      for (i = 0; i <= min3; i++) {
         mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }

      /* multiply f2 and g2 into the start of the buffer, using the       */
      /* remaining buffer piece                                           */
      /* Instead of ceil (m/3) and ceil (n/3), these now have floor (m/3) */
      /* and floor (n/3) coefficients.                                    */
      m_local = m/3;
      n_local = n/3;
      h_local = buff;
      buff_local = buff + m_local + n_local - 1;
      mpcx_array_mul_toomcook (h_local, f+2*offm, g+2*offn, m_local,
         n_local, 3*offm, 3*offn, buff_local);
      /* h_local contains the value in infinity; it must be multiplied by */
      /* 2 X - X^2 - 2 X^3 + X^4 and added to the result.                 */
      /* Here, even X^4*h_local does not exceed the result.               */
      tmp = h + 4;
      buff_local = h_local;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      tmp = h + 2;
      buff_local = h_local;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      /* multiply h_local by 2 in place */
      buff_local = h_local;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_mul_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
         buff_local++;
      }
      tmp = h + 1;
      buff_local = h_local;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }
      tmp = h + 3;
      buff_local = h_local;
      for (i = 0; i < m_local + n_local - 1; i++) {
         mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
         tmp += 3;
         buff_local++;
      }

      mpc_clear (tmpc);
   }
}

/**************************************************************************/

static void mpcx_array_mul (mpc_t* h, mpc_t* f, mpc_t* g,
                            const int m, const int n)
   /* f is a polynomial of degree m-1, g a polynomial of degree n-1,      */
   /* represented by an array of coefficients. h is replaced by f times g.*/
   /* h is expected to be different from f and g and must contain         */
   /* sufficiently much space to hold all the coefficients.               */

{
   const int min = (m < n ? m : n), max = (m < n ? n : m);

   if (min >= MPCX_FFT_THRESHOLD && min < MPCX_NOFFT_THRESHOLD)
      if (min != max) {
         /* Check whether splitting the larger polynomial into smaller pieces
            is useful. We target an FFT of order the smallest power of 2
            above 2*min. */
         int length = 1, pieces;
         while (length < min)
            length *= 2;
         length *= 2;
         length = length + 1 - min;
         pieces = (max + length - 1) / length;
         length = (max + pieces - 1) / pieces;

         if (pieces > 1) {
            int i, j;
            mpc_t *minpol = (m < n ? f : g);
            mpc_t *maxpol = (m < n ? g : f);

            int current = length;
            mpcx_t buffer;
            int bufsize = min + length - 1;
            mpcx_init (buffer, bufsize, mpc_get_prec (h [0]));

            /* clear out the result */
            for (i = 0; i < min + max - 1; i++)
               mpc_set_ui (h [i], 0, MPC_RNDNN);

            /* multiply minpol by pieces of maxpol and add to result */
            for (i = 0; i < pieces; i++) {
               /* The last piece may be shorter than length. */
               if (i == pieces - 1)
                  current = max - (pieces - 1) * length;
               mpcx_array_mul_fft (buffer->coeff, minpol, maxpol + i * length,
                                 min, current);
               for (j = 0; j < min + current - 1; j++)
                  mpc_add (h [i * length + j], h [i * length + j],
                           buffer->coeff [j], MPC_RNDNN);
            }

            mpcx_clear (buffer);
         }
         else
            mpcx_array_mul_fft (h, f, g, m, n);
      }
      else
         mpcx_array_mul_fft (h, f, g, m, n);
   else {
      mpcx_t buffer;
      int bufsize;
      /* For Karatsuba, we would need m + n + 1 coefficients. */
/*
      mpcx_init (buffer, m + n + 1, mpc_get_prec (h [0]->re));
*/
      /* For Toom-Cook, we may need up to
         m + n + 3 floor (min (log_3 (m-1), log_3 (n-1)) - 1). */
      bufsize = m + n +
                (min <= 82 ? 9 :
                (min <= 1162261468 ? 53 : 117));
      mpcx_init (buffer, bufsize, mpc_get_prec (h [0]));
      mpcx_array_mul_toomcook (h, f, g, m, n, 1, 1, buffer->coeff);
      mpcx_clear (buffer);
   }
}