1 /*
2 mpcx_mul
3 
4 computes h = f * g
5 
6 Copyright (C) 2009, 2011, 2012, 2013 Andreas Enge
7 
8 This file is part of the MPFRCX Library.
9 
10 The MPFRCX Library is free software; you can redistribute it and/or modify
11 it under the terms of the GNU Lesser General Public License as published by
12 the Free Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
14 
15 The MPFRCX Library is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
18 License for more details.
19 
20 You should have received a copy of the GNU Lesser General Public License
21 along with the MPFRCX library; see the file COPYING.LESSER.  If not, write to
22 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
23 MA 02111-1307, USA.
24 */
25 
26 #include "mpfrcx-impl.h"
27 
28 static void mpcx_array_mul_karatsuba (mpc_t* h, mpc_t* f, mpc_t* g,
29    const int m, const int n, const int offm, const int offn, mpc_t* buff);
30 static void mpcx_array_mul_toomcook (mpc_t* h, mpc_t* f, mpc_t* g,
31    const int m, const int n, const int offm, const int offn, mpc_t* buff);
32 static void mpcx_array_mul (mpc_t* h, mpc_t* f, mpc_t* g,
33                             const int m, const int n);
34 
35 /**************************************************************************/
36 
mpcx_mul(mpcx_ptr h,mpcx_srcptr f,mpcx_srcptr g)37 void mpcx_mul (mpcx_ptr h, mpcx_srcptr f, mpcx_srcptr g) {
38    int    overlap;
39    mpcx_t h_local;
40    int    f_monic, g_monic, i;
41 
42    if (f->deg == -1 || g->deg == -1) {
43       h->deg = -1;
44       return;
45    }
46 
47    f_monic = (mpc_cmp_si (f->coeff [f->deg], 1) == 0);
48    g_monic = (mpc_cmp_si (g->coeff [g->deg], 1) == 0);
49 
50    if (f_monic && f->deg == 0) {
51       mpcx_set (h, g);
52       return;
53    }
54    if (g_monic && g->deg == 0) {
55       mpcx_set (h, f);
56       return;
57    }
58 
59    overlap = (h == f) || (h == g);
60    if (overlap)
61       mpcx_init (h_local, f->deg + g->deg + 1, h->prec);
62    else
63       mpcx_mv (h_local, h);
64    h_local->deg = f->deg + g->deg;
65    if (h_local->size < h_local->deg + 1)
66       mpcx_realloc (h_local, h_local->deg + 1);
67 
68    if (f_monic && g_monic) {
69       mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg, g->deg);
70       /* watch out: the coefficient of X^{f->deg+g->deg-1} has not been set */
71       for (i = 0; i < f->deg - 1; i++)
72          mpc_add (h_local->coeff [i + g->deg], h_local->coeff [i + g->deg],
73             f->coeff [i], MPC_RNDNN);
74       mpc_set (h_local->coeff [f->deg + g->deg - 1], f->coeff [f->deg - 1],
75                MPC_RNDNN);
76       for (i = 0; i < g->deg; i++)
77          mpc_add (h_local->coeff [i + f->deg], h_local->coeff [i + f->deg],
78             g->coeff [i], MPC_RNDNN);
79       mpc_set_ui (h_local->coeff [h_local->deg], 1, MPC_RNDNN);
80    }
81    else if (f_monic) {
82       mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg, g->deg+1);
83       for (i = 0; i < g->deg; i++)
84          mpc_add (h_local->coeff [i + f->deg], h_local->coeff [i + f->deg],
85             g->coeff [i], MPC_RNDNN);
86       mpc_set (h_local->coeff [f->deg + g->deg], g->coeff [g->deg], MPC_RNDNN);
87    }
88    else if (g_monic) {
89       mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg+1, g->deg);
90       for (i = 0; i < f->deg; i++)
91          mpc_add (h_local->coeff [i + g->deg], h_local->coeff [i + g->deg],
92             f->coeff [i], MPC_RNDNN);
93       mpc_set (h_local->coeff [f->deg + g->deg], f->coeff [f->deg], MPC_RNDNN);
94    }
95    else
96       mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg+1, g->deg+1);
97 
98    if (overlap)
99       mpcx_clear (h);
100    mpcx_mv (h, h_local);
101 }
102 
103 /**************************************************************************/
104 /*                                                                        */
105 /* internal multiplication functions working directly on arrays of        */
106 /* coefficients; together with the array, the number of coefficients is   */
107 /* passed to the functions, and possibly an offset, indicating that the   */
108 /* coefficient of degree i of the polynomial is in fact stored at the     */
109 /* position i*offset. This facilitates the comb-like decomposition of     */
110 /* polynomials during Karatsuba and Toom-Cook multiplication.             */
111 /*                                                                        */
112 /**************************************************************************/
113 
mpcx_array_mul_karatsuba(mpc_t * h,mpc_t * f,mpc_t * g,const int m,const int n,const int offm,const int offn,mpc_t * buff)114 static void mpcx_array_mul_karatsuba (mpc_t* h, mpc_t* f, mpc_t* g,
115    const int m, const int n, const int offm, const int offn, mpc_t* buff) {
116    /* f is a polynomial of degree m-1, g a polynomial of degree n-1,      */
117    /* represented by an array of coefficients. The coefficient of degree  */
118    /* i of f is stored at the index i*offm, similarly for g. h is         */
119    /* replaced by f times g, computed by Karatsuba multiplication, and    */
120    /* using an offset of 1. In practice by recursion, the offsets will    */
121    /* always be powers of 2.                                              */
122    /* buff must point to a temporary space of m+n+1 coefficients, which   */
123    /* means 2 coefficients more than the size of the result.              */
124    /* h must be different from f and g and contain sufficiently much      */
125    /* space to hold all the coefficients.                                 */
126 
127    int   i;
128    mpc_t *f_local, *g_local, *h_local, *buff_local, *tmp;
129    int   m_local, n_local;
130    if (m == 1)
131       for (i = 0; i < n; i++) {
132          mpc_mul (h [0], g [0], f [0], MPC_RNDNN);
133          h++;
134          g += offn;
135       }
136    else if (n == 1)
137       for (i = 0; i < m; i++) {
138          mpc_mul (h [0], f [0], g [0], MPC_RNDNN);
139          h++;
140          f += offm;
141       }
142    else {
143       /* recursion */
144       /* Write f = f_0 (X^2) + X f_1 (X^2), g = g_0 (X^2) + X g_1 (X^2).  */
145       /* copy f_0 + f_1 and g_0 + g_1 into the buffer                     */
146       buff_local = buff;
147       tmp = f;
148       f_local = buff_local;
149       m_local = m/2;
150       for (i = 0; i < m_local; i++) {
151          mpc_set (*buff_local, *tmp, MPC_RNDNN);
152          tmp += offm;
153          mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
154          tmp += offm;
155          buff_local++;
156       }
157       if (m % 2 != 0) {
158          m_local++;
159          mpc_set (*buff_local, *tmp, MPC_RNDNN);
160          buff_local++;
161       }
162       tmp = g;
163       g_local = buff_local;
164       n_local = n/2;
165       for (i = 0; i < n_local; i++) {
166          mpc_set (*buff_local, *tmp, MPC_RNDNN);
167          tmp += offn;
168          mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
169          tmp += offn;
170          buff_local++;
171       }
172       if (n % 2 != 0) {
173          n_local++;
174          mpc_set (*buff_local, *tmp, MPC_RNDNN);
175          buff_local++;
176       }
177       /* multiply them together into the following piece of the buffer,   */
178       /* using the result as buffer                                       */
179       mpcx_array_mul_karatsuba (buff_local, f_local, g_local, m_local,
180          n_local, 1, 1, h);
181       /* buff_local contains ((f_0 + f_1)*(g_0 + g1)) (X^2); this must b  */
182       /* copied into h, multiplied by X                                   */
183       tmp = h + 1;
184       for (i = 0; i < m_local + n_local - 1; i++) {
185          mpc_set (*tmp, *buff_local, MPC_RNDNN);
186          tmp += 2;
187          buff_local++;
188       }
189 
190       /* multiply f0 and g0 into the beginning of the buffer, using the   */
191       /* remaining piece of buffer                                        */
192       /* m_local and n_local remain as before.                            */
193       mpcx_array_mul_karatsuba (buff, f, g, m_local, n_local,
194          2*offm, 2*offn, buff + m_local + n_local - 1);
195       /* buff contains (f_0*g_0) (X^2); this must be put into h; and      */
196       /* subtracted, multiplied by X                                      */
197       tmp = h;
198       h_local = buff;
199       for (i = 0; i < m_local + n_local - 1; i++) {
200          mpc_set (*tmp, *h_local, MPC_RNDNN);
201          tmp++;
202          mpc_sub (*tmp, *tmp, *h_local, MPC_RNDNN);
203          tmp++;
204          h_local++;
205       }
206       /* set the remaining even coefficient to 0 */
207       if (m % 2 == 0 && n % 2 == 0)
208          mpc_set_ui (h [2 * (m_local + n_local - 1)], 0, MPC_RNDNN);
209 
210       /* multiply f1 and g1 into the beginning of the buffer, using the   */
211       /* remaining piece of buffer                                        */
212       m_local = m/2;
213       n_local = n/2;
214       mpcx_array_mul_karatsuba (buff, f+offm, g+offn, m_local, n_local,
215          2*offm, 2*offn, buff + m_local + n_local - 1);
216       /* buff contains (f_1*g_1) (X^2); this must be added to h,          */
217       /* multiplied by X^2; and subtracted, multiplied by X               */
218       tmp = h + 1;
219       h_local = buff;
220       for (i = 0; i < m_local + n_local - 1; i++) {
221          mpc_sub (*tmp, *tmp, *h_local, MPC_RNDNN);
222          tmp++;
223          mpc_add (*tmp, *tmp, *h_local, MPC_RNDNN);
224          tmp++;
225          h_local++;
226       }
227    }
228 }
229 
230 /**************************************************************************/
231 
mpcx_array_mul_toomcook(mpc_t * h,mpc_t * f,mpc_t * g,const int m,const int n,const int offm,const int offn,mpc_t * buff)232 static void mpcx_array_mul_toomcook (mpc_t* h, mpc_t* f, mpc_t* g,
233    const int m, const int n, const int offm, const int offn, mpc_t* buff) {
234    /* f is a polynomial of degree m-1, g a polynomial of degree n-1,      */
235    /* represented by an array of coefficients. The coefficient of degree  */
236    /* i of f is stored at the index i*offm, similarly for g. h is         */
237    /* replaced by f times g, computed by Toom-Cook 3-way multiplication,  */
238    /* and using an offset of 1. In practice by recursion, the offsets     */
239    /* will always be powers of 2.                                         */
240    /* buff must point to a sufficiently large temporary space;            */
241    /* m + n + 117 coefficients are enough for m, n < 2^64.                */
242    /* h must be different from f and g and contain sufficiently much      */
243    /* space to hold all the coefficients.                                 */
244 
245    int    i;
246    mpc_t *f_local, *g_local, *h_local, *buff_local, *tmp, tmpc;
247    int    m_local, n_local, min1, min2, min3;
248 
249    if (m == 1)
250       for (i = 0; i < n; i++) {
251          mpc_mul (h [0], g [0], f [0], MPC_RNDNN);
252          h++;
253          g += offn;
254       }
255    else if (n == 1)
256       for (i = 0; i < m; i++) {
257          mpc_mul (h [0], f [0], g [0], MPC_RNDNN);
258          h++;
259          f += offm;
260       }
261    else if (m == 2 || n == 2 || m == 4 || n == 4)
262       mpcx_array_mul_karatsuba (h, f, g, m, n, offm, offn, buff);
263    else {
264       /* recursion */
265       mpc_init2 (tmpc, mpc_get_prec (h [0]));
266 
267       /* Write f = f_0 (X^3) + f_1 (X^3) X + f_2 (X^3) X^2, and likewise g */
268       /* Copy f_0 - f_1 + f_2 and g_0 - g_1 + g_2 into the buffer          */
269       buff_local = buff;
270       tmp = f;
271       f_local = buff_local;
272       m_local = m/3;
273       for (i = 0; i < m_local; i++) {
274          mpc_set (*buff_local, *tmp, MPC_RNDNN);
275          tmp += offm;
276          mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
277          tmp += offm;
278          mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
279          tmp += offm;
280          buff_local++;
281       }
282       if (m % 3 == 1) {
283          m_local++;
284          mpc_set (*buff_local, *tmp, MPC_RNDNN);
285          buff_local++;
286       }
287       else if (m % 3 == 2) {
288          m_local++;
289          mpc_set (*buff_local, *tmp, MPC_RNDNN);
290          tmp+= offm;
291          mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
292          buff_local++;
293       }
294       tmp = g;
295       g_local = buff_local;
296       n_local = n/3;
297       for (i = 0; i < n_local; i++) {
298          mpc_set (*buff_local, *tmp, MPC_RNDNN);
299          tmp += offn;
300          mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
301          tmp += offn;
302          mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
303          tmp += offn;
304          buff_local++;
305       }
306       if (n % 3 == 1) {
307          n_local++;
308          mpc_set (*buff_local, *tmp, MPC_RNDNN);
309          buff_local++;
310       }
311       else if (n % 3 == 2) {
312          n_local++;
313          mpc_set (*buff_local, *tmp, MPC_RNDNN);
314          tmp+= offn;
315          mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
316          buff_local++;
317       }
318       /* multiply them together into the following piece of the buffer,   */
319       /* using the remaining piece of buffer                              */
320       h_local = buff_local;
321       buff_local += m_local + n_local - 1;
322       mpcx_array_mul_toomcook (h_local, f_local, g_local, m_local,
323          n_local, 1, 1, buff_local);
324       /* compute mini, the minimal value such that X*(X^3)^i still occurs */
325       /* in the result                                                    */
326       min1 = (  (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 3) / 3
327               ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 3) / 3);
328       min2 = (  (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 4) / 3
329               ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 4) / 3);
330       min3 = (  (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 5) / 3
331               ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 5) / 3);
332       /* h_local contains the value in -1; it must be multiplied by       */
333       /* -1/3 X + 1/2 X^2 - 1/6 X^3 and put into the result.              */
334       mpc_set_ui (h [0], 0, MPC_RNDNN);
335       mpc_set_ui (h [m+n-2], 0, MPC_RNDNN);
336       tmp = h + 2;
337       buff_local = h_local;
338       for (i = 0; i <= min2; i++) {
339          mpc_div_2ui (*tmp, *buff_local, 1, MPC_RNDNN);
340          tmp += 3;
341          buff_local++;
342       }
343       tmp = h + 1;
344       buff_local = h_local;
345       for (i = 0; i <= min1; i++) {
346          mpc_div_ui (*tmp, *buff_local, 3, MPC_RNDNN);
347          mpc_neg (*tmp, *tmp, MPC_RNDNN);
348          tmp += 3;
349          buff_local++;
350       }
351       tmp = h + 3;
352       for (i = 0; i <= min3; i++) {
353          mpc_div_2ui (*tmp, *(tmp-2), 1, MPC_RNDNN);
354          tmp += 3;
355       }
356 
357       /* copy f_0 + f_1 + f_2 and g_0 + g_1 + g_2 into the buffer, by     */
358       /* adding 2*f_1 and 2*g_1 to what is already there                  */
359       buff_local = f_local;
360       tmp = f + offm;
361       for (i = 0; i < (m+1) / 3; i++) {
362          mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
363          mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
364          tmp += 3*offm;
365          buff_local++;
366       }
367       buff_local = g_local;
368       tmp = g + offm;
369       for (i = 0; i < (n+1) / 3; i++) {
370          mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
371          mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
372          tmp += 3*offm;
373          buff_local++;
374       }
375       /* multiply them together into the following piece of the buffer,   */
376       /* using the remaining piece of buffer                              */
377       h_local = buff + m_local + n_local;
378       buff_local = h_local + m_local + n_local - 1;
379       mpcx_array_mul_toomcook (h_local, f_local, g_local, m_local,
380          n_local, 1, 1, buff_local);
381       /* h_local contains the value in 1; it must be multiplied by        */
382       /* X + 1/2 X^2 - 1/2 X^3 and added to the result.                   */
383       tmp = h + 1;
384       buff_local = h_local;
385       for (i = 0; i <= min1; i++) {
386          mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
387          tmp += 3;
388          buff_local++;
389       }
390       /* divide h_local by 2 in place */
391       buff_local = h_local;
392       for (i = 0; i < m_local + n_local - 1; i++) {
393          mpc_div_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
394          buff_local++;
395       }
396       tmp = h + 2;
397       buff_local = h_local;
398       for (i = 0; i <= min2; i++) {
399          mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
400          tmp += 3;
401          buff_local++;
402       }
403       tmp = h + 3;
404       buff_local = h_local;
405       for (i = 0; i <= min3; i++) {
406          mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
407          tmp += 3;
408          buff_local++;
409       }
410 
411       /* copy f_0 + 2*f_1 + 4*f_2 and g_0 + 2*g_1 + 4*g_2 into the buffer */
412       buff_local = buff;
413       tmp = f;
414       for (i = 0; i < m/3; i++) {
415          mpc_set (*buff_local, *tmp, MPC_RNDNN);
416          tmp += offm;
417          mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
418          mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
419          tmp += offm;
420          mpc_mul_2ui (tmpc, *tmp, 2, MPC_RNDNN);
421          mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
422          tmp += offm;
423          buff_local++;
424       }
425       if (m % 3 == 1) {
426          mpc_set (*buff_local, *tmp, MPC_RNDNN);
427          buff_local++;
428       }
429       else if (m % 3 == 2) {
430          mpc_set (*buff_local, *tmp, MPC_RNDNN);
431          tmp+= offm;
432          mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
433          mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
434          buff_local++;
435       }
436       tmp = g;
437       for (i = 0; i < n/3; i++) {
438          mpc_set (*buff_local, *tmp, MPC_RNDNN);
439          tmp += offn;
440          mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
441          mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
442          tmp += offn;
443          mpc_mul_2ui (tmpc, *tmp, 2, MPC_RNDNN);
444          mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
445          tmp += offn;
446          buff_local++;
447       }
448       if (n % 3 == 1) {
449          mpc_set (*buff_local, *tmp, MPC_RNDNN);
450          buff_local++;
451       }
452       else if (n % 3 == 2) {
453          mpc_set (*buff_local, *tmp, MPC_RNDNN);
454          tmp+= offn;
455          mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
456          mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
457          buff_local++;
458       }
459       /* multiply them together into the following piece of the buffer,   */
460       /* using the remaining piece of buffer                              */
461       h_local = buff_local;
462       buff_local += m_local + n_local - 1;
463       mpcx_array_mul_toomcook (h_local, f_local, g_local, m_local,
464          n_local, 1, 1, buff_local);
465       /* h_local contains the value in 2; it must be multiplied by        */
466       /* -1/6 X + 1/6 X^3 and added to the result.                        */
467       /* divide h_local by 6 in place */
468       buff_local = h_local;
469       for (i = 0; i < m_local + n_local - 1; i++) {
470          mpc_div_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
471          mpc_div_ui (*buff_local, *buff_local, 3, MPC_RNDNN);
472          buff_local++;
473       }
474       tmp = h + 1;
475       buff_local = h_local;
476       for (i = 0; i <= min1; i++) {
477          mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
478          tmp += 3;
479          buff_local++;
480       }
481       tmp = h + 3;
482       buff_local = h_local;
483       for (i = 0; i <= min3; i++) {
484          mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
485          tmp += 3;
486          buff_local++;
487       }
488 
489       /* multiply f0 and g0 into the start of the buffer, using the       */
490       /* remaining buffer piece                                           */
491       h_local = buff;
492       buff_local = buff + m_local + n_local - 1;
493       mpcx_array_mul_toomcook (h_local, f, g, m_local, n_local, 3*offm,
494          3*offn, buff_local);
495       /* h_local contains the value in 0; it must be multiplied by        */
496       /* 1 - 1/2 X - X^2 + 1/2 X^3 and added to the result.               */
497       tmp = h;
498       buff_local = h_local;
499       for (i = 0; i < m_local + n_local - 1; i++) {
500          mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
501          tmp += 3;
502          buff_local++;
503       }
504       tmp = h + 2;
505       buff_local = h_local;
506       for (i = 0; i <= min2; i++) {
507          mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
508          tmp += 3;
509          buff_local++;
510       }
511       /* divide h_local by 2 in place */
512       buff_local = h_local;
513       for (i = 0; i < m_local + n_local - 1; i++) {
514          mpc_div_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
515          buff_local++;
516       }
517       tmp = h + 1;
518       buff_local = h_local;
519       for (i = 0; i <= min1; i++) {
520          mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
521          tmp += 3;
522          buff_local++;
523       }
524       tmp = h + 3;
525       buff_local = h_local;
526       for (i = 0; i <= min3; i++) {
527          mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
528          tmp += 3;
529          buff_local++;
530       }
531 
532       /* multiply f2 and g2 into the start of the buffer, using the       */
533       /* remaining buffer piece                                           */
534       /* Instead of ceil (m/3) and ceil (n/3), these now have floor (m/3) */
535       /* and floor (n/3) coefficients.                                    */
536       m_local = m/3;
537       n_local = n/3;
538       h_local = buff;
539       buff_local = buff + m_local + n_local - 1;
540       mpcx_array_mul_toomcook (h_local, f+2*offm, g+2*offn, m_local,
541          n_local, 3*offm, 3*offn, buff_local);
542       /* h_local contains the value in infinity; it must be multiplied by */
543       /* 2 X - X^2 - 2 X^3 + X^4 and added to the result.                 */
544       /* Here, even X^4*h_local does not exceed the result.               */
545       tmp = h + 4;
546       buff_local = h_local;
547       for (i = 0; i < m_local + n_local - 1; i++) {
548          mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
549          tmp += 3;
550          buff_local++;
551       }
552       tmp = h + 2;
553       buff_local = h_local;
554       for (i = 0; i < m_local + n_local - 1; i++) {
555          mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
556          tmp += 3;
557          buff_local++;
558       }
559       /* multiply h_local by 2 in place */
560       buff_local = h_local;
561       for (i = 0; i < m_local + n_local - 1; i++) {
562          mpc_mul_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
563          buff_local++;
564       }
565       tmp = h + 1;
566       buff_local = h_local;
567       for (i = 0; i < m_local + n_local - 1; i++) {
568          mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
569          tmp += 3;
570          buff_local++;
571       }
572       tmp = h + 3;
573       buff_local = h_local;
574       for (i = 0; i < m_local + n_local - 1; i++) {
575          mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
576          tmp += 3;
577          buff_local++;
578       }
579 
580       mpc_clear (tmpc);
581    }
582 }
583 
584 /**************************************************************************/
585 
mpcx_array_mul(mpc_t * h,mpc_t * f,mpc_t * g,const int m,const int n)586 static void mpcx_array_mul (mpc_t* h, mpc_t* f, mpc_t* g,
587                             const int m, const int n)
588    /* f is a polynomial of degree m-1, g a polynomial of degree n-1,      */
589    /* represented by an array of coefficients. h is replaced by f times g.*/
590    /* h is expected to be different from f and g and must contain         */
591    /* sufficiently much space to hold all the coefficients.               */
592 
593 {
594    const int min = (m < n ? m : n), max = (m < n ? n : m);
595 
596    if (min >= MPCX_FFT_THRESHOLD && min < MPCX_NOFFT_THRESHOLD)
597       if (min != max) {
598          /* Check whether splitting the larger polynomial into smaller pieces
599             is useful. We target an FFT of order the smallest power of 2
600             above 2*min. */
601          int length = 1, pieces;
602          while (length < min)
603             length *= 2;
604          length *= 2;
605          length = length + 1 - min;
606          pieces = (max + length - 1) / length;
607          length = (max + pieces - 1) / pieces;
608 
609          if (pieces > 1) {
610             int i, j;
611             mpc_t *minpol = (m < n ? f : g);
612             mpc_t *maxpol = (m < n ? g : f);
613 
614             int current = length;
615             mpcx_t buffer;
616             int bufsize = min + length - 1;
617             mpcx_init (buffer, bufsize, mpc_get_prec (h [0]));
618 
619             /* clear out the result */
620             for (i = 0; i < min + max - 1; i++)
621                mpc_set_ui (h [i], 0, MPC_RNDNN);
622 
623             /* multiply minpol by pieces of maxpol and add to result */
624             for (i = 0; i < pieces; i++) {
625                /* The last piece may be shorter than length. */
626                if (i == pieces - 1)
627                   current = max - (pieces - 1) * length;
628                mpcx_array_mul_fft (buffer->coeff, minpol, maxpol + i * length,
629                                  min, current);
630                for (j = 0; j < min + current - 1; j++)
631                   mpc_add (h [i * length + j], h [i * length + j],
632                            buffer->coeff [j], MPC_RNDNN);
633             }
634 
635             mpcx_clear (buffer);
636          }
637          else
638             mpcx_array_mul_fft (h, f, g, m, n);
639       }
640       else
641          mpcx_array_mul_fft (h, f, g, m, n);
642    else {
643       mpcx_t buffer;
644       int bufsize;
645       /* For Karatsuba, we would need m + n + 1 coefficients. */
646 /*
647       mpcx_init (buffer, m + n + 1, mpc_get_prec (h [0]->re));
648 */
649       /* For Toom-Cook, we may need up to
650          m + n + 3 floor (min (log_3 (m-1), log_3 (n-1)) - 1). */
651       bufsize = m + n +
652                 (min <= 82 ? 9 :
653                 (min <= 1162261468 ? 53 : 117));
654       mpcx_init (buffer, bufsize, mpc_get_prec (h [0]));
655       mpcx_array_mul_toomcook (h, f, g, m, n, 1, 1, buffer->coeff);
656       mpcx_clear (buffer);
657    }
658 }
659