1 /*
2 mpcx_mul
3
4 computes h = f * g
5
6 Copyright (C) 2009, 2011, 2012, 2013 Andreas Enge
7
8 This file is part of the MPFRCX Library.
9
10 The MPFRCX Library is free software; you can redistribute it and/or modify
11 it under the terms of the GNU Lesser General Public License as published by
12 the Free Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
14
15 The MPFRCX Library is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
18 License for more details.
19
20 You should have received a copy of the GNU Lesser General Public License
21 along with the MPFRCX library; see the file COPYING.LESSER. If not, write to
22 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
23 MA 02111-1307, USA.
24 */
25
26 #include "mpfrcx-impl.h"
27
28 static void mpcx_array_mul_karatsuba (mpc_t* h, mpc_t* f, mpc_t* g,
29 const int m, const int n, const int offm, const int offn, mpc_t* buff);
30 static void mpcx_array_mul_toomcook (mpc_t* h, mpc_t* f, mpc_t* g,
31 const int m, const int n, const int offm, const int offn, mpc_t* buff);
32 static void mpcx_array_mul (mpc_t* h, mpc_t* f, mpc_t* g,
33 const int m, const int n);
34
35 /**************************************************************************/
36
mpcx_mul(mpcx_ptr h,mpcx_srcptr f,mpcx_srcptr g)37 void mpcx_mul (mpcx_ptr h, mpcx_srcptr f, mpcx_srcptr g) {
38 int overlap;
39 mpcx_t h_local;
40 int f_monic, g_monic, i;
41
42 if (f->deg == -1 || g->deg == -1) {
43 h->deg = -1;
44 return;
45 }
46
47 f_monic = (mpc_cmp_si (f->coeff [f->deg], 1) == 0);
48 g_monic = (mpc_cmp_si (g->coeff [g->deg], 1) == 0);
49
50 if (f_monic && f->deg == 0) {
51 mpcx_set (h, g);
52 return;
53 }
54 if (g_monic && g->deg == 0) {
55 mpcx_set (h, f);
56 return;
57 }
58
59 overlap = (h == f) || (h == g);
60 if (overlap)
61 mpcx_init (h_local, f->deg + g->deg + 1, h->prec);
62 else
63 mpcx_mv (h_local, h);
64 h_local->deg = f->deg + g->deg;
65 if (h_local->size < h_local->deg + 1)
66 mpcx_realloc (h_local, h_local->deg + 1);
67
68 if (f_monic && g_monic) {
69 mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg, g->deg);
70 /* watch out: the coefficient of X^{f->deg+g->deg-1} has not been set */
71 for (i = 0; i < f->deg - 1; i++)
72 mpc_add (h_local->coeff [i + g->deg], h_local->coeff [i + g->deg],
73 f->coeff [i], MPC_RNDNN);
74 mpc_set (h_local->coeff [f->deg + g->deg - 1], f->coeff [f->deg - 1],
75 MPC_RNDNN);
76 for (i = 0; i < g->deg; i++)
77 mpc_add (h_local->coeff [i + f->deg], h_local->coeff [i + f->deg],
78 g->coeff [i], MPC_RNDNN);
79 mpc_set_ui (h_local->coeff [h_local->deg], 1, MPC_RNDNN);
80 }
81 else if (f_monic) {
82 mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg, g->deg+1);
83 for (i = 0; i < g->deg; i++)
84 mpc_add (h_local->coeff [i + f->deg], h_local->coeff [i + f->deg],
85 g->coeff [i], MPC_RNDNN);
86 mpc_set (h_local->coeff [f->deg + g->deg], g->coeff [g->deg], MPC_RNDNN);
87 }
88 else if (g_monic) {
89 mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg+1, g->deg);
90 for (i = 0; i < f->deg; i++)
91 mpc_add (h_local->coeff [i + g->deg], h_local->coeff [i + g->deg],
92 f->coeff [i], MPC_RNDNN);
93 mpc_set (h_local->coeff [f->deg + g->deg], f->coeff [f->deg], MPC_RNDNN);
94 }
95 else
96 mpcx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg+1, g->deg+1);
97
98 if (overlap)
99 mpcx_clear (h);
100 mpcx_mv (h, h_local);
101 }
102
103 /**************************************************************************/
104 /* */
105 /* internal multiplication functions working directly on arrays of */
106 /* coefficients; together with the array, the number of coefficients is */
107 /* passed to the functions, and possibly an offset, indicating that the */
108 /* coefficient of degree i of the polynomial is in fact stored at the */
109 /* position i*offset. This facilitates the comb-like decomposition of */
110 /* polynomials during Karatsuba and Toom-Cook multiplication. */
111 /* */
112 /**************************************************************************/
113
mpcx_array_mul_karatsuba(mpc_t * h,mpc_t * f,mpc_t * g,const int m,const int n,const int offm,const int offn,mpc_t * buff)114 static void mpcx_array_mul_karatsuba (mpc_t* h, mpc_t* f, mpc_t* g,
115 const int m, const int n, const int offm, const int offn, mpc_t* buff) {
116 /* f is a polynomial of degree m-1, g a polynomial of degree n-1, */
117 /* represented by an array of coefficients. The coefficient of degree */
118 /* i of f is stored at the index i*offm, similarly for g. h is */
119 /* replaced by f times g, computed by Karatsuba multiplication, and */
120 /* using an offset of 1. In practice by recursion, the offsets will */
121 /* always be powers of 2. */
122 /* buff must point to a temporary space of m+n+1 coefficients, which */
123 /* means 2 coefficients more than the size of the result. */
124 /* h must be different from f and g and contain sufficiently much */
125 /* space to hold all the coefficients. */
126
127 int i;
128 mpc_t *f_local, *g_local, *h_local, *buff_local, *tmp;
129 int m_local, n_local;
130 if (m == 1)
131 for (i = 0; i < n; i++) {
132 mpc_mul (h [0], g [0], f [0], MPC_RNDNN);
133 h++;
134 g += offn;
135 }
136 else if (n == 1)
137 for (i = 0; i < m; i++) {
138 mpc_mul (h [0], f [0], g [0], MPC_RNDNN);
139 h++;
140 f += offm;
141 }
142 else {
143 /* recursion */
144 /* Write f = f_0 (X^2) + X f_1 (X^2), g = g_0 (X^2) + X g_1 (X^2). */
145 /* copy f_0 + f_1 and g_0 + g_1 into the buffer */
146 buff_local = buff;
147 tmp = f;
148 f_local = buff_local;
149 m_local = m/2;
150 for (i = 0; i < m_local; i++) {
151 mpc_set (*buff_local, *tmp, MPC_RNDNN);
152 tmp += offm;
153 mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
154 tmp += offm;
155 buff_local++;
156 }
157 if (m % 2 != 0) {
158 m_local++;
159 mpc_set (*buff_local, *tmp, MPC_RNDNN);
160 buff_local++;
161 }
162 tmp = g;
163 g_local = buff_local;
164 n_local = n/2;
165 for (i = 0; i < n_local; i++) {
166 mpc_set (*buff_local, *tmp, MPC_RNDNN);
167 tmp += offn;
168 mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
169 tmp += offn;
170 buff_local++;
171 }
172 if (n % 2 != 0) {
173 n_local++;
174 mpc_set (*buff_local, *tmp, MPC_RNDNN);
175 buff_local++;
176 }
177 /* multiply them together into the following piece of the buffer, */
178 /* using the result as buffer */
179 mpcx_array_mul_karatsuba (buff_local, f_local, g_local, m_local,
180 n_local, 1, 1, h);
181 /* buff_local contains ((f_0 + f_1)*(g_0 + g1)) (X^2); this must b */
182 /* copied into h, multiplied by X */
183 tmp = h + 1;
184 for (i = 0; i < m_local + n_local - 1; i++) {
185 mpc_set (*tmp, *buff_local, MPC_RNDNN);
186 tmp += 2;
187 buff_local++;
188 }
189
190 /* multiply f0 and g0 into the beginning of the buffer, using the */
191 /* remaining piece of buffer */
192 /* m_local and n_local remain as before. */
193 mpcx_array_mul_karatsuba (buff, f, g, m_local, n_local,
194 2*offm, 2*offn, buff + m_local + n_local - 1);
195 /* buff contains (f_0*g_0) (X^2); this must be put into h; and */
196 /* subtracted, multiplied by X */
197 tmp = h;
198 h_local = buff;
199 for (i = 0; i < m_local + n_local - 1; i++) {
200 mpc_set (*tmp, *h_local, MPC_RNDNN);
201 tmp++;
202 mpc_sub (*tmp, *tmp, *h_local, MPC_RNDNN);
203 tmp++;
204 h_local++;
205 }
206 /* set the remaining even coefficient to 0 */
207 if (m % 2 == 0 && n % 2 == 0)
208 mpc_set_ui (h [2 * (m_local + n_local - 1)], 0, MPC_RNDNN);
209
210 /* multiply f1 and g1 into the beginning of the buffer, using the */
211 /* remaining piece of buffer */
212 m_local = m/2;
213 n_local = n/2;
214 mpcx_array_mul_karatsuba (buff, f+offm, g+offn, m_local, n_local,
215 2*offm, 2*offn, buff + m_local + n_local - 1);
216 /* buff contains (f_1*g_1) (X^2); this must be added to h, */
217 /* multiplied by X^2; and subtracted, multiplied by X */
218 tmp = h + 1;
219 h_local = buff;
220 for (i = 0; i < m_local + n_local - 1; i++) {
221 mpc_sub (*tmp, *tmp, *h_local, MPC_RNDNN);
222 tmp++;
223 mpc_add (*tmp, *tmp, *h_local, MPC_RNDNN);
224 tmp++;
225 h_local++;
226 }
227 }
228 }
229
230 /**************************************************************************/
231
mpcx_array_mul_toomcook(mpc_t * h,mpc_t * f,mpc_t * g,const int m,const int n,const int offm,const int offn,mpc_t * buff)232 static void mpcx_array_mul_toomcook (mpc_t* h, mpc_t* f, mpc_t* g,
233 const int m, const int n, const int offm, const int offn, mpc_t* buff) {
234 /* f is a polynomial of degree m-1, g a polynomial of degree n-1, */
235 /* represented by an array of coefficients. The coefficient of degree */
236 /* i of f is stored at the index i*offm, similarly for g. h is */
237 /* replaced by f times g, computed by Toom-Cook 3-way multiplication, */
238 /* and using an offset of 1. In practice by recursion, the offsets */
239 /* will always be powers of 2. */
240 /* buff must point to a sufficiently large temporary space; */
241 /* m + n + 117 coefficients are enough for m, n < 2^64. */
242 /* h must be different from f and g and contain sufficiently much */
243 /* space to hold all the coefficients. */
244
245 int i;
246 mpc_t *f_local, *g_local, *h_local, *buff_local, *tmp, tmpc;
247 int m_local, n_local, min1, min2, min3;
248
249 if (m == 1)
250 for (i = 0; i < n; i++) {
251 mpc_mul (h [0], g [0], f [0], MPC_RNDNN);
252 h++;
253 g += offn;
254 }
255 else if (n == 1)
256 for (i = 0; i < m; i++) {
257 mpc_mul (h [0], f [0], g [0], MPC_RNDNN);
258 h++;
259 f += offm;
260 }
261 else if (m == 2 || n == 2 || m == 4 || n == 4)
262 mpcx_array_mul_karatsuba (h, f, g, m, n, offm, offn, buff);
263 else {
264 /* recursion */
265 mpc_init2 (tmpc, mpc_get_prec (h [0]));
266
267 /* Write f = f_0 (X^3) + f_1 (X^3) X + f_2 (X^3) X^2, and likewise g */
268 /* Copy f_0 - f_1 + f_2 and g_0 - g_1 + g_2 into the buffer */
269 buff_local = buff;
270 tmp = f;
271 f_local = buff_local;
272 m_local = m/3;
273 for (i = 0; i < m_local; i++) {
274 mpc_set (*buff_local, *tmp, MPC_RNDNN);
275 tmp += offm;
276 mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
277 tmp += offm;
278 mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
279 tmp += offm;
280 buff_local++;
281 }
282 if (m % 3 == 1) {
283 m_local++;
284 mpc_set (*buff_local, *tmp, MPC_RNDNN);
285 buff_local++;
286 }
287 else if (m % 3 == 2) {
288 m_local++;
289 mpc_set (*buff_local, *tmp, MPC_RNDNN);
290 tmp+= offm;
291 mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
292 buff_local++;
293 }
294 tmp = g;
295 g_local = buff_local;
296 n_local = n/3;
297 for (i = 0; i < n_local; i++) {
298 mpc_set (*buff_local, *tmp, MPC_RNDNN);
299 tmp += offn;
300 mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
301 tmp += offn;
302 mpc_add (*buff_local, *buff_local, *tmp, MPC_RNDNN);
303 tmp += offn;
304 buff_local++;
305 }
306 if (n % 3 == 1) {
307 n_local++;
308 mpc_set (*buff_local, *tmp, MPC_RNDNN);
309 buff_local++;
310 }
311 else if (n % 3 == 2) {
312 n_local++;
313 mpc_set (*buff_local, *tmp, MPC_RNDNN);
314 tmp+= offn;
315 mpc_sub (*buff_local, *buff_local, *tmp, MPC_RNDNN);
316 buff_local++;
317 }
318 /* multiply them together into the following piece of the buffer, */
319 /* using the remaining piece of buffer */
320 h_local = buff_local;
321 buff_local += m_local + n_local - 1;
322 mpcx_array_mul_toomcook (h_local, f_local, g_local, m_local,
323 n_local, 1, 1, buff_local);
324 /* compute mini, the minimal value such that X*(X^3)^i still occurs */
325 /* in the result */
326 min1 = ( (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 3) / 3
327 ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 3) / 3);
328 min2 = ( (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 4) / 3
329 ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 4) / 3);
330 min3 = ( (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 5) / 3
331 ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 5) / 3);
332 /* h_local contains the value in -1; it must be multiplied by */
333 /* -1/3 X + 1/2 X^2 - 1/6 X^3 and put into the result. */
334 mpc_set_ui (h [0], 0, MPC_RNDNN);
335 mpc_set_ui (h [m+n-2], 0, MPC_RNDNN);
336 tmp = h + 2;
337 buff_local = h_local;
338 for (i = 0; i <= min2; i++) {
339 mpc_div_2ui (*tmp, *buff_local, 1, MPC_RNDNN);
340 tmp += 3;
341 buff_local++;
342 }
343 tmp = h + 1;
344 buff_local = h_local;
345 for (i = 0; i <= min1; i++) {
346 mpc_div_ui (*tmp, *buff_local, 3, MPC_RNDNN);
347 mpc_neg (*tmp, *tmp, MPC_RNDNN);
348 tmp += 3;
349 buff_local++;
350 }
351 tmp = h + 3;
352 for (i = 0; i <= min3; i++) {
353 mpc_div_2ui (*tmp, *(tmp-2), 1, MPC_RNDNN);
354 tmp += 3;
355 }
356
357 /* copy f_0 + f_1 + f_2 and g_0 + g_1 + g_2 into the buffer, by */
358 /* adding 2*f_1 and 2*g_1 to what is already there */
359 buff_local = f_local;
360 tmp = f + offm;
361 for (i = 0; i < (m+1) / 3; i++) {
362 mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
363 mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
364 tmp += 3*offm;
365 buff_local++;
366 }
367 buff_local = g_local;
368 tmp = g + offm;
369 for (i = 0; i < (n+1) / 3; i++) {
370 mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
371 mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
372 tmp += 3*offm;
373 buff_local++;
374 }
375 /* multiply them together into the following piece of the buffer, */
376 /* using the remaining piece of buffer */
377 h_local = buff + m_local + n_local;
378 buff_local = h_local + m_local + n_local - 1;
379 mpcx_array_mul_toomcook (h_local, f_local, g_local, m_local,
380 n_local, 1, 1, buff_local);
381 /* h_local contains the value in 1; it must be multiplied by */
382 /* X + 1/2 X^2 - 1/2 X^3 and added to the result. */
383 tmp = h + 1;
384 buff_local = h_local;
385 for (i = 0; i <= min1; i++) {
386 mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
387 tmp += 3;
388 buff_local++;
389 }
390 /* divide h_local by 2 in place */
391 buff_local = h_local;
392 for (i = 0; i < m_local + n_local - 1; i++) {
393 mpc_div_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
394 buff_local++;
395 }
396 tmp = h + 2;
397 buff_local = h_local;
398 for (i = 0; i <= min2; i++) {
399 mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
400 tmp += 3;
401 buff_local++;
402 }
403 tmp = h + 3;
404 buff_local = h_local;
405 for (i = 0; i <= min3; i++) {
406 mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
407 tmp += 3;
408 buff_local++;
409 }
410
411 /* copy f_0 + 2*f_1 + 4*f_2 and g_0 + 2*g_1 + 4*g_2 into the buffer */
412 buff_local = buff;
413 tmp = f;
414 for (i = 0; i < m/3; i++) {
415 mpc_set (*buff_local, *tmp, MPC_RNDNN);
416 tmp += offm;
417 mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
418 mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
419 tmp += offm;
420 mpc_mul_2ui (tmpc, *tmp, 2, MPC_RNDNN);
421 mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
422 tmp += offm;
423 buff_local++;
424 }
425 if (m % 3 == 1) {
426 mpc_set (*buff_local, *tmp, MPC_RNDNN);
427 buff_local++;
428 }
429 else if (m % 3 == 2) {
430 mpc_set (*buff_local, *tmp, MPC_RNDNN);
431 tmp+= offm;
432 mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
433 mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
434 buff_local++;
435 }
436 tmp = g;
437 for (i = 0; i < n/3; i++) {
438 mpc_set (*buff_local, *tmp, MPC_RNDNN);
439 tmp += offn;
440 mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
441 mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
442 tmp += offn;
443 mpc_mul_2ui (tmpc, *tmp, 2, MPC_RNDNN);
444 mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
445 tmp += offn;
446 buff_local++;
447 }
448 if (n % 3 == 1) {
449 mpc_set (*buff_local, *tmp, MPC_RNDNN);
450 buff_local++;
451 }
452 else if (n % 3 == 2) {
453 mpc_set (*buff_local, *tmp, MPC_RNDNN);
454 tmp+= offn;
455 mpc_mul_2ui (tmpc, *tmp, 1, MPC_RNDNN);
456 mpc_add (*buff_local, *buff_local, tmpc, MPC_RNDNN);
457 buff_local++;
458 }
459 /* multiply them together into the following piece of the buffer, */
460 /* using the remaining piece of buffer */
461 h_local = buff_local;
462 buff_local += m_local + n_local - 1;
463 mpcx_array_mul_toomcook (h_local, f_local, g_local, m_local,
464 n_local, 1, 1, buff_local);
465 /* h_local contains the value in 2; it must be multiplied by */
466 /* -1/6 X + 1/6 X^3 and added to the result. */
467 /* divide h_local by 6 in place */
468 buff_local = h_local;
469 for (i = 0; i < m_local + n_local - 1; i++) {
470 mpc_div_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
471 mpc_div_ui (*buff_local, *buff_local, 3, MPC_RNDNN);
472 buff_local++;
473 }
474 tmp = h + 1;
475 buff_local = h_local;
476 for (i = 0; i <= min1; i++) {
477 mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
478 tmp += 3;
479 buff_local++;
480 }
481 tmp = h + 3;
482 buff_local = h_local;
483 for (i = 0; i <= min3; i++) {
484 mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
485 tmp += 3;
486 buff_local++;
487 }
488
489 /* multiply f0 and g0 into the start of the buffer, using the */
490 /* remaining buffer piece */
491 h_local = buff;
492 buff_local = buff + m_local + n_local - 1;
493 mpcx_array_mul_toomcook (h_local, f, g, m_local, n_local, 3*offm,
494 3*offn, buff_local);
495 /* h_local contains the value in 0; it must be multiplied by */
496 /* 1 - 1/2 X - X^2 + 1/2 X^3 and added to the result. */
497 tmp = h;
498 buff_local = h_local;
499 for (i = 0; i < m_local + n_local - 1; i++) {
500 mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
501 tmp += 3;
502 buff_local++;
503 }
504 tmp = h + 2;
505 buff_local = h_local;
506 for (i = 0; i <= min2; i++) {
507 mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
508 tmp += 3;
509 buff_local++;
510 }
511 /* divide h_local by 2 in place */
512 buff_local = h_local;
513 for (i = 0; i < m_local + n_local - 1; i++) {
514 mpc_div_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
515 buff_local++;
516 }
517 tmp = h + 1;
518 buff_local = h_local;
519 for (i = 0; i <= min1; i++) {
520 mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
521 tmp += 3;
522 buff_local++;
523 }
524 tmp = h + 3;
525 buff_local = h_local;
526 for (i = 0; i <= min3; i++) {
527 mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
528 tmp += 3;
529 buff_local++;
530 }
531
532 /* multiply f2 and g2 into the start of the buffer, using the */
533 /* remaining buffer piece */
534 /* Instead of ceil (m/3) and ceil (n/3), these now have floor (m/3) */
535 /* and floor (n/3) coefficients. */
536 m_local = m/3;
537 n_local = n/3;
538 h_local = buff;
539 buff_local = buff + m_local + n_local - 1;
540 mpcx_array_mul_toomcook (h_local, f+2*offm, g+2*offn, m_local,
541 n_local, 3*offm, 3*offn, buff_local);
542 /* h_local contains the value in infinity; it must be multiplied by */
543 /* 2 X - X^2 - 2 X^3 + X^4 and added to the result. */
544 /* Here, even X^4*h_local does not exceed the result. */
545 tmp = h + 4;
546 buff_local = h_local;
547 for (i = 0; i < m_local + n_local - 1; i++) {
548 mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
549 tmp += 3;
550 buff_local++;
551 }
552 tmp = h + 2;
553 buff_local = h_local;
554 for (i = 0; i < m_local + n_local - 1; i++) {
555 mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
556 tmp += 3;
557 buff_local++;
558 }
559 /* multiply h_local by 2 in place */
560 buff_local = h_local;
561 for (i = 0; i < m_local + n_local - 1; i++) {
562 mpc_mul_2ui (*buff_local, *buff_local, 1, MPC_RNDNN);
563 buff_local++;
564 }
565 tmp = h + 1;
566 buff_local = h_local;
567 for (i = 0; i < m_local + n_local - 1; i++) {
568 mpc_add (*tmp, *tmp, *buff_local, MPC_RNDNN);
569 tmp += 3;
570 buff_local++;
571 }
572 tmp = h + 3;
573 buff_local = h_local;
574 for (i = 0; i < m_local + n_local - 1; i++) {
575 mpc_sub (*tmp, *tmp, *buff_local, MPC_RNDNN);
576 tmp += 3;
577 buff_local++;
578 }
579
580 mpc_clear (tmpc);
581 }
582 }
583
584 /**************************************************************************/
585
mpcx_array_mul(mpc_t * h,mpc_t * f,mpc_t * g,const int m,const int n)586 static void mpcx_array_mul (mpc_t* h, mpc_t* f, mpc_t* g,
587 const int m, const int n)
588 /* f is a polynomial of degree m-1, g a polynomial of degree n-1, */
589 /* represented by an array of coefficients. h is replaced by f times g.*/
590 /* h is expected to be different from f and g and must contain */
591 /* sufficiently much space to hold all the coefficients. */
592
593 {
594 const int min = (m < n ? m : n), max = (m < n ? n : m);
595
596 if (min >= MPCX_FFT_THRESHOLD && min < MPCX_NOFFT_THRESHOLD)
597 if (min != max) {
598 /* Check whether splitting the larger polynomial into smaller pieces
599 is useful. We target an FFT of order the smallest power of 2
600 above 2*min. */
601 int length = 1, pieces;
602 while (length < min)
603 length *= 2;
604 length *= 2;
605 length = length + 1 - min;
606 pieces = (max + length - 1) / length;
607 length = (max + pieces - 1) / pieces;
608
609 if (pieces > 1) {
610 int i, j;
611 mpc_t *minpol = (m < n ? f : g);
612 mpc_t *maxpol = (m < n ? g : f);
613
614 int current = length;
615 mpcx_t buffer;
616 int bufsize = min + length - 1;
617 mpcx_init (buffer, bufsize, mpc_get_prec (h [0]));
618
619 /* clear out the result */
620 for (i = 0; i < min + max - 1; i++)
621 mpc_set_ui (h [i], 0, MPC_RNDNN);
622
623 /* multiply minpol by pieces of maxpol and add to result */
624 for (i = 0; i < pieces; i++) {
625 /* The last piece may be shorter than length. */
626 if (i == pieces - 1)
627 current = max - (pieces - 1) * length;
628 mpcx_array_mul_fft (buffer->coeff, minpol, maxpol + i * length,
629 min, current);
630 for (j = 0; j < min + current - 1; j++)
631 mpc_add (h [i * length + j], h [i * length + j],
632 buffer->coeff [j], MPC_RNDNN);
633 }
634
635 mpcx_clear (buffer);
636 }
637 else
638 mpcx_array_mul_fft (h, f, g, m, n);
639 }
640 else
641 mpcx_array_mul_fft (h, f, g, m, n);
642 else {
643 mpcx_t buffer;
644 int bufsize;
645 /* For Karatsuba, we would need m + n + 1 coefficients. */
646 /*
647 mpcx_init (buffer, m + n + 1, mpc_get_prec (h [0]->re));
648 */
649 /* For Toom-Cook, we may need up to
650 m + n + 3 floor (min (log_3 (m-1), log_3 (n-1)) - 1). */
651 bufsize = m + n +
652 (min <= 82 ? 9 :
653 (min <= 1162261468 ? 53 : 117));
654 mpcx_init (buffer, bufsize, mpc_get_prec (h [0]));
655 mpcx_array_mul_toomcook (h, f, g, m, n, 1, 1, buffer->coeff);
656 mpcx_clear (buffer);
657 }
658 }
659