1 //==============================================================================================
2 //
3 // This file is part of LiDIA --- a library for computational number theory
4 //
5 // Copyright (c) 1994--2001 the LiDIA Group. All rights reserved.
6 //
7 // See http://www.informatik.tu-darmstadt.de/TI/LiDIA/
8 //
9 //----------------------------------------------------------------------------------------------
10 //
11 // $Id$
12 //
13 // Author : Victor Shoup (VS), Thomas Pfahler (TPf)
14 // Changes : See CVS log
15 //
16 //==============================================================================================
17
18
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22 #include "LiDIA/Fp_polynomial.h"
23 #include "LiDIA/finite_fields/Fp_polynomial_fft.h"
24 #include "LiDIA/crt.h"
25
26
27
28 #ifdef LIDIA_NAMESPACE
29 namespace LiDIA {
30 #endif
31
32
33
34 //***************************************************************
35 // This File contains the implementation of the functions
36 // - fft_mul, fft_sqr
37 // - fft_rem, fft_div, fft_div_rem (and copy_reverse)
38 // - newton_inv (and class poly_mod_rep)
39 // - build_from_roots (and two auxiliary functions)
40 //***************************************************************
41
42
43
44 //***************************************************************
45 // fft_mul, fft_sqr
46 //***************************************************************
47
fft_mul(Fp_polynomial & x,const Fp_polynomial & a,const Fp_polynomial & b)48 void fft_mul(Fp_polynomial& x, const Fp_polynomial& a, const Fp_polynomial& b)
49 {
50 debug_handler("fftmul.c", "fft_mul(Fp_polynomial&, Fp_polynomial&, Fp_polynomial&)");
51
52 lidia_size_t deg_a = a.degree();
53 lidia_size_t deg_b = b.degree();
54
55 if (deg_a < 0 || deg_b < 0) {
56 //zero polynomial has degree -1
57 x.set_modulus(a); //assigns zero
58 return;
59 }
60
61 lidia_size_t d = deg_a + deg_b;
62 lidia_size_t k = next_power_of_two(d+1);
63
64 fft_data F(a.modulus(), k);
65
66 lidia_size_t num_primes = F.number_of_primes();
67 if (num_primes == 1) {
68 fft_rep Ra(F), Rb(F);
69 Ra.to_fft_rep(a);
70 Rb.to_fft_rep(b);
71 multiply(Ra, Ra, Rb);
72 Ra.from_fft_rep(x, 0, d);
73 }
74 else {
75 modular_fft_rep R1(F), R2(F); // more space efficient version
76 lidia_size_t index;
77 for (index = 0; index < num_primes; index++) {
78 R1.to_modular_fft_rep(a, index);
79 R2.to_modular_fft_rep(b, index);
80 multiply(R1, R1, R2, index);
81 R1.from_modular_fft_rep(0, d, index);
82 }
83 R1.get_result(x, 0, d);
84 }
85 }
86
87
88
fft_sqr(Fp_polynomial & x,const Fp_polynomial & a)89 void fft_sqr(Fp_polynomial& x, const Fp_polynomial& a)
90 {
91 debug_handler("fftmul.c", "fft_sqr(Fp_polynomial&, Fp_polynomial&)");
92
93 lidia_size_t deg_a = a.degree();
94 if (deg_a < 0) {
95 x.set_modulus(a); //assigns zero
96 return;
97 }
98
99 lidia_size_t d = 2*deg_a;
100 lidia_size_t k = next_power_of_two(d+1);
101
102 fft_data F(a.modulus(), k);
103
104 lidia_size_t num_primes = F.number_of_primes();
105 if (num_primes == 1) {
106 fft_rep Ra(F);
107 Ra.to_fft_rep(a);
108 multiply(Ra, Ra, Ra);
109 Ra.from_fft_rep(x, 0, d);
110 }
111 else {
112 modular_fft_rep R1(F); // more space efficient version
113 lidia_size_t index;
114 for (index = 0; index < num_primes; index++) {
115 R1.to_modular_fft_rep(a, index);
116 multiply(R1, R1, R1, index);
117 R1.from_modular_fft_rep(0, d, index);
118 }
119 R1.get_result(x, 0, d);
120 }
121 }
122
123
124
125 //***************************************************************
126 // fft_rem, -div, -div_rem
127 //***************************************************************
128
fft_rem(Fp_polynomial & r,const Fp_polynomial & a,const Fp_polynomial & b)129 void fft_rem(Fp_polynomial& r, const Fp_polynomial& a, const Fp_polynomial& b)
130 {
131 debug_handler("fftrem.c", "fft_rem (Fp_polynomial&, Fp_polynomial&, Fp_polynomial&)");
132
133 lidia_size_t deg_b = b.degree(), deg_a = a.degree();
134 if (deg_a < deg_b) {
135 r.assign(a);
136 return;
137 }
138
139 lidia_size_t m = deg_a - deg_b + 1;
140 Fp_polynomial P1, P2, P3;
141 copy_reverse(P3, b, 0, deg_b);
142 invert(P2, P3, m);
143 copy_reverse(P1, P2, 0, m-1);
144
145 lidia_size_t k = next_power_of_two(2*m-1);
146 lidia_size_t l = next_power_of_two(deg_b);
147 lidia_size_t index;
148 lidia_size_t mx = comparator< lidia_size_t >::max(k, l);
149
150 fft_data F(a.modulus(), mx);
151
152 lidia_size_t num_primes = F.number_of_primes();
153 if (num_primes == 1) {
154 fft_rep Ra(F), Rb(F);
155 Ra.set_size(k);
156 Rb.set_size(k);
157 Ra.to_fft_rep(P1);
158 Rb.to_fft_rep(a, deg_b, deg_a);
159 multiply(Ra, Ra, Rb);
160 Ra.from_fft_rep(P3, deg_a-deg_b, 2*(deg_a-deg_b));
161
162 Ra.set_size(l);
163 Rb.set_size(l);
164 Ra.to_fft_rep(b, 0, b.degree());
165 Rb.to_fft_rep(P3);
166 multiply(Ra, Ra, Rb);
167 Ra.from_fft_rep(P3, 0, deg_b-1);
168 }
169 else {
170 modular_fft_rep R1(F); // more space efficient version
171 modular_fft_rep R2(F);
172
173 R1.set_size(k);
174 R2.set_size(k);
175 for (index = 0; index < num_primes; index++) {
176 R1.to_modular_fft_rep(P1, index);
177 R2.to_modular_fft_rep(a, deg_b, deg_a, index);
178 multiply(R1, R1, R2, index);
179 R1.from_modular_fft_rep(deg_a-deg_b, 2*(deg_a-deg_b), index);
180 }
181 R1.get_result(P3, deg_a-deg_b, 2*(deg_a-deg_b));
182
183 R1.set_size(l);
184 R2.set_size(l);
185 for (index = 0; index < num_primes; index++) {
186 R1.to_modular_fft_rep(b, 0, b.degree(), index);
187 R2.to_modular_fft_rep(P3, index);
188 multiply(R1, R1, R2, index);
189 R1.from_modular_fft_rep(0, deg_b-1, index);
190 }
191 R1.get_result(P3, 0, deg_b-1);
192 }
193
194 lidia_size_t L = 1 << l;
195 cyclic_reduce(P2, a, L);
196 trunc(r, P2, deg_b);
197 subtract(r, r, P3);
198 }
199
200
201
fft_div(Fp_polynomial & q,const Fp_polynomial & a,const Fp_polynomial & b)202 void fft_div(Fp_polynomial& q, const Fp_polynomial& a, const Fp_polynomial& b)
203 {
204 debug_handler("fftrem.c", "fft_div (Fp_polynomial&, Fp_polynomial&, Fp_polynomial&)");
205
206 lidia_size_t deg_b = b.degree(), deg_a = a.degree();
207 if (deg_a < deg_b) {
208 q.set_modulus(b); //assigns zero
209 return;
210 }
211
212 lidia_size_t m = deg_a - deg_b + 1;
213 Fp_polynomial P1, P2, P3;
214 copy_reverse(P3, b, 0, deg_b);
215 invert(P2, P3, m);
216 copy_reverse(P1, P2, 0, m-1);
217
218 lidia_size_t k = next_power_of_two(2*m-1);
219
220 fft_data F(a.modulus(), k);
221
222 lidia_size_t num_primes = F.number_of_primes();
223 if (num_primes == 1) {
224 fft_rep Ra(F), Rb(F);
225 Ra.set_size(k);
226 Rb.set_size(k);
227 Ra.to_fft_rep(P1);
228 Rb.to_fft_rep(a, deg_b, deg_a);
229 multiply(Ra, Ra, Rb);
230 Ra.from_fft_rep(q, deg_a-deg_b, 2*(deg_a-deg_b));
231 }
232 else {
233 modular_fft_rep R1(F); // more space efficient version
234 modular_fft_rep R2(F);
235
236 lidia_size_t index;
237 for (index = 0; index < num_primes; index++) {
238 R1.to_modular_fft_rep(P1, index);
239 R2.to_modular_fft_rep(a, deg_b, deg_a, index);
240
241 multiply(R1, R1, R2, index);
242 R1.from_modular_fft_rep(deg_a-deg_b, 2*(deg_a-deg_b), index);
243 }
244 R1.get_result(q, deg_a-deg_b, 2*(deg_a-deg_b));
245 }
246 }
247
248
249
fft_div_rem(Fp_polynomial & q,Fp_polynomial & r,const Fp_polynomial & a,const Fp_polynomial & b)250 void fft_div_rem(Fp_polynomial& q, Fp_polynomial& r, const Fp_polynomial& a, const Fp_polynomial& b)
251 {
252 debug_handler("fftrem.c", "fft_div_rem (Fp_polynomial&, Fp_polynomial&, Fp_polynomial&, Fp_polynomial&)");
253
254 lidia_size_t deg_b = b.degree(), deg_a = a.degree();
255 if (deg_a < deg_b) {
256 q.set_modulus(b); //assigns zero
257 r.assign(a);
258 return;
259 }
260
261 Fp_polynomial P1, P2, P3;
262 copy_reverse(P3, b, 0, deg_b);
263 invert(P2, P3, deg_a-deg_b+1);
264 copy_reverse(P1, P2, 0, deg_a-deg_b);
265
266 lidia_size_t k = next_power_of_two(2*(deg_a-deg_b)+1);
267 lidia_size_t l = next_power_of_two(deg_b);
268 lidia_size_t index;
269 lidia_size_t mx = comparator< lidia_size_t >::max(k, l);
270
271 fft_data F(a.modulus(), mx);
272
273 lidia_size_t num_primes = F.number_of_primes();
274 if (num_primes == 1) {
275 fft_rep Ra(F), Rb(F);
276 Ra.set_size(k);
277 Rb.set_size(k);
278 Ra.to_fft_rep(P1);
279 Rb.to_fft_rep(a, deg_b, deg_a);
280 multiply(Ra, Ra, Rb);
281 Ra.from_fft_rep(P3, deg_a-deg_b, 2*(deg_a-deg_b));
282
283 Ra.set_size(l);
284 Rb.set_size(l);
285 Ra.to_fft_rep(b, 0, b.degree());
286 Rb.to_fft_rep(P3);
287 multiply(Ra, Ra, Rb);
288 Ra.from_fft_rep(P1, 0, deg_b-1);
289 }
290 else {
291 modular_fft_rep R1(F); // more space efficient version
292 modular_fft_rep R2(F);
293
294 R1.set_size(k);
295 R2.set_size(k);
296 for (index = 0; index < num_primes; index++) {
297 R1.to_modular_fft_rep(P1, index);
298 R2.to_modular_fft_rep(a, deg_b, deg_a, index);
299 multiply(R1, R1, R2, index);
300 R1.from_modular_fft_rep(deg_a-deg_b, 2*(deg_a-deg_b), index);
301 }
302 R1.get_result(P3, deg_a-deg_b, 2*(deg_a-deg_b));
303
304 R1.set_size(l);
305 R2.set_size(l);
306 for (index = 0; index < num_primes; index++) {
307 R1.to_modular_fft_rep(b, index);
308 R2.to_modular_fft_rep(P3, index);
309 multiply(R1, R1, R2, index);
310 R1.from_modular_fft_rep(0, deg_b-1, index);
311 }
312 R1.get_result(P1, 0, deg_b-1);
313 }
314
315 lidia_size_t L = 1 << l;
316 cyclic_reduce(P2, a, L);
317 trunc(r, P2, deg_b);
318 subtract(r, r, P1);
319
320 q.assign(P3);
321 }
322
323
324
325 // x[0..hi-lo+1] = reverse(a[lo..hi]), with zero fill
326 // input may not alias output
copy_reverse(Fp_polynomial & x,const Fp_polynomial & a,lidia_size_t lo,lidia_size_t hi)327 void copy_reverse(Fp_polynomial& x, const Fp_polynomial& a, lidia_size_t lo, lidia_size_t hi)
328 {
329 debug_handler("fftrem.c", "copy_reverse(Fp_polynomial&, Fp_polynomial&, lidia_size_t, lidia_size_t) ");
330 lidia_size_t i, j, n, m;
331
332 n = hi-lo+1;
333 m = a.degree()+1; // = a.c_length
334
335 x.set_modulus(a);
336 x.set_degree(n-1);
337
338 const bigint* ap = a.coeff;
339 bigint* xp = x.coeff;
340
341 for (i = 0; i < n; i++) {
342 j = hi-i;
343 if (j< 0 || j >= m)
344 xp[i].assign_zero();
345 else
346 xp[i] = ap[j];
347 }
348
349 x.remove_leading_zeros();
350 }
351
352
353
354 //***************************************************************
355 // newton_inv, class poly_mod_rep
356 //***************************************************************
357
358
359 class poly_mod_rep
360 {
361
362 // This data structure holds unconvoluted modular representations
363 // of polynomials
364 // used only in function newton_inv
365
366 lidia_size_t num_primes;
367 lidia_size_t size;
368 fft_prime_t **tbl;
369 fft_data fd;
370
371 poly_mod_rep();
372 poly_mod_rep(const poly_mod_rep&); // disable
373
374 public:
375 poly_mod_rep(const Fp_polynomial&, lidia_size_t, lidia_size_t,
376 const fft_data&);
377
378 ~poly_mod_rep();
379
380 friend class modular_fft_rep;
381 };
382
383
poly_mod_rep(const Fp_polynomial & a,lidia_size_t lo,lidia_size_t hi,const fft_data & F)384 poly_mod_rep::poly_mod_rep(const Fp_polynomial& a, lidia_size_t lo,
385 lidia_size_t hi, const fft_data &F) :
386 fd(F)
387 {
388 debug_handler("poly_mod_rep", "poly_mod_rep (Fp_polynomial&, lidia_size_t, lidia_size_t, fft_data&)");
389
390 if (lo < 0 || hi < 0 || F.node == 0) {
391 lidia_error_handler("poly_mod_rep", "poly_mod_rep(...)::bad args");
392 return;
393 }
394
395 num_primes = F.number_of_primes();
396
397 hi = comparator< lidia_size_t >::min(hi, a.degree());
398 size = comparator< lidia_size_t >::max(hi-lo+1, 0);
399 lidia_size_t i;
400
401 tbl = new fft_prime_t*[num_primes];
402 memory_handler(tbl, "poly_mod_rep", "poly_mod_rep(...)::"
403 "Error in memory allocation (tbl)");
404 for (i = 0; i < num_primes; i++) {
405 tbl[i] = new fft_prime_t[size];
406 memory_handler(tbl[i], "poly_mod_rep", "poly_mod_rep(...)::"
407 "Error in memory allocation (tbl[i])");
408 }
409
410 #if 0
411 crt help(*F.crt_node->ct);
412 for (i = 0; i < num_primes; i++)
413 help.reduce(tbl[i], &a.coeff[lo], size, i);
414 #else
415 for (i = 0; i < num_primes; i++) {
416 fft_prime_t q = fd.crt_node->ct->get_prime(i);
417 fft_prime_t *yp = tbl[i];
418 const bigint *ap = &a.coeff[lo];
419
420 for (lidia_size_t j = 0; j < size; j++)
421 remainder(yp[j], ap[j], q);
422 }
423 #endif
424 }
425
426
427
~poly_mod_rep()428 poly_mod_rep::~poly_mod_rep()
429 {
430 debug_handler("poly_mod_rep", "~poly_mod_rep ()");
431
432 if (tbl) //should never be zero
433 for (lidia_size_t i = 0; i < num_primes; i++)
434 delete[] tbl[i];
435 delete[] tbl;
436 }
437
438
439
to_modular_fft_rep(const poly_mod_rep & a,lidia_size_t lo,lidia_size_t hi,lidia_size_t index)440 void modular_fft_rep::to_modular_fft_rep(const poly_mod_rep &a,
441 lidia_size_t lo, lidia_size_t hi, lidia_size_t index)
442 // converts coefficients lo..hi to a 2^k-point fft_rep.
443 // must have hi-lo+1 < 2^k
444 {
445 debug_handler("modular_fft_rep", "to_modular_fft_rep(poly_mod_rep&, lidia_size_t, lidia_size_t, lidia_size_t)");
446
447 if (fd != a.fd || k < 0 || lo < 0) {
448 lidia_error_handler("modular_fft_rep", "to_modular_fft_rep"
449 "(poly_mod_rep&, lidia_size_t, lidia_size_t, lidia_size_t)::"
450 "bad args");
451 return;
452 }
453
454 if (hi > a.size - 1)
455 hi = a.size - 1;
456
457 lidia_size_t K = 1 << k;
458 lidia_size_t j, m = comparator< lidia_size_t >::max(hi-lo + 1, 0);
459
460 if (m > K) {
461 lidia_error_handler("modular_fft_rep", "to_modular_fft_rep"
462 "(poly_mod_rep&, lidia_size_t, lidia_size_t, lidia_size_t)::"
463 "hi-lo+1 is too large");
464 return;
465 }
466
467 fft_prime_t *ap = (m == 0 ? 0 : a.tbl[index]);
468
469 for (j = 0; j < m; j++)
470 stat_vec[j] = ap[lo+j];
471 for (; j < K; j++)
472 stat_vec[j] = 0;
473
474 bool ok;
475 ok = fd.node->prime[index].evaluate(vec, 0, stat_vec, K-1, k);
476 if (!ok)
477 lidia_error_handler("modular_fft_rep", "to_modular_fft_rep"
478 "(poly_mod_rep&, ...)::re-init of FFT prime");
479 }
480
481
482
newton_inv(Fp_polynomial & x,const Fp_polynomial & a,lidia_size_t m)483 void newton_inv(Fp_polynomial& x, const Fp_polynomial& a, lidia_size_t m)
484 {
485 debug_handler("fftrem.c", "newton_inv(Fp_polynomial&, Fp_polynomial&, lidia_size_t)");
486
487 x.set_modulus(a);
488 x.set_degree(m-1);
489
490 lidia_size_t index, t, k;
491 const bigint & p = a.modulus();
492 lidia_size_t crov = Fp_polynomial::crossovers.log2_newton_crossover(p);
493
494 plain_inv(x, a, (1 << crov));
495 t = next_power_of_two(m);
496
497 fft_data F(p, t);
498 fft_rep R1(F);
499 modular_fft_rep R2(F);
500 Fp_polynomial P1;
501 P1.set_max_degree(m/2 - 1);
502
503 lidia_size_t a_len = comparator< lidia_size_t >::min(m, a.c_length);
504
505 poly_mod_rep a_rep(a, 0, a_len-1, F);
506
507 t = crov;
508 k = 1 << t;
509
510 lidia_size_t num_primes = F.number_of_primes();
511 while (k < m) {
512 lidia_size_t l = comparator< lidia_size_t >::min(2*k, m);
513
514 R1.set_size(t+1);
515 R2.set_size(t+1);
516
517 R1.to_fft_rep(x);
518 for (index = 0; index < num_primes; index++) {
519 R2.to_modular_fft_rep(a_rep, 0, l-1, index);
520 multiply(R2, R1, R2, index);
521 R2.from_modular_fft_rep(k, l-1, index);
522 }
523 R2.get_result(P1, k, l-1);
524
525 R2.set_size(t+1);
526 for (index = 0; index < num_primes; index++) {
527 R2.to_modular_fft_rep(P1, index);
528 multiply(R2, R1, R2, index);
529 R2.from_modular_fft_rep(0, l-k-1, index);
530 }
531 R2.get_result(P1, 0, l-k-1);
532
533 x.set_degree(l-1);
534
535 lidia_size_t i, y_len = P1.c_length;
536 for (i = k; i < l; i++) {
537 if (i-k >= y_len)
538 (x.coeff[i]).assign_zero();
539 else
540 NegateMod(x.coeff[i], P1.coeff[i-k], p);
541 }
542 x.remove_leading_zeros();
543
544 t++;
545 k = l;
546 }
547 }
548
549
550
551 //***************************************************************
552 // build_from_roots
553 //***************************************************************
554
555 static
iter_build(bigint * a,lidia_size_t n,const bigint & p)556 void iter_build(bigint* a, lidia_size_t n, const bigint& p)
557 {
558 debug_handler("Fp_polynomial", "iter_build(bigint*, lidia_size_t, const bigint&)");
559 lidia_size_t i, k;
560 bigint b, t;
561
562 if (n <= 0) return;
563
564 NegateMod(a[0], a[0], p);
565
566 for (k = 1; k < n; k++) {
567 NegateMod(b, a[k], p);
568 AddMod(a[k], b, a[k-1], p);
569 for (i = k-1; i > 0; i--) {
570 MulMod(t, a[i], b, p);
571 AddMod(a[i], t, a[i-1], p);
572 }
573 MulMod(a[0], a[0], b, p);
574 }
575 }
576
577
578
579 static
mul_build(bigint * x,const bigint * a,const bigint * b,lidia_size_t n,const bigint & p)580 void mul_build(bigint* x, const bigint* a, const bigint* b, lidia_size_t n, const bigint& p)
581 {
582 debug_handler("Fp_polynomial", "mul_build(bigint*, const bigint*, const bigint*, lidia_size_t, const bigint&)");
583 bigint t, accum; //static
584 lidia_size_t i, j, jmin, jmax;
585
586 lidia_size_t d = 2*n-1;
587
588 for (i = 0; i <= d; i++) {
589 jmin = comparator< lidia_size_t >::max(0, i-(n-1));
590 jmax = comparator< lidia_size_t >::min(n-1, i);
591 accum.assign_zero();
592 for (j = jmin; j <= jmax; j++) {
593 multiply(t, a[j], b[i-j]);
594 add(accum, accum, t);
595 }
596 if (i >= n) {
597 add(accum, accum, a[i-n]);
598 add(accum, accum, b[i-n]);
599 }
600
601 Remainder(x[i], accum, p);
602 }
603 }
604
605
606
607 // computes the polynomial (X-a[0]) ... (X-a[n-1]), where n = a.length()
build_from_roots(const base_vector<bigint> & a)608 void Fp_polynomial::build_from_roots(const base_vector< bigint > & a)
609 {
610 debug_handler("Fp_polynomial", "build_from_roots(base_vector< bigint > &)");
611
612 const bigint& p = modulus();
613 if (p.is_zero()) {
614 lidia_error_handler("Fp_polynomial", "build_from_roots"
615 "(base_vector< bigint > &)::modulus was not set");
616 return;
617 }
618
619 lidia_size_t n = a.size();
620 if (n == 0) {
621 this->assign_one();
622 return;
623 }
624
625 lidia_size_t crov = Fp_polynomial::crossovers.fftmul_crossover(p);
626 lidia_size_t k0 = next_power_of_two(crov);
627 lidia_size_t crossover = 1 << k0;
628
629 if (n <= crossover) {
630 set_max_degree(n);
631 assign(a, p);
632 iter_build(coeff, n, p);
633 // set_degree(n);
634 set_coefficient(n);
635 return;
636 }
637
638 lidia_size_t k = next_power_of_two(n);
639 lidia_size_t m = 1 << k;
640 lidia_size_t i, j, index, l, width;
641
642 Fp_polynomial b;
643
644 b.assign(a, p);
645 b.set_degree(m);
646
647 for (i = n; i < m; i++)
648 b[i].assign_zero();
649
650 b[m].assign_one();
651
652 bigint t1, one(1);
653
654 bigint* g = new bigint[crossover];
655 memory_handler(g, "Fp_polynomial", "build_from_roots(base_vector< bigint > &"
656 ")::Error in memory allocation");
657 bigint* h = new bigint[crossover];
658 memory_handler(h, "Fp_polynomial", "build_from_roots(base_vector< bigint > &"
659 ")::Error in memory allocation");
660 bigint *tmp;
661
662
663 for (i = 0; i < m; i += crossover) {
664 for (j = 0; j < crossover; j++)
665 LiDIA::NegateMod(g[j], b[i+j], p);
666
667 if (k0 > 0) {
668 for (j = 0; j < crossover; j += 2) {
669 MulMod(t1, g[j], g[j+1], p);
670 AddMod(g[j+1], g[j], g[j+1], p);
671 g[j].assign(t1);
672 }
673 }
674
675 for (l = 1; l < k0; l++) {
676 width = 1 << l;
677 for (j = 0; j < crossover; j += 2*width)
678 mul_build(&h[j], &g[j], &g[j+width], width, p);
679 tmp = g; g = h; h = tmp;
680 }
681
682 for (j = 0; j < crossover; j++)
683 b[i+j].assign(g[j]);
684 }
685
686 fft_data F(p, k);
687 modular_fft_rep R1(F), R2(F);
688
689 for (l = k0; l < k; l++) {
690 width = 1 << l;
691 for (i = 0; i < m; i += 2*width) {
692 R1.set_size(l+1);
693 R2.set_size(l+1);
694 for (index = 0; index < F.number_of_primes(); index++) {
695 swap(one, b.coeff[i+width]);
696 //t1 = b[i+width]; (b[i+width]).assign_one();
697
698 R1.to_modular_fft_rep(b, i, i+width, index);
699
700 swap(one, b.coeff[i+width]);
701 swap(b.coeff[i+2*width], one);
702 //b[i+width] = t1; t1 = b[i+2*width]; b[i+2*width].assign_one();
703
704 R2.to_modular_fft_rep(b, i+width, i+2*width, index);
705
706 swap(b.coeff[i+2*width], one);
707 //b[i+2*width] = t1;
708
709 multiply(R1, R1, R2, index);
710 R1.from_modular_fft_rep(0, 2*width-1, index);
711 }
712 R1.get_result_ptr(&b.coeff[i], 0, 2*width-1);
713 SubMod(b.coeff[i], b.coeff[i], one, p);
714 //subtract(b[i], b[i], one);
715 }
716 }
717
718 set_degree(n);
719 lidia_size_t delta = m-n;
720 for (i = 0; i <= n; i++)
721 coeff[i].assign(b[i+delta]);
722
723 delete [] h; // VM
724 delete [] g; // VM
725
726 // no need to normalize
727 }
728
729
730
731 #ifdef LIDIA_NAMESPACE
732 } // end of namespace LiDIA
733 #endif
734