1 /*
2 Copyright (C) 2011 Sebastian Pancratz
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include <gmp.h>
13 #include "flint.h"
14 #include "fmpz_mod_poly.h"
15
16 /*
17 Let i be such that 2^{i} < len1 <= 2^{i+1}.
18
19 Note that the jth step of the recursion requires temporary space
20 of size no more than (len2 - 1)(2^j - 1) + 1. Note the smallest
21 step j=0 doesn't require any temporary space and the largest step
22 has j = i, and hence the sum is
23
24 sum_{j=1}^i [(len2 - 1) (2^j - 1) + 1]
25
26 = (len2 - 1)(2^{i+1} - 2) - (len2 - 2) i
27 */
28
_fmpz_mod_poly_compose_divconquer_recursive(fmpz * res,const fmpz * poly1,slong len1,fmpz ** pow2,slong len2,fmpz * v,const fmpz_t p)29 void _fmpz_mod_poly_compose_divconquer_recursive(fmpz *res,
30 const fmpz *poly1, slong len1, fmpz **pow2, slong len2, fmpz *v,
31 const fmpz_t p)
32 {
33 if (len1 == 1)
34 {
35 fmpz_set(res, poly1);
36 }
37 else if (len1 == 2)
38 {
39 _fmpz_mod_poly_scalar_mul_fmpz(res, pow2[0], len2, poly1 + 1, p);
40 fmpz_add(res, res, poly1);
41 if (fmpz_cmpabs(res, p) >= 0)
42 fmpz_sub(res, res, p);
43 }
44 else
45 {
46 const slong i = FLINT_BIT_COUNT(len1 - 1) - 1;
47 fmpz *w = v + ((WORD(1) << i) - 1) * (len2 - 1) + 1;
48
49 _fmpz_mod_poly_compose_divconquer_recursive(v,
50 poly1 + (WORD(1) << i), len1 - (WORD(1) << i), pow2, len2, w, p);
51
52 _fmpz_mod_poly_mul(res, pow2[i], (len2 - 1) * (WORD(1) << i) + 1,
53 v, (len2 - 1) * (len1 - (WORD(1) << i) - 1) + 1, p);
54
55 _fmpz_mod_poly_compose_divconquer_recursive(v, poly1, WORD(1) << i,
56 pow2, len2, w, p);
57
58 _fmpz_mod_poly_add(res, res, (len2 - 1) * ((WORD(1) << i) - 1) + 1,
59 v, (len2 - 1) * ((WORD(1) << i) - 1) + 1, p);
60 }
61 }
62
_fmpz_mod_poly_compose_divconquer(fmpz * res,const fmpz * poly1,slong len1,const fmpz * poly2,slong len2,const fmpz_t p)63 void _fmpz_mod_poly_compose_divconquer(fmpz *res,
64 const fmpz *poly1, slong len1,
65 const fmpz *poly2, slong len2,
66 const fmpz_t p)
67 {
68 if (len1 == 1 || len2 == 0)
69 {
70 fmpz_set(res, poly1);
71 }
72 else
73 {
74 const slong k = FLINT_BIT_COUNT(len1 - 1);
75 const slong lenV = len2 * ((WORD(1) << k) - 1) + k;
76 const slong lenW = (len2 - 1) * ((WORD(1) << k) - 2) - (len2 - 2) * (k-1);
77 slong i;
78 fmpz *v, *w, **pow2;
79
80 v = _fmpz_vec_init(lenV + lenW);
81 w = v + lenV;
82 pow2 = flint_malloc(k * sizeof(fmpz *));
83
84 for (i = 0; i < k; i++)
85 {
86 pow2[i] = v + (len2 * ((WORD(1) << i) - 1) + i);
87 }
88
89 _fmpz_vec_set(pow2[0], poly2, len2);
90 for (i = 1; i < k; i++)
91 {
92 _fmpz_mod_poly_sqr(pow2[i],
93 pow2[i-1], (len2 - 1) * (WORD(1) << (i - 1)) + 1, p);
94 }
95
96 _fmpz_mod_poly_compose_divconquer_recursive(res, poly1, len1,
97 pow2, len2, w, p);
98
99 _fmpz_vec_clear(v, lenV + lenW);
100 flint_free(pow2);
101 }
102 }
103
fmpz_mod_poly_compose_divconquer(fmpz_mod_poly_t res,const fmpz_mod_poly_t poly1,const fmpz_mod_poly_t poly2)104 void fmpz_mod_poly_compose_divconquer(fmpz_mod_poly_t res,
105 const fmpz_mod_poly_t poly1,
106 const fmpz_mod_poly_t poly2)
107 {
108 const slong len1 = poly1->length;
109 const slong len2 = poly2->length;
110
111 if (len1 == 0)
112 {
113 fmpz_mod_poly_zero(res);
114 }
115 else if (len1 == 1 || len2 == 0)
116 {
117 fmpz_mod_poly_set_fmpz(res, poly1->coeffs);
118 }
119 else
120 {
121 const slong lenr = (len1 - 1) * (len2 - 1) + 1;
122
123 if ((res != poly1) && (res != poly2))
124 {
125 fmpz_mod_poly_fit_length(res, lenr);
126 _fmpz_mod_poly_compose_divconquer(res->coeffs, poly1->coeffs, len1,
127 poly2->coeffs, len2,
128 &(res->p));
129 }
130 else
131 {
132 fmpz *t = _fmpz_vec_init(lenr);
133
134 _fmpz_mod_poly_compose_divconquer(t, poly1->coeffs, len1,
135 poly2->coeffs, len2, &(res->p));
136 _fmpz_vec_clear(res->coeffs, res->alloc);
137 res->coeffs = t;
138 res->alloc = lenr;
139 res->length = lenr;
140 }
141
142 _fmpz_mod_poly_set_length(res, lenr);
143 _fmpz_mod_poly_normalise(res);
144 }
145 }
146
147