1 /*
2     Copyright (C) 2011 Sebastian Pancratz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include <gmp.h>
13 #include "flint.h"
14 #include "fmpz_mod_poly.h"
15 
16 /*
17     Let i be such that 2^{i} < len1 <= 2^{i+1}.
18 
19     Note that the jth step of the recursion requires temporary space
20     of size no more than (len2 - 1)(2^j - 1) + 1.  Note the smallest
21     step j=0 doesn't require any temporary space and the largest step
22     has j = i, and hence the sum is
23 
24         sum_{j=1}^i [(len2 - 1) (2^j - 1) + 1]
25 
26       = (len2 - 1)(2^{i+1} - 2) - (len2 - 2) i
27  */
28 
_fmpz_mod_poly_compose_divconquer_recursive(fmpz * res,const fmpz * poly1,slong len1,fmpz ** pow2,slong len2,fmpz * v,const fmpz_t p)29 void _fmpz_mod_poly_compose_divconquer_recursive(fmpz *res,
30     const fmpz *poly1, slong len1, fmpz **pow2, slong len2, fmpz *v,
31     const fmpz_t p)
32 {
33     if (len1 == 1)
34     {
35         fmpz_set(res, poly1);
36     }
37     else if (len1 == 2)
38     {
39         _fmpz_mod_poly_scalar_mul_fmpz(res, pow2[0], len2, poly1 + 1, p);
40         fmpz_add(res, res, poly1);
41         if (fmpz_cmpabs(res, p) >= 0)
42             fmpz_sub(res, res, p);
43     }
44     else
45     {
46         const slong i = FLINT_BIT_COUNT(len1 - 1) - 1;
47         fmpz *w = v + ((WORD(1) << i) - 1) * (len2 - 1) + 1;
48 
49         _fmpz_mod_poly_compose_divconquer_recursive(v,
50             poly1 + (WORD(1) << i), len1 - (WORD(1) << i), pow2, len2, w, p);
51 
52         _fmpz_mod_poly_mul(res, pow2[i], (len2 - 1) * (WORD(1) << i) + 1,
53                                 v, (len2 - 1) * (len1 - (WORD(1) << i) - 1) + 1, p);
54 
55         _fmpz_mod_poly_compose_divconquer_recursive(v, poly1, WORD(1) << i,
56                                                        pow2, len2, w, p);
57 
58         _fmpz_mod_poly_add(res, res, (len2 - 1) * ((WORD(1) << i) - 1) + 1,
59                                   v, (len2 - 1) * ((WORD(1) << i) - 1) + 1, p);
60     }
61 }
62 
_fmpz_mod_poly_compose_divconquer(fmpz * res,const fmpz * poly1,slong len1,const fmpz * poly2,slong len2,const fmpz_t p)63 void _fmpz_mod_poly_compose_divconquer(fmpz *res,
64                                        const fmpz *poly1, slong len1,
65                                        const fmpz *poly2, slong len2,
66                                        const fmpz_t p)
67 {
68     if (len1 == 1 || len2 == 0)
69     {
70         fmpz_set(res, poly1);
71     }
72     else
73     {
74         const slong k = FLINT_BIT_COUNT(len1 - 1);
75         const slong lenV = len2 * ((WORD(1) << k) - 1) + k;
76         const slong lenW = (len2 - 1) * ((WORD(1) << k) - 2) - (len2 - 2) * (k-1);
77         slong i;
78         fmpz *v, *w, **pow2;
79 
80         v    = _fmpz_vec_init(lenV + lenW);
81         w    = v + lenV;
82         pow2 = flint_malloc(k * sizeof(fmpz *));
83 
84         for (i = 0; i < k; i++)
85         {
86             pow2[i] = v + (len2 * ((WORD(1) << i) - 1) + i);
87         }
88 
89         _fmpz_vec_set(pow2[0], poly2, len2);
90         for (i = 1; i < k; i++)
91         {
92             _fmpz_mod_poly_sqr(pow2[i],
93                                pow2[i-1], (len2 - 1) * (WORD(1) << (i - 1)) + 1, p);
94         }
95 
96         _fmpz_mod_poly_compose_divconquer_recursive(res, poly1, len1,
97                                                          pow2, len2, w, p);
98 
99         _fmpz_vec_clear(v, lenV + lenW);
100         flint_free(pow2);
101     }
102 }
103 
fmpz_mod_poly_compose_divconquer(fmpz_mod_poly_t res,const fmpz_mod_poly_t poly1,const fmpz_mod_poly_t poly2)104 void fmpz_mod_poly_compose_divconquer(fmpz_mod_poly_t res,
105                                       const fmpz_mod_poly_t poly1,
106                                       const fmpz_mod_poly_t poly2)
107 {
108     const slong len1 = poly1->length;
109     const slong len2 = poly2->length;
110 
111     if (len1 == 0)
112     {
113         fmpz_mod_poly_zero(res);
114     }
115     else if (len1 == 1 || len2 == 0)
116     {
117         fmpz_mod_poly_set_fmpz(res, poly1->coeffs);
118     }
119     else
120     {
121         const slong lenr = (len1 - 1) * (len2 - 1) + 1;
122 
123         if ((res != poly1) && (res != poly2))
124         {
125             fmpz_mod_poly_fit_length(res, lenr);
126             _fmpz_mod_poly_compose_divconquer(res->coeffs, poly1->coeffs, len1,
127                                                            poly2->coeffs, len2,
128                                                            &(res->p));
129         }
130         else
131         {
132             fmpz *t = _fmpz_vec_init(lenr);
133 
134             _fmpz_mod_poly_compose_divconquer(t, poly1->coeffs, len1,
135                                                  poly2->coeffs, len2, &(res->p));
136             _fmpz_vec_clear(res->coeffs, res->alloc);
137             res->coeffs = t;
138             res->alloc  = lenr;
139             res->length = lenr;
140         }
141 
142         _fmpz_mod_poly_set_length(res, lenr);
143         _fmpz_mod_poly_normalise(res);
144     }
145 }
146 
147