1 /*
2     Copyright (C) 2010 William Hart
3     Copyright (C) 2012 Sebastian Pancratz
4     Copyright (C) 2012 Fredrik Johansson
5 
6     This file is part of Arb.
7 
8     Arb is free software: you can redistribute it and/or modify it under
9     the terms of the GNU Lesser General Public License (LGPL) as published
10     by the Free Software Foundation; either version 2.1 of the License, or
11     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
12 */
13 
14 #include "acb_poly.h"
15 
16 void
_acb_poly_compose_divconquer(acb_ptr res,acb_srcptr poly1,slong len1,acb_srcptr poly2,slong len2,slong prec)17 _acb_poly_compose_divconquer(acb_ptr res, acb_srcptr poly1, slong len1,
18                                           acb_srcptr poly2, slong len2, slong prec)
19 {
20     slong i, j, k, n;
21     slong *hlen, alloc, powlen;
22     acb_ptr v, pow, temp;
23     acb_ptr * h;
24 
25     if (len1 == 1)
26     {
27         acb_set(res, poly1);
28         return;
29     }
30     if (len2 == 1)
31     {
32         _acb_poly_evaluate(res, poly1, len1, poly2, prec);
33         return;
34     }
35     if (len1 == 2)
36     {
37         _acb_poly_compose_horner(res, poly1, len1, poly2, len2, prec);
38         return;
39     }
40 
41     /* Initialisation */
42 
43     hlen = (slong *) flint_malloc(((len1 + 1) / 2) * sizeof(slong));
44 
45     for (k = 1; (2 << k) < len1; k++) ;
46 
47     hlen[0] = hlen[1] = ((1 << k) - 1) * (len2 - 1) + 1;
48     for (i = k - 1; i > 0; i--)
49     {
50         slong hi = (len1 + (1 << i) - 1) / (1 << i);
51         for (n = (hi + 1) / 2; n < hi; n++)
52             hlen[n] = ((1 << i) - 1) * (len2 - 1) + 1;
53     }
54     powlen = (1 << k) * (len2 - 1) + 1;
55 
56     alloc = 0;
57     for (i = 0; i < (len1 + 1) / 2; i++)
58         alloc += hlen[i];
59 
60     v = _acb_vec_init(alloc + 2 * powlen);
61     h = (acb_ptr *) flint_malloc(((len1 + 1) / 2) * sizeof(acb_ptr));
62     h[0] = v;
63     for (i = 0; i < (len1 - 1) / 2; i++)
64     {
65         h[i + 1] = h[i] + hlen[i];
66         hlen[i] = 0;
67     }
68     hlen[(len1 - 1) / 2] = 0;
69     pow = v + alloc;
70     temp = pow + powlen;
71 
72     /* Let's start the actual work */
73 
74     for (i = 0, j = 0; i < len1 / 2; i++, j += 2)
75     {
76         if (!acb_is_zero(poly1 + j + 1))
77         {
78             _acb_vec_scalar_mul(h[i], poly2, len2, poly1 + j + 1, prec);
79             acb_add(h[i], h[i], poly1 + j, prec);
80             hlen[i] = len2;
81         }
82         else if (!acb_is_zero(poly1 + j))
83         {
84             acb_set(h[i], poly1 + j);
85             hlen[i] = 1;
86         }
87     }
88     if ((len1 & WORD(1)))
89     {
90         if (!acb_is_zero(poly1 + j))
91         {
92             acb_set(h[i], poly1 + j);
93             hlen[i] = 1;
94         }
95     }
96 
97     _acb_poly_mul(pow, poly2, len2, poly2, len2, prec);
98     powlen = 2 * len2 - 1;
99 
100     for (n = (len1 + 1) / 2; n > 2; n = (n + 1) / 2)
101     {
102         if (hlen[1] > 0)
103         {
104             slong templen = powlen + hlen[1] - 1;
105             _acb_poly_mul(temp, pow, powlen, h[1], hlen[1], prec);
106             _acb_poly_add(h[0], temp, templen, h[0], hlen[0], prec);
107             hlen[0] = FLINT_MAX(hlen[0], templen);
108         }
109 
110         for (i = 1; i < n / 2; i++)
111         {
112             if (hlen[2*i + 1] > 0)
113             {
114                 _acb_poly_mul(h[i], pow, powlen, h[2*i + 1], hlen[2*i + 1], prec);
115                 hlen[i] = hlen[2*i + 1] + powlen - 1;
116             } else
117                 hlen[i] = 0;
118             _acb_poly_add(h[i], h[i], hlen[i], h[2*i], hlen[2*i], prec);
119             hlen[i] = FLINT_MAX(hlen[i], hlen[2*i]);
120         }
121         if ((n & WORD(1)))
122         {
123             _acb_vec_set(h[i], h[2*i], hlen[2*i]);
124             hlen[i] = hlen[2*i];
125         }
126 
127         _acb_poly_mul(temp, pow, powlen, pow, powlen, prec);
128         powlen += powlen - 1;
129         {
130             acb_ptr t = pow;
131             pow = temp;
132             temp = t;
133         }
134     }
135 
136     _acb_poly_mul(res, pow, powlen, h[1], hlen[1], prec);
137     _acb_vec_add(res, res, h[0], hlen[0], prec);
138 
139     _acb_vec_clear(v, alloc + 2 * powlen);
140     flint_free(h);
141     flint_free(hlen);
142 }
143 
144 void
acb_poly_compose_divconquer(acb_poly_t res,const acb_poly_t poly1,const acb_poly_t poly2,slong prec)145 acb_poly_compose_divconquer(acb_poly_t res,
146                              const acb_poly_t poly1, const acb_poly_t poly2, slong prec)
147 {
148     const slong len1 = poly1->length;
149     const slong len2 = poly2->length;
150 
151     if (len1 == 0)
152     {
153         acb_poly_zero(res);
154     }
155     else if (len1 == 1 || len2 == 0)
156     {
157         acb_poly_set_acb(res, poly1->coeffs);
158     }
159     else
160     {
161         const slong lenr = (len1 - 1) * (len2 - 1) + 1;
162 
163         if (res != poly1 && res != poly2)
164         {
165             acb_poly_fit_length(res, lenr);
166             _acb_poly_compose_divconquer(res->coeffs, poly1->coeffs, len1,
167                                                    poly2->coeffs, len2, prec);
168         }
169         else
170         {
171             acb_poly_t t;
172             acb_poly_init2(t, lenr);
173             _acb_poly_compose_divconquer(t->coeffs, poly1->coeffs, len1,
174                                                  poly2->coeffs, len2, prec);
175             acb_poly_swap(res, t);
176             acb_poly_clear(t);
177         }
178 
179         _acb_poly_set_length(res, lenr);
180         _acb_poly_normalise(res);
181     }
182 }
183