1 /*
2     Copyright (C) 2018 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
10 */
11 
12 #include "fmpz_mpoly.h"
13 
_fmpz_poly_pow_fmpz_is_not_feasible(const fmpz_poly_t b,const fmpz_t e)14 static int _fmpz_poly_pow_fmpz_is_not_feasible(const fmpz_poly_t b, const fmpz_t e)
15 {
16     if (b->length < 2)
17     {
18         if (b->length == 1)
19         {
20             return _fmpz_pow_fmpz_is_not_feasible(fmpz_bits(b->coeffs + 0), e);
21         }
22         else
23         {
24             return 0;
25         }
26     }
27     else
28     {
29         ulong limit = (ulong)(WORD_MAX)/(ulong)(2*sizeof(fmpz));
30         return fmpz_cmp_ui(e, limit/(ulong)(b->length)) >= 0;
31     }
32 }
33 
_fmpz_poly_pow_ui_is_not_feasible(const fmpz_poly_t b,ulong e)34 static int _fmpz_poly_pow_ui_is_not_feasible(const fmpz_poly_t b, ulong e)
35 {
36     if (b->length < 2)
37     {
38         if (b->length == 1)
39         {
40             return _fmpz_pow_ui_is_not_feasible(fmpz_bits(b->coeffs + 0), e);
41         }
42         else
43         {
44             return 0;
45         }
46     }
47     else
48     {
49         ulong limit = (ulong)(WORD_MAX)/(ulong)(2*sizeof(fmpz));
50         return e >= limit/(ulong)(b->length);
51     }
52 }
53 
_fmpz_mpoly_compose_fmpz_poly_sp(fmpz_poly_t A,const fmpz_mpoly_t B,fmpz_poly_struct * const * C,const fmpz_mpoly_ctx_t ctx)54 int _fmpz_mpoly_compose_fmpz_poly_sp(fmpz_poly_t A, const fmpz_mpoly_t B,
55                       fmpz_poly_struct * const * C, const fmpz_mpoly_ctx_t ctx)
56 {
57     int success = 1;
58     flint_bitcnt_t bits = B->bits;
59     slong i, j, k, N, nvars = ctx->minfo->nvars;
60     slong entries, k_len, shift, off;
61     slong Blen = B->length;
62     const fmpz * Bcoeff = B->coeffs;
63     const ulong * Bexp = B->exps;
64     fmpz * degrees;
65     slong * offs;
66     ulong * masks;
67     fmpz_poly_struct * powers;
68     fmpz_poly_t t, t2;
69     TMP_INIT;
70 
71     FLINT_ASSERT(Blen != 0);
72 
73     TMP_START;
74 
75     degrees = TMP_ARRAY_ALLOC(nvars, slong);
76     mpoly_degrees_si(degrees, Bexp, Blen, bits, ctx->minfo);
77 
78     /* compute how many masks are needed */
79     entries = 0;
80     for (i = 0; i < nvars; i++)
81     {
82         if (_fmpz_poly_pow_ui_is_not_feasible(C[i], degrees[i]))
83         {
84             success = 0;
85             goto cleanup_degrees;
86         }
87 
88         entries += FLINT_BIT_COUNT(degrees[i]);
89     }
90     offs = TMP_ARRAY_ALLOC(entries, slong);
91     masks = TMP_ARRAY_ALLOC(entries, ulong);
92     powers = TMP_ARRAY_ALLOC(entries, fmpz_poly_struct);
93 
94     N = mpoly_words_per_exp(bits, ctx->minfo);
95 
96     /* store bit masks for each power of two of the non-main variables */
97     k = 0;
98     for (i = 0; i < nvars; i++)
99     {
100         flint_bitcnt_t varibits = FLINT_BIT_COUNT(degrees[i]);
101 
102         mpoly_gen_offset_shift_sp(&off, &shift, i, bits, ctx->minfo);
103         for (j = 0; j < varibits; j++)
104         {
105             offs[k] = off;
106             masks[k] = UWORD(1) << (shift + j);
107             fmpz_poly_init(powers + k);
108             if (j == 0)
109                 fmpz_poly_set(powers + k, C[i]);
110             else
111                 fmpz_poly_mul(powers + k, powers + k - 1, powers + k - 1);
112             k++;
113         }
114     }
115     k_len = k;
116     FLINT_ASSERT(k_len == entries);
117 
118     /* accumulate answer */
119     fmpz_poly_zero(A);
120     fmpz_poly_init(t);
121     fmpz_poly_init(t2);
122     for (i = 0; i < Blen; i++)
123     {
124         fmpz_poly_set_fmpz(t, Bcoeff + i);
125         for (k = 0; k < k_len; k++)
126         {
127             if ((Bexp[N*i + offs[k]] & masks[k]) != WORD(0))
128             {
129                 fmpz_poly_mul(t2, t, powers + k);
130                 fmpz_poly_swap(t, t2);
131             }
132         }
133         fmpz_poly_add(A, A, t);
134     }
135     fmpz_poly_clear(t);
136     fmpz_poly_clear(t2);
137 
138     for (k = 0; k < k_len; k++)
139         fmpz_poly_clear(powers + k);
140 
141 cleanup_degrees:
142 
143     TMP_END;
144 
145     return success;
146 }
147 
148 
_fmpz_mpoly_compose_fmpz_poly_mp(fmpz_poly_t A,const fmpz_mpoly_t B,fmpz_poly_struct * const * C,const fmpz_mpoly_ctx_t ctx)149 int _fmpz_mpoly_compose_fmpz_poly_mp(fmpz_poly_t A, const fmpz_mpoly_t B,
150                       fmpz_poly_struct * const * C, const fmpz_mpoly_ctx_t ctx)
151 {
152     int success = 1;
153     flint_bitcnt_t bits = B->bits;
154     ulong l;
155     slong i, k, N, nvars = ctx->minfo->nvars;
156     slong entries, k_len, off;
157     slong Blen = B->length;
158     fmpz * Bcoeff = B->coeffs;
159     ulong * Bexp = B->exps;
160     fmpz * degrees;
161     slong * offs;
162     ulong * masks;
163     fmpz_poly_struct * powers;
164     fmpz_poly_t t, t2;
165     TMP_INIT;
166 
167     FLINT_ASSERT(Blen > 0);
168 
169     TMP_START;
170 
171     degrees = TMP_ARRAY_ALLOC(nvars, fmpz);
172     for (i = 0; i < nvars; i++)
173         fmpz_init(degrees + i);
174 
175     mpoly_degrees_ffmpz(degrees, Bexp, Blen, bits, ctx->minfo);
176 
177     /* compute how many masks are needed */
178     entries = 0;
179     for (i = 0; i < nvars; i++)
180     {
181         if (_fmpz_poly_pow_fmpz_is_not_feasible(C[i], degrees + i))
182         {
183             success = 0;
184             goto cleanup_degrees;
185         }
186 
187         entries += fmpz_bits(degrees + i);
188     }
189     offs = TMP_ARRAY_ALLOC(entries, slong);
190     masks = TMP_ARRAY_ALLOC(entries, ulong);
191     powers = TMP_ARRAY_ALLOC(entries, fmpz_poly_struct);
192 
193     N = mpoly_words_per_exp(bits, ctx->minfo);
194 
195     /* store bit masks for each power of two of variables */
196     k = 0;
197     for (i = 0; i < nvars; i++)
198     {
199         flint_bitcnt_t varibits = fmpz_bits(degrees + i);
200 
201         off = mpoly_gen_offset_mp(i, bits, ctx->minfo);
202 
203         for (l = 0; l < varibits; l++)
204         {
205             offs[k] = off + (l / FLINT_BITS);
206             masks[k] = UWORD(1) << (l % FLINT_BITS);
207             fmpz_poly_init(powers + k);
208             if (l == 0)
209                 fmpz_poly_set(powers + k, C[i]);
210             else
211                 fmpz_poly_mul(powers + k, powers + k - 1, powers + k - 1);
212             k++;
213         }
214     }
215     k_len = k;
216     FLINT_ASSERT(k_len == entries);
217 
218     /* accumulate answer */
219     fmpz_poly_zero(A);
220     fmpz_poly_init(t);
221     fmpz_poly_init(t2);
222     for (i = 0; i < Blen; i++)
223     {
224         fmpz_poly_set_fmpz(t, Bcoeff + i);
225         for (k = 0; k < k_len; k++)
226         {
227             if ((Bexp[N*i + offs[k]] & masks[k]) != WORD(0))
228             {
229                 fmpz_poly_mul(t2, t, powers + k);
230                 fmpz_poly_swap(t, t2);
231             }
232         }
233         fmpz_poly_add(A, A, t);
234     }
235     fmpz_poly_clear(t);
236     fmpz_poly_clear(t2);
237 
238     for (k = 0; k < k_len; k++)
239         fmpz_poly_clear(powers + k);
240 
241 cleanup_degrees:
242 
243     for (i = 0; i < nvars; i++)
244         fmpz_clear(degrees + i);
245 
246     TMP_END;
247 
248     return success;
249 }
250 
251 
fmpz_mpoly_compose_fmpz_poly(fmpz_poly_t A,const fmpz_mpoly_t B,fmpz_poly_struct * const * C,const fmpz_mpoly_ctx_t ctx)252 int fmpz_mpoly_compose_fmpz_poly(fmpz_poly_t A, const fmpz_mpoly_t B,
253                       fmpz_poly_struct * const * C, const fmpz_mpoly_ctx_t ctx)
254 {
255     if (B->length == 0)
256     {
257         fmpz_poly_zero(A);
258         return 1;
259     }
260 
261     if (B->bits <= FLINT_BITS)
262     {
263         return _fmpz_mpoly_compose_fmpz_poly_sp(A, B, C, ctx);
264     }
265     else
266     {
267         return _fmpz_mpoly_compose_fmpz_poly_mp(A, B, C, ctx);
268     }
269 }
270 
271