1 /*
2     Copyright (C) 2020 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include "fq_nmod_mpoly.h"
13 
14 
15 typedef struct
16 {
17     slong f;
18     slong r;
19     slong v_var;
20     fmpz_t v_exp;   /* will be managed as stack grows / shrinks */
21     int ret;
22 } stack_entry_struct;
23 
24 typedef stack_entry_struct stack_entry_t[1];
25 
26 
27 /* A = A * X^pow */
_fq_nmod_mpoly_pmul(fq_nmod_mpoly_t A,const fq_nmod_mpoly_t X,const fmpz_t pow,fq_nmod_mpoly_t T,const fq_nmod_mpoly_ctx_t ctx)28 static int _fq_nmod_mpoly_pmul(fq_nmod_mpoly_t A, const fq_nmod_mpoly_t X,
29             const fmpz_t pow, fq_nmod_mpoly_t T, const fq_nmod_mpoly_ctx_t ctx)
30 {
31     ulong p;
32     FLINT_ASSERT(fmpz_sgn(pow) > 0);
33 
34     if (!fmpz_fits_si(pow))
35     {
36         if (!fq_nmod_mpoly_pow_fmpz(T, X, pow, ctx))
37         {
38             fq_nmod_mpoly_zero(A, ctx);
39             return 0;
40         }
41 
42         fq_nmod_mpoly_mul(A, A, T, ctx);
43         return 1;
44     }
45 
46     p = fmpz_get_ui(pow);
47 
48     if (X->length <= WORD(2) || A->length/p < X->length)
49     {
50         if (!fq_nmod_mpoly_pow_ui(T, X, p, ctx))
51         {
52             fq_nmod_mpoly_zero(A, ctx);
53             return 0;
54         }
55 
56         fq_nmod_mpoly_mul(A, A, T, ctx);
57     }
58     else
59     {
60         while (p >= 1)
61         {
62             fq_nmod_mpoly_mul(T, A, X, ctx);
63             fq_nmod_mpoly_swap(A, T, ctx);
64             p--;
65         }
66     }
67 
68     return 1;
69 }
70 
71 /*
72     evaluate B(xbar) at xbar = C,
73 */
fq_nmod_mpoly_compose_fq_nmod_mpoly_horner(fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,fq_nmod_mpoly_struct * const * C,const fq_nmod_mpoly_ctx_t ctxB,const fq_nmod_mpoly_ctx_t ctxAC)74 int fq_nmod_mpoly_compose_fq_nmod_mpoly_horner(fq_nmod_mpoly_t A,
75                const fq_nmod_mpoly_t B, fq_nmod_mpoly_struct * const * C,
76                const fq_nmod_mpoly_ctx_t ctxB, const fq_nmod_mpoly_ctx_t ctxAC)
77 {
78     int success = 1;
79     int ret;
80     slong nvars = ctxB->minfo->nvars;
81     slong i, j, k, cur, next, f, r, f_prev, r_prev, v;
82     slong sp, rp;
83     stack_entry_struct * stack;
84     fq_nmod_mpoly_struct * regs;
85     fq_nmod_mpoly_t temp;
86     slong * rtypes;
87     ulong totalcounts, maxcounts;
88     ulong * counts;
89     slong Blen = B->length;
90     slong * Blist;
91     const fq_nmod_struct * Bcoeff = B->coeffs;
92     ulong * Bexp = B->exps;
93     flint_bitcnt_t Bbits = B->bits;
94     slong BN = mpoly_words_per_exp(Bbits, ctxB->minfo);
95     fmpz * Buexp;
96     fmpz * mdegs;
97     fmpz_t score, tz;
98     TMP_INIT;
99 
100     if (Blen == 0)
101     {
102         fq_nmod_mpoly_zero(A, ctxAC);
103         return 1;
104     }
105 
106     FLINT_ASSERT(A != B);
107     FLINT_ASSERT(Blen > 0);
108 
109     TMP_START;
110 
111     fmpz_init(score);
112     fmpz_init(tz);
113 
114     /* unpack B exponents */
115     Buexp = _fmpz_vec_init(nvars*Blen);
116     for (i = 0; i < Blen; i++)
117         mpoly_get_monomial_ffmpz(Buexp + nvars*i, Bexp + BN*i, Bbits, ctxB->minfo);
118 
119     counts = (ulong *) TMP_ALLOC(nvars*sizeof(ulong));
120     mdegs = _fmpz_vec_init(nvars);
121 
122     /* stack */
123     sp = -WORD(1); /* start with empty stack */
124     stack = (stack_entry_struct *) TMP_ALLOC(nvars*(Blen + 1)*sizeof(stack_entry_struct));
125     Blist = (slong *) TMP_ALLOC(Blen*sizeof(slong));
126 
127     /* registers of polynomials */
128     rp = 0;
129     rtypes = (slong *) TMP_ALLOC((nvars + 1)*sizeof(slong));
130     regs   = (fq_nmod_mpoly_struct *) TMP_ALLOC(nvars*sizeof(fq_nmod_mpoly_struct));
131     for (i = 0; i < nvars; i++)
132         fq_nmod_mpoly_init(regs + i, ctxAC);
133     fq_nmod_mpoly_init(temp, ctxAC);
134 
135     /* polynomials will be stored as link lists */
136     for (i = 0; i + 1 < Blen; i++)
137         Blist[i] = i + 1;
138     Blist[i] = -WORD(1);
139 
140     sp++;
141     fmpz_init((stack + sp)->v_exp);
142     (stack + sp)->ret = 0;
143     (stack + sp)->f = 0;
144 
145 HornerForm:
146 
147     f = (stack + sp)->f;
148 
149     FLINT_ASSERT(f != -WORD(1)); /* f is not supposed to be zero */
150 
151     /* obtain a count of the number of terms containing each variable */
152     for (i = 0; i < nvars; i++)
153     {
154         counts[i] = 0;
155         fmpz_set_si(mdegs + i, -WORD(1));
156     }
157 
158     for (j = f; j != -WORD(1); j = Blist[j])
159     {
160         for (i = 0; i < nvars; i++)
161         {
162             if (!fmpz_is_zero(Buexp + nvars*j + i ))
163             {
164                 counts[i]++;
165                 if (fmpz_sgn(mdegs + i) < 0
166                     || fmpz_cmp(mdegs + i, Buexp + nvars*j + i) > 0)
167                 {
168                     fmpz_set(mdegs + i, Buexp + nvars*j + i);
169                 }
170             }
171         }
172     }
173 
174     totalcounts = 0;
175     maxcounts = 0;
176     v = -WORD(1);
177     for (i = 0; i < nvars; i++)
178     {
179         maxcounts = FLINT_MAX(maxcounts, counts[i]);
180         totalcounts += counts[i];
181         if (counts[i] != 0)
182             v = i;
183     }
184 
185     /* handle simple cases */
186     if (totalcounts == 0)
187     {
188         FLINT_ASSERT(Blist[f] == -WORD(1)); /* f should have had only one term */
189         rtypes[rp] = f;
190         goto HornerFormReturn;
191     }
192     else if (totalcounts == 1)
193     {
194         FLINT_ASSERT(!fmpz_is_zero(Buexp + nvars*f + v)); /* this term should not be a scalar */
195         if (!fq_nmod_mpoly_pow_fmpz(regs + rp, C[v], Buexp + nvars*f + v, ctxAC))
196         {
197             success = 0;
198         }
199         fq_nmod_mpoly_scalar_mul_fq_nmod(regs + rp, regs + rp, Bcoeff + f, ctxAC);
200 
201         if (Blist[f] != -WORD(1)) /* if f has a second term */
202         {
203             /* this term should be a scalar */
204             FLINT_ASSERT(fmpz_is_zero(Buexp + nvars*Blist[f] + v));
205             fq_nmod_mpoly_add_fq_nmod(regs + rp, regs + rp,  Bcoeff + Blist[f], ctxAC);
206         }
207 
208         rtypes[rp] = -WORD(1);
209 
210         goto HornerFormReturn;
211     }
212 
213     /* pick best power to pull out */
214     k = 0;
215     if (maxcounts == 1)
216     {
217         fmpz_set_si(score, -WORD(1));
218         for (i = 0; i < nvars; i++)
219         {
220             if (counts[i] == 1 && (fmpz_sgn(score) < 0
221                                    || fmpz_cmp(mdegs + i, score) < 0))
222             {
223                 FLINT_ASSERT(fmpz_sgn(mdegs + i) > 0);
224                 fmpz_set(score, mdegs + i);
225                 k = i;
226             }
227         }
228     }
229     else
230     {
231         fmpz_zero(score);
232         for (i = 0; i < nvars; i++)
233         {
234             if (counts[i] > 1)
235             {
236                 FLINT_ASSERT(fmpz_sgn(mdegs + i) > 0);
237                 fmpz_mul_ui(tz, mdegs + i, counts[i] - 1);
238                 if (fmpz_cmp(tz, score) > 0)
239                 {
240                     fmpz_swap(score, tz);
241                     k = i;
242                 }
243             }
244         }
245     }
246 
247     /* set variable power v */
248     (stack + sp)->v_var = k;
249     fmpz_set((stack + sp)->v_exp, mdegs + k);
250 
251     /* scan f and split into q and v with f = q*v + r then set f = q */
252     r = -WORD(1);
253     cur = f;
254     f_prev = -WORD(1);
255     r_prev = -WORD(1);
256     while (cur != -WORD(1))
257     {
258         next = Blist[cur];
259         if (fmpz_is_zero(Buexp + nvars*cur + k))
260         {
261             if (f_prev == -WORD(1))
262                 f = Blist[cur];
263             else
264                 Blist[f_prev] = Blist[cur];
265 
266             if (r_prev == -WORD(1))
267                 r = cur;
268             else
269                 Blist[r_prev] = cur;
270 
271             Blist[cur] = -WORD(1);
272             r_prev = cur;
273         }
274         else
275         {
276             /* mdegs[k] should be minimum non zero exponent */
277             fmpz_sub(Buexp + nvars*cur + k, Buexp + nvars*cur + k, mdegs + k);
278             FLINT_ASSERT(fmpz_sgn(Buexp + nvars*cur + k) >= 0);
279             f_prev = cur;
280         }
281         cur = next;
282     }
283     (stack + sp)->r = r;
284 
285     /* convert the quotient */
286     sp++;
287     fmpz_init((stack + sp)->v_exp);
288     (stack + sp)->ret = 1;
289     (stack + sp)->f = f;
290     goto HornerForm;
291 
292 HornerForm1:
293 
294     /* convert the remainder */
295     r = (stack + sp)->r;
296     if (r != -WORD(1))
297     {
298         /* remainder is non zero */
299         rp++;
300         FLINT_ASSERT(0 <= rp && rp <= nvars);
301         sp++;
302         fmpz_init((stack + sp)->v_exp);
303         (stack + sp)->ret = 2;
304         (stack + sp)->f = r;
305         goto HornerForm;
306 
307 HornerForm2:
308 
309         if (rtypes[rp - 1] == -WORD(1) && rtypes[rp] == -WORD(1))
310         {
311             /* both quotient and remainder are polynomials */
312             if (!_fq_nmod_mpoly_pmul(regs + rp - 1, C[(stack + sp)->v_var],
313                                              (stack + sp)->v_exp, temp, ctxAC))
314             {
315                 success = 0;
316             }
317             fq_nmod_mpoly_add(temp, regs + rp - 1, regs + rp, ctxAC);
318             fq_nmod_mpoly_swap(temp, regs + rp - 1, ctxAC);
319         }
320         else if (rtypes[rp - 1] == -WORD(1) && rtypes[rp] != -WORD(1))
321         {
322             /* quotient is a polynomial, remainder is a scalar */
323             if (!_fq_nmod_mpoly_pmul(regs + rp - 1, C[(stack + sp)->v_var],
324                                              (stack + sp)->v_exp, temp, ctxAC))
325             {
326                 success = 0;
327             }
328             fq_nmod_mpoly_add_fq_nmod(regs + rp - 1, regs + rp - 1, Bcoeff + rtypes[rp], ctxAC);
329         }
330         else if (rtypes[rp - 1] != -WORD(1) && rtypes[rp] == -WORD(1))
331         {
332             /* quotient is a scalar, remainder is a polynomial */
333             if (!fq_nmod_mpoly_pow_fmpz(temp, C[(stack + sp)->v_var],
334                                                    (stack + sp)->v_exp, ctxAC))
335             {
336                 success = 0;
337             }
338             fq_nmod_mpoly_scalar_mul_fq_nmod(temp, temp, Bcoeff + rtypes[rp - 1], ctxAC);
339             fq_nmod_mpoly_add(regs + rp - 1, temp, regs + rp, ctxAC);
340         }
341         else
342         {
343             /* quotient is a scalar, remainder is a scalar */
344             FLINT_ASSERT(0);    /* this should have been handled by simple case */
345         }
346         rp--;
347         FLINT_ASSERT(0 <= rp && rp <= nvars);
348     }
349     else
350     {
351         /* remainder is zero */
352         FLINT_ASSERT(rtypes[rp] == -WORD(1)); /* quotient is a scalar */
353 
354         /* quotient is a polynomial */
355         if (!_fq_nmod_mpoly_pmul(regs + rp, C[(stack + sp)->v_var],
356                                              (stack + sp)->v_exp, temp, ctxAC))
357         {
358             success = 0;
359         }
360     }
361 
362     rtypes[rp] = -WORD(1);
363 
364 HornerFormReturn:
365 
366     if (!success)
367     {
368         while (sp >= 0)
369         {
370             fmpz_clear((stack + sp)->v_exp);
371             sp--;
372         }
373 
374         goto cleanup;
375     }
376 
377     ret = (stack + sp)->ret;
378     fmpz_clear((stack + sp)->v_exp);
379     sp--;
380     if (ret == 1) goto HornerForm1;
381     if (ret == 2) goto HornerForm2;
382 
383     FLINT_ASSERT(rp == 0);
384     FLINT_ASSERT(sp == -WORD(1));
385 
386     if (rtypes[rp] == -WORD(1))
387     {
388         fq_nmod_mpoly_swap(A, regs + rp, ctxAC);
389     }
390     else
391     {
392         fq_nmod_mpoly_set_fq_nmod(A, Bcoeff + rtypes[rp], ctxAC);
393     }
394 
395 cleanup:
396 
397     for (i = 0; i < nvars; i++)
398         fq_nmod_mpoly_clear(regs + i, ctxAC);
399     fq_nmod_mpoly_clear(temp, ctxAC);
400 
401     fmpz_clear(score);
402     fmpz_clear(tz);
403     _fmpz_vec_clear(mdegs, nvars);
404     _fmpz_vec_clear(Buexp, nvars*Blen);
405 
406     TMP_END;
407 
408     return success;
409 }
410 
411