1 /*
2     Copyright (C) 2019 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include "fmpz_mod.h"
13 #include "nmod_mpoly.h"
14 #include "fmpz_mpoly.h"
15 #include "fmpz_mod_mpoly.h"
16 
17 #define LOW_HALF_MASK ((-UWORD(1)) >> (FLINT_BITS - FLINT_BITS/2))
18 
19 /*
20     When interpolating a polynomial f we need degree bounds on f in each
21     variable and substitution degrees in each variable, these are not
22     necessarily the same. We also need to calculate discrete logs.
23 */
mpoly_bma_interpolate_ctx_init(mpoly_bma_interpolate_ctx_t Ictx,slong nvars)24 void mpoly_bma_interpolate_ctx_init(
25     mpoly_bma_interpolate_ctx_t Ictx,
26     slong nvars)
27 {
28     Ictx->degbounds = (slong *) flint_malloc(nvars*sizeof(slong));
29     Ictx->subdegs   = (ulong *) flint_malloc(nvars*sizeof(ulong));
30     fmpz_mod_discrete_log_pohlig_hellman_init(Ictx->dlogenv);
31     nmod_discrete_log_pohlig_hellman_init(Ictx->dlogenv_sp);
32 }
33 
mpoly_bma_interpolate_ctx_clear(mpoly_bma_interpolate_ctx_t Ictx)34 void mpoly_bma_interpolate_ctx_clear(mpoly_bma_interpolate_ctx_t Ictx)
35 {
36     flint_free(Ictx->degbounds);
37     flint_free(Ictx->subdegs);
38     fmpz_mod_discrete_log_pohlig_hellman_clear(Ictx->dlogenv);
39     nmod_discrete_log_pohlig_hellman_clear(Ictx->dlogenv_sp);
40 }
41 
42 /*
43     If I was formed from evaluations at
44         alpha^alphashift, alpha^(alphashift + 1), ...
45     construct the corresponding mpoly if possible with coeffs in (-p/2, p/2]
46     The substitution degrees and degree bounds in Ictx are used.
47 */
nmod_mpoly_bma_get_fmpz_mpoly(fmpz_mpoly_t A,const fmpz_mpoly_ctx_t ctx,ulong alphashift,nmod_berlekamp_massey_t I,const mpoly_bma_interpolate_ctx_t Ictx,const nmodf_ctx_t fpctx)48 int nmod_mpoly_bma_get_fmpz_mpoly(
49     fmpz_mpoly_t A,
50     const fmpz_mpoly_ctx_t ctx,
51     ulong alphashift,
52     nmod_berlekamp_massey_t I,
53     const mpoly_bma_interpolate_ctx_t Ictx,
54     const nmodf_ctx_t fpctx)
55 {
56     slong i, j, t, N;
57     int success;
58     ulong new_exp, this_exp;
59     slong * shifts, * offsets;
60     mp_limb_t * values, * roots;
61     fmpz * Acoeff;
62     ulong * Aexp;
63     slong Alen;
64     mp_limb_t T, S, V, V0, V1, V2, p0, p1, r;
65     TMP_INIT;
66 
67     t = nmod_poly_degree(I->V1);
68     FLINT_ASSERT(I->points->length >= t);
69 
70     if (t <= 0)
71     {
72         return 0;
73     }
74 
75     TMP_START;
76 
77     /* use the rt member of I as temp space for roots - slightly dirty */
78     nmod_poly_fit_length(I->rt, t);
79     I->rt->length = t;
80     roots = I->rt->coeffs;
81     values = I->points->coeffs;
82 
83     success = nmod_poly_find_distinct_nonzero_roots(roots, I->V1);
84     if (!success)
85     {
86         goto cleanup;
87     }
88 
89     FLINT_ASSERT(ctx->minfo->ord == ORD_LEX);
90     FLINT_ASSERT(A->bits <= FLINT_BITS);
91     N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
92     fmpz_mpoly_fit_length(A, t, ctx);
93     Acoeff = A->coeffs;
94     Aexp = A->exps;
95     A->length = 0;
96 
97     shifts = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
98     offsets = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
99     for (j = 0; j < ctx->minfo->nvars; j++)
100     {
101         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
102     }
103 
104     Alen = 0;
105     for (i = 0; i < t; i++)
106     {
107         /* coeffs[i] is (coeffs(P).values)/P(roots[i]) =: V/S
108             where P(x) = master(x)/(x-roots[i])     */
109         V0 = V1 = V2 = T = S = 0;
110         r = roots[i];
111         for (j = t; j > 0; j--)
112         {
113             T = nmod_add(nmod_mul(r, T, fpctx->mod), I->V1->coeffs[j], fpctx->mod);
114             S = nmod_add(nmod_mul(r, S, fpctx->mod), T, fpctx->mod);
115             umul_ppmm(p1, p0, values[j - 1], T);
116             add_sssaaaaaa(V2, V1, V0, V2, V1, V0, WORD(0), p1, p0);
117         }
118         /* roots[i] should be a root of master */
119         FLINT_ASSERT(nmod_add(nmod_mul(r, T, fpctx->mod), I->V1->coeffs[0], fpctx->mod) == 0);
120         NMOD_RED3(V, V2, V1, V0, fpctx->mod);
121         S = nmod_mul(S, nmod_pow_ui(r, alphashift, fpctx->mod), fpctx->mod);
122         V0 = nmod_mul(V, nmod_inv(S, fpctx->mod), fpctx->mod);
123         if (V0 == 0)
124         {
125             /* hmmm */
126             continue;
127         }
128 
129         fmpz_set_ui(Acoeff + Alen, V0);
130         if (fpctx->mod.n - V0 < V0)
131         {
132             fmpz_sub_ui(Acoeff + Alen, Acoeff + Alen, fpctx->mod.n);
133         }
134 
135         mpoly_monomial_zero(Aexp + N*Alen, N);
136         new_exp = nmod_discrete_log_pohlig_hellman_run(Ictx->dlogenv_sp, roots[i]);
137         for (j = ctx->minfo->nvars - 1; j >= 0; j--)
138         {
139             this_exp = new_exp % Ictx->subdegs[j];
140             new_exp = new_exp / Ictx->subdegs[j];
141             if (this_exp >= Ictx->degbounds[j])
142             {
143                 success = 0;
144                 goto cleanup;
145             }
146             (Aexp + N*Alen)[offsets[j]] |= this_exp << shifts[j];
147         }
148         if (new_exp != 0)
149         {
150             success = 0;
151             goto cleanup;
152         }
153         Alen++;
154     }
155     A->length = Alen;
156 
157     fmpz_mpoly_sort_terms(A, ctx);
158 
159     success = 1;
160 
161 cleanup:
162 
163     TMP_END;
164     return success;
165 }
166 
167 /*
168     nmod_mpoly "skeletons" - just the coefficients
169 */
170 
nmod_mpolyc_init(nmod_mpolyc_t A)171 void nmod_mpolyc_init(nmod_mpolyc_t A)
172 {
173     A->coeffs = NULL;
174     A->alloc = 0;
175     A->length = 0;
176 }
177 
nmod_mpolyc_clear(nmod_mpolyc_t A)178 void nmod_mpolyc_clear(nmod_mpolyc_t A)
179 {
180     if (A->coeffs)
181         flint_free(A->coeffs);
182 }
183 
nmod_mpolyc_fit_length(nmod_mpolyc_t A,slong length)184 void nmod_mpolyc_fit_length(nmod_mpolyc_t A, slong length)
185 {
186     slong old_alloc = A->alloc;
187     slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
188 
189     if (length > old_alloc)
190     {
191         if (old_alloc == 0)
192         {
193             A->coeffs = (mp_limb_t *) flint_malloc(new_alloc*sizeof(mp_limb_t));
194         }
195         else
196         {
197             A->coeffs = (mp_limb_t *) flint_realloc(A->coeffs, new_alloc*sizeof(mp_limb_t));
198         }
199 
200         A->alloc = new_alloc;
201     }
202 }
203 
nmod_mpolycu_init(nmod_mpolycu_t A)204 void nmod_mpolycu_init(nmod_mpolycu_t A)
205 {
206     A->coeffs = NULL;
207     A->alloc = 0;
208     A->length = 0;
209 }
210 
nmod_mpolycu_clear(nmod_mpolycu_t A)211 void nmod_mpolycu_clear(nmod_mpolycu_t A)
212 {
213     slong i;
214     for (i = 0; i < A->alloc; i++)
215     {
216         nmod_mpolyc_clear(A->coeffs + i);
217     }
218     if (A->coeffs)
219         flint_free(A->coeffs);
220 }
221 
nmod_mpolycu_fit_length(nmod_mpolycu_t A,slong length)222 void nmod_mpolycu_fit_length(nmod_mpolycu_t A, slong length)
223 {
224     slong i;
225     slong old_alloc = A->alloc;
226     slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
227 
228     if (length > old_alloc)
229     {
230         if (old_alloc == 0)
231         {
232             A->coeffs = (nmod_mpolyc_struct *) flint_malloc(
233                                          new_alloc*sizeof(nmod_mpolyc_struct));
234         }
235         else
236         {
237             A->coeffs = (nmod_mpolyc_struct *) flint_realloc(A->coeffs,
238                                          new_alloc*sizeof(nmod_mpolyc_struct));
239         }
240 
241         for (i = old_alloc; i < new_alloc; i++)
242         {
243             nmod_mpolyc_init(A->coeffs + i);
244         }
245         A->alloc = new_alloc;
246     }
247 }
248 
249 /*
250     fmpz_mod_mpoly "skeletons" - just the coefficients
251 */
252 
fmpz_mpolyc_init(fmpz_mpolyc_t A)253 void fmpz_mpolyc_init(fmpz_mpolyc_t A)
254 {
255     A->coeffs = NULL;
256     A->alloc = 0;
257     A->length = 0;
258 }
259 
fmpz_mpolyc_clear(fmpz_mpolyc_t A)260 void fmpz_mpolyc_clear(fmpz_mpolyc_t A)
261 {
262     slong i;
263     for (i = 0; i < A->alloc; i++)
264     {
265         fmpz_clear(A->coeffs + i);
266     }
267     if (A->coeffs)
268         flint_free(A->coeffs);
269 }
270 
fmpz_mpolyc_fit_length(fmpz_mpolyc_t A,slong length)271 void fmpz_mpolyc_fit_length(fmpz_mpolyc_t A, slong length)
272 {
273     slong i;
274     slong old_alloc = A->alloc;
275     slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
276 
277     if (length > old_alloc)
278     {
279         if (old_alloc == 0)
280         {
281             A->coeffs = (fmpz *) flint_malloc(new_alloc*sizeof(fmpz));
282         }
283         else
284         {
285             A->coeffs = (fmpz *) flint_realloc(A->coeffs, new_alloc*sizeof(fmpz));
286         }
287 
288         for (i = old_alloc; i < new_alloc; i++)
289         {
290             fmpz_init(A->coeffs + i);
291         }
292         A->alloc = new_alloc;
293     }
294 }
295 
fmpz_mpolycu_init(fmpz_mpolycu_t A)296 void fmpz_mpolycu_init(fmpz_mpolycu_t A)
297 {
298     A->coeffs = NULL;
299     A->alloc = 0;
300     A->length = 0;
301 }
302 
fmpz_mpolycu_clear(fmpz_mpolycu_t A)303 void fmpz_mpolycu_clear(fmpz_mpolycu_t A)
304 {
305     slong i;
306     for (i = 0; i < A->alloc; i++)
307     {
308         fmpz_mpolyc_clear(A->coeffs + i);
309     }
310     if (A->coeffs)
311         flint_free(A->coeffs);
312 }
313 
fmpz_mpolycu_fit_length(fmpz_mpolycu_t A,slong length)314 void fmpz_mpolycu_fit_length(fmpz_mpolycu_t A, slong length)
315 {
316     slong i;
317     slong old_alloc = A->alloc;
318     slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
319 
320     if (length > old_alloc)
321     {
322         if (old_alloc == 0)
323         {
324             A->coeffs = (fmpz_mpolyc_struct *) flint_malloc(
325                                          new_alloc*sizeof(fmpz_mpolyc_struct));
326         }
327         else
328         {
329             A->coeffs = (fmpz_mpolyc_struct *) flint_realloc(A->coeffs,
330                                          new_alloc*sizeof(fmpz_mpolyc_struct));
331         }
332 
333         for (i = old_alloc; i < new_alloc; i++)
334         {
335             fmpz_mpolyc_init(A->coeffs + i);
336         }
337         A->alloc = new_alloc;
338     }
339 }
340 
341 
342 /*
343     set out to the evaluation of variables after ksub at alpha^w
344 
345     out[0]   = alpha ^ (w * subdegs[n-1] * subdegs[n-2] * ... * * subdegs[1])
346       ...
347     out[n-3] = alpha ^ (w * subdegs[n-1] * subdegs[n-2])
348     out[n-2] = alpha ^ (w * subdegs[n-1])
349     out[n-1] = alpha ^ (w)
350 
351     secret: subdegs[0] is not used
352 */
353 
nmod_mpoly_bma_interpolate_alpha_powers(mp_limb_t * out,ulong w,const mpoly_bma_interpolate_ctx_t Ictx,const fmpz_mpoly_ctx_t ctx,const nmodf_ctx_t fpctx)354 void nmod_mpoly_bma_interpolate_alpha_powers(
355     mp_limb_t * out,
356     ulong w,
357     const mpoly_bma_interpolate_ctx_t Ictx,
358     const fmpz_mpoly_ctx_t ctx,
359     const nmodf_ctx_t fpctx)
360 {
361     slong j = ctx->minfo->nvars - 1;
362     out[j] = nmod_pow_ui(Ictx->dlogenv_sp->alpha, w, fpctx->mod);
363     for (; j > 0; j--)
364     {
365         out[j - 1] = nmod_pow_ui(out[j], Ictx->subdegs[j], fpctx->mod);
366     }
367 }
368 
fmpz_mod_mpoly_bma_interpolate_alpha_powers(fmpz * out,const fmpz_t w,const mpoly_bma_interpolate_ctx_t Ictx,const fmpz_mpoly_ctx_t ctx,const fmpz_mod_ctx_t fpctx)369 void fmpz_mod_mpoly_bma_interpolate_alpha_powers(
370     fmpz * out,
371     const fmpz_t w,
372     const mpoly_bma_interpolate_ctx_t Ictx,
373     const fmpz_mpoly_ctx_t ctx,
374     const fmpz_mod_ctx_t fpctx)
375 {
376     slong j = ctx->minfo->nvars - 1;
377     fmpz_mod_pow_fmpz(out + j, Ictx->dlogenv->alpha, w, fpctx);
378     for (; j > 0; j--)
379     {
380         fmpz_mod_pow_ui(out + j - 1, out + j, Ictx->subdegs[j], fpctx);
381     }
382 }
383 
384 
385 /*
386     Set the coefficients of E to the evaluation of cooresponding monomials of A
387     evaluation at x_i = alpha[i]
388 */
fmpz_mod_mpoly_set_skel(fmpz_mpolyc_t M,const fmpz_mod_mpoly_ctx_t ctx_mp,const fmpz_mpoly_t A,const fmpz * alpha,const fmpz_mpoly_ctx_t ctx)389 void fmpz_mod_mpoly_set_skel(
390     fmpz_mpolyc_t M,
391     const fmpz_mod_mpoly_ctx_t ctx_mp,
392     const fmpz_mpoly_t A,
393     const fmpz * alpha,
394     const fmpz_mpoly_ctx_t ctx)
395 {
396     slong i, j;
397     slong offset, shift;
398     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
399     slong nvars = ctx->minfo->nvars;
400     ulong * Aexp;
401     fmpz * Mcoeff;
402     slong * LUToffset;
403     ulong * LUTmask;
404     fmpz * LUTvalue;
405     slong LUTlen;
406     fmpz_t xpoweval;
407     ulong * inputexpmask;
408     TMP_INIT;
409 
410     TMP_START;
411 
412     fmpz_init(xpoweval);
413 
414     LUToffset = (slong *) TMP_ALLOC(N*FLINT_BITS*sizeof(slong));
415     LUTmask   = (ulong *) TMP_ALLOC(N*FLINT_BITS*sizeof(ulong));
416     LUTvalue  = (fmpz *) TMP_ALLOC(N*FLINT_BITS*sizeof(fmpz));
417     for (i = 0; i < N*FLINT_BITS; i++)
418     {
419         fmpz_init(LUTvalue + i);
420     }
421 
422     fmpz_mpolyc_fit_length(M, A->length);
423     M->length = A->length;
424 
425     Mcoeff = M->coeffs;
426     Aexp = A->exps;
427 
428     inputexpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
429     mpoly_monomial_zero(inputexpmask, N);
430     for (i = 0; i < A->length; i++)
431     {
432         for (j = 0; j < N; j++)
433         {
434             inputexpmask[j] |= (Aexp + N*i)[j];
435         }
436     }
437 
438     LUTlen = 0;
439     for (j = nvars - 1; j >= 0; j--)
440     {
441         mpoly_gen_offset_shift_sp(&offset, &shift, j, A->bits, ctx->minfo);
442 
443         fmpz_set(xpoweval, alpha + j); /* xpoweval = alpha[j]^(2^i) */
444         for (i = 0; i < A->bits; i++)
445         {
446             LUToffset[LUTlen] = offset;
447             LUTmask[LUTlen] = (UWORD(1) << (shift + i));
448             fmpz_set(LUTvalue + LUTlen, xpoweval);
449             if ((inputexpmask[offset] & LUTmask[LUTlen]) != 0)
450             {
451                 LUTlen++;
452             }
453             fmpz_mod_mul(xpoweval, xpoweval, xpoweval, ctx_mp->ffinfo);
454         }
455     }
456     FLINT_ASSERT(LUTlen < N*FLINT_BITS);
457 
458     for (i = 0; i < A->length; i++)
459     {
460         fmpz_one(xpoweval);
461         for (j = 0; j < LUTlen; j++)
462         {
463             if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
464             {
465                 fmpz_mod_mul(xpoweval, xpoweval, LUTvalue + j, ctx_mp->ffinfo);
466             }
467         }
468         fmpz_set(Mcoeff + i, xpoweval);
469     }
470 
471     fmpz_clear(xpoweval);
472     for (i = 0; i < N*FLINT_BITS; i++)
473     {
474         fmpz_clear(LUTvalue + i);
475     }
476     TMP_END;
477 }
478 
fmpz_mod_mpolyu_set_skel(fmpz_mpolycu_t M,const fmpz_mod_mpoly_ctx_t ctx_mp,const fmpz_mpolyu_t A,const fmpz * alpha,const fmpz_mpoly_ctx_t ctx)479 void fmpz_mod_mpolyu_set_skel(
480     fmpz_mpolycu_t M,
481     const fmpz_mod_mpoly_ctx_t ctx_mp,
482     const fmpz_mpolyu_t A,
483     const fmpz * alpha,
484     const fmpz_mpoly_ctx_t ctx)
485 {
486     slong i;
487     fmpz_mpolycu_fit_length(M, A->length);
488     M->length = A->length;
489     for (i = 0; i < A->length; i++)
490     {
491         fmpz_mod_mpoly_set_skel(M->coeffs + i, ctx_mp, A->coeffs + i, alpha, ctx);
492     }
493 }
494 
495 
496 /* M = S */
fmpz_mod_mpoly_copy_skel(fmpz_mpolyc_t M,const fmpz_mpolyc_t S)497 void fmpz_mod_mpoly_copy_skel(fmpz_mpolyc_t M, const fmpz_mpolyc_t S)
498 {
499     fmpz_mpolyc_fit_length(M, S->length);
500     M->length = S->length;
501     _fmpz_vec_set(M->coeffs, S->coeffs, S->length);
502 }
503 
fmpz_mod_mpolyu_copy_skel(fmpz_mpolycu_t M,const fmpz_mpolycu_t S)504 void fmpz_mod_mpolyu_copy_skel(fmpz_mpolycu_t M, const fmpz_mpolycu_t S)
505 {
506     slong i;
507     fmpz_mpolycu_fit_length(M, S->length);
508     M->length = S->length;
509     for (i = 0; i < S->length; i++)
510     {
511         fmpz_mod_mpoly_copy_skel(M->coeffs + i, S->coeffs + i);
512     }
513 }
514 
515 
516 /* Ared = A mod p */
fmpz_mod_mpoly_red_skel(fmpz_mpolyc_t Ared,const fmpz_mpoly_t A,const fmpz_mod_ctx_t fpctx)517 void fmpz_mod_mpoly_red_skel(
518     fmpz_mpolyc_t Ared,
519     const fmpz_mpoly_t A,
520     const fmpz_mod_ctx_t fpctx)
521 {
522     fmpz_mpolyc_fit_length(Ared, A->length);
523     Ared->length = A->length;
524     _fmpz_vec_scalar_mod_fmpz(Ared->coeffs, A->coeffs, A->length,
525                                                   fmpz_mod_ctx_modulus(fpctx));
526 }
527 
fmpz_mod_mpolyu_red_skel(fmpz_mpolycu_t Ared,const fmpz_mpolyu_t A,const fmpz_mod_ctx_t fpctx)528 void fmpz_mod_mpolyu_red_skel(
529     fmpz_mpolycu_t Ared,
530     const fmpz_mpolyu_t A,
531     const fmpz_mod_ctx_t fpctx)
532 {
533     slong i;
534     fmpz_mpolycu_fit_length(Ared, A->length);
535     Ared->length = A->length;
536     for (i = 0; i < A->length; i++)
537     {
538         fmpz_mod_mpoly_red_skel(Ared->coeffs + i, A->coeffs + i, fpctx);
539     }
540 }
541 
542 
543 /*
544     return Ared.Avar and multiply Avar by Ainc
545     the coefficients of Ared must be reduced mod fpctx->p
546 */
fmpz_mod_mpoly_use_skel_mul(fmpz_t eval,fmpz_mpolyc_t Ared,fmpz_mpolyc_t Avar,const fmpz_mpolyc_t Ainc,const fmpz_mod_ctx_t fpctx)547 void fmpz_mod_mpoly_use_skel_mul(
548     fmpz_t eval,
549     fmpz_mpolyc_t Ared,
550     fmpz_mpolyc_t Avar,
551     const fmpz_mpolyc_t Ainc,
552     const fmpz_mod_ctx_t fpctx)
553 {
554     slong i;
555     fmpz_zero(eval);
556     FLINT_ASSERT(Avar->length == Ared->length);
557     for (i = 0; i < Ared->length; i++)
558     {
559         fmpz_addmul(eval, Ared->coeffs + i, Avar->coeffs + i);
560         fmpz_mod_mul(Avar->coeffs + i, Avar->coeffs + i, Ainc->coeffs + i, fpctx);
561     }
562     fmpz_mod(eval, eval, fmpz_mod_ctx_modulus(fpctx));
563 }
564 
565 
566 /*
567     A is in ZZ[X,Y][x_0, ..., x_(n-1)]
568     E is in Fp[X][Y]
569 
570     Set E to the evaluation of A. The evaluations of A's monomials are in M.
571     M is then multiplied by S.
572 */
fmpz_mod_mpolyuu_use_skel_mul(fmpz_mod_mpolyn_t E,const fmpz_mpolyu_t A,const fmpz_mpolycu_t Ared,fmpz_mpolycu_t M,const fmpz_mpolycu_t S,const fmpz_mod_mpoly_ctx_t ctx_mp)573 void fmpz_mod_mpolyuu_use_skel_mul(
574     fmpz_mod_mpolyn_t E,
575     const fmpz_mpolyu_t A,
576     const fmpz_mpolycu_t Ared,
577     fmpz_mpolycu_t M,
578     const fmpz_mpolycu_t S,
579     const fmpz_mod_mpoly_ctx_t ctx_mp)
580 {
581     slong xexp, yexp;
582     slong i;
583     fmpz_t eval;
584 
585     FLINT_ASSERT(E->bits == FLINT_BITS/2);
586     FLINT_ASSERT(1 == mpoly_words_per_exp_sp(E->bits, ctx_mp->minfo));
587 
588     FLINT_ASSERT(A->length == Ared->length);
589     FLINT_ASSERT(A->length == M->length);
590     FLINT_ASSERT(A->length == S->length);
591 
592     fmpz_init(eval);
593 
594     E->length = 0;
595     for (i = 0; i < A->length; i++)
596     {
597         fmpz_mod_mpoly_use_skel_mul(eval, Ared->coeffs + i, M->coeffs + i,
598                                                 S->coeffs + i, ctx_mp->ffinfo);
599         if (fmpz_is_zero(eval))
600         {
601             continue;
602         }
603 
604         xexp = A->exps[i] >> (FLINT_BITS/2);
605         yexp = A->exps[i] & LOW_HALF_MASK;
606 
607         if (E->length > 0 && (E->exps[E->length - 1] >> (FLINT_BITS/2)) == xexp)
608         {
609             fmpz_mod_poly_set_coeff_fmpz(E->coeffs + E->length - 1, yexp, eval);
610         }
611         else
612         {
613             fmpz_mod_mpolyn_fit_length(E, E->length + 1, ctx_mp);
614             fmpz_mod_poly_zero(E->coeffs + E->length);
615             fmpz_mod_poly_set_coeff_fmpz(E->coeffs + E->length, yexp, eval);
616             E->exps[E->length] = xexp << (FLINT_BITS/2);
617             E->length++;
618         }
619     }
620 
621     fmpz_clear(eval);
622 }
623 
624 
625 
fmpz_mod_bma_get_fmpz_mpoly(fmpz_mpoly_t A,const fmpz_mpoly_ctx_t ctx,const fmpz_t alphashift,fmpz_mod_berlekamp_massey_t I,const mpoly_bma_interpolate_ctx_t Ictx,const fmpz_mod_ctx_t fpctx)626 int fmpz_mod_bma_get_fmpz_mpoly(
627     fmpz_mpoly_t A,
628     const fmpz_mpoly_ctx_t ctx,
629     const fmpz_t alphashift,
630     fmpz_mod_berlekamp_massey_t I,
631     const mpoly_bma_interpolate_ctx_t Ictx,
632     const fmpz_mod_ctx_t fpctx)
633 {
634     slong i, j, t, N;
635     int success;
636     ulong this_exp;
637     fmpz_t new_exp;
638     slong * shifts, * offsets;
639     fmpz * values, * roots;
640     fmpz * Acoeff;
641     ulong * Aexp;
642     slong Alen;
643     fmpz_t T, S, V, temp, halfp;
644     TMP_INIT;
645 
646     TMP_START;
647 
648     fmpz_init(halfp);
649     fmpz_init(T);
650     fmpz_init(S);
651     fmpz_init(V);
652     fmpz_init(temp);
653     fmpz_init(new_exp);
654 
655     fmpz_tdiv_q_2exp(halfp, fmpz_mod_ctx_modulus(fpctx), 1);
656 
657     fmpz_mod_berlekamp_massey_reduce(I);
658     t = fmpz_mod_poly_degree(I->V1);
659     FLINT_ASSERT(t > 0);
660     FLINT_ASSERT(I->points->length >= t);
661 
662     fmpz_mod_poly_fit_length(I->rt, t);
663     I->rt->length = t;
664     success = fmpz_mod_poly_find_distinct_nonzero_roots(I->rt->coeffs, I->V1);
665     if (!success)
666     {
667         goto cleanup;
668     }
669 
670     roots = I->rt->coeffs;
671     values = I->points->coeffs;
672 
673     FLINT_ASSERT(ctx->minfo->ord == ORD_LEX);
674     FLINT_ASSERT(A->bits <= FLINT_BITS);
675     N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
676     fmpz_mpoly_fit_length(A, t, ctx);
677     Acoeff = A->coeffs;
678     Aexp = A->exps;
679     A->length = 0;
680 
681     shifts = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
682     offsets = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
683     for (j = 0; j < ctx->minfo->nvars; j++)
684     {
685         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
686     }
687 
688     Alen = 0;
689     for (i = 0; i < t; i++)
690     {
691         /* coeffs[i] is (coeffs(P).values)/P(roots[i]) =: V/S
692             where P(x) = master(x)/(x-roots[i])     */
693         fmpz_zero(V);
694         fmpz_zero(T);
695         fmpz_zero(S);
696         for (j = t; j > 0; j--)
697         {
698             fmpz_mod_mul(temp, roots + i, T, fpctx);
699             fmpz_mod_add(T, temp, I->V1->coeffs + j, fpctx);
700             fmpz_mod_mul(temp, roots + i, S, fpctx);
701             fmpz_mod_add(S, temp, T, fpctx);
702             fmpz_mod_mul(temp, values + j - 1, T, fpctx);
703             fmpz_mod_add(V, V, temp, fpctx);
704         }
705         /* roots[i] should be a root of master */
706 #if WANT_ASSERT
707         fmpz_mod_mul(temp, roots + i, T, fpctx);
708         fmpz_mod_add(temp, temp, I->V1->coeffs + 0, fpctx);
709         FLINT_ASSERT(fmpz_is_zero(temp));
710 #endif
711         fmpz_mod_pow_fmpz(temp, roots + i, alphashift, fpctx);
712         fmpz_mod_mul(S, S, temp, fpctx);
713         fmpz_mod_inv(temp, S, fpctx);
714         fmpz_mod_mul(Acoeff + Alen, V, temp, fpctx);
715         if (fmpz_is_zero(Acoeff + Alen))
716         {
717             /* hmmm */
718             continue;
719         }
720 
721         if (fmpz_cmp(Acoeff + Alen, halfp) > 0)
722         {
723             fmpz_sub(Acoeff + Alen, Acoeff + Alen, fmpz_mod_ctx_modulus(fpctx));
724         }
725 
726 
727         mpoly_monomial_zero(Aexp + N*Alen, N);
728         fmpz_mod_discrete_log_pohlig_hellman_run(new_exp, Ictx->dlogenv, roots + i);
729         for (j = ctx->minfo->nvars - 1; j >= 0; j--)
730         {
731             this_exp = fmpz_fdiv_ui(new_exp, Ictx->subdegs[j]);
732             fmpz_fdiv_q_ui(new_exp, new_exp, Ictx->subdegs[j]);
733             if (this_exp >= Ictx->degbounds[j])
734             {
735                 success = 0;
736                 goto cleanup;
737             }
738             (Aexp + N*Alen)[offsets[j]] |= this_exp << shifts[j];
739         }
740         if (!fmpz_is_zero(new_exp))
741         {
742             success = 0;
743             goto cleanup;
744         }
745         Alen++;
746     }
747     A->length = Alen;
748 
749     fmpz_mpoly_sort_terms(A, ctx);
750 
751     success = 1;
752 
753 cleanup:
754 
755     fmpz_clear(T);
756     fmpz_clear(S);
757     fmpz_clear(V);
758     fmpz_clear(temp);
759     fmpz_clear(halfp);
760 
761     TMP_END;
762     return success;
763 }
764 
765 
766 
nmod_bma_mpoly_init(nmod_bma_mpoly_t A)767 void nmod_bma_mpoly_init(nmod_bma_mpoly_t A)
768 {
769     A->length = 0;
770     A->alloc = 0;
771     A->exps = NULL;
772     A->coeffs = NULL;
773     A->pointcount = 0;
774 }
775 
nmod_bma_mpoly_reset_prime(nmod_bma_mpoly_t A,const nmodf_ctx_t fpctx)776 void nmod_bma_mpoly_reset_prime(
777     nmod_bma_mpoly_t A,
778     const nmodf_ctx_t fpctx)
779 {
780     slong i;
781     for (i = 0; i < A->alloc; i++)
782     {
783         nmod_berlekamp_massey_set_prime(A->coeffs + i, fpctx->mod.n);
784     }
785 }
786 
787 
nmod_bma_mpoly_clear(nmod_bma_mpoly_t A)788 void nmod_bma_mpoly_clear(nmod_bma_mpoly_t A)
789 {
790     slong i;
791     for (i = 0; i < A->alloc; i++)
792     {
793         nmod_berlekamp_massey_clear(A->coeffs + i);
794     }
795 
796     if (A->exps)
797         flint_free(A->exps);
798     if (A->coeffs)
799         flint_free(A->coeffs);
800 }
801 
nmod_bma_mpoly_print(const nmod_bma_mpoly_t A)802 void nmod_bma_mpoly_print(const nmod_bma_mpoly_t A)
803 {
804     slong i;
805     flint_printf("0");
806     for (i = 0; i < A->length; i++)
807     {
808         flint_printf(" + [");
809         nmod_berlekamp_massey_print(A->coeffs + i);
810         flint_printf("]*X^%wd*Y^%wd",
811                  A->exps[i] >> (FLINT_BITS/2),
812                  A->exps[i] & ((-UWORD(1)) >> (FLINT_BITS/2)));
813     }
814 }
815 
816 
nmod_bma_mpoly_fit_length(nmod_bma_mpoly_t A,slong length,const nmodf_ctx_t fpctx)817 void nmod_bma_mpoly_fit_length(
818     nmod_bma_mpoly_t A,
819     slong length,
820     const nmodf_ctx_t fpctx)
821 {
822     slong i;
823     slong old_alloc = A->alloc;
824     slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
825 
826     if (length > old_alloc)
827     {
828         if (old_alloc == 0)
829         {
830             A->exps = (ulong *) flint_malloc(new_alloc*sizeof(ulong));
831             A->coeffs = (nmod_berlekamp_massey_struct *) flint_malloc(
832                                new_alloc*sizeof(nmod_berlekamp_massey_struct));
833         }
834         else
835         {
836             A->exps = (ulong *) flint_realloc(A->exps, new_alloc*sizeof(ulong));
837             A->coeffs = (nmod_berlekamp_massey_struct *) flint_realloc(A->coeffs,
838                                new_alloc*sizeof(nmod_berlekamp_massey_struct));
839         }
840 
841         for (i = old_alloc; i < new_alloc; i++)
842         {
843             nmod_berlekamp_massey_init(A->coeffs + i, fpctx->mod.n);
844         }
845         A->alloc = new_alloc;
846     }
847 }
848 
nmod_bma_mpoly_zero(nmod_bma_mpoly_t L)849 void nmod_bma_mpoly_zero(nmod_bma_mpoly_t L)
850 {
851     L->length = 0;
852     L->pointcount = 0;
853 }
854 
855 
nmod_bma_mpoly_reduce(nmod_bma_mpoly_t L)856 int nmod_bma_mpoly_reduce(nmod_bma_mpoly_t L)
857 {
858     slong i;
859     int changed;
860 
861     changed = 0;
862 
863     for (i = 0; i < L->length; i++)
864     {
865         FLINT_ASSERT(L->pointcount == nmod_berlekamp_massey_point_count(L->coeffs + i));
866         changed |= nmod_berlekamp_massey_reduce(L->coeffs + i);
867     }
868 
869     return changed;
870 }
871 
nmod_bma_mpoly_add_point(nmod_bma_mpoly_t L,const nmod_mpolyn_t A,const nmod_mpoly_ctx_t ctx_sp)872 void nmod_bma_mpoly_add_point(
873     nmod_bma_mpoly_t L,
874     const nmod_mpolyn_t A,
875     const nmod_mpoly_ctx_t ctx_sp)
876 {
877     slong j;
878     slong Alen = A->length;
879     nmod_poly_struct * Acoeff = A->coeffs;
880     slong Li, Ai, ai;
881     ulong Aexp;
882     nmod_berlekamp_massey_struct * Lcoeff;
883     slong Llen;
884     ulong * Lexp;
885 
886     if (L->length == 0)
887     {
888         slong tot = 0;
889         for (Ai = 0; Ai < Alen; Ai++)
890         {
891             for (ai = (Acoeff + Ai)->length - 1; ai >= 0; ai--)
892             {
893                 tot += (0 != (Acoeff + Ai)->coeffs[ai]);
894             }
895         }
896         nmod_bma_mpoly_fit_length(L, tot, ctx_sp->ffinfo);
897     }
898 
899     Lcoeff = L->coeffs;
900     Llen = L->length;
901     Lexp = L->exps;
902 
903     Li = 0;
904     Ai = ai = 0;
905     Aexp = 0;
906     if (Ai < Alen)
907     {
908         ai = nmod_poly_degree(A->coeffs + Ai);
909         FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
910         FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
911         Aexp = A->exps[Ai] + ai;
912     }
913 
914     while (Li < Llen || Ai < Alen)
915     {
916         if (Li < Llen && Ai < Alen && Lexp[Li] == Aexp)
917         {
918             /* L term present, A term present */
919 add_same_exp:
920             nmod_berlekamp_massey_add_point(Lcoeff + Li, (Acoeff + Ai)->coeffs[ai]);
921             Li++;
922             do {
923                 ai--;
924             } while (ai >= 0 && (0 == (Acoeff + Ai)->coeffs[ai]));
925             if (ai < 0)
926             {
927                 Ai++;
928                 if (Ai < Alen)
929                 {
930                     ai = nmod_poly_degree(A->coeffs + Ai);
931                     FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
932                     FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
933                     Aexp = A->exps[Ai] + ai;
934                 }
935             }
936             else
937             {
938                 FLINT_ASSERT(Ai < Alen);
939                 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
940                 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
941                 Aexp = A->exps[Ai] + ai;
942             }
943         }
944         else if (Li < Llen && (Ai >= Alen || Lexp[Li] > Aexp))
945         {
946             /* L term present, A term missing */
947             nmod_berlekamp_massey_add_zeros(Lcoeff + Li, 1);
948             Li++;
949         }
950         else
951         {
952             /* L term missing, A term present */
953             FLINT_ASSERT(Ai < Alen && (Li >= Llen || Lexp[Li] < Aexp));
954             {
955                 ulong texp;
956                 nmod_berlekamp_massey_struct tcoeff;
957 
958                 nmod_bma_mpoly_fit_length(L, Llen + 1, ctx_sp->ffinfo);
959                 Lcoeff = L->coeffs;
960                 Lexp = L->exps;
961 
962                 texp = Lexp[Llen];
963                 tcoeff = Lcoeff[Llen];
964                 for (j = Llen - 1; j >= Li; j--)
965                 {
966                     Lexp[j + 1] = Lexp[j];
967                     Lcoeff[j + 1] = Lcoeff[j];
968                 }
969                 Lexp[Li] = texp;
970                 Lcoeff[Li] = tcoeff;
971             }
972 
973             nmod_berlekamp_massey_start_over(Lcoeff + Li);
974             nmod_berlekamp_massey_add_zeros(Lcoeff + Li, L->pointcount);
975             Lexp[Li] = Aexp;
976             Llen++;
977             L->length = Llen;
978 
979             goto add_same_exp;
980         }
981     }
982 
983     L->pointcount++;
984 }
985 
nmod_bma_mpoly_get_fmpz_mpolyu(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx,ulong alphashift,const nmod_bma_mpoly_t L,const mpoly_bma_interpolate_ctx_t Ictx,const nmodf_ctx_t fpctx)986 int nmod_bma_mpoly_get_fmpz_mpolyu(
987     fmpz_mpolyu_t A,
988     const fmpz_mpoly_ctx_t ctx,
989     ulong alphashift,
990     const nmod_bma_mpoly_t L,
991     const mpoly_bma_interpolate_ctx_t Ictx,
992     const nmodf_ctx_t fpctx)
993 {
994     int success;
995     slong i;
996 
997     fmpz_mpolyu_fit_length(A, L->length, ctx);
998     A->length = 0;
999     for (i = 0; i < L->length; i++)
1000     {
1001         A->exps[A->length] = L->exps[i];
1002         success = nmod_mpoly_bma_get_fmpz_mpoly(A->coeffs + A->length, ctx,
1003                                        alphashift, L->coeffs + i, Ictx, fpctx);
1004         if (!success)
1005         {
1006             return 0;
1007         }
1008         A->length += !fmpz_mpoly_is_zero(A->coeffs + A->length, ctx);
1009     }
1010     return 1;
1011 }
1012 
1013 
1014 
fmpz_mod_bma_mpoly_init(fmpz_mod_bma_mpoly_t A)1015 void fmpz_mod_bma_mpoly_init(fmpz_mod_bma_mpoly_t A)
1016 {
1017     A->length = 0;
1018     A->alloc = 0;
1019     A->exps = NULL;
1020     A->coeffs = NULL;
1021     A->pointcount = 0;
1022 }
1023 
fmpz_mod_bma_mpoly_reset_prime(fmpz_mod_bma_mpoly_t A,const fmpz_mod_ctx_t fpctx)1024 void fmpz_mod_bma_mpoly_reset_prime(
1025     fmpz_mod_bma_mpoly_t A,
1026     const fmpz_mod_ctx_t fpctx)
1027 {
1028     slong i;
1029     for (i = 0; i < A->alloc; i++)
1030     {
1031         fmpz_mod_berlekamp_massey_set_prime(A->coeffs + i, fmpz_mod_ctx_modulus(fpctx));
1032     }
1033 }
1034 
1035 
fmpz_mod_bma_mpoly_clear(fmpz_mod_bma_mpoly_t A)1036 void fmpz_mod_bma_mpoly_clear(fmpz_mod_bma_mpoly_t A)
1037 {
1038     slong i;
1039     for (i = 0; i < A->alloc; i++)
1040     {
1041         fmpz_mod_berlekamp_massey_clear(A->coeffs + i);
1042     }
1043 
1044     if (A->exps)
1045         flint_free(A->exps);
1046     if (A->coeffs)
1047         flint_free(A->coeffs);
1048 }
1049 
fmpz_mod_bma_mpoly_print(fmpz_mod_bma_mpoly_t A,const mpoly_bma_interpolate_ctx_t Ictx)1050 void fmpz_mod_bma_mpoly_print(
1051     fmpz_mod_bma_mpoly_t A,
1052     const mpoly_bma_interpolate_ctx_t Ictx)
1053 {
1054     slong i;
1055     flint_printf("0");
1056     for (i = 0; i < A->length; i++)
1057     {
1058         flint_printf(" + [");
1059         fmpz_mod_berlekamp_massey_print(A->coeffs + i);
1060         flint_printf("]*X^%wd*Y^%wd",
1061                  A->exps[i] >> (FLINT_BITS/2),
1062                  A->exps[i] & ((-UWORD(1)) >> (FLINT_BITS - FLINT_BITS/2)));
1063     }
1064 }
1065 
1066 
fmpz_mod_bma_mpoly_fit_length(fmpz_mod_bma_mpoly_t A,slong length,const fmpz_mod_ctx_t fpctx)1067 void fmpz_mod_bma_mpoly_fit_length(
1068     fmpz_mod_bma_mpoly_t A,
1069     slong length,
1070     const fmpz_mod_ctx_t fpctx)
1071 {
1072     slong i;
1073     slong old_alloc = A->alloc;
1074     slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
1075 
1076     if (length > old_alloc)
1077     {
1078         if (old_alloc == 0)
1079         {
1080             A->exps = (ulong *) flint_malloc(new_alloc*sizeof(ulong));
1081             A->coeffs = (fmpz_mod_berlekamp_massey_struct *) flint_malloc(
1082                            new_alloc*sizeof(fmpz_mod_berlekamp_massey_struct));
1083         }
1084         else
1085         {
1086             A->exps = (ulong *) flint_realloc(A->exps, new_alloc*sizeof(ulong));
1087             A->coeffs = (fmpz_mod_berlekamp_massey_struct *) flint_realloc(A->coeffs,
1088                            new_alloc*sizeof(fmpz_mod_berlekamp_massey_struct));
1089         }
1090 
1091         for (i = old_alloc; i < new_alloc; i++)
1092         {
1093             fmpz_mod_berlekamp_massey_init(A->coeffs + i, fmpz_mod_ctx_modulus(fpctx));
1094         }
1095         A->alloc = new_alloc;
1096     }
1097 }
1098 
fmpz_mod_bma_mpoly_zero(fmpz_mod_bma_mpoly_t L)1099 void fmpz_mod_bma_mpoly_zero(fmpz_mod_bma_mpoly_t L)
1100 {
1101     L->length = 0;
1102     L->pointcount = 0;
1103 }
1104 
fmpz_mod_bma_mpoly_reduce(fmpz_mod_bma_mpoly_t L)1105 int fmpz_mod_bma_mpoly_reduce(fmpz_mod_bma_mpoly_t L)
1106 {
1107     slong i;
1108     int changed;
1109 
1110     changed = 0;
1111 
1112     for (i = 0; i < L->length; i++)
1113     {
1114         FLINT_ASSERT(L->pointcount == fmpz_mod_berlekamp_massey_point_count(L->coeffs + i));
1115         changed |= fmpz_mod_berlekamp_massey_reduce(L->coeffs + i);
1116     }
1117 
1118     return changed;
1119 }
1120 
fmpz_mod_bma_mpoly_add_point(fmpz_mod_bma_mpoly_t L,const fmpz_mod_mpolyn_t A,const fmpz_mod_mpoly_ctx_t ctx_mp)1121 void fmpz_mod_bma_mpoly_add_point(
1122     fmpz_mod_bma_mpoly_t L,
1123     const fmpz_mod_mpolyn_t A,
1124     const fmpz_mod_mpoly_ctx_t ctx_mp)
1125 {
1126     slong j;
1127     slong Alen = A->length;
1128     fmpz_mod_poly_struct * Acoeff = A->coeffs;
1129     slong Li, Ai, ai;
1130     ulong Aexp;
1131     fmpz_mod_berlekamp_massey_struct * Lcoeff;
1132     slong Llen;
1133     ulong * Lexp;
1134 
1135     if (L->length == 0)
1136     {
1137         slong tot = 0;
1138         for (Ai = 0; Ai < Alen; Ai++)
1139         {
1140             for (ai = (Acoeff + Ai)->length - 1; ai >= 0; ai--)
1141             {
1142                 tot += !fmpz_is_zero((Acoeff + Ai)->coeffs + ai);
1143             }
1144         }
1145         fmpz_mod_bma_mpoly_fit_length(L, tot, ctx_mp->ffinfo);
1146     }
1147 
1148     Lcoeff = L->coeffs;
1149     Llen = L->length;
1150     Lexp = L->exps;
1151 
1152     Li = 0;
1153     Ai = ai = 0;
1154     Aexp = 0;
1155     if (Ai < Alen)
1156     {
1157         ai = fmpz_mod_poly_degree(A->coeffs + Ai);
1158         FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1159         FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1160         Aexp = A->exps[Ai] + ai;
1161     }
1162 
1163     while (Li < Llen || Ai < Alen)
1164     {
1165         if (Li < Llen && Ai < Alen && Lexp[Li] == Aexp)
1166         {
1167             /* L term present, A term present */
1168 add_same_exp:
1169             fmpz_mod_berlekamp_massey_add_point(Lcoeff + Li,
1170                                                    (Acoeff + Ai)->coeffs + ai);
1171             Li++;
1172             do {
1173                 ai--;
1174             } while (ai >= 0 && fmpz_is_zero((Acoeff + Ai)->coeffs + ai));
1175             if (ai < 0)
1176             {
1177                 Ai++;
1178                 if (Ai < Alen)
1179                 {
1180                     ai = fmpz_mod_poly_degree(A->coeffs + Ai);
1181                     FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1182                     FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1183                     Aexp = A->exps[Ai] + ai;
1184                 }
1185             }
1186             else
1187             {
1188                 FLINT_ASSERT(Ai < Alen);
1189                 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1190                 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1191                 Aexp = A->exps[Ai] + ai;
1192             }
1193         }
1194         else if (Li < Llen && (Ai >= Alen || Lexp[Li] > Aexp))
1195         {
1196             /* L term present, A term missing */
1197             fmpz_mod_berlekamp_massey_add_zeros(Lcoeff + Li, 1);
1198             Li++;
1199         }
1200         else
1201         {
1202             /* L term missing, A term present */
1203             FLINT_ASSERT(Ai < Alen && (Li >= Llen || Lexp[Li] < Aexp));
1204             {
1205                 ulong texp;
1206                 fmpz_mod_berlekamp_massey_struct tcoeff;
1207 
1208                 fmpz_mod_bma_mpoly_fit_length(L, Llen + 1, ctx_mp->ffinfo);
1209                 Lcoeff = L->coeffs;
1210                 Lexp = L->exps;
1211 
1212                 texp = Lexp[Llen];
1213                 tcoeff = Lcoeff[Llen];
1214                 for (j = Llen - 1; j >= Li; j--)
1215                 {
1216                     Lexp[j + 1] = Lexp[j];
1217                     Lcoeff[j + 1] = Lcoeff[j];
1218                 }
1219                 Lexp[Li] = texp;
1220                 Lcoeff[Li] = tcoeff;
1221             }
1222 
1223             fmpz_mod_berlekamp_massey_start_over(Lcoeff + Li);
1224             fmpz_mod_berlekamp_massey_add_zeros(Lcoeff + Li, L->pointcount);
1225             Lexp[Li] = Aexp;
1226             Llen++;
1227             L->length = Llen;
1228             goto add_same_exp;
1229         }
1230     }
1231 
1232     L->pointcount++;
1233 }
1234 
fmpz_mod_bma_mpoly_get_fmpz_mpolyu(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx,const fmpz_t alphashift,const fmpz_mod_bma_mpoly_t L,const mpoly_bma_interpolate_ctx_t Ictx,const fmpz_mod_ctx_t fpctx)1235 int fmpz_mod_bma_mpoly_get_fmpz_mpolyu(
1236     fmpz_mpolyu_t A,
1237     const fmpz_mpoly_ctx_t ctx,
1238     const fmpz_t alphashift,
1239     const fmpz_mod_bma_mpoly_t L,
1240     const mpoly_bma_interpolate_ctx_t Ictx,
1241     const fmpz_mod_ctx_t fpctx)
1242 {
1243     int success;
1244     slong i;
1245 
1246     fmpz_mpolyu_fit_length(A, L->length, ctx);
1247     A->length = 0;
1248     for (i = 0; i < L->length; i++)
1249     {
1250         A->exps[A->length] = L->exps[i];
1251         success = fmpz_mod_bma_get_fmpz_mpoly(A->coeffs + A->length, ctx,
1252                                        alphashift, L->coeffs + i, Ictx, fpctx);
1253         if (!success)
1254         {
1255             return 0;
1256         }
1257         A->length += !fmpz_mpoly_is_zero(A->coeffs + A->length, ctx);
1258     }
1259     return 1;
1260 }
1261 
1262 
1263 
fmpz_mod_mpolyn_bidegree(const fmpz_mod_mpolyn_t A)1264 ulong fmpz_mod_mpolyn_bidegree(const fmpz_mod_mpolyn_t A)
1265 {
1266     ulong degx, degy;
1267 
1268     FLINT_ASSERT(A->length > 0);
1269     FLINT_ASSERT(A->bits == FLINT_BITS/2);
1270 
1271     degx = (A->exps + 1*0)[0] >> (FLINT_BITS/2);
1272     degy = fmpz_mod_poly_degree(A->coeffs + 0);
1273 
1274     FLINT_ASSERT(FLINT_BIT_COUNT(degx) < FLINT_BITS/2);
1275     FLINT_ASSERT(FLINT_BIT_COUNT(degy) < FLINT_BITS/2);
1276 
1277     return (degx << (FLINT_BITS/2)) + degy;
1278 }
1279 
nmod_mpolyn_bidegree(const nmod_mpolyn_t A)1280 ulong nmod_mpolyn_bidegree(const nmod_mpolyn_t A)
1281 {
1282     ulong degx, degy;
1283 
1284     FLINT_ASSERT(A->length > 0);
1285     FLINT_ASSERT(A->bits == FLINT_BITS/2);
1286 
1287     degx = A->exps[0] >> (FLINT_BITS/2);
1288     degy = nmod_poly_degree(A->coeffs + 0);
1289 
1290     FLINT_ASSERT(FLINT_BIT_COUNT(degx) < FLINT_BITS/2);
1291     FLINT_ASSERT(FLINT_BIT_COUNT(degy) < FLINT_BITS/2);
1292 
1293     return (degx << (FLINT_BITS/2)) + degy;
1294 }
1295 
1296 
fmpz_mpoly_eval_fmpz_mod(fmpz_t eval,const fmpz_mod_ctx_t fpctx,const fmpz_mpoly_t A,const fmpz * alpha,const fmpz_mpoly_ctx_t ctx)1297 void fmpz_mpoly_eval_fmpz_mod(
1298     fmpz_t eval,
1299     const fmpz_mod_ctx_t fpctx,
1300     const fmpz_mpoly_t A,
1301     const fmpz * alpha,
1302     const fmpz_mpoly_ctx_t ctx)
1303 {
1304     slong i, j;
1305     slong offset, shift;
1306     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
1307     slong nvars = ctx->minfo->nvars;
1308     ulong * Aexp;
1309     slong * LUToffset;
1310     ulong * LUTmask;
1311     fmpz * LUTvalue;
1312     slong LUTlen;
1313     fmpz_t xpoweval;
1314     ulong * inputexpmask;
1315     TMP_INIT;
1316 
1317     TMP_START;
1318 
1319     fmpz_init(xpoweval);
1320 
1321     LUToffset = (slong *) TMP_ALLOC(N*FLINT_BITS*sizeof(slong));
1322     LUTmask   = (ulong *) TMP_ALLOC(N*FLINT_BITS*sizeof(ulong));
1323     LUTvalue  = (fmpz *) TMP_ALLOC(N*FLINT_BITS*sizeof(fmpz));
1324     for (i = 0; i < N*FLINT_BITS; i++)
1325     {
1326         fmpz_init(LUTvalue + i);
1327     }
1328 
1329     Aexp = A->exps;
1330 
1331     inputexpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
1332     mpoly_monomial_zero(inputexpmask, N);
1333     for (i = 0; i < A->length; i++)
1334     {
1335         for (j = 0; j < N; j++)
1336         {
1337             inputexpmask[j] |= (Aexp + N*i)[j];
1338         }
1339     }
1340 
1341     LUTlen = 0;
1342     for (j = nvars - 1; j >= 0; j--)
1343     {
1344         mpoly_gen_offset_shift_sp(&offset, &shift, j, A->bits, ctx->minfo);
1345 
1346         fmpz_set(xpoweval, alpha + j); /* xpoweval = alpha[i]^(2^i) */
1347         for (i = 0; i < A->bits; i++)
1348         {
1349             LUToffset[LUTlen] = offset;
1350             LUTmask[LUTlen] = (UWORD(1) << (shift + i));
1351             fmpz_set(LUTvalue + LUTlen, xpoweval);
1352             if ((inputexpmask[offset] & LUTmask[LUTlen]) != 0)
1353             {
1354                 LUTlen++;
1355             }
1356             fmpz_mod_mul(xpoweval, xpoweval, xpoweval, fpctx);
1357         }
1358     }
1359     FLINT_ASSERT(LUTlen < N*FLINT_BITS);
1360 
1361     fmpz_zero(eval);
1362     for (i = 0; i < A->length; i++)
1363     {
1364         fmpz_mod(xpoweval, A->coeffs + i, fmpz_mod_ctx_modulus(fpctx));
1365         for (j = 0; j < LUTlen; j++)
1366         {
1367             if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
1368             {
1369                 fmpz_mod_mul(xpoweval, xpoweval, LUTvalue + j, fpctx);
1370             }
1371         }
1372         fmpz_mod_add(eval, eval, xpoweval, fpctx);
1373     }
1374 
1375     fmpz_clear(xpoweval);
1376     for (i = 0; i < N*FLINT_BITS; i++)
1377     {
1378         fmpz_clear(LUTvalue + i);
1379     }
1380     TMP_END;
1381 }
1382 
1383 /* take coeffs from [0, p) to (-p/2, p/2]
1384 */
fmpz_mpolyu_symmetrize_coeffs(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx,const fmpz_mod_ctx_t fpctx)1385 void fmpz_mpolyu_symmetrize_coeffs(
1386     fmpz_mpolyu_t A,
1387     const fmpz_mpoly_ctx_t ctx,
1388     const fmpz_mod_ctx_t fpctx)
1389 {
1390     slong i, j;
1391     fmpz_mpoly_struct * Ac;
1392 
1393     for (i = 0; i < A->length; i++)
1394     {
1395         Ac = A->coeffs + i;
1396         for (j = 0; j < Ac->length; j++)
1397         {
1398             fmpz_smod(Ac->coeffs + j, Ac->coeffs + j,
1399                                                   fmpz_mod_ctx_modulus(fpctx));
1400         }
1401     }
1402 }
1403 
1404 
1405 
fmpz_mpolyuu_eval_nmod(nmod_mpolyn_t E,const nmod_mpoly_ctx_t ctx_sp,const fmpz_mpolyu_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1406 void fmpz_mpolyuu_eval_nmod(
1407     nmod_mpolyn_t E,
1408     const nmod_mpoly_ctx_t ctx_sp,
1409     const fmpz_mpolyu_t A,
1410     const mp_limb_t * alpha,
1411     const fmpz_mpoly_ctx_t ctx)
1412 {
1413     slong i;
1414     slong xexp, yexp;
1415     mp_limb_t eval;
1416 
1417     FLINT_ASSERT(E->bits == FLINT_BITS/2);
1418     FLINT_ASSERT(1 == mpoly_words_per_exp_sp(E->bits, ctx_sp->minfo));
1419 
1420     E->length = 0;
1421     for (i = 0; i < A->length; i++)
1422     {
1423         eval = fmpz_mpoly_eval_nmod(ctx_sp->ffinfo, A->coeffs + i, alpha, ctx);
1424         if (eval == 0)
1425         {
1426             continue;
1427         }
1428 
1429         xexp = A->exps[i] >> (FLINT_BITS/2);
1430         yexp = A->exps[i] & (-UWORD(1) >> (FLINT_BITS - FLINT_BITS/2));
1431 
1432         if (E->length > 0 && (E->exps[E->length - 1] >> (FLINT_BITS/2)) == xexp)
1433         {
1434             nmod_poly_set_coeff_ui(E->coeffs + E->length - 1, yexp, eval);
1435         }
1436         else
1437         {
1438             nmod_mpolyn_fit_length(E, E->length + 1, ctx_sp);
1439             nmod_poly_zero(E->coeffs + E->length);
1440             nmod_poly_set_coeff_ui(E->coeffs + E->length, yexp, eval);
1441             E->exps[E->length] = xexp << (FLINT_BITS/2);
1442             E->length++;
1443         }
1444     }
1445 }
1446 
fmpz_mpolyuu_eval_fmpz_mod(fmpz_mod_mpolyn_t E,const fmpz_mod_mpoly_ctx_t ctx_mp,const fmpz_mpolyu_t A,const fmpz * alpha,const fmpz_mpoly_ctx_t ctx)1447 void fmpz_mpolyuu_eval_fmpz_mod(
1448     fmpz_mod_mpolyn_t E,
1449     const fmpz_mod_mpoly_ctx_t ctx_mp,
1450     const fmpz_mpolyu_t A,
1451     const fmpz * alpha,
1452     const fmpz_mpoly_ctx_t ctx)
1453 {
1454     slong i;
1455     slong xexp, yexp;
1456     fmpz_t eval;
1457 
1458     FLINT_ASSERT(E->bits == FLINT_BITS/2);
1459     FLINT_ASSERT(1 == mpoly_words_per_exp_sp(E->bits, ctx_mp->minfo));
1460 
1461     fmpz_init(eval);
1462 
1463     E->length = 0;
1464     for (i = 0; i < A->length; i++)
1465     {
1466         fmpz_mpoly_eval_fmpz_mod(eval, ctx_mp->ffinfo, A->coeffs + i, alpha, ctx);
1467         if (fmpz_is_zero(eval))
1468         {
1469             continue;
1470         }
1471 
1472         xexp = A->exps[i] >> (FLINT_BITS/2);
1473         yexp = A->exps[i] & (-UWORD(1) >> (FLINT_BITS - FLINT_BITS/2));
1474 
1475         if (E->length > 0 && (E->exps[E->length - 1] >> (FLINT_BITS/2)) == xexp)
1476         {
1477             fmpz_mod_poly_set_coeff_fmpz(E->coeffs + E->length - 1, yexp, eval);
1478         }
1479         else
1480         {
1481             fmpz_mod_mpolyn_fit_length(E, E->length + 1, ctx_mp);
1482             fmpz_mod_poly_zero(E->coeffs + E->length);
1483             fmpz_mod_poly_set_coeff_fmpz(E->coeffs + E->length, yexp, eval);
1484             E->exps[E->length] = xexp << (FLINT_BITS/2);
1485             E->length++;
1486         }
1487     }
1488 
1489     fmpz_clear(eval);
1490 }
1491 
1492 
fmpz_mpoly_eval_nmod(const nmodf_ctx_t fpctx,const fmpz_mpoly_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1493 mp_limb_t fmpz_mpoly_eval_nmod(
1494     const nmodf_ctx_t fpctx,
1495     const fmpz_mpoly_t A,
1496     const mp_limb_t * alpha,
1497     const fmpz_mpoly_ctx_t ctx)
1498 {
1499     mp_limb_t eval;
1500     slong i, j;
1501     slong offset, shift;
1502     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
1503     slong nvars = ctx->minfo->nvars;
1504     ulong * Aexp;
1505     slong * LUToffset;
1506     ulong * LUTmask;
1507     mp_limb_t * LUTvalue;
1508     slong LUTlen;
1509     mp_limb_t xpoweval;
1510     ulong * inputexpmask;
1511     TMP_INIT;
1512 
1513     FLINT_ASSERT(A->bits <= FLINT_BITS);
1514 
1515     TMP_START;
1516 
1517     LUToffset = (slong *) TMP_ALLOC(N*FLINT_BITS*sizeof(slong));
1518     LUTmask   = (ulong *) TMP_ALLOC(N*FLINT_BITS*sizeof(ulong));
1519     LUTvalue  = (mp_limb_t *) TMP_ALLOC(N*FLINT_BITS*sizeof(mp_limb_t));
1520 
1521     Aexp = A->exps;
1522 
1523     inputexpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
1524     mpoly_monomial_zero(inputexpmask, N);
1525     for (i = 0; i < A->length; i++)
1526     {
1527         for (j = 0; j < N; j++)
1528         {
1529             inputexpmask[j] |= (Aexp + N*i)[j];
1530         }
1531     }
1532 
1533     LUTlen = 0;
1534     for (j = nvars - 1; j >= 0; j--)
1535     {
1536         mpoly_gen_offset_shift_sp(&offset, &shift, j, A->bits, ctx->minfo);
1537 
1538         xpoweval = alpha[j]; /* xpoweval = alpha[i]^(2^i) */
1539         for (i = 0; i < A->bits; i++)
1540         {
1541             LUToffset[LUTlen] = offset;
1542             LUTmask[LUTlen] = (UWORD(1) << (shift + i));
1543             LUTvalue[LUTlen] = xpoweval;
1544             if ((inputexpmask[offset] & LUTmask[LUTlen]) != 0)
1545             {
1546                 LUTlen++;
1547             }
1548             xpoweval = nmod_mul(xpoweval, xpoweval, fpctx->mod);
1549         }
1550     }
1551     FLINT_ASSERT(LUTlen < N*FLINT_BITS);
1552 
1553     eval = 0;
1554     for (i = 0; i < A->length; i++)
1555     {
1556         xpoweval = fmpz_fdiv_ui(A->coeffs + i, fpctx->mod.n);
1557         for (j = 0; j < LUTlen; j++)
1558         {
1559             if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
1560             {
1561                 xpoweval = nmod_mul(xpoweval, LUTvalue[j], fpctx->mod);
1562             }
1563         }
1564         eval = nmod_add(eval, xpoweval, fpctx->mod);
1565     }
1566 
1567     TMP_END;
1568 
1569     return eval;
1570 }
1571 
1572 
1573 
1574 
1575 
nmod_zip_init(nmod_zip_t Z)1576 void nmod_zip_init(nmod_zip_t Z)
1577 {
1578     Z->mlength = 0;
1579     Z->malloc = 0;
1580     Z->coeffs = NULL;
1581     Z->monomials = NULL;
1582     Z->ealloc = 0;
1583     Z->evals = NULL;
1584 }
1585 
nmod_zip_clear(nmod_zip_t Z)1586 void nmod_zip_clear(nmod_zip_t Z)
1587 {
1588     if (Z->coeffs)
1589         flint_free(Z->coeffs);
1590     if (Z->monomials)
1591         flint_free(Z->monomials);
1592     if (Z->evals)
1593         flint_free(Z->evals);
1594 }
1595 
nmod_zip_set_lengths(nmod_zip_t A,slong mlength,slong elength)1596 void nmod_zip_set_lengths(nmod_zip_t A, slong mlength, slong elength)
1597 {
1598     slong old_malloc = A->malloc;
1599     slong new_malloc = FLINT_MAX(mlength, A->malloc + A->malloc/2);
1600     slong old_ealloc = A->ealloc;
1601     slong new_ealloc = FLINT_MAX(elength, A->ealloc + A->ealloc/2);
1602 
1603     FLINT_ASSERT(mlength > 0);
1604     FLINT_ASSERT(elength > 0);
1605 
1606     if (mlength > old_malloc)
1607     {
1608         if (old_malloc == 0)
1609         {
1610             A->coeffs    = (mp_limb_t *) flint_malloc(
1611                                                  new_malloc*sizeof(mp_limb_t));
1612             A->monomials = (mp_limb_t *) flint_malloc(
1613                                                  new_malloc*sizeof(mp_limb_t));
1614         }
1615         else
1616         {
1617             A->coeffs    = (mp_limb_t *) flint_realloc(A->coeffs,
1618                                                  new_malloc*sizeof(mp_limb_t));
1619             A->monomials = (mp_limb_t *) flint_realloc(A->monomials,
1620                                                  new_malloc*sizeof(mp_limb_t));
1621         }
1622         A->malloc = new_malloc;
1623     }
1624 
1625     A->mlength = mlength;
1626 
1627     if (elength > old_ealloc)
1628     {
1629         if (old_ealloc == 0)
1630         {
1631             A->evals = (mp_limb_t *) flint_malloc(new_ealloc*sizeof(mp_limb_t));
1632         }
1633         else
1634         {
1635             A->evals = (mp_limb_t *) flint_realloc(A->evals,
1636                                                  new_ealloc*sizeof(mp_limb_t));
1637         }
1638         A->ealloc = new_ealloc;
1639     }
1640 }
1641 
nmod_zip_print(const nmod_zip_t Z,slong elength)1642 void nmod_zip_print(const nmod_zip_t Z, slong elength)
1643 {
1644     slong i;
1645 
1646     printf("m ");
1647     for (i = 0; i < Z->mlength; i++)
1648     {
1649         flint_printf("(%wu %wu) ", Z->coeffs[i], Z->monomials[i]);
1650     }
1651     printf("e ");
1652     for (i = 0; i < elength; i++)
1653     {
1654         flint_printf("%wu ", Z->evals[i]);
1655     }
1656 }
1657 
1658 
1659 
nmod_zip_mpolyu_init(nmod_zip_mpolyu_t Z)1660 void nmod_zip_mpolyu_init(nmod_zip_mpolyu_t Z)
1661 {
1662     Z->alloc = 0;
1663     Z->length = 0;
1664     Z->exps = NULL;
1665     Z->coeffs = NULL;
1666     Z->pointcount = 0;
1667 }
1668 
nmod_zip_mpolyu_clear(nmod_zip_mpolyu_t Z)1669 void nmod_zip_mpolyu_clear(nmod_zip_mpolyu_t Z)
1670 {
1671     slong i;
1672 
1673     for (i = 0; i < Z->alloc; i++)
1674     {
1675         nmod_zip_clear(Z->coeffs + i);
1676     }
1677 
1678     if (Z->exps)
1679         flint_free(Z->exps);
1680     if (Z->coeffs)
1681         flint_free(Z->coeffs);
1682 }
1683 
nmod_zip_mpolyu_fit_length(nmod_zip_mpolyu_t A,slong length)1684 void nmod_zip_mpolyu_fit_length(
1685     nmod_zip_mpolyu_t A,
1686     slong length)
1687 {
1688     slong i;
1689     slong old_alloc = A->alloc;
1690     slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
1691 
1692     if (length > old_alloc)
1693     {
1694         if (old_alloc == 0)
1695         {
1696             A->exps = (ulong *) flint_malloc(new_alloc*sizeof(ulong));
1697             A->coeffs = (nmod_zip_struct *) flint_malloc(
1698                                             new_alloc*sizeof(nmod_zip_struct));
1699         }
1700         else
1701         {
1702             A->exps = (ulong *) flint_realloc(A->exps, new_alloc*sizeof(ulong));
1703             A->coeffs = (nmod_zip_struct *) flint_realloc(A->coeffs,
1704                                             new_alloc*sizeof(nmod_zip_struct));
1705         }
1706 
1707         for (i = old_alloc; i < new_alloc; i++)
1708         {
1709             nmod_zip_init(A->coeffs + i);
1710         }
1711         A->alloc = new_alloc;
1712     }
1713 }
1714 
nmod_zip_mpolyu_fit_poly(nmod_zip_mpolyu_t Z,fmpz_mpolyu_t H,slong eval_length)1715 void nmod_zip_mpolyu_fit_poly(
1716     nmod_zip_mpolyu_t Z,
1717     fmpz_mpolyu_t H,
1718     slong eval_length)
1719 {
1720     slong i;
1721     FLINT_ASSERT(H->length > 0);
1722 
1723     nmod_zip_mpolyu_fit_length(Z, H->length);
1724 
1725     for (i = 0; i < H->length; i++)
1726     {
1727         Z->exps[i] = H->exps[i];
1728         nmod_zip_set_lengths(Z->coeffs + i, (H->coeffs + i)->length, eval_length);
1729     }
1730 
1731     Z->length = H->length;
1732     Z->pointcount = 0;
1733 }
1734 
1735 
nmod_mpoly_set_skel(nmod_mpolyc_t S,const nmod_mpoly_ctx_t ctx_sp,const fmpz_mpoly_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1736 void nmod_mpoly_set_skel(
1737     nmod_mpolyc_t S,
1738     const nmod_mpoly_ctx_t ctx_sp,
1739     const fmpz_mpoly_t A,
1740     const mp_limb_t * alpha,
1741     const fmpz_mpoly_ctx_t ctx)
1742 {
1743     slong i, j;
1744     slong offset, shift;
1745     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
1746     slong nvars = ctx->minfo->nvars;
1747     ulong * Aexp;
1748     slong * LUToffset;
1749     ulong * LUTmask;
1750     mp_limb_t * LUTvalue;
1751     slong LUTlen;
1752     mp_limb_t xpoweval;
1753     ulong * inputexpmask;
1754     TMP_INIT;
1755 
1756     FLINT_ASSERT(A->bits <= FLINT_BITS);
1757 
1758     TMP_START;
1759 
1760     LUToffset = (slong *) TMP_ALLOC(N*FLINT_BITS*sizeof(slong));
1761     LUTmask   = (ulong *) TMP_ALLOC(N*FLINT_BITS*sizeof(ulong));
1762     LUTvalue  = (mp_limb_t *) TMP_ALLOC(N*FLINT_BITS*sizeof(mp_limb_t));
1763 
1764     Aexp = A->exps;
1765 
1766     inputexpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
1767     mpoly_monomial_zero(inputexpmask, N);
1768     for (i = 0; i < A->length; i++)
1769     {
1770         for (j = 0; j < N; j++)
1771         {
1772             inputexpmask[j] |= (Aexp + N*i)[j];
1773         }
1774     }
1775 
1776     LUTlen = 0;
1777     for (j = nvars - 1; j >= 0; j--)
1778     {
1779         mpoly_gen_offset_shift_sp(&offset, &shift, j, A->bits, ctx->minfo);
1780 
1781         xpoweval = alpha[j]; /* xpoweval = alpha[i]^(2^i) */
1782         for (i = 0; i < A->bits; i++)
1783         {
1784             LUToffset[LUTlen] = offset;
1785             LUTmask[LUTlen] = (UWORD(1) << (shift + i));
1786             LUTvalue[LUTlen] = xpoweval;
1787             if ((inputexpmask[offset] & LUTmask[LUTlen]) != 0)
1788             {
1789                 LUTlen++;
1790             }
1791             xpoweval = nmod_mul(xpoweval, xpoweval, ctx_sp->ffinfo->mod);
1792         }
1793     }
1794     FLINT_ASSERT(LUTlen < N*FLINT_BITS);
1795 
1796     nmod_mpolyc_fit_length(S, A->length);
1797     for (i = 0; i < A->length; i++)
1798     {
1799         xpoweval = 1;
1800         for (j = 0; j < LUTlen; j++)
1801         {
1802             if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
1803             {
1804                 xpoweval = nmod_mul(xpoweval, LUTvalue[j], ctx_sp->ffinfo->mod);
1805             }
1806         }
1807         S->coeffs[i] = xpoweval;
1808     }
1809     S->length = A->length;
1810 
1811     TMP_END;
1812 }
1813 
nmod_zip_mpolyu_set_skel(nmod_zip_mpolyu_t Z,const nmod_mpoly_ctx_t ctx_sp,const fmpz_mpolyu_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1814 void nmod_zip_mpolyu_set_skel(
1815     nmod_zip_mpolyu_t Z,
1816     const nmod_mpoly_ctx_t ctx_sp,
1817     const fmpz_mpolyu_t A,
1818     const mp_limb_t * alpha,
1819     const fmpz_mpoly_ctx_t ctx)
1820 {
1821     slong i, j;
1822 
1823     nmod_mpolyc_t T;
1824     nmod_mpolyc_init(T);
1825 
1826     FLINT_ASSERT(Z->length == A->length);
1827     for (i = 0; i < Z->length; i++)
1828     {
1829         nmod_zip_struct * Zc = Z->coeffs + i;
1830 
1831         nmod_mpoly_set_skel(T, ctx_sp, A->coeffs + i, alpha, ctx);
1832 
1833         Z->exps[i] = A->exps[i];
1834         FLINT_ASSERT(Zc->mlength == T->length);
1835         for (j = 0; j < Zc->mlength; j++)
1836         {
1837             Zc->coeffs[j] = 0;
1838             Zc->monomials[j] = T->coeffs[j];
1839         }
1840     }
1841     Z->pointcount = 0;
1842 
1843     nmod_mpolyc_clear(T);
1844 }
1845 
nmod_zip_mpolyuu_print(const nmod_zip_mpolyu_t A)1846 void nmod_zip_mpolyuu_print(const nmod_zip_mpolyu_t A)
1847 {
1848     slong i;
1849     flint_printf("0");
1850     for (i = 0; i < A->length; i++)
1851     {
1852         flint_printf(" + [");
1853         nmod_zip_print(A->coeffs + i, A->pointcount);
1854         flint_printf("]*X^%wd*Y^%wd",
1855                  A->exps[i] >> (FLINT_BITS/2),
1856                  A->exps[i] & ((-UWORD(1)) >> (FLINT_BITS/2)));
1857     }
1858 }
1859 
nmod_zip_mpolyuu_add_point(nmod_zip_mpolyu_t L,const nmod_mpolyn_t A)1860 int nmod_zip_mpolyuu_add_point(
1861     nmod_zip_mpolyu_t L,
1862     const nmod_mpolyn_t A)
1863 {
1864     slong Alen = A->length;
1865     nmod_poly_struct * Acoeff = A->coeffs;
1866     slong Li, Ai, ai;
1867     ulong Aexp;
1868     nmod_zip_struct * Lcoeff;
1869     slong Llen;
1870     ulong * Lexp;
1871     slong pointcount = L->pointcount;
1872 
1873     Lcoeff = L->coeffs;
1874     Llen = L->length;
1875     Lexp = L->exps;
1876 
1877     Li = 0;
1878     Ai = ai = 0;
1879     Aexp = 0;
1880     if (Ai < Alen)
1881     {
1882         ai = nmod_poly_degree(A->coeffs + Ai);
1883         FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1884         FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1885         Aexp = A->exps[Ai] + ai;
1886     }
1887 
1888     for (Li = 0; Li < Llen; Li++)
1889     {
1890         nmod_zip_struct * Lc = Lcoeff + Li;
1891 
1892         if (Ai < Alen && Lexp[Li] == Aexp)
1893         {
1894             /* L present A present */
1895             FLINT_ASSERT(pointcount <= Lc->ealloc);
1896             Lc->evals[pointcount] = (Acoeff + Ai)->coeffs[ai];
1897             do {
1898                 ai--;
1899             } while (ai >= 0 && (0 == (Acoeff + Ai)->coeffs[ai]));
1900             if (ai < 0)
1901             {
1902                 Ai++;
1903                 if (Ai < Alen)
1904                 {
1905                     ai = nmod_poly_degree(A->coeffs + Ai);
1906                     FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1907                     FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1908                     Aexp = A->exps[Ai] + ai;
1909                 }
1910             }
1911             else
1912             {
1913                 FLINT_ASSERT(Ai < Alen);
1914                 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1915                 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1916                 Aexp = A->exps[Ai] + ai;
1917             }
1918         }
1919         else if (Ai >= Alen || Lexp[Li] > Aexp)
1920         {
1921             /* L present A missing */
1922             FLINT_ASSERT(pointcount <= Lc->ealloc);
1923             Lc->evals[pointcount] = 0;
1924         }
1925         else
1926         {
1927             /* L missing A present */
1928             return 0;
1929         }
1930     }
1931 
1932     L->pointcount = pointcount + 1;
1933     return 1;
1934 }
1935 
1936 
nmod_mpolyu_set_skel(nmod_mpolycu_t S,const nmod_mpoly_ctx_t ctx_sp,const fmpz_mpolyu_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1937 void nmod_mpolyu_set_skel(
1938     nmod_mpolycu_t S,
1939     const nmod_mpoly_ctx_t ctx_sp,
1940     const fmpz_mpolyu_t A,
1941     const mp_limb_t * alpha,
1942     const fmpz_mpoly_ctx_t ctx)
1943 {
1944     slong i;
1945     nmod_mpolycu_fit_length(S, A->length);
1946     for (i = 0; i < A->length; i++)
1947     {
1948         nmod_mpoly_set_skel(S->coeffs + i, ctx_sp, A->coeffs + i, alpha, ctx);
1949     }
1950     S->length = A->length;
1951 }
1952 
1953 
1954 
1955 /* Ared = A mod p */
nmod_mpoly_red_skel(nmod_mpolyc_t Ared,const fmpz_mpoly_t A,const nmodf_ctx_t fpctx)1956 void nmod_mpoly_red_skel(
1957     nmod_mpolyc_t Ared,
1958     const fmpz_mpoly_t A,
1959     const nmodf_ctx_t fpctx)
1960 {
1961     nmod_mpolyc_fit_length(Ared, A->length);
1962     Ared->length = A->length;
1963     _fmpz_vec_get_nmod_vec(Ared->coeffs, A->coeffs, A->length, fpctx->mod);
1964 }
1965 
nmod_mpolyu_red_skel(nmod_mpolycu_t Ared,const fmpz_mpolyu_t A,const nmodf_ctx_t fpctx)1966 void nmod_mpolyu_red_skel(
1967     nmod_mpolycu_t Ared,
1968     const fmpz_mpolyu_t A,
1969     const nmodf_ctx_t fpctx)
1970 {
1971     slong i;
1972     nmod_mpolycu_fit_length(Ared, A->length);
1973     Ared->length = A->length;
1974     for (i = 0; i < A->length; i++)
1975     {
1976         nmod_mpoly_red_skel(Ared->coeffs + i, A->coeffs + i, fpctx);
1977     }
1978 }
1979 
1980 /* M = S */
nmod_mpoly_copy_skel(nmod_mpolyc_t M,const nmod_mpolyc_t S)1981 void nmod_mpoly_copy_skel(nmod_mpolyc_t M, const nmod_mpolyc_t S)
1982 {
1983     slong i;
1984     nmod_mpolyc_fit_length(M, S->length);
1985     M->length = S->length;
1986     for (i = 0; i < S->length; i++)
1987     {
1988         M->coeffs[i] = S->coeffs[i];
1989     }
1990 }
1991 
nmod_mpolyu_copy_skel(nmod_mpolycu_t M,const nmod_mpolycu_t S)1992 void nmod_mpolyu_copy_skel(nmod_mpolycu_t M, const nmod_mpolycu_t S)
1993 {
1994     slong i;
1995     nmod_mpolycu_fit_length(M, S->length);
1996     M->length = S->length;
1997     for (i = 0; i < S->length; i++)
1998     {
1999         nmod_mpoly_copy_skel(M->coeffs + i, S->coeffs + i);
2000     }
2001 }
2002 
2003 /* return Ared.Acur and multiply Acur by Ainc */
nmod_mpoly_use_skel_mul(const nmod_mpolyc_t Ared,nmod_mpolyc_t Acur,const nmod_mpolyc_t Ainc,const nmodf_ctx_t fpctx)2004 mp_limb_t nmod_mpoly_use_skel_mul(
2005     const nmod_mpolyc_t Ared,
2006     nmod_mpolyc_t Acur,
2007     const nmod_mpolyc_t Ainc,
2008     const nmodf_ctx_t fpctx)
2009 {
2010     slong i;
2011     mp_limb_t V, V0, V1, V2, p1, p0;
2012     FLINT_ASSERT(Ared->length == Acur->length);
2013     FLINT_ASSERT(Ared->length == Ainc->length);
2014 
2015     V0 = V1 = V2 = 0;
2016     for (i = 0; i < Ared->length; i++)
2017     {
2018         umul_ppmm(p1, p0, Ared->coeffs[i], Acur->coeffs[i]);
2019         add_sssaaaaaa(V2, V1, V0, V2, V1, V0, WORD(0), p1, p0);
2020         Acur->coeffs[i] = nmod_mul(Acur->coeffs[i], Ainc->coeffs[i], fpctx->mod);
2021     }
2022 
2023     NMOD_RED3(V, V2, V1, V0, fpctx->mod);
2024     return V;
2025 }
2026 
2027 
2028 /* M = S^k */
nmod_mpoly_pow_skel(nmod_mpolyc_t M,const nmod_mpolyc_t S,ulong k,const nmod_mpoly_ctx_t ctx)2029 void nmod_mpoly_pow_skel(
2030     nmod_mpolyc_t M,
2031     const nmod_mpolyc_t S,
2032     ulong k,
2033     const nmod_mpoly_ctx_t ctx)
2034 {
2035     slong i;
2036     nmod_mpolyc_fit_length(M, S->length);
2037     M->length = S->length;
2038     for (i = 0; i < S->length; i++)
2039     {
2040         M->coeffs[i] = nmod_pow_ui(S->coeffs[i], k, ctx->ffinfo->mod);
2041     }
2042 }
2043 
nmod_mpolyu_pow_skel(nmod_mpolycu_t M,const nmod_mpolycu_t S,ulong k,const nmod_mpoly_ctx_t ctx)2044 void nmod_mpolyu_pow_skel(
2045     nmod_mpolycu_t M,
2046     const nmod_mpolycu_t S,
2047     ulong k,
2048     const nmod_mpoly_ctx_t ctx)
2049 {
2050     slong i;
2051     nmod_mpolycu_fit_length(M, S->length);
2052     M->length = S->length;
2053     for (i = 0; i < S->length; i++)
2054     {
2055         nmod_mpoly_pow_skel(M->coeffs + i, S->coeffs + i, k, ctx);
2056     }
2057 }
2058 
nmod_mpolyuu_use_skel_mul(nmod_mpolyn_t E,const fmpz_mpolyu_t A,const nmod_mpolycu_t Ared,nmod_mpolycu_t Acur,const nmod_mpolycu_t Ainc,const nmod_mpoly_ctx_t ctx_sp)2059 void nmod_mpolyuu_use_skel_mul(
2060     nmod_mpolyn_t E,
2061     const fmpz_mpolyu_t A,
2062     const nmod_mpolycu_t Ared,
2063     nmod_mpolycu_t Acur,
2064     const nmod_mpolycu_t Ainc,
2065     const nmod_mpoly_ctx_t ctx_sp)
2066 {
2067     slong xexp, yexp;
2068     slong i;
2069     mp_limb_t eval;
2070 
2071     FLINT_ASSERT(E->bits == FLINT_BITS/2);
2072     FLINT_ASSERT(1 == mpoly_words_per_exp_sp(E->bits, ctx_sp->minfo));
2073 
2074     FLINT_ASSERT(A->length == Ared->length);
2075     FLINT_ASSERT(A->length == Acur->length);
2076     FLINT_ASSERT(A->length == Ainc->length);
2077 
2078     E->length = 0;
2079     for (i = 0; i < A->length; i++)
2080     {
2081         eval = nmod_mpoly_use_skel_mul(Ared->coeffs + i, Acur->coeffs + i,
2082                                              Ainc->coeffs + i, ctx_sp->ffinfo);
2083         if (eval == 0)
2084         {
2085             continue;
2086         }
2087 
2088         xexp = A->exps[i] >> (FLINT_BITS/2);
2089         yexp = A->exps[i] & ((-UWORD(1)) >> (FLINT_BITS - FLINT_BITS/2));
2090 
2091         if (E->length > 0 && (E->exps[E->length - 1] >> (FLINT_BITS/2)) == xexp)
2092         {
2093             nmod_poly_set_coeff_ui(E->coeffs + E->length - 1, yexp, eval);
2094         }
2095         else
2096         {
2097             nmod_mpolyn_fit_length(E, E->length + 1, ctx_sp);
2098             nmod_poly_zero(E->coeffs + E->length);
2099             nmod_poly_set_coeff_ui(E->coeffs + E->length, yexp, eval);
2100             E->exps[E->length] = xexp << (FLINT_BITS/2);
2101             E->length++;
2102         }
2103     }
2104 }
2105 
2106 
2107 
nmod_zip_find_coeffs(nmod_zip_t Z,nmod_poly_t master,slong pointcount,const nmodf_ctx_t ffinfo)2108 nmod_zip_find_coeffs_ret_t nmod_zip_find_coeffs(
2109     nmod_zip_t Z,
2110     nmod_poly_t master,
2111     slong pointcount,
2112     const nmodf_ctx_t ffinfo)
2113 {
2114     slong i, j;
2115     mp_limb_t V, V0, V1, V2, T, S, r, p0, p1;
2116 
2117     FLINT_ASSERT(pointcount >= Z->mlength);
2118 
2119     nmod_poly_product_roots_nmod_vec(master, Z->monomials, Z->mlength);
2120 
2121     for (i = 0; i < Z->mlength; i++)
2122     {
2123         /* coeffs[i] is (coeffs(P).values)/P(roots[i]) =: V/S
2124             where P(x) = master(x)/(x-roots[i])     */
2125         V0 = V1 = V2 = T = S = 0;
2126         r = Z->monomials[i];
2127         for (j = Z->mlength; j > 0; j--)
2128         {
2129             T = nmod_add(nmod_mul(r, T, ffinfo->mod), master->coeffs[j], ffinfo->mod);
2130             S = nmod_add(nmod_mul(r, S, ffinfo->mod), T, ffinfo->mod);
2131             umul_ppmm(p1, p0, Z->evals[j - 1], T);
2132             add_sssaaaaaa(V2, V1, V0, V2, V1, V0, WORD(0), p1, p0);
2133         }
2134         /* roots[i] should be a root of master */
2135         FLINT_ASSERT(nmod_add(nmod_mul(r, T, ffinfo->mod), master->coeffs[0], ffinfo->mod) == 0);
2136         NMOD_RED3(V, V2, V1, V0, ffinfo->mod);
2137         S = nmod_mul(S, r, ffinfo->mod); /* shift is one */
2138         if (S == 0)
2139         {
2140             return nmod_zip_find_coeffs_non_invertible;
2141         }
2142         Z->coeffs[i] = nmod_mul(V, nmod_inv(S, ffinfo->mod), ffinfo->mod);
2143     }
2144 
2145     /* use the coefficients of master as temp work space */
2146     for (i = 0; i < Z->mlength; i++)
2147     {
2148         master->coeffs[i] = nmod_pow_ui(Z->monomials[i], Z->mlength, ffinfo->mod);
2149     }
2150 
2151     /* check that the remaining points match */
2152     for (i = Z->mlength; i < pointcount; i++)
2153     {
2154         V0 = V1 = V2 = S = 0;
2155         for (j = 0; j < Z->mlength; j++)
2156         {
2157             master->coeffs[j] = nmod_mul(master->coeffs[j], Z->monomials[j], ffinfo->mod);
2158             umul_ppmm(p1, p0, Z->coeffs[j], master->coeffs[j]);
2159             add_sssaaaaaa(V2, V1, V0, V2, V1, V0, WORD(0), p1, p0);
2160         }
2161         NMOD_RED3(V, V2, V1, V0, ffinfo->mod);
2162         if (V != Z->evals[i])
2163         {
2164             return nmod_zip_find_coeffs_no_match;
2165         }
2166     }
2167 
2168     return nmod_zip_find_coeffs_good;
2169 }
2170 
nmod_mpolyu_zip_find_coeffs(nmod_zip_mpolyu_t Z,const nmod_mpoly_ctx_t ctx_sp)2171 nmod_zip_find_coeffs_ret_t nmod_mpolyu_zip_find_coeffs(
2172     nmod_zip_mpolyu_t Z,
2173     const nmod_mpoly_ctx_t ctx_sp)
2174 {
2175     slong i;
2176     nmod_zip_find_coeffs_ret_t ret;
2177     nmod_poly_t T;
2178 
2179     nmod_poly_init_mod(T, ctx_sp->ffinfo->mod);
2180 
2181     for (i = 0; i < Z->length; i++)
2182     {
2183         ret = nmod_zip_find_coeffs(Z->coeffs + i, T, Z->pointcount, ctx_sp->ffinfo);
2184         if (ret != nmod_zip_find_coeffs_good)
2185         {
2186             goto cleanup;
2187         }
2188     }
2189 
2190     ret = nmod_zip_find_coeffs_good;
2191 
2192 cleanup:
2193 
2194     nmod_poly_clear(T);
2195 
2196     return ret;
2197 }
2198 
2199 
fmpz_mpolyu_addinterp_zip(fmpz_mpolyu_t H,const fmpz_t Hmodulus,const nmod_zip_mpolyu_t Z,const nmodf_ctx_t ffinfo)2200 int fmpz_mpolyu_addinterp_zip(
2201     fmpz_mpolyu_t H,
2202     const fmpz_t Hmodulus,
2203     const nmod_zip_mpolyu_t Z,
2204     const nmodf_ctx_t ffinfo)
2205 {
2206     int changed = 0;
2207     slong i, j;
2208     fmpz_t t;
2209 
2210     fmpz_init(t);
2211 
2212     FLINT_ASSERT(H->length == Z->length);
2213     for (i = 0; i < H->length; i++)
2214     {
2215         fmpz_mpoly_struct * Hc = H->coeffs + i;
2216         nmod_zip_struct * Zc = Z->coeffs + i;
2217 
2218         FLINT_ASSERT(Hc->length == Zc->mlength);
2219         for (j = 0; j < Hc->length; j++)
2220         {
2221             fmpz_CRT_ui(t, Hc->coeffs + j, Hmodulus, Zc->coeffs[j], ffinfo->mod.n, 1);
2222             changed |= !fmpz_equal(t, Hc->coeffs + j);
2223             fmpz_swap(t, Hc->coeffs + j);
2224         }
2225     }
2226 
2227     fmpz_clear(t);
2228     return changed;
2229 }
2230 
2231 
fmpz_mpolyu_repack_bits(fmpz_mpolyu_t A,flint_bitcnt_t Abits,const fmpz_mpoly_ctx_t ctx)2232 int fmpz_mpolyu_repack_bits(
2233     fmpz_mpolyu_t A,
2234     flint_bitcnt_t Abits,
2235     const fmpz_mpoly_ctx_t ctx)
2236 {
2237     flint_bitcnt_t org_bits = A->bits;
2238     int success;
2239     slong i, j;
2240 
2241     for (i = 0; i < A->length; i++)
2242     {
2243         FLINT_ASSERT((A->coeffs + i)->bits == A->bits);
2244         success = fmpz_mpoly_repack_bits_inplace(A->coeffs + i, Abits, ctx);
2245         if (!success)
2246         {
2247             /* repack changed coeffs */
2248             for (j = 0; j < i; j++)
2249             {
2250                 success = fmpz_mpoly_repack_bits_inplace(A->coeffs + j, org_bits, ctx);
2251                 FLINT_ASSERT(success);
2252             }
2253             return 0;
2254         }
2255     }
2256     return 1;
2257 }
2258 
2259 
2260 /* fit bits is not the best fxn to accomplish this */
fmpz_mpoly_set_bits(fmpz_mpoly_t A,flint_bitcnt_t Abits,const fmpz_mpoly_ctx_t ctx)2261 void fmpz_mpoly_set_bits(
2262     fmpz_mpoly_t A,
2263     flint_bitcnt_t Abits,
2264     const fmpz_mpoly_ctx_t ctx)
2265 {
2266     fmpz_mpoly_fit_bits(A, Abits, ctx);
2267     A->bits = Abits;
2268 }
2269 
fmpz_mpolyu_set_bits(fmpz_mpolyu_t A,flint_bitcnt_t Abits,const fmpz_mpoly_ctx_t ctx)2270 void fmpz_mpolyu_set_bits(
2271     fmpz_mpolyu_t A,
2272     flint_bitcnt_t Abits,
2273     const fmpz_mpoly_ctx_t ctx)
2274 {
2275     slong i;
2276     for (i = 0; i < A->length; i++)
2277     {
2278         fmpz_mpoly_set_bits(A->coeffs + i, Abits, ctx);
2279     }
2280     A->bits = Abits;
2281 }
2282 
2283 
2284 /*
2285     A is in ZZ[x_0,..., x_(n-1)]
2286     evaluation at x_i = values[i]
2287     out is in Fp[x_var]   (p is stored in out :( )
2288 
2289     out_deg is deg of out.
2290 
2291     This function fails to produce out if it will have too large degree:
2292     If return is 0, then out is invalid. Otherwise, out is valid.
2293     out_deg is always valid.
2294 */
fmpz_mpoly_eval_all_but_one_nmod(slong * out_deg,nmod_poly_t out,const fmpz_mpoly_t A,slong var,mp_limb_t * values,const fmpz_mpoly_ctx_t ctx)2295 int fmpz_mpoly_eval_all_but_one_nmod(
2296     slong * out_deg,
2297     nmod_poly_t out,
2298     const fmpz_mpoly_t A,
2299     slong var,
2300     mp_limb_t * values,
2301     const fmpz_mpoly_ctx_t ctx)
2302 {
2303     slong i, j;
2304     slong deg;
2305     const slong deg_limit = 9999;
2306     ulong varexp, thisexp;
2307     mp_limb_t t, v;
2308     ulong mask;
2309     slong * offsets, * shifts;
2310     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
2311     ulong * Aexp = A->exps;
2312     fmpz * Acoeff = A->coeffs;
2313     TMP_INIT;
2314 
2315     FLINT_ASSERT(A->bits <= FLINT_BITS);
2316 
2317     TMP_START;
2318 
2319     mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
2320     offsets = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
2321     shifts = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
2322     for (j = 0; j < ctx->minfo->nvars; j++)
2323     {
2324         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
2325     }
2326 
2327     nmod_poly_zero(out);
2328     deg = -WORD(1);
2329     for (i = 0; i < A->length; i++)
2330     {
2331         varexp = ((Aexp + N*i)[offsets[var]]>>shifts[var])&mask;
2332         deg = FLINT_MAX(deg, (slong)(varexp));
2333         v = fmpz_fdiv_ui(Acoeff + i, out->mod.n);
2334         for (j = 0; j < ctx->minfo->nvars; j++)
2335         {
2336             thisexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
2337             if (j != var)
2338             {
2339                 v = nmod_mul(v, nmod_pow_ui(values[j], thisexp, out->mod), out->mod);
2340             }
2341         }
2342         t = nmod_poly_get_coeff_ui(out, varexp);
2343         if (deg <= deg_limit)
2344         {
2345             nmod_poly_set_coeff_ui(out, varexp, nmod_add(t, v, out->mod));
2346         }
2347     }
2348 
2349     TMP_END;
2350 
2351     *out_deg = deg;
2352     return deg <= deg_limit;
2353 }
2354 
2355 /*
2356     A is in ZZ[X,Y][x_0,..., x_(n-1)]
2357     out is in Fp[x_var]   (p is stored in out :( )
2358 
2359     X = values[0]
2360     Y = values[1]
2361     x_0 = values[2]
2362     ...
2363 */
fmpz_mpolyuu_eval_all_but_one_nmod(slong * out_deg,nmod_poly_t out,const fmpz_mpolyu_t A,slong var,mp_limb_t * values,const fmpz_mpoly_ctx_t ctx)2364 int fmpz_mpolyuu_eval_all_but_one_nmod(
2365     slong * out_deg,
2366     nmod_poly_t out,
2367     const fmpz_mpolyu_t A,
2368     slong var,
2369     mp_limb_t * values,
2370     const fmpz_mpoly_ctx_t ctx)
2371 {
2372     slong i;
2373     slong deg, this_deg;
2374     ulong * Aexp = A->exps;
2375     const fmpz_mpoly_struct * Acoeff = A->coeffs;
2376     nmod_poly_t temp;
2377     mp_limb_t t, t1;
2378     int success, this_success;
2379 
2380     FLINT_ASSERT(A->bits <= FLINT_BITS);
2381 
2382     nmod_poly_zero(out);
2383     nmod_poly_init(temp, out->mod.n);
2384 
2385     deg = -WORD(1);
2386     success = 1;
2387     for (i = 0; i < A->length; i++)
2388     {
2389         t = nmod_pow_ui(values[0], Aexp[i] >> (FLINT_BITS/2), out->mod);
2390         t1 = nmod_pow_ui(values[1], Aexp[i] & LOW_HALF_MASK, out->mod);
2391         t = nmod_mul(t, t1, out->mod);
2392         this_success = fmpz_mpoly_eval_all_but_one_nmod(&this_deg, temp, Acoeff + i, var, values + 2, ctx);
2393         deg = FLINT_MAX(deg, this_deg);
2394         success = success && this_success;
2395         if (success)
2396         {
2397             nmod_poly_scalar_mul_nmod(temp, temp, t);
2398             nmod_poly_add(out, out, temp);
2399         }
2400     }
2401 
2402     nmod_poly_clear(temp);
2403 
2404     *out_deg = deg;
2405 
2406     return success;
2407 }
2408 
2409 /*
2410     A, B are in ZZ[X,Y][x_0, ..., x_(n-1)]
2411 
2412     Set Adeg (resp Bdeg) to the degree of A (resp B) wrt x_var.
2413     Return an upper bound on the degree of gcd(A,B) wrt x_var.
2414 */
fmpz_mpolyuu_gcd_degree_bound_minor(slong * Adeg,slong * Bdeg,const fmpz_mpolyu_t A,const fmpz_mpolyu_t B,slong var,const fmpz_mpoly_ctx_t ctx,flint_rand_t state)2415 slong fmpz_mpolyuu_gcd_degree_bound_minor(
2416     slong * Adeg,
2417     slong * Bdeg,
2418     const fmpz_mpolyu_t A,
2419     const fmpz_mpolyu_t B,
2420     slong var,
2421     const fmpz_mpoly_ctx_t ctx,
2422     flint_rand_t state)
2423 {
2424     slong i;
2425     int tries = 0;
2426     slong degA, degB, degRet;
2427     mp_limb_t p = UWORD(1) << (FLINT_BITS - 2);
2428     nmod_poly_t Geval, Aeval, Beval;
2429     mp_limb_t * values;
2430     int Asuccess, Bsuccess;
2431     TMP_INIT;
2432 
2433     TMP_START;
2434 
2435     values = (mp_limb_t *) TMP_ALLOC((ctx->minfo->nvars + 2)*sizeof(mp_limb_t));
2436 
2437     p = n_nextprime(p, 1);
2438     nmod_poly_init(Geval, p);
2439     nmod_poly_init(Aeval, p);
2440     nmod_poly_init(Beval, p);
2441 
2442 try_again:
2443 
2444     for (i = 0; i < ctx->minfo->nvars + 2; i++)
2445     {
2446         values[i] = n_urandint(state, p);
2447     }
2448 
2449     Asuccess = fmpz_mpolyuu_eval_all_but_one_nmod(&degA, Aeval, A, var, values, ctx);
2450     Bsuccess = fmpz_mpolyuu_eval_all_but_one_nmod(&degB, Beval, B, var, values, ctx);
2451     *Adeg = degA;
2452     *Bdeg = degB;
2453 
2454     if (!Asuccess || !Bsuccess)
2455     {
2456         degRet = FLINT_MIN(degA, degB);
2457         goto cleanup;
2458     }
2459 
2460     if (degA != nmod_poly_degree(Aeval) || degB != nmod_poly_degree(Beval))
2461     {
2462         if (++tries > 100)
2463         {
2464             degRet = FLINT_MIN(degA, degB);
2465             goto cleanup;
2466         }
2467         p = n_nextprime(p, 1);
2468         nmod_poly_clear(Geval);
2469         nmod_poly_clear(Aeval);
2470         nmod_poly_clear(Beval);
2471         nmod_poly_init(Geval, p);
2472         nmod_poly_init(Aeval, p);
2473         nmod_poly_init(Beval, p);
2474         goto try_again;
2475     }
2476 
2477     nmod_poly_gcd(Geval, Aeval, Beval);
2478     degRet = nmod_poly_degree(Geval);
2479 
2480 cleanup:
2481 
2482     nmod_poly_clear(Geval);
2483     nmod_poly_clear(Aeval);
2484     nmod_poly_clear(Beval);
2485     TMP_END;
2486     return degRet;
2487 }
2488 
2489 /*
2490     A is in ZZ[x_0, ..., x_(n-1)]
2491 
2492     After the substitutions
2493         x_0     = x ^ (sub[1] * sub[2] * ... * sub[n-1])
2494 
2495         x_(n-2) = x ^ (sub[n-1])
2496         x_(n-1) = x ^ (1)
2497     a univariate in ZZ[x] remains. Return the content of this poly.
2498 */
fmpz_mpoly_ksub_content(fmpz_t content,const fmpz_mpoly_t A,const ulong * subdegs,const fmpz_mpoly_ctx_t ctx)2499 void fmpz_mpoly_ksub_content(
2500     fmpz_t content,
2501     const fmpz_mpoly_t A,
2502     const ulong * subdegs,
2503     const fmpz_mpoly_ctx_t ctx)
2504 {
2505     slong i, j;
2506     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
2507     fmpz_mpoly_t T;
2508     fmpz_mpoly_ctx_t Tctx;
2509     fmpz_t e;
2510     fmpz * Acoeff = A->coeffs;
2511     ulong * Aexp = A->exps;
2512     ulong mask;
2513     slong * offsets, * shifts;
2514     TMP_INIT;
2515 
2516     TMP_START;
2517     fmpz_init(e);
2518 
2519     fmpz_mpoly_ctx_init(Tctx, 1, ORD_LEX);
2520     fmpz_mpoly_init(T, Tctx);
2521 
2522     mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
2523     offsets = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
2524     shifts = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
2525     for (j = 0; j < ctx->minfo->nvars; j++)
2526     {
2527         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
2528     }
2529 
2530     for (i = 0; i < A->length; i++)
2531     {
2532         fmpz_zero(e);
2533         for (j = 0; j < ctx->minfo->nvars; j++)
2534         {
2535             fmpz_mul_ui(e, e, subdegs[j]);
2536             fmpz_add_ui(e, e, ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask);
2537         }
2538         _fmpz_mpoly_push_exp_ffmpz(T, e, Tctx);
2539         fmpz_set(T->coeffs + T->length - 1, Acoeff + i);
2540     }
2541     fmpz_mpoly_sort_terms(T, Tctx);
2542     fmpz_mpoly_combine_like_terms(T, Tctx);
2543     _fmpz_vec_content(content, T->coeffs, T->length);
2544     fmpz_mpoly_clear(T, Tctx);
2545     fmpz_mpoly_ctx_clear(Tctx);
2546 
2547     fmpz_clear(e);
2548     TMP_END;
2549 }
2550 
2551 /*
2552     Check if H evaluated a random point (eval(H)) matches gcd(eval(A), eval(B))
2553     with leading coefficient eval(Gamma). The evaluations are in x_i.
2554 
2555     H, A, B are in Z[X,Y][x_0, ..., x_(n-1)]
2556     Gamma is in Z[x_0, ..., x_(n-1)]
2557 */
2558 typedef enum {
2559     random_check_good,
2560     random_check_point_not_found,
2561     random_check_image_degree_high,
2562     random_check_image_degree_low,
2563     random_check_image_no_match
2564 } random_check_ret_t;
2565 
_random_check_sp(ulong * GevaldegXY,ulong GdegboundXY,nmod_mpolyn_t Aeval_sp,nmod_mpolyn_t Beval_sp,nmod_mpolyn_t Geval_sp,nmod_mpolyn_t Abareval_sp,nmod_mpolyn_t Bbareval_sp,mp_limb_t * checkalpha_sp,const fmpz_mpolyu_t H,const fmpz_mpolyu_t A,const fmpz_mpolyu_t B,const fmpz_mpoly_t Gamma,const fmpz_mpoly_ctx_t ctx,const nmod_mpoly_ctx_t ctx_sp,flint_rand_t randstate,nmod_poly_stack_t Sp_sp)2566 random_check_ret_t static _random_check_sp(
2567     ulong * GevaldegXY,
2568     ulong GdegboundXY,
2569     nmod_mpolyn_t Aeval_sp,
2570     nmod_mpolyn_t Beval_sp,
2571     nmod_mpolyn_t Geval_sp,
2572     nmod_mpolyn_t Abareval_sp,
2573     nmod_mpolyn_t Bbareval_sp,
2574     mp_limb_t * checkalpha_sp,
2575     const fmpz_mpolyu_t H,
2576     const fmpz_mpolyu_t A,
2577     const fmpz_mpolyu_t B,
2578     const fmpz_mpoly_t Gamma,
2579     const fmpz_mpoly_ctx_t ctx,
2580     const nmod_mpoly_ctx_t ctx_sp,
2581     flint_rand_t randstate,
2582     nmod_poly_stack_t Sp_sp)
2583 {
2584     mp_limb_t Gammaeval_sp;
2585     int success;
2586     int point_try_count;
2587     slong i;
2588 
2589     /* try to test H at a random evaluation point */
2590     for (point_try_count = 0; point_try_count < 10; point_try_count++)
2591     {
2592         /* evaluate Gamma, A, and B at random point */
2593         for (i = 0; i < ctx->minfo->nvars; i++)
2594         {
2595             checkalpha_sp[i] = n_urandint(randstate, ctx_sp->ffinfo->mod.n);
2596         }
2597         fmpz_mpolyuu_eval_nmod(Aeval_sp, ctx_sp, A, checkalpha_sp, ctx);
2598         fmpz_mpolyuu_eval_nmod(Beval_sp, ctx_sp, B, checkalpha_sp, ctx);
2599 
2600         /* make sure that evaluation did not kill either lc(A) or lc(B) */
2601         if ( Aeval_sp->length == 0 || Beval_sp->length == 0
2602             || nmod_mpolyn_bidegree(Aeval_sp) != A->exps[0]
2603             || nmod_mpolyn_bidegree(Beval_sp) != B->exps[0])
2604         {
2605             continue;
2606         }
2607 
2608         /* Gamma is gcd(lc(A), lc(B)) so it evaluation should not be zero */
2609         Gammaeval_sp = fmpz_mpoly_eval_nmod(ctx_sp->ffinfo, Gamma,
2610                                                            checkalpha_sp, ctx);
2611         FLINT_ASSERT(Gammaeval_sp != 0);
2612 
2613         success = nmod_mpolyn_gcd_brown_smprime_bivar(Geval_sp,
2614                   Abareval_sp, Bbareval_sp, Aeval_sp, Beval_sp, ctx_sp, Sp_sp);
2615         if (!success)
2616         {
2617             continue;
2618         }
2619         nmod_mpolyn_scalar_mul_nmod(Geval_sp, Gammaeval_sp, ctx_sp);
2620 
2621         FLINT_ASSERT(Geval_sp->length > 0);
2622         *GevaldegXY = nmod_mpolyn_bidegree(Geval_sp);
2623 
2624         if (GdegboundXY < *GevaldegXY)
2625         {
2626             return random_check_image_degree_high;
2627         }
2628         else if (GdegboundXY > *GevaldegXY)
2629         {
2630             return random_check_image_degree_low;
2631         }
2632 
2633         /* reuse Bbareval for Heval */
2634         fmpz_mpolyuu_eval_nmod(Bbareval_sp, ctx_sp, H, checkalpha_sp, ctx);
2635 
2636         if (!nmod_mpolyn_equal(Bbareval_sp, Geval_sp, ctx_sp))
2637         {
2638             return random_check_image_no_match;
2639         }
2640 
2641         return random_check_good;
2642     }
2643 
2644     return random_check_point_not_found;
2645 }
2646 
_random_check(ulong * GevaldegXY,ulong GdegboundXY,fmpz_mod_mpolyn_t Aeval,fmpz_mod_mpolyn_t Beval,fmpz_mod_mpolyn_t Geval,fmpz_mod_mpolyn_t Abareval,fmpz_mod_mpolyn_t Bbareval,fmpz_t Gammaeval,fmpz * checkalpha,const fmpz_mpolyu_t H,const fmpz_mpolyu_t A,const fmpz_mpolyu_t B,const fmpz_mpoly_t Gamma,const fmpz_mpoly_ctx_t ctx,const fmpz_mod_mpoly_ctx_t ctx_mp,flint_rand_t randstate)2647 random_check_ret_t static _random_check(
2648     ulong * GevaldegXY,
2649     ulong GdegboundXY,
2650     fmpz_mod_mpolyn_t Aeval,
2651     fmpz_mod_mpolyn_t Beval,
2652     fmpz_mod_mpolyn_t Geval,
2653     fmpz_mod_mpolyn_t Abareval,
2654     fmpz_mod_mpolyn_t Bbareval,
2655     fmpz_t Gammaeval,
2656     fmpz * checkalpha,
2657     const fmpz_mpolyu_t H,
2658     const fmpz_mpolyu_t A,
2659     const fmpz_mpolyu_t B,
2660     const fmpz_mpoly_t Gamma,
2661     const fmpz_mpoly_ctx_t ctx,
2662     const fmpz_mod_mpoly_ctx_t ctx_mp,
2663     flint_rand_t randstate)
2664 {
2665     int success;
2666     int point_try_count;
2667     slong i;
2668 
2669     /* try to test H at a random evaluation point */
2670     for (point_try_count = 0; point_try_count < 10; point_try_count++)
2671     {
2672         /* evaluate Gamma, A, and B at random point */
2673         for (i = 0; i < ctx->minfo->nvars; i++)
2674         {
2675             fmpz_randm(checkalpha + i, randstate, fmpz_mod_ctx_modulus(ctx_mp->ffinfo));
2676         }
2677         fmpz_mpolyuu_eval_fmpz_mod(Aeval, ctx_mp, A, checkalpha, ctx);
2678         fmpz_mpolyuu_eval_fmpz_mod(Beval, ctx_mp, B, checkalpha, ctx);
2679 
2680         /* make sure that evaluation did not kill either lc(A) or lc(B) */
2681         if ( Aeval->length == 0 || Beval->length == 0
2682             || fmpz_mod_mpolyn_bidegree(Aeval) != A->exps[0]
2683             || fmpz_mod_mpolyn_bidegree(Beval) != B->exps[0])
2684         {
2685             continue;
2686         }
2687 
2688         /* Gamma is gcd(lc(A), lc(B)) so it evaluation should not be zero */
2689         fmpz_mpoly_eval_fmpz_mod(Gammaeval, ctx_mp->ffinfo, Gamma, checkalpha, ctx);
2690         FLINT_ASSERT(!fmpz_is_zero(Gammaeval));
2691 
2692         success = fmpz_mod_mpolyn_gcd_brown_bivar(Geval, Abareval, Bbareval,
2693                                                          Aeval, Beval, ctx_mp);
2694         if (!success)
2695         {
2696             continue;
2697         }
2698         fmpz_mod_mpolyn_scalar_mul_fmpz_mod(Geval, Gammaeval, ctx_mp);
2699 
2700         FLINT_ASSERT(Geval->length > 0);
2701         *GevaldegXY = fmpz_mod_mpolyn_bidegree(Geval);
2702 
2703         if (GdegboundXY < *GevaldegXY)
2704         {
2705             return random_check_image_degree_high;
2706         }
2707         else if (GdegboundXY > *GevaldegXY)
2708         {
2709             return random_check_image_degree_low;
2710         }
2711 
2712         /* reuse Bbareval for Heval */
2713         fmpz_mpolyuu_eval_fmpz_mod(Bbareval, ctx_mp, H, checkalpha, ctx);
2714         if (!fmpz_mod_mpolyn_equal(Bbareval, Geval, ctx_mp))
2715         {
2716             return random_check_image_no_match;
2717         }
2718 
2719         return random_check_good;
2720     }
2721 
2722     return random_check_point_not_found;
2723 }
2724 
2725 
fmpz_mpolyuu_gcd_berlekamp_massey(fmpz_mpolyu_t G,fmpz_mpolyu_t Abar,fmpz_mpolyu_t Bbar,fmpz_mpolyu_t A,fmpz_mpolyu_t B,const fmpz_mpoly_t Gamma,const fmpz_mpoly_ctx_t ctx)2726 int fmpz_mpolyuu_gcd_berlekamp_massey(
2727     fmpz_mpolyu_t G,
2728     fmpz_mpolyu_t Abar,
2729     fmpz_mpolyu_t Bbar,
2730     fmpz_mpolyu_t A,
2731     fmpz_mpolyu_t B,
2732     const fmpz_mpoly_t Gamma,
2733     const fmpz_mpoly_ctx_t ctx)
2734 {
2735     int changed, success, point_try_count;
2736     flint_bitcnt_t bits = A->bits, Hbits;
2737     mpoly_bma_interpolate_ctx_t Ictx;
2738     nmod_mpoly_ctx_t ctx_sp;
2739     fmpz_mod_mpoly_ctx_t ctx_mp;
2740     fmpz_mpolyu_t H;
2741     fmpz_mpoly_t Hcontent;
2742     /* multi precision workspace */
2743     fmpz_mod_bma_mpoly_t Lambda;
2744     fmpz_mod_mpolyn_t Aeval, Beval, Geval, Abareval, Bbareval;
2745     fmpz_mpolycu_t Ainc, Acur, Binc, Bcur, Ared, Bred;
2746     fmpz_mpolyc_t Gammainc, Gammacur, Gammared;
2747     fmpz_t p, pm1, sshift, last_unlucky_sshift_plus_1, image_count;
2748     fmpz * checkalpha;
2749     /* single precision workspace */
2750     nmod_poly_stack_t Sp_sp;
2751     nmod_bma_mpoly_t Lambda_sp;
2752     nmod_mpolyn_t Aeval_sp, Beval_sp, Geval_sp, Abareval_sp, Bbareval_sp;
2753     nmod_mpolycu_t Ainc_sp, Acur_sp, Binc_sp, Bcur_sp, Ared_sp, Bred_sp;
2754     nmod_mpolyc_t Gammainc_sp, Gammacur_sp, Gammared_sp;
2755     mp_limb_t p_sp, sshift_sp, last_unlucky_sshift_plus_1_sp, image_count_sp;
2756     fmpz_t Gammaeval;
2757     mp_limb_t Gammaeval_sp;
2758     mp_limb_t * checkalpha_sp;
2759     /* misc */
2760     slong i, j;
2761     ulong GdegboundXY, GevaldegXY;
2762     slong * Gdegbounds, * Adegs, * Bdegs, * Gammadegs;
2763     flint_rand_t randstate;
2764     fmpz_t subprod, cAksub, cBksub;
2765     int unlucky_count;
2766     fmpz_t Hmodulus;
2767     nmod_zip_mpolyu_t Z;
2768     slong zip_evals;
2769     ulong ABtotal_length;
2770 
2771     FLINT_ASSERT(bits == A->bits);
2772     FLINT_ASSERT(bits == B->bits);
2773     FLINT_ASSERT(bits == G->bits);
2774     FLINT_ASSERT(bits == Abar->bits);
2775     FLINT_ASSERT(bits == Bbar->bits);
2776     FLINT_ASSERT(bits == Gamma->bits);
2777 
2778     /* let's initialize everything at once to avoid complicated cleanup */
2779 
2780     ABtotal_length = 0;
2781     for (i = 0; i < A->length; i++)
2782         ABtotal_length += (A->coeffs + i)->length;
2783     for (i = 0; i < B->length; i++)
2784         ABtotal_length += (B->coeffs + i)->length;
2785 
2786     flint_randinit(randstate);
2787     fmpz_init(p);
2788     fmpz_init(pm1); /* p - 1 */
2789     fmpz_init(image_count);
2790     fmpz_init(subprod);
2791     fmpz_init(cAksub);
2792     fmpz_init(cBksub);
2793     fmpz_init(sshift);
2794     fmpz_init(last_unlucky_sshift_plus_1);
2795     fmpz_init(Gammaeval);
2796     fmpz_init(Hmodulus);
2797 
2798     mpoly_bma_interpolate_ctx_init(Ictx, ctx->minfo->nvars);
2799 
2800     /* multiprecision workspace */
2801     fmpz_set_ui(p, 2);    /* modulus no care */
2802     fmpz_mod_mpoly_ctx_init(ctx_mp, 2, ORD_LEX, p); /* modulus no care */
2803     fmpz_mod_bma_mpoly_init(Lambda);
2804 
2805     fmpz_mod_mpolyn_init(Aeval, FLINT_BITS/2, ctx_mp);
2806     fmpz_mod_mpolyn_init(Beval, FLINT_BITS/2, ctx_mp);
2807     fmpz_mod_mpolyn_init(Geval, FLINT_BITS/2, ctx_mp);
2808     fmpz_mod_mpolyn_init(Abareval, FLINT_BITS/2, ctx_mp);
2809     fmpz_mod_mpolyn_init(Bbareval, FLINT_BITS/2, ctx_mp);
2810     fmpz_mpolyc_init(Gammainc);
2811     fmpz_mpolyc_init(Gammacur);
2812     fmpz_mpolyc_init(Gammared);
2813     fmpz_mpolycu_init(Ainc);
2814     fmpz_mpolycu_init(Acur);
2815     fmpz_mpolycu_init(Ared);
2816     fmpz_mpolycu_init(Binc);
2817     fmpz_mpolycu_init(Bcur);
2818     fmpz_mpolycu_init(Bred);
2819     checkalpha = (fmpz *) flint_malloc(ctx->minfo->nvars*sizeof(fmpz));
2820     for (i = 0; i < ctx->minfo->nvars; i++)
2821     {
2822         fmpz_init(checkalpha + i);
2823     }
2824 
2825     /* machine precision workspace "sp" */
2826     nmod_mpoly_ctx_init(ctx_sp, 2, ORD_LEX, 2); /* modulus no care */
2827     nmod_poly_stack_init(Sp_sp, FLINT_BITS/2, ctx_sp);
2828     nmod_bma_mpoly_init(Lambda_sp);
2829 
2830     nmod_mpolyn_init(Aeval_sp, FLINT_BITS/2, ctx_sp);
2831     nmod_mpolyn_init(Beval_sp, FLINT_BITS/2, ctx_sp);
2832     nmod_mpolyn_init(Geval_sp, FLINT_BITS/2, ctx_sp);
2833     nmod_mpolyn_init(Abareval_sp, FLINT_BITS/2, ctx_sp);
2834     nmod_mpolyn_init(Bbareval_sp, FLINT_BITS/2, ctx_sp);
2835     nmod_mpolyc_init(Gammainc_sp);
2836     nmod_mpolyc_init(Gammacur_sp);
2837     nmod_mpolyc_init(Gammared_sp);
2838     nmod_mpolycu_init(Ainc_sp);
2839     nmod_mpolycu_init(Acur_sp);
2840     nmod_mpolycu_init(Ared_sp);
2841     nmod_mpolycu_init(Binc_sp);
2842     nmod_mpolycu_init(Bcur_sp);
2843     nmod_mpolycu_init(Bred_sp);
2844     checkalpha_sp = (mp_limb_t *) flint_malloc(ctx->minfo->nvars*sizeof(mp_limb_t));
2845 
2846     /* the zippler */
2847     nmod_zip_mpolyu_init(Z);
2848 
2849     Gdegbounds = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
2850     Adegs      = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
2851     Bdegs      = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
2852     Gammadegs  = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
2853 
2854     /* find a degree bound on G in the two main variables */
2855     GdegboundXY = FLINT_MIN(A->exps[0], B->exps[0]);
2856     p_sp = UWORD(1) << (FLINT_BITS - 2);
2857     for (point_try_count = 0; point_try_count < 10; point_try_count++)
2858     {
2859         p_sp = n_nextprime(p_sp, 1);
2860         nmod_mpoly_ctx_set_modulus(ctx_sp, p_sp);
2861         /* unfortunate nmod_poly's need mod set */
2862         nmod_poly_stack_set_ctx(Sp_sp, ctx_sp);
2863         nmod_mpolyn_set_mod(Aeval_sp, ctx_sp->ffinfo->mod);
2864         nmod_mpolyn_set_mod(Beval_sp, ctx_sp->ffinfo->mod);
2865         nmod_mpolyn_set_mod(Geval_sp, ctx_sp->ffinfo->mod);
2866         nmod_mpolyn_set_mod(Abareval_sp, ctx_sp->ffinfo->mod);
2867         nmod_mpolyn_set_mod(Bbareval_sp, ctx_sp->ffinfo->mod);
2868         for (i = 0; i < ctx->minfo->nvars; i++)
2869         {
2870             checkalpha_sp[i] = n_urandint(randstate, p_sp);
2871         }
2872         fmpz_mpolyuu_eval_nmod(Aeval_sp, ctx_sp, A, checkalpha_sp, ctx);
2873         fmpz_mpolyuu_eval_nmod(Beval_sp, ctx_sp, B, checkalpha_sp, ctx);
2874 
2875         if (Aeval_sp->length == 0 || Beval_sp->length == 0
2876             || nmod_mpolyn_bidegree(Aeval_sp) != A->exps[0]
2877             || nmod_mpolyn_bidegree(Beval_sp) != B->exps[0])
2878         {
2879             /* evaluation killed at least one of lc(A) or lc(B) */
2880             continue;
2881         }
2882         success = nmod_mpolyn_gcd_brown_smprime_bivar(Geval_sp,
2883                   Abareval_sp, Bbareval_sp, Aeval_sp, Beval_sp, ctx_sp, Sp_sp);
2884         if (success)
2885         {
2886             FLINT_ASSERT(Geval_sp->length > 0);
2887             GdegboundXY = nmod_mpolyn_bidegree(Geval_sp);
2888             break;
2889         }
2890     }
2891 
2892     /*
2893         Find degree bounds on G wrt lesser variables so that
2894             Gdegbounds[i] >= deg_(x_i)(G)
2895         Also fills in
2896             Adegs[i] = deg_(x_i)(A)
2897             Bdegs[i] = deg_(x_i)(B)
2898     */
2899     mpoly_degrees_si(Gammadegs, Gamma->exps, Gamma->length, bits, ctx->minfo);
2900     for (i = 0; i < ctx->minfo->nvars; i++)
2901     {
2902         Gdegbounds[i] = fmpz_mpolyuu_gcd_degree_bound_minor(
2903                                 Adegs + i, Bdegs + i, A, B, i, ctx, randstate);
2904     }
2905 
2906     /*
2907         Find bits into which H can be packed. The degrees satsify
2908             deg_(x_i)(H) <= deg_(x_i)(A)
2909             deg_(x_i)(H) <= deg_(x_i)(B)
2910             deg_(x_i)(H) <= deg_(x_i)(Gamma) + deg_(x_i)(G)
2911     */
2912     Hbits = bits;
2913     for (i = 0; i < ctx->minfo->nvars; i++)
2914     {
2915         flint_bitcnt_t Hibits;
2916         Ictx->degbounds[i] = FLINT_MIN(Adegs[i], Bdegs[i]);
2917         Ictx->degbounds[i] = FLINT_MIN(Ictx->degbounds[i], Gdegbounds[i] + Gammadegs[i]);
2918         Hibits = 1 + FLINT_BIT_COUNT(Ictx->degbounds[i]);
2919         Hbits = FLINT_MAX(Hbits, Hibits);
2920 
2921         /* degbounds[i] will be a strict degree bound on deg_(x_i)(H) */
2922         Ictx->degbounds[i]++;
2923     }
2924 
2925     fmpz_mpolyu_init(Abar, bits, ctx);
2926     fmpz_mpolyu_init(Bbar, bits, ctx);
2927     fmpz_mpolyu_init(H, Hbits, ctx);
2928     fmpz_mpoly_init3(Hcontent, 0, Hbits, ctx);
2929 
2930     /* initialization done! */
2931 
2932     if (GdegboundXY == 0)
2933     {
2934         fmpz_mpolyu_one(G, ctx);
2935         fmpz_mpolyu_swap(Abar, A, ctx);
2936         fmpz_mpolyu_swap(Bbar, B, ctx);
2937         success = 1;
2938         goto cleanup;
2939     }
2940 
2941     if (Hbits > FLINT_BITS)
2942     {
2943         /* H cannot be guaranteed to be packed into FLINT_BITS - absolute falure */
2944         success = 0;
2945         goto cleanup;
2946     }
2947 
2948     /* initial choices for the ksub degrees are the strict degree bounds on H */
2949     for (i = 0; i < ctx->minfo->nvars; i++)
2950     {
2951         Ictx->subdegs[i] = Ictx->degbounds[i];
2952     }
2953     goto got_ksub;
2954 
2955 pick_ksub:
2956 
2957     if (ctx->minfo->nvars > 1)
2958     {
2959         /* just increment the smallest subdegs[j] */
2960         j = 1;
2961         for (i = 2; i < ctx->minfo->nvars; i++)
2962         {
2963             if (Ictx->subdegs[i] < Ictx->subdegs[j])
2964             {
2965                 j = i;
2966             }
2967         }
2968         Ictx->subdegs[j]++;
2969     }
2970 
2971 got_ksub:
2972 
2973     fmpz_one(subprod);
2974     for (i = 0; i < ctx->minfo->nvars; i++)
2975     {
2976         if ((slong)(Ictx->subdegs[i]) < 0)
2977         {
2978             /* ksub has overflown - absolute falure */
2979             success = 0;
2980             goto cleanup;
2981         }
2982         fmpz_mul_ui(subprod, subprod, Ictx->subdegs[i]);
2983     }
2984 
2985     /* see if the ksub killed either lc(A) or lc(B) */
2986     fmpz_mpoly_ksub_content(cAksub, A->coeffs + 0, Ictx->subdegs, ctx);
2987     fmpz_mpoly_ksub_content(cBksub, B->coeffs + 0, Ictx->subdegs, ctx);
2988     if (fmpz_is_zero(cAksub) || fmpz_is_zero(cBksub))
2989     {
2990         /* try a new substitution if we killed either leading coefficient */
2991         goto pick_ksub;
2992     }
2993 
2994 pick_bma_prime:
2995 
2996     if (fmpz_cmp_ui(p, ABtotal_length) < 0)
2997         fmpz_set_ui(p, ABtotal_length);
2998 
2999     if (fmpz_cmp(p, subprod) < 0)
3000         fmpz_set(p, subprod);
3001 
3002     success = fmpz_next_smooth_prime(p, p);
3003     fmpz_sub_ui(pm1, p, 1);
3004     if (!success)
3005     {
3006         /* ran out of smooth primes - absolute falure */
3007         success = 0;
3008         goto cleanup;
3009     }
3010 
3011     /* make sure reduction does not kill either leading coeff after ksub */
3012     if (fmpz_divisible(cAksub, p) || fmpz_divisible(cBksub, p))
3013     {
3014         goto pick_bma_prime;
3015     }
3016 
3017     /* make sure p does not divide any coefficient of Gamma */
3018     for (i = 0; i < Gamma->length; i++)
3019     {
3020         if (fmpz_divisible(Gamma->coeffs + i, p))
3021         {
3022             goto pick_bma_prime;
3023         }
3024     }
3025 
3026     if (fmpz_abs_fits_ui(p))
3027     {
3028         p_sp = fmpz_get_ui(p);
3029         sshift_sp = 1;
3030 
3031         unlucky_count = 0;
3032         last_unlucky_sshift_plus_1_sp = 0;
3033 
3034         nmod_mpoly_ctx_set_modulus(ctx_sp, p_sp);
3035         nmod_discrete_log_pohlig_hellman_precompute_prime(Ictx->dlogenv_sp, p_sp);
3036 
3037         nmod_bma_mpoly_reset_prime(Lambda_sp, ctx_sp->ffinfo);
3038         nmod_bma_mpoly_zero(Lambda_sp);
3039 
3040         /* unfortunate nmod_poly's store their own ctx :( */
3041         nmod_poly_stack_set_ctx(Sp_sp, ctx_sp);
3042         nmod_mpolyn_set_mod(Aeval_sp, ctx_sp->ffinfo->mod);
3043         nmod_mpolyn_set_mod(Beval_sp, ctx_sp->ffinfo->mod);
3044         nmod_mpolyn_set_mod(Geval_sp, ctx_sp->ffinfo->mod);
3045         nmod_mpolyn_set_mod(Abareval_sp, ctx_sp->ffinfo->mod);
3046         nmod_mpolyn_set_mod(Bbareval_sp, ctx_sp->ffinfo->mod);
3047 
3048         FLINT_ASSERT(sshift_sp == 1);
3049         nmod_mpoly_bma_interpolate_alpha_powers(checkalpha_sp, sshift_sp,
3050                                                     Ictx, ctx, ctx_sp->ffinfo);
3051 
3052         /* set evaluation of monomials */
3053         nmod_mpoly_set_skel(Gammainc_sp, ctx_sp, Gamma, checkalpha_sp, ctx);
3054         nmod_mpolyu_set_skel(Ainc_sp, ctx_sp, A, checkalpha_sp, ctx);
3055         nmod_mpolyu_set_skel(Binc_sp, ctx_sp, B, checkalpha_sp, ctx);
3056 
3057         /* set reduction of coeffs */
3058         nmod_mpoly_red_skel(Gammared_sp, Gamma, ctx_sp->ffinfo);
3059         nmod_mpolyu_red_skel(Ared_sp, A, ctx_sp->ffinfo);
3060         nmod_mpolyu_red_skel(Bred_sp, B, ctx_sp->ffinfo);
3061 
3062         /* copy evaluation of monomials */
3063         nmod_mpoly_copy_skel(Gammacur_sp, Gammainc_sp);
3064         nmod_mpolyu_copy_skel(Acur_sp, Ainc_sp);
3065         nmod_mpolyu_copy_skel(Bcur_sp, Binc_sp);
3066 
3067         image_count_sp = 0;
3068 
3069     next_bma_image_sp:
3070 
3071         /* image count is also the current power of alpha we are evaluating */
3072         image_count_sp++;
3073 
3074         FLINT_ASSERT(sshift_sp + Lambda_sp->pointcount == image_count_sp);
3075 
3076         if (image_count_sp >= p_sp - 1)
3077         {
3078             /* out of evaluation points alpha^image_count in Fp* */
3079             goto pick_bma_prime;
3080         }
3081 
3082         Gammaeval_sp = nmod_mpoly_use_skel_mul(Gammared_sp, Gammacur_sp,
3083                                                   Gammainc_sp, ctx_sp->ffinfo);
3084         nmod_mpolyuu_use_skel_mul(Aeval_sp, A, Ared_sp, Acur_sp, Ainc_sp, ctx_sp);
3085         nmod_mpolyuu_use_skel_mul(Beval_sp, B, Bred_sp, Bcur_sp, Binc_sp, ctx_sp);
3086 
3087         if (Aeval_sp->length == 0 || Beval_sp->length == 0
3088             || nmod_mpolyn_bidegree(Aeval_sp) != A->exps[0]
3089             || nmod_mpolyn_bidegree(Beval_sp) != B->exps[0])
3090         {
3091             /* evaluation killed either lc(A) or lc(B) */
3092             sshift_sp += Lambda_sp->pointcount + 1;
3093             nmod_bma_mpoly_zero(Lambda_sp);
3094             goto next_bma_image_sp;
3095         }
3096 
3097         /* the evaluation killed neither lc(A) nor lc(B) */
3098         FLINT_ASSERT(Gammaeval_sp != 0);
3099 
3100         success = nmod_mpolyn_gcd_brown_smprime_bivar(Geval_sp,
3101                   Abareval_sp, Bbareval_sp, Aeval_sp, Beval_sp, ctx_sp, Sp_sp);
3102         if (!success)
3103         {
3104             sshift_sp += Lambda->pointcount + 1;
3105             nmod_bma_mpoly_zero(Lambda_sp);
3106             goto next_bma_image_sp;
3107         }
3108 
3109         FLINT_ASSERT(Geval_sp->length > 0);
3110         GevaldegXY = nmod_mpolyn_bidegree(Geval_sp);
3111         nmod_mpolyn_scalar_mul_nmod(Geval_sp, Gammaeval_sp, ctx_sp);
3112 
3113         FLINT_ASSERT(Gammaeval_sp == nmod_mpolyn_leadcoeff(Geval_sp, ctx_sp));
3114 
3115         if (GdegboundXY < GevaldegXY)
3116         {
3117             /* this image in Fp[X,Y] was unlucky */
3118             if (sshift_sp == last_unlucky_sshift_plus_1_sp)
3119             {
3120                 /* this ksub is probably unlucky */
3121                 goto pick_ksub;
3122             }
3123             if (++unlucky_count > 2)
3124             {
3125                 goto pick_bma_prime;
3126             }
3127             last_unlucky_sshift_plus_1_sp = sshift_sp + 1;
3128             sshift_sp += Lambda_sp->pointcount + 1;
3129             nmod_bma_mpoly_zero(Lambda_sp);
3130             goto next_bma_image_sp;
3131         }
3132         else if (GdegboundXY > GevaldegXY)
3133         {
3134             /* new bound on deg_XY(G) */
3135             sshift_sp += Lambda_sp->pointcount;
3136             nmod_bma_mpoly_zero(Lambda_sp);
3137             nmod_bma_mpoly_add_point(Lambda_sp, Geval_sp, ctx_sp);
3138             GdegboundXY = GevaldegXY;
3139             if (GdegboundXY == 0)
3140             {
3141                 fmpz_mpolyu_one(G, ctx);
3142                 fmpz_mpolyu_swap(Abar, A, ctx);
3143                 fmpz_mpolyu_swap(Bbar, B, ctx);
3144                 success = 1;
3145                 goto cleanup;
3146             }
3147             goto next_bma_image_sp;
3148         }
3149 
3150         nmod_bma_mpoly_add_point(Lambda_sp, Geval_sp, ctx_sp);
3151         if ((Lambda_sp->pointcount & 1) != 0
3152             || Gamma->length > Lambda_sp->pointcount/2)
3153         {
3154             goto next_bma_image_sp;
3155         }
3156 
3157         changed = nmod_bma_mpoly_reduce(Lambda_sp);
3158         if (changed)
3159         {
3160             goto next_bma_image_sp;
3161         }
3162 
3163         success = nmod_bma_mpoly_get_fmpz_mpolyu(H, ctx, sshift_sp,
3164                                               Lambda_sp, Ictx, ctx_sp->ffinfo);
3165         if (!success
3166             || H->length == 0
3167             || (H->coeffs + 0)->length != Gamma->length)
3168         {
3169             goto next_bma_image_sp;
3170         }
3171 
3172         /* GdegboundXY should be the bidegree of H */
3173         FLINT_ASSERT(GdegboundXY == H->exps[0]);
3174 
3175         switch (_random_check_sp(&GevaldegXY, GdegboundXY,
3176                     Aeval_sp, Beval_sp, Geval_sp, Abareval_sp, Bbareval_sp,
3177                  checkalpha_sp, H, A, B, Gamma, ctx, ctx_sp, randstate, Sp_sp))
3178         {
3179             default:
3180                 FLINT_ASSERT(0);
3181             case random_check_image_no_match:
3182             case random_check_image_degree_high:
3183                 goto next_bma_image_sp;
3184             case random_check_image_degree_low:
3185                 /* the random evaluation point gave us a better degree bound */
3186                 sshift_sp += Lambda_sp->pointcount;
3187                 nmod_bma_mpoly_zero(Lambda_sp);
3188                 GdegboundXY = GevaldegXY;
3189                 if (GdegboundXY == 0)
3190                 {
3191                     fmpz_mpolyu_one(G, ctx);
3192                     fmpz_mpolyu_swap(Abar, A, ctx);
3193                     fmpz_mpolyu_swap(Bbar, B, ctx);
3194                     success = 1;
3195                     goto cleanup;
3196                 }
3197                 goto next_bma_image_sp;
3198             case random_check_point_not_found:
3199                 /* hmmm */
3200             case random_check_good:
3201                 NULL;
3202         }
3203     }
3204     else
3205     {
3206         fmpz_one(sshift);
3207 
3208         unlucky_count = 0;
3209         fmpz_zero(last_unlucky_sshift_plus_1);
3210 
3211         fmpz_mod_ctx_set_modulus(ctx_mp->ffinfo, p);
3212         fmpz_mod_discrete_log_pohlig_hellman_precompute_prime(Ictx->dlogenv, p);
3213         fmpz_mod_bma_mpoly_reset_prime(Lambda, ctx_mp->ffinfo);
3214         fmpz_mod_bma_mpoly_zero(Lambda);
3215 
3216         /* unfortunate fmpz_mod_poly's store their own ctx :( */
3217         fmpz_mod_mpolyn_set_modulus(Aeval, ctx_mp->ffinfo);
3218         fmpz_mod_mpolyn_set_modulus(Beval, ctx_mp->ffinfo);
3219         fmpz_mod_mpolyn_set_modulus(Geval, ctx_mp->ffinfo);
3220         fmpz_mod_mpolyn_set_modulus(Abareval, ctx_mp->ffinfo);
3221         fmpz_mod_mpolyn_set_modulus(Bbareval, ctx_mp->ffinfo);
3222 
3223         FLINT_ASSERT(fmpz_is_one(sshift));
3224         fmpz_mod_mpoly_bma_interpolate_alpha_powers(checkalpha, sshift,
3225                                                     Ictx, ctx, ctx_mp->ffinfo);
3226 
3227         /* set evaluation of monomials */
3228         fmpz_mod_mpoly_set_skel(Gammainc, ctx_mp, Gamma, checkalpha, ctx);
3229         fmpz_mod_mpolyu_set_skel(Ainc, ctx_mp, A, checkalpha, ctx);
3230         fmpz_mod_mpolyu_set_skel(Binc, ctx_mp, B, checkalpha, ctx);
3231 
3232         /* set reduction of coeffs */
3233         fmpz_mod_mpoly_red_skel(Gammared, Gamma, ctx_mp->ffinfo);
3234         fmpz_mod_mpolyu_red_skel(Ared, A, ctx_mp->ffinfo);
3235         fmpz_mod_mpolyu_red_skel(Bred, B, ctx_mp->ffinfo);
3236 
3237         /* copy evaluation of monomials */
3238         fmpz_mod_mpoly_copy_skel(Gammacur, Gammainc);
3239         fmpz_mod_mpolyu_copy_skel(Acur, Ainc);
3240         fmpz_mod_mpolyu_copy_skel(Bcur, Binc);
3241 
3242         fmpz_zero(image_count);
3243 
3244     next_bma_image:
3245 
3246         /* image count is also the current power of alpha we are evaluating */
3247         fmpz_add_ui(image_count, image_count, 1);
3248 
3249     #if WANT_ASSERT
3250         /* image_count == sshift + Lambda->pointcount */
3251         {
3252             fmpz_t t;
3253             fmpz_init(t);
3254             fmpz_add_ui(t, sshift, Lambda->pointcount);
3255             FLINT_ASSERT(fmpz_equal(t, image_count));
3256             fmpz_clear(t);
3257         }
3258     #endif
3259 
3260         if (fmpz_cmp(image_count, pm1) >= 0)
3261         {
3262             /* out of evaluation points alpha^image_count in Fp* */
3263             goto pick_bma_prime;
3264         }
3265 
3266         fmpz_mod_mpoly_use_skel_mul(Gammaeval, Gammared, Gammacur, Gammainc,
3267                                                                ctx_mp->ffinfo);
3268         fmpz_mod_mpolyuu_use_skel_mul(Aeval, A, Ared, Acur, Ainc, ctx_mp);
3269         fmpz_mod_mpolyuu_use_skel_mul(Beval, B, Bred, Bcur, Binc, ctx_mp);
3270         if (Aeval->length == 0 || Beval->length == 0
3271             || fmpz_mod_mpolyn_bidegree(Aeval) != A->exps[0]
3272             || fmpz_mod_mpolyn_bidegree(Beval) != B->exps[0])
3273         {
3274             /* evaluation killed either lc(A) or lc(B) */
3275             fmpz_add_ui(sshift, sshift, Lambda->pointcount + 1);
3276             fmpz_mod_bma_mpoly_zero(Lambda);
3277             goto next_bma_image;
3278         }
3279 
3280         /* the evaluation killed neither lc(A) nor lc(B) */
3281         FLINT_ASSERT(!fmpz_is_zero(Gammaeval));
3282 
3283         success = fmpz_mod_mpolyn_gcd_brown_bivar(Geval, Abareval, Bbareval,
3284                                                          Aeval, Beval, ctx_mp);
3285         if (!success)
3286         {
3287             fmpz_add_ui(sshift, sshift, Lambda->pointcount + 1);
3288             fmpz_mod_bma_mpoly_zero(Lambda);
3289             goto next_bma_image;
3290         }
3291 
3292         FLINT_ASSERT(Geval->length > 0);
3293         GevaldegXY = fmpz_mod_mpolyn_bidegree(Geval);
3294         fmpz_mod_mpolyn_scalar_mul_fmpz_mod(Geval, Gammaeval, ctx_mp);
3295 
3296         FLINT_ASSERT(fmpz_equal(Gammaeval, fmpz_mod_mpolyn_leadcoeff_last_ref(Geval, ctx_mp)));
3297 
3298         if (GdegboundXY < GevaldegXY)
3299         {
3300             /* this image in Fp[X,Y] was unlucky */
3301             if (fmpz_equal(sshift, last_unlucky_sshift_plus_1))
3302             {
3303                 /* this ksub is probably unlucky */
3304                 goto pick_ksub;
3305             }
3306             if (++unlucky_count > 2)
3307             {
3308                 goto pick_bma_prime;
3309             }
3310             fmpz_add_ui(last_unlucky_sshift_plus_1, sshift, 1);
3311             fmpz_add_ui(sshift, sshift, Lambda->pointcount + 1);
3312             fmpz_mod_bma_mpoly_zero(Lambda);
3313             goto next_bma_image;
3314         }
3315         else if (GdegboundXY > GevaldegXY)
3316         {
3317             /* new bound on deg_XY(G) */
3318             fmpz_add_ui(sshift, sshift, Lambda->pointcount);
3319             fmpz_mod_bma_mpoly_zero(Lambda);
3320             fmpz_mod_bma_mpoly_add_point(Lambda, Geval, ctx_mp);
3321             GdegboundXY = GevaldegXY;
3322             if (GdegboundXY == 0)
3323             {
3324                 fmpz_mpolyu_one(G, ctx);
3325                 fmpz_mpolyu_swap(Abar, A, ctx);
3326                 fmpz_mpolyu_swap(Bbar, B, ctx);
3327                 success = 1;
3328                 goto cleanup;
3329             }
3330             goto next_bma_image;
3331         }
3332 
3333         fmpz_mod_bma_mpoly_add_point(Lambda, Geval, ctx_mp);
3334         if (   (Lambda->pointcount & 1) != 0
3335             || Gamma->length > Lambda->pointcount/2)
3336         {
3337             goto next_bma_image;
3338         }
3339 
3340         changed = fmpz_mod_bma_mpoly_reduce(Lambda);
3341         if (changed)
3342         {
3343             goto next_bma_image;
3344         }
3345 
3346         success = fmpz_mod_bma_mpoly_get_fmpz_mpolyu(H, ctx, sshift,
3347                                                  Lambda, Ictx, ctx_mp->ffinfo);
3348         if (!success
3349             || H->length == 0
3350             || (H->coeffs + 0)->length != Gamma->length)
3351         {
3352             goto next_bma_image;
3353         }
3354 
3355         /* GdegboundXY should be the bidegree of H */
3356         FLINT_ASSERT(GdegboundXY == H->exps[0]);
3357 
3358         switch (_random_check(&GevaldegXY, GdegboundXY,
3359                     Aeval, Beval, Geval, Abareval, Bbareval, Gammaeval,
3360                            checkalpha, H, A, B, Gamma, ctx, ctx_mp, randstate))
3361         {
3362             default:
3363                 FLINT_ASSERT(0);
3364             case random_check_image_no_match:
3365             case random_check_image_degree_high:
3366                 goto next_bma_image;
3367             case random_check_image_degree_low:
3368                 /* the random evaluation point gave us a better degree bound */
3369                 fmpz_add_ui(sshift, sshift, Lambda->pointcount);
3370                 fmpz_mod_bma_mpoly_zero(Lambda);
3371                 GdegboundXY = GevaldegXY;
3372                 if (GdegboundXY == 0)
3373                 {
3374                     fmpz_mpolyu_one(G, ctx);
3375                     fmpz_mpolyu_swap(Abar, A, ctx);
3376                     fmpz_mpolyu_swap(Bbar, B, ctx);
3377                     success = 1;
3378                     goto cleanup;
3379                 }
3380                 goto next_bma_image;
3381             case random_check_point_not_found:
3382                 /* hmmm */
3383             case random_check_good:
3384                 NULL;
3385         }
3386     }
3387 
3388     /* assume that H is correct modulo Hmodulus = p */
3389     fmpz_set(Hmodulus, p);
3390 
3391     /* find number of evals for zip interp */
3392     FLINT_ASSERT(H->length > 0);
3393     zip_evals = H->coeffs[0].length;
3394     for (i = 1; i < H->length; i++)
3395     {
3396         zip_evals = FLINT_MAX(zip_evals, H->coeffs[i].length);
3397     }
3398     zip_evals += 1; /* one extra check eval */
3399     nmod_zip_mpolyu_fit_poly(Z, H, zip_evals);
3400 
3401     p_sp = UWORD(1) << (FLINT_BITS - 2);
3402 
3403 pick_zip_prime:
3404     /*
3405         Get a new machine prime for zippel interpolation.
3406         H is currently interpolated modulo Hmodulus.
3407     */
3408     if (p_sp >= UWORD_MAX_PRIME)
3409     {
3410         /* ran out of machine primes - absolute failure */
3411         success = 0;
3412         goto cleanup;
3413     }
3414     p_sp = n_nextprime(p_sp, 1);
3415 
3416     if (0 == fmpz_fdiv_ui(Hmodulus, p_sp))
3417     {
3418         goto pick_zip_prime;
3419     }
3420 
3421     nmod_mpoly_ctx_set_modulus(ctx_sp, p_sp);
3422     /* unfortunate nmod_poly's need mod set */
3423     nmod_poly_stack_set_ctx(Sp_sp, ctx_sp);
3424     nmod_mpolyn_set_mod(Aeval_sp, ctx_sp->ffinfo->mod);
3425     nmod_mpolyn_set_mod(Beval_sp, ctx_sp->ffinfo->mod);
3426     nmod_mpolyn_set_mod(Geval_sp, ctx_sp->ffinfo->mod);
3427     nmod_mpolyn_set_mod(Abareval_sp, ctx_sp->ffinfo->mod);
3428     nmod_mpolyn_set_mod(Bbareval_sp, ctx_sp->ffinfo->mod);
3429 
3430     FLINT_ASSERT(p_sp > 3);
3431     for (i = 0; i < ctx->minfo->nvars; i++)
3432     {
3433         checkalpha_sp[i] = n_urandint(randstate, p_sp - 3) + 2;
3434     }
3435 
3436     /* set up the zippler */
3437     nmod_zip_mpolyu_set_skel(Z, ctx_sp, H, checkalpha_sp, ctx);
3438 
3439     /* set evaluation of monomials */
3440     nmod_mpoly_set_skel(Gammainc_sp, ctx_sp, Gamma, checkalpha_sp, ctx);
3441     nmod_mpolyu_set_skel(Ainc_sp, ctx_sp, A, checkalpha_sp, ctx);
3442     nmod_mpolyu_set_skel(Binc_sp, ctx_sp, B, checkalpha_sp, ctx);
3443 
3444     /* set reduction of coeffs */
3445     nmod_mpoly_red_skel(Gammared_sp, Gamma, ctx_sp->ffinfo);
3446     nmod_mpolyu_red_skel(Ared_sp, A, ctx_sp->ffinfo);
3447     nmod_mpolyu_red_skel(Bred_sp, B, ctx_sp->ffinfo);
3448 
3449     /* copy evaluation of monomials */
3450     nmod_mpoly_copy_skel(Gammacur_sp, Gammainc_sp);
3451     nmod_mpolyu_copy_skel(Acur_sp, Ainc_sp);
3452     nmod_mpolyu_copy_skel(Bcur_sp, Binc_sp);
3453 
3454 next_zip_image:
3455 
3456     Gammaeval_sp = nmod_mpoly_use_skel_mul(Gammared_sp, Gammacur_sp,
3457                                                   Gammainc_sp, ctx_sp->ffinfo);
3458     nmod_mpolyuu_use_skel_mul(Aeval_sp, A, Ared_sp, Acur_sp, Ainc_sp, ctx_sp);
3459     nmod_mpolyuu_use_skel_mul(Beval_sp, B, Bred_sp, Bcur_sp, Binc_sp, ctx_sp);
3460 
3461     if (Aeval_sp->length == 0 || Beval_sp->length == 0
3462         || nmod_mpolyn_bidegree(Aeval_sp) != A->exps[0]
3463         || nmod_mpolyn_bidegree(Beval_sp) != B->exps[0])
3464     {
3465         /* evaluation point killed lc(A) or lc(B) */
3466         goto pick_zip_prime;
3467     }
3468 
3469     /* the evaluation killed neither lc(A) nor lc(B) */
3470     FLINT_ASSERT(Gammaeval_sp != 0);
3471 
3472     success = nmod_mpolyn_gcd_brown_smprime_bivar(Geval_sp,
3473                   Abareval_sp, Bbareval_sp, Aeval_sp, Beval_sp, ctx_sp, Sp_sp);
3474     if (!success)
3475     {
3476         /* choose a bigger p if bivar gcd failed */
3477         goto pick_zip_prime;
3478     }
3479 
3480     FLINT_ASSERT(Geval_sp->length > 0);
3481     GevaldegXY = nmod_mpolyn_bidegree(Geval_sp);
3482 
3483     if (GevaldegXY > GdegboundXY)
3484     {
3485         /* this image in Fp'[X,Y] was unlucky */
3486         goto pick_zip_prime;
3487     }
3488     else if (GevaldegXY < GdegboundXY)
3489     {
3490         /* we have a new degree bound on deg_XY(G) */
3491         GdegboundXY = GevaldegXY;
3492         if (GdegboundXY == 0)
3493         {
3494             fmpz_mpolyu_one(G, ctx);
3495             fmpz_mpolyu_swap(Abar, A, ctx);
3496             fmpz_mpolyu_swap(Bbar, B, ctx);
3497             success = 1;
3498             goto cleanup;
3499         }
3500         goto pick_bma_prime;
3501     }
3502 
3503     nmod_mpolyn_scalar_mul_nmod(Geval_sp, Gammaeval_sp, ctx_sp);
3504     FLINT_ASSERT(Gammaeval_sp == nmod_mpolyn_leadcoeff(Geval_sp, ctx_sp));
3505 
3506     /* update the zippler */
3507     success = nmod_zip_mpolyuu_add_point(Z, Geval_sp);
3508     if (!success)
3509     {
3510         /*
3511             An image gcd in Fp'[X,Y] did not match the assumed formed in [X,Y].
3512             Start all over
3513         */
3514         goto pick_bma_prime;
3515     }
3516     if (Z->pointcount < zip_evals)
3517     {
3518         goto next_zip_image;
3519     }
3520 
3521     switch (nmod_mpolyu_zip_find_coeffs(Z, ctx_sp))
3522     {
3523         default:
3524             FLINT_ASSERT(0);
3525         case nmod_zip_find_coeffs_no_match:
3526             /*  The collection of image gcd's in Fp'[X,Y] could not be coerced
3527                 into the assumed form in [X,Y][x_0, ..., x_(n-1)]. */
3528             goto pick_bma_prime;
3529         case nmod_zip_find_coeffs_non_invertible:
3530             /* The unlikely case where the evaluation points alpha produced
3531                a singular Vandermonde matrix. Assumed form is not nec wrong. */
3532             goto pick_zip_prime;
3533         case nmod_zip_find_coeffs_good:
3534             NULL;
3535     }
3536 
3537     FLINT_ASSERT(Hbits == H->bits);
3538     changed = fmpz_mpolyu_addinterp_zip(H, Hmodulus, Z, ctx_sp->ffinfo);
3539     fmpz_mul_ui(Hmodulus, Hmodulus, ctx_sp->ffinfo->mod.n);
3540 
3541     if (changed)
3542     {
3543         /* TODO if the coefficients of H are getting to large? */
3544         goto pick_zip_prime;
3545     }
3546 
3547     success = fmpz_mpolyu_content_mpoly_threaded_pool(Hcontent, H, ctx, NULL, 0);
3548     FLINT_ASSERT(Hcontent->bits == Hbits);
3549     if (!success)
3550     {
3551         /* could not compute content - absolute failure */
3552         success = 0;
3553         goto cleanup;
3554     }
3555 
3556     /* upgrade G to Hbits then try to pack down to bits */
3557     fmpz_mpolyu_set_bits(G, Hbits, ctx);
3558     fmpz_mpolyu_divexact_mpoly(G, H, 1, Hcontent, ctx);
3559     success = fmpz_mpolyu_repack_bits(G, bits, ctx);
3560     if (!success)
3561     {
3562         /* G cannot be the GCD if it cannot be packed into bits */
3563         goto pick_zip_prime;
3564     }
3565     if (   !fmpz_mpolyuu_divides(Abar, A, G, 2, ctx)
3566         || !fmpz_mpolyuu_divides(Bbar, B, G, 2, ctx))
3567     {
3568         goto pick_zip_prime;
3569     }
3570 
3571     success = 1;
3572 
3573 cleanup:
3574 
3575     fmpz_mpoly_clear(Hcontent, ctx);
3576     fmpz_mpolyu_clear(H, ctx);
3577 
3578     flint_free(Gdegbounds);
3579     flint_free(Adegs);
3580     flint_free(Bdegs);
3581     flint_free(Gammadegs);
3582 
3583     /* the zippler */
3584     nmod_zip_mpolyu_clear(Z);
3585 
3586     /* machine precision workspace */
3587     flint_free(checkalpha_sp);
3588     nmod_mpolyn_clear(Aeval_sp, ctx_sp);
3589     nmod_mpolyn_clear(Beval_sp, ctx_sp);
3590     nmod_mpolyn_clear(Geval_sp, ctx_sp);
3591     nmod_mpolyn_clear(Abareval_sp, ctx_sp);
3592     nmod_mpolyn_clear(Bbareval_sp, ctx_sp);
3593     nmod_mpolyc_clear(Gammainc_sp);
3594     nmod_mpolyc_clear(Gammacur_sp);
3595     nmod_mpolyc_clear(Gammared_sp);
3596     nmod_mpolycu_clear(Ainc_sp);
3597     nmod_mpolycu_clear(Acur_sp);
3598     nmod_mpolycu_clear(Ared_sp);
3599     nmod_mpolycu_clear(Binc_sp);
3600     nmod_mpolycu_clear(Bcur_sp);
3601     nmod_mpolycu_clear(Bred_sp);
3602     nmod_bma_mpoly_clear(Lambda_sp);
3603     nmod_poly_stack_clear(Sp_sp);
3604     nmod_mpoly_ctx_clear(ctx_sp);
3605 
3606     /* multiprecision workspace */
3607     for (i = 0; i < ctx->minfo->nvars; i++)
3608     {
3609         fmpz_clear(checkalpha + i);
3610     }
3611     flint_free(checkalpha);
3612     fmpz_mod_mpolyn_clear(Aeval, ctx_mp);
3613     fmpz_mod_mpolyn_clear(Beval, ctx_mp);
3614     fmpz_mod_mpolyn_clear(Geval, ctx_mp);
3615     fmpz_mod_mpolyn_clear(Abareval, ctx_mp);
3616     fmpz_mod_mpolyn_clear(Bbareval, ctx_mp);
3617     fmpz_mpolyc_clear(Gammainc);
3618     fmpz_mpolyc_clear(Gammacur);
3619     fmpz_mpolyc_clear(Gammared);
3620     fmpz_mpolycu_clear(Ainc);
3621     fmpz_mpolycu_clear(Acur);
3622     fmpz_mpolycu_clear(Ared);
3623     fmpz_mpolycu_clear(Binc);
3624     fmpz_mpolycu_clear(Bcur);
3625     fmpz_mpolycu_clear(Bred);
3626     fmpz_mod_bma_mpoly_clear(Lambda);
3627     fmpz_mod_mpoly_ctx_clear(ctx_mp);
3628 
3629     mpoly_bma_interpolate_ctx_clear(Ictx);
3630 
3631     fmpz_clear(Hmodulus);
3632     fmpz_clear(Gammaeval);
3633     fmpz_clear(last_unlucky_sshift_plus_1);
3634     fmpz_clear(sshift);
3635     fmpz_clear(cBksub);
3636     fmpz_clear(cAksub);
3637     fmpz_clear(subprod);
3638     fmpz_clear(image_count);
3639     fmpz_clear(pm1);
3640     fmpz_clear(p);
3641     flint_randclear(randstate);
3642 
3643     if (success)
3644     {
3645         FLINT_ASSERT(G->bits == bits);
3646         FLINT_ASSERT(Abar->bits == bits);
3647         FLINT_ASSERT(Bbar->bits == bits);
3648     }
3649     else
3650     {
3651         fmpz_mpolyu_set_bits(G, bits, ctx);
3652         fmpz_mpolyu_set_bits(Abar, bits, ctx);
3653         fmpz_mpolyu_set_bits(Bbar, bits, ctx);
3654     }
3655 
3656     return success;
3657 }
3658 
3659 
fmpz_mpoly_gcd_berlekamp_massey(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)3660 int fmpz_mpoly_gcd_berlekamp_massey(
3661     fmpz_mpoly_t G,
3662     const fmpz_mpoly_t A,
3663     const fmpz_mpoly_t B,
3664     const fmpz_mpoly_ctx_t ctx)
3665 {
3666     slong i;
3667     flint_bitcnt_t wbits;
3668     int success = 0;
3669     fmpz_mpoly_ctx_t uctx;
3670     fmpz_mpolyu_t Auu, Buu, Guu, Abaruu, Bbaruu;
3671     fmpz_mpoly_t Ac, Bc, Gc, Gamma;
3672     slong * Adegs, * Bdegs, * perm;
3673     ulong * shift, * stride;
3674     ulong max_main_degree, max_minor_degree;
3675 
3676     if (fmpz_mpoly_is_zero(A, ctx))
3677     {
3678         if (fmpz_mpoly_is_zero(B, ctx))
3679         {
3680             fmpz_mpoly_zero(G, ctx);
3681             return 1;
3682         }
3683         if (fmpz_sgn(B->coeffs + 0) < 0)
3684         {
3685             fmpz_mpoly_neg(G, B, ctx);
3686             return 1;
3687         }
3688         else
3689         {
3690             fmpz_mpoly_set(G, B, ctx);
3691             return 1;
3692         }
3693     }
3694 
3695     if (fmpz_mpoly_is_zero(B, ctx))
3696     {
3697         if (fmpz_sgn(A->coeffs + 0) < 0)
3698         {
3699             fmpz_mpoly_neg(G, A, ctx);
3700             return 1;
3701         }
3702         else
3703         {
3704             fmpz_mpoly_set(G, A, ctx);
3705             return 1;
3706         }
3707     }
3708 
3709     if (A->bits > FLINT_BITS || B->bits > FLINT_BITS)
3710     {
3711         return 0;
3712     }
3713 
3714     if (ctx->minfo->nvars < 3)
3715     {
3716         return fmpz_mpoly_gcd_zippel(G, A, B, ctx);
3717     }
3718 
3719     FLINT_ASSERT(A->bits <= FLINT_BITS);
3720     FLINT_ASSERT(B->bits <= FLINT_BITS);
3721     FLINT_ASSERT(ctx->minfo->nvars >= 3);
3722     FLINT_ASSERT(!fmpz_mpoly_is_zero(A, ctx));
3723     FLINT_ASSERT(!fmpz_mpoly_is_zero(B, ctx));
3724 
3725     Adegs = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
3726     Bdegs = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
3727     perm = (slong *) flint_malloc((ctx->minfo->nvars)*sizeof(slong));
3728     shift = (ulong *) flint_malloc((ctx->minfo->nvars)*sizeof(ulong));
3729     stride = (ulong *) flint_malloc((ctx->minfo->nvars)*sizeof(ulong));
3730 
3731     mpoly_degrees_si(Adegs, A->exps, A->length, A->bits, ctx->minfo);
3732     mpoly_degrees_si(Bdegs, B->exps, B->length, B->bits, ctx->minfo);
3733 
3734     max_main_degree = 0;
3735     max_minor_degree = 0;
3736     for (i = 0; i < ctx->minfo->nvars; i++)
3737     {
3738         perm[i] = i;
3739         shift[i] = 0;
3740         stride[i] = 1;
3741         FLINT_ASSERT(Adegs[i] >= 0);
3742         FLINT_ASSERT(Bdegs[i] >= 0);
3743         if (i < 2)
3744         {
3745             max_main_degree = FLINT_MAX(max_main_degree, Adegs[i]);
3746             max_main_degree = FLINT_MAX(max_main_degree, Bdegs[i]);
3747         }
3748         else
3749         {
3750             max_minor_degree = FLINT_MAX(max_minor_degree, Adegs[i]);
3751             max_minor_degree = FLINT_MAX(max_minor_degree, Bdegs[i]);
3752         }
3753     }
3754 
3755     fmpz_mpoly_ctx_init(uctx, ctx->minfo->nvars - 2, ORD_LEX);
3756 
3757     /* wbits is bits for intermediates in ZZ[x_0,x_1][x_2,...,x_(n-1)] */
3758     wbits = 1 + FLINT_BIT_COUNT(max_minor_degree);
3759     wbits = FLINT_MAX(MPOLY_MIN_BITS, wbits);
3760     wbits = mpoly_fix_bits(wbits, uctx->minfo);
3761     FLINT_ASSERT(wbits <= FLINT_BITS);
3762 
3763     fmpz_mpolyu_init(Auu, wbits, uctx);
3764     fmpz_mpolyu_init(Buu, wbits, uctx);
3765     fmpz_mpolyu_init(Guu, wbits, uctx);
3766     fmpz_mpolyu_init(Abaruu, wbits, uctx);
3767     fmpz_mpolyu_init(Bbaruu, wbits, uctx);
3768     fmpz_mpoly_init3(Ac, 0, wbits, uctx);
3769     fmpz_mpoly_init3(Bc, 0, wbits, uctx);
3770     fmpz_mpoly_init3(Gc, 0, wbits, uctx);
3771     fmpz_mpoly_init3(Gamma, 0, wbits, uctx);
3772 
3773     /* two main variables must be packed into bits = FLINT_BITS/2 */
3774     if (FLINT_BIT_COUNT(max_main_degree) >= FLINT_BITS/2)
3775     {
3776         success = 0;
3777         goto cleanup;
3778     }
3779 
3780     fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Auu, uctx, A, ctx,
3781                                            perm, shift, stride, NULL, NULL, 0);
3782     fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Buu, uctx, B, ctx,
3783                                            perm, shift, stride, NULL, NULL, 0);
3784 
3785     success = fmpz_mpolyu_content_mpoly_threaded_pool(Ac, Auu, uctx, NULL, 0);
3786     success = success &&
3787               fmpz_mpolyu_content_mpoly_threaded_pool(Bc, Buu, uctx, NULL, 0);
3788     if (!success)
3789         goto cleanup;
3790 
3791     fmpz_mpolyu_divexact_mpoly_inplace(Auu, Ac, uctx);
3792     fmpz_mpolyu_divexact_mpoly_inplace(Buu, Bc, uctx);
3793 
3794     success = _fmpz_mpoly_gcd_threaded_pool(Gamma, wbits, Auu->coeffs + 0,
3795                                             Buu->coeffs + 0, uctx, NULL, 0);
3796     if (!success)
3797         goto cleanup;
3798 
3799     success = fmpz_mpolyuu_gcd_berlekamp_massey(Guu, Abaruu, Bbaruu,
3800                                                         Auu, Buu, Gamma, uctx);
3801     if (!success)
3802         goto cleanup;
3803 
3804     success = _fmpz_mpoly_gcd_threaded_pool(Gc, wbits, Ac, Bc, uctx, NULL, 0);
3805     if (!success)
3806         goto cleanup;
3807 
3808     fmpz_mpolyu_mul_mpoly_inplace(Guu, Gc, uctx);
3809 
3810     fmpz_mpoly_from_mpolyuu_perm_inflate(G, FLINT_MIN(A->bits, B->bits), ctx,
3811                                                Guu, uctx, perm, shift, stride);
3812     if (fmpz_sgn(G->coeffs + 0) < 0)
3813         fmpz_mpoly_neg(G, G, ctx);
3814 
3815     success = 1;
3816 
3817 cleanup:
3818 
3819     flint_free(Adegs);
3820     flint_free(Bdegs);
3821     flint_free(perm);
3822     flint_free(shift);
3823     flint_free(stride);
3824 
3825     fmpz_mpolyu_clear(Auu, uctx);
3826     fmpz_mpolyu_clear(Buu, uctx);
3827     fmpz_mpolyu_clear(Guu, uctx);
3828     fmpz_mpolyu_clear(Abaruu, uctx);
3829     fmpz_mpolyu_clear(Bbaruu, uctx);
3830     fmpz_mpoly_clear(Ac, uctx);
3831     fmpz_mpoly_clear(Bc, uctx);
3832     fmpz_mpoly_clear(Gc, uctx);
3833     fmpz_mpoly_clear(Gamma, uctx);
3834 
3835     fmpz_mpoly_ctx_clear(uctx);
3836 
3837     return success;
3838 }
3839 
3840