1 /*
2     Copyright (C) 2019-2020 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
10 */
11 
12 #include "fq_nmod_mpoly.h"
13 #include "fq_nmod_mpoly_factor.h"
14 
15 /*
16     For each j, set out[j] to the evaluation of A at x_i = alpha[i] (i != j)
17     i.e. if nvars = 3
18         out[0] = A(x, alpha[1], alpha[2])
19         out[1] = A(alpha[0], x, alpha[2])
20         out[2] = A(alpha[0], alpha[1], x)
21 
22     If ignore[j] is nonzero, then out[j] need not be calculated, probably
23     because we shouldn't calculate it in dense form.
24 */
fq_nmod_mpoly_evals(slong * Atdeg,n_fq_poly_struct * out,const int * ignore,const fq_nmod_mpoly_t A,ulong * Amin_exp,ulong * Amax_exp,ulong * Astride,fq_nmod_struct * alpha,const fq_nmod_mpoly_ctx_t ctx)25 void fq_nmod_mpoly_evals(
26     slong * Atdeg,  /* total degree of deflated A, or -1 for overflow */
27     n_fq_poly_struct * out,
28     const int * ignore,
29     const fq_nmod_mpoly_t A,
30     ulong * Amin_exp,
31     ulong * Amax_exp,
32     ulong * Astride,
33     fq_nmod_struct * alpha,
34     const fq_nmod_mpoly_ctx_t ctx)
35 {
36     slong d = fq_nmod_ctx_degree(ctx->fqctx);
37     slong i, j;
38     slong nvars = ctx->minfo->nvars;
39     ulong mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
40     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
41     slong * offsets = FLINT_ARRAY_ALLOC(2*nvars, slong);
42     slong * shifts = offsets + nvars;
43     ulong * varexps = FLINT_ARRAY_ALLOC(nvars, ulong);
44     ulong varexp;
45     slong total_degree, lo, hi;
46     n_poly_struct * caches = FLINT_ARRAY_ALLOC(3*nvars, n_poly_struct);
47     mp_limb_t * t = FLINT_ARRAY_ALLOC(2*d, mp_limb_t);
48     mp_limb_t * meval = t + d;
49 
50     for (j = 0; j < nvars; j++)
51     {
52         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits,
53                                                                    ctx->minfo);
54         n_poly_init(caches + 3*j + 0);
55         n_poly_init(caches + 3*j + 1);
56         n_poly_init(caches + 3*j + 2);
57         n_fq_pow_cache_start_fq_nmod(alpha + j, caches + 3*j + 0,
58                                caches + 3*j + 1, caches + 3*j + 2, ctx->fqctx);
59 
60         if (ignore[j])
61             continue;
62 
63         i = (Astride[j] < 2) ? Amax_exp[j] - Amin_exp[j] :
64                             (Amax_exp[j] - Amin_exp[j])/Astride[j];
65 
66         n_poly_fit_length(out + j, d*(i + 1));
67         _nmod_vec_zero(out[j].coeffs, d*(i + 1));
68         out[j].length = i + 1;
69     }
70 
71     total_degree = 0;
72     for (i = 0; i < A->length; i++)
73     {
74         mp_limb_t * s = A->coeffs + d*i; /* source */
75 
76         lo = hi = 0;
77         for (j = 0; j < nvars; j++)
78         {
79             varexp = ((A->exps + N*i)[offsets[j]]>>shifts[j])&mask;
80 
81             FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j]) ||
82                                      (varexp - Amin_exp[j]) % Astride[j] == 0);
83 
84             varexps[j] = Astride[j] < 2 ? varexp - Amin_exp[j] :
85                                          (varexp - Amin_exp[j])/Astride[j];
86 
87             add_ssaaaa(hi, lo, hi, lo, 0, varexps[j]);
88 
89             n_fq_pow_cache_mulpow_ui(meval, s, varexps[j], caches + 3*j + 0,
90                                caches + 3*j + 1, caches + 3*j + 2, ctx->fqctx);
91             s = meval;
92         }
93 
94         if (hi == 0 && FLINT_SIGN_EXT(lo) == 0 && total_degree >= 0)
95             total_degree = FLINT_MAX(total_degree, lo);
96         else
97             total_degree = -1;
98 
99         for (j = 0; j < nvars; j++)
100         {
101             varexp = varexps[j];
102 
103             if (ignore[j])
104                 continue;
105 
106             FLINT_ASSERT(out[j].alloc >= d*(varexp + 1));
107 
108             n_fq_pow_cache_mulpow_neg_ui(t, meval, varexp, caches + 3*j + 0,
109                                caches + 3*j + 1, caches + 3*j + 2, ctx->fqctx);
110 
111             n_fq_add(out[j].coeffs + d*varexp, out[j].coeffs + d*varexp,
112                                                                 t, ctx->fqctx);
113         }
114     }
115 
116     *Atdeg = total_degree;
117 
118     for (j = 0; j < nvars; j++)
119         _n_fq_poly_normalise(out + j, d);
120 
121     for (j = 0; j < 3*nvars; j++)
122         n_poly_clear(caches + j);
123 
124     flint_free(offsets);
125     flint_free(varexps);
126     flint_free(caches);
127     flint_free(t);
128 }
129 
fq_nmod_mpoly_evals_lgprime(slong * Atdeg,n_fq_poly_struct * out,const int * ignore,const fq_nmod_mpoly_t A,ulong * Amin_exp,ulong * Amax_exp,ulong * Astride,const fq_nmod_mpoly_ctx_t smctx,fq_nmod_struct * alpha,const fq_nmod_mpoly_ctx_t lgctx,const bad_fq_nmod_embed_t emb)130 void fq_nmod_mpoly_evals_lgprime(
131     slong * Atdeg,  /* total degree of deflated A, or -1 for overflow */
132     n_fq_poly_struct * out,
133     const int * ignore,
134     const fq_nmod_mpoly_t A,
135     ulong * Amin_exp,
136     ulong * Amax_exp,
137     ulong * Astride,
138     const fq_nmod_mpoly_ctx_t smctx,
139     fq_nmod_struct * alpha,
140     const fq_nmod_mpoly_ctx_t lgctx,
141     const bad_fq_nmod_embed_t emb)
142 {
143     slong smd = fq_nmod_ctx_degree(smctx->fqctx);
144     slong lgd = fq_nmod_ctx_degree(lgctx->fqctx);
145     slong i, j;
146     slong nvars = smctx->minfo->nvars;
147     ulong mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
148     slong N = mpoly_words_per_exp_sp(A->bits, smctx->minfo);
149     slong * offsets = FLINT_ARRAY_ALLOC(2*nvars, slong);
150     slong * shifts = offsets + nvars;
151     ulong * varexps = FLINT_ARRAY_ALLOC(nvars, ulong);
152     ulong varexp, lo, hi;
153     slong total_degree;
154     n_poly_struct * caches = FLINT_ARRAY_ALLOC(3*nvars, n_poly_struct);
155     mp_limb_t * t = FLINT_ARRAY_ALLOC(2*lgd, mp_limb_t);
156     mp_limb_t * meval = t + lgd;
157 
158     for (j = 0; j < nvars; j++)
159     {
160         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits,
161                                                                  smctx->minfo);
162         n_poly_init(caches + 3*j + 0);
163         n_poly_init(caches + 3*j + 1);
164         n_poly_init(caches + 3*j + 2);
165         n_fq_pow_cache_start_fq_nmod(alpha + j, caches + 3*j + 0,
166                              caches + 3*j + 1, caches + 3*j + 2, lgctx->fqctx);
167         if (ignore[j])
168             continue;
169 
170         i = (Astride[j] < 2) ? Amax_exp[j] - Amin_exp[j] :
171                               (Amax_exp[j] - Amin_exp[j])/Astride[j];
172 
173         n_poly_fit_length(out + j, lgd*(i + 1));
174         _nmod_vec_zero(out[j].coeffs, lgd*(i + 1));
175         out[j].length = i + 1;
176     }
177 
178     total_degree = 0;
179     for (i = 0; i < A->length; i++)
180     {
181         bad_n_fq_embed_sm_elem_to_lg(meval, A->coeffs + smd*i, emb);
182 
183         hi = lo = 0;
184         for (j = 0; j < nvars; j++)
185         {
186             varexp = ((A->exps + N*i)[offsets[j]]>>shifts[j])&mask;
187 
188             FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j]) ||
189                                      (varexp - Amin_exp[j]) % Astride[j] == 0);
190 
191             varexps[j] = Astride[j] < 2 ? varexp - Amin_exp[j] :
192                                          (varexp - Amin_exp[j])/Astride[j];
193 
194             add_ssaaaa(hi, lo, hi, lo, 0, varexps[j]);
195 
196             n_fq_pow_cache_mulpow_ui(meval, meval, varexps[j], caches + 3*j + 0,
197                              caches + 3*j + 1, caches + 3*j + 2, lgctx->fqctx);
198         }
199 
200         if (hi == 0 && FLINT_SIGN_EXT(lo) == 0 && total_degree >= 0)
201             total_degree = FLINT_MAX(total_degree, lo);
202         else
203             total_degree = -1;
204 
205         for (j = 0; j < nvars; j++)
206         {
207             varexp = varexps[j];
208 
209             if (ignore[j])
210                 continue;
211 
212             FLINT_ASSERT(out[j].alloc >= lgd*(varexp + 1));
213 
214             n_fq_pow_cache_mulpow_neg_ui(t, meval, varexp, caches + 3*j + 0,
215                              caches + 3*j + 1, caches + 3*j + 2, lgctx->fqctx);
216 
217             n_fq_add(out[j].coeffs + lgd*varexp, out[j].coeffs + lgd*varexp,
218                                                               t, lgctx->fqctx);
219         }
220     }
221 
222     *Atdeg = total_degree;
223 
224     for (j = 0; j < nvars; j++)
225         _n_fq_poly_normalise(out + j, lgd);
226 
227     for (j = 0; j < 3*nvars; j++)
228         n_poly_clear(caches + j);
229 
230     flint_free(offsets);
231     flint_free(varexps);
232     flint_free(caches);
233     flint_free(t);
234 }
235 
236 
mpoly_gcd_info_set_estimates_fq_nmod_mpoly(mpoly_gcd_info_t I,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const fq_nmod_mpoly_ctx_t ctx)237 void mpoly_gcd_info_set_estimates_fq_nmod_mpoly(
238     mpoly_gcd_info_t I,
239     const fq_nmod_mpoly_t A,
240     const fq_nmod_mpoly_t B,
241     const fq_nmod_mpoly_ctx_t ctx)
242 {
243     slong d = fq_nmod_ctx_degree(ctx->fqctx);
244     int tries_left = 10;
245     slong nvars = ctx->minfo->nvars;
246     slong i, j;
247     n_fq_poly_t Geval;
248     n_fq_poly_struct * Aevals, * Bevals;
249     fq_nmod_struct * alpha;
250     flint_rand_t state;
251     slong ignore_limit;
252     int * ignore;
253 
254     flint_randinit(state);
255 
256     ignore = FLINT_ARRAY_ALLOC(nvars, int);
257     alpha  = FLINT_ARRAY_ALLOC(nvars, fq_nmod_struct);
258     Aevals = FLINT_ARRAY_ALLOC(2*nvars, n_fq_poly_struct);
259     Bevals = Aevals + nvars;
260 
261     n_fq_poly_init(Geval);
262     for (j = 0; j < nvars; j++)
263     {
264         fq_nmod_init(alpha + j, ctx->fqctx);
265         n_fq_poly_init(Aevals + j);
266         n_fq_poly_init(Bevals + j);
267     }
268 
269     ignore_limit = FLINT_MAX(WORD(9999), (A->length + B->length)/4096);
270     I->Gdeflate_deg_bounds_are_nice = 1;
271     for (j = 0; j < nvars; j++)
272     {
273         if (I->Adeflate_deg[j] > ignore_limit ||
274             I->Bdeflate_deg[j] > ignore_limit)
275         {
276             ignore[j] = 1;
277             I->Gdeflate_deg_bounds_are_nice = 0;
278         }
279         else
280         {
281             ignore[j] = 0;
282         }
283     }
284 
285 try_again:
286 
287     if (--tries_left < 0)
288     {
289         I->Gdeflate_deg_bounds_are_nice = 0;
290         for (j = 0; j < nvars; j++)
291         {
292             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
293                                                  I->Bdeflate_deg[j]);
294             I->Gterm_count_est[j] = 1 + I->Gdeflate_deg_bound[j]/2;
295         }
296 
297         goto cleanup;
298     }
299 
300     for (j = 0; j < nvars; j++)
301     {
302         fq_nmod_rand(alpha + j, state, ctx->fqctx);
303         if (fq_nmod_is_zero(alpha + j, ctx->fqctx))
304             fq_nmod_one(alpha + j, ctx->fqctx);
305     }
306 
307     fq_nmod_mpoly_evals(&I->Adeflate_tdeg, Aevals, ignore, A,
308                              I->Amin_exp, I->Amax_exp, I->Gstride, alpha, ctx);
309     fq_nmod_mpoly_evals(&I->Bdeflate_tdeg, Bevals, ignore, B,
310                              I->Bmin_exp, I->Bmax_exp, I->Gstride, alpha, ctx);
311 
312     for (j = 0; j < nvars; j++)
313     {
314         if (ignore[j])
315         {
316             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
317                                                  I->Bdeflate_deg[j]);
318             I->Gterm_count_est[j] = 1 + I->Gdeflate_deg_bound[j]/2;
319         }
320         else
321         {
322             if (I->Adeflate_deg[j] != n_fq_poly_degree(Aevals + j) ||
323                 I->Bdeflate_deg[j] != n_fq_poly_degree(Bevals + j))
324             {
325                 goto try_again;
326             }
327 
328             n_fq_poly_gcd(Geval, Aevals + j, Bevals + j, ctx->fqctx);
329 
330             I->Gterm_count_est[j] = 0;
331             I->Gdeflate_deg_bound[j] = n_fq_poly_degree(Geval);
332             for (i = I->Gdeflate_deg_bound[j]; i >= 0; i--)
333                 I->Gterm_count_est[j] += _n_fq_is_zero(Geval->coeffs + d*i, d);
334         }
335     }
336 
337 cleanup:
338 
339     n_fq_poly_clear(Geval);
340     for (j = 0; j < nvars; j++)
341     {
342         fq_nmod_clear(alpha + j, ctx->fqctx);
343         n_fq_poly_clear(Aevals + j);
344         n_fq_poly_clear(Bevals + j);
345     }
346 
347     flint_free(ignore);
348     flint_free(alpha);
349     flint_free(Aevals);
350 
351     flint_randclear(state);
352 
353     return;
354 }
355 
356 
mpoly_gcd_info_set_estimates_fq_nmod_mpoly_lgprime(mpoly_gcd_info_t I,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const fq_nmod_mpoly_ctx_t smctx)357 void mpoly_gcd_info_set_estimates_fq_nmod_mpoly_lgprime(
358     mpoly_gcd_info_t I,
359     const fq_nmod_mpoly_t A,
360     const fq_nmod_mpoly_t B,
361     const fq_nmod_mpoly_ctx_t smctx)
362 {
363     int tries_left = 10;
364     slong nvars = smctx->minfo->nvars;
365     slong i, j;
366     n_fq_poly_t Geval;
367     n_fq_poly_struct * Aevals, * Bevals;
368     fq_nmod_struct * alpha;
369     flint_rand_t state;
370     slong ignore_limit;
371     int * ignore;
372     fq_nmod_mpoly_ctx_t lgctx;
373     bad_fq_nmod_mpoly_embed_chooser_t embc;
374     bad_fq_nmod_embed_struct * cur_emb;
375 
376     flint_randinit(state);
377 
378     cur_emb = bad_fq_nmod_mpoly_embed_chooser_init(embc, lgctx, smctx, state);
379 
380     ignore = FLINT_ARRAY_ALLOC(nvars, int);
381     alpha  = FLINT_ARRAY_ALLOC(nvars, fq_nmod_struct);
382     Aevals = FLINT_ARRAY_ALLOC(2*nvars, n_fq_poly_struct);
383     Bevals = Aevals + nvars;
384 
385     n_fq_poly_init(Geval);
386     for (j = 0; j < nvars; j++)
387     {
388         fq_nmod_init(alpha + j, lgctx->fqctx);
389         n_fq_poly_init(Aevals + j);
390         n_fq_poly_init(Bevals + j);
391     }
392 
393     ignore_limit = FLINT_MAX(WORD(9999), (A->length + B->length)/4096);
394     I->Gdeflate_deg_bounds_are_nice = 1;
395     for (j = 0; j < nvars; j++)
396     {
397         if (I->Adeflate_deg[j] > ignore_limit ||
398             I->Bdeflate_deg[j] > ignore_limit)
399         {
400             ignore[j] = 1;
401             I->Gdeflate_deg_bounds_are_nice = 0;
402         }
403         else
404         {
405             ignore[j] = 0;
406         }
407     }
408 
409 try_again:
410 
411     if (--tries_left < 0 || cur_emb == NULL)
412     {
413         I->Gdeflate_deg_bounds_are_nice = 0;
414         for (j = 0; j < nvars; j++)
415         {
416             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
417                                                  I->Bdeflate_deg[j]);
418             I->Gterm_count_est[j] = 1 + I->Gdeflate_deg_bound[j]/2;
419         }
420 
421         goto cleanup;
422     }
423 
424     for (j = 0; j < nvars; j++)
425     {
426         fq_nmod_rand(alpha + j, state, lgctx->fqctx);
427         if (fq_nmod_is_zero(alpha + j, lgctx->fqctx))
428             fq_nmod_one(alpha + j, lgctx->fqctx);
429     }
430 
431     fq_nmod_mpoly_evals_lgprime(&I->Adeflate_tdeg, Aevals, ignore, A,
432            I->Amin_exp, I->Amax_exp, I->Gstride, smctx, alpha, lgctx, cur_emb);
433     fq_nmod_mpoly_evals_lgprime(&I->Bdeflate_tdeg, Bevals, ignore, B,
434            I->Bmin_exp, I->Bmax_exp, I->Gstride, smctx, alpha, lgctx, cur_emb);
435 
436     for (j = 0; j < nvars; j++)
437     {
438         if (ignore[j])
439         {
440             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
441                                                  I->Bdeflate_deg[j]);
442             I->Gterm_count_est[j] = 1 + I->Gdeflate_deg_bound[j]/2;
443         }
444         else
445         {
446             slong lgd = fq_nmod_ctx_degree(lgctx->fqctx);
447 
448             if (I->Adeflate_deg[j] != n_fq_poly_degree(Aevals + j) ||
449                 I->Bdeflate_deg[j] != n_fq_poly_degree(Bevals + j))
450             {
451                 cur_emb = bad_fq_nmod_mpoly_embed_chooser_next(embc, lgctx, smctx, state);
452                 goto try_again;
453             }
454 
455             n_fq_poly_gcd(Geval, Aevals + j, Bevals + j, lgctx->fqctx);
456 
457             I->Gterm_count_est[j] = 0;
458             I->Gdeflate_deg_bound[j] = n_fq_poly_degree(Geval);
459             for (i = I->Gdeflate_deg_bound[j]; i >= 0; i--)
460                 I->Gterm_count_est[j] += _n_fq_is_zero(Geval->coeffs + lgd*i, lgd);
461         }
462     }
463 
464 cleanup:
465 
466     n_fq_poly_clear(Geval);
467     for (j = 0; j < nvars; j++)
468     {
469         fq_nmod_clear(alpha + j, lgctx->fqctx);
470         n_fq_poly_clear(Aevals + j);
471         n_fq_poly_clear(Bevals + j);
472     }
473 
474     flint_free(ignore);
475     flint_free(alpha);
476     flint_free(Aevals);
477 
478     bad_fq_nmod_mpoly_embed_chooser_clear(embc, lgctx, smctx, state);
479 
480     flint_randclear(state);
481 
482     return;
483 }
484 
485 
486 /* (Abar, Bbar) = (A, B) */
_parallel_set(fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const fq_nmod_mpoly_ctx_t ctx)487 static void _parallel_set(
488     fq_nmod_mpoly_t Abar, /* could be NULL */
489     fq_nmod_mpoly_t Bbar, /* could be NULL */
490     const fq_nmod_mpoly_t A,
491     const fq_nmod_mpoly_t B,
492     const fq_nmod_mpoly_ctx_t ctx)
493 {
494     if (Abar == B && Bbar == A)
495     {
496         FLINT_ASSERT(Abar != NULL && Bbar != NULL);
497         fq_nmod_mpoly_set(Abar, B, ctx);
498         fq_nmod_mpoly_set(Bbar, A, ctx);
499         fq_nmod_mpoly_swap(Abar, Bbar, ctx);
500     }
501     else if (Abar == B && Bbar != A)
502     {
503         FLINT_ASSERT(Abar != NULL);
504         if (Bbar != NULL)
505             fq_nmod_mpoly_set(Bbar, B, ctx);
506         fq_nmod_mpoly_set(Abar, A, ctx);
507     }
508     else
509     {
510         if (Abar != NULL)
511             fq_nmod_mpoly_set(Abar, A, ctx);
512         if (Bbar != NULL)
513             fq_nmod_mpoly_set(Bbar, B, ctx);
514     }
515 }
516 
517 
518 /* The variables in ess(A) and ess(B) are disjoint. gcd is trivial to compute */
_do_trivial(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const mpoly_gcd_info_t I,const fq_nmod_mpoly_ctx_t ctx)519 static int _do_trivial(
520     fq_nmod_mpoly_t G,
521     fq_nmod_mpoly_t Abar,  /* could be NULL */
522     fq_nmod_mpoly_t Bbar,  /* could be NULL */
523     const fq_nmod_mpoly_t A,
524     const fq_nmod_mpoly_t B,
525     const mpoly_gcd_info_t I,
526     const fq_nmod_mpoly_ctx_t ctx)
527 {
528     _parallel_set(Abar, Bbar, A, B, ctx);
529 
530     if (Abar != NULL)
531         mpoly_monomials_shift_right_ui(Abar->exps, Abar->bits, Abar->length,
532                                                       I->Gmin_exp, ctx->minfo);
533 
534     if (Bbar != NULL)
535         mpoly_monomials_shift_right_ui(Bbar->exps, Bbar->bits, Bbar->length,
536                                                       I->Gmin_exp, ctx->minfo);
537 
538     fq_nmod_mpoly_fit_length_reset_bits(G, 1, I->Gbits, ctx);
539     mpoly_set_monomial_ui(G->exps, I->Gmin_exp, I->Gbits, ctx->minfo);
540     _n_fq_one(G->coeffs, fq_nmod_ctx_degree(ctx->fqctx));
541     _fq_nmod_mpoly_set_length(G, 1, ctx);
542 
543     return 1;
544 }
545 
546 /*********************** Easy when B is a monomial ***************************/
_do_monomial_gcd(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const fq_nmod_mpoly_ctx_t ctx)547 static int _do_monomial_gcd(
548     fq_nmod_mpoly_t G,
549     fq_nmod_mpoly_t Abar,  /* could be NULL */
550     fq_nmod_mpoly_t Bbar,  /* could be NULL */
551     const fq_nmod_mpoly_t A,
552     const fq_nmod_mpoly_t B,
553     const fq_nmod_mpoly_ctx_t ctx)
554 {
555     slong i;
556     flint_bitcnt_t Gbits = FLINT_MIN(A->bits, B->bits);
557     fmpz * minAfields, * minAdegs, * minBdegs;
558     TMP_INIT;
559 
560     FLINT_ASSERT(A->length > 0);
561     FLINT_ASSERT(B->length == 1);
562 
563     TMP_START;
564 
565     /* get the field-wise minimum of A */
566     minAfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
567     for (i = 0; i < ctx->minfo->nfields; i++)
568         fmpz_init(minAfields + i);
569     mpoly_min_fields_fmpz(minAfields, A->exps, A->length, A->bits, ctx->minfo);
570 
571     /* unpack to get the min degrees of each variable in A */
572     minAdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
573     for (i = 0; i < ctx->minfo->nvars; i++)
574         fmpz_init(minAdegs + i);
575     mpoly_get_monomial_ffmpz_unpacked_ffmpz(minAdegs, minAfields, ctx->minfo);
576 
577     /* get the degree of each variable in B */
578     minBdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
579     for (i = 0; i < ctx->minfo->nvars; i++)
580         fmpz_init(minBdegs + i);
581     mpoly_get_monomial_ffmpz(minBdegs, B->exps, B->bits, ctx->minfo);
582 
583     /* compute the degree of each variable in G */
584     _fmpz_vec_min_inplace(minBdegs, minAdegs, ctx->minfo->nvars);
585 
586     _parallel_set(Abar, Bbar, A, B, ctx);
587 
588     if (Abar != NULL)
589         mpoly_monomials_shift_right_ffmpz(Abar->exps, Abar->bits, Abar->length,
590                                                          minBdegs, ctx->minfo);
591 
592     if (Bbar != NULL)
593         mpoly_monomials_shift_right_ffmpz(Bbar->exps, Bbar->bits, Bbar->length,
594                                                          minBdegs, ctx->minfo);
595 
596     fq_nmod_mpoly_fit_length_reset_bits(G, 1, Gbits, ctx);
597     mpoly_set_monomial_ffmpz(G->exps, minBdegs, Gbits, ctx->minfo);
598     _n_fq_one(G->coeffs + 0, fq_nmod_ctx_degree(ctx->fqctx));
599     _fq_nmod_mpoly_set_length(G, 1, ctx);
600 
601     for (i = 0; i < ctx->minfo->nfields; i++)
602     {
603         fmpz_clear(minAfields + i);
604     }
605     for (i = 0; i < ctx->minfo->nvars; i++)
606     {
607         fmpz_clear(minAdegs + i);
608         fmpz_clear(minBdegs + i);
609     }
610 
611     TMP_END;
612 
613     return 1;
614 }
615 
616 
617 /********************** See if cofactors are monomials ***********************/
_try_monomial_cofactors(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const fq_nmod_mpoly_ctx_t ctx)618 static int _try_monomial_cofactors(
619     fq_nmod_mpoly_t G,
620     fq_nmod_mpoly_t Abar,  /* could be NULL */
621     fq_nmod_mpoly_t Bbar,  /* could be NULL */
622     const fq_nmod_mpoly_t A,
623     const fq_nmod_mpoly_t B,
624     const fq_nmod_mpoly_ctx_t ctx)
625 {
626     slong d = fq_nmod_ctx_degree(ctx->fqctx);
627     int success;
628     slong i, j;
629     slong NA, NG;
630     slong nvars = ctx->minfo->nvars;
631     fmpz * Abarexps, * Bbarexps, * Texps;
632     mp_limb_t * tmp, * t1, * t2, * a0, * b0;
633     fq_nmod_mpoly_t T;
634     flint_bitcnt_t Gbits = FLINT_MIN(A->bits, B->bits);
635     flint_bitcnt_t Abarbits = A->bits;
636     flint_bitcnt_t Bbarbits = B->bits;
637     TMP_INIT;
638 
639     FLINT_ASSERT(A->length > 0);
640     FLINT_ASSERT(B->length > 0);
641 
642     if (A->length != B->length)
643         return 0;
644 
645     TMP_START;
646 
647     tmp = (mp_limb_t *) TMP_ALLOC(d*(4 + FLINT_MAX(N_FQ_MUL_ITCH,
648                                             N_FQ_INV_ITCH))*sizeof(mp_limb_t));
649     t1 = tmp + d*FLINT_MAX(N_FQ_MUL_ITCH, N_FQ_INV_ITCH);
650     t2 = t1 + d;
651     a0 = t2 + d;
652     b0 = a0 + d;
653 
654     for (i = A->length - 1; i > 0; i--)
655     {
656         _n_fq_mul(t1, A->coeffs + d*0, B->coeffs + d*i, ctx->fqctx, tmp);
657         _n_fq_mul(t2, B->coeffs + d*0, A->coeffs + d*i, ctx->fqctx, tmp);
658         success = _n_fq_equal(t1, t2, d);
659         if (!success)
660             goto cleanup_less;
661     }
662 
663     _n_fq_set(a0, A->coeffs + d*0, d);
664     _n_fq_set(b0, B->coeffs + d*0, d);
665 
666     Abarexps = (fmpz *) TMP_ALLOC(3*nvars*sizeof(fmpz));
667     Bbarexps = Abarexps + 1*nvars;
668     Texps    = Abarexps + 2*nvars;
669     for (j = 0; j < nvars; j++)
670     {
671         fmpz_init(Abarexps + j);
672         fmpz_init(Bbarexps + j);
673         fmpz_init(Texps + j);
674     }
675 
676     success = mpoly_monomial_cofactors(Abarexps, Bbarexps, A->exps, A->bits,
677                                       B->exps, B->bits, A->length, ctx->minfo);
678     if (!success)
679         goto cleanup_more;
680 
681     fq_nmod_mpoly_init3(T, A->length, Gbits, ctx);
682     NG = mpoly_words_per_exp(Gbits, ctx->minfo);
683     NA = mpoly_words_per_exp(A->bits, ctx->minfo);
684     _n_fq_inv(t1, A->coeffs + d*0, ctx->fqctx, tmp);
685     T->length = A->length;
686     for (i = 0; i < A->length; i++)
687     {
688         mpoly_get_monomial_ffmpz(Texps, A->exps + NA*i, A->bits, ctx->minfo);
689         _fmpz_vec_sub(Texps, Texps, Abarexps, nvars);
690         mpoly_set_monomial_ffmpz(T->exps + NG*i, Texps, Gbits, ctx->minfo);
691         n_fq_mul(T->coeffs + d*i, A->coeffs + d*i, t1, ctx->fqctx);
692     }
693     fq_nmod_mpoly_swap(G, T, ctx);
694     fq_nmod_mpoly_clear(T, ctx);
695 
696     if (Abar != NULL)
697     {
698         fq_nmod_mpoly_fit_length_reset_bits(Abar, 1, Abarbits, ctx);
699         mpoly_set_monomial_ffmpz(Abar->exps, Abarexps, Abarbits, ctx->minfo);
700         _n_fq_set(Abar->coeffs + d*0, a0, d);
701         _fq_nmod_mpoly_set_length(Abar, 1, ctx);
702     }
703 
704     if (Bbar != NULL)
705     {
706         fq_nmod_mpoly_fit_length_reset_bits(Bbar, 1, Bbarbits, ctx);
707         mpoly_set_monomial_ffmpz(Bbar->exps, Bbarexps, Bbarbits, ctx->minfo);
708         _n_fq_set(Bbar->coeffs + d*0, b0, d);
709         _fq_nmod_mpoly_set_length(Bbar, 1, ctx);
710     }
711 
712     success = 1;
713 
714 cleanup_more:
715 
716     for (j = 0; j < nvars; j++)
717     {
718         fmpz_clear(Abarexps + j);
719         fmpz_clear(Bbarexps + j);
720         fmpz_clear(Texps + j);
721     }
722 
723 cleanup_less:
724 
725     TMP_END;
726 
727     return success;
728 }
729 
730 
731 /***  ess(A) and ess(B) depend on only one variable v_in_both ****************/
_do_univar(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,slong v_in_both,const mpoly_gcd_info_t I,const fq_nmod_mpoly_ctx_t ctx)732 static int _do_univar(
733     fq_nmod_mpoly_t G,
734     fq_nmod_mpoly_t Abar,
735     fq_nmod_mpoly_t Bbar,
736     const fq_nmod_mpoly_t A,
737     const fq_nmod_mpoly_t B,
738     slong v_in_both,
739     const mpoly_gcd_info_t I,
740     const fq_nmod_mpoly_ctx_t ctx)
741 {
742     fq_nmod_poly_t a, b, g, t, r;
743 
744     fq_nmod_poly_init(a, ctx->fqctx);
745     fq_nmod_poly_init(b, ctx->fqctx);
746     fq_nmod_poly_init(g, ctx->fqctx);
747     fq_nmod_poly_init(t, ctx->fqctx);
748     fq_nmod_poly_init(r, ctx->fqctx);
749 
750     _fq_nmod_mpoly_to_fq_nmod_poly_deflate(a, A, v_in_both,
751                                                  I->Amin_exp, I->Gstride, ctx);
752     _fq_nmod_mpoly_to_fq_nmod_poly_deflate(b, B, v_in_both,
753                                                  I->Bmin_exp, I->Gstride, ctx);
754 
755     fq_nmod_poly_gcd(g, a, b, ctx->fqctx);
756     _fq_nmod_mpoly_from_fq_nmod_poly_inflate(G, I->Gbits, g, v_in_both,
757                                                  I->Gmin_exp, I->Gstride, ctx);
758     if (Abar != NULL)
759     {
760         fq_nmod_poly_divrem(t, r, a, g, ctx->fqctx);
761         FLINT_ASSERT(fq_nmod_poly_is_zero(r, ctx->fqctx));
762         _fq_nmod_mpoly_from_fq_nmod_poly_inflate(Abar, I->Abarbits, t,
763                                    v_in_both, I->Abarmin_exp, I->Gstride, ctx);
764     }
765 
766     if (Bbar != NULL)
767     {
768         fq_nmod_poly_divrem(t, r, b, g, ctx->fqctx);
769         FLINT_ASSERT(fq_nmod_poly_is_zero(r, ctx->fqctx));
770         _fq_nmod_mpoly_from_fq_nmod_poly_inflate(Bbar, I->Bbarbits, t, v_in_both,
771                                               I->Bbarmin_exp, I->Gstride, ctx);
772     }
773 
774     fq_nmod_poly_clear(a, ctx->fqctx);
775     fq_nmod_poly_clear(b, ctx->fqctx);
776     fq_nmod_poly_clear(g, ctx->fqctx);
777     fq_nmod_poly_clear(t, ctx->fqctx);
778     fq_nmod_poly_clear(r, ctx->fqctx);
779 
780     return 1;
781 }
782 
783 /********* Assume B has length one when converted to univar format ***********/
_try_missing_var(fq_nmod_mpoly_t G,flint_bitcnt_t Gbits,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,slong var,const fq_nmod_mpoly_t A,ulong Ashift,const fq_nmod_mpoly_t B,ulong Bshift,const fq_nmod_mpoly_ctx_t ctx)784 static int _try_missing_var(
785     fq_nmod_mpoly_t G, flint_bitcnt_t Gbits,
786     fq_nmod_mpoly_t Abar,
787     fq_nmod_mpoly_t Bbar,
788     slong var,
789     const fq_nmod_mpoly_t A, ulong Ashift,
790     const fq_nmod_mpoly_t B, ulong Bshift,
791     const fq_nmod_mpoly_ctx_t ctx)
792 {
793     int success;
794     fq_nmod_mpoly_univar_t Au;
795 
796     fq_nmod_mpoly_univar_init(Au, ctx);
797 
798 #if FLINT_WANT_ASSERT
799     fq_nmod_mpoly_to_univar(Au, B, var, ctx);
800     FLINT_ASSERT(Au->length == 1);
801 #endif
802     fq_nmod_mpoly_to_univar(Au, A, var, ctx);
803 
804     fq_nmod_mpoly_univar_fit_length(Au, Au->length + 1, ctx);
805     fq_nmod_mpoly_set(Au->coeffs + Au->length, B, ctx);
806     Au->length++;
807 
808     if (Abar == NULL && Bbar == NULL)
809     {
810         success = _fq_nmod_mpoly_vec_content_mpoly(G, Au->coeffs, Au->length, ctx);
811         if (!success)
812             goto cleanup;
813 
814         fq_nmod_mpoly_repack_bits_inplace(G, Gbits, ctx);
815         _mpoly_gen_shift_left(G->exps, G->bits, G->length,
816                                    var, FLINT_MIN(Ashift, Bshift), ctx->minfo);
817     }
818     else
819     {
820         fq_nmod_mpoly_t tG, tAbar, tBbar;
821 
822         fq_nmod_mpoly_init(tG, ctx);
823         fq_nmod_mpoly_init(tAbar, ctx);
824         fq_nmod_mpoly_init(tBbar, ctx);
825 
826         success = _fq_nmod_mpoly_vec_content_mpoly(tG, Au->coeffs, Au->length, ctx);
827         if (!success)
828             goto cleanup;
829 
830         fq_nmod_mpoly_repack_bits_inplace(tG, Gbits, ctx);
831         _mpoly_gen_shift_left(tG->exps, tG->bits, tG->length,
832                                    var, FLINT_MIN(Ashift, Bshift), ctx->minfo);
833 
834         if (Abar != NULL)
835         {
836             success = fq_nmod_mpoly_divides(tAbar, A, tG, ctx);
837             FLINT_ASSERT(success);
838         }
839 
840         if (Bbar != NULL)
841         {
842             success = fq_nmod_mpoly_divides(tBbar, B, tG, ctx);
843             FLINT_ASSERT(success);
844         }
845 
846         fq_nmod_mpoly_swap(G, tG, ctx);
847 
848         if (Abar != NULL)
849             fq_nmod_mpoly_swap(Abar, tAbar, ctx);
850 
851         if (Bbar != NULL)
852             fq_nmod_mpoly_swap(Bbar, tBbar, ctx);
853 
854         fq_nmod_mpoly_clear(tG, ctx);
855         fq_nmod_mpoly_clear(tAbar, ctx);
856         fq_nmod_mpoly_clear(tBbar, ctx);
857     }
858 
859     success = 1;
860 
861 cleanup:
862 
863     fq_nmod_mpoly_univar_clear(Au, ctx);
864 
865     return success;
866 }
867 
868 
869 /************************ See if B divides A ********************************/
_try_divides(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t BB,const fq_nmod_mpoly_ctx_t ctx)870 static int _try_divides(
871     fq_nmod_mpoly_t G,
872     fq_nmod_mpoly_t Abar,
873     fq_nmod_mpoly_t Bbar,
874     const fq_nmod_mpoly_t A,
875     const fq_nmod_mpoly_t BB,
876     const fq_nmod_mpoly_ctx_t ctx)
877 {
878     int success = 0;
879     fq_nmod_mpoly_t Q, B, M;
880 
881     fq_nmod_mpoly_init(Q, ctx);
882     fq_nmod_mpoly_init(B, ctx);
883     fq_nmod_mpoly_init(M, ctx);
884 
885     /* BB = M*B */
886     fq_nmod_mpoly_term_content(M, BB, ctx);
887     fq_nmod_mpoly_divides(B, BB, M, ctx);
888 
889     if (fq_nmod_mpoly_divides(Q, A, B, ctx))
890     {
891         /* gcd(Q*B, M*B) */
892         _do_monomial_gcd(G, Abar, Bbar, Q, M, ctx);
893         fq_nmod_mpoly_mul(G, G, B, ctx);
894         success = 1;
895     }
896 
897     fq_nmod_mpoly_clear(Q, ctx);
898     fq_nmod_mpoly_clear(B, ctx);
899     fq_nmod_mpoly_clear(M, ctx);
900 
901     return success;
902 }
903 
904 /********************** Hit A and B with zippel ******************************/
_try_zippel(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const mpoly_gcd_info_t I,const fq_nmod_mpoly_ctx_t ctx)905 static int _try_zippel(
906     fq_nmod_mpoly_t G,
907     fq_nmod_mpoly_t Abar,
908     fq_nmod_mpoly_t Bbar,
909     const fq_nmod_mpoly_t A,
910     const fq_nmod_mpoly_t B,
911     const mpoly_gcd_info_t I,
912     const fq_nmod_mpoly_ctx_t ctx)
913 {
914     slong m = I->mvars;
915     int success;
916     flint_bitcnt_t wbits;
917     flint_rand_t randstate;
918     fq_nmod_mpoly_ctx_t uctx;
919     fq_nmod_mpolyu_t Au, Bu, Gu, Abaru, Bbaru;
920     fq_nmod_mpoly_t Ac, Bc, Gc, Abarc, Bbarc;
921 
922     FLINT_ASSERT(A->bits <= FLINT_BITS);
923     FLINT_ASSERT(B->bits <= FLINT_BITS);
924 
925     if (!(I->can_use & MPOLY_GCD_USE_ZIPPEL))
926         return 0;
927 
928     FLINT_ASSERT(m >= WORD(2));
929     FLINT_ASSERT(A->length > 0);
930     FLINT_ASSERT(B->length > 0);
931 
932     flint_randinit(randstate);
933 
934     /* uctx is context for Fq[y_1,...,y_{m-1}]*/
935     fq_nmod_mpoly_ctx_init(uctx, m - 1, ORD_LEX, ctx->fqctx);
936 
937     wbits = FLINT_MAX(A->bits, B->bits);
938 
939     fq_nmod_mpolyu_init(Au, wbits, uctx);
940     fq_nmod_mpolyu_init(Bu, wbits, uctx);
941     fq_nmod_mpolyu_init(Gu, wbits, uctx);
942     fq_nmod_mpolyu_init(Abaru, wbits, uctx);
943     fq_nmod_mpolyu_init(Bbaru, wbits, uctx);
944     fq_nmod_mpoly_init3(Ac, 0, wbits, uctx);
945     fq_nmod_mpoly_init3(Bc, 0, wbits, uctx);
946     fq_nmod_mpoly_init3(Gc, 0, wbits, uctx);
947     fq_nmod_mpoly_init3(Abarc, 0, wbits, uctx);
948     fq_nmod_mpoly_init3(Bbarc, 0, wbits, uctx);
949 
950     fq_nmod_mpoly_to_mpolyu_perm_deflate(Au, uctx, A, ctx,
951                                       I->zippel_perm, I->Amin_exp, I->Gstride);
952     fq_nmod_mpoly_to_mpolyu_perm_deflate(Bu, uctx, B, ctx,
953                                       I->zippel_perm, I->Bmin_exp, I->Gstride);
954 
955     success = fq_nmod_mpolyu_content_mpoly(Ac, Au, uctx) &&
956               fq_nmod_mpolyu_content_mpoly(Bc, Bu, uctx);
957     if (!success)
958         goto cleanup;
959 
960     fq_nmod_mpolyu_divexact_mpoly_inplace(Au, Ac, uctx);
961     fq_nmod_mpolyu_divexact_mpoly_inplace(Bu, Bc, uctx);
962 
963     success = fq_nmod_mpolyu_gcdm_zippel(Gu, Abaru, Bbaru, Au, Bu,
964                                                               uctx, randstate);
965     if (!success)
966         goto cleanup;
967 
968     if (Abar == NULL && Bbar == NULL)
969     {
970         success = fq_nmod_mpoly_gcd(Gc, Ac, Bc, uctx);
971         if (!success)
972             goto cleanup;
973 
974         fq_nmod_mpoly_repack_bits_inplace(Gc, wbits, uctx);
975         fq_nmod_mpolyu_mul_mpoly_inplace(Gu, Gc, uctx);
976 
977         fq_nmod_mpoly_from_mpolyu_perm_inflate(G, I->Gbits, ctx, Gu, uctx,
978                                       I->zippel_perm, I->Gmin_exp, I->Gstride);
979     }
980     else
981     {
982         success = fq_nmod_mpoly_gcd_cofactors(Gc, Abarc, Bbarc, Ac, Bc, uctx);
983         if (!success)
984             goto cleanup;
985 
986         fq_nmod_mpoly_repack_bits_inplace(Gc, wbits, uctx);
987         fq_nmod_mpoly_repack_bits_inplace(Abarc, wbits, uctx);
988         fq_nmod_mpoly_repack_bits_inplace(Bbarc, wbits, uctx);
989 
990         fq_nmod_mpolyu_mul_mpoly_inplace(Gu, Gc, uctx);
991         fq_nmod_mpolyu_mul_mpoly_inplace(Abaru, Abarc, uctx);
992         fq_nmod_mpolyu_mul_mpoly_inplace(Bbaru, Bbarc, uctx);
993 
994         fq_nmod_mpoly_from_mpolyu_perm_inflate(G, I->Gbits, ctx, Gu, uctx,
995                                      I->zippel_perm, I->Gmin_exp, I->Gstride);
996 
997         if (Abar != NULL)
998             fq_nmod_mpoly_from_mpolyu_perm_inflate(Abar, I->Abarbits, ctx,
999                       Abaru, uctx, I->zippel_perm, I->Abarmin_exp, I->Gstride);
1000 
1001         if (Bbar != NULL)
1002             fq_nmod_mpoly_from_mpolyu_perm_inflate(Bbar, I->Bbarbits, ctx,
1003                       Bbaru, uctx, I->zippel_perm, I->Bbarmin_exp, I->Gstride);
1004     }
1005 
1006     success = 1;
1007 
1008 cleanup:
1009 
1010     fq_nmod_mpolyu_clear(Au, uctx);
1011     fq_nmod_mpolyu_clear(Bu, uctx);
1012     fq_nmod_mpolyu_clear(Gu, uctx);
1013     fq_nmod_mpolyu_clear(Abaru, uctx);
1014     fq_nmod_mpolyu_clear(Bbaru, uctx);
1015     fq_nmod_mpoly_clear(Ac, uctx);
1016     fq_nmod_mpoly_clear(Bc, uctx);
1017     fq_nmod_mpoly_clear(Gc, uctx);
1018     fq_nmod_mpoly_clear(Abarc, uctx);
1019     fq_nmod_mpoly_clear(Bbarc, uctx);
1020 
1021     fq_nmod_mpoly_ctx_clear(uctx);
1022 
1023     flint_randclear(randstate);
1024 
1025     return success;
1026 }
1027 
_try_zippel2(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const mpoly_gcd_info_t I,const fq_nmod_mpoly_ctx_t ctx)1028 static int _try_zippel2(
1029     fq_nmod_mpoly_t G,
1030     fq_nmod_mpoly_t Abar,
1031     fq_nmod_mpoly_t Bbar,
1032     const fq_nmod_mpoly_t A,
1033     const fq_nmod_mpoly_t B,
1034     const mpoly_gcd_info_t I,
1035     const fq_nmod_mpoly_ctx_t ctx)
1036 {
1037     slong i, k;
1038     slong m = I->mvars;
1039     int success;
1040     flint_bitcnt_t wbits;
1041     fq_nmod_mpoly_ctx_t lctx;
1042     fq_nmod_mpoly_t Al, Bl, Gl, Abarl, Bbarl;
1043     fq_nmod_mpoly_t Al_lc, Bl_lc, Ac, Bc, Gc, Abarc, Bbarc, Gamma;
1044     slong * tmp, * Gl_degs, * Al_degs, * Bl_degs, * Gamma_degs, * Gguess;
1045     slong max_degree;
1046 
1047     FLINT_ASSERT(A->bits <= FLINT_BITS);
1048     FLINT_ASSERT(B->bits <= FLINT_BITS);
1049     FLINT_ASSERT(A->length > 0);
1050     FLINT_ASSERT(B->length > 0);
1051 
1052     if (!(I->can_use & MPOLY_GCD_USE_ZIPPEL2))
1053         return 0;
1054 
1055     FLINT_ASSERT(m >= WORD(3));
1056 
1057     tmp = FLINT_ARRAY_ALLOC(5*m, slong);
1058     Al_degs   = tmp + 1*m;
1059     Bl_degs   = tmp + 2*m;
1060     Gl_degs   = tmp + 3*m;
1061     Gamma_degs = tmp + 4*m;
1062 
1063     fq_nmod_mpoly_ctx_init(lctx, m, ORD_LEX, ctx->fqctx);
1064 
1065     max_degree = 0;
1066     for (i = 0; i < m; i++)
1067     {
1068         k = I->zippel2_perm[i];
1069 
1070         Gl_degs[i] = I->Gdeflate_deg_bound[k];
1071 
1072         Al_degs[i] = I->Adeflate_deg[k];
1073         max_degree = FLINT_MAX(max_degree, Al_degs[i]);
1074 
1075         Bl_degs[i] = I->Bdeflate_deg[k];
1076         max_degree = FLINT_MAX(max_degree, Bl_degs[i]);
1077     }
1078 
1079     wbits = 1 + FLINT_BIT_COUNT(max_degree);
1080     wbits = mpoly_fix_bits(wbits, lctx->minfo);
1081     FLINT_ASSERT(wbits <= FLINT_BITS);
1082 
1083     fq_nmod_mpoly_init3(Al, 0, wbits, lctx);
1084     fq_nmod_mpoly_init3(Bl, 0, wbits, lctx);
1085     fq_nmod_mpoly_init3(Gl, 0, wbits, lctx);
1086     fq_nmod_mpoly_init3(Abarl, 0, wbits, lctx);
1087     fq_nmod_mpoly_init3(Bbarl, 0, wbits, lctx);
1088     fq_nmod_mpoly_init3(Ac, 0, wbits, lctx);
1089     fq_nmod_mpoly_init3(Bc, 0, wbits, lctx);
1090     fq_nmod_mpoly_init3(Gc, 0, wbits, lctx);
1091     fq_nmod_mpoly_init3(Abarc, 0, wbits, lctx);
1092     fq_nmod_mpoly_init3(Bbarc, 0, wbits, lctx);
1093     fq_nmod_mpoly_init3(Gamma, 0, wbits, lctx);
1094     fq_nmod_mpoly_init3(Al_lc, 0, wbits, lctx);
1095     fq_nmod_mpoly_init3(Bl_lc, 0, wbits, lctx);
1096 
1097     fq_nmod_mpoly_to_mpolyl_perm_deflate(Al, lctx, A, ctx,
1098                                      I->zippel2_perm, I->Amin_exp, I->Gstride);
1099     fq_nmod_mpoly_to_mpolyl_perm_deflate(Bl, lctx, B, ctx,
1100                                      I->zippel2_perm, I->Bmin_exp, I->Gstride);
1101 
1102     success = fq_nmod_mpolyl_content(Ac, Al, 2, lctx) &&
1103               fq_nmod_mpolyl_content(Bc, Bl, 2, lctx);
1104     if (!success)
1105         goto cleanup;
1106 
1107     if (Abar == NULL && Bbar == NULL)
1108         success = fq_nmod_mpoly_gcd(Gc, Ac, Bc, lctx);
1109     else
1110         success = fq_nmod_mpoly_gcd_cofactors(Gc, Abarc, Bbarc, Ac, Bc, lctx);
1111     if (!success)
1112         goto cleanup;
1113 
1114     fq_nmod_mpoly_degrees_si(tmp, Ac, lctx);
1115     for (i = 0; i < m; i++)
1116         Al_degs[i] -= tmp[i];
1117 
1118     if (!fq_nmod_mpoly_is_one(Ac, lctx))
1119     {
1120         success = fq_nmod_mpoly_divides(Al, Al, Ac, lctx);
1121         FLINT_ASSERT(success);
1122     }
1123 
1124     fq_nmod_mpoly_degrees_si(tmp, Bc, lctx);
1125     for (i = 0; i < m; i++)
1126         Bl_degs[i] -= tmp[i];
1127 
1128     if (!fq_nmod_mpoly_is_one(Bc, lctx))
1129     {
1130         success = fq_nmod_mpoly_divides(Bl, Bl, Bc, lctx);
1131         FLINT_ASSERT(success);
1132     }
1133 
1134     fq_nmod_mpoly_degrees_si(tmp, Gc, lctx);
1135     for (i = 0; i < m; i++)
1136         Gl_degs[i] -= tmp[i];
1137 
1138     fq_nmod_mpoly_repack_bits_inplace(Al, wbits, lctx);
1139     fq_nmod_mpoly_repack_bits_inplace(Bl, wbits, lctx);
1140     fq_nmod_mpolyl_lead_coeff(Al_lc, Al, 2, lctx);
1141     fq_nmod_mpolyl_lead_coeff(Bl_lc, Bl, 2, lctx);
1142     success = fq_nmod_mpoly_gcd(Gamma, Al_lc, Bl_lc, lctx);
1143     if (!success)
1144         goto cleanup;
1145     fq_nmod_mpoly_repack_bits_inplace(Gamma, wbits, lctx);
1146 
1147     fq_nmod_mpoly_degrees_si(Gamma_degs, Gamma, lctx);
1148 
1149     Gguess = I->Gdeflate_deg_bounds_are_nice ? Gl_degs : NULL;
1150 
1151     success = fq_nmod_mpolyl_gcd_zippel_smprime(Gl, Gguess, Abarl, Bbarl,
1152                             Al, Al_degs, Bl, Bl_degs, Gamma, Gamma_degs, lctx);
1153     if (!success)
1154     {
1155         success = fq_nmod_mpolyl_gcd_zippel_lgprime(Gl, Gguess, Abarl, Bbarl,
1156                             Al, Al_degs, Bl, Bl_degs, Gamma, Gamma_degs, lctx);
1157         if (!success)
1158             goto cleanup;
1159     }
1160 
1161     if (!fq_nmod_mpoly_is_one(Gc, lctx))
1162         fq_nmod_mpoly_mul(Gl, Gl, Gc, lctx);
1163     fq_nmod_mpoly_from_mpolyl_perm_inflate(G, I->Gbits, ctx, Gl, lctx,
1164                                      I->zippel2_perm, I->Gmin_exp, I->Gstride);
1165     if (Abar != NULL)
1166     {
1167         fq_nmod_mpoly_mul(Abarl, Abarl, Abarc, lctx);
1168         fq_nmod_mpoly_from_mpolyl_perm_inflate(Abar, I->Abarbits, ctx, Abarl, lctx,
1169                                   I->zippel2_perm, I->Abarmin_exp, I->Gstride);
1170     }
1171 
1172     if (Bbar != NULL)
1173     {
1174         fq_nmod_mpoly_mul(Bbarl, Bbarl, Bbarc, lctx);
1175         fq_nmod_mpoly_from_mpolyl_perm_inflate(Bbar, I->Bbarbits, ctx, Bbarl, lctx,
1176                                   I->zippel2_perm, I->Bbarmin_exp, I->Gstride);
1177     }
1178 
1179     success = 1;
1180 
1181 cleanup:
1182 
1183     fq_nmod_mpoly_clear(Al, lctx);
1184     fq_nmod_mpoly_clear(Bl, lctx);
1185     fq_nmod_mpoly_clear(Gl, lctx);
1186     fq_nmod_mpoly_clear(Abarl, lctx);
1187     fq_nmod_mpoly_clear(Bbarl, lctx);
1188     fq_nmod_mpoly_clear(Ac, lctx);
1189     fq_nmod_mpoly_clear(Bc, lctx);
1190     fq_nmod_mpoly_clear(Gc, lctx);
1191     fq_nmod_mpoly_clear(Abarc, lctx);
1192     fq_nmod_mpoly_clear(Bbarc, lctx);
1193     fq_nmod_mpoly_clear(Gamma, lctx);
1194     fq_nmod_mpoly_clear(Al_lc, lctx);
1195     fq_nmod_mpoly_clear(Bl_lc, lctx);
1196 
1197     fq_nmod_mpoly_ctx_clear(lctx);
1198 
1199     flint_free(tmp);
1200 
1201     return success;
1202 }
1203 
1204 /******************** Hit A and B with hensel lifting ************************/
_try_hensel(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const mpoly_gcd_info_t I,const fq_nmod_mpoly_ctx_t ctx)1205 static int _try_hensel(
1206     fq_nmod_mpoly_t G,
1207     fq_nmod_mpoly_t Abar,
1208     fq_nmod_mpoly_t Bbar,
1209     const fq_nmod_mpoly_t A,
1210     const fq_nmod_mpoly_t B,
1211     const mpoly_gcd_info_t I,
1212     const fq_nmod_mpoly_ctx_t ctx)
1213 {
1214     slong i, k;
1215     slong m = I->mvars;
1216     int success;
1217     flint_bitcnt_t wbits;
1218     fq_nmod_mpoly_ctx_t lctx;
1219     fq_nmod_mpoly_t Al, Bl, Gl, Abarl, Bbarl;
1220     fq_nmod_mpoly_t Ac, Bc, Gc, Abarc, Bbarc;
1221     slong max_deg;
1222 
1223     FLINT_ASSERT(A->bits <= FLINT_BITS);
1224     FLINT_ASSERT(B->bits <= FLINT_BITS);
1225     FLINT_ASSERT(A->length > 0);
1226     FLINT_ASSERT(B->length > 0);
1227 
1228     if (!(I->can_use & MPOLY_GCD_USE_HENSEL))
1229         return 0;
1230 
1231     FLINT_ASSERT(m >= WORD(2));
1232 
1233     fq_nmod_mpoly_ctx_init(lctx, m, ORD_LEX, ctx->fqctx);
1234 
1235     max_deg = 0;
1236     for (i = 0; i < m; i++)
1237     {
1238         k = I->hensel_perm[i];
1239         max_deg = FLINT_MAX(max_deg, I->Adeflate_deg[k]);
1240         max_deg = FLINT_MAX(max_deg, I->Bdeflate_deg[k]);
1241     }
1242 
1243     wbits = 1 + FLINT_BIT_COUNT(max_deg);
1244     wbits = mpoly_fix_bits(wbits, lctx->minfo);
1245     FLINT_ASSERT(wbits <= FLINT_BITS);
1246 
1247     fq_nmod_mpoly_init3(Al, 0, wbits, lctx);
1248     fq_nmod_mpoly_init3(Bl, 0, wbits, lctx);
1249     fq_nmod_mpoly_init3(Gl, 0, wbits, lctx);
1250     fq_nmod_mpoly_init3(Abarl, 0, wbits, lctx);
1251     fq_nmod_mpoly_init3(Bbarl, 0, wbits, lctx);
1252     fq_nmod_mpoly_init3(Ac, 0, wbits, lctx);
1253     fq_nmod_mpoly_init3(Bc, 0, wbits, lctx);
1254     fq_nmod_mpoly_init3(Gc, 0, wbits, lctx);
1255     fq_nmod_mpoly_init3(Abarc, 0, wbits, lctx);
1256     fq_nmod_mpoly_init3(Bbarc, 0, wbits, lctx);
1257 
1258     fq_nmod_mpoly_to_mpolyl_perm_deflate(Al, lctx, A, ctx,
1259                                       I->hensel_perm, I->Amin_exp, I->Gstride);
1260     fq_nmod_mpoly_to_mpolyl_perm_deflate(Bl, lctx, B, ctx,
1261                                       I->hensel_perm, I->Bmin_exp, I->Gstride);
1262 
1263     success = fq_nmod_mpolyl_content(Ac, Al, 1, lctx) &&
1264               fq_nmod_mpolyl_content(Bc, Bl, 1, lctx);
1265     if (!success)
1266         goto cleanup;
1267 
1268     if (Abar == NULL && Bbar == NULL)
1269         success = fq_nmod_mpoly_gcd(Gc, Ac, Bc, lctx);
1270     else
1271         success = fq_nmod_mpoly_gcd_cofactors(Gc, Abarc, Bbarc, Ac, Bc, lctx);
1272     if (!success)
1273         goto cleanup;
1274 
1275     success = fq_nmod_mpoly_divides(Al, Al, Ac, lctx);
1276     FLINT_ASSERT(success);
1277 
1278     success = fq_nmod_mpoly_divides(Bl, Bl, Bc, lctx);
1279     FLINT_ASSERT(success);
1280 
1281     fq_nmod_mpoly_repack_bits_inplace(Al, wbits, lctx);
1282     fq_nmod_mpoly_repack_bits_inplace(Bl, wbits, lctx);
1283 
1284     max_deg = I->Gdeflate_deg_bound[I->hensel_perm[0]];
1285     success = fq_nmod_mpolyl_gcd_hensel_smprime(Gl, max_deg, Abarl, Bbarl, Al, Bl, lctx);
1286     if (!success)
1287         goto cleanup;
1288 
1289     fq_nmod_mpoly_mul(Gl, Gl, Gc, lctx);
1290     fq_nmod_mpoly_from_mpolyl_perm_inflate(G, I->Gbits, ctx, Gl, lctx,
1291                                       I->hensel_perm, I->Gmin_exp, I->Gstride);
1292     if (Abar != NULL)
1293     {
1294         fq_nmod_mpoly_mul(Abarl, Abarl, Abarc, lctx);
1295         fq_nmod_mpoly_from_mpolyl_perm_inflate(Abar, I->Abarbits, ctx, Abarl, lctx,
1296                                    I->hensel_perm, I->Abarmin_exp, I->Gstride);
1297     }
1298 
1299     if (Bbar != NULL)
1300     {
1301         fq_nmod_mpoly_mul(Bbarl, Bbarl, Bbarc, lctx);
1302         fq_nmod_mpoly_from_mpolyl_perm_inflate(Bbar, I->Bbarbits, ctx, Bbarl, lctx,
1303                                    I->hensel_perm, I->Bbarmin_exp, I->Gstride);
1304     }
1305 
1306     success = 1;
1307 
1308 cleanup:
1309 
1310     fq_nmod_mpoly_clear(Al, lctx);
1311     fq_nmod_mpoly_clear(Bl, lctx);
1312     fq_nmod_mpoly_clear(Gl, lctx);
1313     fq_nmod_mpoly_clear(Abarl, lctx);
1314     fq_nmod_mpoly_clear(Bbarl, lctx);
1315     fq_nmod_mpoly_clear(Ac, lctx);
1316     fq_nmod_mpoly_clear(Bc, lctx);
1317     fq_nmod_mpoly_clear(Gc, lctx);
1318     fq_nmod_mpoly_clear(Abarc, lctx);
1319     fq_nmod_mpoly_clear(Bbarc, lctx);
1320 
1321     fq_nmod_mpoly_ctx_clear(lctx);
1322 
1323     return success;
1324 }
1325 
1326 
1327 /*********************** Hit A and B with brown ******************************/
_try_brown(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,mpoly_gcd_info_t I,const fq_nmod_mpoly_ctx_t ctx)1328 static int _try_brown(
1329     fq_nmod_mpoly_t G,
1330     fq_nmod_mpoly_t Abar,
1331     fq_nmod_mpoly_t Bbar,
1332     const fq_nmod_mpoly_t A,
1333     const fq_nmod_mpoly_t B,
1334     mpoly_gcd_info_t I,
1335     const fq_nmod_mpoly_ctx_t ctx)
1336 {
1337     int success;
1338     slong m = I->mvars;
1339     flint_bitcnt_t wbits;
1340     fq_nmod_mpoly_ctx_t nctx;
1341     fq_nmod_mpolyn_t An, Bn, Gn, Abarn, Bbarn;
1342 
1343     if (!(I->can_use & MPOLY_GCD_USE_BROWN))
1344         return 0;
1345 
1346     FLINT_ASSERT(m >= 2);
1347     FLINT_ASSERT(A->bits <= FLINT_BITS);
1348     FLINT_ASSERT(B->bits <= FLINT_BITS);
1349     FLINT_ASSERT(A->length > 0);
1350     FLINT_ASSERT(B->length > 0);
1351 
1352     wbits = FLINT_MAX(A->bits, B->bits);
1353 
1354     fq_nmod_mpoly_ctx_init(nctx, m, ORD_LEX, ctx->fqctx);
1355     fq_nmod_mpolyn_init(An, wbits, nctx);
1356     fq_nmod_mpolyn_init(Bn, wbits, nctx);
1357     fq_nmod_mpolyn_init(Gn, wbits, nctx);
1358     fq_nmod_mpolyn_init(Abarn, wbits, nctx);
1359     fq_nmod_mpolyn_init(Bbarn, wbits, nctx);
1360 
1361     fq_nmod_mpoly_to_mpolyn_perm_deflate(An, nctx, A, ctx,
1362                                        I->brown_perm, I->Amin_exp, I->Gstride);
1363     fq_nmod_mpoly_to_mpolyn_perm_deflate(Bn, nctx, B, ctx,
1364                                        I->brown_perm, I->Bmin_exp, I->Gstride);
1365 
1366     FLINT_ASSERT(An->bits == wbits);
1367     FLINT_ASSERT(Bn->bits == wbits);
1368     FLINT_ASSERT(An->length > 1);
1369     FLINT_ASSERT(Bn->length > 1);
1370 
1371     success = fq_nmod_mpolyn_gcd_brown_smprime(Gn, Abarn, Bbarn, An, Bn,
1372                                                                   m - 1, nctx);
1373     if (!success)
1374     {
1375         fq_nmod_mpoly_to_mpolyn_perm_deflate(An, nctx, A, ctx,
1376                                        I->brown_perm, I->Amin_exp, I->Gstride);
1377         fq_nmod_mpoly_to_mpolyn_perm_deflate(Bn, nctx, B, ctx,
1378                                        I->brown_perm, I->Bmin_exp, I->Gstride);
1379         success = fq_nmod_mpolyn_gcd_brown_lgprime(Gn, Abarn, Bbarn, An, Bn,
1380                                                                   m - 1, nctx);
1381     }
1382 
1383     if (!success)
1384         goto cleanup;
1385 
1386     fq_nmod_mpoly_from_mpolyn_perm_inflate(G, I->Gbits, ctx, Gn, nctx,
1387                                        I->brown_perm, I->Gmin_exp, I->Gstride);
1388 
1389     if (Abar != NULL)
1390         fq_nmod_mpoly_from_mpolyn_perm_inflate(Abar, I->Abarbits, ctx, Abarn, nctx,
1391                                     I->brown_perm, I->Abarmin_exp, I->Gstride);
1392 
1393     if (Bbar != NULL)
1394         fq_nmod_mpoly_from_mpolyn_perm_inflate(Bbar, I->Bbarbits, ctx, Bbarn, nctx,
1395                                     I->brown_perm, I->Bbarmin_exp, I->Gstride);
1396 
1397     success = 1;
1398 
1399 cleanup:
1400 
1401     fq_nmod_mpolyn_clear(An, nctx);
1402     fq_nmod_mpolyn_clear(Bn, nctx);
1403     fq_nmod_mpolyn_clear(Gn, nctx);
1404     fq_nmod_mpolyn_clear(Abarn, nctx);
1405     fq_nmod_mpolyn_clear(Bbarn, nctx);
1406     fq_nmod_mpoly_ctx_clear(nctx);
1407 
1408     return success;
1409 }
1410 
1411 
1412 /*
1413     Both A and B have to be packed into bits <= FLINT_BITS
1414     return is 1 for success, 0 for failure.
1415 */
_fq_nmod_mpoly_gcd_algo_small(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const fq_nmod_mpoly_ctx_t ctx,unsigned int algo)1416 static int _fq_nmod_mpoly_gcd_algo_small(
1417     fq_nmod_mpoly_t G,
1418     fq_nmod_mpoly_t Abar, /* could be NULL */
1419     fq_nmod_mpoly_t Bbar, /* could be NULL */
1420     const fq_nmod_mpoly_t A,
1421     const fq_nmod_mpoly_t B,
1422     const fq_nmod_mpoly_ctx_t ctx,
1423     unsigned int algo)
1424 {
1425     int success;
1426     flint_bitcnt_t Gbits = FLINT_MIN(A->bits, B->bits);
1427     flint_bitcnt_t Abarbits = A->bits;
1428     flint_bitcnt_t Bbarbits = B->bits;
1429     slong v_in_both;
1430     slong v_in_either;
1431     slong v_in_A_only;
1432     slong v_in_B_only;
1433     slong j;
1434     slong nvars = ctx->minfo->nvars;
1435     mpoly_gcd_info_t I;
1436 #if FLINT_WANT_ASSERT
1437     fq_nmod_mpoly_t T, Asave, Bsave;
1438 #endif
1439 
1440     if (A->length == 1)
1441         return _do_monomial_gcd(G, Bbar, Abar, B, A, ctx);
1442     else if (B->length == 1)
1443         return _do_monomial_gcd(G, Abar, Bbar, A, B, ctx);
1444 
1445 #if FLINT_WANT_ASSERT
1446     fq_nmod_mpoly_init(T, ctx);
1447     fq_nmod_mpoly_init(Asave, ctx);
1448     fq_nmod_mpoly_init(Bsave, ctx);
1449     fq_nmod_mpoly_set(Asave, A, ctx);
1450     fq_nmod_mpoly_set(Bsave, B, ctx);
1451 #endif
1452 
1453     mpoly_gcd_info_init(I, nvars);
1454 
1455     /* entries of I are all now invalid */
1456 
1457     I->Gbits = Gbits;
1458     I->Abarbits = Abarbits;
1459     I->Bbarbits = Bbarbits;
1460 
1461     mpoly_gcd_info_limits(I->Amax_exp, I->Amin_exp, I->Alead_count,
1462                       I->Atail_count, A->exps, A->bits, A->length, ctx->minfo);
1463     mpoly_gcd_info_limits(I->Bmax_exp, I->Bmin_exp, I->Blead_count,
1464                       I->Btail_count, B->exps, B->bits, B->length, ctx->minfo);
1465 
1466     /* set ess(p) := p/term_content(p) */
1467 
1468     /* check if the cofactors could be monomials, i.e. ess(A) == ess(B) */
1469     for (j = 0; j < nvars; j++)
1470     {
1471         if (I->Amax_exp[j] - I->Amin_exp[j] != I->Bmax_exp[j] - I->Bmin_exp[j])
1472             goto skip_monomial_cofactors;
1473     }
1474 
1475     if (_try_monomial_cofactors(G, Abar, Bbar, A, B, ctx))
1476     {
1477         goto successful;
1478     }
1479 
1480 skip_monomial_cofactors:
1481 
1482     mpoly_gcd_info_stride(I->Gstride,
1483             A->exps, A->bits, A->length, I->Amax_exp, I->Amin_exp,
1484             B->exps, B->bits, B->length, I->Bmax_exp, I->Bmin_exp, ctx->minfo);
1485 
1486     for (j = 0; j < nvars; j++)
1487     {
1488         ulong t = I->Gstride[j];
1489 
1490         if (t == 0)
1491         {
1492             FLINT_ASSERT(I->Amax_exp[j] == I->Amin_exp[j] ||
1493                          I->Bmax_exp[j] == I->Bmin_exp[j]);
1494         }
1495         else
1496         {
1497             FLINT_ASSERT((I->Amax_exp[j] - I->Amin_exp[j]) % t == 0);
1498             FLINT_ASSERT((I->Bmax_exp[j] - I->Bmin_exp[j]) % t == 0);
1499         }
1500 
1501         I->Adeflate_deg[j] = t == 0 ? 0 : (I->Amax_exp[j] - I->Amin_exp[j])/t;
1502         I->Bdeflate_deg[j] = t == 0 ? 0 : (I->Bmax_exp[j] - I->Bmin_exp[j])/t;
1503 
1504         t = FLINT_MIN(I->Amin_exp[j], I->Bmin_exp[j]);
1505         I->Gmin_exp[j] = t;
1506         I->Abarmin_exp[j] = I->Amin_exp[j] - t;
1507         I->Bbarmin_exp[j] = I->Bmin_exp[j] - t;
1508     }
1509 
1510     /*
1511         The following are now valid:
1512             I->Amax_exp, I->Amin_exp, I->Alead_count, I->Atail_count,
1513             I->Bmax_exp, I->Bmin_exp, I->Blead_count, I->Btail_count,
1514             I->Gstride
1515             I->Adeflate_deg
1516             I->Bdeflate_deg
1517             I->Gmin_exp
1518     */
1519 
1520     /* check if ess(A) and ess(B) have a variable v_in_both in common */
1521     v_in_both = -WORD(1);
1522     for (j = 0; j < nvars; j++)
1523     {
1524         if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] > I->Bmin_exp[j])
1525         {
1526             v_in_both = j;
1527             break;
1528         }
1529     }
1530     if (v_in_both == -WORD(1))
1531     {
1532         _do_trivial(G, Abar, Bbar, A, B, I, ctx);
1533         goto successful;
1534     }
1535 
1536     /* check if ess(A) and ess(B) depend on another variable v_in_either */
1537     FLINT_ASSERT(0 <= v_in_both && v_in_both < nvars);
1538 
1539     v_in_either = -WORD(1);
1540     for (j = 0; j < nvars; j++)
1541     {
1542         if (j == v_in_both)
1543             continue;
1544 
1545         if (I->Amax_exp[j] > I->Amin_exp[j] || I->Bmax_exp[j] > I->Bmin_exp[j])
1546         {
1547             v_in_either = j;
1548             break;
1549         }
1550     }
1551 
1552     if (v_in_either == -WORD(1))
1553     {
1554         _do_univar(G, Abar, Bbar, A, B, v_in_both, I, ctx);
1555         goto successful;
1556     }
1557 
1558     /* check if there is a variable in ess(A) that is not in ess(B) */
1559     v_in_A_only = -WORD(1);
1560     v_in_B_only = -WORD(1);
1561     for (j = 0; j < nvars; j++)
1562     {
1563         if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] == I->Bmin_exp[j])
1564         {
1565             v_in_A_only = j;
1566             break;
1567         }
1568         if (I->Bmax_exp[j] > I->Bmin_exp[j] && I->Amax_exp[j] == I->Amin_exp[j])
1569         {
1570             v_in_B_only = j;
1571             break;
1572         }
1573     }
1574     if (v_in_A_only != -WORD(1))
1575     {
1576         success = _try_missing_var(G, I->Gbits, Abar, Bbar, v_in_A_only,
1577                                    A, I->Amin_exp[v_in_A_only],
1578                                    B, I->Bmin_exp[v_in_A_only],
1579                                    ctx);
1580         goto cleanup;
1581     }
1582     if (v_in_B_only != -WORD(1))
1583     {
1584         success = _try_missing_var(G, I->Gbits, Bbar, Abar, v_in_B_only,
1585                                    B, I->Bmin_exp[v_in_B_only],
1586                                    A, I->Amin_exp[v_in_B_only],
1587                                    ctx);
1588         goto cleanup;
1589     }
1590 
1591     /* all variable are now either
1592             missing from both ess(A) and ess(B), or
1593             present in both ess(A) and ess(B)
1594         and there are at least two in the latter case
1595     */
1596 
1597     mpoly_gcd_info_set_estimates_fq_nmod_mpoly(I, A, B, ctx);
1598     if (!I->Gdeflate_deg_bounds_are_nice)
1599         mpoly_gcd_info_set_estimates_fq_nmod_mpoly_lgprime(I, A, B, ctx);
1600     mpoly_gcd_info_set_perm(I, A->length, B->length, ctx->minfo);
1601 
1602     /* everything in I is valid now */
1603 
1604     /* check divisibility A/B and B/A */
1605     {
1606         int gcd_is_trivial = 1;
1607         int try_a = I->Gdeflate_deg_bounds_are_nice;
1608         int try_b = I->Gdeflate_deg_bounds_are_nice;
1609         for (j = 0; j < nvars; j++)
1610         {
1611             if (I->Gdeflate_deg_bound[j] != 0)
1612                 gcd_is_trivial = 0;
1613 
1614             if (I->Adeflate_deg[j] != I->Gdeflate_deg_bound[j])
1615                 try_a = 0;
1616 
1617             if (I->Bdeflate_deg[j] != I->Gdeflate_deg_bound[j])
1618                 try_b = 0;
1619         }
1620 
1621         if (gcd_is_trivial)
1622         {
1623             _do_trivial(G, Abar, Bbar, A, B, I, ctx);
1624             goto successful;
1625         }
1626 
1627         if (try_a && _try_divides(G, Bbar, Abar, B, A, ctx))
1628             goto successful;
1629 
1630         if (try_b && _try_divides(G, Abar, Bbar, A, B, ctx))
1631             goto successful;
1632     }
1633 
1634     if (I->mvars < 3)
1635     {
1636         mpoly_gcd_info_measure_brown(I, A->length, B->length, ctx->minfo);
1637         mpoly_gcd_info_measure_hensel(I, A->length, B->length, ctx->minfo);
1638 
1639         algo &= (MPOLY_GCD_USE_BROWN | MPOLY_GCD_USE_HENSEL);
1640 
1641         if (algo == MPOLY_GCD_USE_BROWN)
1642         {
1643             success = _try_brown(G, Abar, Bbar, A, B, I, ctx);
1644         }
1645         else if (algo == MPOLY_GCD_USE_HENSEL)
1646         {
1647             success = _try_hensel(G, Abar, Bbar, A, B, I, ctx);
1648         }
1649         else
1650         {
1651             if (I->Adensity + I->Bdensity > 0.05)
1652             {
1653                 success = _try_brown(G, Abar, Bbar, A, B, I, ctx) ||
1654                           _try_hensel(G, Abar, Bbar, A, B, I, ctx);
1655             }
1656             else
1657             {
1658                 success = _try_hensel(G, Abar, Bbar, A, B, I, ctx) ||
1659                           _try_brown(G, Abar, Bbar, A, B, I, ctx);
1660             }
1661         }
1662 
1663         goto cleanup;
1664     }
1665     else if (algo == MPOLY_GCD_USE_HENSEL)
1666     {
1667         mpoly_gcd_info_measure_hensel(I, A->length, B->length, ctx->minfo);
1668         success = _try_hensel(G, Abar, Bbar, A, B, I, ctx);
1669         goto cleanup;
1670     }
1671     else if (algo == MPOLY_GCD_USE_BROWN)
1672     {
1673         mpoly_gcd_info_measure_brown(I, A->length, B->length, ctx->minfo);
1674         success = _try_brown(G, Abar, Bbar, A, B, I, ctx);
1675         goto cleanup;
1676     }
1677     else if (algo == MPOLY_GCD_USE_ZIPPEL)
1678     {
1679         mpoly_gcd_info_measure_zippel(I, A->length, B->length, ctx->minfo);
1680         success = _try_zippel(G, Abar, Bbar, A, B, I, ctx);
1681         goto cleanup;
1682     }
1683     else if (algo == MPOLY_GCD_USE_ZIPPEL2)
1684     {
1685         mpoly_gcd_info_measure_zippel2(I, A->length, B->length, ctx->minfo);
1686         success = _try_zippel2(G, Abar, Bbar, A, B, I, ctx);
1687         goto cleanup;
1688     }
1689     else
1690     {
1691         slong k;
1692         double density = I->Adensity + I->Bdensity;
1693 
1694         /*
1695             mpoly gcd case.
1696             Only rule is that measure_X must be called before
1697                 try_X is called or I->X_perm is accessed.
1698         */
1699 
1700         mpoly_gcd_info_measure_hensel(I, A->length, B->length, ctx->minfo);
1701         mpoly_gcd_info_measure_brown(I, A->length, B->length, ctx->minfo);
1702         mpoly_gcd_info_measure_zippel(I, A->length, B->length, ctx->minfo);
1703         mpoly_gcd_info_measure_zippel2(I, A->length, B->length, ctx->minfo);
1704 
1705         if (density > 0.08)
1706         {
1707             if (_try_brown(G, Abar, Bbar, A, B, I, ctx))
1708                 goto successful;
1709         }
1710 
1711         if (I->Adeflate_tdeg > 0 && I->Bdeflate_tdeg > 0)
1712         {
1713             fmpz_t x;
1714             double tdensity;
1715             fmpz_init(x);
1716             fmpz_bin_uiui(x, (ulong)I->Adeflate_tdeg + I->mvars, I->mvars);
1717             tdensity = A->length/fmpz_get_d(x);
1718             fmpz_bin_uiui(x, (ulong)I->Bdeflate_tdeg + I->mvars, I->mvars);
1719             tdensity += B->length/fmpz_get_d(x);
1720             density = FLINT_MAX(density, tdensity);
1721             fmpz_clear(x);
1722         }
1723 
1724         if (density > 0.05)
1725         {
1726             if (_try_hensel(G, Abar, Bbar, A, B, I, ctx))
1727                 goto successful;
1728         }
1729 
1730         k = I->zippel2_perm[1];
1731         k = FLINT_MAX(I->Adeflate_deg[k], I->Bdeflate_deg[k]);
1732         if ((A->length + B->length)/64 < k)
1733         {
1734             if (_try_zippel(G, Abar, Bbar, A, B, I, ctx))
1735                 goto successful;
1736             if (_try_zippel2(G, Abar, Bbar, A, B, I, ctx))
1737                 goto successful;
1738         }
1739         else
1740         {
1741             if (_try_zippel2(G, Abar, Bbar, A, B, I, ctx))
1742                 goto successful;
1743             if (_try_zippel(G, Abar, Bbar, A, B, I, ctx))
1744                 goto successful;
1745         }
1746 
1747         success = _try_hensel(G, Abar, Bbar, A, B, I, ctx) ||
1748                   _try_brown(G, Abar, Bbar, A, B, I, ctx);
1749         goto cleanup;
1750     }
1751 
1752     success = 0;
1753     goto cleanup;
1754 
1755 successful:
1756 
1757     success = 1;
1758 
1759 cleanup:
1760 
1761     mpoly_gcd_info_clear(I);
1762 
1763     if (success)
1764     {
1765         FLINT_ASSERT(G->length > 0);
1766 
1767         if (!n_fq_is_one(G->coeffs + 0, ctx->fqctx))
1768         {
1769             if (Abar != NULL)
1770                 fq_nmod_mpoly_scalar_mul_n_fq(Abar, Abar, G->coeffs + 0, ctx);
1771 
1772             if (Bbar != NULL)
1773                 fq_nmod_mpoly_scalar_mul_n_fq(Bbar, Bbar, G->coeffs + 0, ctx);
1774 
1775             fq_nmod_mpoly_make_monic(G, G, ctx);
1776         }
1777 
1778         FLINT_ASSERT(fq_nmod_mpoly_divides(T, Asave, G, ctx));
1779         FLINT_ASSERT(Abar == NULL || fq_nmod_mpoly_equal(T, Abar, ctx));
1780 
1781         FLINT_ASSERT(fq_nmod_mpoly_divides(T, Bsave, G, ctx));
1782         FLINT_ASSERT(Bbar == NULL || fq_nmod_mpoly_equal(T, Bbar, ctx));
1783     }
1784 
1785 #if FLINT_WANT_ASSERT
1786     fq_nmod_mpoly_clear(T, ctx);
1787     fq_nmod_mpoly_clear(Asave, ctx);
1788     fq_nmod_mpoly_clear(Bsave, ctx);
1789 #endif
1790 
1791     return success;
1792 }
1793 
1794 
1795 /*
1796     The gcd calculation is unusual.
1797     First see if both inputs fit into FLINT_BITS.
1798     Then, try deflation as a last resort.
1799 */
_fq_nmod_mpoly_gcd_algo_large(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const fq_nmod_mpoly_ctx_t ctx,unsigned int algo)1800 static int _fq_nmod_mpoly_gcd_algo_large(
1801     fq_nmod_mpoly_t G,
1802     fq_nmod_mpoly_t Abar,
1803     fq_nmod_mpoly_t Bbar,
1804     const fq_nmod_mpoly_t A,
1805     const fq_nmod_mpoly_t B,
1806     const fq_nmod_mpoly_ctx_t ctx,
1807     unsigned int algo)
1808 {
1809     int success;
1810     slong k;
1811     fmpz * Ashift, * Astride;
1812     fmpz * Bshift, * Bstride;
1813     fmpz * Gshift, * Gstride;
1814     fq_nmod_mpoly_t Anew, Bnew;
1815     const fq_nmod_mpoly_struct * Ause, * Buse;
1816 
1817     if (A->length == 1)
1818         return _do_monomial_gcd(G, Bbar, Abar, B, A, ctx);
1819 
1820     if (B->length == 1)
1821         return _do_monomial_gcd(G, Abar, Bbar, A, B, ctx);
1822 
1823     if (_try_monomial_cofactors(G, Abar, Bbar, A, B, ctx))
1824         return 1;
1825 
1826     fq_nmod_mpoly_init(Anew, ctx);
1827     fq_nmod_mpoly_init(Bnew, ctx);
1828 
1829     Ause = A;
1830     if (A->bits > FLINT_BITS)
1831     {
1832        if (!fq_nmod_mpoly_repack_bits(Anew, A, FLINT_BITS, ctx))
1833             goto could_not_repack;
1834         Ause = Anew;
1835     }
1836 
1837     Buse = B;
1838     if (B->bits > FLINT_BITS)
1839     {
1840         if (!fq_nmod_mpoly_repack_bits(Bnew, B, FLINT_BITS, ctx))
1841             goto could_not_repack;
1842         Buse = Bnew;
1843     }
1844 
1845     success = _fq_nmod_mpoly_gcd_algo_small(G, Abar, Bbar, Ause, Buse, ctx, algo);
1846 
1847     goto cleanup;
1848 
1849 could_not_repack:
1850 
1851     /*
1852         One of A or B could not be repacked into FLINT_BITS. See if
1853         they both fit into FLINT_BITS after deflation.
1854     */
1855 
1856     Ashift  = _fmpz_vec_init(ctx->minfo->nvars);
1857     Astride = _fmpz_vec_init(ctx->minfo->nvars);
1858     Bshift  = _fmpz_vec_init(ctx->minfo->nvars);
1859     Bstride = _fmpz_vec_init(ctx->minfo->nvars);
1860     Gshift  = _fmpz_vec_init(ctx->minfo->nvars);
1861     Gstride = _fmpz_vec_init(ctx->minfo->nvars);
1862 
1863     fq_nmod_mpoly_deflation(Ashift, Astride, A, ctx);
1864     fq_nmod_mpoly_deflation(Bshift, Bstride, B, ctx);
1865     _fmpz_vec_min(Gshift, Ashift, Bshift, ctx->minfo->nvars);
1866     for (k = 0; k < ctx->minfo->nvars; k++)
1867     {
1868         fmpz_gcd(Gstride + k, Astride + k, Bstride + k);
1869     }
1870 
1871     fq_nmod_mpoly_deflate(Anew, A, Ashift, Gstride, ctx);
1872     if (Anew->bits > FLINT_BITS)
1873     {
1874         success = fq_nmod_mpoly_repack_bits(Anew, Anew, FLINT_BITS, ctx);
1875         if (!success)
1876             goto deflate_cleanup;
1877     }
1878 
1879     fq_nmod_mpoly_deflate(Bnew, B, Bshift, Gstride, ctx);
1880     if (Bnew->bits > FLINT_BITS)
1881     {
1882         success = fq_nmod_mpoly_repack_bits(Bnew, Bnew, FLINT_BITS, ctx);
1883         if (!success)
1884             goto deflate_cleanup;
1885     }
1886 
1887     success = _fq_nmod_mpoly_gcd_algo(G, Abar, Bbar, Anew, Bnew, ctx, algo);
1888     if (!success)
1889         goto deflate_cleanup;
1890 
1891     for (k = 0; k < ctx->minfo->nvars; k++)
1892     {
1893         fmpz_sub(Ashift + k, Ashift + k, Gshift + k);
1894         fmpz_sub(Bshift + k, Bshift + k, Gshift + k);
1895         FLINT_ASSERT(fmpz_sgn(Ashift + k) >= 0);
1896         FLINT_ASSERT(fmpz_sgn(Bshift + k) >= 0);
1897     }
1898 
1899     fq_nmod_mpoly_inflate(G, G, Gshift, Gstride, ctx);
1900     if (Abar != NULL)
1901         fq_nmod_mpoly_inflate(Abar, Abar, Ashift, Gstride, ctx);
1902     if (Bbar != NULL)
1903         fq_nmod_mpoly_inflate(Bbar, Bbar, Bshift, Gstride, ctx);
1904 
1905     FLINT_ASSERT(G->length > 0);
1906     if (!n_fq_is_one(G->coeffs + 0, ctx->fqctx))
1907     {
1908         if (Abar != NULL)
1909             fq_nmod_mpoly_scalar_mul_n_fq(Abar, Abar, G->coeffs + 0, ctx);
1910 
1911         if (Bbar != NULL)
1912             fq_nmod_mpoly_scalar_mul_n_fq(Bbar, Bbar, G->coeffs + 0, ctx);
1913 
1914         fq_nmod_mpoly_make_monic(G, G, ctx);
1915     }
1916     fq_nmod_mpoly_make_monic(G, G, ctx);
1917 
1918 deflate_cleanup:
1919 
1920     _fmpz_vec_clear(Ashift, ctx->minfo->nvars);
1921     _fmpz_vec_clear(Astride, ctx->minfo->nvars);
1922     _fmpz_vec_clear(Bshift, ctx->minfo->nvars);
1923     _fmpz_vec_clear(Bstride, ctx->minfo->nvars);
1924     _fmpz_vec_clear(Gshift, ctx->minfo->nvars);
1925     _fmpz_vec_clear(Gstride, ctx->minfo->nvars);
1926 
1927 cleanup:
1928 
1929     fq_nmod_mpoly_clear(Anew, ctx);
1930     fq_nmod_mpoly_clear(Bnew, ctx);
1931 
1932     return success;
1933 }
1934 
_fq_nmod_mpoly_gcd_algo(fq_nmod_mpoly_t G,fq_nmod_mpoly_t Abar,fq_nmod_mpoly_t Bbar,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const fq_nmod_mpoly_ctx_t ctx,unsigned int algo)1935 int _fq_nmod_mpoly_gcd_algo(
1936     fq_nmod_mpoly_t G,
1937     fq_nmod_mpoly_t Abar,
1938     fq_nmod_mpoly_t Bbar,
1939     const fq_nmod_mpoly_t A,
1940     const fq_nmod_mpoly_t B,
1941     const fq_nmod_mpoly_ctx_t ctx,
1942     unsigned int algo)
1943 {
1944     FLINT_ASSERT(!fq_nmod_mpoly_is_zero(A, ctx));
1945     FLINT_ASSERT(!fq_nmod_mpoly_is_zero(B, ctx));
1946 
1947     if (A->bits <= FLINT_BITS && B->bits <= FLINT_BITS)
1948         return _fq_nmod_mpoly_gcd_algo_small(G, Abar, Bbar, A, B, ctx, algo);
1949     else
1950         return _fq_nmod_mpoly_gcd_algo_large(G, Abar, Bbar, A, B, ctx, algo);
1951 }
1952 
1953 
fq_nmod_mpoly_gcd(fq_nmod_mpoly_t G,const fq_nmod_mpoly_t A,const fq_nmod_mpoly_t B,const fq_nmod_mpoly_ctx_t ctx)1954 int fq_nmod_mpoly_gcd(
1955     fq_nmod_mpoly_t G,
1956     const fq_nmod_mpoly_t A,
1957     const fq_nmod_mpoly_t B,
1958     const fq_nmod_mpoly_ctx_t ctx)
1959 {
1960     if (fq_nmod_mpoly_is_zero(A, ctx))
1961     {
1962         if (fq_nmod_mpoly_is_zero(B, ctx))
1963             fq_nmod_mpoly_zero(G, ctx);
1964         else
1965             fq_nmod_mpoly_make_monic(G, B, ctx);
1966         return 1;
1967     }
1968 
1969     if (fq_nmod_mpoly_is_zero(B, ctx))
1970     {
1971         fq_nmod_mpoly_make_monic(G, A, ctx);
1972         return 1;
1973     }
1974 
1975     return _fq_nmod_mpoly_gcd_algo(G, NULL, NULL, A, B, ctx, MPOLY_GCD_USE_ALL);
1976 }
1977 
1978