1 /*
2     Copyright (C) 2018 - 2020 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
10 */
11 
12 #include "n_poly.h"
13 #include "nmod_mpoly.h"
14 #include "fq_nmod_mpoly.h"
15 #include "nmod_mpoly_factor.h"
16 #include "fq_zech_poly.h"
17 
18 /*
19     For each j, set out[j] to the evaluation of A at x_i = alpha[i] (i != j)
20     i.e. if nvars = 3
21         out[0] = A(x, alpha[1], alpha[2])
22         out[1] = A(alpha[0], x, alpha[2])
23         out[2] = A(alpha[0], alpha[1], x)
24 
25     If ignore[j] is nonzero, then out[j] need not be calculated, probably
26     because we shouldn't calculate it in dense form.
27 */
nmod_mpoly_evals(slong * Atdeg,n_poly_struct * out,const int * ignore,const nmod_mpoly_t A,ulong * Amin_exp,ulong * Amax_exp,ulong * Astride,mp_limb_t * alpha,const nmod_mpoly_ctx_t ctx)28 static void nmod_mpoly_evals(
29     slong * Atdeg,  /* total degree of deflated A, or -1 for overflow */
30     n_poly_struct * out,
31     const int * ignore,
32     const nmod_mpoly_t A,
33     ulong * Amin_exp,
34     ulong * Amax_exp,
35     ulong * Astride,
36     mp_limb_t * alpha,
37     const nmod_mpoly_ctx_t ctx)
38 {
39     slong i, j;
40     slong nvars = ctx->minfo->nvars;
41     ulong mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
42     slong * offsets, * shifts;
43     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
44     ulong * varexps;
45     ulong varexp;
46     slong total_degree, lo, hi;
47     mp_limb_t meval, t;
48     n_poly_struct * caches;
49 
50     offsets = FLINT_ARRAY_ALLOC(2*nvars, slong);
51     shifts = offsets + nvars;
52     varexps = FLINT_ARRAY_ALLOC(nvars, ulong);
53     caches = FLINT_ARRAY_ALLOC(3*nvars, n_poly_struct);
54     for (j = 0; j < nvars; j++)
55     {
56         n_poly_zero(out + j);
57         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
58         n_poly_init(caches + 3*j + 0);
59         n_poly_init(caches + 3*j + 1);
60         n_poly_init(caches + 3*j + 2);
61         nmod_pow_cache_start(alpha[j], caches + 3*j + 0,
62                                            caches + 3*j + 1, caches + 3*j + 2);
63     }
64 
65     total_degree = 0;
66     for (i = 0; i < A->length; i++)
67     {
68         meval = A->coeffs[i];
69 
70         hi = lo = 0;
71         for (j = 0; j < nvars; j++)
72         {
73             varexp = ((A->exps + N*i)[offsets[j]]>>shifts[j])&mask;
74 
75             FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j]) ||
76                                      (varexp - Amin_exp[j]) % Astride[j] == 0);
77 
78             varexps[j] = Astride[j] < 2 ? varexp - Amin_exp[j] :
79                                          (varexp - Amin_exp[j])/Astride[j];
80 
81             add_ssaaaa(hi, lo, hi, lo, 0, varexps[j]);
82 
83             t = nmod_pow_cache_mulpow_ui(meval, varexps[j], caches + 3*j + 0,
84                              caches + 3*j + 1, caches + 3*j + 2, ctx->mod);
85 
86             FLINT_ASSERT(t == nmod_mul(meval, nmod_pow_ui(alpha[j], varexps[j],
87                                                          ctx->mod), ctx->mod));
88             meval = t;
89         }
90 
91         if (hi == 0 && FLINT_SIGN_EXT(lo) == 0 && total_degree >= 0)
92             total_degree = FLINT_MAX(total_degree, lo);
93         else
94             total_degree = -1;
95 
96         for (j = 0; j < nvars; j++)
97         {
98             varexp = varexps[j];
99 
100             if (ignore[j])
101                 continue;
102 
103             n_poly_fit_length(out + j, varexp + 1);
104 
105             while (out[j].length <= varexp)
106             {
107                 out[j].coeffs[out[j].length] = 0;
108                 out[j].length++;
109             }
110 
111             t = nmod_pow_cache_mulpow_neg_ui(meval, varexp, caches + 3*j + 0,
112                                  caches + 3*j + 1, caches + 3*j + 2, ctx->mod);
113 
114             FLINT_ASSERT(t == nmod_mul(meval, nmod_pow_ui(nmod_inv(alpha[j],
115                                       ctx->mod), varexp, ctx->mod), ctx->mod));
116 
117             out[j].coeffs[varexp] = nmod_add(out[j].coeffs[varexp], t, ctx->mod);
118         }
119     }
120 
121     *Atdeg = total_degree;
122 
123     for (j = 0; j < nvars; j++)
124         _n_poly_normalise(out + j);
125 
126     for (j = 0; j < 3*nvars; j++)
127         n_poly_clear(caches + j);
128 
129     flint_free(offsets);
130     flint_free(varexps);
131     flint_free(caches);
132 
133 }
134 
135 
nmod_mpoly_evals_medprime(slong * Atdeg,fq_zech_poly_struct * out,const int * ignore,const nmod_mpoly_t A,ulong * Amin_exp,ulong * Amax_exp,ulong * Astride,const nmod_mpoly_ctx_t smctx,const fq_zech_struct * alphas,const fq_zech_ctx_t medctx)136 static void nmod_mpoly_evals_medprime(
137     slong * Atdeg,  /* total degree of deflated A, or -1 for overflow */
138     fq_zech_poly_struct * out,
139     const int * ignore,
140     const nmod_mpoly_t A,
141     ulong * Amin_exp,
142     ulong * Amax_exp,
143     ulong * Astride,
144     const nmod_mpoly_ctx_t smctx,
145     const fq_zech_struct * alphas,
146     const fq_zech_ctx_t medctx)
147 {
148     slong i, j;
149     slong nvars = smctx->minfo->nvars;
150     ulong mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
151     slong * offsets, * shifts;
152     slong N = mpoly_words_per_exp_sp(A->bits, smctx->minfo);
153     ulong * varexps;
154     ulong varexp, lo, hi;
155     slong total_degree;
156     fq_zech_t t1, meval;
157 
158     fq_zech_init(t1, medctx);
159     fq_zech_init(meval, medctx);
160 
161     offsets = FLINT_ARRAY_ALLOC(2*nvars, slong);
162     shifts = offsets + nvars;
163     varexps = FLINT_ARRAY_ALLOC(nvars, ulong);
164     for (j = 0; j < nvars; j++)
165     {
166         fq_zech_poly_zero(out + j, medctx);
167         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits,
168                                                                  smctx->minfo);
169     }
170 
171     total_degree = 0;
172     for (i = 0; i < A->length; i++)
173     {
174         fq_zech_set_ui(meval, A->coeffs[i], medctx);
175 
176         lo = hi = 0;
177         for (j = 0; j < nvars; j++)
178         {
179             varexp = ((A->exps + N*i)[offsets[j]]>>shifts[j])&mask;
180 
181             FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j]) ||
182                                      (varexp - Amin_exp[j]) % Astride[j] == 0);
183 
184             varexps[j] = Astride[j] < 2 ? varexp - Amin_exp[j] :
185                                          (varexp - Amin_exp[j])/Astride[j];
186 
187             add_ssaaaa(hi, lo, hi, lo, 0, varexps[j]);
188 
189             fq_zech_pow_ui(t1, alphas + j, varexps[j], medctx);
190             fq_zech_mul(meval, meval, t1, medctx);
191         }
192 
193         if (hi == 0 && FLINT_SIGN_EXT(lo) == 0 && total_degree >= 0)
194             total_degree = FLINT_MAX(total_degree, lo);
195         else
196             total_degree = -1;
197 
198         for (j = 0; j < nvars; j++)
199         {
200             varexp = varexps[j];
201 
202             if (ignore[j])
203                 continue;
204 
205             fq_zech_poly_fit_length(out + j, varexp + 1, medctx);
206 
207             while (out[j].length <= varexp)
208             {
209                 fq_zech_zero(out[j].coeffs + out[j].length, medctx);
210                 out[j].length++;
211             }
212 
213             fq_zech_inv(t1, alphas + j, medctx);
214             fq_zech_pow_ui(t1, t1, varexp, medctx);
215             fq_zech_mul(t1, meval, t1, medctx);
216             fq_zech_add(out[j].coeffs + varexp, out[j].coeffs + varexp, t1, medctx);
217         }
218     }
219 
220     *Atdeg = total_degree;
221 
222     for (j = 0; j < nvars; j++)
223         _fq_zech_poly_normalise(out + j, medctx);
224 
225     flint_free(offsets);
226     flint_free(varexps);
227 
228     fq_zech_clear(t1, medctx);
229     fq_zech_clear(meval, medctx);
230 }
231 
232 
nmod_mpoly_evals_lgprime(slong * Atdeg,n_fq_poly_struct * out,const int * ignore,const nmod_mpoly_t A,ulong * Amin_exp,ulong * Amax_exp,ulong * Astride,const nmod_mpoly_ctx_t smctx,const fq_nmod_struct * alpha,const fq_nmod_ctx_t lgctx)233 static void nmod_mpoly_evals_lgprime(
234     slong * Atdeg,  /* total degree of deflated A, or -1 for overflow */
235     n_fq_poly_struct * out,
236     const int * ignore,
237     const nmod_mpoly_t A,
238     ulong * Amin_exp,
239     ulong * Amax_exp,
240     ulong * Astride,
241     const nmod_mpoly_ctx_t smctx,
242     const fq_nmod_struct * alpha,
243     const fq_nmod_ctx_t lgctx)
244 {
245     slong d = fq_nmod_ctx_degree(lgctx);
246     slong i, j;
247     slong nvars = smctx->minfo->nvars;
248     ulong mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
249     slong * offsets, * shifts;
250     slong N = mpoly_words_per_exp_sp(A->bits, smctx->minfo);
251     ulong * varexps;
252     ulong varexp, lo, hi;
253     slong total_degree;
254     n_poly_struct * caches;
255     mp_limb_t * t = FLINT_ARRAY_ALLOC(2*d, mp_limb_t);
256     mp_limb_t * meval = t + d;
257 
258     offsets = FLINT_ARRAY_ALLOC(2*nvars, slong);
259     shifts = offsets + nvars;
260     varexps = FLINT_ARRAY_ALLOC(nvars, ulong);
261     caches = FLINT_ARRAY_ALLOC(3*nvars, n_poly_struct);
262     for (j = 0; j < nvars; j++)
263     {
264         n_fq_poly_zero(out + j);
265         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits,
266                                                                  smctx->minfo);
267         n_poly_init(caches + 3*j + 0);
268         n_poly_init(caches + 3*j + 1);
269         n_poly_init(caches + 3*j + 2);
270         n_fq_pow_cache_start_fq_nmod(alpha + j, caches + 3*j + 0,
271                                     caches + 3*j + 1, caches + 3*j + 2, lgctx);
272     }
273 
274     total_degree = 0;
275     for (i = 0; i < A->length; i++)
276     {
277         _n_fq_set_nmod(meval, A->coeffs[i], d);
278 
279         hi = lo = 0;
280         for (j = 0; j < nvars; j++)
281         {
282             varexp = ((A->exps + N*i)[offsets[j]]>>shifts[j])&mask;
283 
284             FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j]) ||
285                                      (varexp - Amin_exp[j]) % Astride[j] == 0);
286 
287             varexps[j] = Astride[j] < 2 ? varexp - Amin_exp[j] :
288                                          (varexp - Amin_exp[j])/Astride[j];
289 
290             add_ssaaaa(hi, lo, hi, lo, 0, varexps[j]);
291 
292             n_fq_pow_cache_mulpow_ui(meval, meval, varexps[j], caches + 3*j + 0,
293                                     caches + 3*j + 1, caches + 3*j + 2, lgctx);
294         }
295 
296         if (hi == 0 && FLINT_SIGN_EXT(lo) == 0 && total_degree >= 0)
297             total_degree = FLINT_MAX(total_degree, lo);
298         else
299             total_degree = -1;
300 
301         for (j = 0; j < nvars; j++)
302         {
303             varexp = varexps[j];
304 
305             if (ignore[j])
306                 continue;
307 
308             n_poly_fit_length(out + j, d*(varexp + 1));
309 
310             while (out[j].length <= varexp)
311             {
312                 _n_fq_zero(out[j].coeffs + d*out[j].length, d);
313                 out[j].length++;
314             }
315 
316             n_fq_pow_cache_mulpow_neg_ui(t, meval, varexp, caches + 3*j + 0,
317                                     caches + 3*j + 1, caches + 3*j + 2, lgctx);
318 
319             n_fq_add(out[j].coeffs + d*varexp, out[j].coeffs + d*varexp, t, lgctx);
320         }
321     }
322 
323     *Atdeg = total_degree;
324 
325     for (j = 0; j < nvars; j++)
326         _n_fq_poly_normalise(out + j, d);
327 
328     for (j = 0; j < 3*nvars; j++)
329         n_poly_clear(caches + j);
330 
331     flint_free(offsets);
332     flint_free(varexps);
333     flint_free(caches);
334     flint_free(t);
335 }
336 
337 
_set_estimates(mpoly_gcd_info_t I,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx)338 static void _set_estimates(
339     mpoly_gcd_info_t I,
340     const nmod_mpoly_t A,
341     const nmod_mpoly_t B,
342     const nmod_mpoly_ctx_t ctx)
343 {
344     int try_count = 0;
345     slong nvars = ctx->minfo->nvars;
346     slong i, j;
347     n_poly_t Geval;
348     n_poly_struct * Aevals, * Bevals;
349     mp_limb_t * alpha;
350     flint_rand_t state;
351     slong ignore_limit;
352     int * ignore;
353 
354     flint_randinit(state);
355 
356     ignore = FLINT_ARRAY_ALLOC(nvars, int);
357     alpha  = FLINT_ARRAY_ALLOC(nvars, mp_limb_t);
358     Aevals = FLINT_ARRAY_ALLOC(nvars, n_poly_struct);
359     Bevals = FLINT_ARRAY_ALLOC(nvars, n_poly_struct);
360 
361     n_poly_init(Geval);
362     for (j = 0; j < nvars; j++)
363     {
364         n_poly_init(Aevals + j);
365         n_poly_init(Bevals + j);
366     }
367 
368     ignore_limit = (A->length + B->length)/4096;
369     ignore_limit = FLINT_MAX(WORD(9999), ignore_limit);
370     I->Gdeflate_deg_bounds_are_nice = 1;
371     for (j = 0; j < nvars; j++)
372     {
373         if (I->Adeflate_deg[j] > ignore_limit ||
374             I->Bdeflate_deg[j] > ignore_limit)
375         {
376             ignore[j] = 1;
377             I->Gdeflate_deg_bounds_are_nice = 0;
378         }
379         else
380         {
381             ignore[j] = 0;
382         }
383     }
384 
385 try_again:
386 
387     if (++try_count > 10)
388     {
389         I->Gdeflate_deg_bounds_are_nice = 0;
390         for (j = 0; j < nvars; j++)
391         {
392             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
393                                                  I->Bdeflate_deg[j]);
394             I->Gterm_count_est[j] = 1 + I->Gdeflate_deg_bound[j]/2;
395         }
396 
397         goto cleanup;
398     }
399 
400     for (j = 0; j < nvars; j++)
401         alpha[j] = n_urandint(state, ctx->mod.n - 1) + 1;
402 
403     nmod_mpoly_evals(&I->Adeflate_tdeg, Aevals, ignore, A,
404                              I->Amin_exp, I->Amax_exp, I->Gstride, alpha, ctx);
405     nmod_mpoly_evals(&I->Bdeflate_tdeg, Bevals, ignore, B,
406                              I->Bmin_exp, I->Bmax_exp, I->Gstride, alpha, ctx);
407 
408     for (j = 0; j < nvars; j++)
409     {
410         if (ignore[j])
411         {
412             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
413                                                  I->Bdeflate_deg[j]);
414             I->Gterm_count_est[j] = 1 + I->Gdeflate_deg_bound[j]/2;
415         }
416         else
417         {
418             if (I->Adeflate_deg[j] != n_poly_degree(Aevals + j) ||
419                 I->Bdeflate_deg[j] != n_poly_degree(Bevals + j))
420             {
421                 goto try_again;
422             }
423 
424             n_poly_mod_gcd(Geval, Aevals + j, Bevals + j, ctx->mod);
425 
426             I->Gterm_count_est[j] = 0;
427             I->Gdeflate_deg_bound[j] = n_poly_degree(Geval);
428             for (i = I->Gdeflate_deg_bound[j]; i >= 0; i--)
429                 I->Gterm_count_est[j] += (Geval->coeffs[i] != 0);
430         }
431     }
432 
433 cleanup:
434 
435     n_poly_clear(Geval);
436     for (j = 0; j < nvars; j++)
437     {
438         n_poly_clear(Aevals + j);
439         n_poly_clear(Bevals + j);
440     }
441 
442     flint_free(ignore);
443     flint_free(alpha);
444     flint_free(Aevals);
445     flint_free(Bevals);
446 
447     flint_randclear(state);
448 
449     return;
450 }
451 
452 
453 /* call to improve on the (valid) results of smprime */
_set_estimates_medprime(mpoly_gcd_info_t I,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t smctx)454 static void _set_estimates_medprime(
455     mpoly_gcd_info_t I,
456     const nmod_mpoly_t A,
457     const nmod_mpoly_t B,
458     const nmod_mpoly_ctx_t smctx)
459 {
460     slong nvars = smctx->minfo->nvars;
461     int tries_left = 10;
462     slong i, j;
463     fq_zech_poly_t Geval;
464     fq_zech_poly_struct * Aevals, * Bevals;
465     fq_zech_struct * alpha;
466     flint_rand_t state;
467     slong ignore_limit;
468     int * ignore;
469     fq_zech_ctx_t medctx;
470     slong d, max_degree = n_flog(1000000, smctx->mod.n);
471     fmpz_t P;
472 
473     if (max_degree < 2)
474         return;
475 
476     flint_randinit(state);
477 
478     fmpz_init_set_ui(P, smctx->mod.n);
479     fq_zech_ctx_init(medctx, P, 1, "#");
480 
481     d = n_clog(500, smctx->mod.n);
482     d = FLINT_MAX(d, 1);
483 
484     ignore = FLINT_ARRAY_ALLOC(nvars, int);
485     alpha = FLINT_ARRAY_ALLOC(nvars, fq_zech_struct);
486     Aevals = FLINT_ARRAY_ALLOC(nvars, fq_zech_poly_struct);
487     Bevals = FLINT_ARRAY_ALLOC(nvars, fq_zech_poly_struct);
488     for (j = 0; j < nvars; j++)
489     {
490         fq_zech_poly_init(Aevals + j, medctx);
491         fq_zech_poly_init(Bevals + j, medctx);
492         fq_zech_init(alpha + j, medctx);
493     }
494 
495     fq_zech_poly_init(Geval, medctx);
496 
497     ignore_limit = (A->length + B->length)/4096;
498     ignore_limit = FLINT_MAX(WORD(9999), ignore_limit);
499     I->Gdeflate_deg_bounds_are_nice = 1;
500     for (j = 0; j < nvars; j++)
501     {
502         FLINT_ASSERT(I->Gdeflate_deg_bound[j] <= I->Adeflate_deg[j]);
503         FLINT_ASSERT(I->Gdeflate_deg_bound[j] <= I->Bdeflate_deg[j]);
504 
505         if (I->Adeflate_deg[j] > ignore_limit ||
506             I->Bdeflate_deg[j] > ignore_limit)
507         {
508             ignore[j] = 1;
509             I->Gdeflate_deg_bounds_are_nice = 0;
510         }
511         else
512         {
513             ignore[j] = 0;
514         }
515     }
516 
517 try_again:
518 
519     tries_left--;
520     d = FLINT_MIN(d + (tries_left % 2), max_degree);
521     if (tries_left < 0)
522     {
523         I->Gdeflate_deg_bounds_are_nice = 0;
524         goto cleanup;
525     }
526 
527     fq_zech_ctx_clear(medctx);
528     fq_zech_ctx_init(medctx, P, d, "#");
529 
530     for (j = 0; j < nvars; j++)
531         fq_zech_rand_not_zero(alpha + j, state, medctx);
532 
533     nmod_mpoly_evals_medprime(&I->Adeflate_tdeg, Aevals, ignore, A,
534                    I->Amin_exp, I->Amax_exp, I->Gstride, smctx, alpha, medctx);
535     nmod_mpoly_evals_medprime(&I->Bdeflate_tdeg, Bevals, ignore, B,
536                    I->Bmin_exp, I->Bmax_exp, I->Gstride, smctx, alpha, medctx);
537 
538     for (j = 0; j < nvars; j++)
539     {
540         if (!ignore[j])
541         {
542             if (I->Adeflate_deg[j] != fq_zech_poly_degree(Aevals + j, medctx) ||
543                 I->Bdeflate_deg[j] != fq_zech_poly_degree(Bevals + j, medctx))
544             {
545                 goto try_again;
546             }
547 
548             fq_zech_poly_gcd(Geval, Aevals + j, Bevals + j, medctx);
549 
550             I->Gterm_count_est[j] = 0;
551             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Gdeflate_deg_bound[j],
552                                            fq_zech_poly_degree(Geval, medctx));
553             for (i = I->Gdeflate_deg_bound[j]; i >= 0; i--)
554                 I->Gterm_count_est[j] += !fq_zech_is_zero(Geval->coeffs + i, medctx);
555         }
556     }
557 
558 cleanup:
559 
560     fq_zech_poly_clear(Geval, medctx);
561     for (j = 0; j < nvars; j++)
562     {
563         fq_zech_poly_clear(Aevals + j, medctx);
564         fq_zech_poly_clear(Bevals + j, medctx);
565         fq_zech_clear(alpha + j, medctx);
566     }
567     flint_free(alpha);
568     flint_free(Aevals);
569     flint_free(Bevals);
570     flint_free(ignore);
571 
572     fq_zech_ctx_clear(medctx);
573 
574     fmpz_clear(P);
575 
576     flint_randclear(state);
577 
578     return;
579 }
580 
581 /* call to improve on the (valid) results of smprime/medprime */
_set_estimates_lgprime(mpoly_gcd_info_t I,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t smctx)582 static void _set_estimates_lgprime(
583     mpoly_gcd_info_t I,
584     const nmod_mpoly_t A,
585     const nmod_mpoly_t B,
586     const nmod_mpoly_ctx_t smctx)
587 {
588     slong nvars = smctx->minfo->nvars;
589     int try_count = 0;
590     slong i, j;
591     n_fq_poly_t Geval;
592     n_fq_poly_struct * Aevals, * Bevals;
593     fq_nmod_struct * alpha;
594     flint_rand_t state;
595     slong ignore_limit;
596     int * ignore;
597     fq_nmod_mpoly_ctx_t lgctx;
598     slong d;
599 
600     flint_randinit(state);
601 
602     d = WORD(20)/(FLINT_BIT_COUNT(smctx->mod.n));
603     d = FLINT_MAX(WORD(2), d);
604     fq_nmod_mpoly_ctx_init_deg(lgctx, nvars, ORD_LEX, smctx->mod.n, d);
605 
606     ignore = FLINT_ARRAY_ALLOC(nvars, int);
607     alpha = FLINT_ARRAY_ALLOC(nvars, fq_nmod_struct);
608     Aevals = FLINT_ARRAY_ALLOC(nvars, n_fq_poly_struct);
609     Bevals = FLINT_ARRAY_ALLOC(nvars, n_fq_poly_struct);
610     for (j = 0; j < nvars; j++)
611     {
612         n_fq_poly_init(Aevals + j);
613         n_fq_poly_init(Bevals + j);
614         fq_nmod_init(alpha + j, lgctx->fqctx);
615     }
616 
617     n_fq_poly_init(Geval);
618 
619     ignore_limit = (A->length + B->length)/4096;
620     ignore_limit = FLINT_MAX(WORD(9999), ignore_limit);
621     I->Gdeflate_deg_bounds_are_nice = 1;
622     for (j = 0; j < nvars; j++)
623     {
624         FLINT_ASSERT(I->Gdeflate_deg_bound[j] <= I->Adeflate_deg[j]);
625         FLINT_ASSERT(I->Gdeflate_deg_bound[j] <= I->Bdeflate_deg[j]);
626 
627         if (I->Adeflate_deg[j] > ignore_limit ||
628             I->Bdeflate_deg[j] > ignore_limit)
629         {
630             ignore[j] = 1;
631             I->Gdeflate_deg_bounds_are_nice = 0;
632         }
633         else
634         {
635             ignore[j] = 0;
636         }
637     }
638 
639 try_again:
640 
641     if (++try_count > 10)
642     {
643         I->Gdeflate_deg_bounds_are_nice = 0;
644         goto cleanup;
645     }
646 
647     for (j = 0; j < nvars; j++)
648         fq_nmod_rand_not_zero(alpha + j, state, lgctx->fqctx);
649 
650     nmod_mpoly_evals_lgprime(&I->Adeflate_tdeg, Aevals, ignore, A,
651              I->Amin_exp, I->Amax_exp, I->Gstride, smctx, alpha, lgctx->fqctx);
652     nmod_mpoly_evals_lgprime(&I->Bdeflate_tdeg, Bevals, ignore, B,
653              I->Bmin_exp, I->Bmax_exp, I->Gstride, smctx, alpha, lgctx->fqctx);
654 
655     for (j = 0; j < nvars; j++)
656     {
657         if (!ignore[j])
658         {
659             if (I->Adeflate_deg[j] != n_fq_poly_degree(Aevals + j) ||
660                 I->Bdeflate_deg[j] != n_fq_poly_degree(Bevals + j))
661             {
662                 d++;
663                 fq_nmod_mpoly_ctx_change_modulus(lgctx, d);
664                 goto try_again;
665             }
666 
667             n_fq_poly_gcd(Geval, Aevals + j, Bevals + j, lgctx->fqctx);
668 
669             I->Gterm_count_est[j] = 0;
670             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Gdeflate_deg_bound[j],
671                                                       n_fq_poly_degree(Geval));
672             for (i = I->Gdeflate_deg_bound[j]; i >= 0; i--)
673                 I->Gterm_count_est[j] += !_n_fq_is_zero(Geval->coeffs + d*i, d);
674         }
675     }
676 
677 cleanup:
678 
679     n_fq_poly_clear(Geval);
680     for (j = 0; j < nvars; j++)
681     {
682         n_fq_poly_clear(Aevals + j);
683         n_fq_poly_clear(Bevals + j);
684         fq_nmod_clear(alpha + j, lgctx->fqctx);
685     }
686     flint_free(alpha);
687     flint_free(Aevals);
688     flint_free(Bevals);
689     flint_free(ignore);
690 
691     fq_nmod_mpoly_ctx_clear(lgctx);
692 
693     flint_randclear(state);
694 
695     return;
696 }
697 
698 
699 /* (Abar, Bbar) = (A, B) */
_parallel_set(nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx)700 static void _parallel_set(
701     nmod_mpoly_t Abar, /* could be NULL */
702     nmod_mpoly_t Bbar, /* could be NULL */
703     const nmod_mpoly_t A,
704     const nmod_mpoly_t B,
705     const nmod_mpoly_ctx_t ctx)
706 {
707     if (Abar == B && Bbar == A)
708     {
709         FLINT_ASSERT(Abar != NULL && Bbar != NULL);
710         nmod_mpoly_set(Abar, B, ctx);
711         nmod_mpoly_set(Bbar, A, ctx);
712         nmod_mpoly_swap(Abar, Bbar, ctx);
713     }
714     else if (Abar == B && Bbar != A)
715     {
716         FLINT_ASSERT(Abar != NULL);
717         if (Bbar != NULL)
718             nmod_mpoly_set(Bbar, B, ctx);
719         nmod_mpoly_set(Abar, A, ctx);
720     }
721     else
722     {
723         if (Abar != NULL)
724             nmod_mpoly_set(Abar, A, ctx);
725         if (Bbar != NULL)
726             nmod_mpoly_set(Bbar, B, ctx);
727     }
728 }
729 
730 
731 /* The variables in ess(A) and ess(B) are disjoint. gcd is trivial to compute */
_do_trivial(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const mpoly_gcd_info_t I,const nmod_mpoly_ctx_t ctx)732 static int _do_trivial(
733     nmod_mpoly_t G,
734     nmod_mpoly_t Abar,  /* could be NULL */
735     nmod_mpoly_t Bbar,  /* could be NULL */
736     const nmod_mpoly_t A,
737     const nmod_mpoly_t B,
738     const mpoly_gcd_info_t I,
739     const nmod_mpoly_ctx_t ctx)
740 {
741     _parallel_set(Abar, Bbar, A, B, ctx);
742 
743     if (Abar != NULL)
744         mpoly_monomials_shift_right_ui(Abar->exps, Abar->bits, Abar->length,
745                                                       I->Gmin_exp, ctx->minfo);
746 
747     if (Bbar != NULL)
748         mpoly_monomials_shift_right_ui(Bbar->exps, Bbar->bits, Bbar->length,
749                                                       I->Gmin_exp, ctx->minfo);
750 
751     nmod_mpoly_fit_length_reset_bits(G, 1, I->Gbits, ctx);
752     mpoly_set_monomial_ui(G->exps, I->Gmin_exp, I->Gbits, ctx->minfo);
753     G->coeffs[0] = UWORD(1);
754     G->length = 1;
755 
756     return 1;
757 }
758 
759 /*********************** Easy when B is a monomial ***************************/
_do_monomial_gcd(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx)760 static int _do_monomial_gcd(
761     nmod_mpoly_t G,
762     nmod_mpoly_t Abar,  /* could be NULL */
763     nmod_mpoly_t Bbar,  /* could be NULL */
764     const nmod_mpoly_t A,
765     const nmod_mpoly_t B,
766     const nmod_mpoly_ctx_t ctx)
767 {
768     slong i;
769     flint_bitcnt_t Gbits = FLINT_MIN(A->bits, B->bits);
770     fmpz * minAfields, * minAdegs, * minBdegs;
771     TMP_INIT;
772 
773     FLINT_ASSERT(A->length > 0);
774     FLINT_ASSERT(B->length == 1);
775 
776     TMP_START;
777 
778     /* get the field-wise minimum of A */
779     minAfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
780     for (i = 0; i < ctx->minfo->nfields; i++)
781         fmpz_init(minAfields + i);
782     mpoly_min_fields_fmpz(minAfields, A->exps, A->length, A->bits, ctx->minfo);
783 
784     /* unpack to get the min degrees of each variable in A */
785     minAdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
786     for (i = 0; i < ctx->minfo->nvars; i++)
787         fmpz_init(minAdegs + i);
788     mpoly_get_monomial_ffmpz_unpacked_ffmpz(minAdegs, minAfields, ctx->minfo);
789 
790     /* get the degree of each variable in B */
791     minBdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
792     for (i = 0; i < ctx->minfo->nvars; i++)
793         fmpz_init(minBdegs + i);
794     mpoly_get_monomial_ffmpz(minBdegs, B->exps, B->bits, ctx->minfo);
795 
796     /* compute the degree of each variable in G */
797     _fmpz_vec_min_inplace(minBdegs, minAdegs, ctx->minfo->nvars);
798 
799     _parallel_set(Abar, Bbar, A, B, ctx);
800 
801     if (Abar != NULL)
802         mpoly_monomials_shift_right_ffmpz(Abar->exps, Abar->bits, Abar->length,
803                                                          minBdegs, ctx->minfo);
804 
805     if (Bbar != NULL)
806         mpoly_monomials_shift_right_ffmpz(Bbar->exps, Bbar->bits, Bbar->length,
807                                                          minBdegs, ctx->minfo);
808 
809     nmod_mpoly_fit_length_reset_bits(G, 1, Gbits, ctx);
810     mpoly_set_monomial_ffmpz(G->exps, minBdegs, Gbits, ctx->minfo);
811     G->coeffs[0] = 1;
812     G->length = 1;
813 
814     for (i = 0; i < ctx->minfo->nfields; i++)
815     {
816         fmpz_clear(minAfields + i);
817     }
818     for (i = 0; i < ctx->minfo->nvars; i++)
819     {
820         fmpz_clear(minAdegs + i);
821         fmpz_clear(minBdegs + i);
822     }
823 
824     TMP_END;
825 
826     return 1;
827 }
828 
829 
830 /********************** See if cofactors are monomials ***********************/
_try_monomial_cofactors(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx)831 static int _try_monomial_cofactors(
832     nmod_mpoly_t G,
833     nmod_mpoly_t Abar,  /* could be NULL */
834     nmod_mpoly_t Bbar,  /* could be NULL */
835     const nmod_mpoly_t A,
836     const nmod_mpoly_t B,
837     const nmod_mpoly_ctx_t ctx)
838 {
839     int success;
840     slong i, j;
841     slong NA, NG;
842     slong nvars = ctx->minfo->nvars;
843     fmpz * Abarexps, * Bbarexps, * Texps;
844     mp_limb_t a0, b0, a0inv;
845     nmod_mpoly_t T;
846     flint_bitcnt_t Gbits = FLINT_MIN(A->bits, B->bits);
847     flint_bitcnt_t Abarbits = A->bits;
848     flint_bitcnt_t Bbarbits = B->bits;
849     TMP_INIT;
850 
851     FLINT_ASSERT(A->length > 0);
852     FLINT_ASSERT(B->length > 0);
853 
854     if (A->length != B->length)
855         return 0;
856 
857     a0 = A->coeffs[0];
858     b0 = B->coeffs[0];
859 
860     for (i = A->length - 1; i > 0; i--)
861     {
862         success = (nmod_mul(a0, B->coeffs[i], ctx->mod) ==
863                    nmod_mul(b0, A->coeffs[i], ctx->mod));
864         if (!success)
865             goto cleanup;
866     }
867 
868     TMP_START;
869 
870     Abarexps = (fmpz *) TMP_ALLOC(3*nvars*sizeof(fmpz));
871     Bbarexps = Abarexps + 1*nvars;
872     Texps    = Abarexps + 2*nvars;
873     for (j = 0; j < nvars; j++)
874     {
875         fmpz_init(Abarexps + j);
876         fmpz_init(Bbarexps + j);
877         fmpz_init(Texps + j);
878     }
879 
880     success = mpoly_monomial_cofactors(Abarexps, Bbarexps, A->exps, A->bits,
881                                       B->exps, B->bits, A->length, ctx->minfo);
882     if (!success)
883         goto cleanup_tmp;
884 
885     nmod_mpoly_init3(T, A->length, Gbits, ctx);
886     NG = mpoly_words_per_exp(Gbits, ctx->minfo);
887     NA = mpoly_words_per_exp(A->bits, ctx->minfo);
888     a0inv = nmod_inv(a0, ctx->mod);
889     T->length = A->length;
890     for (i = 0; i < A->length; i++)
891     {
892         mpoly_get_monomial_ffmpz(Texps, A->exps + NA*i, A->bits, ctx->minfo);
893         _fmpz_vec_sub(Texps, Texps, Abarexps, nvars);
894         mpoly_set_monomial_ffmpz(T->exps + NG*i, Texps, Gbits, ctx->minfo);
895         T->coeffs[i] = nmod_mul(A->coeffs[i], a0inv, ctx->mod);
896     }
897     nmod_mpoly_swap(G, T, ctx);
898     nmod_mpoly_clear(T, ctx);
899 
900     if (Abar != NULL)
901     {
902         nmod_mpoly_fit_length_reset_bits(Abar, 1, Abarbits, ctx);
903         mpoly_set_monomial_ffmpz(Abar->exps, Abarexps, Abarbits, ctx->minfo);
904         Abar->coeffs[0] = a0;
905         _nmod_mpoly_set_length(Abar, 1, ctx);
906     }
907 
908     if (Bbar != NULL)
909     {
910         nmod_mpoly_fit_length_reset_bits(Bbar, 1, Bbarbits, ctx);
911         mpoly_set_monomial_ffmpz(Bbar->exps, Bbarexps, Bbarbits, ctx->minfo);
912         Bbar->coeffs[0] = b0;
913         _nmod_mpoly_set_length(Bbar, 1, ctx);
914     }
915 
916     success = 1;
917 
918 cleanup_tmp:
919 
920     for (j = 0; j < nvars; j++)
921     {
922         fmpz_clear(Abarexps + j);
923         fmpz_clear(Bbarexps + j);
924         fmpz_clear(Texps + j);
925     }
926 
927     TMP_END;
928 
929 cleanup:
930 
931     return success;
932 }
933 
934 
935 /***  ess(A) and ess(B) depend on only one variable v_in_both ****************/
_do_univar(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,slong v_in_both,const mpoly_gcd_info_t I,const nmod_mpoly_ctx_t ctx)936 int _do_univar(
937     nmod_mpoly_t G,
938     nmod_mpoly_t Abar,
939     nmod_mpoly_t Bbar,
940     const nmod_mpoly_t A,
941     const nmod_mpoly_t B,
942     slong v_in_both,
943     const mpoly_gcd_info_t I,
944     const nmod_mpoly_ctx_t ctx)
945 {
946     nmod_poly_t a, b, g, t;
947 
948     nmod_poly_init_mod(a, ctx->mod);
949     nmod_poly_init_mod(b, ctx->mod);
950     nmod_poly_init_mod(g, ctx->mod);
951     nmod_poly_init_mod(t, ctx->mod);
952 
953     _nmod_mpoly_to_nmod_poly_deflate(a, A, v_in_both, I->Amin_exp, I->Gstride, ctx);
954     _nmod_mpoly_to_nmod_poly_deflate(b, B, v_in_both, I->Bmin_exp, I->Gstride, ctx);
955     nmod_poly_gcd(g, a, b);
956     _nmod_mpoly_from_nmod_poly_inflate(G, I->Gbits, g, v_in_both,
957                                                  I->Gmin_exp, I->Gstride, ctx);
958     if (Abar != NULL)
959     {
960         nmod_poly_div(t, a, g);
961         _nmod_mpoly_from_nmod_poly_inflate(Abar, I->Abarbits, t, v_in_both,
962                                               I->Abarmin_exp, I->Gstride, ctx);
963     }
964 
965     if (Bbar != NULL)
966     {
967         nmod_poly_div(t, b, g);
968         _nmod_mpoly_from_nmod_poly_inflate(Bbar, I->Bbarbits, t, v_in_both,
969                                               I->Bbarmin_exp, I->Gstride, ctx);
970     }
971 
972     nmod_poly_clear(a);
973     nmod_poly_clear(b);
974     nmod_poly_clear(g);
975     nmod_poly_clear(t);
976 
977     return 1;
978 }
979 
980 
981 /********* Assume B has length one when converted to univar format ***********/
_try_missing_var(nmod_mpoly_t G,flint_bitcnt_t Gbits,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,slong var,const nmod_mpoly_t A,ulong Ashift,const nmod_mpoly_t B,ulong Bshift,const nmod_mpoly_ctx_t ctx)982 static int _try_missing_var(
983     nmod_mpoly_t G, flint_bitcnt_t Gbits,
984     nmod_mpoly_t Abar,
985     nmod_mpoly_t Bbar,
986     slong var,
987     const nmod_mpoly_t A, ulong Ashift,
988     const nmod_mpoly_t B, ulong Bshift,
989     const nmod_mpoly_ctx_t ctx)
990 {
991     int success;
992     nmod_mpoly_univar_t Au;
993 
994     nmod_mpoly_univar_init(Au, ctx);
995 
996 #if FLINT_WANT_ASSERT
997     nmod_mpoly_to_univar(Au, B, var, ctx);
998     FLINT_ASSERT(Au->length == 1);
999 #endif
1000     nmod_mpoly_to_univar(Au, A, var, ctx);
1001 
1002     nmod_mpoly_univar_fit_length(Au, Au->length + 1, ctx);
1003     nmod_mpoly_set(Au->coeffs + Au->length, B, ctx);
1004     Au->length++;
1005 
1006     if (Abar == NULL && Bbar == NULL)
1007     {
1008         success = _nmod_mpoly_vec_content_mpoly(G, Au->coeffs, Au->length, ctx);
1009         if (!success)
1010             goto cleanup;
1011 
1012         nmod_mpoly_repack_bits_inplace(G, Gbits, ctx);
1013         _mpoly_gen_shift_left(G->exps, G->bits, G->length,
1014                                    var, FLINT_MIN(Ashift, Bshift), ctx->minfo);
1015     }
1016     else
1017     {
1018         nmod_mpoly_t tG, tAbar, tBbar;
1019 
1020         nmod_mpoly_init(tG, ctx);
1021         nmod_mpoly_init(tAbar, ctx);
1022         nmod_mpoly_init(tBbar, ctx);
1023 
1024         success = _nmod_mpoly_vec_content_mpoly(tG, Au->coeffs, Au->length, ctx);
1025         if (!success)
1026             goto cleanup;
1027 
1028         nmod_mpoly_repack_bits_inplace(tG, Gbits, ctx);
1029         _mpoly_gen_shift_left(tG->exps, tG->bits, tG->length,
1030                                    var, FLINT_MIN(Ashift, Bshift), ctx->minfo);
1031 
1032         if (Abar != NULL)
1033         {
1034             success = nmod_mpoly_divides(tAbar, A, tG, ctx);
1035             FLINT_ASSERT(success);
1036         }
1037 
1038         if (Bbar != NULL)
1039         {
1040             success = nmod_mpoly_divides(tBbar, B, tG, ctx);
1041             FLINT_ASSERT(success);
1042         }
1043 
1044         nmod_mpoly_swap(G, tG, ctx);
1045 
1046         if (Abar != NULL)
1047             nmod_mpoly_swap(Abar, tAbar, ctx);
1048 
1049         if (Bbar != NULL)
1050             nmod_mpoly_swap(Bbar, tBbar, ctx);
1051 
1052         nmod_mpoly_clear(tG, ctx);
1053         nmod_mpoly_clear(tAbar, ctx);
1054         nmod_mpoly_clear(tBbar, ctx);
1055     }
1056 
1057     success = 1;
1058 
1059 cleanup:
1060 
1061     nmod_mpoly_univar_clear(Au, ctx);
1062 
1063     return success;
1064 }
1065 
1066 
1067 /************************ See if B divides A ********************************/
_try_divides(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t BB,const nmod_mpoly_ctx_t ctx)1068 static int _try_divides(
1069     nmod_mpoly_t G,
1070     nmod_mpoly_t Abar,
1071     nmod_mpoly_t Bbar,
1072     const nmod_mpoly_t A,
1073     const nmod_mpoly_t BB,
1074     const nmod_mpoly_ctx_t ctx)
1075 {
1076     int success = 0;
1077     nmod_mpoly_t Q, B, M;
1078 
1079     nmod_mpoly_init(Q, ctx);
1080     nmod_mpoly_init(B, ctx);
1081     nmod_mpoly_init(M, ctx);
1082 
1083     /* BB = M*B */
1084     nmod_mpoly_term_content(M, BB, ctx);
1085     nmod_mpoly_divides(B, BB, M, ctx);
1086 
1087     if (nmod_mpoly_divides(Q, A, B, ctx))
1088     {
1089         /* gcd(Q*B, M*B) */
1090         _do_monomial_gcd(G, Abar, Bbar, Q, M, ctx);
1091         nmod_mpoly_mul(G, G, B, ctx);
1092         success = 1;
1093     }
1094 
1095     nmod_mpoly_clear(Q, ctx);
1096     nmod_mpoly_clear(B, ctx);
1097     nmod_mpoly_clear(M, ctx);
1098 
1099     return success;
1100 }
1101 
1102 
1103 /********************** Hit A and B with zippel ******************************/
_try_zippel(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const mpoly_gcd_info_t I,const nmod_mpoly_ctx_t ctx)1104 static int _try_zippel(
1105     nmod_mpoly_t G,
1106     nmod_mpoly_t Abar,
1107     nmod_mpoly_t Bbar,
1108     const nmod_mpoly_t A,
1109     const nmod_mpoly_t B,
1110     const mpoly_gcd_info_t I,
1111     const nmod_mpoly_ctx_t ctx)
1112 {
1113     slong m = I->mvars;
1114     int success;
1115     flint_bitcnt_t wbits;
1116     flint_rand_t randstate;
1117     nmod_mpoly_ctx_t uctx;
1118     nmod_mpolyu_t Au, Bu, Gu, Abaru, Bbaru;
1119     nmod_mpoly_t Ac, Bc, Gc, Abarc, Bbarc;
1120 
1121     FLINT_ASSERT(A->bits <= FLINT_BITS);
1122     FLINT_ASSERT(B->bits <= FLINT_BITS);
1123 
1124     if (!(I->can_use & MPOLY_GCD_USE_ZIPPEL))
1125         return 0;
1126 
1127     FLINT_ASSERT(m >= WORD(2));
1128     FLINT_ASSERT(A->length > 0);
1129     FLINT_ASSERT(B->length > 0);
1130 
1131     flint_randinit(randstate);
1132 
1133     /* uctx is context for Z[y_1,...,y_{m-1}]*/
1134     nmod_mpoly_ctx_init(uctx, m - 1, ORD_LEX, ctx->mod.n);
1135 
1136     wbits = FLINT_MAX(A->bits, B->bits);
1137 
1138     nmod_mpolyu_init(Au, wbits, uctx);
1139     nmod_mpolyu_init(Bu, wbits, uctx);
1140     nmod_mpolyu_init(Gu, wbits, uctx);
1141     nmod_mpolyu_init(Abaru, wbits, uctx);
1142     nmod_mpolyu_init(Bbaru, wbits, uctx);
1143     nmod_mpoly_init3(Ac, 0, wbits, uctx);
1144     nmod_mpoly_init3(Bc, 0, wbits, uctx);
1145     nmod_mpoly_init3(Gc, 0, wbits, uctx);
1146     nmod_mpoly_init3(Abarc, 0, wbits, uctx);
1147     nmod_mpoly_init3(Bbarc, 0, wbits, uctx);
1148 
1149     nmod_mpoly_to_mpolyu_perm_deflate_threaded_pool(Au, uctx, A, ctx,
1150                              I->zippel_perm, I->Amin_exp, I->Gstride, NULL, 0);
1151     nmod_mpoly_to_mpolyu_perm_deflate_threaded_pool(Bu, uctx, B, ctx,
1152                              I->zippel_perm, I->Bmin_exp, I->Gstride, NULL, 0);
1153 
1154     success = nmod_mpolyu_content_mpoly(Ac, Au, uctx) &&
1155               nmod_mpolyu_content_mpoly(Bc, Bu, uctx);
1156     if (!success)
1157         goto cleanup;
1158 
1159     nmod_mpolyu_divexact_mpoly_inplace(Au, Ac, uctx);
1160     nmod_mpolyu_divexact_mpoly_inplace(Bu, Bc, uctx);
1161 
1162     success = nmod_mpolyu_gcdm_zippel(Gu, Abaru, Bbaru, Au, Bu, uctx, randstate);
1163     if (!success)
1164         goto cleanup;
1165 
1166     if (Abar == NULL && Bbar == NULL)
1167     {
1168         success = nmod_mpoly_gcd(Gc, Ac, Bc, uctx);
1169         if (!success)
1170             goto cleanup;
1171 
1172         nmod_mpoly_repack_bits_inplace(Gc, wbits, uctx);
1173         nmod_mpolyu_mul_mpoly_inplace(Gu, Gc, uctx);
1174 
1175         nmod_mpoly_from_mpolyu_perm_inflate(G, I->Gbits, ctx, Gu, uctx,
1176                                       I->zippel_perm, I->Gmin_exp, I->Gstride);
1177     }
1178     else
1179     {
1180         success = nmod_mpoly_gcd_cofactors(Gc, Abarc, Bbarc, Ac, Bc, uctx);
1181         if (!success)
1182             goto cleanup;
1183 
1184         nmod_mpoly_repack_bits_inplace(Gc, wbits, uctx);
1185         nmod_mpoly_repack_bits_inplace(Abarc, wbits, uctx);
1186         nmod_mpoly_repack_bits_inplace(Bbarc, wbits, uctx);
1187 
1188         nmod_mpolyu_mul_mpoly_inplace(Gu, Gc, uctx);
1189         nmod_mpolyu_mul_mpoly_inplace(Abaru, Abarc, uctx);
1190         nmod_mpolyu_mul_mpoly_inplace(Bbaru, Bbarc, uctx);
1191 
1192         nmod_mpoly_from_mpolyu_perm_inflate(G, I->Gbits, ctx, Gu, uctx,
1193                                       I->zippel_perm, I->Gmin_exp, I->Gstride);
1194 
1195         if (Abar != NULL)
1196             nmod_mpoly_from_mpolyu_perm_inflate(Abar, I->Abarbits, ctx,
1197                       Abaru, uctx, I->zippel_perm, I->Abarmin_exp, I->Gstride);
1198 
1199         if (Bbar != NULL)
1200             nmod_mpoly_from_mpolyu_perm_inflate(Bbar, I->Bbarbits, ctx,
1201                       Bbaru, uctx, I->zippel_perm, I->Bbarmin_exp, I->Gstride);
1202     }
1203 
1204     success = 1;
1205 
1206 cleanup:
1207 
1208     nmod_mpolyu_clear(Au, uctx);
1209     nmod_mpolyu_clear(Bu, uctx);
1210     nmod_mpolyu_clear(Gu, uctx);
1211     nmod_mpolyu_clear(Abaru, uctx);
1212     nmod_mpolyu_clear(Bbaru, uctx);
1213     nmod_mpoly_clear(Ac, uctx);
1214     nmod_mpoly_clear(Bc, uctx);
1215     nmod_mpoly_clear(Gc, uctx);
1216     nmod_mpoly_clear(Abarc, uctx);
1217     nmod_mpoly_clear(Bbarc, uctx);
1218 
1219     nmod_mpoly_ctx_clear(uctx);
1220 
1221     flint_randclear(randstate);
1222 
1223     return success;
1224 }
1225 
1226 
_try_zippel2(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const mpoly_gcd_info_t I,const nmod_mpoly_ctx_t ctx)1227 static int _try_zippel2(
1228     nmod_mpoly_t G,
1229     nmod_mpoly_t Abar,
1230     nmod_mpoly_t Bbar,
1231     const nmod_mpoly_t A,
1232     const nmod_mpoly_t B,
1233     const mpoly_gcd_info_t I,
1234     const nmod_mpoly_ctx_t ctx)
1235 {
1236     slong i, k;
1237     slong m = I->mvars;
1238     int success;
1239     flint_bitcnt_t wbits;
1240     nmod_mpoly_ctx_t lctx;
1241     nmod_mpoly_t Al, Bl, Gl, Abarl, Bbarl;
1242     nmod_mpoly_t Al_lc, Bl_lc, Ac, Bc, Gc, Abarc, Bbarc, Gamma;
1243     slong * tmp, * Gl_degs, * Al_degs, * Bl_degs, * Gamma_degs, * Gguess;
1244     slong max_degree;
1245 
1246     FLINT_ASSERT(A->bits <= FLINT_BITS);
1247     FLINT_ASSERT(B->bits <= FLINT_BITS);
1248     FLINT_ASSERT(A->length > 0);
1249     FLINT_ASSERT(B->length > 0);
1250 
1251     if (!(I->can_use & MPOLY_GCD_USE_ZIPPEL2))
1252         return 0;
1253 
1254     FLINT_ASSERT(m >= 3);
1255 
1256     tmp = FLINT_ARRAY_ALLOC(5*m, slong);
1257     Al_degs   = tmp + 1*m;
1258     Bl_degs   = tmp + 2*m;
1259     Gl_degs   = tmp + 3*m;
1260     Gamma_degs = tmp + 4*m;
1261 
1262     nmod_mpoly_ctx_init(lctx, m, ORD_LEX, ctx->mod.n);
1263 
1264     max_degree = 0;
1265     for (i = 0; i < m; i++)
1266     {
1267         k = I->zippel2_perm[i];
1268 
1269         Gl_degs[i] = I->Gdeflate_deg_bound[k];
1270 
1271         Al_degs[i] = I->Adeflate_deg[k];
1272         max_degree = FLINT_MAX(max_degree, Al_degs[i]);
1273 
1274         Bl_degs[i] = I->Bdeflate_deg[k];
1275         max_degree = FLINT_MAX(max_degree, Bl_degs[i]);
1276     }
1277 
1278     wbits = 1 + FLINT_BIT_COUNT(max_degree);
1279     wbits = mpoly_fix_bits(wbits, lctx->minfo);
1280     FLINT_ASSERT(wbits <= FLINT_BITS);
1281 
1282     nmod_mpoly_init3(Al, 0, wbits, lctx);
1283     nmod_mpoly_init3(Bl, 0, wbits, lctx);
1284     nmod_mpoly_init3(Gl, 0, wbits, lctx);
1285     nmod_mpoly_init3(Abarl, 0, wbits, lctx);
1286     nmod_mpoly_init3(Bbarl, 0, wbits, lctx);
1287     nmod_mpoly_init3(Ac, 0, wbits, lctx);
1288     nmod_mpoly_init3(Bc, 0, wbits, lctx);
1289     nmod_mpoly_init3(Gc, 0, wbits, lctx);
1290     nmod_mpoly_init3(Abarc, 0, wbits, lctx);
1291     nmod_mpoly_init3(Bbarc, 0, wbits, lctx);
1292     nmod_mpoly_init3(Gamma, 0, wbits, lctx);
1293     nmod_mpoly_init3(Al_lc, 0, wbits, lctx);
1294     nmod_mpoly_init3(Bl_lc, 0, wbits, lctx);
1295 
1296     nmod_mpoly_to_mpolyl_perm_deflate(Al, lctx, A, ctx,
1297                                      I->zippel2_perm, I->Amin_exp, I->Gstride);
1298     nmod_mpoly_to_mpolyl_perm_deflate(Bl, lctx, B, ctx,
1299                                      I->zippel2_perm, I->Bmin_exp, I->Gstride);
1300 
1301     success = nmod_mpolyl_content(Ac, Al, 2, lctx) &&
1302               nmod_mpolyl_content(Bc, Bl, 2, lctx);
1303     if (!success)
1304         goto cleanup;
1305 
1306     if (Abar == NULL && Bbar == NULL)
1307         success = nmod_mpoly_gcd(Gc, Ac, Bc, lctx);
1308     else
1309         success = nmod_mpoly_gcd_cofactors(Gc, Abarc, Bbarc, Ac, Bc, lctx);
1310     if (!success)
1311         goto cleanup;
1312 
1313     nmod_mpoly_degrees_si(tmp, Ac, lctx);
1314     for (i = 0; i < m; i++)
1315         Al_degs[i] -= tmp[i];
1316 
1317     success = nmod_mpoly_divides(Al, Al, Ac, lctx);
1318     FLINT_ASSERT(success);
1319 
1320     nmod_mpoly_degrees_si(tmp, Bc, lctx);
1321     for (i = 0; i < m; i++)
1322         Bl_degs[i] -= tmp[i];
1323 
1324     success = nmod_mpoly_divides(Bl, Bl, Bc, lctx);
1325     FLINT_ASSERT(success);
1326 
1327     nmod_mpoly_degrees_si(tmp, Gc, lctx);
1328     for (i = 0; i < m; i++)
1329         Gl_degs[i] -= tmp[i];
1330 
1331     nmod_mpoly_repack_bits_inplace(Al, wbits, lctx);
1332     nmod_mpoly_repack_bits_inplace(Bl, wbits, lctx);
1333     nmod_mpolyl_lead_coeff(Al_lc, Al, 2, lctx);
1334     nmod_mpolyl_lead_coeff(Bl_lc, Bl, 2, lctx);
1335     success = nmod_mpoly_gcd(Gamma, Al_lc, Bl_lc, lctx);
1336     if (!success)
1337         goto cleanup;
1338     nmod_mpoly_repack_bits_inplace(Gamma, wbits, lctx);
1339 
1340     nmod_mpoly_degrees_si(Gamma_degs, Gamma, lctx);
1341 
1342     Gguess = I->Gdeflate_deg_bounds_are_nice ? Gl_degs : NULL;
1343 
1344     success = nmod_mpolyl_gcd_zippel_smprime(Gl, Gguess, Abarl, Bbarl,
1345                             Al, Al_degs, Bl, Bl_degs, Gamma, Gamma_degs, lctx);
1346     if (!success)
1347     {
1348         success = nmod_mpolyl_gcd_zippel_lgprime(Gl, Gguess, Abarl, Bbarl,
1349                             Al, Al_degs, Bl, Bl_degs, Gamma, Gamma_degs, lctx);
1350         if (!success)
1351             goto cleanup;
1352     }
1353 
1354     nmod_mpoly_mul(Gl, Gl, Gc, lctx);
1355     nmod_mpoly_from_mpolyl_perm_inflate(G, I->Gbits, ctx, Gl, lctx,
1356                                      I->zippel2_perm, I->Gmin_exp, I->Gstride);
1357     if (Abar != NULL)
1358     {
1359         nmod_mpoly_mul(Abarl, Abarl, Abarc, lctx);
1360         nmod_mpoly_from_mpolyl_perm_inflate(Abar, I->Abarbits, ctx, Abarl, lctx,
1361                                   I->zippel2_perm, I->Abarmin_exp, I->Gstride);
1362     }
1363 
1364     if (Bbar != NULL)
1365     {
1366         nmod_mpoly_mul(Bbarl, Bbarl, Bbarc, lctx);
1367         nmod_mpoly_from_mpolyl_perm_inflate(Bbar, I->Bbarbits, ctx, Bbarl, lctx,
1368                                   I->zippel2_perm, I->Bbarmin_exp, I->Gstride);
1369     }
1370 
1371     success = 1;
1372 
1373 cleanup:
1374 
1375     nmod_mpoly_clear(Al, lctx);
1376     nmod_mpoly_clear(Bl, lctx);
1377     nmod_mpoly_clear(Gl, lctx);
1378     nmod_mpoly_clear(Abarl, lctx);
1379     nmod_mpoly_clear(Bbarl, lctx);
1380     nmod_mpoly_clear(Ac, lctx);
1381     nmod_mpoly_clear(Bc, lctx);
1382     nmod_mpoly_clear(Gc, lctx);
1383     nmod_mpoly_clear(Abarc, lctx);
1384     nmod_mpoly_clear(Bbarc, lctx);
1385     nmod_mpoly_clear(Gamma, lctx);
1386     nmod_mpoly_clear(Al_lc, lctx);
1387     nmod_mpoly_clear(Bl_lc, lctx);
1388 
1389     nmod_mpoly_ctx_clear(lctx);
1390 
1391     flint_free(tmp);
1392 
1393     return success;
1394 }
1395 
1396 
1397 /******************** Hit A and B with hensel lifting ************************/
_try_hensel(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const mpoly_gcd_info_t I,const nmod_mpoly_ctx_t ctx)1398 static int _try_hensel(
1399     nmod_mpoly_t G,
1400     nmod_mpoly_t Abar,
1401     nmod_mpoly_t Bbar,
1402     const nmod_mpoly_t A,
1403     const nmod_mpoly_t B,
1404     const mpoly_gcd_info_t I,
1405     const nmod_mpoly_ctx_t ctx)
1406 {
1407     slong i, k;
1408     slong m = I->mvars;
1409     int success;
1410     flint_bitcnt_t wbits;
1411     nmod_mpoly_ctx_t lctx;
1412     nmod_mpoly_t Al, Bl, Gl, Abarl, Bbarl;
1413     nmod_mpoly_t Ac, Bc, Gc, Abarc, Bbarc;
1414     slong max_deg;
1415 
1416     FLINT_ASSERT(A->bits <= FLINT_BITS);
1417     FLINT_ASSERT(B->bits <= FLINT_BITS);
1418     FLINT_ASSERT(A->length > 0);
1419     FLINT_ASSERT(B->length > 0);
1420 
1421     if (!(I->can_use & MPOLY_GCD_USE_HENSEL))
1422         return 0;
1423 
1424     FLINT_ASSERT(m >= WORD(2));
1425 
1426     nmod_mpoly_ctx_init(lctx, m, ORD_LEX, ctx->mod.n);
1427 
1428     max_deg = 0;
1429     for (i = 0; i < m; i++)
1430     {
1431         k = I->hensel_perm[i];
1432         max_deg = FLINT_MAX(max_deg, I->Adeflate_deg[k]);
1433         max_deg = FLINT_MAX(max_deg, I->Bdeflate_deg[k]);
1434     }
1435 
1436     wbits = 1 + FLINT_BIT_COUNT(max_deg);
1437     wbits = mpoly_fix_bits(wbits, lctx->minfo);
1438     FLINT_ASSERT(wbits <= FLINT_BITS);
1439 
1440     nmod_mpoly_init3(Al, 0, wbits, lctx);
1441     nmod_mpoly_init3(Bl, 0, wbits, lctx);
1442     nmod_mpoly_init3(Gl, 0, wbits, lctx);
1443     nmod_mpoly_init3(Abarl, 0, wbits, lctx);
1444     nmod_mpoly_init3(Bbarl, 0, wbits, lctx);
1445     nmod_mpoly_init3(Ac, 0, wbits, lctx);
1446     nmod_mpoly_init3(Bc, 0, wbits, lctx);
1447     nmod_mpoly_init3(Gc, 0, wbits, lctx);
1448     nmod_mpoly_init3(Abarc, 0, wbits, lctx);
1449     nmod_mpoly_init3(Bbarc, 0, wbits, lctx);
1450 
1451     nmod_mpoly_to_mpolyl_perm_deflate(Al, lctx, A, ctx,
1452                                       I->hensel_perm, I->Amin_exp, I->Gstride);
1453     nmod_mpoly_to_mpolyl_perm_deflate(Bl, lctx, B, ctx,
1454                                       I->hensel_perm, I->Bmin_exp, I->Gstride);
1455 
1456     success = nmod_mpolyl_content(Ac, Al, 1, lctx) &&
1457               nmod_mpolyl_content(Bc, Bl, 1, lctx);
1458     if (!success)
1459         goto cleanup;
1460 
1461     if (Abar == NULL && Bbar == NULL)
1462         success = nmod_mpoly_gcd(Gc, Ac, Bc, lctx);
1463     else
1464         success = nmod_mpoly_gcd_cofactors(Gc, Abarc, Bbarc, Ac, Bc, lctx);
1465     if (!success)
1466         goto cleanup;
1467 
1468     success = nmod_mpoly_divides(Al, Al, Ac, lctx);
1469     FLINT_ASSERT(success);
1470 
1471     success = nmod_mpoly_divides(Bl, Bl, Bc, lctx);
1472     FLINT_ASSERT(success);
1473 
1474     nmod_mpoly_repack_bits_inplace(Al, wbits, lctx);
1475     nmod_mpoly_repack_bits_inplace(Bl, wbits, lctx);
1476 
1477     max_deg = I->Gdeflate_deg_bound[I->hensel_perm[0]];
1478     success = nmod_mpolyl_gcd_hensel_smprime(Gl, max_deg, Abarl, Bbarl, Al, Bl, lctx);
1479     if (!success)
1480     {
1481         success = nmod_mpolyl_gcd_hensel_medprime(Gl, max_deg, Abarl, Bbarl, Al, Bl, lctx);
1482         if (!success)
1483             goto cleanup;
1484     }
1485 
1486     nmod_mpoly_mul(Gl, Gl, Gc, lctx);
1487     nmod_mpoly_from_mpolyl_perm_inflate(G, I->Gbits, ctx, Gl, lctx,
1488                                       I->hensel_perm, I->Gmin_exp, I->Gstride);
1489     if (Abar != NULL)
1490     {
1491         nmod_mpoly_mul(Abarl, Abarl, Abarc, lctx);
1492         nmod_mpoly_from_mpolyl_perm_inflate(Abar, I->Abarbits, ctx, Abarl, lctx,
1493                                    I->hensel_perm, I->Abarmin_exp, I->Gstride);
1494     }
1495 
1496     if (Bbar != NULL)
1497     {
1498         nmod_mpoly_mul(Bbarl, Bbarl, Bbarc, lctx);
1499         nmod_mpoly_from_mpolyl_perm_inflate(Bbar, I->Bbarbits, ctx, Bbarl, lctx,
1500                                    I->hensel_perm, I->Bbarmin_exp, I->Gstride);
1501     }
1502 
1503     success = 1;
1504 
1505 cleanup:
1506 
1507     nmod_mpoly_clear(Al, lctx);
1508     nmod_mpoly_clear(Bl, lctx);
1509     nmod_mpoly_clear(Gl, lctx);
1510     nmod_mpoly_clear(Abarl, lctx);
1511     nmod_mpoly_clear(Bbarl, lctx);
1512     nmod_mpoly_clear(Ac, lctx);
1513     nmod_mpoly_clear(Bc, lctx);
1514     nmod_mpoly_clear(Gc, lctx);
1515     nmod_mpoly_clear(Abarc, lctx);
1516     nmod_mpoly_clear(Bbarc, lctx);
1517 
1518     nmod_mpoly_ctx_clear(lctx);
1519 
1520     return success;
1521 }
1522 
1523 
1524 /*********************** Hit A and B with brown ******************************/
1525 typedef struct
1526 {
1527     nmod_mpolyn_struct * Pn;
1528     const nmod_mpoly_ctx_struct * nctx;
1529     const nmod_mpoly_struct * P;
1530     const nmod_mpoly_ctx_struct * ctx;
1531     const slong * perm;
1532     const ulong * shift, * stride;
1533     const thread_pool_handle * handles;
1534     slong num_handles;
1535 }
1536 _convertn_arg_struct;
1537 
1538 typedef _convertn_arg_struct _convertn_arg_t[1];
1539 
_worker_convertn(void * varg)1540 static void _worker_convertn(void * varg)
1541 {
1542     _convertn_arg_struct * arg = (_convertn_arg_struct *) varg;
1543 
1544     nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(arg->Pn, arg->nctx, arg->P, arg->ctx,
1545            arg->perm, arg->shift, arg->stride, arg->handles, arg->num_handles);
1546 }
1547 
_try_brown(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,mpoly_gcd_info_t I,const nmod_mpoly_ctx_t ctx)1548 static int _try_brown(
1549     nmod_mpoly_t G,
1550     nmod_mpoly_t Abar,
1551     nmod_mpoly_t Bbar,
1552     const nmod_mpoly_t A,
1553     const nmod_mpoly_t B,
1554     mpoly_gcd_info_t I,
1555     const nmod_mpoly_ctx_t ctx)
1556 {
1557     int success;
1558     slong k, m = I->mvars;
1559     flint_bitcnt_t wbits;
1560     nmod_mpoly_ctx_t nctx;
1561     nmod_mpolyn_t An, Bn, Gn, Abarn, Bbarn;
1562     nmod_poly_stack_t Sp;
1563     slong thread_limit;
1564     thread_pool_handle * handles;
1565     slong num_handles;
1566 
1567     if (!(I->can_use & MPOLY_GCD_USE_BROWN))
1568         return 0;
1569 
1570     FLINT_ASSERT(m >= 2);
1571     FLINT_ASSERT(A->bits <= FLINT_BITS);
1572     FLINT_ASSERT(B->bits <= FLINT_BITS);
1573     FLINT_ASSERT(A->length > 0);
1574     FLINT_ASSERT(B->length > 0);
1575 
1576     wbits = FLINT_MAX(A->bits, B->bits);
1577 
1578     nmod_mpoly_ctx_init(nctx, m, ORD_LEX, ctx->mod.n);
1579     nmod_poly_stack_init(Sp, wbits, nctx);
1580     nmod_mpolyn_init(An, wbits, nctx);
1581     nmod_mpolyn_init(Bn, wbits, nctx);
1582     nmod_mpolyn_init(Gn, wbits, nctx);
1583     nmod_mpolyn_init(Abarn, wbits, nctx);
1584     nmod_mpolyn_init(Bbarn, wbits, nctx);
1585 
1586     /* parallelism only on the outer variable */
1587     k = I->brown_perm[m - 1];
1588     thread_limit = FLINT_MIN(I->Adeflate_deg[k], I->Bdeflate_deg[k])/8;
1589     thread_limit = FLINT_MIN(thread_limit, (A->length + B->length)/1024);
1590 
1591     num_handles = flint_request_threads(&handles, thread_limit);
1592 
1593     if (num_handles > 0)
1594     {
1595         slong s = mpoly_divide_threads(num_handles, A->length, B->length);
1596         _convertn_arg_t arg;
1597 
1598         FLINT_ASSERT(s >= 0);
1599         FLINT_ASSERT(s < num_handles);
1600 
1601         arg->Pn = Bn;
1602         arg->nctx = nctx;
1603         arg->P = B;
1604         arg->ctx = ctx;
1605         arg->perm = I->brown_perm;
1606         arg->shift = I->Bmin_exp;
1607         arg->stride = I->Gstride;
1608         arg->handles = handles + (s + 1);
1609         arg->num_handles = num_handles - (s + 1);
1610 
1611         thread_pool_wake(global_thread_pool, handles[s], 0, _worker_convertn, arg);
1612 
1613         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(An, nctx, A, ctx,
1614                        I->brown_perm, I->Amin_exp, I->Gstride, handles + 0, s);
1615 
1616         thread_pool_wait(global_thread_pool, handles[s]);
1617     }
1618     else
1619     {
1620         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(An, nctx, A, ctx,
1621                               I->brown_perm, I->Amin_exp, I->Gstride, NULL, 0);
1622         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(Bn, nctx, B, ctx,
1623                               I->brown_perm, I->Bmin_exp, I->Gstride, NULL, 0);
1624     }
1625 
1626     FLINT_ASSERT(An->bits == wbits);
1627     FLINT_ASSERT(Bn->bits == wbits);
1628     FLINT_ASSERT(An->length > 1);
1629     FLINT_ASSERT(Bn->length > 1);
1630 
1631     success = (num_handles > 0)
1632         ? nmod_mpolyn_gcd_brown_smprime_threaded_pool(Gn, Abarn, Bbarn, An, Bn,
1633                                           m - 1, nctx, I, handles, num_handles)
1634         : nmod_mpolyn_gcd_brown_smprime(Gn, Abarn, Bbarn, An, Bn,
1635                                                            m - 1, nctx, I, Sp);
1636 
1637     if (!success)
1638     {
1639         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(An, nctx, A, ctx,
1640                               I->brown_perm, I->Amin_exp, I->Gstride, NULL, 0);
1641         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(Bn, nctx, B, ctx,
1642                               I->brown_perm, I->Bmin_exp, I->Gstride, NULL, 0);
1643         success = nmod_mpolyn_gcd_brown_lgprime(Gn, Abarn, Bbarn, An, Bn,
1644                                                                   m - 1, nctx);
1645     }
1646 
1647     if (!success)
1648         goto cleanup;
1649 
1650     nmod_mpoly_from_mpolyn_perm_inflate(G, I->Gbits, ctx, Gn, nctx,
1651                                        I->brown_perm, I->Gmin_exp, I->Gstride);
1652 
1653     if (Abar != NULL)
1654         nmod_mpoly_from_mpolyn_perm_inflate(Abar, I->Abarbits, ctx, Abarn, nctx,
1655                                     I->brown_perm, I->Abarmin_exp, I->Gstride);
1656 
1657     if (Bbar != NULL)
1658         nmod_mpoly_from_mpolyn_perm_inflate(Bbar, I->Bbarbits, ctx, Bbarn, nctx,
1659                                     I->brown_perm, I->Bbarmin_exp, I->Gstride);
1660 
1661     success = 1;
1662 
1663 cleanup:
1664 
1665     flint_give_back_threads(handles, num_handles);
1666 
1667     nmod_mpolyn_clear(An, nctx);
1668     nmod_mpolyn_clear(Bn, nctx);
1669     nmod_mpolyn_clear(Gn, nctx);
1670     nmod_mpolyn_clear(Abarn, nctx);
1671     nmod_mpolyn_clear(Bbarn, nctx);
1672     nmod_poly_stack_clear(Sp);
1673     nmod_mpoly_ctx_clear(nctx);
1674 
1675     return success;
1676 }
1677 
1678 
1679 /*
1680     Both A and B have to be packed into bits <= FLINT_BITS
1681     return is 1 for success, 0 for failure.
1682 */
_nmod_mpoly_gcd_algo_small(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx,unsigned int algo)1683 int _nmod_mpoly_gcd_algo_small(
1684     nmod_mpoly_t G,
1685     nmod_mpoly_t Abar, /* could be NULL */
1686     nmod_mpoly_t Bbar, /* could be NULL */
1687     const nmod_mpoly_t A,
1688     const nmod_mpoly_t B,
1689     const nmod_mpoly_ctx_t ctx,
1690     unsigned int algo)
1691 {
1692     int success;
1693     flint_bitcnt_t Gbits = FLINT_MIN(A->bits, B->bits);
1694     flint_bitcnt_t Abarbits = A->bits;
1695     flint_bitcnt_t Bbarbits = B->bits;
1696     slong v_in_both;
1697     slong v_in_either;
1698     slong v_in_A_only;
1699     slong v_in_B_only;
1700     slong j;
1701     slong nvars = ctx->minfo->nvars;
1702     mpoly_gcd_info_t I;
1703 #if FLINT_WANT_ASSERT
1704     nmod_mpoly_t T, Asave, Bsave;
1705 #endif
1706 
1707     if (A->length == 1)
1708         return _do_monomial_gcd(G, Bbar, Abar, B, A, ctx);
1709     else if (B->length == 1)
1710         return _do_monomial_gcd(G, Abar, Bbar, A, B, ctx);
1711 
1712 #if FLINT_WANT_ASSERT
1713     nmod_mpoly_init(T, ctx);
1714     nmod_mpoly_init(Asave, ctx);
1715     nmod_mpoly_init(Bsave, ctx);
1716     nmod_mpoly_set(Asave, A, ctx);
1717     nmod_mpoly_set(Bsave, B, ctx);
1718 #endif
1719 
1720     mpoly_gcd_info_init(I, nvars);
1721 
1722     /* entries of I are all now invalid */
1723 
1724     I->Gbits = Gbits;
1725     I->Abarbits = Abarbits;
1726     I->Bbarbits = Bbarbits;
1727 
1728     mpoly_gcd_info_limits(I->Amax_exp, I->Amin_exp, I->Alead_count,
1729                       I->Atail_count, A->exps, A->bits, A->length, ctx->minfo);
1730     mpoly_gcd_info_limits(I->Bmax_exp, I->Bmin_exp, I->Blead_count,
1731                       I->Btail_count, B->exps, B->bits, B->length, ctx->minfo);
1732 
1733     /* set ess(p) := p/term_content(p) */
1734 
1735     /* check if the cofactors could be monomials, i.e. ess(A) == ess(B) */
1736     for (j = 0; j < nvars; j++)
1737     {
1738         if (I->Amax_exp[j] - I->Amin_exp[j] != I->Bmax_exp[j] - I->Bmin_exp[j])
1739             goto skip_monomial_cofactors;
1740     }
1741 
1742     if (_try_monomial_cofactors(G, Abar, Bbar, A, B, ctx))
1743     {
1744         goto successful;
1745     }
1746 
1747 skip_monomial_cofactors:
1748 
1749     mpoly_gcd_info_stride(I->Gstride,
1750             A->exps, A->bits, A->length, I->Amax_exp, I->Amin_exp,
1751             B->exps, B->bits, B->length, I->Bmax_exp, I->Bmin_exp, ctx->minfo);
1752 
1753     for (j = 0; j < nvars; j++)
1754     {
1755         ulong t = I->Gstride[j];
1756 
1757         if (t == 0)
1758         {
1759             FLINT_ASSERT(I->Amax_exp[j] == I->Amin_exp[j] ||
1760                          I->Bmax_exp[j] == I->Bmin_exp[j]);
1761         }
1762         else
1763         {
1764             FLINT_ASSERT((I->Amax_exp[j] - I->Amin_exp[j]) % t == 0);
1765             FLINT_ASSERT((I->Bmax_exp[j] - I->Bmin_exp[j]) % t == 0);
1766         }
1767 
1768         I->Adeflate_deg[j] = t == 0 ? 0 : (I->Amax_exp[j] - I->Amin_exp[j])/t;
1769         I->Bdeflate_deg[j] = t == 0 ? 0 : (I->Bmax_exp[j] - I->Bmin_exp[j])/t;
1770 
1771         t = FLINT_MIN(I->Amin_exp[j], I->Bmin_exp[j]);
1772         I->Gmin_exp[j] = t;
1773         I->Abarmin_exp[j] = I->Amin_exp[j] - t;
1774         I->Bbarmin_exp[j] = I->Bmin_exp[j] - t;
1775     }
1776 
1777     /*
1778         The following are now valid:
1779             I->Amax_exp, I->Amin_exp, I->Alead_count, I->Atail_count,
1780             I->Bmax_exp, I->Bmin_exp, I->Blead_count, I->Btail_count,
1781             I->Gstride
1782             I->Adeflate_deg
1783             I->Bdeflate_deg
1784             I->Gmin_exp
1785     */
1786 
1787     /* check if ess(A) and ess(B) have a variable v_in_both in common */
1788     v_in_both = -WORD(1);
1789     for (j = 0; j < nvars; j++)
1790     {
1791         if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] > I->Bmin_exp[j])
1792         {
1793             v_in_both = j;
1794             break;
1795         }
1796     }
1797     if (v_in_both == -WORD(1))
1798     {
1799         _do_trivial(G, Abar, Bbar, A, B, I, ctx);
1800         goto successful;
1801     }
1802 
1803     /* check if ess(A) and ess(B) depend on another variable v_in_either */
1804     FLINT_ASSERT(0 <= v_in_both && v_in_both < nvars);
1805 
1806     v_in_either = -WORD(1);
1807     for (j = 0; j < nvars; j++)
1808     {
1809         if (j == v_in_both)
1810             continue;
1811 
1812         if (I->Amax_exp[j] > I->Amin_exp[j] || I->Bmax_exp[j] > I->Bmin_exp[j])
1813         {
1814             v_in_either = j;
1815             break;
1816         }
1817     }
1818 
1819     if (v_in_either == -WORD(1))
1820     {
1821         _do_univar(G, Abar, Bbar, A, B, v_in_both, I, ctx);
1822         goto successful;
1823     }
1824 
1825     /* check if there is a variable in ess(A) that is not in ess(B) */
1826     v_in_A_only = -WORD(1);
1827     v_in_B_only = -WORD(1);
1828     for (j = 0; j < nvars; j++)
1829     {
1830         if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] == I->Bmin_exp[j])
1831         {
1832             v_in_A_only = j;
1833             break;
1834         }
1835         if (I->Bmax_exp[j] > I->Bmin_exp[j] && I->Amax_exp[j] == I->Amin_exp[j])
1836         {
1837             v_in_B_only = j;
1838             break;
1839         }
1840     }
1841     if (v_in_A_only != -WORD(1))
1842     {
1843         success = _try_missing_var(G, I->Gbits, Abar, Bbar, v_in_A_only,
1844                                    A, I->Amin_exp[v_in_A_only],
1845                                    B, I->Bmin_exp[v_in_A_only],
1846                                    ctx);
1847         goto cleanup;
1848     }
1849     if (v_in_B_only != -WORD(1))
1850     {
1851         success = _try_missing_var(G, I->Gbits, Bbar, Abar, v_in_B_only,
1852                                    B, I->Bmin_exp[v_in_B_only],
1853                                    A, I->Amin_exp[v_in_B_only],
1854                                    ctx);
1855         goto cleanup;
1856     }
1857 
1858     /* all variable are now either
1859             missing from both ess(A) and ess(B), or
1860             present in both ess(A) and ess(B)
1861         and there are at least two in the latter case
1862     */
1863 
1864     mpoly_gcd_info_set_perm(I, A->length, B->length, ctx->minfo);
1865 
1866     /* _set_estimates will probably calculate the correct total degrees */
1867     I->Adeflate_tdeg = I->Bdeflate_tdeg = -1;
1868 
1869     _set_estimates(I, A, B, ctx);
1870 
1871     j = FLINT_MAX(0, 8 - I->mvars);
1872     if (!I->Gdeflate_deg_bounds_are_nice || ctx->mod.n < j)
1873         _set_estimates_medprime(I, A, B, ctx);
1874 
1875     if (!I->Gdeflate_deg_bounds_are_nice)
1876         _set_estimates_lgprime(I, A, B, ctx);
1877 
1878     /* everything in I is valid now */
1879 
1880     /* check divisibility A/B and B/A */
1881     {
1882         int gcd_is_trivial = 1;
1883         int try_a = I->Gdeflate_deg_bounds_are_nice;
1884         int try_b = I->Gdeflate_deg_bounds_are_nice;
1885         for (j = 0; j < nvars; j++)
1886         {
1887             if (I->Gdeflate_deg_bound[j] != 0)
1888                 gcd_is_trivial = 0;
1889 
1890             if (I->Adeflate_deg[j] != I->Gdeflate_deg_bound[j])
1891                 try_a = 0;
1892 
1893             if (I->Bdeflate_deg[j] != I->Gdeflate_deg_bound[j])
1894                 try_b = 0;
1895         }
1896 
1897         if (gcd_is_trivial)
1898         {
1899             _do_trivial(G, Abar, Bbar, A, B, I, ctx);
1900             goto successful;
1901         }
1902 
1903         if (try_a && _try_divides(G, Bbar, Abar, B, A, ctx))
1904             goto successful;
1905 
1906         if (try_b && _try_divides(G, Abar, Bbar, A, B, ctx))
1907             goto successful;
1908     }
1909 
1910     if (I->mvars < 3)
1911     {
1912         mpoly_gcd_info_measure_brown(I, A->length, B->length, ctx->minfo);
1913         mpoly_gcd_info_measure_hensel(I, A->length, B->length, ctx->minfo);
1914 
1915         algo &= (MPOLY_GCD_USE_BROWN | MPOLY_GCD_USE_HENSEL);
1916 
1917         if (algo == MPOLY_GCD_USE_BROWN)
1918         {
1919             success = _try_brown(G, Abar, Bbar, A, B, I, ctx);
1920         }
1921         else if (algo == MPOLY_GCD_USE_HENSEL)
1922         {
1923             success = _try_hensel(G, Abar, Bbar, A, B, I, ctx);
1924         }
1925         else
1926         {
1927             slong k = I->brown_perm[1];
1928             slong d = FLINT_MAX(I->Adeflate_deg[k], I->Bdeflate_deg[k]);
1929             int deg_is_small = d < ctx->mod.n/2;
1930 
1931             if (I->Adensity + I->Bdensity > (deg_is_small ? 0.05 : 0.2))
1932             {
1933                 success = _try_brown(G, Abar, Bbar, A, B, I, ctx) ||
1934                           _try_hensel(G, Abar, Bbar, A, B, I, ctx);
1935             }
1936             else
1937             {
1938                 success = _try_hensel(G, Abar, Bbar, A, B, I, ctx) ||
1939                           _try_brown(G, Abar, Bbar, A, B, I, ctx);
1940             }
1941         }
1942 
1943         goto cleanup;
1944     }
1945     else if (algo == MPOLY_GCD_USE_HENSEL)
1946     {
1947         mpoly_gcd_info_measure_hensel(I, A->length, B->length, ctx->minfo);
1948         success = _try_hensel(G, Abar, Bbar, A, B, I, ctx);
1949         goto cleanup;
1950     }
1951     else if (algo == MPOLY_GCD_USE_BROWN)
1952     {
1953         mpoly_gcd_info_measure_brown(I, A->length, B->length, ctx->minfo);
1954         success = _try_brown(G, Abar, Bbar, A, B, I, ctx);
1955         goto cleanup;
1956     }
1957     else if (algo == MPOLY_GCD_USE_ZIPPEL)
1958     {
1959         mpoly_gcd_info_measure_zippel(I, A->length, B->length, ctx->minfo);
1960         success = _try_zippel(G, Abar, Bbar, A, B, I, ctx);
1961         goto cleanup;
1962     }
1963     else if (algo == MPOLY_GCD_USE_ZIPPEL2)
1964     {
1965         mpoly_gcd_info_measure_zippel2(I, A->length, B->length, ctx->minfo);
1966         success = _try_zippel2(G, Abar, Bbar, A, B, I, ctx);
1967         goto cleanup;
1968     }
1969     else
1970     {
1971         slong k, d;
1972         int deg_is_small = 1;
1973         double density = I->Adensity + I->Bdensity;
1974 
1975         /*
1976             mpoly gcd case.
1977             Only rule is that measure_X must be called before
1978                 try_X is called or I->X_perm is accessed.
1979         */
1980 
1981         mpoly_gcd_info_measure_hensel(I, A->length, B->length, ctx->minfo);
1982         mpoly_gcd_info_measure_brown(I, A->length, B->length, ctx->minfo);
1983         mpoly_gcd_info_measure_zippel(I, A->length, B->length, ctx->minfo);
1984         mpoly_gcd_info_measure_zippel2(I, A->length, B->length, ctx->minfo);
1985 
1986         for (j = 1; j < I->mvars; j++)
1987         {
1988             k = I->brown_perm[j];
1989             d = FLINT_MAX(I->Adeflate_deg[k], I->Bdeflate_deg[k]);
1990             if (d > ctx->mod.n/2)
1991                 deg_is_small = 0;
1992         }
1993 
1994         if (density > 0.08)
1995         {
1996             if (!deg_is_small && _try_hensel(G, Abar, Bbar, A, B, I, ctx))
1997                 goto successful;
1998 
1999             if (_try_brown(G, Abar, Bbar, A, B, I, ctx))
2000                 goto successful;
2001         }
2002 
2003         if (I->Adeflate_tdeg > 0 && I->Bdeflate_tdeg > 0)
2004         {
2005             fmpz_t x;
2006             double tdensity;
2007             fmpz_init(x);
2008             fmpz_bin_uiui(x, (ulong)I->Adeflate_tdeg + I->mvars, I->mvars);
2009             tdensity = A->length/fmpz_get_d(x);
2010             fmpz_bin_uiui(x, (ulong)I->Bdeflate_tdeg + I->mvars, I->mvars);
2011             tdensity += B->length/fmpz_get_d(x);
2012             density = FLINT_MAX(density, tdensity);
2013             fmpz_clear(x);
2014         }
2015 
2016         if (density > (deg_is_small ? 0.05 : 0.001))
2017         {
2018             if (_try_hensel(G, Abar, Bbar, A, B, I, ctx))
2019                 goto successful;
2020         }
2021 
2022         k = I->zippel2_perm[1];
2023         k = FLINT_MAX(I->Adeflate_deg[k], I->Bdeflate_deg[k]);
2024         if ((A->length + B->length)/64 < k)
2025         {
2026             if (_try_zippel(G, Abar, Bbar, A, B, I, ctx))
2027                 goto successful;
2028             if (_try_zippel2(G, Abar, Bbar, A, B, I, ctx))
2029                 goto successful;
2030         }
2031         else
2032         {
2033             if (_try_zippel2(G, Abar, Bbar, A, B, I, ctx))
2034                 goto successful;
2035             if (_try_zippel(G, Abar, Bbar, A, B, I, ctx))
2036                 goto successful;
2037         }
2038 
2039         success = _try_hensel(G, Abar, Bbar, A, B, I, ctx) ||
2040                   _try_brown(G, Abar, Bbar, A, B, I, ctx);
2041         goto cleanup;
2042     }
2043 
2044     success = 0;
2045     goto cleanup;
2046 
2047 successful:
2048 
2049     success = 1;
2050 
2051 cleanup:
2052 
2053     mpoly_gcd_info_clear(I);
2054 
2055     if (success)
2056     {
2057         FLINT_ASSERT(G->length > 0);
2058 
2059         if (G->coeffs[0] != 1)
2060         {
2061             if (Abar != NULL)
2062                 _nmod_vec_scalar_mul_nmod(Abar->coeffs, Abar->coeffs,
2063                                          Abar->length, G->coeffs[0], ctx->mod);
2064             if (Bbar != NULL)
2065                 _nmod_vec_scalar_mul_nmod(Bbar->coeffs, Bbar->coeffs,
2066                                          Bbar->length, G->coeffs[0], ctx->mod);
2067 
2068             _nmod_vec_scalar_mul_nmod(G->coeffs, G->coeffs, G->length,
2069                                    nmod_inv(G->coeffs[0], ctx->mod), ctx->mod);
2070         }
2071 
2072         FLINT_ASSERT(nmod_mpoly_divides(T, Asave, G, ctx));
2073         FLINT_ASSERT(Abar == NULL || nmod_mpoly_equal(T, Abar, ctx));
2074 
2075         FLINT_ASSERT(nmod_mpoly_divides(T, Bsave, G, ctx));
2076         FLINT_ASSERT(Bbar == NULL || nmod_mpoly_equal(T, Bbar, ctx));
2077     }
2078 
2079 #if FLINT_WANT_ASSERT
2080     nmod_mpoly_clear(T, ctx);
2081     nmod_mpoly_clear(Asave, ctx);
2082     nmod_mpoly_clear(Bsave, ctx);
2083 #endif
2084 
2085     return success;
2086 }
2087 
2088 
2089 /*
2090     The gcd calculation is unusual.
2091     First see if both inputs fit into FLINT_BITS.
2092     Then, try deflation as a last resort.
2093 */
_nmod_mpoly_gcd_algo_large(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx,unsigned int algo)2094 static int _nmod_mpoly_gcd_algo_large(
2095     nmod_mpoly_t G,
2096     nmod_mpoly_t Abar,
2097     nmod_mpoly_t Bbar,
2098     const nmod_mpoly_t A,
2099     const nmod_mpoly_t B,
2100     const nmod_mpoly_ctx_t ctx,
2101     unsigned int algo)
2102 {
2103     int success;
2104     slong k;
2105     fmpz * Ashift, * Astride;
2106     fmpz * Bshift, * Bstride;
2107     fmpz * Gshift, * Gstride;
2108     nmod_mpoly_t Anew, Bnew;
2109     const nmod_mpoly_struct * Ause, * Buse;
2110 
2111     if (A->length == 1)
2112         return _do_monomial_gcd(G, Bbar, Abar, B, A, ctx);
2113 
2114     if (B->length == 1)
2115         return _do_monomial_gcd(G, Abar, Bbar, A, B, ctx);
2116 
2117     if (_try_monomial_cofactors(G, Abar, Bbar, A, B, ctx))
2118         return 1;
2119 
2120     nmod_mpoly_init(Anew, ctx);
2121     nmod_mpoly_init(Bnew, ctx);
2122 
2123     Ause = A;
2124     if (A->bits > FLINT_BITS)
2125     {
2126         if (!nmod_mpoly_repack_bits(Anew, A, FLINT_BITS, ctx))
2127             goto could_not_repack;
2128         Ause = Anew;
2129     }
2130 
2131     Buse = B;
2132     if (B->bits > FLINT_BITS)
2133     {
2134         if (!nmod_mpoly_repack_bits(Bnew, B, FLINT_BITS, ctx))
2135             goto could_not_repack;
2136         Buse = Bnew;
2137     }
2138 
2139     success = _nmod_mpoly_gcd_algo(G, Abar, Bbar, Ause, Buse, ctx, algo);
2140 
2141     goto cleanup;
2142 
2143 could_not_repack:
2144 
2145     /*
2146         One of A or B could not be repacked into FLINT_BITS. See if
2147         they both fit into FLINT_BITS after deflation.
2148     */
2149 
2150     Ashift  = _fmpz_vec_init(ctx->minfo->nvars);
2151     Astride = _fmpz_vec_init(ctx->minfo->nvars);
2152     Bshift  = _fmpz_vec_init(ctx->minfo->nvars);
2153     Bstride = _fmpz_vec_init(ctx->minfo->nvars);
2154     Gshift  = _fmpz_vec_init(ctx->minfo->nvars);
2155     Gstride = _fmpz_vec_init(ctx->minfo->nvars);
2156 
2157     nmod_mpoly_deflation(Ashift, Astride, A, ctx);
2158     nmod_mpoly_deflation(Bshift, Bstride, B, ctx);
2159     _fmpz_vec_min(Gshift, Ashift, Bshift, ctx->minfo->nvars);
2160     for (k = 0; k < ctx->minfo->nvars; k++)
2161         fmpz_gcd(Gstride + k, Astride + k, Bstride + k);
2162 
2163     nmod_mpoly_deflate(Anew, A, Ashift, Gstride, ctx);
2164     if (Anew->bits > FLINT_BITS)
2165     {
2166         success = nmod_mpoly_repack_bits(Anew, Anew, FLINT_BITS, ctx);
2167         if (!success)
2168             goto deflate_cleanup;
2169     }
2170 
2171     nmod_mpoly_deflate(Bnew, B, Bshift, Gstride, ctx);
2172     if (Bnew->bits > FLINT_BITS)
2173     {
2174         success = nmod_mpoly_repack_bits(Bnew, Bnew, FLINT_BITS, ctx);
2175         if (!success)
2176             goto deflate_cleanup;
2177     }
2178 
2179     success = _nmod_mpoly_gcd_algo(G, Abar, Bbar, Anew, Bnew, ctx, algo);
2180     if (!success)
2181         goto deflate_cleanup;
2182 
2183     for (k = 0; k < ctx->minfo->nvars; k++)
2184     {
2185         fmpz_sub(Ashift + k, Ashift + k, Gshift + k);
2186         fmpz_sub(Bshift + k, Bshift + k, Gshift + k);
2187         FLINT_ASSERT(fmpz_sgn(Ashift + k) >= 0);
2188         FLINT_ASSERT(fmpz_sgn(Bshift + k) >= 0);
2189     }
2190 
2191     nmod_mpoly_inflate(G, G, Gshift, Gstride, ctx);
2192     if (Abar != NULL)
2193         nmod_mpoly_inflate(Abar, Abar, Ashift, Gstride, ctx);
2194     if (Bbar != NULL)
2195         nmod_mpoly_inflate(Bbar, Bbar, Bshift, Gstride, ctx);
2196 
2197     FLINT_ASSERT(G->length > 0);
2198     if (G->coeffs[0] != 1)
2199     {
2200         if (Abar != NULL)
2201             _nmod_vec_scalar_mul_nmod(Abar->coeffs, Abar->coeffs,
2202                                          Abar->length, G->coeffs[0], ctx->mod);
2203 
2204         if (Bbar != NULL)
2205             _nmod_vec_scalar_mul_nmod(Bbar->coeffs, Bbar->coeffs,
2206                                          Bbar->length, G->coeffs[0], ctx->mod);
2207 
2208         _nmod_vec_scalar_mul_nmod(G->coeffs, G->coeffs, G->length,
2209                                    nmod_inv(G->coeffs[0], ctx->mod), ctx->mod);
2210     }
2211 
2212 deflate_cleanup:
2213 
2214     _fmpz_vec_clear(Ashift, ctx->minfo->nvars);
2215     _fmpz_vec_clear(Astride, ctx->minfo->nvars);
2216     _fmpz_vec_clear(Bshift, ctx->minfo->nvars);
2217     _fmpz_vec_clear(Bstride, ctx->minfo->nvars);
2218     _fmpz_vec_clear(Gshift, ctx->minfo->nvars);
2219     _fmpz_vec_clear(Gstride, ctx->minfo->nvars);
2220 
2221 cleanup:
2222 
2223     nmod_mpoly_clear(Anew, ctx);
2224     nmod_mpoly_clear(Bnew, ctx);
2225 
2226     return success;
2227 }
2228 
2229 
_nmod_mpoly_gcd_algo(nmod_mpoly_t G,nmod_mpoly_t Abar,nmod_mpoly_t Bbar,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx,unsigned int algo)2230 int _nmod_mpoly_gcd_algo(
2231     nmod_mpoly_t G,
2232     nmod_mpoly_t Abar,
2233     nmod_mpoly_t Bbar,
2234     const nmod_mpoly_t A,
2235     const nmod_mpoly_t B,
2236     const nmod_mpoly_ctx_t ctx,
2237     unsigned int algo)
2238 {
2239     FLINT_ASSERT(!nmod_mpoly_is_zero(A, ctx));
2240     FLINT_ASSERT(!nmod_mpoly_is_zero(B, ctx));
2241 
2242     if (A->bits <= FLINT_BITS && B->bits <= FLINT_BITS)
2243         return _nmod_mpoly_gcd_algo_small(G, Abar, Bbar, A, B, ctx, algo);
2244     else
2245         return _nmod_mpoly_gcd_algo_large(G, Abar, Bbar, A, B, ctx, algo);
2246 }
2247 
nmod_mpoly_gcd(nmod_mpoly_t G,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx)2248 int nmod_mpoly_gcd(
2249     nmod_mpoly_t G,
2250     const nmod_mpoly_t A,
2251     const nmod_mpoly_t B,
2252     const nmod_mpoly_ctx_t ctx)
2253 {
2254     if (nmod_mpoly_is_zero(A, ctx))
2255     {
2256         if (nmod_mpoly_is_zero(B, ctx))
2257             nmod_mpoly_zero(G, ctx);
2258         else
2259             nmod_mpoly_make_monic(G, B, ctx);
2260         return 1;
2261     }
2262 
2263     if (nmod_mpoly_is_zero(B, ctx))
2264     {
2265         nmod_mpoly_make_monic(G, A, ctx);
2266         return 1;
2267     }
2268 
2269     return _nmod_mpoly_gcd_algo(G, NULL, NULL, A, B, ctx, MPOLY_GCD_USE_ALL);
2270 }
2271 
2272