1 /*
2     Copyright (C) 2018 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include "fmpz_mpoly.h"
13 
14 /*
15     For each j, set out[j] to the evaluation of A at x_i = alpha[i] (i != j)
16     i.e. if nvars = 3
17         out[0] = A(x, alpha[1], alpha[2])
18         out[1] = A(alpha[0], x, alpha[2])
19         out[2] = A(alpha[0], alpha[1], x)
20 
21     If ignore[j] is nonzero, then out[j] need not be calculated, probably
22     because we shouldn't calculate it in dense form.
23 */
fmpz_mpoly_evals(nmod_poly_struct * out,const int * ignore,const fmpz_mpoly_t A,ulong * Amin_exp,ulong * Amax_exp,ulong * Astride,mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)24 void fmpz_mpoly_evals(
25     nmod_poly_struct * out,
26     const int * ignore,
27     const fmpz_mpoly_t A,
28     ulong * Amin_exp,
29     ulong * Amax_exp,
30     ulong * Astride,
31     mp_limb_t * alpha,
32     const fmpz_mpoly_ctx_t ctx,
33     const thread_pool_handle * handles,
34     slong num_handles)
35 {
36     slong i, j;
37     slong nvars = ctx->minfo->nvars;
38     slong total_limit, total_length;
39     int use_direct_LUT;
40     ulong varexp;
41     ulong mask;
42     slong * offsets, * shifts;
43     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
44     ulong * Aexp = A->exps;
45     fmpz * Acoeff = A->coeffs;
46     mp_limb_t meval;
47     mp_limb_t t;
48 
49     FLINT_ASSERT(A->bits <= FLINT_BITS);
50 
51     mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
52     offsets = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
53     shifts = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
54 
55     for (j = 0; j < ctx->minfo->nvars; j++)
56     {
57         nmod_poly_zero(out + j);
58         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
59     }
60 
61     /*
62         two cases:
63         (1) the Amax_exp[j] are small enough to calculate a direct LUT
64         (2) use a LUT for exponents that are powers of two
65     */
66 
67     total_limit = A->length/256;
68     total_limit = FLINT_MAX(WORD(9999), total_limit);
69     total_length = 0;
70     use_direct_LUT = 1;
71     for (j = 0; j < ctx->minfo->nvars; j++)
72     {
73         total_length += Amax_exp[j] + 1;
74         if ((ulong) total_length > (ulong) total_limit)
75             use_direct_LUT = 0;
76     }
77 
78     if (use_direct_LUT)
79     {
80         slong off;
81         mp_limb_t * LUT, ** LUTvalue, ** LUTvalueinv;
82 
83         /* value of powers of alpha[j] */
84         LUT = (mp_limb_t *) flint_malloc(2*total_length*sizeof(mp_limb_t));
85 
86         /* pointers into LUT */
87         LUTvalue    = (mp_limb_t **) flint_malloc(nvars*sizeof(mp_limb_t *));
88         LUTvalueinv = (mp_limb_t **) flint_malloc(nvars*sizeof(mp_limb_t *));
89 
90         off = 0;
91         for (j = 0; j < nvars; j++)
92         {
93             ulong k;
94             mp_limb_t alphainvj = nmod_inv(alpha[j], (out + 0)->mod);
95 
96             LUTvalue[j] = LUT + off;
97             LUTvalueinv[j] = LUT + total_length + off;
98             LUTvalue[j][0] = 1;
99             LUTvalueinv[j][0] = 1;
100             for (k = 0; k < Amax_exp[j]; k++)
101             {
102                 LUTvalue[j][k + 1] = nmod_mul(LUTvalue[j][k], alpha[j],
103                                                                (out + 0)->mod);
104                 LUTvalueinv[j][k + 1] = nmod_mul(LUTvalueinv[j][k], alphainvj,
105                                                                (out + 0)->mod);
106             }
107 
108             off += Amax_exp[j] + 1;
109         }
110         FLINT_ASSERT(off == total_length);
111 
112         for (i = 0; i < A->length; i++)
113         {
114             meval = fmpz_fdiv_ui(Acoeff + i, out->mod.n);
115 
116             for (j = 0; j < nvars; j++)
117             {
118                 varexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
119                 FLINT_ASSERT(varexp <= Amax_exp[j]);
120                 meval = nmod_mul(meval, LUTvalue[j][varexp], (out + 0)->mod);
121             }
122 
123             for (j = 0; j < nvars; j++)
124             {
125                 varexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
126 
127                 if (ignore[j])
128                     continue;
129 
130                 t = nmod_mul(meval, LUTvalueinv[j][varexp], (out + j)->mod);
131 
132                 FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j])
133                                   || (varexp - Amin_exp[j]) % Astride[j] == 0);
134 
135                 varexp = Astride[j] < 2 ? varexp - Amin_exp[j] :
136                                            (varexp - Amin_exp[j])/Astride[j];
137 
138                 t = nmod_add(t, nmod_poly_get_coeff_ui(out + j, varexp),
139                                                                (out + j)->mod);
140                 nmod_poly_set_coeff_ui(out + j, varexp, t);
141             }
142         }
143 
144         flint_free(LUT);
145         flint_free(LUTvalue);
146         flint_free(LUTvalueinv);
147     }
148     else
149     {
150         slong LUTlen;
151         ulong * LUTmask;
152         slong * LUToffset, * LUTvar;
153         mp_limb_t * LUTvalue, * LUTvalueinv;
154         mp_limb_t * vieval;
155         mp_limb_t t, xpoweval, xinvpoweval;
156 
157         LUToffset   = (slong *) flint_malloc(N*FLINT_BITS*sizeof(slong));
158         LUTmask     = (ulong *) flint_malloc(N*FLINT_BITS*sizeof(ulong));
159         LUTvalue    = (mp_limb_t *) flint_malloc(N*FLINT_BITS*sizeof(mp_limb_t));
160         LUTvar      = (slong *) flint_malloc(N*FLINT_BITS*sizeof(slong));
161         LUTvalueinv = (mp_limb_t *) flint_malloc(N*FLINT_BITS*sizeof(mp_limb_t));
162 
163         vieval = (mp_limb_t *) flint_malloc(nvars*sizeof(mp_limb_t));
164 
165         LUTlen = 0;
166         for (j = nvars - 1; j >= 0; j--)
167         {
168             flint_bitcnt_t bits = FLINT_BIT_COUNT(Amax_exp[j]);
169             xpoweval = alpha[j]; /* xpoweval = alpha[j]^(2^i) */
170             xinvpoweval = nmod_inv(xpoweval, (out + 0)->mod); /* alpha[j]^(-2^i) */
171             for (i = 0; i < bits; i++)
172             {
173                 LUToffset[LUTlen] = offsets[j];
174                 LUTmask[LUTlen] = (UWORD(1) << (shifts[j] + i));
175                 LUTvalue[LUTlen] = xpoweval;
176                 LUTvalueinv[LUTlen] = xinvpoweval;
177                 LUTvar[LUTlen] = j;
178                 LUTlen++;
179                 xpoweval = nmod_mul(xpoweval, xpoweval, (out + 0)->mod);
180                 xinvpoweval = nmod_mul(xinvpoweval, xinvpoweval, (out + 0)->mod);
181             }
182 
183             vieval[j] = 1;
184         }
185         FLINT_ASSERT(LUTlen < N*FLINT_BITS);
186 
187         for (i = 0; i < A->length; i++)
188         {
189             meval = fmpz_fdiv_ui(Acoeff + i, (out + 0)->mod.n);
190 
191             for (j = 0; j < LUTlen; j++)
192             {
193                 if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
194                 {
195                     meval = nmod_mul(meval, LUTvalue[j], (out + 0)->mod);
196                     vieval[LUTvar[j]] = nmod_mul(vieval[LUTvar[j]],
197                                                LUTvalueinv[j], (out + 0)->mod);
198                 }
199             }
200 
201             for (j = 0; j < nvars; j++)
202             {
203                 varexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
204 
205                 FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j])
206                                   || (varexp - Amin_exp[j]) % Astride[j] == 0);
207 
208                 varexp = Astride[j] < 2 ? varexp - Amin_exp[j] :
209                                            (varexp - Amin_exp[j])/Astride[j];
210 
211                 t = nmod_mul(meval, vieval[j], (out + j)->mod);
212                 t = nmod_add(t, nmod_poly_get_coeff_ui(out + j, varexp),
213                                                                (out + j)->mod);
214                 nmod_poly_set_coeff_ui(out + j, varexp, t);
215                 vieval[j] = 1;
216             }
217         }
218 
219         flint_free(LUToffset);
220         flint_free(LUTmask);
221         flint_free(LUTvalue);
222         flint_free(LUTvar);
223         flint_free(LUTvalueinv);
224 
225         flint_free(vieval);
226     }
227 
228     flint_free(offsets);
229     flint_free(shifts);
230 }
231 
232 
mpoly_gcd_info_set_estimates_fmpz_mpoly(mpoly_gcd_info_t I,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)233 void mpoly_gcd_info_set_estimates_fmpz_mpoly(
234     mpoly_gcd_info_t I,
235     const fmpz_mpoly_t A,
236     const fmpz_mpoly_t B,
237     const fmpz_mpoly_ctx_t ctx,
238     const thread_pool_handle * handles,
239     slong num_handles)
240 {
241     int try_count = 0;
242     slong i, j;
243     nmod_poly_t Geval;
244     nmod_poly_struct * Aevals, * Bevals;
245     mp_limb_t p = UWORD(1) << (FLINT_BITS - 1);
246     mp_limb_t * alpha;
247     flint_rand_t randstate;
248     slong ignore_limit;
249     int * ignore;
250 
251     flint_randinit(randstate);
252 
253     ignore = (int *) flint_malloc(ctx->minfo->nvars*sizeof(int));
254     alpha = (mp_limb_t *) flint_malloc(ctx->minfo->nvars*sizeof(mp_limb_t));
255     Aevals = (nmod_poly_struct *) flint_malloc(
256                                    ctx->minfo->nvars*sizeof(nmod_poly_struct));
257     Bevals = (nmod_poly_struct *) flint_malloc(
258                                    ctx->minfo->nvars*sizeof(nmod_poly_struct));
259 
260     nmod_poly_init(Geval, p);
261     for (j = 0; j < ctx->minfo->nvars; j++)
262     {
263         nmod_poly_init(Aevals + j, p);
264         nmod_poly_init(Bevals + j, p);
265     }
266 
267     ignore_limit = A->length/4096 + B->length/4096;
268     ignore_limit = FLINT_MAX(WORD(9999), ignore_limit);
269     I->Gdeflate_deg_bounds_are_nice = 1;
270     for (j = 0; j < ctx->minfo->nvars; j++)
271     {
272         if (   I->Adeflate_deg[j] > ignore_limit
273             || I->Bdeflate_deg[j] > ignore_limit)
274         {
275             ignore[j] = 1;
276             I->Gdeflate_deg_bounds_are_nice = 0;
277         }
278         else
279         {
280             ignore[j] = 0;
281         }
282     }
283 
284 try_again:
285 
286     if (++try_count > 10)
287     {
288         I->Gdeflate_deg_bounds_are_nice = 0;
289         for (j = 0; j < ctx->minfo->nvars; j++)
290         {
291             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
292                                                  I->Bdeflate_deg[j]);
293             I->Gterm_count_est[j] = (I->Gdeflate_deg_bound[j] + 1)/2;
294         }
295 
296         goto cleanup;
297     }
298 
299     p = n_nextprime(p, 1);
300     nmod_init(&Geval->mod, p);
301     for (j = 0; j < ctx->minfo->nvars; j++)
302     {
303         alpha[j] = n_urandint(randstate, p - 1) + 1;
304         nmod_init(&(Aevals + j)->mod, p);
305         nmod_init(&(Bevals + j)->mod, p);
306     }
307 
308     fmpz_mpoly_evals(Aevals, ignore, A, I->Amin_exp, I->Amax_exp, I->Gstride,
309                                              alpha, ctx, handles, num_handles);
310     fmpz_mpoly_evals(Bevals, ignore, B, I->Bmin_exp, I->Bmax_exp, I->Gstride,
311                                              alpha, ctx, handles, num_handles);
312 
313     for (j = 0; j < ctx->minfo->nvars; j++)
314     {
315         if (ignore[j])
316         {
317             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
318                                                  I->Bdeflate_deg[j]);
319             I->Gterm_count_est[j] = (I->Gdeflate_deg_bound[j] + 1)/2;
320         }
321         else
322         {
323             if (   I->Adeflate_deg[j] != nmod_poly_degree(Aevals + j)
324                 || I->Bdeflate_deg[j] != nmod_poly_degree(Bevals + j))
325             {
326                 goto try_again;
327             }
328 
329             nmod_poly_gcd(Geval, Aevals + j, Bevals + j);
330 
331             I->Gterm_count_est[j] = 0;
332             I->Gdeflate_deg_bound[j] = nmod_poly_degree(Geval);
333             for (i = I->Gdeflate_deg_bound[j]; i >= 0; i--)
334             {
335                 I->Gterm_count_est[j] += (Geval->coeffs[i] != 0);
336             }
337         }
338     }
339 
340 cleanup:
341 
342     nmod_poly_clear(Geval);
343     for (j = 0; j < ctx->minfo->nvars; j++)
344     {
345         nmod_poly_clear(Aevals + j);
346         nmod_poly_clear(Bevals + j);
347     }
348 
349     flint_free(ignore);
350     flint_free(alpha);
351     flint_free(Aevals);
352     flint_free(Bevals);
353 
354     flint_randclear(randstate);
355 
356     return;
357 }
358 
359 
360 /*********************** Easy when B is a monomial ***************************/
_try_monomial_gcd(fmpz_mpoly_t G,flint_bitcnt_t Gbits,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)361 static int _try_monomial_gcd(
362     fmpz_mpoly_t G, flint_bitcnt_t Gbits,
363     const fmpz_mpoly_t A,
364     const fmpz_mpoly_t B,
365     const fmpz_mpoly_ctx_t ctx)
366 {
367     slong i;
368     fmpz_t g;
369     fmpz * minAfields, * minAdegs, * minBdegs;
370     TMP_INIT;
371 
372     FLINT_ASSERT(A->length > 0);
373     FLINT_ASSERT(B->length == 1);
374 
375     TMP_START;
376 
377     /* get the field-wise minimum of A */
378     minAfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
379     for (i = 0; i < ctx->minfo->nfields; i++)
380         fmpz_init(minAfields + i);
381     mpoly_min_fields_fmpz(minAfields, A->exps, A->length, A->bits, ctx->minfo);
382 
383     /* unpack to get the min degrees of each variable in A */
384     minAdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
385     for (i = 0; i < ctx->minfo->nvars; i++)
386         fmpz_init(minAdegs + i);
387     mpoly_get_monomial_ffmpz_unpacked_ffmpz(minAdegs, minAfields, ctx->minfo);
388 
389     /* get the degree of each variable in B */
390     minBdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
391     for (i = 0; i < ctx->minfo->nvars; i++)
392         fmpz_init(minBdegs + i);
393     mpoly_get_monomial_ffmpz(minBdegs, B->exps, B->bits, ctx->minfo);
394 
395     /* compute the degree of each variable in G */
396     _fmpz_vec_min_inplace(minBdegs, minAdegs, ctx->minfo->nvars);
397 
398     /* compute the coefficient of G */
399     fmpz_init_set(g, B->coeffs + 0);
400     _fmpz_vec_content_chained(g, A->coeffs, A->length);
401 
402     /* write G */
403     fmpz_mpoly_fit_length(G, 1, ctx);
404     fmpz_mpoly_fit_bits(G, Gbits, ctx);
405     G->bits = Gbits;
406     mpoly_set_monomial_ffmpz(G->exps, minBdegs, Gbits, ctx->minfo);
407     fmpz_swap(G->coeffs + 0, g);
408     fmpz_clear(g);
409     _fmpz_mpoly_set_length(G, 1, ctx);
410 
411     for (i = 0; i < ctx->minfo->nfields; i++)
412     {
413         fmpz_clear(minAfields + i);
414     }
415     for (i = 0; i < ctx->minfo->nvars; i++)
416     {
417         fmpz_clear(minAdegs + i);
418         fmpz_clear(minBdegs + i);
419     }
420 
421     TMP_END;
422 
423     return 1;
424 }
425 
426 
427 /********************** See if cofactors are monomials ***********************/
_try_monomial_cofactors(fmpz_mpoly_t G,flint_bitcnt_t Gbits,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)428 static int _try_monomial_cofactors(
429     fmpz_mpoly_t G, flint_bitcnt_t Gbits,
430     const fmpz_mpoly_t A,
431     const fmpz_mpoly_t B,
432     const fmpz_mpoly_ctx_t ctx)
433 {
434     int success;
435     slong i, j;
436     slong NA, NG;
437     slong nvars = ctx->minfo->nvars;
438     fmpz * Abarexps, * Bbarexps, * Texps;
439     fmpz_t t1, t2;
440     fmpz_t gA, gB;
441     fmpz_mpoly_t T;
442     TMP_INIT;
443 
444     FLINT_ASSERT(A->length > 0);
445     FLINT_ASSERT(B->length > 0);
446 
447     if (A->length != B->length)
448         return 0;
449 
450     fmpz_init(t1);
451     fmpz_init(t2);
452     fmpz_init_set(gA, A->coeffs + 0);
453     fmpz_init_set(gB, B->coeffs + 0);
454 
455     for (i = A->length - 1; i > 0; i--)
456     {
457         fmpz_mul(t1, A->coeffs + 0, B->coeffs + i);
458         fmpz_mul(t2, B->coeffs + 0, A->coeffs + i);
459         success = fmpz_equal(t1, t2);
460         if (!success)
461             goto cleanup;
462 
463         fmpz_gcd(gA, gA, A->coeffs + i);
464         fmpz_gcd(gB, gB, B->coeffs + i);
465     }
466 
467     TMP_START;
468 
469     Abarexps = (fmpz *) TMP_ALLOC(3*nvars*sizeof(fmpz));
470     Bbarexps = Abarexps + 1*nvars;
471     Texps    = Abarexps + 2*nvars;
472     for (j = 0; j < nvars; j++)
473     {
474         fmpz_init(Abarexps + j);
475         fmpz_init(Bbarexps + j);
476         fmpz_init(Texps + j);
477     }
478 
479     success = mpoly_monomial_cofactors(Abarexps, Bbarexps, A->exps, A->bits,
480                                       B->exps, B->bits, A->length, ctx->minfo);
481     if (!success)
482         goto cleanup_tmp;
483 
484     /* put A's cofactor coefficient in t1 */
485     fmpz_gcd(t2, gA, gB);
486     fmpz_divexact(t1, gA, t2);
487     if (fmpz_sgn(A->coeffs + 0) < 0)
488         fmpz_neg(t1, t1);
489 
490     fmpz_mpoly_init3(T, A->length, Gbits, ctx);
491     NG = mpoly_words_per_exp(Gbits, ctx->minfo);
492     NA = mpoly_words_per_exp(A->bits, ctx->minfo);
493     T->length = A->length;
494     for (i = 0; i < A->length; i++)
495     {
496         mpoly_get_monomial_ffmpz(Texps, A->exps + NA*i, A->bits, ctx->minfo);
497         _fmpz_vec_sub(Texps, Texps, Abarexps, nvars);
498         mpoly_set_monomial_ffmpz(T->exps + NG*i, Texps, Gbits, ctx->minfo);
499         fmpz_divexact(T->coeffs + i, A->coeffs + i, t1);
500     }
501     fmpz_mpoly_swap(G, T, ctx);
502     fmpz_mpoly_clear(T, ctx);
503 
504     success = 1;
505 
506 cleanup_tmp:
507 
508     for (j = 0; j < nvars; j++)
509     {
510         fmpz_clear(Abarexps + j);
511         fmpz_clear(Bbarexps + j);
512         fmpz_clear(Texps + j);
513     }
514 
515     TMP_END;
516 
517 cleanup:
518 
519     fmpz_clear(t1);
520     fmpz_clear(t2);
521     fmpz_clear(gA);
522     fmpz_clear(gB);
523 
524     return success;
525 }
526 
527 
528 /********* Assume B has length one when converted to univar format ***********/
_try_missing_var(fmpz_mpoly_t G,flint_bitcnt_t Gbits,slong var,const fmpz_mpoly_t A,ulong Ashift,const fmpz_mpoly_t B,ulong Bshift,const fmpz_mpoly_ctx_t ctx)529 static int _try_missing_var(
530     fmpz_mpoly_t G, flint_bitcnt_t Gbits,
531     slong var,
532     const fmpz_mpoly_t A, ulong Ashift,
533     const fmpz_mpoly_t B, ulong Bshift,
534     const fmpz_mpoly_ctx_t ctx)
535 {
536     int success;
537     slong i;
538     fmpz_mpoly_t tG;
539     fmpz_mpoly_univar_t Ax;
540 
541     fmpz_mpoly_init(tG, ctx);
542     fmpz_mpoly_univar_init(Ax, ctx);
543 
544     fmpz_mpoly_to_univar(Ax, A, var, ctx);
545 
546     FLINT_ASSERT(Ax->length > 0);
547     success = _fmpz_mpoly_gcd_threaded_pool(tG, Gbits, B, Ax->coeffs + 0, ctx, NULL, 0);
548     if (!success)
549         goto cleanup;
550 
551     for (i = 1; i < Ax->length; i++)
552     {
553         success = _fmpz_mpoly_gcd_threaded_pool(tG, Gbits, tG, Ax->coeffs + i, ctx, NULL, 0);
554         if (!success)
555             goto cleanup;
556     }
557 
558     fmpz_mpoly_swap(G, tG, ctx);
559     _mpoly_gen_shift_left(G->exps, G->bits, G->length,
560                                    var, FLINT_MIN(Ashift, Bshift), ctx->minfo);
561 
562 cleanup:
563 
564     fmpz_mpoly_clear(tG, ctx);
565     fmpz_mpoly_univar_clear(Ax, ctx);
566 
567     return success;
568 }
569 
570 
571 /******************* Test if B divides A or A divides B **********************/
_try_divides(fmpz_mpoly_t G,const fmpz_mpoly_t A,int try_a,const fmpz_mpoly_t B,int try_b,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)572 static int _try_divides(
573     fmpz_mpoly_t G,
574     const fmpz_mpoly_t A, int try_a,
575     const fmpz_mpoly_t B, int try_b,
576     const fmpz_mpoly_ctx_t ctx,
577     const thread_pool_handle * handles,
578     slong num_handles)
579 {
580     int success;
581     fmpz_t cA, cB, cG;
582     fmpz_mpoly_t Q;
583     fmpz_mpoly_t AA, BB;
584     slong AA_alloc, BB_alloc;
585 
586     *AA = *A;
587     *BB = *B;
588     fmpz_init(cA);
589     fmpz_init(cB);
590     fmpz_init(cG);
591     fmpz_mpoly_init(Q, ctx);
592 
593     _fmpz_vec_content(cA, A->coeffs, A->length);
594     _fmpz_vec_content(cB, B->coeffs, B->length);
595     fmpz_gcd(cG, cA, cB);
596 
597     AA_alloc = 0;
598     if (!fmpz_is_one(cA))
599     {
600         AA_alloc = A->length;
601         AA->coeffs = _fmpz_vec_init(A->length);
602         _fmpz_vec_scalar_divexact_fmpz(AA->coeffs, A->coeffs, A->length, cA);
603         FLINT_ASSERT(AA_alloc > 0);
604     }
605 
606     BB_alloc = 0;
607     if (!fmpz_is_one(cB))
608     {
609         BB_alloc = B->length;
610         BB->coeffs = _fmpz_vec_init(B->length);
611         _fmpz_vec_scalar_divexact_fmpz(BB->coeffs, B->coeffs, B->length, cB);
612         FLINT_ASSERT(BB_alloc > 0);
613     }
614 
615     fmpz_divexact(cA, cA, cG);
616     fmpz_divexact(cB, cB, cG);
617 
618     if (try_b &&
619         ((num_handles > 0) ? _fmpz_mpoly_divides_heap_threaded_pool(Q, AA, BB,
620                                                      ctx, handles, num_handles)
621                            : fmpz_mpoly_divides_monagan_pearce(Q, AA, BB, ctx)))
622     {
623         fmpz_mpoly_scalar_divexact_fmpz(G, B, cB, ctx);
624         success = 1;
625         goto cleanup;
626     }
627 
628     if (try_a &&
629         ((num_handles > 0) ? _fmpz_mpoly_divides_heap_threaded_pool(Q, BB, AA,
630                                                      ctx, handles, num_handles)
631                            : fmpz_mpoly_divides_monagan_pearce(Q, BB, AA, ctx)))
632     {
633         fmpz_mpoly_scalar_divexact_fmpz(G, A, cA, ctx);
634         success = 1;
635         goto cleanup;
636     }
637 
638     success = 0;
639 
640 cleanup:
641 
642     if (AA_alloc > 0)
643         _fmpz_vec_clear(AA->coeffs, AA_alloc);
644 
645     if (BB_alloc > 0)
646         _fmpz_vec_clear(BB->coeffs, BB_alloc);
647 
648     fmpz_mpoly_clear(Q, ctx);
649     fmpz_clear(cA);
650     fmpz_clear(cB);
651     fmpz_clear(cG);
652 
653     return success;
654 }
655 
656 
657 /********************** Hit A and B with zippel ******************************/
_try_zippel(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const mpoly_gcd_info_t I,const fmpz_mpoly_ctx_t ctx)658 static int _try_zippel(
659     fmpz_mpoly_t G,
660     const fmpz_mpoly_t A,
661     const fmpz_mpoly_t B,
662     const mpoly_gcd_info_t I,
663     const fmpz_mpoly_ctx_t ctx)
664 {
665     slong i, k;
666     slong m = I->mvars;
667     int success;
668     mpoly_zipinfo_t zinfo;
669     flint_bitcnt_t wbits;
670     flint_rand_t randstate;
671     fmpz_mpoly_ctx_t uctx;
672     fmpz_mpolyu_t Au, Bu, Gu, Abaru, Bbaru;
673     fmpz_mpoly_t Ac, Bc, Gc;
674 
675     FLINT_ASSERT(A->bits <= FLINT_BITS);
676     FLINT_ASSERT(B->bits <= FLINT_BITS);
677 
678     if (!I->can_use_zippel)
679         return 0;
680 
681     FLINT_ASSERT(m >= WORD(2));
682     FLINT_ASSERT(A->length > 0);
683     FLINT_ASSERT(B->length > 0);
684 
685     flint_randinit(randstate);
686 
687     /* interpolation will continue in m variables */
688     mpoly_zipinfo_init(zinfo, m);
689 
690     /* uctx is context for Z[y_1,...,y_{m-1}]*/
691     fmpz_mpoly_ctx_init(uctx, m - 1, ORD_LEX);
692 
693     /* fill in a valid zinfo->perm and degrees */
694     for (i = 0; i < m; i++)
695     {
696         k = I->zippel_perm[i];
697         zinfo->perm[i] = k;
698         zinfo->Adegs[i] = I->Adeflate_deg[k];
699         zinfo->Bdegs[i] = I->Bdeflate_deg[k];
700         FLINT_ASSERT(I->Adeflate_deg[k] != 0);
701         FLINT_ASSERT(I->Bdeflate_deg[k] != 0);
702     }
703 
704     wbits = FLINT_MAX(A->bits, B->bits);
705 
706     fmpz_mpolyu_init(Au, wbits, uctx);
707     fmpz_mpolyu_init(Bu, wbits, uctx);
708     fmpz_mpolyu_init(Gu, wbits, uctx);
709     fmpz_mpolyu_init(Abaru, wbits, uctx);
710     fmpz_mpolyu_init(Bbaru, wbits, uctx);
711     fmpz_mpoly_init3(Ac, 0, wbits, uctx);
712     fmpz_mpoly_init3(Bc, 0, wbits, uctx);
713     fmpz_mpoly_init3(Gc, 0, wbits, uctx);
714 
715     fmpz_mpoly_to_mpolyu_perm_deflate_threaded_pool(Au, uctx, A, ctx, zinfo->perm,
716                                 I->Amin_exp, I->Gstride, I->Amax_exp, NULL, 0);
717     fmpz_mpoly_to_mpolyu_perm_deflate_threaded_pool(Bu, uctx, B, ctx, zinfo->perm,
718                                 I->Bmin_exp, I->Gstride, I->Bmax_exp, NULL, 0);
719 
720     success = fmpz_mpolyu_content_mpoly_threaded_pool(Ac, Au, uctx, NULL, 0);
721     success = success &&
722               fmpz_mpolyu_content_mpoly_threaded_pool(Bc, Bu, uctx, NULL, 0);
723     if (!success)
724         goto cleanup;
725 
726     fmpz_mpolyu_divexact_mpoly_inplace(Au, Ac, uctx);
727     fmpz_mpolyu_divexact_mpoly_inplace(Bu, Bc, uctx);
728 
729     /* after removing content, degree bounds in zinfo are still valid bounds */
730     success = fmpz_mpolyu_gcdm_zippel(Gu, Abaru, Bbaru, Au, Bu,
731                                                        uctx, zinfo, randstate);
732     if (!success)
733         goto cleanup;
734 
735     success = _fmpz_mpoly_gcd_threaded_pool(Gc, wbits, Ac, Bc, uctx, NULL, 0);
736     if (!success)
737         goto cleanup;
738 
739     fmpz_mpolyu_mul_mpoly_inplace(Gu, Gc, uctx);
740 
741     fmpz_mpoly_from_mpolyu_perm_inflate(G, I->Gbits, ctx, Gu, uctx,
742                                          zinfo->perm, I->Gmin_exp, I->Gstride);
743     success = 1;
744 
745 cleanup:
746 
747     fmpz_mpolyu_clear(Au, uctx);
748     fmpz_mpolyu_clear(Bu, uctx);
749     fmpz_mpolyu_clear(Gu, uctx);
750     fmpz_mpolyu_clear(Abaru, uctx);
751     fmpz_mpolyu_clear(Bbaru, uctx);
752     fmpz_mpoly_clear(Ac, uctx);
753     fmpz_mpoly_clear(Bc, uctx);
754     fmpz_mpoly_clear(Gc, uctx);
755 
756     fmpz_mpoly_ctx_clear(uctx);
757 
758     mpoly_zipinfo_clear(zinfo);
759 
760     flint_randclear(randstate);
761 
762     return success;
763 }
764 
765 
766 /************************ Hit A and B with bma *******************************/
767 typedef struct
768 {
769     const fmpz_mpoly_struct * P;
770     fmpz_mpoly_struct * Pcontent;
771     fmpz_mpolyu_struct * Puu;
772     const slong * perm;
773     const ulong * shift, * stride, * maxexps;
774     const fmpz_mpoly_ctx_struct * ctx;
775     const fmpz_mpoly_ctx_struct * uctx;
776     const thread_pool_handle * handles;
777     slong num_handles;
778     int success;
779 }
780 _convertuu_arg_struct;
781 
782 typedef _convertuu_arg_struct _convertuu_arg_t[1];
783 
_worker_convertuu(void * varg)784 static void _worker_convertuu(void * varg)
785 {
786     _convertuu_arg_struct * arg = (_convertuu_arg_struct *) varg;
787 
788     fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(arg->Puu, arg->uctx, arg->P, arg->ctx,
789                              arg->perm, arg->shift, arg->stride, arg->maxexps,
790                                                arg->handles, arg->num_handles);
791 
792     arg->success = fmpz_mpolyu_content_mpoly_threaded_pool(arg->Pcontent,
793                           arg->Puu, arg->uctx, arg->handles, arg->num_handles);
794     if (arg->success)
795     {
796         fmpz_mpolyu_divexact_mpoly_inplace(arg->Puu, arg->Pcontent, arg->uctx);
797     }
798 }
799 
_try_bma(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const mpoly_gcd_info_t I,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)800 static int _try_bma(
801     fmpz_mpoly_t G,
802     const fmpz_mpoly_t A,
803     const fmpz_mpoly_t B,
804     const mpoly_gcd_info_t I,
805     const fmpz_mpoly_ctx_t ctx,
806     const thread_pool_handle * handles,
807     slong num_handles)
808 {
809     slong i, k;
810     slong m = I->mvars;
811     int success;
812     flint_bitcnt_t wbits;
813     fmpz_mpoly_ctx_t uctx;
814     fmpz_mpolyu_t Auu, Buu, Guu, Abaruu, Bbaruu;
815     fmpz_mpoly_t Ac, Bc, Gc, Gamma;
816     slong max_minor_degree;
817 
818     FLINT_ASSERT(A->bits <= FLINT_BITS);
819     FLINT_ASSERT(B->bits <= FLINT_BITS);
820     FLINT_ASSERT(A->length > 0);
821     FLINT_ASSERT(B->length > 0);
822 
823     if (!I->can_use_bma)
824         return 0;
825 
826     FLINT_ASSERT(m >= WORD(3));
827 
828     /* uctx is context for Z[y_2,...,y_{m - 1}]*/
829     fmpz_mpoly_ctx_init(uctx, m - 2, ORD_LEX);
830 
831     max_minor_degree = 0;
832     for (i = 2; i < m; i++)
833     {
834         k = I->bma_perm[i];
835         max_minor_degree = FLINT_MAX(max_minor_degree, I->Adeflate_deg[k]);
836         max_minor_degree = FLINT_MAX(max_minor_degree, I->Bdeflate_deg[k]);
837     }
838 
839     wbits = 1 + FLINT_BIT_COUNT(max_minor_degree);
840     wbits = FLINT_MAX(MPOLY_MIN_BITS, wbits);
841     wbits = mpoly_fix_bits(wbits, uctx->minfo);
842     FLINT_ASSERT(wbits <= FLINT_BITS);
843 
844     fmpz_mpolyu_init(Auu, wbits, uctx);
845     fmpz_mpolyu_init(Buu, wbits, uctx);
846     fmpz_mpolyu_init(Guu, wbits, uctx);
847     fmpz_mpolyu_init(Abaruu, wbits, uctx);
848     fmpz_mpolyu_init(Bbaruu, wbits, uctx);
849     fmpz_mpoly_init3(Ac, 0, wbits, uctx);
850     fmpz_mpoly_init3(Bc, 0, wbits, uctx);
851     fmpz_mpoly_init3(Gc, 0, wbits, uctx);
852     fmpz_mpoly_init3(Gamma, 0, wbits, uctx);
853 
854     /* convert to bivariate format and remove content from A and B */
855     if (num_handles > 0)
856     {
857         slong s = mpoly_divide_threads(num_handles, A->length, B->length);
858         _convertuu_arg_t arg;
859 
860         FLINT_ASSERT(s >= 0);
861         FLINT_ASSERT(s < num_handles);
862 
863         arg->ctx = ctx;
864         arg->uctx = uctx;
865         arg->P = B;
866         arg->Puu = Buu;
867         arg->Pcontent = Bc;
868         arg->perm = I->bma_perm;
869         arg->shift = I->Bmin_exp;
870         arg->stride = I->Gstride;
871         arg->maxexps = I->Bmax_exp;
872         arg->handles = handles + (s + 1);
873         arg->num_handles = num_handles - (s + 1);
874 
875         thread_pool_wake(global_thread_pool, handles[s], 0, _worker_convertuu, arg);
876 
877         fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Auu, uctx, A, ctx,
878                           I->bma_perm, I->Amin_exp, I->Gstride, I->Amax_exp,
879                                                                handles + 0, s);
880         success = fmpz_mpolyu_content_mpoly_threaded_pool(Ac, Auu,
881                                                          uctx, handles + 0, s);
882         if (success)
883         {
884             fmpz_mpolyu_divexact_mpoly_inplace(Auu, Ac, uctx);
885         }
886 
887         thread_pool_wait(global_thread_pool, handles[s]);
888 
889         success = success && arg->success;
890         if (!success)
891             goto cleanup;
892     }
893     else
894     {
895         fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Auu, uctx, A, ctx,
896                    I->bma_perm, I->Amin_exp, I->Gstride, I->Amax_exp, NULL, 0);
897         fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Buu, uctx, B, ctx,
898                    I->bma_perm, I->Bmin_exp, I->Gstride, I->Bmax_exp, NULL, 0);
899 
900         success = fmpz_mpolyu_content_mpoly_threaded_pool(Ac, Auu, uctx, NULL, 0);
901         success = success &&
902                   fmpz_mpolyu_content_mpoly_threaded_pool(Bc, Buu, uctx, NULL, 0);
903         if (!success)
904             goto cleanup;
905 
906         fmpz_mpolyu_divexact_mpoly_inplace(Auu, Ac, uctx);
907         fmpz_mpolyu_divexact_mpoly_inplace(Buu, Bc, uctx);
908     }
909 
910     FLINT_ASSERT(Auu->bits == wbits);
911     FLINT_ASSERT(Buu->bits == wbits);
912     FLINT_ASSERT(Auu->length > 1);
913     FLINT_ASSERT(Buu->length > 1);
914     FLINT_ASSERT(Ac->bits == wbits);
915     FLINT_ASSERT(Bc->bits == wbits);
916 
917     _fmpz_mpoly_gcd_threaded_pool(Gamma, wbits, Auu->coeffs + 0, Buu->coeffs + 0,
918                                                    uctx, handles, num_handles);
919     if (!success)
920         goto cleanup;
921 
922     success = (num_handles > 0)
923            ? fmpz_mpolyuu_gcd_berlekamp_massey_threaded_pool(Guu, Abaruu, Bbaruu,
924                                   Auu, Buu, Gamma, uctx, handles, num_handles)
925            : fmpz_mpolyuu_gcd_berlekamp_massey(Guu, Abaruu, Bbaruu,
926                                                         Auu, Buu, Gamma, uctx);
927     if (!success)
928         goto cleanup;
929 
930     success = _fmpz_mpoly_gcd_threaded_pool(Gc, wbits, Ac, Bc, uctx, handles, num_handles);
931     if (!success)
932         goto cleanup;
933 
934     fmpz_mpolyu_mul_mpoly_inplace(Guu, Gc, uctx);
935 
936     fmpz_mpoly_from_mpolyuu_perm_inflate(G, I->Gbits, ctx, Guu, uctx,
937                                          I->bma_perm, I->Gmin_exp, I->Gstride);
938     success = 1;
939 
940 cleanup:
941 
942     fmpz_mpolyu_clear(Auu, uctx);
943     fmpz_mpolyu_clear(Buu, uctx);
944     fmpz_mpolyu_clear(Guu, uctx);
945     fmpz_mpolyu_clear(Abaruu, uctx);
946     fmpz_mpolyu_clear(Bbaruu, uctx);
947     fmpz_mpoly_clear(Ac, uctx);
948     fmpz_mpoly_clear(Bc, uctx);
949     fmpz_mpoly_clear(Gc, uctx);
950     fmpz_mpoly_clear(Gamma, uctx);
951 
952     fmpz_mpoly_ctx_clear(uctx);
953 
954     return success;
955 }
956 
957 
958 /*********************** Hit A and B with brown ******************************/
959 typedef struct
960 {
961     fmpz_mpoly_struct * Pl;
962     const fmpz_mpoly_ctx_struct * lctx;
963     const fmpz_mpoly_struct * P;
964     const fmpz_mpoly_ctx_struct * ctx;
965     const slong * perm;
966     const ulong * shift, * stride, * maxexps;
967     const thread_pool_handle * handles;
968     slong num_handles;
969 }
970 _convertl_arg_struct;
971 
972 typedef _convertl_arg_struct _convertl_arg_t[1];
973 
_worker_convertu(void * varg)974 static void _worker_convertu(void * varg)
975 {
976     _convertl_arg_struct * arg = (_convertl_arg_struct *) varg;
977 
978     fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(arg->Pl, arg->lctx, arg->P, arg->ctx,
979                                          arg->perm, arg->shift, arg->stride,
980                                                arg->handles, arg->num_handles);
981 }
982 
_try_brown(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,mpoly_gcd_info_t I,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)983 static int _try_brown(
984     fmpz_mpoly_t G,
985     const fmpz_mpoly_t A,
986     const fmpz_mpoly_t B,
987     mpoly_gcd_info_t I,
988     const fmpz_mpoly_ctx_t ctx,
989     const thread_pool_handle * handles,
990     slong num_handles)
991 {
992     int success;
993     slong m = I->mvars;
994     flint_bitcnt_t wbits;
995     fmpz_mpoly_ctx_t lctx;
996     fmpz_mpoly_t Al, Bl, Gl, Abarl, Bbarl;
997 
998     if (!I->can_use_brown)
999         return 0;
1000 
1001     FLINT_ASSERT(m >= 2);
1002     FLINT_ASSERT(A->bits <= FLINT_BITS);
1003     FLINT_ASSERT(B->bits <= FLINT_BITS);
1004     FLINT_ASSERT(A->length > 0);
1005     FLINT_ASSERT(B->length > 0);
1006 
1007     wbits = FLINT_MAX(A->bits, B->bits);
1008 
1009     fmpz_mpoly_ctx_init(lctx, m, ORD_LEX);
1010     fmpz_mpoly_init3(Al, 0, wbits, lctx);
1011     fmpz_mpoly_init3(Bl, 0, wbits, lctx);
1012     fmpz_mpoly_init3(Gl, 0, wbits, lctx);
1013     fmpz_mpoly_init3(Abarl, 0, wbits, lctx);
1014     fmpz_mpoly_init3(Bbarl, 0, wbits, lctx);
1015 
1016     /* convert to univariate format */
1017     if (num_handles > 0)
1018     {
1019         slong s = mpoly_divide_threads(num_handles, A->length, B->length);
1020         _convertl_arg_t arg;
1021 
1022         FLINT_ASSERT(s >= 0);
1023         FLINT_ASSERT(s < num_handles);
1024 
1025         arg->Pl = Bl;
1026         arg->lctx = lctx;
1027         arg->P = B;
1028         arg->ctx = ctx;
1029         arg->perm = I->brown_perm;
1030         arg->shift = I->Bmin_exp;
1031         arg->stride = I->Gstride;
1032         arg->maxexps = I->Bmax_exp;
1033         arg->handles = handles + (s + 1);
1034         arg->num_handles = num_handles - (s + 1);
1035 
1036         thread_pool_wake(global_thread_pool, handles[s], 0, _worker_convertu, arg);
1037 
1038         fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(Al, lctx, A, ctx,
1039                                     I->brown_perm, I->Amin_exp, I->Gstride,
1040                                                                handles + 0, s);
1041 
1042         thread_pool_wait(global_thread_pool, handles[s]);
1043     }
1044     else
1045     {
1046         fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(Al, lctx, A, ctx,
1047                               I->brown_perm, I->Amin_exp, I->Gstride, NULL, 0);
1048         fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(Bl, lctx, B, ctx,
1049                               I->brown_perm, I->Bmin_exp, I->Gstride, NULL, 0);
1050     }
1051 
1052     FLINT_ASSERT(Al->bits == wbits);
1053     FLINT_ASSERT(Bl->bits == wbits);
1054     FLINT_ASSERT(Al->length > 1);
1055     FLINT_ASSERT(Bl->length > 1);
1056 
1057     success = (num_handles > 0)
1058            ? fmpz_mpolyl_gcd_brown_threaded_pool(Gl, Abarl, Bbarl, Al, Bl, lctx, I,
1059                                                          handles, num_handles)
1060            : fmpz_mpolyl_gcd_brown(Gl, Abarl, Bbarl, Al, Bl, lctx, I);
1061 
1062     if (!success)
1063         goto cleanup;
1064 
1065     fmpz_mpoly_from_mpoly_perm_inflate(G, I->Gbits, ctx, Gl, lctx,
1066                                        I->brown_perm, I->Gmin_exp, I->Gstride);
1067     success = 1;
1068 
1069 cleanup:
1070 
1071     fmpz_mpoly_clear(Al, lctx);
1072     fmpz_mpoly_clear(Bl, lctx);
1073     fmpz_mpoly_clear(Gl, lctx);
1074     fmpz_mpoly_clear(Abarl, lctx);
1075     fmpz_mpoly_clear(Bbarl, lctx);
1076     fmpz_mpoly_ctx_clear(lctx);
1077 
1078     return success;
1079 }
1080 
1081 
1082 /*
1083     The function must pack a successful answer into bits = Gbits <= FLINT_BITS.
1084     Both A and B have to be packed into bits <= FLINT_BITS.
1085     return is 1 for success, 0 for failure.
1086 */
_fmpz_mpoly_gcd_threaded_pool(fmpz_mpoly_t G,flint_bitcnt_t Gbits,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)1087 int _fmpz_mpoly_gcd_threaded_pool(
1088     fmpz_mpoly_t G, flint_bitcnt_t Gbits,
1089     const fmpz_mpoly_t A,
1090     const fmpz_mpoly_t B,
1091     const fmpz_mpoly_ctx_t ctx,
1092     const thread_pool_handle * handles,
1093     slong num_handles)
1094 {
1095     int success;
1096     slong v_in_both;
1097     slong v_in_either;
1098     slong v_in_A_only;
1099     slong v_in_B_only;
1100     slong j;
1101     slong nvars = ctx->minfo->nvars;
1102     mpoly_gcd_info_t I;
1103 
1104     FLINT_ASSERT(A->length > 0);
1105     FLINT_ASSERT(B->length > 0);
1106     FLINT_ASSERT(Gbits <= FLINT_BITS);
1107     FLINT_ASSERT(A->bits <= FLINT_BITS);
1108     FLINT_ASSERT(B->bits <= FLINT_BITS);
1109 
1110     if (A->length == 1)
1111     {
1112         return _try_monomial_gcd(G, Gbits, B, A, ctx);
1113     }
1114     else if (B->length == 1)
1115     {
1116         return _try_monomial_gcd(G, Gbits, A, B, ctx);
1117     }
1118 
1119     mpoly_gcd_info_init(I, nvars);
1120 
1121     /* entries of I are all now invalid */
1122 
1123     I->Gbits = Gbits;
1124 
1125     mpoly_gcd_info_limits(I->Amax_exp, I->Amin_exp, I->Alead_count,
1126                       I->Atail_count, A->exps, A->bits, A->length, ctx->minfo);
1127     mpoly_gcd_info_limits(I->Bmax_exp, I->Bmin_exp, I->Blead_count,
1128                       I->Btail_count, B->exps, B->bits, B->length, ctx->minfo);
1129 
1130     /* set ess(p) := p/term_content(p) */
1131 
1132     /* check if the cofactors could be monomials, i.e. ess(A) == ess(B) */
1133     for (j = 0; j < nvars; j++)
1134     {
1135         if (I->Amax_exp[j] - I->Amin_exp[j] != I->Bmax_exp[j] - I->Bmin_exp[j])
1136             goto skip_monomial_cofactors;
1137     }
1138     if (_try_monomial_cofactors(G, I->Gbits, A, B, ctx))
1139     {
1140         goto successful;
1141     }
1142 
1143 skip_monomial_cofactors:
1144 
1145     mpoly_gcd_info_stride(I->Gstride,
1146             A->exps, A->bits, A->length, I->Amax_exp, I->Amin_exp,
1147             B->exps, B->bits, B->length, I->Bmax_exp, I->Bmin_exp, ctx->minfo);
1148 
1149     for (j = 0; j < nvars; j++)
1150     {
1151         ulong t = I->Gstride[j];
1152 
1153         if (t == 0)
1154         {
1155             FLINT_ASSERT(  I->Amax_exp[j] == I->Amin_exp[j]
1156                         || I->Bmax_exp[j] == I->Bmin_exp[j]);
1157         }
1158         else
1159         {
1160             FLINT_ASSERT((I->Amax_exp[j] - I->Amin_exp[j]) % t == 0);
1161             FLINT_ASSERT((I->Bmax_exp[j] - I->Bmin_exp[j]) % t == 0);
1162         }
1163 
1164         I->Adeflate_deg[j] = t == 0 ? 0 : (I->Amax_exp[j] - I->Amin_exp[j])/t;
1165         I->Bdeflate_deg[j] = t == 0 ? 0 : (I->Bmax_exp[j] - I->Bmin_exp[j])/t;
1166         I->Gmin_exp[j] = FLINT_MIN(I->Amin_exp[j], I->Bmin_exp[j]);
1167     }
1168 
1169     /*
1170         The following are now valid:
1171             I->Amax_exp, I->Amin_exp, I->Alead_count, I->Atail_count,
1172             I->Bmax_exp, I->Bmin_exp, I->Blead_count, I->Btail_count,
1173             I->Gstride
1174             I->Adeflate_deg
1175             I->Bdeflate_deg
1176             I->Gmin_exp
1177     */
1178 
1179     /* check if ess(A) and ess(B) have a variable v_in_both in common */
1180     v_in_both = -WORD(1);
1181     for (j = 0; j < nvars; j++)
1182     {
1183         if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] > I->Bmin_exp[j])
1184         {
1185             v_in_both = j;
1186             break;
1187         }
1188     }
1189     if (v_in_both == -WORD(1))
1190     {
1191         /*
1192             The variables in ess(A) and ess(B) are disjoint.
1193             gcd is trivial to compute.
1194         */
1195         fmpz_t gA, gB;
1196 
1197 calculate_trivial_gcd:
1198 
1199         fmpz_init(gA);
1200         fmpz_init(gB);
1201         _fmpz_vec_content(gA, A->coeffs, A->length);
1202         _fmpz_vec_content(gB, B->coeffs, B->length);
1203 
1204         fmpz_mpoly_fit_length(G, 1, ctx);
1205         fmpz_mpoly_fit_bits(G, Gbits, ctx);
1206         G->bits = Gbits;
1207         mpoly_set_monomial_ui(G->exps, I->Gmin_exp, Gbits, ctx->minfo);
1208         fmpz_gcd(G->coeffs + 0, gA, gB);
1209         _fmpz_mpoly_set_length(G, 1, ctx);
1210 
1211         fmpz_clear(gA);
1212         fmpz_clear(gB);
1213 
1214         goto successful;
1215     }
1216 
1217     /* check if ess(A) and ess(B) depend on another variable v_in_either */
1218     FLINT_ASSERT(0 <= v_in_both);
1219     FLINT_ASSERT(v_in_both < nvars);
1220 
1221     v_in_either = -WORD(1);
1222     for (j = 0; j < nvars; j++)
1223     {
1224         if (j == v_in_both)
1225             continue;
1226 
1227         if (I->Amax_exp[j] > I->Amin_exp[j] || I->Bmax_exp[j] > I->Bmin_exp[j])
1228         {
1229             v_in_either = j;
1230             break;
1231         }
1232     }
1233 
1234     if (v_in_either == -WORD(1))
1235     {
1236         /*
1237             The ess(A) and ess(B) depend on only one variable v_in_both
1238             Calculate gcd using univariates
1239         */
1240         fmpz_poly_t a, b, g;
1241 
1242         fmpz_poly_init(a);
1243         fmpz_poly_init(b);
1244         fmpz_poly_init(g);
1245         _fmpz_mpoly_to_fmpz_poly_deflate(a, A, v_in_both,
1246                                                  I->Amin_exp, I->Gstride, ctx);
1247         _fmpz_mpoly_to_fmpz_poly_deflate(b, B, v_in_both,
1248                                                  I->Bmin_exp, I->Gstride, ctx);
1249         fmpz_poly_gcd(g, a, b);
1250         _fmpz_mpoly_from_fmpz_poly_inflate(G, Gbits, g, v_in_both,
1251                                                  I->Gmin_exp, I->Gstride, ctx);
1252         fmpz_poly_clear(a);
1253         fmpz_poly_clear(b);
1254         fmpz_poly_clear(g);
1255 
1256         goto successful;
1257     }
1258 
1259     /* check if there is a variable in ess(A) that is not in ess(B) */
1260     v_in_A_only = -WORD(1);
1261     v_in_B_only = -WORD(1);
1262     for (j = 0; j < nvars; j++)
1263     {
1264         if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] == I->Bmin_exp[j])
1265         {
1266             v_in_A_only = j;
1267             break;
1268         }
1269         if (I->Bmax_exp[j] > I->Bmin_exp[j] && I->Amax_exp[j] == I->Amin_exp[j])
1270         {
1271             v_in_B_only = j;
1272             break;
1273         }
1274     }
1275     if (v_in_A_only != -WORD(1))
1276     {
1277         success = _try_missing_var(G, I->Gbits,
1278                                    v_in_A_only,
1279                                    A, I->Amin_exp[v_in_A_only],
1280                                    B, I->Bmin_exp[v_in_A_only],
1281                                    ctx);
1282         goto cleanup;
1283     }
1284     if (v_in_B_only != -WORD(1))
1285     {
1286         success = _try_missing_var(G, I->Gbits,
1287                                    v_in_B_only,
1288                                    B, I->Bmin_exp[v_in_B_only],
1289                                    A, I->Amin_exp[v_in_B_only],
1290                                    ctx);
1291         goto cleanup;
1292     }
1293 
1294     /*
1295         all variable are now either
1296             missing from both ess(A) and ess(B), or
1297             present in both ess(A) and ess(B)
1298         and there are at least two in the latter case
1299     */
1300 
1301     mpoly_gcd_info_set_estimates_fmpz_mpoly(I, A, B, ctx, handles, num_handles);
1302     mpoly_gcd_info_set_perm(I, A->length, B->length, ctx->minfo);
1303 
1304     /* everything in I is valid now */
1305 
1306     /* check divisibility A/B and B/A */
1307     {
1308         int gcd_is_trivial = 1;
1309         int try_a = I->Gdeflate_deg_bounds_are_nice;
1310         int try_b = I->Gdeflate_deg_bounds_are_nice;
1311         for (j = 0; j < nvars; j++)
1312         {
1313             if (I->Gdeflate_deg_bound[j] != 0)
1314             {
1315                 gcd_is_trivial = 0;
1316             }
1317 
1318             if (I->Adeflate_deg[j] != I->Gdeflate_deg_bound[j]
1319                 || I->Amin_exp[j] > I->Bmin_exp[j])
1320             {
1321                 try_a = 0;
1322             }
1323 
1324             if (I->Bdeflate_deg[j] != I->Gdeflate_deg_bound[j]
1325                 || I->Bmin_exp[j] > I->Amin_exp[j])
1326             {
1327                 try_b = 0;
1328             }
1329         }
1330 
1331         if (gcd_is_trivial)
1332             goto calculate_trivial_gcd;
1333 
1334         if ((try_a || try_b) &&
1335             _try_divides(G, A, try_a, B, try_b, ctx, handles, num_handles))
1336         {
1337             goto successful;
1338         }
1339     }
1340 
1341     mpoly_gcd_info_measure_brown(I, A->length, B->length, ctx->minfo);
1342     mpoly_gcd_info_measure_bma(I, A->length, B->length, ctx->minfo);
1343 
1344     if (I->mvars == 2)
1345     {
1346         /* TODO: bivariate heuristic here */
1347 
1348         if (_try_brown(G, A, B, I, ctx, handles, num_handles))
1349             goto successful;
1350     }
1351     else if (I->can_use_brown && I->can_use_bma
1352             && I->bma_time_est < I->brown_time_est
1353             && (I->mvars*(I->Adensity + I->Bdensity) < 1
1354                 || I->bma_time_est < 0.01*I->brown_time_est))
1355     {
1356         if (_try_bma(G, A, B, I, ctx, handles, num_handles))
1357             goto successful;
1358 
1359         if (_try_brown(G, A, B, I, ctx, handles, num_handles))
1360             goto successful;
1361     }
1362     else
1363     {
1364         if (_try_brown(G, A, B, I, ctx, handles, num_handles))
1365             goto successful;
1366 
1367         if (_try_bma(G, A, B, I, ctx, handles, num_handles))
1368             goto successful;
1369     }
1370 
1371     mpoly_gcd_info_measure_zippel(I, A->length, B->length, ctx->minfo);
1372 
1373     if (_try_zippel(G, A, B, I, ctx))
1374         goto successful;
1375 
1376     success = 0;
1377     goto cleanup;
1378 
1379 successful:
1380 
1381     success = 1;
1382 
1383 cleanup:
1384 
1385     mpoly_gcd_info_clear(I);
1386 
1387     if (success)
1388     {
1389         fmpz_mpoly_repack_bits_inplace(G, Gbits, ctx);
1390         FLINT_ASSERT(G->length > 0);
1391         if (fmpz_sgn(G->coeffs + 0) < 0)
1392             fmpz_mpoly_neg(G, G, ctx);
1393     }
1394 
1395     return success;
1396 }
1397 
1398 
fmpz_mpoly_gcd(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)1399 int fmpz_mpoly_gcd(
1400     fmpz_mpoly_t G,
1401     const fmpz_mpoly_t A,
1402     const fmpz_mpoly_t B,
1403     const fmpz_mpoly_ctx_t ctx)
1404 {
1405     flint_bitcnt_t Gbits;
1406     int success;
1407     thread_pool_handle * handles;
1408     slong num_handles;
1409     slong thread_limit;
1410 
1411     thread_limit = FLINT_MIN(A->length, B->length)/256;
1412 
1413     if (fmpz_mpoly_is_zero(A, ctx))
1414     {
1415         if (B->length == 0)
1416         {
1417             fmpz_mpoly_zero(G, ctx);
1418             return 1;
1419         }
1420         if (fmpz_sgn(B->coeffs + 0) < 0)
1421         {
1422             fmpz_mpoly_neg(G, B, ctx);
1423             return 1;
1424         }
1425         else
1426         {
1427             fmpz_mpoly_set(G, B, ctx);
1428             return 1;
1429         }
1430     }
1431 
1432     if (fmpz_mpoly_is_zero(B, ctx))
1433     {
1434         if (fmpz_sgn(A->coeffs + 0) < 0)
1435         {
1436             fmpz_mpoly_neg(G, A, ctx);
1437             return 1;
1438         }
1439         else
1440         {
1441             fmpz_mpoly_set(G, A, ctx);
1442             return 1;
1443         }
1444     }
1445 
1446     if (fmpz_mpoly_is_fmpz(A, ctx))
1447     {
1448         fmpz_t t;
1449         fmpz_init_set(t, A->coeffs);
1450         _fmpz_vec_content_chained(t, B->coeffs, B->length);
1451         fmpz_mpoly_set_fmpz(G, t, ctx);
1452         fmpz_clear(t);
1453         return 1;
1454     }
1455 
1456     if (fmpz_mpoly_is_fmpz(B, ctx))
1457     {
1458         fmpz_t t;
1459         fmpz_init_set(t, B->coeffs);
1460         _fmpz_vec_content_chained(t, A->coeffs, A->length);
1461         fmpz_mpoly_set_fmpz(G, t, ctx);
1462         fmpz_clear(t);
1463         return 1;
1464     }
1465 
1466     Gbits = FLINT_MIN(A->bits, B->bits);
1467 
1468     if (A->length == 1)
1469     {
1470         return _try_monomial_gcd(G, Gbits, B, A, ctx);
1471     }
1472     else if (B->length == 1)
1473     {
1474         return _try_monomial_gcd(G, Gbits, A, B, ctx);
1475     }
1476 
1477     if (A->bits <= FLINT_BITS && B->bits <= FLINT_BITS)
1478     {
1479         num_handles = flint_request_threads(&handles, thread_limit);
1480         success = _fmpz_mpoly_gcd_threaded_pool(G, Gbits, A, B, ctx, handles, num_handles);
1481         flint_give_back_threads(handles, num_handles);
1482         return success;
1483     }
1484 
1485     if (_try_monomial_cofactors(G, Gbits, A, B, ctx))
1486     {
1487         return 1;
1488     }
1489     else
1490     {
1491         /*
1492             The gcd calculation is unusual.
1493             First see if both inputs fit into FLINT_BITS.
1494             Then, try deflation as a last resort.
1495         */
1496 
1497         slong k;
1498         fmpz * Ashift, * Astride;
1499         fmpz * Bshift, * Bstride;
1500         fmpz * Gshift, * Gstride;
1501         fmpz_mpoly_t Anew, Bnew;
1502         const fmpz_mpoly_struct * Ause, * Buse;
1503 
1504         fmpz_mpoly_init(Anew, ctx);
1505         fmpz_mpoly_init(Bnew, ctx);
1506 
1507         Ause = A;
1508         if (A->bits > FLINT_BITS)
1509         {
1510             if (!fmpz_mpoly_repack_bits(Anew, A, FLINT_BITS, ctx))
1511                 goto could_not_repack;
1512             Ause = Anew;
1513         }
1514 
1515         Buse = B;
1516         if (B->bits > FLINT_BITS)
1517         {
1518             if (!fmpz_mpoly_repack_bits(Bnew, B, FLINT_BITS, ctx))
1519                 goto could_not_repack;
1520             Buse = Bnew;
1521         }
1522 
1523         num_handles = flint_request_threads(&handles, thread_limit);
1524         Gbits = FLINT_MIN(Ause->bits, Buse->bits);
1525         success = _fmpz_mpoly_gcd_threaded_pool(G, Gbits, Ause, Buse, ctx,
1526                                                          handles, num_handles);
1527         flint_give_back_threads(handles, num_handles);
1528 
1529         goto cleanup;
1530 
1531 could_not_repack:
1532 
1533         /*
1534             One of A or B could not be repacked into FLINT_BITS. See if
1535             they both fit into FLINT_BITS after deflation.
1536         */
1537 
1538         Ashift  = _fmpz_vec_init(ctx->minfo->nvars);
1539         Astride = _fmpz_vec_init(ctx->minfo->nvars);
1540         Bshift  = _fmpz_vec_init(ctx->minfo->nvars);
1541         Bstride = _fmpz_vec_init(ctx->minfo->nvars);
1542         Gshift  = _fmpz_vec_init(ctx->minfo->nvars);
1543         Gstride = _fmpz_vec_init(ctx->minfo->nvars);
1544 
1545         fmpz_mpoly_deflation(Ashift, Astride, A, ctx);
1546         fmpz_mpoly_deflation(Bshift, Bstride, B, ctx);
1547         _fmpz_vec_min(Gshift, Ashift, Bshift, ctx->minfo->nvars);
1548         for (k = 0; k < ctx->minfo->nvars; k++)
1549         {
1550             fmpz_gcd(Gstride + k, Astride + k, Bstride + k);
1551         }
1552 
1553         success = 0;
1554 
1555         fmpz_mpoly_deflate(Anew, A, Ashift, Gstride, ctx);
1556         if (Anew->bits > FLINT_BITS)
1557         {
1558             if (!fmpz_mpoly_repack_bits(Anew, Anew, FLINT_BITS, ctx))
1559                 goto deflate_cleanup;
1560         }
1561 
1562         fmpz_mpoly_deflate(Bnew, B, Bshift, Gstride, ctx);
1563         if (Bnew->bits > FLINT_BITS)
1564         {
1565             if (!fmpz_mpoly_repack_bits(Bnew, Bnew, FLINT_BITS, ctx))
1566                 goto deflate_cleanup;
1567         }
1568 
1569         num_handles = flint_request_threads(&handles, thread_limit);
1570         Gbits = FLINT_MIN(Anew->bits, Bnew->bits);
1571         success = _fmpz_mpoly_gcd_threaded_pool(G, Gbits, Anew, Bnew, ctx,
1572                                                          handles, num_handles);
1573         flint_give_back_threads(handles, num_handles);
1574 
1575         if (success)
1576         {
1577             fmpz_mpoly_inflate(G, G, Gshift, Gstride, ctx);
1578 
1579             /* inflation may have changed the lc */
1580             FLINT_ASSERT(G->length > 0);
1581             if (fmpz_sgn(G->coeffs + 0) < 0)
1582             {
1583                 fmpz_mpoly_neg(G, G, ctx);
1584             }
1585         }
1586 
1587 deflate_cleanup:
1588 
1589         _fmpz_vec_clear(Ashift, ctx->minfo->nvars);
1590         _fmpz_vec_clear(Astride, ctx->minfo->nvars);
1591         _fmpz_vec_clear(Bshift, ctx->minfo->nvars);
1592         _fmpz_vec_clear(Bstride, ctx->minfo->nvars);
1593         _fmpz_vec_clear(Gshift, ctx->minfo->nvars);
1594         _fmpz_vec_clear(Gstride, ctx->minfo->nvars);
1595 
1596 cleanup:
1597 
1598         fmpz_mpoly_clear(Anew, ctx);
1599         fmpz_mpoly_clear(Bnew, ctx);
1600 
1601         return success;
1602     }
1603 }
1604 
1605