1 /*
2     Copyright (C) 2018, 2019 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include "nmod_mpoly.h"
13 
14 /*
15     For each j, set out[j] to the evaluation of A at x_i = alpha[i] (i != j)
16     i.e. if nvars = 3
17         out[0] = A(x, alpha[1], alpha[2])
18         out[1] = A(alpha[0], x, alpha[2])
19         out[2] = A(alpha[0], alpha[1], x)
20 
21     If ignore[j] is nonzero, then out[j] need not be calculated, probably
22     because we shouldn't calculate it in dense form.
23 */
nmod_mpoly_evals(nmod_poly_struct * out,const int * ignore,const nmod_mpoly_t A,ulong * Amin_exp,ulong * Amax_exp,ulong * Astride,mp_limb_t * alpha,const nmod_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)24 void nmod_mpoly_evals(
25     nmod_poly_struct * out,
26     const int * ignore,
27     const nmod_mpoly_t A,
28     ulong * Amin_exp,
29     ulong * Amax_exp,
30     ulong * Astride,
31     mp_limb_t * alpha,
32     const nmod_mpoly_ctx_t ctx,
33     const thread_pool_handle * handles,
34     slong num_handles)
35 {
36     slong i, j;
37     slong nvars = ctx->minfo->nvars;
38     slong total_limit, total_length;
39     int use_direct_LUT;
40     ulong varexp;
41     ulong mask;
42     slong * offsets, * shifts;
43     slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
44     ulong * Aexp = A->exps;
45     mp_limb_t * Acoeff = A->coeffs;
46     mp_limb_t meval;
47     mp_limb_t t;
48 
49     FLINT_ASSERT(A->bits <= FLINT_BITS);
50 
51     mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
52     offsets = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
53     shifts = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
54 
55     for (j = 0; j < ctx->minfo->nvars; j++)
56     {
57         nmod_poly_zero(out + j);
58         mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
59     }
60 
61     /*
62         two cases:
63         (1) the Amax_exp[j] are small enough to calculate a direct LUT
64         (2) use a LUT for exponents that are powers of two
65     */
66 
67     total_limit = A->length/256;
68     total_limit = FLINT_MAX(WORD(9999), total_limit);
69     total_length = 0;
70     use_direct_LUT = 1;
71     for (j = 0; j < ctx->minfo->nvars; j++)
72     {
73         total_length += Amax_exp[j] + 1;
74         if ((ulong) total_length > (ulong) total_limit)
75             use_direct_LUT = 0;
76     }
77 
78     if (use_direct_LUT)
79     {
80         slong off;
81         mp_limb_t * LUT, ** LUTvalue, ** LUTvalueinv;
82 
83         /* value of powers of alpha[j] */
84         LUT = (mp_limb_t *) flint_malloc(2*total_length*sizeof(mp_limb_t));
85 
86         /* pointers into LUT */
87         LUTvalue    = (mp_limb_t **) flint_malloc(nvars*sizeof(mp_limb_t *));
88         LUTvalueinv = (mp_limb_t **) flint_malloc(nvars*sizeof(mp_limb_t *));
89 
90         off = 0;
91         for (j = 0; j < nvars; j++)
92         {
93             ulong k;
94             mp_limb_t alphainvj = nmod_inv(alpha[j], (out + 0)->mod);
95 
96             LUTvalue[j] = LUT + off;
97             LUTvalueinv[j] = LUT + total_length + off;
98             LUTvalue[j][0] = 1;
99             LUTvalueinv[j][0] = 1;
100             for (k = 0; k < Amax_exp[j]; k++)
101             {
102                 LUTvalue[j][k + 1] = nmod_mul(LUTvalue[j][k], alpha[j],
103                                                                (out + 0)->mod);
104                 LUTvalueinv[j][k + 1] = nmod_mul(LUTvalueinv[j][k], alphainvj,
105                                                                (out + 0)->mod);
106             }
107 
108             off += Amax_exp[j] + 1;
109         }
110         FLINT_ASSERT(off == total_length);
111 
112         for (i = 0; i < A->length; i++)
113         {
114             meval = Acoeff[i];
115 
116             for (j = 0; j < nvars; j++)
117             {
118                 varexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
119                 FLINT_ASSERT(varexp <= Amax_exp[j]);
120                 meval = nmod_mul(meval, LUTvalue[j][varexp], (out + 0)->mod);
121             }
122 
123             for (j = 0; j < nvars; j++)
124             {
125                 varexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
126 
127                 if (ignore[j])
128                     continue;
129 
130                 t = nmod_mul(meval, LUTvalueinv[j][varexp], (out + j)->mod);
131 
132                 FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j])
133                                   || (varexp - Amin_exp[j]) % Astride[j] == 0);
134 
135                 varexp = Astride[j] < 2 ? varexp - Amin_exp[j] :
136                                            (varexp - Amin_exp[j])/Astride[j];
137 
138                 t = nmod_add(t, nmod_poly_get_coeff_ui(out + j, varexp),
139                                                                (out + j)->mod);
140                 nmod_poly_set_coeff_ui(out + j, varexp, t);
141             }
142         }
143 
144         flint_free(LUT);
145         flint_free(LUTvalue);
146         flint_free(LUTvalueinv);
147     }
148     else
149     {
150         slong LUTlen;
151         ulong * LUTmask;
152         slong * LUToffset, * LUTvar;
153         mp_limb_t * LUTvalue, * LUTvalueinv;
154         mp_limb_t * vieval;
155         mp_limb_t t, xpoweval, xinvpoweval; /* TODO t again? */
156 
157         LUToffset   = (slong *) flint_malloc(N*FLINT_BITS*sizeof(slong));
158         LUTmask     = (ulong *) flint_malloc(N*FLINT_BITS*sizeof(ulong));
159         LUTvalue    = (mp_limb_t *) flint_malloc(N*FLINT_BITS*sizeof(mp_limb_t));
160         LUTvar      = (slong *) flint_malloc(N*FLINT_BITS*sizeof(slong));
161         LUTvalueinv = (mp_limb_t *) flint_malloc(N*FLINT_BITS*sizeof(mp_limb_t));
162 
163         vieval = (mp_limb_t *) flint_malloc(nvars*sizeof(mp_limb_t));
164 
165         LUTlen = 0;
166         for (j = nvars - 1; j >= 0; j--)
167         {
168             flint_bitcnt_t bits = FLINT_BIT_COUNT(Amax_exp[j]);
169             xpoweval = alpha[j]; /* xpoweval = alpha[j]^(2^i) */
170             xinvpoweval = nmod_inv(xpoweval, (out + 0)->mod); /* alpha[j]^(-2^i) */
171             for (i = 0; i < bits; i++)
172             {
173                 LUToffset[LUTlen] = offsets[j];
174                 LUTmask[LUTlen] = (UWORD(1) << (shifts[j] + i));
175                 LUTvalue[LUTlen] = xpoweval;
176                 LUTvalueinv[LUTlen] = xinvpoweval;
177                 LUTvar[LUTlen] = j;
178                 LUTlen++;
179                 xpoweval = nmod_mul(xpoweval, xpoweval, (out + 0)->mod);
180                 xinvpoweval = nmod_mul(xinvpoweval, xinvpoweval, (out + 0)->mod);
181             }
182 
183             vieval[j] = 1;
184         }
185         FLINT_ASSERT(LUTlen < N*FLINT_BITS);
186 
187         for (i = 0; i < A->length; i++)
188         {
189             meval = Acoeff[i];
190 
191             for (j = 0; j < LUTlen; j++)
192             {
193                 if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
194                 {
195                     meval = nmod_mul(meval, LUTvalue[j], (out + 0)->mod);
196                     vieval[LUTvar[j]] = nmod_mul(vieval[LUTvar[j]],
197                                                LUTvalueinv[j], (out + 0)->mod);
198                 }
199             }
200 
201             for (j = 0; j < nvars; j++)
202             {
203                 varexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
204 
205                 FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j])
206                                   || (varexp - Amin_exp[j]) % Astride[j] == 0);
207 
208                 varexp = Astride[j] < 2 ? varexp - Amin_exp[j] :
209                                            (varexp - Amin_exp[j])/Astride[j];
210 
211                 t = nmod_mul(meval, vieval[j], (out + j)->mod);
212                 t = nmod_add(t, nmod_poly_get_coeff_ui(out + j, varexp),
213                                                                (out + j)->mod);
214                 nmod_poly_set_coeff_ui(out + j, varexp, t);
215                 vieval[j] = 1;
216             }
217         }
218 
219         flint_free(LUToffset);
220         flint_free(LUTmask);
221         flint_free(LUTvalue);
222         flint_free(LUTvar);
223         flint_free(LUTvalueinv);
224 
225         flint_free(vieval);
226     }
227 
228     flint_free(offsets);
229     flint_free(shifts);
230 }
231 
232 
mpoly_gcd_info_set_estimates_nmod_mpoly(mpoly_gcd_info_t I,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)233 void mpoly_gcd_info_set_estimates_nmod_mpoly(
234     mpoly_gcd_info_t I,
235     const nmod_mpoly_t A,
236     const nmod_mpoly_t B,
237     const nmod_mpoly_ctx_t ctx,
238     const thread_pool_handle * handles,
239     slong num_handles)
240 {
241     int try_count = 0;
242     slong i, j;
243     nmod_poly_t Geval;
244     nmod_poly_struct * Aevals, * Bevals;
245     mp_limb_t * alpha;
246     flint_rand_t randstate;
247     slong ignore_limit;
248     int * ignore;
249 
250     flint_randinit(randstate);
251 
252     ignore = (int *) flint_malloc(ctx->minfo->nvars*sizeof(int));
253     alpha = (mp_limb_t *) flint_malloc(ctx->minfo->nvars*sizeof(mp_limb_t));
254     Aevals = (nmod_poly_struct *) flint_malloc(
255                                    ctx->minfo->nvars*sizeof(nmod_poly_struct));
256     Bevals = (nmod_poly_struct *) flint_malloc(
257                                    ctx->minfo->nvars*sizeof(nmod_poly_struct));
258 
259     nmod_poly_init_mod(Geval, ctx->ffinfo->mod);
260     for (j = 0; j < ctx->minfo->nvars; j++)
261     {
262         nmod_poly_init_mod(Aevals + j, ctx->ffinfo->mod);
263         nmod_poly_init_mod(Bevals + j, ctx->ffinfo->mod);
264     }
265 
266     ignore_limit = A->length/4096 + B->length/4096;
267     ignore_limit = FLINT_MAX(WORD(9999), ignore_limit);
268     I->Gdeflate_deg_bounds_are_nice = 1;
269     for (j = 0; j < ctx->minfo->nvars; j++)
270     {
271         if (   I->Adeflate_deg[j] > ignore_limit
272             || I->Bdeflate_deg[j] > ignore_limit)
273         {
274             ignore[j] = 1;
275             I->Gdeflate_deg_bounds_are_nice = 0;
276         }
277         else
278         {
279             ignore[j] = 0;
280         }
281     }
282 
283 try_again:
284 
285     if (++try_count > 10)
286     {
287         I->Gdeflate_deg_bounds_are_nice = 0;
288         for (j = 0; j < ctx->minfo->nvars; j++)
289         {
290             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
291                                                  I->Bdeflate_deg[j]);
292             I->Gterm_count_est[j] = 1 + I->Gdeflate_deg_bound[j]/2;
293         }
294 
295         goto cleanup;
296     }
297 
298     for (j = 0; j < ctx->minfo->nvars; j++)
299     {
300         alpha[j] = n_urandint(randstate, ctx->ffinfo->mod.n - 1) + 1;
301     }
302 
303 
304     nmod_mpoly_evals(Aevals, ignore, A, I->Amin_exp, I->Amax_exp, I->Gstride,
305                                              alpha, ctx, handles, num_handles);
306     nmod_mpoly_evals(Bevals, ignore, B, I->Bmin_exp, I->Bmax_exp, I->Gstride,
307                                              alpha, ctx, handles, num_handles);
308 
309     for (j = 0; j < ctx->minfo->nvars; j++)
310     {
311         if (ignore[j])
312         {
313             I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
314                                                  I->Bdeflate_deg[j]);
315             I->Gterm_count_est[j] = 1 + I->Gdeflate_deg_bound[j]/2;
316         }
317         else
318         {
319             if (   I->Adeflate_deg[j] != nmod_poly_degree(Aevals + j)
320                 || I->Bdeflate_deg[j] != nmod_poly_degree(Bevals + j))
321             {
322                 goto try_again;
323             }
324 
325             nmod_poly_gcd(Geval, Aevals + j, Bevals + j);
326 
327             I->Gterm_count_est[j] = 0;
328             I->Gdeflate_deg_bound[j] = nmod_poly_degree(Geval);
329             for (i = I->Gdeflate_deg_bound[j]; i >= 0; i--)
330             {
331                 I->Gterm_count_est[j] += (Geval->coeffs[i] != 0);
332             }
333         }
334     }
335 
336 cleanup:
337 
338     nmod_poly_clear(Geval);
339     for (j = 0; j < ctx->minfo->nvars; j++)
340     {
341         nmod_poly_clear(Aevals + j);
342         nmod_poly_clear(Bevals + j);
343     }
344 
345     flint_free(ignore);
346     flint_free(alpha);
347     flint_free(Aevals);
348     flint_free(Bevals);
349 
350     flint_randclear(randstate);
351 
352     return;
353 }
354 
355 
356 /*********************** Easy when B is a monomial ***************************/
_try_monomial_gcd(nmod_mpoly_t G,flint_bitcnt_t Gbits,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx)357 static int _try_monomial_gcd(
358     nmod_mpoly_t G, flint_bitcnt_t Gbits,
359     const nmod_mpoly_t A,
360     const nmod_mpoly_t B,
361     const nmod_mpoly_ctx_t ctx)
362 {
363     slong i;
364     fmpz * minAfields, * minAdegs, * minBdegs;
365     TMP_INIT;
366 
367     FLINT_ASSERT(A->length > 0);
368     FLINT_ASSERT(B->length == 1);
369 
370     TMP_START;
371 
372     /* get the field-wise minimum of A */
373     minAfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
374     for (i = 0; i < ctx->minfo->nfields; i++)
375         fmpz_init(minAfields + i);
376     mpoly_min_fields_fmpz(minAfields, A->exps, A->length, A->bits, ctx->minfo);
377 
378     /* unpack to get the min degrees of each variable in A */
379     minAdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
380     for (i = 0; i < ctx->minfo->nvars; i++)
381         fmpz_init(minAdegs + i);
382     mpoly_get_monomial_ffmpz_unpacked_ffmpz(minAdegs, minAfields, ctx->minfo);
383 
384     /* get the degree of each variable in B */
385     minBdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
386     for (i = 0; i < ctx->minfo->nvars; i++)
387         fmpz_init(minBdegs + i);
388     mpoly_get_monomial_ffmpz(minBdegs, B->exps, B->bits, ctx->minfo);
389 
390     /* compute the degree of each variable in G */
391     _fmpz_vec_min_inplace(minBdegs, minAdegs, ctx->minfo->nvars);
392 
393     nmod_mpoly_fit_length(G, 1, ctx);
394     nmod_mpoly_fit_bits(G, Gbits, ctx);
395     G->bits = Gbits;
396     mpoly_set_monomial_ffmpz(G->exps, minBdegs, Gbits, ctx->minfo);
397     G->coeffs[0] = UWORD(1);
398     G->length = 1;
399 
400     for (i = 0; i < ctx->minfo->nfields; i++)
401     {
402         fmpz_clear(minAfields + i);
403     }
404     for (i = 0; i < ctx->minfo->nvars; i++)
405     {
406         fmpz_clear(minAdegs + i);
407         fmpz_clear(minBdegs + i);
408     }
409 
410     TMP_END;
411 
412     return 1;
413 }
414 
415 
416 /********************** See if cofactors are monomials ***********************/
_try_monomial_cofactors(nmod_mpoly_t G,flint_bitcnt_t Gbits,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx)417 static int _try_monomial_cofactors(
418     nmod_mpoly_t G, flint_bitcnt_t Gbits,
419     const nmod_mpoly_t A,
420     const nmod_mpoly_t B,
421     const nmod_mpoly_ctx_t ctx)
422 {
423     int success;
424     slong i, j;
425     slong NA, NG;
426     slong nvars = ctx->minfo->nvars;
427     fmpz * Abarexps, * Bbarexps, * Texps;
428     mp_limb_t a0inv;
429     nmod_mpoly_t T;
430     TMP_INIT;
431 
432     FLINT_ASSERT(A->length > 0);
433     FLINT_ASSERT(B->length > 0);
434 
435     if (A->length != B->length)
436         return 0;
437 
438     for (i = A->length - 1; i > 0; i--)
439     {
440         success = (nmod_mul(A->coeffs[0], B->coeffs[i], ctx->ffinfo->mod)
441                 == nmod_mul(B->coeffs[0], A->coeffs[i], ctx->ffinfo->mod));
442         if (!success)
443             goto cleanup;
444     }
445 
446     TMP_START;
447 
448     Abarexps = (fmpz *) TMP_ALLOC(3*nvars*sizeof(fmpz));
449     Bbarexps = Abarexps + 1*nvars;
450     Texps    = Abarexps + 2*nvars;
451     for (j = 0; j < nvars; j++)
452     {
453         fmpz_init(Abarexps + j);
454         fmpz_init(Bbarexps + j);
455         fmpz_init(Texps + j);
456     }
457 
458     success = mpoly_monomial_cofactors(Abarexps, Bbarexps, A->exps, A->bits,
459                                       B->exps, B->bits, A->length, ctx->minfo);
460     if (!success)
461         goto cleanup_tmp;
462 
463     nmod_mpoly_init3(T, A->length, Gbits, ctx);
464     NG = mpoly_words_per_exp(Gbits, ctx->minfo);
465     NA = mpoly_words_per_exp(A->bits, ctx->minfo);
466     a0inv = nmod_inv(A->coeffs[0], ctx->ffinfo->mod);
467     T->length = A->length;
468     for (i = 0; i < A->length; i++)
469     {
470         mpoly_get_monomial_ffmpz(Texps, A->exps + NA*i, A->bits, ctx->minfo);
471         _fmpz_vec_sub(Texps, Texps, Abarexps, nvars);
472         mpoly_set_monomial_ffmpz(T->exps + NG*i, Texps, Gbits, ctx->minfo);
473         T->coeffs[i] = nmod_mul(A->coeffs[i], a0inv, ctx->ffinfo->mod);
474     }
475     nmod_mpoly_swap(G, T, ctx);
476     nmod_mpoly_clear(T, ctx);
477 
478     success = 1;
479 
480 cleanup_tmp:
481 
482     for (j = 0; j < nvars; j++)
483     {
484         fmpz_clear(Abarexps + j);
485         fmpz_clear(Bbarexps + j);
486         fmpz_clear(Texps + j);
487     }
488 
489     TMP_END;
490 
491 cleanup:
492 
493     return success;
494 }
495 
496 
497 /********* Assume B has length one when converted to univar format ***********/
_try_missing_var(nmod_mpoly_t G,flint_bitcnt_t Gbits,slong var,const nmod_mpoly_t A,ulong Ashift,const nmod_mpoly_t B,ulong Bshift,const nmod_mpoly_ctx_t ctx)498 static int _try_missing_var(
499     nmod_mpoly_t G, flint_bitcnt_t Gbits,
500     slong var,
501     const nmod_mpoly_t A, ulong Ashift,
502     const nmod_mpoly_t B, ulong Bshift,
503     const nmod_mpoly_ctx_t ctx)
504 {
505     int success;
506     slong i;
507     nmod_mpoly_t tG;
508     nmod_mpoly_univar_t Ax;
509 
510     nmod_mpoly_init(tG, ctx);
511     nmod_mpoly_univar_init(Ax, ctx);
512 
513     nmod_mpoly_to_univar(Ax, A, var, ctx);
514 
515     FLINT_ASSERT(Ax->length > 0);
516     success = _nmod_mpoly_gcd_threaded_pool(tG, Gbits, B, Ax->coeffs + 0,
517                                                                  ctx, NULL, 0);
518     if (!success)
519         goto cleanup;
520 
521     for (i = 1; i < Ax->length; i++)
522     {
523         success = _nmod_mpoly_gcd_threaded_pool(tG, Gbits, tG, Ax->coeffs + i,
524                                                                  ctx, NULL, 0);
525         if (!success)
526             goto cleanup;
527     }
528 
529     nmod_mpoly_swap(G, tG, ctx);
530     _mpoly_gen_shift_left(G->exps, G->bits, G->length,
531                                    var, FLINT_MIN(Ashift, Bshift), ctx->minfo);
532 
533 cleanup:
534 
535     nmod_mpoly_clear(tG, ctx);
536     nmod_mpoly_univar_clear(Ax, ctx);
537 
538     return success;
539 }
540 
541 
542 /******************* Test if B divides A or A divides B **********************/
543 /*
544     Test if B divides A or A divides B
545         TODO: incorporate deflation
546 */
_try_divides(nmod_mpoly_t G,const nmod_mpoly_t A,int try_a,const nmod_mpoly_t B,int try_b,const nmod_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)547 static int _try_divides(
548     nmod_mpoly_t G,
549     const nmod_mpoly_t A, int try_a,
550     const nmod_mpoly_t B, int try_b,
551     const nmod_mpoly_ctx_t ctx,
552     const thread_pool_handle * handles,
553     slong num_handles)
554 {
555     int success;
556     nmod_mpoly_t Q;
557 
558     nmod_mpoly_init(Q, ctx);
559 
560     if (try_b && _nmod_mpoly_divides_threaded_pool(Q, A, B,
561                                                     ctx, handles, num_handles))
562     {
563         nmod_mpoly_set(G, B, ctx);
564         success = 1;
565         goto cleanup;
566     }
567 
568     if (try_a && _nmod_mpoly_divides_threaded_pool(Q, B, A,
569                                                     ctx, handles, num_handles))
570     {
571         nmod_mpoly_set(G, A, ctx);
572         success = 1;
573         goto cleanup;
574     }
575 
576     success = 0;
577 
578 cleanup:
579 
580     nmod_mpoly_clear(Q, ctx);
581 
582     return success;
583 }
584 
585 
586 /********************** Hit A and B with zippel ******************************/
_try_zippel(nmod_mpoly_t G,const nmod_mpoly_t A,const nmod_mpoly_t B,const mpoly_gcd_info_t I,const nmod_mpoly_ctx_t ctx)587 static int _try_zippel(
588     nmod_mpoly_t G,
589     const nmod_mpoly_t A,
590     const nmod_mpoly_t B,
591     const mpoly_gcd_info_t I,
592     const nmod_mpoly_ctx_t ctx)
593 {
594     slong i, k;
595     slong m = I->mvars;
596     int success;
597     mpoly_zipinfo_t zinfo;
598     flint_bitcnt_t wbits;
599     flint_rand_t randstate;
600     nmod_mpoly_ctx_t uctx;
601     nmod_mpolyu_t Au, Bu, Gu, Abaru, Bbaru;
602     nmod_mpoly_t Ac, Bc, Gc;
603 
604     FLINT_ASSERT(A->bits <= FLINT_BITS);
605     FLINT_ASSERT(B->bits <= FLINT_BITS);
606 
607     if (!I->can_use_zippel)
608         return 0;
609 
610     FLINT_ASSERT(m >= WORD(2));
611     FLINT_ASSERT(A->length > 0);
612     FLINT_ASSERT(B->length > 0);
613 
614     flint_randinit(randstate);
615 
616     /* interpolation will continue in m variables */
617     mpoly_zipinfo_init(zinfo, m);
618 
619     /* uctx is context for Z[y_1,...,y_{m-1}]*/
620     nmod_mpoly_ctx_init(uctx, m - 1, ORD_LEX, ctx->ffinfo->mod.n);
621 
622     /* fill in a valid zinfo->perm and degrees */
623     for (i = 0; i < m; i++)
624     {
625         k = I->zippel_perm[i];
626         zinfo->perm[i] = k;
627         zinfo->Adegs[i] = I->Adeflate_deg[k];
628         zinfo->Bdegs[i] = I->Bdeflate_deg[k];
629         FLINT_ASSERT(I->Adeflate_deg[k] != 0);
630         FLINT_ASSERT(I->Bdeflate_deg[k] != 0);
631     }
632 
633     wbits = FLINT_MAX(A->bits, B->bits);
634 
635     nmod_mpolyu_init(Au, wbits, uctx);
636     nmod_mpolyu_init(Bu, wbits, uctx);
637     nmod_mpolyu_init(Gu, wbits, uctx);
638     nmod_mpolyu_init(Abaru, wbits, uctx);
639     nmod_mpolyu_init(Bbaru, wbits, uctx);
640     nmod_mpoly_init3(Ac, 0, wbits, uctx);
641     nmod_mpoly_init3(Bc, 0, wbits, uctx);
642     nmod_mpoly_init3(Gc, 0, wbits, uctx);
643 
644     nmod_mpoly_to_mpolyu_perm_deflate_threaded_pool(Au, uctx, A, ctx, zinfo->perm,
645                                              I->Amin_exp, I->Gstride, NULL, 0);
646     nmod_mpoly_to_mpolyu_perm_deflate_threaded_pool(Bu, uctx, B, ctx, zinfo->perm,
647                                              I->Bmin_exp, I->Gstride, NULL, 0);
648 
649     success = nmod_mpolyu_content_mpoly_threaded_pool(Ac, Au, uctx, NULL, 0);
650     success = success &&
651               nmod_mpolyu_content_mpoly_threaded_pool(Bc, Bu, uctx, NULL, 0);
652     if (!success)
653         goto cleanup;
654 
655     nmod_mpolyu_divexact_mpoly_inplace(Au, Ac, uctx);
656     nmod_mpolyu_divexact_mpoly_inplace(Bu, Bc, uctx);
657 
658     /* after removing content, degree bounds in zinfo are still valid bounds */
659     success = nmod_mpolyu_gcdm_zippel(Gu, Abaru, Bbaru, Au, Bu,
660                                                        uctx, zinfo, randstate);
661     if (!success)
662         goto cleanup;
663 
664     success = _nmod_mpoly_gcd_threaded_pool(Gc, wbits, Ac, Bc, uctx, NULL, 0);
665     if (!success)
666         goto cleanup;
667 
668     nmod_mpolyu_mul_mpoly_inplace(Gu, Gc, uctx);
669 
670     nmod_mpoly_from_mpolyu_perm_inflate(G, I->Gbits, ctx, Gu, uctx,
671                                          zinfo->perm, I->Gmin_exp, I->Gstride);
672     success = 1;
673 
674 cleanup:
675 
676     nmod_mpolyu_clear(Au, uctx);
677     nmod_mpolyu_clear(Bu, uctx);
678     nmod_mpolyu_clear(Gu, uctx);
679     nmod_mpolyu_clear(Abaru, uctx);
680     nmod_mpolyu_clear(Bbaru, uctx);
681     nmod_mpoly_clear(Ac, uctx);
682     nmod_mpoly_clear(Bc, uctx);
683     nmod_mpoly_clear(Gc, uctx);
684 
685     nmod_mpoly_ctx_clear(uctx);
686 
687     mpoly_zipinfo_clear(zinfo);
688 
689     flint_randclear(randstate);
690 
691     return success;
692 }
693 
694 
695 /*********************** Hit A and B with brown ******************************/
696 typedef struct
697 {
698     nmod_mpolyn_struct * Pn;
699     const nmod_mpoly_ctx_struct * nctx;
700     const nmod_mpoly_struct * P;
701     const nmod_mpoly_ctx_struct * ctx;
702     const slong * perm;
703     const ulong * shift, * stride;
704     const thread_pool_handle * handles;
705     slong num_handles;
706 }
707 _convertn_arg_struct;
708 
709 typedef _convertn_arg_struct _convertn_arg_t[1];
710 
_worker_convertn(void * varg)711 static void _worker_convertn(void * varg)
712 {
713     _convertn_arg_struct * arg = (_convertn_arg_struct *) varg;
714 
715     nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(arg->Pn, arg->nctx, arg->P, arg->ctx,
716            arg->perm, arg->shift, arg->stride, arg->handles, arg->num_handles);
717 }
718 
_try_brown(nmod_mpoly_t G,const nmod_mpoly_t A,const nmod_mpoly_t B,mpoly_gcd_info_t I,const nmod_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)719 static int _try_brown(
720     nmod_mpoly_t G,
721     const nmod_mpoly_t A,
722     const nmod_mpoly_t B,
723     mpoly_gcd_info_t I,
724     const nmod_mpoly_ctx_t ctx,
725     const thread_pool_handle * handles,
726     slong num_handles)
727 {
728     int success;
729     slong m = I->mvars;
730     flint_bitcnt_t wbits;
731     nmod_mpoly_ctx_t nctx;
732     nmod_mpolyn_t An, Bn, Gn, Abarn, Bbarn;
733     nmod_poly_stack_t Sp;
734 
735     if (!I->can_use_brown)
736         return 0;
737 
738     FLINT_ASSERT(m >= 2);
739     FLINT_ASSERT(A->bits <= FLINT_BITS);
740     FLINT_ASSERT(B->bits <= FLINT_BITS);
741     FLINT_ASSERT(A->length > 0);
742     FLINT_ASSERT(B->length > 0);
743 
744     wbits = FLINT_MAX(A->bits, B->bits);
745 
746     nmod_mpoly_ctx_init(nctx, m, ORD_LEX, ctx->ffinfo->mod.n);
747     nmod_poly_stack_init(Sp, wbits, nctx);
748     nmod_mpolyn_init(An, wbits, nctx);
749     nmod_mpolyn_init(Bn, wbits, nctx);
750     nmod_mpolyn_init(Gn, wbits, nctx);
751     nmod_mpolyn_init(Abarn, wbits, nctx);
752     nmod_mpolyn_init(Bbarn, wbits, nctx);
753 
754     if (num_handles > 0)
755     {
756         slong s = mpoly_divide_threads(num_handles, A->length, B->length);
757         _convertn_arg_t arg;
758 
759         FLINT_ASSERT(s >= 0);
760         FLINT_ASSERT(s < num_handles);
761 
762         arg->Pn = Bn;
763         arg->nctx = nctx;
764         arg->P = B;
765         arg->ctx = ctx;
766         arg->perm = I->brown_perm;
767         arg->shift = I->Bmin_exp;
768         arg->stride = I->Gstride;
769         arg->handles = handles + (s + 1);
770         arg->num_handles = num_handles - (s + 1);
771 
772         thread_pool_wake(global_thread_pool, handles[s], 0, _worker_convertn, arg);
773 
774         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(An, nctx, A, ctx,
775                        I->brown_perm, I->Amin_exp, I->Gstride, handles + 0, s);
776 
777         thread_pool_wait(global_thread_pool, handles[s]);
778     }
779     else
780     {
781         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(An, nctx, A, ctx,
782                               I->brown_perm, I->Amin_exp, I->Gstride, NULL, 0);
783         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(Bn, nctx, B, ctx,
784                               I->brown_perm, I->Bmin_exp, I->Gstride, NULL, 0);
785     }
786 
787     FLINT_ASSERT(An->bits == wbits);
788     FLINT_ASSERT(Bn->bits == wbits);
789     FLINT_ASSERT(An->length > 1);
790     FLINT_ASSERT(Bn->length > 1);
791 
792     success = (num_handles > 0)
793         ? nmod_mpolyn_gcd_brown_smprime_threaded_pool(Gn, Abarn, Bbarn, An, Bn,
794                                           m - 1, nctx, I, handles, num_handles)
795         : nmod_mpolyn_gcd_brown_smprime(Gn, Abarn, Bbarn, An, Bn,
796                                                            m - 1, nctx, I, Sp);
797 
798     if (!success)
799     {
800         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(An, nctx, A, ctx,
801                               I->brown_perm, I->Amin_exp, I->Gstride, NULL, 0);
802         nmod_mpoly_to_mpolyn_perm_deflate_threaded_pool(Bn, nctx, B, ctx,
803                               I->brown_perm, I->Bmin_exp, I->Gstride, NULL, 0);
804         success = nmod_mpolyn_gcd_brown_lgprime(Gn, Abarn, Bbarn, An, Bn,
805                                                                   m - 1, nctx);
806     }
807 
808     if (!success)
809         goto cleanup;
810 
811     nmod_mpoly_from_mpolyn_perm_inflate(G, I->Gbits, ctx, Gn, nctx,
812                                        I->brown_perm, I->Gmin_exp, I->Gstride);
813     success = 1;
814 
815 cleanup:
816 
817     nmod_mpolyn_clear(An, nctx);
818     nmod_mpolyn_clear(Bn, nctx);
819     nmod_mpolyn_clear(Gn, nctx);
820     nmod_mpolyn_clear(Abarn, nctx);
821     nmod_mpolyn_clear(Bbarn, nctx);
822     nmod_poly_stack_clear(Sp);
823     nmod_mpoly_ctx_clear(nctx);
824 
825     return success;
826 }
827 
828 
829 /*
830     The function must pack its answer into bits = Gbits <= FLINT_BITS
831     Both A and B have to be packed into bits <= FLINT_BITS
832 
833     return is 1 for success, 0 for failure.
834 */
_nmod_mpoly_gcd_threaded_pool(nmod_mpoly_t G,flint_bitcnt_t Gbits,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)835 int _nmod_mpoly_gcd_threaded_pool(
836     nmod_mpoly_t G, flint_bitcnt_t Gbits,
837     const nmod_mpoly_t A,
838     const nmod_mpoly_t B,
839     const nmod_mpoly_ctx_t ctx,
840     const thread_pool_handle * handles,
841     slong num_handles)
842 {
843     int success;
844     slong v_in_both;
845     slong v_in_either;
846     slong v_in_A_only;
847     slong v_in_B_only;
848     slong j;
849     slong nvars = ctx->minfo->nvars;
850     mpoly_gcd_info_t I;
851 
852     if (A->length == 1)
853     {
854         return _try_monomial_gcd(G, Gbits, B, A, ctx);
855     }
856     else if (B->length == 1)
857     {
858         return _try_monomial_gcd(G, Gbits, A, B, ctx);
859     }
860 
861     mpoly_gcd_info_init(I, nvars);
862 
863     /* entries of I are all now invalid */
864 
865     I->Gbits = Gbits;
866 
867     mpoly_gcd_info_limits(I->Amax_exp, I->Amin_exp, I->Alead_count,
868                       I->Atail_count, A->exps, A->bits, A->length, ctx->minfo);
869     mpoly_gcd_info_limits(I->Bmax_exp, I->Bmin_exp, I->Blead_count,
870                       I->Btail_count, B->exps, B->bits, B->length, ctx->minfo);
871 
872     /* set ess(p) := p/term_content(p) */
873 
874     /* check if the cofactors could be monomials, i.e. ess(A) == ess(B) */
875     for (j = 0; j < nvars; j++)
876     {
877         if (I->Amax_exp[j] - I->Amin_exp[j] != I->Bmax_exp[j] - I->Bmin_exp[j])
878             goto skip_monomial_cofactors;
879     }
880     if (_try_monomial_cofactors(G, I->Gbits, A, B, ctx))
881     {
882         goto successful;
883     }
884 
885 skip_monomial_cofactors:
886 
887     mpoly_gcd_info_stride(I->Gstride,
888             A->exps, A->bits, A->length, I->Amax_exp, I->Amin_exp,
889             B->exps, B->bits, B->length, I->Bmax_exp, I->Bmin_exp, ctx->minfo);
890 
891     for (j = 0; j < nvars; j++)
892     {
893         ulong t = I->Gstride[j];
894 
895         if (t == 0)
896         {
897             FLINT_ASSERT(  I->Amax_exp[j] == I->Amin_exp[j]
898                         || I->Bmax_exp[j] == I->Bmin_exp[j]);
899         }
900         else
901         {
902             FLINT_ASSERT((I->Amax_exp[j] - I->Amin_exp[j]) % t == 0);
903             FLINT_ASSERT((I->Bmax_exp[j] - I->Bmin_exp[j]) % t == 0);
904         }
905 
906         I->Adeflate_deg[j] = t == 0 ? 0 : (I->Amax_exp[j] - I->Amin_exp[j])/t;
907         I->Bdeflate_deg[j] = t == 0 ? 0 : (I->Bmax_exp[j] - I->Bmin_exp[j])/t;
908         I->Gmin_exp[j] = FLINT_MIN(I->Amin_exp[j], I->Bmin_exp[j]);
909     }
910 
911     /*
912         The following are now valid:
913             I->Amax_exp, I->Amin_exp, I->Alead_count, I->Atail_count,
914             I->Bmax_exp, I->Bmin_exp, I->Blead_count, I->Btail_count,
915             I->Gstride
916             I->Adeflate_deg
917             I->Bdeflate_deg
918             I->Gmin_exp
919     */
920 
921     /* check if ess(A) and ess(B) have a variable v_in_both in common */
922     v_in_both = -WORD(1);
923     for (j = 0; j < nvars; j++)
924     {
925         if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] > I->Bmin_exp[j])
926         {
927             v_in_both = j;
928             break;
929         }
930     }
931     if (v_in_both == -WORD(1))
932     {
933         /*
934             The variables in ess(A) and ess(B) are disjoint.
935             gcd is trivial to compute.
936         */
937 
938 calculate_trivial_gcd:
939 
940         nmod_mpoly_fit_length(G, 1, ctx);
941         nmod_mpoly_fit_bits(G, Gbits, ctx);
942         G->bits = Gbits;
943         mpoly_set_monomial_ui(G->exps, I->Gmin_exp, Gbits, ctx->minfo);
944         G->coeffs[0] = UWORD(1);
945         _nmod_mpoly_set_length(G, 1, ctx);
946 
947         goto successful;
948     }
949 
950     /* check if ess(A) and ess(B) depend on another variable v_in_either */
951     FLINT_ASSERT(0 <= v_in_both);
952     FLINT_ASSERT(v_in_both < nvars);
953 
954     v_in_either = -WORD(1);
955     for (j = 0; j < nvars; j++)
956     {
957         if (j == v_in_both)
958             continue;
959 
960         if (I->Amax_exp[j] > I->Amin_exp[j] || I->Bmax_exp[j] > I->Bmin_exp[j])
961         {
962             v_in_either = j;
963             break;
964         }
965     }
966 
967     if (v_in_either == -WORD(1))
968     {
969         /*
970             The ess(A) and ess(B) depend on only one variable v_in_both
971             Calculate gcd using univariates
972         */
973         nmod_poly_t a, b, g;
974 
975         nmod_poly_init_mod(a, ctx->ffinfo->mod);
976         nmod_poly_init_mod(b, ctx->ffinfo->mod);
977         nmod_poly_init_mod(g, ctx->ffinfo->mod);
978         _nmod_mpoly_to_nmod_poly_deflate(a, A, v_in_both,
979                                                  I->Amin_exp, I->Gstride, ctx);
980         _nmod_mpoly_to_nmod_poly_deflate(b, B, v_in_both,
981                                                  I->Bmin_exp, I->Gstride, ctx);
982         nmod_poly_gcd(g, a, b);
983         _nmod_mpoly_from_nmod_poly_inflate(G, Gbits, g, v_in_both,
984                                                  I->Gmin_exp, I->Gstride, ctx);
985         nmod_poly_clear(a);
986         nmod_poly_clear(b);
987         nmod_poly_clear(g);
988 
989         goto successful;
990     }
991 
992     /* check if there is a variable in ess(A) that is not in ess(B) */
993     v_in_A_only = -WORD(1);
994     v_in_B_only = -WORD(1);
995     for (j = 0; j < nvars; j++)
996     {
997         if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] == I->Bmin_exp[j])
998         {
999             v_in_A_only = j;
1000             break;
1001         }
1002         if (I->Bmax_exp[j] > I->Bmin_exp[j] && I->Amax_exp[j] == I->Amin_exp[j])
1003         {
1004             v_in_B_only = j;
1005             break;
1006         }
1007     }
1008     if (v_in_A_only != -WORD(1))
1009     {
1010         success = _try_missing_var(G, I->Gbits,
1011                                    v_in_A_only,
1012                                    A, I->Amin_exp[v_in_A_only],
1013                                    B, I->Bmin_exp[v_in_A_only],
1014                                    ctx);
1015         goto cleanup;
1016     }
1017     if (v_in_B_only != -WORD(1))
1018     {
1019         success = _try_missing_var(G, I->Gbits,
1020                                    v_in_B_only,
1021                                    B, I->Bmin_exp[v_in_B_only],
1022                                    A, I->Amin_exp[v_in_B_only],
1023                                    ctx);
1024         goto cleanup;
1025     }
1026 
1027     /* all variable are now either
1028             missing from both ess(A) and ess(B), or
1029             present in both ess(A) and ess(B)
1030         and there are at least two in the latter case
1031     */
1032 
1033     mpoly_gcd_info_set_estimates_nmod_mpoly(I, A, B, ctx, handles, num_handles);
1034     mpoly_gcd_info_set_perm(I, A->length, B->length, ctx->minfo);
1035 
1036     /* everything in I is valid now */
1037 
1038     /* check divisibility A/B and B/A */
1039     {
1040         int gcd_is_trivial = 1;
1041         int try_a = I->Gdeflate_deg_bounds_are_nice;
1042         int try_b = I->Gdeflate_deg_bounds_are_nice;
1043         for (j = 0; j < nvars; j++)
1044         {
1045             if (I->Gdeflate_deg_bound[j] != 0)
1046             {
1047                 gcd_is_trivial = 0;
1048             }
1049 
1050             if (I->Adeflate_deg[j] != I->Gdeflate_deg_bound[j]
1051                 || I->Amin_exp[j] > I->Bmin_exp[j])
1052             {
1053                 try_a = 0;
1054             }
1055 
1056             if (I->Bdeflate_deg[j] != I->Gdeflate_deg_bound[j]
1057                 || I->Bmin_exp[j] > I->Amin_exp[j])
1058             {
1059                 try_b = 0;
1060             }
1061         }
1062 
1063         if (gcd_is_trivial)
1064             goto calculate_trivial_gcd;
1065 
1066         if ((try_a || try_b) &&
1067             _try_divides(G, A, try_a, B, try_b, ctx, handles, num_handles))
1068         {
1069             goto successful;
1070         }
1071     }
1072 
1073     mpoly_gcd_info_measure_brown(I, A->length, B->length, ctx->minfo);
1074     mpoly_gcd_info_measure_zippel(I, A->length, B->length, ctx->minfo);
1075 
1076     if (I->zippel_time_est < I->brown_time_est)
1077     {
1078         if (_try_zippel(G, A, B, I, ctx))
1079             goto successful;
1080 
1081         if (_try_brown(G, A, B, I, ctx, handles, num_handles))
1082             goto successful;
1083     }
1084     else
1085     {
1086         if (_try_brown(G, A, B, I, ctx, handles, num_handles))
1087             goto successful;
1088 
1089         if (_try_zippel(G, A, B, I, ctx))
1090             goto successful;
1091     }
1092 
1093     success = 0;
1094     goto cleanup;
1095 
1096 successful:
1097 
1098     success = 1;
1099 
1100 cleanup:
1101 
1102     mpoly_gcd_info_clear(I);
1103 
1104     if (success)
1105     {
1106         nmod_mpoly_repack_bits_inplace(G, Gbits, ctx);
1107         nmod_mpoly_make_monic(G, G, ctx);
1108     }
1109 
1110     return success;
1111 }
1112 
1113 
nmod_mpoly_gcd(nmod_mpoly_t G,const nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_ctx_t ctx)1114 int nmod_mpoly_gcd(
1115     nmod_mpoly_t G,
1116     const nmod_mpoly_t A,
1117     const nmod_mpoly_t B,
1118     const nmod_mpoly_ctx_t ctx)
1119 {
1120     flint_bitcnt_t Gbits;
1121     int success;
1122     thread_pool_handle * handles;
1123     slong num_handles;
1124     slong thread_limit;
1125 
1126     thread_limit = FLINT_MIN(A->length, B->length)/256;
1127 
1128     if (nmod_mpoly_is_zero(A, ctx))
1129     {
1130         if (nmod_mpoly_is_zero(B, ctx))
1131             nmod_mpoly_zero(G, ctx);
1132         else
1133             nmod_mpoly_make_monic(G, B, ctx);
1134         return 1;
1135     }
1136 
1137     if (nmod_mpoly_is_zero(B, ctx))
1138     {
1139         nmod_mpoly_make_monic(G, A, ctx);
1140         return 1;
1141     }
1142 
1143     Gbits = FLINT_MIN(A->bits, B->bits);
1144 
1145     if (A->bits <= FLINT_BITS && B->bits <= FLINT_BITS)
1146     {
1147         num_handles = flint_request_threads(&handles, thread_limit);
1148         success = _nmod_mpoly_gcd_threaded_pool(G, Gbits, A, B, ctx,
1149                                                          handles, num_handles);
1150         flint_give_back_threads(handles, num_handles);
1151         return success;
1152     }
1153 
1154     if (A->length == 1)
1155     {
1156         return _try_monomial_gcd(G, Gbits, B, A, ctx);
1157     }
1158     else if (B->length == 1)
1159     {
1160         return _try_monomial_gcd(G, Gbits, A, B, ctx);
1161     }
1162     else if (_try_monomial_cofactors(G, Gbits, A, B, ctx))
1163     {
1164         return 1;
1165     }
1166     else
1167     {
1168         /*
1169             The gcd calculation is unusual.
1170             First see if both inputs fit into FLINT_BITS.
1171             Then, try deflation as a last resort.
1172         */
1173 
1174         int success;
1175         slong k;
1176         fmpz * Ashift, * Astride;
1177         fmpz * Bshift, * Bstride;
1178         fmpz * Gshift, * Gstride;
1179         nmod_mpoly_t Anew, Bnew;
1180         const nmod_mpoly_struct * Ause, * Buse;
1181 
1182         nmod_mpoly_init(Anew, ctx);
1183         nmod_mpoly_init(Bnew, ctx);
1184 
1185         Ause = A;
1186         if (A->bits > FLINT_BITS)
1187         {
1188             if (!nmod_mpoly_repack_bits(Anew, A, FLINT_BITS, ctx))
1189                 goto could_not_repack;
1190             Ause = Anew;
1191         }
1192 
1193         Buse = B;
1194         if (B->bits > FLINT_BITS)
1195         {
1196             if (!nmod_mpoly_repack_bits(Bnew, B, FLINT_BITS, ctx))
1197                 goto could_not_repack;
1198             Buse = Bnew;
1199         }
1200 
1201         num_handles = flint_request_threads(&handles, thread_limit);
1202         Gbits = FLINT_MIN(Ause->bits, Buse->bits);
1203         success = _nmod_mpoly_gcd_threaded_pool(G, Gbits, Ause, Buse, ctx,
1204                                                          handles, num_handles);
1205         flint_give_back_threads(handles, num_handles);
1206 
1207         goto cleanup;
1208 
1209 could_not_repack:
1210 
1211         /*
1212             One of A or B could not be repacked into FLINT_BITS. See if
1213             they both fit into FLINT_BITS after deflation.
1214         */
1215 
1216         Ashift  = _fmpz_vec_init(ctx->minfo->nvars);
1217         Astride = _fmpz_vec_init(ctx->minfo->nvars);
1218         Bshift  = _fmpz_vec_init(ctx->minfo->nvars);
1219         Bstride = _fmpz_vec_init(ctx->minfo->nvars);
1220         Gshift  = _fmpz_vec_init(ctx->minfo->nvars);
1221         Gstride = _fmpz_vec_init(ctx->minfo->nvars);
1222 
1223         nmod_mpoly_deflation(Ashift, Astride, A, ctx);
1224         nmod_mpoly_deflation(Bshift, Bstride, B, ctx);
1225         _fmpz_vec_min(Gshift, Ashift, Bshift, ctx->minfo->nvars);
1226         for (k = 0; k < ctx->minfo->nvars; k++)
1227         {
1228             fmpz_gcd(Gstride + k, Astride + k, Bstride + k);
1229         }
1230 
1231         success = 0;
1232 
1233         nmod_mpoly_deflate(Anew, A, Ashift, Gstride, ctx);
1234         if (Anew->bits > FLINT_BITS)
1235         {
1236             if (!nmod_mpoly_repack_bits(Anew, Anew, FLINT_BITS, ctx))
1237                 goto deflate_cleanup;
1238         }
1239 
1240         nmod_mpoly_deflate(Bnew, B, Bshift, Gstride, ctx);
1241         if (Bnew->bits > FLINT_BITS)
1242         {
1243             if (!nmod_mpoly_repack_bits(Bnew, Bnew, FLINT_BITS, ctx))
1244                 goto deflate_cleanup;
1245         }
1246 
1247         num_handles = flint_request_threads(&handles, thread_limit);
1248         Gbits = FLINT_MIN(Anew->bits, Bnew->bits);
1249         success = _nmod_mpoly_gcd_threaded_pool(G, Gbits, Anew, Bnew, ctx,
1250                                                          handles, num_handles);
1251         flint_give_back_threads(handles, num_handles);
1252 
1253         if (success)
1254         {
1255             nmod_mpoly_inflate(G, G, Gshift, Gstride, ctx);
1256             nmod_mpoly_make_monic(G, G, ctx);
1257         }
1258 
1259 deflate_cleanup:
1260 
1261         _fmpz_vec_clear(Ashift, ctx->minfo->nvars);
1262         _fmpz_vec_clear(Astride, ctx->minfo->nvars);
1263         _fmpz_vec_clear(Bshift, ctx->minfo->nvars);
1264         _fmpz_vec_clear(Bstride, ctx->minfo->nvars);
1265         _fmpz_vec_clear(Gshift, ctx->minfo->nvars);
1266         _fmpz_vec_clear(Gstride, ctx->minfo->nvars);
1267 
1268 cleanup:
1269 
1270         nmod_mpoly_clear(Anew, ctx);
1271         nmod_mpoly_clear(Bnew, ctx);
1272 
1273         return success;
1274     }
1275 }
1276 
1277