1 /*
2 Copyright (C) 2018 Daniel Schultz
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include "fmpz_mpoly.h"
13
14 /*
15 For each j, set out[j] to the evaluation of A at x_i = alpha[i] (i != j)
16 i.e. if nvars = 3
17 out[0] = A(x, alpha[1], alpha[2])
18 out[1] = A(alpha[0], x, alpha[2])
19 out[2] = A(alpha[0], alpha[1], x)
20
21 If ignore[j] is nonzero, then out[j] need not be calculated, probably
22 because we shouldn't calculate it in dense form.
23 */
fmpz_mpoly_evals(nmod_poly_struct * out,const int * ignore,const fmpz_mpoly_t A,ulong * Amin_exp,ulong * Amax_exp,ulong * Astride,mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)24 void fmpz_mpoly_evals(
25 nmod_poly_struct * out,
26 const int * ignore,
27 const fmpz_mpoly_t A,
28 ulong * Amin_exp,
29 ulong * Amax_exp,
30 ulong * Astride,
31 mp_limb_t * alpha,
32 const fmpz_mpoly_ctx_t ctx,
33 const thread_pool_handle * handles,
34 slong num_handles)
35 {
36 slong i, j;
37 slong nvars = ctx->minfo->nvars;
38 slong total_limit, total_length;
39 int use_direct_LUT;
40 ulong varexp;
41 ulong mask;
42 slong * offsets, * shifts;
43 slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
44 ulong * Aexp = A->exps;
45 fmpz * Acoeff = A->coeffs;
46 mp_limb_t meval;
47 mp_limb_t t;
48
49 FLINT_ASSERT(A->bits <= FLINT_BITS);
50
51 mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
52 offsets = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
53 shifts = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
54
55 for (j = 0; j < ctx->minfo->nvars; j++)
56 {
57 nmod_poly_zero(out + j);
58 mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
59 }
60
61 /*
62 two cases:
63 (1) the Amax_exp[j] are small enough to calculate a direct LUT
64 (2) use a LUT for exponents that are powers of two
65 */
66
67 total_limit = A->length/256;
68 total_limit = FLINT_MAX(WORD(9999), total_limit);
69 total_length = 0;
70 use_direct_LUT = 1;
71 for (j = 0; j < ctx->minfo->nvars; j++)
72 {
73 total_length += Amax_exp[j] + 1;
74 if ((ulong) total_length > (ulong) total_limit)
75 use_direct_LUT = 0;
76 }
77
78 if (use_direct_LUT)
79 {
80 slong off;
81 mp_limb_t * LUT, ** LUTvalue, ** LUTvalueinv;
82
83 /* value of powers of alpha[j] */
84 LUT = (mp_limb_t *) flint_malloc(2*total_length*sizeof(mp_limb_t));
85
86 /* pointers into LUT */
87 LUTvalue = (mp_limb_t **) flint_malloc(nvars*sizeof(mp_limb_t *));
88 LUTvalueinv = (mp_limb_t **) flint_malloc(nvars*sizeof(mp_limb_t *));
89
90 off = 0;
91 for (j = 0; j < nvars; j++)
92 {
93 ulong k;
94 mp_limb_t alphainvj = nmod_inv(alpha[j], (out + 0)->mod);
95
96 LUTvalue[j] = LUT + off;
97 LUTvalueinv[j] = LUT + total_length + off;
98 LUTvalue[j][0] = 1;
99 LUTvalueinv[j][0] = 1;
100 for (k = 0; k < Amax_exp[j]; k++)
101 {
102 LUTvalue[j][k + 1] = nmod_mul(LUTvalue[j][k], alpha[j],
103 (out + 0)->mod);
104 LUTvalueinv[j][k + 1] = nmod_mul(LUTvalueinv[j][k], alphainvj,
105 (out + 0)->mod);
106 }
107
108 off += Amax_exp[j] + 1;
109 }
110 FLINT_ASSERT(off == total_length);
111
112 for (i = 0; i < A->length; i++)
113 {
114 meval = fmpz_fdiv_ui(Acoeff + i, out->mod.n);
115
116 for (j = 0; j < nvars; j++)
117 {
118 varexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
119 FLINT_ASSERT(varexp <= Amax_exp[j]);
120 meval = nmod_mul(meval, LUTvalue[j][varexp], (out + 0)->mod);
121 }
122
123 for (j = 0; j < nvars; j++)
124 {
125 varexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
126
127 if (ignore[j])
128 continue;
129
130 t = nmod_mul(meval, LUTvalueinv[j][varexp], (out + j)->mod);
131
132 FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j])
133 || (varexp - Amin_exp[j]) % Astride[j] == 0);
134
135 varexp = Astride[j] < 2 ? varexp - Amin_exp[j] :
136 (varexp - Amin_exp[j])/Astride[j];
137
138 t = nmod_add(t, nmod_poly_get_coeff_ui(out + j, varexp),
139 (out + j)->mod);
140 nmod_poly_set_coeff_ui(out + j, varexp, t);
141 }
142 }
143
144 flint_free(LUT);
145 flint_free(LUTvalue);
146 flint_free(LUTvalueinv);
147 }
148 else
149 {
150 slong LUTlen;
151 ulong * LUTmask;
152 slong * LUToffset, * LUTvar;
153 mp_limb_t * LUTvalue, * LUTvalueinv;
154 mp_limb_t * vieval;
155 mp_limb_t t, xpoweval, xinvpoweval;
156
157 LUToffset = (slong *) flint_malloc(N*FLINT_BITS*sizeof(slong));
158 LUTmask = (ulong *) flint_malloc(N*FLINT_BITS*sizeof(ulong));
159 LUTvalue = (mp_limb_t *) flint_malloc(N*FLINT_BITS*sizeof(mp_limb_t));
160 LUTvar = (slong *) flint_malloc(N*FLINT_BITS*sizeof(slong));
161 LUTvalueinv = (mp_limb_t *) flint_malloc(N*FLINT_BITS*sizeof(mp_limb_t));
162
163 vieval = (mp_limb_t *) flint_malloc(nvars*sizeof(mp_limb_t));
164
165 LUTlen = 0;
166 for (j = nvars - 1; j >= 0; j--)
167 {
168 flint_bitcnt_t bits = FLINT_BIT_COUNT(Amax_exp[j]);
169 xpoweval = alpha[j]; /* xpoweval = alpha[j]^(2^i) */
170 xinvpoweval = nmod_inv(xpoweval, (out + 0)->mod); /* alpha[j]^(-2^i) */
171 for (i = 0; i < bits; i++)
172 {
173 LUToffset[LUTlen] = offsets[j];
174 LUTmask[LUTlen] = (UWORD(1) << (shifts[j] + i));
175 LUTvalue[LUTlen] = xpoweval;
176 LUTvalueinv[LUTlen] = xinvpoweval;
177 LUTvar[LUTlen] = j;
178 LUTlen++;
179 xpoweval = nmod_mul(xpoweval, xpoweval, (out + 0)->mod);
180 xinvpoweval = nmod_mul(xinvpoweval, xinvpoweval, (out + 0)->mod);
181 }
182
183 vieval[j] = 1;
184 }
185 FLINT_ASSERT(LUTlen < N*FLINT_BITS);
186
187 for (i = 0; i < A->length; i++)
188 {
189 meval = fmpz_fdiv_ui(Acoeff + i, (out + 0)->mod.n);
190
191 for (j = 0; j < LUTlen; j++)
192 {
193 if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
194 {
195 meval = nmod_mul(meval, LUTvalue[j], (out + 0)->mod);
196 vieval[LUTvar[j]] = nmod_mul(vieval[LUTvar[j]],
197 LUTvalueinv[j], (out + 0)->mod);
198 }
199 }
200
201 for (j = 0; j < nvars; j++)
202 {
203 varexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
204
205 FLINT_ASSERT((Astride[j] == 0 && varexp == Amin_exp[j])
206 || (varexp - Amin_exp[j]) % Astride[j] == 0);
207
208 varexp = Astride[j] < 2 ? varexp - Amin_exp[j] :
209 (varexp - Amin_exp[j])/Astride[j];
210
211 t = nmod_mul(meval, vieval[j], (out + j)->mod);
212 t = nmod_add(t, nmod_poly_get_coeff_ui(out + j, varexp),
213 (out + j)->mod);
214 nmod_poly_set_coeff_ui(out + j, varexp, t);
215 vieval[j] = 1;
216 }
217 }
218
219 flint_free(LUToffset);
220 flint_free(LUTmask);
221 flint_free(LUTvalue);
222 flint_free(LUTvar);
223 flint_free(LUTvalueinv);
224
225 flint_free(vieval);
226 }
227
228 flint_free(offsets);
229 flint_free(shifts);
230 }
231
232
mpoly_gcd_info_set_estimates_fmpz_mpoly(mpoly_gcd_info_t I,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)233 void mpoly_gcd_info_set_estimates_fmpz_mpoly(
234 mpoly_gcd_info_t I,
235 const fmpz_mpoly_t A,
236 const fmpz_mpoly_t B,
237 const fmpz_mpoly_ctx_t ctx,
238 const thread_pool_handle * handles,
239 slong num_handles)
240 {
241 int try_count = 0;
242 slong i, j;
243 nmod_poly_t Geval;
244 nmod_poly_struct * Aevals, * Bevals;
245 mp_limb_t p = UWORD(1) << (FLINT_BITS - 1);
246 mp_limb_t * alpha;
247 flint_rand_t randstate;
248 slong ignore_limit;
249 int * ignore;
250
251 flint_randinit(randstate);
252
253 ignore = (int *) flint_malloc(ctx->minfo->nvars*sizeof(int));
254 alpha = (mp_limb_t *) flint_malloc(ctx->minfo->nvars*sizeof(mp_limb_t));
255 Aevals = (nmod_poly_struct *) flint_malloc(
256 ctx->minfo->nvars*sizeof(nmod_poly_struct));
257 Bevals = (nmod_poly_struct *) flint_malloc(
258 ctx->minfo->nvars*sizeof(nmod_poly_struct));
259
260 nmod_poly_init(Geval, p);
261 for (j = 0; j < ctx->minfo->nvars; j++)
262 {
263 nmod_poly_init(Aevals + j, p);
264 nmod_poly_init(Bevals + j, p);
265 }
266
267 ignore_limit = A->length/4096 + B->length/4096;
268 ignore_limit = FLINT_MAX(WORD(9999), ignore_limit);
269 I->Gdeflate_deg_bounds_are_nice = 1;
270 for (j = 0; j < ctx->minfo->nvars; j++)
271 {
272 if ( I->Adeflate_deg[j] > ignore_limit
273 || I->Bdeflate_deg[j] > ignore_limit)
274 {
275 ignore[j] = 1;
276 I->Gdeflate_deg_bounds_are_nice = 0;
277 }
278 else
279 {
280 ignore[j] = 0;
281 }
282 }
283
284 try_again:
285
286 if (++try_count > 10)
287 {
288 I->Gdeflate_deg_bounds_are_nice = 0;
289 for (j = 0; j < ctx->minfo->nvars; j++)
290 {
291 I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
292 I->Bdeflate_deg[j]);
293 I->Gterm_count_est[j] = (I->Gdeflate_deg_bound[j] + 1)/2;
294 }
295
296 goto cleanup;
297 }
298
299 p = n_nextprime(p, 1);
300 nmod_init(&Geval->mod, p);
301 for (j = 0; j < ctx->minfo->nvars; j++)
302 {
303 alpha[j] = n_urandint(randstate, p - 1) + 1;
304 nmod_init(&(Aevals + j)->mod, p);
305 nmod_init(&(Bevals + j)->mod, p);
306 }
307
308 fmpz_mpoly_evals(Aevals, ignore, A, I->Amin_exp, I->Amax_exp, I->Gstride,
309 alpha, ctx, handles, num_handles);
310 fmpz_mpoly_evals(Bevals, ignore, B, I->Bmin_exp, I->Bmax_exp, I->Gstride,
311 alpha, ctx, handles, num_handles);
312
313 for (j = 0; j < ctx->minfo->nvars; j++)
314 {
315 if (ignore[j])
316 {
317 I->Gdeflate_deg_bound[j] = FLINT_MIN(I->Adeflate_deg[j],
318 I->Bdeflate_deg[j]);
319 I->Gterm_count_est[j] = (I->Gdeflate_deg_bound[j] + 1)/2;
320 }
321 else
322 {
323 if ( I->Adeflate_deg[j] != nmod_poly_degree(Aevals + j)
324 || I->Bdeflate_deg[j] != nmod_poly_degree(Bevals + j))
325 {
326 goto try_again;
327 }
328
329 nmod_poly_gcd(Geval, Aevals + j, Bevals + j);
330
331 I->Gterm_count_est[j] = 0;
332 I->Gdeflate_deg_bound[j] = nmod_poly_degree(Geval);
333 for (i = I->Gdeflate_deg_bound[j]; i >= 0; i--)
334 {
335 I->Gterm_count_est[j] += (Geval->coeffs[i] != 0);
336 }
337 }
338 }
339
340 cleanup:
341
342 nmod_poly_clear(Geval);
343 for (j = 0; j < ctx->minfo->nvars; j++)
344 {
345 nmod_poly_clear(Aevals + j);
346 nmod_poly_clear(Bevals + j);
347 }
348
349 flint_free(ignore);
350 flint_free(alpha);
351 flint_free(Aevals);
352 flint_free(Bevals);
353
354 flint_randclear(randstate);
355
356 return;
357 }
358
359
360 /*********************** Easy when B is a monomial ***************************/
_try_monomial_gcd(fmpz_mpoly_t G,flint_bitcnt_t Gbits,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)361 static int _try_monomial_gcd(
362 fmpz_mpoly_t G, flint_bitcnt_t Gbits,
363 const fmpz_mpoly_t A,
364 const fmpz_mpoly_t B,
365 const fmpz_mpoly_ctx_t ctx)
366 {
367 slong i;
368 fmpz_t g;
369 fmpz * minAfields, * minAdegs, * minBdegs;
370 TMP_INIT;
371
372 FLINT_ASSERT(A->length > 0);
373 FLINT_ASSERT(B->length == 1);
374
375 TMP_START;
376
377 /* get the field-wise minimum of A */
378 minAfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
379 for (i = 0; i < ctx->minfo->nfields; i++)
380 fmpz_init(minAfields + i);
381 mpoly_min_fields_fmpz(minAfields, A->exps, A->length, A->bits, ctx->minfo);
382
383 /* unpack to get the min degrees of each variable in A */
384 minAdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
385 for (i = 0; i < ctx->minfo->nvars; i++)
386 fmpz_init(minAdegs + i);
387 mpoly_get_monomial_ffmpz_unpacked_ffmpz(minAdegs, minAfields, ctx->minfo);
388
389 /* get the degree of each variable in B */
390 minBdegs = (fmpz *) TMP_ALLOC(ctx->minfo->nvars*sizeof(fmpz));
391 for (i = 0; i < ctx->minfo->nvars; i++)
392 fmpz_init(minBdegs + i);
393 mpoly_get_monomial_ffmpz(minBdegs, B->exps, B->bits, ctx->minfo);
394
395 /* compute the degree of each variable in G */
396 _fmpz_vec_min_inplace(minBdegs, minAdegs, ctx->minfo->nvars);
397
398 /* compute the coefficient of G */
399 fmpz_init_set(g, B->coeffs + 0);
400 _fmpz_vec_content_chained(g, A->coeffs, A->length);
401
402 /* write G */
403 fmpz_mpoly_fit_length(G, 1, ctx);
404 fmpz_mpoly_fit_bits(G, Gbits, ctx);
405 G->bits = Gbits;
406 mpoly_set_monomial_ffmpz(G->exps, minBdegs, Gbits, ctx->minfo);
407 fmpz_swap(G->coeffs + 0, g);
408 fmpz_clear(g);
409 _fmpz_mpoly_set_length(G, 1, ctx);
410
411 for (i = 0; i < ctx->minfo->nfields; i++)
412 {
413 fmpz_clear(minAfields + i);
414 }
415 for (i = 0; i < ctx->minfo->nvars; i++)
416 {
417 fmpz_clear(minAdegs + i);
418 fmpz_clear(minBdegs + i);
419 }
420
421 TMP_END;
422
423 return 1;
424 }
425
426
427 /********************** See if cofactors are monomials ***********************/
_try_monomial_cofactors(fmpz_mpoly_t G,flint_bitcnt_t Gbits,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)428 static int _try_monomial_cofactors(
429 fmpz_mpoly_t G, flint_bitcnt_t Gbits,
430 const fmpz_mpoly_t A,
431 const fmpz_mpoly_t B,
432 const fmpz_mpoly_ctx_t ctx)
433 {
434 int success;
435 slong i, j;
436 slong NA, NG;
437 slong nvars = ctx->minfo->nvars;
438 fmpz * Abarexps, * Bbarexps, * Texps;
439 fmpz_t t1, t2;
440 fmpz_t gA, gB;
441 fmpz_mpoly_t T;
442 TMP_INIT;
443
444 FLINT_ASSERT(A->length > 0);
445 FLINT_ASSERT(B->length > 0);
446
447 if (A->length != B->length)
448 return 0;
449
450 fmpz_init(t1);
451 fmpz_init(t2);
452 fmpz_init_set(gA, A->coeffs + 0);
453 fmpz_init_set(gB, B->coeffs + 0);
454
455 for (i = A->length - 1; i > 0; i--)
456 {
457 fmpz_mul(t1, A->coeffs + 0, B->coeffs + i);
458 fmpz_mul(t2, B->coeffs + 0, A->coeffs + i);
459 success = fmpz_equal(t1, t2);
460 if (!success)
461 goto cleanup;
462
463 fmpz_gcd(gA, gA, A->coeffs + i);
464 fmpz_gcd(gB, gB, B->coeffs + i);
465 }
466
467 TMP_START;
468
469 Abarexps = (fmpz *) TMP_ALLOC(3*nvars*sizeof(fmpz));
470 Bbarexps = Abarexps + 1*nvars;
471 Texps = Abarexps + 2*nvars;
472 for (j = 0; j < nvars; j++)
473 {
474 fmpz_init(Abarexps + j);
475 fmpz_init(Bbarexps + j);
476 fmpz_init(Texps + j);
477 }
478
479 success = mpoly_monomial_cofactors(Abarexps, Bbarexps, A->exps, A->bits,
480 B->exps, B->bits, A->length, ctx->minfo);
481 if (!success)
482 goto cleanup_tmp;
483
484 /* put A's cofactor coefficient in t1 */
485 fmpz_gcd(t2, gA, gB);
486 fmpz_divexact(t1, gA, t2);
487 if (fmpz_sgn(A->coeffs + 0) < 0)
488 fmpz_neg(t1, t1);
489
490 fmpz_mpoly_init3(T, A->length, Gbits, ctx);
491 NG = mpoly_words_per_exp(Gbits, ctx->minfo);
492 NA = mpoly_words_per_exp(A->bits, ctx->minfo);
493 T->length = A->length;
494 for (i = 0; i < A->length; i++)
495 {
496 mpoly_get_monomial_ffmpz(Texps, A->exps + NA*i, A->bits, ctx->minfo);
497 _fmpz_vec_sub(Texps, Texps, Abarexps, nvars);
498 mpoly_set_monomial_ffmpz(T->exps + NG*i, Texps, Gbits, ctx->minfo);
499 fmpz_divexact(T->coeffs + i, A->coeffs + i, t1);
500 }
501 fmpz_mpoly_swap(G, T, ctx);
502 fmpz_mpoly_clear(T, ctx);
503
504 success = 1;
505
506 cleanup_tmp:
507
508 for (j = 0; j < nvars; j++)
509 {
510 fmpz_clear(Abarexps + j);
511 fmpz_clear(Bbarexps + j);
512 fmpz_clear(Texps + j);
513 }
514
515 TMP_END;
516
517 cleanup:
518
519 fmpz_clear(t1);
520 fmpz_clear(t2);
521 fmpz_clear(gA);
522 fmpz_clear(gB);
523
524 return success;
525 }
526
527
528 /********* Assume B has length one when converted to univar format ***********/
_try_missing_var(fmpz_mpoly_t G,flint_bitcnt_t Gbits,slong var,const fmpz_mpoly_t A,ulong Ashift,const fmpz_mpoly_t B,ulong Bshift,const fmpz_mpoly_ctx_t ctx)529 static int _try_missing_var(
530 fmpz_mpoly_t G, flint_bitcnt_t Gbits,
531 slong var,
532 const fmpz_mpoly_t A, ulong Ashift,
533 const fmpz_mpoly_t B, ulong Bshift,
534 const fmpz_mpoly_ctx_t ctx)
535 {
536 int success;
537 slong i;
538 fmpz_mpoly_t tG;
539 fmpz_mpoly_univar_t Ax;
540
541 fmpz_mpoly_init(tG, ctx);
542 fmpz_mpoly_univar_init(Ax, ctx);
543
544 fmpz_mpoly_to_univar(Ax, A, var, ctx);
545
546 FLINT_ASSERT(Ax->length > 0);
547 success = _fmpz_mpoly_gcd_threaded_pool(tG, Gbits, B, Ax->coeffs + 0, ctx, NULL, 0);
548 if (!success)
549 goto cleanup;
550
551 for (i = 1; i < Ax->length; i++)
552 {
553 success = _fmpz_mpoly_gcd_threaded_pool(tG, Gbits, tG, Ax->coeffs + i, ctx, NULL, 0);
554 if (!success)
555 goto cleanup;
556 }
557
558 fmpz_mpoly_swap(G, tG, ctx);
559 _mpoly_gen_shift_left(G->exps, G->bits, G->length,
560 var, FLINT_MIN(Ashift, Bshift), ctx->minfo);
561
562 cleanup:
563
564 fmpz_mpoly_clear(tG, ctx);
565 fmpz_mpoly_univar_clear(Ax, ctx);
566
567 return success;
568 }
569
570
571 /******************* Test if B divides A or A divides B **********************/
_try_divides(fmpz_mpoly_t G,const fmpz_mpoly_t A,int try_a,const fmpz_mpoly_t B,int try_b,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)572 static int _try_divides(
573 fmpz_mpoly_t G,
574 const fmpz_mpoly_t A, int try_a,
575 const fmpz_mpoly_t B, int try_b,
576 const fmpz_mpoly_ctx_t ctx,
577 const thread_pool_handle * handles,
578 slong num_handles)
579 {
580 int success;
581 fmpz_t cA, cB, cG;
582 fmpz_mpoly_t Q;
583 fmpz_mpoly_t AA, BB;
584 slong AA_alloc, BB_alloc;
585
586 *AA = *A;
587 *BB = *B;
588 fmpz_init(cA);
589 fmpz_init(cB);
590 fmpz_init(cG);
591 fmpz_mpoly_init(Q, ctx);
592
593 _fmpz_vec_content(cA, A->coeffs, A->length);
594 _fmpz_vec_content(cB, B->coeffs, B->length);
595 fmpz_gcd(cG, cA, cB);
596
597 AA_alloc = 0;
598 if (!fmpz_is_one(cA))
599 {
600 AA_alloc = A->length;
601 AA->coeffs = _fmpz_vec_init(A->length);
602 _fmpz_vec_scalar_divexact_fmpz(AA->coeffs, A->coeffs, A->length, cA);
603 FLINT_ASSERT(AA_alloc > 0);
604 }
605
606 BB_alloc = 0;
607 if (!fmpz_is_one(cB))
608 {
609 BB_alloc = B->length;
610 BB->coeffs = _fmpz_vec_init(B->length);
611 _fmpz_vec_scalar_divexact_fmpz(BB->coeffs, B->coeffs, B->length, cB);
612 FLINT_ASSERT(BB_alloc > 0);
613 }
614
615 fmpz_divexact(cA, cA, cG);
616 fmpz_divexact(cB, cB, cG);
617
618 if (try_b &&
619 ((num_handles > 0) ? _fmpz_mpoly_divides_heap_threaded_pool(Q, AA, BB,
620 ctx, handles, num_handles)
621 : fmpz_mpoly_divides_monagan_pearce(Q, AA, BB, ctx)))
622 {
623 fmpz_mpoly_scalar_divexact_fmpz(G, B, cB, ctx);
624 success = 1;
625 goto cleanup;
626 }
627
628 if (try_a &&
629 ((num_handles > 0) ? _fmpz_mpoly_divides_heap_threaded_pool(Q, BB, AA,
630 ctx, handles, num_handles)
631 : fmpz_mpoly_divides_monagan_pearce(Q, BB, AA, ctx)))
632 {
633 fmpz_mpoly_scalar_divexact_fmpz(G, A, cA, ctx);
634 success = 1;
635 goto cleanup;
636 }
637
638 success = 0;
639
640 cleanup:
641
642 if (AA_alloc > 0)
643 _fmpz_vec_clear(AA->coeffs, AA_alloc);
644
645 if (BB_alloc > 0)
646 _fmpz_vec_clear(BB->coeffs, BB_alloc);
647
648 fmpz_mpoly_clear(Q, ctx);
649 fmpz_clear(cA);
650 fmpz_clear(cB);
651 fmpz_clear(cG);
652
653 return success;
654 }
655
656
657 /********************** Hit A and B with zippel ******************************/
_try_zippel(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const mpoly_gcd_info_t I,const fmpz_mpoly_ctx_t ctx)658 static int _try_zippel(
659 fmpz_mpoly_t G,
660 const fmpz_mpoly_t A,
661 const fmpz_mpoly_t B,
662 const mpoly_gcd_info_t I,
663 const fmpz_mpoly_ctx_t ctx)
664 {
665 slong i, k;
666 slong m = I->mvars;
667 int success;
668 mpoly_zipinfo_t zinfo;
669 flint_bitcnt_t wbits;
670 flint_rand_t randstate;
671 fmpz_mpoly_ctx_t uctx;
672 fmpz_mpolyu_t Au, Bu, Gu, Abaru, Bbaru;
673 fmpz_mpoly_t Ac, Bc, Gc;
674
675 FLINT_ASSERT(A->bits <= FLINT_BITS);
676 FLINT_ASSERT(B->bits <= FLINT_BITS);
677
678 if (!I->can_use_zippel)
679 return 0;
680
681 FLINT_ASSERT(m >= WORD(2));
682 FLINT_ASSERT(A->length > 0);
683 FLINT_ASSERT(B->length > 0);
684
685 flint_randinit(randstate);
686
687 /* interpolation will continue in m variables */
688 mpoly_zipinfo_init(zinfo, m);
689
690 /* uctx is context for Z[y_1,...,y_{m-1}]*/
691 fmpz_mpoly_ctx_init(uctx, m - 1, ORD_LEX);
692
693 /* fill in a valid zinfo->perm and degrees */
694 for (i = 0; i < m; i++)
695 {
696 k = I->zippel_perm[i];
697 zinfo->perm[i] = k;
698 zinfo->Adegs[i] = I->Adeflate_deg[k];
699 zinfo->Bdegs[i] = I->Bdeflate_deg[k];
700 FLINT_ASSERT(I->Adeflate_deg[k] != 0);
701 FLINT_ASSERT(I->Bdeflate_deg[k] != 0);
702 }
703
704 wbits = FLINT_MAX(A->bits, B->bits);
705
706 fmpz_mpolyu_init(Au, wbits, uctx);
707 fmpz_mpolyu_init(Bu, wbits, uctx);
708 fmpz_mpolyu_init(Gu, wbits, uctx);
709 fmpz_mpolyu_init(Abaru, wbits, uctx);
710 fmpz_mpolyu_init(Bbaru, wbits, uctx);
711 fmpz_mpoly_init3(Ac, 0, wbits, uctx);
712 fmpz_mpoly_init3(Bc, 0, wbits, uctx);
713 fmpz_mpoly_init3(Gc, 0, wbits, uctx);
714
715 fmpz_mpoly_to_mpolyu_perm_deflate_threaded_pool(Au, uctx, A, ctx, zinfo->perm,
716 I->Amin_exp, I->Gstride, I->Amax_exp, NULL, 0);
717 fmpz_mpoly_to_mpolyu_perm_deflate_threaded_pool(Bu, uctx, B, ctx, zinfo->perm,
718 I->Bmin_exp, I->Gstride, I->Bmax_exp, NULL, 0);
719
720 success = fmpz_mpolyu_content_mpoly_threaded_pool(Ac, Au, uctx, NULL, 0);
721 success = success &&
722 fmpz_mpolyu_content_mpoly_threaded_pool(Bc, Bu, uctx, NULL, 0);
723 if (!success)
724 goto cleanup;
725
726 fmpz_mpolyu_divexact_mpoly_inplace(Au, Ac, uctx);
727 fmpz_mpolyu_divexact_mpoly_inplace(Bu, Bc, uctx);
728
729 /* after removing content, degree bounds in zinfo are still valid bounds */
730 success = fmpz_mpolyu_gcdm_zippel(Gu, Abaru, Bbaru, Au, Bu,
731 uctx, zinfo, randstate);
732 if (!success)
733 goto cleanup;
734
735 success = _fmpz_mpoly_gcd_threaded_pool(Gc, wbits, Ac, Bc, uctx, NULL, 0);
736 if (!success)
737 goto cleanup;
738
739 fmpz_mpolyu_mul_mpoly_inplace(Gu, Gc, uctx);
740
741 fmpz_mpoly_from_mpolyu_perm_inflate(G, I->Gbits, ctx, Gu, uctx,
742 zinfo->perm, I->Gmin_exp, I->Gstride);
743 success = 1;
744
745 cleanup:
746
747 fmpz_mpolyu_clear(Au, uctx);
748 fmpz_mpolyu_clear(Bu, uctx);
749 fmpz_mpolyu_clear(Gu, uctx);
750 fmpz_mpolyu_clear(Abaru, uctx);
751 fmpz_mpolyu_clear(Bbaru, uctx);
752 fmpz_mpoly_clear(Ac, uctx);
753 fmpz_mpoly_clear(Bc, uctx);
754 fmpz_mpoly_clear(Gc, uctx);
755
756 fmpz_mpoly_ctx_clear(uctx);
757
758 mpoly_zipinfo_clear(zinfo);
759
760 flint_randclear(randstate);
761
762 return success;
763 }
764
765
766 /************************ Hit A and B with bma *******************************/
767 typedef struct
768 {
769 const fmpz_mpoly_struct * P;
770 fmpz_mpoly_struct * Pcontent;
771 fmpz_mpolyu_struct * Puu;
772 const slong * perm;
773 const ulong * shift, * stride, * maxexps;
774 const fmpz_mpoly_ctx_struct * ctx;
775 const fmpz_mpoly_ctx_struct * uctx;
776 const thread_pool_handle * handles;
777 slong num_handles;
778 int success;
779 }
780 _convertuu_arg_struct;
781
782 typedef _convertuu_arg_struct _convertuu_arg_t[1];
783
_worker_convertuu(void * varg)784 static void _worker_convertuu(void * varg)
785 {
786 _convertuu_arg_struct * arg = (_convertuu_arg_struct *) varg;
787
788 fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(arg->Puu, arg->uctx, arg->P, arg->ctx,
789 arg->perm, arg->shift, arg->stride, arg->maxexps,
790 arg->handles, arg->num_handles);
791
792 arg->success = fmpz_mpolyu_content_mpoly_threaded_pool(arg->Pcontent,
793 arg->Puu, arg->uctx, arg->handles, arg->num_handles);
794 if (arg->success)
795 {
796 fmpz_mpolyu_divexact_mpoly_inplace(arg->Puu, arg->Pcontent, arg->uctx);
797 }
798 }
799
_try_bma(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const mpoly_gcd_info_t I,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)800 static int _try_bma(
801 fmpz_mpoly_t G,
802 const fmpz_mpoly_t A,
803 const fmpz_mpoly_t B,
804 const mpoly_gcd_info_t I,
805 const fmpz_mpoly_ctx_t ctx,
806 const thread_pool_handle * handles,
807 slong num_handles)
808 {
809 slong i, k;
810 slong m = I->mvars;
811 int success;
812 flint_bitcnt_t wbits;
813 fmpz_mpoly_ctx_t uctx;
814 fmpz_mpolyu_t Auu, Buu, Guu, Abaruu, Bbaruu;
815 fmpz_mpoly_t Ac, Bc, Gc, Gamma;
816 slong max_minor_degree;
817
818 FLINT_ASSERT(A->bits <= FLINT_BITS);
819 FLINT_ASSERT(B->bits <= FLINT_BITS);
820 FLINT_ASSERT(A->length > 0);
821 FLINT_ASSERT(B->length > 0);
822
823 if (!I->can_use_bma)
824 return 0;
825
826 FLINT_ASSERT(m >= WORD(3));
827
828 /* uctx is context for Z[y_2,...,y_{m - 1}]*/
829 fmpz_mpoly_ctx_init(uctx, m - 2, ORD_LEX);
830
831 max_minor_degree = 0;
832 for (i = 2; i < m; i++)
833 {
834 k = I->bma_perm[i];
835 max_minor_degree = FLINT_MAX(max_minor_degree, I->Adeflate_deg[k]);
836 max_minor_degree = FLINT_MAX(max_minor_degree, I->Bdeflate_deg[k]);
837 }
838
839 wbits = 1 + FLINT_BIT_COUNT(max_minor_degree);
840 wbits = FLINT_MAX(MPOLY_MIN_BITS, wbits);
841 wbits = mpoly_fix_bits(wbits, uctx->minfo);
842 FLINT_ASSERT(wbits <= FLINT_BITS);
843
844 fmpz_mpolyu_init(Auu, wbits, uctx);
845 fmpz_mpolyu_init(Buu, wbits, uctx);
846 fmpz_mpolyu_init(Guu, wbits, uctx);
847 fmpz_mpolyu_init(Abaruu, wbits, uctx);
848 fmpz_mpolyu_init(Bbaruu, wbits, uctx);
849 fmpz_mpoly_init3(Ac, 0, wbits, uctx);
850 fmpz_mpoly_init3(Bc, 0, wbits, uctx);
851 fmpz_mpoly_init3(Gc, 0, wbits, uctx);
852 fmpz_mpoly_init3(Gamma, 0, wbits, uctx);
853
854 /* convert to bivariate format and remove content from A and B */
855 if (num_handles > 0)
856 {
857 slong s = mpoly_divide_threads(num_handles, A->length, B->length);
858 _convertuu_arg_t arg;
859
860 FLINT_ASSERT(s >= 0);
861 FLINT_ASSERT(s < num_handles);
862
863 arg->ctx = ctx;
864 arg->uctx = uctx;
865 arg->P = B;
866 arg->Puu = Buu;
867 arg->Pcontent = Bc;
868 arg->perm = I->bma_perm;
869 arg->shift = I->Bmin_exp;
870 arg->stride = I->Gstride;
871 arg->maxexps = I->Bmax_exp;
872 arg->handles = handles + (s + 1);
873 arg->num_handles = num_handles - (s + 1);
874
875 thread_pool_wake(global_thread_pool, handles[s], 0, _worker_convertuu, arg);
876
877 fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Auu, uctx, A, ctx,
878 I->bma_perm, I->Amin_exp, I->Gstride, I->Amax_exp,
879 handles + 0, s);
880 success = fmpz_mpolyu_content_mpoly_threaded_pool(Ac, Auu,
881 uctx, handles + 0, s);
882 if (success)
883 {
884 fmpz_mpolyu_divexact_mpoly_inplace(Auu, Ac, uctx);
885 }
886
887 thread_pool_wait(global_thread_pool, handles[s]);
888
889 success = success && arg->success;
890 if (!success)
891 goto cleanup;
892 }
893 else
894 {
895 fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Auu, uctx, A, ctx,
896 I->bma_perm, I->Amin_exp, I->Gstride, I->Amax_exp, NULL, 0);
897 fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Buu, uctx, B, ctx,
898 I->bma_perm, I->Bmin_exp, I->Gstride, I->Bmax_exp, NULL, 0);
899
900 success = fmpz_mpolyu_content_mpoly_threaded_pool(Ac, Auu, uctx, NULL, 0);
901 success = success &&
902 fmpz_mpolyu_content_mpoly_threaded_pool(Bc, Buu, uctx, NULL, 0);
903 if (!success)
904 goto cleanup;
905
906 fmpz_mpolyu_divexact_mpoly_inplace(Auu, Ac, uctx);
907 fmpz_mpolyu_divexact_mpoly_inplace(Buu, Bc, uctx);
908 }
909
910 FLINT_ASSERT(Auu->bits == wbits);
911 FLINT_ASSERT(Buu->bits == wbits);
912 FLINT_ASSERT(Auu->length > 1);
913 FLINT_ASSERT(Buu->length > 1);
914 FLINT_ASSERT(Ac->bits == wbits);
915 FLINT_ASSERT(Bc->bits == wbits);
916
917 _fmpz_mpoly_gcd_threaded_pool(Gamma, wbits, Auu->coeffs + 0, Buu->coeffs + 0,
918 uctx, handles, num_handles);
919 if (!success)
920 goto cleanup;
921
922 success = (num_handles > 0)
923 ? fmpz_mpolyuu_gcd_berlekamp_massey_threaded_pool(Guu, Abaruu, Bbaruu,
924 Auu, Buu, Gamma, uctx, handles, num_handles)
925 : fmpz_mpolyuu_gcd_berlekamp_massey(Guu, Abaruu, Bbaruu,
926 Auu, Buu, Gamma, uctx);
927 if (!success)
928 goto cleanup;
929
930 success = _fmpz_mpoly_gcd_threaded_pool(Gc, wbits, Ac, Bc, uctx, handles, num_handles);
931 if (!success)
932 goto cleanup;
933
934 fmpz_mpolyu_mul_mpoly_inplace(Guu, Gc, uctx);
935
936 fmpz_mpoly_from_mpolyuu_perm_inflate(G, I->Gbits, ctx, Guu, uctx,
937 I->bma_perm, I->Gmin_exp, I->Gstride);
938 success = 1;
939
940 cleanup:
941
942 fmpz_mpolyu_clear(Auu, uctx);
943 fmpz_mpolyu_clear(Buu, uctx);
944 fmpz_mpolyu_clear(Guu, uctx);
945 fmpz_mpolyu_clear(Abaruu, uctx);
946 fmpz_mpolyu_clear(Bbaruu, uctx);
947 fmpz_mpoly_clear(Ac, uctx);
948 fmpz_mpoly_clear(Bc, uctx);
949 fmpz_mpoly_clear(Gc, uctx);
950 fmpz_mpoly_clear(Gamma, uctx);
951
952 fmpz_mpoly_ctx_clear(uctx);
953
954 return success;
955 }
956
957
958 /*********************** Hit A and B with brown ******************************/
959 typedef struct
960 {
961 fmpz_mpoly_struct * Pl;
962 const fmpz_mpoly_ctx_struct * lctx;
963 const fmpz_mpoly_struct * P;
964 const fmpz_mpoly_ctx_struct * ctx;
965 const slong * perm;
966 const ulong * shift, * stride, * maxexps;
967 const thread_pool_handle * handles;
968 slong num_handles;
969 }
970 _convertl_arg_struct;
971
972 typedef _convertl_arg_struct _convertl_arg_t[1];
973
_worker_convertu(void * varg)974 static void _worker_convertu(void * varg)
975 {
976 _convertl_arg_struct * arg = (_convertl_arg_struct *) varg;
977
978 fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(arg->Pl, arg->lctx, arg->P, arg->ctx,
979 arg->perm, arg->shift, arg->stride,
980 arg->handles, arg->num_handles);
981 }
982
_try_brown(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,mpoly_gcd_info_t I,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)983 static int _try_brown(
984 fmpz_mpoly_t G,
985 const fmpz_mpoly_t A,
986 const fmpz_mpoly_t B,
987 mpoly_gcd_info_t I,
988 const fmpz_mpoly_ctx_t ctx,
989 const thread_pool_handle * handles,
990 slong num_handles)
991 {
992 int success;
993 slong m = I->mvars;
994 flint_bitcnt_t wbits;
995 fmpz_mpoly_ctx_t lctx;
996 fmpz_mpoly_t Al, Bl, Gl, Abarl, Bbarl;
997
998 if (!I->can_use_brown)
999 return 0;
1000
1001 FLINT_ASSERT(m >= 2);
1002 FLINT_ASSERT(A->bits <= FLINT_BITS);
1003 FLINT_ASSERT(B->bits <= FLINT_BITS);
1004 FLINT_ASSERT(A->length > 0);
1005 FLINT_ASSERT(B->length > 0);
1006
1007 wbits = FLINT_MAX(A->bits, B->bits);
1008
1009 fmpz_mpoly_ctx_init(lctx, m, ORD_LEX);
1010 fmpz_mpoly_init3(Al, 0, wbits, lctx);
1011 fmpz_mpoly_init3(Bl, 0, wbits, lctx);
1012 fmpz_mpoly_init3(Gl, 0, wbits, lctx);
1013 fmpz_mpoly_init3(Abarl, 0, wbits, lctx);
1014 fmpz_mpoly_init3(Bbarl, 0, wbits, lctx);
1015
1016 /* convert to univariate format */
1017 if (num_handles > 0)
1018 {
1019 slong s = mpoly_divide_threads(num_handles, A->length, B->length);
1020 _convertl_arg_t arg;
1021
1022 FLINT_ASSERT(s >= 0);
1023 FLINT_ASSERT(s < num_handles);
1024
1025 arg->Pl = Bl;
1026 arg->lctx = lctx;
1027 arg->P = B;
1028 arg->ctx = ctx;
1029 arg->perm = I->brown_perm;
1030 arg->shift = I->Bmin_exp;
1031 arg->stride = I->Gstride;
1032 arg->maxexps = I->Bmax_exp;
1033 arg->handles = handles + (s + 1);
1034 arg->num_handles = num_handles - (s + 1);
1035
1036 thread_pool_wake(global_thread_pool, handles[s], 0, _worker_convertu, arg);
1037
1038 fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(Al, lctx, A, ctx,
1039 I->brown_perm, I->Amin_exp, I->Gstride,
1040 handles + 0, s);
1041
1042 thread_pool_wait(global_thread_pool, handles[s]);
1043 }
1044 else
1045 {
1046 fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(Al, lctx, A, ctx,
1047 I->brown_perm, I->Amin_exp, I->Gstride, NULL, 0);
1048 fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(Bl, lctx, B, ctx,
1049 I->brown_perm, I->Bmin_exp, I->Gstride, NULL, 0);
1050 }
1051
1052 FLINT_ASSERT(Al->bits == wbits);
1053 FLINT_ASSERT(Bl->bits == wbits);
1054 FLINT_ASSERT(Al->length > 1);
1055 FLINT_ASSERT(Bl->length > 1);
1056
1057 success = (num_handles > 0)
1058 ? fmpz_mpolyl_gcd_brown_threaded_pool(Gl, Abarl, Bbarl, Al, Bl, lctx, I,
1059 handles, num_handles)
1060 : fmpz_mpolyl_gcd_brown(Gl, Abarl, Bbarl, Al, Bl, lctx, I);
1061
1062 if (!success)
1063 goto cleanup;
1064
1065 fmpz_mpoly_from_mpoly_perm_inflate(G, I->Gbits, ctx, Gl, lctx,
1066 I->brown_perm, I->Gmin_exp, I->Gstride);
1067 success = 1;
1068
1069 cleanup:
1070
1071 fmpz_mpoly_clear(Al, lctx);
1072 fmpz_mpoly_clear(Bl, lctx);
1073 fmpz_mpoly_clear(Gl, lctx);
1074 fmpz_mpoly_clear(Abarl, lctx);
1075 fmpz_mpoly_clear(Bbarl, lctx);
1076 fmpz_mpoly_ctx_clear(lctx);
1077
1078 return success;
1079 }
1080
1081
1082 /*
1083 The function must pack a successful answer into bits = Gbits <= FLINT_BITS.
1084 Both A and B have to be packed into bits <= FLINT_BITS.
1085 return is 1 for success, 0 for failure.
1086 */
_fmpz_mpoly_gcd_threaded_pool(fmpz_mpoly_t G,flint_bitcnt_t Gbits,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)1087 int _fmpz_mpoly_gcd_threaded_pool(
1088 fmpz_mpoly_t G, flint_bitcnt_t Gbits,
1089 const fmpz_mpoly_t A,
1090 const fmpz_mpoly_t B,
1091 const fmpz_mpoly_ctx_t ctx,
1092 const thread_pool_handle * handles,
1093 slong num_handles)
1094 {
1095 int success;
1096 slong v_in_both;
1097 slong v_in_either;
1098 slong v_in_A_only;
1099 slong v_in_B_only;
1100 slong j;
1101 slong nvars = ctx->minfo->nvars;
1102 mpoly_gcd_info_t I;
1103
1104 FLINT_ASSERT(A->length > 0);
1105 FLINT_ASSERT(B->length > 0);
1106 FLINT_ASSERT(Gbits <= FLINT_BITS);
1107 FLINT_ASSERT(A->bits <= FLINT_BITS);
1108 FLINT_ASSERT(B->bits <= FLINT_BITS);
1109
1110 if (A->length == 1)
1111 {
1112 return _try_monomial_gcd(G, Gbits, B, A, ctx);
1113 }
1114 else if (B->length == 1)
1115 {
1116 return _try_monomial_gcd(G, Gbits, A, B, ctx);
1117 }
1118
1119 mpoly_gcd_info_init(I, nvars);
1120
1121 /* entries of I are all now invalid */
1122
1123 I->Gbits = Gbits;
1124
1125 mpoly_gcd_info_limits(I->Amax_exp, I->Amin_exp, I->Alead_count,
1126 I->Atail_count, A->exps, A->bits, A->length, ctx->minfo);
1127 mpoly_gcd_info_limits(I->Bmax_exp, I->Bmin_exp, I->Blead_count,
1128 I->Btail_count, B->exps, B->bits, B->length, ctx->minfo);
1129
1130 /* set ess(p) := p/term_content(p) */
1131
1132 /* check if the cofactors could be monomials, i.e. ess(A) == ess(B) */
1133 for (j = 0; j < nvars; j++)
1134 {
1135 if (I->Amax_exp[j] - I->Amin_exp[j] != I->Bmax_exp[j] - I->Bmin_exp[j])
1136 goto skip_monomial_cofactors;
1137 }
1138 if (_try_monomial_cofactors(G, I->Gbits, A, B, ctx))
1139 {
1140 goto successful;
1141 }
1142
1143 skip_monomial_cofactors:
1144
1145 mpoly_gcd_info_stride(I->Gstride,
1146 A->exps, A->bits, A->length, I->Amax_exp, I->Amin_exp,
1147 B->exps, B->bits, B->length, I->Bmax_exp, I->Bmin_exp, ctx->minfo);
1148
1149 for (j = 0; j < nvars; j++)
1150 {
1151 ulong t = I->Gstride[j];
1152
1153 if (t == 0)
1154 {
1155 FLINT_ASSERT( I->Amax_exp[j] == I->Amin_exp[j]
1156 || I->Bmax_exp[j] == I->Bmin_exp[j]);
1157 }
1158 else
1159 {
1160 FLINT_ASSERT((I->Amax_exp[j] - I->Amin_exp[j]) % t == 0);
1161 FLINT_ASSERT((I->Bmax_exp[j] - I->Bmin_exp[j]) % t == 0);
1162 }
1163
1164 I->Adeflate_deg[j] = t == 0 ? 0 : (I->Amax_exp[j] - I->Amin_exp[j])/t;
1165 I->Bdeflate_deg[j] = t == 0 ? 0 : (I->Bmax_exp[j] - I->Bmin_exp[j])/t;
1166 I->Gmin_exp[j] = FLINT_MIN(I->Amin_exp[j], I->Bmin_exp[j]);
1167 }
1168
1169 /*
1170 The following are now valid:
1171 I->Amax_exp, I->Amin_exp, I->Alead_count, I->Atail_count,
1172 I->Bmax_exp, I->Bmin_exp, I->Blead_count, I->Btail_count,
1173 I->Gstride
1174 I->Adeflate_deg
1175 I->Bdeflate_deg
1176 I->Gmin_exp
1177 */
1178
1179 /* check if ess(A) and ess(B) have a variable v_in_both in common */
1180 v_in_both = -WORD(1);
1181 for (j = 0; j < nvars; j++)
1182 {
1183 if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] > I->Bmin_exp[j])
1184 {
1185 v_in_both = j;
1186 break;
1187 }
1188 }
1189 if (v_in_both == -WORD(1))
1190 {
1191 /*
1192 The variables in ess(A) and ess(B) are disjoint.
1193 gcd is trivial to compute.
1194 */
1195 fmpz_t gA, gB;
1196
1197 calculate_trivial_gcd:
1198
1199 fmpz_init(gA);
1200 fmpz_init(gB);
1201 _fmpz_vec_content(gA, A->coeffs, A->length);
1202 _fmpz_vec_content(gB, B->coeffs, B->length);
1203
1204 fmpz_mpoly_fit_length(G, 1, ctx);
1205 fmpz_mpoly_fit_bits(G, Gbits, ctx);
1206 G->bits = Gbits;
1207 mpoly_set_monomial_ui(G->exps, I->Gmin_exp, Gbits, ctx->minfo);
1208 fmpz_gcd(G->coeffs + 0, gA, gB);
1209 _fmpz_mpoly_set_length(G, 1, ctx);
1210
1211 fmpz_clear(gA);
1212 fmpz_clear(gB);
1213
1214 goto successful;
1215 }
1216
1217 /* check if ess(A) and ess(B) depend on another variable v_in_either */
1218 FLINT_ASSERT(0 <= v_in_both);
1219 FLINT_ASSERT(v_in_both < nvars);
1220
1221 v_in_either = -WORD(1);
1222 for (j = 0; j < nvars; j++)
1223 {
1224 if (j == v_in_both)
1225 continue;
1226
1227 if (I->Amax_exp[j] > I->Amin_exp[j] || I->Bmax_exp[j] > I->Bmin_exp[j])
1228 {
1229 v_in_either = j;
1230 break;
1231 }
1232 }
1233
1234 if (v_in_either == -WORD(1))
1235 {
1236 /*
1237 The ess(A) and ess(B) depend on only one variable v_in_both
1238 Calculate gcd using univariates
1239 */
1240 fmpz_poly_t a, b, g;
1241
1242 fmpz_poly_init(a);
1243 fmpz_poly_init(b);
1244 fmpz_poly_init(g);
1245 _fmpz_mpoly_to_fmpz_poly_deflate(a, A, v_in_both,
1246 I->Amin_exp, I->Gstride, ctx);
1247 _fmpz_mpoly_to_fmpz_poly_deflate(b, B, v_in_both,
1248 I->Bmin_exp, I->Gstride, ctx);
1249 fmpz_poly_gcd(g, a, b);
1250 _fmpz_mpoly_from_fmpz_poly_inflate(G, Gbits, g, v_in_both,
1251 I->Gmin_exp, I->Gstride, ctx);
1252 fmpz_poly_clear(a);
1253 fmpz_poly_clear(b);
1254 fmpz_poly_clear(g);
1255
1256 goto successful;
1257 }
1258
1259 /* check if there is a variable in ess(A) that is not in ess(B) */
1260 v_in_A_only = -WORD(1);
1261 v_in_B_only = -WORD(1);
1262 for (j = 0; j < nvars; j++)
1263 {
1264 if (I->Amax_exp[j] > I->Amin_exp[j] && I->Bmax_exp[j] == I->Bmin_exp[j])
1265 {
1266 v_in_A_only = j;
1267 break;
1268 }
1269 if (I->Bmax_exp[j] > I->Bmin_exp[j] && I->Amax_exp[j] == I->Amin_exp[j])
1270 {
1271 v_in_B_only = j;
1272 break;
1273 }
1274 }
1275 if (v_in_A_only != -WORD(1))
1276 {
1277 success = _try_missing_var(G, I->Gbits,
1278 v_in_A_only,
1279 A, I->Amin_exp[v_in_A_only],
1280 B, I->Bmin_exp[v_in_A_only],
1281 ctx);
1282 goto cleanup;
1283 }
1284 if (v_in_B_only != -WORD(1))
1285 {
1286 success = _try_missing_var(G, I->Gbits,
1287 v_in_B_only,
1288 B, I->Bmin_exp[v_in_B_only],
1289 A, I->Amin_exp[v_in_B_only],
1290 ctx);
1291 goto cleanup;
1292 }
1293
1294 /*
1295 all variable are now either
1296 missing from both ess(A) and ess(B), or
1297 present in both ess(A) and ess(B)
1298 and there are at least two in the latter case
1299 */
1300
1301 mpoly_gcd_info_set_estimates_fmpz_mpoly(I, A, B, ctx, handles, num_handles);
1302 mpoly_gcd_info_set_perm(I, A->length, B->length, ctx->minfo);
1303
1304 /* everything in I is valid now */
1305
1306 /* check divisibility A/B and B/A */
1307 {
1308 int gcd_is_trivial = 1;
1309 int try_a = I->Gdeflate_deg_bounds_are_nice;
1310 int try_b = I->Gdeflate_deg_bounds_are_nice;
1311 for (j = 0; j < nvars; j++)
1312 {
1313 if (I->Gdeflate_deg_bound[j] != 0)
1314 {
1315 gcd_is_trivial = 0;
1316 }
1317
1318 if (I->Adeflate_deg[j] != I->Gdeflate_deg_bound[j]
1319 || I->Amin_exp[j] > I->Bmin_exp[j])
1320 {
1321 try_a = 0;
1322 }
1323
1324 if (I->Bdeflate_deg[j] != I->Gdeflate_deg_bound[j]
1325 || I->Bmin_exp[j] > I->Amin_exp[j])
1326 {
1327 try_b = 0;
1328 }
1329 }
1330
1331 if (gcd_is_trivial)
1332 goto calculate_trivial_gcd;
1333
1334 if ((try_a || try_b) &&
1335 _try_divides(G, A, try_a, B, try_b, ctx, handles, num_handles))
1336 {
1337 goto successful;
1338 }
1339 }
1340
1341 mpoly_gcd_info_measure_brown(I, A->length, B->length, ctx->minfo);
1342 mpoly_gcd_info_measure_bma(I, A->length, B->length, ctx->minfo);
1343
1344 if (I->mvars == 2)
1345 {
1346 /* TODO: bivariate heuristic here */
1347
1348 if (_try_brown(G, A, B, I, ctx, handles, num_handles))
1349 goto successful;
1350 }
1351 else if (I->can_use_brown && I->can_use_bma
1352 && I->bma_time_est < I->brown_time_est
1353 && (I->mvars*(I->Adensity + I->Bdensity) < 1
1354 || I->bma_time_est < 0.01*I->brown_time_est))
1355 {
1356 if (_try_bma(G, A, B, I, ctx, handles, num_handles))
1357 goto successful;
1358
1359 if (_try_brown(G, A, B, I, ctx, handles, num_handles))
1360 goto successful;
1361 }
1362 else
1363 {
1364 if (_try_brown(G, A, B, I, ctx, handles, num_handles))
1365 goto successful;
1366
1367 if (_try_bma(G, A, B, I, ctx, handles, num_handles))
1368 goto successful;
1369 }
1370
1371 mpoly_gcd_info_measure_zippel(I, A->length, B->length, ctx->minfo);
1372
1373 if (_try_zippel(G, A, B, I, ctx))
1374 goto successful;
1375
1376 success = 0;
1377 goto cleanup;
1378
1379 successful:
1380
1381 success = 1;
1382
1383 cleanup:
1384
1385 mpoly_gcd_info_clear(I);
1386
1387 if (success)
1388 {
1389 fmpz_mpoly_repack_bits_inplace(G, Gbits, ctx);
1390 FLINT_ASSERT(G->length > 0);
1391 if (fmpz_sgn(G->coeffs + 0) < 0)
1392 fmpz_mpoly_neg(G, G, ctx);
1393 }
1394
1395 return success;
1396 }
1397
1398
fmpz_mpoly_gcd(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)1399 int fmpz_mpoly_gcd(
1400 fmpz_mpoly_t G,
1401 const fmpz_mpoly_t A,
1402 const fmpz_mpoly_t B,
1403 const fmpz_mpoly_ctx_t ctx)
1404 {
1405 flint_bitcnt_t Gbits;
1406 int success;
1407 thread_pool_handle * handles;
1408 slong num_handles;
1409 slong thread_limit;
1410
1411 thread_limit = FLINT_MIN(A->length, B->length)/256;
1412
1413 if (fmpz_mpoly_is_zero(A, ctx))
1414 {
1415 if (B->length == 0)
1416 {
1417 fmpz_mpoly_zero(G, ctx);
1418 return 1;
1419 }
1420 if (fmpz_sgn(B->coeffs + 0) < 0)
1421 {
1422 fmpz_mpoly_neg(G, B, ctx);
1423 return 1;
1424 }
1425 else
1426 {
1427 fmpz_mpoly_set(G, B, ctx);
1428 return 1;
1429 }
1430 }
1431
1432 if (fmpz_mpoly_is_zero(B, ctx))
1433 {
1434 if (fmpz_sgn(A->coeffs + 0) < 0)
1435 {
1436 fmpz_mpoly_neg(G, A, ctx);
1437 return 1;
1438 }
1439 else
1440 {
1441 fmpz_mpoly_set(G, A, ctx);
1442 return 1;
1443 }
1444 }
1445
1446 if (fmpz_mpoly_is_fmpz(A, ctx))
1447 {
1448 fmpz_t t;
1449 fmpz_init_set(t, A->coeffs);
1450 _fmpz_vec_content_chained(t, B->coeffs, B->length);
1451 fmpz_mpoly_set_fmpz(G, t, ctx);
1452 fmpz_clear(t);
1453 return 1;
1454 }
1455
1456 if (fmpz_mpoly_is_fmpz(B, ctx))
1457 {
1458 fmpz_t t;
1459 fmpz_init_set(t, B->coeffs);
1460 _fmpz_vec_content_chained(t, A->coeffs, A->length);
1461 fmpz_mpoly_set_fmpz(G, t, ctx);
1462 fmpz_clear(t);
1463 return 1;
1464 }
1465
1466 Gbits = FLINT_MIN(A->bits, B->bits);
1467
1468 if (A->length == 1)
1469 {
1470 return _try_monomial_gcd(G, Gbits, B, A, ctx);
1471 }
1472 else if (B->length == 1)
1473 {
1474 return _try_monomial_gcd(G, Gbits, A, B, ctx);
1475 }
1476
1477 if (A->bits <= FLINT_BITS && B->bits <= FLINT_BITS)
1478 {
1479 num_handles = flint_request_threads(&handles, thread_limit);
1480 success = _fmpz_mpoly_gcd_threaded_pool(G, Gbits, A, B, ctx, handles, num_handles);
1481 flint_give_back_threads(handles, num_handles);
1482 return success;
1483 }
1484
1485 if (_try_monomial_cofactors(G, Gbits, A, B, ctx))
1486 {
1487 return 1;
1488 }
1489 else
1490 {
1491 /*
1492 The gcd calculation is unusual.
1493 First see if both inputs fit into FLINT_BITS.
1494 Then, try deflation as a last resort.
1495 */
1496
1497 slong k;
1498 fmpz * Ashift, * Astride;
1499 fmpz * Bshift, * Bstride;
1500 fmpz * Gshift, * Gstride;
1501 fmpz_mpoly_t Anew, Bnew;
1502 const fmpz_mpoly_struct * Ause, * Buse;
1503
1504 fmpz_mpoly_init(Anew, ctx);
1505 fmpz_mpoly_init(Bnew, ctx);
1506
1507 Ause = A;
1508 if (A->bits > FLINT_BITS)
1509 {
1510 if (!fmpz_mpoly_repack_bits(Anew, A, FLINT_BITS, ctx))
1511 goto could_not_repack;
1512 Ause = Anew;
1513 }
1514
1515 Buse = B;
1516 if (B->bits > FLINT_BITS)
1517 {
1518 if (!fmpz_mpoly_repack_bits(Bnew, B, FLINT_BITS, ctx))
1519 goto could_not_repack;
1520 Buse = Bnew;
1521 }
1522
1523 num_handles = flint_request_threads(&handles, thread_limit);
1524 Gbits = FLINT_MIN(Ause->bits, Buse->bits);
1525 success = _fmpz_mpoly_gcd_threaded_pool(G, Gbits, Ause, Buse, ctx,
1526 handles, num_handles);
1527 flint_give_back_threads(handles, num_handles);
1528
1529 goto cleanup;
1530
1531 could_not_repack:
1532
1533 /*
1534 One of A or B could not be repacked into FLINT_BITS. See if
1535 they both fit into FLINT_BITS after deflation.
1536 */
1537
1538 Ashift = _fmpz_vec_init(ctx->minfo->nvars);
1539 Astride = _fmpz_vec_init(ctx->minfo->nvars);
1540 Bshift = _fmpz_vec_init(ctx->minfo->nvars);
1541 Bstride = _fmpz_vec_init(ctx->minfo->nvars);
1542 Gshift = _fmpz_vec_init(ctx->minfo->nvars);
1543 Gstride = _fmpz_vec_init(ctx->minfo->nvars);
1544
1545 fmpz_mpoly_deflation(Ashift, Astride, A, ctx);
1546 fmpz_mpoly_deflation(Bshift, Bstride, B, ctx);
1547 _fmpz_vec_min(Gshift, Ashift, Bshift, ctx->minfo->nvars);
1548 for (k = 0; k < ctx->minfo->nvars; k++)
1549 {
1550 fmpz_gcd(Gstride + k, Astride + k, Bstride + k);
1551 }
1552
1553 success = 0;
1554
1555 fmpz_mpoly_deflate(Anew, A, Ashift, Gstride, ctx);
1556 if (Anew->bits > FLINT_BITS)
1557 {
1558 if (!fmpz_mpoly_repack_bits(Anew, Anew, FLINT_BITS, ctx))
1559 goto deflate_cleanup;
1560 }
1561
1562 fmpz_mpoly_deflate(Bnew, B, Bshift, Gstride, ctx);
1563 if (Bnew->bits > FLINT_BITS)
1564 {
1565 if (!fmpz_mpoly_repack_bits(Bnew, Bnew, FLINT_BITS, ctx))
1566 goto deflate_cleanup;
1567 }
1568
1569 num_handles = flint_request_threads(&handles, thread_limit);
1570 Gbits = FLINT_MIN(Anew->bits, Bnew->bits);
1571 success = _fmpz_mpoly_gcd_threaded_pool(G, Gbits, Anew, Bnew, ctx,
1572 handles, num_handles);
1573 flint_give_back_threads(handles, num_handles);
1574
1575 if (success)
1576 {
1577 fmpz_mpoly_inflate(G, G, Gshift, Gstride, ctx);
1578
1579 /* inflation may have changed the lc */
1580 FLINT_ASSERT(G->length > 0);
1581 if (fmpz_sgn(G->coeffs + 0) < 0)
1582 {
1583 fmpz_mpoly_neg(G, G, ctx);
1584 }
1585 }
1586
1587 deflate_cleanup:
1588
1589 _fmpz_vec_clear(Ashift, ctx->minfo->nvars);
1590 _fmpz_vec_clear(Astride, ctx->minfo->nvars);
1591 _fmpz_vec_clear(Bshift, ctx->minfo->nvars);
1592 _fmpz_vec_clear(Bstride, ctx->minfo->nvars);
1593 _fmpz_vec_clear(Gshift, ctx->minfo->nvars);
1594 _fmpz_vec_clear(Gstride, ctx->minfo->nvars);
1595
1596 cleanup:
1597
1598 fmpz_mpoly_clear(Anew, ctx);
1599 fmpz_mpoly_clear(Bnew, ctx);
1600
1601 return success;
1602 }
1603 }
1604
1605