1 /*
2 Copyright (C) 2019 Daniel Schultz
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include "fmpz_mpoly.h"
13 #include "nmod_mpoly.h"
14 #include "thread_pool.h"
15 #include "fmpq.h"
16
17 typedef struct
18 {
19 volatile int gcd_is_one;
20 volatile mp_limb_t p;
21 #if HAVE_PTHREAD
22 pthread_mutex_t mutex;
23 #endif
24 fmpz_t gamma;
25 const fmpz_mpoly_ctx_struct * ctx;
26 fmpz_mpoly_struct * A, * B;
27 ulong num_threads;
28 slong var;
29 const mpoly_gcd_info_struct * I;
30 }
31 _splitbase_struct;
32
33 typedef _splitbase_struct _splitbase_t[1];
34
35 typedef struct
36 {
37 slong idx;
38 _splitbase_struct * base;
39 fmpz_mpoly_t G, Abar, Bbar;
40 fmpz_t modulus;
41 slong image_count;
42 slong required_images;
43 thread_pool_handle master_handle;
44 slong num_handles;
45 thread_pool_handle * worker_handles;
46
47 nmod_mpoly_ctx_t pctx;
48 nmod_mpolyn_t Ap, Bp, Gp, Abarp, Bbarp;
49 fmpz_mpoly_t T, T1, T2;
50 }
51 _splitworker_arg_struct;
52
53
54
55 /* worker for reducing polynomial over ZZ to polynomial over Fp */
_reduce_Bp_worker(void * varg)56 static void _reduce_Bp_worker(void * varg)
57 {
58 _splitworker_arg_struct * arg = (_splitworker_arg_struct *) varg;
59 fmpz_mpoly_interp_reduce_p_mpolyn(arg->Bp, arg->pctx, arg->base->B,
60 arg->base->ctx);
61 }
62
63 /* workers for crt'ing polynomials */
_join_Abar_worker(void * varg)64 static void _join_Abar_worker(void * varg)
65 {
66 _splitworker_arg_struct * arg = (_splitworker_arg_struct *) varg;
67 if (!fmpz_is_one(arg->modulus))
68 fmpz_mpoly_interp_crt_p_mpolyn(arg->Abar, arg->T1, arg->base->ctx,
69 arg->modulus, arg->Abarp, arg->pctx);
70 else
71 fmpz_mpoly_interp_lift_p_mpolyn(arg->Abar, arg->base->ctx,
72 arg->Abarp, arg->pctx);
73 }
74
_join_Bbar_worker(void * varg)75 static void _join_Bbar_worker(void * varg)
76 {
77 _splitworker_arg_struct * arg = (_splitworker_arg_struct *) varg;
78 if (!fmpz_is_one(arg->modulus))
79 fmpz_mpoly_interp_crt_p_mpolyn(arg->Bbar, arg->T2, arg->base->ctx,
80 arg->modulus, arg->Bbarp, arg->pctx);
81 else
82 fmpz_mpoly_interp_lift_p_mpolyn(arg->Bbar, arg->base->ctx,
83 arg->Bbarp, arg->pctx);
84 }
85
86
_splitworker(void * varg)87 static void _splitworker(void * varg)
88 {
89 _splitworker_arg_struct * arg = (_splitworker_arg_struct *) varg;
90 _splitbase_struct * base = arg->base;
91 const fmpz_mpoly_ctx_struct * ctx = base->ctx;
92 flint_bitcnt_t bits = base->A->bits;
93 slong N = mpoly_words_per_exp_sp(bits, ctx->minfo);
94 slong offset, shift;
95 int success;
96 mp_limb_t p, gammared;
97 nmod_poly_stack_t Sp;
98
99 mpoly_gen_offset_shift_sp(&offset, &shift,
100 ctx->minfo->nvars - 1, bits, ctx->minfo);
101
102 fmpz_one(arg->modulus);
103 arg->image_count = 0;
104
105 nmod_mpoly_ctx_init(arg->pctx, ctx->minfo->nvars, ORD_LEX, 2);
106 nmod_poly_stack_init(Sp, bits, arg->pctx);
107 nmod_mpolyn_init(arg->Ap, bits, arg->pctx);
108 nmod_mpolyn_init(arg->Bp, bits, arg->pctx);
109 nmod_mpolyn_init(arg->Gp, bits, arg->pctx);
110 nmod_mpolyn_init(arg->Abarp, bits, arg->pctx);
111 nmod_mpolyn_init(arg->Bbarp, bits, arg->pctx);
112 fmpz_mpoly_init3(arg->T, 0, bits, ctx);
113 fmpz_mpoly_init3(arg->T1, 0, bits, ctx);
114 fmpz_mpoly_init3(arg->T2, 0, bits, ctx);
115
116 while (arg->image_count < arg->required_images)
117 {
118 /* get prime */
119 #if HAVE_PTHREAD
120 pthread_mutex_lock(&base->mutex);
121 #endif
122 p = base->p;
123 if (p >= UWORD_MAX_PRIME)
124 {
125 #if HAVE_PTHREAD
126 pthread_mutex_unlock(&base->mutex);
127 #endif
128 break;
129 }
130 p = n_nextprime(base->p, 1);
131 base->p = p;
132 #if HAVE_PTHREAD
133 pthread_mutex_unlock(&base->mutex);
134 #endif
135
136 /* make sure reduction does not kill both lc(A) and lc(B) */
137 gammared = fmpz_fdiv_ui(base->gamma, p);
138 if (gammared == 0)
139 {
140 continue;
141 }
142
143 nmod_mpoly_ctx_set_modulus(arg->pctx, p);
144
145 /* the unfortunate nmod poly's store their own context :( */
146 nmod_poly_stack_set_ctx(Sp, arg->pctx);
147 nmod_mpolyn_set_mod(arg->Ap, arg->pctx->ffinfo->mod);
148 nmod_mpolyn_set_mod(arg->Bp, arg->pctx->ffinfo->mod);
149 nmod_mpolyn_set_mod(arg->Gp, arg->pctx->ffinfo->mod);
150 nmod_mpolyn_set_mod(arg->Abarp, arg->pctx->ffinfo->mod);
151 nmod_mpolyn_set_mod(arg->Bbarp, arg->pctx->ffinfo->mod);
152
153 /* reduce to Fp and calculate an image gcd */
154 if (arg->num_handles > 0)
155 {
156 thread_pool_wake(global_thread_pool, arg->worker_handles[0], 0,
157 _reduce_Bp_worker, arg);
158
159 fmpz_mpoly_interp_reduce_p_mpolyn(arg->Ap, arg->pctx, base->A, ctx);
160
161 thread_pool_wait(global_thread_pool, arg->worker_handles[0]);
162
163 success = nmod_mpolyn_gcd_brown_smprime_threaded_pool(
164 arg->Gp, arg->Abarp, arg->Bbarp,
165 arg->Ap, arg->Bp, ctx->minfo->nvars - 1, arg->pctx, base->I,
166 arg->worker_handles, arg->num_handles);
167 }
168 else
169 {
170 /* reduction should kill neither A nor B */
171 fmpz_mpoly_interp_reduce_p_mpolyn(arg->Ap, arg->pctx, base->A, ctx);
172 fmpz_mpoly_interp_reduce_p_mpolyn(arg->Bp, arg->pctx, base->B, ctx);
173 FLINT_ASSERT(arg->Ap->length > 0);
174 FLINT_ASSERT(arg->Bp->length > 0);
175 success = nmod_mpolyn_gcd_brown_smprime(
176 arg->Gp, arg->Abarp, arg->Bbarp, arg->Ap, arg->Bp,
177 ctx->minfo->nvars - 1, arg->pctx, base->I, Sp);
178 }
179
180 if (!success)
181 {
182 continue;
183 }
184
185 FLINT_ASSERT(arg->Gp->length > 0);
186 FLINT_ASSERT(arg->Abarp->length > 0);
187 FLINT_ASSERT(arg->Bbarp->length > 0);
188
189 /* check up */
190 if (base->gcd_is_one)
191 {
192 break;
193 }
194
195 if (nmod_mpolyn_is_nonzero_nmod(arg->Gp, arg->pctx))
196 {
197 base->gcd_is_one = 1;
198 break;
199 }
200
201 if (!fmpz_is_one(arg->modulus))
202 {
203 int cmp = 0;
204 slong k;
205
206 FLINT_ASSERT(arg->G->length > 0);
207
208 k = nmod_poly_degree(arg->Gp->coeffs + 0);
209 cmp = mpoly_monomial_cmp_nomask_extra(arg->G->exps + N*0,
210 arg->Gp->exps + N*0, N, offset, k << shift);
211 if (cmp < 0)
212 {
213 continue;
214 }
215 else if (cmp > 0)
216 {
217 fmpz_one(arg->modulus);
218 arg->image_count = 0;
219 }
220 }
221
222 FLINT_ASSERT(1 == nmod_mpolyn_leadcoeff(arg->Gp, arg->pctx));
223 nmod_mpolyn_scalar_mul_nmod(arg->Gp, gammared, arg->pctx);
224
225 /* crt image gcd */
226 if (arg->num_handles > 0)
227 {
228 thread_pool_wake(global_thread_pool, arg->worker_handles[0], 0,
229 _join_Abar_worker, arg);
230 if (arg->num_handles > 1)
231 {
232 thread_pool_wake(global_thread_pool, arg->worker_handles[1], 0,
233 _join_Bbar_worker, arg);
234 }
235 else
236 {
237 _join_Bbar_worker(arg);
238 }
239
240 if (!fmpz_is_one(arg->modulus))
241 fmpz_mpoly_interp_crt_p_mpolyn(arg->G, arg->T, ctx, arg->modulus,
242 arg->Gp, arg->pctx);
243 else
244 fmpz_mpoly_interp_lift_p_mpolyn(arg->G, ctx, arg->Gp, arg->pctx);
245
246 thread_pool_wait(global_thread_pool, arg->worker_handles[0]);
247 if (arg->num_handles > 1)
248 thread_pool_wait(global_thread_pool, arg->worker_handles[1]);
249 }
250 else
251 {
252 if (!fmpz_is_one(arg->modulus))
253 {
254 fmpz_mpoly_interp_crt_p_mpolyn(arg->G, arg->T, ctx,
255 arg->modulus, arg->Gp, arg->pctx);
256 fmpz_mpoly_interp_crt_p_mpolyn(arg->Abar, arg->T, ctx,
257 arg->modulus, arg->Abarp, arg->pctx);
258 fmpz_mpoly_interp_crt_p_mpolyn(arg->Bbar, arg->T, ctx,
259 arg->modulus, arg->Bbarp, arg->pctx);
260 }
261 else
262 {
263 fmpz_mpoly_interp_lift_p_mpolyn(arg->G, ctx,
264 arg->Gp, arg->pctx);
265 fmpz_mpoly_interp_lift_p_mpolyn(arg->Abar, ctx,
266 arg->Abarp, arg->pctx);
267 fmpz_mpoly_interp_lift_p_mpolyn(arg->Bbar, ctx,
268 arg->Bbarp, arg->pctx);
269 }
270 }
271
272 fmpz_mul_ui(arg->modulus, arg->modulus, p);
273 arg->image_count++;
274 }
275
276 fmpz_mpoly_clear(arg->T, ctx);
277 fmpz_mpoly_clear(arg->T1, ctx);
278 fmpz_mpoly_clear(arg->T2, ctx);
279
280 nmod_mpolyn_clear(arg->Ap, arg->pctx);
281 nmod_mpolyn_clear(arg->Bp, arg->pctx);
282 nmod_mpolyn_clear(arg->Gp, arg->pctx);
283 nmod_mpolyn_clear(arg->Abarp, arg->pctx);
284 nmod_mpolyn_clear(arg->Bbarp, arg->pctx);
285 nmod_poly_stack_clear(Sp);
286 nmod_mpoly_ctx_clear(arg->pctx);
287 }
288
289 typedef struct
290 {
291 slong hint_start, hint_stop;
292 ulong * left_exp, * right_exp;
293 fmpz_mpoly_t poly;
294 fmpz_t maxcoeff, sumcoeff;
295 slong thread_idx;
296 slong final_idx;
297 int GAB; /* 0 -> G, 1 -> A, 2 -> B */
298 }
299 _joinworker_arg_struct;
300
301 /*
302 A = crt(B[0], ...., B[count-1]) wrt to P
303 This function takes some preallocated temp space.
304 */
305
_find_edge(slong * start,slong count,const ulong * exp_left,fmpz_mpoly_struct * const * B,slong N)306 static void _find_edge(
307 slong * start,
308 slong count,
309 const ulong * exp_left,
310 fmpz_mpoly_struct * const * B,
311 slong N)
312 {
313 slong k;
314
315 for (k = 0; k < count; k++)
316 {
317 slong Blength = B[k]->length;
318 const ulong * Bexps = B[k]->exps;
319
320 if (start[k] < Blength
321 && mpoly_monomial_gt_nomask(Bexps + N*start[k], exp_left, N))
322 {
323 /* go right */
324 do {
325 start[k]++;
326 } while (start[k] < Blength
327 && mpoly_monomial_gt_nomask(Bexps + N*start[k], exp_left, N));
328 }
329 else
330 {
331 /* go left */
332 while (start[k] > 0
333 && !mpoly_monomial_gt_nomask(Bexps + N*(start[k] - 1), exp_left, N))
334 {
335 start[k]--;
336 }
337 }
338 }
339 }
340
_fmpz_mpoly_crt(const fmpz_multi_crt_t P,_joinworker_arg_struct * S,fmpz_mpoly_struct * const * B,slong count,fmpz * output,fmpz ** input,const fmpz_mpoly_ctx_t ctx)341 static slong _fmpz_mpoly_crt(
342 const fmpz_multi_crt_t P,
343 _joinworker_arg_struct * S,
344 fmpz_mpoly_struct * const * B,
345 slong count,
346 fmpz * output,
347 fmpz ** input,
348 const fmpz_mpoly_ctx_t ctx)
349 {
350 int cmp;
351 slong N = mpoly_words_per_exp_sp(S->poly->bits, ctx->minfo);
352 slong lastdegree;
353 slong Ai;
354 slong j, k;
355 slong * start, * stop;
356 fmpz_t zero, max, sum;
357 fmpz_mpoly_t A;
358 const ulong * exp_left = S->left_exp;
359 const ulong * exp_right = S->right_exp;
360 TMP_INIT;
361
362 *A = *S->poly;
363
364 TMP_START;
365
366 start = (slong *) TMP_ALLOC(2*count*sizeof(slong));
367 stop = start + count;
368
369 for (k = 0; k < count; k++)
370 {
371 start[k] = exp_left ? FLINT_MIN(S->hint_start, B[k]->length) : 0;
372 stop[k] = exp_right ? FLINT_MIN(S->hint_stop, B[k]->length) : B[k]->length;
373 }
374
375 if (exp_left)
376 _find_edge(start, count, exp_left, B, N);
377
378 if (exp_right)
379 _find_edge(stop, count, exp_right, B, N);
380
381 #if WANT_ASSERT
382 for (k = 0; k < count; k++)
383 {
384 FLINT_ASSERT(0 <= start[k]);
385 FLINT_ASSERT(0 <= stop[k]);
386 FLINT_ASSERT(start[k] <= B[k]->length);
387 FLINT_ASSERT(stop[k] <= B[k]->length);
388 FLINT_ASSERT(start[k] <= stop[k]);
389
390 /* check start */
391 if (!exp_left)
392 {
393 FLINT_ASSERT(start[k] == 0);
394 }
395 else
396 {
397 FLINT_ASSERT(start[k] == B[k]->length ||
398 !mpoly_monomial_gt_nomask(B[k]->exps + N*start[k], exp_left, N));
399
400 FLINT_ASSERT(start[k] == 0 ||
401 mpoly_monomial_gt_nomask(B[k]->exps + N*(start[k] - 1), exp_left, N));
402 }
403
404 /* check stop */
405 if (!exp_right)
406 {
407 FLINT_ASSERT(stop[k] == B[k]->length);
408 }
409 else
410 {
411 FLINT_ASSERT(stop[k] == B[k]->length ||
412 !mpoly_monomial_gt_nomask(B[k]->exps + N*stop[k], exp_right, N));
413
414 FLINT_ASSERT(stop[k] == 0 ||
415 mpoly_monomial_gt_nomask(B[k]->exps + N*(stop[k] - 1), exp_right, N));
416 }
417 }
418 #endif
419
420 fmpz_init(zero);
421 fmpz_init(max);
422 fmpz_init(sum);
423
424 Ai = 0;
425 lastdegree = -WORD(1);
426 while (1)
427 {
428 fmpz_mpoly_fit_length(A, Ai + 1, ctx);
429
430 k = 0;
431 do
432 {
433 input[k] = zero;
434 if (start[k] < stop[k])
435 {
436 goto found_max;
437 }
438 } while (++k < count);
439
440 break; /* all B[k] have been scanned completely */
441
442 found_max:
443
444 input[k] = B[k]->coeffs + start[k];
445 mpoly_monomial_set(A->exps + N*Ai, B[k]->exps + N*start[k], N);
446 start[k]++;
447
448 for (k++; k < count; k++)
449 {
450 input[k] = zero;
451 if (start[k] >= stop[k])
452 {
453 continue;
454 }
455
456 cmp = mpoly_monomial_cmp_nomask(B[k]->exps + N*start[k], A->exps + N*Ai, N);
457 if (cmp == 0)
458 {
459 input[k] = B[k]->coeffs + start[k];
460 start[k]++;
461 }
462 else if (cmp > 0)
463 {
464 /* undo previous max's */
465 for (j = 0; j < k; j++)
466 {
467 start[j] -= (input[j] != zero);
468 input[j] = zero;
469 }
470 goto found_max;
471 }
472 }
473
474 _fmpz_multi_crt_run_p(output, P, (const fmpz * const *) input);
475 fmpz_swap(A->coeffs + Ai, output + 0);
476 cmp = fmpz_sgn(A->coeffs + Ai);
477 if (cmp != 0)
478 {
479 if (fmpz_cmpabs(max, A->coeffs + Ai) < 0)
480 fmpz_abs(max, A->coeffs + Ai);
481 if (cmp > 0)
482 fmpz_add(sum, sum, A->coeffs + Ai);
483 else
484 fmpz_sub(sum, sum, A->coeffs + Ai);
485 Ai++;
486 }
487 }
488 A->length = Ai;
489
490 fmpz_swap(S->maxcoeff, max);
491 fmpz_swap(S->sumcoeff, sum);
492
493 fmpz_clear(zero);
494 fmpz_clear(max);
495 fmpz_clear(sum);
496
497 TMP_END;
498
499 *S->poly = *A;
500
501 return lastdegree;
502 }
503
504
505 typedef struct
506 {
507 volatile int idx;
508 #if HAVE_PTHREAD
509 pthread_mutex_t mutex;
510 #endif
511 const fmpz_mpoly_ctx_struct * ctx;
512 fmpz_multi_crt_t CRT;
513 fmpz_mpoly_struct ** gptrs, ** abarptrs, ** bbarptrs;
514 fmpz_mpoly_struct * G, * Abar, * Bbar;
515 _joinworker_arg_struct * chunks;
516 slong chunks_length;
517 ulong num_images;
518 }
519 _joinbase_struct;
520
521 typedef _joinbase_struct _joinbase_t[1];
522
523 typedef struct
524 {
525 _joinbase_struct * base;
526 slong thread_idx;
527 }
528 _njoinworker_arg_struct;
529
_joinworker(void * varg)530 static void _joinworker(void * varg)
531 {
532 _njoinworker_arg_struct * arg = (_njoinworker_arg_struct *) varg;
533 _joinbase_struct * base = arg->base;
534 fmpz ** input;
535 fmpz * output;
536 slong i, ls = _fmpz_multi_crt_local_size(base->CRT);
537 TMP_INIT;
538
539 TMP_START;
540
541 input = (fmpz **) TMP_ALLOC(base->num_images * sizeof(fmpz *));
542 output = (fmpz *) TMP_ALLOC(ls*sizeof(fmpz));
543 for (i = 0; i < ls; i++)
544 fmpz_init(output + i);
545
546 while (1)
547 {
548 /* get exponent of either G, Abar, or Bbar to start working on */
549 #if HAVE_PTHREAD
550 pthread_mutex_lock(&base->mutex);
551 #endif
552 i = base->idx;
553 base->idx = i + 1;
554 #if HAVE_PTHREAD
555 pthread_mutex_unlock(&base->mutex);
556 #endif
557
558 if (i >= base->chunks_length)
559 {
560 goto cleanup;
561 }
562
563 base->chunks[i].thread_idx = arg->thread_idx;
564
565 if (base->chunks[i].GAB == 0)
566 {
567 _fmpz_mpoly_crt(base->CRT, base->chunks + i, base->gptrs,
568 base->num_images, output, input, base->ctx);
569 }
570 else if (base->chunks[i].GAB == 1)
571 {
572 _fmpz_mpoly_crt(base->CRT, base->chunks + i, base->abarptrs,
573 base->num_images, output, input, base->ctx);
574 }
575 else
576 {
577 FLINT_ASSERT(base->chunks[i].GAB == 2);
578
579 _fmpz_mpoly_crt(base->CRT, base->chunks + i, base->bbarptrs,
580 base->num_images, output, input, base->ctx);
581 }
582 }
583
584 cleanup:
585
586 for (i = 0; i < ls; i++)
587 fmpz_clear(output + i);
588
589 TMP_END;
590
591 return;
592 }
593
594
_finaljoinworker(void * varg)595 static void _finaljoinworker(void * varg)
596 {
597 _njoinworker_arg_struct * arg = (_njoinworker_arg_struct *) varg;
598 _joinbase_struct * base = arg->base;
599 const fmpz_mpoly_ctx_struct * ctx = base->ctx;
600 flint_bitcnt_t bits = base->G->bits;
601 slong N = mpoly_words_per_exp_sp(bits, ctx->minfo);
602 slong i, j;
603 slong source_len;
604 ulong * source_exps;
605 fmpz * source_coeffs;
606 slong Ti;
607 ulong * Texps;
608 fmpz * Tcoeffs;
609
610 for (i = base->chunks_length - 1; i >= 0; i--)
611 {
612 int type = base->chunks[i].GAB;
613
614 FLINT_ASSERT(base->chunks[i].thread_idx >= 0);
615
616 if (base->chunks[i].thread_idx != arg->thread_idx)
617 continue;
618
619 if (type == 0)
620 {
621 Texps = base->G->exps;
622 Tcoeffs = base->G->coeffs;
623 }
624 else if (type == 1)
625 {
626 Texps = base->Abar->exps;
627 Tcoeffs = base->Abar->coeffs;
628 }
629 else
630 {
631 FLINT_ASSERT(type == 2);
632 Texps = base->Bbar->exps;
633 Tcoeffs = base->Bbar->coeffs;
634 }
635
636 source_len = base->chunks[i].poly->length;
637 source_exps = base->chunks[i].poly->exps + N*0;
638 source_coeffs = base->chunks[i].poly->coeffs + 0;
639
640 Ti = base->chunks[i].final_idx;
641 mpoly_copy_monomials(Texps + N*Ti, source_exps, source_len, N);
642 for (j = 0; j < source_len; j++)
643 fmpz_swap(Tcoeffs + Ti + j, source_coeffs + j);
644 }
645 }
646
647 /*
648 Set 1 <= l <= min(n, m) and fractions v + 0, ..., v + l - 1
649 More comments in useage
650
651 example operation on input n = 10, m = 16
652
653 gcd is 2:
654 5/8, 5/8
655 split first 5/8:
656 2/3, 5/8, 3/5,
657 split first 5/8:
658 2/3, 2/3, 3/5, 3/5,
659 split first 3/5:
660 2/3, 2/3, 2/3, 3/5, 1/2
661 split first 3/5:
662 2/3, 2/3, 2/3, 2/3, 1/2, 1/2
663
664 The maximum fraction is now 0.666 which is not much bigger than n/m
665 */
_divide_master_threads(fmpq * v,slong n,slong m)666 static slong _divide_master_threads(fmpq * v, slong n, slong m)
667 {
668 slong l, i;
669 double score_threashold;
670 fmpq_t left, right;
671
672 FLINT_ASSERT(n > 0);
673 FLINT_ASSERT(m > 0);
674
675 fmpq_init(left);
676 fmpq_init(right);
677
678 score_threashold = (double)(n)/(double)(m);
679 score_threashold *= 1.1;
680
681 /* initial choice for v */
682 l = n_gcd(n, m);
683 for (i = 0; i < l; i++)
684 {
685 fmpq_set_si(v + i, n, m);
686 }
687
688 i = 0;
689 while (i < l)
690 {
691 if (fmpz_cmp_ui(fmpq_denref(v + i), 2) >= 0)
692 {
693 fmpq_farey_neighbors(left, right, v + i, fmpq_denref(v + i));
694
695 FLINT_ASSERT(fmpz_get_ui(fmpq_denref(v + i))
696 == fmpz_get_ui(fmpq_denref(left))
697 + fmpz_get_ui(fmpq_denref(right)));
698
699 if (fmpq_sgn(left) > 0 && fmpq_get_d(right) < score_threashold)
700 {
701 /* delete v + i, add left and right */
702 FLINT_ASSERT(l < m);
703 fmpq_set(v + i, right);
704 fmpq_set(v + l, left);
705 l++;
706 continue;
707 }
708 }
709 i++;
710 }
711
712 fmpq_clear(left);
713 fmpq_clear(right);
714
715 return l;
716 }
717
718 /* inputs A and B are modified */
fmpz_mpolyl_gcd_brown_threaded_pool(fmpz_mpoly_t G,fmpz_mpoly_t Abar,fmpz_mpoly_t Bbar,fmpz_mpoly_t A,fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx,const mpoly_gcd_info_t I,const thread_pool_handle * handles,slong num_handles)719 int fmpz_mpolyl_gcd_brown_threaded_pool(
720 fmpz_mpoly_t G,
721 fmpz_mpoly_t Abar,
722 fmpz_mpoly_t Bbar,
723 fmpz_mpoly_t A,
724 fmpz_mpoly_t B,
725 const fmpz_mpoly_ctx_t ctx,
726 const mpoly_gcd_info_t I,
727 const thread_pool_handle * handles,
728 slong num_handles)
729 {
730 slong i, j, k;
731 flint_bitcnt_t bits = A->bits;
732 slong N = mpoly_words_per_exp_sp(bits, ctx->minfo);
733 slong num_threads = num_handles + 1;
734 slong num_master_threads;
735 slong num_images;
736 int success;
737 fmpz_t bound, modulus, temp, temp2;
738 fmpz maxcoeff_Gs_Abars_Bbars[3];
739 fmpz sumcoeff_Gs_Abars_Bbars[3];
740 fmpz_t cA, cB, cG, cAbar, cBbar;
741 fmpz ** mptrs;
742 fmpz_mpoly_struct ** gptrs, ** abarptrs, ** bbarptrs;
743 fmpq * qvec;
744 _splitworker_arg_struct * splitargs;
745 _splitbase_t splitbase;
746 _njoinworker_arg_struct * joinargs;
747 _joinbase_t joinbase;
748
749 fmpz_init(bound);
750 fmpz_init(modulus);
751 fmpz_init(temp);
752 fmpz_init(temp2);
753 for (i = 0; i < 3; i++)
754 {
755 fmpz_init(maxcoeff_Gs_Abars_Bbars + i);
756 fmpz_init(sumcoeff_Gs_Abars_Bbars + i);
757 }
758
759 /* compute contents of G, Abar, Bbar, A, B */
760 fmpz_init(cA);
761 fmpz_init(cB);
762 fmpz_init(cG);
763 fmpz_init(cAbar);
764 fmpz_init(cBbar);
765 _fmpz_vec_content(cA, A->coeffs, A->length);
766 _fmpz_vec_content(cB, B->coeffs, B->length);
767 fmpz_gcd(cG, cA, cB);
768 fmpz_divexact(cAbar, cA, cG);
769 fmpz_divexact(cBbar, cB, cG);
770
771 /* remove content from inputs */
772 fmpz_mpoly_scalar_divexact_fmpz(A, A, cA, ctx);
773 fmpz_mpoly_scalar_divexact_fmpz(B, B, cB, ctx);
774
775 /* init split info */
776 qvec = _fmpq_vec_init(num_threads);
777
778 fmpz_init(splitbase->gamma);
779 fmpz_gcd(splitbase->gamma, A->coeffs + 0, B->coeffs + 0);
780
781 /*
782 if compute_split is jumped back to, there could be as many as
783 num_threads + 1 images that need to be joined.
784 */
785 gptrs = (fmpz_mpoly_struct **) flint_malloc(
786 (num_threads + 1)*sizeof(fmpz_mpoly_struct *));
787 abarptrs = (fmpz_mpoly_struct **) flint_malloc(
788 (num_threads + 1)*sizeof(fmpz_mpoly_struct *));
789 bbarptrs = (fmpz_mpoly_struct **) flint_malloc(
790 (num_threads + 1)*sizeof(fmpz_mpoly_struct *));
791 mptrs = (fmpz **) flint_malloc((num_threads + 1)*sizeof(fmpz *));
792
793 splitargs = (_splitworker_arg_struct *) flint_malloc(
794 num_threads*sizeof(_splitworker_arg_struct));
795 for (i = 0; i < num_threads; i++)
796 {
797 fmpz_mpoly_init3(splitargs[i].G, 0, bits, ctx);
798 fmpz_mpoly_init3(splitargs[i].Abar, 0, bits, ctx);
799 fmpz_mpoly_init3(splitargs[i].Bbar, 0, bits, ctx);
800 fmpz_init(splitargs[i].modulus);
801 splitargs[i].worker_handles = (thread_pool_handle *) flint_malloc(
802 num_threads*sizeof(thread_pool_handle));
803 }
804
805 splitbase->num_threads = num_threads;
806 splitbase->A = A;
807 splitbase->B = B;
808 splitbase->ctx = ctx;
809 splitbase->p = UWORD(1) << (FLINT_BITS - 2);
810 splitbase->I = I;
811
812 #if HAVE_PTHREAD
813 pthread_mutex_init(&splitbase->mutex, NULL);
814 #endif
815
816 /* initial bound on target modulus */
817 fmpz_mpoly_height(bound, A, ctx);
818 fmpz_mpoly_height(temp, B, ctx);
819
820 if (fmpz_cmp(bound, temp) < 0)
821 fmpz_swap(bound, temp);
822 fmpz_mul(bound, bound, splitbase->gamma);
823 fmpz_add(bound, bound, bound);
824
825 /* no images yet */
826 fmpz_one(modulus);
827
828 compute_split:
829
830 splitbase->gcd_is_one = 0;
831 fmpz_cdiv_q(temp, bound, modulus);
832 fmpz_add_ui(temp, temp, 2);
833
834 /*
835 n := fmpz_clog_ui(temp, splitbase->p) is the number of images we need.
836 We have m := num_threads threads available for work.
837 We can calculate images in parallel (parallel on p), but the
838 calculation of each image can itself also be done in parallel.
839
840 An integer l := num_master_threads with 1 <= l <= min(n, m) is selected
841 and fractions a_i/b_i, 0 <= i < l are also selected with sum_i(a_i) = n
842 and sum_i(b_i) = m. Then l jobs are spawned, each doing a_i images
843 where each image uses b_i threads.
844 */
845 num_master_threads = _divide_master_threads(qvec,
846 fmpz_clog_ui(temp, splitbase->p), num_threads);
847 FLINT_ASSERT(num_master_threads > 0);
848
849 k = 0;
850 for (i = 0; i < num_master_threads; i++)
851 {
852 splitargs[i].idx = i;
853 splitargs[i].base = splitbase;
854 FLINT_ASSERT(fmpz_fits_si(fmpq_numref(qvec + i)));
855 FLINT_ASSERT(fmpz_fits_si(fmpq_denref(qvec + i)));
856 splitargs[i].required_images = fmpz_get_si(fmpq_numref(qvec + i));
857 splitargs[i].num_handles = fmpz_get_si(fmpq_denref(qvec + i)) - 1;
858 FLINT_ASSERT(splitargs[i].required_images > 0);
859 FLINT_ASSERT(splitargs[i].num_handles >= 0);
860
861 if (i == 0)
862 {
863 splitargs[i].master_handle = -1;
864 }
865 else
866 {
867 splitargs[i].master_handle = handles[k++];
868 }
869 FLINT_ASSERT(splitargs[i].num_handles <= num_handles);
870 for (j = 0; j < splitargs[i].num_handles; j++)
871 {
872 splitargs[i].worker_handles[j] = handles[k++];
873 }
874 }
875 /* all handles should have been distributed */
876 FLINT_ASSERT(k == num_handles);
877
878 for (i = 1; i < num_master_threads; i++)
879 {
880 thread_pool_wake(global_thread_pool, splitargs[i].master_handle, 0,
881 _splitworker, &splitargs[i]);
882 }
883 _splitworker(&splitargs[0]);
884 for (i = 1; i < num_master_threads; i++)
885 {
886 thread_pool_wait(global_thread_pool, splitargs[i].master_handle);
887 }
888
889 if (splitbase->gcd_is_one)
890 {
891 fmpz_mpoly_one(G, ctx);
892 fmpz_mpoly_swap(Abar, A, ctx);
893 fmpz_mpoly_swap(Bbar, B, ctx);
894 goto successful_put_content;
895 }
896
897 /* check each thread reached its goal */
898 for (i = 0; i < num_master_threads; i++)
899 {
900 if (splitargs[i].image_count < splitargs[i].required_images)
901 {
902 /* ran out of rational primes - must fail */
903 success = 0;
904 goto cleanup_split;
905 }
906 }
907
908 /* find images to join */
909 num_images = 0;
910 if (!fmpz_is_one(modulus))
911 {
912 gptrs[num_images] = G;
913 abarptrs[num_images] = Abar;
914 bbarptrs[num_images] = Bbar;
915 mptrs[num_images] = modulus;
916 num_images++;
917 }
918
919 i = 0;
920 if (num_images == 0)
921 {
922 gptrs[num_images] = splitargs[i].G;
923 abarptrs[num_images] = splitargs[i].Abar;
924 bbarptrs[num_images] = splitargs[i].Bbar;
925 mptrs[num_images] = splitargs[i].modulus;
926 num_images++;
927 i++;
928 }
929
930 FLINT_ASSERT(num_images <= num_master_threads + 1);
931
932 for (; i < num_master_threads; i++)
933 {
934 int cmp = 0;
935 cmp = mpoly_monomial_cmp_nomask(gptrs[0]->exps + N*0,
936 splitargs[i].G->exps + N*0, N);
937
938 if (cmp < 0)
939 {
940 /* splitarg[i] was unlucky - ignore it */
941 }
942 else
943 {
944 if (cmp > 0)
945 {
946 /* splitarg[0], ..., splitarg[i - 1] were unlucky */
947 num_images = 0;
948 }
949 gptrs[num_images] = splitargs[i].G;
950 abarptrs[num_images] = splitargs[i].Abar;
951 bbarptrs[num_images] = splitargs[i].Bbar;
952 mptrs[num_images] = splitargs[i].modulus;
953 num_images++;
954 }
955 FLINT_ASSERT(num_images <= num_master_threads + 1);
956 }
957
958 /* now must join ptrs[0], ..., ptrs[num_images-1] where num_images > 0 */
959 FLINT_ASSERT(num_images > 0);
960 fmpz_multi_crt_init(joinbase->CRT);
961 success = fmpz_multi_crt_precompute_p(joinbase->CRT,
962 (const fmpz * const *) mptrs, num_images);
963 FLINT_ASSERT(success);
964
965 joinbase->num_images = num_images;
966 joinbase->gptrs = gptrs;
967 joinbase->abarptrs = abarptrs;
968 joinbase->bbarptrs = bbarptrs;
969 joinbase->G = G;
970 joinbase->Abar = Abar;
971 joinbase->Bbar = Bbar;
972 joinbase->ctx = ctx;
973 #if HAVE_PTHREAD
974 pthread_mutex_init(&joinbase->mutex, NULL);
975 #endif
976
977 joinargs = (_njoinworker_arg_struct *) flint_malloc(
978 num_threads*sizeof(_njoinworker_arg_struct));
979
980 for (i = 0; i < num_threads; i++)
981 {
982 joinargs[i].base = joinbase;
983 joinargs[i].thread_idx = i;
984 }
985
986 joinbase->chunks_length = 3*num_threads;
987 joinbase->chunks = (_joinworker_arg_struct *) flint_malloc(
988 joinbase->chunks_length*sizeof(_joinworker_arg_struct));
989
990 FLINT_ASSERT(joinbase->chunks_length >= 3);
991
992 for (i = 0; i < 3; i++)
993 {
994 fmpz_mpoly_struct * poly = (i == 0) ? gptrs[0]
995 : (i == 1) ? abarptrs[0]
996 : bbarptrs[0];
997
998 for (j = 0; j < num_threads; j++)
999 {
1000 _joinworker_arg_struct * d = joinbase->chunks + i*num_threads + j;
1001 fmpz_mpoly_init3(d->poly, 0, bits, ctx);
1002 fmpz_init(d->maxcoeff);
1003 fmpz_init(d->sumcoeff);
1004 d->GAB = i;
1005 d->thread_idx = -WORD(1);
1006 d->final_idx = -WORD(1);
1007
1008 d->hint_start = poly->length * (j + 0) / num_threads;
1009 d->hint_stop = poly->length * (j + 1) / num_threads;
1010
1011 d->left_exp = NULL;
1012 d->right_exp = NULL;
1013
1014 FLINT_ASSERT(0 <= d->hint_start);
1015 FLINT_ASSERT(d->hint_start <= d->hint_stop);
1016 FLINT_ASSERT(d->hint_stop <= poly->length);
1017 FLINT_ASSERT(d->hint_start < poly->length);
1018
1019 if (j > 0)
1020 {
1021 FLINT_ASSERT(d->hint_start < poly->length);
1022 d->left_exp = poly->exps + N*d->hint_start;
1023 }
1024
1025 if (j + 1 < num_threads)
1026 {
1027 FLINT_ASSERT(d->hint_stop < poly->length);
1028 d->right_exp = poly->exps + N*d->hint_stop;
1029 }
1030 }
1031 }
1032
1033 joinbase->idx = 0;
1034 for (i = 0; i + 1 < num_threads; i++)
1035 {
1036 thread_pool_wake(global_thread_pool,
1037 handles[i], 0, _joinworker, joinargs + i);
1038 }
1039 _joinworker(joinargs + num_threads - 1);
1040 for (i = 0; i + 1 < num_threads; i++)
1041 {
1042 thread_pool_wait(global_thread_pool, handles[i]);
1043 }
1044
1045 /* final trivial join */
1046 for (i = 0; i < 3; i++)
1047 {
1048 fmpz_zero(maxcoeff_Gs_Abars_Bbars + i);
1049 fmpz_zero(sumcoeff_Gs_Abars_Bbars + i);
1050 }
1051
1052 {
1053 slong idxs[3] = {0, 0, 0};
1054 for (i = 0; i < joinbase->chunks_length; i++)
1055 {
1056 int type = joinbase->chunks[i].GAB;
1057 FLINT_ASSERT(0 <= type && type < 3);
1058
1059 joinbase->chunks[i].final_idx = idxs[type];
1060 idxs[type] += joinbase->chunks[i].poly->length;
1061
1062 fmpz_add(sumcoeff_Gs_Abars_Bbars + type,
1063 sumcoeff_Gs_Abars_Bbars + type,
1064 joinbase->chunks[i].sumcoeff);
1065
1066 if (fmpz_cmp(maxcoeff_Gs_Abars_Bbars + type,
1067 joinbase->chunks[i].maxcoeff) < 0)
1068 {
1069 fmpz_set(maxcoeff_Gs_Abars_Bbars + type,
1070 joinbase->chunks[i].maxcoeff);
1071 }
1072 }
1073
1074 fmpz_mpoly_fit_length(G, idxs[0], ctx);
1075 fmpz_mpoly_fit_length(Abar, idxs[1], ctx);
1076 fmpz_mpoly_fit_length(Bbar, idxs[2], ctx);
1077 G->length = idxs[0];
1078 Abar->length = idxs[1];
1079 Bbar->length = idxs[2];
1080 }
1081
1082 joinbase->idx = 0;
1083 for (i = 0; i + 1 < num_threads; i++)
1084 {
1085 thread_pool_wake(global_thread_pool,
1086 handles[i], 0, _finaljoinworker, joinargs + i);
1087 }
1088 _finaljoinworker(joinargs + num_threads - 1);
1089 for (i = 0; i + 1 < num_threads; i++)
1090 {
1091 thread_pool_wait(global_thread_pool, handles[i]);
1092 }
1093
1094 FLINT_ASSERT(fmpz_mpoly_is_canonical(G, ctx));
1095 FLINT_ASSERT(fmpz_mpoly_is_canonical(Abar, ctx));
1096 FLINT_ASSERT(fmpz_mpoly_is_canonical(Bbar, ctx));
1097
1098 #if HAVE_PTHREAD
1099 pthread_mutex_destroy(&joinbase->mutex);
1100 #endif
1101
1102 /* free join data */
1103 fmpz_multi_crt_clear(joinbase->CRT);
1104 for (i = 0; i < joinbase->chunks_length; i++)
1105 {
1106 fmpz_mpoly_clear(joinbase->chunks[i].poly, ctx);
1107 fmpz_clear(joinbase->chunks[i].maxcoeff);
1108 fmpz_clear(joinbase->chunks[i].sumcoeff);
1109 }
1110
1111 flint_free(joinbase->chunks);
1112 flint_free(joinargs);
1113
1114 /* update modulus - modulus could be one of the mptrs */
1115 fmpz_one(temp);
1116 for (i = 0; i < num_images; i++)
1117 {
1118 fmpz_mul(temp, temp, mptrs[i]);
1119 }
1120 fmpz_swap(modulus, temp);
1121
1122 for (i = 1; i < 3; i++)
1123 {
1124 fmpz_mul(temp, maxcoeff_Gs_Abars_Bbars + 0,
1125 sumcoeff_Gs_Abars_Bbars + i);
1126 fmpz_mul(temp2, sumcoeff_Gs_Abars_Bbars + 0,
1127 maxcoeff_Gs_Abars_Bbars + i);
1128 if (fmpz_cmp(temp, temp2) > 0)
1129 fmpz_swap(temp, temp2);
1130 fmpz_mul_2exp(temp, temp, 1);
1131 if (fmpz_cmp(temp, modulus) >= 0)
1132 {
1133 fmpz_mul_2exp(bound, modulus, 2*FLINT_BITS);
1134 goto compute_split;
1135 }
1136 }
1137
1138 FLINT_ASSERT(fmpz_equal(splitbase->gamma, G->coeffs + 0));
1139
1140 _fmpz_vec_content(temp, G->coeffs, G->length);
1141 fmpz_mpoly_scalar_divexact_fmpz(G, G, temp, ctx);
1142 fmpz_mpoly_scalar_divexact_fmpz(Abar, Abar, G->coeffs + 0, ctx);
1143 fmpz_mpoly_scalar_divexact_fmpz(Bbar, Bbar, G->coeffs + 0, ctx);
1144
1145 successful_put_content:
1146
1147 fmpz_mpoly_scalar_mul_fmpz(G, G, cG, ctx);
1148 fmpz_mpoly_scalar_mul_fmpz(Abar, Abar, cAbar, ctx);
1149 fmpz_mpoly_scalar_mul_fmpz(Bbar, Bbar, cBbar, ctx);
1150
1151 success = 1;
1152
1153 cleanup_split:
1154
1155 #if HAVE_PTHREAD
1156 pthread_mutex_destroy(&splitbase->mutex);
1157 #endif
1158 fmpz_clear(splitbase->gamma);
1159
1160 for (i = 0; i < num_threads; i++)
1161 {
1162 fmpz_mpoly_clear(splitargs[i].G, ctx);
1163 fmpz_mpoly_clear(splitargs[i].Abar, ctx);
1164 fmpz_mpoly_clear(splitargs[i].Bbar, ctx);
1165 fmpz_clear(splitargs[i].modulus);
1166 flint_free(splitargs[i].worker_handles);
1167 }
1168
1169 flint_free(gptrs);
1170 flint_free(abarptrs);
1171 flint_free(bbarptrs);
1172 flint_free(mptrs);
1173 flint_free(splitargs);
1174
1175 _fmpq_vec_clear(qvec, num_threads);
1176
1177 fmpz_clear(cA);
1178 fmpz_clear(cB);
1179 fmpz_clear(cG);
1180 fmpz_clear(cAbar);
1181 fmpz_clear(cBbar);
1182
1183 fmpz_clear(bound);
1184 fmpz_clear(modulus);
1185 fmpz_clear(temp);
1186 fmpz_clear(temp2);
1187 for (i = 0; i < 3; i++)
1188 {
1189 fmpz_clear(maxcoeff_Gs_Abars_Bbars + i);
1190 fmpz_clear(sumcoeff_Gs_Abars_Bbars + i);
1191 }
1192
1193 return success;
1194 }
1195
1196
1197 typedef struct
1198 {
1199 fmpz_mpoly_struct * Pl;
1200 const fmpz_mpoly_ctx_struct * lctx;
1201 const fmpz_mpoly_struct * P;
1202 const fmpz_mpoly_ctx_struct * ctx;
1203 const slong * perm;
1204 const ulong * shift;
1205 const ulong * stride;
1206 const thread_pool_handle * handles;
1207 slong num_handles;
1208 }
1209 _convertl_arg_struct;
1210
1211 typedef _convertl_arg_struct _convertl_arg_t[1];
1212
_worker_convertu(void * varg)1213 static void _worker_convertu(void * varg)
1214 {
1215 _convertl_arg_struct * arg = (_convertl_arg_struct *) varg;
1216
1217 fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(arg->Pl, arg->lctx, arg->P, arg->ctx,
1218 arg->perm, arg->shift, arg->stride,
1219 arg->handles, arg->num_handles);
1220 }
1221
fmpz_mpoly_gcd_brown_threaded(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)1222 int fmpz_mpoly_gcd_brown_threaded(
1223 fmpz_mpoly_t G,
1224 const fmpz_mpoly_t A,
1225 const fmpz_mpoly_t B,
1226 const fmpz_mpoly_ctx_t ctx)
1227 {
1228 int success;
1229 slong * perm;
1230 ulong * shift, * stride;
1231 slong i;
1232 flint_bitcnt_t ABbits;
1233 fmpz_mpoly_ctx_t lctx;
1234 fmpz_mpoly_t Al, Bl, Gl, Abarl, Bbarl;
1235 thread_pool_handle * handles;
1236 slong num_handles;
1237 slong thread_limit = FLINT_MIN(A->length, B->length)/16;
1238
1239 if (fmpz_mpoly_is_zero(A, ctx))
1240 {
1241 if (fmpz_mpoly_is_zero(B, ctx))
1242 {
1243 fmpz_mpoly_zero(G, ctx);
1244 return 1;
1245 }
1246 if (fmpz_sgn(B->coeffs + 0) < 0)
1247 {
1248 fmpz_mpoly_neg(G, B, ctx);
1249 return 1;
1250 }
1251 else
1252 {
1253 fmpz_mpoly_set(G, B, ctx);
1254 return 1;
1255 }
1256 }
1257
1258 if (fmpz_mpoly_is_zero(B, ctx))
1259 {
1260 if (fmpz_sgn(A->coeffs + 0) < 0)
1261 {
1262 fmpz_mpoly_neg(G, A, ctx);
1263 return 1;
1264 }
1265 else
1266 {
1267 fmpz_mpoly_set(G, A, ctx);
1268 return 1;
1269 }
1270 }
1271
1272 if (A->bits > FLINT_BITS || B->bits > FLINT_BITS)
1273 {
1274 return 0;
1275 }
1276
1277 perm = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
1278 shift = (ulong *) flint_malloc(ctx->minfo->nvars*sizeof(ulong));
1279 stride = (ulong *) flint_malloc(ctx->minfo->nvars*sizeof(ulong));
1280 for (i = 0; i < ctx->minfo->nvars; i++)
1281 {
1282 perm[i] = i;
1283 shift[i] = 0;
1284 stride[i] = 1;
1285 }
1286
1287 if (ctx->minfo->nvars == 1)
1288 {
1289 fmpz_poly_t a, b, g;
1290 fmpz_poly_init(a);
1291 fmpz_poly_init(b);
1292 fmpz_poly_init(g);
1293 _fmpz_mpoly_to_fmpz_poly_deflate(a, A, 0, shift, stride, ctx);
1294 _fmpz_mpoly_to_fmpz_poly_deflate(b, B, 0, shift, stride, ctx);
1295 fmpz_poly_gcd(g, a, b);
1296 _fmpz_mpoly_from_fmpz_poly_inflate(G, A->bits, g, 0, shift, stride, ctx);
1297 fmpz_poly_clear(a);
1298 fmpz_poly_clear(b);
1299 fmpz_poly_clear(g);
1300 success = 1;
1301 goto cleanup1;
1302 }
1303
1304 ABbits = FLINT_MAX(A->bits, B->bits);
1305
1306 fmpz_mpoly_ctx_init(lctx, ctx->minfo->nvars, ORD_LEX);
1307 fmpz_mpoly_init3(Al, 0, ABbits, lctx);
1308 fmpz_mpoly_init3(Bl, 0, ABbits, lctx);
1309 fmpz_mpoly_init3(Gl, 0, ABbits, lctx);
1310 fmpz_mpoly_init3(Abarl, 0, ABbits, lctx);
1311 fmpz_mpoly_init3(Bbarl, 0, ABbits, lctx);
1312
1313 num_handles = flint_request_threads(&handles, thread_limit);
1314
1315 /* convert inputs */
1316 if (num_handles > 0)
1317 {
1318 slong m = mpoly_divide_threads(num_handles, A->length, B->length);
1319 _convertl_arg_t arg;
1320
1321 FLINT_ASSERT(m >= 0);
1322 FLINT_ASSERT(m < num_handles);
1323
1324 arg->Pl = Bl;
1325 arg->lctx = lctx;
1326 arg->P = B;
1327 arg->ctx = ctx;
1328 arg->perm = perm;
1329 arg->shift = shift;
1330 arg->stride = stride;
1331 arg->handles = handles + (m + 1);
1332 arg->num_handles = num_handles - (m + 1);
1333
1334 thread_pool_wake(global_thread_pool, handles[m], 0, _worker_convertu, arg);
1335
1336 fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(Al, lctx, A, ctx,
1337 perm, shift, stride, handles + 0, m);
1338
1339 thread_pool_wait(global_thread_pool, handles[m]);
1340 }
1341 else
1342 {
1343 fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(Al, lctx, A, ctx,
1344 perm, shift, stride, NULL, 0);
1345 fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(Bl, lctx, B, ctx,
1346 perm, shift, stride, NULL, 0);
1347 }
1348
1349 /* calculate gcd */
1350 success = fmpz_mpolyl_gcd_brown_threaded_pool(Gl, Abarl, Bbarl, Al, Bl,
1351 lctx, NULL, handles, num_handles);
1352
1353 flint_give_back_threads(handles, num_handles);
1354
1355 if (success)
1356 {
1357 fmpz_mpoly_from_mpoly_perm_inflate(G, ABbits, ctx, Gl, lctx,
1358 perm, shift, stride);
1359 if (fmpz_sgn(G->coeffs + 0) < 0)
1360 fmpz_mpoly_neg(G, G, ctx);
1361 }
1362
1363 fmpz_mpoly_clear(Al, lctx);
1364 fmpz_mpoly_clear(Bl, lctx);
1365 fmpz_mpoly_clear(Gl, lctx);
1366 fmpz_mpoly_clear(Abarl, lctx);
1367 fmpz_mpoly_clear(Bbarl, lctx);
1368 fmpz_mpoly_ctx_clear(lctx);
1369
1370 cleanup1:
1371
1372 flint_free(perm);
1373 flint_free(shift);
1374 flint_free(stride);
1375
1376 return success;
1377 }
1378
1379
1380