1 /*
2     Copyright (C) 2018 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include "nmod_mpoly.h"
13 #include "fmpz_mpoly.h"
14 
15 
fmpz_mpolyu_is_canonical(const fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx)16 int fmpz_mpolyu_is_canonical(const fmpz_mpolyu_t A, const fmpz_mpoly_ctx_t ctx)
17 {
18     slong i;
19 
20     if (A->length > A->alloc)
21     {
22         return 0;
23     }
24 
25     for (i = 0; i < A->length; i++)
26     {
27         if (   fmpz_mpoly_is_zero(A->coeffs + i, ctx)
28             || !fmpz_mpoly_is_canonical(A->coeffs + i, ctx))
29         {
30             return 0;
31         }
32 
33         if (i > 0 && A->exps[i - 1] <= A->exps[i])
34         {
35             return 0;
36         }
37     }
38 
39     return 1;
40 }
41 
fmpz_mpolyu_init(fmpz_mpolyu_t A,flint_bitcnt_t bits,const fmpz_mpoly_ctx_t ctx)42 void fmpz_mpolyu_init(fmpz_mpolyu_t A, flint_bitcnt_t bits,
43                                                     const fmpz_mpoly_ctx_t ctx)
44 {
45     A->coeffs = NULL;
46     A->exps = NULL;
47     A->alloc = 0;
48     A->length = 0;
49     A->bits = bits;
50 }
51 
52 
fmpz_mpolyu_clear(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t uctx)53 void fmpz_mpolyu_clear(fmpz_mpolyu_t A, const fmpz_mpoly_ctx_t uctx)
54 {
55     slong i;
56     for (i = 0; i < A->alloc; i++)
57         fmpz_mpoly_clear(A->coeffs + i, uctx);
58     flint_free(A->coeffs);
59     flint_free(A->exps);
60 }
61 
62 
fmpz_mpolyu_swap(fmpz_mpolyu_t A,fmpz_mpolyu_t B,const fmpz_mpoly_ctx_t uctx)63 void fmpz_mpolyu_swap(fmpz_mpolyu_t A, fmpz_mpolyu_t B,
64                                                    const fmpz_mpoly_ctx_t uctx)
65 {
66    fmpz_mpolyu_struct t = *A;
67    *A = *B;
68    *B = t;
69 }
70 
fmpz_mpolyu_zero(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t uctx)71 void fmpz_mpolyu_zero(fmpz_mpolyu_t A, const fmpz_mpoly_ctx_t uctx)
72 {
73     slong i;
74     for (i = 0; i < A->alloc; i++) {
75         fmpz_mpoly_clear(A->coeffs + i, uctx);
76         fmpz_mpoly_init(A->coeffs + i, uctx);
77     }
78     A->length = 0;
79 }
80 
81 
82 
fmpz_mpolyu_print_pretty(const fmpz_mpolyu_t poly,const char ** x,const fmpz_mpoly_ctx_t ctx)83 void fmpz_mpolyu_print_pretty(
84     const fmpz_mpolyu_t poly,
85     const char ** x,
86     const fmpz_mpoly_ctx_t ctx)
87 {
88     slong i;
89     if (poly->length == 0)
90         flint_printf("0");
91     for (i = 0; i < poly->length; i++)
92     {
93         if (i != 0)
94             flint_printf(" + ");
95         flint_printf("(");
96         fmpz_mpoly_print_pretty(poly->coeffs + i, x, ctx);
97         flint_printf(")*X^%wd", poly->exps[i]);
98     }
99 }
100 
fmpz_mpolyuu_print_pretty(const fmpz_mpolyu_t poly,const char ** x,slong nmainvars,const fmpz_mpoly_ctx_t ctx)101 void fmpz_mpolyuu_print_pretty(
102     const fmpz_mpolyu_t poly,
103     const char ** x,
104     slong nmainvars,
105     const fmpz_mpoly_ctx_t ctx)
106 {
107     slong i, j;
108     ulong mask = (-UWORD(1)) >> (FLINT_BITS - FLINT_BITS/nmainvars);
109 
110     if (poly->length == 0)
111         flint_printf("0");
112 
113     for (i = 0; i < poly->length; i++)
114     {
115         if (i != 0)
116             flint_printf(" + ");
117         flint_printf("(");
118         fmpz_mpoly_print_pretty(poly->coeffs + i, x, ctx);
119         flint_printf(")");
120         for (j = nmainvars - 1; j >= 0; j--)
121         {
122             flint_printf("*X%wd^%wd", nmainvars - 1 - j,
123                     mask & (poly->exps[i] >> (FLINT_BITS/nmainvars*j)));
124         }
125     }
126 }
127 
128 
fmpz_mpolyu_fit_length(fmpz_mpolyu_t A,slong length,const fmpz_mpoly_ctx_t uctx)129 void fmpz_mpolyu_fit_length(fmpz_mpolyu_t A, slong length,
130                                                    const fmpz_mpoly_ctx_t uctx)
131 {
132     slong i;
133     slong old_alloc = A->alloc;
134     slong new_alloc = FLINT_MAX(length, 2*A->alloc);
135 
136     if (length > old_alloc)
137     {
138         if (old_alloc == 0)
139         {
140             A->exps = (ulong *) flint_malloc(new_alloc*sizeof(ulong));
141             A->coeffs = (fmpz_mpoly_struct *) flint_malloc(
142                                           new_alloc*sizeof(fmpz_mpoly_struct));
143         } else
144         {
145             A->exps = (ulong *) flint_realloc(A->exps,
146                                                       new_alloc*sizeof(ulong));
147             A->coeffs = (fmpz_mpoly_struct *) flint_realloc(A->coeffs,
148                                           new_alloc*sizeof(fmpz_mpoly_struct));
149         }
150 
151         for (i = old_alloc; i < new_alloc; i++)
152         {
153             fmpz_mpoly_init(A->coeffs + i, uctx);
154             fmpz_mpoly_fit_bits(A->coeffs + i, A->bits, uctx);
155             (A->coeffs + i)->bits = A->bits;
156         }
157         A->alloc = new_alloc;
158     }
159 }
160 
fmpz_mpolyu_one(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t uctx)161 void fmpz_mpolyu_one(fmpz_mpolyu_t A, const fmpz_mpoly_ctx_t uctx)
162 {
163     fmpz_mpolyu_fit_length(A, WORD(1), uctx);
164     A->exps[0] = UWORD(0);
165     fmpz_mpoly_one(A->coeffs + 0, uctx);
166     A->length = WORD(1);
167 }
168 
169 
fmpz_mpolyu_set(fmpz_mpolyu_t A,const fmpz_mpolyu_t B,const fmpz_mpoly_ctx_t uctx)170 void fmpz_mpolyu_set(fmpz_mpolyu_t A, const fmpz_mpolyu_t B,
171                                                    const fmpz_mpoly_ctx_t uctx)
172 {
173     slong i;
174     fmpz_mpoly_struct * Acoeff, * Bcoeff;
175     ulong * Aexp, * Bexp;
176     slong Alen, Blen;
177 
178     Alen = 0;
179     Blen = B->length;
180     fmpz_mpolyu_fit_length(A, Blen, uctx);
181     Acoeff = A->coeffs;
182     Bcoeff = B->coeffs;
183     Aexp = A->exps;
184     Bexp = B->exps;
185 
186     for (i = 0; i < Blen; i++)
187     {
188         fmpz_mpoly_set(Acoeff + Alen, Bcoeff + i, uctx);
189         Aexp[Alen++] = Bexp[i];
190     }
191     Alen = Blen;
192 
193     /* demote remaining coefficients */
194     for (i = Alen; i < A->length; i++)
195     {
196         fmpz_mpoly_clear(Acoeff + i, uctx);
197         fmpz_mpoly_init(Acoeff + i, uctx);
198     }
199     A->length = Alen;
200 }
201 
202 
203 /* if the coefficient doesn't exist, a new one is created (and set to zero) */
_fmpz_mpolyu_get_coeff(fmpz_mpolyu_t A,ulong pow,const fmpz_mpoly_ctx_t uctx)204 fmpz_mpoly_struct * _fmpz_mpolyu_get_coeff(
205     fmpz_mpolyu_t A,
206     ulong pow,
207     const fmpz_mpoly_ctx_t uctx)
208 {
209     slong i, j, a, b;
210     fmpz_mpoly_struct * xk;
211 
212     a = 0;
213     b = A->length;
214 
215     if (b == 0 || pow > A->exps[0])
216     {
217         i = 0;
218         goto create_new;
219     }
220 
221     if (pow == A->exps[b - 1])
222     {
223         return A->coeffs + b - 1;
224     }
225 
226 try_again:
227 
228     if (b - a < 8)
229     {
230         for (i = a; i < b && (A->exps[i] >= pow); i++)
231         {
232             if (A->exps[i] == pow)
233             {
234                 return A->coeffs + i;
235             }
236         }
237         goto create_new;
238     }
239 
240     i = a + (b - a)/2;
241     if (A->exps[i] == pow)
242     {
243         return A->coeffs + i;
244     }
245     else if (A->exps[i] > pow)
246     {
247         a = i;
248     }
249     else
250     {
251         b = i;
252     }
253     goto try_again;
254 
255 create_new: /* new at position i */
256 
257     fmpz_mpolyu_fit_length(A, A->length + 1, uctx);
258 
259     for (j = A->length; j > i; j--)
260     {
261         A->exps[j] = A->exps[j - 1];
262         fmpz_mpoly_swap(A->coeffs + j, A->coeffs + j - 1, uctx);
263     }
264 
265     A->length++;
266     A->exps[i] = pow;
267     xk = A->coeffs + i;
268     xk->length = 0;
269     FLINT_ASSERT(xk->bits == A->bits);
270 
271     return xk;
272 }
273 
274 
275 
276 
277 
278 
279 /*
280     Convert B to A using the variable permutation perm.
281     The uctx (m vars) should be the context of A.
282     The ctx (n vars) should be the context of B.
283 
284     operation on each term:
285 
286     for 0 <= k < m
287         l = perm[k]
288         Aexp[k] = (Bexp[l] - shift[l])/stride[l]
289 */
fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(fmpz_mpoly_t A,const fmpz_mpoly_ctx_t uctx,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx,const slong * perm,const ulong * shift,const ulong * stride,const thread_pool_handle * handles,slong num_handles)290 void fmpz_mpoly_to_mpoly_perm_deflate_threaded_pool(
291     fmpz_mpoly_t A,
292     const fmpz_mpoly_ctx_t uctx,
293     const fmpz_mpoly_t B,
294     const fmpz_mpoly_ctx_t ctx,
295     const slong * perm,
296     const ulong * shift,
297     const ulong * stride,
298     const thread_pool_handle * handles,
299     slong num_handles)
300 {
301     slong j, k, l;
302     slong n = ctx->minfo->nvars;
303     slong m = uctx->minfo->nvars;
304     slong NA, NB;
305     ulong * uexps;
306     ulong * Bexps;
307     TMP_INIT;
308 
309     FLINT_ASSERT(A->bits <= FLINT_BITS);
310     FLINT_ASSERT(B->bits <= FLINT_BITS);
311     FLINT_ASSERT(m <= n);
312 
313     TMP_START;
314 
315     uexps = (ulong *) TMP_ALLOC(m*sizeof(ulong));
316     Bexps = (ulong *) TMP_ALLOC(n*sizeof(ulong));
317     NA = mpoly_words_per_exp(A->bits, uctx->minfo);
318     NB = mpoly_words_per_exp(B->bits, ctx->minfo);
319 
320     fmpz_mpoly_fit_length(A, B->length, uctx);
321     for (j = 0; j < B->length; j++)
322     {
323         mpoly_get_monomial_ui(Bexps, B->exps + NB*j, B->bits, ctx->minfo);
324         for (k = 0; k < m; k++)
325         {
326             l = perm[k];
327             FLINT_ASSERT(stride[l] != UWORD(0));
328             if (stride[l] > 1)
329             {
330                 FLINT_ASSERT(((Bexps[l] - shift[l]) % stride[l]) == UWORD(0));
331                 uexps[k] = (Bexps[l] - shift[l]) / stride[l];
332             }
333             else
334             {
335                 uexps[k] = Bexps[l] - shift[l];
336             }
337         }
338 
339         fmpz_set(A->coeffs + j, B->coeffs + j);
340         mpoly_set_monomial_ui(A->exps + NA*j, uexps, A->bits, uctx->minfo);
341     }
342     A->length = B->length;
343 
344     fmpz_mpoly_sort_terms(A, uctx);
345 
346     TMP_END;
347 }
348 
349 
350 
351 
352 
353 
354 typedef struct
355 {
356     volatile slong index;
357 #if HAVE_PTHREAD
358     pthread_mutex_t mutex;
359 #endif
360     slong length;
361     fmpz_mpoly_struct * coeffs;
362     const fmpz_mpoly_ctx_struct * ctx;
363 }
364 _sort_arg_struct;
365 
366 typedef _sort_arg_struct _sort_arg_t[1];
367 
368 
_worker_sort(void * varg)369 static void _worker_sort(void * varg)
370 {
371     _sort_arg_struct * arg = (_sort_arg_struct *) varg;
372     slong i;
373 
374 get_next_index:
375 
376 #if HAVE_PTHREAD
377     pthread_mutex_lock(&arg->mutex);
378 #endif
379     i = arg->index;
380     arg->index++;
381 #if HAVE_PTHREAD
382     pthread_mutex_unlock(&arg->mutex);
383 #endif
384 
385     if (i >= arg->length)
386         goto cleanup;
387 
388     fmpz_mpoly_sort_terms(arg->coeffs + i, arg->ctx);
389 
390     goto get_next_index;
391 
392 cleanup:
393 
394     return;
395 }
396 
397 
398 
399 typedef struct
400 {
401     fmpz_mpoly_t poly;
402     slong threadidx;
403 }
404 _arrayconvertu_base_elem_struct;
405 
406 
407 typedef struct
408 {
409     const fmpz_mpoly_ctx_struct * ctx, * uctx;
410     slong degbx;
411     const slong * perm;
412     const ulong * shift, * stride;
413     flint_bitcnt_t Abits;
414     const fmpz_mpoly_struct * B;
415     _arrayconvertu_base_elem_struct * array;
416     slong nthreads;
417 }
418 _arrayconvertu_base_struct;
419 
420 typedef _arrayconvertu_base_struct _arrayconvertu_base_t[1];
421 
422 
423 typedef struct
424 {
425     slong idx;
426     _arrayconvertu_base_struct * base;
427 }
428 _arrayconvertu_worker_arg_struct;
429 
430 
_arrayconvertu_worker(void * varg)431 void _arrayconvertu_worker(void * varg)
432 {
433     _arrayconvertu_worker_arg_struct * arg = (_arrayconvertu_worker_arg_struct *) varg;
434     _arrayconvertu_base_struct * base = arg->base;
435     const fmpz_mpoly_ctx_struct * uctx = base->uctx;
436     const fmpz_mpoly_ctx_struct * ctx = base->ctx;
437     const slong * perm = base->perm;
438     const ulong * shift = base->shift;
439     const ulong * stride = base->stride;
440     const ulong shiftx  = shift[perm[0]];
441     const ulong stridex = stride[perm[0]];
442     const fmpz_mpoly_struct * B = base->B;
443     ulong mask = (-UWORD(1)) >> (FLINT_BITS - B->bits);
444     slong xoffset, xshift;
445     slong j, k, l, arrayidx;
446     slong n = ctx->minfo->nvars;
447     slong m = uctx->minfo->nvars;
448     slong NA, NB;
449     ulong * uexps;
450     ulong * Bexps;
451     fmpz_mpoly_struct * Ac;
452     TMP_INIT;
453 
454     TMP_START;
455 
456     uexps = (ulong *) TMP_ALLOC((m + 1)*sizeof(ulong));
457     Bexps = (ulong *) TMP_ALLOC(n*sizeof(ulong));
458 
459     NA = mpoly_words_per_exp(base->Abits, uctx->minfo);
460     NB = mpoly_words_per_exp(B->bits, ctx->minfo);
461 
462     mpoly_gen_offset_shift_sp(&xoffset, &xshift, perm[0], B->bits, ctx->minfo);
463 
464     for (j = 0; j < B->length; j++)
465     {
466         ulong xexp = ((B->exps[NB*j + xoffset] >> xshift) & mask) - shiftx;
467         if (stridex != 1)
468         {
469             FLINT_ASSERT((xexp % stridex) == 0);
470             xexp /= stridex;
471         }
472         arrayidx = xexp;
473 
474         FLINT_ASSERT(arrayidx < base->degbx);
475         if (base->array[arrayidx].threadidx == arg->idx)
476         {
477             mpoly_get_monomial_ui(Bexps, B->exps + NB*j, B->bits, ctx->minfo);
478             for (k = 0; k < m + 1; k++)
479             {
480                 l = perm[k];
481                 if (stride[l] == 1)
482                 {
483                     uexps[k] = (Bexps[l] - shift[l]);
484                 }
485                 else
486                 {
487                     FLINT_ASSERT(stride[l] != 0);
488                     FLINT_ASSERT(((Bexps[l] - shift[l]) % stride[l]) == 0);
489                     uexps[k] = (Bexps[l] - shift[l]) / stride[l];
490                 }
491             }
492             Ac = base->array[arrayidx].poly;
493             fmpz_mpoly_fit_length(Ac, Ac->length + 1, uctx);
494             fmpz_set(Ac->coeffs + Ac->length, B->coeffs + j);
495             mpoly_set_monomial_ui(Ac->exps + NA*Ac->length, uexps + 1, base->Abits, uctx->minfo);
496             Ac->length++;
497         }
498     }
499 
500     /* sort all of ours */
501     for (arrayidx = base->degbx - 1; arrayidx >= 0; arrayidx--)
502     {
503         if (base->array[arrayidx].threadidx == arg->idx)
504         {
505             fmpz_mpoly_sort_terms(base->array[arrayidx].poly, uctx);
506         }
507     }
508 
509     TMP_END;
510 }
511 
512 
513 
514 /*
515     Convert B to A using the variable permutation perm.
516     The uctx (m vars) should be the context of the coefficients of A.
517     The ctx (n vars) should be the context of B.
518 
519     operation on each term:
520 
521     for 0 <= k < m + 1
522         l = perm[k]
523         Aexp[k] = (Bexp[l] - shift[l])/stride[l]
524 
525     the most significant main variable uses k = 0
526     the coefficients of A use variables k = 1 ... m
527     maxexps if it exists is supposed to be a degree bound on B
528 */
fmpz_mpoly_to_mpolyu_perm_deflate_threaded_pool(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t uctx,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx,const slong * perm,const ulong * shift,const ulong * stride,const ulong * maxexps,const thread_pool_handle * handles,slong num_handles)529 void fmpz_mpoly_to_mpolyu_perm_deflate_threaded_pool(
530     fmpz_mpolyu_t A,
531     const fmpz_mpoly_ctx_t uctx,
532     const fmpz_mpoly_t B,
533     const fmpz_mpoly_ctx_t ctx,
534     const slong * perm,
535     const ulong * shift,
536     const ulong * stride,
537     const ulong * maxexps, /* nullptr is ok */
538     const thread_pool_handle * handles,
539     slong num_handles)
540 {
541     slong limit = 1000;
542     slong degbx;
543     slong i, j, k, l;
544     slong n = ctx->minfo->nvars;
545     slong m = uctx->minfo->nvars;
546     slong NA, NB;
547     ulong * uexps;
548     ulong * Bexps;
549     fmpz_mpoly_struct * Ac;
550     TMP_INIT;
551 
552     FLINT_ASSERT(A->bits <= FLINT_BITS);
553     FLINT_ASSERT(B->bits <= FLINT_BITS);
554     FLINT_ASSERT(m + 1 <= n);
555 
556     fmpz_mpolyu_zero(A, uctx);
557 
558     /* strict degree bounds on the result */
559     degbx = limit + 1;
560     if (maxexps != NULL)
561     {
562         degbx = (maxexps[perm[0]] - shift[perm[0]])/stride[perm[0]] + 1;
563     }
564 
565     if (degbx <= limit)
566     {
567         _arrayconvertu_base_t base;
568         _arrayconvertu_worker_arg_struct * args;
569 
570         base->ctx = ctx;
571         base->uctx = uctx;
572         base->degbx = degbx;
573         base->perm = perm;
574         base->shift = shift;
575         base->stride = stride;
576         base->Abits = A->bits;
577         base->B = B;
578         base->nthreads = num_handles + 1;
579         base->array = (_arrayconvertu_base_elem_struct *) flint_malloc(
580                                 degbx*sizeof(_arrayconvertu_base_elem_struct));
581         for (i = degbx - 1; i >= 0; i--)
582         {
583             base->array[i].threadidx = i % base->nthreads;
584             fmpz_mpoly_init3(base->array[i].poly, 0, A->bits, uctx);
585         }
586         args = (_arrayconvertu_worker_arg_struct *) flint_malloc(
587                      base->nthreads*sizeof(_arrayconvertu_worker_arg_struct));
588 
589         for (i = 0; i < num_handles; i++)
590         {
591             args[i].idx = i;
592             args[i].base = base;
593             thread_pool_wake(global_thread_pool, handles[i], 0,
594                                              _arrayconvertu_worker, &args[i]);
595         }
596         i = num_handles;
597         args[i].idx = i;
598         args[i].base = base;
599         _arrayconvertu_worker(&args[i]);
600         for (i = 0; i < num_handles; i++)
601         {
602             thread_pool_wait(global_thread_pool, handles[i]);
603         }
604 
605         A->length = 0;
606         for (i = degbx - 1; i >= 0; i--)
607         {
608             if (base->array[i].poly->length > 0)
609             {
610                 fmpz_mpolyu_fit_length(A, A->length + 1, uctx);
611                 A->exps[A->length] = i;
612                 fmpz_mpoly_swap(A->coeffs + A->length, base->array[i].poly, uctx);
613                 A->length++;
614             }
615             fmpz_mpoly_clear(base->array[i].poly, uctx);
616         }
617 
618         flint_free(base->array);
619         flint_free(args);
620     }
621     else
622     {
623         TMP_START;
624 
625         uexps = (ulong *) TMP_ALLOC((m + 2)*sizeof(ulong));
626         Bexps = (ulong *) TMP_ALLOC(n*sizeof(ulong));
627         NA = mpoly_words_per_exp(A->bits, uctx->minfo);
628         NB = mpoly_words_per_exp(B->bits, ctx->minfo);
629 
630         for (j = 0; j < B->length; j++)
631         {
632             mpoly_get_monomial_ui(Bexps, B->exps + NB*j, B->bits, ctx->minfo);
633             for (k = 0; k < m + 1; k++)
634             {
635                 l = perm[k];
636                 FLINT_ASSERT(stride[l] != UWORD(0));
637                 if (stride[l] > 1)
638                 {
639                     FLINT_ASSERT(((Bexps[l] - shift[l]) % stride[l]) == UWORD(0));
640                     uexps[k] = (Bexps[l] - shift[l]) / stride[l];
641                 }
642                 else
643                 {
644                     uexps[k] = Bexps[l] - shift[l];
645                 }
646             }
647             Ac = _fmpz_mpolyu_get_coeff(A, uexps[0], uctx);
648             FLINT_ASSERT(Ac->bits == A->bits);
649 
650             fmpz_mpoly_fit_length(Ac, Ac->length + 1, uctx);
651             fmpz_set(Ac->coeffs + Ac->length, B->coeffs + j);
652             mpoly_set_monomial_ui(Ac->exps + NA*Ac->length, uexps + 1, A->bits, uctx->minfo);
653             Ac->length++;
654         }
655 
656         if (num_handles > 0)
657         {
658             _sort_arg_t arg;
659 
660 #if HAVE_PTHREAD
661             pthread_mutex_init(&arg->mutex, NULL);
662 #endif
663 	    arg->index = 0;
664             arg->coeffs = A->coeffs;
665             arg->length = A->length;
666             arg->ctx = uctx;
667 
668             for (i = 0; i < num_handles; i++)
669             {
670                 thread_pool_wake(global_thread_pool, handles[i], 0, _worker_sort, arg);
671             }
672             _worker_sort(arg);
673             for (i = 0; i < num_handles; i++)
674             {
675                 thread_pool_wait(global_thread_pool, handles[i]);
676             }
677 
678 #if HAVE_PTHREAD
679             pthread_mutex_destroy(&arg->mutex);
680 #endif
681 	}
682         else
683         {
684             for (i = 0; i < A->length; i++)
685             {
686                 fmpz_mpoly_sort_terms(A->coeffs + i, uctx);
687             }
688         }
689 
690         TMP_END;
691     }
692 }
693 
694 
695 
696 /*
697     Convert B to A using the variable permutation vector perm.
698     This function inverts fmpz_mpoly_to_mpoly_perm_deflate.
699     A will be constructed with bits = Abits.
700 
701     operation on each term:
702 
703         for 0 <= l < n
704             Aexp[l] = shift[l]
705 
706         for 0 <= k < m
707             l = perm[k]
708             Aexp[l] += scale[l]*Bexp[k]
709 */
710 
fmpz_mpoly_from_mpoly_perm_inflate(fmpz_mpoly_t A,flint_bitcnt_t Abits,const fmpz_mpoly_ctx_t ctx,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t uctx,const slong * perm,const ulong * shift,const ulong * stride)711 void fmpz_mpoly_from_mpoly_perm_inflate(
712     fmpz_mpoly_t A,
713     flint_bitcnt_t Abits,
714     const fmpz_mpoly_ctx_t ctx,
715     const fmpz_mpoly_t B,
716     const fmpz_mpoly_ctx_t uctx,
717     const slong * perm,
718     const ulong * shift,
719     const ulong * stride)
720 {
721     slong n = ctx->minfo->nvars;
722     slong m = uctx->minfo->nvars;
723     slong j, k, l;
724     slong NA, NB;
725     fmpz * Acoeff;
726     ulong * Aexp;
727     slong Aalloc;
728     ulong * uexps;
729     ulong * Aexps;
730     TMP_INIT;
731 
732     FLINT_ASSERT(B->length > 0);
733     FLINT_ASSERT(Abits <= FLINT_BITS);
734     FLINT_ASSERT(B->bits <= FLINT_BITS);
735     FLINT_ASSERT(m <= n);
736     TMP_START;
737 
738     uexps = (ulong *) TMP_ALLOC(m*sizeof(ulong));
739     Aexps = (ulong *) TMP_ALLOC(n*sizeof(ulong));
740 
741     NA = mpoly_words_per_exp_sp(Abits, ctx->minfo);
742     NB = mpoly_words_per_exp_sp(B->bits, uctx->minfo);
743 
744     fmpz_mpoly_fit_bits(A, Abits, ctx);
745     A->bits = Abits;
746 
747     Acoeff = A->coeffs;
748     Aexp = A->exps;
749     Aalloc = A->alloc;
750     _fmpz_mpoly_fit_length(&Acoeff, &Aexp, &Aalloc, B->length, NA);
751     for (j = 0; j < B->length; j++)
752     {
753         fmpz_set(Acoeff + j, B->coeffs + j);
754         mpoly_get_monomial_ui(uexps, B->exps + NB*j, B->bits, uctx->minfo);
755         for (l = 0; l < n; l++)
756         {
757             Aexps[l] = shift[l];
758         }
759         for (k = 0; k < m; k++)
760         {
761             l = perm[k];
762             Aexps[l] += stride[l]*uexps[k];
763         }
764         mpoly_set_monomial_ui(Aexp + NA*j, Aexps, Abits, ctx->minfo);
765     }
766 
767     A->coeffs = Acoeff;
768     A->exps = Aexp;
769     A->alloc = Aalloc;
770     _fmpz_mpoly_set_length(A, B->length, ctx);
771 
772     fmpz_mpoly_sort_terms(A, ctx);
773     TMP_END;
774 }
775 
776 
777 
778 /*
779     Convert B to A using the variable permutation vector perm.
780     This function inverts fmpz_mpoly_to_mpolyu_perm_deflate.
781     A will be constructed with bits = Abits.
782 
783     operation on each term:
784 
785         for 0 <= l < n
786             Aexp[l] = shift[l]
787 
788         for 0 <= k < m + 1
789             l = perm[k]
790             Aexp[l] += scale[l]*Bexp[k]
791 */
fmpz_mpoly_from_mpolyu_perm_inflate(fmpz_mpoly_t A,flint_bitcnt_t Abits,const fmpz_mpoly_ctx_t ctx,const fmpz_mpolyu_t B,const fmpz_mpoly_ctx_t uctx,const slong * perm,const ulong * shift,const ulong * stride)792 void fmpz_mpoly_from_mpolyu_perm_inflate(
793     fmpz_mpoly_t A,
794     flint_bitcnt_t Abits,
795     const fmpz_mpoly_ctx_t ctx,
796     const fmpz_mpolyu_t B,
797     const fmpz_mpoly_ctx_t uctx,
798     const slong * perm,
799     const ulong * shift,
800     const ulong * stride)
801 {
802     slong n = ctx->minfo->nvars;
803     slong m = uctx->minfo->nvars;
804     slong i, j, k, l;
805     slong NA, NB;
806     slong Alen;
807     fmpz * Acoeff;
808     ulong * Aexp;
809     slong Aalloc;
810     ulong * uexps;
811     ulong * Aexps;
812     TMP_INIT;
813 
814     FLINT_ASSERT(B->length > 0);
815     FLINT_ASSERT(Abits <= FLINT_BITS);
816     FLINT_ASSERT(B->bits <= FLINT_BITS);
817     FLINT_ASSERT(m + 1 <= n);
818     TMP_START;
819 
820     uexps = (ulong *) TMP_ALLOC((m + 1)*sizeof(ulong));
821     Aexps = (ulong *) TMP_ALLOC(n*sizeof(ulong));
822 
823     NA = mpoly_words_per_exp(Abits, ctx->minfo);
824     NB = mpoly_words_per_exp(B->bits, uctx->minfo);
825 
826     fmpz_mpoly_fit_bits(A, Abits, ctx);
827     A->bits = Abits;
828 
829     Acoeff = A->coeffs;
830     Aexp = A->exps;
831     Aalloc = A->alloc;
832     Alen = 0;
833     for (i = 0; i < B->length; i++)
834     {
835         fmpz_mpoly_struct * Bc = B->coeffs + i;
836         _fmpz_mpoly_fit_length(&Acoeff, &Aexp, &Aalloc, Alen + Bc->length, NA);
837         FLINT_ASSERT(Bc->bits == B->bits);
838 
839         for (j = 0; j < Bc->length; j++)
840         {
841             fmpz_set(Acoeff + Alen + j, Bc->coeffs + j);
842             mpoly_get_monomial_ui(uexps + 1, Bc->exps + NB*j, Bc->bits, uctx->minfo);
843             uexps[0] = B->exps[i];
844             for (l = 0; l < n; l++)
845             {
846                 Aexps[l] = shift[l];
847             }
848             for (k = 0; k < m + 1; k++)
849             {
850                 l = perm[k];
851                 Aexps[l] += stride[l]*uexps[k];
852             }
853             mpoly_set_monomial_ui(Aexp + NA*(Alen + j), Aexps, Abits, ctx->minfo);
854         }
855         Alen += Bc->length;
856     }
857     A->coeffs = Acoeff;
858     A->exps = Aexp;
859     A->alloc = Aalloc;
860     _fmpz_mpoly_set_length(A, Alen, ctx);
861 
862     fmpz_mpoly_sort_terms(A, ctx);
863     TMP_END;
864 }
865 
866 
867 typedef struct
868 {
869     fmpz_mpoly_t poly;
870     slong threadidx;
871 }
872 _arrayconvertuu_base_elem_struct;
873 
874 
875 typedef struct
876 {
877     const fmpz_mpoly_ctx_struct * ctx, * uctx;
878     slong degbx, degby;
879     const slong * perm;
880     const ulong * shift, * stride;
881     flint_bitcnt_t Abits;
882     const fmpz_mpoly_struct * B;
883     _arrayconvertuu_base_elem_struct * array;
884     slong nthreads;
885 }
886 _arrayconvertuu_base_struct;
887 
888 typedef _arrayconvertuu_base_struct _arrayconvertuu_base_t[1];
889 
890 
891 typedef struct
892 {
893     slong idx;
894     _arrayconvertuu_base_struct * base;
895 }
896 _arrayconvertuu_worker_arg_struct;
897 
898 
_arrayconvertuu_worker(void * varg)899 void _arrayconvertuu_worker(void * varg)
900 {
901     _arrayconvertuu_worker_arg_struct * arg = (_arrayconvertuu_worker_arg_struct *) varg;
902     _arrayconvertuu_base_struct * base = arg->base;
903     const fmpz_mpoly_ctx_struct * uctx = base->uctx;
904     const fmpz_mpoly_ctx_struct * ctx = base->ctx;
905     const slong * perm = base->perm;
906     const ulong * shift = base->shift;
907     const ulong * stride = base->stride;
908     const ulong shiftx  = shift[perm[0]];
909     const ulong stridex = stride[perm[0]];
910     const ulong shifty  = shift[perm[1]];
911     const ulong stridey = stride[perm[1]];
912     const fmpz_mpoly_struct * B = base->B;
913     ulong mask = (-UWORD(1)) >> (FLINT_BITS - B->bits);
914     slong xoffset, xshift;
915     slong yoffset, yshift;
916     slong j, k, l, arrayidx;
917     slong n = ctx->minfo->nvars;
918     slong m = uctx->minfo->nvars;
919     slong NA, NB;
920     ulong * uexps;
921     ulong * Bexps;
922     fmpz_mpoly_struct * Ac;
923     TMP_INIT;
924 
925     TMP_START;
926 
927     uexps = (ulong *) TMP_ALLOC((m + 2)*sizeof(ulong));
928     Bexps = (ulong *) TMP_ALLOC(n*sizeof(ulong));
929 
930     NA = mpoly_words_per_exp(base->Abits, uctx->minfo);
931     NB = mpoly_words_per_exp(B->bits, ctx->minfo);
932 
933     mpoly_gen_offset_shift_sp(&xoffset, &xshift, perm[0], B->bits, ctx->minfo);
934     mpoly_gen_offset_shift_sp(&yoffset, &yshift, perm[1], B->bits, ctx->minfo);
935 
936     for (j = 0; j < B->length; j++)
937     {
938         ulong xexp = ((B->exps[NB*j + xoffset] >> xshift) & mask) - shiftx;
939         ulong yexp = ((B->exps[NB*j + yoffset] >> yshift) & mask) - shifty;
940         if (stridex != 1 || stridey != 1)
941         {
942             FLINT_ASSERT((xexp % stridex) == 0);
943             FLINT_ASSERT((yexp % stridey) == 0);
944             xexp /= stridex;
945             yexp /= stridey;
946         }
947         arrayidx = xexp*base->degby + yexp;
948 
949         FLINT_ASSERT(arrayidx < base->degbx*base->degby);
950         if (base->array[arrayidx].threadidx == arg->idx)
951         {
952             mpoly_get_monomial_ui(Bexps, B->exps + NB*j, B->bits, ctx->minfo);
953             for (k = 0; k < m + 2; k++)
954             {
955                 l = perm[k];
956                 if (stride[l] == 1)
957                 {
958                     uexps[k] = (Bexps[l] - shift[l]);
959                 }
960                 else
961                 {
962                     FLINT_ASSERT(stride[l] != 0);
963                     FLINT_ASSERT(((Bexps[l] - shift[l]) % stride[l]) == 0);
964                     uexps[k] = (Bexps[l] - shift[l]) / stride[l];
965                 }
966             }
967             Ac = base->array[arrayidx].poly;
968             fmpz_mpoly_fit_length(Ac, Ac->length + 1, uctx);
969             fmpz_set(Ac->coeffs + Ac->length, B->coeffs + j);
970             mpoly_set_monomial_ui(Ac->exps + NA*Ac->length, uexps + 2, base->Abits, uctx->minfo);
971             Ac->length++;
972         }
973     }
974 
975     /* sort all of ours */
976     for (arrayidx = base->degbx*base->degby - 1; arrayidx >= 0; arrayidx--)
977     {
978         if (base->array[arrayidx].threadidx == arg->idx)
979         {
980             fmpz_mpoly_sort_terms(base->array[arrayidx].poly, uctx);
981         }
982     }
983 
984     TMP_END;
985 }
986 
987 /*
988     Convert B to A using the variable permutation perm.
989     The uctx should be the context of the coefficients of A.
990     The ctx should be the context of B.
991 
992     operation on each term:
993 
994     for 0 <= k < m + 2
995         l = perm[k]
996         Aexp[k] = (Bexp[l] - shift[l])/stride[l]
997 
998     the most significant main variable uses Aexp[0]
999     the least significant main variable uses Aexp[1]
1000     the coefficients of A use variables Aexp[2], ..., Aexp[m + 1]
1001     maxexps if it exists is supposed to be a degree bound on B
1002 */
fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t uctx,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx,const slong * perm,const ulong * shift,const ulong * stride,const ulong * maxexps,const thread_pool_handle * handles,slong num_handles)1003 void fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(
1004     fmpz_mpolyu_t A,
1005     const fmpz_mpoly_ctx_t uctx,
1006     const fmpz_mpoly_t B,
1007     const fmpz_mpoly_ctx_t ctx,
1008     const slong * perm,
1009     const ulong * shift,
1010     const ulong * stride,
1011     const ulong * maxexps, /* nullptr is ok */
1012     const thread_pool_handle * handles,
1013     slong num_handles)
1014 {
1015     slong limit = 1000; /* limit*limit should not overflow a slong */
1016     slong degbx, degby;
1017     slong i, j, k, l;
1018     slong n = ctx->minfo->nvars;
1019     slong m = uctx->minfo->nvars;
1020     slong NA, NB;
1021     ulong * uexps;
1022     ulong * Bexps;
1023     fmpz_mpoly_struct * Ac;
1024     TMP_INIT;
1025 
1026     FLINT_ASSERT(FLINT_BIT_COUNT(limit) < FLINT_BITS/2);
1027     FLINT_ASSERT(A->bits <= FLINT_BITS);
1028     FLINT_ASSERT(B->bits <= FLINT_BITS);
1029     FLINT_ASSERT(m + 2 <= n);
1030 
1031     fmpz_mpolyu_zero(A, uctx);
1032 
1033     /* strict degree bounds on the result */
1034     degbx = limit + 1;
1035     degby = limit + 1;
1036     if (maxexps != NULL)
1037     {
1038         degbx = (maxexps[perm[0]] - shift[perm[0]])/stride[perm[0]] + 1;
1039         degby = (maxexps[perm[1]] - shift[perm[1]])/stride[perm[1]] + 1;
1040     }
1041 
1042     if (degbx <= limit && degby <= limit && degbx*degby <= limit)
1043     {
1044         _arrayconvertuu_base_t base;
1045         _arrayconvertuu_worker_arg_struct * args;
1046 
1047         base->ctx = ctx;
1048         base->uctx = uctx;
1049         base->degbx = degbx;
1050         base->degby = degby;
1051         base->perm = perm;
1052         base->shift = shift;
1053         base->stride = stride;
1054         base->Abits = A->bits;
1055         base->B = B;
1056         base->nthreads = num_handles + 1;
1057         base->array = (_arrayconvertuu_base_elem_struct *) flint_malloc(
1058                          degbx*degby*sizeof(_arrayconvertuu_base_elem_struct));
1059         for (i = degbx*degby - 1; i >= 0; i--)
1060         {
1061             base->array[i].threadidx = i % base->nthreads;
1062             fmpz_mpoly_init3(base->array[i].poly, 0, A->bits, uctx);
1063         }
1064         args = (_arrayconvertuu_worker_arg_struct *) flint_malloc(
1065                      base->nthreads*sizeof(_arrayconvertuu_worker_arg_struct));
1066 
1067         for (i = 0; i < num_handles; i++)
1068         {
1069             args[i].idx = i;
1070             args[i].base = base;
1071             thread_pool_wake(global_thread_pool, handles[i], 0,
1072                                              _arrayconvertuu_worker, &args[i]);
1073         }
1074         i = num_handles;
1075         args[i].idx = i;
1076         args[i].base = base;
1077         _arrayconvertuu_worker(&args[i]);
1078         for (i = 0; i < num_handles; i++)
1079         {
1080             thread_pool_wait(global_thread_pool, handles[i]);
1081         }
1082 
1083         A->length = 0;
1084         for (i = degbx - 1; i >= 0; i--)
1085         for (j = degby - 1; j >= 0; j--)
1086         {
1087             slong off = i*degby + j;
1088             if (base->array[off].poly->length > 0)
1089             {
1090                 fmpz_mpolyu_fit_length(A, A->length + 1, uctx);
1091                 A->exps[A->length] = (i << (FLINT_BITS/2)) + j;
1092                 fmpz_mpoly_swap(A->coeffs + A->length, base->array[off].poly, uctx);
1093                 A->length++;
1094             }
1095             fmpz_mpoly_clear(base->array[off].poly, uctx);
1096         }
1097 
1098         flint_free(base->array);
1099         flint_free(args);
1100     }
1101     else
1102     {
1103         TMP_START;
1104 
1105         uexps = (ulong *) TMP_ALLOC((m + 2)*sizeof(ulong));
1106         Bexps = (ulong *) TMP_ALLOC(n*sizeof(ulong));
1107         NA = mpoly_words_per_exp(A->bits, uctx->minfo);
1108         NB = mpoly_words_per_exp(B->bits, ctx->minfo);
1109 
1110         for (j = 0; j < B->length; j++)
1111         {
1112             mpoly_get_monomial_ui(Bexps, B->exps + NB*j, B->bits, ctx->minfo);
1113             for (k = 0; k < m + 2; k++)
1114             {
1115                 l = perm[k];
1116                 if (stride[l] == 1)
1117                 {
1118                     uexps[k] = (Bexps[l] - shift[l]);
1119                 }
1120                 else
1121                 {
1122                     FLINT_ASSERT(stride[l] != 0);
1123                     FLINT_ASSERT(((Bexps[l] - shift[l]) % stride[l]) == 0);
1124                     uexps[k] = (Bexps[l] - shift[l]) / stride[l];
1125                 }
1126             }
1127             FLINT_ASSERT(FLINT_BIT_COUNT(uexps[0]) < FLINT_BITS/2);
1128             FLINT_ASSERT(FLINT_BIT_COUNT(uexps[1]) < FLINT_BITS/2);
1129             Ac = _fmpz_mpolyu_get_coeff(A, (uexps[0] << (FLINT_BITS/2)) + uexps[1], uctx);
1130             FLINT_ASSERT(Ac->bits == A->bits);
1131 
1132             fmpz_mpoly_fit_length(Ac, Ac->length + 1, uctx);
1133             fmpz_set(Ac->coeffs + Ac->length, B->coeffs + j);
1134             mpoly_set_monomial_ui(Ac->exps + NA*Ac->length, uexps + 2, A->bits, uctx->minfo);
1135             Ac->length++;
1136         }
1137 
1138         if (num_handles > 0)
1139         {
1140             _sort_arg_t arg;
1141 
1142 #if HAVE_PTHREAD
1143             pthread_mutex_init(&arg->mutex, NULL);
1144 #endif
1145             arg->index = 0;
1146             arg->coeffs = A->coeffs;
1147             arg->length = A->length;
1148             arg->ctx = uctx;
1149 
1150             for (i = 0; i < num_handles; i++)
1151             {
1152                 thread_pool_wake(global_thread_pool, handles[i], 0, _worker_sort, arg);
1153             }
1154             _worker_sort(arg);
1155             for (i = 0; i < num_handles; i++)
1156             {
1157                 thread_pool_wait(global_thread_pool, handles[i]);
1158             }
1159 
1160 #if HAVE_PTHREAD
1161             pthread_mutex_destroy(&arg->mutex);
1162 #endif
1163 	}
1164         else
1165         {
1166             for (i = 0; i < A->length; i++)
1167             {
1168                 fmpz_mpoly_sort_terms(A->coeffs + i, uctx);
1169             }
1170         }
1171 
1172         TMP_END;
1173     }
1174 }
1175 
1176 
1177 /*
1178     Convert B to A using the variable permutation vector perm.
1179     A must be constructed with bits = Abits.
1180 
1181     operation on each term:
1182 
1183         for 0 <= l < n
1184             Aexp[l] = shift[l]
1185 
1186         for 0 <= k < m + 2
1187             l = perm[k]
1188             Aexp[l] += scale[l]*Bexp[k]
1189 */
fmpz_mpoly_from_mpolyuu_perm_inflate(fmpz_mpoly_t A,flint_bitcnt_t Abits,const fmpz_mpoly_ctx_t ctx,const fmpz_mpolyu_t B,const fmpz_mpoly_ctx_t uctx,const slong * perm,const ulong * shift,const ulong * stride)1190 void fmpz_mpoly_from_mpolyuu_perm_inflate( /* only for 2 main vars */
1191     fmpz_mpoly_t A,
1192     flint_bitcnt_t Abits,
1193     const fmpz_mpoly_ctx_t ctx,
1194     const fmpz_mpolyu_t B,
1195     const fmpz_mpoly_ctx_t uctx,
1196     const slong * perm,
1197     const ulong * shift,
1198     const ulong * stride)
1199 {
1200     slong n = ctx->minfo->nvars;
1201     slong m = uctx->minfo->nvars;
1202     slong i, j, k, l;
1203     slong NA, NB;
1204     slong Alen;
1205     fmpz * Acoeff;
1206     ulong * Aexp;
1207     slong Aalloc;
1208     ulong * uexps;
1209     ulong * Aexps;
1210     TMP_INIT;
1211 
1212     FLINT_ASSERT(B->length > 0);
1213     FLINT_ASSERT(Abits <= FLINT_BITS);
1214     FLINT_ASSERT(B->bits <= FLINT_BITS);
1215     FLINT_ASSERT(m + 2 <= n);
1216     TMP_START;
1217 
1218     uexps = (ulong *) TMP_ALLOC((m + 2)*sizeof(ulong));
1219     Aexps = (ulong *) TMP_ALLOC(n*sizeof(ulong));
1220 
1221     NA = mpoly_words_per_exp(Abits, ctx->minfo);
1222     NB = mpoly_words_per_exp(B->bits, uctx->minfo);
1223 
1224     fmpz_mpoly_fit_bits(A, Abits, ctx);
1225     A->bits = Abits;
1226 
1227     Acoeff = A->coeffs;
1228     Aexp = A->exps;
1229     Aalloc = A->alloc;
1230     Alen = 0;
1231     for (i = 0; i < B->length; i++)
1232     {
1233         fmpz_mpoly_struct * Bc = B->coeffs + i;
1234         _fmpz_mpoly_fit_length(&Acoeff, &Aexp, &Aalloc, Alen + Bc->length, NA);
1235         FLINT_ASSERT(Bc->bits == B->bits);
1236 
1237         for (j = 0; j < Bc->length; j++)
1238         {
1239             fmpz_set(Acoeff + Alen + j, Bc->coeffs + j);
1240             mpoly_get_monomial_ui(uexps + 2, Bc->exps + NB*j, Bc->bits, uctx->minfo);
1241             uexps[0] = B->exps[i] >> (FLINT_BITS/2);
1242             uexps[1] = B->exps[i] & ((-UWORD(1)) >> (FLINT_BITS - FLINT_BITS/2));
1243             for (l = 0; l < n; l++)
1244             {
1245                 Aexps[l] = shift[l];
1246             }
1247             for (k = 0; k < m + 2; k++)
1248             {
1249                 l = perm[k];
1250                 Aexps[l] += stride[l]*uexps[k];
1251             }
1252             mpoly_set_monomial_ui(Aexp + NA*(Alen + j), Aexps, Abits, ctx->minfo);
1253         }
1254         Alen += Bc->length;
1255     }
1256     A->coeffs = Acoeff;
1257     A->exps = Aexp;
1258     A->alloc = Aalloc;
1259     _fmpz_mpoly_set_length(A, Alen, ctx);
1260 
1261     fmpz_mpoly_sort_terms(A, ctx);
1262     TMP_END;
1263 }
1264 
1265 
fmpz_mpolyu_fmpz_content(fmpz_t c,fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx)1266 void fmpz_mpolyu_fmpz_content(fmpz_t c, fmpz_mpolyu_t A,
1267                                                     const fmpz_mpoly_ctx_t ctx)
1268 {
1269     slong i, j;
1270 
1271     fmpz_zero(c);
1272     for (i = 0; i < A->length; i++)
1273     {
1274         for (j = 0; j < (A->coeffs + i)->length; j++)
1275         {
1276             fmpz_gcd(c, c, (A->coeffs + i)->coeffs + j);
1277             if (fmpz_is_one(c))
1278                 break;
1279         }
1280     }
1281 }
1282 
1283 
fmpz_mpolyu_mul_fmpz(fmpz_mpolyu_t A,fmpz_mpolyu_t B,fmpz_t c,const fmpz_mpoly_ctx_t ctx)1284 void fmpz_mpolyu_mul_fmpz(
1285     fmpz_mpolyu_t A,
1286     fmpz_mpolyu_t B,
1287     fmpz_t c,
1288     const fmpz_mpoly_ctx_t ctx)
1289 {
1290     slong i;
1291 
1292     FLINT_ASSERT(!fmpz_is_zero(c));
1293     FLINT_ASSERT(A->bits == B->bits);
1294     fmpz_mpolyu_fit_length(A, B->length, ctx);
1295 
1296     for (i = 0; i < B->length; i++)
1297     {
1298         A->exps[i] = B->exps[i];
1299         fmpz_mpoly_scalar_mul_fmpz(A->coeffs + i, B->coeffs + i, c, ctx);
1300         FLINT_ASSERT((A->coeffs + i)->bits == B->bits);
1301     }
1302     A->length = B->length;
1303 }
1304 
1305 
fmpz_mpolyu_divexact_fmpz(fmpz_mpolyu_t A,fmpz_mpolyu_t B,fmpz_t c,const fmpz_mpoly_ctx_t ctx)1306 void fmpz_mpolyu_divexact_fmpz(
1307     fmpz_mpolyu_t A,
1308     fmpz_mpolyu_t B,
1309     fmpz_t c,
1310     const fmpz_mpoly_ctx_t ctx)
1311 {
1312     slong i;
1313 
1314     FLINT_ASSERT(!fmpz_is_zero(c));
1315     FLINT_ASSERT(A->bits == B->bits);
1316     fmpz_mpolyu_fit_length(A, B->length, ctx);
1317 
1318     for (i = 0; i < B->length; i++)
1319     {
1320         A->exps[i] = B->exps[i];
1321         fmpz_mpoly_scalar_divexact_fmpz(A->coeffs + i, B->coeffs + i, c, ctx);
1322         FLINT_ASSERT((A->coeffs + i)->bits == B->bits);
1323     }
1324     A->length = B->length;
1325 }
1326 
1327 
1328 /*
1329     The bit counts of A, B and c must all agree for this division
1330     If saveB is zero, B may be clobbered by this operation.
1331 */
fmpz_mpolyu_divexact_mpoly(fmpz_mpolyu_t A,fmpz_mpolyu_t B,int saveB,fmpz_mpoly_t c,const fmpz_mpoly_ctx_t ctx)1332 void fmpz_mpolyu_divexact_mpoly(
1333     fmpz_mpolyu_t A,
1334     fmpz_mpolyu_t B, int saveB,
1335     fmpz_mpoly_t c,
1336     const fmpz_mpoly_ctx_t ctx)
1337 {
1338     slong i;
1339     slong len;
1340     slong N;
1341     flint_bitcnt_t exp_bits = B->bits;
1342     ulong * cmpmask;
1343     TMP_INIT;
1344 
1345     FLINT_ASSERT(exp_bits == A->bits);
1346     FLINT_ASSERT(exp_bits == B->bits);
1347     FLINT_ASSERT(exp_bits == c->bits);
1348 
1349     if (saveB == 0 && fmpz_mpoly_is_one(c, ctx))
1350     {
1351         fmpz_mpolyu_swap(A, B, ctx);
1352         return;
1353     }
1354 
1355     TMP_START;
1356 
1357     fmpz_mpolyu_fit_length(A, B->length, ctx);
1358 
1359     N = mpoly_words_per_exp(exp_bits, ctx->minfo);
1360     cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
1361     mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
1362 
1363     for (i = 0; i < B->length; i++)
1364     {
1365         fmpz_mpoly_struct * poly1 = A->coeffs + i;
1366         fmpz_mpoly_struct * poly2 = B->coeffs + i;
1367         fmpz_mpoly_struct * poly3 = c;
1368         A->exps[i] = B->exps[i];
1369 
1370         fmpz_mpoly_fit_length(poly1, poly2->length/poly3->length + 1, ctx);
1371         fmpz_mpoly_fit_bits(poly1, exp_bits, ctx);
1372         poly1->bits = exp_bits;
1373 
1374         FLINT_ASSERT(poly2->length > 0);
1375 
1376         len = _fmpz_mpoly_divides_monagan_pearce(&poly1->coeffs, &poly1->exps,
1377                             &poly1->alloc, poly2->coeffs, poly2->exps, poly2->length,
1378                               poly3->coeffs, poly3->exps, poly3->length, exp_bits, N,
1379                                                   cmpmask);
1380         FLINT_ASSERT(len > 0);
1381         _fmpz_mpoly_set_length(poly1, len, ctx);
1382     }
1383     A->length = B->length;
1384 
1385     TMP_END;
1386 }
1387 
fmpz_mpolyu_divexact_mpoly_inplace(fmpz_mpolyu_t A,fmpz_mpoly_t c,const fmpz_mpoly_ctx_t ctx)1388 void fmpz_mpolyu_divexact_mpoly_inplace(fmpz_mpolyu_t A, fmpz_mpoly_t c,
1389                                                     const fmpz_mpoly_ctx_t ctx)
1390 {
1391     slong i, N, len;
1392     flint_bitcnt_t bits;
1393     ulong * cmpmask;
1394     fmpz_mpoly_t t;
1395     TMP_INIT;
1396 
1397     FLINT_ASSERT(c->length > 0);
1398 
1399     if (fmpz_mpoly_is_fmpz(c, ctx))
1400     {
1401         if (fmpz_is_one(c->coeffs + 0))
1402             return;
1403         for (i = 0; i < A->length; i++)
1404             _fmpz_vec_scalar_divexact_fmpz(A->coeffs[i].coeffs, A->coeffs[i].coeffs,
1405                                            A->coeffs[i].length, c->coeffs + 0);
1406         return;
1407     }
1408 
1409     bits = A->bits;
1410     FLINT_ASSERT(bits == c->bits);
1411 
1412     fmpz_mpoly_init3(t, 0, bits, ctx);
1413 
1414     N = mpoly_words_per_exp(bits, ctx->minfo);
1415 
1416     TMP_START;
1417 
1418     cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
1419     mpoly_get_cmpmask(cmpmask, N, bits, ctx->minfo);
1420 
1421     for (i = A->length - 1; i >= 0; i--)
1422     {
1423         fmpz_mpoly_struct * poly1 = t;
1424         fmpz_mpoly_struct * poly2 = A->coeffs + i;
1425         fmpz_mpoly_struct * poly3 = c;
1426 
1427         FLINT_ASSERT(poly2->bits == bits);
1428 
1429         len = _fmpz_mpoly_divides_monagan_pearce(&poly1->coeffs, &poly1->exps,
1430                             &poly1->alloc, poly2->coeffs, poly2->exps, poly2->length,
1431                               poly3->coeffs, poly3->exps, poly3->length, bits, N,
1432                                                   cmpmask);
1433         FLINT_ASSERT(len > 0);
1434         poly1->length = len;
1435         fmpz_mpoly_swap(poly2, poly1, ctx);
1436     }
1437 
1438     TMP_END;
1439 
1440     fmpz_mpoly_clear(t, ctx);
1441 }
1442 
1443 
1444 
1445 /*
1446     The bit counts of A, B and c must all agree for this multiplication
1447 */
fmpz_mpolyu_mul_mpoly(fmpz_mpolyu_t A,fmpz_mpolyu_t B,fmpz_mpoly_t c,const fmpz_mpoly_ctx_t ctx)1448 void fmpz_mpolyu_mul_mpoly(fmpz_mpolyu_t A, fmpz_mpolyu_t B,
1449                                     fmpz_mpoly_t c, const fmpz_mpoly_ctx_t ctx)
1450 {
1451     slong i;
1452     slong len;
1453     slong N;
1454     flint_bitcnt_t exp_bits;
1455     ulong * cmpmask;
1456     TMP_INIT;
1457 
1458     TMP_START;
1459 
1460     exp_bits = B->bits;
1461     FLINT_ASSERT(A->bits == B->bits);
1462     FLINT_ASSERT(A->bits == c->bits);
1463 
1464     fmpz_mpolyu_fit_length(A, B->length, ctx);
1465 
1466     N = mpoly_words_per_exp(exp_bits, ctx->minfo);
1467     cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
1468     mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
1469 
1470     for (i = 0; i < B->length; i++)
1471     {
1472         fmpz_mpoly_struct * poly1 = A->coeffs + i;
1473         fmpz_mpoly_struct * poly2 = B->coeffs + i;
1474         fmpz_mpoly_struct * poly3 = c;
1475         A->exps[i] = B->exps[i];
1476 
1477         fmpz_mpoly_fit_length(poly1, poly2->length/poly3->length + 1, ctx);
1478         fmpz_mpoly_fit_bits(poly1, exp_bits, ctx);
1479         poly1->bits = exp_bits;
1480 
1481         len = _fmpz_mpoly_mul_johnson(&poly1->coeffs, &poly1->exps,
1482                             &poly1->alloc, poly2->coeffs, poly2->exps, poly2->length,
1483                               poly3->coeffs, poly3->exps, poly3->length, exp_bits, N,
1484                                                   cmpmask);
1485 
1486         _fmpz_mpoly_set_length(poly1, len, ctx);
1487 
1488     }
1489     A->length = B->length;
1490 
1491     TMP_END;
1492 }
1493 
fmpz_mpolyu_mul_mpoly_inplace(fmpz_mpolyu_t A,fmpz_mpoly_t c,const fmpz_mpoly_ctx_t ctx)1494 void fmpz_mpolyu_mul_mpoly_inplace(fmpz_mpolyu_t A, fmpz_mpoly_t c,
1495                                                     const fmpz_mpoly_ctx_t ctx)
1496 {
1497     slong i;
1498     slong len;
1499     slong N;
1500     flint_bitcnt_t bits;
1501     ulong * cmpmask;
1502     fmpz_mpoly_t t;
1503     TMP_INIT;
1504 
1505     FLINT_ASSERT(c->length > 0);
1506 
1507     if (fmpz_mpoly_is_fmpz(c, ctx))
1508     {
1509         if (fmpz_is_one(c->coeffs + 0))
1510             return;
1511         for (i = 0; i < A->length; i++)
1512             _fmpz_vec_scalar_mul_fmpz(A->coeffs[i].coeffs, A->coeffs[i].coeffs,
1513                                            A->coeffs[i].length, c->coeffs + 0);
1514         return;
1515     }
1516 
1517     bits = A->bits;
1518     FLINT_ASSERT(bits == c->bits);
1519 
1520     fmpz_mpoly_init3(t, 0, bits, ctx);
1521 
1522     N = mpoly_words_per_exp(bits, ctx->minfo);
1523 
1524     TMP_START;
1525 
1526     cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
1527     mpoly_get_cmpmask(cmpmask, N, bits, ctx->minfo);
1528 
1529     for (i = A->length - 1; i >= 0; i--)
1530     {
1531         fmpz_mpoly_struct * poly1 = t;
1532         fmpz_mpoly_struct * poly2 = A->coeffs + i;
1533         fmpz_mpoly_struct * poly3 = c;
1534 
1535         FLINT_ASSERT(poly2->bits == bits);
1536 
1537         len = _fmpz_mpoly_mul_johnson(&poly1->coeffs, &poly1->exps,
1538                             &poly1->alloc, poly2->coeffs, poly2->exps, poly2->length,
1539                               poly3->coeffs, poly3->exps, poly3->length, bits, N,
1540                                                   cmpmask);
1541         FLINT_ASSERT(len > 0);
1542         poly1->length = len;
1543         fmpz_mpoly_swap(poly2, poly1, ctx);
1544     }
1545 
1546     TMP_END;
1547 
1548     fmpz_mpoly_clear(t, ctx);
1549 }
1550 
1551 
fmpz_mpolyu_shift_right(fmpz_mpolyu_t A,ulong s)1552 void fmpz_mpolyu_shift_right(fmpz_mpolyu_t A, ulong s)
1553 {
1554     slong i;
1555     for (i = 0; i < A->length; i++)
1556     {
1557         FLINT_ASSERT(A->exps[i] >= s);
1558         A->exps[i] -= s;
1559     }
1560 }
1561 
1562 
fmpz_mpolyu_shift_left(fmpz_mpolyu_t A,ulong s)1563 void fmpz_mpolyu_shift_left(fmpz_mpolyu_t A, ulong s)
1564 {
1565     slong i;
1566     for (i = 0; i < A->length; i++)
1567     {
1568         FLINT_ASSERT((slong)(A->exps[i] + s) >= 0);
1569         A->exps[i] += s;
1570     }
1571 }
1572 
1573 
fmpz_mpolyu_content_fmpz(fmpz_t g,const fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx)1574 void fmpz_mpolyu_content_fmpz(
1575     fmpz_t g,
1576     const fmpz_mpolyu_t A,
1577     const fmpz_mpoly_ctx_t ctx)
1578 {
1579     slong i, j;
1580 
1581     fmpz_zero(g);
1582     for (i = 0; i < A->length; i++)
1583     {
1584         fmpz_mpoly_struct * Ac = A->coeffs + i;
1585         for (j = 0; j < Ac->length; j++)
1586         {
1587             fmpz_gcd(g, g, Ac->coeffs + j);
1588             if (fmpz_is_one(g))
1589                 return;
1590         }
1591     }
1592 }
1593 
1594 
fmpz_mpolyu_content_mpoly_threaded_pool(fmpz_mpoly_t g,const fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx,const thread_pool_handle * handles,slong num_handles)1595 int fmpz_mpolyu_content_mpoly_threaded_pool(
1596     fmpz_mpoly_t g,
1597     const fmpz_mpolyu_t A,
1598     const fmpz_mpoly_ctx_t ctx,
1599     const thread_pool_handle * handles,
1600     slong num_handles)
1601 {
1602     slong i, j;
1603     int success;
1604     flint_bitcnt_t bits = A->bits;
1605 
1606     FLINT_ASSERT(g->bits == bits);
1607 
1608     if (A->length < 2)
1609     {
1610         if (A->length == 0)
1611         {
1612             fmpz_mpoly_zero(g, ctx);
1613         }
1614         else
1615         {
1616             fmpz_mpoly_set(g, A->coeffs + 0, ctx);
1617         }
1618 
1619         FLINT_ASSERT(g->bits == bits);
1620         return 1;
1621     }
1622 
1623     j = 0;
1624     for (i = 1; i < A->length; i++)
1625     {
1626         if ((A->coeffs + i)->length < (A->coeffs + j)->length)
1627         {
1628             j = i;
1629         }
1630     }
1631 
1632     if (j == 0)
1633     {
1634         j = 1;
1635     }
1636     success = _fmpz_mpoly_gcd_threaded_pool(g, bits, A->coeffs + 0,
1637                                      A->coeffs + j, ctx, handles, num_handles);
1638     if (!success)
1639     {
1640         return 0;
1641     }
1642 
1643     for (i = 1; i < A->length; i++)
1644     {
1645         if (i == j)
1646         {
1647             continue;
1648         }
1649         success = _fmpz_mpoly_gcd_threaded_pool(g, bits, g,
1650                                      A->coeffs + i, ctx, handles, num_handles);
1651         FLINT_ASSERT(g->bits == bits);
1652         if (!success)
1653         {
1654             return 0;
1655         }
1656     }
1657 
1658     return 1;
1659 }
1660 
1661