1 /*
2     Copyright (C) 2016 William Hart
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include <gmp.h>
13 #include <stdlib.h>
14 #include "flint.h"
15 #include "fmpz.h"
16 #include "fmpz_mpoly.h"
17 
18 /* improve locality */
19 #define BLOCK 128
20 #define MAX_ARRAY_SIZE (WORD(300000))
21 
22 /*
23    Set polyq to the quotient and polyr to the remainder of poly2 divided
24    by poly3, and return the length of the quotient. The polynomials have
25    their exponents tightly packed, with mixed bases equal to the largest
26    exponent for each variable, e.g. the input polys have exponents of the
27    form a_0 + a_1*b1 + a_2*b_2*b_2 + .... where b_0, b_1, b_2, etc, are
28    the bases, which are equal to the largest possible exponent for
29    each of the respective variables in the exponent. The dividend poly3
30    is assumed to be nonzero. There are assumed to be "num" variables and
31    the bases b_i are passed in the array "mults". The function reallocates
32    its output. The quotient and remainder are written out in reverse order.
33    The quotient and remainder poly are not assumed to be zero on input.
34    The quotient and remainder terms are appended to the existing terms in
35    those polys.
36 */
_fmpz_mpoly_divrem_array_tight(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,slong len0,fmpz ** polyr,ulong ** expr,slong * allocr,slong len1,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong * mults,slong num)37 slong _fmpz_mpoly_divrem_array_tight(slong * lenr,
38  fmpz ** polyq, ulong ** expq, slong * allocq, slong len0,
39        fmpz ** polyr, ulong ** expr, slong * allocr, slong len1,
40                   const fmpz * poly2, const ulong * exp2, slong len2,
41                         const fmpz * poly3, const ulong * exp3, slong len3,
42                                                       slong * mults, slong num)
43 {
44    slong i, j, q, r, prod, bits1, bits2, bits3, k = len0, l = len1;
45    slong max3 = (slong) exp3[0]; /* largest exponent in poly3 */
46    slong * prods;
47    fmpz c3 = poly3[0];
48    /* abs val of leading coeff of poly3 */
49    ulong u3 = ((ulong) FLINT_ABS(c3)) >> 1;
50    fmpz * p1 = *polyq, * p2 = *polyr;
51    ulong * e1 = *expq, * e2 = *expr;
52    int small;
53    TMP_INIT;
54 
55    TMP_START;
56 
57    prods = (slong *) TMP_ALLOC((num + 1)*sizeof(slong));
58 
59    /*
60       compute products 1, b0, b0*b1, b0*b1*b2 ...
61       from list of bases b0, b1, b2, ...
62    */
63    prods[0] = 1;
64    for (i = 1; i <= num; i++)
65       prods[i] = mults[i - 1]*prods[i - 1];
66 
67    prod = prods[num];
68 
69    /* compute bound on poly2 - q*poly3 assuming quotient remains small */
70    bits2 = _fmpz_vec_max_bits(poly2, len2);
71    bits3 = _fmpz_vec_max_bits(poly3, len3);
72    /* we assume a bound of FLINT_BITS - 2 for coefficients of the quotient */
73    bits1 = FLINT_ABS(bits3) + FLINT_BITS + FLINT_BIT_COUNT(len3) - 2;
74 
75    small = FLINT_ABS(bits2) <= bits1 && FLINT_ABS(bits3) <= FLINT_BITS - 2;
76    bits1 += 2; /* incr. so poly2 - q*poly3 doesn't overflow and for sign */
77 
78    /* input coeffs small and intermediate computations fit two words */
79    if (small && bits1 <= 2*FLINT_BITS)
80    {
81       ulong * t2 = (ulong *) TMP_ALLOC(2*prod*sizeof(ulong));
82 
83       for (i = 0; i < 2*prod; i++)
84          t2[i] = 0;
85 
86       /* poly2 to array format */
87       _fmpz_mpoly_to_ulong_array2(t2, poly2, exp2, len2);
88 
89       /* for each term of poly2 array relevant to quotient */
90       for (i = prod - 1; i >= max3; i--)
91       {
92          ulong * ptr = t2 + 2*i;
93          ulong p[2];
94 
95          /* if coeff is nonzero */
96          if (ptr[0] != 0 || ptr[1] != 0)
97          {
98             if (0 > (slong) ptr[1])
99                mpn_neg(p, ptr, 2);
100             else
101                flint_mpn_copyi(p, ptr, 2);
102 
103             /* not exact quotient monomial, thus remainder monomial */
104             if (!mpoly_monomial_divides_tight(i, max3, prods, num))
105             {
106                /* realloc remainder poly */
107                _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
108 
109                /* set remainder coeff... */
110                fmpz_set_signed_uiui(p2 + l, ptr[1], ptr[0]);
111 
112                /* ...and exponent */
113                e2[l++] = i;
114             } else /* monomials can be divided exactly */
115             {
116                /* check quotient won't overflow a word */
117                if (u3 <= p[1] || (u3 == 0 && 0 > (slong) p[0])) /* quotient too large */
118                {
119                   for (j = len0; j < k; j++)
120                      _fmpz_demote(p1 + j);
121                   for (j = len1; j < l; j++)
122                      _fmpz_demote(p2 + j);
123                   k = len0;
124                   l = len1;
125 
126                   goto big;
127                }
128 
129                /* quotient and remainder of coeffs */
130                sdiv_qrnnd(q, r, ptr[1], ptr[0], c3);
131 
132                /* check coefficient is small, else restart with multiprec code */
133                if (COEFF_IS_MPZ(FLINT_ABS(q))) /* quotient too large */
134                {
135                   for (j = len0; j < k; j++)
136                      _fmpz_demote(p1 + j);
137                   for (j = len1; j < l; j++)
138                      _fmpz_demote(p2 + j);
139                   k = len0;
140                   l = len1;
141 
142                   goto big;
143                }
144 
145                /* check coeff quotient was exact */
146                if (r != 0) /* not an exact division */
147                {
148                   /* realloc remainder poly */
149                   _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
150 
151                   /* set remainder coeff... */
152                   fmpz_set_si(p2 + l, (slong) r);
153 
154                   /* ... and exponent */
155                   e2[l++] = i;
156                }
157 
158                if (q != 0)
159                {
160                   /* submul a - q*b */
161                   _fmpz_mpoly_submul_array1_slong2_1(t2, q, i - max3,
162                                                             poly3, exp3, len3);
163 
164                   /* realloc quotient poly */
165                   _fmpz_mpoly_fit_length(&p1, &e1, allocq, k + 1, 1);
166                   /* set quotient coeff and exponent */
167                   fmpz_set_si(p1 + k, q);
168                   e1[k++] = i - max3;
169                }
170             }
171          }
172       }
173 
174       /* all remaining terms are remainder terms */
175       for ( ; i >= 0; i--)
176       {
177          ulong * ptr = t2 + 2*i;
178 
179          /* if coeff nonzero */
180          if (ptr[0] != 0 || ptr[1] != 0)  /* not an exact division */
181          {
182             /* realloc remainder poly */
183             _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
184 
185             /* set remainder coeff... */
186             fmpz_set_signed_uiui(p2 + l, ptr[1], ptr[0]);
187 
188             /* and exponent */
189             e2[l++] = i;
190          }
191       }
192    }
193 
194    /* not done, coeffs small and intermediate computations fit three words */
195    if (k == len0 && l == len1 && small)
196    {
197       ulong * t2 = (ulong *) TMP_ALLOC(3*prod*sizeof(ulong));
198 
199       for (i = 0; i < 3*prod; i++)
200          t2[i] = 0;
201 
202       /* poly2 to array format */
203       _fmpz_mpoly_to_ulong_array(t2, poly2, exp2, len2);
204 
205       /* for each term of poly2 array relevant to exact quotient */
206       for (i = prod - 1; i >= max3; i--)
207       {
208          ulong * ptr = t2 + 3*i;
209          ulong p[3];
210 
211          /* if coeff is nonzero */
212          if (ptr[0] != 0 || ptr[1] != 0 || ptr[2] != 0)
213          {
214             if (0 > (slong) ptr[2])
215                mpn_neg(p, ptr, 3);
216             else
217                flint_mpn_copyi(p, ptr, 3);
218 
219             /* not exact quotient monomial, thus remainder monomial */
220             if (!mpoly_monomial_divides_tight(i, max3, prods, num))
221             {
222                /* realloc remainder poly */
223                _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
224 
225                /* set remainder coeff... */
226                fmpz_set_signed_uiuiui(p2 + l, ptr[2], ptr[1], ptr[0]);
227 
228                /* ... and exponent */
229                e2[l++] = i;
230             } else /* monomials can be divided exact */
231             {
232                /* check quotient won't overflow a word */
233                if (p[2] > 0 || u3 <= p[1] || (u3 == 0 && 0 > (slong) p[0])) /* quotient too large */
234                {
235                   for (j = len0; j < k; j++)
236                      _fmpz_demote(p1 + j);
237                   for (j = len1; j < l; j++)
238                      _fmpz_demote(p2 + j);
239                   k = len0;
240                   l = len1;
241 
242                   goto big;
243                }
244 
245                /* quotient and remainder of coeffs */
246                sdiv_qrnnd(q, r, ptr[1], ptr[0], c3);
247 
248                /* check coefficient is small, else restart with multiprec code */
249                if (COEFF_IS_MPZ(FLINT_ABS(q))) /* quotient too large */
250                {
251                   for (j = len0; j < k; j++)
252                      _fmpz_demote(p1 + j);
253                   for (j = len1; j < l; j++)
254                      _fmpz_demote(p2 + j);
255                   k = len0;
256                   l = len1;
257 
258                   goto big;
259                }
260 
261                /* check if coeff quotient was exact */
262                if (r != 0) /* else remainder term */
263                {
264                   /* reallocate remainder poly */
265                   _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
266 
267                   /* set remainder coeff... */
268                   fmpz_set_si(p2 + l, (slong) r);
269 
270                   /* and exponent */
271                   e2[l++] = i;
272                }
273 
274                /* if nonzero quotient */
275                if (q != 0)
276                {
277                   /* submul a - q*b */
278                   _fmpz_mpoly_submul_array1_slong_1(t2, q, i - max3,
279                                                             poly3, exp3, len3);
280 
281                   /* realloc quotient poly */
282                   _fmpz_mpoly_fit_length(&p1, &e1, allocq, k + 1, 1);
283 
284                   /* set quotient coeff and exponent */
285                   fmpz_set_si(p1 + k, q);
286 
287                   e1[k++] = i - max3;
288                }
289             }
290          }
291       }
292 
293       /* all remaining terms are remainder terms */
294       for ( ; i >= 0; i--)
295       {
296          ulong * ptr = t2 + 3*i;
297 
298          /* if coeff nonzero */
299          if (ptr[0] != 0 || ptr[1] != 0 || ptr[2] != 0)
300          {
301             /* not an exact division */
302 
303             /* realloc remainder poly */
304             _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
305 
306             /* set remainder coeff... */
307             fmpz_set_signed_uiuiui(p2 + l, ptr[2], ptr[1], ptr[0]);
308 
309             /* ...and exponent */
310             e2[l++] = i;
311          }
312       }
313    }
314 
315 big:
316 
317    /* if still not done, use multiprecision coeffs instead */
318    if (k == len0 && l == len1)
319    {
320       fmpz * t2 = (fmpz *) TMP_ALLOC(prod*sizeof(fmpz));
321       fmpz_t fq, fr;
322 
323       fmpz_init(fq);
324       fmpz_init(fr);
325 
326       for (i = 0; i < prod; i++)
327          fmpz_init(t2 + i);
328 
329       /* poly2 to array format */
330       _fmpz_mpoly_to_fmpz_array(t2, poly2, exp2, len2);
331 
332       /* for each term of poly2 array relevant to exact quotient */
333       for (i = prod - 1; i >= max3; i--)
334       {
335          /* if coeff is nonzero */
336          if (!fmpz_is_zero(t2 + i))
337          {
338             /* not exact quotient monomial, thus remainder monomial */
339             if (!mpoly_monomial_divides_tight(i, max3, prods, num))
340             {
341                /* realloc remainder poly */
342                _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
343 
344                /* set remainder coeff... */
345                fmpz_set(p2 + l, t2 + i);
346 
347                /* ... and exponent */
348                e2[l++] = i;
349             } else /* monomials can be divided exactly */
350             {
351                /* quotient and remainder of coeffs */
352                fmpz_fdiv_qr(fq, fr, t2 + i, poly3);
353 
354                /* check if coeff quotient was exact */
355                if (!fmpz_is_zero(fr)) /* else remainder term */
356                {
357                   /* realloc remainder poly */
358                   _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
359 
360                   /* set remainder coeff... */
361                   fmpz_set(p2 + l, fr);
362 
363                   /* and exponent */
364                   e2[l++] = i;
365                }
366 
367                /* if nonzero quotient */
368                if (!fmpz_is_zero(fq))
369                {
370                   /* submul a - q*b */
371                   _fmpz_mpoly_submul_array1_fmpz_1(t2, fq, i - max3,
372                                                             poly3, exp3, len3);
373 
374                   /* realloc quotient poly */
375                   _fmpz_mpoly_fit_length(&p1, &e1, allocq, k + 1, 1);
376 
377                   /* set quotient coeff and exponent */
378                   fmpz_set(p1 + k, fq);
379                   e1[k++] = i - max3;
380                }
381             }
382          }
383       }
384 
385       /* all remaining terms are remainder terms */
386       for ( ; i >= 0; i--)
387       {
388          /* if coeff nonzero */
389          if (!fmpz_is_zero(t2 + i))
390          {
391             /* remainder */
392 
393             /* realloc remainder poly */
394             _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
395 
396             /* set remainder coeff... */
397             fmpz_set(p2 + l, t2 + i);
398 
399             /* ... and exponent */
400             e2[l++] = i;
401          }
402       }
403 
404       fmpz_clear(fq);
405       fmpz_clear(fr);
406 
407       for (i = 0; i < prod; i++)
408          fmpz_clear(t2 + i);
409    }
410 
411    (*polyq) = p1;
412    (*expq) = e1;
413    (*polyr) = p2;
414    (*expr) = e2;
415 
416    /* set remainder poly length */
417    (*lenr) = l - len1;
418 
419    TMP_END;
420 
421    /* return quotient poly length */
422    return k - len0;
423 }
424 
425 /*
426    Use dense array division to set polyq, polyr to poly2/poly3 in num + 1
427    variables, given a list of multipliers to tightly pack exponents and a
428    number of bits for the fields of the exponents of the result, assuming
429    no aliasing. classical exact division in main variable, array
430    multiplication (submul) for multivariate coefficients in remaining num
431    variables. The function reallocates its output and returns the length
432    of the quotient poly. It is assumed that poly2 is not zero. The
433    quotient and remainder are written in reverse order.
434 */
_fmpz_mpoly_divrem_array_chunked(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong * mults,slong num,slong bits)435 slong _fmpz_mpoly_divrem_array_chunked(slong * lenr,
436             fmpz ** polyq, ulong ** expq, slong * allocq,
437                  fmpz ** polyr, ulong ** expr, slong * allocr,
438                 const fmpz * poly2, const ulong * exp2, slong len2,
439         const fmpz * poly3, const ulong * exp3, slong len3, slong * mults,
440                                                          slong num, slong bits)
441 {
442    slong i, j, k, l = 0, m, prod, len = 0, l1, l2, l3;
443    slong bits1, bits2, bits3 = 0, tlen, talloc;
444    slong shift = bits*(num);
445    slong * i1, * i2, * i3, * n1, * n2, * n3, * prods;
446    slong * b1, * b3, * maxb1, * maxb3, * max_exp1, * max_exp3;
447    ulong * e2, * e3, * texp, * p2;
448    fmpz * temp;
449    int small;
450    TMP_INIT;
451 
452    TMP_START;
453 
454    /*
455       compute products 1, b0, b0*b1, b0*b1*b2 ...
456       from list of bases b0, b1, b2, ...
457    */
458    prods = (slong *) TMP_ALLOC((num + 1)*sizeof(slong));
459 
460    prods[0] = 1;
461    for (i = 0; i < num; i++)
462       prods[i + 1] = prods[i]*mults[i];
463    prod = prods[num];
464 
465    /* lengths of poly2, poly3 and polyq in chunks */
466    l2 = 1 + (slong) (exp2[0] >> shift);
467    l3 = 1 + (slong) (exp3[0] >> shift);
468 
469    l1 = FLINT_MAX(l2 - l3 + 1, 0);
470 
471    /* compute indices and lengths of coefficients of polys in main variable */
472 
473    i1 = (slong *) TMP_ALLOC(l1*sizeof(slong));
474    n1 = (slong *) TMP_ALLOC(l1*sizeof(slong));
475    i2 = (slong *) TMP_ALLOC(l2*sizeof(slong));
476    n2 = (slong *) TMP_ALLOC(l2*sizeof(slong));
477    i3 = (slong *) TMP_ALLOC(l3*sizeof(slong));
478    n3 = (slong *) TMP_ALLOC(l3*sizeof(slong));
479    b1 = (slong *) TMP_ALLOC(l1*sizeof(slong));
480    maxb1 = (slong *) TMP_ALLOC(l1*sizeof(slong));
481    b3 = (slong *) TMP_ALLOC(l3*sizeof(slong));
482    maxb3 = (slong *) TMP_ALLOC(l3*sizeof(slong));
483    max_exp1 = (slong *) TMP_ALLOC(l1*num*sizeof(slong));
484    max_exp3 = (slong *) TMP_ALLOC(l3*num*sizeof(slong));
485 
486    mpoly_main_variable_terms1(i2, n2, exp2, l2, len2, num + 1, num + 1, bits);
487    mpoly_main_variable_terms1(i3, n3, exp3, l3, len3, num + 1, num + 1, bits);
488 
489    /* work out max bits for each coeff and optimal bits */
490 
491    for (i = 0; i < l3; i++)
492    {
493       _fmpz_vec_sum_max_bits(&b3[i], &maxb3[i], poly3+i3[i], n3[i]);
494 
495       if (bits3 < maxb3[i])
496          bits3 = maxb3[i];
497    }
498 
499    /* pack input coefficients tightly */
500 
501    e2 = (ulong *) TMP_ALLOC(len2*sizeof(ulong));
502    e3 = (ulong *) TMP_ALLOC(len3*sizeof(ulong));
503 
504    mpoly_pack_monomials_tight(e2, exp2, len2, mults, num, bits);
505    mpoly_pack_monomials_tight(e3, exp3, len3, mults, num, bits);
506 
507    /* work out maximum exponents for each chunk */
508    for (i = 0; i < l3; i++)
509       mpoly_max_degrees_tight(max_exp3 + i*num, e3 + i3[i], n3[i], prods, num);
510 
511    /* bound poly2 coeffs and check input/output coeffs likely small */
512    bits2 = _fmpz_vec_max_bits(poly2, len2);
513    /* we assume a bound of FLINT_BITS - 2 for coefficients of the quotient */
514    bits1 = FLINT_ABS(bits3) + FLINT_BITS + FLINT_BIT_COUNT(len3) - 2;
515 
516    small = FLINT_ABS(bits2) <= bits1 && FLINT_ABS(bits3) <= FLINT_BITS - 2;
517 
518    /* alloc space for copy of coeff/chunk of poly2 */
519 
520    temp = (fmpz *) flint_calloc(n2[0] + 1, sizeof(fmpz));
521    texp = (ulong *) flint_malloc((n2[0] + 1)*sizeof(ulong));
522    talloc = n2[0] + 1; /* plus one so doubling always increases size */
523 
524    /* enough space for three words per coeff, even if only one or two needed */
525    p2 = (ulong *) TMP_ALLOC(3*prod*sizeof(ulong));
526 
527    /* coefficients likely to be small */
528    if (small)
529    {
530       /* for each chunk of poly2 */
531       for (i = 0; i < l2; i++)
532       {
533          slong num1 = 0;
534          bits1 = 0;
535 
536          /* if there are already quotient terms */
537          if (i != 0)
538          {
539             /* compute bound on intermediate computations a - q*b */
540             for (j = 0; j < i && j < l1; j++)
541             {
542                k = i - j;
543 
544                if (k < l3 && k >= 0)
545                {
546                   for (m = 0; m < num; m++)
547                   {
548                      if (max_exp1[j*num + m] + max_exp3[k*num + m] >= mults[m])
549                      {
550                         for (j = 0; j < len; j++)
551                            _fmpz_demote((*polyq) + j);
552                         for (j = 0; j < l; j++)
553                            _fmpz_demote((*polyr) + j);
554                         len = 0;
555                         l = 0;
556                         goto cleanup3;
557                      }
558                   }
559 
560                   bits1 = FLINT_MAX(bits1, FLINT_MIN(b1[j] + maxb3[k], maxb1[j] + b3[k]));
561                   num1++;
562                }
563             }
564 
565             bits1 += FLINT_BIT_COUNT(num1);
566             bits1 = FLINT_MAX(FLINT_ABS(bits2), bits1);
567 
568             bits1 += 2; /* bit for sign and so a - q*b doesn't overflow */
569          } else
570             bits1 = FLINT_ABS(bits2) + 1; /* extra bit for sign */
571 
572          /* intermediate computations fit in one word */
573          if (bits1 <= FLINT_BITS)
574          {
575             for (j = 0; j < prod; j++)
576                p2[j] = 0;
577 
578             /* convert relevant coeff/chunk of poly2 to array format */
579             _fmpz_mpoly_to_ulong_array1(p2, poly2 + i2[i], e2 + i2[i], n2[i]);
580 
581             /* submuls */
582 
583             for (j = 0; j < i && j < l1; j++)
584             {
585                k = i - j;
586 
587                if (k < l3 && k >= 0)
588                {
589                   _fmpz_mpoly_submul_array1_slong1(p2, (*polyq) + i1[j],
590                      (*expq) + i1[j], n1[j], poly3 + i3[k], e3 + i3[k], n3[k]);
591                }
592             }
593 
594             /* convert chunk from array format */
595             tlen = _fmpz_mpoly_from_ulong_array1(&temp, &texp, &talloc,
596                                                       p2, mults, num, bits, 0);
597          } else if (bits1 <= 2*FLINT_BITS) /* intermed comps fit two words */
598          {
599             for (j = 0; j < 2*prod; j++)
600                p2[j] = 0;
601 
602             /* convert relevant coeff/chunk of poly2 to array format */
603             _fmpz_mpoly_to_ulong_array2(p2, poly2 + i2[i], e2 + i2[i], n2[i]);
604 
605             /* submuls */
606 
607             for (j = 0; j < i && j < l1; j++)
608             {
609                k = i - j;
610 
611                if (k < l3 && k >= 0)
612                {
613                   _fmpz_mpoly_submul_array1_slong2(p2, (*polyq) + i1[j],
614                      (*expq) + i1[j], n1[j], poly3 + i3[k], e3 + i3[k], n3[k]);
615                }
616             }
617 
618             /* convert chunk from array format */
619             tlen = _fmpz_mpoly_from_ulong_array2(&temp, &texp, &talloc,
620                                                       p2, mults, num, bits, 0);
621          } else /* intermed comps fit three words */
622          {
623             for (j = 0; j < 3*prod; j++)
624                p2[j] = 0;
625 
626             /* convert relevant coeff/chunk of poly2 to array format */
627             _fmpz_mpoly_to_ulong_array(p2, poly2 + i2[i], e2 + i2[i], n2[i]);
628 
629             /* submuls */
630 
631             for (j = 0; j < i && j < l1; j++)
632             {
633                k = i - j;
634 
635                if (k < l3 && k >= 0)
636                   _fmpz_mpoly_submul_array1_slong(p2, (*polyq) + i1[j],
637                      (*expq) + i1[j], n1[j], poly3 + i3[k], e3 + i3[k], n3[k]);
638             }
639 
640             /* convert chunk from array format */
641             tlen = _fmpz_mpoly_from_ulong_array(&temp, &texp, &talloc,
642                                                       p2, mults, num, bits, 0);
643          }
644 
645          if (tlen != 0) /* nonzero coeff/chunk */
646          {
647             if (i < l1) /* potentially a quotient with remainder */
648             {
649                /* tightly pack chunk exponents */
650                mpoly_pack_monomials_tight(texp, texp, tlen, mults, num, bits);
651 
652                /* set starting index for quotient chunk we are about to compute */
653                i1[i] = len;
654 
655                /* compute quotient chunk and set length of quotient chunk */
656 	       n1[i] = _fmpz_mpoly_divrem_array_tight(lenr, polyq,
657                  expq, allocq, len, polyr, expr, allocr, l, temp, texp,
658                      tlen, poly3 + i3[0], e3 + i3[0], n3[0], mults, num);
659 
660 	       /* unpack remainder exponents */
661 	       mpoly_unpack_monomials_tight(*expr + l,
662 			                            *expr + l, *lenr, mults, num, bits);
663 
664 	       /* insert main variable */
665 	       for (j = 0; j < *lenr; j++)
666 	          (*expr)[l + j] += ((l2 - i - 1) << shift);
667 
668 	       /* work out maximum exponents for chunk */
669                mpoly_max_degrees_tight(max_exp1 + i*num,
670 	                                   (*expq) + i1[i], n1[i], prods, num);
671 
672 	       /* check there were no exponent overflows */
673 	       for (m = 0; m < num; m++)
674 	       {
675 	          if (max_exp3[m] + max_exp1[i*num + m] >= mults[m])
676                   {
677                      for (j = 0; j < len; j++)
678                         _fmpz_demote((*polyq) + j);
679                      for (j = 0; j < l; j++)
680                         _fmpz_demote((*polyr) + j);
681                      len = 0;
682 		     l = 0;
683 
684 	             goto cleanup3;
685                   }
686 	       }
687 
688                /* check the quotient didn't have large coefficients */
689                if (FLINT_ABS(_fmpz_vec_max_bits((*polyq) + len,
690                                                       n1[i])) > FLINT_BITS - 2)
691                {
692                   for (j = 0; j < len; j++)
693                      _fmpz_demote((*polyq) + j);
694                   for (j = 0; j < l; j++)
695                      _fmpz_demote((*polyr) + j);
696                   len = 0;
697                   l = 0;
698 
699                   goto big;
700                }
701 
702                /* abs bound and sum of abs vals of coeffs of quotient chunk */
703                _fmpz_vec_sum_max_bits(&b1[i], &maxb1[i], (*polyq)+i1[i], n1[i]);
704 
705                /* update length of output quotient and remainder polys */
706                len += n1[i];
707                l += *lenr;
708             } else /* remainder terms only */
709             {
710                /* realloc remainder poly */
711                _fmpz_mpoly_fit_length(polyr, expr, allocr, l + tlen, 1);
712 
713                /* for each term in remainder chunk */
714                for (j = 0; j < tlen; j++)
715                {
716                   /* set remainder coeff and exponent */
717                   fmpz_set(*polyr + l + j, temp + j);
718                   (*expr)[l + j] = (texp[j]) + ((l2 - i - 1) << shift);
719                }
720 
721                l += tlen;
722             }
723          } else if (i < l1) /* zero chunk, no quotient or remainder */
724          {
725             /* set index and length of quotient chunk */
726             i1[i] = len;
727             n1[i] = 0;
728             b1[i] = 0;
729             maxb1[i] = 0;
730 
731             /* write out maximum exponents in chunk */
732             mpoly_max_degrees_tight(max_exp1 + i*num,
733 	                                   (*expq) + i1[i], n1[i], prods, num);
734          }
735       }
736    }
737 
738 big:
739 
740    /* if not done, use multiprecision coeffs instead */
741    if (len == 0 && l == 0)
742    {
743       fmpz * p2 = (fmpz *) TMP_ALLOC(prod*sizeof(fmpz));
744 
745       for (j = 0; j < prod; j++)
746             fmpz_init(p2 + j);
747 
748       /* for each chunk of poly2 */
749       for (i = 0; i < l2; i++)
750       {
751          for (j = 0; j < prod; j++)
752             fmpz_zero(p2 + j);
753 
754          /* convert relevant coeff/chunk of poly2 to array format */
755          _fmpz_mpoly_to_fmpz_array(p2, poly2 + i2[i], e2 + i2[i], n2[i]);
756 
757          /* submuls */
758 
759          for (j = 0; j < i && j < l1; j++)
760          {
761             k = i - j;
762 
763             if (k < l3 && k >= 0)
764             {
765 	       for (m = 0; m < num; m++)
766 	       {
767 	          if (max_exp1[j*num + m] + max_exp3[k*num + m] >= mults[m])
768 	          {
769                      for (j = 0; j < len; j++)
770                         _fmpz_demote((*polyq) + j);
771                      for (j = 0; j < l; j++)
772                         _fmpz_demote((*polyr) + j);
773 		     len = 0;
774 	             l = 0;
775 
776                 for (j = 0; j < prod; j++)
777                    fmpz_clear(p2 + j);
778 		     goto cleanup3;
779 	          }
780 	       }
781 
782                _fmpz_mpoly_submul_array1_fmpz(p2, (*polyq) + i1[j],
783                     (*expq) + i1[j], n1[j], poly3 + i3[k], e3 + i3[k], n3[k]);
784 	    }
785          }
786 
787          /* convert chunk from array format */
788          tlen = _fmpz_mpoly_from_fmpz_array(&temp, &texp, &talloc,
789                                                       p2, mults, num, bits, 0);
790 
791          if (tlen != 0) /* nonzero coeff/chunk */
792          {
793             if (i < l1) /* potentially a quotient with remainder */
794             {
795                /* tightly pack chunk exponents */
796                mpoly_pack_monomials_tight(texp, texp, tlen, mults, num, bits);
797 
798                /* set start index of quotient chunk we are about to compute */
799                i1[i] = len;
800 
801                /* compute quotient chunk and set length of quotient chunk */
802                n1[i] = _fmpz_mpoly_divrem_array_tight(lenr, polyq,
803                       expq, allocq, len, polyr, expr, allocr, l, temp, texp,
804                            tlen, poly3 + i3[0], e3 + i3[0], n3[0], mults, num);
805 
806 	       /* unpack remainder exponents */
807 	       mpoly_unpack_monomials_tight(*expr + l,
808 			                *expr + l, *lenr, mults, num, bits);
809 
810 	       /* insert main variable */
811 	       for (j = 0; j < *lenr; j++)
812 			             (*expr)[l + j] += ((l2 - i - 1) << shift);
813 
814 	       /* work out maximum exponents for chunk */
815                mpoly_max_degrees_tight(max_exp1 + i*num,
816 	                                   (*expq) + i1[i], n1[i], prods, num);
817 
818 	       /* check there were no exponent overflows */
819 	       for (m = 0; m < num; m++)
820 	       {
821 	          if (max_exp3[m] + max_exp1[i*num + m] >= mults[m])
822                   {
823                      for (j = 0; j < len; j++)
824                         _fmpz_demote((*polyq) + j);
825                      for (j = 0; j < l; j++)
826                         _fmpz_demote((*polyr) + j);
827                      len = 0;
828 	             l = 0;
829 
830                 for (j = 0; j < prod; j++)
831                    fmpz_clear(p2 + j);
832 	             goto cleanup3;
833                   }
834 	       }
835 
836                /* abs bound and sum of abs vals of coeffs of quotient chunk */
837                _fmpz_vec_sum_max_bits(&b1[i], &maxb1[i], (*polyq)+i1[i], n1[i]);
838 
839                /* update length of output quotient and remainder polys */
840                len += n1[i];
841                l += *lenr;
842             } else /* remainder terms only */
843             {
844                /* realloc remainder poly */
845                _fmpz_mpoly_fit_length(polyr, expr, allocr, l + tlen, 1);
846 
847                /* for each term in chunk */
848                for (j = 0; j < tlen; j++)
849                {
850                   /* set remainder coeff and exponent */
851                   fmpz_set(*polyr + l + j, temp + j);
852                   (*expr)[l + j] = (texp[j]) + ((l2 - i - 1) << shift);
853                }
854 
855                /* update length of output remainder poly */
856                l += tlen;
857             }
858          } else if (i < l1) /* zero chunk, no quotient or remainder */
859          {
860             /* set index and length of quotient chunk */
861             i1[i] = len;
862             n1[i] = 0;
863             b1[i] = 0;
864             maxb1[i] = 0;
865 
866             /* write out maximum exponents in chunk */
867             mpoly_max_degrees_tight(max_exp1 + i*num,
868 	                                   (*expq) + i1[i], n1[i], prods, num);
869          }
870       }
871 
872       for (j = 0; j < prod; j++)
873             fmpz_clear(p2 + j);
874    }
875 
876    /* if there were quotient terms */
877    if (len != 0)
878    {
879       /* unpack monomials of quotient */
880       mpoly_unpack_monomials_tight((*expq), (*expq), len, mults, num, bits);
881 
882       /* put main variable back in quotient */
883       for (i = 0; i < l1; i++)
884       {
885          for (j = 0; j < n1[i]; j++)
886          {
887             (*expq)[i1[i] + j] += ((l1 - i - 1) << shift);
888          }
889       }
890    }
891 
892 cleanup3:
893 
894    for (j = 0; j < talloc; j++)
895       fmpz_clear(temp + j);
896 
897    flint_free(temp);
898    flint_free(texp);
899 
900    TMP_END;
901 
902    /* set remainder length */
903    *lenr = l;
904 
905    /* return quotient length */
906    return len;
907 }
908 
909 /*
910    Use dense array division to set polyq, polyr to poly2/poly3 in num variables,
911    given a list of multipliers to tightly pack exponents and a number of bits
912    for the fields of the exponents of the result, assuming no aliasing. The
913    array "mults" is a list of bases to be used in encoding the array indices
914    from the exponents. The function reallocates its output and returns the
915    length of the quotient. It is assumed that poly2 is not zero. The quotient
916    and remainder are written in reverse order.
917 */
_fmpz_mpoly_divrem_array(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong * mults,slong num,slong bits)918 slong _fmpz_mpoly_divrem_array(slong * lenr,
919        fmpz ** polyq, ulong ** expq, slong * allocq,
920               fmpz ** polyr, ulong ** expr, slong * allocr,
921                 const fmpz * poly2, const ulong * exp2, slong len2,
922         const fmpz * poly3, const ulong * exp3, slong len3, slong * mults,
923                                                          slong num, slong bits)
924 {
925    slong i;
926    ulong * e2, * e3;
927    slong len, prod;
928    slong * prods, * max_exp1, * max_exp3;
929    TMP_INIT;
930 
931    TMP_START;
932 
933    /*
934       compute products 1, b0, b0*b1, b0*b1*b2 ...
935       from list of bases b0, b1, b2, ...
936    */
937    prods = (slong *) TMP_ALLOC((num + 1)*sizeof(slong));
938 
939    prods[0] = 1;
940    for (i = 0; i < num; i++)
941       prods[i + 1] = prods[i]*mults[i];
942    prod = prods[num];
943 
944    /* if array size will be too large, chunk the polynomials */
945    if (prod > MAX_ARRAY_SIZE)
946    {
947       TMP_END;
948 
949 	  return _fmpz_mpoly_divrem_array_chunked(lenr, polyq, expq, allocq,
950                                      polyr, expr, allocr, poly2, exp2, len2,
951                                       poly3, exp3, len3, mults, num - 1, bits);
952    }
953 
954    e2 = (ulong *) TMP_ALLOC(len2*sizeof(ulong));
955    e3 = (ulong *) TMP_ALLOC(len3*sizeof(ulong));
956    max_exp1 = (slong *) TMP_ALLOC(num*sizeof(slong));
957    max_exp3 = (slong *) TMP_ALLOC(num*sizeof(slong));
958 
959    /* pack input exponents tightly with mixed bases specified by "mults" */
960 
961    mpoly_pack_monomials_tight(e2, exp2, len2, mults, num, bits);
962    mpoly_pack_monomials_tight(e3, exp3, len3, mults, num, bits);
963 
964    /* do divrem on tightly packed polys */
965    len = _fmpz_mpoly_divrem_array_tight(lenr, polyq, expq, allocq, 0,
966                                   polyr, expr, allocr, 0, poly2, e2, len2,
967                                                   poly3, e3, len3, mults, num);
968 
969    /* check for overflows */
970    mpoly_max_degrees_tight(max_exp3, e3, len3, prods, num);
971    mpoly_max_degrees_tight(max_exp1, *expq, len, prods, num);
972 
973    for (i = 0; i < num; i++)
974    {
975       if (max_exp3[i] + max_exp1[i] >= mults[i])
976       {
977 	 len = 0;
978          *lenr = 0;
979 
980          break;
981       }
982    }
983    /* unpack output quotient and remainder exponents */
984    mpoly_unpack_monomials_tight((*expq), (*expq), len, mults, num, bits);
985    mpoly_unpack_monomials_tight((*expr), (*expr), *lenr, mults, num, bits);
986 
987    TMP_END;
988 
989    return len;
990 }
991 
992 /*
993    Return 1 if q, r can be set to quotient and remainder of poly2 by poly3,
994    else return 0 if array division not able to be performed.
995 */
fmpz_mpoly_divrem_array(fmpz_mpoly_t q,fmpz_mpoly_t r,const fmpz_mpoly_t poly2,const fmpz_mpoly_t poly3,const fmpz_mpoly_ctx_t ctx)996 int fmpz_mpoly_divrem_array(fmpz_mpoly_t q, fmpz_mpoly_t r,
997                     const fmpz_mpoly_t poly2, const fmpz_mpoly_t poly3,
998                                                     const fmpz_mpoly_ctx_t ctx)
999 {
1000    slong i, exp_bits, N, lenq = 0, lenr = 0, array_size;
1001    ulong * max_fields, * max_fields2, * max_fields3;
1002    ulong * exp2 = poly2->exps, * exp3 = poly3->exps;
1003    int free2 = 0, free3 = 0;
1004    fmpz_mpoly_t temp1, temp2;
1005    fmpz_mpoly_struct * tq, * tr;
1006    int res = 0;
1007 
1008    TMP_INIT;
1009 
1010    /* check divisor is not zero */
1011    if (poly3->length == 0)
1012       flint_throw(FLINT_DIVZERO, "Divide by zero in fmpz_mpoly_divrem_array");
1013 
1014    /* dividend is zero */
1015    if (poly2->length == 0)
1016    {
1017       fmpz_mpoly_zero(q, ctx);
1018       fmpz_mpoly_zero(r, ctx);
1019 
1020       return 1;
1021    }
1022 
1023    TMP_START;
1024 
1025 
1026    /* compute maximum exponents for each variable */
1027    max_fields = (ulong *) TMP_ALLOC(ctx->minfo->nfields*sizeof(ulong));
1028    max_fields2 = (ulong *) TMP_ALLOC(ctx->minfo->nfields*sizeof(ulong));
1029    max_fields3 = (ulong *) TMP_ALLOC(ctx->minfo->nfields*sizeof(ulong));
1030    mpoly_max_fields_ui_sp(max_fields2, poly2->exps, poly2->length,
1031                                                       poly2->bits, ctx->minfo);
1032    mpoly_max_fields_ui_sp(max_fields3, poly3->exps, poly3->length,
1033                                                       poly3->bits, ctx->minfo);
1034    for (i = 0; i < ctx->minfo->nfields; i++)
1035       max_fields[i] = FLINT_MAX(max_fields2[i], max_fields3[i]);
1036 
1037    /* compute number of bits required for output exponents */
1038    exp_bits = FLINT_MAX(poly2->bits, poly3->bits);
1039    N = mpoly_words_per_exp(exp_bits, ctx->minfo);
1040 
1041    /* array division expects each exponent vector in one word */
1042    /* current code is wrong for reversed orderings */
1043    if (N != 1 || mpoly_ordering_isrev(ctx->minfo))
1044       goto cleanup;
1045 
1046    /* compute bounds on output exps, used as mixed bases for packing exps */
1047    array_size = 1;
1048    for (i = 0; i < ctx->minfo->nfields - 1; i++)
1049    {
1050       max_fields2[i] = max_fields[i] + 1;
1051       array_size *= max_fields2[i];
1052    }
1053    max_fields2[ctx->minfo->nfields - 1] = max_fields[ctx->minfo->nfields - 1] + 1;
1054 
1055    /* if exponents too large for array multiplication, exit silently */
1056    if (array_size > MAX_ARRAY_SIZE)
1057       goto cleanup;
1058 
1059    /* expand input exponents to same number of bits as output */
1060    if (exp_bits > poly2->bits)
1061    {
1062       free2 = 1;
1063       exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
1064       mpoly_repack_monomials(exp2, exp_bits, poly2->exps, poly2->bits,
1065                                                     poly2->length, ctx->minfo);
1066    }
1067 
1068    if (exp_bits > poly3->bits)
1069    {
1070       free3 = 1;
1071       exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
1072       mpoly_repack_monomials(exp3, exp_bits, poly3->exps, poly3->bits,
1073                                                     poly3->length, ctx->minfo);
1074    }
1075 
1076    if (exp2[0] < exp3[0])
1077    {
1078       fmpz_mpoly_set(r, poly2, ctx);
1079       fmpz_mpoly_zero(q, ctx);
1080 
1081       res = 1;
1082 
1083       goto cleanup2;
1084    }
1085 
1086    /* handle aliasing and do array division */
1087 
1088    if (q == poly2 || q == poly3)
1089    {
1090       fmpz_mpoly_init2(temp1, poly2->length/poly3->length + 1, ctx);
1091       fmpz_mpoly_fit_bits(temp1, exp_bits, ctx);
1092       temp1->bits = exp_bits;
1093 
1094       tq = temp1;
1095    } else
1096    {
1097       fmpz_mpoly_fit_length(q, poly2->length/poly3->length + 1, ctx);
1098       fmpz_mpoly_fit_bits(q, exp_bits, ctx);
1099       q->bits = exp_bits;
1100 
1101       tq = q;
1102    }
1103 
1104    if (r == poly2 || r == poly3)
1105    {
1106       fmpz_mpoly_init2(temp2, poly3->length, ctx);
1107       fmpz_mpoly_fit_bits(temp2, exp_bits, ctx);
1108       temp2->bits = exp_bits;
1109 
1110       tr = temp2;
1111    } else
1112    {
1113       fmpz_mpoly_fit_length(r, poly3->length, ctx);
1114       fmpz_mpoly_fit_bits(r, exp_bits, ctx);
1115       r->bits = exp_bits;
1116 
1117       tr = r;
1118    }
1119 
1120    lenq = _fmpz_mpoly_divrem_array(&lenr, &tq->coeffs, &tq->exps,
1121         &tq->alloc, &tr->coeffs, &tr->exps, &tr->alloc, poly2->coeffs,
1122                   exp2, poly2->length, poly3->coeffs, exp3, poly3->length,
1123                          (slong *) max_fields2, ctx->minfo->nfields, exp_bits);
1124 
1125     res = (lenq != 0 || lenr != 0);
1126 
1127     if (res)
1128     {
1129         if (q == poly2 || q == poly3)
1130         {
1131             fmpz_mpoly_swap(temp1, q, ctx);
1132             fmpz_mpoly_clear(temp1, ctx);
1133         }
1134 
1135         if (r == poly2 || r == poly3)
1136         {
1137             fmpz_mpoly_swap(temp2, r, ctx);
1138             fmpz_mpoly_clear(temp2, ctx);
1139         }
1140     }
1141     else
1142     {
1143         if (q == poly2 || q == poly3)
1144         {
1145             fmpz_mpoly_clear(temp1, ctx);
1146         }
1147 
1148         if (r == poly2 || r == poly3)
1149         {
1150             fmpz_mpoly_clear(temp2, ctx);
1151         }
1152 
1153         for (i = q->length; i < q->alloc; i++)
1154         {
1155             _fmpz_demote(q->coeffs + i);
1156         }
1157         for (i = r->length; i < r->alloc; i++)
1158         {
1159             _fmpz_demote(r->coeffs + i);
1160         }
1161     }
1162 
1163     _fmpz_mpoly_set_length(q, lenq, ctx);
1164     _fmpz_mpoly_set_length(r, lenr, ctx);
1165 
1166 
1167 cleanup2:
1168 
1169    if (free2)
1170       flint_free(exp2);
1171 
1172    if (free3)
1173       flint_free(exp3);
1174 
1175 cleanup:
1176 
1177    TMP_END;
1178 
1179    return res;
1180 }
1181