1 /*
2 Copyright (C) 2016 William Hart
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include <gmp.h>
13 #include <stdlib.h>
14 #include "flint.h"
15 #include "fmpz.h"
16 #include "fmpz_mpoly.h"
17
18 /* improve locality */
19 #define BLOCK 128
20 #define MAX_ARRAY_SIZE (WORD(300000))
21
22 /*
23 Set polyq to the quotient and polyr to the remainder of poly2 divided
24 by poly3, and return the length of the quotient. The polynomials have
25 their exponents tightly packed, with mixed bases equal to the largest
26 exponent for each variable, e.g. the input polys have exponents of the
27 form a_0 + a_1*b1 + a_2*b_2*b_2 + .... where b_0, b_1, b_2, etc, are
28 the bases, which are equal to the largest possible exponent for
29 each of the respective variables in the exponent. The dividend poly3
30 is assumed to be nonzero. There are assumed to be "num" variables and
31 the bases b_i are passed in the array "mults". The function reallocates
32 its output. The quotient and remainder are written out in reverse order.
33 The quotient and remainder poly are not assumed to be zero on input.
34 The quotient and remainder terms are appended to the existing terms in
35 those polys.
36 */
_fmpz_mpoly_divrem_array_tight(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,slong len0,fmpz ** polyr,ulong ** expr,slong * allocr,slong len1,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong * mults,slong num)37 slong _fmpz_mpoly_divrem_array_tight(slong * lenr,
38 fmpz ** polyq, ulong ** expq, slong * allocq, slong len0,
39 fmpz ** polyr, ulong ** expr, slong * allocr, slong len1,
40 const fmpz * poly2, const ulong * exp2, slong len2,
41 const fmpz * poly3, const ulong * exp3, slong len3,
42 slong * mults, slong num)
43 {
44 slong i, j, q, r, prod, bits1, bits2, bits3, k = len0, l = len1;
45 slong max3 = (slong) exp3[0]; /* largest exponent in poly3 */
46 slong * prods;
47 fmpz c3 = poly3[0];
48 /* abs val of leading coeff of poly3 */
49 ulong u3 = ((ulong) FLINT_ABS(c3)) >> 1;
50 fmpz * p1 = *polyq, * p2 = *polyr;
51 ulong * e1 = *expq, * e2 = *expr;
52 int small;
53 TMP_INIT;
54
55 TMP_START;
56
57 prods = (slong *) TMP_ALLOC((num + 1)*sizeof(slong));
58
59 /*
60 compute products 1, b0, b0*b1, b0*b1*b2 ...
61 from list of bases b0, b1, b2, ...
62 */
63 prods[0] = 1;
64 for (i = 1; i <= num; i++)
65 prods[i] = mults[i - 1]*prods[i - 1];
66
67 prod = prods[num];
68
69 /* compute bound on poly2 - q*poly3 assuming quotient remains small */
70 bits2 = _fmpz_vec_max_bits(poly2, len2);
71 bits3 = _fmpz_vec_max_bits(poly3, len3);
72 /* we assume a bound of FLINT_BITS - 2 for coefficients of the quotient */
73 bits1 = FLINT_ABS(bits3) + FLINT_BITS + FLINT_BIT_COUNT(len3) - 2;
74
75 small = FLINT_ABS(bits2) <= bits1 && FLINT_ABS(bits3) <= FLINT_BITS - 2;
76 bits1 += 2; /* incr. so poly2 - q*poly3 doesn't overflow and for sign */
77
78 /* input coeffs small and intermediate computations fit two words */
79 if (small && bits1 <= 2*FLINT_BITS)
80 {
81 ulong * t2 = (ulong *) TMP_ALLOC(2*prod*sizeof(ulong));
82
83 for (i = 0; i < 2*prod; i++)
84 t2[i] = 0;
85
86 /* poly2 to array format */
87 _fmpz_mpoly_to_ulong_array2(t2, poly2, exp2, len2);
88
89 /* for each term of poly2 array relevant to quotient */
90 for (i = prod - 1; i >= max3; i--)
91 {
92 ulong * ptr = t2 + 2*i;
93 ulong p[2];
94
95 /* if coeff is nonzero */
96 if (ptr[0] != 0 || ptr[1] != 0)
97 {
98 if (0 > (slong) ptr[1])
99 mpn_neg(p, ptr, 2);
100 else
101 flint_mpn_copyi(p, ptr, 2);
102
103 /* not exact quotient monomial, thus remainder monomial */
104 if (!mpoly_monomial_divides_tight(i, max3, prods, num))
105 {
106 /* realloc remainder poly */
107 _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
108
109 /* set remainder coeff... */
110 fmpz_set_signed_uiui(p2 + l, ptr[1], ptr[0]);
111
112 /* ...and exponent */
113 e2[l++] = i;
114 } else /* monomials can be divided exactly */
115 {
116 /* check quotient won't overflow a word */
117 if (u3 <= p[1] || (u3 == 0 && 0 > (slong) p[0])) /* quotient too large */
118 {
119 for (j = len0; j < k; j++)
120 _fmpz_demote(p1 + j);
121 for (j = len1; j < l; j++)
122 _fmpz_demote(p2 + j);
123 k = len0;
124 l = len1;
125
126 goto big;
127 }
128
129 /* quotient and remainder of coeffs */
130 sdiv_qrnnd(q, r, ptr[1], ptr[0], c3);
131
132 /* check coefficient is small, else restart with multiprec code */
133 if (COEFF_IS_MPZ(FLINT_ABS(q))) /* quotient too large */
134 {
135 for (j = len0; j < k; j++)
136 _fmpz_demote(p1 + j);
137 for (j = len1; j < l; j++)
138 _fmpz_demote(p2 + j);
139 k = len0;
140 l = len1;
141
142 goto big;
143 }
144
145 /* check coeff quotient was exact */
146 if (r != 0) /* not an exact division */
147 {
148 /* realloc remainder poly */
149 _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
150
151 /* set remainder coeff... */
152 fmpz_set_si(p2 + l, (slong) r);
153
154 /* ... and exponent */
155 e2[l++] = i;
156 }
157
158 if (q != 0)
159 {
160 /* submul a - q*b */
161 _fmpz_mpoly_submul_array1_slong2_1(t2, q, i - max3,
162 poly3, exp3, len3);
163
164 /* realloc quotient poly */
165 _fmpz_mpoly_fit_length(&p1, &e1, allocq, k + 1, 1);
166 /* set quotient coeff and exponent */
167 fmpz_set_si(p1 + k, q);
168 e1[k++] = i - max3;
169 }
170 }
171 }
172 }
173
174 /* all remaining terms are remainder terms */
175 for ( ; i >= 0; i--)
176 {
177 ulong * ptr = t2 + 2*i;
178
179 /* if coeff nonzero */
180 if (ptr[0] != 0 || ptr[1] != 0) /* not an exact division */
181 {
182 /* realloc remainder poly */
183 _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
184
185 /* set remainder coeff... */
186 fmpz_set_signed_uiui(p2 + l, ptr[1], ptr[0]);
187
188 /* and exponent */
189 e2[l++] = i;
190 }
191 }
192 }
193
194 /* not done, coeffs small and intermediate computations fit three words */
195 if (k == len0 && l == len1 && small)
196 {
197 ulong * t2 = (ulong *) TMP_ALLOC(3*prod*sizeof(ulong));
198
199 for (i = 0; i < 3*prod; i++)
200 t2[i] = 0;
201
202 /* poly2 to array format */
203 _fmpz_mpoly_to_ulong_array(t2, poly2, exp2, len2);
204
205 /* for each term of poly2 array relevant to exact quotient */
206 for (i = prod - 1; i >= max3; i--)
207 {
208 ulong * ptr = t2 + 3*i;
209 ulong p[3];
210
211 /* if coeff is nonzero */
212 if (ptr[0] != 0 || ptr[1] != 0 || ptr[2] != 0)
213 {
214 if (0 > (slong) ptr[2])
215 mpn_neg(p, ptr, 3);
216 else
217 flint_mpn_copyi(p, ptr, 3);
218
219 /* not exact quotient monomial, thus remainder monomial */
220 if (!mpoly_monomial_divides_tight(i, max3, prods, num))
221 {
222 /* realloc remainder poly */
223 _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
224
225 /* set remainder coeff... */
226 fmpz_set_signed_uiuiui(p2 + l, ptr[2], ptr[1], ptr[0]);
227
228 /* ... and exponent */
229 e2[l++] = i;
230 } else /* monomials can be divided exact */
231 {
232 /* check quotient won't overflow a word */
233 if (p[2] > 0 || u3 <= p[1] || (u3 == 0 && 0 > (slong) p[0])) /* quotient too large */
234 {
235 for (j = len0; j < k; j++)
236 _fmpz_demote(p1 + j);
237 for (j = len1; j < l; j++)
238 _fmpz_demote(p2 + j);
239 k = len0;
240 l = len1;
241
242 goto big;
243 }
244
245 /* quotient and remainder of coeffs */
246 sdiv_qrnnd(q, r, ptr[1], ptr[0], c3);
247
248 /* check coefficient is small, else restart with multiprec code */
249 if (COEFF_IS_MPZ(FLINT_ABS(q))) /* quotient too large */
250 {
251 for (j = len0; j < k; j++)
252 _fmpz_demote(p1 + j);
253 for (j = len1; j < l; j++)
254 _fmpz_demote(p2 + j);
255 k = len0;
256 l = len1;
257
258 goto big;
259 }
260
261 /* check if coeff quotient was exact */
262 if (r != 0) /* else remainder term */
263 {
264 /* reallocate remainder poly */
265 _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
266
267 /* set remainder coeff... */
268 fmpz_set_si(p2 + l, (slong) r);
269
270 /* and exponent */
271 e2[l++] = i;
272 }
273
274 /* if nonzero quotient */
275 if (q != 0)
276 {
277 /* submul a - q*b */
278 _fmpz_mpoly_submul_array1_slong_1(t2, q, i - max3,
279 poly3, exp3, len3);
280
281 /* realloc quotient poly */
282 _fmpz_mpoly_fit_length(&p1, &e1, allocq, k + 1, 1);
283
284 /* set quotient coeff and exponent */
285 fmpz_set_si(p1 + k, q);
286
287 e1[k++] = i - max3;
288 }
289 }
290 }
291 }
292
293 /* all remaining terms are remainder terms */
294 for ( ; i >= 0; i--)
295 {
296 ulong * ptr = t2 + 3*i;
297
298 /* if coeff nonzero */
299 if (ptr[0] != 0 || ptr[1] != 0 || ptr[2] != 0)
300 {
301 /* not an exact division */
302
303 /* realloc remainder poly */
304 _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
305
306 /* set remainder coeff... */
307 fmpz_set_signed_uiuiui(p2 + l, ptr[2], ptr[1], ptr[0]);
308
309 /* ...and exponent */
310 e2[l++] = i;
311 }
312 }
313 }
314
315 big:
316
317 /* if still not done, use multiprecision coeffs instead */
318 if (k == len0 && l == len1)
319 {
320 fmpz * t2 = (fmpz *) TMP_ALLOC(prod*sizeof(fmpz));
321 fmpz_t fq, fr;
322
323 fmpz_init(fq);
324 fmpz_init(fr);
325
326 for (i = 0; i < prod; i++)
327 fmpz_init(t2 + i);
328
329 /* poly2 to array format */
330 _fmpz_mpoly_to_fmpz_array(t2, poly2, exp2, len2);
331
332 /* for each term of poly2 array relevant to exact quotient */
333 for (i = prod - 1; i >= max3; i--)
334 {
335 /* if coeff is nonzero */
336 if (!fmpz_is_zero(t2 + i))
337 {
338 /* not exact quotient monomial, thus remainder monomial */
339 if (!mpoly_monomial_divides_tight(i, max3, prods, num))
340 {
341 /* realloc remainder poly */
342 _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
343
344 /* set remainder coeff... */
345 fmpz_set(p2 + l, t2 + i);
346
347 /* ... and exponent */
348 e2[l++] = i;
349 } else /* monomials can be divided exactly */
350 {
351 /* quotient and remainder of coeffs */
352 fmpz_fdiv_qr(fq, fr, t2 + i, poly3);
353
354 /* check if coeff quotient was exact */
355 if (!fmpz_is_zero(fr)) /* else remainder term */
356 {
357 /* realloc remainder poly */
358 _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
359
360 /* set remainder coeff... */
361 fmpz_set(p2 + l, fr);
362
363 /* and exponent */
364 e2[l++] = i;
365 }
366
367 /* if nonzero quotient */
368 if (!fmpz_is_zero(fq))
369 {
370 /* submul a - q*b */
371 _fmpz_mpoly_submul_array1_fmpz_1(t2, fq, i - max3,
372 poly3, exp3, len3);
373
374 /* realloc quotient poly */
375 _fmpz_mpoly_fit_length(&p1, &e1, allocq, k + 1, 1);
376
377 /* set quotient coeff and exponent */
378 fmpz_set(p1 + k, fq);
379 e1[k++] = i - max3;
380 }
381 }
382 }
383 }
384
385 /* all remaining terms are remainder terms */
386 for ( ; i >= 0; i--)
387 {
388 /* if coeff nonzero */
389 if (!fmpz_is_zero(t2 + i))
390 {
391 /* remainder */
392
393 /* realloc remainder poly */
394 _fmpz_mpoly_fit_length(&p2, &e2, allocr, l + 1, 1);
395
396 /* set remainder coeff... */
397 fmpz_set(p2 + l, t2 + i);
398
399 /* ... and exponent */
400 e2[l++] = i;
401 }
402 }
403
404 fmpz_clear(fq);
405 fmpz_clear(fr);
406
407 for (i = 0; i < prod; i++)
408 fmpz_clear(t2 + i);
409 }
410
411 (*polyq) = p1;
412 (*expq) = e1;
413 (*polyr) = p2;
414 (*expr) = e2;
415
416 /* set remainder poly length */
417 (*lenr) = l - len1;
418
419 TMP_END;
420
421 /* return quotient poly length */
422 return k - len0;
423 }
424
425 /*
426 Use dense array division to set polyq, polyr to poly2/poly3 in num + 1
427 variables, given a list of multipliers to tightly pack exponents and a
428 number of bits for the fields of the exponents of the result, assuming
429 no aliasing. classical exact division in main variable, array
430 multiplication (submul) for multivariate coefficients in remaining num
431 variables. The function reallocates its output and returns the length
432 of the quotient poly. It is assumed that poly2 is not zero. The
433 quotient and remainder are written in reverse order.
434 */
_fmpz_mpoly_divrem_array_chunked(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong * mults,slong num,slong bits)435 slong _fmpz_mpoly_divrem_array_chunked(slong * lenr,
436 fmpz ** polyq, ulong ** expq, slong * allocq,
437 fmpz ** polyr, ulong ** expr, slong * allocr,
438 const fmpz * poly2, const ulong * exp2, slong len2,
439 const fmpz * poly3, const ulong * exp3, slong len3, slong * mults,
440 slong num, slong bits)
441 {
442 slong i, j, k, l = 0, m, prod, len = 0, l1, l2, l3;
443 slong bits1, bits2, bits3 = 0, tlen, talloc;
444 slong shift = bits*(num);
445 slong * i1, * i2, * i3, * n1, * n2, * n3, * prods;
446 slong * b1, * b3, * maxb1, * maxb3, * max_exp1, * max_exp3;
447 ulong * e2, * e3, * texp, * p2;
448 fmpz * temp;
449 int small;
450 TMP_INIT;
451
452 TMP_START;
453
454 /*
455 compute products 1, b0, b0*b1, b0*b1*b2 ...
456 from list of bases b0, b1, b2, ...
457 */
458 prods = (slong *) TMP_ALLOC((num + 1)*sizeof(slong));
459
460 prods[0] = 1;
461 for (i = 0; i < num; i++)
462 prods[i + 1] = prods[i]*mults[i];
463 prod = prods[num];
464
465 /* lengths of poly2, poly3 and polyq in chunks */
466 l2 = 1 + (slong) (exp2[0] >> shift);
467 l3 = 1 + (slong) (exp3[0] >> shift);
468
469 l1 = FLINT_MAX(l2 - l3 + 1, 0);
470
471 /* compute indices and lengths of coefficients of polys in main variable */
472
473 i1 = (slong *) TMP_ALLOC(l1*sizeof(slong));
474 n1 = (slong *) TMP_ALLOC(l1*sizeof(slong));
475 i2 = (slong *) TMP_ALLOC(l2*sizeof(slong));
476 n2 = (slong *) TMP_ALLOC(l2*sizeof(slong));
477 i3 = (slong *) TMP_ALLOC(l3*sizeof(slong));
478 n3 = (slong *) TMP_ALLOC(l3*sizeof(slong));
479 b1 = (slong *) TMP_ALLOC(l1*sizeof(slong));
480 maxb1 = (slong *) TMP_ALLOC(l1*sizeof(slong));
481 b3 = (slong *) TMP_ALLOC(l3*sizeof(slong));
482 maxb3 = (slong *) TMP_ALLOC(l3*sizeof(slong));
483 max_exp1 = (slong *) TMP_ALLOC(l1*num*sizeof(slong));
484 max_exp3 = (slong *) TMP_ALLOC(l3*num*sizeof(slong));
485
486 mpoly_main_variable_terms1(i2, n2, exp2, l2, len2, num + 1, num + 1, bits);
487 mpoly_main_variable_terms1(i3, n3, exp3, l3, len3, num + 1, num + 1, bits);
488
489 /* work out max bits for each coeff and optimal bits */
490
491 for (i = 0; i < l3; i++)
492 {
493 _fmpz_vec_sum_max_bits(&b3[i], &maxb3[i], poly3+i3[i], n3[i]);
494
495 if (bits3 < maxb3[i])
496 bits3 = maxb3[i];
497 }
498
499 /* pack input coefficients tightly */
500
501 e2 = (ulong *) TMP_ALLOC(len2*sizeof(ulong));
502 e3 = (ulong *) TMP_ALLOC(len3*sizeof(ulong));
503
504 mpoly_pack_monomials_tight(e2, exp2, len2, mults, num, bits);
505 mpoly_pack_monomials_tight(e3, exp3, len3, mults, num, bits);
506
507 /* work out maximum exponents for each chunk */
508 for (i = 0; i < l3; i++)
509 mpoly_max_degrees_tight(max_exp3 + i*num, e3 + i3[i], n3[i], prods, num);
510
511 /* bound poly2 coeffs and check input/output coeffs likely small */
512 bits2 = _fmpz_vec_max_bits(poly2, len2);
513 /* we assume a bound of FLINT_BITS - 2 for coefficients of the quotient */
514 bits1 = FLINT_ABS(bits3) + FLINT_BITS + FLINT_BIT_COUNT(len3) - 2;
515
516 small = FLINT_ABS(bits2) <= bits1 && FLINT_ABS(bits3) <= FLINT_BITS - 2;
517
518 /* alloc space for copy of coeff/chunk of poly2 */
519
520 temp = (fmpz *) flint_calloc(n2[0] + 1, sizeof(fmpz));
521 texp = (ulong *) flint_malloc((n2[0] + 1)*sizeof(ulong));
522 talloc = n2[0] + 1; /* plus one so doubling always increases size */
523
524 /* enough space for three words per coeff, even if only one or two needed */
525 p2 = (ulong *) TMP_ALLOC(3*prod*sizeof(ulong));
526
527 /* coefficients likely to be small */
528 if (small)
529 {
530 /* for each chunk of poly2 */
531 for (i = 0; i < l2; i++)
532 {
533 slong num1 = 0;
534 bits1 = 0;
535
536 /* if there are already quotient terms */
537 if (i != 0)
538 {
539 /* compute bound on intermediate computations a - q*b */
540 for (j = 0; j < i && j < l1; j++)
541 {
542 k = i - j;
543
544 if (k < l3 && k >= 0)
545 {
546 for (m = 0; m < num; m++)
547 {
548 if (max_exp1[j*num + m] + max_exp3[k*num + m] >= mults[m])
549 {
550 for (j = 0; j < len; j++)
551 _fmpz_demote((*polyq) + j);
552 for (j = 0; j < l; j++)
553 _fmpz_demote((*polyr) + j);
554 len = 0;
555 l = 0;
556 goto cleanup3;
557 }
558 }
559
560 bits1 = FLINT_MAX(bits1, FLINT_MIN(b1[j] + maxb3[k], maxb1[j] + b3[k]));
561 num1++;
562 }
563 }
564
565 bits1 += FLINT_BIT_COUNT(num1);
566 bits1 = FLINT_MAX(FLINT_ABS(bits2), bits1);
567
568 bits1 += 2; /* bit for sign and so a - q*b doesn't overflow */
569 } else
570 bits1 = FLINT_ABS(bits2) + 1; /* extra bit for sign */
571
572 /* intermediate computations fit in one word */
573 if (bits1 <= FLINT_BITS)
574 {
575 for (j = 0; j < prod; j++)
576 p2[j] = 0;
577
578 /* convert relevant coeff/chunk of poly2 to array format */
579 _fmpz_mpoly_to_ulong_array1(p2, poly2 + i2[i], e2 + i2[i], n2[i]);
580
581 /* submuls */
582
583 for (j = 0; j < i && j < l1; j++)
584 {
585 k = i - j;
586
587 if (k < l3 && k >= 0)
588 {
589 _fmpz_mpoly_submul_array1_slong1(p2, (*polyq) + i1[j],
590 (*expq) + i1[j], n1[j], poly3 + i3[k], e3 + i3[k], n3[k]);
591 }
592 }
593
594 /* convert chunk from array format */
595 tlen = _fmpz_mpoly_from_ulong_array1(&temp, &texp, &talloc,
596 p2, mults, num, bits, 0);
597 } else if (bits1 <= 2*FLINT_BITS) /* intermed comps fit two words */
598 {
599 for (j = 0; j < 2*prod; j++)
600 p2[j] = 0;
601
602 /* convert relevant coeff/chunk of poly2 to array format */
603 _fmpz_mpoly_to_ulong_array2(p2, poly2 + i2[i], e2 + i2[i], n2[i]);
604
605 /* submuls */
606
607 for (j = 0; j < i && j < l1; j++)
608 {
609 k = i - j;
610
611 if (k < l3 && k >= 0)
612 {
613 _fmpz_mpoly_submul_array1_slong2(p2, (*polyq) + i1[j],
614 (*expq) + i1[j], n1[j], poly3 + i3[k], e3 + i3[k], n3[k]);
615 }
616 }
617
618 /* convert chunk from array format */
619 tlen = _fmpz_mpoly_from_ulong_array2(&temp, &texp, &talloc,
620 p2, mults, num, bits, 0);
621 } else /* intermed comps fit three words */
622 {
623 for (j = 0; j < 3*prod; j++)
624 p2[j] = 0;
625
626 /* convert relevant coeff/chunk of poly2 to array format */
627 _fmpz_mpoly_to_ulong_array(p2, poly2 + i2[i], e2 + i2[i], n2[i]);
628
629 /* submuls */
630
631 for (j = 0; j < i && j < l1; j++)
632 {
633 k = i - j;
634
635 if (k < l3 && k >= 0)
636 _fmpz_mpoly_submul_array1_slong(p2, (*polyq) + i1[j],
637 (*expq) + i1[j], n1[j], poly3 + i3[k], e3 + i3[k], n3[k]);
638 }
639
640 /* convert chunk from array format */
641 tlen = _fmpz_mpoly_from_ulong_array(&temp, &texp, &talloc,
642 p2, mults, num, bits, 0);
643 }
644
645 if (tlen != 0) /* nonzero coeff/chunk */
646 {
647 if (i < l1) /* potentially a quotient with remainder */
648 {
649 /* tightly pack chunk exponents */
650 mpoly_pack_monomials_tight(texp, texp, tlen, mults, num, bits);
651
652 /* set starting index for quotient chunk we are about to compute */
653 i1[i] = len;
654
655 /* compute quotient chunk and set length of quotient chunk */
656 n1[i] = _fmpz_mpoly_divrem_array_tight(lenr, polyq,
657 expq, allocq, len, polyr, expr, allocr, l, temp, texp,
658 tlen, poly3 + i3[0], e3 + i3[0], n3[0], mults, num);
659
660 /* unpack remainder exponents */
661 mpoly_unpack_monomials_tight(*expr + l,
662 *expr + l, *lenr, mults, num, bits);
663
664 /* insert main variable */
665 for (j = 0; j < *lenr; j++)
666 (*expr)[l + j] += ((l2 - i - 1) << shift);
667
668 /* work out maximum exponents for chunk */
669 mpoly_max_degrees_tight(max_exp1 + i*num,
670 (*expq) + i1[i], n1[i], prods, num);
671
672 /* check there were no exponent overflows */
673 for (m = 0; m < num; m++)
674 {
675 if (max_exp3[m] + max_exp1[i*num + m] >= mults[m])
676 {
677 for (j = 0; j < len; j++)
678 _fmpz_demote((*polyq) + j);
679 for (j = 0; j < l; j++)
680 _fmpz_demote((*polyr) + j);
681 len = 0;
682 l = 0;
683
684 goto cleanup3;
685 }
686 }
687
688 /* check the quotient didn't have large coefficients */
689 if (FLINT_ABS(_fmpz_vec_max_bits((*polyq) + len,
690 n1[i])) > FLINT_BITS - 2)
691 {
692 for (j = 0; j < len; j++)
693 _fmpz_demote((*polyq) + j);
694 for (j = 0; j < l; j++)
695 _fmpz_demote((*polyr) + j);
696 len = 0;
697 l = 0;
698
699 goto big;
700 }
701
702 /* abs bound and sum of abs vals of coeffs of quotient chunk */
703 _fmpz_vec_sum_max_bits(&b1[i], &maxb1[i], (*polyq)+i1[i], n1[i]);
704
705 /* update length of output quotient and remainder polys */
706 len += n1[i];
707 l += *lenr;
708 } else /* remainder terms only */
709 {
710 /* realloc remainder poly */
711 _fmpz_mpoly_fit_length(polyr, expr, allocr, l + tlen, 1);
712
713 /* for each term in remainder chunk */
714 for (j = 0; j < tlen; j++)
715 {
716 /* set remainder coeff and exponent */
717 fmpz_set(*polyr + l + j, temp + j);
718 (*expr)[l + j] = (texp[j]) + ((l2 - i - 1) << shift);
719 }
720
721 l += tlen;
722 }
723 } else if (i < l1) /* zero chunk, no quotient or remainder */
724 {
725 /* set index and length of quotient chunk */
726 i1[i] = len;
727 n1[i] = 0;
728 b1[i] = 0;
729 maxb1[i] = 0;
730
731 /* write out maximum exponents in chunk */
732 mpoly_max_degrees_tight(max_exp1 + i*num,
733 (*expq) + i1[i], n1[i], prods, num);
734 }
735 }
736 }
737
738 big:
739
740 /* if not done, use multiprecision coeffs instead */
741 if (len == 0 && l == 0)
742 {
743 fmpz * p2 = (fmpz *) TMP_ALLOC(prod*sizeof(fmpz));
744
745 for (j = 0; j < prod; j++)
746 fmpz_init(p2 + j);
747
748 /* for each chunk of poly2 */
749 for (i = 0; i < l2; i++)
750 {
751 for (j = 0; j < prod; j++)
752 fmpz_zero(p2 + j);
753
754 /* convert relevant coeff/chunk of poly2 to array format */
755 _fmpz_mpoly_to_fmpz_array(p2, poly2 + i2[i], e2 + i2[i], n2[i]);
756
757 /* submuls */
758
759 for (j = 0; j < i && j < l1; j++)
760 {
761 k = i - j;
762
763 if (k < l3 && k >= 0)
764 {
765 for (m = 0; m < num; m++)
766 {
767 if (max_exp1[j*num + m] + max_exp3[k*num + m] >= mults[m])
768 {
769 for (j = 0; j < len; j++)
770 _fmpz_demote((*polyq) + j);
771 for (j = 0; j < l; j++)
772 _fmpz_demote((*polyr) + j);
773 len = 0;
774 l = 0;
775
776 for (j = 0; j < prod; j++)
777 fmpz_clear(p2 + j);
778 goto cleanup3;
779 }
780 }
781
782 _fmpz_mpoly_submul_array1_fmpz(p2, (*polyq) + i1[j],
783 (*expq) + i1[j], n1[j], poly3 + i3[k], e3 + i3[k], n3[k]);
784 }
785 }
786
787 /* convert chunk from array format */
788 tlen = _fmpz_mpoly_from_fmpz_array(&temp, &texp, &talloc,
789 p2, mults, num, bits, 0);
790
791 if (tlen != 0) /* nonzero coeff/chunk */
792 {
793 if (i < l1) /* potentially a quotient with remainder */
794 {
795 /* tightly pack chunk exponents */
796 mpoly_pack_monomials_tight(texp, texp, tlen, mults, num, bits);
797
798 /* set start index of quotient chunk we are about to compute */
799 i1[i] = len;
800
801 /* compute quotient chunk and set length of quotient chunk */
802 n1[i] = _fmpz_mpoly_divrem_array_tight(lenr, polyq,
803 expq, allocq, len, polyr, expr, allocr, l, temp, texp,
804 tlen, poly3 + i3[0], e3 + i3[0], n3[0], mults, num);
805
806 /* unpack remainder exponents */
807 mpoly_unpack_monomials_tight(*expr + l,
808 *expr + l, *lenr, mults, num, bits);
809
810 /* insert main variable */
811 for (j = 0; j < *lenr; j++)
812 (*expr)[l + j] += ((l2 - i - 1) << shift);
813
814 /* work out maximum exponents for chunk */
815 mpoly_max_degrees_tight(max_exp1 + i*num,
816 (*expq) + i1[i], n1[i], prods, num);
817
818 /* check there were no exponent overflows */
819 for (m = 0; m < num; m++)
820 {
821 if (max_exp3[m] + max_exp1[i*num + m] >= mults[m])
822 {
823 for (j = 0; j < len; j++)
824 _fmpz_demote((*polyq) + j);
825 for (j = 0; j < l; j++)
826 _fmpz_demote((*polyr) + j);
827 len = 0;
828 l = 0;
829
830 for (j = 0; j < prod; j++)
831 fmpz_clear(p2 + j);
832 goto cleanup3;
833 }
834 }
835
836 /* abs bound and sum of abs vals of coeffs of quotient chunk */
837 _fmpz_vec_sum_max_bits(&b1[i], &maxb1[i], (*polyq)+i1[i], n1[i]);
838
839 /* update length of output quotient and remainder polys */
840 len += n1[i];
841 l += *lenr;
842 } else /* remainder terms only */
843 {
844 /* realloc remainder poly */
845 _fmpz_mpoly_fit_length(polyr, expr, allocr, l + tlen, 1);
846
847 /* for each term in chunk */
848 for (j = 0; j < tlen; j++)
849 {
850 /* set remainder coeff and exponent */
851 fmpz_set(*polyr + l + j, temp + j);
852 (*expr)[l + j] = (texp[j]) + ((l2 - i - 1) << shift);
853 }
854
855 /* update length of output remainder poly */
856 l += tlen;
857 }
858 } else if (i < l1) /* zero chunk, no quotient or remainder */
859 {
860 /* set index and length of quotient chunk */
861 i1[i] = len;
862 n1[i] = 0;
863 b1[i] = 0;
864 maxb1[i] = 0;
865
866 /* write out maximum exponents in chunk */
867 mpoly_max_degrees_tight(max_exp1 + i*num,
868 (*expq) + i1[i], n1[i], prods, num);
869 }
870 }
871
872 for (j = 0; j < prod; j++)
873 fmpz_clear(p2 + j);
874 }
875
876 /* if there were quotient terms */
877 if (len != 0)
878 {
879 /* unpack monomials of quotient */
880 mpoly_unpack_monomials_tight((*expq), (*expq), len, mults, num, bits);
881
882 /* put main variable back in quotient */
883 for (i = 0; i < l1; i++)
884 {
885 for (j = 0; j < n1[i]; j++)
886 {
887 (*expq)[i1[i] + j] += ((l1 - i - 1) << shift);
888 }
889 }
890 }
891
892 cleanup3:
893
894 for (j = 0; j < talloc; j++)
895 fmpz_clear(temp + j);
896
897 flint_free(temp);
898 flint_free(texp);
899
900 TMP_END;
901
902 /* set remainder length */
903 *lenr = l;
904
905 /* return quotient length */
906 return len;
907 }
908
909 /*
910 Use dense array division to set polyq, polyr to poly2/poly3 in num variables,
911 given a list of multipliers to tightly pack exponents and a number of bits
912 for the fields of the exponents of the result, assuming no aliasing. The
913 array "mults" is a list of bases to be used in encoding the array indices
914 from the exponents. The function reallocates its output and returns the
915 length of the quotient. It is assumed that poly2 is not zero. The quotient
916 and remainder are written in reverse order.
917 */
_fmpz_mpoly_divrem_array(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong * mults,slong num,slong bits)918 slong _fmpz_mpoly_divrem_array(slong * lenr,
919 fmpz ** polyq, ulong ** expq, slong * allocq,
920 fmpz ** polyr, ulong ** expr, slong * allocr,
921 const fmpz * poly2, const ulong * exp2, slong len2,
922 const fmpz * poly3, const ulong * exp3, slong len3, slong * mults,
923 slong num, slong bits)
924 {
925 slong i;
926 ulong * e2, * e3;
927 slong len, prod;
928 slong * prods, * max_exp1, * max_exp3;
929 TMP_INIT;
930
931 TMP_START;
932
933 /*
934 compute products 1, b0, b0*b1, b0*b1*b2 ...
935 from list of bases b0, b1, b2, ...
936 */
937 prods = (slong *) TMP_ALLOC((num + 1)*sizeof(slong));
938
939 prods[0] = 1;
940 for (i = 0; i < num; i++)
941 prods[i + 1] = prods[i]*mults[i];
942 prod = prods[num];
943
944 /* if array size will be too large, chunk the polynomials */
945 if (prod > MAX_ARRAY_SIZE)
946 {
947 TMP_END;
948
949 return _fmpz_mpoly_divrem_array_chunked(lenr, polyq, expq, allocq,
950 polyr, expr, allocr, poly2, exp2, len2,
951 poly3, exp3, len3, mults, num - 1, bits);
952 }
953
954 e2 = (ulong *) TMP_ALLOC(len2*sizeof(ulong));
955 e3 = (ulong *) TMP_ALLOC(len3*sizeof(ulong));
956 max_exp1 = (slong *) TMP_ALLOC(num*sizeof(slong));
957 max_exp3 = (slong *) TMP_ALLOC(num*sizeof(slong));
958
959 /* pack input exponents tightly with mixed bases specified by "mults" */
960
961 mpoly_pack_monomials_tight(e2, exp2, len2, mults, num, bits);
962 mpoly_pack_monomials_tight(e3, exp3, len3, mults, num, bits);
963
964 /* do divrem on tightly packed polys */
965 len = _fmpz_mpoly_divrem_array_tight(lenr, polyq, expq, allocq, 0,
966 polyr, expr, allocr, 0, poly2, e2, len2,
967 poly3, e3, len3, mults, num);
968
969 /* check for overflows */
970 mpoly_max_degrees_tight(max_exp3, e3, len3, prods, num);
971 mpoly_max_degrees_tight(max_exp1, *expq, len, prods, num);
972
973 for (i = 0; i < num; i++)
974 {
975 if (max_exp3[i] + max_exp1[i] >= mults[i])
976 {
977 len = 0;
978 *lenr = 0;
979
980 break;
981 }
982 }
983 /* unpack output quotient and remainder exponents */
984 mpoly_unpack_monomials_tight((*expq), (*expq), len, mults, num, bits);
985 mpoly_unpack_monomials_tight((*expr), (*expr), *lenr, mults, num, bits);
986
987 TMP_END;
988
989 return len;
990 }
991
992 /*
993 Return 1 if q, r can be set to quotient and remainder of poly2 by poly3,
994 else return 0 if array division not able to be performed.
995 */
fmpz_mpoly_divrem_array(fmpz_mpoly_t q,fmpz_mpoly_t r,const fmpz_mpoly_t poly2,const fmpz_mpoly_t poly3,const fmpz_mpoly_ctx_t ctx)996 int fmpz_mpoly_divrem_array(fmpz_mpoly_t q, fmpz_mpoly_t r,
997 const fmpz_mpoly_t poly2, const fmpz_mpoly_t poly3,
998 const fmpz_mpoly_ctx_t ctx)
999 {
1000 slong i, exp_bits, N, lenq = 0, lenr = 0, array_size;
1001 ulong * max_fields, * max_fields2, * max_fields3;
1002 ulong * exp2 = poly2->exps, * exp3 = poly3->exps;
1003 int free2 = 0, free3 = 0;
1004 fmpz_mpoly_t temp1, temp2;
1005 fmpz_mpoly_struct * tq, * tr;
1006 int res = 0;
1007
1008 TMP_INIT;
1009
1010 /* check divisor is not zero */
1011 if (poly3->length == 0)
1012 flint_throw(FLINT_DIVZERO, "Divide by zero in fmpz_mpoly_divrem_array");
1013
1014 /* dividend is zero */
1015 if (poly2->length == 0)
1016 {
1017 fmpz_mpoly_zero(q, ctx);
1018 fmpz_mpoly_zero(r, ctx);
1019
1020 return 1;
1021 }
1022
1023 TMP_START;
1024
1025
1026 /* compute maximum exponents for each variable */
1027 max_fields = (ulong *) TMP_ALLOC(ctx->minfo->nfields*sizeof(ulong));
1028 max_fields2 = (ulong *) TMP_ALLOC(ctx->minfo->nfields*sizeof(ulong));
1029 max_fields3 = (ulong *) TMP_ALLOC(ctx->minfo->nfields*sizeof(ulong));
1030 mpoly_max_fields_ui_sp(max_fields2, poly2->exps, poly2->length,
1031 poly2->bits, ctx->minfo);
1032 mpoly_max_fields_ui_sp(max_fields3, poly3->exps, poly3->length,
1033 poly3->bits, ctx->minfo);
1034 for (i = 0; i < ctx->minfo->nfields; i++)
1035 max_fields[i] = FLINT_MAX(max_fields2[i], max_fields3[i]);
1036
1037 /* compute number of bits required for output exponents */
1038 exp_bits = FLINT_MAX(poly2->bits, poly3->bits);
1039 N = mpoly_words_per_exp(exp_bits, ctx->minfo);
1040
1041 /* array division expects each exponent vector in one word */
1042 /* current code is wrong for reversed orderings */
1043 if (N != 1 || mpoly_ordering_isrev(ctx->minfo))
1044 goto cleanup;
1045
1046 /* compute bounds on output exps, used as mixed bases for packing exps */
1047 array_size = 1;
1048 for (i = 0; i < ctx->minfo->nfields - 1; i++)
1049 {
1050 max_fields2[i] = max_fields[i] + 1;
1051 array_size *= max_fields2[i];
1052 }
1053 max_fields2[ctx->minfo->nfields - 1] = max_fields[ctx->minfo->nfields - 1] + 1;
1054
1055 /* if exponents too large for array multiplication, exit silently */
1056 if (array_size > MAX_ARRAY_SIZE)
1057 goto cleanup;
1058
1059 /* expand input exponents to same number of bits as output */
1060 if (exp_bits > poly2->bits)
1061 {
1062 free2 = 1;
1063 exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
1064 mpoly_repack_monomials(exp2, exp_bits, poly2->exps, poly2->bits,
1065 poly2->length, ctx->minfo);
1066 }
1067
1068 if (exp_bits > poly3->bits)
1069 {
1070 free3 = 1;
1071 exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
1072 mpoly_repack_monomials(exp3, exp_bits, poly3->exps, poly3->bits,
1073 poly3->length, ctx->minfo);
1074 }
1075
1076 if (exp2[0] < exp3[0])
1077 {
1078 fmpz_mpoly_set(r, poly2, ctx);
1079 fmpz_mpoly_zero(q, ctx);
1080
1081 res = 1;
1082
1083 goto cleanup2;
1084 }
1085
1086 /* handle aliasing and do array division */
1087
1088 if (q == poly2 || q == poly3)
1089 {
1090 fmpz_mpoly_init2(temp1, poly2->length/poly3->length + 1, ctx);
1091 fmpz_mpoly_fit_bits(temp1, exp_bits, ctx);
1092 temp1->bits = exp_bits;
1093
1094 tq = temp1;
1095 } else
1096 {
1097 fmpz_mpoly_fit_length(q, poly2->length/poly3->length + 1, ctx);
1098 fmpz_mpoly_fit_bits(q, exp_bits, ctx);
1099 q->bits = exp_bits;
1100
1101 tq = q;
1102 }
1103
1104 if (r == poly2 || r == poly3)
1105 {
1106 fmpz_mpoly_init2(temp2, poly3->length, ctx);
1107 fmpz_mpoly_fit_bits(temp2, exp_bits, ctx);
1108 temp2->bits = exp_bits;
1109
1110 tr = temp2;
1111 } else
1112 {
1113 fmpz_mpoly_fit_length(r, poly3->length, ctx);
1114 fmpz_mpoly_fit_bits(r, exp_bits, ctx);
1115 r->bits = exp_bits;
1116
1117 tr = r;
1118 }
1119
1120 lenq = _fmpz_mpoly_divrem_array(&lenr, &tq->coeffs, &tq->exps,
1121 &tq->alloc, &tr->coeffs, &tr->exps, &tr->alloc, poly2->coeffs,
1122 exp2, poly2->length, poly3->coeffs, exp3, poly3->length,
1123 (slong *) max_fields2, ctx->minfo->nfields, exp_bits);
1124
1125 res = (lenq != 0 || lenr != 0);
1126
1127 if (res)
1128 {
1129 if (q == poly2 || q == poly3)
1130 {
1131 fmpz_mpoly_swap(temp1, q, ctx);
1132 fmpz_mpoly_clear(temp1, ctx);
1133 }
1134
1135 if (r == poly2 || r == poly3)
1136 {
1137 fmpz_mpoly_swap(temp2, r, ctx);
1138 fmpz_mpoly_clear(temp2, ctx);
1139 }
1140 }
1141 else
1142 {
1143 if (q == poly2 || q == poly3)
1144 {
1145 fmpz_mpoly_clear(temp1, ctx);
1146 }
1147
1148 if (r == poly2 || r == poly3)
1149 {
1150 fmpz_mpoly_clear(temp2, ctx);
1151 }
1152
1153 for (i = q->length; i < q->alloc; i++)
1154 {
1155 _fmpz_demote(q->coeffs + i);
1156 }
1157 for (i = r->length; i < r->alloc; i++)
1158 {
1159 _fmpz_demote(r->coeffs + i);
1160 }
1161 }
1162
1163 _fmpz_mpoly_set_length(q, lenq, ctx);
1164 _fmpz_mpoly_set_length(r, lenr, ctx);
1165
1166
1167 cleanup2:
1168
1169 if (free2)
1170 flint_free(exp2);
1171
1172 if (free3)
1173 flint_free(exp3);
1174
1175 cleanup:
1176
1177 TMP_END;
1178
1179 return res;
1180 }
1181