1 /*
2 Copyright (C) 2016 William Hart
3 Copyright (C) 2018 Daniel Schultz
4
5 This file is part of FLINT.
6
7 FLINT is free software: you can redistribute it and/or modify it under
8 the terms of the GNU Lesser General Public License (LGPL) as published
9 by the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version. See <http://www.gnu.org/licenses/>.
11 */
12
13 #include <gmp.h>
14 #include <stdlib.h>
15 #include "flint.h"
16 #include "fmpz.h"
17 #include "fmpz_mpoly.h"
18 #include "longlong.h"
19
20 /*
21 Set polyq to the quotient poly2 by poly3 discarding remainder (with notional
22 remainder coeffs reduced modulo the leading coeff of poly3), and return
23 the length of the quotient. This version of the function assumes the
24 exponent vectors all fit in a single word. The exponent vectors are
25 assumed to have fields with the given number of bits. Assumes input polys
26 are nonzero. Implements "Polynomial division using dynamic arrays, heaps
27 and packed exponents" by Michael Monagan and Roman Pearce [1], except that
28 we use a heap with smallest exponent at head. Note that if a < b then
29 (n - b) < (n - b) where n is the maximum value a and b can take. The word
30 "maxn" is set to an exponent vector whose fields are all set to such a
31 value n. This allows division from left to right with a heap with smallest
32 exponent at the head. Quotient poly is written in reverse order.
33 [1] http://www.cecm.sfu.ca/~rpearcea/sdmp/sdmp_paper.pdf
34 */
_fmpz_mpoly_div_monagan_pearce1(fmpz ** polyq,ulong ** expq,slong * allocq,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,ulong maskhi)35 slong _fmpz_mpoly_div_monagan_pearce1(fmpz ** polyq, ulong ** expq,
36 slong * allocq, const fmpz * poly2, const ulong * exp2,
37 slong len2, const fmpz * poly3, const ulong * exp3, slong len3,
38 slong bits, ulong maskhi)
39 {
40 slong i, j, q_len, s;
41 slong next_loc, heap_len = 2;
42 mpoly_heap1_s * heap;
43 mpoly_heap_t * chain;
44 slong * store, * store_base;
45 mpoly_heap_t * x;
46 fmpz * q_coeff = *polyq;
47 ulong * q_exp = *expq;
48 slong * hind;
49 ulong mask, exp;
50 fmpz_t r, acc_lg;
51 ulong acc_sm[3];
52 int lt_divides, small;
53 slong bits2, bits3;
54 ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
55 TMP_INIT;
56
57 TMP_START;
58
59 fmpz_init(acc_lg);
60 fmpz_init(r);
61
62 /* whether intermediate computations q - a*b will fit in three words */
63 bits2 = _fmpz_vec_max_bits(poly2, len2);
64 bits3 = _fmpz_vec_max_bits(poly3, len3);
65 /* allow one bit for sign, one bit for subtraction */
66 small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) +
67 FLINT_BITS - 2) && FLINT_ABS(bits3) <= FLINT_BITS - 2;
68
69 /* alloc array of heap nodes which can be chained together */
70 next_loc = len3 + 4; /* something bigger than heap can ever be */
71 heap = (mpoly_heap1_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap1_s));
72 chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
73 store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
74
75 /* space for flagged heap indicies */
76 hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
77 for (i = 0; i < len3; i++)
78 hind[i] = 1;
79
80 /* mask with high bit set in each field of exponent vector */
81 mask = 0;
82 for (i = 0; i < FLINT_BITS/bits; i++)
83 mask = (mask << bits) + (UWORD(1) << (bits - 1));
84
85 q_len = WORD(0);
86
87 /* see description of divisor heap division in paper */
88 s = len3;
89
90 /* insert (-1, 0, exp2[0]) into heap */
91 x = chain + 0;
92 x->i = -WORD(1);
93 x->j = 0;
94 x->next = NULL;
95 HEAP_ASSIGN(heap[1], exp2[0], x);
96
97 /* precompute leading cofficient info assuming "small" case */
98 if (small)
99 {
100 lc_abs = FLINT_ABS(poly3[0]);
101 lc_sign = FLINT_SIGN_EXT(poly3[0]);
102 count_leading_zeros(lc_norm, lc_abs);
103 lc_n = lc_abs << lc_norm;
104 invert_limb(lc_i, lc_n);
105 }
106
107 while (heap_len > 1)
108 {
109 exp = heap[1].exp;
110
111 if (mpoly_monomial_overflows1(exp, mask))
112 goto exp_overflow;
113
114 _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, 1);
115 lt_divides = mpoly_monomial_divides1(q_exp + q_len, exp, exp3[0], mask);
116
117 /* take nodes from heap with exponent matching exp */
118
119 if (!lt_divides)
120 {
121 /* optimation: coeff arithmetic not needed */
122
123 if (mpoly_monomial_gt1(exp3[0], exp, maskhi))
124 {
125 /* optimization: no more quotient terms possible */
126 goto cleanup;
127 }
128
129 do
130 {
131 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
132 do
133 {
134 *store++ = x->i;
135 *store++ = x->j;
136 if (x->i != -WORD(1))
137 hind[x->i] |= WORD(1);
138
139 } while ((x = x->next) != NULL);
140 } while (heap_len > 1 && heap[1].exp == exp);
141 }
142 else if (small)
143 {
144 /* optimization: small coeff arithmetic, acc_sm used below */
145
146 acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
147 do
148 {
149 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
150 do
151 {
152 *store++ = x->i;
153 *store++ = x->j;
154 if (x->i != -WORD(1))
155 hind[x->i] |= WORD(1);
156
157 if (x->i == -WORD(1))
158 _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
159 else
160 _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
161 } while ((x = x->next) != NULL);
162 } while (heap_len > 1 && heap[1].exp == exp);
163 }
164 else
165 {
166 /* general coeff arithmetic */
167
168 fmpz_zero(acc_lg);
169 do
170 {
171 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
172 do
173 {
174 *store++ = x->i;
175 *store++ = x->j;
176 if (x->i != -WORD(1))
177 hind[x->i] |= WORD(1);
178
179 if (x->i == -WORD(1))
180 fmpz_add(acc_lg, acc_lg, poly2 + x->j);
181 else
182 fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
183 } while ((x = x->next) != NULL);
184 } while (heap_len > 1 && heap[1].exp == exp);
185 }
186
187 /* process nodes taken from the heap */
188 while (store > store_base)
189 {
190 j = *--store;
191 i = *--store;
192
193 if (i == -WORD(1))
194 {
195 /* take next dividend term */
196 if (j + 1 < len2)
197 {
198 x = chain + 0;
199 x->i = i;
200 x->j = j + 1;
201 x->next = NULL;
202 _mpoly_heap_insert1(heap, exp2[x->j], x,
203 &next_loc, &heap_len, maskhi);
204 }
205 } else
206 {
207 /* should we go right? */
208 if ( (i + 1 < len3)
209 && (hind[i + 1] == 2*j + 1)
210 )
211 {
212 x = chain + i + 1;
213 x->i = i + 1;
214 x->j = j;
215 x->next = NULL;
216 hind[x->i] = 2*(x->j + 1) + 0;
217 _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
218 &next_loc, &heap_len, maskhi);
219 }
220 /* should we go up? */
221 if (j + 1 == q_len)
222 {
223 s++;
224 } else if ( ((hind[i] & 1) == 1)
225 && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
226 )
227 {
228 x = chain + i;
229 x->i = i;
230 x->j = j + 1;
231 x->next = NULL;
232 hind[x->i] = 2*(x->j + 1) + 0;
233 _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
234 &next_loc, &heap_len, maskhi);
235 }
236 }
237 }
238
239 /* try to divide accumulated term by leading term */
240 if (!lt_divides)
241 continue;
242
243 if (small)
244 {
245 ulong d0, d1, ds = acc_sm[2];
246
247 /* d1:d0 = abs(acc_sm[1:0]) assuming ds is sign extension of acc_sm[1] */
248 sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
249
250 if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
251 continue;
252
253 if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
254 {
255 ulong qq, rr, nhi, nlo;
256 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
257 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
258 nlo = d0 << lc_norm;
259 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
260 (void) rr; /* silence compiler warning */
261
262 if (qq == 0)
263 continue;
264
265 if (qq <= COEFF_MAX)
266 {
267 _fmpz_demote(q_coeff + q_len);
268 q_coeff[q_len] = qq;
269 if (ds != lc_sign)
270 q_coeff[q_len] = -q_coeff[q_len];
271 }
272 else
273 {
274 small = 0;
275 fmpz_set_ui(q_coeff + q_len, qq);
276 if (ds != lc_sign)
277 fmpz_neg(q_coeff + q_len, q_coeff + q_len);
278 }
279 }
280 else
281 {
282 small = 0;
283 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
284 goto large_lt_divides;
285 }
286 }
287 else
288 {
289 if (fmpz_is_zero(acc_lg))
290 continue;
291
292 large_lt_divides:
293
294 fmpz_fdiv_qr(q_coeff + q_len, r, acc_lg, poly3 + 0);
295 if (fmpz_is_zero(q_coeff + q_len))
296 continue;
297 }
298
299 /* put newly generated quotient term back into the heap if neccesary */
300 if (s > 1)
301 {
302 i = 1;
303 x = chain + i;
304 x->i = i;
305 x->j = q_len;
306 x->next = NULL;
307 hind[x->i] = 2*(x->j + 1) + 0;
308 _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
309 &next_loc, &heap_len, maskhi);
310 }
311 s = 1;
312 q_len++;
313 }
314
315
316 cleanup:
317
318 fmpz_clear(acc_lg);
319 fmpz_clear(r);
320
321 (*polyq) = q_coeff;
322 (*expq) = q_exp;
323
324 TMP_END;
325
326 /* return quotient poly length */
327 return q_len;
328
329 exp_overflow:
330 for (i = 0; i < q_len; i++)
331 _fmpz_demote(q_coeff + i);
332 q_len = -WORD(1);
333 goto cleanup;
334 }
335
336
_fmpz_mpoly_div_monagan_pearce(fmpz ** polyq,ulong ** expq,slong * allocq,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,slong N,const ulong * cmpmask)337 slong _fmpz_mpoly_div_monagan_pearce(fmpz ** polyq,
338 ulong ** expq, slong * allocq, const fmpz * poly2,
339 const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3,
340 slong len3, slong bits, slong N, const ulong * cmpmask)
341 {
342 slong i, j, q_len, s;
343 slong next_loc;
344 slong heap_len = 2; /* heap zero index unused */
345 mpoly_heap_s * heap;
346 mpoly_heap_t * chain;
347 slong * store, * store_base;
348 mpoly_heap_t * x;
349 fmpz * q_coeff = *polyq;
350 ulong * q_exp = *expq;
351 ulong * exp, * exps;
352 ulong ** exp_list;
353 slong exp_next;
354 ulong mask;
355 fmpz_t r, acc_lg;
356 ulong acc_sm[3];
357 slong * hind;
358 int lt_divides, small;
359 slong bits2, bits3;
360 ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
361 TMP_INIT;
362
363 if (N == 1)
364 return _fmpz_mpoly_div_monagan_pearce1(polyq, expq, allocq,
365 poly2, exp2, len2, poly3, exp3, len3, bits, cmpmask[0]);
366
367 TMP_START;
368
369 fmpz_init(acc_lg);
370 fmpz_init(r);
371
372 /* whether intermediate computations q - a*b will fit in three words */
373 bits2 = _fmpz_vec_max_bits(poly2, len2);
374 bits3 = _fmpz_vec_max_bits(poly3, len3);
375 /* allow one bit for sign, one bit for subtraction */
376 small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) + FLINT_BITS - 2)
377 && FLINT_ABS(bits3) <= FLINT_BITS - 2;
378
379
380 /* alloc array of heap nodes which can be chained together */
381 next_loc = len3 + 4; /* something bigger than heap can ever be */
382 heap = (mpoly_heap_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap_s));
383 chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
384 store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
385
386 /* array of exponent vectors, each of "N" words */
387 exps = (ulong *) TMP_ALLOC(len3*N*sizeof(ulong));
388 /* list of pointers to available exponent vectors */
389 exp_list = (ulong **) TMP_ALLOC(len3*sizeof(ulong *));
390 /* space to save copy of current exponent vector */
391 exp = (ulong *) TMP_ALLOC(N*sizeof(ulong));
392 /* set up list of available exponent vectors */
393 exp_next = 0;
394 for (i = 0; i < len3; i++)
395 exp_list[i] = exps + i*N;
396
397 /* space for flagged heap indicies */
398 hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
399 for (i = 0; i < len3; i++)
400 hind[i] = 1;
401
402 /* mask with high bit set in each word of each field of exponent vector */
403 mask = 0;
404 for (i = 0; i < FLINT_BITS/bits; i++)
405 mask = (mask << bits) + (UWORD(1) << (bits - 1));
406
407 q_len = WORD(0);
408
409 /* s is the number of terms * (latest quotient) we should put into heap */
410 s = len3;
411
412 /* insert (-1, 0, exp2[0]) into heap */
413 x = chain + 0;
414 x->i = -WORD(1);
415 x->j = 0;
416 x->next = NULL;
417 heap[1].next = x;
418 heap[1].exp = exp_list[exp_next++];
419 mpoly_monomial_set(heap[1].exp, exp2, N);
420
421 /* precompute leading cofficient info in "small" case */
422 if (small)
423 {
424 lc_abs = FLINT_ABS(poly3[0]);
425 lc_sign = FLINT_SIGN_EXT(poly3[0]);
426 count_leading_zeros(lc_norm, lc_abs);
427 lc_n = lc_abs << lc_norm;
428 invert_limb(lc_i, lc_n);
429 }
430
431 while (heap_len > 1)
432 {
433 mpoly_monomial_set(exp, heap[1].exp, N);
434
435 if (bits <= FLINT_BITS)
436 {
437 if (mpoly_monomial_overflows(exp, N, mask))
438 goto exp_overflow;
439 }
440 else
441 {
442 if (mpoly_monomial_overflows_mp(exp, N, bits))
443 goto exp_overflow;
444 }
445
446 _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, N);
447
448 if (bits <= FLINT_BITS)
449 lt_divides = mpoly_monomial_divides(q_exp + q_len*N, exp, exp3, N, mask);
450 else
451 lt_divides = mpoly_monomial_divides_mp(q_exp + q_len*N, exp, exp3, N, bits);
452
453 /* take nodes from heap with exponent matching exp */
454
455 if (!lt_divides)
456 {
457 /* optimation: coeff arithmetic not needed */
458
459 if (mpoly_monomial_gt(exp3 + 0, exp, N, cmpmask))
460 {
461 /* optimization: no more quotient terms possible */
462 goto cleanup;
463 }
464
465 do
466 {
467 exp_list[--exp_next] = heap[1].exp;
468 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
469 do
470 {
471 *store++ = x->i;
472 *store++ = x->j;
473 if (x->i != -WORD(1))
474 hind[x->i] |= WORD(1);
475
476 } while ((x = x->next) != NULL);
477 } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
478 }
479 else if (small)
480 {
481 /* optimization: small coeff arithmetic, acc_sm used below */
482
483 acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
484 do
485 {
486 exp_list[--exp_next] = heap[1].exp;
487 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
488 do
489 {
490 *store++ = x->i;
491 *store++ = x->j;
492 if (x->i != -WORD(1))
493 hind[x->i] |= WORD(1);
494
495 if (x->i == -WORD(1))
496 _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
497 else
498 _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
499 } while ((x = x->next) != NULL);
500 } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
501 }
502 else
503 {
504 /* general coeff arithmetic*/
505
506 fmpz_zero(acc_lg);
507 do
508 {
509 exp_list[--exp_next] = heap[1].exp;
510 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
511 do
512 {
513 *store++ = x->i;
514 *store++ = x->j;
515 if (x->i != -WORD(1))
516 hind[x->i] |= WORD(1);
517
518 if (x->i == -WORD(1))
519 fmpz_add(acc_lg, acc_lg, poly2 + x->j);
520 else
521 fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
522 } while ((x = x->next) != NULL);
523 } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
524 }
525
526 /* process nodes taken from the heap */
527 while (store > store_base)
528 {
529 j = *--store;
530 i = *--store;
531
532 if (i == -WORD(1))
533 {
534 /* take next dividend term */
535 if (j + 1 < len2)
536 {
537 x = chain + 0;
538 x->i = i;
539 x->j = j + 1;
540 x->next = NULL;
541 mpoly_monomial_set(exp_list[exp_next], exp2 + x->j*N, N);
542 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
543 &next_loc, &heap_len, N, cmpmask);
544 }
545 } else
546 {
547 /* should we go up */
548 if ( (i + 1 < len3)
549 && (hind[i + 1] == 2*j + 1)
550 )
551 {
552 x = chain + i + 1;
553 x->i = i + 1;
554 x->j = j;
555 x->next = NULL;
556 hind[x->i] = 2*(x->j + 1) + 0;
557
558 if (bits <= FLINT_BITS)
559 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
560 q_exp + x->j*N, N);
561 else
562 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
563 q_exp + x->j*N, N);
564
565 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
566 &next_loc, &heap_len, N, cmpmask);
567 }
568 /* should we go up? */
569 if (j + 1 == q_len)
570 {
571 s++;
572 } else if ( ((hind[i] & 1) == 1)
573 && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
574 )
575 {
576 x = chain + i;
577 x->i = i;
578 x->j = j + 1;
579 x->next = NULL;
580 hind[x->i] = 2*(x->j + 1) + 0;
581
582 if (bits <= FLINT_BITS)
583 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
584 q_exp + x->j*N, N);
585 else
586 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
587 q_exp + x->j*N, N);
588
589 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
590 &next_loc, &heap_len, N, cmpmask);
591 }
592 }
593 }
594
595 /* try to divide accumulated term by leading term */
596 if (!lt_divides)
597 {
598 continue;
599 }
600 if (small)
601 {
602 ulong d0, d1, ds = acc_sm[2];
603
604 /* d1:d0 = abs(acc_sm[1:0]) assuming ds is sign extension of acc_sm[1] */
605 sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
606
607 if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
608 {
609 continue;
610 }
611
612 if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
613 {
614 ulong qq, rr, nhi, nlo;
615 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
616 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
617 nlo = d0 << lc_norm;
618 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
619 (void) rr;
620
621 if (qq == 0)
622 continue;
623
624 if (qq <= COEFF_MAX)
625 {
626 _fmpz_demote(q_coeff + q_len);
627 q_coeff[q_len] = qq;
628 if (ds != lc_sign)
629 q_coeff[q_len] = -q_coeff[q_len];
630 }
631 else
632 {
633 small = 0;
634 fmpz_set_ui(q_coeff + q_len, qq);
635 if (ds != lc_sign)
636 fmpz_neg(q_coeff + q_len, q_coeff + q_len);
637 }
638 }
639 else
640 {
641 small = 0;
642 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
643 goto large_lt_divides;
644 }
645 }
646 else
647 {
648 if (fmpz_is_zero(acc_lg))
649 {
650 continue;
651 }
652 large_lt_divides:
653 fmpz_fdiv_qr(q_coeff + q_len, r, acc_lg, poly3 + 0);
654 if (fmpz_is_zero(q_coeff + q_len))
655 {
656 continue;
657 }
658 }
659
660 /* put newly generated quotient term back into the heap if neccesary */
661 if (s > 1)
662 {
663 i = 1;
664 x = chain + i;
665 x->i = i;
666 x->j = q_len;
667 x->next = NULL;
668 hind[x->i] = 2*(x->j + 1) + 0;
669
670 if (bits <= FLINT_BITS)
671 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
672 q_exp + x->j*N, N);
673 else
674 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
675 q_exp + x->j*N, N);
676
677 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
678 &next_loc, &heap_len, N, cmpmask);
679 }
680 s = 1;
681 q_len++;
682 }
683
684
685 cleanup:
686
687 fmpz_clear(acc_lg);
688 fmpz_clear(r);
689
690 (*polyq) = q_coeff;
691 (*expq) = q_exp;
692
693 TMP_END;
694
695 /* return quotient poly length */
696 return q_len;
697
698 exp_overflow:
699 for (i = 0; i < q_len; i++)
700 _fmpz_demote(q_coeff + i);
701 q_len = -WORD(1);
702 goto cleanup;
703
704 }
705
fmpz_mpoly_div_monagan_pearce(fmpz_mpoly_t q,const fmpz_mpoly_t poly2,const fmpz_mpoly_t poly3,const fmpz_mpoly_ctx_t ctx)706 void fmpz_mpoly_div_monagan_pearce(fmpz_mpoly_t q, const fmpz_mpoly_t poly2,
707 const fmpz_mpoly_t poly3, const fmpz_mpoly_ctx_t ctx)
708 {
709 slong exp_bits, N, lenq = 0;
710 ulong * exp2 = poly2->exps, * exp3 = poly3->exps;
711 ulong * cmpmask;
712 int free2 = 0, free3 = 0;
713 fmpz_mpoly_t temp1;
714 fmpz_mpoly_struct * tq;
715 TMP_INIT;
716
717 /* check divisor is nonzero */
718 if (poly3->length == 0)
719 flint_throw(FLINT_DIVZERO, "Divide by zero in fmpz_mpoly_div_monagan_pearce");
720
721 /* dividend zero, write out quotient */
722 if (poly2->length == 0)
723 {
724 fmpz_mpoly_zero(q, ctx);
725 return;
726 }
727
728 TMP_START;
729
730 /* maximum bits in quotient exps and inputs is max for poly2 and poly3 */
731 exp_bits = FLINT_MAX(poly2->bits, poly3->bits);
732
733 N = mpoly_words_per_exp(exp_bits, ctx->minfo);
734 cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
735 mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
736
737 /* ensure input exponents packed to same size as output exponents */
738 if (exp_bits > poly2->bits)
739 {
740 free2 = 1;
741 exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
742 mpoly_repack_monomials(exp2, exp_bits, poly2->exps, poly2->bits,
743 poly2->length, ctx->minfo);
744 }
745
746 if (exp_bits > poly3->bits)
747 {
748 free3 = 1;
749 exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
750 mpoly_repack_monomials(exp3, exp_bits, poly3->exps, poly3->bits,
751 poly3->length, ctx->minfo);
752 }
753
754 /* check divisor leading monomial is at most that of the dividend */
755 if (mpoly_monomial_lt(exp2, exp3, N, cmpmask))
756 {
757 fmpz_mpoly_zero(q, ctx);
758 goto cleanup3;
759 }
760
761 /* take care of aliasing */
762 if (q == poly2 || q == poly3)
763 {
764 fmpz_mpoly_init2(temp1, poly2->length/poly3->length + 1, ctx);
765 fmpz_mpoly_fit_bits(temp1, exp_bits, ctx);
766 temp1->bits = exp_bits;
767 tq = temp1;
768 } else
769 {
770 fmpz_mpoly_fit_length(q, poly2->length/poly3->length + 1, ctx);
771 fmpz_mpoly_fit_bits(q, exp_bits, ctx);
772 q->bits = exp_bits;
773 tq = q;
774 }
775
776 /* do division with remainder */
777 while ((lenq = _fmpz_mpoly_div_monagan_pearce(&tq->coeffs, &tq->exps,
778 &tq->alloc, poly2->coeffs, exp2, poly2->length,
779 poly3->coeffs, exp3, poly3->length,
780 exp_bits, N, cmpmask)) == -WORD(1) )
781 {
782 ulong * old_exp2 = exp2, * old_exp3 = exp3;
783 slong old_exp_bits = exp_bits;
784
785 exp_bits = mpoly_fix_bits(exp_bits + 1, ctx->minfo);
786
787 N = mpoly_words_per_exp(exp_bits, ctx->minfo);
788 cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
789 mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
790
791 exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
792 mpoly_repack_monomials(exp2, exp_bits, old_exp2, old_exp_bits,
793 poly2->length, ctx->minfo);
794
795 exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
796 mpoly_repack_monomials(exp3, exp_bits, old_exp3, old_exp_bits,
797 poly3->length, ctx->minfo);
798
799 if (free2)
800 flint_free(old_exp2);
801
802 if (free3)
803 flint_free(old_exp3);
804
805 free2 = free3 = 1;
806
807 fmpz_mpoly_fit_bits(tq, exp_bits, ctx);
808 tq->bits = exp_bits;
809 }
810
811 /* take care of aliasing */
812 if (q == poly2 || q == poly3)
813 {
814 fmpz_mpoly_swap(temp1, q, ctx);
815 fmpz_mpoly_clear(temp1, ctx);
816 }
817
818 _fmpz_mpoly_set_length(q, lenq, ctx);
819
820 cleanup3:
821
822 if (free2)
823 flint_free(exp2);
824
825 if (free3)
826 flint_free(exp3);
827
828 TMP_END;
829 }
830