1 /*
2     Copyright (C) 2016 William Hart
3     Copyright (C) 2018 Daniel Schultz
4 
5     This file is part of FLINT.
6 
7     FLINT is free software: you can redistribute it and/or modify it under
8     the terms of the GNU Lesser General Public License (LGPL) as published
9     by the Free Software Foundation; either version 2.1 of the License, or
10     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
11 */
12 
13 #include <gmp.h>
14 #include <stdlib.h>
15 #include "flint.h"
16 #include "fmpz.h"
17 #include "fmpz_mpoly.h"
18 #include "longlong.h"
19 
20 /*
21    Set polyq to the quotient poly2 by poly3 discarding remainder (with notional
22    remainder coeffs reduced modulo the leading coeff of poly3), and return
23    the length of the quotient. This version of the function assumes the
24    exponent vectors all fit in a single word. The exponent vectors are
25    assumed to have fields with the given number of bits. Assumes input polys
26    are nonzero. Implements "Polynomial division using dynamic arrays, heaps
27    and packed exponents" by Michael Monagan and Roman Pearce [1], except that
28    we use a heap with smallest exponent at head. Note that if a < b then
29    (n - b) < (n - b) where n is the maximum value a and b can take. The word
30    "maxn" is set to an exponent vector whose fields are all set to such a
31    value n. This allows division from left to right with a heap with smallest
32    exponent at the head. Quotient poly is written in reverse order.
33    [1] http://www.cecm.sfu.ca/~rpearcea/sdmp/sdmp_paper.pdf
34 */
_fmpz_mpoly_div_monagan_pearce1(fmpz ** polyq,ulong ** expq,slong * allocq,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,ulong maskhi)35 slong _fmpz_mpoly_div_monagan_pearce1(fmpz ** polyq, ulong ** expq,
36                 slong * allocq, const fmpz * poly2, const ulong * exp2,
37             slong len2, const fmpz * poly3, const ulong * exp3, slong len3,
38                                                       slong bits, ulong maskhi)
39 {
40     slong i, j, q_len, s;
41     slong next_loc, heap_len = 2;
42     mpoly_heap1_s * heap;
43     mpoly_heap_t * chain;
44     slong * store, * store_base;
45     mpoly_heap_t * x;
46     fmpz * q_coeff = *polyq;
47     ulong * q_exp = *expq;
48     slong * hind;
49     ulong mask, exp;
50     fmpz_t r, acc_lg;
51     ulong acc_sm[3];
52     int lt_divides, small;
53     slong bits2, bits3;
54     ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
55     TMP_INIT;
56 
57     TMP_START;
58 
59     fmpz_init(acc_lg);
60     fmpz_init(r);
61 
62    /* whether intermediate computations q - a*b will fit in three words */
63    bits2 = _fmpz_vec_max_bits(poly2, len2);
64    bits3 = _fmpz_vec_max_bits(poly3, len3);
65    /* allow one bit for sign, one bit for subtraction */
66    small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) +
67            FLINT_BITS - 2) && FLINT_ABS(bits3) <= FLINT_BITS - 2;
68 
69     /* alloc array of heap nodes which can be chained together */
70     next_loc = len3 + 4;   /* something bigger than heap can ever be */
71     heap = (mpoly_heap1_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap1_s));
72     chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
73     store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
74 
75     /* space for flagged heap indicies */
76     hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
77     for (i = 0; i < len3; i++)
78         hind[i] = 1;
79 
80     /* mask with high bit set in each field of exponent vector */
81     mask = 0;
82     for (i = 0; i < FLINT_BITS/bits; i++)
83         mask = (mask << bits) + (UWORD(1) << (bits - 1));
84 
85     q_len = WORD(0);
86 
87     /* see description of divisor heap division in paper */
88     s = len3;
89 
90     /* insert (-1, 0, exp2[0]) into heap */
91     x = chain + 0;
92     x->i = -WORD(1);
93     x->j = 0;
94     x->next = NULL;
95     HEAP_ASSIGN(heap[1], exp2[0], x);
96 
97     /* precompute leading cofficient info assuming "small" case */
98     if (small)
99     {
100         lc_abs = FLINT_ABS(poly3[0]);
101         lc_sign = FLINT_SIGN_EXT(poly3[0]);
102         count_leading_zeros(lc_norm, lc_abs);
103         lc_n = lc_abs << lc_norm;
104         invert_limb(lc_i, lc_n);
105     }
106 
107     while (heap_len > 1)
108     {
109         exp = heap[1].exp;
110 
111         if (mpoly_monomial_overflows1(exp, mask))
112             goto exp_overflow;
113 
114         _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, 1);
115         lt_divides = mpoly_monomial_divides1(q_exp + q_len, exp, exp3[0], mask);
116 
117         /* take nodes from heap with exponent matching exp */
118 
119         if (!lt_divides)
120         {
121             /* optimation: coeff arithmetic not needed */
122 
123             if (mpoly_monomial_gt1(exp3[0], exp, maskhi))
124             {
125                 /* optimization: no more quotient terms possible */
126                 goto cleanup;
127             }
128 
129             do
130             {
131                 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
132                 do
133                 {
134                     *store++ = x->i;
135                     *store++ = x->j;
136                     if (x->i != -WORD(1))
137                         hind[x->i] |= WORD(1);
138 
139                 } while ((x = x->next) != NULL);
140             } while (heap_len > 1 && heap[1].exp == exp);
141         }
142         else if (small)
143         {
144             /* optimization: small coeff arithmetic, acc_sm used below */
145 
146             acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
147             do
148             {
149                 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
150                 do
151                 {
152                     *store++ = x->i;
153                     *store++ = x->j;
154                     if (x->i != -WORD(1))
155                         hind[x->i] |= WORD(1);
156 
157                     if (x->i == -WORD(1))
158                         _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
159                     else
160                         _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
161                 } while ((x = x->next) != NULL);
162             } while (heap_len > 1 && heap[1].exp == exp);
163         }
164         else
165         {
166             /* general coeff arithmetic */
167 
168             fmpz_zero(acc_lg);
169             do
170             {
171                 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
172                 do
173                 {
174                     *store++ = x->i;
175                     *store++ = x->j;
176                     if (x->i != -WORD(1))
177                         hind[x->i] |= WORD(1);
178 
179                     if (x->i == -WORD(1))
180                         fmpz_add(acc_lg, acc_lg, poly2 + x->j);
181                     else
182                         fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
183                 } while ((x = x->next) != NULL);
184             } while (heap_len > 1 && heap[1].exp == exp);
185         }
186 
187         /* process nodes taken from the heap */
188         while (store > store_base)
189         {
190             j = *--store;
191             i = *--store;
192 
193             if (i == -WORD(1))
194             {
195                 /* take next dividend term */
196                 if (j + 1 < len2)
197                 {
198                     x = chain + 0;
199                     x->i = i;
200                     x->j = j + 1;
201                     x->next = NULL;
202                     _mpoly_heap_insert1(heap, exp2[x->j], x,
203                                                  &next_loc, &heap_len, maskhi);
204                 }
205             } else
206             {
207                 /* should we go right? */
208                 if (  (i + 1 < len3)
209                    && (hind[i + 1] == 2*j + 1)
210                    )
211                 {
212                     x = chain + i + 1;
213                     x->i = i + 1;
214                     x->j = j;
215                     x->next = NULL;
216                     hind[x->i] = 2*(x->j + 1) + 0;
217                     _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
218                                                  &next_loc, &heap_len, maskhi);
219                 }
220                 /* should we go up? */
221                 if (j + 1 == q_len)
222                 {
223                     s++;
224                 } else if (  ((hind[i] & 1) == 1)
225                           && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
226                           )
227                 {
228                     x = chain + i;
229                     x->i = i;
230                     x->j = j + 1;
231                     x->next = NULL;
232                     hind[x->i] = 2*(x->j + 1) + 0;
233                     _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
234                                                  &next_loc, &heap_len, maskhi);
235                 }
236             }
237         }
238 
239         /* try to divide accumulated term by leading term */
240         if (!lt_divides)
241             continue;
242 
243         if (small)
244         {
245             ulong d0, d1, ds = acc_sm[2];
246 
247             /* d1:d0 = abs(acc_sm[1:0]) assuming ds is sign extension of acc_sm[1] */
248             sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
249 
250             if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
251                 continue;
252 
253             if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
254             {
255                 ulong qq, rr, nhi, nlo;
256                 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
257                 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
258                 nlo = d0 << lc_norm;
259                 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
260                 (void) rr; /* silence compiler warning */
261 
262                 if (qq == 0)
263                     continue;
264 
265                 if (qq <= COEFF_MAX)
266                 {
267                     _fmpz_demote(q_coeff + q_len);
268                     q_coeff[q_len] = qq;
269                     if (ds != lc_sign)
270                         q_coeff[q_len] = -q_coeff[q_len];
271                 }
272                 else
273                 {
274                     small = 0;
275                     fmpz_set_ui(q_coeff + q_len, qq);
276                     if (ds != lc_sign)
277                         fmpz_neg(q_coeff + q_len, q_coeff + q_len);
278                 }
279             }
280             else
281             {
282                 small = 0;
283                 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
284                 goto large_lt_divides;
285             }
286         }
287         else
288         {
289             if (fmpz_is_zero(acc_lg))
290                 continue;
291 
292 large_lt_divides:
293 
294             fmpz_fdiv_qr(q_coeff + q_len, r, acc_lg, poly3 + 0);
295             if (fmpz_is_zero(q_coeff + q_len))
296                 continue;
297         }
298 
299         /* put newly generated quotient term back into the heap if neccesary */
300         if (s > 1)
301         {
302             i = 1;
303             x = chain + i;
304             x->i = i;
305             x->j = q_len;
306             x->next = NULL;
307             hind[x->i] = 2*(x->j + 1) + 0;
308             _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
309                                                  &next_loc, &heap_len, maskhi);
310         }
311         s = 1;
312         q_len++;
313     }
314 
315 
316 cleanup:
317 
318     fmpz_clear(acc_lg);
319     fmpz_clear(r);
320 
321     (*polyq) = q_coeff;
322     (*expq) = q_exp;
323 
324     TMP_END;
325 
326     /* return quotient poly length */
327     return q_len;
328 
329 exp_overflow:
330     for (i = 0; i < q_len; i++)
331         _fmpz_demote(q_coeff + i);
332     q_len = -WORD(1);
333     goto cleanup;
334 }
335 
336 
_fmpz_mpoly_div_monagan_pearce(fmpz ** polyq,ulong ** expq,slong * allocq,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,slong N,const ulong * cmpmask)337 slong _fmpz_mpoly_div_monagan_pearce(fmpz ** polyq,
338            ulong ** expq, slong * allocq, const fmpz * poly2,
339    const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3,
340                    slong len3, slong bits, slong N, const ulong * cmpmask)
341 {
342     slong i, j, q_len, s;
343     slong next_loc;
344     slong heap_len = 2; /* heap zero index unused */
345     mpoly_heap_s * heap;
346     mpoly_heap_t * chain;
347     slong * store, * store_base;
348     mpoly_heap_t * x;
349     fmpz * q_coeff = *polyq;
350     ulong * q_exp = *expq;
351     ulong * exp, * exps;
352     ulong ** exp_list;
353     slong exp_next;
354     ulong mask;
355     fmpz_t r, acc_lg;
356     ulong acc_sm[3];
357     slong * hind;
358     int lt_divides, small;
359     slong bits2, bits3;
360     ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
361     TMP_INIT;
362 
363     if (N == 1)
364         return _fmpz_mpoly_div_monagan_pearce1(polyq, expq, allocq,
365                        poly2, exp2, len2, poly3, exp3, len3, bits, cmpmask[0]);
366 
367     TMP_START;
368 
369     fmpz_init(acc_lg);
370     fmpz_init(r);
371 
372     /* whether intermediate computations q - a*b will fit in three words */
373     bits2 = _fmpz_vec_max_bits(poly2, len2);
374     bits3 = _fmpz_vec_max_bits(poly3, len3);
375     /* allow one bit for sign, one bit for subtraction */
376     small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) + FLINT_BITS - 2)
377          && FLINT_ABS(bits3) <= FLINT_BITS - 2;
378 
379 
380     /* alloc array of heap nodes which can be chained together */
381     next_loc = len3 + 4;   /* something bigger than heap can ever be */
382     heap = (mpoly_heap_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap_s));
383     chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
384     store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
385 
386     /* array of exponent vectors, each of "N" words */
387     exps = (ulong *) TMP_ALLOC(len3*N*sizeof(ulong));
388     /* list of pointers to available exponent vectors */
389     exp_list = (ulong **) TMP_ALLOC(len3*sizeof(ulong *));
390     /* space to save copy of current exponent vector */
391     exp = (ulong *) TMP_ALLOC(N*sizeof(ulong));
392     /* set up list of available exponent vectors */
393     exp_next = 0;
394     for (i = 0; i < len3; i++)
395         exp_list[i] = exps + i*N;
396 
397     /* space for flagged heap indicies */
398     hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
399     for (i = 0; i < len3; i++)
400         hind[i] = 1;
401 
402     /* mask with high bit set in each word of each field of exponent vector */
403     mask = 0;
404     for (i = 0; i < FLINT_BITS/bits; i++)
405         mask = (mask << bits) + (UWORD(1) << (bits - 1));
406 
407     q_len = WORD(0);
408 
409     /* s is the number of terms * (latest quotient) we should put into heap */
410     s = len3;
411 
412     /* insert (-1, 0, exp2[0]) into heap */
413     x = chain + 0;
414     x->i = -WORD(1);
415     x->j = 0;
416     x->next = NULL;
417     heap[1].next = x;
418     heap[1].exp = exp_list[exp_next++];
419     mpoly_monomial_set(heap[1].exp, exp2, N);
420 
421     /* precompute leading cofficient info in "small" case */
422     if (small)
423     {
424         lc_abs = FLINT_ABS(poly3[0]);
425         lc_sign = FLINT_SIGN_EXT(poly3[0]);
426         count_leading_zeros(lc_norm, lc_abs);
427         lc_n = lc_abs << lc_norm;
428         invert_limb(lc_i, lc_n);
429     }
430 
431     while (heap_len > 1)
432     {
433         mpoly_monomial_set(exp, heap[1].exp, N);
434 
435         if (bits <= FLINT_BITS)
436         {
437             if (mpoly_monomial_overflows(exp, N, mask))
438                 goto exp_overflow;
439         }
440         else
441         {
442             if (mpoly_monomial_overflows_mp(exp, N, bits))
443                 goto exp_overflow;
444         }
445 
446         _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, N);
447 
448         if (bits <= FLINT_BITS)
449             lt_divides = mpoly_monomial_divides(q_exp + q_len*N, exp, exp3, N, mask);
450         else
451             lt_divides = mpoly_monomial_divides_mp(q_exp + q_len*N, exp, exp3, N, bits);
452 
453         /* take nodes from heap with exponent matching exp */
454 
455         if (!lt_divides)
456         {
457             /* optimation: coeff arithmetic not needed */
458 
459             if (mpoly_monomial_gt(exp3 + 0, exp, N, cmpmask))
460             {
461                 /* optimization: no more quotient terms possible */
462                 goto cleanup;
463             }
464 
465             do
466             {
467                 exp_list[--exp_next] = heap[1].exp;
468                 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
469                 do
470                 {
471                     *store++ = x->i;
472                     *store++ = x->j;
473                     if (x->i != -WORD(1))
474                         hind[x->i] |= WORD(1);
475 
476                 } while ((x = x->next) != NULL);
477             } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
478         }
479         else if (small)
480         {
481             /* optimization: small coeff arithmetic, acc_sm used below */
482 
483             acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
484             do
485             {
486                 exp_list[--exp_next] = heap[1].exp;
487                 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
488                 do
489                 {
490                     *store++ = x->i;
491                     *store++ = x->j;
492                     if (x->i != -WORD(1))
493                         hind[x->i] |= WORD(1);
494 
495                     if (x->i == -WORD(1))
496                         _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
497                     else
498                         _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
499                 } while ((x = x->next) != NULL);
500             } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
501         }
502         else
503         {
504             /* general coeff arithmetic*/
505 
506             fmpz_zero(acc_lg);
507             do
508             {
509                 exp_list[--exp_next] = heap[1].exp;
510                 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
511                 do
512                 {
513                     *store++ = x->i;
514                     *store++ = x->j;
515                     if (x->i != -WORD(1))
516                         hind[x->i] |= WORD(1);
517 
518                     if (x->i == -WORD(1))
519                         fmpz_add(acc_lg, acc_lg, poly2 + x->j);
520                     else
521                         fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
522                 } while ((x = x->next) != NULL);
523             } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
524         }
525 
526         /* process nodes taken from the heap */
527         while (store > store_base)
528         {
529             j = *--store;
530             i = *--store;
531 
532             if (i == -WORD(1))
533             {
534                 /* take next dividend term */
535                 if (j + 1 < len2)
536                 {
537                     x = chain + 0;
538                     x->i = i;
539                     x->j = j + 1;
540                     x->next = NULL;
541                     mpoly_monomial_set(exp_list[exp_next], exp2 + x->j*N, N);
542                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
543                                              &next_loc, &heap_len, N, cmpmask);
544                 }
545             } else
546             {
547                 /* should we go up */
548                 if (  (i + 1 < len3)
549                    && (hind[i + 1] == 2*j + 1)
550                    )
551                 {
552                     x = chain + i + 1;
553                     x->i = i + 1;
554                     x->j = j;
555                     x->next = NULL;
556                     hind[x->i] = 2*(x->j + 1) + 0;
557 
558                     if (bits <= FLINT_BITS)
559                         mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
560                                                             q_exp + x->j*N, N);
561                     else
562                         mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
563                                                             q_exp + x->j*N, N);
564 
565                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
566                                              &next_loc, &heap_len, N, cmpmask);
567                 }
568                 /* should we go up? */
569                 if (j + 1 == q_len)
570                 {
571                     s++;
572                 } else if (  ((hind[i] & 1) == 1)
573                           && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
574                           )
575                 {
576                     x = chain + i;
577                     x->i = i;
578                     x->j = j + 1;
579                     x->next = NULL;
580                     hind[x->i] = 2*(x->j + 1) + 0;
581 
582                     if (bits <= FLINT_BITS)
583                         mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
584                                                               q_exp + x->j*N, N);
585                     else
586                         mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
587                                                                  q_exp + x->j*N, N);
588 
589                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
590                                              &next_loc, &heap_len, N, cmpmask);
591                 }
592             }
593         }
594 
595         /* try to divide accumulated term by leading term */
596         if (!lt_divides)
597         {
598             continue;
599         }
600         if (small)
601         {
602             ulong d0, d1, ds = acc_sm[2];
603 
604             /* d1:d0 = abs(acc_sm[1:0]) assuming ds is sign extension of acc_sm[1] */
605             sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
606 
607             if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
608             {
609                 continue;
610             }
611 
612             if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
613             {
614                 ulong qq, rr, nhi, nlo;
615                 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
616                 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
617                 nlo = d0 << lc_norm;
618                 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
619                 (void) rr;
620 
621                 if (qq == 0)
622                     continue;
623 
624                 if (qq <= COEFF_MAX)
625                 {
626                     _fmpz_demote(q_coeff + q_len);
627                     q_coeff[q_len] = qq;
628                     if (ds != lc_sign)
629                         q_coeff[q_len] = -q_coeff[q_len];
630                 }
631                 else
632                 {
633                     small = 0;
634                     fmpz_set_ui(q_coeff + q_len, qq);
635                     if (ds != lc_sign)
636                         fmpz_neg(q_coeff + q_len, q_coeff + q_len);
637                 }
638             }
639             else
640             {
641                 small = 0;
642                 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
643                 goto large_lt_divides;
644             }
645         }
646         else
647         {
648             if (fmpz_is_zero(acc_lg))
649             {
650                 continue;
651             }
652 large_lt_divides:
653             fmpz_fdiv_qr(q_coeff + q_len, r, acc_lg, poly3 + 0);
654             if (fmpz_is_zero(q_coeff + q_len))
655             {
656                 continue;
657             }
658         }
659 
660         /* put newly generated quotient term back into the heap if neccesary */
661         if (s > 1)
662         {
663             i = 1;
664             x = chain + i;
665             x->i = i;
666             x->j = q_len;
667             x->next = NULL;
668             hind[x->i] = 2*(x->j + 1) + 0;
669 
670             if (bits <= FLINT_BITS)
671                 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
672                                                       q_exp + x->j*N, N);
673             else
674                 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
675                                                          q_exp + x->j*N, N);
676 
677             exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
678                                              &next_loc, &heap_len, N, cmpmask);
679         }
680         s = 1;
681         q_len++;
682     }
683 
684 
685 cleanup:
686 
687     fmpz_clear(acc_lg);
688     fmpz_clear(r);
689 
690     (*polyq) = q_coeff;
691     (*expq) = q_exp;
692 
693     TMP_END;
694 
695     /* return quotient poly length */
696     return q_len;
697 
698 exp_overflow:
699     for (i = 0; i < q_len; i++)
700         _fmpz_demote(q_coeff + i);
701     q_len = -WORD(1);
702     goto cleanup;
703 
704 }
705 
fmpz_mpoly_div_monagan_pearce(fmpz_mpoly_t q,const fmpz_mpoly_t poly2,const fmpz_mpoly_t poly3,const fmpz_mpoly_ctx_t ctx)706 void fmpz_mpoly_div_monagan_pearce(fmpz_mpoly_t q, const fmpz_mpoly_t poly2,
707                           const fmpz_mpoly_t poly3, const fmpz_mpoly_ctx_t ctx)
708 {
709     slong exp_bits, N, lenq = 0;
710     ulong * exp2 = poly2->exps, * exp3 = poly3->exps;
711     ulong * cmpmask;
712     int free2 = 0, free3 = 0;
713     fmpz_mpoly_t temp1;
714     fmpz_mpoly_struct * tq;
715     TMP_INIT;
716 
717    /* check divisor is nonzero */
718    if (poly3->length == 0)
719       flint_throw(FLINT_DIVZERO, "Divide by zero in fmpz_mpoly_div_monagan_pearce");
720 
721    /* dividend zero, write out quotient */
722    if (poly2->length == 0)
723    {
724       fmpz_mpoly_zero(q, ctx);
725       return;
726    }
727 
728    TMP_START;
729 
730    /* maximum bits in quotient exps and inputs is max for poly2 and poly3 */
731    exp_bits = FLINT_MAX(poly2->bits, poly3->bits);
732 
733     N = mpoly_words_per_exp(exp_bits, ctx->minfo);
734     cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
735     mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
736 
737    /* ensure input exponents packed to same size as output exponents */
738    if (exp_bits > poly2->bits)
739    {
740       free2 = 1;
741       exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
742       mpoly_repack_monomials(exp2, exp_bits, poly2->exps, poly2->bits,
743                                                     poly2->length, ctx->minfo);
744    }
745 
746    if (exp_bits > poly3->bits)
747    {
748       free3 = 1;
749       exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
750       mpoly_repack_monomials(exp3, exp_bits, poly3->exps, poly3->bits,
751                                                     poly3->length, ctx->minfo);
752    }
753 
754    /* check divisor leading monomial is at most that of the dividend */
755    if (mpoly_monomial_lt(exp2, exp3, N, cmpmask))
756    {
757       fmpz_mpoly_zero(q, ctx);
758       goto cleanup3;
759    }
760 
761    /* take care of aliasing */
762    if (q == poly2 || q == poly3)
763    {
764       fmpz_mpoly_init2(temp1, poly2->length/poly3->length + 1, ctx);
765       fmpz_mpoly_fit_bits(temp1, exp_bits, ctx);
766       temp1->bits = exp_bits;
767       tq = temp1;
768    } else
769    {
770       fmpz_mpoly_fit_length(q, poly2->length/poly3->length + 1, ctx);
771       fmpz_mpoly_fit_bits(q, exp_bits, ctx);
772       q->bits = exp_bits;
773       tq = q;
774    }
775 
776    /* do division with remainder */
777    while ((lenq = _fmpz_mpoly_div_monagan_pearce(&tq->coeffs, &tq->exps,
778                          &tq->alloc, poly2->coeffs, exp2, poly2->length,
779                                      poly3->coeffs, exp3, poly3->length,
780                                       exp_bits, N, cmpmask)) == -WORD(1)  )
781    {
782       ulong * old_exp2 = exp2, * old_exp3 = exp3;
783       slong old_exp_bits = exp_bits;
784 
785       exp_bits = mpoly_fix_bits(exp_bits + 1, ctx->minfo);
786 
787       N = mpoly_words_per_exp(exp_bits, ctx->minfo);
788       cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
789       mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
790 
791       exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
792       mpoly_repack_monomials(exp2, exp_bits, old_exp2, old_exp_bits,
793                                                     poly2->length, ctx->minfo);
794 
795       exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
796       mpoly_repack_monomials(exp3, exp_bits, old_exp3, old_exp_bits,
797                                                     poly3->length, ctx->minfo);
798 
799       if (free2)
800          flint_free(old_exp2);
801 
802       if (free3)
803          flint_free(old_exp3);
804 
805       free2 = free3 = 1;
806 
807       fmpz_mpoly_fit_bits(tq, exp_bits, ctx);
808       tq->bits = exp_bits;
809    }
810 
811    /* take care of aliasing */
812    if (q == poly2 || q == poly3)
813    {
814       fmpz_mpoly_swap(temp1, q, ctx);
815       fmpz_mpoly_clear(temp1, ctx);
816    }
817 
818    _fmpz_mpoly_set_length(q, lenq, ctx);
819 
820 cleanup3:
821 
822    if (free2)
823       flint_free(exp2);
824 
825    if (free3)
826       flint_free(exp3);
827 
828     TMP_END;
829 }
830