1 /*
2     Copyright (C) 2017 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include <gmp.h>
13 #include <stdlib.h>
14 #include "flint.h"
15 #include "fmpz.h"
16 #include "fmpz_mpoly.h"
17 #include "longlong.h"
18 
19 /*
20    Set polyq, polyr to the quotient and remainder of poly2 by poly3 (with
21    remainder coeffs reduced modulo the leading coeff of poly3), and return
22    the length of the quotient. This version of the function assumes the
23    exponent vectors all fit in a single word. The exponent vectors are
24    assumed to have fields with the given number of bits. Assumes input polys
25    are nonzero. Implements "Polynomial division using dynamic arrays, heaps
26    and packed exponents" by Michael Monagan and Roman Pearce [1], except that
27    we use a heap with smallest exponent at head. Note that if a < b then
28    (n - b) < (n - b) where n is the maximum value a and b can take. The word
29    "maxn" is set to an exponent vector whose fields are all set to such a
30    value n. This allows division from left to right with a heap with smallest
31    exponent at the head. Quotient and remainder polys are written in reverse
32    order.
33    [1] http://www.cecm.sfu.ca/~rpearcea/sdmp/sdmp_paper.pdf
34 */
35 
_fmpz_mpoly_divrem_monagan_pearce1(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,ulong maskhi)36 slong _fmpz_mpoly_divrem_monagan_pearce1(slong * lenr,
37    fmpz ** polyq, ulong ** expq, slong * allocq, fmpz ** polyr,
38   ulong ** expr, slong * allocr, const fmpz * poly2, const ulong * exp2,
39             slong len2, const fmpz * poly3, const ulong * exp3, slong len3,
40                                                       slong bits, ulong maskhi)
41 {
42     slong i, j, q_len, r_len, s;
43     slong next_loc, heap_len = 2;
44     mpoly_heap1_s * heap;
45     mpoly_heap_t * chain;
46     slong * store, * store_base;
47     mpoly_heap_t * x;
48     fmpz * q_coeff = *polyq;
49     fmpz * r_coeff = *polyr;
50     ulong * q_exp = *expq;
51     ulong * r_exp = *expr;
52     slong * hind;
53     ulong mask, exp;
54     fmpz_t r, acc_lg;
55     ulong acc_sm[3];
56     int lt_divides, small;
57     slong bits2, bits3;
58     ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
59     TMP_INIT;
60 
61     TMP_START;
62 
63     fmpz_init(acc_lg);
64     fmpz_init(r);
65 
66     /* whether intermediate computations q - a*b will fit in three words */
67     bits2 = _fmpz_vec_max_bits(poly2, len2);
68     bits3 = _fmpz_vec_max_bits(poly3, len3);
69     /* allow one bit for sign, one bit for subtraction */
70     small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) + FLINT_BITS - 2)
71           && FLINT_ABS(bits3) <= FLINT_BITS - 2;
72 
73     /* alloc array of heap nodes which can be chained together */
74     next_loc = len3 + 4;   /* something bigger than heap can ever be */
75     heap = (mpoly_heap1_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap1_s));
76     chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
77     store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
78 
79     /* space for flagged heap indicies */
80     hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
81     for (i = 0; i < len3; i++)
82         hind[i] = 1;
83 
84     /* mask with high bit set in each field of exponent vector */
85     mask = 0;
86     for (i = 0; i < FLINT_BITS/bits; i++)
87         mask = (mask << bits) + (UWORD(1) << (bits - 1));
88 
89     /* quotient and remainder poly indices start at -1 */
90     q_len = WORD(0);
91     r_len = WORD(0);
92 
93     /* s is the number of terms * (latest quotient) we should put into heap */
94     s = len3;
95 
96     /* insert (-1, 0, exp2[0]) into heap */
97     x = chain + 0;
98     x->i = -WORD(1);
99     x->j = 0;
100     x->next = NULL;
101     HEAP_ASSIGN(heap[1], exp2[0], x);
102 
103     /* precompute leading cofficient info in "small" case */
104     if (small)
105     {
106         lc_abs = FLINT_ABS(poly3[0]);
107         lc_sign = FLINT_SIGN_EXT(poly3[0]);
108         count_leading_zeros(lc_norm, lc_abs);
109         lc_n = lc_abs << lc_norm;
110         invert_limb(lc_i, lc_n);
111     }
112 
113     while (heap_len > 1)
114     {
115         exp = heap[1].exp;
116 
117         if (mpoly_monomial_overflows1(exp, mask))
118             goto exp_overflow;
119 
120         _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, 1);
121         lt_divides = mpoly_monomial_divides1(q_exp + q_len, exp, exp3[0], mask);
122 
123         /* take nodes from heap with exponent matching exp */
124         if (small)
125         {
126             acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
127             do
128             {
129                 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
130                 do
131                 {
132                     *store++ = x->i;
133                     *store++ = x->j;
134                     if (x->i != -WORD(1))
135                         hind[x->i] |= WORD(1);
136 
137                     if (x->i == -WORD(1))
138                         _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
139                     else
140                         _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
141                 } while ((x = x->next) != NULL);
142             } while (heap_len > 1 && heap[1].exp == exp);
143         } else
144         {
145             fmpz_zero(acc_lg);
146             do
147             {
148                 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
149                 do
150                 {
151                     *store++ = x->i;
152                     *store++ = x->j;
153                     if (x->i != -WORD(1))
154                         hind[x->i] |= WORD(1);
155 
156                     if (x->i == -WORD(1))
157                         fmpz_add(acc_lg, acc_lg, poly2 + x->j);
158                     else
159                         fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
160                 } while ((x = x->next) != NULL);
161             } while (heap_len > 1 && heap[1].exp == exp);
162         }
163 
164         /* process nodes taken from the heap */
165         while (store > store_base)
166         {
167             j = *--store;
168             i = *--store;
169 
170             if (i == -WORD(1))
171             {
172                 /* take next dividend term */
173                 if (j + 1 < len2)
174                 {
175                     x = chain + 0;
176                     x->i = i;
177                     x->j = j + 1;
178                     x->next = NULL;
179                     _mpoly_heap_insert1(heap, exp2[x->j], x,
180                                                  &next_loc, &heap_len, maskhi);
181                 }
182             } else
183             {
184                 /* should we go right? */
185                 if (  (i + 1 < len3)
186                    && (hind[i + 1] == 2*j + 1)
187                    )
188                 {
189                     x = chain + i + 1;
190                     x->i = i + 1;
191                     x->j = j;
192                     x->next = NULL;
193                     hind[x->i] = 2*(x->j + 1) + 0;
194                     _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
195                                                  &next_loc, &heap_len, maskhi);
196                 }
197                 /* should we go up? */
198                 if (j + 1 == q_len)
199                 {
200                     s++;
201                 } else if (  ((hind[i] & 1) == 1)
202                           && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
203                           )
204                 {
205                     x = chain + i;
206                     x->i = i;
207                     x->j = j + 1;
208                     x->next = NULL;
209                     hind[x->i] = 2*(x->j + 1) + 0;
210                     _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
211                                                  &next_loc, &heap_len, maskhi);
212                 }
213             }
214         }
215 
216         /* try to divide accumulated term by leading term */
217         if (small)
218         {
219             ulong d0, d1, ds = acc_sm[2];
220 
221             /* d1:d0 = abs(acc_sm[1:0]) assuming ds is sign extension of acc_sm[1] */
222             sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
223 
224             if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
225             {
226                 continue;
227             }
228             if (!lt_divides)
229             {
230                 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
231                 fmpz_set_signed_uiuiui(r_coeff + r_len, acc_sm[2], acc_sm[1], acc_sm[0]);
232                 r_exp[r_len] = exp;
233                 r_len++;
234                 continue;
235             }
236             if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
237             {
238                 ulong qq, rr, nhi, nlo;
239                 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
240                 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
241                 nlo = d0 << lc_norm;
242                 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
243                 rr = rr >> lc_norm;
244                 if (rr != 0)
245                 {
246                     _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
247                     if (ds == 0)
248                         fmpz_set_si(r_coeff + r_len, rr);
249                     else
250                         fmpz_neg_ui(r_coeff + r_len, rr);
251                     r_exp[r_len] = exp;
252                     r_len++;
253                 }
254 
255                 if (qq == 0)
256                     continue;
257 
258                 if (qq <= COEFF_MAX)
259                 {
260                     _fmpz_demote(q_coeff + q_len);
261                     q_coeff[q_len] = qq;
262                     if (ds != lc_sign)
263                         q_coeff[q_len] = -q_coeff[q_len];
264                 }
265                 else
266                 {
267                     small = 0;
268                     fmpz_set_ui(q_coeff + q_len, qq);
269                     if (ds != lc_sign)
270                         fmpz_neg(q_coeff + q_len, q_coeff + q_len);
271                 }
272             }
273             else
274             {
275                 small = 0;
276                 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
277                 goto large_lt_divides;
278             }
279         }
280         else
281         {
282             if (fmpz_is_zero(acc_lg))
283             {
284                 continue;
285             }
286             if (!lt_divides)
287             {
288                 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
289                 fmpz_set(r_coeff + r_len, acc_lg);
290                 r_exp[r_len] = exp;
291                 r_len++;
292                 continue;
293             }
294 large_lt_divides:
295             fmpz_fdiv_qr(q_coeff + q_len, r, acc_lg, poly3 + 0);
296             if (!fmpz_is_zero(r))
297             {
298                 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
299                 fmpz_set(r_coeff + r_len, r);
300                 r_exp[r_len] = exp;
301                 r_len++;
302             }
303             if (fmpz_is_zero(q_coeff + q_len))
304             {
305                 continue;
306             }
307         }
308 
309         /* put newly generated quotient term back into the heap if neccesary */
310         if (s > 1)
311         {
312             i = 1;
313             x = chain + i;
314             x->i = i;
315             x->j = q_len;
316             x->next = NULL;
317             hind[x->i] = 2*(x->j + 1) + 0;
318             _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
319                                                  &next_loc, &heap_len, maskhi);
320         }
321         s = 1;
322         q_len++;
323     }
324 
325 cleanup:
326 
327     fmpz_clear(acc_lg);
328     fmpz_clear(r);
329 
330    (*polyq) = q_coeff;
331    (*expq) = q_exp;
332    (*polyr) = r_coeff;
333    (*expr) = r_exp;
334 
335    /* set remainder poly length */
336    (*lenr) = r_len;
337 
338     TMP_END;
339 
340     return q_len;
341 
342 exp_overflow:
343     for (i = 0; i < q_len; i++)
344         _fmpz_demote(q_coeff + i);
345     for (i = 0; i < r_len; i++)
346         _fmpz_demote(r_coeff + i);
347     q_len = 0;
348     r_len = 0;
349     goto cleanup;
350 }
351 
352 
353 
354 
_fmpz_mpoly_divrem_monagan_pearce(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,slong N,const ulong * cmpmask)355 slong _fmpz_mpoly_divrem_monagan_pearce(slong * lenr,
356   fmpz ** polyq, ulong ** expq, slong * allocq, fmpz ** polyr,
357                   ulong ** expr, slong * allocr, const fmpz * poly2,
358    const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3,
359                    slong len3, slong bits, slong N, const ulong * cmpmask)
360 {
361     slong i, j, q_len, r_len, s;
362     slong next_loc;
363     slong heap_len = 2; /* heap zero index unused */
364     mpoly_heap_s * heap;
365     mpoly_heap_t * chain;
366     slong * store, * store_base;
367     mpoly_heap_t * x;
368     fmpz * q_coeff = *polyq;
369     fmpz * r_coeff = *polyr;
370     ulong * q_exp = *expq;
371     ulong * r_exp = *expr;
372     ulong * exp, * exps;
373     ulong ** exp_list;
374     slong exp_next;
375     ulong mask;
376     fmpz_t r, acc_lg;
377     ulong acc_sm[3];
378     slong * hind;
379     int lt_divides, small;
380     slong bits2, bits3;
381     ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
382     TMP_INIT;
383 
384     if (N == 1)
385         return _fmpz_mpoly_divrem_monagan_pearce1(lenr, polyq, expq, allocq,
386                                      polyr, expr, allocr, poly2, exp2, len2,
387                                           poly3, exp3, len3, bits, cmpmask[0]);
388 
389     TMP_START;
390 
391     fmpz_init(acc_lg);
392     fmpz_init(r);
393 
394     /* whether intermediate computations q - a*b will fit in three words */
395     bits2 = _fmpz_vec_max_bits(poly2, len2);
396     bits3 = _fmpz_vec_max_bits(poly3, len3);
397     /* allow one bit for sign, one bit for subtraction */
398     small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) + FLINT_BITS - 2)
399          && FLINT_ABS(bits3) <= FLINT_BITS - 2;
400 
401 
402     /* alloc array of heap nodes which can be chained together */
403     next_loc = len3 + 4;   /* something bigger than heap can ever be */
404     heap = (mpoly_heap_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap_s));
405     chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
406     store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
407 
408     /* array of exponent vectors, each of "N" words */
409     exps = (ulong *) TMP_ALLOC(len3*N*sizeof(ulong));
410     /* list of pointers to available exponent vectors */
411     exp_list = (ulong **) TMP_ALLOC(len3*sizeof(ulong *));
412     /* space to save copy of current exponent vector */
413     exp = (ulong *) TMP_ALLOC(N*sizeof(ulong));
414     /* set up list of available exponent vectors */
415     exp_next = 0;
416     for (i = 0; i < len3; i++)
417         exp_list[i] = exps + i*N;
418 
419     /* space for flagged heap indicies */
420     hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
421     for (i = 0; i < len3; i++)
422         hind[i] = 1;
423 
424     /* mask with high bit set in each word of each field of exponent vector */
425     mask = 0;
426     for (i = 0; i < FLINT_BITS/bits; i++)
427         mask = (mask << bits) + (UWORD(1) << (bits - 1));
428 
429     /* quotient and remainder poly indices start at -1 */
430     q_len = WORD(0);
431     r_len = WORD(0);
432 
433     /* s is the number of terms * (latest quotient) we should put into heap */
434     s = len3;
435 
436     /* insert (-1, 0, exp2[0]) into heap */
437     x = chain + 0;
438     x->i = -WORD(1);
439     x->j = 0;
440     x->next = NULL;
441     heap[1].next = x;
442     heap[1].exp = exp_list[exp_next++];
443     mpoly_monomial_set(heap[1].exp, exp2, N);
444 
445     /* precompute leading cofficient info in "small" case */
446     if (small)
447     {
448         lc_abs = FLINT_ABS(poly3[0]);
449         lc_sign = FLINT_SIGN_EXT(poly3[0]);
450         count_leading_zeros(lc_norm, lc_abs);
451         lc_n = lc_abs << lc_norm;
452         invert_limb(lc_i, lc_n);
453     }
454 
455     while (heap_len > 1)
456     {
457         mpoly_monomial_set(exp, heap[1].exp, N);
458 
459         if (bits <= FLINT_BITS)
460         {
461             if (mpoly_monomial_overflows(exp, N, mask))
462                 goto exp_overflow2;
463         }
464         else
465         {
466             if (mpoly_monomial_overflows_mp(exp, N, bits))
467                 goto exp_overflow2;
468         }
469 
470         _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, N);
471 
472         if (bits <= FLINT_BITS)
473             lt_divides = mpoly_monomial_divides(q_exp + q_len*N, exp, exp3, N, mask);
474         else
475             lt_divides = mpoly_monomial_divides_mp(q_exp + q_len*N, exp, exp3, N, bits);
476 
477         /* take nodes from heap with exponent matching exp */
478         if (small)
479         {
480             acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
481             do
482             {
483                 exp_list[--exp_next] = heap[1].exp;
484                 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
485                 do
486                 {
487                     *store++ = x->i;
488                     *store++ = x->j;
489                     if (x->i != -WORD(1))
490                         hind[x->i] |= WORD(1);
491 
492                     if (x->i == -WORD(1))
493                         _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
494                     else
495                         _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
496                 } while ((x = x->next) != NULL);
497             } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
498         } else
499         {
500             fmpz_zero(acc_lg);
501             do
502             {
503                 exp_list[--exp_next] = heap[1].exp;
504                 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
505                 do
506                 {
507                     *store++ = x->i;
508                     *store++ = x->j;
509                     if (x->i != -WORD(1))
510                         hind[x->i] |= WORD(1);
511 
512                     if (x->i == -WORD(1))
513                         fmpz_add(acc_lg, acc_lg, poly2 + x->j);
514                     else
515                         fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
516                 } while ((x = x->next) != NULL);
517             } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
518         }
519 
520         /* process nodes taken from the heap */
521         while (store > store_base)
522         {
523             j = *--store;
524             i = *--store;
525 
526             if (i == -WORD(1))
527             {
528                 /* take next dividend term */
529                 if (j + 1 < len2)
530                 {
531                     x = chain + 0;
532                     x->i = i;
533                     x->j = j + 1;
534                     x->next = NULL;
535                     mpoly_monomial_set(exp_list[exp_next], exp2 + x->j*N, N);
536                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
537                                              &next_loc, &heap_len, N, cmpmask);
538                 }
539             } else
540             {
541                 /* should we go right? */
542                 if (  (i + 1 < len3)
543                    && (hind[i + 1] == 2*j + 1)
544                    )
545                 {
546                     x = chain + i + 1;
547                     x->i = i + 1;
548                     x->j = j;
549                     x->next = NULL;
550                     hind[x->i] = 2*(x->j + 1) + 0;
551 
552                     if (bits <= FLINT_BITS)
553                         mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
554                                                             q_exp + x->j*N, N);
555                     else
556                         mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
557                                                             q_exp + x->j*N, N);
558 
559                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
560                                              &next_loc, &heap_len, N, cmpmask);
561                 }
562                 /* should we go up? */
563                 if (j + 1 == q_len)
564                 {
565                     s++;
566                 } else if (  ((hind[i] & 1) == 1)
567                           && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
568                           )
569                 {
570                     x = chain + i;
571                     x->i = i;
572                     x->j = j + 1;
573                     x->next = NULL;
574                     hind[x->i] = 2*(x->j + 1) + 0;
575 
576                     if (bits <= FLINT_BITS)
577                         mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
578                                                            q_exp   + x->j*N, N);
579                     else
580                         mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
581                                                            q_exp   + x->j*N, N);
582 
583                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
584                                              &next_loc, &heap_len, N, cmpmask);
585                 }
586             }
587         }
588 
589         /* try to divide accumulated term by leading term */
590         if (small)
591         {
592             ulong d0, d1, ds = acc_sm[2];
593 
594             /* d1:d0 = abs(acc_sm[1:0]) assuming ds is sign extension of acc_sm[1] */
595             sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
596 
597             if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
598             {
599                 continue;
600             }
601             if (!lt_divides)
602             {
603                 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
604                 fmpz_set_signed_uiuiui(r_coeff + r_len, acc_sm[2], acc_sm[1], acc_sm[0]);
605                 mpoly_monomial_set(r_exp + r_len*N, exp, N);
606                 r_len++;
607                 continue;
608             }
609             if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
610             {
611                 ulong qq, rr, nhi, nlo;
612                 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
613                 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
614                 nlo = d0 << lc_norm;
615                 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
616                 rr = rr >> lc_norm;
617                 if (rr != 0)
618                 {
619                     _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
620                     if (ds == 0)
621                         fmpz_set_ui(r_coeff + r_len, rr);
622                     else
623                         fmpz_neg_ui(r_coeff + r_len, rr);
624                     mpoly_monomial_set(r_exp + r_len*N, exp, N);
625                     r_len++;
626                 }
627 
628                 if (qq == 0)
629                     continue;
630 
631                 if (qq <= COEFF_MAX)
632                 {
633                     _fmpz_demote(q_coeff + q_len);
634                     q_coeff[q_len] = qq;
635                     if (ds != lc_sign)
636                         q_coeff[q_len] = -q_coeff[q_len];
637                 }
638                 else
639                 {
640                     small = 0;
641                     fmpz_set_ui(q_coeff + q_len, qq);
642                     if (ds != lc_sign)
643                         fmpz_neg(q_coeff + q_len, q_coeff + q_len);
644                 }
645             }
646             else
647             {
648                 small = 0;
649                 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
650                 goto large_lt_divides;
651             }
652         }
653         else
654         {
655             if (fmpz_is_zero(acc_lg))
656             {
657                 continue;
658             }
659             if (!lt_divides)
660             {
661                 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
662                 fmpz_set(r_coeff + r_len, acc_lg);
663                 mpoly_monomial_set(r_exp + r_len*N, exp, N);
664                 r_len++;
665                 continue;
666             }
667 large_lt_divides:
668             fmpz_fdiv_qr(q_coeff + q_len, r, acc_lg, poly3 + 0);
669             if (!fmpz_is_zero(r))
670             {
671                 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
672                 fmpz_set(r_coeff + r_len, r);
673                 mpoly_monomial_set(r_exp + r_len*N, exp, N);
674                 r_len++;
675             }
676             if (fmpz_is_zero(q_coeff + q_len))
677             {
678                 continue;
679             }
680         }
681 
682         /* put newly generated quotient term back into the heap if neccesary */
683         if (s > 1)
684         {
685             i = 1;
686             x = chain + i;
687             x->i = i;
688             x->j = q_len;
689             x->next = NULL;
690             hind[x->i] = 2*(x->j + 1) + 0;
691 
692             if (bits <= FLINT_BITS)
693                 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
694                                                       q_exp + x->j*N, N);
695             else
696                 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
697                                                          q_exp + x->j*N, N);
698 
699             exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
700                                              &next_loc, &heap_len, N, cmpmask);
701         }
702         s = 1;
703         q_len++;
704     }
705 
706 cleanup2:
707 
708     fmpz_clear(acc_lg);
709     fmpz_clear(r);
710 
711     (*polyq) = q_coeff;
712     (*expq) = q_exp;
713     (*polyr) = r_coeff;
714     (*expr) = r_exp;
715 
716     (*lenr) = r_len;
717 
718     TMP_END;
719 
720     return q_len;
721 
722 exp_overflow2:
723     for (i = 0; i < q_len; i++)
724         _fmpz_demote(q_coeff + i);
725     for (i = 0; i < r_len; i++)
726         _fmpz_demote(r_coeff + i);
727     q_len = 0;
728     r_len = 0;
729     goto cleanup2;
730 }
731 
fmpz_mpoly_divrem_monagan_pearce(fmpz_mpoly_t q,fmpz_mpoly_t r,const fmpz_mpoly_t poly2,const fmpz_mpoly_t poly3,const fmpz_mpoly_ctx_t ctx)732 void fmpz_mpoly_divrem_monagan_pearce(fmpz_mpoly_t q, fmpz_mpoly_t r,
733                   const fmpz_mpoly_t poly2, const fmpz_mpoly_t poly3,
734                                                     const fmpz_mpoly_ctx_t ctx)
735 {
736    slong exp_bits, N, lenq = 0, lenr = 0;
737    ulong * exp2 = poly2->exps, * exp3 = poly3->exps;
738    ulong * cmpmask;
739    int free2 = 0, free3 = 0;
740    fmpz_mpoly_t temp1, temp2;
741    fmpz_mpoly_struct * tq, * tr;
742     TMP_INIT;
743 
744    /* check divisor is nonzero */
745    if (poly3->length == 0)
746       flint_throw(FLINT_DIVZERO, "Divide by zero in fmpz_mpoly_divrem_monagan_pearce");
747 
748    /* dividend zero, write out quotient and remainder */
749    if (poly2->length == 0)
750    {
751       fmpz_mpoly_zero(q, ctx);
752       fmpz_mpoly_zero(r, ctx);
753 
754       return;
755    }
756 
757     TMP_START;
758 
759     exp_bits = FLINT_MAX(poly2->bits, poly3->bits);
760     exp_bits = mpoly_fix_bits(exp_bits, ctx->minfo);
761 
762     N = mpoly_words_per_exp(exp_bits, ctx->minfo);
763     cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
764     mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
765 
766    /* ensure input exponents packed to same size as output exponents */
767    if (exp_bits > poly2->bits)
768    {
769       free2 = 1;
770       exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
771       mpoly_repack_monomials(exp2, exp_bits, poly2->exps, poly2->bits,
772                                                     poly2->length, ctx->minfo);
773    }
774 
775    if (exp_bits > poly3->bits)
776    {
777       free3 = 1;
778       exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
779       mpoly_repack_monomials(exp3, exp_bits, poly3->exps, poly3->bits,
780                                                     poly3->length, ctx->minfo);
781    }
782 
783    /* check divisor leading monomial is at most that of the dividend */
784    if (mpoly_monomial_lt(exp2, exp3, N, cmpmask))
785    {
786       fmpz_mpoly_set(r, poly2, ctx);
787       fmpz_mpoly_zero(q, ctx);
788       goto cleanup3;
789    }
790 
791    /* take care of aliasing */
792    if (q == poly2 || q == poly3)
793    {
794       fmpz_mpoly_init2(temp1, poly2->length/poly3->length + 1, ctx);
795       fmpz_mpoly_fit_bits(temp1, exp_bits, ctx);
796       temp1->bits = exp_bits;
797       tq = temp1;
798    } else
799    {
800       fmpz_mpoly_fit_length(q, poly2->length/poly3->length + 1, ctx);
801       fmpz_mpoly_fit_bits(q, exp_bits, ctx);
802       q->bits = exp_bits;
803       tq = q;
804    }
805 
806    if (r == poly2 || r == poly3)
807    {
808       fmpz_mpoly_init2(temp2, poly3->length, ctx);
809       fmpz_mpoly_fit_bits(temp2, exp_bits, ctx);
810       temp2->bits = exp_bits;
811       tr = temp2;
812    } else
813    {
814       fmpz_mpoly_fit_length(r, poly3->length, ctx);
815       fmpz_mpoly_fit_bits(r, exp_bits, ctx);
816       r->bits = exp_bits;
817       tr = r;
818    }
819 
820    /* do division with remainder */
821    while ((lenq = _fmpz_mpoly_divrem_monagan_pearce(&lenr, &tq->coeffs, &tq->exps,
822          &tq->alloc, &tr->coeffs, &tr->exps, &tr->alloc, poly2->coeffs, exp2,
823          poly2->length, poly3->coeffs, exp3, poly3->length, exp_bits,
824                                                        N, cmpmask)) == 0
825          && lenr == 0)
826    {
827       ulong * old_exp2 = exp2, * old_exp3 = exp3;
828       slong old_exp_bits = exp_bits;
829 
830       exp_bits = mpoly_fix_bits(exp_bits + 1, ctx->minfo);
831 
832       N = mpoly_words_per_exp(exp_bits, ctx->minfo);
833       cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
834       mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
835 
836       exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
837       mpoly_repack_monomials(exp2, exp_bits, old_exp2, old_exp_bits,
838                                                     poly2->length, ctx->minfo);
839 
840       exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
841       mpoly_repack_monomials(exp3, exp_bits, old_exp3, old_exp_bits,
842                                                     poly3->length, ctx->minfo);
843 
844       if (free2)
845          flint_free(old_exp2);
846 
847       if (free3)
848          flint_free(old_exp3);
849 
850       free2 = free3 = 1;
851 
852       fmpz_mpoly_fit_bits(tq, exp_bits, ctx);
853       tq->bits = exp_bits;
854 
855       fmpz_mpoly_fit_bits(tr, exp_bits, ctx);
856       tr->bits = exp_bits;
857    }
858 
859    /* deal with aliasing */
860    if (q == poly2 || q == poly3)
861    {
862       fmpz_mpoly_swap(temp1, q, ctx);
863 
864       fmpz_mpoly_clear(temp1, ctx);
865    }
866 
867    if (r == poly2 || r == poly3)
868    {
869       fmpz_mpoly_swap(temp2, r, ctx);
870 
871       fmpz_mpoly_clear(temp2, ctx);
872    }
873 
874    _fmpz_mpoly_set_length(q, lenq, ctx);
875    _fmpz_mpoly_set_length(r, lenr, ctx);
876 
877 cleanup3:
878 
879    if (free2)
880       flint_free(exp2);
881 
882    if (free3)
883       flint_free(exp3);
884 
885     TMP_END;
886 }
887