1 /*
2     Copyright (C) 2018 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include <gmp.h>
13 #include <stdlib.h>
14 #include "flint.h"
15 #include "fmpz.h"
16 #include "fmpz_mpoly.h"
17 #include "longlong.h"
18 
19 
_fmpz_mpoly_quasidivrem_heap1(fmpz_t scale,slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,ulong maskhi)20 slong _fmpz_mpoly_quasidivrem_heap1(fmpz_t scale, slong * lenr,
21   fmpz ** polyq, ulong ** expq, slong * allocq, fmpz ** polyr,
22                   ulong ** expr, slong * allocr, const fmpz * poly2,
23    const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3,
24                    slong len3, slong bits, ulong maskhi)
25 {
26     slong i, j, s = len3;
27     slong q_len = 0, r_len = 0;
28     slong next_loc;
29     slong heap_len = 2; /* heap zero index unused */
30     mpoly_heap1_s * heap;
31     mpoly_heap_t * chain;
32     slong * store, * store_base;
33     mpoly_heap_t * x;
34     slong * hind;
35     fmpz * q_coeff = *polyq;
36     fmpz * r_coeff = *polyr;
37     ulong * q_exp = *expq;
38     ulong * r_exp = *expr;
39     ulong acc_sm[3]; /* for accumulating coefficients */
40     ulong mask, exp;
41     ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
42     fmpz_t lc_abs_lg, ns, gcd, acc_lg, r, tp;
43     slong bits2, bits3;
44     int lt_divides, scaleis1, small;
45     fmpz * qs, * rs;
46     slong qs_alloc, rs_alloc;
47     TMP_INIT;
48 
49     TMP_START;
50 
51     fmpz_init(lc_abs_lg);
52     fmpz_init(acc_lg);
53     fmpz_init(r);
54     fmpz_init(tp);
55     fmpz_init(ns);
56     fmpz_init(gcd);
57     fmpz_set_ui(scale, 1);
58     scaleis1 = 1;
59 
60     qs_alloc = 64;
61     qs = (fmpz *) flint_calloc(qs_alloc, sizeof(fmpz));
62     rs_alloc = 64;
63     rs = (fmpz *) flint_calloc(rs_alloc, sizeof(fmpz));
64 
65     /* whether intermediate computations q - a*b will fit in three words */
66     bits2 = _fmpz_vec_max_bits(poly2, len2);
67     bits3 = _fmpz_vec_max_bits(poly3, len3);
68     /* allow one bit for sign, one bit for subtraction */
69     small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) +
70            FLINT_BITS - 2) && FLINT_ABS(bits3) <= FLINT_BITS - 2;
71 
72     next_loc = len3 + 4;   /* something bigger than heap can ever be */
73     heap = (mpoly_heap1_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap1_s));
74     chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
75     store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
76 
77     /* space for flagged heap indicies */
78     hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
79     for (i = 0; i < len3; i++)
80         hind[i] = 1;
81 
82     /* mask with high bit set in each field of exponent vector */
83     mask = 0;
84     for (i = 0; i < FLINT_BITS/bits; i++)
85         mask = (mask << bits) + (UWORD(1) << (bits - 1));
86 
87     /* insert (-1, 0, exp2[0]) into heap */
88     x = chain + 0;
89     x->i = -WORD(1);
90     x->j = 0;
91     x->next = NULL;
92     HEAP_ASSIGN(heap[1], exp2[0], x);
93 
94     /* precompute leading cofficient info */
95     fmpz_abs(lc_abs_lg, poly3 + 0);
96     if (small)
97     {
98         lc_abs = FLINT_ABS(poly3[0]);
99         lc_sign = FLINT_SIGN_EXT(poly3[0]);
100         count_leading_zeros(lc_norm, lc_abs);
101         lc_n = lc_abs << lc_norm;
102         invert_limb(lc_i, lc_n);
103     }
104 
105     while (heap_len > 1)
106     {
107         /* make sure quotient array has space for q_len + 1 entries */
108         _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, 1);
109         if (q_len + 1 > qs_alloc)
110         {
111             slong len;
112             len = FLINT_MAX(q_len + 1, 2*qs_alloc);
113             qs = (fmpz *) flint_realloc(qs, len*sizeof(fmpz));
114             if (len > qs_alloc)
115                 flint_mpn_zero((mp_ptr) (qs + qs_alloc), len - qs_alloc);
116             qs_alloc = len;
117         }
118         /* make sure remainder array has space for r_len + 1 entries */
119         _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
120         if (r_len + 1 > rs_alloc)
121         {
122             slong len;
123             len = FLINT_MAX(r_len + 1, 2*rs_alloc);
124             rs = (fmpz *) flint_realloc(rs, len*sizeof(fmpz));
125             if (len > rs_alloc)
126                 flint_mpn_zero((mp_ptr) (rs + rs_alloc), len - rs_alloc);
127             rs_alloc = len;
128         }
129 
130         exp = heap[1].exp;
131         if (mpoly_monomial_overflows1(exp, mask))
132             goto exp_overflow;
133 
134         lt_divides = mpoly_monomial_divides1(q_exp + q_len, exp, exp3[0], mask);
135 
136         /* accumulate terms from highest terms on heap */
137         if (small)
138             acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
139         else
140             fmpz_zero(acc_lg);
141 
142         while (heap_len > 1 && heap[1].exp == exp)
143         {
144             x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
145             do
146             {
147                 *store++ = x->i;
148                 *store++ = x->j;
149                 if (x->i != -WORD(1))
150                     hind[x->i] |= WORD(1);
151 
152                 if (small)
153                 {
154                     if (x->i == -WORD(1))
155                        _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
156                     else
157                        _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
158                 } else if (scaleis1)
159                 {
160                     if (x->i == -WORD(1))
161                         fmpz_add(acc_lg, acc_lg, poly2 + x->j);
162                     else
163                         fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
164                 } else
165                 {
166                     if (x->i == -WORD(1))
167                         fmpz_addmul(acc_lg, scale, poly2 + x->j);
168                     else
169                     {
170                         fmpz_divexact(tp, scale, qs + x->j);
171                         fmpz_mul(tp, tp, q_coeff + x->j);
172                         fmpz_submul(acc_lg, poly3 + x->i, tp);
173                     }
174                 }
175             } while ((x = x->next) != NULL);
176         }
177 
178         /* process popped nodes */
179         while (store > store_base)
180         {
181             j = *--store;
182             i = *--store;
183             if (i == -WORD(1))
184             {
185                 if (j + 1 < len2)
186                 {
187                     x = chain + 0;
188                     x->i = i;
189                     x->j = j + 1;
190                     x->next = NULL;
191                     _mpoly_heap_insert1(heap, exp2[x->j], x,
192                                                  &next_loc, &heap_len, maskhi);
193                 }
194             } else
195             {
196                 if (  (i + 1 < len3)
197                    && (hind[i + 1] == 2*j + 1)
198                    )
199                 {
200                     x = chain + i + 1;
201                     x->i = i + 1;
202                     x->j = j;
203                     x->next = NULL;
204                     hind[x->i] = 2*(x->j + 1) + 0;
205                     _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
206                                                  &next_loc, &heap_len, maskhi);
207                 }
208                 if (j + 1 == q_len)
209                 {
210                     s++;
211                 } else if (  ((hind[i] & 1) == 1)
212                           && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
213                           )
214                 {
215                     x = chain + i;
216                     x->i = i;
217                     x->j = j + 1;
218                     x->next = NULL;
219                     hind[x->i] = 2*(x->j + 1) + 0;
220                     _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
221                                                  &next_loc, &heap_len, maskhi);
222                 }
223             }
224         }
225 
226         /* try to divide accumulated term by leading term of divisor */
227         if (small)
228         {
229             ulong d0, d1, ds = acc_sm[2];
230             sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
231 
232             if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
233                 continue;
234 
235             if (!lt_divides)
236             {
237                 fmpz_set_signed_uiuiui(r_coeff + r_len, acc_sm[2], acc_sm[1], acc_sm[0]);
238                 r_exp[r_len] = exp;
239                 fmpz_one(rs + r_len);
240                 r_len++;
241                 continue;
242             }
243             if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
244             {
245                 ulong qq, rr, nhi, nlo;
246                 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
247                 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
248                 nlo = d0 << lc_norm;
249                 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
250                 if (rr == 0 && qq <= COEFF_MAX)
251                 {
252                     _fmpz_demote(q_coeff + q_len);
253                     q_coeff[q_len] = qq;
254                     if (ds != lc_sign)
255                         q_coeff[q_len] = -q_coeff[q_len];
256                     fmpz_one(qs + q_len);
257                 }
258                 else
259                 {
260                     small = 0;
261                     fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
262                     goto large_lt_divides;
263                 }
264             }
265             else
266             {
267                 small = 0;
268                 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
269                 goto large_lt_divides;
270             }
271         }
272         else
273         {
274             if (fmpz_is_zero(acc_lg))
275                 continue;
276 
277             if (!lt_divides)
278             {
279                 fmpz_set(r_coeff + r_len, acc_lg);
280                 r_exp[r_len] = exp;
281                 fmpz_set(rs + r_len, scale);
282                 r_len++;
283                 continue;
284             }
285 large_lt_divides:
286             fmpz_gcd(gcd, acc_lg, poly3 + 0);
287             fmpz_divexact(ns, lc_abs_lg, gcd);
288             if (!fmpz_is_one(ns))
289                 scaleis1 = 0;
290             fmpz_mul(q_coeff + q_len, ns, acc_lg);
291             fmpz_divexact(q_coeff + q_len, q_coeff + q_len, poly3 + 0);
292             fmpz_mul(qs + q_len, scale, ns);
293             fmpz_set(scale, qs + q_len);
294         }
295 
296         if (s > 1)
297         {
298             i = 1;
299             x = chain + i;
300             x->i = i;
301             x->j = q_len;
302             x->next = NULL;
303             hind[x->i] = 2*(x->j + 1) + 0;
304             _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
305                                                  &next_loc, &heap_len, maskhi);
306         }
307         q_len++;
308         s = 1;
309     }
310 
311 cleanup2:
312 
313     (*polyq) = q_coeff;
314     (*expq)  = q_exp;
315     (*polyr) = r_coeff;
316     (*expr)  = r_exp;
317     (*lenr)  = r_len;
318 
319     TMP_END;
320 
321     for (i = 0; i < q_len; i++)
322     {
323         fmpz_divexact(tp, scale, qs + i);
324         fmpz_mul(q_coeff + i, q_coeff + i, tp);
325     }
326     for (i = 0; i < qs_alloc; i++)
327         fmpz_clear(qs + i);
328 
329     for (i = 0; i < r_len; i++)
330     {
331         fmpz_divexact(tp, scale, rs + i);
332         fmpz_mul(r_coeff + i, r_coeff + i, tp);
333     }
334     for (i = 0; i < rs_alloc; i++)
335         fmpz_clear(rs + i);
336 
337     flint_free(qs);
338     flint_free(rs);
339 
340     fmpz_clear(lc_abs_lg);
341     fmpz_clear(acc_lg);
342     fmpz_clear(r);
343     fmpz_clear(tp);
344     fmpz_clear(ns);
345     fmpz_clear(gcd);
346 
347     return q_len;
348 
349 exp_overflow:
350     for (i = 0; i < q_len; i++)
351         _fmpz_demote(q_coeff + i);
352     for (i = 0; i < r_len; i++)
353         _fmpz_demote(r_coeff + i);
354     q_len = 0;
355     r_len = 0;
356     goto cleanup2;
357 }
358 
359 
_fmpz_mpoly_quasidivrem_heap(fmpz_t scale,slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,slong N,const ulong * cmpmask)360 slong _fmpz_mpoly_quasidivrem_heap(fmpz_t scale, slong * lenr,
361   fmpz ** polyq, ulong ** expq, slong * allocq, fmpz ** polyr,
362                   ulong ** expr, slong * allocr, const fmpz * poly2,
363    const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3,
364                    slong len3, slong bits, slong N, const ulong * cmpmask)
365 {
366     slong i, j, s = len3;
367     slong q_len = 0, r_len = 0;
368     slong next_loc;
369     slong heap_len = 2; /* heap zero index unused */
370     mpoly_heap_s * heap;
371     mpoly_heap_t * chain;
372     slong * store, * store_base;
373     mpoly_heap_t * x;
374     slong * hind;
375     fmpz * q_coeff = *polyq;
376     fmpz * r_coeff = *polyr;
377     ulong * q_exp = *expq;
378     ulong * r_exp = *expr;
379     ulong * exp, * exps;
380     ulong ** exp_list;
381     ulong acc_sm[3]; /* for accumulating coefficients */
382     slong exp_next;
383     ulong mask;
384     ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
385     fmpz_t lc_abs_lg, ns, gcd, acc_lg, r, tp;
386     slong bits2, bits3;
387     int lt_divides, scaleis1, small;
388     fmpz * qs, * rs;
389     slong qs_alloc, rs_alloc;
390 
391     TMP_INIT;
392 
393     if (N == 1)
394         return _fmpz_mpoly_quasidivrem_heap1(scale, lenr, polyq, expq,
395                             allocq, polyr, expr, allocr, poly2, exp2, len2,
396                                           poly3, exp3, len3, bits, cmpmask[0]);
397 
398     TMP_START;
399 
400     fmpz_init(lc_abs_lg);
401     fmpz_init(acc_lg);
402     fmpz_init(r);
403     fmpz_init(tp);
404     fmpz_init(ns);
405     fmpz_init(gcd);
406     fmpz_set_ui(scale, 1);
407     scaleis1 = 1;
408 
409     qs_alloc = 64;
410     qs = (fmpz *) flint_calloc(qs_alloc, sizeof(fmpz));
411     rs_alloc = 64;
412     rs = (fmpz *) flint_calloc(rs_alloc, sizeof(fmpz));
413 
414     /* whether intermediate computations q - a*b will fit in three words */
415     bits2 = _fmpz_vec_max_bits(poly2, len2);
416     bits3 = _fmpz_vec_max_bits(poly3, len3);
417     /* allow one bit for sign, one bit for subtraction */
418     small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) +
419            FLINT_BITS - 2) && FLINT_ABS(bits3) <= FLINT_BITS - 2;
420 
421     next_loc = len3 + 4;   /* something bigger than heap can ever be */
422     heap = (mpoly_heap_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap_s));
423     chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
424     store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
425 
426     /* array of exponents of N words each */
427     exps = (ulong *) TMP_ALLOC(len3*N*sizeof(ulong));
428     /* array of pointers to unused exponent vectors */
429     exp_list = (ulong **) TMP_ALLOC(len3*sizeof(ulong *));
430     /* space to save copy of current exponent vector */
431     exp = (ulong *) TMP_ALLOC(N*sizeof(ulong));
432 
433     /* set up list of available exponent vectors */
434     exp_next = 0;
435     for (i = 0; i < len3; i++)
436         exp_list[i] = exps + i*N;
437 
438     /* space for flagged heap indicies */
439     hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
440     for (i = 0; i < len3; i++)
441         hind[i] = 1;
442 
443     /* mask with high bit set in each field of exponent vector */
444     mask = 0;
445     for (i = 0; i < FLINT_BITS/bits; i++)
446         mask = (mask << bits) + (UWORD(1) << (bits - 1));
447 
448     /* insert (-1, 0, exp2[0]) into heap */
449     x = chain + 0;
450     x->i = -WORD(1);
451     x->j = 0;
452     x->next = NULL;
453 
454     heap[1].next = x;
455     heap[1].exp = exp_list[exp_next++];
456 
457     mpoly_monomial_set(heap[1].exp, exp2, N);
458 
459     /* precompute leading cofficient info */
460     fmpz_abs(lc_abs_lg, poly3 + 0);
461     if (small)
462     {
463         lc_abs = FLINT_ABS(poly3[0]);
464         lc_sign = FLINT_SIGN_EXT(poly3[0]);
465         count_leading_zeros(lc_norm, lc_abs);
466         lc_n = lc_abs << lc_norm;
467         invert_limb(lc_i, lc_n);
468     }
469 
470     while (heap_len > 1)
471     {
472         /* make sure quotient array has space for q_len + 1 entries */
473         _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, N);
474         if (q_len + 1 > qs_alloc)
475         {
476             slong len;
477             len = FLINT_MAX(q_len + 1, 2*qs_alloc);
478             qs = (fmpz *) flint_realloc(qs, len*sizeof(fmpz));
479             if (len > qs_alloc)
480                 flint_mpn_zero((mp_ptr) (qs + qs_alloc), len - qs_alloc);
481             qs_alloc = len;
482         }
483         /* make sure remainder array has space for r_len + 1 entries */
484         _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
485         if (r_len + 1 > rs_alloc)
486         {
487             slong len;
488             len = FLINT_MAX(r_len + 1, 2*rs_alloc);
489             rs = (fmpz *) flint_realloc(rs, len*sizeof(fmpz));
490             if (len > rs_alloc)
491                 flint_mpn_zero((mp_ptr) (rs + rs_alloc), len - rs_alloc);
492             rs_alloc = len;
493         }
494 
495         mpoly_monomial_set(exp, heap[1].exp, N);
496         if (bits <= FLINT_BITS)
497         {
498             if (mpoly_monomial_overflows(exp, N, mask))
499                 goto exp_overflow;
500 
501             lt_divides = mpoly_monomial_divides(q_exp + q_len*N, exp, exp3, N, mask);
502         }
503         else
504         {
505             if (mpoly_monomial_overflows_mp(exp, N, bits))
506                 goto exp_overflow;
507 
508             lt_divides = mpoly_monomial_divides_mp(q_exp + q_len*N, exp, exp3, N, bits);
509         }
510 
511         /* accumulate terms from highest terms on heap */
512         if (small)
513             acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
514         else
515             fmpz_zero(acc_lg);
516 
517         while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N))
518         {
519             exp_list[--exp_next] = heap[1].exp;
520             x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
521             do
522             {
523                 *store++ = x->i;
524                 *store++ = x->j;
525                 if (x->i != -WORD(1))
526                     hind[x->i] |= WORD(1);
527 
528                 if (small)
529                 {
530                     if (x->i == -WORD(1))
531                        _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
532                     else
533                        _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
534                 } else if (scaleis1)
535                 {
536                     if (x->i == -WORD(1))
537                         fmpz_add(acc_lg, acc_lg, poly2 + x->j);
538                     else
539                         fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
540                 } else
541                 {
542                     if (x->i == -WORD(1))
543                         fmpz_addmul(acc_lg, scale, poly2 + x->j);
544                     else
545                     {
546                         fmpz_divexact(tp, scale, qs + x->j);
547                         fmpz_mul(tp, tp, q_coeff + x->j);
548                         fmpz_submul(acc_lg, poly3 + x->i, tp);
549                     }
550                 }
551             } while ((x = x->next) != NULL);
552         }
553 
554         /* process popped nodes */
555         while (store > store_base)
556         {
557             j = *--store;
558             i = *--store;
559             if (i == -WORD(1))
560             {
561                 if (j + 1 < len2)
562                 {
563                     x = chain + 0;
564                     x->i = i;
565                     x->j = j + 1;
566                     x->next = NULL;
567                     mpoly_monomial_set(exp_list[exp_next], exp2 + x->j*N, N);
568                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
569                                              &next_loc, &heap_len, N, cmpmask);
570                 }
571             } else
572             {
573                 if (  (i + 1 < len3)
574                    && (hind[i + 1] == 2*j + 1)
575                    )
576                 {
577                     x = chain + i + 1;
578                     x->i = i + 1;
579                     x->j = j;
580                     x->next = NULL;
581                     hind[x->i] = 2*(x->j + 1) + 0;
582 
583                     if (bits <= FLINT_BITS)
584                         mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
585                                                               q_exp + x->j*N, N);
586                     else
587                         mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
588                                                                  q_exp + x->j*N, N);
589 
590                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
591                                              &next_loc, &heap_len, N, cmpmask);
592                 }
593                 if (j + 1 == q_len)
594                 {
595                     s++;
596                 } else if (  ((hind[i] & 1) == 1)
597                           && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
598                           )
599                 {
600                     x = chain + i;
601                     x->i = i;
602                     x->j = j + 1;
603                     x->next = NULL;
604                     hind[x->i] = 2*(x->j + 1) + 0;
605 
606                     if (bits <= FLINT_BITS)
607                         mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
608                                                               q_exp + x->j*N, N);
609                     else
610                         mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
611                                                                  q_exp + x->j*N, N);
612 
613                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
614                                              &next_loc, &heap_len, N, cmpmask);
615                 }
616             }
617         }
618 
619         /* try to divide accumulated term by leading term of divisor */
620         if (small)
621         {
622             ulong d0, d1, ds = acc_sm[2];
623 
624             sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
625 
626             if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
627                 continue;
628 
629             if (!lt_divides)
630             {
631                 fmpz_set_signed_uiuiui(r_coeff + r_len, acc_sm[2], acc_sm[1], acc_sm[0]);
632                 mpoly_monomial_set(r_exp + r_len*N, exp, N);
633                 fmpz_one(rs + r_len);
634                 r_len++;
635                 continue;
636             }
637             if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
638             {
639                 ulong qq, rr, nhi, nlo;
640                 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
641                 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
642                 nlo = d0 << lc_norm;
643                 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
644                 if (rr == 0 && qq <= COEFF_MAX)
645                 {
646                     _fmpz_demote(q_coeff + q_len);
647                     q_coeff[q_len] = qq;
648                     if (ds != lc_sign)
649                         q_coeff[q_len] = -q_coeff[q_len];
650                     fmpz_one(qs + q_len);
651                 }
652                 else
653                 {
654                     small = 0;
655                     fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
656                     goto large_lt_divides;
657                 }
658             }
659             else
660             {
661                 small = 0;
662                 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
663                 goto large_lt_divides;
664             }
665         }
666         else
667         {
668             if (fmpz_is_zero(acc_lg))
669                 continue;
670 
671             if (!lt_divides)
672             {
673                 fmpz_set(r_coeff + r_len, acc_lg);
674                 mpoly_monomial_set(r_exp + r_len*N, exp, N);
675                 fmpz_set(rs + r_len, scale);
676                 r_len++;
677                 continue;
678             }
679 large_lt_divides:
680             fmpz_gcd(gcd, acc_lg, poly3 + 0);
681             fmpz_divexact(ns, lc_abs_lg, gcd);
682             if (!fmpz_is_one(ns))
683                 scaleis1 = 0;
684             fmpz_mul(q_coeff + q_len, ns, acc_lg);
685             fmpz_divexact(q_coeff + q_len, q_coeff + q_len, poly3 + 0);
686             fmpz_mul(qs + q_len, scale, ns);
687             fmpz_set(scale, qs + q_len);
688         }
689 
690         if (s > 1)
691         {
692             i = 1;
693             x = chain + i;
694             x->i = i;
695             x->j = q_len;
696             x->next = NULL;
697             hind[x->i] = 2*(x->j + 1) + 0;
698 
699             if (bits <= FLINT_BITS)
700                 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
701                                                       q_exp + x->j*N, N);
702             else
703                 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
704                                                          q_exp + x->j*N, N);
705 
706             exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
707                                              &next_loc, &heap_len, N, cmpmask);
708         }
709         q_len++;
710         s = 1;
711     }
712 
713 cleanup2:
714 
715     (*polyq) = q_coeff;
716     (*expq)  = q_exp;
717     (*polyr) = r_coeff;
718     (*expr)  = r_exp;
719     (*lenr)  = r_len;
720 
721     TMP_END;
722 
723     for (i = 0; i < q_len; i++)
724     {
725         fmpz_divexact(tp, scale, qs + i);
726         fmpz_mul(q_coeff + i, q_coeff + i, tp);
727     }
728     for (i = 0; i < qs_alloc; i++)
729         fmpz_clear(qs + i);
730 
731     for (i = 0; i < r_len; i++)
732     {
733         fmpz_divexact(tp, scale, rs + i);
734         fmpz_mul(r_coeff + i, r_coeff + i, tp);
735     }
736     for (i = 0; i < rs_alloc; i++)
737         fmpz_clear(rs + i);
738 
739     flint_free(qs);
740     flint_free(rs);
741 
742     fmpz_clear(lc_abs_lg);
743     fmpz_clear(acc_lg);
744     fmpz_clear(r);
745     fmpz_clear(tp);
746     fmpz_clear(ns);
747     fmpz_clear(gcd);
748 
749     return q_len;
750 
751 exp_overflow:
752     for (i = 0; i < q_len; i++)
753         _fmpz_demote(q_coeff + i);
754     for (i = 0; i < r_len; i++)
755         _fmpz_demote(r_coeff + i);
756     q_len = 0;
757     r_len = 0;
758     goto cleanup2;
759 }
760 
fmpz_mpoly_quasidivrem_heap(fmpz_t scale,fmpz_mpoly_t q,fmpz_mpoly_t r,const fmpz_mpoly_t poly2,const fmpz_mpoly_t poly3,const fmpz_mpoly_ctx_t ctx)761 void fmpz_mpoly_quasidivrem_heap(fmpz_t scale, fmpz_mpoly_t q, fmpz_mpoly_t r,
762                   const fmpz_mpoly_t poly2, const fmpz_mpoly_t poly3,
763                                                     const fmpz_mpoly_ctx_t ctx)
764 {
765     slong exp_bits, N, lenq = 0, lenr = 0;
766     ulong * exp2 = poly2->exps, * exp3 = poly3->exps;
767     ulong * cmpmask;
768     int free2 = 0, free3 = 0;
769     fmpz_mpoly_t temp1, temp2;
770     fmpz_mpoly_struct * tq, * tr;
771     TMP_INIT;
772 
773     /* check divisor is nonzero */
774     if (poly3->length == 0)
775         flint_throw(FLINT_DIVZERO, "Divide by zero in fmpz_mpoly_quasidivrem_heap");
776 
777     fmpz_set_ui(scale, 1);
778 
779     /* dividend zero, write out quotient and remainder */
780     if (poly2->length == 0)
781     {
782         fmpz_mpoly_zero(q, ctx);
783         fmpz_mpoly_zero(r, ctx);
784         return;
785     }
786 
787     TMP_START;
788     exp_bits = FLINT_MAX(poly2->bits, poly3->bits);
789     exp_bits = mpoly_fix_bits(exp_bits, ctx->minfo);
790 
791     N = mpoly_words_per_exp(exp_bits, ctx->minfo);
792     cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
793     mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
794 
795     /* ensure input exponents packed to same size as output exponents */
796     if (exp_bits > poly2->bits)
797     {
798         free2 = 1;
799         exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
800         mpoly_repack_monomials(exp2, exp_bits, poly2->exps, poly2->bits,
801                                                     poly2->length, ctx->minfo);
802     }
803 
804     if (exp_bits > poly3->bits)
805     {
806         free3 = 1;
807         exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
808         mpoly_repack_monomials(exp3, exp_bits, poly3->exps, poly3->bits,
809                                                     poly3->length, ctx->minfo);
810     }
811 
812     /* check divisor leading monomial is at most that of the dividend */
813     if (mpoly_monomial_lt(exp2, exp3, N, cmpmask))
814     {
815         fmpz_mpoly_set(r, poly2, ctx);
816         fmpz_mpoly_zero(q, ctx);
817         goto cleanup3;
818     }
819 
820    /* take care of aliasing */
821    if (q == poly2 || q == poly3)
822    {
823       fmpz_mpoly_init2(temp1, poly2->length/poly3->length + 1, ctx);
824       fmpz_mpoly_fit_bits(temp1, exp_bits, ctx);
825       temp1->bits = exp_bits;
826 
827       tq = temp1;
828    } else
829    {
830       fmpz_mpoly_fit_length(q, poly2->length/poly3->length + 1, ctx);
831       fmpz_mpoly_fit_bits(q, exp_bits, ctx);
832       q->bits = exp_bits;
833 
834       tq = q;
835    }
836 
837    if (r == poly2 || r == poly3)
838    {
839       fmpz_mpoly_init2(temp2, poly3->length, ctx);
840       fmpz_mpoly_fit_bits(temp2, exp_bits, ctx);
841       temp2->bits = exp_bits;
842 
843       tr = temp2;
844    } else
845    {
846       fmpz_mpoly_fit_length(r, poly3->length, ctx);
847       fmpz_mpoly_fit_bits(r, exp_bits, ctx);
848       r->bits = exp_bits;
849 
850       tr = r;
851    }
852 
853    /* do division with remainder */
854    while ((lenq = _fmpz_mpoly_quasidivrem_heap(scale, &lenr, &tq->coeffs, &tq->exps,
855          &tq->alloc, &tr->coeffs, &tr->exps, &tr->alloc, poly2->coeffs, exp2,
856          poly2->length, poly3->coeffs, exp3, poly3->length, exp_bits,
857                                                        N, cmpmask)) == 0
858          && lenr == 0)
859    {
860       ulong * old_exp2 = exp2, * old_exp3 = exp3;
861       slong old_exp_bits = exp_bits;
862 
863       exp_bits = mpoly_fix_bits(exp_bits + 1, ctx->minfo);
864 
865       N = mpoly_words_per_exp(exp_bits, ctx->minfo);
866       mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
867 
868       exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
869       mpoly_repack_monomials(exp2, exp_bits, old_exp2, old_exp_bits,
870                                                     poly2->length, ctx->minfo);
871 
872       exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
873       mpoly_repack_monomials(exp3, exp_bits, old_exp3, old_exp_bits,
874                                                     poly3->length, ctx->minfo);
875 
876       if (free2)
877          flint_free(old_exp2);
878 
879       if (free3)
880          flint_free(old_exp3);
881 
882       free2 = free3 = 1;
883 
884       fmpz_mpoly_fit_bits(tq, exp_bits, ctx);
885       tq->bits = exp_bits;
886 
887       fmpz_mpoly_fit_bits(tr, exp_bits, ctx);
888       tr->bits = exp_bits;
889    }
890 
891    /* deal with aliasing */
892    if (q == poly2 || q == poly3)
893    {
894       fmpz_mpoly_swap(temp1, q, ctx);
895 
896       fmpz_mpoly_clear(temp1, ctx);
897    }
898 
899    if (r == poly2 || r == poly3)
900    {
901       fmpz_mpoly_swap(temp2, r, ctx);
902 
903       fmpz_mpoly_clear(temp2, ctx);
904    }
905 
906    _fmpz_mpoly_set_length(q, lenq, ctx);
907    _fmpz_mpoly_set_length(r, lenr, ctx);
908 
909 cleanup3:
910 
911    if (free2)
912       flint_free(exp2);
913 
914    if (free3)
915       flint_free(exp3);
916 
917     TMP_END;
918 }
919