1 /*
2 Copyright (C) 2018 Daniel Schultz
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include <gmp.h>
13 #include <stdlib.h>
14 #include "flint.h"
15 #include "fmpz.h"
16 #include "fmpz_mpoly.h"
17 #include "longlong.h"
18
19
_fmpz_mpoly_quasidivrem_heap1(fmpz_t scale,slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,ulong maskhi)20 slong _fmpz_mpoly_quasidivrem_heap1(fmpz_t scale, slong * lenr,
21 fmpz ** polyq, ulong ** expq, slong * allocq, fmpz ** polyr,
22 ulong ** expr, slong * allocr, const fmpz * poly2,
23 const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3,
24 slong len3, slong bits, ulong maskhi)
25 {
26 slong i, j, s = len3;
27 slong q_len = 0, r_len = 0;
28 slong next_loc;
29 slong heap_len = 2; /* heap zero index unused */
30 mpoly_heap1_s * heap;
31 mpoly_heap_t * chain;
32 slong * store, * store_base;
33 mpoly_heap_t * x;
34 slong * hind;
35 fmpz * q_coeff = *polyq;
36 fmpz * r_coeff = *polyr;
37 ulong * q_exp = *expq;
38 ulong * r_exp = *expr;
39 ulong acc_sm[3]; /* for accumulating coefficients */
40 ulong mask, exp;
41 ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
42 fmpz_t lc_abs_lg, ns, gcd, acc_lg, r, tp;
43 slong bits2, bits3;
44 int lt_divides, scaleis1, small;
45 fmpz * qs, * rs;
46 slong qs_alloc, rs_alloc;
47 TMP_INIT;
48
49 TMP_START;
50
51 fmpz_init(lc_abs_lg);
52 fmpz_init(acc_lg);
53 fmpz_init(r);
54 fmpz_init(tp);
55 fmpz_init(ns);
56 fmpz_init(gcd);
57 fmpz_set_ui(scale, 1);
58 scaleis1 = 1;
59
60 qs_alloc = 64;
61 qs = (fmpz *) flint_calloc(qs_alloc, sizeof(fmpz));
62 rs_alloc = 64;
63 rs = (fmpz *) flint_calloc(rs_alloc, sizeof(fmpz));
64
65 /* whether intermediate computations q - a*b will fit in three words */
66 bits2 = _fmpz_vec_max_bits(poly2, len2);
67 bits3 = _fmpz_vec_max_bits(poly3, len3);
68 /* allow one bit for sign, one bit for subtraction */
69 small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) +
70 FLINT_BITS - 2) && FLINT_ABS(bits3) <= FLINT_BITS - 2;
71
72 next_loc = len3 + 4; /* something bigger than heap can ever be */
73 heap = (mpoly_heap1_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap1_s));
74 chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
75 store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
76
77 /* space for flagged heap indicies */
78 hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
79 for (i = 0; i < len3; i++)
80 hind[i] = 1;
81
82 /* mask with high bit set in each field of exponent vector */
83 mask = 0;
84 for (i = 0; i < FLINT_BITS/bits; i++)
85 mask = (mask << bits) + (UWORD(1) << (bits - 1));
86
87 /* insert (-1, 0, exp2[0]) into heap */
88 x = chain + 0;
89 x->i = -WORD(1);
90 x->j = 0;
91 x->next = NULL;
92 HEAP_ASSIGN(heap[1], exp2[0], x);
93
94 /* precompute leading cofficient info */
95 fmpz_abs(lc_abs_lg, poly3 + 0);
96 if (small)
97 {
98 lc_abs = FLINT_ABS(poly3[0]);
99 lc_sign = FLINT_SIGN_EXT(poly3[0]);
100 count_leading_zeros(lc_norm, lc_abs);
101 lc_n = lc_abs << lc_norm;
102 invert_limb(lc_i, lc_n);
103 }
104
105 while (heap_len > 1)
106 {
107 /* make sure quotient array has space for q_len + 1 entries */
108 _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, 1);
109 if (q_len + 1 > qs_alloc)
110 {
111 slong len;
112 len = FLINT_MAX(q_len + 1, 2*qs_alloc);
113 qs = (fmpz *) flint_realloc(qs, len*sizeof(fmpz));
114 if (len > qs_alloc)
115 flint_mpn_zero((mp_ptr) (qs + qs_alloc), len - qs_alloc);
116 qs_alloc = len;
117 }
118 /* make sure remainder array has space for r_len + 1 entries */
119 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
120 if (r_len + 1 > rs_alloc)
121 {
122 slong len;
123 len = FLINT_MAX(r_len + 1, 2*rs_alloc);
124 rs = (fmpz *) flint_realloc(rs, len*sizeof(fmpz));
125 if (len > rs_alloc)
126 flint_mpn_zero((mp_ptr) (rs + rs_alloc), len - rs_alloc);
127 rs_alloc = len;
128 }
129
130 exp = heap[1].exp;
131 if (mpoly_monomial_overflows1(exp, mask))
132 goto exp_overflow;
133
134 lt_divides = mpoly_monomial_divides1(q_exp + q_len, exp, exp3[0], mask);
135
136 /* accumulate terms from highest terms on heap */
137 if (small)
138 acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
139 else
140 fmpz_zero(acc_lg);
141
142 while (heap_len > 1 && heap[1].exp == exp)
143 {
144 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
145 do
146 {
147 *store++ = x->i;
148 *store++ = x->j;
149 if (x->i != -WORD(1))
150 hind[x->i] |= WORD(1);
151
152 if (small)
153 {
154 if (x->i == -WORD(1))
155 _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
156 else
157 _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
158 } else if (scaleis1)
159 {
160 if (x->i == -WORD(1))
161 fmpz_add(acc_lg, acc_lg, poly2 + x->j);
162 else
163 fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
164 } else
165 {
166 if (x->i == -WORD(1))
167 fmpz_addmul(acc_lg, scale, poly2 + x->j);
168 else
169 {
170 fmpz_divexact(tp, scale, qs + x->j);
171 fmpz_mul(tp, tp, q_coeff + x->j);
172 fmpz_submul(acc_lg, poly3 + x->i, tp);
173 }
174 }
175 } while ((x = x->next) != NULL);
176 }
177
178 /* process popped nodes */
179 while (store > store_base)
180 {
181 j = *--store;
182 i = *--store;
183 if (i == -WORD(1))
184 {
185 if (j + 1 < len2)
186 {
187 x = chain + 0;
188 x->i = i;
189 x->j = j + 1;
190 x->next = NULL;
191 _mpoly_heap_insert1(heap, exp2[x->j], x,
192 &next_loc, &heap_len, maskhi);
193 }
194 } else
195 {
196 if ( (i + 1 < len3)
197 && (hind[i + 1] == 2*j + 1)
198 )
199 {
200 x = chain + i + 1;
201 x->i = i + 1;
202 x->j = j;
203 x->next = NULL;
204 hind[x->i] = 2*(x->j + 1) + 0;
205 _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
206 &next_loc, &heap_len, maskhi);
207 }
208 if (j + 1 == q_len)
209 {
210 s++;
211 } else if ( ((hind[i] & 1) == 1)
212 && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
213 )
214 {
215 x = chain + i;
216 x->i = i;
217 x->j = j + 1;
218 x->next = NULL;
219 hind[x->i] = 2*(x->j + 1) + 0;
220 _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
221 &next_loc, &heap_len, maskhi);
222 }
223 }
224 }
225
226 /* try to divide accumulated term by leading term of divisor */
227 if (small)
228 {
229 ulong d0, d1, ds = acc_sm[2];
230 sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
231
232 if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
233 continue;
234
235 if (!lt_divides)
236 {
237 fmpz_set_signed_uiuiui(r_coeff + r_len, acc_sm[2], acc_sm[1], acc_sm[0]);
238 r_exp[r_len] = exp;
239 fmpz_one(rs + r_len);
240 r_len++;
241 continue;
242 }
243 if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
244 {
245 ulong qq, rr, nhi, nlo;
246 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
247 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
248 nlo = d0 << lc_norm;
249 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
250 if (rr == 0 && qq <= COEFF_MAX)
251 {
252 _fmpz_demote(q_coeff + q_len);
253 q_coeff[q_len] = qq;
254 if (ds != lc_sign)
255 q_coeff[q_len] = -q_coeff[q_len];
256 fmpz_one(qs + q_len);
257 }
258 else
259 {
260 small = 0;
261 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
262 goto large_lt_divides;
263 }
264 }
265 else
266 {
267 small = 0;
268 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
269 goto large_lt_divides;
270 }
271 }
272 else
273 {
274 if (fmpz_is_zero(acc_lg))
275 continue;
276
277 if (!lt_divides)
278 {
279 fmpz_set(r_coeff + r_len, acc_lg);
280 r_exp[r_len] = exp;
281 fmpz_set(rs + r_len, scale);
282 r_len++;
283 continue;
284 }
285 large_lt_divides:
286 fmpz_gcd(gcd, acc_lg, poly3 + 0);
287 fmpz_divexact(ns, lc_abs_lg, gcd);
288 if (!fmpz_is_one(ns))
289 scaleis1 = 0;
290 fmpz_mul(q_coeff + q_len, ns, acc_lg);
291 fmpz_divexact(q_coeff + q_len, q_coeff + q_len, poly3 + 0);
292 fmpz_mul(qs + q_len, scale, ns);
293 fmpz_set(scale, qs + q_len);
294 }
295
296 if (s > 1)
297 {
298 i = 1;
299 x = chain + i;
300 x->i = i;
301 x->j = q_len;
302 x->next = NULL;
303 hind[x->i] = 2*(x->j + 1) + 0;
304 _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
305 &next_loc, &heap_len, maskhi);
306 }
307 q_len++;
308 s = 1;
309 }
310
311 cleanup2:
312
313 (*polyq) = q_coeff;
314 (*expq) = q_exp;
315 (*polyr) = r_coeff;
316 (*expr) = r_exp;
317 (*lenr) = r_len;
318
319 TMP_END;
320
321 for (i = 0; i < q_len; i++)
322 {
323 fmpz_divexact(tp, scale, qs + i);
324 fmpz_mul(q_coeff + i, q_coeff + i, tp);
325 }
326 for (i = 0; i < qs_alloc; i++)
327 fmpz_clear(qs + i);
328
329 for (i = 0; i < r_len; i++)
330 {
331 fmpz_divexact(tp, scale, rs + i);
332 fmpz_mul(r_coeff + i, r_coeff + i, tp);
333 }
334 for (i = 0; i < rs_alloc; i++)
335 fmpz_clear(rs + i);
336
337 flint_free(qs);
338 flint_free(rs);
339
340 fmpz_clear(lc_abs_lg);
341 fmpz_clear(acc_lg);
342 fmpz_clear(r);
343 fmpz_clear(tp);
344 fmpz_clear(ns);
345 fmpz_clear(gcd);
346
347 return q_len;
348
349 exp_overflow:
350 for (i = 0; i < q_len; i++)
351 _fmpz_demote(q_coeff + i);
352 for (i = 0; i < r_len; i++)
353 _fmpz_demote(r_coeff + i);
354 q_len = 0;
355 r_len = 0;
356 goto cleanup2;
357 }
358
359
_fmpz_mpoly_quasidivrem_heap(fmpz_t scale,slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,slong N,const ulong * cmpmask)360 slong _fmpz_mpoly_quasidivrem_heap(fmpz_t scale, slong * lenr,
361 fmpz ** polyq, ulong ** expq, slong * allocq, fmpz ** polyr,
362 ulong ** expr, slong * allocr, const fmpz * poly2,
363 const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3,
364 slong len3, slong bits, slong N, const ulong * cmpmask)
365 {
366 slong i, j, s = len3;
367 slong q_len = 0, r_len = 0;
368 slong next_loc;
369 slong heap_len = 2; /* heap zero index unused */
370 mpoly_heap_s * heap;
371 mpoly_heap_t * chain;
372 slong * store, * store_base;
373 mpoly_heap_t * x;
374 slong * hind;
375 fmpz * q_coeff = *polyq;
376 fmpz * r_coeff = *polyr;
377 ulong * q_exp = *expq;
378 ulong * r_exp = *expr;
379 ulong * exp, * exps;
380 ulong ** exp_list;
381 ulong acc_sm[3]; /* for accumulating coefficients */
382 slong exp_next;
383 ulong mask;
384 ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
385 fmpz_t lc_abs_lg, ns, gcd, acc_lg, r, tp;
386 slong bits2, bits3;
387 int lt_divides, scaleis1, small;
388 fmpz * qs, * rs;
389 slong qs_alloc, rs_alloc;
390
391 TMP_INIT;
392
393 if (N == 1)
394 return _fmpz_mpoly_quasidivrem_heap1(scale, lenr, polyq, expq,
395 allocq, polyr, expr, allocr, poly2, exp2, len2,
396 poly3, exp3, len3, bits, cmpmask[0]);
397
398 TMP_START;
399
400 fmpz_init(lc_abs_lg);
401 fmpz_init(acc_lg);
402 fmpz_init(r);
403 fmpz_init(tp);
404 fmpz_init(ns);
405 fmpz_init(gcd);
406 fmpz_set_ui(scale, 1);
407 scaleis1 = 1;
408
409 qs_alloc = 64;
410 qs = (fmpz *) flint_calloc(qs_alloc, sizeof(fmpz));
411 rs_alloc = 64;
412 rs = (fmpz *) flint_calloc(rs_alloc, sizeof(fmpz));
413
414 /* whether intermediate computations q - a*b will fit in three words */
415 bits2 = _fmpz_vec_max_bits(poly2, len2);
416 bits3 = _fmpz_vec_max_bits(poly3, len3);
417 /* allow one bit for sign, one bit for subtraction */
418 small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) +
419 FLINT_BITS - 2) && FLINT_ABS(bits3) <= FLINT_BITS - 2;
420
421 next_loc = len3 + 4; /* something bigger than heap can ever be */
422 heap = (mpoly_heap_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap_s));
423 chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
424 store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
425
426 /* array of exponents of N words each */
427 exps = (ulong *) TMP_ALLOC(len3*N*sizeof(ulong));
428 /* array of pointers to unused exponent vectors */
429 exp_list = (ulong **) TMP_ALLOC(len3*sizeof(ulong *));
430 /* space to save copy of current exponent vector */
431 exp = (ulong *) TMP_ALLOC(N*sizeof(ulong));
432
433 /* set up list of available exponent vectors */
434 exp_next = 0;
435 for (i = 0; i < len3; i++)
436 exp_list[i] = exps + i*N;
437
438 /* space for flagged heap indicies */
439 hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
440 for (i = 0; i < len3; i++)
441 hind[i] = 1;
442
443 /* mask with high bit set in each field of exponent vector */
444 mask = 0;
445 for (i = 0; i < FLINT_BITS/bits; i++)
446 mask = (mask << bits) + (UWORD(1) << (bits - 1));
447
448 /* insert (-1, 0, exp2[0]) into heap */
449 x = chain + 0;
450 x->i = -WORD(1);
451 x->j = 0;
452 x->next = NULL;
453
454 heap[1].next = x;
455 heap[1].exp = exp_list[exp_next++];
456
457 mpoly_monomial_set(heap[1].exp, exp2, N);
458
459 /* precompute leading cofficient info */
460 fmpz_abs(lc_abs_lg, poly3 + 0);
461 if (small)
462 {
463 lc_abs = FLINT_ABS(poly3[0]);
464 lc_sign = FLINT_SIGN_EXT(poly3[0]);
465 count_leading_zeros(lc_norm, lc_abs);
466 lc_n = lc_abs << lc_norm;
467 invert_limb(lc_i, lc_n);
468 }
469
470 while (heap_len > 1)
471 {
472 /* make sure quotient array has space for q_len + 1 entries */
473 _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, N);
474 if (q_len + 1 > qs_alloc)
475 {
476 slong len;
477 len = FLINT_MAX(q_len + 1, 2*qs_alloc);
478 qs = (fmpz *) flint_realloc(qs, len*sizeof(fmpz));
479 if (len > qs_alloc)
480 flint_mpn_zero((mp_ptr) (qs + qs_alloc), len - qs_alloc);
481 qs_alloc = len;
482 }
483 /* make sure remainder array has space for r_len + 1 entries */
484 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
485 if (r_len + 1 > rs_alloc)
486 {
487 slong len;
488 len = FLINT_MAX(r_len + 1, 2*rs_alloc);
489 rs = (fmpz *) flint_realloc(rs, len*sizeof(fmpz));
490 if (len > rs_alloc)
491 flint_mpn_zero((mp_ptr) (rs + rs_alloc), len - rs_alloc);
492 rs_alloc = len;
493 }
494
495 mpoly_monomial_set(exp, heap[1].exp, N);
496 if (bits <= FLINT_BITS)
497 {
498 if (mpoly_monomial_overflows(exp, N, mask))
499 goto exp_overflow;
500
501 lt_divides = mpoly_monomial_divides(q_exp + q_len*N, exp, exp3, N, mask);
502 }
503 else
504 {
505 if (mpoly_monomial_overflows_mp(exp, N, bits))
506 goto exp_overflow;
507
508 lt_divides = mpoly_monomial_divides_mp(q_exp + q_len*N, exp, exp3, N, bits);
509 }
510
511 /* accumulate terms from highest terms on heap */
512 if (small)
513 acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
514 else
515 fmpz_zero(acc_lg);
516
517 while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N))
518 {
519 exp_list[--exp_next] = heap[1].exp;
520 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
521 do
522 {
523 *store++ = x->i;
524 *store++ = x->j;
525 if (x->i != -WORD(1))
526 hind[x->i] |= WORD(1);
527
528 if (small)
529 {
530 if (x->i == -WORD(1))
531 _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
532 else
533 _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
534 } else if (scaleis1)
535 {
536 if (x->i == -WORD(1))
537 fmpz_add(acc_lg, acc_lg, poly2 + x->j);
538 else
539 fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
540 } else
541 {
542 if (x->i == -WORD(1))
543 fmpz_addmul(acc_lg, scale, poly2 + x->j);
544 else
545 {
546 fmpz_divexact(tp, scale, qs + x->j);
547 fmpz_mul(tp, tp, q_coeff + x->j);
548 fmpz_submul(acc_lg, poly3 + x->i, tp);
549 }
550 }
551 } while ((x = x->next) != NULL);
552 }
553
554 /* process popped nodes */
555 while (store > store_base)
556 {
557 j = *--store;
558 i = *--store;
559 if (i == -WORD(1))
560 {
561 if (j + 1 < len2)
562 {
563 x = chain + 0;
564 x->i = i;
565 x->j = j + 1;
566 x->next = NULL;
567 mpoly_monomial_set(exp_list[exp_next], exp2 + x->j*N, N);
568 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
569 &next_loc, &heap_len, N, cmpmask);
570 }
571 } else
572 {
573 if ( (i + 1 < len3)
574 && (hind[i + 1] == 2*j + 1)
575 )
576 {
577 x = chain + i + 1;
578 x->i = i + 1;
579 x->j = j;
580 x->next = NULL;
581 hind[x->i] = 2*(x->j + 1) + 0;
582
583 if (bits <= FLINT_BITS)
584 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
585 q_exp + x->j*N, N);
586 else
587 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
588 q_exp + x->j*N, N);
589
590 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
591 &next_loc, &heap_len, N, cmpmask);
592 }
593 if (j + 1 == q_len)
594 {
595 s++;
596 } else if ( ((hind[i] & 1) == 1)
597 && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
598 )
599 {
600 x = chain + i;
601 x->i = i;
602 x->j = j + 1;
603 x->next = NULL;
604 hind[x->i] = 2*(x->j + 1) + 0;
605
606 if (bits <= FLINT_BITS)
607 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
608 q_exp + x->j*N, N);
609 else
610 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
611 q_exp + x->j*N, N);
612
613 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
614 &next_loc, &heap_len, N, cmpmask);
615 }
616 }
617 }
618
619 /* try to divide accumulated term by leading term of divisor */
620 if (small)
621 {
622 ulong d0, d1, ds = acc_sm[2];
623
624 sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
625
626 if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
627 continue;
628
629 if (!lt_divides)
630 {
631 fmpz_set_signed_uiuiui(r_coeff + r_len, acc_sm[2], acc_sm[1], acc_sm[0]);
632 mpoly_monomial_set(r_exp + r_len*N, exp, N);
633 fmpz_one(rs + r_len);
634 r_len++;
635 continue;
636 }
637 if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
638 {
639 ulong qq, rr, nhi, nlo;
640 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
641 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
642 nlo = d0 << lc_norm;
643 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
644 if (rr == 0 && qq <= COEFF_MAX)
645 {
646 _fmpz_demote(q_coeff + q_len);
647 q_coeff[q_len] = qq;
648 if (ds != lc_sign)
649 q_coeff[q_len] = -q_coeff[q_len];
650 fmpz_one(qs + q_len);
651 }
652 else
653 {
654 small = 0;
655 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
656 goto large_lt_divides;
657 }
658 }
659 else
660 {
661 small = 0;
662 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
663 goto large_lt_divides;
664 }
665 }
666 else
667 {
668 if (fmpz_is_zero(acc_lg))
669 continue;
670
671 if (!lt_divides)
672 {
673 fmpz_set(r_coeff + r_len, acc_lg);
674 mpoly_monomial_set(r_exp + r_len*N, exp, N);
675 fmpz_set(rs + r_len, scale);
676 r_len++;
677 continue;
678 }
679 large_lt_divides:
680 fmpz_gcd(gcd, acc_lg, poly3 + 0);
681 fmpz_divexact(ns, lc_abs_lg, gcd);
682 if (!fmpz_is_one(ns))
683 scaleis1 = 0;
684 fmpz_mul(q_coeff + q_len, ns, acc_lg);
685 fmpz_divexact(q_coeff + q_len, q_coeff + q_len, poly3 + 0);
686 fmpz_mul(qs + q_len, scale, ns);
687 fmpz_set(scale, qs + q_len);
688 }
689
690 if (s > 1)
691 {
692 i = 1;
693 x = chain + i;
694 x->i = i;
695 x->j = q_len;
696 x->next = NULL;
697 hind[x->i] = 2*(x->j + 1) + 0;
698
699 if (bits <= FLINT_BITS)
700 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
701 q_exp + x->j*N, N);
702 else
703 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
704 q_exp + x->j*N, N);
705
706 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
707 &next_loc, &heap_len, N, cmpmask);
708 }
709 q_len++;
710 s = 1;
711 }
712
713 cleanup2:
714
715 (*polyq) = q_coeff;
716 (*expq) = q_exp;
717 (*polyr) = r_coeff;
718 (*expr) = r_exp;
719 (*lenr) = r_len;
720
721 TMP_END;
722
723 for (i = 0; i < q_len; i++)
724 {
725 fmpz_divexact(tp, scale, qs + i);
726 fmpz_mul(q_coeff + i, q_coeff + i, tp);
727 }
728 for (i = 0; i < qs_alloc; i++)
729 fmpz_clear(qs + i);
730
731 for (i = 0; i < r_len; i++)
732 {
733 fmpz_divexact(tp, scale, rs + i);
734 fmpz_mul(r_coeff + i, r_coeff + i, tp);
735 }
736 for (i = 0; i < rs_alloc; i++)
737 fmpz_clear(rs + i);
738
739 flint_free(qs);
740 flint_free(rs);
741
742 fmpz_clear(lc_abs_lg);
743 fmpz_clear(acc_lg);
744 fmpz_clear(r);
745 fmpz_clear(tp);
746 fmpz_clear(ns);
747 fmpz_clear(gcd);
748
749 return q_len;
750
751 exp_overflow:
752 for (i = 0; i < q_len; i++)
753 _fmpz_demote(q_coeff + i);
754 for (i = 0; i < r_len; i++)
755 _fmpz_demote(r_coeff + i);
756 q_len = 0;
757 r_len = 0;
758 goto cleanup2;
759 }
760
fmpz_mpoly_quasidivrem_heap(fmpz_t scale,fmpz_mpoly_t q,fmpz_mpoly_t r,const fmpz_mpoly_t poly2,const fmpz_mpoly_t poly3,const fmpz_mpoly_ctx_t ctx)761 void fmpz_mpoly_quasidivrem_heap(fmpz_t scale, fmpz_mpoly_t q, fmpz_mpoly_t r,
762 const fmpz_mpoly_t poly2, const fmpz_mpoly_t poly3,
763 const fmpz_mpoly_ctx_t ctx)
764 {
765 slong exp_bits, N, lenq = 0, lenr = 0;
766 ulong * exp2 = poly2->exps, * exp3 = poly3->exps;
767 ulong * cmpmask;
768 int free2 = 0, free3 = 0;
769 fmpz_mpoly_t temp1, temp2;
770 fmpz_mpoly_struct * tq, * tr;
771 TMP_INIT;
772
773 /* check divisor is nonzero */
774 if (poly3->length == 0)
775 flint_throw(FLINT_DIVZERO, "Divide by zero in fmpz_mpoly_quasidivrem_heap");
776
777 fmpz_set_ui(scale, 1);
778
779 /* dividend zero, write out quotient and remainder */
780 if (poly2->length == 0)
781 {
782 fmpz_mpoly_zero(q, ctx);
783 fmpz_mpoly_zero(r, ctx);
784 return;
785 }
786
787 TMP_START;
788 exp_bits = FLINT_MAX(poly2->bits, poly3->bits);
789 exp_bits = mpoly_fix_bits(exp_bits, ctx->minfo);
790
791 N = mpoly_words_per_exp(exp_bits, ctx->minfo);
792 cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
793 mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
794
795 /* ensure input exponents packed to same size as output exponents */
796 if (exp_bits > poly2->bits)
797 {
798 free2 = 1;
799 exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
800 mpoly_repack_monomials(exp2, exp_bits, poly2->exps, poly2->bits,
801 poly2->length, ctx->minfo);
802 }
803
804 if (exp_bits > poly3->bits)
805 {
806 free3 = 1;
807 exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
808 mpoly_repack_monomials(exp3, exp_bits, poly3->exps, poly3->bits,
809 poly3->length, ctx->minfo);
810 }
811
812 /* check divisor leading monomial is at most that of the dividend */
813 if (mpoly_monomial_lt(exp2, exp3, N, cmpmask))
814 {
815 fmpz_mpoly_set(r, poly2, ctx);
816 fmpz_mpoly_zero(q, ctx);
817 goto cleanup3;
818 }
819
820 /* take care of aliasing */
821 if (q == poly2 || q == poly3)
822 {
823 fmpz_mpoly_init2(temp1, poly2->length/poly3->length + 1, ctx);
824 fmpz_mpoly_fit_bits(temp1, exp_bits, ctx);
825 temp1->bits = exp_bits;
826
827 tq = temp1;
828 } else
829 {
830 fmpz_mpoly_fit_length(q, poly2->length/poly3->length + 1, ctx);
831 fmpz_mpoly_fit_bits(q, exp_bits, ctx);
832 q->bits = exp_bits;
833
834 tq = q;
835 }
836
837 if (r == poly2 || r == poly3)
838 {
839 fmpz_mpoly_init2(temp2, poly3->length, ctx);
840 fmpz_mpoly_fit_bits(temp2, exp_bits, ctx);
841 temp2->bits = exp_bits;
842
843 tr = temp2;
844 } else
845 {
846 fmpz_mpoly_fit_length(r, poly3->length, ctx);
847 fmpz_mpoly_fit_bits(r, exp_bits, ctx);
848 r->bits = exp_bits;
849
850 tr = r;
851 }
852
853 /* do division with remainder */
854 while ((lenq = _fmpz_mpoly_quasidivrem_heap(scale, &lenr, &tq->coeffs, &tq->exps,
855 &tq->alloc, &tr->coeffs, &tr->exps, &tr->alloc, poly2->coeffs, exp2,
856 poly2->length, poly3->coeffs, exp3, poly3->length, exp_bits,
857 N, cmpmask)) == 0
858 && lenr == 0)
859 {
860 ulong * old_exp2 = exp2, * old_exp3 = exp3;
861 slong old_exp_bits = exp_bits;
862
863 exp_bits = mpoly_fix_bits(exp_bits + 1, ctx->minfo);
864
865 N = mpoly_words_per_exp(exp_bits, ctx->minfo);
866 mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
867
868 exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
869 mpoly_repack_monomials(exp2, exp_bits, old_exp2, old_exp_bits,
870 poly2->length, ctx->minfo);
871
872 exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
873 mpoly_repack_monomials(exp3, exp_bits, old_exp3, old_exp_bits,
874 poly3->length, ctx->minfo);
875
876 if (free2)
877 flint_free(old_exp2);
878
879 if (free3)
880 flint_free(old_exp3);
881
882 free2 = free3 = 1;
883
884 fmpz_mpoly_fit_bits(tq, exp_bits, ctx);
885 tq->bits = exp_bits;
886
887 fmpz_mpoly_fit_bits(tr, exp_bits, ctx);
888 tr->bits = exp_bits;
889 }
890
891 /* deal with aliasing */
892 if (q == poly2 || q == poly3)
893 {
894 fmpz_mpoly_swap(temp1, q, ctx);
895
896 fmpz_mpoly_clear(temp1, ctx);
897 }
898
899 if (r == poly2 || r == poly3)
900 {
901 fmpz_mpoly_swap(temp2, r, ctx);
902
903 fmpz_mpoly_clear(temp2, ctx);
904 }
905
906 _fmpz_mpoly_set_length(q, lenq, ctx);
907 _fmpz_mpoly_set_length(r, lenr, ctx);
908
909 cleanup3:
910
911 if (free2)
912 flint_free(exp2);
913
914 if (free3)
915 flint_free(exp3);
916
917 TMP_END;
918 }
919