1 /*
2 Copyright (C) 2017 Daniel Schultz
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include <gmp.h>
13 #include <stdlib.h>
14 #include "flint.h"
15 #include "fmpz.h"
16 #include "fmpz_mpoly.h"
17 #include "longlong.h"
18
19 /*
20 Set polyq, polyr to the quotient and remainder of poly2 by poly3 (with
21 remainder coeffs reduced modulo the leading coeff of poly3), and return
22 the length of the quotient. This version of the function assumes the
23 exponent vectors all fit in a single word. The exponent vectors are
24 assumed to have fields with the given number of bits. Assumes input polys
25 are nonzero. Implements "Polynomial division using dynamic arrays, heaps
26 and packed exponents" by Michael Monagan and Roman Pearce [1], except that
27 we use a heap with smallest exponent at head. Note that if a < b then
28 (n - b) < (n - b) where n is the maximum value a and b can take. The word
29 "maxn" is set to an exponent vector whose fields are all set to such a
30 value n. This allows division from left to right with a heap with smallest
31 exponent at the head. Quotient and remainder polys are written in reverse
32 order.
33 [1] http://www.cecm.sfu.ca/~rpearcea/sdmp/sdmp_paper.pdf
34 */
35
_fmpz_mpoly_divrem_monagan_pearce1(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,ulong maskhi)36 slong _fmpz_mpoly_divrem_monagan_pearce1(slong * lenr,
37 fmpz ** polyq, ulong ** expq, slong * allocq, fmpz ** polyr,
38 ulong ** expr, slong * allocr, const fmpz * poly2, const ulong * exp2,
39 slong len2, const fmpz * poly3, const ulong * exp3, slong len3,
40 slong bits, ulong maskhi)
41 {
42 slong i, j, q_len, r_len, s;
43 slong next_loc, heap_len = 2;
44 mpoly_heap1_s * heap;
45 mpoly_heap_t * chain;
46 slong * store, * store_base;
47 mpoly_heap_t * x;
48 fmpz * q_coeff = *polyq;
49 fmpz * r_coeff = *polyr;
50 ulong * q_exp = *expq;
51 ulong * r_exp = *expr;
52 slong * hind;
53 ulong mask, exp;
54 fmpz_t r, acc_lg;
55 ulong acc_sm[3];
56 int lt_divides, small;
57 slong bits2, bits3;
58 ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
59 TMP_INIT;
60
61 TMP_START;
62
63 fmpz_init(acc_lg);
64 fmpz_init(r);
65
66 /* whether intermediate computations q - a*b will fit in three words */
67 bits2 = _fmpz_vec_max_bits(poly2, len2);
68 bits3 = _fmpz_vec_max_bits(poly3, len3);
69 /* allow one bit for sign, one bit for subtraction */
70 small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) + FLINT_BITS - 2)
71 && FLINT_ABS(bits3) <= FLINT_BITS - 2;
72
73 /* alloc array of heap nodes which can be chained together */
74 next_loc = len3 + 4; /* something bigger than heap can ever be */
75 heap = (mpoly_heap1_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap1_s));
76 chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
77 store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
78
79 /* space for flagged heap indicies */
80 hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
81 for (i = 0; i < len3; i++)
82 hind[i] = 1;
83
84 /* mask with high bit set in each field of exponent vector */
85 mask = 0;
86 for (i = 0; i < FLINT_BITS/bits; i++)
87 mask = (mask << bits) + (UWORD(1) << (bits - 1));
88
89 /* quotient and remainder poly indices start at -1 */
90 q_len = WORD(0);
91 r_len = WORD(0);
92
93 /* s is the number of terms * (latest quotient) we should put into heap */
94 s = len3;
95
96 /* insert (-1, 0, exp2[0]) into heap */
97 x = chain + 0;
98 x->i = -WORD(1);
99 x->j = 0;
100 x->next = NULL;
101 HEAP_ASSIGN(heap[1], exp2[0], x);
102
103 /* precompute leading cofficient info in "small" case */
104 if (small)
105 {
106 lc_abs = FLINT_ABS(poly3[0]);
107 lc_sign = FLINT_SIGN_EXT(poly3[0]);
108 count_leading_zeros(lc_norm, lc_abs);
109 lc_n = lc_abs << lc_norm;
110 invert_limb(lc_i, lc_n);
111 }
112
113 while (heap_len > 1)
114 {
115 exp = heap[1].exp;
116
117 if (mpoly_monomial_overflows1(exp, mask))
118 goto exp_overflow;
119
120 _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, 1);
121 lt_divides = mpoly_monomial_divides1(q_exp + q_len, exp, exp3[0], mask);
122
123 /* take nodes from heap with exponent matching exp */
124 if (small)
125 {
126 acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
127 do
128 {
129 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
130 do
131 {
132 *store++ = x->i;
133 *store++ = x->j;
134 if (x->i != -WORD(1))
135 hind[x->i] |= WORD(1);
136
137 if (x->i == -WORD(1))
138 _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
139 else
140 _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
141 } while ((x = x->next) != NULL);
142 } while (heap_len > 1 && heap[1].exp == exp);
143 } else
144 {
145 fmpz_zero(acc_lg);
146 do
147 {
148 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
149 do
150 {
151 *store++ = x->i;
152 *store++ = x->j;
153 if (x->i != -WORD(1))
154 hind[x->i] |= WORD(1);
155
156 if (x->i == -WORD(1))
157 fmpz_add(acc_lg, acc_lg, poly2 + x->j);
158 else
159 fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
160 } while ((x = x->next) != NULL);
161 } while (heap_len > 1 && heap[1].exp == exp);
162 }
163
164 /* process nodes taken from the heap */
165 while (store > store_base)
166 {
167 j = *--store;
168 i = *--store;
169
170 if (i == -WORD(1))
171 {
172 /* take next dividend term */
173 if (j + 1 < len2)
174 {
175 x = chain + 0;
176 x->i = i;
177 x->j = j + 1;
178 x->next = NULL;
179 _mpoly_heap_insert1(heap, exp2[x->j], x,
180 &next_loc, &heap_len, maskhi);
181 }
182 } else
183 {
184 /* should we go right? */
185 if ( (i + 1 < len3)
186 && (hind[i + 1] == 2*j + 1)
187 )
188 {
189 x = chain + i + 1;
190 x->i = i + 1;
191 x->j = j;
192 x->next = NULL;
193 hind[x->i] = 2*(x->j + 1) + 0;
194 _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
195 &next_loc, &heap_len, maskhi);
196 }
197 /* should we go up? */
198 if (j + 1 == q_len)
199 {
200 s++;
201 } else if ( ((hind[i] & 1) == 1)
202 && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
203 )
204 {
205 x = chain + i;
206 x->i = i;
207 x->j = j + 1;
208 x->next = NULL;
209 hind[x->i] = 2*(x->j + 1) + 0;
210 _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
211 &next_loc, &heap_len, maskhi);
212 }
213 }
214 }
215
216 /* try to divide accumulated term by leading term */
217 if (small)
218 {
219 ulong d0, d1, ds = acc_sm[2];
220
221 /* d1:d0 = abs(acc_sm[1:0]) assuming ds is sign extension of acc_sm[1] */
222 sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
223
224 if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
225 {
226 continue;
227 }
228 if (!lt_divides)
229 {
230 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
231 fmpz_set_signed_uiuiui(r_coeff + r_len, acc_sm[2], acc_sm[1], acc_sm[0]);
232 r_exp[r_len] = exp;
233 r_len++;
234 continue;
235 }
236 if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
237 {
238 ulong qq, rr, nhi, nlo;
239 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
240 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
241 nlo = d0 << lc_norm;
242 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
243 rr = rr >> lc_norm;
244 if (rr != 0)
245 {
246 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
247 if (ds == 0)
248 fmpz_set_si(r_coeff + r_len, rr);
249 else
250 fmpz_neg_ui(r_coeff + r_len, rr);
251 r_exp[r_len] = exp;
252 r_len++;
253 }
254
255 if (qq == 0)
256 continue;
257
258 if (qq <= COEFF_MAX)
259 {
260 _fmpz_demote(q_coeff + q_len);
261 q_coeff[q_len] = qq;
262 if (ds != lc_sign)
263 q_coeff[q_len] = -q_coeff[q_len];
264 }
265 else
266 {
267 small = 0;
268 fmpz_set_ui(q_coeff + q_len, qq);
269 if (ds != lc_sign)
270 fmpz_neg(q_coeff + q_len, q_coeff + q_len);
271 }
272 }
273 else
274 {
275 small = 0;
276 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
277 goto large_lt_divides;
278 }
279 }
280 else
281 {
282 if (fmpz_is_zero(acc_lg))
283 {
284 continue;
285 }
286 if (!lt_divides)
287 {
288 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
289 fmpz_set(r_coeff + r_len, acc_lg);
290 r_exp[r_len] = exp;
291 r_len++;
292 continue;
293 }
294 large_lt_divides:
295 fmpz_fdiv_qr(q_coeff + q_len, r, acc_lg, poly3 + 0);
296 if (!fmpz_is_zero(r))
297 {
298 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, 1);
299 fmpz_set(r_coeff + r_len, r);
300 r_exp[r_len] = exp;
301 r_len++;
302 }
303 if (fmpz_is_zero(q_coeff + q_len))
304 {
305 continue;
306 }
307 }
308
309 /* put newly generated quotient term back into the heap if neccesary */
310 if (s > 1)
311 {
312 i = 1;
313 x = chain + i;
314 x->i = i;
315 x->j = q_len;
316 x->next = NULL;
317 hind[x->i] = 2*(x->j + 1) + 0;
318 _mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
319 &next_loc, &heap_len, maskhi);
320 }
321 s = 1;
322 q_len++;
323 }
324
325 cleanup:
326
327 fmpz_clear(acc_lg);
328 fmpz_clear(r);
329
330 (*polyq) = q_coeff;
331 (*expq) = q_exp;
332 (*polyr) = r_coeff;
333 (*expr) = r_exp;
334
335 /* set remainder poly length */
336 (*lenr) = r_len;
337
338 TMP_END;
339
340 return q_len;
341
342 exp_overflow:
343 for (i = 0; i < q_len; i++)
344 _fmpz_demote(q_coeff + i);
345 for (i = 0; i < r_len; i++)
346 _fmpz_demote(r_coeff + i);
347 q_len = 0;
348 r_len = 0;
349 goto cleanup;
350 }
351
352
353
354
_fmpz_mpoly_divrem_monagan_pearce(slong * lenr,fmpz ** polyq,ulong ** expq,slong * allocq,fmpz ** polyr,ulong ** expr,slong * allocr,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,slong bits,slong N,const ulong * cmpmask)355 slong _fmpz_mpoly_divrem_monagan_pearce(slong * lenr,
356 fmpz ** polyq, ulong ** expq, slong * allocq, fmpz ** polyr,
357 ulong ** expr, slong * allocr, const fmpz * poly2,
358 const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3,
359 slong len3, slong bits, slong N, const ulong * cmpmask)
360 {
361 slong i, j, q_len, r_len, s;
362 slong next_loc;
363 slong heap_len = 2; /* heap zero index unused */
364 mpoly_heap_s * heap;
365 mpoly_heap_t * chain;
366 slong * store, * store_base;
367 mpoly_heap_t * x;
368 fmpz * q_coeff = *polyq;
369 fmpz * r_coeff = *polyr;
370 ulong * q_exp = *expq;
371 ulong * r_exp = *expr;
372 ulong * exp, * exps;
373 ulong ** exp_list;
374 slong exp_next;
375 ulong mask;
376 fmpz_t r, acc_lg;
377 ulong acc_sm[3];
378 slong * hind;
379 int lt_divides, small;
380 slong bits2, bits3;
381 ulong lc_norm = 0, lc_abs = 0, lc_sign = 0, lc_n = 0, lc_i = 0;
382 TMP_INIT;
383
384 if (N == 1)
385 return _fmpz_mpoly_divrem_monagan_pearce1(lenr, polyq, expq, allocq,
386 polyr, expr, allocr, poly2, exp2, len2,
387 poly3, exp3, len3, bits, cmpmask[0]);
388
389 TMP_START;
390
391 fmpz_init(acc_lg);
392 fmpz_init(r);
393
394 /* whether intermediate computations q - a*b will fit in three words */
395 bits2 = _fmpz_vec_max_bits(poly2, len2);
396 bits3 = _fmpz_vec_max_bits(poly3, len3);
397 /* allow one bit for sign, one bit for subtraction */
398 small = FLINT_ABS(bits2) <= (FLINT_ABS(bits3) + FLINT_BIT_COUNT(len3) + FLINT_BITS - 2)
399 && FLINT_ABS(bits3) <= FLINT_BITS - 2;
400
401
402 /* alloc array of heap nodes which can be chained together */
403 next_loc = len3 + 4; /* something bigger than heap can ever be */
404 heap = (mpoly_heap_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap_s));
405 chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
406 store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
407
408 /* array of exponent vectors, each of "N" words */
409 exps = (ulong *) TMP_ALLOC(len3*N*sizeof(ulong));
410 /* list of pointers to available exponent vectors */
411 exp_list = (ulong **) TMP_ALLOC(len3*sizeof(ulong *));
412 /* space to save copy of current exponent vector */
413 exp = (ulong *) TMP_ALLOC(N*sizeof(ulong));
414 /* set up list of available exponent vectors */
415 exp_next = 0;
416 for (i = 0; i < len3; i++)
417 exp_list[i] = exps + i*N;
418
419 /* space for flagged heap indicies */
420 hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
421 for (i = 0; i < len3; i++)
422 hind[i] = 1;
423
424 /* mask with high bit set in each word of each field of exponent vector */
425 mask = 0;
426 for (i = 0; i < FLINT_BITS/bits; i++)
427 mask = (mask << bits) + (UWORD(1) << (bits - 1));
428
429 /* quotient and remainder poly indices start at -1 */
430 q_len = WORD(0);
431 r_len = WORD(0);
432
433 /* s is the number of terms * (latest quotient) we should put into heap */
434 s = len3;
435
436 /* insert (-1, 0, exp2[0]) into heap */
437 x = chain + 0;
438 x->i = -WORD(1);
439 x->j = 0;
440 x->next = NULL;
441 heap[1].next = x;
442 heap[1].exp = exp_list[exp_next++];
443 mpoly_monomial_set(heap[1].exp, exp2, N);
444
445 /* precompute leading cofficient info in "small" case */
446 if (small)
447 {
448 lc_abs = FLINT_ABS(poly3[0]);
449 lc_sign = FLINT_SIGN_EXT(poly3[0]);
450 count_leading_zeros(lc_norm, lc_abs);
451 lc_n = lc_abs << lc_norm;
452 invert_limb(lc_i, lc_n);
453 }
454
455 while (heap_len > 1)
456 {
457 mpoly_monomial_set(exp, heap[1].exp, N);
458
459 if (bits <= FLINT_BITS)
460 {
461 if (mpoly_monomial_overflows(exp, N, mask))
462 goto exp_overflow2;
463 }
464 else
465 {
466 if (mpoly_monomial_overflows_mp(exp, N, bits))
467 goto exp_overflow2;
468 }
469
470 _fmpz_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, N);
471
472 if (bits <= FLINT_BITS)
473 lt_divides = mpoly_monomial_divides(q_exp + q_len*N, exp, exp3, N, mask);
474 else
475 lt_divides = mpoly_monomial_divides_mp(q_exp + q_len*N, exp, exp3, N, bits);
476
477 /* take nodes from heap with exponent matching exp */
478 if (small)
479 {
480 acc_sm[0] = acc_sm[1] = acc_sm[2] = 0;
481 do
482 {
483 exp_list[--exp_next] = heap[1].exp;
484 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
485 do
486 {
487 *store++ = x->i;
488 *store++ = x->j;
489 if (x->i != -WORD(1))
490 hind[x->i] |= WORD(1);
491
492 if (x->i == -WORD(1))
493 _fmpz_mpoly_add_uiuiui_fmpz(acc_sm, poly2 + x->j);
494 else
495 _fmpz_mpoly_submul_uiuiui_fmpz(acc_sm, poly3[x->i], q_coeff[x->j]);
496 } while ((x = x->next) != NULL);
497 } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
498 } else
499 {
500 fmpz_zero(acc_lg);
501 do
502 {
503 exp_list[--exp_next] = heap[1].exp;
504 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
505 do
506 {
507 *store++ = x->i;
508 *store++ = x->j;
509 if (x->i != -WORD(1))
510 hind[x->i] |= WORD(1);
511
512 if (x->i == -WORD(1))
513 fmpz_add(acc_lg, acc_lg, poly2 + x->j);
514 else
515 fmpz_submul(acc_lg, poly3 + x->i, q_coeff + x->j);
516 } while ((x = x->next) != NULL);
517 } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
518 }
519
520 /* process nodes taken from the heap */
521 while (store > store_base)
522 {
523 j = *--store;
524 i = *--store;
525
526 if (i == -WORD(1))
527 {
528 /* take next dividend term */
529 if (j + 1 < len2)
530 {
531 x = chain + 0;
532 x->i = i;
533 x->j = j + 1;
534 x->next = NULL;
535 mpoly_monomial_set(exp_list[exp_next], exp2 + x->j*N, N);
536 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
537 &next_loc, &heap_len, N, cmpmask);
538 }
539 } else
540 {
541 /* should we go right? */
542 if ( (i + 1 < len3)
543 && (hind[i + 1] == 2*j + 1)
544 )
545 {
546 x = chain + i + 1;
547 x->i = i + 1;
548 x->j = j;
549 x->next = NULL;
550 hind[x->i] = 2*(x->j + 1) + 0;
551
552 if (bits <= FLINT_BITS)
553 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
554 q_exp + x->j*N, N);
555 else
556 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
557 q_exp + x->j*N, N);
558
559 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
560 &next_loc, &heap_len, N, cmpmask);
561 }
562 /* should we go up? */
563 if (j + 1 == q_len)
564 {
565 s++;
566 } else if ( ((hind[i] & 1) == 1)
567 && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
568 )
569 {
570 x = chain + i;
571 x->i = i;
572 x->j = j + 1;
573 x->next = NULL;
574 hind[x->i] = 2*(x->j + 1) + 0;
575
576 if (bits <= FLINT_BITS)
577 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
578 q_exp + x->j*N, N);
579 else
580 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
581 q_exp + x->j*N, N);
582
583 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
584 &next_loc, &heap_len, N, cmpmask);
585 }
586 }
587 }
588
589 /* try to divide accumulated term by leading term */
590 if (small)
591 {
592 ulong d0, d1, ds = acc_sm[2];
593
594 /* d1:d0 = abs(acc_sm[1:0]) assuming ds is sign extension of acc_sm[1] */
595 sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
596
597 if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
598 {
599 continue;
600 }
601 if (!lt_divides)
602 {
603 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
604 fmpz_set_signed_uiuiui(r_coeff + r_len, acc_sm[2], acc_sm[1], acc_sm[0]);
605 mpoly_monomial_set(r_exp + r_len*N, exp, N);
606 r_len++;
607 continue;
608 }
609 if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
610 {
611 ulong qq, rr, nhi, nlo;
612 FLINT_ASSERT(0 < lc_norm && lc_norm < FLINT_BITS);
613 nhi = (d1 << lc_norm) | (d0 >> (FLINT_BITS - lc_norm));
614 nlo = d0 << lc_norm;
615 udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
616 rr = rr >> lc_norm;
617 if (rr != 0)
618 {
619 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
620 if (ds == 0)
621 fmpz_set_ui(r_coeff + r_len, rr);
622 else
623 fmpz_neg_ui(r_coeff + r_len, rr);
624 mpoly_monomial_set(r_exp + r_len*N, exp, N);
625 r_len++;
626 }
627
628 if (qq == 0)
629 continue;
630
631 if (qq <= COEFF_MAX)
632 {
633 _fmpz_demote(q_coeff + q_len);
634 q_coeff[q_len] = qq;
635 if (ds != lc_sign)
636 q_coeff[q_len] = -q_coeff[q_len];
637 }
638 else
639 {
640 small = 0;
641 fmpz_set_ui(q_coeff + q_len, qq);
642 if (ds != lc_sign)
643 fmpz_neg(q_coeff + q_len, q_coeff + q_len);
644 }
645 }
646 else
647 {
648 small = 0;
649 fmpz_set_signed_uiuiui(acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
650 goto large_lt_divides;
651 }
652 }
653 else
654 {
655 if (fmpz_is_zero(acc_lg))
656 {
657 continue;
658 }
659 if (!lt_divides)
660 {
661 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
662 fmpz_set(r_coeff + r_len, acc_lg);
663 mpoly_monomial_set(r_exp + r_len*N, exp, N);
664 r_len++;
665 continue;
666 }
667 large_lt_divides:
668 fmpz_fdiv_qr(q_coeff + q_len, r, acc_lg, poly3 + 0);
669 if (!fmpz_is_zero(r))
670 {
671 _fmpz_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N);
672 fmpz_set(r_coeff + r_len, r);
673 mpoly_monomial_set(r_exp + r_len*N, exp, N);
674 r_len++;
675 }
676 if (fmpz_is_zero(q_coeff + q_len))
677 {
678 continue;
679 }
680 }
681
682 /* put newly generated quotient term back into the heap if neccesary */
683 if (s > 1)
684 {
685 i = 1;
686 x = chain + i;
687 x->i = i;
688 x->j = q_len;
689 x->next = NULL;
690 hind[x->i] = 2*(x->j + 1) + 0;
691
692 if (bits <= FLINT_BITS)
693 mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
694 q_exp + x->j*N, N);
695 else
696 mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
697 q_exp + x->j*N, N);
698
699 exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
700 &next_loc, &heap_len, N, cmpmask);
701 }
702 s = 1;
703 q_len++;
704 }
705
706 cleanup2:
707
708 fmpz_clear(acc_lg);
709 fmpz_clear(r);
710
711 (*polyq) = q_coeff;
712 (*expq) = q_exp;
713 (*polyr) = r_coeff;
714 (*expr) = r_exp;
715
716 (*lenr) = r_len;
717
718 TMP_END;
719
720 return q_len;
721
722 exp_overflow2:
723 for (i = 0; i < q_len; i++)
724 _fmpz_demote(q_coeff + i);
725 for (i = 0; i < r_len; i++)
726 _fmpz_demote(r_coeff + i);
727 q_len = 0;
728 r_len = 0;
729 goto cleanup2;
730 }
731
fmpz_mpoly_divrem_monagan_pearce(fmpz_mpoly_t q,fmpz_mpoly_t r,const fmpz_mpoly_t poly2,const fmpz_mpoly_t poly3,const fmpz_mpoly_ctx_t ctx)732 void fmpz_mpoly_divrem_monagan_pearce(fmpz_mpoly_t q, fmpz_mpoly_t r,
733 const fmpz_mpoly_t poly2, const fmpz_mpoly_t poly3,
734 const fmpz_mpoly_ctx_t ctx)
735 {
736 slong exp_bits, N, lenq = 0, lenr = 0;
737 ulong * exp2 = poly2->exps, * exp3 = poly3->exps;
738 ulong * cmpmask;
739 int free2 = 0, free3 = 0;
740 fmpz_mpoly_t temp1, temp2;
741 fmpz_mpoly_struct * tq, * tr;
742 TMP_INIT;
743
744 /* check divisor is nonzero */
745 if (poly3->length == 0)
746 flint_throw(FLINT_DIVZERO, "Divide by zero in fmpz_mpoly_divrem_monagan_pearce");
747
748 /* dividend zero, write out quotient and remainder */
749 if (poly2->length == 0)
750 {
751 fmpz_mpoly_zero(q, ctx);
752 fmpz_mpoly_zero(r, ctx);
753
754 return;
755 }
756
757 TMP_START;
758
759 exp_bits = FLINT_MAX(poly2->bits, poly3->bits);
760 exp_bits = mpoly_fix_bits(exp_bits, ctx->minfo);
761
762 N = mpoly_words_per_exp(exp_bits, ctx->minfo);
763 cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
764 mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
765
766 /* ensure input exponents packed to same size as output exponents */
767 if (exp_bits > poly2->bits)
768 {
769 free2 = 1;
770 exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
771 mpoly_repack_monomials(exp2, exp_bits, poly2->exps, poly2->bits,
772 poly2->length, ctx->minfo);
773 }
774
775 if (exp_bits > poly3->bits)
776 {
777 free3 = 1;
778 exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
779 mpoly_repack_monomials(exp3, exp_bits, poly3->exps, poly3->bits,
780 poly3->length, ctx->minfo);
781 }
782
783 /* check divisor leading monomial is at most that of the dividend */
784 if (mpoly_monomial_lt(exp2, exp3, N, cmpmask))
785 {
786 fmpz_mpoly_set(r, poly2, ctx);
787 fmpz_mpoly_zero(q, ctx);
788 goto cleanup3;
789 }
790
791 /* take care of aliasing */
792 if (q == poly2 || q == poly3)
793 {
794 fmpz_mpoly_init2(temp1, poly2->length/poly3->length + 1, ctx);
795 fmpz_mpoly_fit_bits(temp1, exp_bits, ctx);
796 temp1->bits = exp_bits;
797 tq = temp1;
798 } else
799 {
800 fmpz_mpoly_fit_length(q, poly2->length/poly3->length + 1, ctx);
801 fmpz_mpoly_fit_bits(q, exp_bits, ctx);
802 q->bits = exp_bits;
803 tq = q;
804 }
805
806 if (r == poly2 || r == poly3)
807 {
808 fmpz_mpoly_init2(temp2, poly3->length, ctx);
809 fmpz_mpoly_fit_bits(temp2, exp_bits, ctx);
810 temp2->bits = exp_bits;
811 tr = temp2;
812 } else
813 {
814 fmpz_mpoly_fit_length(r, poly3->length, ctx);
815 fmpz_mpoly_fit_bits(r, exp_bits, ctx);
816 r->bits = exp_bits;
817 tr = r;
818 }
819
820 /* do division with remainder */
821 while ((lenq = _fmpz_mpoly_divrem_monagan_pearce(&lenr, &tq->coeffs, &tq->exps,
822 &tq->alloc, &tr->coeffs, &tr->exps, &tr->alloc, poly2->coeffs, exp2,
823 poly2->length, poly3->coeffs, exp3, poly3->length, exp_bits,
824 N, cmpmask)) == 0
825 && lenr == 0)
826 {
827 ulong * old_exp2 = exp2, * old_exp3 = exp3;
828 slong old_exp_bits = exp_bits;
829
830 exp_bits = mpoly_fix_bits(exp_bits + 1, ctx->minfo);
831
832 N = mpoly_words_per_exp(exp_bits, ctx->minfo);
833 cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
834 mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
835
836 exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
837 mpoly_repack_monomials(exp2, exp_bits, old_exp2, old_exp_bits,
838 poly2->length, ctx->minfo);
839
840 exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
841 mpoly_repack_monomials(exp3, exp_bits, old_exp3, old_exp_bits,
842 poly3->length, ctx->minfo);
843
844 if (free2)
845 flint_free(old_exp2);
846
847 if (free3)
848 flint_free(old_exp3);
849
850 free2 = free3 = 1;
851
852 fmpz_mpoly_fit_bits(tq, exp_bits, ctx);
853 tq->bits = exp_bits;
854
855 fmpz_mpoly_fit_bits(tr, exp_bits, ctx);
856 tr->bits = exp_bits;
857 }
858
859 /* deal with aliasing */
860 if (q == poly2 || q == poly3)
861 {
862 fmpz_mpoly_swap(temp1, q, ctx);
863
864 fmpz_mpoly_clear(temp1, ctx);
865 }
866
867 if (r == poly2 || r == poly3)
868 {
869 fmpz_mpoly_swap(temp2, r, ctx);
870
871 fmpz_mpoly_clear(temp2, ctx);
872 }
873
874 _fmpz_mpoly_set_length(q, lenq, ctx);
875 _fmpz_mpoly_set_length(r, lenr, ctx);
876
877 cleanup3:
878
879 if (free2)
880 flint_free(exp2);
881
882 if (free3)
883 flint_free(exp3);
884
885 TMP_END;
886 }
887