1 /*
2 Copyright (C) 2017 Daniel Schultz
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include <gmp.h>
13 #include <stdlib.h>
14 #include "flint.h"
15 #include "fmpz.h"
16 #include "fmpz_mpoly.h"
17
18 /*
19 Set poly1 to poly2*poly3 using Johnson's heap method. The function
20 realocates its output and returns the length of the product. This
21 version of the function assumes the exponent vectors all fit in a
22 single word. Assumes input polys are nonzero.
23 */
_fmpz_mpoly_mul_johnson1(fmpz ** poly1,ulong ** exp1,slong * alloc,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,ulong maskhi)24 slong _fmpz_mpoly_mul_johnson1(fmpz ** poly1, ulong ** exp1, slong * alloc,
25 const fmpz * poly2, const ulong * exp2, slong len2,
26 const fmpz * poly3, const ulong * exp3, slong len3, ulong maskhi)
27 {
28 slong i, j, k;
29 slong next_loc;
30 slong Q_len = 0, heap_len = 2; /* heap zero index unused */
31 mpoly_heap1_s * heap;
32 mpoly_heap_t * chain;
33 slong * Q;
34 mpoly_heap_t * x;
35 fmpz * p1 = *poly1;
36 ulong * e1 = *exp1;
37 slong * hind;
38 ulong exp, cy;
39 ulong c[3], p[2]; /* for accumulating coefficients */
40 int first, small;
41 TMP_INIT;
42
43 TMP_START;
44
45 /* whether input coeffs are small, thus output coeffs fit in three words */
46 small = _fmpz_mpoly_fits_small(poly2, len2) &&
47 _fmpz_mpoly_fits_small(poly3, len3);
48
49 next_loc = len2 + 4; /* something bigger than heap can ever be */
50 heap = (mpoly_heap1_s *) TMP_ALLOC((len2 + 1)*sizeof(mpoly_heap1_s));
51 /* alloc array of heap nodes which can be chained together */
52 chain = (mpoly_heap_t *) TMP_ALLOC(len2*sizeof(mpoly_heap_t));
53 /* space for temporary storage of pointers to heap nodes */
54 Q = (slong *) TMP_ALLOC(2*len2*sizeof(slong));
55
56 /* space for heap indices */
57 hind = (slong *) TMP_ALLOC(len2*sizeof(slong));
58 for (i = 0; i < len2; i++)
59 hind[i] = 1;
60
61 /* put (0, 0, exp2[0] + exp3[0]) on heap */
62 x = chain + 0;
63 x->i = 0;
64 x->j = 0;
65 x->next = NULL;
66
67 HEAP_ASSIGN(heap[1], exp2[0] + exp3[0], x);
68 hind[0] = 2*1 + 0;
69
70 /* output poly index starts at -1, will be immediately updated to 0 */
71 k = -WORD(1);
72
73 /* while heap is nonempty */
74 while (heap_len > 1)
75 {
76 /* get exponent field of heap top */
77 exp = heap[1].exp;
78
79 /* realloc output poly ready for next product term */
80 k++;
81 _fmpz_mpoly_fit_length(&p1, &e1, alloc, k + 1, 1);
82
83 /* whether we are on first coeff product for this output exponent */
84 first = 1;
85
86 /* set temporary coeff to zero */
87 c[0] = c[1] = c[2] = 0;
88
89 /* while heap nonempty and contains chain with current output exponent */
90 while (heap_len > 1 && heap[1].exp == exp)
91 {
92 /* pop chain from heap */
93 x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
94
95 /* take node out of heap and put into store */
96 hind[x->i] |= WORD(1);
97 Q[Q_len++] = x->i;
98 Q[Q_len++] = x->j;
99
100 /* if output coeffs will fit in three words */
101 if (small)
102 {
103 /* compute product of input poly coeffs */
104 if (first)
105 {
106 smul_ppmm(c[1], c[0], poly2[x->i], poly3[x->j]);
107 c[2] = -(c[1] >> (FLINT_BITS - 1));
108
109 /* set output monomial */
110 e1[k] = exp;
111 first = 0;
112 } else /* addmul product of input poly coeffs */
113 {
114 smul_ppmm(p[1], p[0], poly2[x->i], poly3[x->j]);
115 add_sssaaaaaa(cy, c[1], c[0], 0, c[1], c[0], 0, p[1], p[0]);
116 c[2] += (0 <= (slong) p[1]) ? cy : cy - 1;
117 }
118
119 /* for every node in this chain */
120 while ((x = x->next) != NULL)
121 {
122 /* addmul product of input poly coeffs */
123 smul_ppmm(p[1], p[0], poly2[x->i], poly3[x->j]);
124 add_sssaaaaaa(cy, c[1], c[0], 0, c[1], c[0], 0, p[1], p[0]);
125 c[2] += (0 <= (slong) p[1]) ? cy : cy - 1;
126
127 /* take node out of heap and put into store */
128 hind[x->i] |= WORD(1);
129 Q[Q_len++] = x->i;
130 Q[Q_len++] = x->j;
131 }
132 } else /* output coeffs require multiprecision */
133 {
134 if (first) /* compute product of input poly coeffs */
135 {
136 fmpz_mul(p1 + k, poly2 + x->i, poly3 + x->j);
137
138 e1[k] = exp;
139 first = 0;
140 } else
141 { /* addmul product of input poly coeffs */
142 fmpz_addmul(p1 + k, poly2 + x->i, poly3 + x->j);
143 }
144
145 /* for each node in this chain */
146 while ((x = x->next) != NULL)
147 {
148 /* addmul product of input poly coeffs */
149 fmpz_addmul(p1 + k, poly2 + x->i, poly3 + x->j);
150
151 /* take node out of heap and put into store */
152 hind[x->i] |= WORD(1);
153 Q[Q_len++] = x->i;
154 Q[Q_len++] = x->j;
155 }
156 }
157 }
158
159 /* for each node temporarily stored */
160 while (Q_len > 0)
161 {
162 /* take node from store */
163 j = Q[--Q_len];
164 i = Q[--Q_len];
165
166 /* should we go right? */
167 if ( (i + 1 < len2)
168 && (hind[i + 1] == 2*j + 1)
169 )
170 {
171 x = chain + i + 1;
172 x->i = i + 1;
173 x->j = j;
174 x->next = NULL;
175
176 hind[x->i] = 2*(x->j+1) + 0;
177 _mpoly_heap_insert1(heap, exp2[x->i] + exp3[x->j], x,
178 &next_loc, &heap_len, maskhi);
179 }
180
181 /* should we go up? */
182 if ( (j + 1 < len3)
183 && ((hind[i] & 1) == 1)
184 && ( (i == 0)
185 || (hind[i - 1] > 2*(j + 2) + 1)
186 || (hind[i - 1] == 2*(j + 2) + 1) /* gcc should fuse */
187 )
188 )
189 {
190 x = chain + i;
191 x->i = i;
192 x->j = j + 1;
193 x->next = NULL;
194
195 hind[x->i] = 2*(x->j+1) + 0;
196 _mpoly_heap_insert1(heap, exp2[x->i] + exp3[x->j], x,
197 &next_loc, &heap_len, maskhi);
198 }
199 }
200
201 /* set output poly coeff from temporary accumulation, if not multiprec */
202 if (small)
203 fmpz_set_signed_uiuiui(p1 + k, c[2], c[1], c[0]);
204
205 if (fmpz_is_zero(p1 + k))
206 k--;
207 }
208
209 k++;
210
211 (*poly1) = p1;
212 (*exp1) = e1;
213
214 TMP_END;
215
216 return k;
217 }
218
219 /*
220 Set poly1 to poly2*poly3 using Johnson's heap method. The function
221 realocates its output and returns the length of the product. This
222 version of the function assumes the exponent vectors take N words.
223 */
_fmpz_mpoly_mul_johnson(fmpz ** poly1,ulong ** exp1,slong * alloc,const fmpz * poly2,const ulong * exp2,slong len2,const fmpz * poly3,const ulong * exp3,slong len3,flint_bitcnt_t bits,slong N,const ulong * cmpmask)224 slong _fmpz_mpoly_mul_johnson(fmpz ** poly1, ulong ** exp1, slong * alloc,
225 const fmpz * poly2, const ulong * exp2, slong len2,
226 const fmpz * poly3, const ulong * exp3, slong len3,
227 flint_bitcnt_t bits, slong N, const ulong * cmpmask)
228 {
229 slong i, j, k;
230 slong next_loc;
231 slong Q_len = 0, heap_len = 2; /* heap zero index unused */
232 mpoly_heap_s * heap;
233 mpoly_heap_t * chain;
234 slong * Q;
235 mpoly_heap_t * x;
236 fmpz * p1 = *poly1;
237 ulong * e1 = *exp1;
238 ulong cy;
239 ulong c[3], p[2]; /* for accumulating coefficients */
240 ulong * exp, * exps;
241 ulong ** exp_list;
242 slong exp_next;
243 slong * hind;
244 int first, small;
245 TMP_INIT;
246
247 /* if exponent vectors fit in single word, call special version */
248 if (N == 1)
249 return _fmpz_mpoly_mul_johnson1(poly1, exp1, alloc,
250 poly2, exp2, len2, poly3, exp3, len3, cmpmask[0]);
251
252 TMP_START;
253
254 /* whether input coeffs are small, thus output coeffs fit in three words */
255 small = _fmpz_mpoly_fits_small(poly2, len2) &&
256 _fmpz_mpoly_fits_small(poly3, len3);
257
258 next_loc = len2 + 4; /* something bigger than heap can ever be */
259 heap = (mpoly_heap_s *) TMP_ALLOC((len2 + 1)*sizeof(mpoly_heap_s));
260 /* alloc array of heap nodes which can be chained together */
261 chain = (mpoly_heap_t *) TMP_ALLOC(len2*sizeof(mpoly_heap_t));
262 /* space for temporary storage of pointers to heap nodes */
263 Q = (slong *) TMP_ALLOC(2*len2*sizeof(slong));
264 /* allocate space for exponent vectors of N words */
265 exps = (ulong *) TMP_ALLOC(len2*N*sizeof(ulong));
266 /* list of pointers to allocated exponent vectors */
267 exp_list = (ulong **) TMP_ALLOC(len2*sizeof(ulong *));
268 for (i = 0; i < len2; i++)
269 exp_list[i] = exps + i*N;
270
271 /* space for heap indices */
272 hind = (slong *) TMP_ALLOC(len2*sizeof(slong));
273 for (i = 0; i < len2; i++)
274 hind[i] = 1;
275
276 /* start with no heap nodes and no exponent vectors in use */
277 exp_next = 0;
278
279 /* put (0, 0, exp2[0] + exp3[0]) on heap */
280 x = chain + 0;
281 x->i = 0;
282 x->j = 0;
283 x->next = NULL;
284
285 heap[1].next = x;
286 heap[1].exp = exp_list[exp_next++];
287
288 if (bits <= FLINT_BITS)
289 mpoly_monomial_add(heap[1].exp, exp2, exp3, N);
290 else
291 mpoly_monomial_add_mp(heap[1].exp, exp2, exp3, N);
292
293 hind[0] = 2*1 + 0;
294
295 /* output poly index starts at -1, will be immediately updated to 0 */
296 k = -WORD(1);
297
298 /* while heap is nonempty */
299 while (heap_len > 1)
300 {
301 /* get pointer to exponent field of heap top */
302 exp = heap[1].exp;
303
304 /* realloc output poly ready for next product term */
305 k++;
306 _fmpz_mpoly_fit_length(&p1, &e1, alloc, k + 1, N);
307
308 /* whether we are on first coeff product for this output exponent */
309 first = 1;
310
311 /* set temporary coeff to zero */
312 c[0] = c[1] = c[2] = 0;
313
314 /* while heap nonempty and contains chain with current output exponent */
315 while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N))
316 {
317 /* pop chain from heap and set exponent field to be reused */
318 exp_list[--exp_next] = heap[1].exp;
319
320 x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
321
322 /* take node out of heap and put into store */
323 hind[x->i] |= WORD(1);
324 Q[Q_len++] = x->i;
325 Q[Q_len++] = x->j;
326
327 /* if output coeffs will fit in three words */
328 if (small)
329 {
330 /* compute product of input poly coeffs */
331 if (first)
332 {
333 smul_ppmm(c[1], c[0], poly2[x->i], poly3[x->j]);
334 c[2] = -(c[1] >> (FLINT_BITS - 1));
335
336 /* set output monomial */
337 mpoly_monomial_set(e1 + k*N, exp, N);
338
339 first = 0;
340 } else /* addmul product of input poly coeffs */
341 {
342 smul_ppmm(p[1], p[0], poly2[x->i], poly3[x->j]);
343 add_sssaaaaaa(cy, c[1], c[0], 0, c[1], c[0], 0, p[1], p[0]);
344 c[2] += (0 <= (slong) p[1]) ? cy : cy - 1;
345 }
346
347 /* for every node in this chain */
348 while ((x = x->next) != NULL)
349 {
350 /* addmul product of input poly coeffs */
351 smul_ppmm(p[1], p[0], poly2[x->i], poly3[x->j]);
352 add_sssaaaaaa(cy, c[1], c[0], 0, c[1], c[0], 0, p[1], p[0]);
353 c[2] += (0 <= (slong) p[1]) ? cy : cy - 1;
354
355 /* take node out of heap and put into store */
356 hind[x->i] |= WORD(1);
357 Q[Q_len++] = x->i;
358 Q[Q_len++] = x->j;
359 }
360 } else /* output coeffs require multiprecision */
361 {
362 if (first) /* compute product of input poly coeffs */
363 {
364 fmpz_mul(p1 + k, poly2 + x->i, poly3 + x->j);
365
366 /* set output monomial */
367 mpoly_monomial_set(e1 + k*N, exp, N);
368
369 first = 0;
370 } else
371 { /* addmul product of input poly coeffs */
372 fmpz_addmul(p1 + k, poly2 + x->i, poly3 + x->j);
373 }
374
375 /* for each node in this chain */
376 while ((x = x->next) != NULL)
377 {
378 /* addmul product of input poly coeffs */
379 fmpz_addmul(p1 + k, poly2 + x->i, poly3 + x->j);
380
381 /* take node out of heap and put into store */
382 hind[x->i] |= WORD(1);
383 Q[Q_len++] = x->i;
384 Q[Q_len++] = x->j;
385 }
386 }
387 }
388
389 /* for each node temporarily stored */
390 while (Q_len > 0)
391 {
392 /* take node from store */
393 j = Q[--Q_len];
394 i = Q[--Q_len];
395
396 /* should we go right? */
397 if ( (i + 1 < len2)
398 && (hind[i + 1] == 2*j + 1)
399 )
400 {
401 x = chain + i + 1;
402 x->i = i + 1;
403 x->j = j;
404 x->next = NULL;
405
406 hind[x->i] = 2*(x->j+1) + 0;
407
408 if (bits <= FLINT_BITS)
409 mpoly_monomial_add(exp_list[exp_next], exp2 + x->i*N, exp3 + x->j*N, N);
410 else
411 mpoly_monomial_add_mp(exp_list[exp_next], exp2 + x->i*N, exp3 + x->j*N, N);
412
413 if (!_mpoly_heap_insert(heap, exp_list[exp_next++], x,
414 &next_loc, &heap_len, N, cmpmask))
415 exp_next--;
416 }
417
418 /* should we go up? */
419 if ( (j + 1 < len3)
420 && ((hind[i] & 1) == 1)
421 && ( (i == 0)
422 || (hind[i - 1] > 2*(j + 2) + 1)
423 || (hind[i - 1] == 2*(j + 2) + 1) /* gcc should fuse */
424 )
425 )
426 {
427 x = chain + i;
428 x->i = i;
429 x->j = j + 1;
430 x->next = NULL;
431
432 hind[x->i] = 2*(x->j+1) + 0;
433
434 if (bits <= FLINT_BITS)
435 mpoly_monomial_add(exp_list[exp_next], exp2 + x->i*N, exp3 + x->j*N, N);
436 else
437 mpoly_monomial_add_mp(exp_list[exp_next], exp2 + x->i*N, exp3 + x->j*N, N);
438
439 if (!_mpoly_heap_insert(heap, exp_list[exp_next++], x,
440 &next_loc, &heap_len, N, cmpmask))
441 exp_next--;
442 }
443 }
444
445 /* set output poly coeff from temporary accumulation, if not multiprec */
446 if (small)
447 fmpz_set_signed_uiuiui(p1 + k, c[2], c[1], c[0]);
448
449 if (fmpz_is_zero(p1 + k))
450 k--;
451 }
452
453 k++;
454
455 (*poly1) = p1;
456 (*exp1) = e1;
457
458 TMP_END;
459
460 return k;
461 }
462
463 /* maxBfields gets clobbered */
_fmpz_mpoly_mul_johnson_maxfields(fmpz_mpoly_t A,const fmpz_mpoly_t B,fmpz * maxBfields,const fmpz_mpoly_t C,fmpz * maxCfields,const fmpz_mpoly_ctx_t ctx)464 void _fmpz_mpoly_mul_johnson_maxfields(
465 fmpz_mpoly_t A,
466 const fmpz_mpoly_t B, fmpz * maxBfields,
467 const fmpz_mpoly_t C, fmpz * maxCfields,
468 const fmpz_mpoly_ctx_t ctx)
469 {
470 slong N, Alen;
471 flint_bitcnt_t Abits;
472 ulong * cmpmask;
473 ulong * Bexp, * Cexp;
474 int freeBexp, freeCexp;
475 TMP_INIT;
476
477 TMP_START;
478
479 _fmpz_vec_add(maxBfields, maxBfields, maxCfields, ctx->minfo->nfields);
480
481 Abits = _fmpz_vec_max_bits(maxBfields, ctx->minfo->nfields);
482 Abits = FLINT_MAX(MPOLY_MIN_BITS, Abits + 1);
483 Abits = FLINT_MAX(Abits, B->bits);
484 Abits = FLINT_MAX(Abits, C->bits);
485 Abits = mpoly_fix_bits(Abits, ctx->minfo);
486
487 N = mpoly_words_per_exp(Abits, ctx->minfo);
488 cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
489 mpoly_get_cmpmask(cmpmask, N, Abits, ctx->minfo);
490
491 /* ensure input exponents are packed into same sized fields as output */
492 freeBexp = 0;
493 Bexp = B->exps;
494 if (Abits > B->bits)
495 {
496 freeBexp = 1;
497 Bexp = (ulong *) flint_malloc(N*B->length*sizeof(ulong));
498 mpoly_repack_monomials(Bexp, Abits, B->exps, B->bits,
499 B->length, ctx->minfo);
500 }
501
502 freeCexp = 0;
503 Cexp = C->exps;
504 if (Abits > C->bits)
505 {
506 freeCexp = 1;
507 Cexp = (ulong *) flint_malloc(N*C->length*sizeof(ulong));
508 mpoly_repack_monomials(Cexp, Abits, C->exps, C->bits,
509 C->length, ctx->minfo);
510 }
511
512 /* deal with aliasing and do multiplication */
513 if (A == B || A == C)
514 {
515 fmpz_mpoly_t T;
516 fmpz_mpoly_init2(T, B->length + C->length - 1, ctx);
517 fmpz_mpoly_fit_bits(T, Abits, ctx);
518 T->bits = Abits;
519
520 /* algorithm more efficient if smaller poly first */
521 if (B->length > C->length)
522 {
523 Alen = _fmpz_mpoly_mul_johnson(&T->coeffs, &T->exps, &T->alloc,
524 C->coeffs, Cexp, C->length,
525 B->coeffs, Bexp, B->length,
526 Abits, N, cmpmask);
527 }
528 else
529 {
530 Alen = _fmpz_mpoly_mul_johnson(&T->coeffs, &T->exps, &T->alloc,
531 B->coeffs, Bexp, B->length,
532 C->coeffs, Cexp, C->length,
533 Abits, N, cmpmask);
534 }
535
536 fmpz_mpoly_swap(T, A, ctx);
537 fmpz_mpoly_clear(T, ctx);
538 }
539 else
540 {
541 fmpz_mpoly_fit_length(A, B->length + C->length - 1, ctx);
542 fmpz_mpoly_fit_bits(A, Abits, ctx);
543 A->bits = Abits;
544
545 /* algorithm more efficient if smaller poly first */
546 if (B->length > C->length)
547 {
548 Alen = _fmpz_mpoly_mul_johnson(&A->coeffs, &A->exps, &A->alloc,
549 C->coeffs, Cexp, C->length,
550 B->coeffs, Bexp, B->length,
551 Abits, N, cmpmask);
552 }
553 else
554 {
555 Alen = _fmpz_mpoly_mul_johnson(&A->coeffs, &A->exps, &A->alloc,
556 B->coeffs, Bexp, B->length,
557 C->coeffs, Cexp, C->length,
558 Abits, N, cmpmask);
559 }
560 }
561
562 if (freeBexp)
563 flint_free(Bexp);
564
565 if (freeCexp)
566 flint_free(Cexp);
567
568 _fmpz_mpoly_set_length(A, Alen, ctx);
569
570 TMP_END;
571 }
572
573
574
fmpz_mpoly_mul_johnson(fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_t C,const fmpz_mpoly_ctx_t ctx)575 void fmpz_mpoly_mul_johnson(
576 fmpz_mpoly_t A,
577 const fmpz_mpoly_t B,
578 const fmpz_mpoly_t C,
579 const fmpz_mpoly_ctx_t ctx)
580 {
581 slong i;
582 fmpz * maxBfields, * maxCfields;
583 TMP_INIT;
584
585 if (B->length == 0 || C->length == 0)
586 {
587 fmpz_mpoly_zero(A, ctx);
588 return;
589 }
590
591 TMP_START;
592
593 maxBfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
594 maxCfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
595 for (i = 0; i < ctx->minfo->nfields; i++)
596 {
597 fmpz_init(maxBfields + i);
598 fmpz_init(maxCfields + i);
599 }
600 mpoly_max_fields_fmpz(maxBfields, B->exps, B->length, B->bits, ctx->minfo);
601 mpoly_max_fields_fmpz(maxCfields, C->exps, C->length, C->bits, ctx->minfo);
602
603 _fmpz_mpoly_mul_johnson_maxfields(A, B, maxBfields, C, maxCfields, ctx);
604
605 for (i = 0; i < ctx->minfo->nfields; i++)
606 {
607 fmpz_clear(maxBfields + i);
608 fmpz_clear(maxCfields + i);
609 }
610
611 TMP_END;
612 }
613