/* Copyright (C) 2017 Daniel Schultz This file is part of FLINT. FLINT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include #include #include "flint.h" #include "fmpz.h" #include "fmpz_mpoly.h" /* Set poly1 to poly2*poly3 using Johnson's heap method. The function realocates its output and returns the length of the product. This version of the function assumes the exponent vectors all fit in a single word. Assumes input polys are nonzero. */ slong _fmpz_mpoly_mul_johnson1(fmpz ** poly1, ulong ** exp1, slong * alloc, const fmpz * poly2, const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3, slong len3, ulong maskhi) { slong i, j, k; slong next_loc; slong Q_len = 0, heap_len = 2; /* heap zero index unused */ mpoly_heap1_s * heap; mpoly_heap_t * chain; slong * Q; mpoly_heap_t * x; fmpz * p1 = *poly1; ulong * e1 = *exp1; slong * hind; ulong exp, cy; ulong c[3], p[2]; /* for accumulating coefficients */ int first, small; TMP_INIT; TMP_START; /* whether input coeffs are small, thus output coeffs fit in three words */ small = _fmpz_mpoly_fits_small(poly2, len2) && _fmpz_mpoly_fits_small(poly3, len3); next_loc = len2 + 4; /* something bigger than heap can ever be */ heap = (mpoly_heap1_s *) TMP_ALLOC((len2 + 1)*sizeof(mpoly_heap1_s)); /* alloc array of heap nodes which can be chained together */ chain = (mpoly_heap_t *) TMP_ALLOC(len2*sizeof(mpoly_heap_t)); /* space for temporary storage of pointers to heap nodes */ Q = (slong *) TMP_ALLOC(2*len2*sizeof(slong)); /* space for heap indices */ hind = (slong *) TMP_ALLOC(len2*sizeof(slong)); for (i = 0; i < len2; i++) hind[i] = 1; /* put (0, 0, exp2[0] + exp3[0]) on heap */ x = chain + 0; x->i = 0; x->j = 0; x->next = NULL; HEAP_ASSIGN(heap[1], exp2[0] + exp3[0], x); hind[0] = 2*1 + 0; /* output poly index starts at -1, will be immediately updated to 0 */ k = -WORD(1); /* while heap is nonempty */ while (heap_len > 1) { /* get exponent field of heap top */ exp = heap[1].exp; /* realloc output poly ready for next product term */ k++; _fmpz_mpoly_fit_length(&p1, &e1, alloc, k + 1, 1); /* whether we are on first coeff product for this output exponent */ first = 1; /* set temporary coeff to zero */ c[0] = c[1] = c[2] = 0; /* while heap nonempty and contains chain with current output exponent */ while (heap_len > 1 && heap[1].exp == exp) { /* pop chain from heap */ x = _mpoly_heap_pop1(heap, &heap_len, maskhi); /* take node out of heap and put into store */ hind[x->i] |= WORD(1); Q[Q_len++] = x->i; Q[Q_len++] = x->j; /* if output coeffs will fit in three words */ if (small) { /* compute product of input poly coeffs */ if (first) { smul_ppmm(c[1], c[0], poly2[x->i], poly3[x->j]); c[2] = -(c[1] >> (FLINT_BITS - 1)); /* set output monomial */ e1[k] = exp; first = 0; } else /* addmul product of input poly coeffs */ { smul_ppmm(p[1], p[0], poly2[x->i], poly3[x->j]); add_sssaaaaaa(cy, c[1], c[0], 0, c[1], c[0], 0, p[1], p[0]); c[2] += (0 <= (slong) p[1]) ? cy : cy - 1; } /* for every node in this chain */ while ((x = x->next) != NULL) { /* addmul product of input poly coeffs */ smul_ppmm(p[1], p[0], poly2[x->i], poly3[x->j]); add_sssaaaaaa(cy, c[1], c[0], 0, c[1], c[0], 0, p[1], p[0]); c[2] += (0 <= (slong) p[1]) ? cy : cy - 1; /* take node out of heap and put into store */ hind[x->i] |= WORD(1); Q[Q_len++] = x->i; Q[Q_len++] = x->j; } } else /* output coeffs require multiprecision */ { if (first) /* compute product of input poly coeffs */ { fmpz_mul(p1 + k, poly2 + x->i, poly3 + x->j); e1[k] = exp; first = 0; } else { /* addmul product of input poly coeffs */ fmpz_addmul(p1 + k, poly2 + x->i, poly3 + x->j); } /* for each node in this chain */ while ((x = x->next) != NULL) { /* addmul product of input poly coeffs */ fmpz_addmul(p1 + k, poly2 + x->i, poly3 + x->j); /* take node out of heap and put into store */ hind[x->i] |= WORD(1); Q[Q_len++] = x->i; Q[Q_len++] = x->j; } } } /* for each node temporarily stored */ while (Q_len > 0) { /* take node from store */ j = Q[--Q_len]; i = Q[--Q_len]; /* should we go right? */ if ( (i + 1 < len2) && (hind[i + 1] == 2*j + 1) ) { x = chain + i + 1; x->i = i + 1; x->j = j; x->next = NULL; hind[x->i] = 2*(x->j+1) + 0; _mpoly_heap_insert1(heap, exp2[x->i] + exp3[x->j], x, &next_loc, &heap_len, maskhi); } /* should we go up? */ if ( (j + 1 < len3) && ((hind[i] & 1) == 1) && ( (i == 0) || (hind[i - 1] > 2*(j + 2) + 1) || (hind[i - 1] == 2*(j + 2) + 1) /* gcc should fuse */ ) ) { x = chain + i; x->i = i; x->j = j + 1; x->next = NULL; hind[x->i] = 2*(x->j+1) + 0; _mpoly_heap_insert1(heap, exp2[x->i] + exp3[x->j], x, &next_loc, &heap_len, maskhi); } } /* set output poly coeff from temporary accumulation, if not multiprec */ if (small) fmpz_set_signed_uiuiui(p1 + k, c[2], c[1], c[0]); if (fmpz_is_zero(p1 + k)) k--; } k++; (*poly1) = p1; (*exp1) = e1; TMP_END; return k; } /* Set poly1 to poly2*poly3 using Johnson's heap method. The function realocates its output and returns the length of the product. This version of the function assumes the exponent vectors take N words. */ slong _fmpz_mpoly_mul_johnson(fmpz ** poly1, ulong ** exp1, slong * alloc, const fmpz * poly2, const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3, slong len3, flint_bitcnt_t bits, slong N, const ulong * cmpmask) { slong i, j, k; slong next_loc; slong Q_len = 0, heap_len = 2; /* heap zero index unused */ mpoly_heap_s * heap; mpoly_heap_t * chain; slong * Q; mpoly_heap_t * x; fmpz * p1 = *poly1; ulong * e1 = *exp1; ulong cy; ulong c[3], p[2]; /* for accumulating coefficients */ ulong * exp, * exps; ulong ** exp_list; slong exp_next; slong * hind; int first, small; TMP_INIT; /* if exponent vectors fit in single word, call special version */ if (N == 1) return _fmpz_mpoly_mul_johnson1(poly1, exp1, alloc, poly2, exp2, len2, poly3, exp3, len3, cmpmask[0]); TMP_START; /* whether input coeffs are small, thus output coeffs fit in three words */ small = _fmpz_mpoly_fits_small(poly2, len2) && _fmpz_mpoly_fits_small(poly3, len3); next_loc = len2 + 4; /* something bigger than heap can ever be */ heap = (mpoly_heap_s *) TMP_ALLOC((len2 + 1)*sizeof(mpoly_heap_s)); /* alloc array of heap nodes which can be chained together */ chain = (mpoly_heap_t *) TMP_ALLOC(len2*sizeof(mpoly_heap_t)); /* space for temporary storage of pointers to heap nodes */ Q = (slong *) TMP_ALLOC(2*len2*sizeof(slong)); /* allocate space for exponent vectors of N words */ exps = (ulong *) TMP_ALLOC(len2*N*sizeof(ulong)); /* list of pointers to allocated exponent vectors */ exp_list = (ulong **) TMP_ALLOC(len2*sizeof(ulong *)); for (i = 0; i < len2; i++) exp_list[i] = exps + i*N; /* space for heap indices */ hind = (slong *) TMP_ALLOC(len2*sizeof(slong)); for (i = 0; i < len2; i++) hind[i] = 1; /* start with no heap nodes and no exponent vectors in use */ exp_next = 0; /* put (0, 0, exp2[0] + exp3[0]) on heap */ x = chain + 0; x->i = 0; x->j = 0; x->next = NULL; heap[1].next = x; heap[1].exp = exp_list[exp_next++]; if (bits <= FLINT_BITS) mpoly_monomial_add(heap[1].exp, exp2, exp3, N); else mpoly_monomial_add_mp(heap[1].exp, exp2, exp3, N); hind[0] = 2*1 + 0; /* output poly index starts at -1, will be immediately updated to 0 */ k = -WORD(1); /* while heap is nonempty */ while (heap_len > 1) { /* get pointer to exponent field of heap top */ exp = heap[1].exp; /* realloc output poly ready for next product term */ k++; _fmpz_mpoly_fit_length(&p1, &e1, alloc, k + 1, N); /* whether we are on first coeff product for this output exponent */ first = 1; /* set temporary coeff to zero */ c[0] = c[1] = c[2] = 0; /* while heap nonempty and contains chain with current output exponent */ while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N)) { /* pop chain from heap and set exponent field to be reused */ exp_list[--exp_next] = heap[1].exp; x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask); /* take node out of heap and put into store */ hind[x->i] |= WORD(1); Q[Q_len++] = x->i; Q[Q_len++] = x->j; /* if output coeffs will fit in three words */ if (small) { /* compute product of input poly coeffs */ if (first) { smul_ppmm(c[1], c[0], poly2[x->i], poly3[x->j]); c[2] = -(c[1] >> (FLINT_BITS - 1)); /* set output monomial */ mpoly_monomial_set(e1 + k*N, exp, N); first = 0; } else /* addmul product of input poly coeffs */ { smul_ppmm(p[1], p[0], poly2[x->i], poly3[x->j]); add_sssaaaaaa(cy, c[1], c[0], 0, c[1], c[0], 0, p[1], p[0]); c[2] += (0 <= (slong) p[1]) ? cy : cy - 1; } /* for every node in this chain */ while ((x = x->next) != NULL) { /* addmul product of input poly coeffs */ smul_ppmm(p[1], p[0], poly2[x->i], poly3[x->j]); add_sssaaaaaa(cy, c[1], c[0], 0, c[1], c[0], 0, p[1], p[0]); c[2] += (0 <= (slong) p[1]) ? cy : cy - 1; /* take node out of heap and put into store */ hind[x->i] |= WORD(1); Q[Q_len++] = x->i; Q[Q_len++] = x->j; } } else /* output coeffs require multiprecision */ { if (first) /* compute product of input poly coeffs */ { fmpz_mul(p1 + k, poly2 + x->i, poly3 + x->j); /* set output monomial */ mpoly_monomial_set(e1 + k*N, exp, N); first = 0; } else { /* addmul product of input poly coeffs */ fmpz_addmul(p1 + k, poly2 + x->i, poly3 + x->j); } /* for each node in this chain */ while ((x = x->next) != NULL) { /* addmul product of input poly coeffs */ fmpz_addmul(p1 + k, poly2 + x->i, poly3 + x->j); /* take node out of heap and put into store */ hind[x->i] |= WORD(1); Q[Q_len++] = x->i; Q[Q_len++] = x->j; } } } /* for each node temporarily stored */ while (Q_len > 0) { /* take node from store */ j = Q[--Q_len]; i = Q[--Q_len]; /* should we go right? */ if ( (i + 1 < len2) && (hind[i + 1] == 2*j + 1) ) { x = chain + i + 1; x->i = i + 1; x->j = j; x->next = NULL; hind[x->i] = 2*(x->j+1) + 0; if (bits <= FLINT_BITS) mpoly_monomial_add(exp_list[exp_next], exp2 + x->i*N, exp3 + x->j*N, N); else mpoly_monomial_add_mp(exp_list[exp_next], exp2 + x->i*N, exp3 + x->j*N, N); if (!_mpoly_heap_insert(heap, exp_list[exp_next++], x, &next_loc, &heap_len, N, cmpmask)) exp_next--; } /* should we go up? */ if ( (j + 1 < len3) && ((hind[i] & 1) == 1) && ( (i == 0) || (hind[i - 1] > 2*(j + 2) + 1) || (hind[i - 1] == 2*(j + 2) + 1) /* gcc should fuse */ ) ) { x = chain + i; x->i = i; x->j = j + 1; x->next = NULL; hind[x->i] = 2*(x->j+1) + 0; if (bits <= FLINT_BITS) mpoly_monomial_add(exp_list[exp_next], exp2 + x->i*N, exp3 + x->j*N, N); else mpoly_monomial_add_mp(exp_list[exp_next], exp2 + x->i*N, exp3 + x->j*N, N); if (!_mpoly_heap_insert(heap, exp_list[exp_next++], x, &next_loc, &heap_len, N, cmpmask)) exp_next--; } } /* set output poly coeff from temporary accumulation, if not multiprec */ if (small) fmpz_set_signed_uiuiui(p1 + k, c[2], c[1], c[0]); if (fmpz_is_zero(p1 + k)) k--; } k++; (*poly1) = p1; (*exp1) = e1; TMP_END; return k; } /* maxBfields gets clobbered */ void _fmpz_mpoly_mul_johnson_maxfields( fmpz_mpoly_t A, const fmpz_mpoly_t B, fmpz * maxBfields, const fmpz_mpoly_t C, fmpz * maxCfields, const fmpz_mpoly_ctx_t ctx) { slong N, Alen; flint_bitcnt_t Abits; ulong * cmpmask; ulong * Bexp, * Cexp; int freeBexp, freeCexp; TMP_INIT; TMP_START; _fmpz_vec_add(maxBfields, maxBfields, maxCfields, ctx->minfo->nfields); Abits = _fmpz_vec_max_bits(maxBfields, ctx->minfo->nfields); Abits = FLINT_MAX(MPOLY_MIN_BITS, Abits + 1); Abits = FLINT_MAX(Abits, B->bits); Abits = FLINT_MAX(Abits, C->bits); Abits = mpoly_fix_bits(Abits, ctx->minfo); N = mpoly_words_per_exp(Abits, ctx->minfo); cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong)); mpoly_get_cmpmask(cmpmask, N, Abits, ctx->minfo); /* ensure input exponents are packed into same sized fields as output */ freeBexp = 0; Bexp = B->exps; if (Abits > B->bits) { freeBexp = 1; Bexp = (ulong *) flint_malloc(N*B->length*sizeof(ulong)); mpoly_repack_monomials(Bexp, Abits, B->exps, B->bits, B->length, ctx->minfo); } freeCexp = 0; Cexp = C->exps; if (Abits > C->bits) { freeCexp = 1; Cexp = (ulong *) flint_malloc(N*C->length*sizeof(ulong)); mpoly_repack_monomials(Cexp, Abits, C->exps, C->bits, C->length, ctx->minfo); } /* deal with aliasing and do multiplication */ if (A == B || A == C) { fmpz_mpoly_t T; fmpz_mpoly_init2(T, B->length + C->length - 1, ctx); fmpz_mpoly_fit_bits(T, Abits, ctx); T->bits = Abits; /* algorithm more efficient if smaller poly first */ if (B->length > C->length) { Alen = _fmpz_mpoly_mul_johnson(&T->coeffs, &T->exps, &T->alloc, C->coeffs, Cexp, C->length, B->coeffs, Bexp, B->length, Abits, N, cmpmask); } else { Alen = _fmpz_mpoly_mul_johnson(&T->coeffs, &T->exps, &T->alloc, B->coeffs, Bexp, B->length, C->coeffs, Cexp, C->length, Abits, N, cmpmask); } fmpz_mpoly_swap(T, A, ctx); fmpz_mpoly_clear(T, ctx); } else { fmpz_mpoly_fit_length(A, B->length + C->length - 1, ctx); fmpz_mpoly_fit_bits(A, Abits, ctx); A->bits = Abits; /* algorithm more efficient if smaller poly first */ if (B->length > C->length) { Alen = _fmpz_mpoly_mul_johnson(&A->coeffs, &A->exps, &A->alloc, C->coeffs, Cexp, C->length, B->coeffs, Bexp, B->length, Abits, N, cmpmask); } else { Alen = _fmpz_mpoly_mul_johnson(&A->coeffs, &A->exps, &A->alloc, B->coeffs, Bexp, B->length, C->coeffs, Cexp, C->length, Abits, N, cmpmask); } } if (freeBexp) flint_free(Bexp); if (freeCexp) flint_free(Cexp); _fmpz_mpoly_set_length(A, Alen, ctx); TMP_END; } void fmpz_mpoly_mul_johnson( fmpz_mpoly_t A, const fmpz_mpoly_t B, const fmpz_mpoly_t C, const fmpz_mpoly_ctx_t ctx) { slong i; fmpz * maxBfields, * maxCfields; TMP_INIT; if (B->length == 0 || C->length == 0) { fmpz_mpoly_zero(A, ctx); return; } TMP_START; maxBfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz)); maxCfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz)); for (i = 0; i < ctx->minfo->nfields; i++) { fmpz_init(maxBfields + i); fmpz_init(maxCfields + i); } mpoly_max_fields_fmpz(maxBfields, B->exps, B->length, B->bits, ctx->minfo); mpoly_max_fields_fmpz(maxCfields, C->exps, C->length, C->bits, ctx->minfo); _fmpz_mpoly_mul_johnson_maxfields(A, B, maxBfields, C, maxCfields, ctx); for (i = 0; i < ctx->minfo->nfields; i++) { fmpz_clear(maxBfields + i); fmpz_clear(maxCfields + i); } TMP_END; }