1 /*
2     Copyright (C) 2016 William Hart
3     Copyright (C) 2020 Daniel Schultz
4 
5     This file is part of FLINT.
6 
7     FLINT is free software: you can redistribute it and/or modify it under
8     the terms of the GNU Lesser General Public License (LGPL) as published
9     by the Free Software Foundation; either version 2.1 of the License, or
10     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
11 */
12 
13 #include "fmpz_mpoly.h"
14 
15 
_fmpz_mpoly_add1(fmpz * Acoeffs,ulong * Aexps,const fmpz * Bcoeffs,const ulong * Bexps,slong Blen,const fmpz * Ccoeffs,const ulong * Cexps,slong Clen,ulong maskhi)16 slong _fmpz_mpoly_add1(
17     fmpz * Acoeffs, ulong * Aexps,
18     const fmpz * Bcoeffs, const ulong * Bexps, slong Blen,
19     const fmpz * Ccoeffs, const ulong * Cexps, slong Clen,
20     ulong maskhi)
21 {
22     slong i = 0, j = 0, k = 0;
23 
24     while (i < Blen && j < Clen)
25     {
26         if ((Bexps[i]^maskhi) > (Cexps[j]^maskhi))
27         {
28             Aexps[k] = Bexps[i];
29             fmpz_set(Acoeffs + k, Bcoeffs + i);
30             i++;
31             k++;
32         }
33         else if ((Bexps[i]^maskhi) == (Cexps[j]^maskhi))
34         {
35             Aexps[k] = Bexps[i];
36             fmpz_add(Acoeffs + k, Bcoeffs + i, Ccoeffs + j);
37             k += !fmpz_is_zero(Acoeffs + k);
38             i++;
39             j++;
40         }
41         else
42         {
43             Aexps[k] = Cexps[j];
44             fmpz_set(Acoeffs + k, Ccoeffs + j);
45             j++;
46             k++;
47         }
48     }
49 
50     while (i < Blen)
51     {
52         Aexps[k] = Bexps[i];
53         fmpz_set(Acoeffs + k, Bcoeffs + i);
54         i++;
55         k++;
56     }
57 
58     while (j < Clen)
59     {
60         Aexps[k] = Cexps[j];
61         fmpz_set(Acoeffs + k, Ccoeffs + j);
62         j++;
63         k++;
64     }
65 
66    return k;
67 }
68 
_fmpz_mpoly_add(fmpz * Acoeffs,ulong * Aexps,const fmpz * Bcoeffs,const ulong * Bexps,slong Blen,const fmpz * Ccoeffs,const ulong * Cexps,slong Clen,slong N,const ulong * cmpmask)69 slong _fmpz_mpoly_add(
70     fmpz * Acoeffs, ulong * Aexps,
71     const fmpz * Bcoeffs, const ulong * Bexps, slong Blen,
72     const fmpz * Ccoeffs, const ulong * Cexps, slong Clen,
73     slong N,
74     const ulong * cmpmask)
75 {
76     slong i = 0, j = 0, k = 0;
77 
78     if (N == 1)
79     {
80         return _fmpz_mpoly_add1(Acoeffs, Aexps, Bcoeffs, Bexps, Blen,
81                                              Ccoeffs, Cexps, Clen, cmpmask[0]);
82     }
83 
84     while (i < Blen && j < Clen)
85     {
86         int cmp = mpoly_monomial_cmp(Bexps + i*N, Cexps + j*N, N, cmpmask);
87 
88         if (cmp > 0)
89         {
90             mpoly_monomial_set(Aexps + k*N, Bexps + i*N, N);
91             fmpz_set(Acoeffs + k, Bcoeffs + i);
92             i++;
93             k++;
94         }
95         else if (cmp == 0)
96         {
97             mpoly_monomial_set(Aexps + k*N, Bexps + i*N, N);
98             fmpz_add(Acoeffs + k, Bcoeffs + i, Ccoeffs + j);
99             k += !fmpz_is_zero(Acoeffs + k);
100             i++;
101             j++;
102         }
103         else
104         {
105             mpoly_monomial_set(Aexps + k*N, Cexps + j*N, N);
106             fmpz_set(Acoeffs + k, Ccoeffs + j);
107             j++;
108             k++;
109         }
110     }
111 
112     while (i < Blen)
113     {
114         mpoly_monomial_set(Aexps + k*N, Bexps + i*N, N);
115         fmpz_set(Acoeffs + k, Bcoeffs + i);
116         i++;
117         k++;
118     }
119 
120     while (j < Clen)
121     {
122         mpoly_monomial_set(Aexps + k*N, Cexps + j*N, N);
123         fmpz_set(Acoeffs + k, Ccoeffs + j);
124         j++;
125         k++;
126     }
127 
128     return k;
129 }
130 
fmpz_mpoly_add_inplace(fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)131 void fmpz_mpoly_add_inplace(fmpz_mpoly_t A, const fmpz_mpoly_t B,
132                                                     const fmpz_mpoly_ctx_t ctx)
133 {
134     slong i, s, new_len, N;
135     slong Alen = A->length;
136     slong Blen = B->length;
137     ulong * Bexps, * cmpmask;
138     int cmp, freeBexps;
139     flint_bitcnt_t Abits;
140     fmpz_mpoly_t T;
141     TMP_INIT;
142 
143     FLINT_ASSERT(A != B);
144     FLINT_ASSERT(Alen > 0);
145     FLINT_ASSERT(Blen > 0);
146 
147     TMP_START;
148 
149     if (A->bits <= B->bits)
150     {
151         Abits = B->bits;
152         if (A->bits < B->bits)
153             fmpz_mpoly_repack_bits_inplace(A, Abits, ctx);
154         N = mpoly_words_per_exp(Abits, ctx->minfo);
155         Bexps = B->exps;
156         freeBexps = 0;
157     }
158     else
159     {
160         Abits = A->bits;
161         N = mpoly_words_per_exp(Abits, ctx->minfo);
162         Bexps = (ulong *) flint_malloc(N*Blen*sizeof(ulong));
163         mpoly_repack_monomials(Bexps, Abits, B->exps, B->bits, Blen, ctx->minfo);
164         freeBexps = 1;
165     }
166 
167     cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
168     mpoly_get_cmpmask(cmpmask, N, Abits, ctx->minfo);
169 
170     /*
171         We will move s terms from the end of A. The complexity is roughly
172 
173             s               from search
174             s               from move
175             s + B->length   from addition
176             s               from move cleanup
177 
178         So as long as 4*s + B->length < A->length + B->length, it is
179         technically a win over the simple T = A + B; swap(A, T)
180     */
181 
182     for (s = 0; s < Alen/4; s++)
183     {
184         cmp = mpoly_monomial_cmp(A->exps + N*(Alen - s - 1),
185                                  Bexps + N*0, N, cmpmask);
186         if (cmp >= 0)
187         {
188             s += (cmp == 0);
189             goto doit;
190         }
191     }
192 
193     fmpz_mpoly_init3(T, Alen + Blen, Abits, ctx);
194     T->length = _fmpz_mpoly_add(T->coeffs, T->exps,
195                                   A->coeffs, A->exps, Alen,
196                                   B->coeffs, Bexps, Blen, N, cmpmask);
197     fmpz_mpoly_swap(A, T, ctx);
198     fmpz_mpoly_clear(T, ctx);
199     goto cleanup;
200 
201 doit:
202 
203     FLINT_ASSERT(0 <= s && s <= Alen);
204 
205     FLINT_ASSERT(s == 0 || mpoly_monomial_cmp(A->exps + N*(Alen - s),
206                                                 Bexps + N*0, N, cmpmask) <= 0);
207 
208     FLINT_ASSERT(s == Alen || mpoly_monomial_cmp(A->exps + N*(Alen - s - 1),
209                                                  Bexps + N*0, N, cmpmask) > 0);
210 
211     fmpz_mpoly_fit_length(A, Alen + Blen + s, ctx);
212     mpoly_copy_monomials(A->exps + N*(Alen + Blen), A->exps + N*(Alen - s), s, N);
213     _fmpz_vec_swap(A->coeffs + Alen + Blen, A->coeffs + Alen - s, s);
214 
215     new_len = _fmpz_mpoly_add(A->coeffs + Alen - s, A->exps + N*(Alen - s),
216                      A->coeffs + (Alen + Blen), A->exps + N*(Alen + Blen), s,
217                                            B->coeffs, Bexps, Blen, N, cmpmask);
218     for (i = 0; i < s; i++)
219         _fmpz_demote(A->coeffs + Alen + Blen + i);
220 
221     _fmpz_mpoly_set_length(A, Alen - s + new_len, ctx);
222 
223 cleanup:
224 
225     if (freeBexps)
226         flint_free(Bexps);
227 
228     TMP_END;
229 
230     return;
231 }
232 
fmpz_mpoly_add(fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_t C,const fmpz_mpoly_ctx_t ctx)233 void fmpz_mpoly_add(fmpz_mpoly_t A, const fmpz_mpoly_t B,
234                               const fmpz_mpoly_t C, const fmpz_mpoly_ctx_t ctx)
235 {
236     slong Alen, N;
237     flint_bitcnt_t Abits;
238     ulong * Bexps = B->exps, * Cexps = C->exps;
239     ulong * cmpmask;
240     int freeBexps = 0, freeCexps = 0;
241     TMP_INIT;
242 
243     if (fmpz_mpoly_is_zero(B, ctx))
244     {
245         fmpz_mpoly_set(A, C, ctx);
246         return;
247     }
248     else if (fmpz_mpoly_is_zero(C, ctx))
249     {
250         fmpz_mpoly_set(A, B, ctx);
251         return;
252     }
253     else if (A == B)
254     {
255         if (A == C)
256             _fmpz_vec_add(A->coeffs, A->coeffs, A->coeffs, A->length);
257         else
258             fmpz_mpoly_add_inplace(A, C, ctx);
259         return;
260     }
261     else if (A == C)
262     {
263         fmpz_mpoly_add_inplace(A, B, ctx);
264         return;
265     }
266 
267     TMP_START;
268     Abits = FLINT_MAX(B->bits, C->bits);
269     N = mpoly_words_per_exp(Abits, ctx->minfo);
270     cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
271     mpoly_get_cmpmask(cmpmask, N, Abits, ctx->minfo);
272 
273     if (Abits > B->bits)
274     {
275         freeBexps = 1;
276         Bexps = (ulong *) flint_malloc(N*B->length*sizeof(ulong));
277         mpoly_repack_monomials(Bexps, Abits, B->exps, B->bits, B->length, ctx->minfo);
278     }
279 
280     if (Abits > C->bits)
281     {
282         freeCexps = 1;
283         Cexps = (ulong *) flint_malloc(N*C->length*sizeof(ulong));
284         mpoly_repack_monomials(Cexps, Abits, C->exps, C->bits, C->length, ctx->minfo);
285     }
286 
287     fmpz_mpoly_fit_length_reset_bits(A, B->length + C->length, Abits, ctx);
288 
289     Alen = _fmpz_mpoly_add(A->coeffs, A->exps,
290                                   B->coeffs, Bexps, B->length,
291                                   C->coeffs, Cexps, C->length, N, cmpmask);
292     _fmpz_mpoly_set_length(A, Alen, ctx);
293 
294     if (freeBexps)
295         flint_free(Bexps);
296 
297     if (freeCexps)
298         flint_free(Cexps);
299 
300     TMP_END;
301 }
302