1 /*
2 Copyright (C) 2018 Daniel Schultz
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include "nmod_mpoly.h"
13
14 /* improve locality */
15 #define BLOCK 128
16 #define MAX_ARRAY_SIZE (WORD(300000))
17 #define MAX_LEX_SIZE (WORD(300))
18
19
20
_nmod_mpoly_addmul_array1_ulong1(ulong * poly1,const ulong * poly2,const ulong * exp2,slong len2,const ulong * poly3,const ulong * exp3,slong len3)21 void _nmod_mpoly_addmul_array1_ulong1(ulong * poly1,
22 const ulong * poly2, const ulong * exp2, slong len2,
23 const ulong * poly3, const ulong * exp3, slong len3)
24 {
25 slong ii, i, jj, j;
26 ulong * c2;
27
28 for (ii = 0; ii < len2 + BLOCK; ii += BLOCK)
29 {
30 for (jj = 0; jj < len3 + BLOCK; jj += BLOCK)
31 {
32 for (i = ii; i < FLINT_MIN(ii + BLOCK, len2); i++)
33 {
34 c2 = poly1 + (slong) exp2[i];
35
36 if (poly2[i] != 0)
37 {
38 for (j = jj; j < FLINT_MIN(jj + BLOCK, len3); j++)
39 {
40 c2[(slong) exp3[j]] += poly2[i]*poly3[j];
41 }
42 }
43 }
44 }
45 }
46 }
47
_nmod_mpoly_addmul_array1_ulong2(ulong * poly1,const ulong * poly2,const ulong * exp2,slong len2,const ulong * poly3,const ulong * exp3,slong len3)48 void _nmod_mpoly_addmul_array1_ulong2(ulong * poly1,
49 const ulong * poly2, const ulong * exp2, slong len2,
50 const ulong * poly3, const ulong * exp3, slong len3)
51 {
52 slong ii, i, jj, j;
53 ulong p[2];
54 ulong * c2, * c;
55
56 for (ii = 0; ii < len2 + BLOCK; ii += BLOCK)
57 {
58 for (jj = 0; jj < len3 + BLOCK; jj += BLOCK)
59 {
60 for (i = ii; i < FLINT_MIN(ii + BLOCK, len2); i++)
61 {
62 c2 = poly1 + 2*((slong) exp2[i]);
63
64 if (poly2[i] != 0)
65 {
66 for (j = jj; j < FLINT_MIN(jj + BLOCK, len3); j++)
67 {
68 c = c2 + 2*((slong) exp3[j]);
69 umul_ppmm(p[1], p[0], poly2[i], poly3[j]);
70 add_ssaaaa(c[1], c[0], c[1], c[0], p[1], p[0]);
71 }
72 }
73 }
74 }
75 }
76 }
77
_nmod_mpoly_addmul_array1_ulong3(ulong * poly1,const ulong * poly2,const ulong * exp2,slong len2,const ulong * poly3,const ulong * exp3,slong len3)78 void _nmod_mpoly_addmul_array1_ulong3(ulong * poly1,
79 const ulong * poly2, const ulong * exp2, slong len2,
80 const ulong * poly3, const ulong * exp3, slong len3)
81 {
82 slong ii, i, jj, j;
83 ulong p[2];
84 ulong * c2, * c;
85
86 for (ii = 0; ii < len2 + BLOCK; ii += BLOCK)
87 {
88 for (jj = 0; jj < len3 + BLOCK; jj += BLOCK)
89 {
90 for (i = ii; i < FLINT_MIN(ii + BLOCK, len2); i++)
91 {
92 c2 = poly1 + 3*((slong) exp2[i]);
93
94 if (poly2[i] != 0)
95 {
96 for (j = jj; j < FLINT_MIN(jj + BLOCK, len3); j++)
97 {
98 c = c2 + 3*((slong) exp3[j]);
99 umul_ppmm(p[1], p[0], poly2[i], poly3[j]);
100 add_sssaaaaaa(c[2], c[1], c[0], c[2], c[1], c[0], 0, p[1], p[0]);
101 }
102 }
103 }
104 }
105 }
106 }
107
108
109 /****************************************************
110 LEX
111 ****************************************************/
112
113
114 void mpoly_main_variable_split_LEX(slong * ind, ulong * pexp, const ulong * Aexp,
115 slong l1, slong Alen, const ulong * mults, slong num, slong Abits);
116
117
118 #define LEX_UNPACK_MACRO(fxn_name, coeff_decl, nonzero_test, reduce_coeff) \
119 slong fxn_name(nmod_mpoly_t P, slong Plen, coeff_decl, \
120 const ulong * mults, slong num, slong array_size, slong top, \
121 const nmod_mpoly_ctx_t ctx) \
122 { \
123 slong off, j; \
124 slong topmult = num == 0 ? 1 : mults[num - 1]; \
125 slong lastd = topmult - 1; \
126 slong reset = array_size/topmult; \
127 slong counter = reset; \
128 ulong startexp = (top << (P->bits*num)) + (lastd << (P->bits*(num-1))); \
129 ulong coeff; \
130 for (off = array_size - 1; off >= 0; off--) \
131 { \
132 if (nonzero_test) \
133 { \
134 reduce_coeff \
135 if (coeff != UWORD(0)) \
136 { \
137 slong d = off; \
138 ulong exp = startexp; \
139 for (j = 0; j + 1 < num; j++) { \
140 exp += (d % mults[j]) << (P->bits*j); \
141 d = d / mults[j]; \
142 } \
143 _nmod_mpoly_fit_length(&P->coeffs, &P->exps, &P->alloc, \
144 Plen + 1, 1); \
145 P->exps[Plen] = exp; \
146 P->coeffs[Plen] = coeff; \
147 Plen++; \
148 } \
149 } \
150 counter--; \
151 if (counter <= 0) { \
152 counter = reset; \
153 lastd--; \
154 startexp -= UWORD(1) << (P->bits*(num-1)); \
155 } \
156 } \
157 return Plen; \
158 }
159
160 /*
161 These four functions will replace
162 _fmpz_mpoly_from_ulong_array, ..., _fmpz_mpoly_from_fmpz_array
163 defined above.
164 */
165
166 LEX_UNPACK_MACRO(
167 nmod_mpoly_append_array_sm1_LEX, ulong * coeff_array
168 ,
169 coeff_array[off] != UWORD(0)
170 ,
171 NMOD_RED(coeff, coeff_array[off], ctx->ffinfo->mod);
172 coeff_array[off] = 0;
173 )
174
175 LEX_UNPACK_MACRO(
176 nmod_mpoly_append_array_sm2_LEX, ulong * coeff_array
177 ,
178 (coeff_array[2*off + 0] || coeff_array[2*off + 1]) != UWORD(0)
179 ,
180 NMOD2_RED2(coeff, coeff_array[2*off + 1], coeff_array[2*off + 0], ctx->ffinfo->mod);
181 coeff_array[2*off + 0] = coeff_array[2*off + 1] = 0;
182 )
183
184 LEX_UNPACK_MACRO(
185 nmod_mpoly_append_array_sm3_LEX, ulong * coeff_array
186 ,
187 (coeff_array[3*off + 0] || coeff_array[3*off + 1] || coeff_array[3*off + 2]) != UWORD(0)
188 ,
189 NMOD_RED3(coeff, coeff_array[3*off + 2], coeff_array[3*off + 1], coeff_array[3*off + 0], ctx->ffinfo->mod);
190 coeff_array[3*off + 0] = coeff_array[3*off + 1] = coeff_array[3*off + 2] = UWORD(0);
191 )
192
193
194
_nmod_mpoly_mul_array_chunked_LEX(nmod_mpoly_t P,const nmod_mpoly_t A,const nmod_mpoly_t B,const ulong * mults,const nmod_mpoly_ctx_t ctx)195 void _nmod_mpoly_mul_array_chunked_LEX(
196 nmod_mpoly_t P,
197 const nmod_mpoly_t A,
198 const nmod_mpoly_t B,
199 const ulong * mults,
200 const nmod_mpoly_ctx_t ctx)
201 {
202 slong num = ctx->minfo->nfields - 1;
203 slong Pi, i, j, Plen, Pl, Al, Bl, array_size;
204 slong * Amain, * Bmain;
205 ulong * Apexp, * Bpexp;
206 TMP_INIT;
207
208 array_size = 1;
209 for (i = 0; i < num; i++) {
210 array_size *= mults[i];
211 }
212
213 /* compute lengths of poly2 and poly3 in chunks */
214 Al = 1 + (slong) (A->exps[0] >> (A->bits*num));
215 Bl = 1 + (slong) (B->exps[0] >> (B->bits*num));
216
217 TMP_START;
218
219 /* compute indices and lengths of coefficients of polys in main variable */
220 Amain = (slong *) TMP_ALLOC((Al + 1)*sizeof(slong));
221 Bmain = (slong *) TMP_ALLOC((Bl + 1)*sizeof(slong));
222 Apexp = (ulong *) flint_malloc(A->length*sizeof(ulong));
223 Bpexp = (ulong *) flint_malloc(B->length*sizeof(ulong));
224 mpoly_main_variable_split_LEX(Amain, Apexp, A->exps, Al, A->length, mults, num, A->bits);
225 mpoly_main_variable_split_LEX(Bmain, Bpexp, B->exps, Bl, B->length, mults, num, B->bits);
226
227 Pl = Al + Bl - 1;
228 Plen = 0;
229
230 {
231 ulong * coeff_array = (ulong *) TMP_ALLOC(3*array_size*sizeof(ulong));
232 for (j = 0; j < 3*array_size; j++)
233 coeff_array[j] = 0;
234
235 /* for each output chunk */
236 for (Pi = 0; Pi < Pl; Pi++)
237 {
238 /* compute bound on coeffs of output chunk */
239 slong len = 0;
240 mp_limb_t t2, t1, t0, u1, u0;
241
242 for (i = 0, j = Pi; i < Al && j >= 0; i++, j--)
243 {
244 if (j < Bl)
245 {
246 len += FLINT_MIN(Amain[i + 1] - Amain[i],
247 Bmain[j + 1] - Bmain[j]);
248 }
249 }
250
251 umul_ppmm(t1, t0, ctx->ffinfo->mod.n - 1, ctx->ffinfo->mod.n - 1);
252 umul_ppmm(t2, t1, t1, len);
253 umul_ppmm(u1, u0, t0, len);
254 add_sssaaaaaa(t2, t1, t0, t2, t1, UWORD(0), UWORD(0), u1, u0);
255
256 if (t2 != UWORD(0))
257 {
258 /* need three words */
259 for (i = 0, j = Pi; i < Al && j >= 0; i++, j--)
260 {
261 if (j < Bl)
262 {
263 _nmod_mpoly_addmul_array1_ulong3(coeff_array,
264 A->coeffs + Amain[i],
265 Apexp + Amain[i], Amain[i + 1] - Amain[i],
266 B->coeffs + Bmain[j],
267 Bpexp + Bmain[j], Bmain[j + 1] - Bmain[j]);
268 }
269 }
270 Plen = nmod_mpoly_append_array_sm3_LEX(P, Plen, coeff_array,
271 mults, num, array_size, Pl - Pi - 1, ctx);
272
273 } else if (t1 != UWORD(0))
274 {
275 /* fits into two words */
276 for (i = 0, j = Pi; i < Al && j >= 0; i++, j--)
277 {
278 if (j < Bl)
279 {
280 _nmod_mpoly_addmul_array1_ulong2(coeff_array,
281 A->coeffs + Amain[i],
282 Apexp + Amain[i], Amain[i + 1] - Amain[i],
283 B->coeffs + Bmain[j],
284 Bpexp + Bmain[j], Bmain[j + 1] - Bmain[j]);
285
286 }
287 }
288 Plen = nmod_mpoly_append_array_sm2_LEX(P, Plen, coeff_array,
289 mults, num, array_size, Pl - Pi - 1, ctx);
290
291 } else
292 {
293 /* fits into one word */
294 for (i = 0, j = Pi; i < Al && j >= 0; i++, j--)
295 {
296 if (j < Bl)
297 {
298 _nmod_mpoly_addmul_array1_ulong1(coeff_array,
299 A->coeffs + Amain[i],
300 Apexp + Amain[i], Amain[i + 1] - Amain[i],
301 B->coeffs + Bmain[j],
302 Bpexp + Bmain[j], Bmain[j + 1] - Bmain[j]);
303 }
304 }
305 Plen = nmod_mpoly_append_array_sm1_LEX(P, Plen, coeff_array,
306 mults, num, array_size, Pl - Pi - 1, ctx);
307 }
308 }
309 }
310
311 _nmod_mpoly_set_length(P, Plen, ctx);
312
313 flint_free(Apexp);
314 flint_free(Bpexp);
315 TMP_END;
316 }
317
318
319
_nmod_mpoly_mul_array_LEX(nmod_mpoly_t A,const nmod_mpoly_t B,fmpz * maxBfields,const nmod_mpoly_t C,fmpz * maxCfields,const nmod_mpoly_ctx_t ctx)320 int _nmod_mpoly_mul_array_LEX(
321 nmod_mpoly_t A,
322 const nmod_mpoly_t B,
323 fmpz * maxBfields,
324 const nmod_mpoly_t C,
325 fmpz * maxCfields,
326 const nmod_mpoly_ctx_t ctx)
327 {
328 slong i, exp_bits, array_size;
329 ulong max, * mults;
330 int success;
331 TMP_INIT;
332
333 FLINT_ASSERT(B->length != 0);
334 FLINT_ASSERT(C->length != 0);
335
336 FLINT_ASSERT(ctx->minfo->ord == ORD_LEX);
337
338 FLINT_ASSERT(1 == mpoly_words_per_exp(B->bits, ctx->minfo));
339 FLINT_ASSERT(1 == mpoly_words_per_exp(C->bits, ctx->minfo));
340
341 TMP_START;
342
343 /* compute maximum exponents for each variable */
344 mults = (ulong *) TMP_ALLOC(ctx->minfo->nfields*sizeof(ulong));
345
346 /* the field of index n-1 is the one that wil be pulled out */
347 i = ctx->minfo->nfields - 1;
348 FLINT_ASSERT(fmpz_fits_si(maxBfields + i));
349 FLINT_ASSERT(fmpz_fits_si(maxCfields + i));
350 mults[i] = 1 + fmpz_get_ui(maxBfields + i) + fmpz_get_ui(maxCfields + i);
351 max = mults[i];
352 if (((slong) mults[i]) <= 0 || mults[i] > MAX_LEX_SIZE)
353 {
354 success = 0;
355 goto cleanup;
356 }
357
358 /* the fields of index n-2...0, contribute to the array size */
359 array_size = WORD(1);
360 for (i--; i >= 0; i--)
361 {
362 ulong hi;
363 FLINT_ASSERT(fmpz_fits_si(maxBfields + i));
364 FLINT_ASSERT(fmpz_fits_si(maxCfields + i));
365 mults[i] = 1 + fmpz_get_ui(maxBfields + i) + fmpz_get_ui(maxCfields + i);
366 max |= mults[i];
367 umul_ppmm(hi, array_size, array_size, mults[i]);
368 if (hi != 0 || (slong) mults[i] <= 0
369 || array_size <= 0
370 || array_size > MAX_ARRAY_SIZE)
371 {
372 success = 0;
373 goto cleanup;
374 }
375 }
376
377 exp_bits = FLINT_MAX(MPOLY_MIN_BITS, FLINT_BIT_COUNT(max) + 1);
378 exp_bits = mpoly_fix_bits(exp_bits, ctx->minfo);
379
380 /* array multiplication assumes result fit into 1 word */
381 if (1 != mpoly_words_per_exp(exp_bits, ctx->minfo))
382 {
383 success = 0;
384 goto cleanup;
385 }
386
387 /* handle aliasing and do array multiplication */
388 if (A == B || A == C)
389 {
390 nmod_mpoly_t T;
391 nmod_mpoly_init3(T, B->length + C->length - 1, exp_bits, ctx);
392 _nmod_mpoly_mul_array_chunked_LEX(T, C, B, mults, ctx);
393 nmod_mpoly_swap(T, A, ctx);
394 nmod_mpoly_clear(T, ctx);
395 }
396 else
397 {
398 nmod_mpoly_fit_length(A, B->length + C->length - 1, ctx);
399 nmod_mpoly_fit_bits(A, exp_bits, ctx);
400 A->bits = exp_bits;
401 _nmod_mpoly_mul_array_chunked_LEX(A, C, B, mults, ctx);
402 }
403 success = 1;
404
405 cleanup:
406
407 TMP_END;
408
409 return success;
410 }
411
412
413
414
415 /****************************************************
416 DEGLEX and DEGREVLEX
417 ****************************************************/
418
419 void mpoly_main_variable_split_DEG(slong * ind, ulong * pexp, const ulong * Aexp,
420 slong l1, slong Alen, ulong deg, slong num, slong Abits);
421
422
423
424 #define DEGLEX_UNPACK_MACRO(fxn_name, coeff_decl, nonzero_test, reduce_coeff) \
425 slong fxn_name(nmod_mpoly_t P, slong Plen, coeff_decl, \
426 slong top, slong nvars, slong degb, \
427 const nmod_mpoly_ctx_t ctx) \
428 { \
429 slong i; \
430 ulong exp, lomask = (UWORD(1) << (P->bits - 1)) - 1; \
431 slong off, array_size; \
432 slong * curexp, * degpow; \
433 ulong * oneexp; \
434 ulong coeff; \
435 int carry; \
436 TMP_INIT; \
437 \
438 TMP_START; \
439 curexp = (slong *) TMP_ALLOC(nvars*sizeof(slong)); \
440 degpow = (slong *) TMP_ALLOC(nvars*sizeof(slong)); \
441 oneexp = (ulong *) TMP_ALLOC(nvars*sizeof(ulong)); \
442 array_size = 1; \
443 curexp[0] = 0; \
444 oneexp[0] = 0; \
445 degpow[0] = 1; \
446 for (i = 0; i < nvars-1; i++) \
447 { \
448 curexp[i] = 0; \
449 degpow[i] = array_size; \
450 oneexp[i] = (UWORD(1) << (P->bits*(i+1))) - UWORD(1); \
451 array_size *= degb; \
452 } \
453 off = 0; \
454 if (nvars > 1) \
455 { \
456 curexp[nvars - 2] = top; \
457 off = top * degpow[nvars - 2]; \
458 } \
459 exp = (top << (P->bits*nvars)) + (top << (P->bits*(nvars-1))); \
460 \
461 carry = 1; \
462 do { \
463 if (nonzero_test) \
464 { \
465 reduce_coeff \
466 if (coeff != UWORD(0)) \
467 { \
468 _nmod_mpoly_fit_length(&P->coeffs, &P->exps, &P->alloc, \
469 Plen + 1, 1); \
470 P->exps[Plen] = exp; \
471 P->coeffs[Plen] = coeff; \
472 Plen++; \
473 } \
474 } \
475 \
476 exp -= oneexp[0]; \
477 off -= 1; \
478 curexp[0] -= 1; \
479 if (curexp[0] >= 0) \
480 { \
481 carry = 0; \
482 } else \
483 { \
484 exp -= curexp[0]*oneexp[0]; \
485 off -= curexp[0]; \
486 curexp[0] = 0; \
487 carry = 1; \
488 \
489 for (i = 1; i < nvars - 1; i++) \
490 { \
491 exp -= oneexp[i]; \
492 off -= degpow[i]; \
493 curexp[i] -= 1; \
494 if (curexp[i] < 0) \
495 { \
496 exp -= curexp[i]*oneexp[i]; \
497 off -= curexp[i]*degpow[i]; \
498 curexp[i] = 0; \
499 carry = 1; \
500 } else \
501 { \
502 ulong t = exp & lomask; \
503 off += t*degpow[i - 1]; \
504 curexp[i - 1] = t; \
505 exp += t*oneexp[i - 1]; \
506 carry = 0; \
507 break; \
508 } \
509 } \
510 } \
511 } while (!carry); \
512 \
513 TMP_END; \
514 \
515 return Plen; \
516 }
517
518 DEGLEX_UNPACK_MACRO(
519 nmod_mpoly_append_array_sm1_DEGLEX, ulong * coeff_array
520 ,
521 coeff_array[off] != UWORD(0)
522 ,
523 NMOD_RED(coeff, coeff_array[off], ctx->ffinfo->mod);
524 coeff_array[off] = 0;
525 )
526
527 DEGLEX_UNPACK_MACRO(
528 nmod_mpoly_append_array_sm2_DEGLEX, ulong * coeff_array
529 ,
530 (coeff_array[2*off + 0] || coeff_array[2*off + 1]) != UWORD(0)
531 ,
532 NMOD2_RED2(coeff, coeff_array[2*off + 1], coeff_array[2*off + 0], ctx->ffinfo->mod);
533 coeff_array[2*off + 0] = coeff_array[2*off + 1] = 0;
534 )
535
536 DEGLEX_UNPACK_MACRO(
537 nmod_mpoly_append_array_sm3_DEGLEX, ulong * coeff_array
538 ,
539 (coeff_array[3*off + 0] || coeff_array[3*off + 1] || coeff_array[3*off + 2]) != UWORD(0)
540 ,
541 NMOD_RED3(coeff, coeff_array[3*off + 2], coeff_array[3*off + 1], coeff_array[3*off + 0], ctx->ffinfo->mod);
542 coeff_array[3*off + 0] = coeff_array[3*off + 1] = coeff_array[3*off + 2] = 0;
543 )
544
545
546
547
548
549 #define DEGREVLEX_UNPACK_MACRO(fxn_name, coeff_decl, nonzero_test, reduce_coeff) \
550 slong fxn_name(nmod_mpoly_t P, slong Plen, coeff_decl, \
551 slong top, slong nvars, slong degb, \
552 const nmod_mpoly_ctx_t ctx) \
553 { \
554 slong i; \
555 ulong exp, mask = UWORD(1) << (P->bits - 1); \
556 slong off, array_size; \
557 slong * curexp, * degpow; \
558 ulong * oneexp; \
559 ulong coeff; \
560 int carry; \
561 TMP_INIT; \
562 \
563 TMP_START; \
564 curexp = (slong *) TMP_ALLOC(nvars*sizeof(slong)); \
565 degpow = (slong *) TMP_ALLOC(nvars*sizeof(slong)); \
566 oneexp = (ulong *) TMP_ALLOC(nvars*sizeof(ulong)); \
567 array_size = 1; \
568 oneexp[0] = 0; \
569 for (i = 0; i < nvars-1; i++) { \
570 curexp[i] = 0; \
571 degpow[i] = array_size; \
572 oneexp[i] = (UWORD(1) << (P->bits*(i+1))) - UWORD(1); \
573 array_size *= degb; \
574 } \
575 \
576 off = 0; \
577 exp = (top << (P->bits*nvars)) + top; \
578 \
579 do { \
580 if (nonzero_test) \
581 { \
582 reduce_coeff \
583 if (coeff != UWORD(0)) \
584 { \
585 _nmod_mpoly_fit_length(&P->coeffs, &P->exps, &P->alloc, \
586 Plen + 1, 1); \
587 P->exps[Plen] = exp; \
588 P->coeffs[Plen] = coeff; \
589 Plen++; \
590 } \
591 } \
592 \
593 exp += oneexp[0]; \
594 off += 1; \
595 curexp[0] += 1; \
596 if ((exp & mask) == 0) \
597 { \
598 carry = (nvars - 1 == 0); \
599 } else \
600 { \
601 carry = 1; \
602 exp -= curexp[0]*oneexp[0]; \
603 off -= curexp[0]; \
604 curexp[0] = 0; \
605 for (i = 1; i < nvars - 1; i++) \
606 { \
607 exp += oneexp[i]; \
608 off += degpow[i]; \
609 curexp[i] += 1; \
610 if ((exp & mask) == 0) \
611 { \
612 carry = 0; \
613 break; \
614 } else { \
615 carry = 1; \
616 exp -= curexp[i]*oneexp[i]; \
617 off -= curexp[i]*degpow[i]; \
618 curexp[i] = 0; \
619 } \
620 } \
621 } \
622 } while (!carry); \
623 \
624 TMP_END; \
625 \
626 return Plen; \
627 }
628
629 DEGREVLEX_UNPACK_MACRO(
630 nmod_mpoly_append_array_sm1_DEGREVLEX, ulong * coeff_array
631 ,
632 coeff_array[off] != WORD(0)
633 ,
634 NMOD_RED(coeff, coeff_array[off], ctx->ffinfo->mod);
635 coeff_array[off] = 0;
636 )
637
638 DEGREVLEX_UNPACK_MACRO(
639 nmod_mpoly_append_array_sm2_DEGREVLEX, ulong * coeff_array
640 ,
641 (coeff_array[2*off + 0] || coeff_array[2*off + 1]) != WORD(0)
642 ,
643 NMOD2_RED2(coeff, coeff_array[2*off + 1], coeff_array[2*off + 0], ctx->ffinfo->mod);
644 coeff_array[2*off + 0] = coeff_array[2*off + 1] = 0;
645 )
646
647 DEGREVLEX_UNPACK_MACRO(
648 nmod_mpoly_append_array_sm3_DEGREVLEX, ulong * coeff_array
649 ,
650 (coeff_array[3*off + 0] || coeff_array[3*off + 1] || coeff_array[3*off + 2]) != WORD(0)
651 ,
652 NMOD_RED3(coeff, coeff_array[3*off + 2], coeff_array[3*off + 1], coeff_array[3*off + 0], ctx->ffinfo->mod);
653 coeff_array[3*off + 0] = coeff_array[3*off + 1] = coeff_array[3*off + 2] = 0;
654 )
655
656
_nmod_mpoly_mul_array_chunked_DEG(nmod_mpoly_t P,const nmod_mpoly_t A,const nmod_mpoly_t B,ulong degb,const nmod_mpoly_ctx_t ctx)657 void _nmod_mpoly_mul_array_chunked_DEG(
658 nmod_mpoly_t P,
659 const nmod_mpoly_t A,
660 const nmod_mpoly_t B,
661 ulong degb,
662 const nmod_mpoly_ctx_t ctx)
663 {
664 slong nvars = ctx->minfo->nvars;
665 slong Pi, i, j, Plen, Pl, Al, Bl, array_size;
666 slong * Amain, * Bmain;
667 ulong * Apexp, * Bpexp;
668 slong (* upack_sm1)(nmod_mpoly_t, slong, ulong *, slong, slong, slong, const nmod_mpoly_ctx_t);
669 slong (* upack_sm2)(nmod_mpoly_t, slong, ulong *, slong, slong, slong, const nmod_mpoly_ctx_t);
670 slong (* upack_sm3)(nmod_mpoly_t, slong, ulong *, slong, slong, slong, const nmod_mpoly_ctx_t);
671 TMP_INIT;
672
673 TMP_START;
674
675 /* compute lengths of poly2 and poly3 in chunks */
676 Al = 1 + (slong) (A->exps[0] >> (A->bits*nvars));
677 Bl = 1 + (slong) (B->exps[0] >> (B->bits*nvars));
678
679 array_size = 1;
680 for (i = 0; i < nvars-1; i++) {
681 array_size *= degb;
682 }
683
684 upack_sm1 = &nmod_mpoly_append_array_sm1_DEGLEX;
685 upack_sm2 = &nmod_mpoly_append_array_sm2_DEGLEX;
686 upack_sm3 = &nmod_mpoly_append_array_sm3_DEGLEX;
687 if (ctx->minfo->ord == ORD_DEGREVLEX) {
688 upack_sm1 = &nmod_mpoly_append_array_sm1_DEGREVLEX;
689 upack_sm2 = &nmod_mpoly_append_array_sm2_DEGREVLEX;
690 upack_sm3 = &nmod_mpoly_append_array_sm3_DEGREVLEX;
691 }
692
693 /* compute indices and lengths of coefficients of polys in main variable */
694 Amain = (slong *) TMP_ALLOC((Al + 1)*sizeof(slong));
695 Bmain = (slong *) TMP_ALLOC((Bl + 1)*sizeof(slong));
696 Apexp = (ulong *) flint_malloc(A->length*sizeof(ulong));
697 Bpexp = (ulong *) flint_malloc(B->length*sizeof(ulong));
698 mpoly_main_variable_split_DEG(Amain, Apexp, A->exps, Al, A->length,
699 degb, nvars, A->bits);
700 mpoly_main_variable_split_DEG(Bmain, Bpexp, B->exps, Bl, B->length,
701 degb, nvars, B->bits);
702
703 Pl = Al + Bl - 1;
704 FLINT_ASSERT(Pl == degb);
705 Plen = 0;
706
707 {
708 ulong * coeff_array = (ulong *) TMP_ALLOC(3*array_size*sizeof(ulong));
709 for (j = 0; j < 3*array_size; j++)
710 coeff_array[j] = 0;
711
712 /* for each output chunk */
713 for (Pi = 0; Pi < Pl; Pi++)
714 {
715 /* compute bound on coeffs of output chunk */
716 slong len = 0;
717 mp_limb_t t2, t1, t0, u1, u0;
718
719 for (i = 0, j = Pi; i < Al && j >= 0; i++, j--)
720 {
721 if (j < Bl)
722 {
723 len += FLINT_MIN(Amain[i + 1] - Amain[i],
724 Bmain[j + 1] - Bmain[j]);
725 }
726 }
727
728 umul_ppmm(t1, t0, ctx->ffinfo->mod.n - 1, ctx->ffinfo->mod.n - 1);
729 umul_ppmm(t2, t1, t1, len);
730 umul_ppmm(u1, u0, t0, len);
731 add_sssaaaaaa(t2, t1, t0, t2, t1, UWORD(0), UWORD(0), u1, u0);
732
733 if (t2 != UWORD(0))
734 {
735 /* need three words */
736 for (i = 0, j = Pi; i < Al && j >= 0; i++, j--)
737 {
738 if (j < Bl)
739 {
740 _nmod_mpoly_addmul_array1_ulong3(coeff_array,
741 A->coeffs + Amain[i],
742 Apexp + Amain[i], Amain[i + 1] - Amain[i],
743 B->coeffs + Bmain[j],
744 Bpexp + Bmain[j], Bmain[j + 1] - Bmain[j]);
745 }
746 }
747
748 Plen = upack_sm3(P, Plen, coeff_array, Pl - Pi - 1, nvars, degb, ctx);
749
750 } else if (t1 != UWORD(0))
751 {
752 /* fits into two words */
753 for (i = 0, j = Pi; i < Al && j >= 0; i++, j--)
754 {
755 if (j < Bl)
756 {
757 _nmod_mpoly_addmul_array1_ulong2(coeff_array,
758 A->coeffs + Amain[i],
759 Apexp + Amain[i], Amain[i + 1] - Amain[i],
760 B->coeffs + Bmain[j],
761 Bpexp + Bmain[j], Bmain[j + 1] - Bmain[j]);
762
763 }
764 }
765 Plen = upack_sm2(P, Plen, coeff_array, Pl - Pi - 1, nvars, degb, ctx);
766
767 } else if (t0 != UWORD(0))
768 {
769 /* fits into one word */
770 for (i = 0, j = Pi; i < Al && j >= 0; i++, j--)
771 {
772 if (j < Bl)
773 {
774 _nmod_mpoly_addmul_array1_ulong1(coeff_array,
775 A->coeffs + Amain[i],
776 Apexp + Amain[i], Amain[i + 1] - Amain[i],
777 B->coeffs + Bmain[j],
778 Bpexp + Bmain[j], Bmain[j + 1] - Bmain[j]);
779 }
780 }
781 Plen = upack_sm1(P, Plen, coeff_array, Pl - Pi - 1, nvars, degb, ctx);
782 }
783 }
784 }
785
786 _nmod_mpoly_set_length(P, Plen, ctx);
787
788 flint_free(Apexp);
789 flint_free(Bpexp);
790 TMP_END;
791 }
792
793
794
_nmod_mpoly_mul_array_DEG(nmod_mpoly_t A,const nmod_mpoly_t B,fmpz * maxBfields,const nmod_mpoly_t C,fmpz * maxCfields,const nmod_mpoly_ctx_t ctx)795 int _nmod_mpoly_mul_array_DEG(
796 nmod_mpoly_t A,
797 const nmod_mpoly_t B, fmpz * maxBfields,
798 const nmod_mpoly_t C, fmpz * maxCfields,
799 const nmod_mpoly_ctx_t ctx)
800 {
801 slong i, exp_bits, array_size;
802 ulong deg;
803 int success;
804
805 FLINT_ASSERT(B->length != 0);
806 FLINT_ASSERT(C->length != 0);
807
808 FLINT_ASSERT( ctx->minfo->ord == ORD_DEGREVLEX
809 || ctx->minfo->ord == ORD_DEGLEX);
810
811 FLINT_ASSERT(1 == mpoly_words_per_exp(B->bits, ctx->minfo));
812 FLINT_ASSERT(1 == mpoly_words_per_exp(C->bits, ctx->minfo));
813
814
815 /* the field of index n-1 is the one that wil be pulled out */
816 i = ctx->minfo->nfields - 1;
817 FLINT_ASSERT(fmpz_fits_si(maxBfields + i));
818 FLINT_ASSERT(fmpz_fits_si(maxCfields + i));
819 deg = 1 + fmpz_get_ui(maxBfields + i) + fmpz_get_ui(maxCfields + i);
820 if (((slong) deg) <= 0 || deg > MAX_ARRAY_SIZE)
821 {
822 success = 0;
823 goto cleanup;
824 }
825
826 /* the fields of index n-2...1, contribute to the array size */
827 array_size = WORD(1);
828 for (i--; i >= 1; i--)
829 {
830 ulong hi;
831 umul_ppmm(hi, array_size, array_size, deg);
832 if (hi != WORD(0) || array_size <= 0
833 || array_size > MAX_ARRAY_SIZE)
834 {
835 success = 0;
836 goto cleanup;
837 }
838 }
839
840 exp_bits = FLINT_MAX(MPOLY_MIN_BITS, FLINT_BIT_COUNT(deg) + 1);
841 exp_bits = mpoly_fix_bits(exp_bits, ctx->minfo);
842
843 /* array multiplication assumes result fit into 1 word */
844 if (1 != mpoly_words_per_exp(exp_bits, ctx->minfo))
845 {
846 success = 0;
847 goto cleanup;
848 }
849
850 /* handle aliasing and do array multiplication */
851 if (A == B || A == C)
852 {
853 nmod_mpoly_t T;
854 nmod_mpoly_init3(T, B->length + C->length - 1, exp_bits, ctx);
855 _nmod_mpoly_mul_array_chunked_DEG(T, C, B, deg, ctx);
856 nmod_mpoly_swap(T, A, ctx);
857 nmod_mpoly_clear(T, ctx);
858 }
859 else
860 {
861 nmod_mpoly_fit_length(A, B->length + C->length - 1, ctx);
862 nmod_mpoly_fit_bits(A, exp_bits, ctx);
863 A->bits = exp_bits;
864 _nmod_mpoly_mul_array_chunked_DEG(A, C, B, deg, ctx);
865 }
866 success = 1;
867
868 cleanup:
869
870 return success;
871 }
872
873
874
nmod_mpoly_mul_array(nmod_mpoly_t A,const nmod_mpoly_t B,const nmod_mpoly_t C,const nmod_mpoly_ctx_t ctx)875 int nmod_mpoly_mul_array(nmod_mpoly_t A, const nmod_mpoly_t B,
876 const nmod_mpoly_t C, const nmod_mpoly_ctx_t ctx)
877 {
878 slong i;
879 int success;
880 fmpz * maxBfields, * maxCfields;
881 TMP_INIT;
882
883 if (B->length == 0 || C->length == 0)
884 {
885 nmod_mpoly_zero(A, ctx);
886 return 1;
887 }
888
889 if ( 1 != mpoly_words_per_exp(B->bits, ctx->minfo)
890 || 1 != mpoly_words_per_exp(C->bits, ctx->minfo)
891 )
892 {
893 return 0;
894 }
895
896 TMP_START;
897
898 maxBfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
899 maxCfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
900 for (i = 0; i < ctx->minfo->nfields; i++)
901 {
902 fmpz_init(maxBfields + i);
903 fmpz_init(maxCfields + i);
904 }
905 mpoly_max_fields_fmpz(maxBfields, B->exps, B->length, B->bits, ctx->minfo);
906 mpoly_max_fields_fmpz(maxCfields, C->exps, C->length, C->bits, ctx->minfo);
907
908 switch (ctx->minfo->ord)
909 {
910 case ORD_LEX:
911 {
912 success = _nmod_mpoly_mul_array_LEX(A, B, maxBfields,
913 C, maxCfields, ctx);
914 break;
915 }
916 case ORD_DEGLEX:
917 case ORD_DEGREVLEX:
918 {
919 success = _nmod_mpoly_mul_array_DEG(A, B, maxBfields,
920 C, maxCfields, ctx);
921 break;
922 }
923 default:
924 {
925 success = 0;
926 break;
927 }
928 }
929
930 for (i = 0; i < ctx->minfo->nfields; i++)
931 {
932 fmpz_clear(maxBfields + i);
933 fmpz_clear(maxCfields + i);
934 }
935
936 TMP_END;
937 return success;
938 }
939