1 /*
2 Copyright (C) 2019 Daniel Schultz
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include "fmpz_mod.h"
13 #include "nmod_mpoly.h"
14 #include "fmpz_mpoly.h"
15 #include "fmpz_mod_mpoly.h"
16
17 #define LOW_HALF_MASK ((-UWORD(1)) >> (FLINT_BITS - FLINT_BITS/2))
18
19 /*
20 When interpolating a polynomial f we need degree bounds on f in each
21 variable and substitution degrees in each variable, these are not
22 necessarily the same. We also need to calculate discrete logs.
23 */
mpoly_bma_interpolate_ctx_init(mpoly_bma_interpolate_ctx_t Ictx,slong nvars)24 void mpoly_bma_interpolate_ctx_init(
25 mpoly_bma_interpolate_ctx_t Ictx,
26 slong nvars)
27 {
28 Ictx->degbounds = (slong *) flint_malloc(nvars*sizeof(slong));
29 Ictx->subdegs = (ulong *) flint_malloc(nvars*sizeof(ulong));
30 fmpz_mod_discrete_log_pohlig_hellman_init(Ictx->dlogenv);
31 nmod_discrete_log_pohlig_hellman_init(Ictx->dlogenv_sp);
32 }
33
mpoly_bma_interpolate_ctx_clear(mpoly_bma_interpolate_ctx_t Ictx)34 void mpoly_bma_interpolate_ctx_clear(mpoly_bma_interpolate_ctx_t Ictx)
35 {
36 flint_free(Ictx->degbounds);
37 flint_free(Ictx->subdegs);
38 fmpz_mod_discrete_log_pohlig_hellman_clear(Ictx->dlogenv);
39 nmod_discrete_log_pohlig_hellman_clear(Ictx->dlogenv_sp);
40 }
41
42 /*
43 If I was formed from evaluations at
44 alpha^alphashift, alpha^(alphashift + 1), ...
45 construct the corresponding mpoly if possible with coeffs in (-p/2, p/2]
46 The substitution degrees and degree bounds in Ictx are used.
47 */
nmod_mpoly_bma_get_fmpz_mpoly(fmpz_mpoly_t A,const fmpz_mpoly_ctx_t ctx,ulong alphashift,nmod_berlekamp_massey_t I,const mpoly_bma_interpolate_ctx_t Ictx,const nmodf_ctx_t fpctx)48 int nmod_mpoly_bma_get_fmpz_mpoly(
49 fmpz_mpoly_t A,
50 const fmpz_mpoly_ctx_t ctx,
51 ulong alphashift,
52 nmod_berlekamp_massey_t I,
53 const mpoly_bma_interpolate_ctx_t Ictx,
54 const nmodf_ctx_t fpctx)
55 {
56 slong i, j, t, N;
57 int success;
58 ulong new_exp, this_exp;
59 slong * shifts, * offsets;
60 mp_limb_t * values, * roots;
61 fmpz * Acoeff;
62 ulong * Aexp;
63 slong Alen;
64 mp_limb_t T, S, V, V0, V1, V2, p0, p1, r;
65 TMP_INIT;
66
67 t = nmod_poly_degree(I->V1);
68 FLINT_ASSERT(I->points->length >= t);
69
70 if (t <= 0)
71 {
72 return 0;
73 }
74
75 TMP_START;
76
77 /* use the rt member of I as temp space for roots - slightly dirty */
78 nmod_poly_fit_length(I->rt, t);
79 I->rt->length = t;
80 roots = I->rt->coeffs;
81 values = I->points->coeffs;
82
83 success = nmod_poly_find_distinct_nonzero_roots(roots, I->V1);
84 if (!success)
85 {
86 goto cleanup;
87 }
88
89 FLINT_ASSERT(ctx->minfo->ord == ORD_LEX);
90 FLINT_ASSERT(A->bits <= FLINT_BITS);
91 N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
92 fmpz_mpoly_fit_length(A, t, ctx);
93 Acoeff = A->coeffs;
94 Aexp = A->exps;
95 A->length = 0;
96
97 shifts = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
98 offsets = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
99 for (j = 0; j < ctx->minfo->nvars; j++)
100 {
101 mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
102 }
103
104 Alen = 0;
105 for (i = 0; i < t; i++)
106 {
107 /* coeffs[i] is (coeffs(P).values)/P(roots[i]) =: V/S
108 where P(x) = master(x)/(x-roots[i]) */
109 V0 = V1 = V2 = T = S = 0;
110 r = roots[i];
111 for (j = t; j > 0; j--)
112 {
113 T = nmod_add(nmod_mul(r, T, fpctx->mod), I->V1->coeffs[j], fpctx->mod);
114 S = nmod_add(nmod_mul(r, S, fpctx->mod), T, fpctx->mod);
115 umul_ppmm(p1, p0, values[j - 1], T);
116 add_sssaaaaaa(V2, V1, V0, V2, V1, V0, WORD(0), p1, p0);
117 }
118 /* roots[i] should be a root of master */
119 FLINT_ASSERT(nmod_add(nmod_mul(r, T, fpctx->mod), I->V1->coeffs[0], fpctx->mod) == 0);
120 NMOD_RED3(V, V2, V1, V0, fpctx->mod);
121 S = nmod_mul(S, nmod_pow_ui(r, alphashift, fpctx->mod), fpctx->mod);
122 V0 = nmod_mul(V, nmod_inv(S, fpctx->mod), fpctx->mod);
123 if (V0 == 0)
124 {
125 /* hmmm */
126 continue;
127 }
128
129 fmpz_set_ui(Acoeff + Alen, V0);
130 if (fpctx->mod.n - V0 < V0)
131 {
132 fmpz_sub_ui(Acoeff + Alen, Acoeff + Alen, fpctx->mod.n);
133 }
134
135 mpoly_monomial_zero(Aexp + N*Alen, N);
136 new_exp = nmod_discrete_log_pohlig_hellman_run(Ictx->dlogenv_sp, roots[i]);
137 for (j = ctx->minfo->nvars - 1; j >= 0; j--)
138 {
139 this_exp = new_exp % Ictx->subdegs[j];
140 new_exp = new_exp / Ictx->subdegs[j];
141 if (this_exp >= Ictx->degbounds[j])
142 {
143 success = 0;
144 goto cleanup;
145 }
146 (Aexp + N*Alen)[offsets[j]] |= this_exp << shifts[j];
147 }
148 if (new_exp != 0)
149 {
150 success = 0;
151 goto cleanup;
152 }
153 Alen++;
154 }
155 A->length = Alen;
156
157 fmpz_mpoly_sort_terms(A, ctx);
158
159 success = 1;
160
161 cleanup:
162
163 TMP_END;
164 return success;
165 }
166
167 /*
168 nmod_mpoly "skeletons" - just the coefficients
169 */
170
nmod_mpolyc_init(nmod_mpolyc_t A)171 void nmod_mpolyc_init(nmod_mpolyc_t A)
172 {
173 A->coeffs = NULL;
174 A->alloc = 0;
175 A->length = 0;
176 }
177
nmod_mpolyc_clear(nmod_mpolyc_t A)178 void nmod_mpolyc_clear(nmod_mpolyc_t A)
179 {
180 if (A->coeffs)
181 flint_free(A->coeffs);
182 }
183
nmod_mpolyc_fit_length(nmod_mpolyc_t A,slong length)184 void nmod_mpolyc_fit_length(nmod_mpolyc_t A, slong length)
185 {
186 slong old_alloc = A->alloc;
187 slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
188
189 if (length > old_alloc)
190 {
191 if (old_alloc == 0)
192 {
193 A->coeffs = (mp_limb_t *) flint_malloc(new_alloc*sizeof(mp_limb_t));
194 }
195 else
196 {
197 A->coeffs = (mp_limb_t *) flint_realloc(A->coeffs, new_alloc*sizeof(mp_limb_t));
198 }
199
200 A->alloc = new_alloc;
201 }
202 }
203
nmod_mpolycu_init(nmod_mpolycu_t A)204 void nmod_mpolycu_init(nmod_mpolycu_t A)
205 {
206 A->coeffs = NULL;
207 A->alloc = 0;
208 A->length = 0;
209 }
210
nmod_mpolycu_clear(nmod_mpolycu_t A)211 void nmod_mpolycu_clear(nmod_mpolycu_t A)
212 {
213 slong i;
214 for (i = 0; i < A->alloc; i++)
215 {
216 nmod_mpolyc_clear(A->coeffs + i);
217 }
218 if (A->coeffs)
219 flint_free(A->coeffs);
220 }
221
nmod_mpolycu_fit_length(nmod_mpolycu_t A,slong length)222 void nmod_mpolycu_fit_length(nmod_mpolycu_t A, slong length)
223 {
224 slong i;
225 slong old_alloc = A->alloc;
226 slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
227
228 if (length > old_alloc)
229 {
230 if (old_alloc == 0)
231 {
232 A->coeffs = (nmod_mpolyc_struct *) flint_malloc(
233 new_alloc*sizeof(nmod_mpolyc_struct));
234 }
235 else
236 {
237 A->coeffs = (nmod_mpolyc_struct *) flint_realloc(A->coeffs,
238 new_alloc*sizeof(nmod_mpolyc_struct));
239 }
240
241 for (i = old_alloc; i < new_alloc; i++)
242 {
243 nmod_mpolyc_init(A->coeffs + i);
244 }
245 A->alloc = new_alloc;
246 }
247 }
248
249 /*
250 fmpz_mod_mpoly "skeletons" - just the coefficients
251 */
252
fmpz_mpolyc_init(fmpz_mpolyc_t A)253 void fmpz_mpolyc_init(fmpz_mpolyc_t A)
254 {
255 A->coeffs = NULL;
256 A->alloc = 0;
257 A->length = 0;
258 }
259
fmpz_mpolyc_clear(fmpz_mpolyc_t A)260 void fmpz_mpolyc_clear(fmpz_mpolyc_t A)
261 {
262 slong i;
263 for (i = 0; i < A->alloc; i++)
264 {
265 fmpz_clear(A->coeffs + i);
266 }
267 if (A->coeffs)
268 flint_free(A->coeffs);
269 }
270
fmpz_mpolyc_fit_length(fmpz_mpolyc_t A,slong length)271 void fmpz_mpolyc_fit_length(fmpz_mpolyc_t A, slong length)
272 {
273 slong i;
274 slong old_alloc = A->alloc;
275 slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
276
277 if (length > old_alloc)
278 {
279 if (old_alloc == 0)
280 {
281 A->coeffs = (fmpz *) flint_malloc(new_alloc*sizeof(fmpz));
282 }
283 else
284 {
285 A->coeffs = (fmpz *) flint_realloc(A->coeffs, new_alloc*sizeof(fmpz));
286 }
287
288 for (i = old_alloc; i < new_alloc; i++)
289 {
290 fmpz_init(A->coeffs + i);
291 }
292 A->alloc = new_alloc;
293 }
294 }
295
fmpz_mpolycu_init(fmpz_mpolycu_t A)296 void fmpz_mpolycu_init(fmpz_mpolycu_t A)
297 {
298 A->coeffs = NULL;
299 A->alloc = 0;
300 A->length = 0;
301 }
302
fmpz_mpolycu_clear(fmpz_mpolycu_t A)303 void fmpz_mpolycu_clear(fmpz_mpolycu_t A)
304 {
305 slong i;
306 for (i = 0; i < A->alloc; i++)
307 {
308 fmpz_mpolyc_clear(A->coeffs + i);
309 }
310 if (A->coeffs)
311 flint_free(A->coeffs);
312 }
313
fmpz_mpolycu_fit_length(fmpz_mpolycu_t A,slong length)314 void fmpz_mpolycu_fit_length(fmpz_mpolycu_t A, slong length)
315 {
316 slong i;
317 slong old_alloc = A->alloc;
318 slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
319
320 if (length > old_alloc)
321 {
322 if (old_alloc == 0)
323 {
324 A->coeffs = (fmpz_mpolyc_struct *) flint_malloc(
325 new_alloc*sizeof(fmpz_mpolyc_struct));
326 }
327 else
328 {
329 A->coeffs = (fmpz_mpolyc_struct *) flint_realloc(A->coeffs,
330 new_alloc*sizeof(fmpz_mpolyc_struct));
331 }
332
333 for (i = old_alloc; i < new_alloc; i++)
334 {
335 fmpz_mpolyc_init(A->coeffs + i);
336 }
337 A->alloc = new_alloc;
338 }
339 }
340
341
342 /*
343 set out to the evaluation of variables after ksub at alpha^w
344
345 out[0] = alpha ^ (w * subdegs[n-1] * subdegs[n-2] * ... * * subdegs[1])
346 ...
347 out[n-3] = alpha ^ (w * subdegs[n-1] * subdegs[n-2])
348 out[n-2] = alpha ^ (w * subdegs[n-1])
349 out[n-1] = alpha ^ (w)
350
351 secret: subdegs[0] is not used
352 */
353
nmod_mpoly_bma_interpolate_alpha_powers(mp_limb_t * out,ulong w,const mpoly_bma_interpolate_ctx_t Ictx,const fmpz_mpoly_ctx_t ctx,const nmodf_ctx_t fpctx)354 void nmod_mpoly_bma_interpolate_alpha_powers(
355 mp_limb_t * out,
356 ulong w,
357 const mpoly_bma_interpolate_ctx_t Ictx,
358 const fmpz_mpoly_ctx_t ctx,
359 const nmodf_ctx_t fpctx)
360 {
361 slong j = ctx->minfo->nvars - 1;
362 out[j] = nmod_pow_ui(Ictx->dlogenv_sp->alpha, w, fpctx->mod);
363 for (; j > 0; j--)
364 {
365 out[j - 1] = nmod_pow_ui(out[j], Ictx->subdegs[j], fpctx->mod);
366 }
367 }
368
fmpz_mod_mpoly_bma_interpolate_alpha_powers(fmpz * out,const fmpz_t w,const mpoly_bma_interpolate_ctx_t Ictx,const fmpz_mpoly_ctx_t ctx,const fmpz_mod_ctx_t fpctx)369 void fmpz_mod_mpoly_bma_interpolate_alpha_powers(
370 fmpz * out,
371 const fmpz_t w,
372 const mpoly_bma_interpolate_ctx_t Ictx,
373 const fmpz_mpoly_ctx_t ctx,
374 const fmpz_mod_ctx_t fpctx)
375 {
376 slong j = ctx->minfo->nvars - 1;
377 fmpz_mod_pow_fmpz(out + j, Ictx->dlogenv->alpha, w, fpctx);
378 for (; j > 0; j--)
379 {
380 fmpz_mod_pow_ui(out + j - 1, out + j, Ictx->subdegs[j], fpctx);
381 }
382 }
383
384
385 /*
386 Set the coefficients of E to the evaluation of cooresponding monomials of A
387 evaluation at x_i = alpha[i]
388 */
fmpz_mod_mpoly_set_skel(fmpz_mpolyc_t M,const fmpz_mod_mpoly_ctx_t ctx_mp,const fmpz_mpoly_t A,const fmpz * alpha,const fmpz_mpoly_ctx_t ctx)389 void fmpz_mod_mpoly_set_skel(
390 fmpz_mpolyc_t M,
391 const fmpz_mod_mpoly_ctx_t ctx_mp,
392 const fmpz_mpoly_t A,
393 const fmpz * alpha,
394 const fmpz_mpoly_ctx_t ctx)
395 {
396 slong i, j;
397 slong offset, shift;
398 slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
399 slong nvars = ctx->minfo->nvars;
400 ulong * Aexp;
401 fmpz * Mcoeff;
402 slong * LUToffset;
403 ulong * LUTmask;
404 fmpz * LUTvalue;
405 slong LUTlen;
406 fmpz_t xpoweval;
407 ulong * inputexpmask;
408 TMP_INIT;
409
410 TMP_START;
411
412 fmpz_init(xpoweval);
413
414 LUToffset = (slong *) TMP_ALLOC(N*FLINT_BITS*sizeof(slong));
415 LUTmask = (ulong *) TMP_ALLOC(N*FLINT_BITS*sizeof(ulong));
416 LUTvalue = (fmpz *) TMP_ALLOC(N*FLINT_BITS*sizeof(fmpz));
417 for (i = 0; i < N*FLINT_BITS; i++)
418 {
419 fmpz_init(LUTvalue + i);
420 }
421
422 fmpz_mpolyc_fit_length(M, A->length);
423 M->length = A->length;
424
425 Mcoeff = M->coeffs;
426 Aexp = A->exps;
427
428 inputexpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
429 mpoly_monomial_zero(inputexpmask, N);
430 for (i = 0; i < A->length; i++)
431 {
432 for (j = 0; j < N; j++)
433 {
434 inputexpmask[j] |= (Aexp + N*i)[j];
435 }
436 }
437
438 LUTlen = 0;
439 for (j = nvars - 1; j >= 0; j--)
440 {
441 mpoly_gen_offset_shift_sp(&offset, &shift, j, A->bits, ctx->minfo);
442
443 fmpz_set(xpoweval, alpha + j); /* xpoweval = alpha[j]^(2^i) */
444 for (i = 0; i < A->bits; i++)
445 {
446 LUToffset[LUTlen] = offset;
447 LUTmask[LUTlen] = (UWORD(1) << (shift + i));
448 fmpz_set(LUTvalue + LUTlen, xpoweval);
449 if ((inputexpmask[offset] & LUTmask[LUTlen]) != 0)
450 {
451 LUTlen++;
452 }
453 fmpz_mod_mul(xpoweval, xpoweval, xpoweval, ctx_mp->ffinfo);
454 }
455 }
456 FLINT_ASSERT(LUTlen < N*FLINT_BITS);
457
458 for (i = 0; i < A->length; i++)
459 {
460 fmpz_one(xpoweval);
461 for (j = 0; j < LUTlen; j++)
462 {
463 if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
464 {
465 fmpz_mod_mul(xpoweval, xpoweval, LUTvalue + j, ctx_mp->ffinfo);
466 }
467 }
468 fmpz_set(Mcoeff + i, xpoweval);
469 }
470
471 fmpz_clear(xpoweval);
472 for (i = 0; i < N*FLINT_BITS; i++)
473 {
474 fmpz_clear(LUTvalue + i);
475 }
476 TMP_END;
477 }
478
fmpz_mod_mpolyu_set_skel(fmpz_mpolycu_t M,const fmpz_mod_mpoly_ctx_t ctx_mp,const fmpz_mpolyu_t A,const fmpz * alpha,const fmpz_mpoly_ctx_t ctx)479 void fmpz_mod_mpolyu_set_skel(
480 fmpz_mpolycu_t M,
481 const fmpz_mod_mpoly_ctx_t ctx_mp,
482 const fmpz_mpolyu_t A,
483 const fmpz * alpha,
484 const fmpz_mpoly_ctx_t ctx)
485 {
486 slong i;
487 fmpz_mpolycu_fit_length(M, A->length);
488 M->length = A->length;
489 for (i = 0; i < A->length; i++)
490 {
491 fmpz_mod_mpoly_set_skel(M->coeffs + i, ctx_mp, A->coeffs + i, alpha, ctx);
492 }
493 }
494
495
496 /* M = S */
fmpz_mod_mpoly_copy_skel(fmpz_mpolyc_t M,const fmpz_mpolyc_t S)497 void fmpz_mod_mpoly_copy_skel(fmpz_mpolyc_t M, const fmpz_mpolyc_t S)
498 {
499 fmpz_mpolyc_fit_length(M, S->length);
500 M->length = S->length;
501 _fmpz_vec_set(M->coeffs, S->coeffs, S->length);
502 }
503
fmpz_mod_mpolyu_copy_skel(fmpz_mpolycu_t M,const fmpz_mpolycu_t S)504 void fmpz_mod_mpolyu_copy_skel(fmpz_mpolycu_t M, const fmpz_mpolycu_t S)
505 {
506 slong i;
507 fmpz_mpolycu_fit_length(M, S->length);
508 M->length = S->length;
509 for (i = 0; i < S->length; i++)
510 {
511 fmpz_mod_mpoly_copy_skel(M->coeffs + i, S->coeffs + i);
512 }
513 }
514
515
516 /* Ared = A mod p */
fmpz_mod_mpoly_red_skel(fmpz_mpolyc_t Ared,const fmpz_mpoly_t A,const fmpz_mod_ctx_t fpctx)517 void fmpz_mod_mpoly_red_skel(
518 fmpz_mpolyc_t Ared,
519 const fmpz_mpoly_t A,
520 const fmpz_mod_ctx_t fpctx)
521 {
522 fmpz_mpolyc_fit_length(Ared, A->length);
523 Ared->length = A->length;
524 _fmpz_vec_scalar_mod_fmpz(Ared->coeffs, A->coeffs, A->length,
525 fmpz_mod_ctx_modulus(fpctx));
526 }
527
fmpz_mod_mpolyu_red_skel(fmpz_mpolycu_t Ared,const fmpz_mpolyu_t A,const fmpz_mod_ctx_t fpctx)528 void fmpz_mod_mpolyu_red_skel(
529 fmpz_mpolycu_t Ared,
530 const fmpz_mpolyu_t A,
531 const fmpz_mod_ctx_t fpctx)
532 {
533 slong i;
534 fmpz_mpolycu_fit_length(Ared, A->length);
535 Ared->length = A->length;
536 for (i = 0; i < A->length; i++)
537 {
538 fmpz_mod_mpoly_red_skel(Ared->coeffs + i, A->coeffs + i, fpctx);
539 }
540 }
541
542
543 /*
544 return Ared.Avar and multiply Avar by Ainc
545 the coefficients of Ared must be reduced mod fpctx->p
546 */
fmpz_mod_mpoly_use_skel_mul(fmpz_t eval,fmpz_mpolyc_t Ared,fmpz_mpolyc_t Avar,const fmpz_mpolyc_t Ainc,const fmpz_mod_ctx_t fpctx)547 void fmpz_mod_mpoly_use_skel_mul(
548 fmpz_t eval,
549 fmpz_mpolyc_t Ared,
550 fmpz_mpolyc_t Avar,
551 const fmpz_mpolyc_t Ainc,
552 const fmpz_mod_ctx_t fpctx)
553 {
554 slong i;
555 fmpz_zero(eval);
556 FLINT_ASSERT(Avar->length == Ared->length);
557 for (i = 0; i < Ared->length; i++)
558 {
559 fmpz_addmul(eval, Ared->coeffs + i, Avar->coeffs + i);
560 fmpz_mod_mul(Avar->coeffs + i, Avar->coeffs + i, Ainc->coeffs + i, fpctx);
561 }
562 fmpz_mod(eval, eval, fmpz_mod_ctx_modulus(fpctx));
563 }
564
565
566 /*
567 A is in ZZ[X,Y][x_0, ..., x_(n-1)]
568 E is in Fp[X][Y]
569
570 Set E to the evaluation of A. The evaluations of A's monomials are in M.
571 M is then multiplied by S.
572 */
fmpz_mod_mpolyuu_use_skel_mul(fmpz_mod_mpolyn_t E,const fmpz_mpolyu_t A,const fmpz_mpolycu_t Ared,fmpz_mpolycu_t M,const fmpz_mpolycu_t S,const fmpz_mod_mpoly_ctx_t ctx_mp)573 void fmpz_mod_mpolyuu_use_skel_mul(
574 fmpz_mod_mpolyn_t E,
575 const fmpz_mpolyu_t A,
576 const fmpz_mpolycu_t Ared,
577 fmpz_mpolycu_t M,
578 const fmpz_mpolycu_t S,
579 const fmpz_mod_mpoly_ctx_t ctx_mp)
580 {
581 slong xexp, yexp;
582 slong i;
583 fmpz_t eval;
584
585 FLINT_ASSERT(E->bits == FLINT_BITS/2);
586 FLINT_ASSERT(1 == mpoly_words_per_exp_sp(E->bits, ctx_mp->minfo));
587
588 FLINT_ASSERT(A->length == Ared->length);
589 FLINT_ASSERT(A->length == M->length);
590 FLINT_ASSERT(A->length == S->length);
591
592 fmpz_init(eval);
593
594 E->length = 0;
595 for (i = 0; i < A->length; i++)
596 {
597 fmpz_mod_mpoly_use_skel_mul(eval, Ared->coeffs + i, M->coeffs + i,
598 S->coeffs + i, ctx_mp->ffinfo);
599 if (fmpz_is_zero(eval))
600 {
601 continue;
602 }
603
604 xexp = A->exps[i] >> (FLINT_BITS/2);
605 yexp = A->exps[i] & LOW_HALF_MASK;
606
607 if (E->length > 0 && (E->exps[E->length - 1] >> (FLINT_BITS/2)) == xexp)
608 {
609 fmpz_mod_poly_set_coeff_fmpz(E->coeffs + E->length - 1, yexp, eval);
610 }
611 else
612 {
613 fmpz_mod_mpolyn_fit_length(E, E->length + 1, ctx_mp);
614 fmpz_mod_poly_zero(E->coeffs + E->length);
615 fmpz_mod_poly_set_coeff_fmpz(E->coeffs + E->length, yexp, eval);
616 E->exps[E->length] = xexp << (FLINT_BITS/2);
617 E->length++;
618 }
619 }
620
621 fmpz_clear(eval);
622 }
623
624
625
fmpz_mod_bma_get_fmpz_mpoly(fmpz_mpoly_t A,const fmpz_mpoly_ctx_t ctx,const fmpz_t alphashift,fmpz_mod_berlekamp_massey_t I,const mpoly_bma_interpolate_ctx_t Ictx,const fmpz_mod_ctx_t fpctx)626 int fmpz_mod_bma_get_fmpz_mpoly(
627 fmpz_mpoly_t A,
628 const fmpz_mpoly_ctx_t ctx,
629 const fmpz_t alphashift,
630 fmpz_mod_berlekamp_massey_t I,
631 const mpoly_bma_interpolate_ctx_t Ictx,
632 const fmpz_mod_ctx_t fpctx)
633 {
634 slong i, j, t, N;
635 int success;
636 ulong this_exp;
637 fmpz_t new_exp;
638 slong * shifts, * offsets;
639 fmpz * values, * roots;
640 fmpz * Acoeff;
641 ulong * Aexp;
642 slong Alen;
643 fmpz_t T, S, V, temp, halfp;
644 TMP_INIT;
645
646 TMP_START;
647
648 fmpz_init(halfp);
649 fmpz_init(T);
650 fmpz_init(S);
651 fmpz_init(V);
652 fmpz_init(temp);
653 fmpz_init(new_exp);
654
655 fmpz_tdiv_q_2exp(halfp, fmpz_mod_ctx_modulus(fpctx), 1);
656
657 fmpz_mod_berlekamp_massey_reduce(I);
658 t = fmpz_mod_poly_degree(I->V1);
659 FLINT_ASSERT(t > 0);
660 FLINT_ASSERT(I->points->length >= t);
661
662 fmpz_mod_poly_fit_length(I->rt, t);
663 I->rt->length = t;
664 success = fmpz_mod_poly_find_distinct_nonzero_roots(I->rt->coeffs, I->V1);
665 if (!success)
666 {
667 goto cleanup;
668 }
669
670 roots = I->rt->coeffs;
671 values = I->points->coeffs;
672
673 FLINT_ASSERT(ctx->minfo->ord == ORD_LEX);
674 FLINT_ASSERT(A->bits <= FLINT_BITS);
675 N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
676 fmpz_mpoly_fit_length(A, t, ctx);
677 Acoeff = A->coeffs;
678 Aexp = A->exps;
679 A->length = 0;
680
681 shifts = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
682 offsets = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
683 for (j = 0; j < ctx->minfo->nvars; j++)
684 {
685 mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
686 }
687
688 Alen = 0;
689 for (i = 0; i < t; i++)
690 {
691 /* coeffs[i] is (coeffs(P).values)/P(roots[i]) =: V/S
692 where P(x) = master(x)/(x-roots[i]) */
693 fmpz_zero(V);
694 fmpz_zero(T);
695 fmpz_zero(S);
696 for (j = t; j > 0; j--)
697 {
698 fmpz_mod_mul(temp, roots + i, T, fpctx);
699 fmpz_mod_add(T, temp, I->V1->coeffs + j, fpctx);
700 fmpz_mod_mul(temp, roots + i, S, fpctx);
701 fmpz_mod_add(S, temp, T, fpctx);
702 fmpz_mod_mul(temp, values + j - 1, T, fpctx);
703 fmpz_mod_add(V, V, temp, fpctx);
704 }
705 /* roots[i] should be a root of master */
706 #if WANT_ASSERT
707 fmpz_mod_mul(temp, roots + i, T, fpctx);
708 fmpz_mod_add(temp, temp, I->V1->coeffs + 0, fpctx);
709 FLINT_ASSERT(fmpz_is_zero(temp));
710 #endif
711 fmpz_mod_pow_fmpz(temp, roots + i, alphashift, fpctx);
712 fmpz_mod_mul(S, S, temp, fpctx);
713 fmpz_mod_inv(temp, S, fpctx);
714 fmpz_mod_mul(Acoeff + Alen, V, temp, fpctx);
715 if (fmpz_is_zero(Acoeff + Alen))
716 {
717 /* hmmm */
718 continue;
719 }
720
721 if (fmpz_cmp(Acoeff + Alen, halfp) > 0)
722 {
723 fmpz_sub(Acoeff + Alen, Acoeff + Alen, fmpz_mod_ctx_modulus(fpctx));
724 }
725
726
727 mpoly_monomial_zero(Aexp + N*Alen, N);
728 fmpz_mod_discrete_log_pohlig_hellman_run(new_exp, Ictx->dlogenv, roots + i);
729 for (j = ctx->minfo->nvars - 1; j >= 0; j--)
730 {
731 this_exp = fmpz_fdiv_ui(new_exp, Ictx->subdegs[j]);
732 fmpz_fdiv_q_ui(new_exp, new_exp, Ictx->subdegs[j]);
733 if (this_exp >= Ictx->degbounds[j])
734 {
735 success = 0;
736 goto cleanup;
737 }
738 (Aexp + N*Alen)[offsets[j]] |= this_exp << shifts[j];
739 }
740 if (!fmpz_is_zero(new_exp))
741 {
742 success = 0;
743 goto cleanup;
744 }
745 Alen++;
746 }
747 A->length = Alen;
748
749 fmpz_mpoly_sort_terms(A, ctx);
750
751 success = 1;
752
753 cleanup:
754
755 fmpz_clear(T);
756 fmpz_clear(S);
757 fmpz_clear(V);
758 fmpz_clear(temp);
759 fmpz_clear(halfp);
760
761 TMP_END;
762 return success;
763 }
764
765
766
nmod_bma_mpoly_init(nmod_bma_mpoly_t A)767 void nmod_bma_mpoly_init(nmod_bma_mpoly_t A)
768 {
769 A->length = 0;
770 A->alloc = 0;
771 A->exps = NULL;
772 A->coeffs = NULL;
773 A->pointcount = 0;
774 }
775
nmod_bma_mpoly_reset_prime(nmod_bma_mpoly_t A,const nmodf_ctx_t fpctx)776 void nmod_bma_mpoly_reset_prime(
777 nmod_bma_mpoly_t A,
778 const nmodf_ctx_t fpctx)
779 {
780 slong i;
781 for (i = 0; i < A->alloc; i++)
782 {
783 nmod_berlekamp_massey_set_prime(A->coeffs + i, fpctx->mod.n);
784 }
785 }
786
787
nmod_bma_mpoly_clear(nmod_bma_mpoly_t A)788 void nmod_bma_mpoly_clear(nmod_bma_mpoly_t A)
789 {
790 slong i;
791 for (i = 0; i < A->alloc; i++)
792 {
793 nmod_berlekamp_massey_clear(A->coeffs + i);
794 }
795
796 if (A->exps)
797 flint_free(A->exps);
798 if (A->coeffs)
799 flint_free(A->coeffs);
800 }
801
nmod_bma_mpoly_print(const nmod_bma_mpoly_t A)802 void nmod_bma_mpoly_print(const nmod_bma_mpoly_t A)
803 {
804 slong i;
805 flint_printf("0");
806 for (i = 0; i < A->length; i++)
807 {
808 flint_printf(" + [");
809 nmod_berlekamp_massey_print(A->coeffs + i);
810 flint_printf("]*X^%wd*Y^%wd",
811 A->exps[i] >> (FLINT_BITS/2),
812 A->exps[i] & ((-UWORD(1)) >> (FLINT_BITS/2)));
813 }
814 }
815
816
nmod_bma_mpoly_fit_length(nmod_bma_mpoly_t A,slong length,const nmodf_ctx_t fpctx)817 void nmod_bma_mpoly_fit_length(
818 nmod_bma_mpoly_t A,
819 slong length,
820 const nmodf_ctx_t fpctx)
821 {
822 slong i;
823 slong old_alloc = A->alloc;
824 slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
825
826 if (length > old_alloc)
827 {
828 if (old_alloc == 0)
829 {
830 A->exps = (ulong *) flint_malloc(new_alloc*sizeof(ulong));
831 A->coeffs = (nmod_berlekamp_massey_struct *) flint_malloc(
832 new_alloc*sizeof(nmod_berlekamp_massey_struct));
833 }
834 else
835 {
836 A->exps = (ulong *) flint_realloc(A->exps, new_alloc*sizeof(ulong));
837 A->coeffs = (nmod_berlekamp_massey_struct *) flint_realloc(A->coeffs,
838 new_alloc*sizeof(nmod_berlekamp_massey_struct));
839 }
840
841 for (i = old_alloc; i < new_alloc; i++)
842 {
843 nmod_berlekamp_massey_init(A->coeffs + i, fpctx->mod.n);
844 }
845 A->alloc = new_alloc;
846 }
847 }
848
nmod_bma_mpoly_zero(nmod_bma_mpoly_t L)849 void nmod_bma_mpoly_zero(nmod_bma_mpoly_t L)
850 {
851 L->length = 0;
852 L->pointcount = 0;
853 }
854
855
nmod_bma_mpoly_reduce(nmod_bma_mpoly_t L)856 int nmod_bma_mpoly_reduce(nmod_bma_mpoly_t L)
857 {
858 slong i;
859 int changed;
860
861 changed = 0;
862
863 for (i = 0; i < L->length; i++)
864 {
865 FLINT_ASSERT(L->pointcount == nmod_berlekamp_massey_point_count(L->coeffs + i));
866 changed |= nmod_berlekamp_massey_reduce(L->coeffs + i);
867 }
868
869 return changed;
870 }
871
nmod_bma_mpoly_add_point(nmod_bma_mpoly_t L,const nmod_mpolyn_t A,const nmod_mpoly_ctx_t ctx_sp)872 void nmod_bma_mpoly_add_point(
873 nmod_bma_mpoly_t L,
874 const nmod_mpolyn_t A,
875 const nmod_mpoly_ctx_t ctx_sp)
876 {
877 slong j;
878 slong Alen = A->length;
879 nmod_poly_struct * Acoeff = A->coeffs;
880 slong Li, Ai, ai;
881 ulong Aexp;
882 nmod_berlekamp_massey_struct * Lcoeff;
883 slong Llen;
884 ulong * Lexp;
885
886 if (L->length == 0)
887 {
888 slong tot = 0;
889 for (Ai = 0; Ai < Alen; Ai++)
890 {
891 for (ai = (Acoeff + Ai)->length - 1; ai >= 0; ai--)
892 {
893 tot += (0 != (Acoeff + Ai)->coeffs[ai]);
894 }
895 }
896 nmod_bma_mpoly_fit_length(L, tot, ctx_sp->ffinfo);
897 }
898
899 Lcoeff = L->coeffs;
900 Llen = L->length;
901 Lexp = L->exps;
902
903 Li = 0;
904 Ai = ai = 0;
905 Aexp = 0;
906 if (Ai < Alen)
907 {
908 ai = nmod_poly_degree(A->coeffs + Ai);
909 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
910 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
911 Aexp = A->exps[Ai] + ai;
912 }
913
914 while (Li < Llen || Ai < Alen)
915 {
916 if (Li < Llen && Ai < Alen && Lexp[Li] == Aexp)
917 {
918 /* L term present, A term present */
919 add_same_exp:
920 nmod_berlekamp_massey_add_point(Lcoeff + Li, (Acoeff + Ai)->coeffs[ai]);
921 Li++;
922 do {
923 ai--;
924 } while (ai >= 0 && (0 == (Acoeff + Ai)->coeffs[ai]));
925 if (ai < 0)
926 {
927 Ai++;
928 if (Ai < Alen)
929 {
930 ai = nmod_poly_degree(A->coeffs + Ai);
931 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
932 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
933 Aexp = A->exps[Ai] + ai;
934 }
935 }
936 else
937 {
938 FLINT_ASSERT(Ai < Alen);
939 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
940 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
941 Aexp = A->exps[Ai] + ai;
942 }
943 }
944 else if (Li < Llen && (Ai >= Alen || Lexp[Li] > Aexp))
945 {
946 /* L term present, A term missing */
947 nmod_berlekamp_massey_add_zeros(Lcoeff + Li, 1);
948 Li++;
949 }
950 else
951 {
952 /* L term missing, A term present */
953 FLINT_ASSERT(Ai < Alen && (Li >= Llen || Lexp[Li] < Aexp));
954 {
955 ulong texp;
956 nmod_berlekamp_massey_struct tcoeff;
957
958 nmod_bma_mpoly_fit_length(L, Llen + 1, ctx_sp->ffinfo);
959 Lcoeff = L->coeffs;
960 Lexp = L->exps;
961
962 texp = Lexp[Llen];
963 tcoeff = Lcoeff[Llen];
964 for (j = Llen - 1; j >= Li; j--)
965 {
966 Lexp[j + 1] = Lexp[j];
967 Lcoeff[j + 1] = Lcoeff[j];
968 }
969 Lexp[Li] = texp;
970 Lcoeff[Li] = tcoeff;
971 }
972
973 nmod_berlekamp_massey_start_over(Lcoeff + Li);
974 nmod_berlekamp_massey_add_zeros(Lcoeff + Li, L->pointcount);
975 Lexp[Li] = Aexp;
976 Llen++;
977 L->length = Llen;
978
979 goto add_same_exp;
980 }
981 }
982
983 L->pointcount++;
984 }
985
nmod_bma_mpoly_get_fmpz_mpolyu(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx,ulong alphashift,const nmod_bma_mpoly_t L,const mpoly_bma_interpolate_ctx_t Ictx,const nmodf_ctx_t fpctx)986 int nmod_bma_mpoly_get_fmpz_mpolyu(
987 fmpz_mpolyu_t A,
988 const fmpz_mpoly_ctx_t ctx,
989 ulong alphashift,
990 const nmod_bma_mpoly_t L,
991 const mpoly_bma_interpolate_ctx_t Ictx,
992 const nmodf_ctx_t fpctx)
993 {
994 int success;
995 slong i;
996
997 fmpz_mpolyu_fit_length(A, L->length, ctx);
998 A->length = 0;
999 for (i = 0; i < L->length; i++)
1000 {
1001 A->exps[A->length] = L->exps[i];
1002 success = nmod_mpoly_bma_get_fmpz_mpoly(A->coeffs + A->length, ctx,
1003 alphashift, L->coeffs + i, Ictx, fpctx);
1004 if (!success)
1005 {
1006 return 0;
1007 }
1008 A->length += !fmpz_mpoly_is_zero(A->coeffs + A->length, ctx);
1009 }
1010 return 1;
1011 }
1012
1013
1014
fmpz_mod_bma_mpoly_init(fmpz_mod_bma_mpoly_t A)1015 void fmpz_mod_bma_mpoly_init(fmpz_mod_bma_mpoly_t A)
1016 {
1017 A->length = 0;
1018 A->alloc = 0;
1019 A->exps = NULL;
1020 A->coeffs = NULL;
1021 A->pointcount = 0;
1022 }
1023
fmpz_mod_bma_mpoly_reset_prime(fmpz_mod_bma_mpoly_t A,const fmpz_mod_ctx_t fpctx)1024 void fmpz_mod_bma_mpoly_reset_prime(
1025 fmpz_mod_bma_mpoly_t A,
1026 const fmpz_mod_ctx_t fpctx)
1027 {
1028 slong i;
1029 for (i = 0; i < A->alloc; i++)
1030 {
1031 fmpz_mod_berlekamp_massey_set_prime(A->coeffs + i, fmpz_mod_ctx_modulus(fpctx));
1032 }
1033 }
1034
1035
fmpz_mod_bma_mpoly_clear(fmpz_mod_bma_mpoly_t A)1036 void fmpz_mod_bma_mpoly_clear(fmpz_mod_bma_mpoly_t A)
1037 {
1038 slong i;
1039 for (i = 0; i < A->alloc; i++)
1040 {
1041 fmpz_mod_berlekamp_massey_clear(A->coeffs + i);
1042 }
1043
1044 if (A->exps)
1045 flint_free(A->exps);
1046 if (A->coeffs)
1047 flint_free(A->coeffs);
1048 }
1049
fmpz_mod_bma_mpoly_print(fmpz_mod_bma_mpoly_t A,const mpoly_bma_interpolate_ctx_t Ictx)1050 void fmpz_mod_bma_mpoly_print(
1051 fmpz_mod_bma_mpoly_t A,
1052 const mpoly_bma_interpolate_ctx_t Ictx)
1053 {
1054 slong i;
1055 flint_printf("0");
1056 for (i = 0; i < A->length; i++)
1057 {
1058 flint_printf(" + [");
1059 fmpz_mod_berlekamp_massey_print(A->coeffs + i);
1060 flint_printf("]*X^%wd*Y^%wd",
1061 A->exps[i] >> (FLINT_BITS/2),
1062 A->exps[i] & ((-UWORD(1)) >> (FLINT_BITS - FLINT_BITS/2)));
1063 }
1064 }
1065
1066
fmpz_mod_bma_mpoly_fit_length(fmpz_mod_bma_mpoly_t A,slong length,const fmpz_mod_ctx_t fpctx)1067 void fmpz_mod_bma_mpoly_fit_length(
1068 fmpz_mod_bma_mpoly_t A,
1069 slong length,
1070 const fmpz_mod_ctx_t fpctx)
1071 {
1072 slong i;
1073 slong old_alloc = A->alloc;
1074 slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
1075
1076 if (length > old_alloc)
1077 {
1078 if (old_alloc == 0)
1079 {
1080 A->exps = (ulong *) flint_malloc(new_alloc*sizeof(ulong));
1081 A->coeffs = (fmpz_mod_berlekamp_massey_struct *) flint_malloc(
1082 new_alloc*sizeof(fmpz_mod_berlekamp_massey_struct));
1083 }
1084 else
1085 {
1086 A->exps = (ulong *) flint_realloc(A->exps, new_alloc*sizeof(ulong));
1087 A->coeffs = (fmpz_mod_berlekamp_massey_struct *) flint_realloc(A->coeffs,
1088 new_alloc*sizeof(fmpz_mod_berlekamp_massey_struct));
1089 }
1090
1091 for (i = old_alloc; i < new_alloc; i++)
1092 {
1093 fmpz_mod_berlekamp_massey_init(A->coeffs + i, fmpz_mod_ctx_modulus(fpctx));
1094 }
1095 A->alloc = new_alloc;
1096 }
1097 }
1098
fmpz_mod_bma_mpoly_zero(fmpz_mod_bma_mpoly_t L)1099 void fmpz_mod_bma_mpoly_zero(fmpz_mod_bma_mpoly_t L)
1100 {
1101 L->length = 0;
1102 L->pointcount = 0;
1103 }
1104
fmpz_mod_bma_mpoly_reduce(fmpz_mod_bma_mpoly_t L)1105 int fmpz_mod_bma_mpoly_reduce(fmpz_mod_bma_mpoly_t L)
1106 {
1107 slong i;
1108 int changed;
1109
1110 changed = 0;
1111
1112 for (i = 0; i < L->length; i++)
1113 {
1114 FLINT_ASSERT(L->pointcount == fmpz_mod_berlekamp_massey_point_count(L->coeffs + i));
1115 changed |= fmpz_mod_berlekamp_massey_reduce(L->coeffs + i);
1116 }
1117
1118 return changed;
1119 }
1120
fmpz_mod_bma_mpoly_add_point(fmpz_mod_bma_mpoly_t L,const fmpz_mod_mpolyn_t A,const fmpz_mod_mpoly_ctx_t ctx_mp)1121 void fmpz_mod_bma_mpoly_add_point(
1122 fmpz_mod_bma_mpoly_t L,
1123 const fmpz_mod_mpolyn_t A,
1124 const fmpz_mod_mpoly_ctx_t ctx_mp)
1125 {
1126 slong j;
1127 slong Alen = A->length;
1128 fmpz_mod_poly_struct * Acoeff = A->coeffs;
1129 slong Li, Ai, ai;
1130 ulong Aexp;
1131 fmpz_mod_berlekamp_massey_struct * Lcoeff;
1132 slong Llen;
1133 ulong * Lexp;
1134
1135 if (L->length == 0)
1136 {
1137 slong tot = 0;
1138 for (Ai = 0; Ai < Alen; Ai++)
1139 {
1140 for (ai = (Acoeff + Ai)->length - 1; ai >= 0; ai--)
1141 {
1142 tot += !fmpz_is_zero((Acoeff + Ai)->coeffs + ai);
1143 }
1144 }
1145 fmpz_mod_bma_mpoly_fit_length(L, tot, ctx_mp->ffinfo);
1146 }
1147
1148 Lcoeff = L->coeffs;
1149 Llen = L->length;
1150 Lexp = L->exps;
1151
1152 Li = 0;
1153 Ai = ai = 0;
1154 Aexp = 0;
1155 if (Ai < Alen)
1156 {
1157 ai = fmpz_mod_poly_degree(A->coeffs + Ai);
1158 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1159 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1160 Aexp = A->exps[Ai] + ai;
1161 }
1162
1163 while (Li < Llen || Ai < Alen)
1164 {
1165 if (Li < Llen && Ai < Alen && Lexp[Li] == Aexp)
1166 {
1167 /* L term present, A term present */
1168 add_same_exp:
1169 fmpz_mod_berlekamp_massey_add_point(Lcoeff + Li,
1170 (Acoeff + Ai)->coeffs + ai);
1171 Li++;
1172 do {
1173 ai--;
1174 } while (ai >= 0 && fmpz_is_zero((Acoeff + Ai)->coeffs + ai));
1175 if (ai < 0)
1176 {
1177 Ai++;
1178 if (Ai < Alen)
1179 {
1180 ai = fmpz_mod_poly_degree(A->coeffs + Ai);
1181 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1182 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1183 Aexp = A->exps[Ai] + ai;
1184 }
1185 }
1186 else
1187 {
1188 FLINT_ASSERT(Ai < Alen);
1189 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1190 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1191 Aexp = A->exps[Ai] + ai;
1192 }
1193 }
1194 else if (Li < Llen && (Ai >= Alen || Lexp[Li] > Aexp))
1195 {
1196 /* L term present, A term missing */
1197 fmpz_mod_berlekamp_massey_add_zeros(Lcoeff + Li, 1);
1198 Li++;
1199 }
1200 else
1201 {
1202 /* L term missing, A term present */
1203 FLINT_ASSERT(Ai < Alen && (Li >= Llen || Lexp[Li] < Aexp));
1204 {
1205 ulong texp;
1206 fmpz_mod_berlekamp_massey_struct tcoeff;
1207
1208 fmpz_mod_bma_mpoly_fit_length(L, Llen + 1, ctx_mp->ffinfo);
1209 Lcoeff = L->coeffs;
1210 Lexp = L->exps;
1211
1212 texp = Lexp[Llen];
1213 tcoeff = Lcoeff[Llen];
1214 for (j = Llen - 1; j >= Li; j--)
1215 {
1216 Lexp[j + 1] = Lexp[j];
1217 Lcoeff[j + 1] = Lcoeff[j];
1218 }
1219 Lexp[Li] = texp;
1220 Lcoeff[Li] = tcoeff;
1221 }
1222
1223 fmpz_mod_berlekamp_massey_start_over(Lcoeff + Li);
1224 fmpz_mod_berlekamp_massey_add_zeros(Lcoeff + Li, L->pointcount);
1225 Lexp[Li] = Aexp;
1226 Llen++;
1227 L->length = Llen;
1228 goto add_same_exp;
1229 }
1230 }
1231
1232 L->pointcount++;
1233 }
1234
fmpz_mod_bma_mpoly_get_fmpz_mpolyu(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx,const fmpz_t alphashift,const fmpz_mod_bma_mpoly_t L,const mpoly_bma_interpolate_ctx_t Ictx,const fmpz_mod_ctx_t fpctx)1235 int fmpz_mod_bma_mpoly_get_fmpz_mpolyu(
1236 fmpz_mpolyu_t A,
1237 const fmpz_mpoly_ctx_t ctx,
1238 const fmpz_t alphashift,
1239 const fmpz_mod_bma_mpoly_t L,
1240 const mpoly_bma_interpolate_ctx_t Ictx,
1241 const fmpz_mod_ctx_t fpctx)
1242 {
1243 int success;
1244 slong i;
1245
1246 fmpz_mpolyu_fit_length(A, L->length, ctx);
1247 A->length = 0;
1248 for (i = 0; i < L->length; i++)
1249 {
1250 A->exps[A->length] = L->exps[i];
1251 success = fmpz_mod_bma_get_fmpz_mpoly(A->coeffs + A->length, ctx,
1252 alphashift, L->coeffs + i, Ictx, fpctx);
1253 if (!success)
1254 {
1255 return 0;
1256 }
1257 A->length += !fmpz_mpoly_is_zero(A->coeffs + A->length, ctx);
1258 }
1259 return 1;
1260 }
1261
1262
1263
fmpz_mod_mpolyn_bidegree(const fmpz_mod_mpolyn_t A)1264 ulong fmpz_mod_mpolyn_bidegree(const fmpz_mod_mpolyn_t A)
1265 {
1266 ulong degx, degy;
1267
1268 FLINT_ASSERT(A->length > 0);
1269 FLINT_ASSERT(A->bits == FLINT_BITS/2);
1270
1271 degx = (A->exps + 1*0)[0] >> (FLINT_BITS/2);
1272 degy = fmpz_mod_poly_degree(A->coeffs + 0);
1273
1274 FLINT_ASSERT(FLINT_BIT_COUNT(degx) < FLINT_BITS/2);
1275 FLINT_ASSERT(FLINT_BIT_COUNT(degy) < FLINT_BITS/2);
1276
1277 return (degx << (FLINT_BITS/2)) + degy;
1278 }
1279
nmod_mpolyn_bidegree(const nmod_mpolyn_t A)1280 ulong nmod_mpolyn_bidegree(const nmod_mpolyn_t A)
1281 {
1282 ulong degx, degy;
1283
1284 FLINT_ASSERT(A->length > 0);
1285 FLINT_ASSERT(A->bits == FLINT_BITS/2);
1286
1287 degx = A->exps[0] >> (FLINT_BITS/2);
1288 degy = nmod_poly_degree(A->coeffs + 0);
1289
1290 FLINT_ASSERT(FLINT_BIT_COUNT(degx) < FLINT_BITS/2);
1291 FLINT_ASSERT(FLINT_BIT_COUNT(degy) < FLINT_BITS/2);
1292
1293 return (degx << (FLINT_BITS/2)) + degy;
1294 }
1295
1296
fmpz_mpoly_eval_fmpz_mod(fmpz_t eval,const fmpz_mod_ctx_t fpctx,const fmpz_mpoly_t A,const fmpz * alpha,const fmpz_mpoly_ctx_t ctx)1297 void fmpz_mpoly_eval_fmpz_mod(
1298 fmpz_t eval,
1299 const fmpz_mod_ctx_t fpctx,
1300 const fmpz_mpoly_t A,
1301 const fmpz * alpha,
1302 const fmpz_mpoly_ctx_t ctx)
1303 {
1304 slong i, j;
1305 slong offset, shift;
1306 slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
1307 slong nvars = ctx->minfo->nvars;
1308 ulong * Aexp;
1309 slong * LUToffset;
1310 ulong * LUTmask;
1311 fmpz * LUTvalue;
1312 slong LUTlen;
1313 fmpz_t xpoweval;
1314 ulong * inputexpmask;
1315 TMP_INIT;
1316
1317 TMP_START;
1318
1319 fmpz_init(xpoweval);
1320
1321 LUToffset = (slong *) TMP_ALLOC(N*FLINT_BITS*sizeof(slong));
1322 LUTmask = (ulong *) TMP_ALLOC(N*FLINT_BITS*sizeof(ulong));
1323 LUTvalue = (fmpz *) TMP_ALLOC(N*FLINT_BITS*sizeof(fmpz));
1324 for (i = 0; i < N*FLINT_BITS; i++)
1325 {
1326 fmpz_init(LUTvalue + i);
1327 }
1328
1329 Aexp = A->exps;
1330
1331 inputexpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
1332 mpoly_monomial_zero(inputexpmask, N);
1333 for (i = 0; i < A->length; i++)
1334 {
1335 for (j = 0; j < N; j++)
1336 {
1337 inputexpmask[j] |= (Aexp + N*i)[j];
1338 }
1339 }
1340
1341 LUTlen = 0;
1342 for (j = nvars - 1; j >= 0; j--)
1343 {
1344 mpoly_gen_offset_shift_sp(&offset, &shift, j, A->bits, ctx->minfo);
1345
1346 fmpz_set(xpoweval, alpha + j); /* xpoweval = alpha[i]^(2^i) */
1347 for (i = 0; i < A->bits; i++)
1348 {
1349 LUToffset[LUTlen] = offset;
1350 LUTmask[LUTlen] = (UWORD(1) << (shift + i));
1351 fmpz_set(LUTvalue + LUTlen, xpoweval);
1352 if ((inputexpmask[offset] & LUTmask[LUTlen]) != 0)
1353 {
1354 LUTlen++;
1355 }
1356 fmpz_mod_mul(xpoweval, xpoweval, xpoweval, fpctx);
1357 }
1358 }
1359 FLINT_ASSERT(LUTlen < N*FLINT_BITS);
1360
1361 fmpz_zero(eval);
1362 for (i = 0; i < A->length; i++)
1363 {
1364 fmpz_mod(xpoweval, A->coeffs + i, fmpz_mod_ctx_modulus(fpctx));
1365 for (j = 0; j < LUTlen; j++)
1366 {
1367 if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
1368 {
1369 fmpz_mod_mul(xpoweval, xpoweval, LUTvalue + j, fpctx);
1370 }
1371 }
1372 fmpz_mod_add(eval, eval, xpoweval, fpctx);
1373 }
1374
1375 fmpz_clear(xpoweval);
1376 for (i = 0; i < N*FLINT_BITS; i++)
1377 {
1378 fmpz_clear(LUTvalue + i);
1379 }
1380 TMP_END;
1381 }
1382
1383 /* take coeffs from [0, p) to (-p/2, p/2]
1384 */
fmpz_mpolyu_symmetrize_coeffs(fmpz_mpolyu_t A,const fmpz_mpoly_ctx_t ctx,const fmpz_mod_ctx_t fpctx)1385 void fmpz_mpolyu_symmetrize_coeffs(
1386 fmpz_mpolyu_t A,
1387 const fmpz_mpoly_ctx_t ctx,
1388 const fmpz_mod_ctx_t fpctx)
1389 {
1390 slong i, j;
1391 fmpz_mpoly_struct * Ac;
1392
1393 for (i = 0; i < A->length; i++)
1394 {
1395 Ac = A->coeffs + i;
1396 for (j = 0; j < Ac->length; j++)
1397 {
1398 fmpz_smod(Ac->coeffs + j, Ac->coeffs + j,
1399 fmpz_mod_ctx_modulus(fpctx));
1400 }
1401 }
1402 }
1403
1404
1405
fmpz_mpolyuu_eval_nmod(nmod_mpolyn_t E,const nmod_mpoly_ctx_t ctx_sp,const fmpz_mpolyu_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1406 void fmpz_mpolyuu_eval_nmod(
1407 nmod_mpolyn_t E,
1408 const nmod_mpoly_ctx_t ctx_sp,
1409 const fmpz_mpolyu_t A,
1410 const mp_limb_t * alpha,
1411 const fmpz_mpoly_ctx_t ctx)
1412 {
1413 slong i;
1414 slong xexp, yexp;
1415 mp_limb_t eval;
1416
1417 FLINT_ASSERT(E->bits == FLINT_BITS/2);
1418 FLINT_ASSERT(1 == mpoly_words_per_exp_sp(E->bits, ctx_sp->minfo));
1419
1420 E->length = 0;
1421 for (i = 0; i < A->length; i++)
1422 {
1423 eval = fmpz_mpoly_eval_nmod(ctx_sp->ffinfo, A->coeffs + i, alpha, ctx);
1424 if (eval == 0)
1425 {
1426 continue;
1427 }
1428
1429 xexp = A->exps[i] >> (FLINT_BITS/2);
1430 yexp = A->exps[i] & (-UWORD(1) >> (FLINT_BITS - FLINT_BITS/2));
1431
1432 if (E->length > 0 && (E->exps[E->length - 1] >> (FLINT_BITS/2)) == xexp)
1433 {
1434 nmod_poly_set_coeff_ui(E->coeffs + E->length - 1, yexp, eval);
1435 }
1436 else
1437 {
1438 nmod_mpolyn_fit_length(E, E->length + 1, ctx_sp);
1439 nmod_poly_zero(E->coeffs + E->length);
1440 nmod_poly_set_coeff_ui(E->coeffs + E->length, yexp, eval);
1441 E->exps[E->length] = xexp << (FLINT_BITS/2);
1442 E->length++;
1443 }
1444 }
1445 }
1446
fmpz_mpolyuu_eval_fmpz_mod(fmpz_mod_mpolyn_t E,const fmpz_mod_mpoly_ctx_t ctx_mp,const fmpz_mpolyu_t A,const fmpz * alpha,const fmpz_mpoly_ctx_t ctx)1447 void fmpz_mpolyuu_eval_fmpz_mod(
1448 fmpz_mod_mpolyn_t E,
1449 const fmpz_mod_mpoly_ctx_t ctx_mp,
1450 const fmpz_mpolyu_t A,
1451 const fmpz * alpha,
1452 const fmpz_mpoly_ctx_t ctx)
1453 {
1454 slong i;
1455 slong xexp, yexp;
1456 fmpz_t eval;
1457
1458 FLINT_ASSERT(E->bits == FLINT_BITS/2);
1459 FLINT_ASSERT(1 == mpoly_words_per_exp_sp(E->bits, ctx_mp->minfo));
1460
1461 fmpz_init(eval);
1462
1463 E->length = 0;
1464 for (i = 0; i < A->length; i++)
1465 {
1466 fmpz_mpoly_eval_fmpz_mod(eval, ctx_mp->ffinfo, A->coeffs + i, alpha, ctx);
1467 if (fmpz_is_zero(eval))
1468 {
1469 continue;
1470 }
1471
1472 xexp = A->exps[i] >> (FLINT_BITS/2);
1473 yexp = A->exps[i] & (-UWORD(1) >> (FLINT_BITS - FLINT_BITS/2));
1474
1475 if (E->length > 0 && (E->exps[E->length - 1] >> (FLINT_BITS/2)) == xexp)
1476 {
1477 fmpz_mod_poly_set_coeff_fmpz(E->coeffs + E->length - 1, yexp, eval);
1478 }
1479 else
1480 {
1481 fmpz_mod_mpolyn_fit_length(E, E->length + 1, ctx_mp);
1482 fmpz_mod_poly_zero(E->coeffs + E->length);
1483 fmpz_mod_poly_set_coeff_fmpz(E->coeffs + E->length, yexp, eval);
1484 E->exps[E->length] = xexp << (FLINT_BITS/2);
1485 E->length++;
1486 }
1487 }
1488
1489 fmpz_clear(eval);
1490 }
1491
1492
fmpz_mpoly_eval_nmod(const nmodf_ctx_t fpctx,const fmpz_mpoly_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1493 mp_limb_t fmpz_mpoly_eval_nmod(
1494 const nmodf_ctx_t fpctx,
1495 const fmpz_mpoly_t A,
1496 const mp_limb_t * alpha,
1497 const fmpz_mpoly_ctx_t ctx)
1498 {
1499 mp_limb_t eval;
1500 slong i, j;
1501 slong offset, shift;
1502 slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
1503 slong nvars = ctx->minfo->nvars;
1504 ulong * Aexp;
1505 slong * LUToffset;
1506 ulong * LUTmask;
1507 mp_limb_t * LUTvalue;
1508 slong LUTlen;
1509 mp_limb_t xpoweval;
1510 ulong * inputexpmask;
1511 TMP_INIT;
1512
1513 FLINT_ASSERT(A->bits <= FLINT_BITS);
1514
1515 TMP_START;
1516
1517 LUToffset = (slong *) TMP_ALLOC(N*FLINT_BITS*sizeof(slong));
1518 LUTmask = (ulong *) TMP_ALLOC(N*FLINT_BITS*sizeof(ulong));
1519 LUTvalue = (mp_limb_t *) TMP_ALLOC(N*FLINT_BITS*sizeof(mp_limb_t));
1520
1521 Aexp = A->exps;
1522
1523 inputexpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
1524 mpoly_monomial_zero(inputexpmask, N);
1525 for (i = 0; i < A->length; i++)
1526 {
1527 for (j = 0; j < N; j++)
1528 {
1529 inputexpmask[j] |= (Aexp + N*i)[j];
1530 }
1531 }
1532
1533 LUTlen = 0;
1534 for (j = nvars - 1; j >= 0; j--)
1535 {
1536 mpoly_gen_offset_shift_sp(&offset, &shift, j, A->bits, ctx->minfo);
1537
1538 xpoweval = alpha[j]; /* xpoweval = alpha[i]^(2^i) */
1539 for (i = 0; i < A->bits; i++)
1540 {
1541 LUToffset[LUTlen] = offset;
1542 LUTmask[LUTlen] = (UWORD(1) << (shift + i));
1543 LUTvalue[LUTlen] = xpoweval;
1544 if ((inputexpmask[offset] & LUTmask[LUTlen]) != 0)
1545 {
1546 LUTlen++;
1547 }
1548 xpoweval = nmod_mul(xpoweval, xpoweval, fpctx->mod);
1549 }
1550 }
1551 FLINT_ASSERT(LUTlen < N*FLINT_BITS);
1552
1553 eval = 0;
1554 for (i = 0; i < A->length; i++)
1555 {
1556 xpoweval = fmpz_fdiv_ui(A->coeffs + i, fpctx->mod.n);
1557 for (j = 0; j < LUTlen; j++)
1558 {
1559 if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
1560 {
1561 xpoweval = nmod_mul(xpoweval, LUTvalue[j], fpctx->mod);
1562 }
1563 }
1564 eval = nmod_add(eval, xpoweval, fpctx->mod);
1565 }
1566
1567 TMP_END;
1568
1569 return eval;
1570 }
1571
1572
1573
1574
1575
nmod_zip_init(nmod_zip_t Z)1576 void nmod_zip_init(nmod_zip_t Z)
1577 {
1578 Z->mlength = 0;
1579 Z->malloc = 0;
1580 Z->coeffs = NULL;
1581 Z->monomials = NULL;
1582 Z->ealloc = 0;
1583 Z->evals = NULL;
1584 }
1585
nmod_zip_clear(nmod_zip_t Z)1586 void nmod_zip_clear(nmod_zip_t Z)
1587 {
1588 if (Z->coeffs)
1589 flint_free(Z->coeffs);
1590 if (Z->monomials)
1591 flint_free(Z->monomials);
1592 if (Z->evals)
1593 flint_free(Z->evals);
1594 }
1595
nmod_zip_set_lengths(nmod_zip_t A,slong mlength,slong elength)1596 void nmod_zip_set_lengths(nmod_zip_t A, slong mlength, slong elength)
1597 {
1598 slong old_malloc = A->malloc;
1599 slong new_malloc = FLINT_MAX(mlength, A->malloc + A->malloc/2);
1600 slong old_ealloc = A->ealloc;
1601 slong new_ealloc = FLINT_MAX(elength, A->ealloc + A->ealloc/2);
1602
1603 FLINT_ASSERT(mlength > 0);
1604 FLINT_ASSERT(elength > 0);
1605
1606 if (mlength > old_malloc)
1607 {
1608 if (old_malloc == 0)
1609 {
1610 A->coeffs = (mp_limb_t *) flint_malloc(
1611 new_malloc*sizeof(mp_limb_t));
1612 A->monomials = (mp_limb_t *) flint_malloc(
1613 new_malloc*sizeof(mp_limb_t));
1614 }
1615 else
1616 {
1617 A->coeffs = (mp_limb_t *) flint_realloc(A->coeffs,
1618 new_malloc*sizeof(mp_limb_t));
1619 A->monomials = (mp_limb_t *) flint_realloc(A->monomials,
1620 new_malloc*sizeof(mp_limb_t));
1621 }
1622 A->malloc = new_malloc;
1623 }
1624
1625 A->mlength = mlength;
1626
1627 if (elength > old_ealloc)
1628 {
1629 if (old_ealloc == 0)
1630 {
1631 A->evals = (mp_limb_t *) flint_malloc(new_ealloc*sizeof(mp_limb_t));
1632 }
1633 else
1634 {
1635 A->evals = (mp_limb_t *) flint_realloc(A->evals,
1636 new_ealloc*sizeof(mp_limb_t));
1637 }
1638 A->ealloc = new_ealloc;
1639 }
1640 }
1641
nmod_zip_print(const nmod_zip_t Z,slong elength)1642 void nmod_zip_print(const nmod_zip_t Z, slong elength)
1643 {
1644 slong i;
1645
1646 printf("m ");
1647 for (i = 0; i < Z->mlength; i++)
1648 {
1649 flint_printf("(%wu %wu) ", Z->coeffs[i], Z->monomials[i]);
1650 }
1651 printf("e ");
1652 for (i = 0; i < elength; i++)
1653 {
1654 flint_printf("%wu ", Z->evals[i]);
1655 }
1656 }
1657
1658
1659
nmod_zip_mpolyu_init(nmod_zip_mpolyu_t Z)1660 void nmod_zip_mpolyu_init(nmod_zip_mpolyu_t Z)
1661 {
1662 Z->alloc = 0;
1663 Z->length = 0;
1664 Z->exps = NULL;
1665 Z->coeffs = NULL;
1666 Z->pointcount = 0;
1667 }
1668
nmod_zip_mpolyu_clear(nmod_zip_mpolyu_t Z)1669 void nmod_zip_mpolyu_clear(nmod_zip_mpolyu_t Z)
1670 {
1671 slong i;
1672
1673 for (i = 0; i < Z->alloc; i++)
1674 {
1675 nmod_zip_clear(Z->coeffs + i);
1676 }
1677
1678 if (Z->exps)
1679 flint_free(Z->exps);
1680 if (Z->coeffs)
1681 flint_free(Z->coeffs);
1682 }
1683
nmod_zip_mpolyu_fit_length(nmod_zip_mpolyu_t A,slong length)1684 void nmod_zip_mpolyu_fit_length(
1685 nmod_zip_mpolyu_t A,
1686 slong length)
1687 {
1688 slong i;
1689 slong old_alloc = A->alloc;
1690 slong new_alloc = FLINT_MAX(length, A->alloc + A->alloc/2);
1691
1692 if (length > old_alloc)
1693 {
1694 if (old_alloc == 0)
1695 {
1696 A->exps = (ulong *) flint_malloc(new_alloc*sizeof(ulong));
1697 A->coeffs = (nmod_zip_struct *) flint_malloc(
1698 new_alloc*sizeof(nmod_zip_struct));
1699 }
1700 else
1701 {
1702 A->exps = (ulong *) flint_realloc(A->exps, new_alloc*sizeof(ulong));
1703 A->coeffs = (nmod_zip_struct *) flint_realloc(A->coeffs,
1704 new_alloc*sizeof(nmod_zip_struct));
1705 }
1706
1707 for (i = old_alloc; i < new_alloc; i++)
1708 {
1709 nmod_zip_init(A->coeffs + i);
1710 }
1711 A->alloc = new_alloc;
1712 }
1713 }
1714
nmod_zip_mpolyu_fit_poly(nmod_zip_mpolyu_t Z,fmpz_mpolyu_t H,slong eval_length)1715 void nmod_zip_mpolyu_fit_poly(
1716 nmod_zip_mpolyu_t Z,
1717 fmpz_mpolyu_t H,
1718 slong eval_length)
1719 {
1720 slong i;
1721 FLINT_ASSERT(H->length > 0);
1722
1723 nmod_zip_mpolyu_fit_length(Z, H->length);
1724
1725 for (i = 0; i < H->length; i++)
1726 {
1727 Z->exps[i] = H->exps[i];
1728 nmod_zip_set_lengths(Z->coeffs + i, (H->coeffs + i)->length, eval_length);
1729 }
1730
1731 Z->length = H->length;
1732 Z->pointcount = 0;
1733 }
1734
1735
nmod_mpoly_set_skel(nmod_mpolyc_t S,const nmod_mpoly_ctx_t ctx_sp,const fmpz_mpoly_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1736 void nmod_mpoly_set_skel(
1737 nmod_mpolyc_t S,
1738 const nmod_mpoly_ctx_t ctx_sp,
1739 const fmpz_mpoly_t A,
1740 const mp_limb_t * alpha,
1741 const fmpz_mpoly_ctx_t ctx)
1742 {
1743 slong i, j;
1744 slong offset, shift;
1745 slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
1746 slong nvars = ctx->minfo->nvars;
1747 ulong * Aexp;
1748 slong * LUToffset;
1749 ulong * LUTmask;
1750 mp_limb_t * LUTvalue;
1751 slong LUTlen;
1752 mp_limb_t xpoweval;
1753 ulong * inputexpmask;
1754 TMP_INIT;
1755
1756 FLINT_ASSERT(A->bits <= FLINT_BITS);
1757
1758 TMP_START;
1759
1760 LUToffset = (slong *) TMP_ALLOC(N*FLINT_BITS*sizeof(slong));
1761 LUTmask = (ulong *) TMP_ALLOC(N*FLINT_BITS*sizeof(ulong));
1762 LUTvalue = (mp_limb_t *) TMP_ALLOC(N*FLINT_BITS*sizeof(mp_limb_t));
1763
1764 Aexp = A->exps;
1765
1766 inputexpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
1767 mpoly_monomial_zero(inputexpmask, N);
1768 for (i = 0; i < A->length; i++)
1769 {
1770 for (j = 0; j < N; j++)
1771 {
1772 inputexpmask[j] |= (Aexp + N*i)[j];
1773 }
1774 }
1775
1776 LUTlen = 0;
1777 for (j = nvars - 1; j >= 0; j--)
1778 {
1779 mpoly_gen_offset_shift_sp(&offset, &shift, j, A->bits, ctx->minfo);
1780
1781 xpoweval = alpha[j]; /* xpoweval = alpha[i]^(2^i) */
1782 for (i = 0; i < A->bits; i++)
1783 {
1784 LUToffset[LUTlen] = offset;
1785 LUTmask[LUTlen] = (UWORD(1) << (shift + i));
1786 LUTvalue[LUTlen] = xpoweval;
1787 if ((inputexpmask[offset] & LUTmask[LUTlen]) != 0)
1788 {
1789 LUTlen++;
1790 }
1791 xpoweval = nmod_mul(xpoweval, xpoweval, ctx_sp->ffinfo->mod);
1792 }
1793 }
1794 FLINT_ASSERT(LUTlen < N*FLINT_BITS);
1795
1796 nmod_mpolyc_fit_length(S, A->length);
1797 for (i = 0; i < A->length; i++)
1798 {
1799 xpoweval = 1;
1800 for (j = 0; j < LUTlen; j++)
1801 {
1802 if (((Aexp + N*i)[LUToffset[j]] & LUTmask[j]) != 0)
1803 {
1804 xpoweval = nmod_mul(xpoweval, LUTvalue[j], ctx_sp->ffinfo->mod);
1805 }
1806 }
1807 S->coeffs[i] = xpoweval;
1808 }
1809 S->length = A->length;
1810
1811 TMP_END;
1812 }
1813
nmod_zip_mpolyu_set_skel(nmod_zip_mpolyu_t Z,const nmod_mpoly_ctx_t ctx_sp,const fmpz_mpolyu_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1814 void nmod_zip_mpolyu_set_skel(
1815 nmod_zip_mpolyu_t Z,
1816 const nmod_mpoly_ctx_t ctx_sp,
1817 const fmpz_mpolyu_t A,
1818 const mp_limb_t * alpha,
1819 const fmpz_mpoly_ctx_t ctx)
1820 {
1821 slong i, j;
1822
1823 nmod_mpolyc_t T;
1824 nmod_mpolyc_init(T);
1825
1826 FLINT_ASSERT(Z->length == A->length);
1827 for (i = 0; i < Z->length; i++)
1828 {
1829 nmod_zip_struct * Zc = Z->coeffs + i;
1830
1831 nmod_mpoly_set_skel(T, ctx_sp, A->coeffs + i, alpha, ctx);
1832
1833 Z->exps[i] = A->exps[i];
1834 FLINT_ASSERT(Zc->mlength == T->length);
1835 for (j = 0; j < Zc->mlength; j++)
1836 {
1837 Zc->coeffs[j] = 0;
1838 Zc->monomials[j] = T->coeffs[j];
1839 }
1840 }
1841 Z->pointcount = 0;
1842
1843 nmod_mpolyc_clear(T);
1844 }
1845
nmod_zip_mpolyuu_print(const nmod_zip_mpolyu_t A)1846 void nmod_zip_mpolyuu_print(const nmod_zip_mpolyu_t A)
1847 {
1848 slong i;
1849 flint_printf("0");
1850 for (i = 0; i < A->length; i++)
1851 {
1852 flint_printf(" + [");
1853 nmod_zip_print(A->coeffs + i, A->pointcount);
1854 flint_printf("]*X^%wd*Y^%wd",
1855 A->exps[i] >> (FLINT_BITS/2),
1856 A->exps[i] & ((-UWORD(1)) >> (FLINT_BITS/2)));
1857 }
1858 }
1859
nmod_zip_mpolyuu_add_point(nmod_zip_mpolyu_t L,const nmod_mpolyn_t A)1860 int nmod_zip_mpolyuu_add_point(
1861 nmod_zip_mpolyu_t L,
1862 const nmod_mpolyn_t A)
1863 {
1864 slong Alen = A->length;
1865 nmod_poly_struct * Acoeff = A->coeffs;
1866 slong Li, Ai, ai;
1867 ulong Aexp;
1868 nmod_zip_struct * Lcoeff;
1869 slong Llen;
1870 ulong * Lexp;
1871 slong pointcount = L->pointcount;
1872
1873 Lcoeff = L->coeffs;
1874 Llen = L->length;
1875 Lexp = L->exps;
1876
1877 Li = 0;
1878 Ai = ai = 0;
1879 Aexp = 0;
1880 if (Ai < Alen)
1881 {
1882 ai = nmod_poly_degree(A->coeffs + Ai);
1883 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1884 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1885 Aexp = A->exps[Ai] + ai;
1886 }
1887
1888 for (Li = 0; Li < Llen; Li++)
1889 {
1890 nmod_zip_struct * Lc = Lcoeff + Li;
1891
1892 if (Ai < Alen && Lexp[Li] == Aexp)
1893 {
1894 /* L present A present */
1895 FLINT_ASSERT(pointcount <= Lc->ealloc);
1896 Lc->evals[pointcount] = (Acoeff + Ai)->coeffs[ai];
1897 do {
1898 ai--;
1899 } while (ai >= 0 && (0 == (Acoeff + Ai)->coeffs[ai]));
1900 if (ai < 0)
1901 {
1902 Ai++;
1903 if (Ai < Alen)
1904 {
1905 ai = nmod_poly_degree(A->coeffs + Ai);
1906 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1907 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1908 Aexp = A->exps[Ai] + ai;
1909 }
1910 }
1911 else
1912 {
1913 FLINT_ASSERT(Ai < Alen);
1914 FLINT_ASSERT(FLINT_BIT_COUNT(ai) < FLINT_BITS/2);
1915 FLINT_ASSERT(0 == (A->exps[Ai] & LOW_HALF_MASK));
1916 Aexp = A->exps[Ai] + ai;
1917 }
1918 }
1919 else if (Ai >= Alen || Lexp[Li] > Aexp)
1920 {
1921 /* L present A missing */
1922 FLINT_ASSERT(pointcount <= Lc->ealloc);
1923 Lc->evals[pointcount] = 0;
1924 }
1925 else
1926 {
1927 /* L missing A present */
1928 return 0;
1929 }
1930 }
1931
1932 L->pointcount = pointcount + 1;
1933 return 1;
1934 }
1935
1936
nmod_mpolyu_set_skel(nmod_mpolycu_t S,const nmod_mpoly_ctx_t ctx_sp,const fmpz_mpolyu_t A,const mp_limb_t * alpha,const fmpz_mpoly_ctx_t ctx)1937 void nmod_mpolyu_set_skel(
1938 nmod_mpolycu_t S,
1939 const nmod_mpoly_ctx_t ctx_sp,
1940 const fmpz_mpolyu_t A,
1941 const mp_limb_t * alpha,
1942 const fmpz_mpoly_ctx_t ctx)
1943 {
1944 slong i;
1945 nmod_mpolycu_fit_length(S, A->length);
1946 for (i = 0; i < A->length; i++)
1947 {
1948 nmod_mpoly_set_skel(S->coeffs + i, ctx_sp, A->coeffs + i, alpha, ctx);
1949 }
1950 S->length = A->length;
1951 }
1952
1953
1954
1955 /* Ared = A mod p */
nmod_mpoly_red_skel(nmod_mpolyc_t Ared,const fmpz_mpoly_t A,const nmodf_ctx_t fpctx)1956 void nmod_mpoly_red_skel(
1957 nmod_mpolyc_t Ared,
1958 const fmpz_mpoly_t A,
1959 const nmodf_ctx_t fpctx)
1960 {
1961 nmod_mpolyc_fit_length(Ared, A->length);
1962 Ared->length = A->length;
1963 _fmpz_vec_get_nmod_vec(Ared->coeffs, A->coeffs, A->length, fpctx->mod);
1964 }
1965
nmod_mpolyu_red_skel(nmod_mpolycu_t Ared,const fmpz_mpolyu_t A,const nmodf_ctx_t fpctx)1966 void nmod_mpolyu_red_skel(
1967 nmod_mpolycu_t Ared,
1968 const fmpz_mpolyu_t A,
1969 const nmodf_ctx_t fpctx)
1970 {
1971 slong i;
1972 nmod_mpolycu_fit_length(Ared, A->length);
1973 Ared->length = A->length;
1974 for (i = 0; i < A->length; i++)
1975 {
1976 nmod_mpoly_red_skel(Ared->coeffs + i, A->coeffs + i, fpctx);
1977 }
1978 }
1979
1980 /* M = S */
nmod_mpoly_copy_skel(nmod_mpolyc_t M,const nmod_mpolyc_t S)1981 void nmod_mpoly_copy_skel(nmod_mpolyc_t M, const nmod_mpolyc_t S)
1982 {
1983 slong i;
1984 nmod_mpolyc_fit_length(M, S->length);
1985 M->length = S->length;
1986 for (i = 0; i < S->length; i++)
1987 {
1988 M->coeffs[i] = S->coeffs[i];
1989 }
1990 }
1991
nmod_mpolyu_copy_skel(nmod_mpolycu_t M,const nmod_mpolycu_t S)1992 void nmod_mpolyu_copy_skel(nmod_mpolycu_t M, const nmod_mpolycu_t S)
1993 {
1994 slong i;
1995 nmod_mpolycu_fit_length(M, S->length);
1996 M->length = S->length;
1997 for (i = 0; i < S->length; i++)
1998 {
1999 nmod_mpoly_copy_skel(M->coeffs + i, S->coeffs + i);
2000 }
2001 }
2002
2003 /* return Ared.Acur and multiply Acur by Ainc */
nmod_mpoly_use_skel_mul(const nmod_mpolyc_t Ared,nmod_mpolyc_t Acur,const nmod_mpolyc_t Ainc,const nmodf_ctx_t fpctx)2004 mp_limb_t nmod_mpoly_use_skel_mul(
2005 const nmod_mpolyc_t Ared,
2006 nmod_mpolyc_t Acur,
2007 const nmod_mpolyc_t Ainc,
2008 const nmodf_ctx_t fpctx)
2009 {
2010 slong i;
2011 mp_limb_t V, V0, V1, V2, p1, p0;
2012 FLINT_ASSERT(Ared->length == Acur->length);
2013 FLINT_ASSERT(Ared->length == Ainc->length);
2014
2015 V0 = V1 = V2 = 0;
2016 for (i = 0; i < Ared->length; i++)
2017 {
2018 umul_ppmm(p1, p0, Ared->coeffs[i], Acur->coeffs[i]);
2019 add_sssaaaaaa(V2, V1, V0, V2, V1, V0, WORD(0), p1, p0);
2020 Acur->coeffs[i] = nmod_mul(Acur->coeffs[i], Ainc->coeffs[i], fpctx->mod);
2021 }
2022
2023 NMOD_RED3(V, V2, V1, V0, fpctx->mod);
2024 return V;
2025 }
2026
2027
2028 /* M = S^k */
nmod_mpoly_pow_skel(nmod_mpolyc_t M,const nmod_mpolyc_t S,ulong k,const nmod_mpoly_ctx_t ctx)2029 void nmod_mpoly_pow_skel(
2030 nmod_mpolyc_t M,
2031 const nmod_mpolyc_t S,
2032 ulong k,
2033 const nmod_mpoly_ctx_t ctx)
2034 {
2035 slong i;
2036 nmod_mpolyc_fit_length(M, S->length);
2037 M->length = S->length;
2038 for (i = 0; i < S->length; i++)
2039 {
2040 M->coeffs[i] = nmod_pow_ui(S->coeffs[i], k, ctx->ffinfo->mod);
2041 }
2042 }
2043
nmod_mpolyu_pow_skel(nmod_mpolycu_t M,const nmod_mpolycu_t S,ulong k,const nmod_mpoly_ctx_t ctx)2044 void nmod_mpolyu_pow_skel(
2045 nmod_mpolycu_t M,
2046 const nmod_mpolycu_t S,
2047 ulong k,
2048 const nmod_mpoly_ctx_t ctx)
2049 {
2050 slong i;
2051 nmod_mpolycu_fit_length(M, S->length);
2052 M->length = S->length;
2053 for (i = 0; i < S->length; i++)
2054 {
2055 nmod_mpoly_pow_skel(M->coeffs + i, S->coeffs + i, k, ctx);
2056 }
2057 }
2058
nmod_mpolyuu_use_skel_mul(nmod_mpolyn_t E,const fmpz_mpolyu_t A,const nmod_mpolycu_t Ared,nmod_mpolycu_t Acur,const nmod_mpolycu_t Ainc,const nmod_mpoly_ctx_t ctx_sp)2059 void nmod_mpolyuu_use_skel_mul(
2060 nmod_mpolyn_t E,
2061 const fmpz_mpolyu_t A,
2062 const nmod_mpolycu_t Ared,
2063 nmod_mpolycu_t Acur,
2064 const nmod_mpolycu_t Ainc,
2065 const nmod_mpoly_ctx_t ctx_sp)
2066 {
2067 slong xexp, yexp;
2068 slong i;
2069 mp_limb_t eval;
2070
2071 FLINT_ASSERT(E->bits == FLINT_BITS/2);
2072 FLINT_ASSERT(1 == mpoly_words_per_exp_sp(E->bits, ctx_sp->minfo));
2073
2074 FLINT_ASSERT(A->length == Ared->length);
2075 FLINT_ASSERT(A->length == Acur->length);
2076 FLINT_ASSERT(A->length == Ainc->length);
2077
2078 E->length = 0;
2079 for (i = 0; i < A->length; i++)
2080 {
2081 eval = nmod_mpoly_use_skel_mul(Ared->coeffs + i, Acur->coeffs + i,
2082 Ainc->coeffs + i, ctx_sp->ffinfo);
2083 if (eval == 0)
2084 {
2085 continue;
2086 }
2087
2088 xexp = A->exps[i] >> (FLINT_BITS/2);
2089 yexp = A->exps[i] & ((-UWORD(1)) >> (FLINT_BITS - FLINT_BITS/2));
2090
2091 if (E->length > 0 && (E->exps[E->length - 1] >> (FLINT_BITS/2)) == xexp)
2092 {
2093 nmod_poly_set_coeff_ui(E->coeffs + E->length - 1, yexp, eval);
2094 }
2095 else
2096 {
2097 nmod_mpolyn_fit_length(E, E->length + 1, ctx_sp);
2098 nmod_poly_zero(E->coeffs + E->length);
2099 nmod_poly_set_coeff_ui(E->coeffs + E->length, yexp, eval);
2100 E->exps[E->length] = xexp << (FLINT_BITS/2);
2101 E->length++;
2102 }
2103 }
2104 }
2105
2106
2107
nmod_zip_find_coeffs(nmod_zip_t Z,nmod_poly_t master,slong pointcount,const nmodf_ctx_t ffinfo)2108 nmod_zip_find_coeffs_ret_t nmod_zip_find_coeffs(
2109 nmod_zip_t Z,
2110 nmod_poly_t master,
2111 slong pointcount,
2112 const nmodf_ctx_t ffinfo)
2113 {
2114 slong i, j;
2115 mp_limb_t V, V0, V1, V2, T, S, r, p0, p1;
2116
2117 FLINT_ASSERT(pointcount >= Z->mlength);
2118
2119 nmod_poly_product_roots_nmod_vec(master, Z->monomials, Z->mlength);
2120
2121 for (i = 0; i < Z->mlength; i++)
2122 {
2123 /* coeffs[i] is (coeffs(P).values)/P(roots[i]) =: V/S
2124 where P(x) = master(x)/(x-roots[i]) */
2125 V0 = V1 = V2 = T = S = 0;
2126 r = Z->monomials[i];
2127 for (j = Z->mlength; j > 0; j--)
2128 {
2129 T = nmod_add(nmod_mul(r, T, ffinfo->mod), master->coeffs[j], ffinfo->mod);
2130 S = nmod_add(nmod_mul(r, S, ffinfo->mod), T, ffinfo->mod);
2131 umul_ppmm(p1, p0, Z->evals[j - 1], T);
2132 add_sssaaaaaa(V2, V1, V0, V2, V1, V0, WORD(0), p1, p0);
2133 }
2134 /* roots[i] should be a root of master */
2135 FLINT_ASSERT(nmod_add(nmod_mul(r, T, ffinfo->mod), master->coeffs[0], ffinfo->mod) == 0);
2136 NMOD_RED3(V, V2, V1, V0, ffinfo->mod);
2137 S = nmod_mul(S, r, ffinfo->mod); /* shift is one */
2138 if (S == 0)
2139 {
2140 return nmod_zip_find_coeffs_non_invertible;
2141 }
2142 Z->coeffs[i] = nmod_mul(V, nmod_inv(S, ffinfo->mod), ffinfo->mod);
2143 }
2144
2145 /* use the coefficients of master as temp work space */
2146 for (i = 0; i < Z->mlength; i++)
2147 {
2148 master->coeffs[i] = nmod_pow_ui(Z->monomials[i], Z->mlength, ffinfo->mod);
2149 }
2150
2151 /* check that the remaining points match */
2152 for (i = Z->mlength; i < pointcount; i++)
2153 {
2154 V0 = V1 = V2 = S = 0;
2155 for (j = 0; j < Z->mlength; j++)
2156 {
2157 master->coeffs[j] = nmod_mul(master->coeffs[j], Z->monomials[j], ffinfo->mod);
2158 umul_ppmm(p1, p0, Z->coeffs[j], master->coeffs[j]);
2159 add_sssaaaaaa(V2, V1, V0, V2, V1, V0, WORD(0), p1, p0);
2160 }
2161 NMOD_RED3(V, V2, V1, V0, ffinfo->mod);
2162 if (V != Z->evals[i])
2163 {
2164 return nmod_zip_find_coeffs_no_match;
2165 }
2166 }
2167
2168 return nmod_zip_find_coeffs_good;
2169 }
2170
nmod_mpolyu_zip_find_coeffs(nmod_zip_mpolyu_t Z,const nmod_mpoly_ctx_t ctx_sp)2171 nmod_zip_find_coeffs_ret_t nmod_mpolyu_zip_find_coeffs(
2172 nmod_zip_mpolyu_t Z,
2173 const nmod_mpoly_ctx_t ctx_sp)
2174 {
2175 slong i;
2176 nmod_zip_find_coeffs_ret_t ret;
2177 nmod_poly_t T;
2178
2179 nmod_poly_init_mod(T, ctx_sp->ffinfo->mod);
2180
2181 for (i = 0; i < Z->length; i++)
2182 {
2183 ret = nmod_zip_find_coeffs(Z->coeffs + i, T, Z->pointcount, ctx_sp->ffinfo);
2184 if (ret != nmod_zip_find_coeffs_good)
2185 {
2186 goto cleanup;
2187 }
2188 }
2189
2190 ret = nmod_zip_find_coeffs_good;
2191
2192 cleanup:
2193
2194 nmod_poly_clear(T);
2195
2196 return ret;
2197 }
2198
2199
fmpz_mpolyu_addinterp_zip(fmpz_mpolyu_t H,const fmpz_t Hmodulus,const nmod_zip_mpolyu_t Z,const nmodf_ctx_t ffinfo)2200 int fmpz_mpolyu_addinterp_zip(
2201 fmpz_mpolyu_t H,
2202 const fmpz_t Hmodulus,
2203 const nmod_zip_mpolyu_t Z,
2204 const nmodf_ctx_t ffinfo)
2205 {
2206 int changed = 0;
2207 slong i, j;
2208 fmpz_t t;
2209
2210 fmpz_init(t);
2211
2212 FLINT_ASSERT(H->length == Z->length);
2213 for (i = 0; i < H->length; i++)
2214 {
2215 fmpz_mpoly_struct * Hc = H->coeffs + i;
2216 nmod_zip_struct * Zc = Z->coeffs + i;
2217
2218 FLINT_ASSERT(Hc->length == Zc->mlength);
2219 for (j = 0; j < Hc->length; j++)
2220 {
2221 fmpz_CRT_ui(t, Hc->coeffs + j, Hmodulus, Zc->coeffs[j], ffinfo->mod.n, 1);
2222 changed |= !fmpz_equal(t, Hc->coeffs + j);
2223 fmpz_swap(t, Hc->coeffs + j);
2224 }
2225 }
2226
2227 fmpz_clear(t);
2228 return changed;
2229 }
2230
2231
fmpz_mpolyu_repack_bits(fmpz_mpolyu_t A,flint_bitcnt_t Abits,const fmpz_mpoly_ctx_t ctx)2232 int fmpz_mpolyu_repack_bits(
2233 fmpz_mpolyu_t A,
2234 flint_bitcnt_t Abits,
2235 const fmpz_mpoly_ctx_t ctx)
2236 {
2237 flint_bitcnt_t org_bits = A->bits;
2238 int success;
2239 slong i, j;
2240
2241 for (i = 0; i < A->length; i++)
2242 {
2243 FLINT_ASSERT((A->coeffs + i)->bits == A->bits);
2244 success = fmpz_mpoly_repack_bits_inplace(A->coeffs + i, Abits, ctx);
2245 if (!success)
2246 {
2247 /* repack changed coeffs */
2248 for (j = 0; j < i; j++)
2249 {
2250 success = fmpz_mpoly_repack_bits_inplace(A->coeffs + j, org_bits, ctx);
2251 FLINT_ASSERT(success);
2252 }
2253 return 0;
2254 }
2255 }
2256 return 1;
2257 }
2258
2259
2260 /* fit bits is not the best fxn to accomplish this */
fmpz_mpoly_set_bits(fmpz_mpoly_t A,flint_bitcnt_t Abits,const fmpz_mpoly_ctx_t ctx)2261 void fmpz_mpoly_set_bits(
2262 fmpz_mpoly_t A,
2263 flint_bitcnt_t Abits,
2264 const fmpz_mpoly_ctx_t ctx)
2265 {
2266 fmpz_mpoly_fit_bits(A, Abits, ctx);
2267 A->bits = Abits;
2268 }
2269
fmpz_mpolyu_set_bits(fmpz_mpolyu_t A,flint_bitcnt_t Abits,const fmpz_mpoly_ctx_t ctx)2270 void fmpz_mpolyu_set_bits(
2271 fmpz_mpolyu_t A,
2272 flint_bitcnt_t Abits,
2273 const fmpz_mpoly_ctx_t ctx)
2274 {
2275 slong i;
2276 for (i = 0; i < A->length; i++)
2277 {
2278 fmpz_mpoly_set_bits(A->coeffs + i, Abits, ctx);
2279 }
2280 A->bits = Abits;
2281 }
2282
2283
2284 /*
2285 A is in ZZ[x_0,..., x_(n-1)]
2286 evaluation at x_i = values[i]
2287 out is in Fp[x_var] (p is stored in out :( )
2288
2289 out_deg is deg of out.
2290
2291 This function fails to produce out if it will have too large degree:
2292 If return is 0, then out is invalid. Otherwise, out is valid.
2293 out_deg is always valid.
2294 */
fmpz_mpoly_eval_all_but_one_nmod(slong * out_deg,nmod_poly_t out,const fmpz_mpoly_t A,slong var,mp_limb_t * values,const fmpz_mpoly_ctx_t ctx)2295 int fmpz_mpoly_eval_all_but_one_nmod(
2296 slong * out_deg,
2297 nmod_poly_t out,
2298 const fmpz_mpoly_t A,
2299 slong var,
2300 mp_limb_t * values,
2301 const fmpz_mpoly_ctx_t ctx)
2302 {
2303 slong i, j;
2304 slong deg;
2305 const slong deg_limit = 9999;
2306 ulong varexp, thisexp;
2307 mp_limb_t t, v;
2308 ulong mask;
2309 slong * offsets, * shifts;
2310 slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
2311 ulong * Aexp = A->exps;
2312 fmpz * Acoeff = A->coeffs;
2313 TMP_INIT;
2314
2315 FLINT_ASSERT(A->bits <= FLINT_BITS);
2316
2317 TMP_START;
2318
2319 mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
2320 offsets = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
2321 shifts = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
2322 for (j = 0; j < ctx->minfo->nvars; j++)
2323 {
2324 mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
2325 }
2326
2327 nmod_poly_zero(out);
2328 deg = -WORD(1);
2329 for (i = 0; i < A->length; i++)
2330 {
2331 varexp = ((Aexp + N*i)[offsets[var]]>>shifts[var])&mask;
2332 deg = FLINT_MAX(deg, (slong)(varexp));
2333 v = fmpz_fdiv_ui(Acoeff + i, out->mod.n);
2334 for (j = 0; j < ctx->minfo->nvars; j++)
2335 {
2336 thisexp = ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask;
2337 if (j != var)
2338 {
2339 v = nmod_mul(v, nmod_pow_ui(values[j], thisexp, out->mod), out->mod);
2340 }
2341 }
2342 t = nmod_poly_get_coeff_ui(out, varexp);
2343 if (deg <= deg_limit)
2344 {
2345 nmod_poly_set_coeff_ui(out, varexp, nmod_add(t, v, out->mod));
2346 }
2347 }
2348
2349 TMP_END;
2350
2351 *out_deg = deg;
2352 return deg <= deg_limit;
2353 }
2354
2355 /*
2356 A is in ZZ[X,Y][x_0,..., x_(n-1)]
2357 out is in Fp[x_var] (p is stored in out :( )
2358
2359 X = values[0]
2360 Y = values[1]
2361 x_0 = values[2]
2362 ...
2363 */
fmpz_mpolyuu_eval_all_but_one_nmod(slong * out_deg,nmod_poly_t out,const fmpz_mpolyu_t A,slong var,mp_limb_t * values,const fmpz_mpoly_ctx_t ctx)2364 int fmpz_mpolyuu_eval_all_but_one_nmod(
2365 slong * out_deg,
2366 nmod_poly_t out,
2367 const fmpz_mpolyu_t A,
2368 slong var,
2369 mp_limb_t * values,
2370 const fmpz_mpoly_ctx_t ctx)
2371 {
2372 slong i;
2373 slong deg, this_deg;
2374 ulong * Aexp = A->exps;
2375 const fmpz_mpoly_struct * Acoeff = A->coeffs;
2376 nmod_poly_t temp;
2377 mp_limb_t t, t1;
2378 int success, this_success;
2379
2380 FLINT_ASSERT(A->bits <= FLINT_BITS);
2381
2382 nmod_poly_zero(out);
2383 nmod_poly_init(temp, out->mod.n);
2384
2385 deg = -WORD(1);
2386 success = 1;
2387 for (i = 0; i < A->length; i++)
2388 {
2389 t = nmod_pow_ui(values[0], Aexp[i] >> (FLINT_BITS/2), out->mod);
2390 t1 = nmod_pow_ui(values[1], Aexp[i] & LOW_HALF_MASK, out->mod);
2391 t = nmod_mul(t, t1, out->mod);
2392 this_success = fmpz_mpoly_eval_all_but_one_nmod(&this_deg, temp, Acoeff + i, var, values + 2, ctx);
2393 deg = FLINT_MAX(deg, this_deg);
2394 success = success && this_success;
2395 if (success)
2396 {
2397 nmod_poly_scalar_mul_nmod(temp, temp, t);
2398 nmod_poly_add(out, out, temp);
2399 }
2400 }
2401
2402 nmod_poly_clear(temp);
2403
2404 *out_deg = deg;
2405
2406 return success;
2407 }
2408
2409 /*
2410 A, B are in ZZ[X,Y][x_0, ..., x_(n-1)]
2411
2412 Set Adeg (resp Bdeg) to the degree of A (resp B) wrt x_var.
2413 Return an upper bound on the degree of gcd(A,B) wrt x_var.
2414 */
fmpz_mpolyuu_gcd_degree_bound_minor(slong * Adeg,slong * Bdeg,const fmpz_mpolyu_t A,const fmpz_mpolyu_t B,slong var,const fmpz_mpoly_ctx_t ctx,flint_rand_t state)2415 slong fmpz_mpolyuu_gcd_degree_bound_minor(
2416 slong * Adeg,
2417 slong * Bdeg,
2418 const fmpz_mpolyu_t A,
2419 const fmpz_mpolyu_t B,
2420 slong var,
2421 const fmpz_mpoly_ctx_t ctx,
2422 flint_rand_t state)
2423 {
2424 slong i;
2425 int tries = 0;
2426 slong degA, degB, degRet;
2427 mp_limb_t p = UWORD(1) << (FLINT_BITS - 2);
2428 nmod_poly_t Geval, Aeval, Beval;
2429 mp_limb_t * values;
2430 int Asuccess, Bsuccess;
2431 TMP_INIT;
2432
2433 TMP_START;
2434
2435 values = (mp_limb_t *) TMP_ALLOC((ctx->minfo->nvars + 2)*sizeof(mp_limb_t));
2436
2437 p = n_nextprime(p, 1);
2438 nmod_poly_init(Geval, p);
2439 nmod_poly_init(Aeval, p);
2440 nmod_poly_init(Beval, p);
2441
2442 try_again:
2443
2444 for (i = 0; i < ctx->minfo->nvars + 2; i++)
2445 {
2446 values[i] = n_urandint(state, p);
2447 }
2448
2449 Asuccess = fmpz_mpolyuu_eval_all_but_one_nmod(°A, Aeval, A, var, values, ctx);
2450 Bsuccess = fmpz_mpolyuu_eval_all_but_one_nmod(°B, Beval, B, var, values, ctx);
2451 *Adeg = degA;
2452 *Bdeg = degB;
2453
2454 if (!Asuccess || !Bsuccess)
2455 {
2456 degRet = FLINT_MIN(degA, degB);
2457 goto cleanup;
2458 }
2459
2460 if (degA != nmod_poly_degree(Aeval) || degB != nmod_poly_degree(Beval))
2461 {
2462 if (++tries > 100)
2463 {
2464 degRet = FLINT_MIN(degA, degB);
2465 goto cleanup;
2466 }
2467 p = n_nextprime(p, 1);
2468 nmod_poly_clear(Geval);
2469 nmod_poly_clear(Aeval);
2470 nmod_poly_clear(Beval);
2471 nmod_poly_init(Geval, p);
2472 nmod_poly_init(Aeval, p);
2473 nmod_poly_init(Beval, p);
2474 goto try_again;
2475 }
2476
2477 nmod_poly_gcd(Geval, Aeval, Beval);
2478 degRet = nmod_poly_degree(Geval);
2479
2480 cleanup:
2481
2482 nmod_poly_clear(Geval);
2483 nmod_poly_clear(Aeval);
2484 nmod_poly_clear(Beval);
2485 TMP_END;
2486 return degRet;
2487 }
2488
2489 /*
2490 A is in ZZ[x_0, ..., x_(n-1)]
2491
2492 After the substitutions
2493 x_0 = x ^ (sub[1] * sub[2] * ... * sub[n-1])
2494
2495 x_(n-2) = x ^ (sub[n-1])
2496 x_(n-1) = x ^ (1)
2497 a univariate in ZZ[x] remains. Return the content of this poly.
2498 */
fmpz_mpoly_ksub_content(fmpz_t content,const fmpz_mpoly_t A,const ulong * subdegs,const fmpz_mpoly_ctx_t ctx)2499 void fmpz_mpoly_ksub_content(
2500 fmpz_t content,
2501 const fmpz_mpoly_t A,
2502 const ulong * subdegs,
2503 const fmpz_mpoly_ctx_t ctx)
2504 {
2505 slong i, j;
2506 slong N = mpoly_words_per_exp_sp(A->bits, ctx->minfo);
2507 fmpz_mpoly_t T;
2508 fmpz_mpoly_ctx_t Tctx;
2509 fmpz_t e;
2510 fmpz * Acoeff = A->coeffs;
2511 ulong * Aexp = A->exps;
2512 ulong mask;
2513 slong * offsets, * shifts;
2514 TMP_INIT;
2515
2516 TMP_START;
2517 fmpz_init(e);
2518
2519 fmpz_mpoly_ctx_init(Tctx, 1, ORD_LEX);
2520 fmpz_mpoly_init(T, Tctx);
2521
2522 mask = (-UWORD(1)) >> (FLINT_BITS - A->bits);
2523 offsets = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
2524 shifts = (slong *) TMP_ALLOC(ctx->minfo->nvars*sizeof(slong));
2525 for (j = 0; j < ctx->minfo->nvars; j++)
2526 {
2527 mpoly_gen_offset_shift_sp(offsets + j, shifts + j, j, A->bits, ctx->minfo);
2528 }
2529
2530 for (i = 0; i < A->length; i++)
2531 {
2532 fmpz_zero(e);
2533 for (j = 0; j < ctx->minfo->nvars; j++)
2534 {
2535 fmpz_mul_ui(e, e, subdegs[j]);
2536 fmpz_add_ui(e, e, ((Aexp + N*i)[offsets[j]]>>shifts[j])&mask);
2537 }
2538 _fmpz_mpoly_push_exp_ffmpz(T, e, Tctx);
2539 fmpz_set(T->coeffs + T->length - 1, Acoeff + i);
2540 }
2541 fmpz_mpoly_sort_terms(T, Tctx);
2542 fmpz_mpoly_combine_like_terms(T, Tctx);
2543 _fmpz_vec_content(content, T->coeffs, T->length);
2544 fmpz_mpoly_clear(T, Tctx);
2545 fmpz_mpoly_ctx_clear(Tctx);
2546
2547 fmpz_clear(e);
2548 TMP_END;
2549 }
2550
2551 /*
2552 Check if H evaluated a random point (eval(H)) matches gcd(eval(A), eval(B))
2553 with leading coefficient eval(Gamma). The evaluations are in x_i.
2554
2555 H, A, B are in Z[X,Y][x_0, ..., x_(n-1)]
2556 Gamma is in Z[x_0, ..., x_(n-1)]
2557 */
2558 typedef enum {
2559 random_check_good,
2560 random_check_point_not_found,
2561 random_check_image_degree_high,
2562 random_check_image_degree_low,
2563 random_check_image_no_match
2564 } random_check_ret_t;
2565
_random_check_sp(ulong * GevaldegXY,ulong GdegboundXY,nmod_mpolyn_t Aeval_sp,nmod_mpolyn_t Beval_sp,nmod_mpolyn_t Geval_sp,nmod_mpolyn_t Abareval_sp,nmod_mpolyn_t Bbareval_sp,mp_limb_t * checkalpha_sp,const fmpz_mpolyu_t H,const fmpz_mpolyu_t A,const fmpz_mpolyu_t B,const fmpz_mpoly_t Gamma,const fmpz_mpoly_ctx_t ctx,const nmod_mpoly_ctx_t ctx_sp,flint_rand_t randstate,nmod_poly_stack_t Sp_sp)2566 random_check_ret_t static _random_check_sp(
2567 ulong * GevaldegXY,
2568 ulong GdegboundXY,
2569 nmod_mpolyn_t Aeval_sp,
2570 nmod_mpolyn_t Beval_sp,
2571 nmod_mpolyn_t Geval_sp,
2572 nmod_mpolyn_t Abareval_sp,
2573 nmod_mpolyn_t Bbareval_sp,
2574 mp_limb_t * checkalpha_sp,
2575 const fmpz_mpolyu_t H,
2576 const fmpz_mpolyu_t A,
2577 const fmpz_mpolyu_t B,
2578 const fmpz_mpoly_t Gamma,
2579 const fmpz_mpoly_ctx_t ctx,
2580 const nmod_mpoly_ctx_t ctx_sp,
2581 flint_rand_t randstate,
2582 nmod_poly_stack_t Sp_sp)
2583 {
2584 mp_limb_t Gammaeval_sp;
2585 int success;
2586 int point_try_count;
2587 slong i;
2588
2589 /* try to test H at a random evaluation point */
2590 for (point_try_count = 0; point_try_count < 10; point_try_count++)
2591 {
2592 /* evaluate Gamma, A, and B at random point */
2593 for (i = 0; i < ctx->minfo->nvars; i++)
2594 {
2595 checkalpha_sp[i] = n_urandint(randstate, ctx_sp->ffinfo->mod.n);
2596 }
2597 fmpz_mpolyuu_eval_nmod(Aeval_sp, ctx_sp, A, checkalpha_sp, ctx);
2598 fmpz_mpolyuu_eval_nmod(Beval_sp, ctx_sp, B, checkalpha_sp, ctx);
2599
2600 /* make sure that evaluation did not kill either lc(A) or lc(B) */
2601 if ( Aeval_sp->length == 0 || Beval_sp->length == 0
2602 || nmod_mpolyn_bidegree(Aeval_sp) != A->exps[0]
2603 || nmod_mpolyn_bidegree(Beval_sp) != B->exps[0])
2604 {
2605 continue;
2606 }
2607
2608 /* Gamma is gcd(lc(A), lc(B)) so it evaluation should not be zero */
2609 Gammaeval_sp = fmpz_mpoly_eval_nmod(ctx_sp->ffinfo, Gamma,
2610 checkalpha_sp, ctx);
2611 FLINT_ASSERT(Gammaeval_sp != 0);
2612
2613 success = nmod_mpolyn_gcd_brown_smprime_bivar(Geval_sp,
2614 Abareval_sp, Bbareval_sp, Aeval_sp, Beval_sp, ctx_sp, Sp_sp);
2615 if (!success)
2616 {
2617 continue;
2618 }
2619 nmod_mpolyn_scalar_mul_nmod(Geval_sp, Gammaeval_sp, ctx_sp);
2620
2621 FLINT_ASSERT(Geval_sp->length > 0);
2622 *GevaldegXY = nmod_mpolyn_bidegree(Geval_sp);
2623
2624 if (GdegboundXY < *GevaldegXY)
2625 {
2626 return random_check_image_degree_high;
2627 }
2628 else if (GdegboundXY > *GevaldegXY)
2629 {
2630 return random_check_image_degree_low;
2631 }
2632
2633 /* reuse Bbareval for Heval */
2634 fmpz_mpolyuu_eval_nmod(Bbareval_sp, ctx_sp, H, checkalpha_sp, ctx);
2635
2636 if (!nmod_mpolyn_equal(Bbareval_sp, Geval_sp, ctx_sp))
2637 {
2638 return random_check_image_no_match;
2639 }
2640
2641 return random_check_good;
2642 }
2643
2644 return random_check_point_not_found;
2645 }
2646
_random_check(ulong * GevaldegXY,ulong GdegboundXY,fmpz_mod_mpolyn_t Aeval,fmpz_mod_mpolyn_t Beval,fmpz_mod_mpolyn_t Geval,fmpz_mod_mpolyn_t Abareval,fmpz_mod_mpolyn_t Bbareval,fmpz_t Gammaeval,fmpz * checkalpha,const fmpz_mpolyu_t H,const fmpz_mpolyu_t A,const fmpz_mpolyu_t B,const fmpz_mpoly_t Gamma,const fmpz_mpoly_ctx_t ctx,const fmpz_mod_mpoly_ctx_t ctx_mp,flint_rand_t randstate)2647 random_check_ret_t static _random_check(
2648 ulong * GevaldegXY,
2649 ulong GdegboundXY,
2650 fmpz_mod_mpolyn_t Aeval,
2651 fmpz_mod_mpolyn_t Beval,
2652 fmpz_mod_mpolyn_t Geval,
2653 fmpz_mod_mpolyn_t Abareval,
2654 fmpz_mod_mpolyn_t Bbareval,
2655 fmpz_t Gammaeval,
2656 fmpz * checkalpha,
2657 const fmpz_mpolyu_t H,
2658 const fmpz_mpolyu_t A,
2659 const fmpz_mpolyu_t B,
2660 const fmpz_mpoly_t Gamma,
2661 const fmpz_mpoly_ctx_t ctx,
2662 const fmpz_mod_mpoly_ctx_t ctx_mp,
2663 flint_rand_t randstate)
2664 {
2665 int success;
2666 int point_try_count;
2667 slong i;
2668
2669 /* try to test H at a random evaluation point */
2670 for (point_try_count = 0; point_try_count < 10; point_try_count++)
2671 {
2672 /* evaluate Gamma, A, and B at random point */
2673 for (i = 0; i < ctx->minfo->nvars; i++)
2674 {
2675 fmpz_randm(checkalpha + i, randstate, fmpz_mod_ctx_modulus(ctx_mp->ffinfo));
2676 }
2677 fmpz_mpolyuu_eval_fmpz_mod(Aeval, ctx_mp, A, checkalpha, ctx);
2678 fmpz_mpolyuu_eval_fmpz_mod(Beval, ctx_mp, B, checkalpha, ctx);
2679
2680 /* make sure that evaluation did not kill either lc(A) or lc(B) */
2681 if ( Aeval->length == 0 || Beval->length == 0
2682 || fmpz_mod_mpolyn_bidegree(Aeval) != A->exps[0]
2683 || fmpz_mod_mpolyn_bidegree(Beval) != B->exps[0])
2684 {
2685 continue;
2686 }
2687
2688 /* Gamma is gcd(lc(A), lc(B)) so it evaluation should not be zero */
2689 fmpz_mpoly_eval_fmpz_mod(Gammaeval, ctx_mp->ffinfo, Gamma, checkalpha, ctx);
2690 FLINT_ASSERT(!fmpz_is_zero(Gammaeval));
2691
2692 success = fmpz_mod_mpolyn_gcd_brown_bivar(Geval, Abareval, Bbareval,
2693 Aeval, Beval, ctx_mp);
2694 if (!success)
2695 {
2696 continue;
2697 }
2698 fmpz_mod_mpolyn_scalar_mul_fmpz_mod(Geval, Gammaeval, ctx_mp);
2699
2700 FLINT_ASSERT(Geval->length > 0);
2701 *GevaldegXY = fmpz_mod_mpolyn_bidegree(Geval);
2702
2703 if (GdegboundXY < *GevaldegXY)
2704 {
2705 return random_check_image_degree_high;
2706 }
2707 else if (GdegboundXY > *GevaldegXY)
2708 {
2709 return random_check_image_degree_low;
2710 }
2711
2712 /* reuse Bbareval for Heval */
2713 fmpz_mpolyuu_eval_fmpz_mod(Bbareval, ctx_mp, H, checkalpha, ctx);
2714 if (!fmpz_mod_mpolyn_equal(Bbareval, Geval, ctx_mp))
2715 {
2716 return random_check_image_no_match;
2717 }
2718
2719 return random_check_good;
2720 }
2721
2722 return random_check_point_not_found;
2723 }
2724
2725
fmpz_mpolyuu_gcd_berlekamp_massey(fmpz_mpolyu_t G,fmpz_mpolyu_t Abar,fmpz_mpolyu_t Bbar,fmpz_mpolyu_t A,fmpz_mpolyu_t B,const fmpz_mpoly_t Gamma,const fmpz_mpoly_ctx_t ctx)2726 int fmpz_mpolyuu_gcd_berlekamp_massey(
2727 fmpz_mpolyu_t G,
2728 fmpz_mpolyu_t Abar,
2729 fmpz_mpolyu_t Bbar,
2730 fmpz_mpolyu_t A,
2731 fmpz_mpolyu_t B,
2732 const fmpz_mpoly_t Gamma,
2733 const fmpz_mpoly_ctx_t ctx)
2734 {
2735 int changed, success, point_try_count;
2736 flint_bitcnt_t bits = A->bits, Hbits;
2737 mpoly_bma_interpolate_ctx_t Ictx;
2738 nmod_mpoly_ctx_t ctx_sp;
2739 fmpz_mod_mpoly_ctx_t ctx_mp;
2740 fmpz_mpolyu_t H;
2741 fmpz_mpoly_t Hcontent;
2742 /* multi precision workspace */
2743 fmpz_mod_bma_mpoly_t Lambda;
2744 fmpz_mod_mpolyn_t Aeval, Beval, Geval, Abareval, Bbareval;
2745 fmpz_mpolycu_t Ainc, Acur, Binc, Bcur, Ared, Bred;
2746 fmpz_mpolyc_t Gammainc, Gammacur, Gammared;
2747 fmpz_t p, pm1, sshift, last_unlucky_sshift_plus_1, image_count;
2748 fmpz * checkalpha;
2749 /* single precision workspace */
2750 nmod_poly_stack_t Sp_sp;
2751 nmod_bma_mpoly_t Lambda_sp;
2752 nmod_mpolyn_t Aeval_sp, Beval_sp, Geval_sp, Abareval_sp, Bbareval_sp;
2753 nmod_mpolycu_t Ainc_sp, Acur_sp, Binc_sp, Bcur_sp, Ared_sp, Bred_sp;
2754 nmod_mpolyc_t Gammainc_sp, Gammacur_sp, Gammared_sp;
2755 mp_limb_t p_sp, sshift_sp, last_unlucky_sshift_plus_1_sp, image_count_sp;
2756 fmpz_t Gammaeval;
2757 mp_limb_t Gammaeval_sp;
2758 mp_limb_t * checkalpha_sp;
2759 /* misc */
2760 slong i, j;
2761 ulong GdegboundXY, GevaldegXY;
2762 slong * Gdegbounds, * Adegs, * Bdegs, * Gammadegs;
2763 flint_rand_t randstate;
2764 fmpz_t subprod, cAksub, cBksub;
2765 int unlucky_count;
2766 fmpz_t Hmodulus;
2767 nmod_zip_mpolyu_t Z;
2768 slong zip_evals;
2769 ulong ABtotal_length;
2770
2771 FLINT_ASSERT(bits == A->bits);
2772 FLINT_ASSERT(bits == B->bits);
2773 FLINT_ASSERT(bits == G->bits);
2774 FLINT_ASSERT(bits == Abar->bits);
2775 FLINT_ASSERT(bits == Bbar->bits);
2776 FLINT_ASSERT(bits == Gamma->bits);
2777
2778 /* let's initialize everything at once to avoid complicated cleanup */
2779
2780 ABtotal_length = 0;
2781 for (i = 0; i < A->length; i++)
2782 ABtotal_length += (A->coeffs + i)->length;
2783 for (i = 0; i < B->length; i++)
2784 ABtotal_length += (B->coeffs + i)->length;
2785
2786 flint_randinit(randstate);
2787 fmpz_init(p);
2788 fmpz_init(pm1); /* p - 1 */
2789 fmpz_init(image_count);
2790 fmpz_init(subprod);
2791 fmpz_init(cAksub);
2792 fmpz_init(cBksub);
2793 fmpz_init(sshift);
2794 fmpz_init(last_unlucky_sshift_plus_1);
2795 fmpz_init(Gammaeval);
2796 fmpz_init(Hmodulus);
2797
2798 mpoly_bma_interpolate_ctx_init(Ictx, ctx->minfo->nvars);
2799
2800 /* multiprecision workspace */
2801 fmpz_set_ui(p, 2); /* modulus no care */
2802 fmpz_mod_mpoly_ctx_init(ctx_mp, 2, ORD_LEX, p); /* modulus no care */
2803 fmpz_mod_bma_mpoly_init(Lambda);
2804
2805 fmpz_mod_mpolyn_init(Aeval, FLINT_BITS/2, ctx_mp);
2806 fmpz_mod_mpolyn_init(Beval, FLINT_BITS/2, ctx_mp);
2807 fmpz_mod_mpolyn_init(Geval, FLINT_BITS/2, ctx_mp);
2808 fmpz_mod_mpolyn_init(Abareval, FLINT_BITS/2, ctx_mp);
2809 fmpz_mod_mpolyn_init(Bbareval, FLINT_BITS/2, ctx_mp);
2810 fmpz_mpolyc_init(Gammainc);
2811 fmpz_mpolyc_init(Gammacur);
2812 fmpz_mpolyc_init(Gammared);
2813 fmpz_mpolycu_init(Ainc);
2814 fmpz_mpolycu_init(Acur);
2815 fmpz_mpolycu_init(Ared);
2816 fmpz_mpolycu_init(Binc);
2817 fmpz_mpolycu_init(Bcur);
2818 fmpz_mpolycu_init(Bred);
2819 checkalpha = (fmpz *) flint_malloc(ctx->minfo->nvars*sizeof(fmpz));
2820 for (i = 0; i < ctx->minfo->nvars; i++)
2821 {
2822 fmpz_init(checkalpha + i);
2823 }
2824
2825 /* machine precision workspace "sp" */
2826 nmod_mpoly_ctx_init(ctx_sp, 2, ORD_LEX, 2); /* modulus no care */
2827 nmod_poly_stack_init(Sp_sp, FLINT_BITS/2, ctx_sp);
2828 nmod_bma_mpoly_init(Lambda_sp);
2829
2830 nmod_mpolyn_init(Aeval_sp, FLINT_BITS/2, ctx_sp);
2831 nmod_mpolyn_init(Beval_sp, FLINT_BITS/2, ctx_sp);
2832 nmod_mpolyn_init(Geval_sp, FLINT_BITS/2, ctx_sp);
2833 nmod_mpolyn_init(Abareval_sp, FLINT_BITS/2, ctx_sp);
2834 nmod_mpolyn_init(Bbareval_sp, FLINT_BITS/2, ctx_sp);
2835 nmod_mpolyc_init(Gammainc_sp);
2836 nmod_mpolyc_init(Gammacur_sp);
2837 nmod_mpolyc_init(Gammared_sp);
2838 nmod_mpolycu_init(Ainc_sp);
2839 nmod_mpolycu_init(Acur_sp);
2840 nmod_mpolycu_init(Ared_sp);
2841 nmod_mpolycu_init(Binc_sp);
2842 nmod_mpolycu_init(Bcur_sp);
2843 nmod_mpolycu_init(Bred_sp);
2844 checkalpha_sp = (mp_limb_t *) flint_malloc(ctx->minfo->nvars*sizeof(mp_limb_t));
2845
2846 /* the zippler */
2847 nmod_zip_mpolyu_init(Z);
2848
2849 Gdegbounds = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
2850 Adegs = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
2851 Bdegs = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
2852 Gammadegs = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
2853
2854 /* find a degree bound on G in the two main variables */
2855 GdegboundXY = FLINT_MIN(A->exps[0], B->exps[0]);
2856 p_sp = UWORD(1) << (FLINT_BITS - 2);
2857 for (point_try_count = 0; point_try_count < 10; point_try_count++)
2858 {
2859 p_sp = n_nextprime(p_sp, 1);
2860 nmod_mpoly_ctx_set_modulus(ctx_sp, p_sp);
2861 /* unfortunate nmod_poly's need mod set */
2862 nmod_poly_stack_set_ctx(Sp_sp, ctx_sp);
2863 nmod_mpolyn_set_mod(Aeval_sp, ctx_sp->ffinfo->mod);
2864 nmod_mpolyn_set_mod(Beval_sp, ctx_sp->ffinfo->mod);
2865 nmod_mpolyn_set_mod(Geval_sp, ctx_sp->ffinfo->mod);
2866 nmod_mpolyn_set_mod(Abareval_sp, ctx_sp->ffinfo->mod);
2867 nmod_mpolyn_set_mod(Bbareval_sp, ctx_sp->ffinfo->mod);
2868 for (i = 0; i < ctx->minfo->nvars; i++)
2869 {
2870 checkalpha_sp[i] = n_urandint(randstate, p_sp);
2871 }
2872 fmpz_mpolyuu_eval_nmod(Aeval_sp, ctx_sp, A, checkalpha_sp, ctx);
2873 fmpz_mpolyuu_eval_nmod(Beval_sp, ctx_sp, B, checkalpha_sp, ctx);
2874
2875 if (Aeval_sp->length == 0 || Beval_sp->length == 0
2876 || nmod_mpolyn_bidegree(Aeval_sp) != A->exps[0]
2877 || nmod_mpolyn_bidegree(Beval_sp) != B->exps[0])
2878 {
2879 /* evaluation killed at least one of lc(A) or lc(B) */
2880 continue;
2881 }
2882 success = nmod_mpolyn_gcd_brown_smprime_bivar(Geval_sp,
2883 Abareval_sp, Bbareval_sp, Aeval_sp, Beval_sp, ctx_sp, Sp_sp);
2884 if (success)
2885 {
2886 FLINT_ASSERT(Geval_sp->length > 0);
2887 GdegboundXY = nmod_mpolyn_bidegree(Geval_sp);
2888 break;
2889 }
2890 }
2891
2892 /*
2893 Find degree bounds on G wrt lesser variables so that
2894 Gdegbounds[i] >= deg_(x_i)(G)
2895 Also fills in
2896 Adegs[i] = deg_(x_i)(A)
2897 Bdegs[i] = deg_(x_i)(B)
2898 */
2899 mpoly_degrees_si(Gammadegs, Gamma->exps, Gamma->length, bits, ctx->minfo);
2900 for (i = 0; i < ctx->minfo->nvars; i++)
2901 {
2902 Gdegbounds[i] = fmpz_mpolyuu_gcd_degree_bound_minor(
2903 Adegs + i, Bdegs + i, A, B, i, ctx, randstate);
2904 }
2905
2906 /*
2907 Find bits into which H can be packed. The degrees satsify
2908 deg_(x_i)(H) <= deg_(x_i)(A)
2909 deg_(x_i)(H) <= deg_(x_i)(B)
2910 deg_(x_i)(H) <= deg_(x_i)(Gamma) + deg_(x_i)(G)
2911 */
2912 Hbits = bits;
2913 for (i = 0; i < ctx->minfo->nvars; i++)
2914 {
2915 flint_bitcnt_t Hibits;
2916 Ictx->degbounds[i] = FLINT_MIN(Adegs[i], Bdegs[i]);
2917 Ictx->degbounds[i] = FLINT_MIN(Ictx->degbounds[i], Gdegbounds[i] + Gammadegs[i]);
2918 Hibits = 1 + FLINT_BIT_COUNT(Ictx->degbounds[i]);
2919 Hbits = FLINT_MAX(Hbits, Hibits);
2920
2921 /* degbounds[i] will be a strict degree bound on deg_(x_i)(H) */
2922 Ictx->degbounds[i]++;
2923 }
2924
2925 fmpz_mpolyu_init(Abar, bits, ctx);
2926 fmpz_mpolyu_init(Bbar, bits, ctx);
2927 fmpz_mpolyu_init(H, Hbits, ctx);
2928 fmpz_mpoly_init3(Hcontent, 0, Hbits, ctx);
2929
2930 /* initialization done! */
2931
2932 if (GdegboundXY == 0)
2933 {
2934 fmpz_mpolyu_one(G, ctx);
2935 fmpz_mpolyu_swap(Abar, A, ctx);
2936 fmpz_mpolyu_swap(Bbar, B, ctx);
2937 success = 1;
2938 goto cleanup;
2939 }
2940
2941 if (Hbits > FLINT_BITS)
2942 {
2943 /* H cannot be guaranteed to be packed into FLINT_BITS - absolute falure */
2944 success = 0;
2945 goto cleanup;
2946 }
2947
2948 /* initial choices for the ksub degrees are the strict degree bounds on H */
2949 for (i = 0; i < ctx->minfo->nvars; i++)
2950 {
2951 Ictx->subdegs[i] = Ictx->degbounds[i];
2952 }
2953 goto got_ksub;
2954
2955 pick_ksub:
2956
2957 if (ctx->minfo->nvars > 1)
2958 {
2959 /* just increment the smallest subdegs[j] */
2960 j = 1;
2961 for (i = 2; i < ctx->minfo->nvars; i++)
2962 {
2963 if (Ictx->subdegs[i] < Ictx->subdegs[j])
2964 {
2965 j = i;
2966 }
2967 }
2968 Ictx->subdegs[j]++;
2969 }
2970
2971 got_ksub:
2972
2973 fmpz_one(subprod);
2974 for (i = 0; i < ctx->minfo->nvars; i++)
2975 {
2976 if ((slong)(Ictx->subdegs[i]) < 0)
2977 {
2978 /* ksub has overflown - absolute falure */
2979 success = 0;
2980 goto cleanup;
2981 }
2982 fmpz_mul_ui(subprod, subprod, Ictx->subdegs[i]);
2983 }
2984
2985 /* see if the ksub killed either lc(A) or lc(B) */
2986 fmpz_mpoly_ksub_content(cAksub, A->coeffs + 0, Ictx->subdegs, ctx);
2987 fmpz_mpoly_ksub_content(cBksub, B->coeffs + 0, Ictx->subdegs, ctx);
2988 if (fmpz_is_zero(cAksub) || fmpz_is_zero(cBksub))
2989 {
2990 /* try a new substitution if we killed either leading coefficient */
2991 goto pick_ksub;
2992 }
2993
2994 pick_bma_prime:
2995
2996 if (fmpz_cmp_ui(p, ABtotal_length) < 0)
2997 fmpz_set_ui(p, ABtotal_length);
2998
2999 if (fmpz_cmp(p, subprod) < 0)
3000 fmpz_set(p, subprod);
3001
3002 success = fmpz_next_smooth_prime(p, p);
3003 fmpz_sub_ui(pm1, p, 1);
3004 if (!success)
3005 {
3006 /* ran out of smooth primes - absolute falure */
3007 success = 0;
3008 goto cleanup;
3009 }
3010
3011 /* make sure reduction does not kill either leading coeff after ksub */
3012 if (fmpz_divisible(cAksub, p) || fmpz_divisible(cBksub, p))
3013 {
3014 goto pick_bma_prime;
3015 }
3016
3017 /* make sure p does not divide any coefficient of Gamma */
3018 for (i = 0; i < Gamma->length; i++)
3019 {
3020 if (fmpz_divisible(Gamma->coeffs + i, p))
3021 {
3022 goto pick_bma_prime;
3023 }
3024 }
3025
3026 if (fmpz_abs_fits_ui(p))
3027 {
3028 p_sp = fmpz_get_ui(p);
3029 sshift_sp = 1;
3030
3031 unlucky_count = 0;
3032 last_unlucky_sshift_plus_1_sp = 0;
3033
3034 nmod_mpoly_ctx_set_modulus(ctx_sp, p_sp);
3035 nmod_discrete_log_pohlig_hellman_precompute_prime(Ictx->dlogenv_sp, p_sp);
3036
3037 nmod_bma_mpoly_reset_prime(Lambda_sp, ctx_sp->ffinfo);
3038 nmod_bma_mpoly_zero(Lambda_sp);
3039
3040 /* unfortunate nmod_poly's store their own ctx :( */
3041 nmod_poly_stack_set_ctx(Sp_sp, ctx_sp);
3042 nmod_mpolyn_set_mod(Aeval_sp, ctx_sp->ffinfo->mod);
3043 nmod_mpolyn_set_mod(Beval_sp, ctx_sp->ffinfo->mod);
3044 nmod_mpolyn_set_mod(Geval_sp, ctx_sp->ffinfo->mod);
3045 nmod_mpolyn_set_mod(Abareval_sp, ctx_sp->ffinfo->mod);
3046 nmod_mpolyn_set_mod(Bbareval_sp, ctx_sp->ffinfo->mod);
3047
3048 FLINT_ASSERT(sshift_sp == 1);
3049 nmod_mpoly_bma_interpolate_alpha_powers(checkalpha_sp, sshift_sp,
3050 Ictx, ctx, ctx_sp->ffinfo);
3051
3052 /* set evaluation of monomials */
3053 nmod_mpoly_set_skel(Gammainc_sp, ctx_sp, Gamma, checkalpha_sp, ctx);
3054 nmod_mpolyu_set_skel(Ainc_sp, ctx_sp, A, checkalpha_sp, ctx);
3055 nmod_mpolyu_set_skel(Binc_sp, ctx_sp, B, checkalpha_sp, ctx);
3056
3057 /* set reduction of coeffs */
3058 nmod_mpoly_red_skel(Gammared_sp, Gamma, ctx_sp->ffinfo);
3059 nmod_mpolyu_red_skel(Ared_sp, A, ctx_sp->ffinfo);
3060 nmod_mpolyu_red_skel(Bred_sp, B, ctx_sp->ffinfo);
3061
3062 /* copy evaluation of monomials */
3063 nmod_mpoly_copy_skel(Gammacur_sp, Gammainc_sp);
3064 nmod_mpolyu_copy_skel(Acur_sp, Ainc_sp);
3065 nmod_mpolyu_copy_skel(Bcur_sp, Binc_sp);
3066
3067 image_count_sp = 0;
3068
3069 next_bma_image_sp:
3070
3071 /* image count is also the current power of alpha we are evaluating */
3072 image_count_sp++;
3073
3074 FLINT_ASSERT(sshift_sp + Lambda_sp->pointcount == image_count_sp);
3075
3076 if (image_count_sp >= p_sp - 1)
3077 {
3078 /* out of evaluation points alpha^image_count in Fp* */
3079 goto pick_bma_prime;
3080 }
3081
3082 Gammaeval_sp = nmod_mpoly_use_skel_mul(Gammared_sp, Gammacur_sp,
3083 Gammainc_sp, ctx_sp->ffinfo);
3084 nmod_mpolyuu_use_skel_mul(Aeval_sp, A, Ared_sp, Acur_sp, Ainc_sp, ctx_sp);
3085 nmod_mpolyuu_use_skel_mul(Beval_sp, B, Bred_sp, Bcur_sp, Binc_sp, ctx_sp);
3086
3087 if (Aeval_sp->length == 0 || Beval_sp->length == 0
3088 || nmod_mpolyn_bidegree(Aeval_sp) != A->exps[0]
3089 || nmod_mpolyn_bidegree(Beval_sp) != B->exps[0])
3090 {
3091 /* evaluation killed either lc(A) or lc(B) */
3092 sshift_sp += Lambda_sp->pointcount + 1;
3093 nmod_bma_mpoly_zero(Lambda_sp);
3094 goto next_bma_image_sp;
3095 }
3096
3097 /* the evaluation killed neither lc(A) nor lc(B) */
3098 FLINT_ASSERT(Gammaeval_sp != 0);
3099
3100 success = nmod_mpolyn_gcd_brown_smprime_bivar(Geval_sp,
3101 Abareval_sp, Bbareval_sp, Aeval_sp, Beval_sp, ctx_sp, Sp_sp);
3102 if (!success)
3103 {
3104 sshift_sp += Lambda->pointcount + 1;
3105 nmod_bma_mpoly_zero(Lambda_sp);
3106 goto next_bma_image_sp;
3107 }
3108
3109 FLINT_ASSERT(Geval_sp->length > 0);
3110 GevaldegXY = nmod_mpolyn_bidegree(Geval_sp);
3111 nmod_mpolyn_scalar_mul_nmod(Geval_sp, Gammaeval_sp, ctx_sp);
3112
3113 FLINT_ASSERT(Gammaeval_sp == nmod_mpolyn_leadcoeff(Geval_sp, ctx_sp));
3114
3115 if (GdegboundXY < GevaldegXY)
3116 {
3117 /* this image in Fp[X,Y] was unlucky */
3118 if (sshift_sp == last_unlucky_sshift_plus_1_sp)
3119 {
3120 /* this ksub is probably unlucky */
3121 goto pick_ksub;
3122 }
3123 if (++unlucky_count > 2)
3124 {
3125 goto pick_bma_prime;
3126 }
3127 last_unlucky_sshift_plus_1_sp = sshift_sp + 1;
3128 sshift_sp += Lambda_sp->pointcount + 1;
3129 nmod_bma_mpoly_zero(Lambda_sp);
3130 goto next_bma_image_sp;
3131 }
3132 else if (GdegboundXY > GevaldegXY)
3133 {
3134 /* new bound on deg_XY(G) */
3135 sshift_sp += Lambda_sp->pointcount;
3136 nmod_bma_mpoly_zero(Lambda_sp);
3137 nmod_bma_mpoly_add_point(Lambda_sp, Geval_sp, ctx_sp);
3138 GdegboundXY = GevaldegXY;
3139 if (GdegboundXY == 0)
3140 {
3141 fmpz_mpolyu_one(G, ctx);
3142 fmpz_mpolyu_swap(Abar, A, ctx);
3143 fmpz_mpolyu_swap(Bbar, B, ctx);
3144 success = 1;
3145 goto cleanup;
3146 }
3147 goto next_bma_image_sp;
3148 }
3149
3150 nmod_bma_mpoly_add_point(Lambda_sp, Geval_sp, ctx_sp);
3151 if ((Lambda_sp->pointcount & 1) != 0
3152 || Gamma->length > Lambda_sp->pointcount/2)
3153 {
3154 goto next_bma_image_sp;
3155 }
3156
3157 changed = nmod_bma_mpoly_reduce(Lambda_sp);
3158 if (changed)
3159 {
3160 goto next_bma_image_sp;
3161 }
3162
3163 success = nmod_bma_mpoly_get_fmpz_mpolyu(H, ctx, sshift_sp,
3164 Lambda_sp, Ictx, ctx_sp->ffinfo);
3165 if (!success
3166 || H->length == 0
3167 || (H->coeffs + 0)->length != Gamma->length)
3168 {
3169 goto next_bma_image_sp;
3170 }
3171
3172 /* GdegboundXY should be the bidegree of H */
3173 FLINT_ASSERT(GdegboundXY == H->exps[0]);
3174
3175 switch (_random_check_sp(&GevaldegXY, GdegboundXY,
3176 Aeval_sp, Beval_sp, Geval_sp, Abareval_sp, Bbareval_sp,
3177 checkalpha_sp, H, A, B, Gamma, ctx, ctx_sp, randstate, Sp_sp))
3178 {
3179 default:
3180 FLINT_ASSERT(0);
3181 case random_check_image_no_match:
3182 case random_check_image_degree_high:
3183 goto next_bma_image_sp;
3184 case random_check_image_degree_low:
3185 /* the random evaluation point gave us a better degree bound */
3186 sshift_sp += Lambda_sp->pointcount;
3187 nmod_bma_mpoly_zero(Lambda_sp);
3188 GdegboundXY = GevaldegXY;
3189 if (GdegboundXY == 0)
3190 {
3191 fmpz_mpolyu_one(G, ctx);
3192 fmpz_mpolyu_swap(Abar, A, ctx);
3193 fmpz_mpolyu_swap(Bbar, B, ctx);
3194 success = 1;
3195 goto cleanup;
3196 }
3197 goto next_bma_image_sp;
3198 case random_check_point_not_found:
3199 /* hmmm */
3200 case random_check_good:
3201 NULL;
3202 }
3203 }
3204 else
3205 {
3206 fmpz_one(sshift);
3207
3208 unlucky_count = 0;
3209 fmpz_zero(last_unlucky_sshift_plus_1);
3210
3211 fmpz_mod_ctx_set_modulus(ctx_mp->ffinfo, p);
3212 fmpz_mod_discrete_log_pohlig_hellman_precompute_prime(Ictx->dlogenv, p);
3213 fmpz_mod_bma_mpoly_reset_prime(Lambda, ctx_mp->ffinfo);
3214 fmpz_mod_bma_mpoly_zero(Lambda);
3215
3216 /* unfortunate fmpz_mod_poly's store their own ctx :( */
3217 fmpz_mod_mpolyn_set_modulus(Aeval, ctx_mp->ffinfo);
3218 fmpz_mod_mpolyn_set_modulus(Beval, ctx_mp->ffinfo);
3219 fmpz_mod_mpolyn_set_modulus(Geval, ctx_mp->ffinfo);
3220 fmpz_mod_mpolyn_set_modulus(Abareval, ctx_mp->ffinfo);
3221 fmpz_mod_mpolyn_set_modulus(Bbareval, ctx_mp->ffinfo);
3222
3223 FLINT_ASSERT(fmpz_is_one(sshift));
3224 fmpz_mod_mpoly_bma_interpolate_alpha_powers(checkalpha, sshift,
3225 Ictx, ctx, ctx_mp->ffinfo);
3226
3227 /* set evaluation of monomials */
3228 fmpz_mod_mpoly_set_skel(Gammainc, ctx_mp, Gamma, checkalpha, ctx);
3229 fmpz_mod_mpolyu_set_skel(Ainc, ctx_mp, A, checkalpha, ctx);
3230 fmpz_mod_mpolyu_set_skel(Binc, ctx_mp, B, checkalpha, ctx);
3231
3232 /* set reduction of coeffs */
3233 fmpz_mod_mpoly_red_skel(Gammared, Gamma, ctx_mp->ffinfo);
3234 fmpz_mod_mpolyu_red_skel(Ared, A, ctx_mp->ffinfo);
3235 fmpz_mod_mpolyu_red_skel(Bred, B, ctx_mp->ffinfo);
3236
3237 /* copy evaluation of monomials */
3238 fmpz_mod_mpoly_copy_skel(Gammacur, Gammainc);
3239 fmpz_mod_mpolyu_copy_skel(Acur, Ainc);
3240 fmpz_mod_mpolyu_copy_skel(Bcur, Binc);
3241
3242 fmpz_zero(image_count);
3243
3244 next_bma_image:
3245
3246 /* image count is also the current power of alpha we are evaluating */
3247 fmpz_add_ui(image_count, image_count, 1);
3248
3249 #if WANT_ASSERT
3250 /* image_count == sshift + Lambda->pointcount */
3251 {
3252 fmpz_t t;
3253 fmpz_init(t);
3254 fmpz_add_ui(t, sshift, Lambda->pointcount);
3255 FLINT_ASSERT(fmpz_equal(t, image_count));
3256 fmpz_clear(t);
3257 }
3258 #endif
3259
3260 if (fmpz_cmp(image_count, pm1) >= 0)
3261 {
3262 /* out of evaluation points alpha^image_count in Fp* */
3263 goto pick_bma_prime;
3264 }
3265
3266 fmpz_mod_mpoly_use_skel_mul(Gammaeval, Gammared, Gammacur, Gammainc,
3267 ctx_mp->ffinfo);
3268 fmpz_mod_mpolyuu_use_skel_mul(Aeval, A, Ared, Acur, Ainc, ctx_mp);
3269 fmpz_mod_mpolyuu_use_skel_mul(Beval, B, Bred, Bcur, Binc, ctx_mp);
3270 if (Aeval->length == 0 || Beval->length == 0
3271 || fmpz_mod_mpolyn_bidegree(Aeval) != A->exps[0]
3272 || fmpz_mod_mpolyn_bidegree(Beval) != B->exps[0])
3273 {
3274 /* evaluation killed either lc(A) or lc(B) */
3275 fmpz_add_ui(sshift, sshift, Lambda->pointcount + 1);
3276 fmpz_mod_bma_mpoly_zero(Lambda);
3277 goto next_bma_image;
3278 }
3279
3280 /* the evaluation killed neither lc(A) nor lc(B) */
3281 FLINT_ASSERT(!fmpz_is_zero(Gammaeval));
3282
3283 success = fmpz_mod_mpolyn_gcd_brown_bivar(Geval, Abareval, Bbareval,
3284 Aeval, Beval, ctx_mp);
3285 if (!success)
3286 {
3287 fmpz_add_ui(sshift, sshift, Lambda->pointcount + 1);
3288 fmpz_mod_bma_mpoly_zero(Lambda);
3289 goto next_bma_image;
3290 }
3291
3292 FLINT_ASSERT(Geval->length > 0);
3293 GevaldegXY = fmpz_mod_mpolyn_bidegree(Geval);
3294 fmpz_mod_mpolyn_scalar_mul_fmpz_mod(Geval, Gammaeval, ctx_mp);
3295
3296 FLINT_ASSERT(fmpz_equal(Gammaeval, fmpz_mod_mpolyn_leadcoeff_last_ref(Geval, ctx_mp)));
3297
3298 if (GdegboundXY < GevaldegXY)
3299 {
3300 /* this image in Fp[X,Y] was unlucky */
3301 if (fmpz_equal(sshift, last_unlucky_sshift_plus_1))
3302 {
3303 /* this ksub is probably unlucky */
3304 goto pick_ksub;
3305 }
3306 if (++unlucky_count > 2)
3307 {
3308 goto pick_bma_prime;
3309 }
3310 fmpz_add_ui(last_unlucky_sshift_plus_1, sshift, 1);
3311 fmpz_add_ui(sshift, sshift, Lambda->pointcount + 1);
3312 fmpz_mod_bma_mpoly_zero(Lambda);
3313 goto next_bma_image;
3314 }
3315 else if (GdegboundXY > GevaldegXY)
3316 {
3317 /* new bound on deg_XY(G) */
3318 fmpz_add_ui(sshift, sshift, Lambda->pointcount);
3319 fmpz_mod_bma_mpoly_zero(Lambda);
3320 fmpz_mod_bma_mpoly_add_point(Lambda, Geval, ctx_mp);
3321 GdegboundXY = GevaldegXY;
3322 if (GdegboundXY == 0)
3323 {
3324 fmpz_mpolyu_one(G, ctx);
3325 fmpz_mpolyu_swap(Abar, A, ctx);
3326 fmpz_mpolyu_swap(Bbar, B, ctx);
3327 success = 1;
3328 goto cleanup;
3329 }
3330 goto next_bma_image;
3331 }
3332
3333 fmpz_mod_bma_mpoly_add_point(Lambda, Geval, ctx_mp);
3334 if ( (Lambda->pointcount & 1) != 0
3335 || Gamma->length > Lambda->pointcount/2)
3336 {
3337 goto next_bma_image;
3338 }
3339
3340 changed = fmpz_mod_bma_mpoly_reduce(Lambda);
3341 if (changed)
3342 {
3343 goto next_bma_image;
3344 }
3345
3346 success = fmpz_mod_bma_mpoly_get_fmpz_mpolyu(H, ctx, sshift,
3347 Lambda, Ictx, ctx_mp->ffinfo);
3348 if (!success
3349 || H->length == 0
3350 || (H->coeffs + 0)->length != Gamma->length)
3351 {
3352 goto next_bma_image;
3353 }
3354
3355 /* GdegboundXY should be the bidegree of H */
3356 FLINT_ASSERT(GdegboundXY == H->exps[0]);
3357
3358 switch (_random_check(&GevaldegXY, GdegboundXY,
3359 Aeval, Beval, Geval, Abareval, Bbareval, Gammaeval,
3360 checkalpha, H, A, B, Gamma, ctx, ctx_mp, randstate))
3361 {
3362 default:
3363 FLINT_ASSERT(0);
3364 case random_check_image_no_match:
3365 case random_check_image_degree_high:
3366 goto next_bma_image;
3367 case random_check_image_degree_low:
3368 /* the random evaluation point gave us a better degree bound */
3369 fmpz_add_ui(sshift, sshift, Lambda->pointcount);
3370 fmpz_mod_bma_mpoly_zero(Lambda);
3371 GdegboundXY = GevaldegXY;
3372 if (GdegboundXY == 0)
3373 {
3374 fmpz_mpolyu_one(G, ctx);
3375 fmpz_mpolyu_swap(Abar, A, ctx);
3376 fmpz_mpolyu_swap(Bbar, B, ctx);
3377 success = 1;
3378 goto cleanup;
3379 }
3380 goto next_bma_image;
3381 case random_check_point_not_found:
3382 /* hmmm */
3383 case random_check_good:
3384 NULL;
3385 }
3386 }
3387
3388 /* assume that H is correct modulo Hmodulus = p */
3389 fmpz_set(Hmodulus, p);
3390
3391 /* find number of evals for zip interp */
3392 FLINT_ASSERT(H->length > 0);
3393 zip_evals = H->coeffs[0].length;
3394 for (i = 1; i < H->length; i++)
3395 {
3396 zip_evals = FLINT_MAX(zip_evals, H->coeffs[i].length);
3397 }
3398 zip_evals += 1; /* one extra check eval */
3399 nmod_zip_mpolyu_fit_poly(Z, H, zip_evals);
3400
3401 p_sp = UWORD(1) << (FLINT_BITS - 2);
3402
3403 pick_zip_prime:
3404 /*
3405 Get a new machine prime for zippel interpolation.
3406 H is currently interpolated modulo Hmodulus.
3407 */
3408 if (p_sp >= UWORD_MAX_PRIME)
3409 {
3410 /* ran out of machine primes - absolute failure */
3411 success = 0;
3412 goto cleanup;
3413 }
3414 p_sp = n_nextprime(p_sp, 1);
3415
3416 if (0 == fmpz_fdiv_ui(Hmodulus, p_sp))
3417 {
3418 goto pick_zip_prime;
3419 }
3420
3421 nmod_mpoly_ctx_set_modulus(ctx_sp, p_sp);
3422 /* unfortunate nmod_poly's need mod set */
3423 nmod_poly_stack_set_ctx(Sp_sp, ctx_sp);
3424 nmod_mpolyn_set_mod(Aeval_sp, ctx_sp->ffinfo->mod);
3425 nmod_mpolyn_set_mod(Beval_sp, ctx_sp->ffinfo->mod);
3426 nmod_mpolyn_set_mod(Geval_sp, ctx_sp->ffinfo->mod);
3427 nmod_mpolyn_set_mod(Abareval_sp, ctx_sp->ffinfo->mod);
3428 nmod_mpolyn_set_mod(Bbareval_sp, ctx_sp->ffinfo->mod);
3429
3430 FLINT_ASSERT(p_sp > 3);
3431 for (i = 0; i < ctx->minfo->nvars; i++)
3432 {
3433 checkalpha_sp[i] = n_urandint(randstate, p_sp - 3) + 2;
3434 }
3435
3436 /* set up the zippler */
3437 nmod_zip_mpolyu_set_skel(Z, ctx_sp, H, checkalpha_sp, ctx);
3438
3439 /* set evaluation of monomials */
3440 nmod_mpoly_set_skel(Gammainc_sp, ctx_sp, Gamma, checkalpha_sp, ctx);
3441 nmod_mpolyu_set_skel(Ainc_sp, ctx_sp, A, checkalpha_sp, ctx);
3442 nmod_mpolyu_set_skel(Binc_sp, ctx_sp, B, checkalpha_sp, ctx);
3443
3444 /* set reduction of coeffs */
3445 nmod_mpoly_red_skel(Gammared_sp, Gamma, ctx_sp->ffinfo);
3446 nmod_mpolyu_red_skel(Ared_sp, A, ctx_sp->ffinfo);
3447 nmod_mpolyu_red_skel(Bred_sp, B, ctx_sp->ffinfo);
3448
3449 /* copy evaluation of monomials */
3450 nmod_mpoly_copy_skel(Gammacur_sp, Gammainc_sp);
3451 nmod_mpolyu_copy_skel(Acur_sp, Ainc_sp);
3452 nmod_mpolyu_copy_skel(Bcur_sp, Binc_sp);
3453
3454 next_zip_image:
3455
3456 Gammaeval_sp = nmod_mpoly_use_skel_mul(Gammared_sp, Gammacur_sp,
3457 Gammainc_sp, ctx_sp->ffinfo);
3458 nmod_mpolyuu_use_skel_mul(Aeval_sp, A, Ared_sp, Acur_sp, Ainc_sp, ctx_sp);
3459 nmod_mpolyuu_use_skel_mul(Beval_sp, B, Bred_sp, Bcur_sp, Binc_sp, ctx_sp);
3460
3461 if (Aeval_sp->length == 0 || Beval_sp->length == 0
3462 || nmod_mpolyn_bidegree(Aeval_sp) != A->exps[0]
3463 || nmod_mpolyn_bidegree(Beval_sp) != B->exps[0])
3464 {
3465 /* evaluation point killed lc(A) or lc(B) */
3466 goto pick_zip_prime;
3467 }
3468
3469 /* the evaluation killed neither lc(A) nor lc(B) */
3470 FLINT_ASSERT(Gammaeval_sp != 0);
3471
3472 success = nmod_mpolyn_gcd_brown_smprime_bivar(Geval_sp,
3473 Abareval_sp, Bbareval_sp, Aeval_sp, Beval_sp, ctx_sp, Sp_sp);
3474 if (!success)
3475 {
3476 /* choose a bigger p if bivar gcd failed */
3477 goto pick_zip_prime;
3478 }
3479
3480 FLINT_ASSERT(Geval_sp->length > 0);
3481 GevaldegXY = nmod_mpolyn_bidegree(Geval_sp);
3482
3483 if (GevaldegXY > GdegboundXY)
3484 {
3485 /* this image in Fp'[X,Y] was unlucky */
3486 goto pick_zip_prime;
3487 }
3488 else if (GevaldegXY < GdegboundXY)
3489 {
3490 /* we have a new degree bound on deg_XY(G) */
3491 GdegboundXY = GevaldegXY;
3492 if (GdegboundXY == 0)
3493 {
3494 fmpz_mpolyu_one(G, ctx);
3495 fmpz_mpolyu_swap(Abar, A, ctx);
3496 fmpz_mpolyu_swap(Bbar, B, ctx);
3497 success = 1;
3498 goto cleanup;
3499 }
3500 goto pick_bma_prime;
3501 }
3502
3503 nmod_mpolyn_scalar_mul_nmod(Geval_sp, Gammaeval_sp, ctx_sp);
3504 FLINT_ASSERT(Gammaeval_sp == nmod_mpolyn_leadcoeff(Geval_sp, ctx_sp));
3505
3506 /* update the zippler */
3507 success = nmod_zip_mpolyuu_add_point(Z, Geval_sp);
3508 if (!success)
3509 {
3510 /*
3511 An image gcd in Fp'[X,Y] did not match the assumed formed in [X,Y].
3512 Start all over
3513 */
3514 goto pick_bma_prime;
3515 }
3516 if (Z->pointcount < zip_evals)
3517 {
3518 goto next_zip_image;
3519 }
3520
3521 switch (nmod_mpolyu_zip_find_coeffs(Z, ctx_sp))
3522 {
3523 default:
3524 FLINT_ASSERT(0);
3525 case nmod_zip_find_coeffs_no_match:
3526 /* The collection of image gcd's in Fp'[X,Y] could not be coerced
3527 into the assumed form in [X,Y][x_0, ..., x_(n-1)]. */
3528 goto pick_bma_prime;
3529 case nmod_zip_find_coeffs_non_invertible:
3530 /* The unlikely case where the evaluation points alpha produced
3531 a singular Vandermonde matrix. Assumed form is not nec wrong. */
3532 goto pick_zip_prime;
3533 case nmod_zip_find_coeffs_good:
3534 NULL;
3535 }
3536
3537 FLINT_ASSERT(Hbits == H->bits);
3538 changed = fmpz_mpolyu_addinterp_zip(H, Hmodulus, Z, ctx_sp->ffinfo);
3539 fmpz_mul_ui(Hmodulus, Hmodulus, ctx_sp->ffinfo->mod.n);
3540
3541 if (changed)
3542 {
3543 /* TODO if the coefficients of H are getting to large? */
3544 goto pick_zip_prime;
3545 }
3546
3547 success = fmpz_mpolyu_content_mpoly_threaded_pool(Hcontent, H, ctx, NULL, 0);
3548 FLINT_ASSERT(Hcontent->bits == Hbits);
3549 if (!success)
3550 {
3551 /* could not compute content - absolute failure */
3552 success = 0;
3553 goto cleanup;
3554 }
3555
3556 /* upgrade G to Hbits then try to pack down to bits */
3557 fmpz_mpolyu_set_bits(G, Hbits, ctx);
3558 fmpz_mpolyu_divexact_mpoly(G, H, 1, Hcontent, ctx);
3559 success = fmpz_mpolyu_repack_bits(G, bits, ctx);
3560 if (!success)
3561 {
3562 /* G cannot be the GCD if it cannot be packed into bits */
3563 goto pick_zip_prime;
3564 }
3565 if ( !fmpz_mpolyuu_divides(Abar, A, G, 2, ctx)
3566 || !fmpz_mpolyuu_divides(Bbar, B, G, 2, ctx))
3567 {
3568 goto pick_zip_prime;
3569 }
3570
3571 success = 1;
3572
3573 cleanup:
3574
3575 fmpz_mpoly_clear(Hcontent, ctx);
3576 fmpz_mpolyu_clear(H, ctx);
3577
3578 flint_free(Gdegbounds);
3579 flint_free(Adegs);
3580 flint_free(Bdegs);
3581 flint_free(Gammadegs);
3582
3583 /* the zippler */
3584 nmod_zip_mpolyu_clear(Z);
3585
3586 /* machine precision workspace */
3587 flint_free(checkalpha_sp);
3588 nmod_mpolyn_clear(Aeval_sp, ctx_sp);
3589 nmod_mpolyn_clear(Beval_sp, ctx_sp);
3590 nmod_mpolyn_clear(Geval_sp, ctx_sp);
3591 nmod_mpolyn_clear(Abareval_sp, ctx_sp);
3592 nmod_mpolyn_clear(Bbareval_sp, ctx_sp);
3593 nmod_mpolyc_clear(Gammainc_sp);
3594 nmod_mpolyc_clear(Gammacur_sp);
3595 nmod_mpolyc_clear(Gammared_sp);
3596 nmod_mpolycu_clear(Ainc_sp);
3597 nmod_mpolycu_clear(Acur_sp);
3598 nmod_mpolycu_clear(Ared_sp);
3599 nmod_mpolycu_clear(Binc_sp);
3600 nmod_mpolycu_clear(Bcur_sp);
3601 nmod_mpolycu_clear(Bred_sp);
3602 nmod_bma_mpoly_clear(Lambda_sp);
3603 nmod_poly_stack_clear(Sp_sp);
3604 nmod_mpoly_ctx_clear(ctx_sp);
3605
3606 /* multiprecision workspace */
3607 for (i = 0; i < ctx->minfo->nvars; i++)
3608 {
3609 fmpz_clear(checkalpha + i);
3610 }
3611 flint_free(checkalpha);
3612 fmpz_mod_mpolyn_clear(Aeval, ctx_mp);
3613 fmpz_mod_mpolyn_clear(Beval, ctx_mp);
3614 fmpz_mod_mpolyn_clear(Geval, ctx_mp);
3615 fmpz_mod_mpolyn_clear(Abareval, ctx_mp);
3616 fmpz_mod_mpolyn_clear(Bbareval, ctx_mp);
3617 fmpz_mpolyc_clear(Gammainc);
3618 fmpz_mpolyc_clear(Gammacur);
3619 fmpz_mpolyc_clear(Gammared);
3620 fmpz_mpolycu_clear(Ainc);
3621 fmpz_mpolycu_clear(Acur);
3622 fmpz_mpolycu_clear(Ared);
3623 fmpz_mpolycu_clear(Binc);
3624 fmpz_mpolycu_clear(Bcur);
3625 fmpz_mpolycu_clear(Bred);
3626 fmpz_mod_bma_mpoly_clear(Lambda);
3627 fmpz_mod_mpoly_ctx_clear(ctx_mp);
3628
3629 mpoly_bma_interpolate_ctx_clear(Ictx);
3630
3631 fmpz_clear(Hmodulus);
3632 fmpz_clear(Gammaeval);
3633 fmpz_clear(last_unlucky_sshift_plus_1);
3634 fmpz_clear(sshift);
3635 fmpz_clear(cBksub);
3636 fmpz_clear(cAksub);
3637 fmpz_clear(subprod);
3638 fmpz_clear(image_count);
3639 fmpz_clear(pm1);
3640 fmpz_clear(p);
3641 flint_randclear(randstate);
3642
3643 if (success)
3644 {
3645 FLINT_ASSERT(G->bits == bits);
3646 FLINT_ASSERT(Abar->bits == bits);
3647 FLINT_ASSERT(Bbar->bits == bits);
3648 }
3649 else
3650 {
3651 fmpz_mpolyu_set_bits(G, bits, ctx);
3652 fmpz_mpolyu_set_bits(Abar, bits, ctx);
3653 fmpz_mpolyu_set_bits(Bbar, bits, ctx);
3654 }
3655
3656 return success;
3657 }
3658
3659
fmpz_mpoly_gcd_berlekamp_massey(fmpz_mpoly_t G,const fmpz_mpoly_t A,const fmpz_mpoly_t B,const fmpz_mpoly_ctx_t ctx)3660 int fmpz_mpoly_gcd_berlekamp_massey(
3661 fmpz_mpoly_t G,
3662 const fmpz_mpoly_t A,
3663 const fmpz_mpoly_t B,
3664 const fmpz_mpoly_ctx_t ctx)
3665 {
3666 slong i;
3667 flint_bitcnt_t wbits;
3668 int success = 0;
3669 fmpz_mpoly_ctx_t uctx;
3670 fmpz_mpolyu_t Auu, Buu, Guu, Abaruu, Bbaruu;
3671 fmpz_mpoly_t Ac, Bc, Gc, Gamma;
3672 slong * Adegs, * Bdegs, * perm;
3673 ulong * shift, * stride;
3674 ulong max_main_degree, max_minor_degree;
3675
3676 if (fmpz_mpoly_is_zero(A, ctx))
3677 {
3678 if (fmpz_mpoly_is_zero(B, ctx))
3679 {
3680 fmpz_mpoly_zero(G, ctx);
3681 return 1;
3682 }
3683 if (fmpz_sgn(B->coeffs + 0) < 0)
3684 {
3685 fmpz_mpoly_neg(G, B, ctx);
3686 return 1;
3687 }
3688 else
3689 {
3690 fmpz_mpoly_set(G, B, ctx);
3691 return 1;
3692 }
3693 }
3694
3695 if (fmpz_mpoly_is_zero(B, ctx))
3696 {
3697 if (fmpz_sgn(A->coeffs + 0) < 0)
3698 {
3699 fmpz_mpoly_neg(G, A, ctx);
3700 return 1;
3701 }
3702 else
3703 {
3704 fmpz_mpoly_set(G, A, ctx);
3705 return 1;
3706 }
3707 }
3708
3709 if (A->bits > FLINT_BITS || B->bits > FLINT_BITS)
3710 {
3711 return 0;
3712 }
3713
3714 if (ctx->minfo->nvars < 3)
3715 {
3716 return fmpz_mpoly_gcd_zippel(G, A, B, ctx);
3717 }
3718
3719 FLINT_ASSERT(A->bits <= FLINT_BITS);
3720 FLINT_ASSERT(B->bits <= FLINT_BITS);
3721 FLINT_ASSERT(ctx->minfo->nvars >= 3);
3722 FLINT_ASSERT(!fmpz_mpoly_is_zero(A, ctx));
3723 FLINT_ASSERT(!fmpz_mpoly_is_zero(B, ctx));
3724
3725 Adegs = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
3726 Bdegs = (slong *) flint_malloc(ctx->minfo->nvars*sizeof(slong));
3727 perm = (slong *) flint_malloc((ctx->minfo->nvars)*sizeof(slong));
3728 shift = (ulong *) flint_malloc((ctx->minfo->nvars)*sizeof(ulong));
3729 stride = (ulong *) flint_malloc((ctx->minfo->nvars)*sizeof(ulong));
3730
3731 mpoly_degrees_si(Adegs, A->exps, A->length, A->bits, ctx->minfo);
3732 mpoly_degrees_si(Bdegs, B->exps, B->length, B->bits, ctx->minfo);
3733
3734 max_main_degree = 0;
3735 max_minor_degree = 0;
3736 for (i = 0; i < ctx->minfo->nvars; i++)
3737 {
3738 perm[i] = i;
3739 shift[i] = 0;
3740 stride[i] = 1;
3741 FLINT_ASSERT(Adegs[i] >= 0);
3742 FLINT_ASSERT(Bdegs[i] >= 0);
3743 if (i < 2)
3744 {
3745 max_main_degree = FLINT_MAX(max_main_degree, Adegs[i]);
3746 max_main_degree = FLINT_MAX(max_main_degree, Bdegs[i]);
3747 }
3748 else
3749 {
3750 max_minor_degree = FLINT_MAX(max_minor_degree, Adegs[i]);
3751 max_minor_degree = FLINT_MAX(max_minor_degree, Bdegs[i]);
3752 }
3753 }
3754
3755 fmpz_mpoly_ctx_init(uctx, ctx->minfo->nvars - 2, ORD_LEX);
3756
3757 /* wbits is bits for intermediates in ZZ[x_0,x_1][x_2,...,x_(n-1)] */
3758 wbits = 1 + FLINT_BIT_COUNT(max_minor_degree);
3759 wbits = FLINT_MAX(MPOLY_MIN_BITS, wbits);
3760 wbits = mpoly_fix_bits(wbits, uctx->minfo);
3761 FLINT_ASSERT(wbits <= FLINT_BITS);
3762
3763 fmpz_mpolyu_init(Auu, wbits, uctx);
3764 fmpz_mpolyu_init(Buu, wbits, uctx);
3765 fmpz_mpolyu_init(Guu, wbits, uctx);
3766 fmpz_mpolyu_init(Abaruu, wbits, uctx);
3767 fmpz_mpolyu_init(Bbaruu, wbits, uctx);
3768 fmpz_mpoly_init3(Ac, 0, wbits, uctx);
3769 fmpz_mpoly_init3(Bc, 0, wbits, uctx);
3770 fmpz_mpoly_init3(Gc, 0, wbits, uctx);
3771 fmpz_mpoly_init3(Gamma, 0, wbits, uctx);
3772
3773 /* two main variables must be packed into bits = FLINT_BITS/2 */
3774 if (FLINT_BIT_COUNT(max_main_degree) >= FLINT_BITS/2)
3775 {
3776 success = 0;
3777 goto cleanup;
3778 }
3779
3780 fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Auu, uctx, A, ctx,
3781 perm, shift, stride, NULL, NULL, 0);
3782 fmpz_mpoly_to_mpolyuu_perm_deflate_threaded_pool(Buu, uctx, B, ctx,
3783 perm, shift, stride, NULL, NULL, 0);
3784
3785 success = fmpz_mpolyu_content_mpoly_threaded_pool(Ac, Auu, uctx, NULL, 0);
3786 success = success &&
3787 fmpz_mpolyu_content_mpoly_threaded_pool(Bc, Buu, uctx, NULL, 0);
3788 if (!success)
3789 goto cleanup;
3790
3791 fmpz_mpolyu_divexact_mpoly_inplace(Auu, Ac, uctx);
3792 fmpz_mpolyu_divexact_mpoly_inplace(Buu, Bc, uctx);
3793
3794 success = _fmpz_mpoly_gcd_threaded_pool(Gamma, wbits, Auu->coeffs + 0,
3795 Buu->coeffs + 0, uctx, NULL, 0);
3796 if (!success)
3797 goto cleanup;
3798
3799 success = fmpz_mpolyuu_gcd_berlekamp_massey(Guu, Abaruu, Bbaruu,
3800 Auu, Buu, Gamma, uctx);
3801 if (!success)
3802 goto cleanup;
3803
3804 success = _fmpz_mpoly_gcd_threaded_pool(Gc, wbits, Ac, Bc, uctx, NULL, 0);
3805 if (!success)
3806 goto cleanup;
3807
3808 fmpz_mpolyu_mul_mpoly_inplace(Guu, Gc, uctx);
3809
3810 fmpz_mpoly_from_mpolyuu_perm_inflate(G, FLINT_MIN(A->bits, B->bits), ctx,
3811 Guu, uctx, perm, shift, stride);
3812 if (fmpz_sgn(G->coeffs + 0) < 0)
3813 fmpz_mpoly_neg(G, G, ctx);
3814
3815 success = 1;
3816
3817 cleanup:
3818
3819 flint_free(Adegs);
3820 flint_free(Bdegs);
3821 flint_free(perm);
3822 flint_free(shift);
3823 flint_free(stride);
3824
3825 fmpz_mpolyu_clear(Auu, uctx);
3826 fmpz_mpolyu_clear(Buu, uctx);
3827 fmpz_mpolyu_clear(Guu, uctx);
3828 fmpz_mpolyu_clear(Abaruu, uctx);
3829 fmpz_mpolyu_clear(Bbaruu, uctx);
3830 fmpz_mpoly_clear(Ac, uctx);
3831 fmpz_mpoly_clear(Bc, uctx);
3832 fmpz_mpoly_clear(Gc, uctx);
3833 fmpz_mpoly_clear(Gamma, uctx);
3834
3835 fmpz_mpoly_ctx_clear(uctx);
3836
3837 return success;
3838 }
3839
3840