1 /*
2 Copyright (C) 2020 Daniel Schultz
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <https://www.gnu.org/licenses/>.
10 */
11
12 #include "n_poly.h"
13 #include "nmod_vec.h"
14
15 #define MAC(h, m, l, a, b) \
16 { \
17 mp_limb_t p1, p0; \
18 umul_ppmm(p1, p0, a, b); \
19 add_sssaaaaaa(h, m, l, h, m, l, 0, p1, p0); \
20 }
21
22 #define MAC3(h, m, l, a, b) \
23 { \
24 mp_limb_t p1, p0; \
25 umul_ppmm(p1, p0, a, b); \
26 add_sssaaaaaa(h, m, l, h, m, l, 0, p1, p0); \
27 }
28
29
30 #define MAC2(h, l, a, b) \
31 { \
32 mp_limb_t p1, p0; \
33 umul_ppmm(p1, p0, a, b); \
34 add_ssaaaa(h, l, h, l, p1, p0); \
35 }
36
n_fq_get_str_pretty(const mp_limb_t * a,const fq_nmod_ctx_t ctx)37 char * n_fq_get_str_pretty(
38 const mp_limb_t * a,
39 const fq_nmod_ctx_t ctx)
40 {
41 char * s;
42 fq_nmod_t A;
43 fq_nmod_init(A, ctx);
44 n_fq_get_fq_nmod(A, a, ctx);
45 s = fq_nmod_get_str_pretty(A, ctx);
46 fq_nmod_clear(A, ctx);
47 return s;
48 }
49
n_fq_fprint_pretty(FILE * file,const mp_limb_t * a,const fq_nmod_ctx_t ctx)50 int n_fq_fprint_pretty(
51 FILE * file,
52 const mp_limb_t * a,
53 const fq_nmod_ctx_t ctx)
54 {
55 slong d = fq_nmod_ctx_degree(ctx);
56 slong i;
57 int first;
58
59 first = 1;
60 for (i = d - 1; i >= 0; i--)
61 {
62 if (a[i] == 0)
63 continue;
64
65 if (!first)
66 flint_fprintf(file, "+");
67
68 first = 0;
69 flint_fprintf(file, "%wu", a[i]);
70
71 if (i > 0)
72 {
73 flint_fprintf(file, "*%s", ctx->var);
74 if (i > 1)
75 flint_fprintf(file, "^%wd", i);
76 }
77 }
78
79 if (first)
80 flint_fprintf(file, "0");
81
82 return 1;
83 }
84
n_fq_print_pretty(const mp_limb_t * a,const fq_nmod_ctx_t ctx)85 void n_fq_print_pretty(
86 const mp_limb_t * a,
87 const fq_nmod_ctx_t ctx)
88 {
89 n_fq_fprint_pretty(stdout, a, ctx);
90 }
91
n_fq_randtest_not_zero(mp_limb_t * a,flint_rand_t state,const fq_nmod_ctx_t ctx)92 void n_fq_randtest_not_zero(
93 mp_limb_t * a,
94 flint_rand_t state,
95 const fq_nmod_ctx_t ctx)
96 {
97 slong d = fq_nmod_ctx_degree(ctx);
98 slong i;
99 for (i = 0; i < d; i++)
100 a[i] = n_randint(state, fq_nmod_ctx_mod(ctx).n);
101 if (_n_fq_is_zero(a, d))
102 _n_fq_one(a, d);
103 }
104
n_fq_get_fq_nmod(fq_nmod_t a,const mp_limb_t * b,const fq_nmod_ctx_t ctx)105 void n_fq_get_fq_nmod(
106 fq_nmod_t a,
107 const mp_limb_t * b,
108 const fq_nmod_ctx_t ctx)
109 {
110 slong i;
111 slong d = fq_nmod_ctx_degree(ctx);
112
113 nmod_poly_fit_length(a, d);
114
115 for (i = 0; i < d; i++)
116 a->coeffs[i] = b[i];
117
118 a->length = d;
119 _nmod_poly_normalise(a);
120 }
121
n_fq_set_fq_nmod(mp_limb_t * a,const fq_nmod_t b,const fq_nmod_ctx_t ctx)122 void n_fq_set_fq_nmod(
123 mp_limb_t * a,
124 const fq_nmod_t b,
125 const fq_nmod_ctx_t ctx)
126 {
127 slong i, d = fq_nmod_ctx_degree(ctx);
128
129 FLINT_ASSERT(b->length <= d);
130
131 for (i = 0; i < d; i++)
132 a[i] = i < b->length ? b->coeffs[i] : 0;
133 }
134
n_fq_get_n_poly(n_poly_t a,const mp_limb_t * b,const fq_nmod_ctx_t ctx)135 void n_fq_get_n_poly(
136 n_poly_t a,
137 const mp_limb_t * b,
138 const fq_nmod_ctx_t ctx)
139 {
140 slong i;
141 slong d = fq_nmod_ctx_degree(ctx);
142
143 n_poly_fit_length(a, d);
144
145 for (i = 0; i < d; i++)
146 a->coeffs[i] = b[i];
147
148 a->length = d;
149 _n_poly_normalise(a);
150 }
151
_n_fq_set_n_poly(mp_limb_t * a,const mp_limb_t * bcoeffs,slong blen,const fq_nmod_ctx_t ctx)152 void _n_fq_set_n_poly(
153 mp_limb_t * a,
154 const mp_limb_t * bcoeffs, slong blen,
155 const fq_nmod_ctx_t ctx)
156 {
157 slong d = fq_nmod_ctx_degree(ctx);
158
159 if (blen > d)
160 {
161 _nmod_poly_rem(a, bcoeffs, blen, ctx->modulus->coeffs, d + 1, ctx->mod);
162 }
163 else
164 {
165 slong i;
166 for (i = 0; i < blen; i++)
167 a[i] = bcoeffs[i];
168 for (; i < d; i++)
169 a[i] = 0;
170 }
171 }
172
173
n_fq_gen(mp_limb_t * a,const fq_nmod_ctx_t ctx)174 void n_fq_gen(
175 mp_limb_t * a,
176 const fq_nmod_ctx_t ctx)
177 {
178 slong i, d = fq_nmod_ctx_degree(ctx);
179 if (d == 1)
180 {
181 a[0] = nmod_neg(nmod_div(ctx->modulus->coeffs[0],
182 ctx->modulus->coeffs[1], ctx->mod), ctx->mod);
183 }
184 else
185 {
186 a[0] = 0;
187 a[1] = 1;
188 for (i = 2; i < d; i++)
189 a[i] = 0;
190 }
191 }
192
n_fq_add_si(mp_limb_t * a,const mp_limb_t * b,slong c,const fq_nmod_ctx_t ctx)193 void n_fq_add_si(
194 mp_limb_t * a,
195 const mp_limb_t * b,
196 slong c,
197 const fq_nmod_ctx_t ctx)
198 {
199 slong d = fq_nmod_ctx_degree(ctx);
200
201 if (a != b)
202 _nmod_vec_set(a, b, d);
203
204 if (c < 0)
205 {
206 ulong cc = -c;
207 if (cc >= ctx->mod.n)
208 NMOD_RED(cc, cc, ctx->mod);
209 a[0] = nmod_sub(a[0], cc, ctx->mod);
210 }
211 else
212 {
213 ulong cc = c;
214 if (cc >= ctx->mod.n)
215 NMOD_RED(cc, cc, ctx->mod);
216 a[0] = nmod_add(a[0], cc, ctx->mod);
217 }
218 }
219
n_fq_equal_fq_nmod(const mp_limb_t * a,const fq_nmod_t b,const fq_nmod_ctx_t ctx)220 int n_fq_equal_fq_nmod(
221 const mp_limb_t * a,
222 const fq_nmod_t b,
223 const fq_nmod_ctx_t ctx)
224 {
225 slong i, d = fq_nmod_ctx_degree(ctx);
226 FLINT_ASSERT(b->length <= d);
227 for (i = 0; i < d; i++)
228 {
229 mp_limb_t c = (i >= b->length) ? 0 : b->coeffs[i];
230 if (a[i] != c)
231 return 0;
232 }
233 return 1;
234 }
235
n_fq_add_fq_nmod(mp_limb_t * a,const mp_limb_t * b,const fq_nmod_t c,const fq_nmod_ctx_t ctx)236 void n_fq_add_fq_nmod(
237 mp_limb_t * a,
238 const mp_limb_t * b,
239 const fq_nmod_t c,
240 const fq_nmod_ctx_t ctx)
241 {
242 slong d = fq_nmod_ctx_degree(ctx);
243 slong i;
244
245 FLINT_ASSERT(c->length <= d);
246
247 for (i = 0; i < d; i++)
248 {
249 if (i < c->length)
250 a[i] = nmod_add(b[i], c->coeffs[i], ctx->mod);
251 else
252 a[i] = b[i];
253 }
254 }
255
256
n_fq_sub_fq_nmod(mp_limb_t * a,const mp_limb_t * b,const fq_nmod_t c,const fq_nmod_ctx_t ctx)257 void n_fq_sub_fq_nmod(
258 mp_limb_t * a,
259 const mp_limb_t * b,
260 const fq_nmod_t c,
261 const fq_nmod_ctx_t ctx)
262 {
263 slong d = fq_nmod_ctx_degree(ctx);
264 slong i;
265
266 FLINT_ASSERT(c->length <= d);
267
268 for (i = 0; i < d; i++)
269 {
270 if (i < c->length)
271 a[i] = nmod_sub(b[i], c->coeffs[i], ctx->mod);
272 else
273 a[i] = b[i];
274 }
275 }
276
277
_n_fq_reduce(mp_limb_t * a,mp_limb_t * b,slong blen,const fq_nmod_ctx_t ctx,mp_limb_t * t)278 void _n_fq_reduce(
279 mp_limb_t * a,
280 mp_limb_t * b,
281 slong blen,
282 const fq_nmod_ctx_t ctx,
283 mp_limb_t * t) /* length 2d */
284 {
285 slong i, j, k, deg = ctx->modulus->length - 1;
286 slong d = ctx->j[ctx->len - 1];
287
288 FLINT_ASSERT(a != b);
289
290 FLINT_ASSERT(0 <= blen && blen <= 2*d - 1);
291 FLINT_ASSERT(blen == 0 || b[blen - 1] != 0);
292
293 if (blen <= deg)
294 {
295 for (i = 0; i < blen; i++)
296 a[i] = b[i];
297 for (i = blen; i < deg; i++)
298 a[i] = 0;
299 }
300 else if (ctx->sparse_modulus)
301 {
302 nmod_t mod = ctx->mod;
303
304 for (k = ctx->len - 2; k >= 0; k--)
305 t[k] = mod.n - ctx->a[k];
306
307 for (i = blen - 1; i >= d; i--)
308 {
309 for (k = ctx->len - 2; k >= 0; k--)
310 NMOD_ADDMUL(b[ctx->j[k] + i - d], b[i], t[k], mod);
311
312 b[i] = 0;
313 }
314
315 for (i = 0; i < deg; i++)
316 a[i] = b[i];
317 }
318 else
319 {
320 /*
321 _nmod_poly_divrem_newton_n_preinv(t, a, b, blen,
322 ctx->modulus->coeffs, ctx->modulus->length,
323 ctx->inv->coeffs, ctx->inv->length,
324 ctx->mod);
325 */
326 mp_limb_t * Q = t;
327 mp_limb_t * R = a;
328 const mp_limb_t * A = b;
329 slong lenA = blen;
330 const mp_limb_t * B = ctx->modulus->coeffs;
331 slong lenB = deg + 1;
332 const mp_limb_t * Binv = ctx->inv->coeffs;
333 slong lenBinv = ctx->inv->length;
334
335 const slong lenQ = lenA - lenB + 1;
336
337 FLINT_ASSERT(lenBinv > 0);
338 FLINT_ASSERT(lenQ > 0);
339
340 if (lenQ <= 2)
341 {
342 if (lenQ == 2)
343 _nmod_poly_divrem_q1(Q, R, A, lenA, B, lenB, ctx->mod);
344 else
345 _nmod_poly_divrem_q0(Q, R, A, B, lenB, ctx->mod);
346 return;
347 }
348
349 if (deg < 20)
350 {
351 for (i = 0; i < lenQ; i++)
352 {
353 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
354 j = FLINT_MAX(0, i - lenBinv + 1);
355 umul_ppmm(t1, t0, A[lenA - 1 - j], Binv[i - j]);
356 for (j++; j <= i; j++)
357 MAC(t2, t1, t0, A[lenA - 1 - j], Binv[i - j]);
358 NMOD_RED3(Q[lenQ - 1 - i], t2, t1, t0, ctx->mod);
359 }
360
361 for (i = 0; i < deg; i++)
362 {
363 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
364 for (j = FLINT_MAX(0, i - lenQ + 1); j <= i; j++)
365 MAC(t2, t1, t0, B[j], Q[i - j]);
366 NMOD_RED3(t0, t2, t1, t0, ctx->mod);
367 R[i] = nmod_sub(A[i], t0, ctx->mod);
368 }
369 }
370 else
371 {
372 mp_ptr Arev = t + d;
373 _nmod_poly_reverse(Arev, A + (lenA - lenQ), lenQ, lenQ);
374 _nmod_poly_mullow(Q, Arev, lenQ, Binv, FLINT_MIN(lenQ, lenBinv), lenQ, ctx->mod);
375 _nmod_poly_reverse(Q, Q, lenQ, lenQ);
376 FLINT_ASSERT(lenB > 1);
377 FLINT_ASSERT(lenQ < lenB - 1);
378 _nmod_poly_mullow(R, B, lenB - 1, Q, lenQ, lenB - 1, ctx->mod);
379 _nmod_vec_sub(R, A, R, lenB - 1, ctx->mod);
380 }
381 }
382 }
383
_n_fq_madd2(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,const fq_nmod_ctx_t ctx,mp_limb_t * t)384 void _n_fq_madd2(
385 mp_limb_t * a, /* length 2d */
386 const mp_limb_t * b, /* length d */
387 const mp_limb_t * c, /* length d */
388 const fq_nmod_ctx_t ctx,
389 mp_limb_t * t) /* length 2d */
390 {
391 slong d = ctx->modulus->length - 1;
392 FLINT_ASSERT(d > 0);
393 if (d < 30)
394 {
395 slong i, j;
396 for (i = 0; i + 1 < d; i++)
397 {
398 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
399 mp_limb_t s2 = 0, s1 = 0, s0 = 0;
400 umul_ppmm(t1, t0, b[i], c[0]);
401 umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]);
402
403 add_ssaaaa(t1, t0, t1, t0, 0, a[i]);
404 add_ssaaaa(s1, s0, s1, s0, 0, a[2*d - 2 - i]);
405
406 for (j = 1; j <= i; j++)
407 {
408 MAC(t2, t1, t0, b[i - j], c[0 + j]);
409 MAC(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
410 }
411 NMOD_RED3(a[i], t2, t1, t0, ctx->mod);
412 NMOD_RED3(a[2*d - 2 - i], s2, s1, s0, ctx->mod);
413 }
414
415 {
416 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
417 umul_ppmm(t1, t0, b[d - 1], c[0]);
418 add_ssaaaa(t1, t0, t1, t0, 0, a[d - 1]);
419 for (j = 1; j < d; j++)
420 {
421 MAC(t2, t1, t0, b[d - 1 - j], c[0 + j]);
422 }
423 NMOD_RED3(a[d - 1], t2, t1, t0, ctx->mod);
424 }
425 }
426 else
427 {
428 _nmod_poly_mul(t, b, d, c, d, ctx->mod);
429 _nmod_vec_add(a, a, t, 2*d - 1, ctx->mod);
430 }
431 }
432
_n_fq_mul2(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,const fq_nmod_ctx_t ctx)433 void _n_fq_mul2(
434 mp_limb_t * a, /* length 2d */
435 const mp_limb_t * b, /* length d */
436 const mp_limb_t * c, /* length d */
437 const fq_nmod_ctx_t ctx)
438 {
439 slong d = fq_nmod_ctx_degree(ctx);
440 FLINT_ASSERT(d > 0);
441 if (d < 30)
442 {
443 slong i, j;
444 for (i = 0; i + 1 < d; i++)
445 {
446 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
447 mp_limb_t s2 = 0, s1 = 0, s0 = 0;
448 umul_ppmm(t1, t0, b[i], c[0]);
449 umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]);
450 for (j = 1; j <= i; j++)
451 {
452 MAC(t2, t1, t0, b[i - j], c[0 + j]);
453 MAC(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
454 }
455 NMOD_RED3(a[i], t2, t1, t0, ctx->mod);
456 NMOD_RED3(a[2*d - 2 - i], s2, s1, s0, ctx->mod);
457 }
458
459 {
460 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
461 umul_ppmm(t1, t0, b[d - 1], c[0]);
462 for (j = 1; j < d; j++)
463 {
464 MAC(t2, t1, t0, b[d - 1 - j], c[0 + j]);
465 }
466 NMOD_RED3(a[d - 1], t2, t1, t0, ctx->mod);
467 }
468 }
469 else
470 {
471 _nmod_poly_mul(a, b, d, c, d, ctx->mod);
472 }
473 }
474
475 /**************************** lazy *******************************************/
476
_n_fq_dot_lazy_size(slong len,const fq_nmod_ctx_t ctx)477 int _n_fq_dot_lazy_size(
478 slong len,
479 const fq_nmod_ctx_t ctx)
480 {
481 ulong t[4];
482 slong d = fq_nmod_ctx_degree(ctx);
483 mp_limb_t p = ctx->mod.n;
484
485 if (d > 30 || p < 2 || len < 0)
486 return 0;
487
488 umul_ppmm(t[1], t[0], p - 1, p - 1);
489 t[2] = mpn_mul_1(t, t, 2, d);
490 t[3] = mpn_mul_1(t, t, 3, len);
491
492 if (t[3] != 0)
493 return 0;
494 if (t[2] != 0)
495 return 3;
496 if (t[1] != 0)
497 return 2;
498 return 1;
499 }
500
501
_n_fq_reduce2_lazy1(mp_limb_t * a,slong d,nmod_t ctx)502 void _n_fq_reduce2_lazy1(
503 mp_limb_t * a, /* length 6d, 2d used */
504 slong d,
505 nmod_t ctx)
506 {
507 slong i;
508 for (i = 0; i < 2*d - 1; i++)
509 NMOD_RED(a[i], a[i], ctx);
510 }
511
_n_fq_madd2_lazy1(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,slong d)512 void _n_fq_madd2_lazy1(
513 mp_limb_t * a, /* length 6d, 2d used */
514 const mp_limb_t * b, /* length d */
515 const mp_limb_t * c, /* length d */
516 slong d)
517 {
518 slong i, j;
519
520 for (i = 0; i + 1 < d; i++)
521 {
522 mp_limb_t t0 = 0;
523 mp_limb_t s0 = 0;
524 t0 = a[i + 0];
525 s0 = a[(2*d - 2 - i) + 0];
526 t0 += b[i]*c[0];
527 s0 += b[d - 1]*c[d - 1 - i];
528 for (j = 1; j <= i; j++)
529 {
530 t0 += b[i - j]*c[0 + j];
531 s0 += b[d - 1 - j]*c[d - 1 - i + j];
532 }
533 a[i + 0] = t0;
534 a[(2*d - 2 - i) + 0] = s0;
535 }
536
537 {
538 mp_limb_t t0 = 0;
539 t0 = a[(d - 1) + 0];
540 t0 += b[d - 1]*c[0];
541 for (j = 1; j < d; j++)
542 {
543 t0 += b[d - 1 - j]*c[0 + j];
544 }
545 a[(d - 1) + 0] = t0;
546 }
547 }
548
549
_n_fq_mul2_lazy1(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,slong d)550 void _n_fq_mul2_lazy1(
551 mp_limb_t * a, /* length 6d, 2d used */
552 const mp_limb_t * b, /* length d */
553 const mp_limb_t * c, /* length d */
554 slong d)
555 {
556 slong i,j;
557
558 for (i = 0; i + 1 < d; i++)
559 {
560 mp_limb_t t0 = 0;
561 mp_limb_t s0 = 0;
562 t0 = b[i]*c[0];
563 s0 = b[d - 1]*c[d - 1 - i];
564 for (j = 1; j <= i; j++)
565 {
566 t0 += b[i - j]*c[0 + j];
567 s0 += b[d - 1 - j]*c[d - 1 - i + j];
568 }
569 a[i + 0] = t0;
570 a[(2*d - 2 - i) + 0] = s0;
571 }
572
573 {
574 mp_limb_t t0 = 0;
575 t0 = b[d - 1]*c[0];
576 for (j = 1; j < d; j++)
577 {
578 t0 += b[d - 1 - j]*c[0 + j];
579 }
580 a[(d - 1) + 0] = t0;
581 }
582 }
583
584
_n_fq_reduce2_lazy2(mp_limb_t * a,slong d,nmod_t ctx)585 void _n_fq_reduce2_lazy2(
586 mp_limb_t * a, /* length 6d, 4d used */
587 slong d,
588 nmod_t ctx)
589 {
590 slong i;
591 for (i = 0; i < 2*d - 1; i++)
592 NMOD2_RED2(a[i], a[2*i + 1], a[2*i + 0], ctx);
593 }
594
_n_fq_madd2_lazy2(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,slong d)595 void _n_fq_madd2_lazy2(
596 mp_limb_t * a, /* length 6d, 4d used */
597 const mp_limb_t * b, /* length d */
598 const mp_limb_t * c, /* length d */
599 slong d)
600 {
601 slong i,j;
602
603 for (i = 0; i + 1 < d; i++)
604 {
605 mp_limb_t t1 = 0, t0 = 0;
606 mp_limb_t s1 = 0, s0 = 0;
607 t0 = a[2*i + 0];
608 t1 = a[2*i + 1];
609 s0 = a[2*(2*d - 2 - i) + 0];
610 s1 = a[2*(2*d - 2 - i) + 1];
611 MAC2(t1, t0, b[i], c[0]);
612 MAC2(s1, s0, b[d - 1], c[d - 1 - i]);
613 for (j = 1; j <= i; j++)
614 {
615 MAC2(t1, t0, b[i - j], c[0 + j]);
616 MAC2(s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
617 }
618 a[2*i + 0] = t0;
619 a[2*i + 1] = t1;
620 a[2*(2*d - 2 - i) + 0] = s0;
621 a[2*(2*d - 2 - i) + 1] = s1;
622 }
623
624 {
625 mp_limb_t t1 = 0, t0 = 0;
626 t0 = a[2*(d - 1) + 0];
627 t1 = a[2*(d - 1) + 1];
628 MAC2(t1, t0, b[d - 1], c[0]);
629 for (j = 1; j < d; j++)
630 {
631 MAC2(t1, t0, b[d - 1 - j], c[0 + j]);
632 }
633 a[2*(d - 1) + 0] = t0;
634 a[2*(d - 1) + 1] = t1;
635 }
636 }
637
638
_n_fq_mul2_lazy2(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,slong d)639 void _n_fq_mul2_lazy2(
640 mp_limb_t * a, /* length 6d */
641 const mp_limb_t * b, /* length d */
642 const mp_limb_t * c, /* length d */
643 slong d)
644 {
645 slong i,j;
646
647 for (i = 0; i + 1 < d; i++)
648 {
649 mp_limb_t t1 = 0, t0 = 0;
650 mp_limb_t s1 = 0, s0 = 0;
651 umul_ppmm(t1, t0, b[i], c[0]);
652 umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]);
653 for (j = 1; j <= i; j++)
654 {
655 MAC2(t1, t0, b[i - j], c[0 + j]);
656 MAC2(s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
657 }
658 a[2*i + 0] = t0;
659 a[2*i + 1] = t1;
660 a[2*(2*d - 2 - i) + 0] = s0;
661 a[2*(2*d - 2 - i) + 1] = s1;
662 }
663
664 {
665 mp_limb_t t1 = 0, t0 = 0;
666 umul_ppmm(t1, t0, b[d - 1], c[0]);
667 for (j = 1; j < d; j++)
668 {
669 MAC2(t1, t0, b[d - 1 - j], c[0 + j]);
670 }
671 a[2*(d - 1) + 0] = t0;
672 a[2*(d - 1) + 1] = t1;
673 }
674 }
675
676
_n_fq_reduce2_lazy3(mp_limb_t * a,slong d,nmod_t ctx)677 void _n_fq_reduce2_lazy3(
678 mp_limb_t * a, /* length 6d */
679 slong d,
680 nmod_t ctx)
681 {
682 slong i;
683 for (i = 0; i < 2*d - 1; i++)
684 NMOD_RED3(a[i], a[3*i + 2], a[3*i + 1], a[3*i + 0], ctx);
685 }
686
_n_fq_madd2_lazy3(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,slong d)687 void _n_fq_madd2_lazy3(
688 mp_limb_t * a, /* length 6d */
689 const mp_limb_t * b, /* length d */
690 const mp_limb_t * c, /* length d */
691 slong d)
692 {
693 slong i,j;
694
695 for (i = 0; i + 1 < d; i++)
696 {
697 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
698 mp_limb_t s2 = 0, s1 = 0, s0 = 0;
699 t0 = a[3*i + 0];
700 t1 = a[3*i + 1];
701 t2 = a[3*i + 2];
702 s0 = a[3*(2*d - 2 - i) + 0];
703 s1 = a[3*(2*d - 2 - i) + 1];
704 s2 = a[3*(2*d - 2 - i) + 2];
705 MAC3(t2, t1, t0, b[i], c[0]);
706 MAC3(s2, s1, s0, b[d - 1], c[d - 1 - i]);
707 for (j = 1; j <= i; j++)
708 {
709 MAC3(t2, t1, t0, b[i - j], c[0 + j]);
710 MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
711 }
712 a[3*i + 0] = t0;
713 a[3*i + 1] = t1;
714 a[3*i + 2] = t2;
715 a[3*(2*d - 2 - i) + 0] = s0;
716 a[3*(2*d - 2 - i) + 1] = s1;
717 a[3*(2*d - 2 - i) + 2] = s2;
718 }
719
720 {
721 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
722 t0 = a[3*(d - 1) + 0];
723 t1 = a[3*(d - 1) + 1];
724 t2 = a[3*(d - 1) + 2];
725 MAC3(t2, t1, t0, b[d - 1], c[0]);
726 for (j = 1; j < d; j++)
727 {
728 MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]);
729 }
730 a[3*(d - 1) + 0] = t0;
731 a[3*(d - 1) + 1] = t1;
732 a[3*(d - 1) + 2] = t2;
733 }
734 }
735
736
_n_fq_mul2_lazy3(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,slong d)737 void _n_fq_mul2_lazy3(
738 mp_limb_t * a, /* length 6d */
739 const mp_limb_t * b, /* length d */
740 const mp_limb_t * c, /* length d */
741 slong d)
742 {
743 slong i,j;
744
745 for (i = 0; i + 1 < d; i++)
746 {
747 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
748 mp_limb_t s2 = 0, s1 = 0, s0 = 0;
749 umul_ppmm(t1, t0, b[i], c[0]);
750 umul_ppmm(s1, s0, b[d - 1], c[d - 1 - i]);
751 for (j = 1; j <= i; j++)
752 {
753 MAC3(t2, t1, t0, b[i - j], c[0 + j]);
754 MAC3(s2, s1, s0, b[d - 1 - j], c[d - 1 - i + j]);
755 }
756 a[3*i + 0] = t0;
757 a[3*i + 1] = t1;
758 a[3*i + 2] = t2;
759 a[3*(2*d - 2 - i) + 0] = s0;
760 a[3*(2*d - 2 - i) + 1] = s1;
761 a[3*(2*d - 2 - i) + 2] = s2;
762 }
763
764 {
765 mp_limb_t t2 = 0, t1 = 0, t0 = 0;
766 umul_ppmm(t1, t0, b[d - 1], c[0]);
767 for (j = 1; j < d; j++)
768 {
769 MAC3(t2, t1, t0, b[d - 1 - j], c[0 + j]);
770 }
771 a[3*(d - 1) + 0] = t0;
772 a[3*(d - 1) + 1] = t1;
773 a[3*(d - 1) + 2] = t2;
774 }
775 }
776
777
778 /***************************************************************************/
_n_fq_inv(mp_limb_t * a,const mp_limb_t * b,const fq_nmod_ctx_t ctx,mp_limb_t * t)779 void _n_fq_inv(
780 mp_limb_t * a,
781 const mp_limb_t * b,
782 const fq_nmod_ctx_t ctx,
783 mp_limb_t * t) /* length d */
784 {
785 slong d = ctx->modulus->length - 1;
786 slong blen = d;
787 FLINT_ASSERT(d > 0);
788
789 while (blen > 0 && b[blen - 1] == 0)
790 blen--;
791
792 if (blen < 1)
793 {
794 flint_throw(FLINT_ERROR, "impossible inverse in _fq_nmod_inv");
795 }
796 else if (blen == 1)
797 {
798 a[0] = n_invmod(b[0], ctx->mod.n);
799 _nmod_vec_zero(a + 1, d - 1);
800 }
801 else
802 {
803 if (1 != _nmod_poly_gcdinv(t, a, b, blen, ctx->modulus->coeffs, d + 1, ctx->mod))
804 {
805 flint_throw(FLINT_ERROR, "impossible inverse in _fq_nmod_inv");
806 }
807
808 if (t[0] != 1)
809 {
810 _nmod_vec_scalar_mul_nmod(a, a, d, n_invmod(t[0], ctx->mod.n), ctx->mod);
811 }
812 }
813 }
814
n_fq_mul(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,const fq_nmod_ctx_t ctx)815 void n_fq_mul(
816 mp_limb_t * a,
817 const mp_limb_t * b,
818 const mp_limb_t * c,
819 const fq_nmod_ctx_t ctx)
820 {
821 fq_nmod_t A, B, C;
822 fq_nmod_init(A, ctx);
823 fq_nmod_init(B, ctx);
824 fq_nmod_init(C, ctx);
825 n_fq_get_fq_nmod(B, b, ctx);
826 n_fq_get_fq_nmod(C, c, ctx);
827 fq_nmod_mul(A, B, C, ctx);
828 n_fq_set_fq_nmod(a, A, ctx);
829 fq_nmod_clear(A, ctx);
830 fq_nmod_clear(B, ctx);
831 fq_nmod_clear(C, ctx);
832 }
833
n_fq_addmul(mp_limb_t * a,const mp_limb_t * b,const mp_limb_t * c,const mp_limb_t * d,const fq_nmod_ctx_t ctx)834 void n_fq_addmul(
835 mp_limb_t * a,
836 const mp_limb_t * b,
837 const mp_limb_t * c,
838 const mp_limb_t * d,
839 const fq_nmod_ctx_t ctx)
840 {
841 mp_limb_t * t = FLINT_ARRAY_ALLOC(fq_nmod_ctx_degree(ctx), mp_limb_t);
842 n_fq_mul(t, c, d, ctx);
843 n_fq_add(a, b, t, ctx);
844 flint_free(t);
845 }
846
n_fq_mul_fq_nmod(mp_limb_t * a,const mp_limb_t * b,const fq_nmod_t C,const fq_nmod_ctx_t ctx)847 void n_fq_mul_fq_nmod(
848 mp_limb_t * a,
849 const mp_limb_t * b,
850 const fq_nmod_t C,
851 const fq_nmod_ctx_t ctx)
852 {
853 fq_nmod_t A, B;
854 fq_nmod_init(A, ctx);
855 fq_nmod_init(B, ctx);
856 n_fq_get_fq_nmod(B, b, ctx);
857 fq_nmod_mul(A, B, C, ctx);
858 n_fq_set_fq_nmod(a, A, ctx);
859 fq_nmod_clear(A, ctx);
860 fq_nmod_clear(B, ctx);
861 }
862
n_fq_inv(mp_limb_t * a,const mp_limb_t * b,const fq_nmod_ctx_t ctx)863 void n_fq_inv(
864 mp_limb_t * a,
865 const mp_limb_t * b,
866 const fq_nmod_ctx_t ctx)
867 {
868 fq_nmod_t A, B;
869 fq_nmod_init(A, ctx);
870 fq_nmod_init(B, ctx);
871 n_fq_get_fq_nmod(B, b, ctx);
872 fq_nmod_inv(A, B, ctx);
873 n_fq_set_fq_nmod(a, A, ctx);
874 fq_nmod_clear(A, ctx);
875 fq_nmod_clear(B, ctx);
876 }
877
_n_fq_pow_ui(mp_limb_t * a,const mp_limb_t * b,ulong e,const fq_nmod_ctx_t ctx)878 void _n_fq_pow_ui(
879 mp_limb_t * a,
880 const mp_limb_t * b,
881 ulong e,
882 const fq_nmod_ctx_t ctx)
883 {
884 fq_nmod_t A, B;
885 fq_nmod_init(A, ctx);
886 fq_nmod_init(B, ctx);
887 n_fq_get_fq_nmod(B, b, ctx);
888 fq_nmod_pow_ui(A, B, e, ctx);
889 n_fq_set_fq_nmod(a, A, ctx);
890 fq_nmod_clear(A, ctx);
891 fq_nmod_clear(B, ctx);
892 }
893
n_fq_pow_fmpz(mp_limb_t * a,const mp_limb_t * b,const fmpz_t e,const fq_nmod_ctx_t ctx)894 void n_fq_pow_fmpz(
895 mp_limb_t * a,
896 const mp_limb_t * b,
897 const fmpz_t e,
898 const fq_nmod_ctx_t ctx)
899 {
900 fq_nmod_t A, B;
901 fq_nmod_init(A, ctx);
902 fq_nmod_init(B, ctx);
903 n_fq_get_fq_nmod(B, b, ctx);
904 fq_nmod_pow(A, B, e, ctx);
905 n_fq_set_fq_nmod(a, A, ctx);
906 fq_nmod_clear(A, ctx);
907 fq_nmod_clear(B, ctx);
908 }
909
n_fq_pow_ui(mp_limb_t * a,const mp_limb_t * b,ulong e,const fq_nmod_ctx_t ctx)910 void n_fq_pow_ui(
911 mp_limb_t * a,
912 const mp_limb_t * b,
913 ulong e,
914 const fq_nmod_ctx_t ctx)
915 {
916 fq_nmod_t A, B;
917 fq_nmod_init(A, ctx);
918 fq_nmod_init(B, ctx);
919 n_fq_get_fq_nmod(B, b, ctx);
920 fq_nmod_pow_ui(A, B, e, ctx);
921 n_fq_set_fq_nmod(a, A, ctx);
922 fq_nmod_clear(A, ctx);
923 fq_nmod_clear(B, ctx);
924 }
925
n_fq_is_canonical(const mp_limb_t * a,const fq_nmod_ctx_t ctx)926 int n_fq_is_canonical(
927 const mp_limb_t * a,
928 const fq_nmod_ctx_t ctx)
929 {
930 slong i, d = fq_nmod_ctx_degree(ctx);
931 for (i = 0; i < d; i++)
932 {
933 if (a[i] >= ctx->mod.n)
934 return 0;
935 }
936 return 1;
937 }
938