1 #include "cado.h" // IWYU pragma: keep
2 #include <stdio.h>
3 /* This file implements some methods that work more or less the same
4 with Modulus64 and ModulusREDC64. E.g., div3() and gcd() work
5 unchanged with plain and Montgomery representation (so we can work on
6 the stored residue directly, whatever its representation is);
7 jacobi() converts to plain Integer first, the others use
8 only Modulus methods.
9 Speed-critical functions need to be rewritten in assembly for REDC,
10 but this is a start.
11 */
12
13 #include "mod_common.cpp"
14
15 bool
div3(Residue & r,const Residue & a) const16 Modulus::div3 (Residue &r, const Residue &a) const
17 {
18 const uint64_t a3 = a.r % 3;
19 uint64_t ml, m3;
20 Residue t(*this);
21
22 assertValid(a);
23
24 ml = getmod_u64 ();
25 m3 = ml % 3;
26 if (m3 == 0)
27 return false;
28
29 set (t, a);
30 if (a3 != 0) {
31 if (a3 + m3 == 3) /* Hence a3 == 1, m3 == 2 or a3 == 2, m3 == 1 */
32 t.r += ml;
33 else /* a3 == 1, m3 == 1 or a3 == 2, m3 == 2 */
34 t.r += 2 * ml;
35 }
36
37 /* Now t == a+k*m (mod 2^64) so that a+k*m is divisible by 3.
38 (a+k*m)/3 < 2^64, so doing a division (mod 2^64) produces the
39 correct result. */
40
41 t.r *= UINT64_C(0xAAAAAAAAAAAAAAAB);
42
43 #ifdef WANT_ASSERT_EXPENSIVE
44 Residue c(*this);
45 assertValid(t);
46 add(c, t, t);
47 add(c, c, t);
48 ASSERT_EXPENSIVE (equal (c, a));
49 #endif
50
51 set (r, t);
52
53 return true;
54 }
55
56
57 bool
div5(Residue & r,const Residue & a) const58 Modulus::div5 (Residue &r, const Residue &a) const
59 {
60 uint64_t ml, m5, k;
61 Residue t(*this);
62 const uint64_t a5 = a.r % 5;
63 const uint64_t inv5[5] = {0,4,2,3,1}; /* inv5[i] = -1/i (mod 5) */
64
65 assertValid(a);
66
67 ml = getmod_u64 ();
68 m5 = ml % 5;
69 if (m5 == 0)
70 return false;
71
72 set (t, a);
73 if (a5 != 0)
74 {
75 /* We want a+km == 0 (mod 5), so k = -a*m^{-1} (mod 5) */
76 k = (a5 * inv5[m5]) % 5;
77 ASSERT_EXPENSIVE ((k*m5 + a5) % 5 == 0);
78 t.r = a.r + k * ml;
79 }
80
81 /* Now t == a+k*m (mod 2^w) so that a+k*m is divisible by 5.
82 (a+k*m)/5 < 2^w, so doing a division (mod 2^w) produces the
83 correct result. */
84
85 t.r *= UINT64_C(0xcccccccccccccccd); /* 1/5 (mod 2^64) */
86
87 #ifdef WANT_ASSERT_EXPENSIVE
88 Residue c(*this);
89 assertValid(t);
90 add (c, t, t);
91 add (c, c, c);
92 add (c, c, t);
93 ASSERT_EXPENSIVE (equal (c, a));
94 #endif
95
96 set (r, t);
97
98 return true;
99 }
100
101
102 bool
div7(Residue & r,const Residue & a) const103 Modulus::div7 (Residue &r, const Residue &a) const
104 {
105 uint64_t ml, m7, k;
106 Residue t(*this);
107 const uint64_t a7 = a.r % 7;
108 const uint64_t inv7[7] = {0,6,3,2,5,4,1}; /* inv7[i] = -1/i (mod 7) */
109
110 assertValid(a);
111
112 ml = getmod_u64 ();
113 m7 = ml % 7;
114 if (m7 == 0)
115 return false;
116
117 set (t, a);
118 if (a7 != 0)
119 {
120 /* We want a+km == 0 (mod 7), so k = -a*m^{-1} (mod 7) */
121 k = (a7 * inv7[m7]) % 7;
122 ASSERT_EXPENSIVE ((k*m7 + a7) % 7 == 0);
123 t.r = a.r + k * ml;
124 }
125
126 /* Now t == a+k*m (mod 2^w) so that a+k*m is divisible by 7.
127 (a+k*m)/7 < 2^w, so doing a division (mod 2^w) produces the
128 correct result. */
129
130 t.r *= UINT64_C(0x6db6db6db6db6db7); /* 1/7 (mod 2^64) */
131
132 #ifdef WANT_ASSERT_EXPENSIVE
133 Residue c(*this);
134 assertValid(t);
135 add (c, t, t);
136 add (c, c, c);
137 add (c, c, c);
138 sub (c, c, t);
139 ASSERT_EXPENSIVE (equal (c, a));
140 #endif
141
142 set (r, t);
143
144 return true;
145 }
146
147
148 bool
div11(Residue & r,const Residue & a) const149 Modulus::div11 (Residue &r, const Residue &a) const
150 {
151 uint64_t ml, m11, k;
152 Residue t(*this);
153 const uint64_t a11 = a.r % 11;
154 /* inv11[i] = -1/i (mod 11) */
155 const uint64_t inv11[11] = {0, 10, 5, 7, 8, 2, 9, 3, 4, 6, 1};
156
157 assertValid(a);
158
159 ml = getmod_u64();
160 m11 = ml % 11;
161 if (m11 == 0)
162 return false;
163
164 set (t, a);
165 if (a11 != 0)
166 {
167 /* We want a+km == 0 (mod 11), so k = -a*m^{-1} (mod 11) */
168 k = (a11 * inv11[m11]) % 11;
169 ASSERT_EXPENSIVE ((k*m11 + a11) % 11 == 0);
170 t.r = a.r + k * ml;
171 }
172
173 /* Now t == a+k*m (mod 2^w) so that a+k*m is divisible by 11.
174 (a+k*m)/11 < 2^w, so doing a division (mod 2^w) produces the
175 correct result. */
176
177 t.r *= UINT64_C(0x2e8ba2e8ba2e8ba3); /* 1/11 (mod 2^64) */
178
179 #ifdef WANT_ASSERT_EXPENSIVE
180 Residue c(*this);
181 assertValid(t);
182 add (c, t, t); /* c = 2*t */
183 add (c, c, c); /* c = 4*t */
184 add (c, c, t); /* c = 5*t */
185 add (c, c, c); /* c = 10*t */
186 add (c, c, t); /* c = 11*t */
187 ASSERT_EXPENSIVE (equal (c, a));
188 #endif
189
190 set (r, t);
191
192 return true;
193 }
194
195
196 bool
div13(Residue & r,const Residue & a) const197 Modulus::div13 (Residue &r, const Residue &a) const
198 {
199 uint64_t ml, m13, k;
200 Residue t(*this);
201 const uint64_t a13 = a.r % 13;
202 /* inv13[i] = -1/i (mod 13) */
203 const uint64_t inv13[13] = {0, 12, 6, 4, 3, 5, 2, 11, 8, 10, 9, 7, 1};
204
205 assertValid(a);
206
207 ml = getmod_u64 ();
208 m13 = ml % 13;
209 if (m13 == 0)
210 return false;
211
212 set (t, a);
213 if (a13 != 0)
214 {
215 /* We want a+km == 0 (mod 13), so k = -a*m^{-1} (mod 13) */
216 k = (a13 * inv13[m13]) % 13;
217 ASSERT_EXPENSIVE ((k*m13 + a13) % 13 == 0);
218 t.r = a.r + k * ml;
219 }
220
221 /* Now t == a+k*m (mod 2^w) so that a+k*m is divisible by 13.
222 (a+k*m)/13 < 2^w, so doing a division (mod 2^w) produces the
223 correct result. */
224
225 t.r *= UINT64_C(0x4ec4ec4ec4ec4ec5); /* 1/13 (mod 2^64) */
226
227 #ifdef WANT_ASSERT_EXPENSIVE
228 Residue c(*this);
229 assertValid(t);
230 add (c, t, t); /* c = 2*t */
231 add (c, c, t); /* c = 3*t */
232 add (c, c, c); /* c = 6*t */
233 add (c, c, c); /* c = 12*t */
234 add (c, c, t); /* c = 13*t */
235 ASSERT_EXPENSIVE (equal (c, a));
236 #endif
237
238 set (r, t);
239
240 return true;
241 }
242
243
244 void
gcd(Integer & g,const Residue & r) const245 Modulus::gcd (Integer &g, const Residue &r) const
246 {
247 uint64_t a, b, t;
248
249 a = r.r; /* This works the same for "a" in plain or Montgomery
250 representation */
251 b = getmod_u64 ();
252 /* ASSERT (a < b); Should we require this? */
253 ASSERT (b > 0);
254
255 if (a >= b)
256 a %= b;
257
258 while (a > 0)
259 {
260 /* Here 0 < a < b */
261 t = b % a;
262 b = a;
263 a = t;
264 }
265
266 g = Integer(b);
267 }
268
269 template <typename Modulus, typename WordType>
pow2_oneWord(WordType mask,const WordType word,typename Modulus::Residue & t,const Modulus & m)270 static inline void pow2_oneWord(
271 WordType mask, const WordType word, typename Modulus::Residue &t,
272 const Modulus &m)
273 {
274 while (mask > 0) {
275 m.sqr (t, t);
276 if (word & mask) {
277 m.add (t, t, t);
278 }
279 mask >>= 1;
280 }
281 }
282
283 /* Compute r = 2^e. Here, e is a uint64_t */
284 void
pow2(Residue & r,const uint64_t e) const285 Modulus::pow2 (Residue &r, const uint64_t e) const
286 {
287 uint64_t mask;
288 Residue t(*this);
289
290 if (e == 0)
291 {
292 set1 (r);
293 return;
294 }
295
296 mask = (UINT64_C(1) << 63) >> u64arith_clz (e);
297
298 set1 (t);
299 add (t, t, t);
300 mask >>= 1;
301
302 pow2_oneWord(mask, e, t, *this);
303
304 set (r, t);
305 }
306
307
308 /* Compute r = 3^e. Here, e is a uint64_t */
309 void
pow3(Residue & r,const uint64_t e) const310 Modulus::pow3 (Residue &r, const uint64_t e) const
311 {
312 uint64_t mask;
313 Residue t(*this), u(*this);
314
315 if (e == 0)
316 {
317 set1 (r);
318 return;
319 }
320
321 mask = (UINT64_C(1) << 63) >> u64arith_clz (e);
322
323 set1 (u);
324 add (t, u, u);
325 add (t, t, u);
326 mask >>= 1;
327
328 while (mask > 0)
329 {
330 sqr (t, t);
331 add (u, t, t);
332 add (u, u, t);
333 if (e & mask)
334 set (t, u);
335 mask >>= 1;
336 }
337 set (r, t);
338 }
339
340
341 /* Computes 2^e (mod m), where e is a multiple precision integer.
342 Requires e != 0. The value of 2 in Montgomery representation
343 (i.e. 2*2^w (mod m) must be passed. */
344
345 void
pow2(Residue & r,const uint64_t * e,const size_t e_nrwords) const346 Modulus::pow2 (Residue &r, const uint64_t *e, const size_t e_nrwords) const
347 {
348 Residue t(*this);
349 uint64_t mask;
350 int i = e_nrwords;
351
352 while (i > 0 && e[i - 1] == 0)
353 i--;
354
355 if (i == 0) {
356 set1(r);
357 return;
358 }
359 i--;
360
361 mask = (UINT64_C(1) << 63) >> u64arith_clz (e[i]);
362
363 set1 (t);
364 add (t, t, t);
365 mask >>= 1;
366
367 for ( ; i >= 0; i--) {
368 pow2_oneWord(mask, e[i], t, *this);
369 mask = (UINT64_C(1) << 63);
370 }
371
372 set (r, t);
373 }
374
375 void
pow2(Residue & r,const Integer & e) const376 Modulus::pow2 (Residue &r, const Integer &e) const
377 {
378 size_t i = e.getWordCount();
379 const int bits = e.getWordSize();
380 const Integer::WordType msb = (Integer::WordType) 1 << (bits-1);
381 Integer::WordType word, mask;
382
383 while (i > 0 && e.getWord(i - 1) == 0)
384 i--;
385
386 if (i == 0) {
387 set1(r);
388 return;
389 }
390
391 word = e.getWord(i - 1);
392 mask = msb >> u64arith_clz (word);
393
394 set1 (r);
395 add (r, r, r);
396 mask >>= 1;
397
398 for ( ; i > 0; i--) {
399 word = e.getWord(i - 1);
400 pow2_oneWord(mask, word, r, *this);
401 mask = msb;
402 }
403 }
404
405 /* Returns 1 if m is a strong probable prime wrt base b, 0 otherwise.
406 We assume m is odd. */
407 bool
sprp(const Residue & b) const408 Modulus::sprp (const Residue &b) const
409 {
410 Residue r1(*this), minusone(*this);
411 int i = 0, po2 = 1;
412 uint64_t mm1;
413
414 mm1 = getmod_u64 ();
415
416 /* Set mm1 to the odd part of m-1 */
417 mm1 = (mm1 - 1) >> 1;
418 while (mm1 % 2 == 0)
419 {
420 po2++;
421 mm1 >>= 1;
422 }
423 /* Hence, m-1 = mm1 * 2^po2 */
424
425 set1 (minusone);
426 neg (minusone, minusone);
427
428 /* Exponentiate */
429 pow (r1, b, mm1);
430
431 /* Now r1 == b^mm1 (mod m) */
432 #if defined(PARI)
433 printf ("(Mod(%lu,%lu) ^ %lu) == %lu /* PARI */\n",
434 get_u64 (b, m), getmod_u64 (m), mm1, get_u64 (r1, m));
435 #endif
436
437 i = find_minus1 (r1, minusone, po2);
438
439 return i;
440 }
441
442
443 /* Returns 1 if m is a strong probable prime wrt base 2, 0 otherwise.
444 Assumes m > 1 and is odd.
445 */
446 bool
sprp2() const447 Modulus::sprp2 () const
448 {
449 Residue r(*this), minusone(*this);
450 int i = 0, po2 = 1;
451 uint64_t mm1;
452
453 mm1 = getmod_u64 ();
454
455 /* If m == 1,7 (mod 8), then 2 is a quadratic residue, and we must find
456 -1 with one less squaring. This does not reduce the number of
457 pseudo-primes because strong pseudo-primes are also Euler pseudo-primes,
458 but makes identifying composites a little faster on average. */
459 if (mm1 % 8 == 1 || mm1 % 8 == 7)
460 po2--;
461
462 /* Set mm1 to the odd part of m-1 */
463 mm1 = (mm1 - 1) >> 1;
464 while (mm1 % 2 == 0)
465 {
466 po2++;
467 mm1 >>= 1;
468 }
469 /* Hence, m-1 = mm1 * 2^po2 */
470
471 set1 (minusone);
472 neg (minusone, minusone);
473
474 /* Exponentiate */
475 pow2 (r, mm1);
476
477 /* Now r == b^mm1 (mod m) */
478 #if defined(PARI)
479 printf ("(Mod(2,%lu) ^ %lu) == %lu /* PARI */\n",
480 getmod_u64 (m), mm1, get_u64 (r, m));
481 #endif
482
483 i = find_minus1 (r, minusone, po2);
484
485 return i;
486 }
487
488
489 bool
isprime() const490 Modulus::isprime () const
491 {
492 Residue b(*this), minusone(*this), r1(*this);
493 const uint64_t n = getmod_u64 ();
494 uint64_t mm1;
495 int r = 0, po2;
496
497 if (n == 1)
498 return false;
499
500 if (n % 2 == 0)
501 {
502 r = (n == 2);
503 #if defined(PARI)
504 printf ("isprime(%lu) == %d /* PARI */\n", n, r);
505 #endif
506 return r;
507 }
508
509 /* Set mm1 to the odd part of m-1 */
510 mm1 = n - 1;
511 po2 = u64arith_ctz (mm1);
512 mm1 >>= po2;
513
514 set1 (minusone);
515 neg (minusone, minusone);
516
517 /* Do base 2 SPRP test */
518 pow2 (r1, mm1); /* r = 2^mm1 mod m */
519 /* If n is prime and 1 or 7 (mod 8), then 2 is a square (mod n)
520 and one less squaring must suffice. This does not strengthen the
521 test but saves one squaring for composite input */
522 if (n % 8 == 7)
523 {
524 if (!is1 (r1))
525 goto end;
526 }
527 else if (!find_minus1 (r1, minusone, po2 - ((n % 8 == 1) ? 1 : 0)))
528 goto end; /* Not prime */
529
530 if (n < 2047)
531 {
532 r = 1;
533 goto end;
534 }
535
536 /* Base 3 is poor at identifying composites == 1 (mod 3), but good at
537 identifying composites == 2 (mod 3). Thus we use it only for 2 (mod 3) */
538 if (n % 3 == 1)
539 {
540 set1 (b);
541 add (b, b, b);
542 add (b, b, b);
543 add (b, b, b);
544 add (b, b, minusone); /* b = 7 */
545 pow (r1, b, mm1); /* r = 7^mm1 mod m */
546 if (!find_minus1 (r1, minusone, po2))
547 goto end; /* Not prime */
548
549 if (n < 2269093) {
550 r = (n != 314821);
551 goto end;
552 }
553
554 /* b is still 7 here */
555 add (b, b, b); /* 14 */
556 sub (b, b, minusone); /* 15 */
557 add (b, b, b); /* 30 */
558 add (b, b, b); /* 60 */
559 sub (b, b, minusone); /* 61 */
560 pow (r1, b, mm1); /* r = 61^mm1 mod m */
561 if (!find_minus1 (r1, minusone, po2))
562 goto end; /* Not prime */
563
564 if (n != UINT64_C(4759123141) && n != UINT64_C(8411807377) && n < UINT64_C(11207066041))
565 {
566 r = 1;
567 goto end;
568 }
569
570 set1 (b);
571 add (b, b, b);
572 add (b, b, b);
573 sub (b, b, minusone); /* b = 5 */
574 pow (r1, b, mm1); /* r = 5^mm1 mod m */
575 if (!find_minus1 (r1, minusone, po2))
576 goto end; /* Not prime */
577
578 /* These are the base 5,7,61 SPSP < 10^13 and n == 1 (mod 3) */
579 r = (n != UINT64_C(30926647201) && n != UINT64_C(45821738881) &&
580 n != UINT64_C(74359744201) && n != UINT64_C(90528271681) &&
581 n != UINT64_C(110330267041) && n != UINT64_C(373303331521) &&
582 n != UINT64_C(440478111067) && n != UINT64_C(1436309367751) &&
583 n != UINT64_C(1437328758421) && n != UINT64_C(1858903385041) &&
584 n != UINT64_C(4897239482521) && n != UINT64_C(5026103290981) &&
585 n != UINT64_C(5219055617887) && n != UINT64_C(5660137043641) &&
586 n != UINT64_C(6385803726241));
587 }
588 else
589 {
590 /* Case n % 3 == 0, 2 */
591
592 pow3 (r1, mm1); /* r = 3^mm1 mod m */
593 if (!find_minus1 (r1, minusone, po2))
594 goto end; /* Not prime */
595
596 if (n < UINT64_C(102690677) && n != UINT64_C(5173601) && n != UINT64_C(16070429) &&
597 n != UINT64_C(54029741))
598 {
599 r = 1;
600 goto end;
601 }
602
603 set1 (b);
604 add (b, b, b);
605 add (b, b, b);
606 sub (b, b, minusone); /* b = 5 */
607 pow (r1, b, mm1); /* r = 5^mm1 mod m */
608 if (!find_minus1 (r1, minusone, po2))
609 goto end; /* Not prime */
610
611 /* These are the base 3,5 SPSP < 10^13 with n == 2 (mod 3) */
612 r = (n != UINT64_C(244970876021) && n != UINT64_C(405439595861) &&
613 n != UINT64_C(1566655993781) && n != UINT64_C(3857382025841) &&
614 n != UINT64_C(4074652846961) && n != UINT64_C(5783688565841));
615 }
616
617 end:
618 #if defined(PARI)
619 printf ("isprime(%lu) == %d /* PARI */\n", n, r);
620 #endif
621 return r;
622 }
623
624 #if 1
625
626 int
jacobi(const Residue & a_par) const627 Modulus::jacobi (const Residue &a_par) const
628 {
629 uint64_t x;
630 uint64_t mm;
631 unsigned int s, j;
632
633 /* Get residue in Montgomery form directly without converting */
634 x = a_par.r;
635 mm = getmod_u64 ();
636 if (x == 0) {
637 return (mm == 1) ? 1 : 0; /* Jacobi(0,1) = 1, Jacobi(0,>1) = 0 */
638 }
639 ASSERT (x < mm);
640 ASSERT(mm % 2 == 1);
641
642 j = u64arith_ctz(x);
643 x = x >> j;
644 /* If we divide by an odd power of 2, and 2 is a QNR, flip sign */
645 /* 2 is a QNR (mod mm) iff mm = 3,5 (mod 8)
646 mm = 1 = 001b: 1
647 mm = 3 = 011b: -1
648 mm = 5 = 101b: -1
649 mm = 7 = 111b: 1
650 Hence we can store in s the exponent of -1, i.e., s=0 for jacobi()=1
651 and s=1 for jacobi()=-1, and update s ^= (mm>>1) & (mm>>2) & 1.
652 We can do the &1 at the very end.
653 In fact, we store the exponent of -1 in the second bit of s.
654 The s ^= ((j<<1) & (mm ^ (mm>>1))) still needs 2 shift but one of them can
655 be done with LEA, and f = s ^ (x&mm) needs no shift */
656
657 s = ((j<<1) & (mm ^ (mm>>1)));
658
659 while (x > 1) {
660 /* Here, x < mm, x and mm are odd */
661
662 /* Implicitly swap by reversing roles of x and mm in next loop */
663 /* Flip sign if both are 3 (mod 4) */
664 s = s ^ (x&mm);
665
666 /* Make mm smaller by subtracting and shifting */
667 do {
668 mm -= x; /* Difference is even */
669 if (mm == 0)
670 break;
671 /* Make odd again */
672 j = u64arith_ctz(mm);
673 s ^= ((j<<1) & (x ^ (x>>1)));
674 mm >>= j;
675 } while (mm >= x);
676
677 if (mm <= 1) {
678 x = mm;
679 break;
680 }
681
682 /* Flip sign if both are 3 (mod 4) */
683 /* Implicitly swap again */
684 s = s ^ (x&mm);
685
686 /* Make x<mm by subtracting and shifting */
687 do {
688 x -= mm; /* Difference is even */
689 if (x == 0)
690 break;
691 /* Make odd again */
692 j = u64arith_ctz(x);
693 s ^= ((j<<1) & (mm ^ (mm>>1)));
694 x >>= j;
695 } while (x >= mm);
696 }
697
698 if (x == 0)
699 return 0;
700 return ((s & 2) == 0) ? 1 : -1;
701 }
702
703 #else
704
705 /*
706 #!/usr/bin/env python3
707 # Python program to create mult.h
708
709 def rate(k,b):
710 # The 0.25 magic constant here tries to estimate the ratio m/x,
711 # to minimize (x+c*m)/2^b
712 r = (abs(k)*0.25 + 1.)/2**b
713 # print ("rate(" + str(k) + ", " + str(b) + ") = " + str(r))
714 return(r)
715
716 def bestb(k):
717 best_b = 0
718 best_r = 1
719 best_c = 0
720 for b in range(1, 8):
721 c = k % (2**b)
722 r = rate(c, b)
723 if r < best_r:
724 best_r=r
725 best_b = b
726 best_c = c
727 c = - (2**b - c)
728 r = rate(c, b)
729 if r < best_r:
730 best_r=r
731 best_b = b
732 best_c = c
733 # print ("bestb(" + str(k) + ") = " + str(best_b))
734 return([k, best_b, best_c, 1/best_r])
735
736
737 r = [str(-bestb(2*i+1)[2]) for i in range(0, 128) ]
738 print("static char mult[128] = {" + ", ".join(r) + "};")
739 */
740
741 #include "mult.h"
742 #include "macros.h"
743 static unsigned char invmod8[256] = {
744 0, 1, 0, 171, 0, 205, 0, 183, 0, 57, 0, 163, 0, 197, 0, 239, 0, 241, 0, 27, 0, 61, 0, 167, 0, 41, 0, 19, 0, 53, 0, 223, 0, 225, 0, 139, 0, 173, 0, 151, 0, 25, 0, 131, 0, 165, 0, 207, 0, 209, 0, 251, 0, 29, 0, 135, 0, 9, 0, 243, 0, 21, 0, 191, 0, 193, 0, 107, 0, 141, 0, 119, 0, 249, 0, 99, 0, 133, 0, 175, 0, 177, 0, 219, 0, 253, 0, 103, 0, 233, 0, 211, 0, 245, 0, 159, 0, 161, 0, 75, 0, 109, 0, 87, 0, 217, 0, 67, 0, 101, 0, 143, 0, 145, 0, 187, 0, 221, 0, 71, 0, 201, 0, 179, 0, 213, 0, 127, 0, 129, 0, 43, 0, 77, 0, 55, 0, 185, 0, 35, 0, 69, 0, 111, 0, 113, 0, 155, 0, 189, 0, 39, 0, 169, 0, 147, 0, 181, 0, 95, 0, 97, 0, 11, 0, 45, 0, 23, 0, 153, 0, 3, 0, 37, 0, 79, 0, 81, 0, 123, 0, 157, 0, 7, 0, 137, 0, 115, 0, 149, 0, 63, 0, 65, 0, 235, 0, 13, 0, 247, 0, 121, 0, 227, 0, 5, 0, 47, 0, 49, 0, 91, 0, 125, 0, 231, 0, 105, 0, 83, 0, 117, 0, 31, 0, 33, 0, 203, 0, 237, 0, 215, 0, 89, 0, 195, 0, 229, 0, 15, 0, 17, 0, 59, 0, 93, 0, 199, 0, 73, 0, 51, 0, 85, 0, 255
745 };
746
747 static inline int
s_val(unsigned int s)748 s_val(unsigned int s) {
749 return ((s & 2) == 0) ? 1 : -1;
750 }
751
752 static int
jacobi1(const Residue & a_par)753 Modulus::jacobi1 (const Residue &a_par)
754 {
755 uint64_t x, m;
756 unsigned int s, j;
757
758 x = mod_get_ul (a_par, m_par);
759 m = mod_getmod_ul (m_par);
760 ASSERT (x < m);
761 ASSERT(m % 2 == 1);
762
763 j = u64arith_ctz(x);
764 x = x >> j;
765
766 s = ((j<<1) & (m ^ (m>>1)));
767
768 while (x > 1) {
769 uint64_t t;
770 unsigned char inv;
771
772 // printf ("kronecker(%lu, %lu) == %d * kronecker(%lu, %lu)\n",
773 // mod_get_ul(a_par, m_par), mod_getmod_ul (m_par), s_val(s), x, m);
774 /* Here x < m. Swap to make x > m */
775 t = m;
776 m = x;
777 x = t;
778 s = s ^ (x&m);
779
780 /* Now x > m */
781 inv = invmod8[(unsigned char)m];
782 do {
783 /* Do a REDC-like step. We want a multiplier k such that the low
784 8 bits of x+k*m are all zero.
785 That is, we want k = -x/m (mod 2^8). */
786 unsigned char k;
787 uint64_t t1;
788 long int c, t2;
789 // const uint64_t old_x = x;
790
791 k = inv * (unsigned char)x;
792 // ASSERT_ALWAYS((k & 1) == 1);
793 c = mult[k / 2];
794
795 /* Compute x+cm */
796 long tmp = c >> 63;
797 u64arith_mul_1_1_2 (&t1, (uint64_t *)&t2, (c ^ tmp) - tmp, m);
798 t2 ^= tmp;
799 t1 ^= tmp;
800 u64arith_add_1_2 (&t1, (uint64_t *)&t2, x-tmp);
801 tmp = ((long) t2) >> 63;
802
803 t2 ^= tmp;
804 t1 ^= tmp;
805 s ^= m & tmp;
806 u64arith_add_1_2 (&t1, (uint64_t *)&t2, -tmp);
807 // ASSERT_ALWAYS(t2 >= 0);
808
809 if (t1 == 0) {
810 if (t2 == 0) {
811 x = 0;
812 break;
813 }
814 t1 = t2;
815 /* Divided by 2^64 which is square, so no adjustment to s */
816 t2 = 0;
817 }
818
819 j = u64arith_ctz(t1);
820 u64arith_shrd (&t1, t2, j);
821 // ASSERT_ALWAYS((t2 >> j) == 0);
822 x = t1;
823 s ^= ((j<<1) & (m ^ (m>>1)));
824 // ASSERT_ALWAYS(x < old_x);
825 // printf ("%f\n", (double)old_x / (double)x);
826 } while (x >= m);
827 }
828
829 if (x == 0)
830 return 0;
831 return s_val(s);
832 }
833
834 int
jacobi(const Residue & a)835 Modulus::jacobi (const Residue &a)
836 {
837 uint64_t x, m;
838 unsigned int s, j;
839
840 x = mod_get_ul (a_par, m_par);
841 m = mod_getmod_ul (m_par);
842 ASSERT (x < m);
843 ASSERT(m % 2 == 1);
844
845 if ((LONG_MAX - x) / 50 < m)
846 return mod_jacobi1 (a_par, m_par);
847
848 j = u64arith_ctz(x);
849 x = x >> j;
850
851 s = ((j<<1) & (m ^ (m>>1)));
852
853 while (x > 1) {
854 uint64_t t;
855 unsigned char inv;
856
857 // printf ("kronecker(%lu, %lu) == %d * kronecker(%lu, %lu)\n",
858 // mod_get_ul(a_par, m_par), mod_getmod_ul (m_par), s_val(s), x, m);
859 /* Here x < m. Swap to make x > m */
860 t = m;
861 m = x;
862 x = t;
863 s = s ^ (x&m);
864
865 /* Now x > m */
866 inv = invmod8[(unsigned char)m];
867 do {
868 /* Do a REDC-like step. We want a multiplier k such that the low
869 8 bits of x+k*m are all zero.
870 That is, we want k = -x/m (mod 2^8). */
871 unsigned char k;
872 long int c;
873 // const uint64_t old_x = x;
874
875 k = inv * x;
876 // ASSERT_ALWAYS((k & 1) == 1);
877 c = mult[k / 2];
878
879 c = x + c*m;
880 x = c;
881 c >>= 63;
882 x = (x ^ c) - c;
883
884 if (x == 0) {
885 break;
886 }
887 s ^= m & c;
888
889 j = u64arith_ctz(x);
890 x >>= j;
891 s ^= ((j<<1) & (m ^ (m>>1)));
892 // printf ("%f\n", (double)old_x / (double)x);
893 } while (x >= m);
894 }
895
896 if (x == 0)
897 return 0;
898 return s_val(s);
899 }
900
901 #endif
902