1 #include "cado.h" // IWYU pragma: keep
2 #include <stdio.h>
3 /* This file implements some methods that work more or less the same
4    with Modulus64 and ModulusREDC64. E.g., div3() and gcd() work
5    unchanged with plain and Montgomery representation (so we can work on
6    the stored residue directly, whatever its representation is);
7    jacobi() converts to plain Integer first, the others use
8    only Modulus methods.
9    Speed-critical functions need to be rewritten in assembly for REDC,
10    but this is a start.
11 */
12 
13 #include "mod_common.cpp"
14 
15 bool
div3(Residue & r,const Residue & a) const16 Modulus::div3 (Residue &r, const Residue &a) const
17 {
18   const uint64_t a3 = a.r % 3;
19   uint64_t ml, m3;
20   Residue t(*this);
21 
22   assertValid(a);
23 
24   ml = getmod_u64 ();
25   m3 = ml % 3;
26   if (m3 == 0)
27     return false;
28 
29   set (t, a);
30   if (a3 != 0) {
31     if (a3 + m3 == 3) /* Hence a3 == 1, m3 == 2 or a3 == 2, m3 == 1 */
32       t.r += ml;
33     else /* a3 == 1, m3 == 1 or a3 == 2, m3 == 2 */
34       t.r += 2 * ml;
35   }
36 
37   /* Now t == a+k*m (mod 2^64) so that a+k*m is divisible by 3.
38      (a+k*m)/3 < 2^64, so doing a division (mod 2^64) produces the
39      correct result. */
40 
41   t.r *= UINT64_C(0xAAAAAAAAAAAAAAAB);
42 
43 #ifdef WANT_ASSERT_EXPENSIVE
44   Residue c(*this);
45   assertValid(t);
46   add(c, t, t);
47   add(c, c, t);
48   ASSERT_EXPENSIVE (equal (c, a));
49 #endif
50 
51   set (r, t);
52 
53   return true;
54 }
55 
56 
57 bool
div5(Residue & r,const Residue & a) const58 Modulus::div5 (Residue &r, const Residue &a) const
59 {
60   uint64_t ml, m5, k;
61   Residue t(*this);
62   const uint64_t a5 = a.r % 5;
63   const uint64_t inv5[5] = {0,4,2,3,1}; /* inv5[i] = -1/i (mod 5) */
64 
65   assertValid(a);
66 
67   ml = getmod_u64 ();
68   m5 = ml % 5;
69   if (m5 == 0)
70     return false;
71 
72   set (t, a);
73   if (a5 != 0)
74     {
75       /* We want a+km == 0 (mod 5), so k = -a*m^{-1} (mod 5) */
76       k = (a5 * inv5[m5]) % 5;
77       ASSERT_EXPENSIVE ((k*m5 + a5) % 5 == 0);
78       t.r = a.r + k * ml;
79     }
80 
81   /* Now t == a+k*m (mod 2^w) so that a+k*m is divisible by 5.
82      (a+k*m)/5 < 2^w, so doing a division (mod 2^w) produces the
83      correct result. */
84 
85     t.r *= UINT64_C(0xcccccccccccccccd); /* 1/5 (mod 2^64) */
86 
87 #ifdef WANT_ASSERT_EXPENSIVE
88   Residue c(*this);
89   assertValid(t);
90   add (c, t, t);
91   add (c, c, c);
92   add (c, c, t);
93   ASSERT_EXPENSIVE (equal (c, a));
94 #endif
95 
96   set (r, t);
97 
98   return true;
99 }
100 
101 
102 bool
div7(Residue & r,const Residue & a) const103 Modulus::div7 (Residue &r, const Residue &a) const
104 {
105   uint64_t ml, m7, k;
106   Residue t(*this);
107   const uint64_t a7 = a.r % 7;
108   const uint64_t inv7[7] = {0,6,3,2,5,4,1}; /* inv7[i] = -1/i (mod 7) */
109 
110   assertValid(a);
111 
112   ml = getmod_u64 ();
113   m7 = ml % 7;
114   if (m7 == 0)
115     return false;
116 
117   set (t, a);
118   if (a7 != 0)
119     {
120       /* We want a+km == 0 (mod 7), so k = -a*m^{-1} (mod 7) */
121       k = (a7 * inv7[m7]) % 7;
122       ASSERT_EXPENSIVE ((k*m7 + a7) % 7 == 0);
123       t.r = a.r + k * ml;
124     }
125 
126   /* Now t == a+k*m (mod 2^w) so that a+k*m is divisible by 7.
127      (a+k*m)/7 < 2^w, so doing a division (mod 2^w) produces the
128      correct result. */
129 
130     t.r *= UINT64_C(0x6db6db6db6db6db7); /* 1/7 (mod 2^64) */
131 
132 #ifdef WANT_ASSERT_EXPENSIVE
133   Residue c(*this);
134   assertValid(t);
135   add (c, t, t);
136   add (c, c, c);
137   add (c, c, c);
138   sub (c, c, t);
139   ASSERT_EXPENSIVE (equal (c, a));
140 #endif
141 
142   set (r, t);
143 
144   return true;
145 }
146 
147 
148 bool
div11(Residue & r,const Residue & a) const149 Modulus::div11 (Residue &r, const Residue &a) const
150 {
151   uint64_t ml, m11, k;
152   Residue t(*this);
153   const uint64_t a11 = a.r % 11;
154   /* inv11[i] = -1/i (mod 11) */
155   const uint64_t inv11[11] = {0, 10, 5, 7, 8, 2, 9, 3, 4, 6, 1};
156 
157   assertValid(a);
158 
159   ml = getmod_u64();
160   m11 = ml % 11;
161   if (m11 == 0)
162     return false;
163 
164   set (t, a);
165   if (a11 != 0)
166     {
167       /* We want a+km == 0 (mod 11), so k = -a*m^{-1} (mod 11) */
168       k = (a11 * inv11[m11]) % 11;
169       ASSERT_EXPENSIVE ((k*m11 + a11) % 11 == 0);
170       t.r = a.r + k * ml;
171     }
172 
173   /* Now t == a+k*m (mod 2^w) so that a+k*m is divisible by 11.
174      (a+k*m)/11 < 2^w, so doing a division (mod 2^w) produces the
175      correct result. */
176 
177     t.r *= UINT64_C(0x2e8ba2e8ba2e8ba3); /* 1/11 (mod 2^64) */
178 
179 #ifdef WANT_ASSERT_EXPENSIVE
180   Residue c(*this);
181   assertValid(t);
182   add (c, t, t); /* c = 2*t */
183   add (c, c, c); /* c = 4*t */
184   add (c, c, t); /* c = 5*t */
185   add (c, c, c); /* c = 10*t */
186   add (c, c, t); /* c = 11*t */
187   ASSERT_EXPENSIVE (equal (c, a));
188 #endif
189 
190   set (r, t);
191 
192   return true;
193 }
194 
195 
196 bool
div13(Residue & r,const Residue & a) const197 Modulus::div13 (Residue &r, const Residue &a) const
198 {
199   uint64_t ml, m13, k;
200   Residue t(*this);
201   const uint64_t a13 = a.r % 13;
202   /* inv13[i] = -1/i (mod 13) */
203   const uint64_t inv13[13] = {0, 12, 6, 4, 3, 5, 2, 11, 8, 10, 9, 7, 1};
204 
205   assertValid(a);
206 
207   ml = getmod_u64 ();
208   m13 = ml % 13;
209   if (m13 == 0)
210     return false;
211 
212   set (t, a);
213   if (a13 != 0)
214     {
215       /* We want a+km == 0 (mod 13), so k = -a*m^{-1} (mod 13) */
216       k = (a13 * inv13[m13]) % 13;
217       ASSERT_EXPENSIVE ((k*m13 + a13) % 13 == 0);
218       t.r = a.r + k * ml;
219     }
220 
221   /* Now t == a+k*m (mod 2^w) so that a+k*m is divisible by 13.
222      (a+k*m)/13 < 2^w, so doing a division (mod 2^w) produces the
223      correct result. */
224 
225     t.r *= UINT64_C(0x4ec4ec4ec4ec4ec5); /* 1/13 (mod 2^64) */
226 
227 #ifdef WANT_ASSERT_EXPENSIVE
228   Residue c(*this);
229   assertValid(t);
230   add (c, t, t); /* c = 2*t */
231   add (c, c, t); /* c = 3*t */
232   add (c, c, c); /* c = 6*t */
233   add (c, c, c); /* c = 12*t */
234   add (c, c, t); /* c = 13*t */
235   ASSERT_EXPENSIVE (equal (c, a));
236 #endif
237 
238   set (r, t);
239 
240   return true;
241 }
242 
243 
244 void
gcd(Integer & g,const Residue & r) const245 Modulus::gcd (Integer &g, const Residue &r) const
246 {
247   uint64_t a, b, t;
248 
249   a = r.r; /* This works the same for "a" in plain or Montgomery
250                representation */
251   b = getmod_u64 ();
252   /* ASSERT (a < b); Should we require this? */
253   ASSERT (b > 0);
254 
255   if (a >= b)
256     a %= b;
257 
258   while (a > 0)
259     {
260       /* Here 0 < a < b */
261       t = b % a;
262       b = a;
263       a = t;
264     }
265 
266   g = Integer(b);
267 }
268 
269 template <typename Modulus, typename WordType>
pow2_oneWord(WordType mask,const WordType word,typename Modulus::Residue & t,const Modulus & m)270 static inline void pow2_oneWord(
271     WordType mask, const WordType word, typename Modulus::Residue &t,
272     const Modulus &m)
273 {
274     while (mask > 0) {
275         m.sqr (t, t);
276         if (word & mask) {
277             m.add (t, t, t);
278         }
279         mask >>= 1;
280     }
281 }
282 
283 /* Compute r = 2^e. Here, e is a uint64_t */
284 void
pow2(Residue & r,const uint64_t e) const285 Modulus::pow2 (Residue &r, const uint64_t e) const
286 {
287   uint64_t mask;
288   Residue t(*this);
289 
290   if (e == 0)
291     {
292       set1 (r);
293       return;
294     }
295 
296   mask = (UINT64_C(1) << 63) >> u64arith_clz (e);
297 
298   set1 (t);
299   add (t, t, t);
300   mask >>= 1;
301 
302   pow2_oneWord(mask, e, t, *this);
303 
304   set (r, t);
305 }
306 
307 
308 /* Compute r = 3^e. Here, e is a uint64_t */
309 void
pow3(Residue & r,const uint64_t e) const310 Modulus::pow3 (Residue &r, const uint64_t e) const
311 {
312   uint64_t mask;
313   Residue t(*this), u(*this);
314 
315   if (e == 0)
316     {
317       set1 (r);
318       return;
319     }
320 
321   mask = (UINT64_C(1) << 63) >> u64arith_clz (e);
322 
323   set1 (u);
324   add (t, u, u);
325   add (t, t, u);
326   mask >>= 1;
327 
328   while (mask > 0)
329     {
330       sqr (t, t);
331       add (u, t, t);
332       add (u, u, t);
333       if (e & mask)
334         set (t, u);
335       mask >>= 1;
336     }
337   set (r, t);
338 }
339 
340 
341 /* Computes 2^e (mod m), where e is a multiple precision integer.
342    Requires e != 0. The value of 2 in Montgomery representation
343    (i.e. 2*2^w (mod m) must be passed. */
344 
345 void
pow2(Residue & r,const uint64_t * e,const size_t e_nrwords) const346 Modulus::pow2 (Residue &r, const uint64_t *e, const size_t e_nrwords) const
347 {
348   Residue t(*this);
349   uint64_t mask;
350   int i = e_nrwords;
351 
352   while (i > 0 && e[i - 1] == 0)
353       i--;
354 
355   if (i == 0) {
356       set1(r);
357       return;
358   }
359   i--;
360 
361   mask = (UINT64_C(1) << 63) >> u64arith_clz (e[i]);
362 
363   set1 (t);
364   add (t, t, t);
365   mask >>= 1;
366 
367   for ( ; i >= 0; i--) {
368       pow2_oneWord(mask, e[i], t, *this);
369       mask = (UINT64_C(1) << 63);
370     }
371 
372   set (r, t);
373 }
374 
375 void
pow2(Residue & r,const Integer & e) const376 Modulus::pow2 (Residue &r, const Integer &e) const
377 {
378   size_t i = e.getWordCount();
379   const int bits = e.getWordSize();
380   const Integer::WordType msb = (Integer::WordType) 1 << (bits-1);
381   Integer::WordType word, mask;
382 
383   while (i > 0 && e.getWord(i - 1) == 0)
384       i--;
385 
386   if (i == 0) {
387       set1(r);
388       return;
389   }
390 
391   word = e.getWord(i - 1);
392   mask = msb >> u64arith_clz (word);
393 
394   set1 (r);
395   add (r, r, r);
396   mask >>= 1;
397 
398   for ( ; i > 0; i--) {
399       word = e.getWord(i - 1);
400       pow2_oneWord(mask, word, r, *this);
401       mask = msb;
402     }
403 }
404 
405 /* Returns 1 if m is a strong probable prime wrt base b, 0 otherwise.
406    We assume m is odd. */
407 bool
sprp(const Residue & b) const408 Modulus::sprp (const Residue &b) const
409 {
410   Residue r1(*this), minusone(*this);
411   int i = 0, po2 = 1;
412   uint64_t mm1;
413 
414   mm1 = getmod_u64 ();
415 
416   /* Set mm1 to the odd part of m-1 */
417   mm1 = (mm1 - 1) >> 1;
418   while (mm1 % 2 == 0)
419     {
420       po2++;
421       mm1 >>= 1;
422     }
423   /* Hence, m-1 = mm1 * 2^po2 */
424 
425   set1 (minusone);
426   neg (minusone, minusone);
427 
428   /* Exponentiate */
429   pow (r1, b, mm1);
430 
431   /* Now r1 == b^mm1 (mod m) */
432 #if defined(PARI)
433   printf ("(Mod(%lu,%lu) ^ %lu) == %lu /* PARI */\n",
434 	  get_u64 (b, m), getmod_u64 (m), mm1, get_u64 (r1, m));
435 #endif
436 
437   i = find_minus1 (r1, minusone, po2);
438 
439   return i;
440 }
441 
442 
443 /* Returns 1 if m is a strong probable prime wrt base 2, 0 otherwise.
444    Assumes m > 1 and is odd.
445  */
446 bool
sprp2() const447 Modulus::sprp2 () const
448 {
449   Residue r(*this), minusone(*this);
450   int i = 0, po2 = 1;
451   uint64_t mm1;
452 
453   mm1 = getmod_u64 ();
454 
455   /* If m == 1,7 (mod 8), then 2 is a quadratic residue, and we must find
456      -1 with one less squaring. This does not reduce the number of
457      pseudo-primes because strong pseudo-primes are also Euler pseudo-primes,
458      but makes identifying composites a little faster on average. */
459   if (mm1 % 8 == 1 || mm1 % 8 == 7)
460     po2--;
461 
462   /* Set mm1 to the odd part of m-1 */
463   mm1 = (mm1 - 1) >> 1;
464   while (mm1 % 2 == 0)
465     {
466       po2++;
467       mm1 >>= 1;
468     }
469   /* Hence, m-1 = mm1 * 2^po2 */
470 
471   set1 (minusone);
472   neg (minusone, minusone);
473 
474   /* Exponentiate */
475   pow2 (r, mm1);
476 
477   /* Now r == b^mm1 (mod m) */
478 #if defined(PARI)
479   printf ("(Mod(2,%lu) ^ %lu) == %lu /* PARI */\n",
480 	  getmod_u64 (m), mm1, get_u64 (r, m));
481 #endif
482 
483   i = find_minus1 (r, minusone, po2);
484 
485   return i;
486 }
487 
488 
489 bool
isprime() const490 Modulus::isprime () const
491 {
492   Residue b(*this), minusone(*this), r1(*this);
493   const uint64_t n = getmod_u64 ();
494   uint64_t mm1;
495   int r = 0, po2;
496 
497   if (n == 1)
498     return false;
499 
500   if (n % 2 == 0)
501     {
502       r = (n == 2);
503 #if defined(PARI)
504       printf ("isprime(%lu) == %d /* PARI */\n", n, r);
505 #endif
506       return r;
507     }
508 
509   /* Set mm1 to the odd part of m-1 */
510   mm1 = n - 1;
511   po2 = u64arith_ctz (mm1);
512   mm1 >>= po2;
513 
514   set1 (minusone);
515   neg (minusone, minusone);
516 
517   /* Do base 2 SPRP test */
518   pow2 (r1, mm1);   /* r = 2^mm1 mod m */
519   /* If n is prime and 1 or 7 (mod 8), then 2 is a square (mod n)
520      and one less squaring must suffice. This does not strengthen the
521      test but saves one squaring for composite input */
522   if (n % 8 == 7)
523     {
524       if (!is1 (r1))
525         goto end;
526     }
527   else if (!find_minus1 (r1, minusone, po2 - ((n % 8 == 1) ? 1 : 0)))
528     goto end; /* Not prime */
529 
530   if (n < 2047)
531     {
532       r = 1;
533       goto end;
534     }
535 
536   /* Base 3 is poor at identifying composites == 1 (mod 3), but good at
537      identifying composites == 2 (mod 3). Thus we use it only for 2 (mod 3) */
538   if (n % 3 == 1)
539     {
540       set1 (b);
541       add (b, b, b);
542       add (b, b, b);
543       add (b, b, b);
544       add (b, b, minusone);  /* b = 7 */
545       pow (r1, b, mm1);   /* r = 7^mm1 mod m */
546       if (!find_minus1 (r1, minusone, po2))
547         goto end; /* Not prime */
548 
549       if (n < 2269093) {
550         r = (n != 314821);
551         goto end;
552       }
553 
554       /* b is still 7 here */
555       add (b, b, b); /* 14 */
556       sub (b, b, minusone); /* 15 */
557       add (b, b, b); /* 30 */
558       add (b, b, b); /* 60 */
559       sub (b, b, minusone); /* 61 */
560       pow (r1, b, mm1);   /* r = 61^mm1 mod m */
561       if (!find_minus1 (r1, minusone, po2))
562 	goto end; /* Not prime */
563 
564       if (n != UINT64_C(4759123141) && n != UINT64_C(8411807377) && n < UINT64_C(11207066041))
565         {
566             r = 1;
567             goto end;
568         }
569 
570       set1 (b);
571       add (b, b, b);
572       add (b, b, b);
573       sub (b, b, minusone);    /* b = 5 */
574       pow (r1, b, mm1);   /* r = 5^mm1 mod m */
575       if (!find_minus1 (r1, minusone, po2))
576 	goto end; /* Not prime */
577 
578           /* These are the base 5,7,61 SPSP < 10^13 and n == 1 (mod 3) */
579 	  r = (n != UINT64_C(30926647201) && n != UINT64_C(45821738881) &&
580 	       n != UINT64_C(74359744201) && n != UINT64_C(90528271681) &&
581 	       n != UINT64_C(110330267041) && n != UINT64_C(373303331521) &&
582 	       n != UINT64_C(440478111067) && n != UINT64_C(1436309367751) &&
583 	       n != UINT64_C(1437328758421) && n != UINT64_C(1858903385041) &&
584 	       n != UINT64_C(4897239482521) && n != UINT64_C(5026103290981) &&
585 	       n != UINT64_C(5219055617887) && n != UINT64_C(5660137043641) &&
586 	       n != UINT64_C(6385803726241));
587     }
588   else
589     {
590       /* Case n % 3 == 0, 2 */
591 
592       pow3 (r1, mm1);   /* r = 3^mm1 mod m */
593       if (!find_minus1 (r1, minusone, po2))
594 	goto end; /* Not prime */
595 
596       if (n < UINT64_C(102690677) && n != UINT64_C(5173601) && n != UINT64_C(16070429) &&
597           n != UINT64_C(54029741))
598         {
599 	  r = 1;
600 	  goto end;
601 	}
602 
603       set1 (b);
604       add (b, b, b);
605       add (b, b, b);
606       sub (b, b, minusone);    /* b = 5 */
607       pow (r1, b, mm1);   /* r = 5^mm1 mod m */
608       if (!find_minus1 (r1, minusone, po2))
609 	goto end; /* Not prime */
610 
611       /* These are the base 3,5 SPSP < 10^13 with n == 2 (mod 3) */
612       r = (n != UINT64_C(244970876021) && n != UINT64_C(405439595861) &&
613 	   n != UINT64_C(1566655993781) && n != UINT64_C(3857382025841) &&
614 	   n != UINT64_C(4074652846961) && n != UINT64_C(5783688565841));
615     }
616 
617  end:
618 #if defined(PARI)
619   printf ("isprime(%lu) == %d /* PARI */\n", n, r);
620 #endif
621   return r;
622 }
623 
624 #if 1
625 
626 int
jacobi(const Residue & a_par) const627 Modulus::jacobi (const Residue &a_par) const
628 {
629   uint64_t x;
630   uint64_t mm;
631   unsigned int s, j;
632 
633   /* Get residue in Montgomery form directly without converting */
634   x = a_par.r;
635   mm = getmod_u64 ();
636   if (x == 0) {
637       return (mm == 1) ? 1 : 0; /* Jacobi(0,1) = 1, Jacobi(0,>1) = 0 */
638   }
639   ASSERT (x < mm);
640   ASSERT(mm % 2 == 1);
641 
642   j = u64arith_ctz(x);
643   x = x >> j;
644   /* If we divide by an odd power of 2, and 2 is a QNR, flip sign */
645   /* 2 is a QNR (mod mm) iff mm = 3,5 (mod 8)
646      mm = 1 = 001b:   1
647      mm = 3 = 011b:  -1
648      mm = 5 = 101b:  -1
649      mm = 7 = 111b:   1
650      Hence we can store in s the exponent of -1, i.e., s=0 for jacobi()=1
651      and s=1 for jacobi()=-1, and update s ^= (mm>>1) & (mm>>2) & 1.
652      We can do the &1 at the very end.
653      In fact, we store the exponent of -1 in the second bit of s.
654      The s ^= ((j<<1) & (mm ^ (mm>>1))) still needs 2 shift but one of them can
655      be done with LEA, and f = s ^ (x&mm) needs no shift */
656 
657   s = ((j<<1) & (mm ^ (mm>>1)));
658 
659   while (x > 1) {
660     /* Here, x < mm, x and mm are odd */
661 
662     /* Implicitly swap by reversing roles of x and mm in next loop */
663     /* Flip sign if both are 3 (mod 4) */
664     s = s ^ (x&mm);
665 
666     /* Make mm smaller by subtracting and shifting */
667     do {
668       mm -= x; /* Difference is even */
669       if (mm == 0)
670         break;
671       /* Make odd again */
672       j = u64arith_ctz(mm);
673       s ^= ((j<<1) & (x ^ (x>>1)));
674       mm >>= j;
675     } while (mm >= x);
676 
677     if (mm <= 1) {
678       x = mm;
679       break;
680     }
681 
682     /* Flip sign if both are 3 (mod 4) */
683     /* Implicitly swap again */
684     s = s ^ (x&mm);
685 
686     /* Make x<mm by subtracting and shifting */
687     do {
688       x -= mm; /* Difference is even */
689       if (x == 0)
690         break;
691       /* Make odd again */
692       j = u64arith_ctz(x);
693       s ^= ((j<<1) & (mm ^ (mm>>1)));
694       x >>= j;
695     } while (x >= mm);
696   }
697 
698   if (x == 0)
699     return 0;
700   return ((s & 2) == 0) ? 1 : -1;
701 }
702 
703 #else
704 
705 /*
706 #!/usr/bin/env python3
707 # Python program to create mult.h
708 
709 def rate(k,b):
710   # The 0.25 magic constant here tries to estimate the ratio m/x,
711   # to minimize (x+c*m)/2^b
712   r = (abs(k)*0.25 + 1.)/2**b
713   # print ("rate(" + str(k) + ", " + str(b) + ") = " + str(r))
714   return(r)
715 
716 def bestb(k):
717   best_b = 0
718   best_r = 1
719   best_c = 0
720   for b in range(1, 8):
721     c = k % (2**b)
722     r = rate(c, b)
723     if r < best_r:
724       best_r=r
725       best_b = b
726       best_c = c
727     c = - (2**b - c)
728     r = rate(c, b)
729     if r < best_r:
730       best_r=r
731       best_b = b
732       best_c = c
733   # print ("bestb(" + str(k) + ") = " + str(best_b))
734   return([k, best_b, best_c, 1/best_r])
735 
736 
737 r = [str(-bestb(2*i+1)[2]) for i in range(0, 128) ]
738 print("static char mult[128] = {" + ", ".join(r) + "};")
739 */
740 
741 #include "mult.h"
742 #include "macros.h"
743 static unsigned char invmod8[256] = {
744 0, 1, 0, 171, 0, 205, 0, 183, 0, 57, 0, 163, 0, 197, 0, 239, 0, 241, 0, 27, 0, 61, 0, 167, 0, 41, 0, 19, 0, 53, 0, 223, 0, 225, 0, 139, 0, 173, 0, 151, 0, 25, 0, 131, 0, 165, 0, 207, 0, 209, 0, 251, 0, 29, 0, 135, 0, 9, 0, 243, 0, 21, 0, 191, 0, 193, 0, 107, 0, 141, 0, 119, 0, 249, 0, 99, 0, 133, 0, 175, 0, 177, 0, 219, 0, 253, 0, 103, 0, 233, 0, 211, 0, 245, 0, 159, 0, 161, 0, 75, 0, 109, 0, 87, 0, 217, 0, 67, 0, 101, 0, 143, 0, 145, 0, 187, 0, 221, 0, 71, 0, 201, 0, 179, 0, 213, 0, 127, 0, 129, 0, 43, 0, 77, 0, 55, 0, 185, 0, 35, 0, 69, 0, 111, 0, 113, 0, 155, 0, 189, 0, 39, 0, 169, 0, 147, 0, 181, 0, 95, 0, 97, 0, 11, 0, 45, 0, 23, 0, 153, 0, 3, 0, 37, 0, 79, 0, 81, 0, 123, 0, 157, 0, 7, 0, 137, 0, 115, 0, 149, 0, 63, 0, 65, 0, 235, 0, 13, 0, 247, 0, 121, 0, 227, 0, 5, 0, 47, 0, 49, 0, 91, 0, 125, 0, 231, 0, 105, 0, 83, 0, 117, 0, 31, 0, 33, 0, 203, 0, 237, 0, 215, 0, 89, 0, 195, 0, 229, 0, 15, 0, 17, 0, 59, 0, 93, 0, 199, 0, 73, 0, 51, 0, 85, 0, 255
745 };
746 
747 static inline int
s_val(unsigned int s)748 s_val(unsigned int s) {
749   return ((s & 2) == 0) ? 1 : -1;
750 }
751 
752 static int
jacobi1(const Residue & a_par)753 Modulus::jacobi1 (const Residue &a_par)
754 {
755   uint64_t x, m;
756   unsigned int s, j;
757 
758   x = mod_get_ul (a_par, m_par);
759   m = mod_getmod_ul (m_par);
760   ASSERT (x < m);
761   ASSERT(m % 2 == 1);
762 
763   j = u64arith_ctz(x);
764   x = x >> j;
765 
766   s = ((j<<1) & (m ^ (m>>1)));
767 
768   while (x > 1) {
769     uint64_t t;
770     unsigned char inv;
771 
772     // printf ("kronecker(%lu, %lu) == %d * kronecker(%lu, %lu)\n",
773     //        mod_get_ul(a_par, m_par), mod_getmod_ul (m_par), s_val(s), x, m);
774     /* Here x < m. Swap to make x > m */
775     t = m;
776     m = x;
777     x = t;
778     s = s ^ (x&m);
779 
780     /* Now x > m */
781     inv = invmod8[(unsigned char)m];
782     do {
783       /* Do a REDC-like step. We want a multiplier k such that the low
784          8 bits of x+k*m are all zero.
785          That is, we want k = -x/m (mod 2^8). */
786       unsigned char k;
787       uint64_t t1;
788       long int c, t2;
789       // const uint64_t old_x = x;
790 
791       k = inv * (unsigned char)x;
792       // ASSERT_ALWAYS((k & 1) == 1);
793       c = mult[k / 2];
794 
795       /* Compute x+cm */
796       long tmp = c >> 63;
797       u64arith_mul_1_1_2 (&t1, (uint64_t *)&t2, (c ^ tmp) - tmp, m);
798       t2 ^= tmp;
799       t1 ^= tmp;
800       u64arith_add_1_2 (&t1, (uint64_t *)&t2, x-tmp);
801       tmp = ((long) t2) >> 63;
802 
803       t2 ^= tmp;
804       t1 ^= tmp;
805       s ^= m & tmp;
806       u64arith_add_1_2 (&t1, (uint64_t *)&t2, -tmp);
807       // ASSERT_ALWAYS(t2 >= 0);
808 
809       if (t1 == 0) {
810         if (t2 == 0) {
811           x = 0;
812           break;
813         }
814         t1 = t2;
815         /* Divided by 2^64 which is square, so no adjustment to s */
816         t2 = 0;
817       }
818 
819       j = u64arith_ctz(t1);
820       u64arith_shrd (&t1, t2, j);
821       // ASSERT_ALWAYS((t2 >> j) == 0);
822       x = t1;
823       s ^= ((j<<1) & (m ^ (m>>1)));
824       // ASSERT_ALWAYS(x < old_x);
825       // printf ("%f\n", (double)old_x / (double)x);
826     } while (x >= m);
827   }
828 
829   if (x == 0)
830     return 0;
831   return s_val(s);
832 }
833 
834 int
jacobi(const Residue & a)835 Modulus::jacobi (const Residue &a)
836 {
837   uint64_t x, m;
838   unsigned int s, j;
839 
840   x = mod_get_ul (a_par, m_par);
841   m = mod_getmod_ul (m_par);
842   ASSERT (x < m);
843   ASSERT(m % 2 == 1);
844 
845   if ((LONG_MAX - x) / 50 < m)
846     return mod_jacobi1 (a_par, m_par);
847 
848   j = u64arith_ctz(x);
849   x = x >> j;
850 
851   s = ((j<<1) & (m ^ (m>>1)));
852 
853   while (x > 1) {
854     uint64_t t;
855     unsigned char inv;
856 
857     // printf ("kronecker(%lu, %lu) == %d * kronecker(%lu, %lu)\n",
858     //        mod_get_ul(a_par, m_par), mod_getmod_ul (m_par), s_val(s), x, m);
859     /* Here x < m. Swap to make x > m */
860     t = m;
861     m = x;
862     x = t;
863     s = s ^ (x&m);
864 
865     /* Now x > m */
866     inv = invmod8[(unsigned char)m];
867     do {
868       /* Do a REDC-like step. We want a multiplier k such that the low
869          8 bits of x+k*m are all zero.
870          That is, we want k = -x/m (mod 2^8). */
871       unsigned char k;
872       long int c;
873       // const uint64_t old_x = x;
874 
875       k = inv * x;
876       // ASSERT_ALWAYS((k & 1) == 1);
877       c = mult[k / 2];
878 
879       c = x + c*m;
880       x = c;
881       c >>= 63;
882       x = (x ^ c) - c;
883 
884       if (x == 0) {
885         break;
886       }
887       s ^= m & c;
888 
889       j = u64arith_ctz(x);
890       x >>= j;
891       s ^= ((j<<1) & (m ^ (m>>1)));
892       // printf ("%f\n", (double)old_x / (double)x);
893     } while (x >= m);
894   }
895 
896   if (x == 0)
897     return 0;
898   return s_val(s);
899 }
900 
901 #endif
902