1 /*
2 bernoulli.c: example program; computes irregular indices for a prime p
3
4 Copyright (C) 2007, 2008, David Harvey
5
6 This file is part of the zn_poly library (version 0.9).
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 2 of the License, or
11 (at your option) version 3 of the License.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21 */
22
23 #include <stdio.h>
24 #include <gmp.h>
25 #include "zn_poly.h"
26
27
28 /*
29 Finds distinct prime factors of n, in the range k < p <= n.
30 Assumes that n does not have any prime factors p <= k.
31 Stores them in increasing order starting at res.
32 Returns number of factors written.
33 */
34 unsigned
prime_factors_helper(ulong * res,ulong k,ulong n)35 prime_factors_helper (ulong* res, ulong k, ulong n)
36 {
37 if (n == 1)
38 return 0;
39
40 ulong i;
41 for (i = k + 1; i * i <= n; i++)
42 {
43 if (n % i == 0)
44 {
45 // found a factor
46 *res = i;
47 // remove that factor entirely
48 for (n /= i; n % i == 0; n /= i);
49 return prime_factors_helper (res + 1, i, n) + 1;
50 }
51 }
52 // no more factors
53 *res = n;
54 return 1;
55 }
56
57
58 /*
59 Finds distinct prime factors of n.
60 Writes them to res in increasing order, returns number of factors found.
61 */
62 unsigned
prime_factors(ulong * res,ulong n)63 prime_factors (ulong* res, ulong n)
64 {
65 return prime_factors_helper (res, 1, n);
66 }
67
68
69 /*
70 A really dumb primality test.
71 */
72 int
is_prime(ulong n)73 is_prime (ulong n)
74 {
75 ulong i;
76 for (i = 2; i * i <= n; i++)
77 if (n % i == 0)
78 return 0;
79 return 1;
80 }
81
82
83 /*
84 Assuming p >= 3 is prime, find the minimum primitive root mod p.
85 Stores it at g, and stores its inverse mod p at g_inv.
86
87 Note: this is probably a terrible algorithm, but it's fine compared to the
88 running time of bernoulli() for large p.
89 */
90 void
primitive_root(ulong * g,ulong * g_inv,ulong p)91 primitive_root (ulong* g, ulong* g_inv, ulong p)
92 {
93 zn_mod_t mod;
94 zn_mod_init (mod, p);
95
96 // find prime factors of p-1
97 ulong factors[200];
98 unsigned num_factors, i;
99 num_factors = prime_factors (factors, p - 1);
100
101 // loop through candidates
102 ulong x;
103 for (x = 2; ; x++)
104 {
105 ZNP_ASSERT (x < p);
106
107 // it's a generator if x^((p-1)/q) != 1 for all primes q dividing p-1
108 int good = 1;
109 for (i = 0; i < num_factors && good; i++)
110 good = good && (zn_mod_pow (x, (p - 1) / factors[i], mod) != 1);
111
112 if (good)
113 {
114 *g = x;
115 *g_inv = zn_mod_pow (x, p - 2, mod);
116 zn_mod_clear (mod);
117 return;
118 }
119 }
120 }
121
122
123 /*
124 Computes bernoulli numbers B(0), B(2), ..., B((p-3)/2) mod p.
125
126 If res != NULL, it stores the result in res, which needs to be of length
127 (p-1)/2.
128
129 If irregular != NULL, it stores the number of irregular indices at
130 irregular[0], follows by the irregular indices. The largest permitted
131 index of irregularity is irregular_max; if there are more indices than that
132 to store, the program will abort.
133
134 p must be a prime >= 3
135 p must be < 2^(ULONG_BITS/2)
136 */
137 void
bernoulli(ulong * res,ulong * irregular,unsigned irregular_max,ulong p)138 bernoulli (ulong* res, ulong* irregular, unsigned irregular_max, ulong p)
139 {
140 ZNP_ASSERT (p < (1UL << (ULONG_BITS/2)));
141
142 ulong g, g_inv;
143 primitive_root (&g, &g_inv, p);
144
145 ulong n = (p-1) / 2;
146
147 zn_mod_t mod;
148 zn_mod_init (mod, p);
149
150 // allocate our own res if the caller didn't
151 int own_res = 0;
152 if (!res)
153 {
154 res = (ulong*) malloc (n * sizeof (ulong));
155 own_res = 1;
156 }
157
158 if (irregular)
159 irregular[0] = 0;
160
161 ulong* G = (ulong*) malloc (n * sizeof (ulong));
162 ulong* P = (ulong*) malloc ((2*n - 1) * sizeof (ulong));
163 ulong* J = res;
164
165 // -------------------------------------------------------------------------
166 // Step 1: compute polynomials G(X) and J(X)
167
168 // g_pow = g^(i-1), g_pow = g^(-i) at beginning of each iteration
169 ulong g_pow = g_inv;
170 ulong g_pow_inv = 1;
171
172 // bias = (g-1)/2 mod p
173 ulong bias = (g - 1 + ((g & 1) ? 0 : p)) / 2;
174
175 // fudge = g^(i^2), fudge_inv = g^(-i^2) at each iteration
176 ulong fudge = 1;
177 ulong fudge_inv = 1;
178
179 ulong i;
180 for (i = 0; i < n; i++)
181 {
182 ulong prod = g * g_pow;
183
184 // quo = floor(prod / p)
185 // rem = prod % p
186 ulong quo = zn_mod_quotient (prod, mod);
187 ulong rem = prod - quo * p;
188
189 // h = h(g^i) / g^i mod p
190 ulong h = g_pow_inv * zn_mod_sub_slim (bias, quo, mod);
191 h = zn_mod_reduce (h, mod);
192
193 // update g_pow and g_pow_inv for next iteration
194 g_pow = rem;
195 g_pow_inv = zn_mod_reduce (g_pow_inv * g_inv, mod);
196
197 // X^i coefficient of G(X) is g^(i^2) * h(g^i) / g^i
198 G[i] = zn_mod_reduce (h * fudge, mod);
199
200 // X^i coefficient of J(X) is g^(-i^2)
201 J[i] = fudge_inv;
202
203 // update fudge and fudge_inv for next iteration
204 fudge = zn_mod_reduce (fudge * g_pow, mod);
205 fudge = zn_mod_reduce (fudge * g_pow, mod);
206 fudge = zn_mod_reduce (fudge * g, mod);
207 fudge_inv = zn_mod_reduce (fudge_inv * g_pow_inv, mod);
208 fudge_inv = zn_mod_reduce (fudge_inv * g_pow_inv, mod);
209 fudge_inv = zn_mod_reduce (fudge_inv * g, mod);
210 }
211
212 J[0] = 0;
213
214 // -------------------------------------------------------------------------
215 // Step 2: compute product P(X) = G(X) * J(X)
216
217 zn_array_mul (P, J, n, G, n, mod);
218
219 // -------------------------------------------------------------------------
220 // Step 3: extract output from P(X), and verify result
221
222 res[0] = 1;
223
224 // we will verify that \sum_{j=0}^{(p-3)/2} 4^j (2j+1) B(2j) = -2 mod p
225 ulong check_accum = 1;
226 ulong check_four_pow = 4 % p;
227
228 // g_sqr = g^2
229 // g_sqr_inv = g^(-2)
230 ulong g_sqr = zn_mod_reduce (g * g, mod);
231 ulong g_sqr_inv = zn_mod_reduce (g_inv * g_inv, mod);
232
233 // make table with J[i] = (1 - g^(2i+2))(1 - g^(2i+4)) ... (1 - g^(p-3))
234 // for 0 <= i < (p-1)/2
235 ulong g_sqr_inv_pow = g_sqr_inv;
236 J[n-1] = 1;
237 for (i = 1; i < n; i++)
238 {
239 J[n-i-1] = zn_mod_reduce (J[n-i] * (p + 1 - g_sqr_inv_pow), mod);
240 g_sqr_inv_pow = zn_mod_reduce (g_sqr_inv_pow * g_sqr_inv, mod);
241 }
242
243 // fudge = g^(i^2) at each iteration
244 fudge = g;
245 // g_sqr_pow = g^(2i) at each iteration
246 ulong g_sqr_pow = g_sqr;
247
248 // prod_inv = [(1 - g^(2i))(1 - g^(2i+2)) ... (1 - g^(p-3))]^(-1)
249 // at each iteration (todo: for i == 1, it's experimentally equal to -1/2
250 // mod p, need to prove this)
251 ulong prod_inv = p - 2;
252
253 for (i = 1; i < n; i++)
254 {
255 ulong val = (i == (n-1)) ? 0 : P[i + n];
256 if (n & 1)
257 val = zn_mod_neg (val, mod);
258 val = zn_mod_add_slim (val, G[i], mod);
259 val = zn_mod_add_slim (val, P[i], mod);
260
261 // multiply by 4 * i * g^(i^2)
262 val = zn_mod_reduce (val * fudge, mod);
263 val = zn_mod_reduce (val * (2*i), mod);
264 val = zn_mod_add_slim (val, val, mod);
265
266 // divide by (1 - g^(2i))
267 val = zn_mod_reduce (val * prod_inv, mod);
268 val = zn_mod_reduce (val * J[i], mod);
269 prod_inv = zn_mod_reduce (prod_inv * (1 + p - g_sqr_pow), mod);
270
271 // store output coefficient if requested
272 if (!own_res)
273 res[i] = val;
274
275 // store irregular index if requested
276 if (irregular)
277 {
278 if (val == 0)
279 {
280 irregular[0]++;
281 if (irregular[0] >= irregular_max)
282 {
283 printf ("too many irregular indices for p = %lu\n", p);
284 abort ();
285 }
286 irregular[irregular[0]] = 2*i;
287 }
288 }
289
290 // update fudge and g_sqr_pow
291 g_sqr_pow = zn_mod_reduce (g_sqr_pow * g, mod);
292 fudge = zn_mod_reduce (fudge * g_sqr_pow, mod);
293 g_sqr_pow = zn_mod_reduce (g_sqr_pow * g, mod);
294
295 // update verification data
296 ulong check_term = zn_mod_reduce (check_four_pow * (2*i + 1), mod);
297 check_term = zn_mod_reduce (check_term * val, mod);
298 check_accum = zn_mod_add_slim (check_accum, check_term, mod);
299 check_four_pow = zn_mod_add_slim (check_four_pow, check_four_pow, mod);
300 check_four_pow = zn_mod_add_slim (check_four_pow, check_four_pow, mod);
301 }
302
303 if (check_accum != p-2)
304 {
305 printf ("bernoulli failed correctness check for p = %lu\n", p);
306 abort ();
307 }
308
309 if (own_res)
310 free (res);
311
312 free (P);
313 free (G);
314 zn_mod_clear (mod);
315 }
316
317
318
319 /*
320 Same as bernoulli(), but handles two primes simultaneously.
321
322 p1 and p2 must be distinct primes >= 3.
323 */
324 void
bernoulli_dual(ulong * res1,ulong * irregular1,unsigned irregular1_max,ulong p1,ulong * res2,ulong * irregular2,unsigned irregular2_max,ulong p2)325 bernoulli_dual(ulong* res1, ulong* irregular1, unsigned irregular1_max, ulong p1,
326 ulong* res2, ulong* irregular2, unsigned irregular2_max, ulong p2)
327 {
328 ZNP_ASSERT (p1 < (1UL << (ULONG_BITS/2)));
329 ZNP_ASSERT (p2 < (1UL << (ULONG_BITS/2)));
330 ZNP_ASSERT (p1 != p2);
331
332 // swap them to make p1 < p2
333 if (p1 > p2)
334 {
335 { ulong temp = p2; p2 = p1; p1 = temp; }
336 { ulong* temp = res1; res1 = res2; res2 = temp; }
337 { ulong* temp = irregular1; irregular1 = irregular2; irregular2 = temp; }
338 { unsigned temp = irregular1_max; irregular1_max = irregular2_max; irregular2_max = temp; }
339 }
340
341 ulong g1, g_inv1;
342 ulong g2, g_inv2;
343 primitive_root (&g1, &g_inv1, p1);
344 primitive_root (&g2, &g_inv2, p2);
345
346 ulong n1 = (p1-1) / 2;
347 ulong n2 = (p2-1) / 2;
348
349 zn_mod_t mod1, mod2;
350 zn_mod_init (mod1, p1);
351 zn_mod_init (mod2, p2);
352
353 ulong q = p1 * p2;
354 zn_mod_t mod;
355 zn_mod_init (mod, q);
356
357 // allocate our own res2 if the caller didn't
358 int own_res2 = 0;
359 if (!res2)
360 {
361 res2 = (ulong*) malloc (n2 * sizeof (ulong));
362 own_res2 = 1;
363 }
364
365 if (irregular1)
366 irregular1[0] = 0;
367 if (irregular2)
368 irregular2[0] = 0;
369
370 // find idempotents to CRT modulo p1 and p2, i.e.
371 // id1 = 1 mod p1, id1 = 0 mod p2
372 // id2 = 0 mod p1, id2 = 1 mod p2
373 mpz_t p1_mpz, p2_mpz, a1_mpz, a2_mpz, g_mpz;
374 mpz_init (p1_mpz);
375 mpz_init (p2_mpz);
376 mpz_init (a1_mpz);
377 mpz_init (a2_mpz);
378 mpz_init (g_mpz);
379
380 mpz_set_ui (p1_mpz, p1);
381 mpz_set_ui (p2_mpz, p2);
382 mpz_gcdext (g_mpz, a1_mpz, a2_mpz, p1_mpz, p2_mpz);
383
384 mpz_mul (a1_mpz, a1_mpz, p1_mpz);
385 mpz_mod_ui (a1_mpz, a1_mpz, q);
386 ulong id2 = mpz_get_ui (a1_mpz);
387
388 mpz_mul (a2_mpz, a2_mpz, p2_mpz);
389 mpz_mod_ui (a2_mpz, a2_mpz, q);
390 ulong id1 = mpz_get_ui (a2_mpz);
391
392 mpz_clear (g_mpz);
393 mpz_clear (a2_mpz);
394 mpz_clear (a1_mpz);
395 mpz_clear (p2_mpz);
396 mpz_clear (p1_mpz);
397
398 ulong* G = (ulong*) malloc (n2 * sizeof (ulong));
399 ulong* P = (ulong*) malloc ((2*n2 - 1) * sizeof (ulong));
400 ulong* J = res2;
401
402 // -------------------------------------------------------------------------
403 // Step 1: compute polynomials G(X) and J(X)
404
405 // g_pow = g^(i-1), g_pow = g^(-i) at beginning of each iteration
406 ulong g_pow1 = g_inv1;
407 ulong g_pow2 = g_inv2;
408 ulong g_pow_inv1 = 1;
409 ulong g_pow_inv2 = 1;
410
411 // bias = (g-1)/2 mod p
412 ulong bias1 = (g1 - 1 + ((g1 & 1) ? 0 : p1)) / 2;
413 ulong bias2 = (g2 - 1 + ((g2 & 1) ? 0 : p2)) / 2;
414
415 // fudge = g^(i^2), fudge_inv = g^(-i^2) at each iteration
416 ulong fudge1 = 1;
417 ulong fudge2 = 1;
418 ulong fudge_inv1 = 1;
419 ulong fudge_inv2 = 1;
420
421 ulong i;
422 for (i = 0; i < n1; i++)
423 {
424 ulong prod1 = g1 * g_pow1;
425 ulong prod2 = g2 * g_pow2;
426
427 // quo = floor(prod / p)
428 // rem = prod % p
429 ulong quo1 = zn_mod_quotient (prod1, mod1);
430 ulong rem1 = prod1 - quo1 * p1;
431 ulong quo2 = zn_mod_quotient (prod2, mod2);
432 ulong rem2 = prod2 - quo2 * p2;
433
434 // h = h(g^i) / g^i mod p
435 ulong h1 = g_pow_inv1 * zn_mod_sub_slim (bias1, quo1, mod1);
436 h1 = zn_mod_reduce (h1, mod1);
437 ulong h2 = g_pow_inv2 * zn_mod_sub_slim (bias2, quo2, mod2);
438 h2 = zn_mod_reduce (h2, mod2);
439
440 // update g_pow and g_pow_inv for next iteration
441 g_pow1 = rem1;
442 g_pow_inv1 = zn_mod_reduce (g_pow_inv1 * g_inv1, mod1);
443 g_pow2 = rem2;
444 g_pow_inv2 = zn_mod_reduce (g_pow_inv2 * g_inv2, mod2);
445
446 // X^i coefficient of G(X) is g^(i^2) * h(g^i) / g^i
447 // (combine via CRT)
448 ulong Gval1 = zn_mod_reduce (h1 * fudge1, mod1);
449 Gval1 = zn_mod_mul (Gval1, id1, mod);
450 ulong Gval2 = zn_mod_reduce (h2 * fudge2, mod2);
451 Gval2 = zn_mod_mul (Gval2, id2, mod);
452 G[i] = zn_mod_add (Gval1, Gval2, mod);
453
454 // X^i coefficient of J(X) is g^(-i^2)
455 ulong Jval1 = zn_mod_mul (fudge_inv1, id1, mod);
456 ulong Jval2 = zn_mod_mul (fudge_inv2, id2, mod);
457 J[i] = zn_mod_add (Jval1, Jval2, mod);
458
459 // update fudge and fudge_inv for next iteration
460 fudge1 = zn_mod_reduce (fudge1 * g_pow1, mod1);
461 fudge1 = zn_mod_reduce (fudge1 * g_pow1, mod1);
462 fudge1 = zn_mod_reduce (fudge1 * g1, mod1);
463 fudge_inv1 = zn_mod_reduce (fudge_inv1 * g_pow_inv1, mod1);
464 fudge_inv1 = zn_mod_reduce (fudge_inv1 * g_pow_inv1, mod1);
465 fudge_inv1 = zn_mod_reduce (fudge_inv1 * g1, mod1);
466
467 fudge2 = zn_mod_reduce (fudge2 * g_pow2, mod2);
468 fudge2 = zn_mod_reduce (fudge2 * g_pow2, mod2);
469 fudge2 = zn_mod_reduce (fudge2 * g2, mod2);
470 fudge_inv2 = zn_mod_reduce (fudge_inv2 * g_pow_inv2, mod2);
471 fudge_inv2 = zn_mod_reduce (fudge_inv2 * g_pow_inv2, mod2);
472 fudge_inv2 = zn_mod_reduce (fudge_inv2 * g2, mod2);
473 }
474
475 // finished with p1, now finish the loop for p2
476 for (; i < n2; i++)
477 {
478 ulong prod2 = g2 * g_pow2;
479
480 // quo = floor(prod / p)
481 // rem = prod % p
482 ulong quo2 = zn_mod_quotient (prod2, mod2);
483 ulong rem2 = prod2 - quo2 * p2;
484
485 // h = h(g^i) / g^i mod p
486 ulong h2 = g_pow_inv2 * zn_mod_sub_slim (bias2, quo2, mod2);
487 h2 = zn_mod_reduce (h2, mod2);
488
489 // update g_pow and g_pow_inv for next iteration
490 g_pow2 = rem2;
491 g_pow_inv2 = zn_mod_reduce (g_pow_inv2 * g_inv2, mod2);
492
493 // X^i coefficient of G(X) is g^(i^2) * h(g^i) / g^i
494 // (combine via CRT)
495 ulong Gval2 = zn_mod_reduce (h2 * fudge2, mod2);
496 G[i] = zn_mod_mul (Gval2, id2, mod);
497
498 // X^i coefficient of J(X) is g^(-i^2)
499 J[i] = zn_mod_mul (fudge_inv2, id2, mod);
500
501 // update fudge and fudge_inv for next iteration
502 fudge2 = zn_mod_reduce (fudge2 * g_pow2, mod2);
503 fudge2 = zn_mod_reduce (fudge2 * g_pow2, mod2);
504 fudge2 = zn_mod_reduce (fudge2 * g2, mod2);
505 fudge_inv2 = zn_mod_reduce (fudge_inv2 * g_pow_inv2, mod2);
506 fudge_inv2 = zn_mod_reduce (fudge_inv2 * g_pow_inv2, mod2);
507 fudge_inv2 = zn_mod_reduce (fudge_inv2 * g2, mod2);
508 }
509
510 J[0] = 0;
511
512 // -------------------------------------------------------------------------
513 // Step 2: compute product P(X) = G(X) * J(X)
514
515 #if 0
516 zn_array_mul_fft_dft (P, J, n2, G, n2, 3, mod);
517 #else
518 zn_array_mul (P, J, n2, G, n2, mod);
519 #endif
520
521 // -------------------------------------------------------------------------
522 // Step 3: extract output from P(X), and verify result
523
524 if (res1)
525 res1[0] = 1;
526
527 // we will verify that \sum_{j=0}^{(p-3)/2} 4^j (2j+1) B(2j) = -2 mod p
528 ulong check_accum1 = 1;
529 ulong check_four_pow1 = 4 % p1;
530
531 // g_sqr = g^2
532 // g_sqr_inv = g^(-2)
533 ulong g_sqr1 = zn_mod_reduce (g1 * g1, mod1);
534 ulong g_sqr_inv1 = zn_mod_reduce (g_inv1 * g_inv1, mod1);
535
536 // make table with J[i] = (1 - g^(2i+2))(1 - g^(2i+4)) ... (1 - g^(p-3))
537 // for 0 <= i < (p-1)/2
538 ulong g_sqr_inv_pow1 = g_sqr_inv1;
539 J[n1-1] = 1;
540 for (i = 1; i < n1; i++)
541 {
542 J[n1-i-1] = zn_mod_reduce (J[n1-i] * (p1 + 1 - g_sqr_inv_pow1), mod1);
543 g_sqr_inv_pow1 = zn_mod_reduce (g_sqr_inv_pow1 * g_sqr_inv1, mod1);
544 }
545
546 // fudge = g^(i^2) at each iteration
547 fudge1 = g1;
548 // g_sqr_pow = g^(2i) at each iteration
549 ulong g_sqr_pow1 = g_sqr1;
550
551 // prod_inv = [(1 - g^(2i))(1 - g^(2i+2)) ... (1 - g^(p-3))]^(-1)
552 // at each iteration (todo: for i == 1, it's experimentally equal to -1/2
553 // mod p, need to prove this)
554 ulong prod_inv1 = p1 - 2;
555
556 for (i = 1; i < n1; i++)
557 {
558 ulong val = (i == (n1-1)) ? 0 : P[i + n1];
559 if (n1 & 1)
560 val = zn_mod_neg (val, mod);
561 val = zn_mod_add (val, G[i], mod);
562 val = zn_mod_add (val, P[i], mod);
563
564 // reduce it mod p1
565 val = zn_mod_reduce (val, mod1);
566
567 // multiply by 4 * i * g^(i^2)
568 val = zn_mod_reduce (val * fudge1, mod1);
569 val = zn_mod_reduce (val * (2*i), mod1);
570 val = zn_mod_add_slim (val, val, mod1);
571
572 // divide by (1 - g^(2i))
573 val = zn_mod_reduce (val * prod_inv1, mod1);
574 val = zn_mod_reduce (val * J[i], mod1);
575 prod_inv1 = zn_mod_reduce (prod_inv1 * (1 + p1 - g_sqr_pow1), mod1);
576
577 // store output coefficient if requested
578 if (res1)
579 res1[i] = val;
580
581 // store irregular index if requested
582 if (irregular1)
583 {
584 if (val == 0)
585 {
586 irregular1[0]++;
587 if (irregular1[0] >= irregular1_max)
588 {
589 printf ("too many irregular indices for p = %lu\n", p1);
590 abort ();
591 }
592 irregular1[irregular1[0]] = 2*i;
593 }
594 }
595
596 // update fudge and g_sqr_pow
597 g_sqr_pow1 = zn_mod_reduce (g_sqr_pow1 * g1, mod1);
598 fudge1 = zn_mod_reduce (fudge1 * g_sqr_pow1, mod1);
599 g_sqr_pow1 = zn_mod_reduce (g_sqr_pow1 * g1, mod1);
600
601 // update verification data
602 ulong check_term1 = zn_mod_reduce (check_four_pow1 * (2*i + 1), mod1);
603 check_term1 = zn_mod_reduce (check_term1 * val, mod1);
604 check_accum1 = zn_mod_add_slim (check_accum1, check_term1, mod1);
605 check_four_pow1 = zn_mod_add_slim (check_four_pow1, check_four_pow1, mod1);
606 check_four_pow1 = zn_mod_add_slim (check_four_pow1, check_four_pow1, mod1);
607 }
608
609 if (check_accum1 != p1-2)
610 {
611 printf ("bernoulli_dual failed correctness check for p1 = %lu\n", p1);
612 abort ();
613 }
614
615 // -------------------------------------------------------------------------
616 // Do step 3 again for the second prime
617
618 res2[0] = 1;
619
620 // we will verify that \sum_{j=0}^{(p-3)/2} 4^j (2j+1) B(2j) = -2 mod p
621 ulong check_accum2 = 1;
622 ulong check_four_pow2 = 4 % p2;
623
624 // g_sqr = g^2
625 // g_sqr_inv = g^(-2)
626 ulong g_sqr2 = zn_mod_reduce (g2 * g2, mod2);
627 ulong g_sqr_inv2 = zn_mod_reduce (g_inv2 * g_inv2, mod2);
628
629 // make table with J[i] = (1 - g^(2i+2))(1 - g^(2i+4)) ... (1 - g^(p-3))
630 // for 0 <= i < (p-1)/2
631 ulong g_sqr_inv_pow2 = g_sqr_inv2;
632 J[n2-1] = 1;
633 for (i = 1; i < n2; i++)
634 {
635 J[n2-i-1] = zn_mod_reduce (J[n2-i] * (p2 + 1 - g_sqr_inv_pow2), mod2);
636 g_sqr_inv_pow2 = zn_mod_reduce (g_sqr_inv_pow2 * g_sqr_inv2, mod2);
637 }
638
639 // fudge = g^(i^2) at each iteration
640 fudge2 = g2;
641 // g_sqr_pow = g^(2i) at each iteration
642 ulong g_sqr_pow2 = g_sqr2;
643
644 // prod_inv = [(1 - g^(2i))(1 - g^(2i+2)) ... (1 - g^(p-3))]^(-1)
645 // at each iteration (todo: for i == 1, it's experimentally equal to -1/2
646 // mod p, need to prove this)
647 ulong prod_inv2 = p2 - 2;
648
649 for (i = 1; i < n2; i++)
650 {
651 ulong val = (i == (n2-1)) ? 0 : P[i + n2];
652 if (n2 & 1)
653 val = zn_mod_neg (val, mod);
654 val = zn_mod_add (val, G[i], mod);
655 val = zn_mod_add (val, P[i], mod);
656
657 // reduce it mod p2
658 val = zn_mod_reduce (val, mod2);
659
660 // multiply by 4 * i * g^(i^2)
661 val = zn_mod_reduce (val * fudge2, mod2);
662 val = zn_mod_reduce (val * (2*i), mod2);
663 val = zn_mod_add_slim (val, val, mod2);
664
665 // divide by (1 - g^(2i))
666 val = zn_mod_reduce (val * prod_inv2, mod2);
667 val = zn_mod_reduce (val * J[i], mod2);
668 prod_inv2 = zn_mod_reduce (prod_inv2 * (1 + p2 - g_sqr_pow2), mod2);
669
670 // store output coefficient if requested
671 if (!own_res2)
672 res2[i] = val;
673
674 // store irregular index if requested
675 if (irregular2)
676 {
677 if (val == 0)
678 {
679 irregular2[0]++;
680 if (irregular2[0] >= irregular2_max)
681 {
682 printf ("too many irregular indices for p = %lu\n", p2);
683 abort ();
684 }
685 irregular2[irregular2[0]] = 2*i;
686 }
687 }
688
689 // update fudge and g_sqr_pow
690 g_sqr_pow2 = zn_mod_reduce (g_sqr_pow2 * g2, mod2);
691 fudge2 = zn_mod_reduce (fudge2 * g_sqr_pow2, mod2);
692 g_sqr_pow2 = zn_mod_reduce (g_sqr_pow2 * g2, mod2);
693
694 // update verification data
695 ulong check_term2 = zn_mod_reduce (check_four_pow2 * (2*i + 1), mod2);
696 check_term2 = zn_mod_reduce (check_term2 * val, mod2);
697 check_accum2 = zn_mod_add_slim (check_accum2, check_term2, mod2);
698 check_four_pow2 = zn_mod_add_slim (check_four_pow2, check_four_pow2, mod2);
699 check_four_pow2 = zn_mod_add_slim (check_four_pow2, check_four_pow2, mod2);
700 }
701
702 if (check_accum2 != p2-2)
703 {
704 printf ("bernoulli_dual failed correctness check for p2 = %lu\n", p2);
705 abort ();
706 }
707
708 if (own_res2)
709 free (res2);
710 free (P);
711 free (G);
712
713 zn_mod_clear (mod);
714 zn_mod_clear (mod2);
715 zn_mod_clear (mod1);
716 }
717
718
719
720 int
main(int argc,char * argv[])721 main (int argc, char* argv[])
722 {
723 if (argc == 2)
724 {
725 ulong i, p = atol (argv[1]);
726 ulong irregular[30];
727 bernoulli (NULL, irregular, 29, p);
728
729 printf ("irregular indices for p = %lu: ", p);
730 for (i = 1; i <= irregular[0]; i++)
731 printf ("%lu ", irregular[i]);
732 printf ("\n");
733 }
734 else if (argc == 3)
735 {
736 ulong i, p1 = atol (argv[1]), p2 = atol (argv[2]);
737 ulong irregular1[30];
738 ulong irregular2[30];
739
740 bernoulli_dual (NULL, irregular1, 29, p1, NULL, irregular2, 29, p2);
741
742 printf ("irregular indices for p = %lu: ", p1);
743 for (i = 1; i <= irregular1[0]; i++)
744 printf ("%lu ", irregular1[i]);
745 printf ("\n");
746
747 printf ("irregular indices for p = %lu: ", p2);
748 for (i = 1; i <= irregular2[0]; i++)
749 printf ("%lu ", irregular2[i]);
750 printf ("\n");
751 }
752 else
753 {
754 printf ("usage:\n");
755 printf ("\n");
756 printf (" bernoulli p\n");
757 printf (" prints irregular indices for p\n");
758 printf ("\n");
759 printf (" bernoulli p1 p2\n");
760 printf (" prints irregular indices for p1 and p2\n");
761 }
762
763 return 0;
764 }
765
766
767 // end of file ****************************************************************
768