1 /*
2    bernoulli.c:  example program; computes irregular indices for a prime p
3 
4    Copyright (C) 2007, 2008, David Harvey
5 
6    This file is part of the zn_poly library (version 0.9).
7 
8    This program is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 2 of the License, or
11    (at your option) version 3 of the License.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 #include <stdio.h>
24 #include <gmp.h>
25 #include "zn_poly.h"
26 
27 
28 /*
29    Finds distinct prime factors of n, in the range k < p <= n.
30    Assumes that n does not have any prime factors p <= k.
31    Stores them in increasing order starting at res.
32    Returns number of factors written.
33 */
34 unsigned
prime_factors_helper(ulong * res,ulong k,ulong n)35 prime_factors_helper (ulong* res, ulong k, ulong n)
36 {
37    if (n == 1)
38       return 0;
39 
40    ulong i;
41    for (i = k + 1; i * i <= n; i++)
42    {
43       if (n % i == 0)
44       {
45          // found a factor
46          *res = i;
47          // remove that factor entirely
48          for (n /= i; n % i == 0; n /= i);
49          return prime_factors_helper (res + 1, i, n) + 1;
50       }
51    }
52    // no more factors
53    *res = n;
54    return 1;
55 }
56 
57 
58 /*
59    Finds distinct prime factors of n.
60    Writes them to res in increasing order, returns number of factors found.
61 */
62 unsigned
prime_factors(ulong * res,ulong n)63 prime_factors (ulong* res, ulong n)
64 {
65    return prime_factors_helper (res, 1, n);
66 }
67 
68 
69 /*
70    A really dumb primality test.
71 */
72 int
is_prime(ulong n)73 is_prime (ulong n)
74 {
75    ulong i;
76    for (i = 2; i * i <= n; i++)
77       if (n % i == 0)
78          return 0;
79    return 1;
80 }
81 
82 
83 /*
84    Assuming p >= 3 is prime, find the minimum primitive root mod p.
85    Stores it at g, and stores its inverse mod p at g_inv.
86 
87    Note: this is probably a terrible algorithm, but it's fine compared to the
88    running time of bernoulli() for large p.
89 */
90 void
primitive_root(ulong * g,ulong * g_inv,ulong p)91 primitive_root (ulong* g, ulong* g_inv, ulong p)
92 {
93    zn_mod_t mod;
94    zn_mod_init (mod, p);
95 
96    // find prime factors of p-1
97    ulong factors[200];
98    unsigned num_factors, i;
99    num_factors = prime_factors (factors, p - 1);
100 
101    // loop through candidates
102    ulong x;
103    for (x = 2; ; x++)
104    {
105       ZNP_ASSERT (x < p);
106 
107       // it's a generator if x^((p-1)/q) != 1 for all primes q dividing p-1
108       int good = 1;
109       for (i = 0; i < num_factors && good; i++)
110          good = good && (zn_mod_pow (x, (p - 1) / factors[i], mod) != 1);
111 
112       if (good)
113       {
114          *g = x;
115          *g_inv = zn_mod_pow (x, p - 2, mod);
116          zn_mod_clear (mod);
117          return;
118       }
119    }
120 }
121 
122 
123 /*
124    Computes bernoulli numbers B(0), B(2), ..., B((p-3)/2) mod p.
125 
126    If res != NULL, it stores the result in res, which needs to be of length
127    (p-1)/2.
128 
129    If irregular != NULL, it stores the number of irregular indices at
130    irregular[0], follows by the irregular indices. The largest permitted
131    index of irregularity is irregular_max; if there are more indices than that
132    to store, the program will abort.
133 
134    p must be a prime >= 3
135    p must be < 2^(ULONG_BITS/2)
136 */
137 void
bernoulli(ulong * res,ulong * irregular,unsigned irregular_max,ulong p)138 bernoulli (ulong* res, ulong* irregular, unsigned irregular_max, ulong p)
139 {
140    ZNP_ASSERT (p < (1UL << (ULONG_BITS/2)));
141 
142    ulong g, g_inv;
143    primitive_root (&g, &g_inv, p);
144 
145    ulong n = (p-1) / 2;
146 
147    zn_mod_t mod;
148    zn_mod_init (mod, p);
149 
150    // allocate our own res if the caller didn't
151    int own_res = 0;
152    if (!res)
153    {
154       res = (ulong*) malloc (n * sizeof (ulong));
155       own_res = 1;
156    }
157 
158    if (irregular)
159       irregular[0] = 0;
160 
161    ulong* G = (ulong*) malloc (n * sizeof (ulong));
162    ulong* P = (ulong*) malloc ((2*n - 1) * sizeof (ulong));
163    ulong* J = res;
164 
165    // -------------------------------------------------------------------------
166    // Step 1: compute polynomials G(X) and J(X)
167 
168    // g_pow = g^(i-1), g_pow = g^(-i) at beginning of each iteration
169    ulong g_pow = g_inv;
170    ulong g_pow_inv = 1;
171 
172    // bias = (g-1)/2 mod p
173    ulong bias = (g - 1 + ((g & 1) ? 0 : p)) / 2;
174 
175    // fudge = g^(i^2), fudge_inv = g^(-i^2) at each iteration
176    ulong fudge = 1;
177    ulong fudge_inv = 1;
178 
179    ulong i;
180    for (i = 0; i < n; i++)
181    {
182       ulong prod = g * g_pow;
183 
184       // quo = floor(prod / p)
185       // rem = prod % p
186       ulong quo = zn_mod_quotient (prod, mod);
187       ulong rem = prod - quo * p;
188 
189       // h = h(g^i) / g^i mod p
190       ulong h = g_pow_inv * zn_mod_sub_slim (bias, quo, mod);
191       h = zn_mod_reduce (h, mod);
192 
193       // update g_pow and g_pow_inv for next iteration
194       g_pow = rem;
195       g_pow_inv = zn_mod_reduce (g_pow_inv * g_inv, mod);
196 
197       // X^i coefficient of G(X) is g^(i^2) * h(g^i) / g^i
198       G[i] = zn_mod_reduce (h * fudge, mod);
199 
200       // X^i coefficient of J(X) is g^(-i^2)
201       J[i] = fudge_inv;
202 
203       // update fudge and fudge_inv for next iteration
204       fudge = zn_mod_reduce (fudge * g_pow, mod);
205       fudge = zn_mod_reduce (fudge * g_pow, mod);
206       fudge = zn_mod_reduce (fudge * g, mod);
207       fudge_inv = zn_mod_reduce (fudge_inv * g_pow_inv, mod);
208       fudge_inv = zn_mod_reduce (fudge_inv * g_pow_inv, mod);
209       fudge_inv = zn_mod_reduce (fudge_inv * g, mod);
210    }
211 
212    J[0] = 0;
213 
214    // -------------------------------------------------------------------------
215    // Step 2: compute product P(X) = G(X) * J(X)
216 
217    zn_array_mul (P, J, n, G, n, mod);
218 
219    // -------------------------------------------------------------------------
220    // Step 3: extract output from P(X), and verify result
221 
222    res[0] = 1;
223 
224    // we will verify that \sum_{j=0}^{(p-3)/2} 4^j (2j+1) B(2j) = -2 mod p
225    ulong check_accum = 1;
226    ulong check_four_pow = 4 % p;
227 
228    // g_sqr = g^2
229    // g_sqr_inv = g^(-2)
230    ulong g_sqr = zn_mod_reduce (g * g, mod);
231    ulong g_sqr_inv = zn_mod_reduce (g_inv * g_inv, mod);
232 
233    // make table with J[i] = (1 - g^(2i+2))(1 - g^(2i+4)) ... (1 - g^(p-3))
234    // for 0 <= i < (p-1)/2
235    ulong g_sqr_inv_pow = g_sqr_inv;
236    J[n-1] = 1;
237    for (i = 1; i < n; i++)
238    {
239       J[n-i-1] = zn_mod_reduce (J[n-i] * (p + 1 - g_sqr_inv_pow), mod);
240       g_sqr_inv_pow = zn_mod_reduce (g_sqr_inv_pow * g_sqr_inv, mod);
241    }
242 
243    // fudge = g^(i^2) at each iteration
244    fudge = g;
245    // g_sqr_pow = g^(2i) at each iteration
246    ulong g_sqr_pow = g_sqr;
247 
248    // prod_inv = [(1 - g^(2i))(1 - g^(2i+2)) ... (1 - g^(p-3))]^(-1)
249    // at each iteration (todo: for i == 1, it's experimentally equal to -1/2
250    // mod p, need to prove this)
251    ulong prod_inv = p - 2;
252 
253    for (i = 1; i < n; i++)
254    {
255       ulong val = (i == (n-1)) ? 0 : P[i + n];
256       if (n & 1)
257          val = zn_mod_neg (val, mod);
258       val = zn_mod_add_slim (val, G[i], mod);
259       val = zn_mod_add_slim (val, P[i], mod);
260 
261       // multiply by 4 * i * g^(i^2)
262       val = zn_mod_reduce (val * fudge, mod);
263       val = zn_mod_reduce (val * (2*i), mod);
264       val = zn_mod_add_slim (val, val, mod);
265 
266       // divide by (1 - g^(2i))
267       val = zn_mod_reduce (val * prod_inv, mod);
268       val = zn_mod_reduce (val * J[i], mod);
269       prod_inv = zn_mod_reduce (prod_inv * (1 + p - g_sqr_pow), mod);
270 
271       // store output coefficient if requested
272       if (!own_res)
273          res[i] = val;
274 
275       // store irregular index if requested
276       if (irregular)
277       {
278          if (val == 0)
279          {
280             irregular[0]++;
281             if (irregular[0] >= irregular_max)
282             {
283                printf ("too many irregular indices for p = %lu\n", p);
284                abort ();
285             }
286             irregular[irregular[0]] = 2*i;
287          }
288       }
289 
290       // update fudge and g_sqr_pow
291       g_sqr_pow = zn_mod_reduce (g_sqr_pow * g, mod);
292       fudge = zn_mod_reduce (fudge * g_sqr_pow, mod);
293       g_sqr_pow = zn_mod_reduce (g_sqr_pow * g, mod);
294 
295       // update verification data
296       ulong check_term = zn_mod_reduce (check_four_pow * (2*i + 1), mod);
297       check_term = zn_mod_reduce (check_term * val, mod);
298       check_accum = zn_mod_add_slim (check_accum, check_term, mod);
299       check_four_pow = zn_mod_add_slim (check_four_pow, check_four_pow, mod);
300       check_four_pow = zn_mod_add_slim (check_four_pow, check_four_pow, mod);
301    }
302 
303    if (check_accum != p-2)
304    {
305       printf ("bernoulli failed correctness check for p = %lu\n", p);
306       abort ();
307    }
308 
309    if (own_res)
310       free (res);
311 
312    free (P);
313    free (G);
314    zn_mod_clear (mod);
315 }
316 
317 
318 
319 /*
320    Same as bernoulli(), but handles two primes simultaneously.
321 
322    p1 and p2 must be distinct primes >= 3.
323 */
324 void
bernoulli_dual(ulong * res1,ulong * irregular1,unsigned irregular1_max,ulong p1,ulong * res2,ulong * irregular2,unsigned irregular2_max,ulong p2)325 bernoulli_dual(ulong* res1, ulong* irregular1, unsigned irregular1_max, ulong p1,
326                ulong* res2, ulong* irregular2, unsigned irregular2_max, ulong p2)
327 {
328    ZNP_ASSERT (p1 < (1UL << (ULONG_BITS/2)));
329    ZNP_ASSERT (p2 < (1UL << (ULONG_BITS/2)));
330    ZNP_ASSERT (p1 != p2);
331 
332    // swap them to make p1 < p2
333    if (p1 > p2)
334    {
335       { ulong temp = p2; p2 = p1; p1 = temp; }
336       { ulong* temp = res1; res1 = res2; res2 = temp; }
337       { ulong* temp = irregular1; irregular1 = irregular2; irregular2 = temp; }
338       { unsigned temp = irregular1_max; irregular1_max = irregular2_max; irregular2_max = temp; }
339    }
340 
341    ulong g1, g_inv1;
342    ulong g2, g_inv2;
343    primitive_root (&g1, &g_inv1, p1);
344    primitive_root (&g2, &g_inv2, p2);
345 
346    ulong n1 = (p1-1) / 2;
347    ulong n2 = (p2-1) / 2;
348 
349    zn_mod_t mod1, mod2;
350    zn_mod_init (mod1, p1);
351    zn_mod_init (mod2, p2);
352 
353    ulong q = p1 * p2;
354    zn_mod_t mod;
355    zn_mod_init (mod, q);
356 
357    // allocate our own res2 if the caller didn't
358    int own_res2 = 0;
359    if (!res2)
360    {
361       res2 = (ulong*) malloc (n2 * sizeof (ulong));
362       own_res2 = 1;
363    }
364 
365    if (irregular1)
366       irregular1[0] = 0;
367    if (irregular2)
368       irregular2[0] = 0;
369 
370    // find idempotents to CRT modulo p1 and p2, i.e.
371    // id1 = 1 mod p1, id1 = 0 mod p2
372    // id2 = 0 mod p1, id2 = 1 mod p2
373    mpz_t p1_mpz, p2_mpz, a1_mpz, a2_mpz, g_mpz;
374    mpz_init (p1_mpz);
375    mpz_init (p2_mpz);
376    mpz_init (a1_mpz);
377    mpz_init (a2_mpz);
378    mpz_init (g_mpz);
379 
380    mpz_set_ui (p1_mpz, p1);
381    mpz_set_ui (p2_mpz, p2);
382    mpz_gcdext (g_mpz, a1_mpz, a2_mpz, p1_mpz, p2_mpz);
383 
384    mpz_mul (a1_mpz, a1_mpz, p1_mpz);
385    mpz_mod_ui (a1_mpz, a1_mpz, q);
386    ulong id2 = mpz_get_ui (a1_mpz);
387 
388    mpz_mul (a2_mpz, a2_mpz, p2_mpz);
389    mpz_mod_ui (a2_mpz, a2_mpz, q);
390    ulong id1 = mpz_get_ui (a2_mpz);
391 
392    mpz_clear (g_mpz);
393    mpz_clear (a2_mpz);
394    mpz_clear (a1_mpz);
395    mpz_clear (p2_mpz);
396    mpz_clear (p1_mpz);
397 
398    ulong* G = (ulong*) malloc (n2 * sizeof (ulong));
399    ulong* P = (ulong*) malloc ((2*n2 - 1) * sizeof (ulong));
400    ulong* J = res2;
401 
402    // -------------------------------------------------------------------------
403    // Step 1: compute polynomials G(X) and J(X)
404 
405    // g_pow = g^(i-1), g_pow = g^(-i) at beginning of each iteration
406    ulong g_pow1 = g_inv1;
407    ulong g_pow2 = g_inv2;
408    ulong g_pow_inv1 = 1;
409    ulong g_pow_inv2 = 1;
410 
411    // bias = (g-1)/2 mod p
412    ulong bias1 = (g1 - 1 + ((g1 & 1) ? 0 : p1)) / 2;
413    ulong bias2 = (g2 - 1 + ((g2 & 1) ? 0 : p2)) / 2;
414 
415    // fudge = g^(i^2), fudge_inv = g^(-i^2) at each iteration
416    ulong fudge1 = 1;
417    ulong fudge2 = 1;
418    ulong fudge_inv1 = 1;
419    ulong fudge_inv2 = 1;
420 
421    ulong i;
422    for (i = 0; i < n1; i++)
423    {
424       ulong prod1 = g1 * g_pow1;
425       ulong prod2 = g2 * g_pow2;
426 
427       // quo = floor(prod / p)
428       // rem = prod % p
429       ulong quo1 = zn_mod_quotient (prod1, mod1);
430       ulong rem1 = prod1 - quo1 * p1;
431       ulong quo2 = zn_mod_quotient (prod2, mod2);
432       ulong rem2 = prod2 - quo2 * p2;
433 
434       // h = h(g^i) / g^i mod p
435       ulong h1 = g_pow_inv1 * zn_mod_sub_slim (bias1, quo1, mod1);
436       h1 = zn_mod_reduce (h1, mod1);
437       ulong h2 = g_pow_inv2 * zn_mod_sub_slim (bias2, quo2, mod2);
438       h2 = zn_mod_reduce (h2, mod2);
439 
440       // update g_pow and g_pow_inv for next iteration
441       g_pow1 = rem1;
442       g_pow_inv1 = zn_mod_reduce (g_pow_inv1 * g_inv1, mod1);
443       g_pow2 = rem2;
444       g_pow_inv2 = zn_mod_reduce (g_pow_inv2 * g_inv2, mod2);
445 
446       // X^i coefficient of G(X) is g^(i^2) * h(g^i) / g^i
447       // (combine via CRT)
448       ulong Gval1 = zn_mod_reduce (h1 * fudge1, mod1);
449       Gval1 = zn_mod_mul (Gval1, id1, mod);
450       ulong Gval2 = zn_mod_reduce (h2 * fudge2, mod2);
451       Gval2 = zn_mod_mul (Gval2, id2, mod);
452       G[i] = zn_mod_add (Gval1, Gval2, mod);
453 
454       // X^i coefficient of J(X) is g^(-i^2)
455       ulong Jval1 = zn_mod_mul (fudge_inv1, id1, mod);
456       ulong Jval2 = zn_mod_mul (fudge_inv2, id2, mod);
457       J[i] = zn_mod_add (Jval1, Jval2, mod);
458 
459       // update fudge and fudge_inv for next iteration
460       fudge1 = zn_mod_reduce (fudge1 * g_pow1, mod1);
461       fudge1 = zn_mod_reduce (fudge1 * g_pow1, mod1);
462       fudge1 = zn_mod_reduce (fudge1 * g1, mod1);
463       fudge_inv1 = zn_mod_reduce (fudge_inv1 * g_pow_inv1, mod1);
464       fudge_inv1 = zn_mod_reduce (fudge_inv1 * g_pow_inv1, mod1);
465       fudge_inv1 = zn_mod_reduce (fudge_inv1 * g1, mod1);
466 
467       fudge2 = zn_mod_reduce (fudge2 * g_pow2, mod2);
468       fudge2 = zn_mod_reduce (fudge2 * g_pow2, mod2);
469       fudge2 = zn_mod_reduce (fudge2 * g2, mod2);
470       fudge_inv2 = zn_mod_reduce (fudge_inv2 * g_pow_inv2, mod2);
471       fudge_inv2 = zn_mod_reduce (fudge_inv2 * g_pow_inv2, mod2);
472       fudge_inv2 = zn_mod_reduce (fudge_inv2 * g2, mod2);
473    }
474 
475    // finished with p1, now finish the loop for p2
476    for (; i < n2; i++)
477    {
478       ulong prod2 = g2 * g_pow2;
479 
480       // quo = floor(prod / p)
481       // rem = prod % p
482       ulong quo2 = zn_mod_quotient (prod2, mod2);
483       ulong rem2 = prod2 - quo2 * p2;
484 
485       // h = h(g^i) / g^i mod p
486       ulong h2 = g_pow_inv2 * zn_mod_sub_slim (bias2, quo2, mod2);
487       h2 = zn_mod_reduce (h2, mod2);
488 
489       // update g_pow and g_pow_inv for next iteration
490       g_pow2 = rem2;
491       g_pow_inv2 = zn_mod_reduce (g_pow_inv2 * g_inv2, mod2);
492 
493       // X^i coefficient of G(X) is g^(i^2) * h(g^i) / g^i
494       // (combine via CRT)
495       ulong Gval2 = zn_mod_reduce (h2 * fudge2, mod2);
496       G[i] = zn_mod_mul (Gval2, id2, mod);
497 
498       // X^i coefficient of J(X) is g^(-i^2)
499       J[i] = zn_mod_mul (fudge_inv2, id2, mod);
500 
501       // update fudge and fudge_inv for next iteration
502       fudge2 = zn_mod_reduce (fudge2 * g_pow2, mod2);
503       fudge2 = zn_mod_reduce (fudge2 * g_pow2, mod2);
504       fudge2 = zn_mod_reduce (fudge2 * g2, mod2);
505       fudge_inv2 = zn_mod_reduce (fudge_inv2 * g_pow_inv2, mod2);
506       fudge_inv2 = zn_mod_reduce (fudge_inv2 * g_pow_inv2, mod2);
507       fudge_inv2 = zn_mod_reduce (fudge_inv2 * g2, mod2);
508    }
509 
510    J[0] = 0;
511 
512    // -------------------------------------------------------------------------
513    // Step 2: compute product P(X) = G(X) * J(X)
514 
515 #if 0
516    zn_array_mul_fft_dft (P, J, n2, G, n2, 3, mod);
517 #else
518    zn_array_mul (P, J, n2, G, n2, mod);
519 #endif
520 
521    // -------------------------------------------------------------------------
522    // Step 3: extract output from P(X), and verify result
523 
524    if (res1)
525       res1[0] = 1;
526 
527    // we will verify that \sum_{j=0}^{(p-3)/2} 4^j (2j+1) B(2j) = -2 mod p
528    ulong check_accum1 = 1;
529    ulong check_four_pow1 = 4 % p1;
530 
531    // g_sqr = g^2
532    // g_sqr_inv = g^(-2)
533    ulong g_sqr1 = zn_mod_reduce (g1 * g1, mod1);
534    ulong g_sqr_inv1 = zn_mod_reduce (g_inv1 * g_inv1, mod1);
535 
536    // make table with J[i] = (1 - g^(2i+2))(1 - g^(2i+4)) ... (1 - g^(p-3))
537    // for 0 <= i < (p-1)/2
538    ulong g_sqr_inv_pow1 = g_sqr_inv1;
539    J[n1-1] = 1;
540    for (i = 1; i < n1; i++)
541    {
542       J[n1-i-1] = zn_mod_reduce (J[n1-i] * (p1 + 1 - g_sqr_inv_pow1), mod1);
543       g_sqr_inv_pow1 = zn_mod_reduce (g_sqr_inv_pow1 * g_sqr_inv1, mod1);
544    }
545 
546    // fudge = g^(i^2) at each iteration
547    fudge1 = g1;
548    // g_sqr_pow = g^(2i) at each iteration
549    ulong g_sqr_pow1 = g_sqr1;
550 
551    // prod_inv = [(1 - g^(2i))(1 - g^(2i+2)) ... (1 - g^(p-3))]^(-1)
552    // at each iteration (todo: for i == 1, it's experimentally equal to -1/2
553    // mod p, need to prove this)
554    ulong prod_inv1 = p1 - 2;
555 
556    for (i = 1; i < n1; i++)
557    {
558       ulong val = (i == (n1-1)) ? 0 : P[i + n1];
559       if (n1 & 1)
560          val = zn_mod_neg (val, mod);
561       val = zn_mod_add (val, G[i], mod);
562       val = zn_mod_add (val, P[i], mod);
563 
564       // reduce it mod p1
565       val = zn_mod_reduce (val, mod1);
566 
567       // multiply by 4 * i * g^(i^2)
568       val = zn_mod_reduce (val * fudge1, mod1);
569       val = zn_mod_reduce (val * (2*i), mod1);
570       val = zn_mod_add_slim (val, val, mod1);
571 
572       // divide by (1 - g^(2i))
573       val = zn_mod_reduce (val * prod_inv1, mod1);
574       val = zn_mod_reduce (val * J[i], mod1);
575       prod_inv1 = zn_mod_reduce (prod_inv1 * (1 + p1 - g_sqr_pow1), mod1);
576 
577       // store output coefficient if requested
578       if (res1)
579          res1[i] = val;
580 
581       // store irregular index if requested
582       if (irregular1)
583       {
584          if (val == 0)
585          {
586             irregular1[0]++;
587             if (irregular1[0] >= irregular1_max)
588             {
589                printf ("too many irregular indices for p = %lu\n", p1);
590                abort ();
591             }
592             irregular1[irregular1[0]] = 2*i;
593          }
594       }
595 
596       // update fudge and g_sqr_pow
597       g_sqr_pow1 = zn_mod_reduce (g_sqr_pow1 * g1, mod1);
598       fudge1 = zn_mod_reduce (fudge1 * g_sqr_pow1, mod1);
599       g_sqr_pow1 = zn_mod_reduce (g_sqr_pow1 * g1, mod1);
600 
601       // update verification data
602       ulong check_term1 = zn_mod_reduce (check_four_pow1 * (2*i + 1), mod1);
603       check_term1 = zn_mod_reduce (check_term1 * val, mod1);
604       check_accum1 = zn_mod_add_slim (check_accum1, check_term1, mod1);
605       check_four_pow1 = zn_mod_add_slim (check_four_pow1, check_four_pow1, mod1);
606       check_four_pow1 = zn_mod_add_slim (check_four_pow1, check_four_pow1, mod1);
607    }
608 
609    if (check_accum1 != p1-2)
610    {
611       printf ("bernoulli_dual failed correctness check for p1 = %lu\n", p1);
612       abort ();
613    }
614 
615    // -------------------------------------------------------------------------
616    // Do step 3 again for the second prime
617 
618    res2[0] = 1;
619 
620    // we will verify that \sum_{j=0}^{(p-3)/2} 4^j (2j+1) B(2j) = -2 mod p
621    ulong check_accum2 = 1;
622    ulong check_four_pow2 = 4 % p2;
623 
624    // g_sqr = g^2
625    // g_sqr_inv = g^(-2)
626    ulong g_sqr2 = zn_mod_reduce (g2 * g2, mod2);
627    ulong g_sqr_inv2 = zn_mod_reduce (g_inv2 * g_inv2, mod2);
628 
629    // make table with J[i] = (1 - g^(2i+2))(1 - g^(2i+4)) ... (1 - g^(p-3))
630    // for 0 <= i < (p-1)/2
631    ulong g_sqr_inv_pow2 = g_sqr_inv2;
632    J[n2-1] = 1;
633    for (i = 1; i < n2; i++)
634    {
635       J[n2-i-1] = zn_mod_reduce (J[n2-i] * (p2 + 1 - g_sqr_inv_pow2), mod2);
636       g_sqr_inv_pow2 = zn_mod_reduce (g_sqr_inv_pow2 * g_sqr_inv2, mod2);
637    }
638 
639    // fudge = g^(i^2) at each iteration
640    fudge2 = g2;
641    // g_sqr_pow = g^(2i) at each iteration
642    ulong g_sqr_pow2 = g_sqr2;
643 
644    // prod_inv = [(1 - g^(2i))(1 - g^(2i+2)) ... (1 - g^(p-3))]^(-1)
645    // at each iteration (todo: for i == 1, it's experimentally equal to -1/2
646    // mod p, need to prove this)
647    ulong prod_inv2 = p2 - 2;
648 
649    for (i = 1; i < n2; i++)
650    {
651       ulong val = (i == (n2-1)) ? 0 : P[i + n2];
652       if (n2 & 1)
653          val = zn_mod_neg (val, mod);
654       val = zn_mod_add (val, G[i], mod);
655       val = zn_mod_add (val, P[i], mod);
656 
657       // reduce it mod p2
658       val = zn_mod_reduce (val, mod2);
659 
660       // multiply by 4 * i * g^(i^2)
661       val = zn_mod_reduce (val * fudge2, mod2);
662       val = zn_mod_reduce (val * (2*i), mod2);
663       val = zn_mod_add_slim (val, val, mod2);
664 
665       // divide by (1 - g^(2i))
666       val = zn_mod_reduce (val * prod_inv2, mod2);
667       val = zn_mod_reduce (val * J[i], mod2);
668       prod_inv2 = zn_mod_reduce (prod_inv2 * (1 + p2 - g_sqr_pow2), mod2);
669 
670       // store output coefficient if requested
671       if (!own_res2)
672          res2[i] = val;
673 
674       // store irregular index if requested
675       if (irregular2)
676       {
677          if (val == 0)
678          {
679             irregular2[0]++;
680             if (irregular2[0] >= irregular2_max)
681             {
682                printf ("too many irregular indices for p = %lu\n", p2);
683                abort ();
684             }
685             irregular2[irregular2[0]] = 2*i;
686          }
687       }
688 
689       // update fudge and g_sqr_pow
690       g_sqr_pow2 = zn_mod_reduce (g_sqr_pow2 * g2, mod2);
691       fudge2 = zn_mod_reduce (fudge2 * g_sqr_pow2, mod2);
692       g_sqr_pow2 = zn_mod_reduce (g_sqr_pow2 * g2, mod2);
693 
694       // update verification data
695       ulong check_term2 = zn_mod_reduce (check_four_pow2 * (2*i + 1), mod2);
696       check_term2 = zn_mod_reduce (check_term2 * val, mod2);
697       check_accum2 = zn_mod_add_slim (check_accum2, check_term2, mod2);
698       check_four_pow2 = zn_mod_add_slim (check_four_pow2, check_four_pow2, mod2);
699       check_four_pow2 = zn_mod_add_slim (check_four_pow2, check_four_pow2, mod2);
700    }
701 
702    if (check_accum2 != p2-2)
703    {
704       printf ("bernoulli_dual failed correctness check for p2 = %lu\n", p2);
705       abort ();
706    }
707 
708    if (own_res2)
709       free (res2);
710    free (P);
711    free (G);
712 
713    zn_mod_clear (mod);
714    zn_mod_clear (mod2);
715    zn_mod_clear (mod1);
716 }
717 
718 
719 
720 int
main(int argc,char * argv[])721 main (int argc, char* argv[])
722 {
723    if (argc == 2)
724    {
725       ulong i, p = atol (argv[1]);
726       ulong irregular[30];
727       bernoulli (NULL, irregular, 29, p);
728 
729       printf ("irregular indices for p = %lu: ", p);
730       for (i = 1; i <= irregular[0]; i++)
731          printf ("%lu ", irregular[i]);
732       printf ("\n");
733    }
734    else if (argc == 3)
735    {
736       ulong i, p1 = atol (argv[1]), p2 = atol (argv[2]);
737       ulong irregular1[30];
738       ulong irregular2[30];
739 
740       bernoulli_dual (NULL, irregular1, 29, p1, NULL, irregular2, 29, p2);
741 
742       printf ("irregular indices for p = %lu: ", p1);
743       for (i = 1; i <= irregular1[0]; i++)
744          printf ("%lu ", irregular1[i]);
745       printf ("\n");
746 
747       printf ("irregular indices for p = %lu: ", p2);
748       for (i = 1; i <= irregular2[0]; i++)
749          printf ("%lu ", irregular2[i]);
750       printf ("\n");
751    }
752    else
753    {
754       printf ("usage:\n");
755       printf ("\n");
756       printf ("  bernoulli p\n");
757       printf ("    prints irregular indices for p\n");
758       printf ("\n");
759       printf ("  bernoulli p1 p2\n");
760       printf ("    prints irregular indices for p1 and p2\n");
761    }
762 
763    return 0;
764 }
765 
766 
767 // end of file ****************************************************************
768