1 // zeta().
2
3 // General includes.
4 #include "base/cl_sysdep.h"
5
6 // Specification.
7 #include "float/transcendental/cl_F_tran.h"
8
9
10 // Implementation.
11
12 #include "cln/lfloat.h"
13 #include "float/transcendental/cl_LF_tran.h"
14 #include "float/lfloat/cl_LF.h"
15 #include "cln/integer.h"
16 #include "cln/exception.h"
17 #include "base/cl_alloca.h"
18
19 namespace cln {
20
compute_zeta_exp(int s,uintC len)21 const cl_LF compute_zeta_exp (int s, uintC len)
22 {
23 // Method:
24 // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
25 // with convergence acceleration through exp(x), and evaluated
26 // using the binary-splitting algorithm.
27 var uintC actuallen = len+2; // 2 guard digits
28 var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
29 var uintC N = (uintC)(2.718281828*x);
30 CL_ALLOCA_STACK;
31 var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
32 var uintC n;
33 for (n = 0; n < N; n++) {
34 if (n==0) {
35 init1(cl_I, args[n].p) (1);
36 init1(cl_I, args[n].q) (1);
37 } else {
38 init1(cl_I, args[n].p) (x);
39 init1(cl_I, args[n].q) (n);
40 }
41 init1(cl_I, args[n].d) (evenp(n)
42 ? expt_pos(n+1,s)
43 : -expt_pos(n+1,s));
44 }
45 var cl_LF result = eval_pqd_series(N,args,actuallen);
46 for (n = 0; n < N; n++) {
47 args[n].p.~cl_I();
48 args[n].q.~cl_I();
49 args[n].d.~cl_I();
50 }
51 result = shorten(result,len); // verkürzen und fertig
52 // Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
53 return scale_float(result,s-1) / (ash(1,s-1)-1);
54 }
55 // Bit complexity (N = len): O(log(N)^2*M(N)).
56
compute_zeta_cvz1(int s,uintC len)57 const cl_LF compute_zeta_cvz1 (int s, uintC len)
58 {
59 // Method:
60 // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
61 // with Cohen-Villegas-Zagier convergence acceleration.
62 var uintC actuallen = len+2; // 2 guard digits
63 var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
64 var cl_I fterm = 2*(cl_I)N*(cl_I)N;
65 var cl_I fsum = fterm;
66 var cl_LF gterm = cl_I_to_LF(fterm,actuallen);
67 var cl_LF gsum = gterm;
68 var uintC n;
69 // After n loops
70 // fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm,
71 // gterm = S_n*fterm, gsum = ... + gterm.
72 for (n = 1; n < N; n++) {
73 fterm = exquopos(fterm*(2*(cl_I)(N-n)*(cl_I)(N+n)),(cl_I)(2*n+1)*(cl_I)(n+1));
74 fsum = fsum + fterm;
75 gterm = The(cl_LF)(gterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
76 if (evenp(n))
77 gterm = gterm + cl_I_to_LF(fterm,actuallen)/expt_pos(n+1,s);
78 else
79 gterm = gterm - cl_I_to_LF(fterm,actuallen)/expt_pos(n+1,s);
80 gsum = gsum + gterm;
81 }
82 var cl_LF result = gsum/cl_I_to_LF(1+fsum,actuallen);
83 result = shorten(result,len); // verkürzen und fertig
84 // Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
85 return scale_float(result,s-1) / (ash(1,s-1)-1);
86 }
87 // Bit complexity (N = len): O(N^2).
88
compute_zeta_cvz2(int s,uintC len)89 const cl_LF compute_zeta_cvz2 (int s, uintC len)
90 {
91 // Method:
92 // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
93 // with Cohen-Villegas-Zagier convergence acceleration, and
94 // evaluated using the binary splitting algorithm with truncation.
95 var uintC actuallen = len+2; // 2 guard digits
96 var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
97 struct rational_series_stream : cl_pqd_series_stream {
98 uintC n;
99 int s;
100 uintC N;
101 static cl_pqd_series_term computenext (cl_pqd_series_stream& thisss)
102 {
103 var rational_series_stream& thiss = (rational_series_stream&)thisss;
104 var uintC n = thiss.n;
105 var uintC s = thiss.s;
106 var uintC N = thiss.N;
107 var cl_pqd_series_term result;
108 result.p = 2*(cl_I)(N-n)*(cl_I)(N+n);
109 result.q = (cl_I)(2*n+1)*(cl_I)(n+1);
110 result.d = evenp(n) ? expt_pos(n+1,s) : -expt_pos(n+1,s);
111 thiss.n = n+1;
112 return result;
113 }
114 rational_series_stream (int s_, uintC N_)
115 : cl_pqd_series_stream (rational_series_stream::computenext),
116 n (0), s (s_), N (N_) {}
117 } series(s,N);
118 var cl_pqd_series_result<cl_I> sums;
119 eval_pqd_series_aux(N,series,sums,actuallen);
120 // Here we need U/(1+S) = V/D(Q+T).
121 var cl_LF result =
122 cl_I_to_LF(sums.V,actuallen) / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen));
123 result = shorten(result,len); // verkürzen und fertig
124 // Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
125 return scale_float(result,s-1) / (ash(1,s-1)-1);
126 }
127 // Bit complexity (N = len): O(log(N)^2*M(N)).
128
129 // Timings of the above algorithm in seconds, on a P-4, 3GHz, running Linux.
130 // s 5 15
131 // N sum_exp sum_cvz1 sum_cvz2 sum_exp sum_cvz1 sum_cvz2
132 // 125 0.60 0.04 0.06 1.88 0.04 0.20
133 // 250 1.60 0.13 0.19 4.82 0.15 0.58
134 // 500 4.3 0.48 0.60 12.2 0.55 1.67
135 // 1000 11.0 1.87 1.63 31.7 2.11 4.60
136 // 2000 28.0 7.4 4.23 111 8.2 11.3
137 // 4000 70.2 30.6 10.6 50 44
138 // 8000 142 26.8 169 75
139 // asymp. FAST N^2 FAST FAST N^2 FAST
140 //
141 // s 35 75
142 // N sum_exp sum_cvz1 sum_cvz2 sum_exp sum_cvz1 sum_cvz2
143 // 125 4.70 0.05 0.53 11.3 0.07 1.35
144 // 250 12.5 0.19 1.62 28.7 0.25 3.74
145 // 500 31.3 0.69 4.40 70.2 0.96 10.2
146 // 1000 88.8 2.70 11.4 191 3.76 25.4
147 // 2000 10.9 28.9 15.6 64.3
148 // 4000 46 73 64.4 170
149 // 8000 215 178 295 397
150 // 16000 898 419 1290 972
151 // asymp. FAST N^2 FAST FAST N^2 FAST
152 //
153 // The break-even point between cvz1 and cvz2 seems to grow linearly with s.
154
155 // Timings of the above algorithm, on an i486 33 MHz, running Linux.
156 // s 5 15
157 // N sum_exp sum_cvz1 sum_cvz2 sum_exp sum_cvz1 sum_cvz2
158 // 10 2.04 0.09 0.17 8.0 0.11 0.49
159 // 25 8.6 0.30 0.76 30.6 0.37 2.36
160 // 50 25.1 0.92 2.49 91.1 1.15 7.9
161 // 100 2.97 8.46 3.75 24.5
162 // 250 16.7 36.5 21.7 108
163 // 500 64.2 106 85.3 295
164 // 1000 263 285 342 788
165 // asymp. FAST N^2 FAST FAST N^2 FAST
166 //
167 // The break-even point between cvz1 and cvz2 seems to grow linearly with s.
168
zeta(int s,uintC len)169 const cl_LF zeta (int s, uintC len)
170 {
171 if (!(s > 1))
172 throw runtime_exception("zeta(s) with illegal s<2.");
173 if (s==3)
174 return zeta3(len);
175 if (len < 220*(uintC)s)
176 return compute_zeta_cvz1(s,len);
177 else
178 return compute_zeta_cvz2(s,len);
179 }
180 // Bit complexity (N = len): O(log(N)^2*M(N)).
181
182 } // namespace cln
183