1 // zeta().
2 
3 // General includes.
4 #include "base/cl_sysdep.h"
5 
6 // Specification.
7 #include "float/transcendental/cl_F_tran.h"
8 
9 
10 // Implementation.
11 
12 #include "cln/lfloat.h"
13 #include "float/transcendental/cl_LF_tran.h"
14 #include "float/lfloat/cl_LF.h"
15 #include "cln/integer.h"
16 #include "cln/exception.h"
17 #include "base/cl_alloca.h"
18 
19 namespace cln {
20 
compute_zeta_exp(int s,uintC len)21 const cl_LF compute_zeta_exp (int s, uintC len)
22 {
23 	// Method:
24 	// zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
25 	// with convergence acceleration through exp(x), and evaluated
26 	// using the binary-splitting algorithm.
27 	var uintC actuallen = len+2; // 2 guard digits
28 	var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
29 	var uintC N = (uintC)(2.718281828*x);
30 	CL_ALLOCA_STACK;
31 	var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
32 	var uintC n;
33 	for (n = 0; n < N; n++) {
34 		if (n==0) {
35 			init1(cl_I, args[n].p) (1);
36 			init1(cl_I, args[n].q) (1);
37 		} else {
38 			init1(cl_I, args[n].p) (x);
39 			init1(cl_I, args[n].q) (n);
40 		}
41 		init1(cl_I, args[n].d) (evenp(n)
42 		                        ? expt_pos(n+1,s)
43 		                        : -expt_pos(n+1,s));
44 	}
45 	var cl_LF result = eval_pqd_series(N,args,actuallen);
46 	for (n = 0; n < N; n++) {
47 		args[n].p.~cl_I();
48 		args[n].q.~cl_I();
49 		args[n].d.~cl_I();
50 	}
51 	result = shorten(result,len); // verkürzen und fertig
52 	// Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
53 	return scale_float(result,s-1) / (ash(1,s-1)-1);
54 }
55 // Bit complexity (N = len): O(log(N)^2*M(N)).
56 
compute_zeta_cvz1(int s,uintC len)57 const cl_LF compute_zeta_cvz1 (int s, uintC len)
58 {
59 	// Method:
60 	// zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
61 	// with Cohen-Villegas-Zagier convergence acceleration.
62 	var uintC actuallen = len+2; // 2 guard digits
63 	var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
64 	var cl_I fterm = 2*(cl_I)N*(cl_I)N;
65 	var cl_I fsum = fterm;
66 	var cl_LF gterm = cl_I_to_LF(fterm,actuallen);
67 	var cl_LF gsum = gterm;
68 	var uintC n;
69 	// After n loops
70 	//   fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm,
71 	//   gterm = S_n*fterm, gsum = ... + gterm.
72 	for (n = 1; n < N; n++) {
73 		fterm = exquopos(fterm*(2*(cl_I)(N-n)*(cl_I)(N+n)),(cl_I)(2*n+1)*(cl_I)(n+1));
74 		fsum = fsum + fterm;
75 		gterm = The(cl_LF)(gterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
76 		if (evenp(n))
77 			gterm = gterm + cl_I_to_LF(fterm,actuallen)/expt_pos(n+1,s);
78 		else
79 			gterm = gterm - cl_I_to_LF(fterm,actuallen)/expt_pos(n+1,s);
80 		gsum = gsum + gterm;
81 	}
82 	var cl_LF result = gsum/cl_I_to_LF(1+fsum,actuallen);
83 	result = shorten(result,len); // verkürzen und fertig
84 	// Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
85 	return scale_float(result,s-1) / (ash(1,s-1)-1);
86 }
87 // Bit complexity (N = len): O(N^2).
88 
compute_zeta_cvz2(int s,uintC len)89 const cl_LF compute_zeta_cvz2 (int s, uintC len)
90 {
91 	// Method:
92 	// zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
93 	// with Cohen-Villegas-Zagier convergence acceleration, and
94 	// evaluated using the binary splitting algorithm with truncation.
95 	var uintC actuallen = len+2; // 2 guard digits
96 	var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
97 	struct rational_series_stream : cl_pqd_series_stream {
98 		uintC n;
99 		int s;
100 		uintC N;
101 		static cl_pqd_series_term computenext (cl_pqd_series_stream& thisss)
102 		{
103 			var rational_series_stream& thiss = (rational_series_stream&)thisss;
104 			var uintC n = thiss.n;
105 			var uintC s = thiss.s;
106 			var uintC N = thiss.N;
107 			var cl_pqd_series_term result;
108 			result.p = 2*(cl_I)(N-n)*(cl_I)(N+n);
109 			result.q = (cl_I)(2*n+1)*(cl_I)(n+1);
110 			result.d = evenp(n) ? expt_pos(n+1,s) : -expt_pos(n+1,s);
111 			thiss.n = n+1;
112 			return result;
113 		}
114 		rational_series_stream (int s_, uintC N_)
115 			: cl_pqd_series_stream (rational_series_stream::computenext),
116 			  n (0), s (s_), N (N_) {}
117 	} series(s,N);
118 	var cl_pqd_series_result<cl_I> sums;
119 	eval_pqd_series_aux(N,series,sums,actuallen);
120 	// Here we need U/(1+S) = V/D(Q+T).
121 	var cl_LF result =
122 	  cl_I_to_LF(sums.V,actuallen) / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen));
123 	result = shorten(result,len); // verkürzen und fertig
124 	// Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
125 	return scale_float(result,s-1) / (ash(1,s-1)-1);
126 }
127 // Bit complexity (N = len): O(log(N)^2*M(N)).
128 
129 // Timings of the above algorithm in seconds, on a P-4, 3GHz, running Linux.
130 //    s                5                          15
131 //    N     sum_exp sum_cvz1 sum_cvz2   sum_exp sum_cvz1 sum_cvz2
132 //   125      0.60    0.04     0.06       1.88    0.04     0.20
133 //   250      1.60    0.13     0.19       4.82    0.15     0.58
134 //   500      4.3     0.48     0.60      12.2     0.55     1.67
135 //  1000     11.0     1.87     1.63      31.7     2.11     4.60
136 //  2000     28.0     7.4      4.23     111       8.2     11.3
137 //  4000     70.2    30.6     10.6               50       44
138 //  8000            142       26.8              169       75
139 // asymp.      FAST    N^2     FAST        FAST    N^2     FAST
140 //
141 //    s               35                          75
142 //    N     sum_exp sum_cvz1 sum_cvz2   sum_exp sum_cvz1 sum_cvz2
143 //   125      4.70    0.05     0.53      11.3     0.07     1.35
144 //   250     12.5     0.19     1.62      28.7     0.25     3.74
145 //   500     31.3     0.69     4.40      70.2     0.96    10.2
146 //  1000     88.8     2.70    11.4      191       3.76    25.4
147 //  2000             10.9     28.9               15.6     64.3
148 //  4000             46       73                 64.4    170
149 //  8000            215      178                295      397
150 // 16000            898      419               1290      972
151 // asymp.      FAST    N^2     FAST        FAST    N^2     FAST
152 //
153 // The break-even point between cvz1 and cvz2 seems to grow linearly with s.
154 
155 // Timings of the above algorithm, on an i486 33 MHz, running Linux.
156 //    s                5                           15
157 //    N     sum_exp sum_cvz1 sum_cvz2   sum_exp sum_cvz1 sum_cvz2
158 //    10       2.04    0.09    0.17        8.0     0.11    0.49
159 //    25       8.6     0.30    0.76       30.6     0.37    2.36
160 //    50      25.1     0.92    2.49       91.1     1.15    7.9
161 //   100               2.97    8.46                3.75   24.5
162 //   250              16.7    36.5                21.7   108
163 //   500              64.2   106                  85.3   295
164 //  1000             263     285                 342     788
165 // asymp.      FAST    N^2     FAST        FAST    N^2     FAST
166 //
167 // The break-even point between cvz1 and cvz2 seems to grow linearly with s.
168 
zeta(int s,uintC len)169 const cl_LF zeta (int s, uintC len)
170 {
171 	if (!(s > 1))
172 		throw runtime_exception("zeta(s) with illegal s<2.");
173 	if (s==3)
174 		return zeta3(len);
175 	if (len < 220*(uintC)s)
176 		return compute_zeta_cvz1(s,len);
177 	else
178 		return compute_zeta_cvz2(s,len);
179 }
180 // Bit complexity (N = len): O(log(N)^2*M(N)).
181 
182 }  // namespace cln
183