1 #include <goffice/goffice.h>
2 
3 // This program answers the question of whether a constant, c, is more
4 // accurately represented as a double than its inverse, 1/c.
5 //
6 // The purpose is to help squeeze a few more bits out of calculations
7 // that involve multiplying or dividing by the constant.
8 //
9 // Consider the density function for the normal distribution:
10 //     d(x) = (1/sqrt(2*pi)) * exp(-x^2/2)
11 // As written, this multiplies by a constant, 1/sqrt(2*pi), whose value
12 // we could compute and state with enough decimals to guarantee that we
13 // end up using the best possible value of type double.  But we could
14 // also use division by the constant sqrt(2*pi).  Which is best?
15 //
16 // The traditional answer has been that division is expensive, so avoid
17 // it, but that isn't really true anymore.  Here we are only interested
18 // in accuracy.
19 //
20 // Assumptions:
21 // 1. double is base-2 floating-point with 53 mantissa bits (including
22 //    the implied leading bit).
23 // 2. we want to minimize the error on the product or quotient.
24 // 3. we know nothing special about the second factor that would
25 //    force one choice or the other.
26 //
27 // For sqrt(2pi) the answer is that it is ever so slightly better to use
28 // multiplication.  But it's a very close call.  But to get the right value,
29 // one cannot start with the best double approximating sqrt(2pi) and compute
30 // 1/sqrt(2pi) from that.
31 
32 // ----------------------------------------
33 // Examining constant sqrt(2pi)
34 //
35 // Value bit pattern: 1.0100000011011001001100011111111101100010011100000101|10011 * 2^1
36 // Value as double: 2.50662827463100068570156508940272033214569091796875
37 // Relative error: -7.31205e-17
38 // Correct digits: 16.14
39 //
40 // Inverse bit pattern: 1.1001100010000100010100110011110101000011011001010000|10001 * 2^-2
41 // Inverse as double: 0.398942280401432702863218082711682654917240142822265625
42 // Relative error: -6.24734e-17
43 // Correct digits: 16.20
44 // Can inverse be computed as double: no
45 //
46 // Conclusion: for sqrt(2pi), use inverse
47 // ----------------------------------------
48 
49 static GOQuad qln10;
50 
51 static void
print_bits(GOQuad const * qc)52 print_bits (GOQuad const *qc)
53 {
54 	GOQuad qx = *qc, qd;
55 	int e;
56 	double s;
57 	int b;
58 	int N = 53 + 7;
59 
60 	g_return_if_fail (go_finite (qx.h) && qx.h != 0);
61 
62 	if (qx.h < 0) {
63 		g_printerr ("-");
64 		qx.h = -qx.h;
65 		qx.l = -qx.l;
66 	}
67 
68 	// Scale mantissa to [0.5 ; 1.0[
69 	(void)frexp (go_quad_value (&qx), &e);
70 	s = ldexp (1.0, -e);
71 	qx.h *= s;
72 	qx.l *= s;
73 
74 	// Add to simulate rounding when we truncate below
75 	go_quad_init (&qd, ldexp (0.5, -N));
76 	go_quad_add (&qx, &qx, &qd);
77 
78 	for (b = 0; b < N; b++) {
79 		GOQuad d;
80 		qx.h *= 2;
81 		qx.l *= 2;
82 		go_quad_sub (&d, &qx, &go_quad_one);
83 		if (go_quad_value (&d) >= 0) {
84 			qx = d;
85 			g_printerr ("1");
86 		} else {
87 			g_printerr ("0");
88 		}
89 		if (b == 0)
90 			g_printerr (".");
91 		if (b == 52)
92 			g_printerr ("|");
93 	}
94 
95 	g_printerr (" * 2^%d", e - 1);
96 }
97 
98 static void
print_decimal(GOQuad const * qc)99 print_decimal (GOQuad const *qc)
100 {
101 	// Note: this is highly limited implementation not fit for general
102 	// use.
103 
104 	GOQuad qe, qd, qr;
105 	int d, e;
106 	int N = 24;
107 
108 	g_return_if_fail (go_finite (qc->h) && qc->h > 0);
109 
110 	go_quad_log (&qe, qc);
111 	go_quad_div (&qe, &qe, &qln10);
112 	e = (int)floor (go_quad_value (&qe));
113 
114 	// Scale mantissa to [1 ; 10[
115 	go_quad_init (&qe, -e);
116 	go_quad_init (&qd, 10);
117 	go_quad_pow (&qe, NULL, &qd, &qe);
118 	go_quad_mul (&qd, qc, &qe);
119 
120 	// Add to simulate rounding when we truncate below
121 	go_quad_init (&qe, 1 - N);
122 	go_quad_init (&qr, 10);
123 	go_quad_pow (&qr, NULL, &qr, &qe);
124 	go_quad_init (&qe, 0.5);
125 	go_quad_mul (&qr, &qr, &qe);
126 	go_quad_add (&qd, &qd, &qr);
127 
128 	for (d = 0; d < N; d++) {
129 		GOQuad qw;
130 		// Specifically use the high part; rounding implied by
131 		// go_quad_value could otherwise make w bigger than qd.
132 		int w = floor (qd.h);
133 
134 		g_printerr ("%c", '0' + w);
135 		go_quad_init (&qw, w);
136 		go_quad_sub (&qd, &qd, &qw);
137 
138 		go_quad_init (&qw, 10);
139 		go_quad_mul (&qd, &qd, &qw);
140 
141 		if (d == 0)
142 			g_printerr (".");
143 	}
144 
145 	if (e)
146 		g_printerr (" * 10^%d", e);
147 }
148 
149 
150 
151 static double
qmlogabs(GOQuad const * qx)152 qmlogabs (GOQuad const *qx)
153 {
154 	GOQuad qy;
155 	qy.h = fabs (qx->h); qy.l = fabs (qx->l);
156 	go_quad_log (&qy, &qy);
157 	go_quad_div (&qy, &qy, &qln10);
158 	return -go_quad_value (&qy);
159 }
160 
161 static double last_direct;
162 static double last_inverse;
163 static gboolean last_need_nl;
164 
165 static void
examine_constant(const char * descr,GOQuad const * qc)166 examine_constant (const char *descr, GOQuad const *qc)
167 {
168 	double dc, dic, dcic;
169 	GOQuad qd, qic;
170 	double dig_direct, dig_inverse;
171 
172 	g_printerr ("-----------------------------------------------------------------------------\n");
173 	g_printerr ("Examining constant %s\n", descr);
174 	g_printerr ("\n");
175 
176 	last_direct = dc = go_quad_value (qc);
177 	g_printerr ("Value: ");
178 	print_decimal (qc);
179 	g_printerr ("\n");
180 	g_printerr ("Value bit pattern: ");
181 	print_bits (qc);
182 	g_printerr ("\n");
183 	g_printerr ("Value as double: %.55g\n", dc);
184 	go_quad_init (&qd, dc);
185 	go_quad_sub (&qd, qc, &qd);
186 	go_quad_div (&qd, &qd, qc);
187 	g_printerr ("Relative error: %g\n", go_quad_value (&qd));
188 	dig_direct = qmlogabs (&qd);
189 	g_printerr ("Correct digits: %.2f\n", dig_direct);
190 
191 	g_printerr ("\n");
192 
193 	// For the inverse we assume that using double-double is good enough to
194 	// answer our question.
195 	go_quad_div (&qic, &go_quad_one, qc);
196 	last_inverse = dic = go_quad_value (&qic);
197 	g_printerr ("Inverse value: ");
198 	print_decimal (&qic);
199 	g_printerr ("\n");
200 	g_printerr ("Inverse bit pattern: ");
201 	print_bits (&qic);
202 	g_printerr ("\n");
203 	g_printerr ("Inverse as double: %.55g\n", dic);
204 	go_quad_init (&qd, dic);
205 	go_quad_sub (&qd, &qic, &qd);
206 	go_quad_div (&qd, &qd, &qic);
207 	g_printerr ("Relative error: %g\n", go_quad_value (&qd));
208 	dig_inverse = qmlogabs (&qd);
209 	g_printerr ("Correct digits: %.2f\n", dig_inverse);
210 
211 	dcic = (double)(1 / dc);
212 	g_printerr ("Can inverse be computed as double: %s\n",
213 		    (dcic == dic ? "yes" : "no"));
214 
215 	g_printerr ("\n");
216 
217 	g_printerr ("Conclusion: for %s, use %s\n", descr,
218 		    (dig_direct >= dig_inverse ? "direct" : "inverse"));
219 	last_need_nl = TRUE;
220 }
221 
222 static void
check_computable(const char * fmla,double x,gboolean direct)223 check_computable (const char *fmla, double x, gboolean direct)
224 {
225 	gboolean ok = (x == (direct ? last_direct : last_inverse));
226 
227 	g_printerr ("%sComputing %s as double yields the right value?  %s\n",
228 		    (last_need_nl ? "\n" : ""), fmla, (ok ? "yes" : "no"));
229 	last_need_nl = FALSE;
230 }
231 
232 int
main(int argc,char ** argv)233 main (int argc, char **argv)
234 {
235 	void *state;
236 	GOQuad qc;
237 
238 	g_printerr ("Determine for certain constants, c, whether its representation\n");
239 	g_printerr ("as a double is more accurate than its inverse's representation.\n\n");
240 
241 	state = go_quad_start ();
242 
243 	go_quad_init (&qln10, 10);
244 	go_quad_log (&qln10, &qln10);
245 
246 	examine_constant ("pi", &go_quad_pi);
247 	examine_constant ("e", &go_quad_e);
248 	examine_constant ("EulerGamma", &go_quad_euler);
249 
250 	examine_constant ("log(2)", &go_quad_ln2);
251 	examine_constant ("log(10)", &qln10);
252 	go_quad_div (&qc, &go_quad_ln2, &qln10);
253 	examine_constant ("log10(2)", &qc);
254 
255 	examine_constant ("sqrt(2)", &go_quad_sqrt2);
256 	go_quad_init (&qc, 3);
257 	go_quad_sqrt (&qc, &qc);
258 	examine_constant ("sqrt(3)", &qc);
259 	go_quad_init (&qc, 5);
260 	go_quad_sqrt (&qc, &qc);
261 	examine_constant ("sqrt(5)", &qc);
262 
263 	go_quad_sqrt (&qc, &go_quad_pi);
264 	examine_constant ("sqrt(pi)", &qc);
265 	check_computable ("sqrt(pi)", sqrt (M_PI), TRUE);
266 	check_computable ("1/sqrt(pi)", 1 / sqrt (M_PI), FALSE);
267 	check_computable ("sqrt(1/pi)", sqrt (1 / M_PI), FALSE);
268 
269 	go_quad_sqrt (&qc, &go_quad_2pi);
270 	examine_constant ("sqrt(2pi)", &qc);
271 	check_computable ("sqrt(2pi)", sqrt (2 * M_PI), TRUE);
272 	check_computable ("sqrt(2)*sqrt(pi)", sqrt (2) * sqrt (M_PI), TRUE);
273 	check_computable ("1/sqrt(2pi)", 1 / sqrt (2 * M_PI), FALSE);
274 
275 	go_quad_init (&qc, 180);
276 	go_quad_div (&qc, &qc, &go_quad_pi);
277 	examine_constant ("180/pi", &qc);
278 	check_computable ("180/pi", 180 / M_PI, TRUE);
279 	check_computable ("pi/180", M_PI / 180, FALSE);
280 
281 	go_quad_end (state);
282 
283 	return 0;
284 }
285