1 #include <goffice/goffice.h>
2
3 // This program answers the question of whether a constant, c, is more
4 // accurately represented as a double than its inverse, 1/c.
5 //
6 // The purpose is to help squeeze a few more bits out of calculations
7 // that involve multiplying or dividing by the constant.
8 //
9 // Consider the density function for the normal distribution:
10 // d(x) = (1/sqrt(2*pi)) * exp(-x^2/2)
11 // As written, this multiplies by a constant, 1/sqrt(2*pi), whose value
12 // we could compute and state with enough decimals to guarantee that we
13 // end up using the best possible value of type double. But we could
14 // also use division by the constant sqrt(2*pi). Which is best?
15 //
16 // The traditional answer has been that division is expensive, so avoid
17 // it, but that isn't really true anymore. Here we are only interested
18 // in accuracy.
19 //
20 // Assumptions:
21 // 1. double is base-2 floating-point with 53 mantissa bits (including
22 // the implied leading bit).
23 // 2. we want to minimize the error on the product or quotient.
24 // 3. we know nothing special about the second factor that would
25 // force one choice or the other.
26 //
27 // For sqrt(2pi) the answer is that it is ever so slightly better to use
28 // multiplication. But it's a very close call. But to get the right value,
29 // one cannot start with the best double approximating sqrt(2pi) and compute
30 // 1/sqrt(2pi) from that.
31
32 // ----------------------------------------
33 // Examining constant sqrt(2pi)
34 //
35 // Value bit pattern: 1.0100000011011001001100011111111101100010011100000101|10011 * 2^1
36 // Value as double: 2.50662827463100068570156508940272033214569091796875
37 // Relative error: -7.31205e-17
38 // Correct digits: 16.14
39 //
40 // Inverse bit pattern: 1.1001100010000100010100110011110101000011011001010000|10001 * 2^-2
41 // Inverse as double: 0.398942280401432702863218082711682654917240142822265625
42 // Relative error: -6.24734e-17
43 // Correct digits: 16.20
44 // Can inverse be computed as double: no
45 //
46 // Conclusion: for sqrt(2pi), use inverse
47 // ----------------------------------------
48
49 static GOQuad qln10;
50
51 static void
print_bits(GOQuad const * qc)52 print_bits (GOQuad const *qc)
53 {
54 GOQuad qx = *qc, qd;
55 int e;
56 double s;
57 int b;
58 int N = 53 + 7;
59
60 g_return_if_fail (go_finite (qx.h) && qx.h != 0);
61
62 if (qx.h < 0) {
63 g_printerr ("-");
64 qx.h = -qx.h;
65 qx.l = -qx.l;
66 }
67
68 // Scale mantissa to [0.5 ; 1.0[
69 (void)frexp (go_quad_value (&qx), &e);
70 s = ldexp (1.0, -e);
71 qx.h *= s;
72 qx.l *= s;
73
74 // Add to simulate rounding when we truncate below
75 go_quad_init (&qd, ldexp (0.5, -N));
76 go_quad_add (&qx, &qx, &qd);
77
78 for (b = 0; b < N; b++) {
79 GOQuad d;
80 qx.h *= 2;
81 qx.l *= 2;
82 go_quad_sub (&d, &qx, &go_quad_one);
83 if (go_quad_value (&d) >= 0) {
84 qx = d;
85 g_printerr ("1");
86 } else {
87 g_printerr ("0");
88 }
89 if (b == 0)
90 g_printerr (".");
91 if (b == 52)
92 g_printerr ("|");
93 }
94
95 g_printerr (" * 2^%d", e - 1);
96 }
97
98 static void
print_decimal(GOQuad const * qc)99 print_decimal (GOQuad const *qc)
100 {
101 // Note: this is highly limited implementation not fit for general
102 // use.
103
104 GOQuad qe, qd, qr;
105 int d, e;
106 int N = 24;
107
108 g_return_if_fail (go_finite (qc->h) && qc->h > 0);
109
110 go_quad_log (&qe, qc);
111 go_quad_div (&qe, &qe, &qln10);
112 e = (int)floor (go_quad_value (&qe));
113
114 // Scale mantissa to [1 ; 10[
115 go_quad_init (&qe, -e);
116 go_quad_init (&qd, 10);
117 go_quad_pow (&qe, NULL, &qd, &qe);
118 go_quad_mul (&qd, qc, &qe);
119
120 // Add to simulate rounding when we truncate below
121 go_quad_init (&qe, 1 - N);
122 go_quad_init (&qr, 10);
123 go_quad_pow (&qr, NULL, &qr, &qe);
124 go_quad_init (&qe, 0.5);
125 go_quad_mul (&qr, &qr, &qe);
126 go_quad_add (&qd, &qd, &qr);
127
128 for (d = 0; d < N; d++) {
129 GOQuad qw;
130 // Specifically use the high part; rounding implied by
131 // go_quad_value could otherwise make w bigger than qd.
132 int w = floor (qd.h);
133
134 g_printerr ("%c", '0' + w);
135 go_quad_init (&qw, w);
136 go_quad_sub (&qd, &qd, &qw);
137
138 go_quad_init (&qw, 10);
139 go_quad_mul (&qd, &qd, &qw);
140
141 if (d == 0)
142 g_printerr (".");
143 }
144
145 if (e)
146 g_printerr (" * 10^%d", e);
147 }
148
149
150
151 static double
qmlogabs(GOQuad const * qx)152 qmlogabs (GOQuad const *qx)
153 {
154 GOQuad qy;
155 qy.h = fabs (qx->h); qy.l = fabs (qx->l);
156 go_quad_log (&qy, &qy);
157 go_quad_div (&qy, &qy, &qln10);
158 return -go_quad_value (&qy);
159 }
160
161 static double last_direct;
162 static double last_inverse;
163 static gboolean last_need_nl;
164
165 static void
examine_constant(const char * descr,GOQuad const * qc)166 examine_constant (const char *descr, GOQuad const *qc)
167 {
168 double dc, dic, dcic;
169 GOQuad qd, qic;
170 double dig_direct, dig_inverse;
171
172 g_printerr ("-----------------------------------------------------------------------------\n");
173 g_printerr ("Examining constant %s\n", descr);
174 g_printerr ("\n");
175
176 last_direct = dc = go_quad_value (qc);
177 g_printerr ("Value: ");
178 print_decimal (qc);
179 g_printerr ("\n");
180 g_printerr ("Value bit pattern: ");
181 print_bits (qc);
182 g_printerr ("\n");
183 g_printerr ("Value as double: %.55g\n", dc);
184 go_quad_init (&qd, dc);
185 go_quad_sub (&qd, qc, &qd);
186 go_quad_div (&qd, &qd, qc);
187 g_printerr ("Relative error: %g\n", go_quad_value (&qd));
188 dig_direct = qmlogabs (&qd);
189 g_printerr ("Correct digits: %.2f\n", dig_direct);
190
191 g_printerr ("\n");
192
193 // For the inverse we assume that using double-double is good enough to
194 // answer our question.
195 go_quad_div (&qic, &go_quad_one, qc);
196 last_inverse = dic = go_quad_value (&qic);
197 g_printerr ("Inverse value: ");
198 print_decimal (&qic);
199 g_printerr ("\n");
200 g_printerr ("Inverse bit pattern: ");
201 print_bits (&qic);
202 g_printerr ("\n");
203 g_printerr ("Inverse as double: %.55g\n", dic);
204 go_quad_init (&qd, dic);
205 go_quad_sub (&qd, &qic, &qd);
206 go_quad_div (&qd, &qd, &qic);
207 g_printerr ("Relative error: %g\n", go_quad_value (&qd));
208 dig_inverse = qmlogabs (&qd);
209 g_printerr ("Correct digits: %.2f\n", dig_inverse);
210
211 dcic = (double)(1 / dc);
212 g_printerr ("Can inverse be computed as double: %s\n",
213 (dcic == dic ? "yes" : "no"));
214
215 g_printerr ("\n");
216
217 g_printerr ("Conclusion: for %s, use %s\n", descr,
218 (dig_direct >= dig_inverse ? "direct" : "inverse"));
219 last_need_nl = TRUE;
220 }
221
222 static void
check_computable(const char * fmla,double x,gboolean direct)223 check_computable (const char *fmla, double x, gboolean direct)
224 {
225 gboolean ok = (x == (direct ? last_direct : last_inverse));
226
227 g_printerr ("%sComputing %s as double yields the right value? %s\n",
228 (last_need_nl ? "\n" : ""), fmla, (ok ? "yes" : "no"));
229 last_need_nl = FALSE;
230 }
231
232 int
main(int argc,char ** argv)233 main (int argc, char **argv)
234 {
235 void *state;
236 GOQuad qc;
237
238 g_printerr ("Determine for certain constants, c, whether its representation\n");
239 g_printerr ("as a double is more accurate than its inverse's representation.\n\n");
240
241 state = go_quad_start ();
242
243 go_quad_init (&qln10, 10);
244 go_quad_log (&qln10, &qln10);
245
246 examine_constant ("pi", &go_quad_pi);
247 examine_constant ("e", &go_quad_e);
248 examine_constant ("EulerGamma", &go_quad_euler);
249
250 examine_constant ("log(2)", &go_quad_ln2);
251 examine_constant ("log(10)", &qln10);
252 go_quad_div (&qc, &go_quad_ln2, &qln10);
253 examine_constant ("log10(2)", &qc);
254
255 examine_constant ("sqrt(2)", &go_quad_sqrt2);
256 go_quad_init (&qc, 3);
257 go_quad_sqrt (&qc, &qc);
258 examine_constant ("sqrt(3)", &qc);
259 go_quad_init (&qc, 5);
260 go_quad_sqrt (&qc, &qc);
261 examine_constant ("sqrt(5)", &qc);
262
263 go_quad_sqrt (&qc, &go_quad_pi);
264 examine_constant ("sqrt(pi)", &qc);
265 check_computable ("sqrt(pi)", sqrt (M_PI), TRUE);
266 check_computable ("1/sqrt(pi)", 1 / sqrt (M_PI), FALSE);
267 check_computable ("sqrt(1/pi)", sqrt (1 / M_PI), FALSE);
268
269 go_quad_sqrt (&qc, &go_quad_2pi);
270 examine_constant ("sqrt(2pi)", &qc);
271 check_computable ("sqrt(2pi)", sqrt (2 * M_PI), TRUE);
272 check_computable ("sqrt(2)*sqrt(pi)", sqrt (2) * sqrt (M_PI), TRUE);
273 check_computable ("1/sqrt(2pi)", 1 / sqrt (2 * M_PI), FALSE);
274
275 go_quad_init (&qc, 180);
276 go_quad_div (&qc, &qc, &go_quad_pi);
277 examine_constant ("180/pi", &qc);
278 check_computable ("180/pi", 180 / M_PI, TRUE);
279 check_computable ("pi/180", M_PI / 180, FALSE);
280
281 go_quad_end (state);
282
283 return 0;
284 }
285