1 /* Vectorized routines for PowerPC processors, using Altivec/VMX intrinsics.
2 *
3 * Table of contents
4 * 1. SIMD logf(), expf()
5 * 2. Miscellaneous convenience functions.
6 * 3. Benchmark
7 * 4. Unit tests
8 * 5. Test driver
9 * 6. Example
10 *
11 *****************************************************************
12 *
13 * This code is conditionally compiled, only when <eslENABLE_VMX> was
14 * set in <esl_config.h> by the configure script, and that will only
15 * happen on ARM platforms. When <eslENABLE_VMX> is not set, we
16 * include some dummy code to silence compiler and ranlib warnings
17 * about empty translation units and no symbols, and dummy drivers
18 *
19 *****************************************************************
20 * Credits:
21 *
22 * The logf() and expf() routines are derivatives of routines by
23 * Julien Pommier [http://gruntthepeon.free.fr/ssemath/]. Those
24 * routines were in turn based on serial implementations in the Cephes
25 * math library by Stephen Moshier [Moshier89;
26 * http://www.moshier.net/#Cephes]. Thanks and credit to both Moshier
27 * and Pommier for their clear code. Additional copyright and license
28 * information is appended at the end of the file.
29 */
30 #include "esl_config.h"
31 #ifdef eslENABLE_VMX
32
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <math.h>
36 #include <float.h>
37
38 #ifndef __APPLE_ALTIVEC__
39 #include <altivec.h>
40 #endif
41
42 #include "easel.h"
43 #include "esl_vmx.h"
44
45
46 /*****************************************************************
47 * 1. Altivec/VMX SIMD logf(), expf()
48 *****************************************************************/
49
50 /* Function: esl_vmx_logf()
51 * Synopsis: <r[z] = log x[z]>
52 *
53 * Purpose: Given a vector <x> containing four floats, returns a
54 * vector <r> in which each element <r[z] = logf(x[z])>.
55 *
56 * Valid in the domain $x_z > 0$ for normalized IEEE754
57 * $x_z$.
58 *
59 * For <x> $< 0$, including -0, returns <NaN>. For <x> $==
60 * 0$ or subnormal <x>, returns <-inf>. For <x = inf>,
61 * returns <inf>. For <x = NaN>, returns <NaN>. For
62 * subnormal <x>, returns <-inf>.
63 *
64 * Xref: J2/71.
65 *
66 * Note: Derived from SSE2 implementation which was
67 * Derived from an SSE1 implementation by Julian
68 * Pommier. Converted to SSE2 and added handling
69 * of IEEE754 specials.
70 */
71 vector float
esl_vmx_logf(vector float x)72 esl_vmx_logf(vector float x)
73 {
74 static vector float cephesv_1 = { 7.0376836292E-2f, -1.1514610310E-1f, 1.1676998740E-1f, -1.2420140846E-1f };
75 static vector float cephesv_2 = { 1.4249322787E-1f, -1.6668057665E-1f, 2.0000714765E-1f, -2.4999993993E-1f };
76 static vector float cephesv_3 = { 3.3333331174E-1f, 0.0f, 0.0f, 0.0f };
77
78 static vector float constv = { 0.707106781186547524f, -2.12194440e-4f, 0.5f, 0.693359375f };
79
80 vector float onev = (vector float) {1.0, 1.0, 1.0, 1.0}; /* all elem = 1.0 */
81 vector signed int ei;
82 vector float e;
83 vector bool int invalid_mask, zero_mask, inf_mask; /* masks used to handle special IEEE754 inputs */
84 vector bool int mask;
85 vector float origx;
86 vector float tmp;
87 vector float y;
88 vector float z;
89
90 vector float zerov = (vector float) vec_splat_u32(0);
91 vector signed int infExpv = { 255, 255, 255, 255 };
92
93 /* first, split x apart: x = frexpf(x, &e); */
94 ei = vec_sr((vector signed int) x, ((vector unsigned int) {23, 23, 23, 23}));
95 /* shift right 23: IEEE754 floats: ei = biased exponents */
96 invalid_mask = vec_cmple(x, zerov); /* mask any elem that's negative; these become NaN */
97 zero_mask = vec_cmpeq(ei,(vector signed int) zerov); /* mask any elem zero or subnormal; these become -inf */
98 inf_mask = vec_cmpeq(ei, infExpv); /* mask any elem +inf or NaN; these stay +inf or NaN */
99 origx = x; /* store original x, used for log(inf) = inf, log(NaN) = NaN */
100
101 x = vec_and(x, (vector float) ((vector unsigned int) {~0x7f800000, ~0x7f800000, ~0x7f800000, ~0x7f800000}));
102 /* x now the stored 23 bits of the 24-bit significand */
103 x = vec_or (x, vec_splat(constv, 2)); /* sets hidden bit b[0] */
104
105 ei = vec_sub(ei, ((vector signed int) {126, 126, 126, 126})); /* -127 (ei now signed base-2 exponent); then +1 */
106 e = vec_ctf(ei, 0);
107
108 /* now, calculate the log */
109 mask = vec_cmplt(x, vec_splat(constv, 0)); /* avoid conditional branches. */
110 tmp = vec_and(x, (vector float) mask); /* tmp contains x values < 0.707, else 0 */
111 x = vec_sub(x, onev);
112 e = vec_sub(e, vec_and(onev, (vector float) mask));
113 x = vec_add(x, tmp);
114 z = vec_madd(x, x, zerov);
115
116 y = vec_splat(cephesv_1, 0);
117 y = vec_madd(y, x, vec_splat(cephesv_1, 1));
118 y = vec_madd(y, x, vec_splat(cephesv_1, 2));
119 y = vec_madd(y, x, vec_splat(cephesv_1, 3));
120 y = vec_madd(y, x, vec_splat(cephesv_2, 0));
121 y = vec_madd(y, x, vec_splat(cephesv_2, 1));
122 y = vec_madd(y, x, vec_splat(cephesv_2, 2));
123 y = vec_madd(y, x, vec_splat(cephesv_2, 3));
124 y = vec_madd(y, x, vec_splat(cephesv_3, 0));
125 y = vec_madd(y, x, zerov);
126 y = vec_madd(y, z, zerov);
127
128 tmp = vec_madd(e, vec_splat(constv, 1), zerov);
129 y = vec_add(y, tmp);
130
131 tmp = vec_madd(z, vec_splat(constv, 2), zerov);
132 y = vec_sub(y, tmp);
133
134 x = vec_add(x, y);
135 x = vec_madd(e, vec_splat(constv, 3), x);
136
137 /* IEEE754 cleanup: */
138 x = vec_or(x, (vector float) invalid_mask); /* log(x<0, including -0) = NaN */
139 x = vec_sel(x, ((vector float) {-eslINFINITY, -eslINFINITY, -eslINFINITY, -eslINFINITY}), zero_mask); /* x zero or subnormal = -inf */
140 x = vec_sel(x, origx, inf_mask); /* log(inf)=inf; log(NaN) = NaN */
141 return x;
142 }
143
144 /* Function: esl_vmx_expf()
145 * Synopsis: <r[z] = exp x[z]>
146 *
147 * Purpose: Given a vector <x> containing four floats, returns a
148 * vector <r> in which each element <r[z] = logf(x[z])>.
149 *
150 * Valid for all IEEE754 floats $x_z$.
151 *
152 * Xref: J2/71
153 *
154 * Note: Derived from SSE2 implementation which was
155 * Derived from an SSE1 implementation by Julian
156 * Pommier. Converted to SSE2.
157 */
158 vector float
esl_vmx_expf(vector float x)159 esl_vmx_expf(vector float x)
160 {
161 static vector float cephesv_p1 = { 1.9875691500E-4f, 1.3981999507E-3f, 8.3334519073E-3f, 4.1665795894E-2f };
162 static vector float cephesv_p2 = { 1.6666665459E-1f, 5.0000001201E-1f, 0.693359375f, -2.12194440E-4f };
163
164 static vector float maxlogfv = { 88.72283905206835f, 88.72283905206835f, 88.72283905206835f, 88.72283905206835f }; /* log(2^128) */
165 static vector float minlogfv = { -103.27892990343185f, -103.27892990343185f, -103.27892990343185f, -103.27892990343185f }; /* log(2^-149) */
166
167 vector signed int k;
168 vector bool int minmask, maxmask;
169 vector float tmp, fx, y, z;
170
171 vector float zerov = (vector float) vec_splat_u32(0);
172
173 /* handle out-of-range and special conditions */
174 maxmask = vec_cmpgt(x, maxlogfv);
175 minmask = vec_cmple(x, minlogfv);
176
177 /* range reduction: exp(x) = 2^k e^f = exp(f + k log 2); k = floorf(0.5 + x / log2): */
178 fx = vec_madd(x, ((vector float) {eslCONST_LOG2R, eslCONST_LOG2R, eslCONST_LOG2R, eslCONST_LOG2R}), zerov);
179 fx = vec_add(fx, ((vector float) {0.5, 0.5, 0.5, 0.5}));
180
181 /* floorf() with VMX: */
182 fx = vec_floor(fx);
183 k = vec_cts(fx, 0);
184
185 /* polynomial approx for e^f for f in range [-0.5, 0.5] */
186 tmp = vec_madd(fx, vec_splat(cephesv_p2, 2), zerov);
187 z = vec_madd(fx, vec_splat(cephesv_p2, 3), zerov);
188 x = vec_sub(x, tmp);
189 x = vec_sub(x, z);
190 z = vec_madd(x, x, zerov);
191
192 y = vec_splat(cephesv_p1, 0);
193 y = vec_madd(y, x, vec_splat(cephesv_p1, 1));
194 y = vec_madd(y, x, vec_splat(cephesv_p1, 2));
195 y = vec_madd(y, x, vec_splat(cephesv_p1, 3));
196 y = vec_madd(y, x, vec_splat(cephesv_p2, 0));
197 y = vec_madd(y, x, vec_splat(cephesv_p2, 1));
198 y = vec_madd(y, z, x);
199 y = vec_add(y, ((vector float) {1.0, 1.0, 1.0, 1.0}));
200
201 /* build 2^k by hand, by creating a IEEE754 float */
202 k = vec_add(k, ((vector signed int) {127, 127, 127, 127}));
203 k = vec_sl(k, ((vector unsigned int) {23, 23, 23, 23}));
204 fx = (vector float) k;
205
206
207 /* put 2^k e^f together (fx = 2^k, y = e^f) and we're done */
208 y = vec_madd(y, fx, zerov);
209
210 /* special/range cleanup */
211 y = vec_sel(y, ((vector float) {eslINFINITY, eslINFINITY, eslINFINITY, eslINFINITY}), maxmask); /* exp(x) = inf for x > log(2^128) */
212 y = vec_sel(y, zerov, minmask); /* exp(x) = 0 for x < log(2^-149) */
213 return y;
214 }
215
216
217 /*****************************************************************
218 * 2. Miscellaneous convenience functions
219 *****************************************************************/
220 void
esl_vmx_dump_vecfloat(FILE * fp,vector float v)221 esl_vmx_dump_vecfloat(FILE *fp, vector float v)
222 {
223 float *p = (float *)&v;
224 printf("[%13.8g, %13.8g, %13.8g, %13.8g]", p[0], p[1], p[2], p[3]);
225 }
226
227
228 /*****************************************************************
229 * 3. Benchmark
230 *****************************************************************/
231 #ifdef eslVMX_BENCHMARK
232
233 #include "esl_config.h"
234
235 #include <stdio.h>
236 #include <math.h>
237
238 #include "easel.h"
239 #include "esl_getopts.h"
240 #include "esl_stopwatch.h"
241
242 static ESL_OPTIONS options[] = {
243 /* name type default env range toggles reqs incomp help docgroup*/
244 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
245 { "-N", eslARG_INT,"10000000", NULL, NULL, NULL, NULL, NULL, "number of trials", 0 },
246 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
247 };
248 static char usage[] = "[-options]";
249 static char banner[] = "benchmark driver for sse module";
250
251 int
main(int argc,char ** argv)252 main(int argc, char **argv)
253 {
254 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
255 ESL_STOPWATCH *w = esl_stopwatch_Create();
256 int N = esl_opt_GetInteger(go, "-N");
257 float origx = 2.0;
258 float x = origx;
259 vector float xv = { 2.0f, 2.0f, 2.0f, 2.0f };
260 int i;
261
262 /* First, serial time. */
263 esl_stopwatch_Start(w);
264 for (i = 0; i < N; i++) { x = logf(x); x = expf(x); }
265 esl_stopwatch_Stop(w);
266 esl_stopwatch_Display(stdout, w, "# serial CPU time: ");
267
268 /* Vector time */
269 esl_stopwatch_Start(w);
270 for (i = 0; i < N; i++) { xv = esl_vmx_logf(xv); xv = esl_vmx_expf(xv); }
271 esl_stopwatch_Stop(w);
272 esl_stopwatch_Display(stdout, w, "# vector CPU time: ");
273
274 /* If you don't do something with x and xv, the compiler may optimize them away */
275 printf("%g => many scalar logf,expf cycles => %g\n", origx, N, x);
276 printf("%g => many vector logf,expf cycles => ", origx, N); esl_vmx_dump_vecfloat(stdout, xv); printf("\n");
277
278 esl_stopwatch_Destroy(w);
279 esl_getopts_Destroy(go);
280 return 0;
281 }
282
283 #endif /*eslVMX_BENCHMARK*/
284
285
286 /*****************************************************************
287 * 4. Unit tests
288 *****************************************************************/
289 #ifdef eslVMX_TESTDRIVE
290
291 #include "esl_getopts.h"
292 #include "esl_random.h"
293
294 /* utest_logf(): Test range/domain of logf */
295 static void
utest_logf(ESL_GETOPTS * go)296 utest_logf(ESL_GETOPTS *go)
297 {
298 vector float x; /* test input */
299 union { vector float v; float x[4]; } r; /* test output */
300
301 /* Test IEEE754 specials:
302 * log(-inf) = NaN log(x<0) = NaN log(-0) = NaN
303 * log(0) = -inf log(inf) = inf log(NaN) = NaN
304 */
305 x = (vector float) {-eslINFINITY, -1.0, -0.0, 0.0};
306 r.v = esl_vmx_logf(x);
307 if (esl_opt_GetBoolean(go, "-v")) {
308 printf("logf");
309 esl_vmx_dump_vecfloat(stdout, x); printf(" ==> ");
310 esl_vmx_dump_vecfloat(stdout, r.v); printf("\n");
311 }
312 if (! isnan(r.x[0])) esl_fatal("logf(-inf) should be NaN");
313 if (! isnan(r.x[1])) esl_fatal("logf(-1) should be NaN");
314 if (! isnan(r.x[2])) esl_fatal("logf(-0) should be NaN");
315 if (isinf(r.x[3]) != -1) esl_fatal("logf(0) should be -inf");
316
317 x = (vector float) {eslINFINITY, eslNaN, FLT_MIN, FLT_MAX};
318 r.v = esl_vmx_logf(x);
319 if (esl_opt_GetBoolean(go, "-v")) {
320 printf("logf");
321 esl_vmx_dump_vecfloat(stdout, x); printf(" ==> ");
322 esl_vmx_dump_vecfloat(stdout, r.v); printf("\n");
323 }
324 if (isinf(r.x[0]) != 1) esl_fatal("logf(inf) should be inf");
325 if (! isnan(r.x[1])) esl_fatal("logf(NaN) should be NaN");
326
327 }
328
329 /* utest_expf(): Test range/domain of expf */
330 static void
utest_expf(ESL_GETOPTS * go)331 utest_expf(ESL_GETOPTS *go)
332 {
333 vector float x; /* test input */
334 union { vector float v; float x[4]; } r; /* test output */
335
336 /* exp(-inf) = 0 exp(-0) = 1 exp(0) = 1 exp(inf) = inf exp(NaN) = NaN */
337 x = (vector float) {-eslINFINITY, -0.0, 0.0, eslINFINITY};
338 r.v = esl_vmx_expf(x);
339 if (esl_opt_GetBoolean(go, "-v")) {
340 printf("expf");
341 esl_vmx_dump_vecfloat(stdout, x); printf(" ==> ");
342 esl_vmx_dump_vecfloat(stdout, r.v); printf("\n");
343 }
344 if (r.x[0] != 0.0f) esl_fatal("expf(-inf) should be 0");
345 if (isinf(r.x[3]) != 1) esl_fatal("expf(inf) should be inf");
346
347 /* exp(NaN) = NaN exp(large) = inf exp(-large) = 0 exp(1) = exp(1) */
348 x = (vector float) {eslNaN, 666.0, -666.0, 1.0};
349 r.v = esl_vmx_expf(x);
350 if (esl_opt_GetBoolean(go, "-v")) {
351 printf("expf");
352 esl_vmx_dump_vecfloat(stdout, x); printf(" ==> ");
353 esl_vmx_dump_vecfloat(stdout, r.v); printf("\n");
354 }
355 if (! isnan(r.x[0])) esl_fatal("expf(NaN) should be NaN");
356 if (isinf(r.x[1]) != 1) esl_fatal("expf(large x) should be inf");
357 if (r.x[2] != 0.0f) esl_fatal("expf(-large x) should be 0");
358
359 }
360
361 /* utest_odds(): test accuracy of logf, expf on odds ratios,
362 * our main intended use.
363 */
364 static void
utest_odds(ESL_GETOPTS * go,ESL_RANDOMNESS * r)365 utest_odds(ESL_GETOPTS *go, ESL_RANDOMNESS *r)
366 {
367 int N = esl_opt_GetInteger(go, "-N");
368 int verbose = esl_opt_GetBoolean(go, "-v");
369 int very_verbose = esl_opt_GetBoolean(go, "--vv");
370 int i;
371 float p1, p2, odds;
372 union { vector float v; float x[4]; } r1;
373 union { vector float v; float x[4]; } r2;
374 float scalar_r1, scalar_r2;
375 double err1, maxerr1 = 0.0, avgerr1 = 0.0; /* errors on logf() */
376 double err2, maxerr2 = 0.0, avgerr2 = 0.0; /* errors on expf() */
377
378 for (i = 0; i < N; i++)
379 {
380 p1 = esl_rnd_UniformPositive(r);
381 p2 = esl_rnd_UniformPositive(r);
382 odds = p1 / p2;
383
384 if (odds == 0.0) esl_fatal("whoa, odds ratio can't be 0!\n");
385
386 r1.v = esl_vmx_logf((vector float) {odds}); /* r1.x[z] = log(p1/p2) */
387 scalar_r1 = logf(odds);
388
389 err1 = (r1.x[0] == 0. && scalar_r1 == 0.) ? 0.0 : 2 * fabs(r1.x[0] - scalar_r1) / fabs(r1.x[0] + scalar_r1);
390 if (err1 > maxerr1) maxerr1 = err1;
391 avgerr1 += err1 / (float) N;
392 if (isnan(avgerr1)) esl_fatal("whoa, what?\n");
393
394 r2.v = esl_vmx_expf(r1.v); /* and back to odds */
395 scalar_r2 = expf(r1.x[0]);
396
397 err2 = (r2.x[0] == 0. && scalar_r2 == 0.) ? 0.0 : 2 * fabs(r2.x[0] - scalar_r2) / fabs(r2.x[0] + scalar_r2);
398 if (err2 > maxerr2) maxerr2 = err2;
399 avgerr2 += err2 / (float) N;
400
401 if (very_verbose)
402 printf("%13.7g %13.7g %13.7g %13.7g %13.7g %13.7g %13.7g\n", odds, scalar_r1, r1.x[0], scalar_r2, r2.x[0], err1, err2);
403 }
404
405 if (avgerr1 > 1e-8) esl_fatal("average error on logf() is intolerable\n");
406 if (maxerr1 > 1e-6) esl_fatal("maximum error on logf() is intolerable\n");
407 if (avgerr2 > 1e-8) esl_fatal("average error on expf() is intolerable\n");
408 if (maxerr2 > 1e-6) esl_fatal("maximum error on expf() is intolerable\n");
409
410 if (verbose) {
411 printf("Average [max] logf() relative error in %d odds trials: %13.8g [%13.8g]\n", N, avgerr1, maxerr1);
412 printf("Average [max] expf() relative error in %d odds trials: %13.8g [%13.8g]\n", N, avgerr2, maxerr2);
413 printf("(random seed : %d)\n", esl_randomness_GetSeed(r));
414 }
415 }
416 #endif /*eslVMX_TESTDRIVE*/
417
418
419
420
421 /*****************************************************************
422 * 5. Test driver
423 *****************************************************************/
424
425 #ifdef eslVMX_TESTDRIVE
426 #include "esl_config.h"
427
428 #include <stdio.h>
429 #include <math.h>
430
431 #include "easel.h"
432 #include "esl_getopts.h"
433 #include "esl_random.h"
434 #include "esl_vmx.h"
435
436 static ESL_OPTIONS options[] = {
437 /* name type default env range toggles reqs incomp help docgroup*/
438 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
439 { "-N", eslARG_INT, "10000", NULL, NULL, NULL, NULL, NULL, "number of random test points", 0 },
440 { "-s", eslARG_INT, "42", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0 },
441 { "-v", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "be verbose: show test report", 0 },
442 { "--vv", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "be very verbose: show individual test samples", 0 },
443
444 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
445 };
446 static char usage[] = "[-options]";
447 static char banner[] = "test driver for vmx module";
448
449 int
main(int argc,char ** argv)450 main(int argc, char **argv)
451 {
452 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
453 ESL_RANDOMNESS *r = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
454
455 utest_logf(go);
456 utest_expf(go);
457 utest_odds(go, r);
458
459 esl_randomness_Destroy(r);
460 esl_getopts_Destroy(go);
461 return 0;
462 }
463 #endif /* eslVMX_TESTDRIVE*/
464
465
466
467 /*****************************************************************
468 * 6. Example
469 *****************************************************************/
470
471 #ifdef eslVMX_EXAMPLE
472 /*::cexcerpt::vmx_example::begin::*/
473 #include "esl_config.h"
474
475 #include <stdio.h>
476 #include <math.h>
477
478 #include "easel.h"
479 #include "esl_vmx.h"
480
481 int
main(int argc,char ** argv)482 main(int argc, char **argv)
483 {
484 float x; /* scalar input */
485 vector float xv; /* input vector */
486 union { vector float v; float x[4]; } rv; /* result vector*/
487
488 x = 2.0;
489 xv = (vector float) {x};
490 rv.v = esl_vmx_logf(xv);
491 printf("logf(%f) = %f\n", x, rv.x[0]);
492
493 rv.v = esl_vmx_expf(xv);
494 printf("expf(%f) = %f\n", x, rv.x[0]);
495
496 return 0;
497 }
498 /*::cexcerpt::vmx_example::end::*/
499 #endif /*eslVMX_EXAMPLE*/
500
501
502 #else // ! eslENABLE_VMX
503
504 /* If we don't have VMX compiled in, provide some nothingness to:
505 * a. prevent Mac OS/X ranlib from bitching about .o file that "has no symbols"
506 * b. prevent compiler from bitching about "empty compilation unit"
507 * c. compile blank drivers and automatically pass the automated tests.
508 */
esl_vmx_silence_hack(void)509 void esl_vmx_silence_hack(void) { return; }
510 #if defined eslVMX_TESTDRIVE || defined eslVMX_EXAMPLE || eslVMX_BENCHMARK
main(void)511 int main(void) { return 0; }
512 #endif
513 #endif // eslENABLE_VMX or not
514
515
516 /*****************************************************************
517 * additional copyright and license information for this file
518 *****************************************************************
519 * In addition to our own copyrights, esl_vmx_logf() and esl_vmx_expf() are also:
520 * Copyright (C) 2007 Julien Pommier
521 * Copyright (C) 1992 Stephen Moshier
522 *
523 * These functions derived from zlib-licensed routines by
524 * Julien Pommier, http://gruntthepeon.free.fr/ssemath/. The
525 * zlib license:
526 *
527 *-------------------------------------------------------------------------
528 * Copyright (C) 2007 Julien Pommier
529 *
530 * This software is provided 'as-is', without any express or implied
531 * warranty. In no event will the authors be held liable for any damages
532 * arising from the use of this software.
533 *
534 * Permission is granted to anyone to use this software for any purpose,
535 * including commercial applications, and to alter it and redistribute it
536 * freely, subject to the following restrictions:
537 *
538 * 1. The origin of this software must not be misrepresented; you must not
539 * claim that you wrote the original software. If you use this software
540 * in a product, an acknowledgment in the product documentation would be
541 * appreciated but is not required.
542 * 2. Altered source versions must be plainly marked as such, and must not be
543 * misrepresented as being the original software.
544 * 3. This notice may not be removed or altered from any source distribution.
545 *
546 *-------------------------------------------------------------------------
547 *
548 * In turn, Pommier had derived the logf() and expf() functions from
549 * serial versions in the Cephes math library. According to its
550 * readme, Cephes is "copyrighted by the author" and "may be used
551 * freely but it comes with no support or guarantee." Cephes is
552 * available in NETLIB [http://www.netlib.org/cephes/]. NETLIB is
553 * widely considered to be a free scientific code repository, though
554 * the copyright and license status of many parts, including Cephes,
555 * is ill-defined. We have attached Moshier's copyright,
556 * to credit his original contribution. Thanks to both Pommier and
557 * Moshier for their clear code.
558 */
559
560