1 /*
2  * scrm is an implementation of the Sequential-Coalescent-with-Recombination Model.
3  *
4  * Copyright (C) 2013, 2014 Paul R. Staab, Sha (Joe) Zhu, Dirk Metzler and Gerton Lunter
5  *
6  * This file is part of scrm.
7  *
8  * scrm is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 #ifndef scrm_src_random_fastfunc
24 #define scrm_src_random_fastfunc
25 
26 #include <iostream>
27 #include <cmath>
28 #include <vector>
29 
30 #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__DragonFly__)
31 #include <malloc.h>
32 #endif
33 
34 
35 // Number of interpolation points.  If this is changed, several constants in fastlog must also be changed.
36 #define SIZE_DOUBLE 1024
37 
38 class FastFunc {
39  public:
FastFunc()40   FastFunc() {
41     this->build_fastlog_double_table(SIZE_DOUBLE);
42   }
43 
44   // Methods
45   double fastlog(double);       /* about as fast as division; about as accurate as logf */
46   double fastexp_up(double y);  /* upper bound to exp; at most 6.148% too high.  10x as fast as exp */
47   double fastexp_lo(double y);  /* lower bound to exp; at most 5.792% too low.  10x as fast as exp */
48 
49  private:
50   void build_fastlog_double_table(int);
51 
52   static constexpr double LN2 = 0.693147180559945309417; //ln(2)
53   static constexpr double EXP_A = 1048576/LN2;
54   static constexpr long long EXP_C_LO = 90254;
55   static constexpr long long EXP_C_UP = -1;
56 
57   std::vector<double> fastlog_double_table_;
58 };
59 
60 // Fast and fairly tight upper and lower bounds for exp(x)
61 // A quick test showed a ~10x speed improvement
62 // See: Nicol N. Schraudolf, A Fast, Compact Approximation of the Exponential Function, Neural Computation 11, 853-862 (1999)
63 // http://nic.schraudolph.org/pubs/Schraudolph99.pdf
64 
fastexp_up(double y)65 inline double FastFunc::fastexp_up(double y) {
66   if (y<-700) return 0.0;
67   if (y>700) return INFINITY;
68   union {
69     double d;
70     int64_t i;
71   } n;
72   n.i = (((long long)(EXP_A*y)) + (1072693248 - EXP_C_UP)) << 32;
73   return n.d;
74 }
75 
fastexp_lo(double y)76 inline double FastFunc::fastexp_lo(double y) {
77   if (y<-700) return 0.0;
78   if (y>700) return INFINITY;
79   union {
80     double d;
81     int64_t i;
82   } n;
83   n.i = (((long long)(EXP_A*y)) + (1072693248 - EXP_C_LO)) << 32;
84   return n.d;
85 }
86 
fastlog(double x)87 inline double FastFunc::fastlog(double x) {
88   const float offset = 2047;                // as int64_t: 0x409ffc00000....
89   double y = x;
90   int64_t* yint = (int64_t*)(&y);
91   int expon = ((*yint) >> 52) - 1023;       // base-2 exponent of float
92   int index = ((*yint) >> (52-10)) & 1023;  // upper 10 bits of mantissa
93   *yint |= 0x7ffffc0000000000;              // convert float into remainder of mantissa; and
94   *yint &= 0x409fffffffffffff;              // modify exponent to get into proper range
95   return (expon * LN2 +                     // contribution of base-2 log
96 	  fastlog_double_table_[index] +          // table lookup, and linear interpolation
97 	  (fastlog_double_table_[index+1] - fastlog_double_table_[index]) * (*(double*)(yint) - offset) );
98 }
99 
100 #endif
101