1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package math 6 7// The original C code, the long comment, and the constants 8// below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c 9// and came with this notice. The go code is a simplified 10// version of the original C. 11// 12// ==================================================== 13// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 14// 15// Developed at SunPro, a Sun Microsystems, Inc. business. 16// Permission to use, copy, modify, and distribute this 17// software is freely granted, provided that this notice 18// is preserved. 19// ==================================================== 20// 21// expm1(x) 22// Returns exp(x)-1, the exponential of x minus 1. 23// 24// Method 25// 1. Argument reduction: 26// Given x, find r and integer k such that 27// 28// x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658 29// 30// Here a correction term c will be computed to compensate 31// the error in r when rounded to a floating-point number. 32// 33// 2. Approximating expm1(r) by a special rational function on 34// the interval [0,0.34658]: 35// Since 36// r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ... 37// we define R1(r*r) by 38// r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r) 39// That is, 40// R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r) 41// = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r)) 42// = 1 - r**2/60 + r**4/2520 - r**6/100800 + ... 43// We use a special Reme algorithm on [0,0.347] to generate 44// a polynomial of degree 5 in r*r to approximate R1. The 45// maximum error of this polynomial approximation is bounded 46// by 2**-61. In other words, 47// R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 48// where Q1 = -1.6666666666666567384E-2, 49// Q2 = 3.9682539681370365873E-4, 50// Q3 = -9.9206344733435987357E-6, 51// Q4 = 2.5051361420808517002E-7, 52// Q5 = -6.2843505682382617102E-9; 53// (where z=r*r, and the values of Q1 to Q5 are listed below) 54// with error bounded by 55// | 5 | -61 56// | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2 57// | | 58// 59// expm1(r) = exp(r)-1 is then computed by the following 60// specific way which minimize the accumulation rounding error: 61// 2 3 62// r r [ 3 - (R1 + R1*r/2) ] 63// expm1(r) = r + --- + --- * [--------------------] 64// 2 2 [ 6 - r*(3 - R1*r/2) ] 65// 66// To compensate the error in the argument reduction, we use 67// expm1(r+c) = expm1(r) + c + expm1(r)*c 68// ~ expm1(r) + c + r*c 69// Thus c+r*c will be added in as the correction terms for 70// expm1(r+c). Now rearrange the term to avoid optimization 71// screw up: 72// ( 2 2 ) 73// ({ ( r [ R1 - (3 - R1*r/2) ] ) } r ) 74// expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- ) 75// ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 ) 76// ( ) 77// 78// = r - E 79// 3. Scale back to obtain expm1(x): 80// From step 1, we have 81// expm1(x) = either 2**k*[expm1(r)+1] - 1 82// = or 2**k*[expm1(r) + (1-2**-k)] 83// 4. Implementation notes: 84// (A). To save one multiplication, we scale the coefficient Qi 85// to Qi*2**i, and replace z by (x**2)/2. 86// (B). To achieve maximum accuracy, we compute expm1(x) by 87// (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf) 88// (ii) if k=0, return r-E 89// (iii) if k=-1, return 0.5*(r-E)-0.5 90// (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E) 91// else return 1.0+2.0*(r-E); 92// (v) if (k<-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1) 93// (vi) if k <= 20, return 2**k((1-2**-k)-(E-r)), else 94// (vii) return 2**k(1-((E+2**-k)-r)) 95// 96// Special cases: 97// expm1(INF) is INF, expm1(NaN) is NaN; 98// expm1(-INF) is -1, and 99// for finite argument, only expm1(0)=0 is exact. 100// 101// Accuracy: 102// according to an error analysis, the error is always less than 103// 1 ulp (unit in the last place). 104// 105// Misc. info. 106// For IEEE double 107// if x > 7.09782712893383973096e+02 then expm1(x) overflow 108// 109// Constants: 110// The hexadecimal values are the intended ones for the following 111// constants. The decimal values may be used, provided that the 112// compiler will convert from decimal to binary accurately enough 113// to produce the hexadecimal values shown. 114// 115 116// Expm1 returns e**x - 1, the base-e exponential of x minus 1. 117// It is more accurate than Exp(x) - 1 when x is near zero. 118// 119// Special cases are: 120// Expm1(+Inf) = +Inf 121// Expm1(-Inf) = -1 122// Expm1(NaN) = NaN 123// Very large values overflow to -1 or +Inf. 124 125//extern expm1 126func libc_expm1(float64) float64 127 128func Expm1(x float64) float64 { 129 if x == 0 { 130 return x 131 } 132 return libc_expm1(x) 133} 134 135func expm1(x float64) float64 { 136 const ( 137 Othreshold = 7.09782712893383973096e+02 // 0x40862E42FEFA39EF 138 Ln2X56 = 3.88162421113569373274e+01 // 0x4043687a9f1af2b1 139 Ln2HalfX3 = 1.03972077083991796413e+00 // 0x3ff0a2b23f3bab73 140 Ln2Half = 3.46573590279972654709e-01 // 0x3fd62e42fefa39ef 141 Ln2Hi = 6.93147180369123816490e-01 // 0x3fe62e42fee00000 142 Ln2Lo = 1.90821492927058770002e-10 // 0x3dea39ef35793c76 143 InvLn2 = 1.44269504088896338700e+00 // 0x3ff71547652b82fe 144 Tiny = 1.0 / (1 << 54) // 2**-54 = 0x3c90000000000000 145 // scaled coefficients related to expm1 146 Q1 = -3.33333333333331316428e-02 // 0xBFA11111111110F4 147 Q2 = 1.58730158725481460165e-03 // 0x3F5A01A019FE5585 148 Q3 = -7.93650757867487942473e-05 // 0xBF14CE199EAADBB7 149 Q4 = 4.00821782732936239552e-06 // 0x3ED0CFCA86E65239 150 Q5 = -2.01099218183624371326e-07 // 0xBE8AFDB76E09C32D 151 ) 152 153 // special cases 154 switch { 155 case IsInf(x, 1) || IsNaN(x): 156 return x 157 case IsInf(x, -1): 158 return -1 159 } 160 161 absx := x 162 sign := false 163 if x < 0 { 164 absx = -absx 165 sign = true 166 } 167 168 // filter out huge argument 169 if absx >= Ln2X56 { // if |x| >= 56 * ln2 170 if sign { 171 return -1 // x < -56*ln2, return -1 172 } 173 if absx >= Othreshold { // if |x| >= 709.78... 174 return Inf(1) 175 } 176 } 177 178 // argument reduction 179 var c float64 180 var k int 181 if absx > Ln2Half { // if |x| > 0.5 * ln2 182 var hi, lo float64 183 if absx < Ln2HalfX3 { // and |x| < 1.5 * ln2 184 if !sign { 185 hi = x - Ln2Hi 186 lo = Ln2Lo 187 k = 1 188 } else { 189 hi = x + Ln2Hi 190 lo = -Ln2Lo 191 k = -1 192 } 193 } else { 194 if !sign { 195 k = int(InvLn2*x + 0.5) 196 } else { 197 k = int(InvLn2*x - 0.5) 198 } 199 t := float64(k) 200 hi = x - t*Ln2Hi // t * Ln2Hi is exact here 201 lo = t * Ln2Lo 202 } 203 x = hi - lo 204 c = (hi - x) - lo 205 } else if absx < Tiny { // when |x| < 2**-54, return x 206 return x 207 } else { 208 k = 0 209 } 210 211 // x is now in primary range 212 hfx := 0.5 * x 213 hxs := x * hfx 214 r1 := 1 + hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))) 215 t := 3 - r1*hfx 216 e := hxs * ((r1 - t) / (6.0 - x*t)) 217 if k == 0 { 218 return x - (x*e - hxs) // c is 0 219 } 220 e = (x*(e-c) - c) 221 e -= hxs 222 switch { 223 case k == -1: 224 return 0.5*(x-e) - 0.5 225 case k == 1: 226 if x < -0.25 { 227 return -2 * (e - (x + 0.5)) 228 } 229 return 1 + 2*(x-e) 230 case k <= -2 || k > 56: // suffice to return exp(x)-1 231 y := 1 - (e - x) 232 y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent 233 return y - 1 234 } 235 if k < 20 { 236 t := Float64frombits(0x3ff0000000000000 - (0x20000000000000 >> uint(k))) // t=1-2**-k 237 y := t - (e - x) 238 y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent 239 return y 240 } 241 t = Float64frombits(uint64(0x3ff-k) << 52) // 2**-k 242 y := x - (e + t) 243 y++ 244 y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent 245 return y 246} 247