1 /* mpfr_exp -- exponential of a floating-point number
2
3 Copyright 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */
24 #include "mpfr-impl.h"
25
26 /* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series
27 Assume |p/2^r| < 1.
28 We use the following binary splitting formula:
29 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
30 Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
31 T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise
32 Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
33
34 Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j,
35 we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array
36 below.
37
38 Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of
39 two part.
40 */
41 static void
mpfr_exp_rational(mpfr_ptr y,mpz_ptr p,long r,int m,mpz_t * Q,mpfr_prec_t * mult)42 mpfr_exp_rational (mpfr_ptr y, mpz_ptr p, long r, int m,
43 mpz_t *Q, mpfr_prec_t *mult)
44 {
45 unsigned long n, i, j;
46 mpz_t *S, *ptoj;
47 mpfr_prec_t *log2_nb_terms;
48 mpfr_exp_t diff, expo;
49 mpfr_prec_t precy = MPFR_PREC(y), prec_i_have, prec_ptoj;
50 int k, l;
51
52 MPFR_ASSERTN ((size_t) m < sizeof (long) * CHAR_BIT - 1);
53
54 S = Q + (m+1);
55 ptoj = Q + 2*(m+1); /* ptoj[i] = mantissa^(2^i) */
56 log2_nb_terms = mult + (m+1);
57
58 /* Normalize p */
59 MPFR_ASSERTD (mpz_cmp_ui (p, 0) != 0);
60 n = mpz_scan1 (p, 0); /* number of trailing zeros in p */
61 mpz_tdiv_q_2exp (p, p, n);
62 r -= n; /* since |p/2^r| < 1 and p >= 1, r >= 1 */
63
64 /* Set initial var */
65 mpz_set (ptoj[0], p);
66 for (k = 1; k < m; k++)
67 mpz_mul (ptoj[k], ptoj[k-1], ptoj[k-1]); /* ptoj[k] = p^(2^k) */
68 mpz_set_ui (Q[0], 1);
69 mpz_set_ui (S[0], 1);
70 k = 0;
71 mult[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms
72 satisfies P[k]/Q[k] <= 2^(-mult[k]) */
73 log2_nb_terms[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */
74 prec_i_have = 0;
75
76 /* Main Loop */
77 n = 1UL << m;
78 for (i = 1; (prec_i_have < precy) && (i < n); i++)
79 {
80 /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */
81 k++;
82 log2_nb_terms[k] = 0; /* 1 term */
83 mpz_set_ui (Q[k], i + 1);
84 mpz_set_ui (S[k], i + 1);
85 j = i + 1; /* we have computed j = i+1 terms so far */
86 l = 0;
87 while ((j & 1) == 0) /* combine and reduce */
88 {
89 /* invariant: S[k] corresponds to 2^l consecutive terms */
90 mpz_mul (S[k], S[k], ptoj[l]);
91 mpz_mul (S[k-1], S[k-1], Q[k]);
92 /* Q[k] corresponds to 2^l consecutive terms too.
93 Since it does not contains the factor 2^(r*2^l),
94 when going from l to l+1 we need to multiply
95 by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */
96 mpz_mul_2exp (S[k-1], S[k-1], r << l);
97 mpz_add (S[k-1], S[k-1], S[k]);
98 mpz_mul (Q[k-1], Q[k-1], Q[k]);
99 log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
100 is a power of 2 by construction */
101 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[k]);
102 MPFR_MPZ_SIZEINBASE2 (prec_ptoj, ptoj[l]);
103 mult[k-1] += prec_i_have + (r << l) - prec_ptoj - 1;
104 prec_i_have = mult[k] = mult[k-1];
105 /* since mult[k] >= mult[k-1] + nbits(Q[k]),
106 we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */
107 l ++;
108 j >>= 1;
109 k --;
110 }
111 }
112
113 /* accumulate all products in S[0] and Q[0]. Warning: contrary to above,
114 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
115 l = 0; /* number of accumulated terms in the right part S[k]/Q[k] */
116 while (k > 0)
117 {
118 j = log2_nb_terms[k-1];
119 mpz_mul (S[k], S[k], ptoj[j]);
120 mpz_mul (S[k-1], S[k-1], Q[k]);
121 l += 1 << log2_nb_terms[k];
122 mpz_mul_2exp (S[k-1], S[k-1], r * l);
123 mpz_add (S[k-1], S[k-1], S[k]);
124 mpz_mul (Q[k-1], Q[k-1], Q[k]);
125 k--;
126 }
127
128 /* Q[0] now equals i! */
129 MPFR_MPZ_SIZEINBASE2 (prec_i_have, S[0]);
130 diff = (mpfr_exp_t) prec_i_have - 2 * (mpfr_exp_t) precy;
131 expo = diff;
132 if (diff >= 0)
133 mpz_fdiv_q_2exp (S[0], S[0], diff);
134 else
135 mpz_mul_2exp (S[0], S[0], -diff);
136
137 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[0]);
138 diff = (mpfr_exp_t) prec_i_have - (mpfr_prec_t) precy;
139 expo -= diff;
140 if (diff > 0)
141 mpz_fdiv_q_2exp (Q[0], Q[0], diff);
142 else
143 mpz_mul_2exp (Q[0], Q[0], -diff);
144
145 mpz_tdiv_q (S[0], S[0], Q[0]);
146 mpfr_set_z (y, S[0], MPFR_RNDD);
147 MPFR_SET_EXP (y, MPFR_GET_EXP (y) + expo - r * (i - 1) );
148 }
149
150 #define shift (GMP_NUMB_BITS/2)
151
152 int
mpfr_exp_3(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd_mode)153 mpfr_exp_3 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
154 {
155 mpfr_t t, x_copy, tmp;
156 mpz_t uk;
157 mpfr_exp_t ttt, shift_x;
158 unsigned long twopoweri;
159 mpz_t *P;
160 mpfr_prec_t *mult;
161 int i, k, loop;
162 int prec_x;
163 mpfr_prec_t realprec, Prec;
164 int iter;
165 int inexact = 0;
166 MPFR_SAVE_EXPO_DECL (expo);
167 MPFR_ZIV_DECL (ziv_loop);
168
169 MPFR_LOG_FUNC
170 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
171 ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
172 inexact));
173
174 MPFR_SAVE_EXPO_MARK (expo);
175
176 /* decompose x */
177 /* we first write x = 1.xxxxxxxxxxxxx
178 ----- k bits -- */
179 prec_x = MPFR_INT_CEIL_LOG2 (MPFR_PREC (x)) - MPFR_LOG2_GMP_NUMB_BITS;
180 if (prec_x < 0)
181 prec_x = 0;
182
183 ttt = MPFR_GET_EXP (x);
184 mpfr_init2 (x_copy, MPFR_PREC(x));
185 mpfr_set (x_copy, x, MPFR_RNDD);
186
187 /* we shift to get a number less than 1 */
188 if (ttt > 0)
189 {
190 shift_x = ttt;
191 mpfr_div_2ui (x_copy, x, ttt, MPFR_RNDN);
192 ttt = MPFR_GET_EXP (x_copy);
193 }
194 else
195 shift_x = 0;
196 MPFR_ASSERTD (ttt <= 0);
197
198 /* Init prec and vars */
199 realprec = MPFR_PREC (y) + MPFR_INT_CEIL_LOG2 (prec_x + MPFR_PREC (y));
200 Prec = realprec + shift + 2 + shift_x;
201 mpfr_init2 (t, Prec);
202 mpfr_init2 (tmp, Prec);
203 mpz_init (uk);
204
205 /* Main loop */
206 MPFR_ZIV_INIT (ziv_loop, realprec);
207 for (;;)
208 {
209 int scaled = 0;
210 MPFR_BLOCK_DECL (flags);
211
212 k = MPFR_INT_CEIL_LOG2 (Prec) - MPFR_LOG2_GMP_NUMB_BITS;
213
214 /* now we have to extract */
215 twopoweri = GMP_NUMB_BITS;
216
217 /* Allocate tables */
218 P = (mpz_t*) (*__gmp_allocate_func) (3*(k+2)*sizeof(mpz_t));
219 for (i = 0; i < 3*(k+2); i++)
220 mpz_init (P[i]);
221 mult = (mpfr_prec_t*) (*__gmp_allocate_func) (2*(k+2)*sizeof(mpfr_prec_t));
222
223 /* Particular case for i==0 */
224 mpfr_extract (uk, x_copy, 0);
225 MPFR_ASSERTD (mpz_cmp_ui (uk, 0) != 0);
226 mpfr_exp_rational (tmp, uk, shift + twopoweri - ttt, k + 1, P, mult);
227 for (loop = 0; loop < shift; loop++)
228 mpfr_sqr (tmp, tmp, MPFR_RNDD);
229 twopoweri *= 2;
230
231 /* General case */
232 iter = (k <= prec_x) ? k : prec_x;
233 for (i = 1; i <= iter; i++)
234 {
235 mpfr_extract (uk, x_copy, i);
236 if (MPFR_LIKELY (mpz_cmp_ui (uk, 0) != 0))
237 {
238 mpfr_exp_rational (t, uk, twopoweri - ttt, k - i + 1, P, mult);
239 mpfr_mul (tmp, tmp, t, MPFR_RNDD);
240 }
241 MPFR_ASSERTN (twopoweri <= LONG_MAX/2);
242 twopoweri *=2;
243 }
244
245 /* Clear tables */
246 for (i = 0; i < 3*(k+2); i++)
247 mpz_clear (P[i]);
248 (*__gmp_free_func) (P, 3*(k+2)*sizeof(mpz_t));
249 (*__gmp_free_func) (mult, 2*(k+2)*sizeof(mpfr_prec_t));
250
251 if (shift_x > 0)
252 {
253 MPFR_BLOCK (flags, {
254 for (loop = 0; loop < shift_x - 1; loop++)
255 mpfr_sqr (tmp, tmp, MPFR_RNDD);
256 mpfr_sqr (t, tmp, MPFR_RNDD);
257 } );
258
259 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
260 {
261 /* tmp <= exact result, so that it is a real overflow. */
262 inexact = mpfr_overflow (y, rnd_mode, 1);
263 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_OVERFLOW);
264 break;
265 }
266
267 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags)))
268 {
269 /* This may be a spurious underflow. So, let's scale
270 the result. */
271 mpfr_mul_2ui (tmp, tmp, 1, MPFR_RNDD); /* no overflow, exact */
272 mpfr_sqr (t, tmp, MPFR_RNDD);
273 if (MPFR_IS_ZERO (t))
274 {
275 /* approximate result < 2^(emin - 3), thus
276 exact result < 2^(emin - 2). */
277 inexact = mpfr_underflow (y, (rnd_mode == MPFR_RNDN) ?
278 MPFR_RNDZ : rnd_mode, 1);
279 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
280 break;
281 }
282 scaled = 1;
283 }
284 }
285
286 if (mpfr_can_round (shift_x > 0 ? t : tmp, realprec, MPFR_RNDD, MPFR_RNDZ,
287 MPFR_PREC(y) + (rnd_mode == MPFR_RNDN)))
288 {
289 inexact = mpfr_set (y, shift_x > 0 ? t : tmp, rnd_mode);
290 if (MPFR_UNLIKELY (scaled && MPFR_IS_PURE_FP (y)))
291 {
292 int inex2;
293 mpfr_exp_t ey;
294
295 /* The result has been scaled and needs to be corrected. */
296 ey = MPFR_GET_EXP (y);
297 inex2 = mpfr_mul_2si (y, y, -2, rnd_mode);
298 if (inex2) /* underflow */
299 {
300 if (rnd_mode == MPFR_RNDN && inexact < 0 &&
301 MPFR_IS_ZERO (y) && ey == __gmpfr_emin + 1)
302 {
303 /* Double rounding case: in MPFR_RNDN, the scaled
304 result has been rounded downward to 2^emin.
305 As the exact result is > 2^(emin - 2), correct
306 rounding must be done upward. */
307 /* TODO: make sure in coverage tests that this line
308 is reached. */
309 inexact = mpfr_underflow (y, MPFR_RNDU, 1);
310 }
311 else
312 {
313 /* No double rounding. */
314 inexact = inex2;
315 }
316 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
317 }
318 }
319 break;
320 }
321
322 MPFR_ZIV_NEXT (ziv_loop, realprec);
323 Prec = realprec + shift + 2 + shift_x;
324 mpfr_set_prec (t, Prec);
325 mpfr_set_prec (tmp, Prec);
326 }
327 MPFR_ZIV_FREE (ziv_loop);
328
329 mpz_clear (uk);
330 mpfr_clear (tmp);
331 mpfr_clear (t);
332 mpfr_clear (x_copy);
333 MPFR_SAVE_EXPO_FREE (expo);
334 return inexact;
335 }
336