xref: /dragonfly/contrib/mpfr/src/exp_2.c (revision 6e278935)
1 /* mpfr_exp_2 -- exponential of a floating-point number
2                  using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
3 
4 Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 Contributed by the Arenaire and Caramel projects, INRIA.
6 
7 This file is part of the GNU MPFR Library.
8 
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17 License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
21 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 
24 /* #define DEBUG */
25 #define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */
26 #include "mpfr-impl.h"
27 
28 static unsigned long
29 mpfr_exp2_aux (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
30 static unsigned long
31 mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
32 static mpfr_exp_t
33 mpz_normalize  (mpz_t, mpz_t, mpfr_exp_t);
34 static mpfr_exp_t
35 mpz_normalize2 (mpz_t, mpz_t, mpfr_exp_t, mpfr_exp_t);
36 
37 /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
38    Otherwise do nothing and return 0.
39  */
40 static mpfr_exp_t
41 mpz_normalize (mpz_t rop, mpz_t z, mpfr_exp_t q)
42 {
43   size_t k;
44 
45   MPFR_MPZ_SIZEINBASE2 (k, z);
46   MPFR_ASSERTD (k == (mpfr_uexp_t) k);
47   if (q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q)
48     {
49       mpz_fdiv_q_2exp (rop, z, (unsigned long) ((mpfr_uexp_t) k - q));
50       return (mpfr_exp_t) k - q;
51     }
52   if (MPFR_UNLIKELY(rop != z))
53     mpz_set (rop, z);
54   return 0;
55 }
56 
57 /* if expz > target, shift z by (expz-target) bits to the left.
58    if expz < target, shift z by (target-expz) bits to the right.
59    Returns target.
60 */
61 static mpfr_exp_t
62 mpz_normalize2 (mpz_t rop, mpz_t z, mpfr_exp_t expz, mpfr_exp_t target)
63 {
64   if (target > expz)
65     mpz_fdiv_q_2exp (rop, z, target - expz);
66   else
67     mpz_mul_2exp (rop, z, expz - target);
68   return target;
69 }
70 
71 /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
72    where x = n*log(2)+(2^K)*r
73    together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
74    evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
75    This function returns with the exact flags due to exp.
76 */
77 int
78 mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
79 {
80   long n;
81   unsigned long K, k, l, err; /* FIXME: Which type ? */
82   int error_r;
83   mpfr_exp_t exps, expx;
84   mpfr_prec_t q, precy;
85   int inexact;
86   mpfr_t r, s;
87   mpz_t ss;
88   MPFR_ZIV_DECL (loop);
89 
90   MPFR_LOG_FUNC
91     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
92      ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
93       inexact));
94 
95   expx = MPFR_GET_EXP (x);
96   precy = MPFR_PREC(y);
97 
98   /* Warning: we cannot use the 'double' type here, since on 64-bit machines
99      x may be as large as 2^62*log(2) without overflow, and then x/log(2)
100      is about 2^62: not every integer of that size can be represented as a
101      'double', thus the argument reduction would fail. */
102   if (expx <= -2)
103     /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
104     n = 0;
105   else
106     {
107       mpfr_init2 (r, sizeof (long) * CHAR_BIT);
108       mpfr_const_log2 (r, MPFR_RNDZ);
109       mpfr_div (r, x, r, MPFR_RNDN);
110       n = mpfr_get_si (r, MPFR_RNDN);
111       mpfr_clear (r);
112     }
113   /* we have |x| <= (|n|+1)*log(2) */
114   MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n));
115 
116   /* error_r bounds the cancelled bits in x - n*log(2) */
117   if (MPFR_UNLIKELY (n == 0))
118     error_r = 0;
119   else
120     {
121       count_leading_zeros (error_r, (mp_limb_t) SAFE_ABS (unsigned long, n) + 1);
122       error_r = GMP_NUMB_BITS - error_r;
123       /* we have |x| <= 2^error_r * log(2) */
124     }
125 
126   /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
127      n/K terms costs about n/(2K) multiplications when computed in fixed
128      point */
129   K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2)
130     : __gmpfr_cuberoot (4*precy);
131   l = (precy - 1) / K + 1;
132   err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
133   /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
134   q = precy + err + K + 8;
135   /* if |x| >> 1, take into account the cancelled bits */
136   if (expx > 0)
137     q += expx;
138 
139   /* Note: due to the mpfr_prec_round below, it is not possible to use
140      the MPFR_GROUP_* macros here. */
141 
142   mpfr_init2 (r, q + error_r);
143   mpfr_init2 (s, q + error_r);
144 
145   /* the algorithm consists in computing an upper bound of exp(x) using
146      a precision of q bits, and see if we can round to MPFR_PREC(y) taking
147      into account the maximal error. Otherwise we increase q. */
148   MPFR_ZIV_INIT (loop, q);
149   for (;;)
150     {
151       MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
152                      n, K, l, (unsigned long) q, error_r));
153 
154       /* First reduce the argument to r = x - n * log(2),
155          so that r is small in absolute value. We want an upper
156          bound on r to get an upper bound on exp(x). */
157 
158       /* if n<0, we have to get an upper bound of log(2)
159          in order to get an upper bound of r = x-n*log(2) */
160       mpfr_const_log2 (s, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
161       /* s is within 1 ulp(s) of log(2) */
162 
163       mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
164       /* r is within 3 ulps of |n|*log(2) */
165       if (n < 0)
166         MPFR_CHANGE_SIGN (r);
167       /* r <= n*log(2), within 3 ulps */
168 
169       MPFR_LOG_VAR (x);
170       MPFR_LOG_VAR (r);
171 
172       mpfr_sub (r, x, r, MPFR_RNDU);
173 
174       if (MPFR_IS_PURE_FP (r))
175         {
176           while (MPFR_IS_NEG (r))
177             { /* initial approximation n was too large */
178               n--;
179               mpfr_add (r, r, s, MPFR_RNDU);
180             }
181 
182           /* since there was a cancellation in x - n*log(2), the low error_r
183              bits from r are zero and thus non significant, thus we can reduce
184              the working precision */
185           if (error_r > 0)
186             mpfr_prec_round (r, q, MPFR_RNDU);
187           /* the error on r is at most 3 ulps (3 ulps if error_r = 0,
188              and 1 + 3/2 if error_r > 0) */
189           MPFR_LOG_VAR (r);
190           MPFR_ASSERTD (MPFR_IS_POS (r));
191           mpfr_div_2ui (r, r, K, MPFR_RNDU); /* r = (x-n*log(2))/2^K, exact */
192 
193           mpz_init (ss);
194           exps = mpfr_get_z_2exp (ss, s);
195           /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
196           MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0);
197           l = (precy < MPFR_EXP_2_THRESHOLD)
198             ? mpfr_exp2_aux (ss, r, q, &exps)   /* naive method */
199             : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */
200 
201           MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
202                          l, (unsigned long) q, (K + l) * (double) q * q));
203 
204           for (k = 0; k < K; k++)
205             {
206               mpz_mul (ss, ss, ss);
207               exps <<= 1;
208               exps += mpz_normalize (ss, ss, q);
209             }
210           mpfr_set_z (s, ss, MPFR_RNDN);
211 
212           MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps);
213           mpz_clear (ss);
214 
215           /* error is at most 2^K*l, plus 2 to take into account of
216              the error of 3 ulps on r */
217           err = K + MPFR_INT_CEIL_LOG2 (l) + 2;
218 
219           MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
220           MPFR_LOG_VAR (s);
221           MPFR_LOG_MSG (("err=%lu bits\n", K));
222 
223           if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - err, precy, rnd_mode)))
224             {
225               mpfr_clear_flags ();
226               inexact = mpfr_mul_2si (y, s, n, rnd_mode);
227               break;
228             }
229         }
230 
231       MPFR_ZIV_NEXT (loop, q);
232       mpfr_set_prec (r, q + error_r);
233       mpfr_set_prec (s, q + error_r);
234     }
235   MPFR_ZIV_FREE (loop);
236 
237   mpfr_clear (r);
238   mpfr_clear (s);
239 
240   return inexact;
241 }
242 
243 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
244    using naive method with O(l) multiplications.
245    Return the number of iterations l.
246    The absolute error on s is less than 3*l*(l+1)*2^(-q).
247    Version using fixed-point arithmetic with mpz instead
248    of mpfr for internal computations.
249    NOTE[VL]: the following sentence seems to be obsolete since MY_INIT_MPZ
250    is no longer used (r6919); qn was the number of limbs of q.
251    s must have at least qn+1 limbs (qn should be enough, but currently fails
252    since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
253 */
254 static unsigned long
255 mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
256 {
257   unsigned long l;
258   mpfr_exp_t dif, expt, expr;
259   mpz_t t, rr;
260   mp_size_t sbit, tbit;
261 
262   MPFR_ASSERTN (MPFR_IS_PURE_FP (r));
263 
264   expt = 0;
265   *exps = 1 - (mpfr_exp_t) q;                   /* s = 2^(q-1) */
266   mpz_init (t);
267   mpz_init (rr);
268   mpz_set_ui(t, 1);
269   mpz_set_ui(s, 1);
270   mpz_mul_2exp(s, s, q-1);
271   expr = mpfr_get_z_2exp(rr, r);               /* no error here */
272 
273   l = 0;
274   for (;;) {
275     l++;
276     mpz_mul(t, t, rr);
277     expt += expr;
278     MPFR_MPZ_SIZEINBASE2 (sbit, s);
279     MPFR_MPZ_SIZEINBASE2 (tbit, t);
280     dif = *exps + sbit - expt - tbit;
281     /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
282     expt += mpz_normalize(t, t, (mpfr_exp_t) q-dif); /* error at most 2^(1-q) */
283     mpz_fdiv_q_ui (t, t, l);                   /* error at most 2^(1-q) */
284     /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
285     MPFR_ASSERTD (expt == *exps);
286     if (mpz_sgn (t) == 0)
287       break;
288     mpz_add(s, s, t);                      /* no error here: exact */
289     /* ensures rr has the same size as t: after several shifts, the error
290        on rr is still at most ulp(t)=ulp(s) */
291     MPFR_MPZ_SIZEINBASE2 (tbit, t);
292     expr += mpz_normalize(rr, rr, tbit);
293   }
294 
295   mpz_clear (t);
296   mpz_clear (rr);
297 
298   return 3 * l * (l + 1);
299 }
300 
301 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
302    using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
303    Return l.
304    Uses m multiplications of full size and 2l/m of decreasing size,
305    i.e. a total equivalent to about m+l/m full multiplications,
306    i.e. 2*sqrt(l) for m=sqrt(l).
307    NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ
308    is no longer used (r6919); sizer was the number of limbs of r.
309    Version using mpz. ss must have at least (sizer+1) limbs.
310    The error is bounded by (l^2+4*l) ulps where l is the return value.
311 */
312 static unsigned long
313 mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
314 {
315   mpfr_exp_t expr, *expR, expt;
316   mpfr_prec_t ql;
317   unsigned long l, m, i;
318   mpz_t t, *R, rr, tmp;
319   mp_size_t sbit, rrbit;
320   MPFR_TMP_DECL(marker);
321 
322   /* estimate value of l */
323   MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
324   l = q / (- MPFR_GET_EXP (r));
325   m = __gmpfr_isqrt (l);
326   /* we access R[2], thus we need m >= 2 */
327   if (m < 2)
328     m = 2;
329 
330   MPFR_TMP_MARK(marker);
331   R = (mpz_t*) MPFR_TMP_ALLOC ((m + 1) * sizeof (mpz_t));     /* R[i] is r^i */
332   expR = (mpfr_exp_t*) MPFR_TMP_ALLOC((m + 1) * sizeof (mpfr_exp_t));
333   /* expR[i] is the exponent for R[i] */
334   mpz_init (tmp);
335   mpz_init (rr);
336   mpz_init (t);
337   mpz_set_ui (s, 0);
338   *exps = 1 - q;                        /* 1 ulp = 2^(1-q) */
339   for (i = 0 ; i <= m ; i++)
340     mpz_init (R[i]);
341   expR[1] = mpfr_get_z_2exp (R[1], r); /* exact operation: no error */
342   expR[1] = mpz_normalize2 (R[1], R[1], expR[1], 1 - q); /* error <= 1 ulp */
343   mpz_mul (t, R[1], R[1]); /* err(t) <= 2 ulps */
344   mpz_fdiv_q_2exp (R[2], t, q - 1); /* err(R[2]) <= 3 ulps */
345   expR[2] = 1 - q;
346   for (i = 3 ; i <= m ; i++)
347     {
348       if ((i & 1) == 1)
349         mpz_mul (t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
350       else
351         mpz_mul (t, R[i/2], R[i/2]);
352       mpz_fdiv_q_2exp (R[i], t, q - 1); /* err(R[i]) <= 2*i-1 ulps */
353       expR[i] = 1 - q;
354     }
355   mpz_set_ui (R[0], 1);
356   mpz_mul_2exp (R[0], R[0], q-1);
357   expR[0] = 1-q; /* R[0]=1 */
358   mpz_set_ui (rr, 1);
359   expr = 0; /* rr contains r^l/l! */
360   /* by induction: err(rr) <= 2*l ulps */
361 
362   l = 0;
363   ql = q; /* precision used for current giant step */
364   do
365     {
366       /* all R[i] must have exponent 1-ql */
367       if (l != 0)
368         for (i = 0 ; i < m ; i++)
369           expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1 - ql);
370       /* the absolute error on R[i]*rr is still 2*i-1 ulps */
371       expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1 - ql);
372       /* err(t) <= 2*m-1 ulps */
373       /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
374          using Horner's scheme */
375       for (i = m-1 ; i-- != 0 ; )
376         {
377           mpz_fdiv_q_ui (t, t, l+i+1); /* err(t) += 1 ulp */
378           mpz_add (t, t, R[i]);
379         }
380       /* now err(t) <= (3m-2) ulps */
381 
382       /* now multiplies t by r^l/l! and adds to s */
383       mpz_mul (t, t, rr);
384       expt += expr;
385       expt = mpz_normalize2 (t, t, expt, *exps);
386       /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
387       MPFR_ASSERTD (expt == *exps);
388       mpz_add (s, s, t); /* no error here */
389 
390       /* updates rr, the multiplication of the factors l+i could be done
391          using binary splitting too, but it is not sure it would save much */
392       mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
393       expr += expR[m];
394       mpz_set_ui (tmp, 1);
395       for (i = 1 ; i <= m ; i++)
396         mpz_mul_ui (tmp, tmp, l + i);
397       mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */
398       l += m;
399       if (MPFR_UNLIKELY (mpz_sgn (t) == 0))
400         break;
401       expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
402       if (MPFR_UNLIKELY (mpz_sgn (rr) == 0))
403         rrbit = 1;
404       else
405         MPFR_MPZ_SIZEINBASE2 (rrbit, rr);
406       MPFR_MPZ_SIZEINBASE2 (sbit, s);
407       ql = q - *exps - sbit + expr + rrbit;
408       /* TODO: Wrong cast. I don't want what is right, but this is
409          certainly wrong */
410     }
411   while ((size_t) expr + rrbit > (size_t) -q);
412 
413   for (i = 0 ; i <= m ; i++)
414     mpz_clear (R[i]);
415   MPFR_TMP_FREE(marker);
416   mpz_clear (rr);
417   mpz_clear (t);
418   mpz_clear (tmp);
419 
420   return l * (l + 4);
421 }
422