1 /*
2  * this function computes log, correctly rounded,
3  * using double-extended arithmetic
4  *
5  * Copyright (C) 2004-2011 David Defour, Catherine Daramy-Loirat,
6  * Florent de Dinechin, Matthieu Gallet, Nicolas Gast, Christoph Quirin Lauter,
7  * and Jean-Michel Muller
8  *
9  * Author: Florent de Dinechin
10  * Florent.de.Dinechin at ens-lyon.fr
11  *
12  * This file is part of crlibm, the correctly rounded mathematical library,
13  * which has been developed by the Arénaire project at École normale supérieure
14  * de Lyon.
15  *
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  *
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 
30 
31  THIS IS EXPERIMENTAL SOFTWARE
32 
33 In particular it changes rounding modes all the time without warning
34 nor restoring.
35 
36 
37 This function compiles both on IA32 and IA64 architectures. On IA64,
38 it needs icc 8.1 or higher, with the following flags (which should be
39 set up by the autoconf).
40 
41 icc -DHAVE_CONFIG_H  -Qoption,cpp,--extended_float_types \
42                     -IPF_fp_speculationsafe -c log-de.c;\
43  mv log-de.o log-td.o; make
44 
45 
46 */
47 
48 
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include "crlibm.h"
52 #include "crlibm_private.h"
53 #include "double-extended.h"
54 #include "log-de.h"
55 
56 
log_accurate(double_ext * prh,double_ext * prl,double_ext z,int E,int index)57 static void log_accurate(double_ext* prh, double_ext* prl, double_ext z, int E, int index) {
58 
59 double_ext  eh,el,  t13, t12, t11, t10, t9, t8,
60   p7h,p7l, t7h,t7l, t6h,t6l, t5h,t5l, t4h,t4l,
61   t3h,t3l, t2h,t2l, t1h,t1l, t0h,t0l;
62 /* Many temporary because single assignment form is nicer for Gappa */
63 
64 #if !(defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
65   double_ext c1h,c2h,c3h,c4h,c5h,c6h,c7h,c8h,c9h,c10h,c11h,c12h,c13h,c14h,c15h;
66   double_ext c1l,c2l,c3l,c4l,c5l,c6l,c7l,c8l;
67 #endif
68 
69 
70 #if EVAL_PERF
71   crlibm_second_step_taken++;
72 #endif
73 
74   /* TODO check the conditions for the double-double ops */
75 
76 
77   PREFETCH_POLY_ACCURATE;
78   t13 = c13h + z*c14h;
79   t12 = c12h + z*t13;
80   t11 = c11h + z*t12;
81   t10 = c10h + z*t11;
82   t9 = c9h  + z*t10;
83   t8 = c8h  + z*t9;
84 #if 1 /* This is faster on PIII. Your mileage may vary */
85   Mul12_ext(&p7h, &p7l,   z, t8);
86   Add22_ext(&t7h, &t7l,   p7h,p7l, c7h,c7l);
87 #else
88   FMA22_ext(&t7h, &t7l,   z,0,   t8,0,    c7h,c7l);
89 #endif
90   FMA22_ext(&t6h, &t6l,   z,0,   t7h,t7l,    c6h,c6l);
91   FMA22_ext(&t5h, &t5l,   z,0,   t6h,t6l,    c5h,c5l);
92   FMA22_ext(&t4h, &t4l,   z,0,   t5h,t5l,    c4h,c4l);
93   FMA22_ext(&t3h, &t3l,   z,0,   t4h,t4l,    c3h,c3l);
94   FMA22_ext(&t2h, &t2l,   z,0,   t3h,t3l,    c2h,c2l);
95   FMA22_ext(&t1h, &t1l,   z,0,   t2h,t2l,    c1h,c1l);
96   FMA22_ext(&t0h, &t0l,   z,0,   t1h,t1l,    argredtable[index].logirh, argredtable[index].logirl);
97 
98   Mul22_ext(&eh, &el,   log2H,log2L, E, 0);
99   Add22_ext(prh, prl,   eh,el,  t0h,t0l);
100 }
101 
102 
103 
104 
105 
106 
107 
log_rn(double x)108 double log_rn(double x) {
109   double_ext logirh, r, y, z, th, tl, logde;
110 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
111   db_number xdb;
112   int E, index, index0, roundtestmask;
113 #else /* assuming Itanium here */
114   int64_t  E, i;
115   uint64_t index, roundtestmask;
116   double c2,c3,c4,c5,c6,c7;
117 #endif
118 
119 
120 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
121    xdb.d=x;
122 
123    index0 = (xdb.i[HI] & 0x000fffff);
124    index = (index0 + (1<<(20-L-1))) >> (20-L);
125    E = (xdb.i[HI]>>20)-1023;             /* extract the exponent */
126 
127    /* Filter cases */
128    if (xdb.i[HI] < 0x00100000){        /* x < 2^(-1022)    */
129      if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0)    return -1.0/0.0;  /* log(+/-0) = -Inf */
130      if (xdb.i[HI] < 0)                              return (x-x)/0;   /* log(-x) = Nan    */
131      /* Else subnormal number */
132      xdb.d *= two64; 	  /* make x a normal number    */
133      E = -64 + (xdb.i[HI]>>20)-1023;             /* extract the exponent */
134      index0 = (xdb.i[HI] & 0x000fffff);
135      index = (index0 + (1<<(20-L-1))) >> (20-L);
136    }
137    if (xdb.i[HI] >= 0x7ff00000)                      return  x+x;      /* Inf or Nan       */
138 
139    DOUBLE_EXTENDED_MODE;  /* This one should be overlapped with following integer computation */
140 
141    /* Extract exponent and mantissa */
142    xdb.i[HI] =  index0 | 0x3ff00000;	/* do exponent = 0 */
143    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
144    if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
145      xdb.i[HI] -= 0x00100000;
146      index = index & INDEXMASK;
147      E++;
148 }
149    y = xdb.d;
150 
151 #else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
152   /*  Here come the code specific to Itanium processor */
153    E=0;
154    PREFETCH_POLY_QUICK; /* defined in log-de.h */
155    y=x;
156    i =  _Asm_getf(2/*_FR_D*/, y);  /* Cast y to a 64-bit integer */
157 
158    /* Filter special cases */
159    if (i<(int64_t)ULL(0010000000000000)){   /* equivalent to : x < 2^(-1022)    */
160      if ((i & ULL(7fffffffffffffff))==0)  return -1.0/0.0;    /* log(+/-0) = -Inf */
161      if (i<0)                             return (x-x)/0;     /* log(-x) = Nan    */
162      /* Else subnormal number */
163      y *= two64; 	  /* make x a normal number    */
164      E = -64;
165      i =  _Asm_getf(2/*_FR_D*/, y); /* and update i */
166    }
167    if (i >= ULL(7ff0000000000000))        return  x+x;	      /* Inf or Nan       */
168 
169    /* Extract exponent and mantissa */
170    E += (i>>52)-1023;
171    i = i & ULL(000fffffffffffff);  /* keep only mantissa */
172    index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
173    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
174    if (index >= MAXINDEX){    /* corresponds to y>sqrt(2)*/
175      y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
176      index = index & INDEXMASK;
177      E++;
178    }
179    else
180      y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
181 #endif  /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
182 
183 
184 
185    /* All the previous argument reduction was exact */
186    /* now y holds 1+f, and E is the exponent */
187 
188      r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
189      logirh = argredtable[index].logirh;
190      z = y*r - 1. ; /* even without an FMA, all exact */
191 
192 #if 0
193    if(E==0)
194      roundtestmask=ACCURATE_TO_61_BITS;
195     else
196       roundtestmask=ACCURATE_TO_61_BITS;
197 #else
198       roundtestmask=ACCURATE_TO_62_BITS;
199 #endif
200 
201 #ifdef ESTRIN
202   /* Estrin polynomial evaluation  */
203   double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
204 
205   z2  = z*z;      p67 = c6 + z*c7;     p45 = c4 + z*c5;   p23 = c2 + z*c3;    p01 = logirh + z;
206   z4  = z2*z2;    p47 = p45 + z2*p67;  p03 = p01 + z2*p23;
207   p07 = p03 + z4*p47;
208   logde = p07 + E*log2H;
209 #endif
210 
211 #ifdef PATERSON
212   double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
213 
214   z2 = z * z;        t1 = z + ps_alpha;   t2 = z + ps_beta;  t3 = c3 * z + c2;  t4 = z + logirh;
215   z4 = z2 * z2;      t5 = z2 + ps_c;      t6 = t3 * z2 + t4;
216   t7 = t5 * t1 + t2; t0 = z4 * c7;        t8 = t7 * t0 + t6;
217   logde = t8 + E*log2H;
218 #endif
219 
220 #if 0 /* to time the first step only */
221    BACK_TO_DOUBLE_MODE; return (double)t;
222 #endif
223 
224 
225    /* To test the second step only, comment out the following line */
226    DE_TEST_AND_RETURN_RN(logde, roundtestmask);
227 
228 
229    log_accurate(&th, &tl, z, E, index);
230 
231    BACK_TO_DOUBLE_MODE;
232 
233    return (double) (th+tl); /* The exact sum of these double-extended is rounded to the nearest */
234 }
235 
236 
237 
238 
239 
240 
241 
242 
243 
244 
log_rd(double x)245 double log_rd(double x) {
246   double_ext logirh, r, y, z, th, tl, logde;
247 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
248   db_number xdb;
249   int E, index, roundtestmask;
250 #else
251   int64_t  E, i;
252   uint64_t index, roundtestmask;
253   double_ext c1,c2,c3,c4,c5,c6,c7;
254 #endif
255 
256    E=0;
257 
258 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
259    xdb.d=x;
260 
261    /* Filter cases */
262    if (xdb.i[HI] < 0x00100000){        /* x < 2^(-1022)    */
263      if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0)    return -1.0/0.0;  /* log(+/-0) = -Inf */
264      if (xdb.i[HI] < 0)                              return (x-x)/0;   /* log(-x) = Nan    */
265      /* Else subnormal number */
266      E = -64;
267      xdb.d *= two64; 	  /* make x a normal number    */
268    }
269    if (xdb.i[HI] >= 0x7ff00000)                      return  x+x;      /* Inf or Nan       */
270 
271    DOUBLE_EXTENDED_MODE;  /* This one should be overlapped with following integer computation */
272 
273    /* Extract exponent and mantissa */
274    E += (xdb.i[HI]>>20)-1023;             /* extract the exponent */
275    index = (xdb.i[HI] & 0x000fffff);
276    xdb.i[HI] =  index | 0x3ff00000;	/* do exponent = 0 */
277    index = (index + (1<<(20-L-1))) >> (20-L);
278    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
279    if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
280      xdb.i[HI] -= 0x00100000;
281      E++;
282    }
283    y = xdb.d;
284 
285 #else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
286   /*  Here come the code specific to Itanium processor */
287    PREFETCH_POLY_QUICK; /* defined in log-de.h */
288    y=x;
289    i =  _Asm_getf(2/*_FR_D*/, y);  /* Cast y to a 64-bit integer */
290 
291    /* Filter special cases */
292    if (i<(int64_t)ULL(0010000000000000)){   /* equivalent to : x < 2^(-1022)    */
293      if ((i & ULL(7fffffffffffffff))==0)  return -1.0/0.0;    /* log(+/-0) = -Inf */
294      if (i<0)                             return (x-x)/0;     /* log(-x) = Nan    */
295      /* Else subnormal number */
296      y *= two64; 	  /* make x a normal number    */
297      E = -64;
298      i =  _Asm_getf(2/*_FR_D*/, y); /* and update i */
299    }
300    if (i >= ULL(7ff0000000000000))        return  x+x;	      /* Inf or Nan       */
301 
302    /* Extract exponent and mantissa */
303    E += (i>>52)-1023;
304    i = i & ULL(000fffffffffffff);  /* keep only mantissa */
305    index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
306    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
307    if (index >= MAXINDEX){    /* corresponds to y>sqrt(2)*/
308      y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
309      E++;
310    }
311    else
312      y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
313 #endif  /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
314 
315    /* All the previous argument reduction was exact */
316    /* now y holds 1+f, and E is the exponent */
317    index = index & INDEXMASK;
318 
319    logirh = argredtable[index].logirh;
320    r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
321    z = y*r - 1. ; /* even without an FMA, all exact */
322 
323    if(E==0)
324      roundtestmask=ACCURATE_TO_61_BITS;
325    else
326      roundtestmask=ACCURATE_TO_62_BITS;
327 
328 #ifdef ESTRIN
329   /* Estrin polynomial evaluation  */
330   double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
331 
332   z2  = z*z;              p67 = c6 + z*c7;       p45 = c4 + z*c5;      p23 = c2 + z*c3;    p01 = logirh + z;
333   z4  = z2*z2;            p47 = p45 + z2*p67;    p03 = p01 + z2*p23;
334   p07 = p03 + z4*p47;
335   logde = p07 + E*log2H;
336 #endif
337 
338 #ifdef PATERSON
339   double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
340 
341   z2 = z * z;        t1 = z + ps_alpha;        t2 = z + ps_beta;        t3 = c3 * z + c2;        t4 = z + logirh;
342   z4 = z2 * z2;      t5 = z2 + ps_c;           t6 = t3 * z2 + t4;
343 
344   t7 = t5 * t1 + t2; t0 = z4 * c7;             t8 = t7 * t0 + t6;
345 
346   logde = t8 + E*log2H;
347 #endif
348 
349 #if 0 /* to time the first step only */
350    BACK_TO_DOUBLE_MODE; return (double)t;
351 #endif
352 
353 
354    /* To test the second step only, comment out the following line */
355    DE_TEST_AND_RETURN_RD(logde, roundtestmask);
356 
357    log_accurate(&th, &tl, z, E, index);
358 
359    RETURN_SUM_ROUNDED_DOWN(th, tl);
360 
361 }
362 
363 
364 
365 
366 
367 
368 
369 
370 
371 
log_ru(double x)372 double log_ru(double x) {
373   double_ext logirh, r, y, z,  th, tl,  logde;
374 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
375   db_number xdb;
376   int E, index, roundtestmask;
377 #else
378   int64_t  E, i;
379   uint64_t index, roundtestmask;
380   double_ext c1,c2,c3,c4,c5,c6,c7;
381 #endif
382 
383    E=0;
384 
385 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
386    xdb.d=x;
387 
388    /* Filter cases */
389    if (xdb.i[HI] < 0x00100000){        /* x < 2^(-1022)    */
390      if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0)    return -1.0/0.0;  /* log(+/-0) = -Inf */
391      if (xdb.i[HI] < 0)                              return (x-x)/0;   /* log(-x) = Nan    */
392      /* Else subnormal number */
393      E = -64;
394      xdb.d *= two64; 	  /* make x a normal number    */
395    }
396    if (xdb.i[HI] >= 0x7ff00000)                      return  x+x;      /* Inf or Nan       */
397 
398    DOUBLE_EXTENDED_MODE;  /* This one should be overlapped with following integer computation */
399 
400    /* Extract exponent and mantissa */
401    E += (xdb.i[HI]>>20)-1023;             /* extract the exponent */
402    index = (xdb.i[HI] & 0x000fffff);
403    xdb.i[HI] =  index | 0x3ff00000;	/* do exponent = 0 */
404    index = (index + (1<<(20-L-1))) >> (20-L);
405 
406    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
407    if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
408      index = index & INDEXMASK;
409      xdb.i[HI] -= 0x00100000;
410      E++;
411    }
412    y = xdb.d;
413 
414 #else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
415   /*  Here come the code specific to Itanium processor */
416    PREFETCH_POLY_QUICK; /* defined in log-de.h */
417    y=x;
418    i =  _Asm_getf(2/*_FR_D*/, y);  /* Cast y to a 64-bit integer */
419 
420    /* Filter special cases */
421    if (i<(int64_t)ULL(0010000000000000)){   /* equivalent to : x < 2^(-1022)    */
422      if ((i & ULL(7fffffffffffffff))==0)  return -1.0/0.0;    /* log(+/-0) = -Inf */
423      if (i<0)                             return (x-x)/0;     /* log(-x) = Nan    */
424      /* Else subnormal number */
425      y *= two64; 	  /* make x a normal number    */
426      E = -64;
427      i =  _Asm_getf(2/*_FR_D*/, y); /* and update i */
428    }
429    if (i >= ULL(7ff0000000000000))        return  x+x;	      /* Inf or Nan       */
430 
431    /* Extract exponent and mantissa */
432    E += (i>>52)-1023;
433    i = i & ULL(000fffffffffffff);  /* keep only mantissa */
434    index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
435    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
436    if (index >= MAXINDEX){    /* corresponds to y>sqrt(2)*/
437      y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
438      index = index & INDEXMASK;
439      E++;
440    }
441    else
442      y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
443 #endif  /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
444 
445    /* All the previous argument reduction was exact */
446    /* now y holds 1+f, and E is the exponent */
447 
448    logirh = argredtable[index].logirh;
449    r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
450    z = y*r - 1. ; /* even without an FMA, all exact */
451 
452    if(E==0)
453      roundtestmask=ACCURATE_TO_61_BITS;
454    else
455      roundtestmask=ACCURATE_TO_62_BITS;
456 
457 #ifdef ESTRIN
458   /* Estrin polynomial evaluation  */
459   double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
460 
461   z2  = z*z;              p67 = c6 + z*c7;       p45 = c4 + z*c5;      p23 = c2 + z*c3;    p01 = logirh + z;
462   z4  = z2*z2;            p47 = p45 + z2*p67;    p03 = p01 + z2*p23;
463   p07 = p03 + z4*p47;
464   logde = p07 + E*log2H;
465 #endif
466 
467 #ifdef PATERSON
468   double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
469 
470   z2 = z * z;        t1 = z + ps_alpha;        t2 = z + ps_beta;        t3 = c3 * z + c2;        t4 = z + logirh;
471   z4 = z2 * z2;      t5 = z2 + ps_c;           t6 = t3 * z2 + t4;
472 
473   t7 = t5 * t1 + t2; t0 = z4 * c7;             t8 = t7 * t0 + t6;
474 
475   logde = t8 + E*log2H;
476 #endif
477 
478 
479 #if 0 /* to time the first step only */
480    BACK_TO_DOUBLE_MODE; return (double)t;
481 #endif
482 
483 
484    /* To test the second step only, comment out the following line */
485    DE_TEST_AND_RETURN_RU(logde, roundtestmask);
486 
487    log_accurate(&th, &tl, z, E, index);
488 
489    RETURN_SUM_ROUNDED_UP(th, tl);
490 
491 }
492 
493 
log_rz(double x)494 double log_rz(double x) {
495   if (x>1.0)
496     return log_rd(x);
497   else
498     return log_ru(x);
499 }
500 
501 
502