1 /*
2 * this function computes log, correctly rounded,
3 * using double-extended arithmetic
4 *
5 * Copyright (C) 2004-2011 David Defour, Catherine Daramy-Loirat,
6 * Florent de Dinechin, Matthieu Gallet, Nicolas Gast, Christoph Quirin Lauter,
7 * and Jean-Michel Muller
8 *
9 * Author: Florent de Dinechin
10 * Florent.de.Dinechin at ens-lyon.fr
11 *
12 * This file is part of crlibm, the correctly rounded mathematical library,
13 * which has been developed by the Arénaire project at École normale supérieure
14 * de Lyon.
15 *
16 * This library is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU Lesser General Public
18 * License as published by the Free Software Foundation; either
19 * version 2.1 of the License, or (at your option) any later version.
20 *
21 * This library is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public
27 * License along with this library; if not, write to the Free Software
28 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29
30
31 THIS IS EXPERIMENTAL SOFTWARE
32
33 In particular it changes rounding modes all the time without warning
34 nor restoring.
35
36
37 This function compiles both on IA32 and IA64 architectures. On IA64,
38 it needs icc 8.1 or higher, with the following flags (which should be
39 set up by the autoconf).
40
41 icc -DHAVE_CONFIG_H -Qoption,cpp,--extended_float_types \
42 -IPF_fp_speculationsafe -c log-de.c;\
43 mv log-de.o log-td.o; make
44
45
46 */
47
48
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include "crlibm.h"
52 #include "crlibm_private.h"
53 #include "double-extended.h"
54 #include "log-de.h"
55
56
log_accurate(double_ext * prh,double_ext * prl,double_ext z,int E,int index)57 static void log_accurate(double_ext* prh, double_ext* prl, double_ext z, int E, int index) {
58
59 double_ext eh,el, t13, t12, t11, t10, t9, t8,
60 p7h,p7l, t7h,t7l, t6h,t6l, t5h,t5l, t4h,t4l,
61 t3h,t3l, t2h,t2l, t1h,t1l, t0h,t0l;
62 /* Many temporary because single assignment form is nicer for Gappa */
63
64 #if !(defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
65 double_ext c1h,c2h,c3h,c4h,c5h,c6h,c7h,c8h,c9h,c10h,c11h,c12h,c13h,c14h,c15h;
66 double_ext c1l,c2l,c3l,c4l,c5l,c6l,c7l,c8l;
67 #endif
68
69
70 #if EVAL_PERF
71 crlibm_second_step_taken++;
72 #endif
73
74 /* TODO check the conditions for the double-double ops */
75
76
77 PREFETCH_POLY_ACCURATE;
78 t13 = c13h + z*c14h;
79 t12 = c12h + z*t13;
80 t11 = c11h + z*t12;
81 t10 = c10h + z*t11;
82 t9 = c9h + z*t10;
83 t8 = c8h + z*t9;
84 #if 1 /* This is faster on PIII. Your mileage may vary */
85 Mul12_ext(&p7h, &p7l, z, t8);
86 Add22_ext(&t7h, &t7l, p7h,p7l, c7h,c7l);
87 #else
88 FMA22_ext(&t7h, &t7l, z,0, t8,0, c7h,c7l);
89 #endif
90 FMA22_ext(&t6h, &t6l, z,0, t7h,t7l, c6h,c6l);
91 FMA22_ext(&t5h, &t5l, z,0, t6h,t6l, c5h,c5l);
92 FMA22_ext(&t4h, &t4l, z,0, t5h,t5l, c4h,c4l);
93 FMA22_ext(&t3h, &t3l, z,0, t4h,t4l, c3h,c3l);
94 FMA22_ext(&t2h, &t2l, z,0, t3h,t3l, c2h,c2l);
95 FMA22_ext(&t1h, &t1l, z,0, t2h,t2l, c1h,c1l);
96 FMA22_ext(&t0h, &t0l, z,0, t1h,t1l, argredtable[index].logirh, argredtable[index].logirl);
97
98 Mul22_ext(&eh, &el, log2H,log2L, E, 0);
99 Add22_ext(prh, prl, eh,el, t0h,t0l);
100 }
101
102
103
104
105
106
107
log_rn(double x)108 double log_rn(double x) {
109 double_ext logirh, r, y, z, th, tl, logde;
110 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
111 db_number xdb;
112 int E, index, index0, roundtestmask;
113 #else /* assuming Itanium here */
114 int64_t E, i;
115 uint64_t index, roundtestmask;
116 double c2,c3,c4,c5,c6,c7;
117 #endif
118
119
120 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
121 xdb.d=x;
122
123 index0 = (xdb.i[HI] & 0x000fffff);
124 index = (index0 + (1<<(20-L-1))) >> (20-L);
125 E = (xdb.i[HI]>>20)-1023; /* extract the exponent */
126
127 /* Filter cases */
128 if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
129 if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0) return -1.0/0.0; /* log(+/-0) = -Inf */
130 if (xdb.i[HI] < 0) return (x-x)/0; /* log(-x) = Nan */
131 /* Else subnormal number */
132 xdb.d *= two64; /* make x a normal number */
133 E = -64 + (xdb.i[HI]>>20)-1023; /* extract the exponent */
134 index0 = (xdb.i[HI] & 0x000fffff);
135 index = (index0 + (1<<(20-L-1))) >> (20-L);
136 }
137 if (xdb.i[HI] >= 0x7ff00000) return x+x; /* Inf or Nan */
138
139 DOUBLE_EXTENDED_MODE; /* This one should be overlapped with following integer computation */
140
141 /* Extract exponent and mantissa */
142 xdb.i[HI] = index0 | 0x3ff00000; /* do exponent = 0 */
143 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
144 if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
145 xdb.i[HI] -= 0x00100000;
146 index = index & INDEXMASK;
147 E++;
148 }
149 y = xdb.d;
150
151 #else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
152 /* Here come the code specific to Itanium processor */
153 E=0;
154 PREFETCH_POLY_QUICK; /* defined in log-de.h */
155 y=x;
156 i = _Asm_getf(2/*_FR_D*/, y); /* Cast y to a 64-bit integer */
157
158 /* Filter special cases */
159 if (i<(int64_t)ULL(0010000000000000)){ /* equivalent to : x < 2^(-1022) */
160 if ((i & ULL(7fffffffffffffff))==0) return -1.0/0.0; /* log(+/-0) = -Inf */
161 if (i<0) return (x-x)/0; /* log(-x) = Nan */
162 /* Else subnormal number */
163 y *= two64; /* make x a normal number */
164 E = -64;
165 i = _Asm_getf(2/*_FR_D*/, y); /* and update i */
166 }
167 if (i >= ULL(7ff0000000000000)) return x+x; /* Inf or Nan */
168
169 /* Extract exponent and mantissa */
170 E += (i>>52)-1023;
171 i = i & ULL(000fffffffffffff); /* keep only mantissa */
172 index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
173 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
174 if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
175 y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
176 index = index & INDEXMASK;
177 E++;
178 }
179 else
180 y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
181 #endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
182
183
184
185 /* All the previous argument reduction was exact */
186 /* now y holds 1+f, and E is the exponent */
187
188 r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
189 logirh = argredtable[index].logirh;
190 z = y*r - 1. ; /* even without an FMA, all exact */
191
192 #if 0
193 if(E==0)
194 roundtestmask=ACCURATE_TO_61_BITS;
195 else
196 roundtestmask=ACCURATE_TO_61_BITS;
197 #else
198 roundtestmask=ACCURATE_TO_62_BITS;
199 #endif
200
201 #ifdef ESTRIN
202 /* Estrin polynomial evaluation */
203 double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
204
205 z2 = z*z; p67 = c6 + z*c7; p45 = c4 + z*c5; p23 = c2 + z*c3; p01 = logirh + z;
206 z4 = z2*z2; p47 = p45 + z2*p67; p03 = p01 + z2*p23;
207 p07 = p03 + z4*p47;
208 logde = p07 + E*log2H;
209 #endif
210
211 #ifdef PATERSON
212 double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
213
214 z2 = z * z; t1 = z + ps_alpha; t2 = z + ps_beta; t3 = c3 * z + c2; t4 = z + logirh;
215 z4 = z2 * z2; t5 = z2 + ps_c; t6 = t3 * z2 + t4;
216 t7 = t5 * t1 + t2; t0 = z4 * c7; t8 = t7 * t0 + t6;
217 logde = t8 + E*log2H;
218 #endif
219
220 #if 0 /* to time the first step only */
221 BACK_TO_DOUBLE_MODE; return (double)t;
222 #endif
223
224
225 /* To test the second step only, comment out the following line */
226 DE_TEST_AND_RETURN_RN(logde, roundtestmask);
227
228
229 log_accurate(&th, &tl, z, E, index);
230
231 BACK_TO_DOUBLE_MODE;
232
233 return (double) (th+tl); /* The exact sum of these double-extended is rounded to the nearest */
234 }
235
236
237
238
239
240
241
242
243
244
log_rd(double x)245 double log_rd(double x) {
246 double_ext logirh, r, y, z, th, tl, logde;
247 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
248 db_number xdb;
249 int E, index, roundtestmask;
250 #else
251 int64_t E, i;
252 uint64_t index, roundtestmask;
253 double_ext c1,c2,c3,c4,c5,c6,c7;
254 #endif
255
256 E=0;
257
258 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
259 xdb.d=x;
260
261 /* Filter cases */
262 if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
263 if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0) return -1.0/0.0; /* log(+/-0) = -Inf */
264 if (xdb.i[HI] < 0) return (x-x)/0; /* log(-x) = Nan */
265 /* Else subnormal number */
266 E = -64;
267 xdb.d *= two64; /* make x a normal number */
268 }
269 if (xdb.i[HI] >= 0x7ff00000) return x+x; /* Inf or Nan */
270
271 DOUBLE_EXTENDED_MODE; /* This one should be overlapped with following integer computation */
272
273 /* Extract exponent and mantissa */
274 E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
275 index = (xdb.i[HI] & 0x000fffff);
276 xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
277 index = (index + (1<<(20-L-1))) >> (20-L);
278 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
279 if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
280 xdb.i[HI] -= 0x00100000;
281 E++;
282 }
283 y = xdb.d;
284
285 #else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
286 /* Here come the code specific to Itanium processor */
287 PREFETCH_POLY_QUICK; /* defined in log-de.h */
288 y=x;
289 i = _Asm_getf(2/*_FR_D*/, y); /* Cast y to a 64-bit integer */
290
291 /* Filter special cases */
292 if (i<(int64_t)ULL(0010000000000000)){ /* equivalent to : x < 2^(-1022) */
293 if ((i & ULL(7fffffffffffffff))==0) return -1.0/0.0; /* log(+/-0) = -Inf */
294 if (i<0) return (x-x)/0; /* log(-x) = Nan */
295 /* Else subnormal number */
296 y *= two64; /* make x a normal number */
297 E = -64;
298 i = _Asm_getf(2/*_FR_D*/, y); /* and update i */
299 }
300 if (i >= ULL(7ff0000000000000)) return x+x; /* Inf or Nan */
301
302 /* Extract exponent and mantissa */
303 E += (i>>52)-1023;
304 i = i & ULL(000fffffffffffff); /* keep only mantissa */
305 index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
306 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
307 if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
308 y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
309 E++;
310 }
311 else
312 y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
313 #endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
314
315 /* All the previous argument reduction was exact */
316 /* now y holds 1+f, and E is the exponent */
317 index = index & INDEXMASK;
318
319 logirh = argredtable[index].logirh;
320 r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
321 z = y*r - 1. ; /* even without an FMA, all exact */
322
323 if(E==0)
324 roundtestmask=ACCURATE_TO_61_BITS;
325 else
326 roundtestmask=ACCURATE_TO_62_BITS;
327
328 #ifdef ESTRIN
329 /* Estrin polynomial evaluation */
330 double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
331
332 z2 = z*z; p67 = c6 + z*c7; p45 = c4 + z*c5; p23 = c2 + z*c3; p01 = logirh + z;
333 z4 = z2*z2; p47 = p45 + z2*p67; p03 = p01 + z2*p23;
334 p07 = p03 + z4*p47;
335 logde = p07 + E*log2H;
336 #endif
337
338 #ifdef PATERSON
339 double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
340
341 z2 = z * z; t1 = z + ps_alpha; t2 = z + ps_beta; t3 = c3 * z + c2; t4 = z + logirh;
342 z4 = z2 * z2; t5 = z2 + ps_c; t6 = t3 * z2 + t4;
343
344 t7 = t5 * t1 + t2; t0 = z4 * c7; t8 = t7 * t0 + t6;
345
346 logde = t8 + E*log2H;
347 #endif
348
349 #if 0 /* to time the first step only */
350 BACK_TO_DOUBLE_MODE; return (double)t;
351 #endif
352
353
354 /* To test the second step only, comment out the following line */
355 DE_TEST_AND_RETURN_RD(logde, roundtestmask);
356
357 log_accurate(&th, &tl, z, E, index);
358
359 RETURN_SUM_ROUNDED_DOWN(th, tl);
360
361 }
362
363
364
365
366
367
368
369
370
371
log_ru(double x)372 double log_ru(double x) {
373 double_ext logirh, r, y, z, th, tl, logde;
374 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
375 db_number xdb;
376 int E, index, roundtestmask;
377 #else
378 int64_t E, i;
379 uint64_t index, roundtestmask;
380 double_ext c1,c2,c3,c4,c5,c6,c7;
381 #endif
382
383 E=0;
384
385 #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
386 xdb.d=x;
387
388 /* Filter cases */
389 if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
390 if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0) return -1.0/0.0; /* log(+/-0) = -Inf */
391 if (xdb.i[HI] < 0) return (x-x)/0; /* log(-x) = Nan */
392 /* Else subnormal number */
393 E = -64;
394 xdb.d *= two64; /* make x a normal number */
395 }
396 if (xdb.i[HI] >= 0x7ff00000) return x+x; /* Inf or Nan */
397
398 DOUBLE_EXTENDED_MODE; /* This one should be overlapped with following integer computation */
399
400 /* Extract exponent and mantissa */
401 E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
402 index = (xdb.i[HI] & 0x000fffff);
403 xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
404 index = (index + (1<<(20-L-1))) >> (20-L);
405
406 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
407 if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
408 index = index & INDEXMASK;
409 xdb.i[HI] -= 0x00100000;
410 E++;
411 }
412 y = xdb.d;
413
414 #else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
415 /* Here come the code specific to Itanium processor */
416 PREFETCH_POLY_QUICK; /* defined in log-de.h */
417 y=x;
418 i = _Asm_getf(2/*_FR_D*/, y); /* Cast y to a 64-bit integer */
419
420 /* Filter special cases */
421 if (i<(int64_t)ULL(0010000000000000)){ /* equivalent to : x < 2^(-1022) */
422 if ((i & ULL(7fffffffffffffff))==0) return -1.0/0.0; /* log(+/-0) = -Inf */
423 if (i<0) return (x-x)/0; /* log(-x) = Nan */
424 /* Else subnormal number */
425 y *= two64; /* make x a normal number */
426 E = -64;
427 i = _Asm_getf(2/*_FR_D*/, y); /* and update i */
428 }
429 if (i >= ULL(7ff0000000000000)) return x+x; /* Inf or Nan */
430
431 /* Extract exponent and mantissa */
432 E += (i>>52)-1023;
433 i = i & ULL(000fffffffffffff); /* keep only mantissa */
434 index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
435 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
436 if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
437 y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
438 index = index & INDEXMASK;
439 E++;
440 }
441 else
442 y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
443 #endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
444
445 /* All the previous argument reduction was exact */
446 /* now y holds 1+f, and E is the exponent */
447
448 logirh = argredtable[index].logirh;
449 r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
450 z = y*r - 1. ; /* even without an FMA, all exact */
451
452 if(E==0)
453 roundtestmask=ACCURATE_TO_61_BITS;
454 else
455 roundtestmask=ACCURATE_TO_62_BITS;
456
457 #ifdef ESTRIN
458 /* Estrin polynomial evaluation */
459 double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
460
461 z2 = z*z; p67 = c6 + z*c7; p45 = c4 + z*c5; p23 = c2 + z*c3; p01 = logirh + z;
462 z4 = z2*z2; p47 = p45 + z2*p67; p03 = p01 + z2*p23;
463 p07 = p03 + z4*p47;
464 logde = p07 + E*log2H;
465 #endif
466
467 #ifdef PATERSON
468 double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
469
470 z2 = z * z; t1 = z + ps_alpha; t2 = z + ps_beta; t3 = c3 * z + c2; t4 = z + logirh;
471 z4 = z2 * z2; t5 = z2 + ps_c; t6 = t3 * z2 + t4;
472
473 t7 = t5 * t1 + t2; t0 = z4 * c7; t8 = t7 * t0 + t6;
474
475 logde = t8 + E*log2H;
476 #endif
477
478
479 #if 0 /* to time the first step only */
480 BACK_TO_DOUBLE_MODE; return (double)t;
481 #endif
482
483
484 /* To test the second step only, comment out the following line */
485 DE_TEST_AND_RETURN_RU(logde, roundtestmask);
486
487 log_accurate(&th, &tl, z, E, index);
488
489 RETURN_SUM_ROUNDED_UP(th, tl);
490
491 }
492
493
log_rz(double x)494 double log_rz(double x) {
495 if (x>1.0)
496 return log_rd(x);
497 else
498 return log_ru(x);
499 }
500
501
502