1 /*
2 * This function computes log10, correctly rounded,
3 * using experimental techniques based on triple double arithmetics
4
5 THIS IS EXPERIMENTAL SOFTWARE
6
7 *
8 * Author : Christoph Lauter
9 * christoph.lauter at ens-lyon.fr
10 *
11
12 To have it replace the crlibm log10, do:
13
14 gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c log10-td.c; mv log10-td.o log10_accurate.o; make
15
16
17 **********************************************************
18 *
19 * NOTE: THIS FUNCTION USES SOME BASIC SEQUENCES AND
20 * FURTHER FINAL ROUNDING SEQUENCES WITH SPECIAL CASE
21 * TESTS NO FORMAL PROOF IS AVAILABLE FOR IN THE MOMENT
22 *
23 **********************************************************
24
25 */
26
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include "crlibm.h"
31 #include "crlibm_private.h"
32 #include "triple-double.h"
33 #include "log10-td.h"
34
35 #define AVOID_FMA 0
36
37
log10_td_accurate(double * logb10h,double * logb10m,double * logb10l,int E,double ed,int index,double zh,double zl,double logih,double logim)38 void log10_td_accurate(double *logb10h, double *logb10m, double *logb10l, int E, double ed, int index, double zh, double zl, double logih, double logim) {
39 double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
40 double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
41 double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
42 double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
43 double log2edh, log2edm, log2edl, logb10hover, logb10mover, logb10lover;
44 double logyhnorm, logymnorm, logylnorm;
45
46
47
48
49
50 #if EVAL_PERF
51 crlibm_second_step_taken++;
52 #endif
53
54
55 /* Accurate phase:
56
57 Argument reduction is already done.
58 We must return logh, logm and logl representing the intermediate result in 118 bits precision.
59
60 We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
61 calculating the next 7 coefficients in double double arithmetics and the last in double.
62
63 We must account for zl starting with the monome of degree 4 (7^3 + 53 - 7 >> 118); so
64 double double calculations won't account for it.
65
66 */
67
68 /* Start of the horner scheme */
69
70 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
71 highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
72 #else
73 highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
74 #endif
75
76 /* We want to write
77
78 accPolyC3 + zh * (accPoly4 + zh * (accPoly5 + zh * (accPoly6 + zh * (accPoly7 + zh * (accPoly8 + zh * (accPoly9 + zh * highPoly))))));
79 ( t14 t13 t12 t11 t10 t9 t8 t7 t6 t5 t4 t3 t2 t1 )
80
81 with all additions and multiplications in double double arithmetics
82 but we will produce intermediate results labelled t1h/t1l thru t14h/t14l
83 */
84
85 Mul12(&t1h, &t1l, zh, highPoly);
86 Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
87 Mul22(&t3h, &t3l, zh, zl, t2h, t2l);
88 Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
89 Mul22(&t5h, &t5l, zh, zl, t4h, t4l);
90 Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
91 Mul22(&t7h, &t7l, zh, zl, t6h, t6l);
92 Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
93 Mul22(&t9h, &t9l, zh, zl, t8h, t8l);
94 Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
95 Mul22(&t11h, &t11l, zh, zl, t10h, t10l);
96 Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
97 Mul22(&t13h, &t13l, zh, zl, t12h, t12l);
98 Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
99
100 /* We must now prepare (zh + zl)^2 and (zh + zl)^3 as triple doubles */
101
102 Mul23(&zSquareh, &zSquarem, &zSquarel, zh, zl, zh, zl);
103 Mul233(&zCubeh, &zCubem, &zCubel, zh, zl, zSquareh, zSquarem, zSquarel);
104
105 /* We can now multiplicate the middle and higher polynomial by z^3 */
106
107 Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
108
109 /* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
110
111 zSquareHalfh = zSquareh * -0.5;
112 zSquareHalfm = zSquarem * -0.5;
113 zSquareHalfl = zSquarel * -0.5;
114
115 Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
116 zSquareHalfh, zSquareHalfm, zSquareHalfl,
117 higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
118
119 /* Add now zh and zl to obtain the polynomial evaluation result */
120
121 Add233(&polyh, &polym, &polyl, zh, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
122
123 /* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
124 logil has not been read to the time, do this first
125 */
126
127 logil = argredtable[index].logil;
128
129 Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
130
131
132 /* Renormalize logyh, logym and logyl to a non-overlapping triple-double for winning some
133 accuracy in the final ln(x) result before multiplying with log10inv
134
135 THIS MAY NOT BE NECESSARY NOR SUFFICIENT
136
137 */
138
139 Renormalize3(&logyhnorm, &logymnorm, &logylnorm, logyh, logym, logyl);
140
141
142 /* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
143 ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
144 So multiplying naively is correct (up to 134 bits at least)
145
146 The final result is thus obtained by adding log2 * E to log(y)
147 */
148
149 log2edhover = log2h * ed;
150 log2edmover = log2m * ed;
151 log2edlover = log2l * ed;
152
153 /* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
154 the to the log(y)-result
155
156 If needed, uncomment the following Renormalize3-Statement and comment out the copies
157 following it.
158 */
159
160 /* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
161
162 log2edh = log2edhover;
163 log2edm = log2edmover;
164 log2edl = log2edlover;
165
166 Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyhnorm, logymnorm, logylnorm);
167
168
169 /* Change logarithm base from natural base to base 10 by multiplying */
170
171 Mul33(&logb10hover, &logb10mover, &logb10lover, log10invh, log10invm, log10invl, loghover, logmover, loglover);
172
173
174 /* Since we can not guarantee in each addition and multiplication procedure that
175 the results are not overlapping, we must renormalize the result before handing
176 it over to the final rounding
177 */
178
179 Renormalize3(logb10h,logb10m,logb10l,logb10hover,logb10mover,logb10lover);
180 }
181
182
183
184 /*************************************************************
185 *************************************************************
186 * ROUNDED TO NEAREST *
187 *************************************************************
188 *************************************************************/
log10_rn(double x)189 double log10_rn(double x){
190 db_number xdb;
191 double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
192 double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
193 double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
194 double logb10h, logb10m, logb10l;
195 int E, index;
196
197 E=0;
198 xdb.d=x;
199
200 /* Filter cases */
201 if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
202 if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
203 return -1.0/0.0;
204 } /* log(+/-0) = -Inf */
205 if (xdb.i[HI] < 0){
206 return (x-x)/0; /* log(-x) = Nan */
207 }
208 /* Subnormal number */
209 E = -52;
210 xdb.d *= two52; /* make x a normal number */
211 }
212
213 if (xdb.i[HI] >= 0x7ff00000){
214 return x+x; /* Inf or Nan */
215 }
216
217
218 /* Extract exponent and mantissa
219 Do range reduction,
220 yielding to E holding the exponent and
221 y the mantissa between sqrt(2)/2 and sqrt(2)
222 */
223 E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
224 index = (xdb.i[HI] & 0x000fffff);
225 xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
226 index = (index + (1<<(20-L-1))) >> (20-L);
227
228 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
229 if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
230 xdb.i[HI] -= 0x00100000;
231 E++;
232 }
233 y = xdb.d;
234 index = index & INDEXMASK;
235 /* Cast integer E into double ed for multiplication later */
236 ed = (double) E;
237
238 /*
239 Read tables:
240 Read one float for ri
241 Read the first two doubles for -log(r_i) (out of three)
242
243 Organization of the table:
244
245 one struct entry per index, the struct entry containing
246 r, logih, logim and logil in this order
247 */
248
249
250 ri = argredtable[index].ri;
251 /*
252 Actually we don't need the logarithm entries now
253 Move the following two lines to the eventual reconstruction
254 As long as we don't have any if in the following code, we can overlap
255 memory access with calculations
256 */
257 logih = argredtable[index].logih;
258 logim = argredtable[index].logim;
259
260 /* Do range reduction:
261
262 zh + zl = y * ri - 1.0 correctly
263
264 Correctness is assured by use of Mul12 and Add12
265 even if we don't force ri to have its' LSBs set to zero
266
267 Discard zl for higher monome degrees
268 */
269
270 Mul12(&yrih, &yril, y, ri);
271 th = yrih - 1.0;
272 Add12Cond(zh, zl, th, yril);
273
274 /*
275 Polynomial evaluation
276
277 Use a 7 degree polynomial
278 Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
279 Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
280 using an ad hoc method
281
282 */
283
284
285
286 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
287 polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
288 #else
289 polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
290 #endif
291
292 Mul12(&zhSquareh, &zhSquarel, zh, zh);
293 polyUpper = polyHorner * (zh * zhSquareh);
294 zhSquareHalfh = zhSquareh * -0.5;
295 zhSquareHalfl = zhSquarel * -0.5;
296 Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
297 Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
298 Add22(&ph, &pl, t2h, t2l, t1h, t1l);
299
300 /* Reconstruction
301
302 Read logih and logim in the tables (already done)
303
304 Compute log(x) = E * log(2) + log(1+z) - log(ri)
305 i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
306
307 Carry out everything in double double precision
308
309 */
310
311 /*
312 We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
313 Multiplication of ed (double E) and log2h is thus correct
314 The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
315 is enough for the accurate phase
316 The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
317 Nevertheless the storage with trailing zeros implies an overlap of the tabulated
318 triple double values. We have to take it into account for the accurate phase
319 basic procedures for addition and multiplication
320 The condition on the next Add12 is verified as log2m is smaller than log2h
321 and both are scaled by ed
322 */
323
324 Add12(log2edh, log2edl, log2h * ed, log2m * ed);
325
326 /* Add logih and logim to ph and pl
327
328 We must use conditioned Add22 as logih can move over ph
329 */
330
331 Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
332
333 /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
334
335 Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
336
337
338
339 /* Change logarithm base from natural base to base 10 by multiplying */
340
341 Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
342
343
344 /* Rounding test and eventual return or call to the accurate function */
345
346 if(E==0)
347 roundcst = ROUNDCST1;
348 else
349 roundcst = ROUNDCST2;
350
351
352 if(logb10h == (logb10h + (logb10m * roundcst)))
353 return logb10h;
354 else
355 {
356
357 #if DEBUG
358 printf("Going for Accurate Phase for x=%1.50e\n",x);
359 #endif
360
361 log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
362
363 ReturnRoundToNearest3(logb10h, logb10m, logb10l);
364
365 } /* Accurate phase launched */
366 }
367
368
369 /*************************************************************
370 *************************************************************
371 * ROUNDED UPWARDS *
372 *************************************************************
373 *************************************************************/
log10_ru(double x)374 double log10_ru(double x) {
375 db_number xdb;
376 double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
377 double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
378 double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
379 double logb10h, logb10m, logb10l;
380 int E, index;
381
382 E=0;
383 xdb.d=x;
384
385 /* Filter cases */
386 if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
387 if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
388 return -1.0/0.0;
389 } /* log(+/-0) = -Inf */
390 if (xdb.i[HI] < 0){
391 return (x-x)/0; /* log(-x) = Nan */
392 }
393 /* Subnormal number */
394 E = -52;
395 xdb.d *= two52; /* make x a normal number */
396 }
397
398 if (xdb.i[HI] >= 0x7ff00000){
399 return x+x; /* Inf or Nan */
400 }
401
402
403 /* Extract exponent and mantissa
404 Do range reduction,
405 yielding to E holding the exponent and
406 y the mantissa between sqrt(2)/2 and sqrt(2)
407 */
408 E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
409 index = (xdb.i[HI] & 0x000fffff);
410 xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
411 index = (index + (1<<(20-L-1))) >> (20-L);
412
413 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
414 if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
415 xdb.i[HI] -= 0x00100000;
416 E++;
417 }
418 y = xdb.d;
419 index = index & INDEXMASK;
420 /* Cast integer E into double ed for multiplication later */
421 ed = (double) E;
422
423 /*
424 Read tables:
425 Read one float for ri
426 Read the first two doubles for -log(r_i) (out of three)
427
428 Organization of the table:
429
430 one struct entry per index, the struct entry containing
431 r, logih, logim and logil in this order
432 */
433
434
435 ri = argredtable[index].ri;
436 /*
437 Actually we don't need the logarithm entries now
438 Move the following two lines to the eventual reconstruction
439 As long as we don't have any if in the following code, we can overlap
440 memory access with calculations
441 */
442 logih = argredtable[index].logih;
443 logim = argredtable[index].logim;
444
445 /* Do range reduction:
446
447 zh + zl = y * ri - 1.0 correctly
448
449 Correctness is assured by use of Mul12 and Add12
450 even if we don't force ri to have its' LSBs set to zero
451
452 Discard zl for higher monome degrees
453 */
454
455 Mul12(&yrih, &yril, y, ri);
456 th = yrih - 1.0;
457 Add12Cond(zh, zl, th, yril);
458
459 /*
460 Polynomial evaluation
461
462 Use a 7 degree polynomial
463 Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
464 Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
465 using an ad hoc method
466
467 */
468
469
470
471 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
472 polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
473 #else
474 polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
475 #endif
476
477 Mul12(&zhSquareh, &zhSquarel, zh, zh);
478 polyUpper = polyHorner * (zh * zhSquareh);
479 zhSquareHalfh = zhSquareh * -0.5;
480 zhSquareHalfl = zhSquarel * -0.5;
481 Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
482 Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
483 Add22(&ph, &pl, t2h, t2l, t1h, t1l);
484
485 /* Reconstruction
486
487 Read logih and logim in the tables (already done)
488
489 Compute log(x) = E * log(2) + log(1+z) - log(ri)
490 i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
491
492 Carry out everything in double double precision
493
494 */
495
496 /*
497 We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
498 Multiplication of ed (double E) and log2h is thus correct
499 The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
500 is enough for the accurate phase
501 The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
502 Nevertheless the storage with trailing zeros implies an overlap of the tabulated
503 triple double values. We have to take it into account for the accurate phase
504 basic procedures for addition and multiplication
505 The condition on the next Add12 is verified as log2m is smaller than log2h
506 and both are scaled by ed
507 */
508
509 Add12(log2edh, log2edl, log2h * ed, log2m * ed);
510
511 /* Add logih and logim to ph and pl
512
513 We must use conditioned Add22 as logih can move over ph
514 */
515
516 Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
517
518 /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
519
520 Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
521
522
523
524 /* Change logarithm base from natural base to base 10 by multiplying */
525
526 Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
527
528
529 /* Rounding test and eventual return or call to the accurate function */
530
531 if(E==0)
532 roundcst = RDROUNDCST1;
533 else
534 roundcst = RDROUNDCST2;
535
536 TEST_AND_RETURN_RU(logb10h, logb10m, roundcst);
537
538 #if DEBUG
539 printf("Going for Accurate Phase for x=%1.50e\n",x);
540 #endif
541
542 log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
543
544 ReturnRoundUpwards3Unfiltered(logb10h, logb10m, logb10l, WORSTCASEACCURACY);
545
546 }
547
548
549 /*************************************************************
550 *************************************************************
551 * ROUNDED DOWNWARDS *
552 *************************************************************
553 *************************************************************/
log10_rd(double x)554 double log10_rd(double x) {
555 db_number xdb;
556 double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
557 double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
558 double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
559 double logb10h, logb10m, logb10l;
560 int E, index;
561
562 E=0;
563 xdb.d=x;
564
565 /* Filter cases */
566 if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
567 if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
568 return -1.0/0.0;
569 } /* log(+/-0) = -Inf */
570 if (xdb.i[HI] < 0){
571 return (x-x)/0; /* log(-x) = Nan */
572 }
573 /* Subnormal number */
574 E = -52;
575 xdb.d *= two52; /* make x a normal number */
576 }
577
578 if (xdb.i[HI] >= 0x7ff00000){
579 return x+x; /* Inf or Nan */
580 }
581
582
583 /* Extract exponent and mantissa
584 Do range reduction,
585 yielding to E holding the exponent and
586 y the mantissa between sqrt(2)/2 and sqrt(2)
587 */
588 E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
589 index = (xdb.i[HI] & 0x000fffff);
590 xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
591 index = (index + (1<<(20-L-1))) >> (20-L);
592
593 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
594 if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
595 xdb.i[HI] -= 0x00100000;
596 E++;
597 }
598 y = xdb.d;
599 index = index & INDEXMASK;
600 /* Cast integer E into double ed for multiplication later */
601 ed = (double) E;
602
603 /*
604 Read tables:
605 Read one float for ri
606 Read the first two doubles for -log(r_i) (out of three)
607
608 Organization of the table:
609
610 one struct entry per index, the struct entry containing
611 r, logih, logim and logil in this order
612 */
613
614
615 ri = argredtable[index].ri;
616 /*
617 Actually we don't need the logarithm entries now
618 Move the following two lines to the eventual reconstruction
619 As long as we don't have any if in the following code, we can overlap
620 memory access with calculations
621 */
622 logih = argredtable[index].logih;
623 logim = argredtable[index].logim;
624
625 /* Do range reduction:
626
627 zh + zl = y * ri - 1.0 correctly
628
629 Correctness is assured by use of Mul12 and Add12
630 even if we don't force ri to have its' LSBs set to zero
631
632 Discard zl for higher monome degrees
633 */
634
635 Mul12(&yrih, &yril, y, ri);
636 th = yrih - 1.0;
637 Add12Cond(zh, zl, th, yril);
638
639 /*
640 Polynomial evaluation
641
642 Use a 7 degree polynomial
643 Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
644 Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
645 using an ad hoc method
646
647 */
648
649
650
651 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
652 polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
653 #else
654 polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
655 #endif
656
657 Mul12(&zhSquareh, &zhSquarel, zh, zh);
658 polyUpper = polyHorner * (zh * zhSquareh);
659 zhSquareHalfh = zhSquareh * -0.5;
660 zhSquareHalfl = zhSquarel * -0.5;
661 Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
662 Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
663 Add22(&ph, &pl, t2h, t2l, t1h, t1l);
664
665 /* Reconstruction
666
667 Read logih and logim in the tables (already done)
668
669 Compute log(x) = E * log(2) + log(1+z) - log(ri)
670 i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
671
672 Carry out everything in double double precision
673
674 */
675
676 /*
677 We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
678 Multiplication of ed (double E) and log2h is thus correct
679 The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
680 is enough for the accurate phase
681 The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
682 Nevertheless the storage with trailing zeros implies an overlap of the tabulated
683 triple double values. We have to take it into account for the accurate phase
684 basic procedures for addition and multiplication
685 The condition on the next Add12 is verified as log2m is smaller than log2h
686 and both are scaled by ed
687 */
688
689 Add12(log2edh, log2edl, log2h * ed, log2m * ed);
690
691 /* Add logih and logim to ph and pl
692
693 We must use conditioned Add22 as logih can move over ph
694 */
695
696 Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
697
698 /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
699
700 Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
701
702
703
704 /* Change logarithm base from natural base to base 10 by multiplying */
705
706 Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
707
708
709 /* Rounding test and eventual return or call to the accurate function */
710
711 if(E==0)
712 roundcst = RDROUNDCST1;
713 else
714 roundcst = RDROUNDCST2;
715
716 TEST_AND_RETURN_RD(logb10h, logb10m, roundcst);
717
718 #if DEBUG
719 printf("Going for Accurate Phase for x=%1.50e\n",x);
720 #endif
721
722 log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
723
724 ReturnRoundDownwards3Unfiltered(logb10h, logb10m, logb10l, WORSTCASEACCURACY);
725 }
726
727 /*************************************************************
728 *************************************************************
729 * ROUNDED TOWARDS ZERO *
730 *************************************************************
731 *************************************************************/
log10_rz(double x)732 double log10_rz(double x) {
733 db_number xdb;
734 double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
735 double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
736 double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
737 double logb10h, logb10m, logb10l;
738 int E, index;
739
740 E=0;
741 xdb.d=x;
742
743 /* Filter cases */
744 if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
745 if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
746 return -1.0/0.0;
747 } /* log(+/-0) = -Inf */
748 if (xdb.i[HI] < 0){
749 return (x-x)/0; /* log(-x) = Nan */
750 }
751 /* Subnormal number */
752 E = -52;
753 xdb.d *= two52; /* make x a normal number */
754 }
755
756 if (xdb.i[HI] >= 0x7ff00000){
757 return x+x; /* Inf or Nan */
758 }
759
760
761 /* Extract exponent and mantissa
762 Do range reduction,
763 yielding to E holding the exponent and
764 y the mantissa between sqrt(2)/2 and sqrt(2)
765 */
766 E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
767 index = (xdb.i[HI] & 0x000fffff);
768 xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
769 index = (index + (1<<(20-L-1))) >> (20-L);
770
771 /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
772 if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
773 xdb.i[HI] -= 0x00100000;
774 E++;
775 }
776 y = xdb.d;
777 index = index & INDEXMASK;
778 /* Cast integer E into double ed for multiplication later */
779 ed = (double) E;
780
781 /*
782 Read tables:
783 Read one float for ri
784 Read the first two doubles for -log(r_i) (out of three)
785
786 Organization of the table:
787
788 one struct entry per index, the struct entry containing
789 r, logih, logim and logil in this order
790 */
791
792
793 ri = argredtable[index].ri;
794 /*
795 Actually we don't need the logarithm entries now
796 Move the following two lines to the eventual reconstruction
797 As long as we don't have any if in the following code, we can overlap
798 memory access with calculations
799 */
800 logih = argredtable[index].logih;
801 logim = argredtable[index].logim;
802
803 /* Do range reduction:
804
805 zh + zl = y * ri - 1.0 correctly
806
807 Correctness is assured by use of Mul12 and Add12
808 even if we don't force ri to have its' LSBs set to zero
809
810 Discard zl for higher monome degrees
811 */
812
813 Mul12(&yrih, &yril, y, ri);
814 th = yrih - 1.0;
815 Add12Cond(zh, zl, th, yril);
816
817 /*
818 Polynomial evaluation
819
820 Use a 7 degree polynomial
821 Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
822 Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
823 using an ad hoc method
824
825 */
826
827
828
829 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
830 polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
831 #else
832 polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
833 #endif
834
835 Mul12(&zhSquareh, &zhSquarel, zh, zh);
836 polyUpper = polyHorner * (zh * zhSquareh);
837 zhSquareHalfh = zhSquareh * -0.5;
838 zhSquareHalfl = zhSquarel * -0.5;
839 Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
840 Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
841 Add22(&ph, &pl, t2h, t2l, t1h, t1l);
842
843 /* Reconstruction
844
845 Read logih and logim in the tables (already done)
846
847 Compute log(x) = E * log(2) + log(1+z) - log(ri)
848 i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
849
850 Carry out everything in double double precision
851
852 */
853
854 /*
855 We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
856 Multiplication of ed (double E) and log2h is thus correct
857 The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
858 is enough for the accurate phase
859 The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
860 Nevertheless the storage with trailing zeros implies an overlap of the tabulated
861 triple double values. We have to take it into account for the accurate phase
862 basic procedures for addition and multiplication
863 The condition on the next Add12 is verified as log2m is smaller than log2h
864 and both are scaled by ed
865 */
866
867 Add12(log2edh, log2edl, log2h * ed, log2m * ed);
868
869 /* Add logih and logim to ph and pl
870
871 We must use conditioned Add22 as logih can move over ph
872 */
873
874 Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
875
876 /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
877
878 Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
879
880
881
882 /* Change logarithm base from natural base to base 10 by multiplying */
883
884 Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
885
886
887 /* Rounding test and eventual return or call to the accurate function */
888
889 if(E==0)
890 roundcst = RDROUNDCST1;
891 else
892 roundcst = RDROUNDCST2;
893
894 TEST_AND_RETURN_RZ(logb10h, logb10m, roundcst);
895
896 #if DEBUG
897 printf("Going for Accurate Phase for x=%1.50e\n",x);
898 #endif
899
900 log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
901
902 ReturnRoundTowardsZero3Unfiltered(logb10h, logb10m, logb10l, WORSTCASEACCURACY);
903 }
904
905
906
907