1 /*
2  * This function computes log10, correctly rounded,
3  * using experimental techniques based on triple double arithmetics
4 
5  THIS IS EXPERIMENTAL SOFTWARE
6 
7  *
8  * Author :  Christoph Lauter
9  * christoph.lauter at ens-lyon.fr
10  *
11 
12  To have it replace the crlibm log10, do:
13 
14  gcc -DHAVE_CONFIG_H -I.  -fPIC  -O2 -c log10-td.c;   mv log10-td.o log10_accurate.o; make
15 
16 
17  **********************************************************
18  *
19  *  NOTE: THIS FUNCTION USES SOME BASIC SEQUENCES AND
20  *  FURTHER FINAL ROUNDING SEQUENCES WITH SPECIAL CASE
21  *  TESTS NO FORMAL PROOF IS AVAILABLE FOR IN THE MOMENT
22  *
23  **********************************************************
24 
25 */
26 
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include "crlibm.h"
31 #include "crlibm_private.h"
32 #include "triple-double.h"
33 #include "log10-td.h"
34 
35 #define AVOID_FMA 0
36 
37 
log10_td_accurate(double * logb10h,double * logb10m,double * logb10l,int E,double ed,int index,double zh,double zl,double logih,double logim)38 void log10_td_accurate(double *logb10h, double *logb10m, double *logb10l, int E, double ed, int index, double zh, double zl, double logih, double logim) {
39   double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
40   double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
41   double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
42   double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
43   double log2edh, log2edm, log2edl, logb10hover, logb10mover, logb10lover;
44   double logyhnorm, logymnorm, logylnorm;
45 
46 
47 
48 
49 
50 #if EVAL_PERF
51   crlibm_second_step_taken++;
52 #endif
53 
54 
55   /* Accurate phase:
56 
57      Argument reduction is already done.
58      We must return logh, logm and logl representing the intermediate result in 118 bits precision.
59 
60      We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
61      calculating the next 7 coefficients in double double arithmetics and the last in double.
62 
63      We must account for zl starting with the monome of degree 4 (7^3 + 53 - 7 >> 118); so
64      double double calculations won't account for it.
65 
66   */
67 
68   /* Start of the horner scheme */
69 
70 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
71   highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
72 #else
73   highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
74 #endif
75 
76   /* We want to write
77 
78      accPolyC3 + zh * (accPoly4 + zh * (accPoly5 + zh * (accPoly6 + zh * (accPoly7 + zh * (accPoly8 + zh * (accPoly9 + zh * highPoly))))));
79      (        t14  t13         t12  t11         t10   t9          t8   t7          t6   t5          t4   t3          t2   t1  )
80 
81      with all additions and multiplications in double double arithmetics
82      but we will produce intermediate results labelled t1h/t1l thru t14h/t14l
83   */
84 
85   Mul12(&t1h, &t1l, zh, highPoly);
86   Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
87   Mul22(&t3h, &t3l, zh, zl, t2h, t2l);
88   Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
89   Mul22(&t5h, &t5l, zh, zl, t4h, t4l);
90   Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
91   Mul22(&t7h, &t7l, zh, zl, t6h, t6l);
92   Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
93   Mul22(&t9h, &t9l, zh, zl, t8h, t8l);
94   Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
95   Mul22(&t11h, &t11l, zh, zl, t10h, t10l);
96   Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
97   Mul22(&t13h, &t13l, zh, zl, t12h, t12l);
98   Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
99 
100   /* We must now prepare (zh + zl)^2 and (zh + zl)^3 as triple doubles */
101 
102   Mul23(&zSquareh, &zSquarem, &zSquarel, zh, zl, zh, zl);
103   Mul233(&zCubeh, &zCubem, &zCubel, zh, zl, zSquareh, zSquarem, zSquarel);
104 
105   /* We can now multiplicate the middle and higher polynomial by z^3 */
106 
107   Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
108 
109   /* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
110 
111   zSquareHalfh = zSquareh * -0.5;
112   zSquareHalfm = zSquarem * -0.5;
113   zSquareHalfl = zSquarel * -0.5;
114 
115   Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
116 	zSquareHalfh, zSquareHalfm, zSquareHalfl,
117 	higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
118 
119   /* Add now zh and zl to obtain the polynomial evaluation result */
120 
121   Add233(&polyh, &polym, &polyl, zh, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
122 
123   /* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
124      logil has not been read to the time, do this first
125   */
126 
127   logil =  argredtable[index].logil;
128 
129   Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
130 
131 
132   /* Renormalize logyh, logym and logyl to a non-overlapping triple-double for winning some
133      accuracy in the final ln(x) result before multiplying with log10inv
134 
135      THIS MAY NOT BE NECESSARY NOR SUFFICIENT
136 
137   */
138 
139   Renormalize3(&logyhnorm, &logymnorm, &logylnorm, logyh, logym, logyl);
140 
141 
142   /* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
143      ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
144      So multiplying naively is correct (up to 134 bits at least)
145 
146      The final result is thus obtained by adding log2 * E to log(y)
147   */
148 
149   log2edhover = log2h * ed;
150   log2edmover = log2m * ed;
151   log2edlover = log2l * ed;
152 
153   /* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
154      the to the log(y)-result
155 
156      If needed, uncomment the following Renormalize3-Statement and comment out the copies
157      following it.
158   */
159 
160   /* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
161 
162   log2edh = log2edhover;
163   log2edm = log2edmover;
164   log2edl = log2edlover;
165 
166   Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyhnorm, logymnorm, logylnorm);
167 
168 
169   /* Change logarithm base from natural base to base 10 by multiplying */
170 
171   Mul33(&logb10hover, &logb10mover, &logb10lover, log10invh, log10invm, log10invl, loghover, logmover, loglover);
172 
173 
174   /* Since we can not guarantee in each addition and multiplication procedure that
175      the results are not overlapping, we must renormalize the result before handing
176      it over to the final rounding
177   */
178 
179   Renormalize3(logb10h,logb10m,logb10l,logb10hover,logb10mover,logb10lover);
180 }
181 
182 
183 
184 /*************************************************************
185  *************************************************************
186  *               ROUNDED  TO NEAREST			     *
187  *************************************************************
188  *************************************************************/
log10_rn(double x)189  double log10_rn(double x){
190    db_number xdb;
191    double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
192    double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
193    double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
194    double logb10h, logb10m, logb10l;
195    int E, index;
196 
197    E=0;
198    xdb.d=x;
199 
200    /* Filter cases */
201    if (xdb.i[HI] < 0x00100000){        /* x < 2^(-1022)    */
202      if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
203        return -1.0/0.0;
204      }                    		   /* log(+/-0) = -Inf */
205      if (xdb.i[HI] < 0){
206        return (x-x)/0;                      /* log(-x) = Nan    */
207      }
208      /* Subnormal number */
209      E = -52;
210      xdb.d *= two52; 	  /* make x a normal number    */
211    }
212 
213    if (xdb.i[HI] >= 0x7ff00000){
214      return  x+x;				 /* Inf or Nan       */
215    }
216 
217 
218    /* Extract exponent and mantissa
219       Do range reduction,
220       yielding to E holding the exponent and
221       y the mantissa between sqrt(2)/2 and sqrt(2)
222    */
223    E += (xdb.i[HI]>>20)-1023;             /* extract the exponent */
224    index = (xdb.i[HI] & 0x000fffff);
225    xdb.i[HI] =  index | 0x3ff00000;	/* do exponent = 0 */
226    index = (index + (1<<(20-L-1))) >> (20-L);
227 
228    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
229    if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
230      xdb.i[HI] -= 0x00100000;
231      E++;
232    }
233    y = xdb.d;
234    index = index & INDEXMASK;
235    /* Cast integer E into double ed for multiplication later */
236    ed = (double) E;
237 
238    /*
239       Read tables:
240       Read one float for ri
241       Read the first two doubles for -log(r_i) (out of three)
242 
243       Organization of the table:
244 
245       one struct entry per index, the struct entry containing
246       r, logih, logim and logil in this order
247    */
248 
249 
250    ri = argredtable[index].ri;
251    /*
252       Actually we don't need the logarithm entries now
253       Move the following two lines to the eventual reconstruction
254       As long as we don't have any if in the following code, we can overlap
255       memory access with calculations
256    */
257    logih = argredtable[index].logih;
258    logim = argredtable[index].logim;
259 
260    /* Do range reduction:
261 
262       zh + zl = y * ri - 1.0 correctly
263 
264       Correctness is assured by use of Mul12 and Add12
265       even if we don't force ri to have its' LSBs set to zero
266 
267       Discard zl for higher monome degrees
268    */
269 
270    Mul12(&yrih, &yril, y, ri);
271    th = yrih - 1.0;
272    Add12Cond(zh, zl, th, yril);
273 
274    /*
275       Polynomial evaluation
276 
277       Use a 7 degree polynomial
278       Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
279       Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
280       using an ad hoc method
281 
282    */
283 
284 
285 
286 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
287    polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
288 #else
289    polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
290 #endif
291 
292    Mul12(&zhSquareh, &zhSquarel, zh, zh);
293    polyUpper = polyHorner * (zh * zhSquareh);
294    zhSquareHalfh = zhSquareh * -0.5;
295    zhSquareHalfl = zhSquarel * -0.5;
296    Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
297    Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
298    Add22(&ph, &pl, t2h, t2l, t1h, t1l);
299 
300    /* Reconstruction
301 
302       Read logih and logim in the tables (already done)
303 
304       Compute log(x) = E * log(2) + log(1+z) - log(ri)
305       i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
306 
307       Carry out everything in double double precision
308 
309    */
310 
311    /*
312       We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
313       Multiplication of ed (double E) and log2h is thus correct
314       The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
315       is enough for the accurate phase
316       The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
317       Nevertheless the storage with trailing zeros implies an overlap of the tabulated
318       triple double values. We have to take it into account for the accurate phase
319       basic procedures for addition and multiplication
320       The condition on the next Add12 is verified as log2m is smaller than log2h
321       and both are scaled by ed
322    */
323 
324    Add12(log2edh, log2edl, log2h * ed, log2m * ed);
325 
326    /* Add logih and logim to ph and pl
327 
328       We must use conditioned Add22 as logih can move over ph
329    */
330 
331    Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
332 
333    /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
334 
335    Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
336 
337 
338 
339    /* Change logarithm base from natural base to base 10 by multiplying */
340 
341    Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
342 
343 
344    /* Rounding test and eventual return or call to the accurate function */
345 
346    if(E==0)
347       roundcst = ROUNDCST1;
348    else
349       roundcst = ROUNDCST2;
350 
351 
352    if(logb10h == (logb10h + (logb10m * roundcst)))
353      return logb10h;
354    else
355      {
356 
357 #if DEBUG
358        printf("Going for Accurate Phase for x=%1.50e\n",x);
359 #endif
360 
361        log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
362 
363        ReturnRoundToNearest3(logb10h, logb10m, logb10l);
364 
365      } /* Accurate phase launched */
366  }
367 
368 
369 /*************************************************************
370  *************************************************************
371  *               ROUNDED  UPWARDS			     *
372  *************************************************************
373  *************************************************************/
log10_ru(double x)374  double log10_ru(double x) {
375    db_number xdb;
376    double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
377    double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
378    double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
379    double logb10h, logb10m, logb10l;
380    int E, index;
381 
382    E=0;
383    xdb.d=x;
384 
385    /* Filter cases */
386    if (xdb.i[HI] < 0x00100000){        /* x < 2^(-1022)    */
387      if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
388        return -1.0/0.0;
389      }                    		   /* log(+/-0) = -Inf */
390      if (xdb.i[HI] < 0){
391        return (x-x)/0;                      /* log(-x) = Nan    */
392      }
393      /* Subnormal number */
394      E = -52;
395      xdb.d *= two52; 	  /* make x a normal number    */
396    }
397 
398    if (xdb.i[HI] >= 0x7ff00000){
399      return  x+x;				 /* Inf or Nan       */
400    }
401 
402 
403    /* Extract exponent and mantissa
404       Do range reduction,
405       yielding to E holding the exponent and
406       y the mantissa between sqrt(2)/2 and sqrt(2)
407    */
408    E += (xdb.i[HI]>>20)-1023;             /* extract the exponent */
409    index = (xdb.i[HI] & 0x000fffff);
410    xdb.i[HI] =  index | 0x3ff00000;	/* do exponent = 0 */
411    index = (index + (1<<(20-L-1))) >> (20-L);
412 
413    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
414    if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
415      xdb.i[HI] -= 0x00100000;
416      E++;
417    }
418    y = xdb.d;
419    index = index & INDEXMASK;
420    /* Cast integer E into double ed for multiplication later */
421    ed = (double) E;
422 
423    /*
424       Read tables:
425       Read one float for ri
426       Read the first two doubles for -log(r_i) (out of three)
427 
428       Organization of the table:
429 
430       one struct entry per index, the struct entry containing
431       r, logih, logim and logil in this order
432    */
433 
434 
435    ri = argredtable[index].ri;
436    /*
437       Actually we don't need the logarithm entries now
438       Move the following two lines to the eventual reconstruction
439       As long as we don't have any if in the following code, we can overlap
440       memory access with calculations
441    */
442    logih = argredtable[index].logih;
443    logim = argredtable[index].logim;
444 
445    /* Do range reduction:
446 
447       zh + zl = y * ri - 1.0 correctly
448 
449       Correctness is assured by use of Mul12 and Add12
450       even if we don't force ri to have its' LSBs set to zero
451 
452       Discard zl for higher monome degrees
453    */
454 
455    Mul12(&yrih, &yril, y, ri);
456    th = yrih - 1.0;
457    Add12Cond(zh, zl, th, yril);
458 
459    /*
460       Polynomial evaluation
461 
462       Use a 7 degree polynomial
463       Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
464       Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
465       using an ad hoc method
466 
467    */
468 
469 
470 
471 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
472    polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
473 #else
474    polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
475 #endif
476 
477    Mul12(&zhSquareh, &zhSquarel, zh, zh);
478    polyUpper = polyHorner * (zh * zhSquareh);
479    zhSquareHalfh = zhSquareh * -0.5;
480    zhSquareHalfl = zhSquarel * -0.5;
481    Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
482    Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
483    Add22(&ph, &pl, t2h, t2l, t1h, t1l);
484 
485    /* Reconstruction
486 
487       Read logih and logim in the tables (already done)
488 
489       Compute log(x) = E * log(2) + log(1+z) - log(ri)
490       i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
491 
492       Carry out everything in double double precision
493 
494    */
495 
496    /*
497       We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
498       Multiplication of ed (double E) and log2h is thus correct
499       The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
500       is enough for the accurate phase
501       The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
502       Nevertheless the storage with trailing zeros implies an overlap of the tabulated
503       triple double values. We have to take it into account for the accurate phase
504       basic procedures for addition and multiplication
505       The condition on the next Add12 is verified as log2m is smaller than log2h
506       and both are scaled by ed
507    */
508 
509    Add12(log2edh, log2edl, log2h * ed, log2m * ed);
510 
511    /* Add logih and logim to ph and pl
512 
513       We must use conditioned Add22 as logih can move over ph
514    */
515 
516    Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
517 
518    /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
519 
520    Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
521 
522 
523 
524    /* Change logarithm base from natural base to base 10 by multiplying */
525 
526    Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
527 
528 
529    /* Rounding test and eventual return or call to the accurate function */
530 
531    if(E==0)
532       roundcst = RDROUNDCST1;
533    else
534       roundcst = RDROUNDCST2;
535 
536    TEST_AND_RETURN_RU(logb10h, logb10m, roundcst);
537 
538 #if DEBUG
539    printf("Going for Accurate Phase for x=%1.50e\n",x);
540 #endif
541 
542     log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
543 
544     ReturnRoundUpwards3Unfiltered(logb10h, logb10m, logb10l, WORSTCASEACCURACY);
545 
546  }
547 
548 
549 /*************************************************************
550  *************************************************************
551  *               ROUNDED  DOWNWARDS			     *
552  *************************************************************
553  *************************************************************/
log10_rd(double x)554  double log10_rd(double x) {
555    db_number xdb;
556    double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
557    double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
558    double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
559    double logb10h, logb10m, logb10l;
560    int E, index;
561 
562    E=0;
563    xdb.d=x;
564 
565    /* Filter cases */
566    if (xdb.i[HI] < 0x00100000){        /* x < 2^(-1022)    */
567      if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
568        return -1.0/0.0;
569      }                    		   /* log(+/-0) = -Inf */
570      if (xdb.i[HI] < 0){
571        return (x-x)/0;                      /* log(-x) = Nan    */
572      }
573      /* Subnormal number */
574      E = -52;
575      xdb.d *= two52; 	  /* make x a normal number    */
576    }
577 
578    if (xdb.i[HI] >= 0x7ff00000){
579      return  x+x;				 /* Inf or Nan       */
580    }
581 
582 
583    /* Extract exponent and mantissa
584       Do range reduction,
585       yielding to E holding the exponent and
586       y the mantissa between sqrt(2)/2 and sqrt(2)
587    */
588    E += (xdb.i[HI]>>20)-1023;             /* extract the exponent */
589    index = (xdb.i[HI] & 0x000fffff);
590    xdb.i[HI] =  index | 0x3ff00000;	/* do exponent = 0 */
591    index = (index + (1<<(20-L-1))) >> (20-L);
592 
593    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
594    if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
595      xdb.i[HI] -= 0x00100000;
596      E++;
597    }
598    y = xdb.d;
599    index = index & INDEXMASK;
600    /* Cast integer E into double ed for multiplication later */
601    ed = (double) E;
602 
603    /*
604       Read tables:
605       Read one float for ri
606       Read the first two doubles for -log(r_i) (out of three)
607 
608       Organization of the table:
609 
610       one struct entry per index, the struct entry containing
611       r, logih, logim and logil in this order
612    */
613 
614 
615    ri = argredtable[index].ri;
616    /*
617       Actually we don't need the logarithm entries now
618       Move the following two lines to the eventual reconstruction
619       As long as we don't have any if in the following code, we can overlap
620       memory access with calculations
621    */
622    logih = argredtable[index].logih;
623    logim = argredtable[index].logim;
624 
625    /* Do range reduction:
626 
627       zh + zl = y * ri - 1.0 correctly
628 
629       Correctness is assured by use of Mul12 and Add12
630       even if we don't force ri to have its' LSBs set to zero
631 
632       Discard zl for higher monome degrees
633    */
634 
635    Mul12(&yrih, &yril, y, ri);
636    th = yrih - 1.0;
637    Add12Cond(zh, zl, th, yril);
638 
639    /*
640       Polynomial evaluation
641 
642       Use a 7 degree polynomial
643       Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
644       Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
645       using an ad hoc method
646 
647    */
648 
649 
650 
651 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
652    polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
653 #else
654    polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
655 #endif
656 
657    Mul12(&zhSquareh, &zhSquarel, zh, zh);
658    polyUpper = polyHorner * (zh * zhSquareh);
659    zhSquareHalfh = zhSquareh * -0.5;
660    zhSquareHalfl = zhSquarel * -0.5;
661    Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
662    Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
663    Add22(&ph, &pl, t2h, t2l, t1h, t1l);
664 
665    /* Reconstruction
666 
667       Read logih and logim in the tables (already done)
668 
669       Compute log(x) = E * log(2) + log(1+z) - log(ri)
670       i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
671 
672       Carry out everything in double double precision
673 
674    */
675 
676    /*
677       We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
678       Multiplication of ed (double E) and log2h is thus correct
679       The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
680       is enough for the accurate phase
681       The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
682       Nevertheless the storage with trailing zeros implies an overlap of the tabulated
683       triple double values. We have to take it into account for the accurate phase
684       basic procedures for addition and multiplication
685       The condition on the next Add12 is verified as log2m is smaller than log2h
686       and both are scaled by ed
687    */
688 
689    Add12(log2edh, log2edl, log2h * ed, log2m * ed);
690 
691    /* Add logih and logim to ph and pl
692 
693       We must use conditioned Add22 as logih can move over ph
694    */
695 
696    Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
697 
698    /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
699 
700    Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
701 
702 
703 
704    /* Change logarithm base from natural base to base 10 by multiplying */
705 
706    Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
707 
708 
709    /* Rounding test and eventual return or call to the accurate function */
710 
711    if(E==0)
712       roundcst = RDROUNDCST1;
713    else
714       roundcst = RDROUNDCST2;
715 
716    TEST_AND_RETURN_RD(logb10h, logb10m, roundcst);
717 
718 #if DEBUG
719   printf("Going for Accurate Phase for x=%1.50e\n",x);
720 #endif
721 
722     log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
723 
724     ReturnRoundDownwards3Unfiltered(logb10h, logb10m, logb10l, WORSTCASEACCURACY);
725  }
726 
727 /*************************************************************
728  *************************************************************
729  *               ROUNDED  TOWARDS ZERO			     *
730  *************************************************************
731  *************************************************************/
log10_rz(double x)732  double log10_rz(double x) {
733    db_number xdb;
734    double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
735    double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
736    double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
737    double logb10h, logb10m, logb10l;
738    int E, index;
739 
740    E=0;
741    xdb.d=x;
742 
743    /* Filter cases */
744    if (xdb.i[HI] < 0x00100000){        /* x < 2^(-1022)    */
745      if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
746        return -1.0/0.0;
747      }                    		   /* log(+/-0) = -Inf */
748      if (xdb.i[HI] < 0){
749        return (x-x)/0;                      /* log(-x) = Nan    */
750      }
751      /* Subnormal number */
752      E = -52;
753      xdb.d *= two52; 	  /* make x a normal number    */
754    }
755 
756    if (xdb.i[HI] >= 0x7ff00000){
757      return  x+x;				 /* Inf or Nan       */
758    }
759 
760 
761    /* Extract exponent and mantissa
762       Do range reduction,
763       yielding to E holding the exponent and
764       y the mantissa between sqrt(2)/2 and sqrt(2)
765    */
766    E += (xdb.i[HI]>>20)-1023;             /* extract the exponent */
767    index = (xdb.i[HI] & 0x000fffff);
768    xdb.i[HI] =  index | 0x3ff00000;	/* do exponent = 0 */
769    index = (index + (1<<(20-L-1))) >> (20-L);
770 
771    /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
772    if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
773      xdb.i[HI] -= 0x00100000;
774      E++;
775    }
776    y = xdb.d;
777    index = index & INDEXMASK;
778    /* Cast integer E into double ed for multiplication later */
779    ed = (double) E;
780 
781    /*
782       Read tables:
783       Read one float for ri
784       Read the first two doubles for -log(r_i) (out of three)
785 
786       Organization of the table:
787 
788       one struct entry per index, the struct entry containing
789       r, logih, logim and logil in this order
790    */
791 
792 
793    ri = argredtable[index].ri;
794    /*
795       Actually we don't need the logarithm entries now
796       Move the following two lines to the eventual reconstruction
797       As long as we don't have any if in the following code, we can overlap
798       memory access with calculations
799    */
800    logih = argredtable[index].logih;
801    logim = argredtable[index].logim;
802 
803    /* Do range reduction:
804 
805       zh + zl = y * ri - 1.0 correctly
806 
807       Correctness is assured by use of Mul12 and Add12
808       even if we don't force ri to have its' LSBs set to zero
809 
810       Discard zl for higher monome degrees
811    */
812 
813    Mul12(&yrih, &yril, y, ri);
814    th = yrih - 1.0;
815    Add12Cond(zh, zl, th, yril);
816 
817    /*
818       Polynomial evaluation
819 
820       Use a 7 degree polynomial
821       Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
822       Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
823       using an ad hoc method
824 
825    */
826 
827 
828 
829 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
830    polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
831 #else
832    polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
833 #endif
834 
835    Mul12(&zhSquareh, &zhSquarel, zh, zh);
836    polyUpper = polyHorner * (zh * zhSquareh);
837    zhSquareHalfh = zhSquareh * -0.5;
838    zhSquareHalfl = zhSquarel * -0.5;
839    Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
840    Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
841    Add22(&ph, &pl, t2h, t2l, t1h, t1l);
842 
843    /* Reconstruction
844 
845       Read logih and logim in the tables (already done)
846 
847       Compute log(x) = E * log(2) + log(1+z) - log(ri)
848       i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
849 
850       Carry out everything in double double precision
851 
852    */
853 
854    /*
855       We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
856       Multiplication of ed (double E) and log2h is thus correct
857       The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
858       is enough for the accurate phase
859       The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
860       Nevertheless the storage with trailing zeros implies an overlap of the tabulated
861       triple double values. We have to take it into account for the accurate phase
862       basic procedures for addition and multiplication
863       The condition on the next Add12 is verified as log2m is smaller than log2h
864       and both are scaled by ed
865    */
866 
867    Add12(log2edh, log2edl, log2h * ed, log2m * ed);
868 
869    /* Add logih and logim to ph and pl
870 
871       We must use conditioned Add22 as logih can move over ph
872    */
873 
874    Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
875 
876    /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
877 
878    Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
879 
880 
881 
882    /* Change logarithm base from natural base to base 10 by multiplying */
883 
884    Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
885 
886 
887    /* Rounding test and eventual return or call to the accurate function */
888 
889    if(E==0)
890       roundcst = RDROUNDCST1;
891    else
892       roundcst = RDROUNDCST2;
893 
894    TEST_AND_RETURN_RZ(logb10h, logb10m, roundcst);
895 
896 #if DEBUG
897   printf("Going for Accurate Phase for x=%1.50e\n",x);
898 #endif
899 
900     log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
901 
902     ReturnRoundTowardsZero3Unfiltered(logb10h, logb10m, logb10l, WORSTCASEACCURACY);
903  }
904 
905 
906 
907