1#######################################################################
2# This file is part of crlibm, the correctly rounded mathematical library,
3# which has been developed by the Arénaire project at École normale supérieure
4# de Lyon.
5#
6# Copyright (C) 2004-2011 David Defour, Catherine Daramy-Loirat,
7# Florent de Dinechin, Matthieu Gallet, Nicolas Gast, Christoph Quirin Lauter,
8# and Jean-Michel Muller
9#
10# This library is free software; you can redistribute it and/or
11# modify it under the terms of the GNU Lesser General Public
12# License as published by the Free Software Foundation; either
13# version 2.1 of the License, or (at your option) any later version.
14#
15# This library is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18# Lesser General Public License for more details.
19#
20# You should have received a copy of the GNU Lesser General Public
21# License along with this library; if not, write to the Free Software
22# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
24# To use:
25# restart; read "log-de.mpl";
26#TODO output scripts for the Gappa proof out for Paterson/Stockmayer
27Digits := 100:
28
29interface(quiet=true):
30
31read "common-procedures.mpl":
32read "double-extended.mpl":
33mkdir("TEMPLOG"):
34
35
36log2h,log2l := hiloExt(log(2)):
37
38
39L := 7: # number of bits used to address the table
40
41MAXINDEX    := round(2^L * (sqrt(2)-1)):
42
43for i from 0 to MAXINDEX-1 do
44    center[i] := 1 + i*2^(-L): # center[i] in [1, 2[
45    # We want it to fit on 11 bits of mantissa
46    r[i] :=  round(evalf(  (1/center[i]) * 2^(11)) ) / 2^(11) ;
47
48od:
49for i from MAXINDEX to 2^L do
50    # y has been divided by two, center[i] in [0.5, 1[
51    center[i]:=(1 + i*2^(-L)) / 2:
52    # We want it to fit on 11 bits of mantissa,
53    r[i] :=  round(evalf(  (1/center[i]) * 2^(10)) ) / 2^(10) ;
54od:
55
56# Note that we go up to 2^L although the case 2^L is wrapped to zero
57# in the C code. It could be important for zmax (but it turns out not).
58
59for i from 0 to 2^L do
60    (logirh[i], logirl[i]) := hiloExt(-log(r[i])):
61od:
62
63#Computation of ZMax.
64for i from 0 to MAXINDEX-1 do
65    t_x := center[i] + 2^(-L-1) :
66    zmax[i] := (t_x*r[i]-1) :
67    t_x := center[i] - 2^(-L-1) :
68    zmin[i] := (t_x*r[i]-1) :
69    zabsmax[i] := max(abs(zmin[i]), abs(zmax[i])):
70od:
71for i from MAXINDEX to 2^L do
72    t_x := center[i] + 2^(-L-2) :
73    zmax[i] := (t_x*r[i]-1) :
74    t_x := center[i] - 2^(-L-2) :
75    zmin[i] := (t_x*r[i]-1) :
76    zabsmax[i] := max(abs(zmin[i]), abs(zmax[i])):
77od:
78
79zmaxmax:=0:
80zminmin:=0:
81for i from 0 to 2^L do
82    if zmax[i] > zmaxmax then zmaxmax := zmax[i]: fi:
83    if zmin[i] < zminmin then zminmin := zmin[i]: fi:
84od:
85printf("zminmin = -2^(%2f)   zmaxmax = 2^(%2f)\n", log2(-zminmin), log2(zmaxmax) ) :
86
87
88PolyDegreeQuick:=7:
89
90#Keep -zmaxmax..zmaxmax to keep c1=1, which is useful in the proof
91
92if(1+1=3) then # The polynomial used to be computed here,
93polyQuick:= polyExact2Ext(x  * numapprox[minimax](  log(1+x)/x,  x=-zmaxmax..zmaxmax,  [PolyDegreeQuick-1,0], 1 ,  'deltaApprox'), 0):
94
95epsilonApproxQuick := numapprox[infnorm]( 1-polyQuick/log(1+x), x=zminmin..zmaxmax):
96deltaApproxQuick := numapprox[infnorm]( polyQuick-log(1+x), x=zminmin..zmaxmax):
97else
98# magical polynomial given by Sylvain's tool
99polyQuick := x * (1 + (x * ((-0.50000000000000000000000000000000000000000000000000000000000000e0) + (x * (0.33333333333333342585191871876304503530263900756835937500000000e0 + (x * ((-0.24999999998423708125194764306797878816723823547363281250000000e0) + (x * (0.19999999996748479835773082413652446120977401733398437500000000e0 + (x * ((-0.16666957260954737285452154083031928166747093200683593750000000e0) + (x * 0.14286056338555042088955815415829420089721679687500000000000000e0)))))))))))):
100
101# validated infinite norms given by Christoph's tool
102epsilonApproxQuick := 2^(-64.119667587221):
103deltaApproxQuick := 2^(-72.230387592107):
104fi:
105printf(" rel approximation error for the quick phase is 2^(%2f)\n", log2(epsilonApproxQuick) ) :
106printf(" abs approximation error for the quick phase is 2^(%2f)\n", log2(deltaApproxQuick) ) :
107
108# For the Paterson/Stockmayer method
109polyQuick0:= x  * numapprox[minimax](  log(1+x)/x,  x=-zmaxmax..zmaxmax,  [PolyDegreeQuick-1,0], 1 ,  'deltaApprox'):
110
111## Création des nouveaux coefficients
112polyQuick0:= x  * numapprox[minimax](  log(1+x)/x,  x=-zmaxmax..zmaxmax,  [PolyDegreeQuick-1,0], 1 ,  'deltaApprox'):
113
114## Création des nouveaux coefficients
115a7 := convert(coeff(polyQuick0,x,7),rational):
116a6 := convert(coeff(polyQuick0,x,6)/a7,rational):
117a5 := convert(coeff(polyQuick0,x,5)/a7,rational):
118a4 := convert(coeff(polyQuick0,x,4)/a7,rational):
119
120a3 := convert(coeff(polyQuick0,x,3),rational):
121a2 := convert(coeff(polyQuick0,x,2),rational):
122a1 := convert(coeff(polyQuick0,x,1),rational):
123a0 := convert(coeff(polyQuick0,x,0),rational):
124
125alpha0 := a5 - 1:
126alpha1 := a6:
127alpha2 := a4 - alpha0 * a6:
128
129##
130a7_hi,a7_lo         := hiloExt(a7):
131a3_hi,a3_lo         := hiloExt(a3):
132a2_hi,a2_lo         := hiloExt(a2):
133a0_hi,a0_lo         := hiloExt(a0):
134
135c_hi,c_lo         := hiloExt(alpha0):
136alpha_hi,alpha_lo := hiloExt(alpha1):
137beta_hi,beta_lo   := hiloExt(alpha2):
138
139#coef_c := c_hi + c_lo:
140#coef_alpha := alpha_hi + alpha_lo:
141#coef_beta := beta_hi + beta_lo:
142#coef_c7 := a7_hi + a7_lo:
143#coef_c3 := a3_hi + a3_lo:
144#coef_c2 := a2_hi + a2_lo:
145#coef_c0 := a0_hi + a0_lo:
146
147coef_c := c_hi:
148coef_alpha := alpha_hi:
149coef_beta := beta_hi:
150coef_c7 := a7_hi:
151coef_c3 := a3_hi:
152coef_c2 := a2_hi:
153coef_c0 := a0_hi:
154
155polyQuickPat := ((x^2 + coef_c)*(x + coef_alpha) + (x + coef_beta)) * (coef_c7*x^4) + ((coef_c3 * x + coef_c2)*x^2 + (x)):
156
157epsilonApproxQuickPat := numapprox[infnorm]( 1-polyQuickPat/log(1+x), x=zminmin..zmaxmax):
158printf("   approximation rel error for Paterson/Stockmayer in the quick phase is 2^(%2f)\n", log2(epsilonApproxQuickPat) ) :
159deltaApproxQuickPat := numapprox[infnorm]( polyQuickPat-log(1+x), x=zminmin..zmaxmax):
160printf("   approximation abs error for Paterson/Stockmayer in the quick phase is 2^(%2f)\n", log2(deltaApproxQuickPat) ) :
161
162
163
164PolyDegreeAccurate:=14:
165
166printf("Computing the polynomial for accurate phase (this may take some time...)\n"):
167pe:= x  * numapprox[minimax](  log(1+x)/x,  x=-zmaxmax..zmaxmax,  [PolyDegreeAccurate-1,0], 1 ,  'deltaApprox'):
168
169
170MaxDegreeDDE:=8:  #
171
172polyAccurate := polyExact2Ext(pe, MaxDegreeDDE):
173deltaApproxAccurate := numapprox[infnorm](polyAccurate-log(1+x), x=-zmaxmax..zmaxmax):
174epsilonApproxAccurate := numapprox[infnorm]( 1-polyAccurate/log(1+x), x=-zmaxmax..zmaxmax):
175printf("   approximation error for the accurate phase is 2^(%2f)\n", log2(epsilonApproxAccurate) ) :
176
177
178
179
180filename:="TEMPLOG/log-de.h":
181fd:=fopen(filename, WRITE, TEXT):
182
183fprintf(fd, "/*File generated by maple/log-de.mpl*/\n\n"):
184# some constants
185fprintf(fd, "#define L        %d\n", L):
186fprintf(fd, "#define MAXINDEX %d\n", MAXINDEX):
187fprintf(fd, "#define INDEXMASK %d\n", 2^L-1):
188fprintf(fd, "static const double two64 = %1.25e ;\n", evalf(2^64)):
189
190fprintf(fd, "#if 1\n#define ESTRIN\n#else\n#define PATERSON\n#endif\n"):
191
192fprintf(fd, "#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)\n\n"):
193
194fprintf(fd, "#ifdef PATERSON\n"):
195  #  polynomial for quick phase
196  fprintf(fd,"static const long double c7 = %1.50e;\n",coef_c7):
197  fprintf(fd,"static const long double c3 = %1.50e;\n",coef_c3):
198  fprintf(fd,"static const long double c2 = %1.50e;\n",coef_c2):
199  fprintf(fd,"static const long double ps_alpha = %1.50e;\n",coef_alpha):
200  fprintf(fd,"static const long double ps_beta = %1.50e;\n",coef_beta):
201  fprintf(fd,"static const long double ps_c = %1.50e;\n",coef_c):
202fprintf(fd, "#endif/* PATERSON*/\n\n"):
203
204fprintf(fd, "#ifdef ESTRIN\n"):
205 fprintf(fd, "  /* Coefficients are read directly from the array thanks to the following macros */ \n"):
206  for i from PolyDegreeQuick to 2 by -1 do
207    fprintf(fd, "#define c%d  c[%d]\n", i, PolyDegreeQuick-i):
208  od:
209   fprintf(fd, "static const double c[%d] =  {\n",PolyDegreeQuick):
210   for i from PolyDegreeQuick to 2 by -1 do
211     fprintf(fd, "   /* c%d  = */ %1.50eL, \n", i, coeff(polyQuick,x,i)):
212   od:
213  fprintf(fd, "}; \n \n"):
214fprintf(fd, "#endif/* ESTRIN */\n\n"):
215
216
217
218  for i from PolyDegreeAccurate to 1 by -1 do
219    fprintf(fd, "#define c%dh  ch[%d]\n", i, PolyDegreeAccurate-i):
220  od:
221
222  for i from MaxDegreeDDE-1 to 1 by -1 do
223    fprintf(fd, "#define c%dl  cl[%d]\n", i, MaxDegreeDDE-1-i):
224  od:
225  fprintf(fd, "#define PREFETCH_POLY_ACCURATE \n"):
226  fprintf(fd, "\n#else /* not(defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)),\n   assuming Itanium, otherwise we shouldn't be there */ \n\n"):
227  fprintf(fd, "#define PREFETCH_POLY_QUICK "):
228  for i from PolyDegreeQuick to 2 by -1 do
229    fprintf(fd, "c%d=c[%d]; ", i, PolyDegreeQuick-i):
230  od:
231  fprintf(fd, "\n#define PREFETCH_POLY_ACCURATE "):
232  for i from PolyDegreeAccurate to 1 by -1 do
233    fprintf(fd, "c%dh=ch[%d]; ", i, PolyDegreeAccurate-i):
234    if i mod 4 =0 then  fprintf(fd, "\\\n        "): fi:
235  od:
236  fprintf(fd, "\\\n        "):
237  for i from MaxDegreeDDE-1 to 1 by -1 do
238    fprintf(fd, "c%dl=cl[%d]; ", i, MaxDegreeDDE-1-i):
239  od:
240
241  fprintf(fd, "\n#endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */ \n\n"):
242
243  # Various constants
244  fprintf(fd, "static const long double log2H  = %1.50eL ;\n", log2h):
245  fprintf(fd, "static const long double log2L  = %1.50eL ;\n", log2l):
246
247  # The polynomials
248  #  polynomial for quick phase
249#  for i from PolyDegreeQuick to 1 by -1 do
250#    fprintf(fd, "static const long double c%d =    %1.50eL ;\n", i, coeff(polyQuick,x,i)):
251#  od:
252
253  #  polynomial for accurate phase
254  fprintf(fd, "static const long double ch[%d] =  {\n",PolyDegreeAccurate):
255   for i from PolyDegreeAccurate to 1 by -1 do
256     (ch, cl) := hiloExt(coeff(polyAccurate,x,i)):
257     fprintf(fd, "   /* ch%d  = */ %1.50eL, \n", i, ch):
258   od:
259  fprintf(fd, "}; \n \n"):
260
261  fprintf(fd, "static const long double cl[%d] =  {\n", MaxDegreeDDE):
262  for i from MaxDegreeDDE-1 to 1 by -1 do
263     (ch, cl) := hiloExt(coeff(polyAccurate,x,i)):
264     fprintf(fd, "   /* cl%d  = */ %1.50eL, \n", i, cl):
265   od:
266  fprintf(fd, "}; \n \n"):
267
268
269  # The tables
270
271
272  fprintf(fd, "typedef struct rri_tag {float r; long double logirh;  long double logirl; } rri ;  \n"):
273  fprintf(fd, "static const rri argredtable[%d] = {\n", 2^L):
274  for i from 0 to 2^L-1 do
275      fprintf(fd, "  { \n"):
276      fprintf(fd, "    %1.50eL,   /* r[%d] */ \n", r[i], i):
277      fprintf(fd, "    %1.50eL, /* logirh[%d] */ \n", logirh[i], i):
278      fprintf(fd, "    %1.50eL, /* logirl[%d] */ \n", logirl[i], i):
279      fprintf(fd, "  } "):
280      if(i<2^L-1) then  fprintf(fd, ", \n"): fi
281  od:
282fprintf(fd, "}; \n \n"):
283
284fclose(fd):
285
286printf("\n************ DONE TEMPLOG/log-de.h ************\n");
287
288
289
290
291filename:="TEMPLOG/polynomials.sed":
292fd:=fopen(filename, WRITE, TEXT):
293for i from PolyDegreeAccurate to 1 by -1 do
294    (ch, cl) := hiloExt(coeff(polyAccurate,x,i)):
295    fprintf(fd, " s/_c%dh/%1.40e/g\n", i, ch):
296    if(i<MaxDegreeDDE) then
297        fprintf(fd, " s/_c%dl/%1.40e/g\n", i, cl):
298    fi:
299od:
300for i from PolyDegreeQuick to 1 by -1 do
301    fprintf(fd, " s/_c%d/%1.40e/g\n", i, coeff(polyQuick,x,i)):
302od:
303fprintf(fd, " s/_deltaApproxQuick/%1.40e/g\n", deltaApproxQuick):
304fprintf(fd, " s/_epsilonApproxQuick/%1.40e/g\n", epsilonApproxQuick):
305fprintf(fd, " s/_deltaApproxAccurate/%1.40e/g\n", deltaApproxAccurate):
306fprintf(fd, " s/_epsilonApproxAccurate/%1.40e/g\n", epsilonApproxAccurate):
307fprintf(fd, " s/_log2h/%1.40e/g\n", log2h):
308fprintf(fd, " s/_log2l/%1.40e/g\n", log2l):
309fclose(fd):
310
311printf("\n************ DONE TEMPLOG/polynomials.sed ************\n");
312
313for j from 0 to 2^L-1 do
314    filename:=cat("TEMPLOG/log-de_",j,".sed"):
315    fd:=fopen(filename, WRITE, TEXT):
316    fprintf(fd, " s/_rval/%1.40e/g\n",     r[j]):
317    fprintf(fd, " s/_logirh/%1.40e/g\n", logirh[j]):
318    fprintf(fd, " s/_logirl/%1.40e/g\n", logirl[j]):
319    fprintf(fd, " s/_zabsmax/%1.40e/g\n", zabsmax[j]):
320    fclose(fd):
321  od:
322
323
324
325printf("\n************ DONE TEMPLOG/log-de*.sed ************\n");
326
327# A shell script to use them
328filename:="../gappa/log-de/run-log-de-proof.sh":
329fd:=fopen(filename, WRITE, TEXT):
330fprintf(fd, "#!/bin/sh\n"):
331fprintf(fd, "# You probably need to edit the path to the gappa executable\n"):
332fprintf(fd, "GAPPA=~/local/src/gappa/src/gappa\n"):
333fprintf(fd, "CRLIBMDIR=../..\n"):
334
335fprintf(fd, "echo Accurate phase, case E=0, index=0: 1>&2\n"):
336fprintf(fd, "sed  -f $CRLIBMDIR/maple/TEMPLOG/polynomials.sed  -f $CRLIBMDIR/maple/TEMPLOG/log-de_0.sed $CRLIBMDIR/gappa/log-de/log-de-acc-index0-E0.gappa | $GAPPA \n"):
337
338fprintf(fd, "echo Accurate phase, case E!=0, index=0 1>&2\n"):
339fprintf(fd, "sed  -f $CRLIBMDIR/maple/TEMPLOG/polynomials.sed  -f $CRLIBMDIR/maple/TEMPLOG/log-de_0.sed $CRLIBMDIR/gappa/log-de/log-de-acc-index0-E1N.gappa | $GAPPA \n"):
340
341fprintf(fd, "for num in `seq 1 %d`; do\n", 2^L-1):
342fprintf(fd, "  echo Accurate phase, case E=0, index=$num 1>&2\n"):
343fprintf(fd, "  sed -f $CRLIBMDIR/maple/TEMPLOG/polynomials.sed  -f $CRLIBMDIR/maple/TEMPLOG/log-de_$num.sed $CRLIBMDIR/gappa/log-de/log-de-acc-index1N-E0.gappa | $GAPPA \n"):
344fprintf(fd, "  echo 1>&2\n"):
345fprintf(fd, "done\n"):
346
347fprintf(fd, "for num in `seq 1 %d`; do\n", 2^L-1):
348fprintf(fd, "  echo Accurate phase, case E!=0, index = $num 1>&2 \n"):
349fprintf(fd, "  sed -f $CRLIBMDIR/maple/TEMPLOG/polynomials.sed  -f $CRLIBMDIR/maple/TEMPLOG/log-de_$num.sed $CRLIBMDIR/gappa/log-de/log-de-acc-index1N-E1N.gappa | $GAPPA \n"):
350fprintf(fd, "  echo 1>&2\n"):
351fprintf(fd, "done\n\n"):
352
353
354fprintf(fd, "echo Quick phase, case E=0, index=0  1>&2\n"):
355fprintf(fd, "sed  -f $CRLIBMDIR/maple/TEMPLOG/polynomials.sed  -f $CRLIBMDIR/maple/TEMPLOG/log-de_0.sed $CRLIBMDIR/gappa/log-de/log-de-index0-E0.gappa | $GAPPA \n"):
356fprintf(fd, "  echo 1>&2\n"):
357
358fprintf(fd, "echo Quick phase, case E!=0, index=0 1>&2 \n"):
359fprintf(fd, "sed  -f $CRLIBMDIR/maple/TEMPLOG/polynomials.sed  -f $CRLIBMDIR/maple/TEMPLOG/log-de_0.sed $CRLIBMDIR/gappa/log-de/log-de-index0-E1N.gappa | $GAPPA \n"):
360fprintf(fd, "  echo 1>&2\n"):
361
362
363fprintf(fd, "for num in `seq 1 %d`; do\n", 2^L-1):
364fprintf(fd, "  echo Quick phase, for all E,  index=$num 1>&2\n"):
365fprintf(fd, "  sed  -f $CRLIBMDIR/maple/TEMPLOG/polynomials.sed  -f $CRLIBMDIR/maple/TEMPLOG/log-de_$num.sed $CRLIBMDIR/gappa/log-de/log-de-index1N-E0N.gappa | $GAPPA \n"):
366fprintf(fd, "  echo 1>&2\n"):
367fprintf(fd, "done\n"):
368
369fclose(fd):
370
371printf("\n************ DONE TEMPLOG/run-log-de-proof.sh ************\n"):
372printf("Now you should run \n"):
373printf(" sh ../../gappa/log-de/run-log-de-proof.sh  2> ../../maple/TEMPLOG/Gappa.out\n"):
374printf("  (You probably need to edit the path to the gappa executable within run-log-de-proof.sh)\n"):
375printf("Then look at TEMPLOG/Gappa.out. It shouldn't contain 'some enclosures were not satisfied'.\n If it does, report it !\n"):
376
377
378printf("----DONE---\n") :
379
380