1#######################################################################
2# This file is part of crlibm, the correctly rounded mathematical library,
3# which has been developed by the Arénaire project at École normale supérieure
4# de Lyon.
5#
6# Copyright (C) 2004-2011 David Defour, Catherine Daramy-Loirat,
7# Florent de Dinechin, Matthieu Gallet, Nicolas Gast, Christoph Quirin Lauter,
8# and Jean-Michel Muller
9#
10# This library is free software; you can redistribute it and/or
11# modify it under the terms of the GNU Lesser General Public
12# License as published by the Free Software Foundation; either
13# version 2.1 of the License, or (at your option) any later version.
14#
15# This library is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18# Lesser General Public License for more details.
19#
20# You should have received a copy of the GNU Lesser General Public
21# License along with this library; if not, write to the Free Software
22# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
24# To use:
25# restart; read "log-de.mpl";
26Digits := 200:
27
28interface(quiet=true):
29
30read "common-procedures.mpl":
31read "double-extended.mpl":
32mkdir("PATERSON"):
33
34
35log2h,log2l := hiloExt(log(2)):
36
37
38L := 7: # number of bits used to address the table
39
40MAXINDEX    := round(2^L * (sqrt(2)-1)):
41
42for i from 0 to MAXINDEX-1 do
43    center[i] := 1 + i*2^(-L): # center[i] in [1, 2[
44    # We want it to fit on 11 bits of mantissa
45    r[i] :=  round(evalf(  (1/center[i]) * 2^(11)) ) / 2^(11) ;
46
47od:
48for i from MAXINDEX to 2^L do
49    # y has been divided by two, center[i] in [0.5, 1[
50    center[i]:=(1 + i*2^(-L)) / 2:
51    # We want it to fit on 11 bits of mantissa,
52    r[i] :=  round(evalf(  (1/center[i]) * 2^(10)) ) / 2^(10) ;
53od:
54
55# Note that we go up to 2^L although the case 2^L is wrapped to zero
56# in the C code. It could be important for zmax (but it turns out not).
57
58for i from 0 to 2^L do
59    (logirh[i], logirl[i]) := hiloExt(-log(r[i])):
60od:
61
62#Computation of ZMax.
63for i from 0 to MAXINDEX-1 do
64    t_x := center[i] + 2^(-L-1) :
65    zmax[i] := (t_x*r[i]-1) :
66    t_x := center[i] - 2^(-L-1) :
67    zmin[i] := (t_x*r[i]-1) :
68    zabsmax[i] := max(abs(zmin[i]), abs(zmax[i])):
69od:
70for i from MAXINDEX to 2^L do
71    t_x := center[i] + 2^(-L-2) :
72    zmax[i] := (t_x*r[i]-1) :
73    t_x := center[i] - 2^(-L-2) :
74    zmin[i] := (t_x*r[i]-1) :
75    zabsmax[i] := max(abs(zmin[i]), abs(zmax[i])):
76od:
77
78zmaxmax:=0:
79zminmin:=0:
80for i from 0 to 2^L do
81    if zmax[i] > zmaxmax then zmaxmax := zmax[i]: fi:
82    if zmin[i] < zminmin then zminmin := zmin[i]: fi:
83od:
84printf("zminmin = -2^(%2f)   zmaxmax = 2^(%2f)\n", log2(-zminmin), log2(zmaxmax) ) :
85
86
87PolyDegreeQuick:=7:
88
89#Keep -zmaxmax..zmaxmax to keep c1=1, which is useful in the proof
90
91###### PHASE RAPIDE
92polyQuick0:= x  * numapprox[minimax](  log(1+x)/x,  x=-zmaxmax..zmaxmax,  [PolyDegreeQuick-1,0], 1 ,  'deltaApprox'):
93
94## Création des nouveaux coefficients
95a7 := convert(coeff(polyQuick0,x,7),rational):
96a6 := convert(coeff(polyQuick0,x,6)/a7,rational):
97a5 := convert(coeff(polyQuick0,x,5)/a7,rational):
98a4 := convert(coeff(polyQuick0,x,4)/a7,rational):
99
100a3 := convert(coeff(polyQuick0,x,3),rational):
101a2 := convert(coeff(polyQuick0,x,2),rational):
102a1 := convert(coeff(polyQuick0,x,1),rational):
103a0 := convert(coeff(polyQuick0,x,0),rational):
104
105alpha0 := a5 - 1:
106alpha1 := a6:
107alpha2 := a4 - alpha0 * a6:
108
109##
110a7_hi,a7_lo         := hiloExt(a7):
111a3_hi,a3_lo         := hiloExt(a3):
112a2_hi,a2_lo         := hiloExt(a2):
113a0_hi,a0_lo         := hiloExt(a0):
114
115c_hi,c_lo         := hiloExt(alpha0):
116alpha_hi,alpha_lo := hiloExt(alpha1):
117beta_hi,beta_lo   := hiloExt(alpha2):
118
119coef_c := c_hi + c_lo:
120coef_alpha := alpha_hi + alpha_lo:
121coef_beta := beta_hi + beta_lo:
122coef_c7 := a7_hi + a7_lo:
123coef_c3 := a3_hi + a3_lo:
124coef_c2 := a2_hi + a2_lo:
125coef_c0 := a0_hi + a0_lo:
126
127polyQuick := ((x^2 + coef_c)*(x + coef_alpha) + (x + coef_beta)) * (coef_c7*x^4) + ((coef_c3 * x + coef_c2)*x^2 + (x)):
128
129epsilonApproxQuick := numapprox[infnorm]( 1-polyQuick/log(1+x), x=zminmin..zmaxmax):
130printf("   approximation rel error for the quick phase is 2^(%2f)\n", log2(epsilonApproxQuick) ) :
131deltaApproxQuick := numapprox[infnorm]( polyQuick-log(1+x), x=zminmin..zmaxmax):
132printf("   approximation abs error for the quick phase is 2^(%2f)\n", log2(deltaApproxQuick) ) :
133################################################################################################
134
135PolyDegreeAccurate:=14:
136
137printf("Computing the polynomial for accurate phase (this may take some time...)\n"):
138pe:= x  * numapprox[minimax](  log(1+x)/x,  x=-zmaxmax..zmaxmax,  [PolyDegreeAccurate-1,0], 1 ,  'deltaApprox'):
139
140
141MaxDegreeDDE:=8:  #
142
143polyAccurate := polyExact2Ext(pe, MaxDegreeDDE):
144deltaApproxAccurate := numapprox[infnorm](polyAccurate-log(1+x), x=-zmaxmax..zmaxmax):
145epsilonApproxAccurate := numapprox[infnorm]( 1-polyAccurate/log(1+x), x=-zmaxmax..zmaxmax):
146printf("   approximation error for the accurate phase is 2^(%2f)\n", log2(epsilonApproxAccurate) ) :
147
148
149
150
151filename:="PATERSON/log-de.h":
152fd:=fopen(filename, WRITE, TEXT):
153
154fprintf(fd, "/*File generated by maple/log-de.mpl*/\n\n"):
155
156  fprintf(fd, "#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)\n\n"):
157
158  fprintf(fd, "#ifndef PATERSON\n#define PATERSON\n#endif\n"):
159
160  for i from PolyDegreeAccurate to 1 by -1 do
161    fprintf(fd, "#define c%dh  ch[%d]\n", i, PolyDegreeAccurate-i):
162  od:
163
164  for i from MaxDegreeDDE-1 to 1 by -1 do
165    fprintf(fd, "#define c%dl  cl[%d]\n", i, MaxDegreeDDE-1-i):
166  od:
167  fprintf(fd, "#define PREFETCH_POLY_ACCURATE \n"):
168  fprintf(fd, "\n#else /* not(defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)),\n   assuming Itanium, otherwise we shouldn't be there */ \n\n"):
169
170  fprintf(fd, "\n#define PREFETCH_POLY_ACCURATE "):
171  for i from PolyDegreeAccurate to 1 by -1 do
172    fprintf(fd, "c%dh=ch[%d]; ", i, PolyDegreeAccurate-i):
173    if i mod 4 =0 then  fprintf(fd, "\\\n        "): fi:
174  od:
175  fprintf(fd, "\\\n        "):
176  for i from MaxDegreeDDE-1 to 1 by -1 do
177    fprintf(fd, "c%dl=cl[%d]; ", i, MaxDegreeDDE-1-i):
178  od:
179
180  fprintf(fd, "\n#endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */ \n\n"):
181
182  # Various constants
183  fprintf(fd, "#define L        %d\n", L):
184  fprintf(fd, "#define MAXINDEX %d\n", MAXINDEX):
185  fprintf(fd, "#define INDEXMASK %d\n", 2^L-1):
186  fprintf(fd, "static const long double log2h  = %1.50eL ;\n", log2h):
187  fprintf(fd, "static const long double log2l  = %1.50eL ;\n", log2l):
188  fprintf(fd, "static const long double two64 = %1.50eL ;\n", evalf(2^64)):
189
190  # The polynomials
191  #  polynomial for quick phase
192  fprintf(fd,"static const long double c7 = %1.50e;\n",coef_c7):
193  fprintf(fd,"static const long double c3 = %1.50e;\n",coef_c3):
194  fprintf(fd,"static const long double c2 = %1.50e;\n",coef_c2):
195  fprintf(fd,"static const long double ps_alpha = %1.50e;\n",coef_alpha):
196  fprintf(fd,"static const long double ps_beta = %1.50e;\n",coef_beta):
197  fprintf(fd,"static const long double ps_c = %1.50e;\n",coef_c):
198
199  #  polynomial for accurate phase
200  fprintf(fd, "static const long double ch[%d] =  {\n",PolyDegreeAccurate):
201   for i from PolyDegreeAccurate to 1 by -1 do
202     (ch, cl) := hiloExt(coeff(polyAccurate,x,i)):
203     fprintf(fd, "   /* ch%d  = */ %1.50eL, \n", i, ch):
204   od:
205  fprintf(fd, "}; \n \n"):
206
207  fprintf(fd, "static const long double cl[%d] =  {\n", MaxDegreeDDE):
208  for i from MaxDegreeDDE-1 to 1 by -1 do
209     (ch, cl) := hiloExt(coeff(polyAccurate,x,i)):
210     fprintf(fd, "   /* cl%d  = */ %1.50eL, \n", i, cl):
211   od:
212  fprintf(fd, "}; \n \n"):
213
214
215  # The tables
216  fprintf(fd, "typedef struct rri_tag {float r; long double logirh;  long double logirl; } rri ;  \n"):
217  fprintf(fd, "static const rri argredtable[%d] = {\n", 2^L):
218  for i from 0 to 2^L-1 do
219      fprintf(fd, "  { \n"):
220      fprintf(fd, "    %1.50eL,   /* r[%d] */ \n", r[i], i):
221      fprintf(fd, "    %1.50eL, /* logirh[%d] */ \n", logirh[i], i):
222      fprintf(fd, "    %1.50eL, /* logirl[%d] */ \n", logirl[i], i):
223      fprintf(fd, "  } "):
224      if(i<2^L-1) then  fprintf(fd, ", \n"): fi
225  od:
226  fprintf(fd, "}; \n \n"):
227fclose(fd):
228
229printf("\n************ DONE PATERSON/log-de.h ************\n");
230
231
232
233
234filename:="PATERSON/polynomials.sed":
235fd:=fopen(filename, WRITE, TEXT):
236for i from PolyDegreeAccurate to 1 by -1 do
237    (ch, cl) := hiloExt(coeff(polyAccurate,x,i)):
238    fprintf(fd, " s/_c%dh/%1.40e/g\n", i, ch):
239    if(i<MaxDegreeDDE) then
240        fprintf(fd, " s/_c%dl/%1.40e/g\n", i, cl):
241    fi:
242od:
243
244fprintf(fd, " s/_c7/%1.40e/g\n", coef_c7):
245fprintf(fd, " s/_c3/%1.40e/g\n", coef_c3):
246fprintf(fd, " s/_c2/%1.40e/g\n", coef_c2):
247
248fprintf(fd, " s/_ps_alpha/%1.40e/g\n", coef_alpha):
249fprintf(fd, " s/_ps_beta/%1.40e/g\n", coef_beta):
250fprintf(fd, " s/_ps_c/%1.40e/g\n", coef_c):
251
252fprintf(fd, " s/_deltaApproxQuick/%1.40e/g\n", deltaApproxQuick):
253fprintf(fd, " s/_epsilonApproxQuick/%1.40e/g\n", epsilonApproxQuick):
254fprintf(fd, " s/_deltaApproxAccurate/%1.40e/g\n", deltaApproxAccurate):
255fprintf(fd, " s/_epsilonApproxAccurate/%1.40e/g\n", epsilonApproxAccurate):
256fprintf(fd, " s/_log2h/%1.40e/g\n", log2h):
257fprintf(fd, " s/_log2l/%1.40e/g\n", log2l):
258fclose(fd):
259
260printf("\n************ DONE PATERSON/polynomials.sed ************\n");
261
262for j from 0 to 2^L-1 do
263    filename:=cat("PATERSON/log-de_",j,".sed"):
264    fd:=fopen(filename, WRITE, TEXT):
265    fprintf(fd, " s/_rval/%1.40e/g\n",     r[j]):
266    fprintf(fd, " s/_logirh/%1.40e/g\n", logirh[j]):
267    fprintf(fd, " s/_logirl/%1.40e/g\n", logirl[j]):
268    fprintf(fd, " s/_zabsmax/%1.40e/g\n", zabsmax[j]):
269    fclose(fd):
270  od:
271
272
273
274printf("\n************ DONE PATERSON/log-de*.sed ************\n");
275
276# A shell script to use them
277filename:="../gappa/run-log-de-proof.sh":
278fd:=fopen(filename, WRITE, TEXT):
279fprintf(fd, "#!/bin/sh\n"):
280fprintf(fd, "# You probably need to edit the path to the gappa executable\n"):
281fprintf(fd, "GAPPA=~/gappa/src/gappa\n"):
282
283fprintf(fd, "echo Accurate phase, case E=0, index=0:\n"):
284fprintf(fd, "sed  -f ../maple/PATERSON/polynomials.sed  -f ../maple/PATERSON/log-de_0.sed ../gappa/log-de-acc-index0-E0.gappa | $GAPPA \n"):
285
286fprintf(fd, "echo Accurate phase, case E!=0, index=0\n"):
287fprintf(fd, "sed  -f ../maple/PATERSON/polynomials.sed  -f ../maple/PATERSON/log-de_0.sed ../gappa/log-de-acc-index0-E1N.gappa | $GAPPA \n"):
288
289fprintf(fd, "for num in `seq 1 %d`; do\n", 2^L-1):
290fprintf(fd, "  echo Accurate phase, case E=0, index=$num\n"):
291fprintf(fd, "  sed -f ../maple/PATERSON/polynomials.sed  -f ../maple/PATERSON/log-de_$num.sed ../gappa/log-de-acc-index1N-E0.gappa | $GAPPA \n"):
292fprintf(fd, "  echo\n"):
293fprintf(fd, "done\n"):
294
295fprintf(fd, "for num in `seq 1 %d`; do\n", 2^L-1):
296fprintf(fd, "  echo Accurate phase, case E!=0, index = $num\n"):
297fprintf(fd, "  sed -f ../maple/PATERSON/polynomials.sed  -f ../maple/PATERSON/log-de_$num.sed ../gappa/log-de-acc-index1N-E1N.gappa | $GAPPA \n"):
298fprintf(fd, "  echo\n"):
299fprintf(fd, "done\n\n"):
300
301
302fprintf(fd, "echo Quick phase, case E=0, index=0\n"):
303fprintf(fd, "sed  -f ../maple/PATERSON/polynomials.sed  -f ../maple/PATERSON/log-de_0.sed ../gappa/log-de-index0-E0.gappa | $GAPPA \n"):
304
305fprintf(fd, "echo Quick phase, case E!=0, index=0\n"):
306fprintf(fd, "sed  -f ../maple/PATERSON/polynomials.sed  -f ../maple/PATERSON/log-de_0.sed ../gappa/log-de-index0-E1N.gappa | $GAPPA \n"):
307
308
309fprintf(fd, "for num in `seq 0 %d`; do\n", 2^L-1):
310fprintf(fd, "  echo Quick phase, for all E,  index=$num \n"):
311fprintf(fd, "  sed  -f ../maple/PATERSON/polynomials.sed  -f ../maple/PATERSON/log-de_$num.sed ../gappa/log-de-index1N-E0N.gappa | $GAPPA \n"):
312fprintf(fd, "  echo\n"):
313fprintf(fd, "done\n"):
314
315fclose(fd):
316
317printf("\n************ DONE PATERSON/run-log-de-proof.sh ************\n"):
318printf("Now you should run \n"):
319printf(" sh ../gappa/run-log-de-proof.sh  2> ../maple/PATERSON/Gappa.out\n"):
320printf("  (You probably need to edit the path to the gappa executable within run-log-de-proof.sh)\n"):
321printf("Then look at PATERSON/Gappa.out. It shouldn't contain 'some enclosures were not satisfied'.\n If it does, report it !\n"):
322
323
324printf("----DONE---\n") :
325
326