1! In any Gaussian based QM code the performance of the 2-electron
2! integrals is likely to be critical (unless the wave function is
3! represented on a grid). Recent reviews on integral evaluations is
4! [1] and [2].
5! Wim Klopper suggested that the evaluation of the integrals could be
6! performed faster by approximating the natural exponent functions (EXP)
7! as a Taylor series [3]. To see to what extent using a Taylor series
8! may improve the performance this kernel was written. Given the order
9! of the Taylor series all other quantities are automatically
10! initialized. We also use the fact that we are interested only in
11! exp(x) for ln(eps) <= x <= 0 (where eps is the machine precision).
12! For x < ln(eps) we assume exp(x)=0.
13! If nord = 4 then the Taylor series approximation is 1.5 times faster
14! than the exp function on a Intel(R) Xeon(R) CPU E5-2670 0 @ 2.60GHz.
15! The interpolation table requires 1.4 MB of memory. The memory
16! requirements mean that the algorithm could be badly impacted by cache
17! misses. Knights Landing does not have such big caches. Also the Volta
18! GPU is unlikely going to have such big caches. In addition Knights
19! Landing has added AVX-512ER instructions to perform exponentiations
20! of the form 2^x. These might help boost the performance of the exp
21! function well beyond what a Taylor series brings. These instructions
22! are not available on all Intel processors though.
23!
24! [1] Simen Reine, Trygve Helgaker, Roland Lindh, "Multi-electron
25!     integrals", WIREs Comput. Mol. Sci. 2012, 2:290-303,
26!     doi:10.1002/wcms.78
27! [2] Justin T. Fermann, Edward F. Valeev, "Fundamentals of Molecular
28!     Integrals Evaluation",
29!     https://www.valeevgroup.chem.vt.edu/docs/ints.07032015.pdf
30! [3] Wim Klopper, "SCF methods, basis sets, and integrals:
31!     Lecture IV: Integrals", ESQC 2011, Torre Normanna, 9/18 –
32!     10/01/2011,
33!     http://esqc.ups-tlse.fr/11/lectures/KlopperLecture4-1x2.pdf
34!     Slide: "Calculating the Boys function".
35!
36      module taylor
37      double precision, allocatable :: ts(:) ! table of exp(x_i)
38      double precision :: h  ! step size
39      double precision :: xm ! x_min (exp(x) is needed only in the range
40                             ! x_min <= x <= 0)
41      double precision :: xmh ! = xm+h/2
42      integer, parameter :: nord = 4 ! the maximum order
43      double precision, parameter :: eps = 1.0d-16 ! machine precision
44      contains
45      subroutine t_init
46        ! Initialize the interpolation table
47        implicit none
48        integer :: npt ! number of points
49        integer :: ipt ! current point
50        double precision :: a, x
51        xm  = dlog(eps)
52        h   = 2.0d0*(eps**(1.0d0/dble(nord)))
53        xmh = xm+0.5d0*h
54        npt = (-xm/h)+1
55        allocate(ts(npt))
56        do ipt = 1, npt
57          x=xmh+h*(ipt-1)
58          ts(ipt) = dexp(x)
59        enddo
60        write(*,*)'xm ',xm
61        write(*,*)'h  ',h
62        write(*,*)'npt',npt,' requires ',dble(8*npt)/1024,' KB'
63      end subroutine t_init
64      double precision function t_exp(x)
65        ! Use Taylor series to evaluate exp(x)
66        implicit none
67        double precision, intent(in) :: x
68        double precision :: xi, xd
69        double precision :: r
70        integer ipt
71        integer iord
72        ipt = (x-xmh)/h
73        xi  = xmh+h*(ipt-1)
74        xd  = x-xi
75        r   = xd/dble(nord)+1.0d0
76        do iord = nord-1, 1, -1
77          r = r*xd/dble(iord)+1.0d0
78        enddo
79        t_exp = r*ts(ipt)
80      end function t_exp
81      end module taylor
82
83      program performance
84      use taylor
85      implicit none
86#include "mpif.h"
87      integer :: ierr, nmax, i
88      double precision :: a, b
89      double precision :: tt, te, t0
90      nmax = 10000000
91      a = -30.0d0
92      call mpi_init(ierr)
93      call t_init
94      t0 = -mpi_wtime()
95      b =   0.0d0
96      do i = 1, nmax
97        b = b + a/i
98      enddo
99      t0 = t0 + mpi_wtime()
100      write(*,*)'Null time:  ',t0,b
101      tt = -mpi_wtime()
102      b =   0.0d0
103      do i = 1, nmax
104        b = b + t_exp(a/i)
105      enddo
106      tt = tt + mpi_wtime()
107      write(*,*)'t-exp time: ',tt-t0,b
108      te = -mpi_wtime()
109      b =   0.0d0
110      do i = 1, nmax
111        b = b + exp(a/i)
112      enddo
113      te = te + mpi_wtime()
114      write(*,*)'Exp time:   ',te-t0,b
115      write(*,*)'Exp/t-exp:  ',(te-t0)/(tt-t0)
116!     do i = 1, 1000
117!       write(*,*)a/i,exp(a/i),t_exp(a/i)
118!     enddo
119      call mpi_finalize(ierr)
120      end
121
122
123
124
125