1! *****************************************************************************
2!> \\brief provides specific implementations for common cases of xyz_to_vab routine.
3!>
4!> \\note
5!>     ____              _ _     __  __           _ _  __         _____ _     _       _____ _ _      _
6!>    |  _ \  ___  _ __ ( ) |_  |  \/  | ___   __| (_)/ _|_   _  |_   _| |__ (_)___  |  ___(_) | ___| |
7!>    | | | |/ _ \| '_ \|/| __| | |\/| |/ _ \ / _` | | |_| | | |   | | | '_ \| / __| | |_  | | |/ _ \ |
8!>    | |_| | (_) | | | | | |_  | |  | | (_) | (_| | |  _| |_| |   | | | | | | \__ \ |  _| | | |  __/_|
9!>    |____/ \___/|_| |_|  \__| |_|  |_|\___/ \__,_|_|_|  \__, |   |_| |_| |_|_|___/ |_|   |_|_|\___(_)
10!>                                                        |___/
11!>      ____ _                  ___                              _ _       _       _
12!>     / ___| | ___  ___  ___  |_ _|_ __ ___  _ __ ___   ___  __| (_) __ _| |_ ___| |_   _
13!>    | |   | |/ _ \/ __|/ _ \  | || '_ ` _ \| '_ ` _ \ / _ \/ _` | |/ _` | __/ _ \ | | | |
14!>    | |___| | (_) \__ \  __/  | || | | | | | | | | | |  __/ (_| | | (_| | ||  __/ | |_| |
15!>     \____|_|\___/|___/\___| |___|_| |_| |_|_| |_| |_|\___|\__,_|_|\__,_|\__\___|_|\__, |
16!>                                                                                   |___/
17!>     _____ _     _       _____ _ _      _
18!>    |_   _| |__ (_)___  |  ___(_) | ___| |
19!>      | | | '_ \| / __| | |_  | | |/ _ \ |
20!>      | | | | | | \__ \ |  _| | | |  __/_|
21!>      |_| |_| |_|_|___/ |_|   |_|_|\___(_)
22!>
23!>      This is a template
24!>
25!>      **** DO NOT MODIFY THIS .f90 FILE ****
26!>      modify the .template instead
27!>      The script to regenerate this file can be found at autotune_grid/xyz_to_vab/
28!> \par History
29!>      07.2012 Fixed a bug with the dimension of coef_xyz
30!>      06.2012 Template cleanup
31!>      05.2012 created
32!>
33!> \\author Ruyman Reyes
34! *****************************************************************************
35
36! Caller routine
37      SUBROUTINE call_to_xyz_to_vab(prefactor, coef_xyz, lp, &
38                                    la_max_local, lb_max_local, &
39                                    rp, ra, rab, vab, coset, &
40                                    la_min_local, lb_min_local, &
41                                    maxl, lvab, hvab)
42
43              USE kinds,                        ONLY: dp
44
45             REAL(KIND=dp), INTENT(IN) :: prefactor
46             REAL(KIND=dp), DIMENSION(((lp+1)*(lp+2)*(lp+3))/6), &
47                                  INTENT(INOUT) :: coef_xyz
48             INTEGER, INTENT(IN) :: lp, la_max_local, lb_max_local,&
49                                    la_min_local, lb_min_local
50             INTEGER, INTENT(IN) :: maxl, lvab, hvab
51             REAL(KIND=dp), DIMENSION(3), INTENT(IN) :: rp, ra, rab
52             REAL(KIND=dp), DIMENSION(lvab,hvab), INTENT(INOUT)  :: vab
53             INTEGER,  DIMENSION(-1:maxl,-1:maxl,-1:maxl), &
54                       INTENT(IN)     :: coset
55
56
57      	<$--(if cray_tracing)-->
58      	    include "pat_apif.h"
59      	    INTEGER :: istat
60      	    CALL PAT_sampling_state(PAT_STATE_ON,istat)
61      	    CALL PAT_region_begin(1,"XYZ_TO_VAB_X_X",istat)
62      	<$--(end)-->
63          SELECT CASE(la_max_local)
64           <$--(for la_value in range(0,la_max_local+1))-->
65               CASE (@<la_value>@)
66                    SELECT CASE(lb_max_local)
67                    <$--(for lb_value in range(0,lb_max_local+1))-->
68                             CASE(@<lb_value>@)
69                               CALL xyz_to_vab_@<la_value>@_@<lb_value>@(prefactor, coef_xyz, lp, &
70                                                  la_max_local, lb_max_local, &
71                                                  rp, ra, rab, vab, coset, la_min_local, &
72                                                  lb_min_local, maxl, lvab, hvab)
73
74                    <$--(end)-->
75                             CASE DEFAULT
76  			          CALL xyz_to_vab(prefactor, coef_xyz, lp, &
77                                                  la_max_local, lb_max_local, &
78                                                  rp, ra, rab, vab, coset, la_min_local, &
79                                                  lb_min_local, maxl, lvab, hvab)
80                     END SELECT
81           <$--(end)-->
82               CASE DEFAULT
83  			CALL xyz_to_vab(prefactor, coef_xyz, lp, &
84  					 la_max_local, lb_max_local, &
85  					  rp, ra, rab, vab, coset, la_min_local, lb_min_local, maxl, lvab, hvab)
86
87           END SELECT
88
89      	<$--(if cray_tracing)-->
90      	    CALL PAT_region_end(1, istat)
91      	    CALL PAT_sampling_state(PAT_STATE_OFF,istat)
92      	<$--(end)-->
93
94      	    END SUBROUTINE call_to_xyz_to_vab
95
96!!! Cloned routines
97<$--(for la_max_local in range(0,la_max_local+1))-->
98 <$--(for lb_max_local in range(0,lb_max_local+1))-->
99   <$--(if (la_max_local + lb_max_local) <= 4)-->
100          ! Fully unrolled version
101    <$--(include)--> xyz_to_vab.template <$--(end)-->
102   <$--(else)-->
103          ! Partial unrolled version
104    <$--(include)--> xyz_to_vab_nonunroll.template <$--(end)-->
105   <$--(end)-->
106 <$--(end)-->
107<$--(end)-->
108
109!!! End of cloned routines
110
111      ! Original routine
112      SUBROUTINE xyz_to_vab(prefactor, coef_xyz, lp, la_max_local, &
113                             lb_max_local, rp, ra, rab, vab, coset, &
114                             la_min_local, lb_min_local, maxl, &
115                             lvab, hvab)
116
117              USE kinds,                        ONLY: dp
118
119         implicit none
120
121             ! PARAMETERS
122             REAL(KIND=dp), INTENT(IN) :: prefactor
123             INTEGER, INTENT(IN) :: lp, la_max_local, lb_max_local, &
124                                        la_min_local, lb_min_local, &
125                                        maxl, lvab, hvab
126             INTEGER,  DIMENSION(-1:maxl,-1:maxl,-1:maxl), &
127                       INTENT(IN)     :: coset
128
129
130             REAL(KIND=dp), DIMENSION(((lp+1)*(lp+2)*(lp+3))/6), &
131                             INTENT(INOUT) :: coef_xyz
132             REAL(KIND=dp), DIMENSION(3), INTENT(IN) :: rp, ra, rab
133             REAL(KIND=dp), DIMENSION(lvab,hvab), INTENT(INOUT) :: vab
134
135             ! LOCAL
136             INTEGER :: ico,jco,lxa,lxb,lxp,lxyz,lyp,lzp, iaxis, l, k, &
137                        lya,lyb,lza,lzb
138             REAL(KIND=dp) :: a,b,binomial_k_lxa,binomial_l_lxb
139
140          REAL(KIND=dp), DIMENSION(:,:,:,:), ALLOCATABLE :: alpha
141          REAL(KIND=dp), DIMENSION(:,:), ALLOCATABLE :: coef_ttz
142          REAL(KIND=dp), DIMENSION(:,:,:,:), ALLOCATABLE :: coef_tyz
143
144
145          coef_xyz=coef_xyz*prefactor
146
147      !   *** initialise the coefficient matrix, we transform the sum
148      !
149      !   sum_{lxa,lya,lza,lxb,lyb,lzb} P_{lxa,lya,lza,lxb,lyb,lzb} (x-a_x)**lxa (y-a_y)**lya (z-a_z)**lza (x-b_x)**lxb (y-a_y)**lya (z-a_z)**lza
150      !
151      !   into
152      !
153      !   sum_{lxp,lyp,lzp} P_{lxp,lyp,lzp} (x-p_x)**lxp (y-p_y)**lyp (z-p_z)**lzp
154      !
155      !   where p is center of the product gaussian, and lp = la_max + lb_max
156      !   (current implementation is l**7)
157      !
158      !
159      !   compute polynomial expansion coefs -> (x-a)**lxa (x-b)**lxb -> sum_{ls} alpha(ls,lxa,lxb,1)*(x-p)**ls
160      !
161      !   *** make the alpha matrix ***
162          ALLOCATE(alpha(0:lp,0:la_max_local,0:lb_max_local,3))
163          alpha(:,:,:,:)=0.0_dp
164          DO iaxis=1,3
165          DO lxa=0,la_max_local
166          DO lxb=0,lb_max_local
167             binomial_k_lxa=1.0_dp
168             a=1.0_dp
169             DO k=0,lxa
170              binomial_l_lxb=1.0_dp
171              b=1.0_dp
172              DO l=0,lxb
173                 alpha(lxa-l+lxb-k,lxa,lxb,iaxis)=alpha(lxa-l+lxb-k,lxa,lxb,iaxis)+ &
174                                   binomial_k_lxa*binomial_l_lxb*a*b
175                 binomial_l_lxb=binomial_l_lxb*REAL(lxb-l,dp)/REAL(l+1,dp)
176                 b=b*(rp(iaxis)-(ra(iaxis)+rab(iaxis)))
177              ENDDO
178              binomial_k_lxa=binomial_k_lxa*REAL(lxa-k,dp)/REAL(k+1,dp)
179              a=a*(-ra(iaxis)+rp(iaxis))
180             ENDDO
181          ENDDO
182          ENDDO
183          ENDDO
184
185          !
186          !   compute v_{lxa,lya,lza,lxb,lyb,lzb} given v_{lxp,lyp,lzp} and alpha(ls,lxa,lxb,1)
187          !   use a three step procedure
188          !
189
190          ALLOCATE(coef_ttz(0:la_max_local,0:lb_max_local))
191          ALLOCATE(coef_tyz(0:la_max_local,0:lb_max_local,0:la_max_local,0:lb_max_local))
192          lxyz=0
193          DO lzp=0,lp
194             coef_tyz=0.0_dp
195             DO lyp=0,lp-lzp
196                coef_ttz=0.0_dp
197                DO lxp=0,lp-lzp-lyp
198                   lxyz=lxyz+1
199                   DO lxb=0,lb_max_local
200                   DO lxa=0,la_max_local
201                      coef_ttz(lxa,lxb)=coef_ttz(lxa,lxb)+coef_xyz(lxyz)*alpha(lxp,lxa,lxb,1)
202                   ENDDO
203                   ENDDO
204
205                ENDDO
206
207                DO lyb=0,lb_max_local
208                DO lya=0,la_max_local
209                   DO lxb=0,lb_max_local-lyb
210                   DO lxa=0,la_max_local-lya
211                      coef_tyz(lxa,lxb,lya,lyb)=coef_tyz(lxa,lxb,lya,lyb)+coef_ttz(lxa,lxb)*alpha(lyp,lya,lyb,2)
212                   ENDDO
213                   ENDDO
214                ENDDO
215                ENDDO
216
217             ENDDO
218
219             DO lzb=0,lb_max_local
220             DO lza=0,la_max_local
221                DO lyb=0,lb_max_local-lzb
222                DO lya=0,la_max_local-lza
223                   DO lxb=MAX(lb_min_local-lzb-lyb,0),lb_max_local-lzb-lyb
224                   jco=coset(lxb,lyb,lzb)
225                   DO lxa=MAX(la_min_local-lza-lya,0),la_max_local-lza-lya
226                      ico=coset(lxa,lya,lza)
227                      vab(ico,jco)=vab(ico,jco)+coef_tyz(lxa,lxb,lya,lyb)*alpha(lzp,lza,lzb,3)
228                   ENDDO
229                   ENDDO
230                ENDDO
231                ENDDO
232             ENDDO
233             ENDDO
234
235          ENDDO
236
237          DEALLOCATE(coef_tyz)
238          DEALLOCATE(coef_ttz)
239          DEALLOCATE(alpha)
240
241
242
243            end subroutine xyz_to_vab
244
245
246
247