1 /**********************************************************************
2   Set_OLP_Kin.c:
3 
4      Set_OLP_Kin.c is a subroutine to calculate the overlap matrix
5      and the matrix for the kinetic operator in momentum space.
6 
7   Log of Set_OLP_Kin.c:
8 
9      15/Oct./2002  Released by T.Ozaki
10      25/Nov./2014  Memory allocation modified by A.M. Ito (AITUNE)
11 
12 ***********************************************************************/
13 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <math.h>
17 #include <time.h>
18 #include "openmx_common.h"
19 #include "mpi.h"
20 #include <omp.h>
21 
22 
23 #ifdef kcomp
24 dcomplex****** Allocate6D_dcomplex(int size_1, int size_2, int size_3,
25                                           int size_4, int size_5, int size_6);
26 double**** Allocate4D_double(int size_1, int size_2, int size_3, int size_4);
27 dcomplex** Allocate2D_dcomplex(int size_1, int size_2);
28 void Free6D_dcomplex(dcomplex****** buffer);
29 void Free4D_double(double**** buffer);
30 void Free2D_dcomplex(dcomplex** buffer);
31 #else
32 inline dcomplex****** Allocate6D_dcomplex(int size_1, int size_2, int size_3,
33                                           int size_4, int size_5, int size_6);
34 inline double**** Allocate4D_double(int size_1, int size_2, int size_3, int size_4);
35 inline dcomplex** Allocate2D_dcomplex(int size_1, int size_2);
36 inline void Free6D_dcomplex(dcomplex****** buffer);
37 inline void Free4D_double(double**** buffer);
38 inline void Free2D_dcomplex(dcomplex** buffer);
39 #endif
40 
41 
Set_OLP_Kin(double ***** OLP,double ***** H0)42 double Set_OLP_Kin(double *****OLP, double *****H0)
43 {
44   /****************************************************
45           Evaluate overlap and kinetic integrals
46                  in the momentum space
47   ****************************************************/
48   static int firsttime=1;
49   int size_SumS0,size_TmpOLP;
50   double time0;
51   double TStime,TEtime;
52   int numprocs,myid;
53   int Mc_AN,Gc_AN,h_AN;
54   int OneD_Nloop;
55   int *OneD2Mc_AN,*OneD2h_AN;
56 
57   /* MPI */
58   MPI_Comm_size(mpi_comm_level1,&numprocs);
59   MPI_Comm_rank(mpi_comm_level1,&myid);
60 
61   dtime(&TStime);
62 
63   /****************************************************
64    MPI_Barrier
65   ****************************************************/
66 
67   MPI_Barrier(mpi_comm_level1);
68 
69   /* PrintMemory */
70 
71   if (firsttime) {
72 
73     size_SumS0  = (List_YOUSO[25]+1)*List_YOUSO[24]*(List_YOUSO[25]+1)*List_YOUSO[24];
74     size_TmpOLP = (List_YOUSO[25]+1)*List_YOUSO[24]*(2*(List_YOUSO[25]+1)+1)*
75                   (List_YOUSO[25]+1)*List_YOUSO[24]*(2*(List_YOUSO[25]+1)+1);
76 
77     PrintMemory("Set_OLP_Kin: SumS0",  sizeof(double)*size_SumS0,NULL);
78     PrintMemory("Set_OLP_Kin: SumK0",  sizeof(double)*size_SumS0,NULL);
79     PrintMemory("Set_OLP_Kin: SumSr0", sizeof(double)*size_SumS0,NULL);
80     PrintMemory("Set_OLP_Kin: SumKr0", sizeof(double)*size_SumS0,NULL);
81     PrintMemory("Set_OLP_Kin: TmpOLP", sizeof(dcomplex)*size_TmpOLP,NULL);
82     PrintMemory("Set_OLP_Kin: TmpOLPr",sizeof(dcomplex)*size_TmpOLP,NULL);
83     PrintMemory("Set_OLP_Kin: TmpOLPt",sizeof(dcomplex)*size_TmpOLP,NULL);
84     PrintMemory("Set_OLP_Kin: TmpOLPp",sizeof(dcomplex)*size_TmpOLP,NULL);
85     PrintMemory("Set_OLP_Kin: TmpKin", sizeof(dcomplex)*size_TmpOLP,NULL);
86     PrintMemory("Set_OLP_Kin: TmpKinr",sizeof(dcomplex)*size_TmpOLP,NULL);
87     PrintMemory("Set_OLP_Kin: TmpKint",sizeof(dcomplex)*size_TmpOLP,NULL);
88     PrintMemory("Set_OLP_Kin: TmpKinp",sizeof(dcomplex)*size_TmpOLP,NULL);
89     firsttime=0;
90   }
91 
92   /* one-dimensionalize the Mc_AN and h_AN loops */
93 
94   OneD_Nloop = 0;
95   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
96     Gc_AN = M2G[Mc_AN];
97     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
98       OneD_Nloop++;
99     }
100   }
101 
102   OneD2Mc_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
103   OneD2h_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
104 
105   OneD_Nloop = 0;
106   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
107     Gc_AN = M2G[Mc_AN];
108     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
109       OneD2Mc_AN[OneD_Nloop] = Mc_AN;
110       OneD2h_AN[OneD_Nloop]  = h_AN;
111       OneD_Nloop++;
112     }
113   }
114 
115 
116   /* OpenMP */
117 
118 #pragma omp parallel
119   {
120 
121     int Nloop;
122     int OMPID,Nthrds,Nprocs;
123     int Mc_AN,h_AN,Gc_AN,Cwan;
124     int Gh_AN,Rnh,Hwan;
125     int Ls,L0,Mul0,L1,Mul1,M0,M1;
126     int Lmax_Four_Int;
127     int i,j,k,l,m,p;
128     int num0,num1;
129 
130     double Stime_atom,Etime_atom;
131     double dx,dy,dz;
132     double S_coordinate[3];
133     double theta,phi,h;
134     double Bessel_Pro0,Bessel_Pro1;
135     double tmp0,tmp1,tmp2,tmp3,tmp4;
136     double siT,coT,siP,coP;
137     double kmin,kmax,Sk,Dk,r;
138     double sj,sjp,coe0,coe1;
139     double Normk,Normk2;
140     double gant,SH[2],dSHt[2],dSHp[2];
141     double **SphB,**SphBp;
142     double *tmp_SphB,*tmp_SphBp;
143     double ****SumS0;
144     double ****SumK0;
145     double ****SumSr0;
146     double ****SumKr0;
147 
148     dcomplex CsumS_Lx,CsumS_Ly,CsumS_Lz;
149     dcomplex CsumS0,CsumSr,CsumSt,CsumSp;
150     dcomplex CsumK0,CsumKr,CsumKt,CsumKp;
151     dcomplex Ctmp0,Ctmp1,Ctmp2,Cpow;
152     dcomplex CY,CYt,CYp,CY1,CYt1,CYp1;
153     dcomplex ******TmpOLP;
154     dcomplex ******TmpOLPr;
155     dcomplex ******TmpOLPt;
156     dcomplex ******TmpOLPp;
157     dcomplex ******TmpKin;
158     dcomplex ******TmpKinr;
159     dcomplex ******TmpKint;
160     dcomplex ******TmpKinp;
161 
162     dcomplex **CmatS0;
163     dcomplex **CmatSr;
164     dcomplex **CmatSt;
165     dcomplex **CmatSp;
166     dcomplex **CmatK0;
167     dcomplex **CmatKr;
168     dcomplex **CmatKt;
169     dcomplex **CmatKp;
170 
171     /****************************************************************
172                           allocation of arrays:
173     ****************************************************************/
174 
175     TmpOLP  = Allocate6D_dcomplex(List_YOUSO[25]+1, List_YOUSO[24], (2*(List_YOUSO[25]+1)+1), (List_YOUSO[25]+1), List_YOUSO[24], (2*(List_YOUSO[25]+1)+1));
176     TmpOLPr = Allocate6D_dcomplex(List_YOUSO[25]+1, List_YOUSO[24], (2*(List_YOUSO[25]+1)+1), (List_YOUSO[25]+1), List_YOUSO[24], (2*(List_YOUSO[25]+1)+1));
177     TmpOLPt = Allocate6D_dcomplex(List_YOUSO[25]+1, List_YOUSO[24], (2*(List_YOUSO[25]+1)+1), (List_YOUSO[25]+1), List_YOUSO[24], (2*(List_YOUSO[25]+1)+1));
178     TmpOLPp = Allocate6D_dcomplex(List_YOUSO[25]+1, List_YOUSO[24], (2*(List_YOUSO[25]+1)+1), (List_YOUSO[25]+1), List_YOUSO[24], (2*(List_YOUSO[25]+1)+1));
179     TmpKin  = Allocate6D_dcomplex(List_YOUSO[25]+1, List_YOUSO[24], (2*(List_YOUSO[25]+1)+1), (List_YOUSO[25]+1), List_YOUSO[24], (2*(List_YOUSO[25]+1)+1));
180     TmpKinr = Allocate6D_dcomplex(List_YOUSO[25]+1, List_YOUSO[24], (2*(List_YOUSO[25]+1)+1), (List_YOUSO[25]+1), List_YOUSO[24], (2*(List_YOUSO[25]+1)+1));
181     TmpKint = Allocate6D_dcomplex(List_YOUSO[25]+1, List_YOUSO[24], (2*(List_YOUSO[25]+1)+1), (List_YOUSO[25]+1), List_YOUSO[24], (2*(List_YOUSO[25]+1)+1));
182     TmpKinp = Allocate6D_dcomplex(List_YOUSO[25]+1, List_YOUSO[24], (2*(List_YOUSO[25]+1)+1), (List_YOUSO[25]+1), List_YOUSO[24], (2*(List_YOUSO[25]+1)+1));
183 
184     SumS0  = Allocate4D_double(List_YOUSO[25]+1, List_YOUSO[24], (List_YOUSO[25]+1), List_YOUSO[24]);
185     SumK0  = Allocate4D_double(List_YOUSO[25]+1, List_YOUSO[24], (List_YOUSO[25]+1), List_YOUSO[24]);
186     SumSr0 = Allocate4D_double(List_YOUSO[25]+1, List_YOUSO[24], (List_YOUSO[25]+1), List_YOUSO[24]);
187     SumKr0 = Allocate4D_double(List_YOUSO[25]+1, List_YOUSO[24], (List_YOUSO[25]+1), List_YOUSO[24]);
188 
189     CmatS0 = Allocate2D_dcomplex((2*(List_YOUSO[25]+1)+1), (2*(List_YOUSO[25]+1)+1));
190     CmatSr = Allocate2D_dcomplex((2*(List_YOUSO[25]+1)+1), (2*(List_YOUSO[25]+1)+1));
191     CmatSt = Allocate2D_dcomplex((2*(List_YOUSO[25]+1)+1), (2*(List_YOUSO[25]+1)+1));
192     CmatSp = Allocate2D_dcomplex((2*(List_YOUSO[25]+1)+1), (2*(List_YOUSO[25]+1)+1));
193     CmatK0 = Allocate2D_dcomplex((2*(List_YOUSO[25]+1)+1), (2*(List_YOUSO[25]+1)+1));
194     CmatKr = Allocate2D_dcomplex((2*(List_YOUSO[25]+1)+1), (2*(List_YOUSO[25]+1)+1));
195     CmatKt = Allocate2D_dcomplex((2*(List_YOUSO[25]+1)+1), (2*(List_YOUSO[25]+1)+1));
196     CmatKp = Allocate2D_dcomplex((2*(List_YOUSO[25]+1)+1), (2*(List_YOUSO[25]+1)+1));
197 
198     /* get info. on OpenMP */
199 
200     OMPID = omp_get_thread_num();
201     Nthrds = omp_get_num_threads();
202     Nprocs = omp_get_num_procs();
203 
204     /* one-dimensionalized loop */
205 
206     for (Nloop=OMPID*OneD_Nloop/Nthrds; Nloop<(OMPID+1)*OneD_Nloop/Nthrds; Nloop++){
207 
208       dtime(&Stime_atom);
209 
210       /* get Mc_AN and h_AN */
211 
212       Mc_AN = OneD2Mc_AN[Nloop];
213       h_AN  = OneD2h_AN[Nloop];
214 
215       /* set data on Mc_AN */
216 
217       Gc_AN = M2G[Mc_AN];
218       Cwan = WhatSpecies[Gc_AN];
219 
220       /* set data on h_AN */
221 
222       Gh_AN = natn[Gc_AN][h_AN];
223       Rnh = ncn[Gc_AN][h_AN];
224       Hwan = WhatSpecies[Gh_AN];
225 
226       dx = Gxyz[Gh_AN][1] + atv[Rnh][1] - Gxyz[Gc_AN][1];
227       dy = Gxyz[Gh_AN][2] + atv[Rnh][2] - Gxyz[Gc_AN][2];
228       dz = Gxyz[Gh_AN][3] + atv[Rnh][3] - Gxyz[Gc_AN][3];
229 
230       xyz2spherical(dx,dy,dz,0.0,0.0,0.0,S_coordinate);
231       r     = S_coordinate[0];
232       theta = S_coordinate[1];
233       phi   = S_coordinate[2];
234 
235       /* for empty atoms or finite elemens basis */
236       if (r<1.0e-10) r = 1.0e-10;
237 
238       /* precalculation of sin and cos */
239 
240       siT = sin(theta);
241       coT = cos(theta);
242       siP = sin(phi);
243       coP = cos(phi);
244 
245       /****************************************************
246           Overlap and the derivative
247               \int RL(k)*RL'(k)*jl(k*R) k^2 dk^3,
248               \int RL(k)*RL'(k)*j'l(k*R) k^3 dk^3
249 
250           Kinetic
251               \int RL(k)*RL'(k)*jl(k*R) k^4 dk^3,
252               \int RL(k)*RL'(k)*j'l(k*R) k^5 dk^3
253       ****************************************************/
254 
255       kmin = Radial_kmin;
256       kmax = PAO_Nkmax;
257       Sk = kmax + kmin;
258       Dk = kmax - kmin;
259 
260       for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
261 	for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
262 	  for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
263 	    for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
264 	      for (M0=-L0; M0<=L0; M0++){
265 		for (M1=-L1; M1<=L1; M1++){
266 
267                   TmpOLP[L0][Mul0][L0+M0][L1][Mul1][L1+M1]  = Complex(0.0,0.0);
268                   TmpOLPr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
269                   TmpOLPt[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
270                   TmpOLPp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
271 
272                   TmpKin[L0][Mul0][L0+M0][L1][Mul1][L1+M1]  = Complex(0.0,0.0);
273                   TmpKinr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
274                   TmpKint[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
275                   TmpKinp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
276 
277 		}
278 	      }
279 	    }
280 	  }
281 	}
282       }
283 
284       if (Spe_MaxL_Basis[Cwan]<Spe_MaxL_Basis[Hwan])
285         Lmax_Four_Int = 2*Spe_MaxL_Basis[Hwan];
286       else
287         Lmax_Four_Int = 2*Spe_MaxL_Basis[Cwan];
288 
289       /* allocate SphB and SphBp */
290 
291       SphB = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
292       for(l=0; l<(Lmax_Four_Int+3); l++){
293         SphB[l] = (double*)malloc(sizeof(double)*(OneD_Grid+1));
294       }
295 
296       SphBp = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
297       for(l=0; l<(Lmax_Four_Int+3); l++){
298         SphBp[l] = (double*)malloc(sizeof(double)*(OneD_Grid+1));
299       }
300 
301       tmp_SphB  = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
302       tmp_SphBp = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
303 
304       /* calculate SphB and SphBp */
305 
306       h = (kmax - kmin)/(double)OneD_Grid;
307 
308       for (i=0; i<=OneD_Grid; i++){
309         Normk = kmin + (double)i*h;
310         Spherical_Bessel(Normk*r,Lmax_Four_Int,tmp_SphB,tmp_SphBp);
311         for(l=0; l<=Lmax_Four_Int; l++){
312           SphB[l][i]  = tmp_SphB[l];
313           SphBp[l][i] = tmp_SphBp[l];
314 	}
315       }
316 
317       free(tmp_SphB);
318       free(tmp_SphBp);
319 
320       /* l loop */
321 
322       for(l=0; l<=Lmax_Four_Int; l++){
323 
324         for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
325 	  for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
326             for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
327 	      for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
328                 SumS0[L0][Mul0][L1][Mul1]  = 0.0;
329                 SumK0[L0][Mul0][L1][Mul1]  = 0.0;
330                 SumSr0[L0][Mul0][L1][Mul1] = 0.0;
331                 SumKr0[L0][Mul0][L1][Mul1] = 0.0;
332 	      }
333 	    }
334 	  }
335 	}
336 
337         h = (kmax - kmin)/(double)OneD_Grid;
338 	for (i=0; i<=OneD_Grid; i++){
339 
340           if (i==0 || i==OneD_Grid) coe0 = 0.50;
341           else                      coe0 = 1.00;
342 
343 	  Normk = kmin + (double)i*h;
344           Normk2 = Normk*Normk;
345 
346           sj  =  SphB[l][i];
347           sjp = SphBp[l][i];
348 
349 	  for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
350 	    for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
351 
352 	      Bessel_Pro0 = RF_BesselF(Cwan,L0,Mul0,Normk);
353 
354               tmp0 = coe0*h*Normk2*Bessel_Pro0;
355               tmp1 = tmp0*sj;
356               tmp2 = tmp0*Normk*sjp;
357 
358 	      for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
359 		for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
360 
361                   Bessel_Pro1 = RF_BesselF(Hwan,L1,Mul1,Normk);
362 
363                   tmp3 = tmp1*Bessel_Pro1;
364                   tmp4 = tmp2*Bessel_Pro1;
365 
366                   SumS0[L0][Mul0][L1][Mul1] += tmp3;
367                   SumK0[L0][Mul0][L1][Mul1] += tmp3*Normk2;
368 
369                   SumSr0[L0][Mul0][L1][Mul1] += tmp4;
370                   SumKr0[L0][Mul0][L1][Mul1] += tmp4*Normk2;
371 		}
372 	      }
373 
374 	    }
375 	  }
376 	}
377 
378         if (h_AN==0){
379 	  for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
380 	    for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
381 	      for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
382 		for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
383 		  SumSr0[L0][Mul0][L1][Mul1] = 0.0;
384 		  SumKr0[L0][Mul0][L1][Mul1] = 0.0;
385 		}
386 	      }
387 	    }
388 	  }
389 	}
390 
391         /****************************************************
392           For overlap and the derivative,
393           sum_m 8*(-i)^{-L0+L1+1}*
394                 C_{L0,-M0,L1,M1,l,m}*Y_{lm}
395                 \int RL(k)*RL'(k)*jl(k*R) k^2 dk^3,
396 
397           For kinetic,
398           sum_m 4*(-i)^{-L0+L1+1}*
399                 C_{L0,-M0,L1,M1,l,m}*
400                 \int RL(k)*RL'(k)*jl(k*R) k^4 dk^3,
401         ****************************************************/
402 
403         for(m=-l; m<=l; m++){
404 
405           ComplexSH(l,m,theta,phi,SH,dSHt,dSHp);
406           SH[1]   = -SH[1];
407           dSHt[1] = -dSHt[1];
408           dSHp[1] = -dSHp[1];
409 
410           CY  = Complex(SH[0],SH[1]);
411           CYt = Complex(dSHt[0],dSHt[1]);
412           CYp = Complex(dSHp[0],dSHp[1]);
413 
414           for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
415             for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
416               for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
417                 for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
418 
419                   Ls = -L0 + L1 + l;
420 
421                   if (abs(L1-l)<=L0 && L0<=(L1+l) ){
422 
423 		    Cpow = Im_pow(-1,Ls);
424 		    CY1  = Cmul(Cpow,CY);
425 		    CYt1 = Cmul(Cpow,CYt);
426 		    CYp1 = Cmul(Cpow,CYp);
427 
428 		    for (M0=-L0; M0<=L0; M0++){
429 
430 		      M1 = M0 - m;
431 
432                       if (abs(M1)<=L1){
433 
434                         gant = Gaunt(L0,M0,L1,M1,l,m);
435 
436                         /* S */
437 
438                         tmp0 = gant*SumS0[L0][Mul0][L1][Mul1];
439                         Ctmp2 = CRmul(CY1,tmp0);
440                         TmpOLP[L0][Mul0][L0+M0][L1][Mul1][L1+M1] =
441 			  Cadd(TmpOLP[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp2);
442 
443                         /* dS/dr */
444 
445                         tmp0 = gant*SumSr0[L0][Mul0][L1][Mul1];
446                         Ctmp2 = CRmul(CY1,tmp0);
447                         TmpOLPr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] =
448 			  Cadd(TmpOLPr[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp2);
449 
450                         /* dS/dt */
451 
452                         tmp0 = gant*SumS0[L0][Mul0][L1][Mul1];
453                         Ctmp2 = CRmul(CYt1,tmp0);
454                         TmpOLPt[L0][Mul0][L0+M0][L1][Mul1][L1+M1] =
455 			  Cadd(TmpOLPt[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp2);
456 
457                         /* dS/dp */
458 
459                         tmp0 = gant*SumS0[L0][Mul0][L1][Mul1];
460                         Ctmp2 = CRmul(CYp1,tmp0);
461                         TmpOLPp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] =
462 			  Cadd(TmpOLPp[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp2);
463 
464                         /* K */
465 
466                         tmp0 = gant*SumK0[L0][Mul0][L1][Mul1];
467                         Ctmp2 = CRmul(CY1,tmp0);
468                         TmpKin[L0][Mul0][L0+M0][L1][Mul1][L1+M1] =
469 			  Cadd(TmpKin[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp2);
470 
471                         /* dK/dr */
472 
473                         tmp0 = gant*SumKr0[L0][Mul0][L1][Mul1];
474                         Ctmp2 = CRmul(CY1,tmp0);
475                         TmpKinr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] =
476 			  Cadd(TmpKinr[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp2);
477 
478                         /* dK/dt */
479 
480                         tmp0 = gant*SumK0[L0][Mul0][L1][Mul1];
481                         Ctmp2 = CRmul(CYt1,tmp0);
482                         TmpKint[L0][Mul0][L0+M0][L1][Mul1][L1+M1] =
483 			  Cadd(TmpKint[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp2);
484 
485                         /* dK/dp */
486 
487                         tmp0 = gant*SumK0[L0][Mul0][L1][Mul1];
488                         Ctmp2 = CRmul(CYp1,tmp0);
489                         TmpKinp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] =
490 			  Cadd(TmpKinp[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp2);
491 
492 		      }
493 		    }
494 		  }
495 		}
496 	      }
497 
498 	    }
499 	  }
500         }
501       } /* l */
502 
503       /* free SphB and SphBp */
504 
505       for(l=0; l<(Lmax_Four_Int+3); l++){
506         free(SphB[l]);
507       }
508       free(SphB);
509 
510       for(l=0; l<(Lmax_Four_Int+3); l++){
511         free(SphBp[l]);
512       }
513       free(SphBp);
514 
515       /****************************************************
516                          Complex to Real
517       ****************************************************/
518 
519       num0 = 0;
520       for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
521 	for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
522 
523           num1 = 0;
524           for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
525 	    for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
526 
527     	      for (M0=-L0; M0<=L0; M0++){
528 		for (M1=-L1; M1<=L1; M1++){
529 
530                   CsumS0 = Complex(0.0,0.0);
531                   CsumSr = Complex(0.0,0.0);
532                   CsumSt = Complex(0.0,0.0);
533                   CsumSp = Complex(0.0,0.0);
534 
535                   CsumK0 = Complex(0.0,0.0);
536                   CsumKr = Complex(0.0,0.0);
537                   CsumKt = Complex(0.0,0.0);
538                   CsumKp = Complex(0.0,0.0);
539 
540      	          for (k=-L0; k<=L0; k++){
541 
542                     Ctmp1 = Conjg(Comp2Real[L0][L0+M0][L0+k]);
543 
544                     /* S */
545 
546                     Ctmp0 = TmpOLP[L0][Mul0][L0+k][L1][Mul1][L1+M1];
547                     Ctmp2 = Cmul(Ctmp1,Ctmp0);
548                     CsumS0 = Cadd(CsumS0,Ctmp2);
549 
550                     /* dS/dr */
551 
552                     Ctmp0 = TmpOLPr[L0][Mul0][L0+k][L1][Mul1][L1+M1];
553                     Ctmp2 = Cmul(Ctmp1,Ctmp0);
554                     CsumSr = Cadd(CsumSr,Ctmp2);
555 
556                     /* dS/dt */
557 
558                     Ctmp0 = TmpOLPt[L0][Mul0][L0+k][L1][Mul1][L1+M1];
559                     Ctmp2 = Cmul(Ctmp1,Ctmp0);
560                     CsumSt = Cadd(CsumSt,Ctmp2);
561 
562                     /* dS/dp */
563 
564                     Ctmp0 = TmpOLPp[L0][Mul0][L0+k][L1][Mul1][L1+M1];
565                     Ctmp2 = Cmul(Ctmp1,Ctmp0);
566                     CsumSp = Cadd(CsumSp,Ctmp2);
567 
568                     /* K */
569 
570                     Ctmp0 = TmpKin[L0][Mul0][L0+k][L1][Mul1][L1+M1];
571                     Ctmp2 = Cmul(Ctmp1,Ctmp0);
572                     CsumK0 = Cadd(CsumK0,Ctmp2);
573 
574                     /* dK/dr */
575 
576                     Ctmp0 = TmpKinr[L0][Mul0][L0+k][L1][Mul1][L1+M1];
577                     Ctmp2 = Cmul(Ctmp1,Ctmp0);
578                     CsumKr = Cadd(CsumKr,Ctmp2);
579 
580                     /* dK/dt */
581 
582                     Ctmp0 = TmpKint[L0][Mul0][L0+k][L1][Mul1][L1+M1];
583                     Ctmp2 = Cmul(Ctmp1,Ctmp0);
584                     CsumKt = Cadd(CsumKt,Ctmp2);
585 
586                     /* dK/dp */
587 
588                     Ctmp0 = TmpKinp[L0][Mul0][L0+k][L1][Mul1][L1+M1];
589                     Ctmp2 = Cmul(Ctmp1,Ctmp0);
590                     CsumKp = Cadd(CsumKp,Ctmp2);
591 
592 		  }
593 
594                   CmatS0[L0+M0][L1+M1] = CsumS0;
595                   CmatSr[L0+M0][L1+M1] = CsumSr;
596                   CmatSt[L0+M0][L1+M1] = CsumSt;
597                   CmatSp[L0+M0][L1+M1] = CsumSp;
598 
599                   CmatK0[L0+M0][L1+M1] = CsumK0;
600                   CmatKr[L0+M0][L1+M1] = CsumKr;
601                   CmatKt[L0+M0][L1+M1] = CsumKt;
602                   CmatKp[L0+M0][L1+M1] = CsumKp;
603 		}
604 	      }
605 
606     	      for (M0=-L0; M0<=L0; M0++){
607 		for (M1=-L1; M1<=L1; M1++){
608 
609                   CsumS_Lx = Complex(0.0,0.0);
610                   CsumS_Ly = Complex(0.0,0.0);
611                   CsumS_Lz = Complex(0.0,0.0);
612 
613                   CsumS0 = Complex(0.0,0.0);
614                   CsumSr = Complex(0.0,0.0);
615                   CsumSt = Complex(0.0,0.0);
616                   CsumSp = Complex(0.0,0.0);
617                   CsumK0 = Complex(0.0,0.0);
618                   CsumKr = Complex(0.0,0.0);
619                   CsumKt = Complex(0.0,0.0);
620                   CsumKp = Complex(0.0,0.0);
621 
622      	          for (k=-L1; k<=L1; k++){
623 
624                     /*** S_Lx ***/
625 
626                     /*  Y k+1 */
627                     if (k<L1){
628                       coe0 = sqrt((double)((L1-k)*(L1+k+1)));
629                       Ctmp1 = Cmul(CmatS0[L0+M0][L1+k+1],Comp2Real[L1][L1+M1][L1+k]);
630                       Ctmp1.r = 0.5*coe0*Ctmp1.r;
631                       Ctmp1.i = 0.5*coe0*Ctmp1.i;
632                       CsumS_Lx = Cadd(CsumS_Lx,Ctmp1);
633 		    }
634 
635                     /*  Y k-1 */
636                     if (-L1<k){
637                       coe1 = sqrt((double)((L1+k)*(L1-k+1)));
638                       Ctmp1 = Cmul(CmatS0[L0+M0][L1+k-1],Comp2Real[L1][L1+M1][L1+k]);
639                       Ctmp1.r = 0.5*coe1*Ctmp1.r;
640                       Ctmp1.i = 0.5*coe1*Ctmp1.i;
641                       CsumS_Lx = Cadd(CsumS_Lx,Ctmp1);
642 		    }
643 
644                     /*** S_Ly ***/
645 
646                     /*  Y k+1 */
647 
648                     if (k<L1){
649                       Ctmp1 = Cmul(CmatS0[L0+M0][L1+k+1],Comp2Real[L1][L1+M1][L1+k]);
650                       Ctmp2.r = 0.5*coe0*Ctmp1.i;
651                       Ctmp2.i =-0.5*coe0*Ctmp1.r;
652                       CsumS_Ly = Cadd(CsumS_Ly,Ctmp2);
653 		    }
654 
655                     /*  Y k-1 */
656 
657                     if (-L1<k){
658                       Ctmp1 = Cmul(CmatS0[L0+M0][L1+k-1],Comp2Real[L1][L1+M1][L1+k]);
659                       Ctmp2.r =-0.5*coe1*Ctmp1.i;
660                       Ctmp2.i = 0.5*coe1*Ctmp1.r;
661                       CsumS_Ly = Cadd(CsumS_Ly,Ctmp2);
662 		    }
663 
664                     /*** S_Lz ***/
665 
666                     Ctmp1    = Cmul(CmatS0[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
667                     Ctmp1.r = (double)k*Ctmp1.r;;
668                     Ctmp1.i = (double)k*Ctmp1.i;
669                     CsumS_Lz = Cadd(CsumS_Lz,Ctmp1);
670 
671                     /* S */
672 
673                     Ctmp1 = Cmul(CmatS0[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
674                     CsumS0 = Cadd(CsumS0,Ctmp1);
675 
676                     /* dS/dr */
677 
678                     Ctmp1 = Cmul(CmatSr[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
679                     CsumSr = Cadd(CsumSr,Ctmp1);
680 
681                     /* dS/dt */
682 
683                     Ctmp1 = Cmul(CmatSt[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
684                     CsumSt = Cadd(CsumSt,Ctmp1);
685 
686                     /* dS/dp */
687 
688                     Ctmp1 = Cmul(CmatSp[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
689                     CsumSp = Cadd(CsumSp,Ctmp1);
690 
691                     /* K */
692 
693                     Ctmp1 = Cmul(CmatK0[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
694                     CsumK0 = Cadd(CsumK0,Ctmp1);
695 
696                     /* dK/dr */
697 
698                     Ctmp1 = Cmul(CmatKr[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
699                     CsumKr = Cadd(CsumKr,Ctmp1);
700 
701                     /* dK/dt */
702 
703                     Ctmp1 = Cmul(CmatKt[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
704                     CsumKt = Cadd(CsumKt,Ctmp1);
705 
706                     /* dK/dp */
707 
708                     Ctmp1 = Cmul(CmatKp[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
709                     CsumKp = Cadd(CsumKp,Ctmp1);
710 
711 		  }
712 
713                   OLP_L[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*CsumS_Lx.i;
714                   OLP_L[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*CsumS_Ly.i;
715                   OLP_L[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*CsumS_Lz.i;
716 
717                   /* add a small value for stabilization of eigenvalue routine */
718 
719                   OLP[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*CsumS0.r + 1.0*rnd(1.0e-13);
720 		  H0[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 4.0*CsumK0.r;
721 
722                   if (h_AN!=0){
723 
724                     if (fabs(siT)<10e-14){
725 
726 		      OLP[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
727 			-8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r);
728 
729 		      OLP[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
730 			-8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r);
731 
732 		      OLP[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
733 			-8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
734 
735 		      H0[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
736 			-4.0*(siT*coP*CsumKr.r + coT*coP/r*CsumKt.r);
737 
738 		      H0[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
739 			-4.0*(siT*siP*CsumKr.r + coT*siP/r*CsumKt.r);
740 
741 		      H0[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
742 			-4.0*(coT*CsumKr.r - siT/r*CsumKt.r);
743                     }
744 
745                     else{
746 
747 		      OLP[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
748 			-8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r
749 			     - siP/siT/r*CsumSp.r);
750 
751 		      OLP[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
752 			-8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r
753 			     + coP/siT/r*CsumSp.r);
754 
755 		      OLP[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
756 			-8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
757 
758 		      H0[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
759 			-4.0*(siT*coP*CsumKr.r + coT*coP/r*CsumKt.r
760 			     - siP/siT/r*CsumKp.r);
761 
762 		      H0[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
763 			-4.0*(siT*siP*CsumKr.r + coT*siP/r*CsumKt.r
764 			     + coP/siT/r*CsumKp.r);
765 
766 		      H0[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
767 			-4.0*(coT*CsumKr.r - siT/r*CsumKt.r);
768 		    }
769 
770 		  }
771 		  else{
772 		    OLP[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
773 	            OLP[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
774 		    OLP[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
775  	            H0[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
776 	            H0[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
777 	            H0[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
778                   }
779 
780 		}
781 	      }
782 
783               num1 = num1 + 2*L1 + 1;
784 	    }
785 	  }
786 
787           num0 = num0 + 2*L0 + 1;
788 	}
789       }
790 
791       dtime(&Etime_atom);
792       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
793     } /* end of loop for Nloop */
794 
795 
796     /* freeing of arrays */
797     Free6D_dcomplex(TmpOLP);
798     Free6D_dcomplex(TmpOLPr);
799     Free6D_dcomplex(TmpOLPt);
800     Free6D_dcomplex(TmpOLPp);
801     Free6D_dcomplex(TmpKin);
802     Free6D_dcomplex(TmpKinr);
803     Free6D_dcomplex(TmpKint);
804     Free6D_dcomplex(TmpKinp);
805 
806     Free4D_double(SumS0);
807     Free4D_double(SumK0);
808     Free4D_double(SumSr0);
809     Free4D_double(SumKr0);
810 
811     Free2D_dcomplex(CmatS0);
812     Free2D_dcomplex(CmatSr);
813     Free2D_dcomplex(CmatSt);
814     Free2D_dcomplex(CmatSp);
815     Free2D_dcomplex(CmatK0);
816     Free2D_dcomplex(CmatKr);
817     Free2D_dcomplex(CmatKt);
818     Free2D_dcomplex(CmatKp);
819 
820   } /* #pragma omp parallel */
821 
822   /****************************************************
823                    freeing of arrays:
824   ****************************************************/
825 
826   free(OneD2h_AN);
827   free(OneD2Mc_AN);
828 
829   /* for time */
830   dtime(&TEtime);
831   time0 = TEtime - TStime;
832 
833   return time0;
834 }
835 
836 
Allocate6D_dcomplex(int size_1,int size_2,int size_3,int size_4,int size_5,int size_6)837 inline dcomplex****** Allocate6D_dcomplex(int size_1, int size_2, int size_3, int size_4, int size_5, int size_6)
838 {
839   int i, j, k, l, m, p;
840 
841   dcomplex****** buffer = (dcomplex******)malloc(sizeof(dcomplex*****)*size_1);
842   buffer[0] = (dcomplex*****)malloc(sizeof(dcomplex****)*size_1*size_2);
843   buffer[0][0] = (dcomplex****)malloc(sizeof(dcomplex***)*size_1*size_2*size_3);
844   buffer[0][0][0] = (dcomplex***)malloc(sizeof(dcomplex**)*size_1*size_2*size_3*size_4);
845   buffer[0][0][0][0] = (dcomplex**)malloc(sizeof(dcomplex*)*size_1*size_2*size_3*size_4*size_5);
846   buffer[0][0][0][0][0] = (dcomplex*)malloc(sizeof(dcomplex)*size_1*size_2*size_3*size_4*size_5*size_6);
847 
848   for (i=0; i<size_1; i++){
849     buffer[i] = buffer[0] + i * size_2;
850     for (j=0; j<size_2; j++){
851       buffer[i][j] = buffer[0][0] + (i * size_2 + j) * size_3;
852       for (k=0; k<size_3; k++){
853 	buffer[i][j][k] = buffer[0][0][0] + ((i * size_2 + j) * size_3 + k) * size_4;
854 	for (l=0; l<size_4; l++){
855 	  buffer[i][j][k][l] = buffer[0][0][0][0] + (((i * size_2 + j) * size_3 + k) * size_4 + l) * size_5;
856 	  for (m=0; m<size_5; m++){
857 	    buffer[i][j][k][l][m] = buffer[0][0][0][0][0] + ((((i * size_2 + j) * size_3 + k) * size_4 + l) * size_5 + m) * size_6;
858 	    for (p=0; p<size_6; p++) buffer[i][j][k][l][m][p] = Complex(0.0,0.0);
859 	  }
860 	}
861       }
862     }
863   }
864 
865   return buffer;
866 }
867 
Allocate4D_double(int size_1,int size_2,int size_3,int size_4)868 inline double**** Allocate4D_double(int size_1, int size_2, int size_3, int size_4)
869 {
870   int i, j, k, l;
871 
872   double**** buffer = (double****)malloc(sizeof(double***)*size_1);
873   buffer[0] = (double***)malloc(sizeof(double**)*size_1*size_2);
874   buffer[0][0] = (double**)malloc(sizeof(double*)*size_1*size_2*size_3);
875   buffer[0][0][0] = (double*)malloc(sizeof(double)*size_1*size_2*size_3*size_4);
876 
877   for (i=0; i<size_1; i++){
878     buffer[i] = buffer[0] + i * size_2;
879     for (j=0; j<size_2; j++){
880       buffer[i][j] = buffer[0][0] + (i * size_2 + j) * size_3;
881       for (k=0; k<size_3; k++){
882 	buffer[i][j][k] = buffer[0][0][0] + ((i * size_2 + j) * size_3 + k) * size_4;
883 	for (l=0; l<size_4; l++){
884 	  buffer[i][j][k][l] = 0.0;
885 	}
886       }
887     }
888   }
889 
890   return buffer;
891 }
892 
Allocate2D_dcomplex(int size_1,int size_2)893 inline dcomplex** Allocate2D_dcomplex(int size_1, int size_2)
894 {
895   int i, j;
896 
897   dcomplex** buffer = (dcomplex**)malloc(sizeof(dcomplex*)*size_1);
898   buffer[0] = (dcomplex*)malloc(sizeof(dcomplex)*size_1*size_2);
899 
900   for (i=0; i<size_1; i++){
901     buffer[i] = buffer[0] + i * size_2;
902     for (j=0; j<size_2; j++){
903       buffer[i][j] = Complex(0.0,0.0);
904     }
905   }
906 
907   return buffer;
908 }
909 
Free6D_dcomplex(dcomplex ****** buffer)910 inline void Free6D_dcomplex(dcomplex****** buffer)
911 {
912   free(buffer[0][0][0][0][0]);
913   free(buffer[0][0][0][0]);
914   free(buffer[0][0][0]);
915   free(buffer[0][0]);
916   free(buffer[0]);
917   free(buffer);
918 }
919 
Free4D_double(double **** buffer)920 inline void Free4D_double(double**** buffer)
921 {
922   free(buffer[0][0][0]);
923   free(buffer[0][0]);
924   free(buffer[0]);
925   free(buffer);
926 }
927 
Free2D_dcomplex(dcomplex ** buffer)928 inline void Free2D_dcomplex(dcomplex** buffer)
929 {
930   free(buffer[0]);
931   free(buffer);
932 }
933 
934 
935 
936