1 /**********************************************************************
2 Set_ProExpn_VNA.c:
3
4 Set_ProExpn_VNA.c is a subroutine to calculate matrix elements and
5 the derivatives of VNA projector expansion in the momentum space.
6
7 Log of Set_ProExpn_VNA.c:
8
9 8/Apr/2004 Released by T.Ozaki
10
11 ***********************************************************************/
12
13 #define measure_time 0
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <math.h>
18 #include <string.h>
19 #include <time.h>
20 #include <sys/types.h>
21 #include <sys/times.h>
22 #include <sys/time.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include "openmx_common.h"
26 #include "mpi.h"
27 #include <omp.h>
28
29 static double Set_ProExpn(double ****HVNA, Type_DS_VNA *****DS_VNA);
30 static double Set_VNA2(double ****HVNA, double *****HVNA2);
31 static double Set_VNA3(double *****HVNA3);
32
33 #ifdef kcomp
34 static void Spherical_Bessel2( double x, int lmax, double *sb, double *dsb );
35 #else
36 inline void Spherical_Bessel2( double x, int lmax, double *sb, double *dsb );
37 #endif
38
39
Set_ProExpn_VNA(double **** HVNA,double ***** HVNA2,Type_DS_VNA ***** DS_VNA)40 double Set_ProExpn_VNA(double ****HVNA, double *****HVNA2, Type_DS_VNA *****DS_VNA)
41 {
42 double time1,time2,time3;
43
44 /* separable form */
45
46 time1 = Set_ProExpn(HVNA, DS_VNA);
47
48 /* one-center (orbitals) but two-center integrals, <Phi_{LM,L'M'}|VNA> */
49
50 time2 = Set_VNA2(HVNA, HVNA2);
51
52 /* one-center (orbitals) but two-center integrals, <VNA|Phi_{LM,L'M'}> */
53
54 time3 = Set_VNA3(HVNA3);
55
56 if (measure_time){
57 printf("Time Set_ProExpn=%7.3f Set_VNA2=%7.3f Set_VNA3=%7.3f\n",time1,time2,time3);
58 }
59
60 return time1+time2+time3;
61 }
62
63
64
Set_ProExpn(double **** HVNA,Type_DS_VNA ***** DS_VNA)65 double Set_ProExpn(double ****HVNA, Type_DS_VNA *****DS_VNA)
66 {
67 /****************************************************
68 evaluate matrix elements of neutral atom potential
69 in the KB or Blochl separable form in momentum space.
70 ****************************************************/
71 static int firsttime=1;
72 int L2,L3,L,i,j,Mc_AN,Gc_AN,h_AN,Cwan,i1,j1;
73 int kk,Ls,n,j0,jg0,Mj_AN0,m,L1,GL,Mul1;
74 int tno0,tno1,tno2,p,q,po1,po,Original_Mc_AN;
75 int L0,Mul0,spe,Gh_AN,Hwan,fan,jg,k,kg,wakg,kl;
76 int size_SumNL0,size_TmpNL;
77 int Mc_AN2,Gc_AN2,num,size1,size2;
78 int *Snd_DS_VNA_Size,*Rcv_DS_VNA_Size;
79 double kmin,kmax,Sk,Dk,Normk,dmp;
80 Type_DS_VNA *tmp_array;
81 Type_DS_VNA *tmp_array2;
82 double tmp0,tmp3,tmp4,tmp5,tmp10;
83 double ****Bessel_Pro00;
84 double ****Bessel_Pro01;
85 double ene,sum,h,coe0,sj,sy,sjp,syp;
86 double rcutA,rcutB,rcut;
87 double TStime,TEtime;
88 double stime,etime;
89 double Stime_atom,Etime_atom;
90 int numprocs,myid,tag=999,ID,IDS,IDR;
91 int *VNA_List;
92 int *VNA_List2;
93 int Num_RVNA;
94 double **NLH;
95 double time0,time1,time2,time3,time4,time5;
96 dcomplex sum0,sum1,sum2;
97
98 MPI_Status stat;
99 MPI_Request request;
100
101 int OneD_Nloop,*OneD2Mc_AN,*OneD2h_AN;
102
103 /* MPI */
104 MPI_Comm_size(mpi_comm_level1,&numprocs);
105 MPI_Comm_rank(mpi_comm_level1,&myid);
106
107 dtime(&TStime);
108
109 /****************************************************
110 allocation of arrays:
111 ****************************************************/
112
113 Num_RVNA = List_YOUSO[34]*(List_YOUSO[35] + 1);
114
115 Snd_DS_VNA_Size = (int*)malloc(sizeof(int)*numprocs);
116 Rcv_DS_VNA_Size = (int*)malloc(sizeof(int)*numprocs);
117
118 VNA_List = (int*)malloc(sizeof(int)*(List_YOUSO[34]*(List_YOUSO[35] + 1)+2) );
119 VNA_List2 = (int*)malloc(sizeof(int)*(List_YOUSO[34]*(List_YOUSO[35] + 1)+2) );
120
121 Bessel_Pro00 = (double****)malloc(sizeof(double***)*SpeciesNum);
122 for (spe=0; spe<SpeciesNum; spe++){
123 Bessel_Pro00[spe] = (double***)malloc(sizeof(double**)*(Spe_MaxL_Basis[spe]+1));
124 for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
125 Bessel_Pro00[spe][L0] = (double**)malloc(sizeof(double*)*Spe_Num_Basis[spe][L0]);
126 for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
127 Bessel_Pro00[spe][L0][Mul0] = (double*)malloc(sizeof(double)*GL_Mesh);
128 }
129 }
130 }
131
132 Bessel_Pro01 = (double****)malloc(sizeof(double***)*SpeciesNum);
133 for (spe=0; spe<SpeciesNum; spe++){
134 Bessel_Pro01[spe] = (double***)malloc(sizeof(double**)*(Spe_MaxL_Basis[spe]+1));
135 for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
136 Bessel_Pro01[spe][L0] = (double**)malloc(sizeof(double*)*Spe_Num_Basis[spe][L0]);
137 for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
138 Bessel_Pro01[spe][L0][Mul0] = (double*)malloc(sizeof(double)*GL_Mesh);
139 }
140 }
141 }
142
143 size_TmpNL = (List_YOUSO[25]+1)*List_YOUSO[24]*
144 (2*(List_YOUSO[25]+1)+1)*Num_RVNA*(2*List_YOUSO[30]+1);
145 size_SumNL0 = (List_YOUSO[25]+1)*List_YOUSO[24]*(Num_RVNA+1);
146
147 /* PrintMemory */
148 if (firsttime) {
149 PrintMemory("Set_ProExpn_VNA: SumNL0",sizeof(double)*size_SumNL0,NULL);
150 PrintMemory("Set_ProExpn_VNA: SumNLr0",sizeof(double)*size_SumNL0,NULL);
151 PrintMemory("Set_ProExpn_VNA: TmpNL", sizeof(dcomplex)*size_TmpNL,NULL);
152 PrintMemory("Set_ProExpn_VNA: TmpNLr",sizeof(dcomplex)*size_TmpNL,NULL);
153 PrintMemory("Set_ProExpn_VNA: TmpNLt",sizeof(dcomplex)*size_TmpNL,NULL);
154 PrintMemory("Set_ProExpn_VNA: TmpNLp",sizeof(dcomplex)*size_TmpNL,NULL);
155 firsttime=0;
156 }
157
158 if (measure_time){
159 time1 = 0.0;
160 time2 = 0.0;
161 time3 = 0.0;
162 time4 = 0.0;
163 time5 = 0.0;
164 }
165
166 /************************************************************
167 Spe_Num_RVPS[Hwan] -> Num_RVNA = List_YOUSO[34]*(List_YOUSO[35]+1)
168 Spe_VPS_List[Hwan][L] -> VNA_List[L] = 0,0,0,.., 1,1,1,.., 2,2,2,..,
169 List_YOUSO[19] -> Num_RVNA
170 List_YOUSO[30] -> List_YOUSO[35]
171 *************************************************************/
172
173 L = 0;
174 for (i=0; i<=List_YOUSO[35]; i++){ /* max L */
175 for (j=0; j<List_YOUSO[34]; j++){ /* # of radial projectors */
176 VNA_List[L] = i;
177 VNA_List2[L] = j;
178 L++;
179 }
180 }
181
182 /************************************************************
183 pre-calculate the time consuming part in the 'time1' part
184 *************************************************************/
185
186 kmin = Radial_kmin;
187 kmax = PAO_Nkmax;
188 Sk = kmax + kmin;
189 Dk = kmax - kmin;
190
191 for (spe=0; spe<SpeciesNum; spe++){
192
193 for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
194 for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
195
196 #pragma omp parallel shared(Mul0,L0,spe,Bessel_Pro01,Bessel_Pro00,GL_Weight,Dk,GL_NormK) private(i,Normk,tmp10)
197 {
198
199 int OMPID,Nthrds,Nprocs;
200
201 /* get info. on OpenMP */
202
203 OMPID = omp_get_thread_num();
204 Nthrds = omp_get_num_threads();
205 Nprocs = omp_get_num_procs();
206
207 for (i=OMPID*GL_Mesh/Nthrds; i<(OMPID+1)*GL_Mesh/Nthrds; i++){
208
209 Normk = GL_NormK[i];
210 tmp10 = 0.50*Dk*GL_Weight[i]*Normk*Normk;
211
212 Bessel_Pro00[spe][L0][Mul0][i] = RF_BesselF(spe,L0,Mul0,Normk)*tmp10;
213 Bessel_Pro01[spe][L0][Mul0][i] = Bessel_Pro00[spe][L0][Mul0][i]*Normk;
214 }
215
216 #pragma omp flush(Bessel_Pro00,Bessel_Pro01)
217
218 } /* #pragma omp parallel */
219
220 }
221 }
222 }
223
224 /************************************************************
225 start the main calculation
226 *************************************************************/
227
228 /* one-dimensionalize the Mc_AN and h_AN loops */
229
230 OneD_Nloop = 0;
231 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
232 Gc_AN = M2G[Mc_AN];
233 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
234 OneD_Nloop++;
235 }
236 }
237
238 OneD2Mc_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
239 OneD2h_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
240
241 OneD_Nloop = 0;
242 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
243 Gc_AN = M2G[Mc_AN];
244 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
245 OneD2Mc_AN[OneD_Nloop] = Mc_AN;
246 OneD2h_AN[OneD_Nloop] = h_AN;
247 OneD_Nloop++;
248 }
249 }
250
251 #pragma omp parallel shared(time_per_atom,DS_VNA,Comp2Real,Spe_VNA_Bessel,Bessel_Pro01,Bessel_Pro00,GL_NormK,List_YOUSO,VNA_List,Num_RVNA,Spe_Num_Basis,Spe_MaxL_Basis,PAO_Nkmax,atv,Gxyz,ncn,natn,Spe_Atom_Cut1,WhatSpecies,M2G,OneD2h_AN,OneD2Mc_AN,OneD_Nloop,time1,time2,time3)
252
253 {
254 int i,j,k,l,m,num0,num1;
255 int Mc_AN,h_AN,Gc_AN,Cwan,Gh_AN;
256 int Rnh,Hwan,L0,Mul0,L,L1,M0,M1,LL,Mul1,Ls;
257 int OMPID,Nthrds,Nprocs,Nloop;
258 int Lmax,Lmax_Four_Int;
259 double Stime_atom,Etime_atom;
260 double rcutA,rcutB,rcut;
261 double dx,dy,dz;
262 double kmin,kmax,Sk,Dk;
263 double gant,SH[2],dSHt[2],dSHp[2];
264 double S_coordinate[3];
265 double r,theta,phi;
266 double Normk,tmp0,tmp1,tmp2;
267 double siT,coT,siP,coP;
268 double ***SumNL0;
269 double ***SumNLr0;
270 double *Bes00;
271 double *Bes01;
272 double tmp_SphB[30];
273 double tmp_SphBp[30];
274 double SphB[30][GL_Mesh];
275 double SphBp[30][GL_Mesh];
276 double stime,etime;
277
278 dcomplex Ctmp1,Ctmp0,Ctmp2;
279 dcomplex CsumNL0,CsumNLr,CsumNLt,CsumNLp;
280 dcomplex CY,CYt,CYp,CY1,CYt1,CYp1,Cpow;
281 dcomplex *****TmpNL;
282 dcomplex *****TmpNLr;
283 dcomplex *****TmpNLt;
284 dcomplex *****TmpNLp;
285 dcomplex **CmatNL0;
286 dcomplex **CmatNLr;
287 dcomplex **CmatNLt;
288 dcomplex **CmatNLp;
289
290 /* allocation of arrays */
291
292 TmpNL = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
293 for (i=0; i<(List_YOUSO[25]+1); i++){
294 TmpNL[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
295 for (j=0; j<List_YOUSO[24]; j++){
296 TmpNL[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
297 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
298 TmpNL[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
299 for (l=0; l<Num_RVNA; l++){
300 TmpNL[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
301 }
302 }
303 }
304 }
305
306 TmpNLr = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
307 for (i=0; i<(List_YOUSO[25]+1); i++){
308 TmpNLr[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
309 for (j=0; j<List_YOUSO[24]; j++){
310 TmpNLr[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
311 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
312 TmpNLr[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
313 for (l=0; l<Num_RVNA; l++){
314 TmpNLr[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
315 }
316 }
317 }
318 }
319
320 TmpNLt = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
321 for (i=0; i<(List_YOUSO[25]+1); i++){
322 TmpNLt[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
323 for (j=0; j<List_YOUSO[24]; j++){
324 TmpNLt[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
325 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
326 TmpNLt[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
327 for (l=0; l<Num_RVNA; l++){
328 TmpNLt[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
329 }
330 }
331 }
332 }
333
334 TmpNLp = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
335 for (i=0; i<(List_YOUSO[25]+1); i++){
336 TmpNLp[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
337 for (j=0; j<List_YOUSO[24]; j++){
338 TmpNLp[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
339 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
340 TmpNLp[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
341 for (l=0; l<Num_RVNA; l++){
342 TmpNLp[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
343 }
344 }
345 }
346 }
347
348 SumNL0 = (double***)malloc(sizeof(double**)*(List_YOUSO[25]+1));
349 for (i=0; i<(List_YOUSO[25]+1); i++){
350 SumNL0[i] = (double**)malloc(sizeof(double*)*List_YOUSO[24]);
351 for (j=0; j<List_YOUSO[24]; j++){
352 SumNL0[i][j] = (double*)malloc(sizeof(double)*Num_RVNA);
353 }
354 }
355
356 SumNLr0 = (double***)malloc(sizeof(double**)*(List_YOUSO[25]+1));
357 for (i=0; i<(List_YOUSO[25]+1); i++){
358 SumNLr0[i] = (double**)malloc(sizeof(double*)*List_YOUSO[24]);
359 for (j=0; j<List_YOUSO[24]; j++){
360 SumNLr0[i][j] = (double*)malloc(sizeof(double)*Num_RVNA);
361 }
362 }
363
364 CmatNL0 = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
365 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
366 CmatNL0[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
367 }
368
369 CmatNLr = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
370 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
371 CmatNLr[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
372 }
373
374 CmatNLt = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
375 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
376 CmatNLt[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
377 }
378
379 CmatNLp = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
380 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
381 CmatNLp[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
382 }
383
384 Bes00 = (double*)malloc(sizeof(double)*GL_Mesh);
385 Bes01 = (double*)malloc(sizeof(double)*GL_Mesh);
386
387 /* get info. on OpenMP */
388
389 OMPID = omp_get_thread_num();
390 Nthrds = omp_get_num_threads();
391 Nprocs = omp_get_num_procs();
392
393 /* one-dimensionalized loop */
394
395 for (Nloop=OMPID*OneD_Nloop/Nthrds; Nloop<(OMPID+1)*OneD_Nloop/Nthrds; Nloop++){
396
397 dtime(&Stime_atom);
398
399 /* get Mc_AN and h_AN */
400
401 Mc_AN = OneD2Mc_AN[Nloop];
402 h_AN = OneD2h_AN[Nloop];
403
404 /* set data on Mc_AN */
405
406 Gc_AN = M2G[Mc_AN];
407 Cwan = WhatSpecies[Gc_AN];
408 rcutA = Spe_Atom_Cut1[Cwan];
409
410 /* set data on h_AN */
411
412 Gh_AN = natn[Gc_AN][h_AN];
413 Rnh = ncn[Gc_AN][h_AN];
414 Hwan = WhatSpecies[Gh_AN];
415 rcutB = Spe_Atom_Cut1[Hwan];
416 rcut = rcutA + rcutB;
417
418 dx = Gxyz[Gh_AN][1] + atv[Rnh][1] - Gxyz[Gc_AN][1];
419 dy = Gxyz[Gh_AN][2] + atv[Rnh][2] - Gxyz[Gc_AN][2];
420 dz = Gxyz[Gh_AN][3] + atv[Rnh][3] - Gxyz[Gc_AN][3];
421
422 xyz2spherical(dx,dy,dz,0.0,0.0,0.0,S_coordinate);
423 r = S_coordinate[0];
424 theta = S_coordinate[1];
425 phi = S_coordinate[2];
426
427 /* for empty atoms or finite elemens basis */
428
429 if (r<1.0e-10) r = 1.0e-10;
430
431 /* precalculation of sin and cos */
432
433 siT = sin(theta);
434 coT = cos(theta);
435 siP = sin(phi);
436 coP = cos(phi);
437
438 /****************************************************
439 evaluate ovelap integrals <chi0|P> between PAOs
440 and progectors of nonlocal VNA potentials.
441 ****************************************************/
442 /****************************************************
443 \int RL(k)*RL'(k)*jl(k*R) k^2 dk^3
444 ****************************************************/
445
446 kmin = Radial_kmin;
447 kmax = PAO_Nkmax;
448 Sk = kmax + kmin;
449 Dk = kmax - kmin;
450
451 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
452 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
453 for (L=0; L<Num_RVNA; L++){
454 L1 = VNA_List[L];
455 for (M0=-L0; M0<=L0; M0++){
456 for (M1=-L1; M1<=L1; M1++){
457 TmpNL[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0);
458 TmpNLr[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0);
459 TmpNLt[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0);
460 TmpNLp[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0);
461 }
462 }
463 }
464 }
465 }
466
467 Lmax = -10;
468 for (L=0; L<Num_RVNA; L++){
469 if (Lmax<VNA_List[L]) Lmax = VNA_List[L];
470 }
471 if (Spe_MaxL_Basis[Cwan]<Lmax)
472 Lmax_Four_Int = 2*Lmax;
473 else
474 Lmax_Four_Int = 2*Spe_MaxL_Basis[Cwan];
475
476 /* calculate SphB and SphBp */
477 #ifdef kcomp
478 #else
479 #pragma forceinline recursive
480 #endif
481 for (i=0; i<GL_Mesh; i++){
482 Normk = GL_NormK[i];
483 Spherical_Bessel2(Normk*r,Lmax_Four_Int,tmp_SphB,tmp_SphBp);
484 for(LL=0; LL<=Lmax_Four_Int; LL++){
485 SphB[LL][i] = tmp_SphB[LL];
486 SphBp[LL][i] = tmp_SphBp[LL];
487 }
488 }
489
490 /* LL loop */
491
492 for(LL=0; LL<=Lmax_Four_Int; LL++){
493
494 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
495 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
496 for (L=0; L<Num_RVNA; L++){
497 SumNL0[L0][Mul0][L] = 0.0;
498 SumNLr0[L0][Mul0][L] = 0.0;
499 }
500 }
501 }
502
503 /* Gauss-Legendre quadrature */
504
505 if (measure_time) dtime(&stime);
506
507 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
508 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
509
510 for (i=0; i<GL_Mesh; i++){
511 Bes00[i] = Bessel_Pro00[Cwan][L0][Mul0][i]*SphB[LL][i];
512 Bes01[i] = Bessel_Pro01[Cwan][L0][Mul0][i]*SphBp[LL][i];
513 }
514
515 L = 0;
516 for (L1=0; L1<=List_YOUSO[35]; L1++){
517 for (Mul1=0; Mul1<List_YOUSO[34]; Mul1++){
518
519 tmp1 = 0.0;
520 tmp2 = 0.0;
521
522 for (i=0; i<GL_Mesh; i++){
523 tmp1 += Bes00[i]*Spe_VNA_Bessel[Hwan][L1][Mul1][i];
524 tmp2 += Bes01[i]*Spe_VNA_Bessel[Hwan][L1][Mul1][i];
525 }
526
527 SumNL0[L0][Mul0][L] = tmp1;
528 SumNLr0[L0][Mul0][L] = tmp2;
529
530 L++;
531 }
532 }
533 }
534 }
535
536 if (measure_time && OMPID==0){
537 dtime(&etime);
538 time1 += etime - stime;
539 }
540
541 /* derivatives of "on site" */
542
543 if (h_AN==0){
544 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
545 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
546 for (L=0; L<Num_RVNA; L++){
547 SumNLr0[L0][Mul0][L] = 0.0;
548 }
549 }
550 }
551 }
552
553 /****************************************************
554 for "overlap",
555 sum_m 8*(-i)^{-L0+L1+l}*
556 C_{L0,-M0,L1,M1,l,m}*
557 \int RL(k)*RL'(k)*jl(k*R) k^2 dk^3,
558 ****************************************************/
559
560 if (measure_time && OMPID==0) dtime(&stime);
561
562 for(m=-LL; m<=LL; m++){
563
564 ComplexSH(LL,m,theta,phi,SH,dSHt,dSHp);
565
566 SH[1] = -SH[1];
567 dSHt[1] = -dSHt[1];
568 dSHp[1] = -dSHp[1];
569
570 CY = Complex(SH[0],SH[1]);
571 CYt = Complex(dSHt[0],dSHt[1]);
572 CYp = Complex(dSHp[0],dSHp[1]);
573
574 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
575 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
576 for (L=0; L<Num_RVNA; L++){
577
578 L1 = VNA_List[L];
579 Ls = -L0 + L1 + LL;
580
581 if ( abs(L1-LL)<=L0 && L0<=(L1+LL) ){
582
583 Cpow = Im_pow(-1,Ls);
584 CY1 = Cmul(Cpow,CY);
585 CYt1 = Cmul(Cpow,CYt);
586 CYp1 = Cmul(Cpow,CYp);
587
588 for (M0=-L0; M0<=L0; M0++){
589
590 M1 = M0 - m;
591
592 if (abs(M1)<=L1){
593
594 gant = Gaunt(L0,M0,L1,M1,LL,m);
595 tmp2 = gant*SumNL0[L0][Mul0][L];
596
597 /* S */
598
599 TmpNL[L0][Mul0][L0+M0][L][L1+M1].r += CY1.r*tmp2;
600 TmpNL[L0][Mul0][L0+M0][L][L1+M1].i += CY1.i*tmp2;
601
602 /* dS/dr */
603
604 tmp0 = gant*SumNLr0[L0][Mul0][L];
605
606 TmpNLr[L0][Mul0][L0+M0][L][L1+M1].r += CY1.r*tmp0;
607 TmpNLr[L0][Mul0][L0+M0][L][L1+M1].i += CY1.i*tmp0;
608
609 /* dS/dt */
610
611 TmpNLt[L0][Mul0][L0+M0][L][L1+M1].r += CYt1.r*tmp2;
612 TmpNLt[L0][Mul0][L0+M0][L][L1+M1].i += CYt1.i*tmp2;
613
614 /* dS/dp */
615
616 TmpNLp[L0][Mul0][L0+M0][L][L1+M1].r += CYp1.r*tmp2;
617 TmpNLp[L0][Mul0][L0+M0][L][L1+M1].i += CYp1.i*tmp2;
618
619 }
620 }
621 }
622
623 }
624 }
625 }
626 } /* m */
627
628 if (measure_time && OMPID==0){
629 dtime(&etime);
630 time2 += etime - stime;
631 }
632
633 } /* LL */
634
635 /****************************************************
636 Complex to Real
637 ****************************************************/
638
639 if (measure_time) dtime(&stime);
640
641 num0 = 0;
642 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
643 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
644
645 num1 = 0;
646 for (L=0; L<Num_RVNA; L++){
647 L1 = VNA_List[L];
648
649 for (M0=-L0; M0<=L0; M0++){
650 for (M1=-L1; M1<=L1; M1++){
651
652 CsumNL0 = Complex(0.0,0.0);
653 CsumNLr = Complex(0.0,0.0);
654 CsumNLt = Complex(0.0,0.0);
655 CsumNLp = Complex(0.0,0.0);
656
657 for (k=-L0; k<=L0; k++){
658
659 Ctmp1 = Conjg(Comp2Real[L0][L0+M0][L0+k]);
660
661 /* S */
662
663 Ctmp0 = TmpNL[L0][Mul0][L0+k][L][L1+M1];
664 Ctmp2 = Cmul(Ctmp1,Ctmp0);
665 CsumNL0 = Cadd(CsumNL0,Ctmp2);
666
667 /* dS/dr */
668
669 Ctmp0 = TmpNLr[L0][Mul0][L0+k][L][L1+M1];
670 Ctmp2 = Cmul(Ctmp1,Ctmp0);
671 CsumNLr = Cadd(CsumNLr,Ctmp2);
672
673 /* dS/dt */
674
675 Ctmp0 = TmpNLt[L0][Mul0][L0+k][L][L1+M1];
676 Ctmp2 = Cmul(Ctmp1,Ctmp0);
677 CsumNLt = Cadd(CsumNLt,Ctmp2);
678
679 /* dS/dp */
680
681 Ctmp0 = TmpNLp[L0][Mul0][L0+k][L][L1+M1];
682 Ctmp2 = Cmul(Ctmp1,Ctmp0);
683 CsumNLp = Cadd(CsumNLp,Ctmp2);
684
685 }
686
687 CmatNL0[L0+M0][L1+M1] = CsumNL0;
688 CmatNLr[L0+M0][L1+M1] = CsumNLr;
689 CmatNLt[L0+M0][L1+M1] = CsumNLt;
690 CmatNLp[L0+M0][L1+M1] = CsumNLp;
691
692 }
693 }
694
695 for (M0=-L0; M0<=L0; M0++){
696 for (M1=-L1; M1<=L1; M1++){
697
698 CsumNL0 = Complex(0.0,0.0);
699 CsumNLr = Complex(0.0,0.0);
700 CsumNLt = Complex(0.0,0.0);
701 CsumNLp = Complex(0.0,0.0);
702
703 for (k=-L1; k<=L1; k++){
704
705 /* S */
706
707 CsumNL0.r += (CmatNL0[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
708 - CmatNL0[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);
709
710 CsumNL0.i += (CmatNL0[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
711 + CmatNL0[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);
712
713 /* dS/dr */
714
715 CsumNLr.r += (CmatNLr[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
716 - CmatNLr[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);
717
718 CsumNLr.i += (CmatNLr[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
719 + CmatNLr[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);
720
721 /* dS/dt */
722
723 CsumNLt.r += (CmatNLt[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
724 - CmatNLt[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);
725
726 CsumNLt.i += (CmatNLt[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
727 + CmatNLt[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);
728
729 /* dS/dp */
730
731 CsumNLp.r += (CmatNLp[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
732 - CmatNLp[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);
733
734 CsumNLp.i += (CmatNLp[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
735 + CmatNLp[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);
736
737 }
738
739 DS_VNA[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*(Type_DS_VNA)CsumNL0.r;
740
741 if (h_AN!=0){
742
743 if (fabs(siT)<10e-14){
744
745 DS_VNA[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
746 -8.0*(Type_DS_VNA)(siT*coP*CsumNLr.r + coT*coP/r*CsumNLt.r);
747
748 DS_VNA[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
749 -8.0*(Type_DS_VNA)(siT*siP*CsumNLr.r + coT*siP/r*CsumNLt.r);
750
751 DS_VNA[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
752 -8.0*(Type_DS_VNA)(coT*CsumNLr.r - siT/r*CsumNLt.r);
753 }
754
755 else{
756
757 DS_VNA[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
758 -8.0*(Type_DS_VNA)(siT*coP*CsumNLr.r + coT*coP/r*CsumNLt.r
759 - siP/siT/r*CsumNLp.r);
760
761 DS_VNA[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
762 -8.0*(Type_DS_VNA)(siT*siP*CsumNLr.r + coT*siP/r*CsumNLt.r
763 + coP/siT/r*CsumNLp.r);
764
765 DS_VNA[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
766 -8.0*(Type_DS_VNA)(coT*CsumNLr.r - siT/r*CsumNLt.r);
767 }
768
769 }
770
771 else{
772 DS_VNA[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
773 DS_VNA[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
774 DS_VNA[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
775 }
776
777 }
778 }
779
780 num1 = num1 + 2*L1 + 1;
781 }
782
783 num0 = num0 + 2*L0 + 1;
784 } /* M0 */
785 } /* L0 */
786
787 if (measure_time && OMPID==0){
788 dtime(&etime);
789 time3 += etime - stime;
790 }
791
792 dtime(&Etime_atom);
793 time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
794
795 } /* Mc_AN */
796
797 /* freeing of arrays */
798
799 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
800 free(CmatNL0[i]);
801 }
802 free(CmatNL0);
803
804 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
805 free(CmatNLr[i]);
806 }
807 free(CmatNLr);
808
809 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
810 free(CmatNLt[i]);
811 }
812 free(CmatNLt);
813
814 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
815 free(CmatNLp[i]);
816 }
817 free(CmatNLp);
818
819 free(Bes00);
820 free(Bes01);
821
822 for (i=0; i<(List_YOUSO[25]+1); i++){
823 for (j=0; j<List_YOUSO[24]; j++){
824 free(SumNLr0[i][j]);
825 }
826 free(SumNLr0[i]);
827 }
828 free(SumNLr0);
829
830 for (i=0; i<(List_YOUSO[25]+1); i++){
831 for (j=0; j<List_YOUSO[24]; j++){
832 free(SumNL0[i][j]);
833 }
834 free(SumNL0[i]);
835 }
836 free(SumNL0);
837
838 for (i=0; i<(List_YOUSO[25]+1); i++){
839 for (j=0; j<List_YOUSO[24]; j++){
840 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
841 for (l=0; l<Num_RVNA; l++){
842 free(TmpNLp[i][j][k][l]);
843 }
844 free(TmpNLp[i][j][k]);
845 }
846 free(TmpNLp[i][j]);
847 }
848 free(TmpNLp[i]);
849 }
850 free(TmpNLp);
851
852 for (i=0; i<(List_YOUSO[25]+1); i++){
853 for (j=0; j<List_YOUSO[24]; j++){
854 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
855 for (l=0; l<Num_RVNA; l++){
856 free(TmpNLt[i][j][k][l]);
857 }
858 free(TmpNLt[i][j][k]);
859 }
860 free(TmpNLt[i][j]);
861 }
862 free(TmpNLt[i]);
863 }
864 free(TmpNLt);
865
866 for (i=0; i<(List_YOUSO[25]+1); i++){
867 for (j=0; j<List_YOUSO[24]; j++){
868 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
869 for (l=0; l<Num_RVNA; l++){
870 free(TmpNLr[i][j][k][l]);
871 }
872 free(TmpNLr[i][j][k]);
873 }
874 free(TmpNLr[i][j]);
875 }
876 free(TmpNLr[i]);
877 }
878 free(TmpNLr);
879
880 for (i=0; i<(List_YOUSO[25]+1); i++){
881 for (j=0; j<List_YOUSO[24]; j++){
882 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
883 for (l=0; l<Num_RVNA; l++){
884 free(TmpNL[i][j][k][l]);
885 }
886 free(TmpNL[i][j][k]);
887 }
888 free(TmpNL[i][j]);
889 }
890 free(TmpNL[i]);
891 }
892 free(TmpNL);
893
894 #pragma omp flush(DS_VNA)
895
896 } /* #pragma omp parallel */
897
898 /*******************************************************
899 *******************************************************
900 multiplying overlap integrals WITH COMMUNICATION
901
902 MPI: communicate only for k=0
903 DS_VNA
904 *******************************************************
905 *******************************************************/
906
907 MPI_Barrier(mpi_comm_level1);
908 if (measure_time) dtime(&stime);
909
910 /* allocation of array */
911
912 NLH = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
913 for (i=0; i<List_YOUSO[7]; i++){
914 NLH[i] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
915 }
916
917 for (ID=0; ID<numprocs; ID++){
918 F_Snd_Num_WK[ID] = 0;
919 F_Rcv_Num_WK[ID] = 0;
920 }
921
922 do {
923
924 /***********************************
925 set the size of data
926 ************************************/
927
928 for (ID=0; ID<numprocs; ID++){
929
930 IDS = (myid + ID) % numprocs;
931 IDR = (myid - ID + numprocs) % numprocs;
932
933 /* find the data size to send the block data */
934
935 if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ){
936
937 size1 = 0;
938 n = F_Snd_Num_WK[IDS];
939
940 Mc_AN = Snd_MAN[IDS][n];
941 Gc_AN = Snd_GAN[IDS][n];
942 Cwan = WhatSpecies[Gc_AN];
943 tno1 = Spe_Total_NO[Cwan];
944 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
945 Gh_AN = natn[Gc_AN][h_AN];
946 Hwan = WhatSpecies[Gh_AN];
947 tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
948 size1 += tno1*tno2;
949 }
950
951 Snd_DS_VNA_Size[IDS] = size1;
952 MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
953 }
954 else{
955 Snd_DS_VNA_Size[IDS] = 0;
956 }
957
958 /* receiving of the size of the data */
959
960 if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ){
961 MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
962 Rcv_DS_VNA_Size[IDR] = size2;
963 }
964 else{
965 Rcv_DS_VNA_Size[IDR] = 0;
966 }
967
968 if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) MPI_Wait(&request,&stat);
969
970 } /* ID */
971
972 /***********************************
973 data transfer
974 ************************************/
975
976 for (ID=0; ID<numprocs; ID++){
977
978 IDS = (myid + ID) % numprocs;
979 IDR = (myid - ID + numprocs) % numprocs;
980
981 /******************************
982 sending of the data
983 ******************************/
984
985 if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ){
986
987 size1 = Snd_DS_VNA_Size[IDS];
988
989 /*
990 printf("S myid=%2d ID=%2d IDS=%2d IDR=%2d size1=%2d\n",myid,ID,IDS,IDR,size1);fflush(stdout);
991 */
992
993 /* allocation of the array */
994
995 tmp_array = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size1);
996
997 /* multidimentional array to the vector array */
998
999 num = 0;
1000 n = F_Snd_Num_WK[IDS];
1001
1002 Mc_AN = Snd_MAN[IDS][n];
1003 Gc_AN = Snd_GAN[IDS][n];
1004 Cwan = WhatSpecies[Gc_AN];
1005 tno1 = Spe_Total_NO[Cwan];
1006 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1007 Gh_AN = natn[Gc_AN][h_AN];
1008 Hwan = WhatSpecies[Gh_AN];
1009 tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
1010
1011 for (i=0; i<tno1; i++){
1012 for (j=0; j<tno2; j++){
1013 tmp_array[num] = DS_VNA[0][Mc_AN][h_AN][i][j];
1014 num++;
1015 }
1016 }
1017 }
1018
1019 MPI_Isend(&tmp_array[0], size1, MPI_Type_DS_VNA, IDS, tag, mpi_comm_level1, &request);
1020
1021 }
1022
1023 /******************************
1024 receiving of the block data
1025 ******************************/
1026
1027 if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ){
1028
1029 size2 = Rcv_DS_VNA_Size[IDR];
1030
1031 /*
1032 printf("R myid=%2d ID=%2d IDS=%2d IDR=%2d size2=%2d\n",myid,ID,IDS,IDR,size2);fflush(stdout);
1033 */
1034
1035 tmp_array2 = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size2);
1036 MPI_Recv(&tmp_array2[0], size2, MPI_Type_DS_VNA, IDR, tag, mpi_comm_level1, &stat);
1037
1038 /* store */
1039
1040 num = 0;
1041 n = F_Rcv_Num_WK[IDR];
1042 Original_Mc_AN = F_TopMAN[IDR] + n;
1043
1044 Gc_AN = Rcv_GAN[IDR][n];
1045 Cwan = WhatSpecies[Gc_AN];
1046 tno1 = Spe_Total_NO[Cwan];
1047 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1048 Gh_AN = natn[Gc_AN][h_AN];
1049 Hwan = WhatSpecies[Gh_AN];
1050 tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
1051 for (i=0; i<tno1; i++){
1052 for (j=0; j<tno2; j++){
1053 DS_VNA[0][Matomnum+1][h_AN][i][j] = tmp_array2[num];
1054 num++;
1055 }
1056 }
1057 }
1058
1059 /* free tmp_array2 */
1060 free(tmp_array2);
1061
1062 /*****************************************
1063 multiplying overlap integrals
1064 *****************************************/
1065
1066 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1067
1068 dtime(&Stime_atom);
1069
1070 Gc_AN = M2G[Mc_AN];
1071 Cwan = WhatSpecies[Gc_AN];
1072 fan = FNAN[Gc_AN];
1073 rcutA = Spe_Atom_Cut1[Cwan];
1074
1075 n = F_Rcv_Num_WK[IDR];
1076 jg = Rcv_GAN[IDR][n];
1077
1078 for (j0=0; j0<=fan; j0++){
1079
1080 jg0 = natn[Gc_AN][j0];
1081 Mj_AN0 = F_G2M[jg0];
1082
1083 po = 0;
1084 if (Original_Mc_AN==Mj_AN0){
1085 po = 1;
1086 j = j0;
1087 }
1088
1089 if (po==1){
1090
1091 Hwan = WhatSpecies[jg];
1092 rcutB = Spe_Atom_Cut1[Hwan];
1093 rcut = rcutA + rcutB;
1094
1095 for (k=0; k<=fan; k++){
1096
1097 kg = natn[Gc_AN][k];
1098 wakg = WhatSpecies[kg];
1099 kl = RMI1[Mc_AN][j][k];
1100
1101 if (0<=kl){
1102 #pragma omp parallel for private(m, n, sum, L ,L1, GL, Mul1, ene, L2 ,tmp0, L3) default(shared) if(Spe_Total_NO[Cwan] > 8)
1103 for (m=0; m<Spe_Total_NO[Cwan]; m++){
1104 for (n=0; n<Spe_Total_NO[Hwan]; n++){
1105
1106 sum = 0.0;
1107 L = 0;
1108
1109 for (L1=0; L1<Num_RVNA; L1++){
1110
1111 GL = VNA_List[L1];
1112 Mul1 = VNA_List2[L1];
1113
1114 ene = VNA_proj_ene[wakg][GL][Mul1];
1115 L2 = 2*VNA_List[L1];
1116
1117 tmp0 = 0.0;
1118
1119 for (L3=0; L3<=L2; L3++){
1120
1121 tmp0 += DS_VNA[0][Mc_AN][k][m][L]*DS_VNA[0][Matomnum+1][kl][n][L];
1122
1123 L++;
1124 }
1125
1126 sum += ene*tmp0;
1127
1128 }
1129
1130 if (k==0) NLH[m][n] = sum;
1131 else NLH[m][n] += sum;
1132
1133 } /* n */
1134 } /* m */
1135 } /* if (0<=kl)*/
1136 } /* k */
1137
1138 /****************************************************
1139 NLH to HVNA
1140 ****************************************************/
1141
1142 dmp = dampingF(rcut,Dis[Gc_AN][j]);
1143
1144 for (i1=0; i1<Spe_Total_NO[Cwan]; i1++){
1145 for (j1=0; j1<Spe_Total_NO[Hwan]; j1++){
1146 HVNA[Mc_AN][j][i1][j1] = dmp*NLH[i1][j1];
1147 }
1148 }
1149
1150 } /* if (po==1) */
1151
1152 } /* j */
1153
1154 dtime(&Etime_atom);
1155 time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1156
1157 } /* Mc_AN */
1158
1159 /********************************************
1160 increment of F_Rcv_Num_WK[IDR]
1161 ********************************************/
1162
1163 F_Rcv_Num_WK[IDR]++;
1164
1165 }
1166
1167 if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) {
1168 MPI_Wait(&request,&stat);
1169 free(tmp_array); /* freeing of array */
1170
1171 /********************************************
1172 increment of F_Snd_Num_WK[IDS]
1173 ********************************************/
1174
1175 F_Snd_Num_WK[IDS]++;
1176 }
1177
1178 } /* ID */
1179
1180 /*****************************************************
1181 check whether all the communications have finished
1182 *****************************************************/
1183
1184 po = 0;
1185 for (ID=0; ID<numprocs; ID++){
1186
1187 IDS = (myid + ID) % numprocs;
1188 IDR = (myid - ID + numprocs) % numprocs;
1189
1190 if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) po += F_Snd_Num[IDS]-F_Snd_Num_WK[IDS];
1191 if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ) po += F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR];
1192 }
1193
1194 } while (po!=0);
1195
1196 /* freeing of arrays */
1197
1198 for (i=0; i<List_YOUSO[7]; i++){
1199 free(NLH[i]);
1200 }
1201 free(NLH);
1202
1203 if (measure_time){
1204 dtime(&etime);
1205 time4 += etime - stime;
1206 }
1207
1208 /*******************************************************
1209 *******************************************************
1210 multiplying overlap integrals WITHOUT COMMUNICATION
1211 *******************************************************
1212 *******************************************************/
1213
1214 if (measure_time) dtime(&stime);
1215
1216 #pragma omp parallel shared(List_YOUSO,time_per_atom,HVNA,Dis,DS_VNA,VNA_proj_ene,VNA_List2,VNA_List,Num_RVNA,Spe_Total_NO,RMI1,F_G2M,natn,Spe_Atom_Cut1,FNAN,WhatSpecies,M2G,Matomnum)
1217 {
1218
1219 int OMPID,Nthrds,Nprocs;
1220 int Mc_AN,Gc_AN,Cwan,fan,Mj_AN,Hwan;
1221 int L,L1,GL,Mul1,L2,L3,i1,j1;
1222 int k,kg,wakg,kl,i,j,jg,m,n;
1223 double **NLH;
1224 double rcutA,rcutB,rcut,sum,ene,tmp0,dmp;
1225 double Stime_atom,Etime_atom;
1226
1227 /* allocation of array */
1228
1229 NLH = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
1230 for (i=0; i<List_YOUSO[7]; i++){
1231 NLH[i] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
1232 }
1233
1234 /* get info. on OpenMP */
1235
1236 OMPID = omp_get_thread_num();
1237 Nthrds = omp_get_num_threads();
1238 Nprocs = omp_get_num_procs();
1239
1240 for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
1241
1242 dtime(&Stime_atom);
1243
1244 Gc_AN = M2G[Mc_AN];
1245 Cwan = WhatSpecies[Gc_AN];
1246 fan = FNAN[Gc_AN];
1247 rcutA = Spe_Atom_Cut1[Cwan];
1248
1249 for (j=0; j<=fan; j++){
1250
1251 jg = natn[Gc_AN][j];
1252 Mj_AN = F_G2M[jg];
1253
1254 if (Mj_AN<=Matomnum){
1255
1256 Hwan = WhatSpecies[jg];
1257 rcutB = Spe_Atom_Cut1[Hwan];
1258 rcut = rcutA + rcutB;
1259
1260 for (k=0; k<=fan; k++){
1261
1262 kg = natn[Gc_AN][k];
1263 wakg = WhatSpecies[kg];
1264 kl = RMI1[Mc_AN][j][k];
1265
1266 if (0<=kl){
1267
1268 for (m=0; m<Spe_Total_NO[Cwan]; m++){
1269 for (n=0; n<Spe_Total_NO[Hwan]; n++){
1270
1271 sum = 0.0;
1272 L = 0;
1273
1274 for (L1=0; L1<Num_RVNA; L1++){
1275
1276 GL = VNA_List[L1];
1277 Mul1 = VNA_List2[L1];
1278
1279 ene = VNA_proj_ene[wakg][GL][Mul1];
1280 L2 = 2*VNA_List[L1];
1281
1282 tmp0 = 0.0;
1283
1284 for (L3=0; L3<=L2; L3++){
1285 tmp0 += DS_VNA[0][Mc_AN][k][m][L]*DS_VNA[0][Mj_AN][kl][n][L];
1286 L++;
1287 }
1288
1289 sum += ene*tmp0;
1290
1291 }
1292
1293 if (k==0) NLH[m][n] = sum;
1294 else NLH[m][n] += sum;
1295
1296 }
1297 }
1298 }
1299 } /* k */
1300
1301 /****************************************************
1302 NLH to HVNA
1303 ****************************************************/
1304
1305 dmp = dampingF(rcut,Dis[Gc_AN][j]);
1306
1307 for (i1=0; i1<Spe_Total_NO[Cwan]; i1++){
1308 for (j1=0; j1<Spe_Total_NO[Hwan]; j1++){
1309 HVNA[Mc_AN][j][i1][j1] = dmp*NLH[i1][j1];
1310 }
1311 }
1312
1313 } /* if (Mj_AN<=Matomnum) */
1314 } /* j */
1315
1316 dtime(&Etime_atom);
1317 time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1318
1319 } /* Mc_AN */
1320
1321 /* freeing of arrays */
1322
1323 for (i=0; i<List_YOUSO[7]; i++){
1324 free(NLH[i]);
1325 }
1326 free(NLH);
1327
1328 #pragma omp flush(HVNA)
1329
1330 } /* #pragma omp parallel */
1331
1332 if (measure_time){
1333 dtime(&etime);
1334 time5 += etime - stime;
1335 }
1336
1337 if (measure_time){
1338 printf("Set_ProExpn_VNA myid=%2d time1=%7.3f time2=%7.3f time3=%7.3f time4=%7.3f time5=%7.3f\n",
1339 myid,time1,time2,time3,time4,time5);fflush(stdout);
1340 }
1341
1342 /****************************************************
1343 freeing of arrays:
1344 ****************************************************/
1345
1346 free(Snd_DS_VNA_Size);
1347 free(Rcv_DS_VNA_Size);
1348
1349 free(VNA_List);
1350 free(VNA_List2);
1351
1352 for (spe=0; spe<SpeciesNum; spe++){
1353 for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
1354 for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
1355 free(Bessel_Pro00[spe][L0][Mul0]);
1356 }
1357 free(Bessel_Pro00[spe][L0]);
1358 }
1359 free(Bessel_Pro00[spe]);
1360 }
1361 free(Bessel_Pro00);
1362
1363 for (spe=0; spe<SpeciesNum; spe++){
1364 for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
1365 for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
1366 free(Bessel_Pro01[spe][L0][Mul0]);
1367 }
1368 free(Bessel_Pro01[spe][L0]);
1369 }
1370 free(Bessel_Pro01[spe]);
1371 }
1372 free(Bessel_Pro01);
1373
1374 free(OneD2Mc_AN);
1375 free(OneD2h_AN);
1376
1377 /* for time */
1378 dtime(&TEtime);
1379 time0 = TEtime - TStime;
1380
1381 return time0;
1382 }
1383
1384
1385
1386
Set_VNA2(double **** HVNA,double ***** HVNA2)1387 double Set_VNA2(double ****HVNA, double *****HVNA2)
1388 {
1389 /****************************************************
1390 Evaluate matrix elements of one-center (orbitals)
1391 but two-center integrals for neutral atom potentials
1392 in the momentum space.
1393
1394 <Phi_{LM,L'M'}|VNA>
1395 ****************************************************/
1396 static int firsttime=1;
1397 int L2,L3,L,GL,i,j;
1398 int k,kl,h_AN,Gh_AN,Rnh,Ls,n;
1399 int tno0,tno1,tno2,i1,j1,p;
1400 int Cwan,Hwan,fan,jg,kg,wakg,Lmax;
1401 int size_TmpHNA;
1402 int Mc_AN,Gc_AN,Mj_AN,num,size1,size2;
1403 double time0;
1404 double tmp2,tmp3,tmp4,tmp5;
1405 double TStime,TEtime;
1406 int Num_RVNA;
1407 /* for OpenMP */
1408 int OneD_Nloop,*OneD2Mc_AN,*OneD2h_AN;
1409
1410 dtime(&TStime);
1411
1412 /****************************************************
1413 allocation of arrays:
1414 ****************************************************/
1415
1416 size_TmpHNA = (List_YOUSO[25]+1)*List_YOUSO[24]*
1417 (2*(List_YOUSO[25]+1)+1)*
1418 (List_YOUSO[25]+1)*List_YOUSO[24]*
1419 (2*(List_YOUSO[25]+1)+1);
1420
1421 /* PrintMemory */
1422 if (firsttime) {
1423 PrintMemory("Set_ProExpn_VNA: TmpHNA",sizeof(double)*size_TmpHNA,NULL);
1424 PrintMemory("Set_ProExpn_VNA: TmpHNAr",sizeof(double)*size_TmpHNA,NULL);
1425 PrintMemory("Set_ProExpn_VNA: TmpHNAt",sizeof(double)*size_TmpHNA,NULL);
1426 PrintMemory("Set_ProExpn_VNA: TmpHNAp",sizeof(double)*size_TmpHNA,NULL);
1427
1428 firsttime=0;
1429 }
1430
1431 /* one-dimensionalize the Mc_AN and h_AN loops */
1432
1433 OneD_Nloop = 0;
1434 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1435 Gc_AN = M2G[Mc_AN];
1436 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1437 OneD_Nloop++;
1438 }
1439 }
1440
1441 OneD2Mc_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
1442 OneD2h_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
1443
1444 OneD_Nloop = 0;
1445 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1446 Gc_AN = M2G[Mc_AN];
1447 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1448 OneD2Mc_AN[OneD_Nloop] = Mc_AN;
1449 OneD2h_AN[OneD_Nloop] = h_AN;
1450 OneD_Nloop++;
1451 }
1452 }
1453
1454 #pragma omp parallel shared(List_YOUSO,time_per_atom,HVNA2,Comp2Real,GL_Weight,Spe_ProductRF_Bessel,Spe_CrudeVNA_Bessel,GL_NormK,Spe_Num_Basis,Spe_MaxL_Basis,PAO_Nkmax,atv,Gxyz,WhatSpecies,ncn,natn,M2G,OneD2h_AN,OneD2Mc_AN,OneD_Nloop)
1455 {
1456 int OMPID,Nthrds,Nprocs,Nloop;
1457 int Mc_AN,h_AN,Gc_AN,Cwan,Gh_AN;
1458 int Rnh,Hwan,L0,Mul0,L1,Mul1,M0,M1,LL;
1459 int Lmax_Four_Int,i,j,k,l,m,num0,num1;
1460 double dx,dy,dz;
1461 double siT,coT,siP,coP;
1462 double Normk,kmin,kmax,Sk,Dk;
1463 double gant,r,theta,phi;
1464 double SH[2],dSHt[2],dSHp[2];
1465 double S_coordinate[3];
1466 double Stime_atom,Etime_atom;
1467 double sum,sumr,sj,sjp,tmp0,tmp1,tmp10;
1468 double **SphB,**SphBp;
1469 double *tmp_SphB,*tmp_SphBp;
1470
1471 dcomplex Ctmp0,Ctmp1,Ctmp2,Cpow;
1472 dcomplex Csum,Csumr,Csumt,Csump;
1473 dcomplex CsumS0,CsumSr,CsumSt,CsumSp;
1474 dcomplex CY,CYt,CYp,CY1,CYt1,CYp1;
1475 dcomplex **CmatS0;
1476 dcomplex **CmatSr;
1477 dcomplex **CmatSt;
1478 dcomplex **CmatSp;
1479 dcomplex ******TmpHNA;
1480 dcomplex ******TmpHNAr;
1481 dcomplex ******TmpHNAt;
1482 dcomplex ******TmpHNAp;
1483
1484 /* allocation of arrays */
1485
1486 TmpHNA = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
1487 for (i=0; i<(List_YOUSO[25]+1); i++){
1488 TmpHNA[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
1489 for (j=0; j<List_YOUSO[24]; j++){
1490 TmpHNA[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
1491 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1492 TmpHNA[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
1493 for (l=0; l<(List_YOUSO[25]+1); l++){
1494 TmpHNA[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
1495 for (m=0; m<List_YOUSO[24]; m++){
1496 TmpHNA[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1497 }
1498 }
1499 }
1500 }
1501 }
1502
1503 TmpHNAr = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
1504 for (i=0; i<(List_YOUSO[25]+1); i++){
1505 TmpHNAr[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
1506 for (j=0; j<List_YOUSO[24]; j++){
1507 TmpHNAr[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
1508 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1509 TmpHNAr[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
1510 for (l=0; l<(List_YOUSO[25]+1); l++){
1511 TmpHNAr[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
1512 for (m=0; m<List_YOUSO[24]; m++){
1513 TmpHNAr[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1514 }
1515 }
1516 }
1517 }
1518 }
1519
1520 TmpHNAt = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
1521 for (i=0; i<(List_YOUSO[25]+1); i++){
1522 TmpHNAt[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
1523 for (j=0; j<List_YOUSO[24]; j++){
1524 TmpHNAt[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
1525 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1526 TmpHNAt[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
1527 for (l=0; l<(List_YOUSO[25]+1); l++){
1528 TmpHNAt[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
1529 for (m=0; m<List_YOUSO[24]; m++){
1530 TmpHNAt[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1531 }
1532 }
1533 }
1534 }
1535 }
1536
1537 TmpHNAp = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
1538 for (i=0; i<(List_YOUSO[25]+1); i++){
1539 TmpHNAp[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
1540 for (j=0; j<List_YOUSO[24]; j++){
1541 TmpHNAp[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
1542 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1543 TmpHNAp[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
1544 for (l=0; l<(List_YOUSO[25]+1); l++){
1545 TmpHNAp[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
1546 for (m=0; m<List_YOUSO[24]; m++){
1547 TmpHNAp[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1548 }
1549 }
1550 }
1551 }
1552 }
1553
1554 CmatS0 = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
1555 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
1556 CmatS0[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1557 }
1558
1559 CmatSr = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
1560 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
1561 CmatSr[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1562 }
1563
1564 CmatSt = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
1565 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
1566 CmatSt[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1567 }
1568
1569 CmatSp = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
1570 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
1571 CmatSp[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1572 }
1573
1574 /* get info. on OpenMP */
1575
1576 OMPID = omp_get_thread_num();
1577 Nthrds = omp_get_num_threads();
1578 Nprocs = omp_get_num_procs();
1579
1580 /* one-dimensionalized loop */
1581
1582 for (Nloop=OMPID*OneD_Nloop/Nthrds; Nloop<(OMPID+1)*OneD_Nloop/Nthrds; Nloop++){
1583
1584 dtime(&Stime_atom);
1585
1586 /* get Mc_AN and h_AN */
1587
1588 Mc_AN = OneD2Mc_AN[Nloop];
1589 h_AN = OneD2h_AN[Nloop];
1590
1591 /* set data on Mc_AN */
1592
1593 Gc_AN = M2G[Mc_AN];
1594 Cwan = WhatSpecies[Gc_AN];
1595
1596 /* set data on h_AN */
1597
1598 Gh_AN = natn[Gc_AN][h_AN];
1599 Rnh = ncn[Gc_AN][h_AN];
1600 Hwan = WhatSpecies[Gh_AN];
1601
1602 dx = Gxyz[Gh_AN][1] + atv[Rnh][1] - Gxyz[Gc_AN][1];
1603 dy = Gxyz[Gh_AN][2] + atv[Rnh][2] - Gxyz[Gc_AN][2];
1604 dz = Gxyz[Gh_AN][3] + atv[Rnh][3] - Gxyz[Gc_AN][3];
1605
1606 xyz2spherical(dx,dy,dz,0.0,0.0,0.0,S_coordinate);
1607 r = S_coordinate[0];
1608 theta = S_coordinate[1];
1609 phi = S_coordinate[2];
1610
1611 /* for empty atoms or finite elemens basis */
1612
1613 if (r<1.0e-10) r = 1.0e-10;
1614
1615 /* precalculation of sin and cos */
1616
1617 siT = sin(theta);
1618 coT = cos(theta);
1619 siP = sin(phi);
1620 coP = cos(phi);
1621
1622 /****************************************************
1623 evaluate ovelap integrals <Phi_{LM,L'M'}|VNA>
1624 between VNA and PAO.
1625 ****************************************************/
1626 /****************************************************
1627 \int VNA(k)*Spe_ProductRF_Bessel(k)*jl(k*R) k^2 dk^3
1628 ****************************************************/
1629
1630 kmin = Radial_kmin;
1631 kmax = PAO_Nkmax;
1632 Sk = kmax + kmin;
1633 Dk = kmax - kmin;
1634
1635 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
1636 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
1637 for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
1638 for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
1639 for (M0=-L0; M0<=L0; M0++){
1640 for (M1=-L1; M1<=L1; M1++){
1641 TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
1642 TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
1643 TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
1644 TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
1645 }
1646 }
1647 }
1648 }
1649 }
1650 }
1651
1652 /* allocate SphB and SphBp */
1653
1654 Lmax_Four_Int = 2*Spe_MaxL_Basis[Cwan];
1655
1656 SphB = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
1657 for(LL=0; LL<(Lmax_Four_Int+3); LL++){
1658 SphB[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
1659 }
1660
1661 SphBp = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
1662 for(LL=0; LL<(Lmax_Four_Int+3); LL++){
1663 SphBp[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
1664 }
1665
1666 tmp_SphB = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
1667 tmp_SphBp = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
1668
1669 /* calculate SphB and SphBp */
1670 #ifdef kcomp
1671 #else
1672 #pragma forceinline recursive
1673 #endif
1674 for (i=0; i<GL_Mesh; i++){
1675 Normk = GL_NormK[i];
1676 Spherical_Bessel2(Normk*r,Lmax_Four_Int,tmp_SphB,tmp_SphBp);
1677 for(LL=0; LL<=Lmax_Four_Int; LL++){
1678 SphB[LL][i] = tmp_SphB[LL];
1679 SphBp[LL][i] = tmp_SphBp[LL];
1680 }
1681 }
1682
1683 free(tmp_SphB);
1684 free(tmp_SphBp);
1685
1686 /* loops for L0, Mul0, L1, and Mul1 */
1687
1688 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
1689 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
1690 for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
1691 for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
1692
1693 if (L0<=L1){
1694 Lmax_Four_Int = 2*L1;
1695
1696 /* sum over LL */
1697
1698 for(LL=0; LL<=Lmax_Four_Int; LL++){
1699
1700 if (abs(L1-LL)<=L0 && L0<=(L1+LL) ){
1701
1702 sum = 0.0;
1703 sumr = 0.0;
1704
1705 /* Gauss-Legendre quadrature */
1706
1707 for (i=0; i<GL_Mesh; i++){
1708
1709 Normk = GL_NormK[i];
1710
1711 sj = SphB[LL][i];
1712 sjp = SphBp[LL][i];
1713
1714 tmp0 = Spe_CrudeVNA_Bessel[Hwan][i];
1715 tmp1 = Spe_ProductRF_Bessel[Cwan][L0][Mul0][L1][Mul1][LL][i];
1716 tmp10 = 0.50*Dk*tmp0*tmp1*GL_Weight[i]*Normk*Normk;
1717
1718 sum += tmp10*sj;
1719 sumr += tmp10*sjp*Normk;
1720 }
1721
1722 /* sum over m */
1723
1724 for (M0=-L0; M0<=L0; M0++){
1725 for (M1=-L1; M1<=L1; M1++){
1726
1727 Csum = Complex(0.0,0.0);
1728 Csumr = Complex(0.0,0.0);
1729 Csumt = Complex(0.0,0.0);
1730 Csump = Complex(0.0,0.0);
1731
1732 for(m=-LL; m<=LL; m++){
1733
1734 if ( (M1-m)==M0){
1735
1736 ComplexSH(LL,m,theta,phi,SH,dSHt,dSHp);
1737 CY = Complex(SH[0],SH[1]);
1738 CYt = Complex(dSHt[0],dSHt[1]);
1739 CYp = Complex(dSHp[0],dSHp[1]);
1740
1741 gant = pow(-1.0,(double)abs(m))*Gaunt(L0,M0,L1,M1,LL,-m);
1742
1743 /* S */
1744
1745 Ctmp2 = CRmul(CY,gant);
1746 Csum = Cadd(Csum,Ctmp2);
1747
1748 /* dS/dt */
1749
1750 Ctmp2 = CRmul(CYt,gant);
1751 Csumt = Cadd(Csumt,Ctmp2);
1752
1753 /* dS/dp */
1754
1755 Ctmp2 = CRmul(CYp,gant);
1756 Csump = Cadd(Csump,Ctmp2);
1757 }
1758
1759 }
1760
1761 /* S */
1762 Ctmp1 = CRmul(Csum,sum);
1763 TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
1764 = Cadd(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
1765
1766 /* dS/dr */
1767 Ctmp1 = CRmul(Csum,sumr);
1768 TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
1769 = Cadd(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
1770
1771 /* dS/dt */
1772 Ctmp1 = CRmul(Csumt,sum);
1773 TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
1774 = Cadd(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
1775
1776 /* dS/dp */
1777 Ctmp1 = CRmul(Csump,sum);
1778 TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
1779 = Cadd(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
1780 }
1781 }
1782 }
1783 } /* LL */
1784 } /* if (L0<=L1) */
1785 }
1786 }
1787 }
1788 }
1789
1790 /* free SphB and SphBp */
1791
1792 for(LL=0; LL<(Lmax_Four_Int+3); LL++){
1793 free(SphB[LL]);
1794 }
1795 free(SphB);
1796
1797 for(LL=0; LL<(Lmax_Four_Int+3); LL++){
1798 free(SphBp[LL]);
1799 }
1800 free(SphBp);
1801
1802 /* copy the upper part to the lower part */
1803
1804 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
1805 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
1806 for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
1807 for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
1808 if (L0<=L1){
1809 for (M0=-L0; M0<=L0; M0++){
1810 for (M1=-L1; M1<=L1; M1++){
1811
1812 TmpHNA[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
1813 = Conjg(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
1814
1815 TmpHNAr[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
1816 = Conjg(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
1817
1818 TmpHNAt[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
1819 = Conjg(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
1820
1821 TmpHNAp[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
1822 = Conjg(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
1823
1824 }
1825 }
1826 }
1827 }
1828 }
1829 }
1830 }
1831
1832 /****************************************************
1833 Complex to Real
1834 ****************************************************/
1835
1836 num0 = 0;
1837 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
1838 for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
1839
1840 num1 = 0;
1841 for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
1842 for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
1843
1844 for (M0=-L0; M0<=L0; M0++){
1845 for (M1=-L1; M1<=L1; M1++){
1846
1847 CsumS0 = Complex(0.0,0.0);
1848 CsumSr = Complex(0.0,0.0);
1849 CsumSt = Complex(0.0,0.0);
1850 CsumSp = Complex(0.0,0.0);
1851
1852 for (k=-L0; k<=L0; k++){
1853
1854 Ctmp1 = Conjg(Comp2Real[L0][L0+M0][L0+k]);
1855
1856 /* S */
1857
1858 Ctmp0 = TmpHNA[L0][Mul0][L0+k][L1][Mul1][L1+M1];
1859 Ctmp2 = Cmul(Ctmp1,Ctmp0);
1860 CsumS0 = Cadd(CsumS0,Ctmp2);
1861
1862 /* dS/dr */
1863
1864 Ctmp0 = TmpHNAr[L0][Mul0][L0+k][L1][Mul1][L1+M1];
1865 Ctmp2 = Cmul(Ctmp1,Ctmp0);
1866 CsumSr = Cadd(CsumSr,Ctmp2);
1867
1868 /* dS/dt */
1869
1870 Ctmp0 = TmpHNAt[L0][Mul0][L0+k][L1][Mul1][L1+M1];
1871 Ctmp2 = Cmul(Ctmp1,Ctmp0);
1872 CsumSt = Cadd(CsumSt,Ctmp2);
1873
1874 /* dS/dp */
1875
1876 Ctmp0 = TmpHNAp[L0][Mul0][L0+k][L1][Mul1][L1+M1];
1877 Ctmp2 = Cmul(Ctmp1,Ctmp0);
1878 CsumSp = Cadd(CsumSp,Ctmp2);
1879
1880 }
1881
1882 CmatS0[L0+M0][L1+M1] = CsumS0;
1883 CmatSr[L0+M0][L1+M1] = CsumSr;
1884 CmatSt[L0+M0][L1+M1] = CsumSt;
1885 CmatSp[L0+M0][L1+M1] = CsumSp;
1886
1887 }
1888 }
1889
1890 for (M0=-L0; M0<=L0; M0++){
1891 for (M1=-L1; M1<=L1; M1++){
1892
1893 CsumS0 = Complex(0.0,0.0);
1894 CsumSr = Complex(0.0,0.0);
1895 CsumSt = Complex(0.0,0.0);
1896 CsumSp = Complex(0.0,0.0);
1897
1898 for (k=-L1; k<=L1; k++){
1899
1900 /* S */
1901
1902 Ctmp1 = Cmul(CmatS0[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
1903 CsumS0 = Cadd(CsumS0,Ctmp1);
1904
1905 /* dS/dr */
1906
1907 Ctmp1 = Cmul(CmatSr[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
1908 CsumSr = Cadd(CsumSr,Ctmp1);
1909
1910 /* dS/dt */
1911
1912 Ctmp1 = Cmul(CmatSt[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
1913 CsumSt = Cadd(CsumSt,Ctmp1);
1914
1915 /* dS/dp */
1916
1917 Ctmp1 = Cmul(CmatSp[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
1918 CsumSp = Cadd(CsumSp,Ctmp1);
1919
1920 }
1921
1922 HVNA2[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*CsumS0.r;
1923
1924 if (h_AN!=0){
1925
1926 if (fabs(siT)<10e-14){
1927
1928 HVNA2[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1929 -8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r);
1930
1931 HVNA2[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1932 -8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r);
1933
1934 HVNA2[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1935 -8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
1936 }
1937
1938 else{
1939
1940 HVNA2[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1941 -8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r
1942 - siP/siT/r*CsumSp.r);
1943
1944 HVNA2[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1945 -8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r
1946 + coP/siT/r*CsumSp.r);
1947
1948 HVNA2[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1949 -8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
1950 }
1951 }
1952 else{
1953 HVNA2[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
1954 HVNA2[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
1955 HVNA2[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
1956 }
1957
1958 }
1959 }
1960
1961 num1 = num1 + 2*L1 + 1;
1962 }
1963 }
1964
1965 num0 = num0 + 2*L0 + 1;
1966 }
1967 }
1968
1969 dtime(&Etime_atom);
1970 time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1971
1972 } /* Mc_AN */
1973
1974 /* freeing of arrays */
1975
1976 for (i=0; i<(List_YOUSO[25]+1); i++){
1977 for (j=0; j<List_YOUSO[24]; j++){
1978 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1979 for (l=0; l<(List_YOUSO[25]+1); l++){
1980 for (m=0; m<List_YOUSO[24]; m++){
1981 free(TmpHNA[i][j][k][l][m]);
1982 }
1983 free(TmpHNA[i][j][k][l]);
1984 }
1985 free(TmpHNA[i][j][k]);
1986 }
1987 free(TmpHNA[i][j]);
1988 }
1989 free(TmpHNA[i]);
1990 }
1991 free(TmpHNA);
1992
1993 for (i=0; i<(List_YOUSO[25]+1); i++){
1994 for (j=0; j<List_YOUSO[24]; j++){
1995 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1996 for (l=0; l<(List_YOUSO[25]+1); l++){
1997 for (m=0; m<List_YOUSO[24]; m++){
1998 free(TmpHNAr[i][j][k][l][m]);
1999 }
2000 free(TmpHNAr[i][j][k][l]);
2001 }
2002 free(TmpHNAr[i][j][k]);
2003 }
2004 free(TmpHNAr[i][j]);
2005 }
2006 free(TmpHNAr[i]);
2007 }
2008 free(TmpHNAr);
2009
2010 for (i=0; i<(List_YOUSO[25]+1); i++){
2011 for (j=0; j<List_YOUSO[24]; j++){
2012 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2013 for (l=0; l<(List_YOUSO[25]+1); l++){
2014 for (m=0; m<List_YOUSO[24]; m++){
2015 free(TmpHNAt[i][j][k][l][m]);
2016 }
2017 free(TmpHNAt[i][j][k][l]);
2018 }
2019 free(TmpHNAt[i][j][k]);
2020 }
2021 free(TmpHNAt[i][j]);
2022 }
2023 free(TmpHNAt[i]);
2024 }
2025 free(TmpHNAt);
2026
2027 for (i=0; i<(List_YOUSO[25]+1); i++){
2028 for (j=0; j<List_YOUSO[24]; j++){
2029 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2030 for (l=0; l<(List_YOUSO[25]+1); l++){
2031 for (m=0; m<List_YOUSO[24]; m++){
2032 free(TmpHNAp[i][j][k][l][m]);
2033 }
2034 free(TmpHNAp[i][j][k][l]);
2035 }
2036 free(TmpHNAp[i][j][k]);
2037 }
2038 free(TmpHNAp[i][j]);
2039 }
2040 free(TmpHNAp[i]);
2041 }
2042 free(TmpHNAp);
2043
2044 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2045 free(CmatS0[i]);
2046 }
2047 free(CmatS0);
2048
2049 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2050 free(CmatSr[i]);
2051 }
2052 free(CmatSr);
2053
2054 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2055 free(CmatSt[i]);
2056 }
2057 free(CmatSt);
2058
2059 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2060 free(CmatSp[i]);
2061 }
2062 free(CmatSp);
2063
2064 #pragma omp flush(HVNA2)
2065
2066 } /* #pragma omp parallel */
2067
2068 /****************************************************
2069 HVNA[Mc_AN][0] = sum_{h_AN} HVNA2
2070 ****************************************************/
2071
2072 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
2073
2074 Gc_AN = M2G[Mc_AN];
2075 Cwan = WhatSpecies[Gc_AN];
2076 for (i=0; i<Spe_Total_NO[Cwan]; i++){
2077 for (j=0; j<Spe_Total_NO[Cwan]; j++){
2078 HVNA[Mc_AN][0][i][j] = 0.0;
2079 }
2080 }
2081
2082 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2083 for (i=0; i<Spe_Total_NO[Cwan]; i++){
2084 for (j=0; j<Spe_Total_NO[Cwan]; j++){
2085 HVNA[Mc_AN][0][i][j] += HVNA2[0][Mc_AN][h_AN][i][j];
2086 }
2087 }
2088 }
2089 }
2090
2091 /****************************************************
2092 freeing of arrays:
2093 ****************************************************/
2094
2095 free(OneD2Mc_AN);
2096 free(OneD2h_AN);
2097
2098 /* for time */
2099 dtime(&TEtime);
2100 time0 = TEtime - TStime;
2101 return time0;
2102 }
2103
2104
2105
Set_VNA3(double ***** HVNA3)2106 double Set_VNA3(double *****HVNA3)
2107 {
2108 /****************************************************
2109 Evaluate matrix elements of one-center (orbitals)
2110 but two-center integrals for neutral atom potentials
2111 in the momentum space.
2112
2113 <VNA|Phi_{LM,L'M'}>
2114 ****************************************************/
2115 static int firsttime=0; /* due to the same as in Set_VNA2 */
2116 int L2,L3,L,GL,i,j;
2117 int k,kl,h_AN,Gh_AN,Rnh,Ls,n;
2118 int tno0,tno1,tno2,i1,j1,p;
2119 int Cwan,Hwan,fan,jg,kg,wakg,Lmax;
2120 int size_TmpHNA;
2121 int Mc_AN,Gc_AN,Mj_AN,num,size1,size2;
2122 double time0;
2123 double tmp2,tmp3,tmp4,tmp5;
2124 double TStime,TEtime;
2125 int Num_RVNA;
2126
2127 /* for OpenMP */
2128 int OneD_Nloop,*OneD2Mc_AN,*OneD2h_AN;
2129
2130 dtime(&TStime);
2131
2132 /****************************************************
2133 allocation of arrays:
2134 ****************************************************/
2135
2136 size_TmpHNA = (List_YOUSO[25]+1)*List_YOUSO[24]*
2137 (2*(List_YOUSO[25]+1)+1)*
2138 (List_YOUSO[25]+1)*List_YOUSO[24]*
2139 (2*(List_YOUSO[25]+1)+1);
2140
2141 /* PrintMemory */
2142 if (firsttime) {
2143 PrintMemory("Set_ProExpn_VNA: TmpHNA",sizeof(double)*size_TmpHNA,NULL);
2144 PrintMemory("Set_ProExpn_VNA: TmpHNAr",sizeof(double)*size_TmpHNA,NULL);
2145 PrintMemory("Set_ProExpn_VNA: TmpHNAt",sizeof(double)*size_TmpHNA,NULL);
2146 PrintMemory("Set_ProExpn_VNA: TmpHNAp",sizeof(double)*size_TmpHNA,NULL);
2147
2148 firsttime=0;
2149 }
2150
2151 /* one-dimensionalize the Mc_AN and h_AN loops */
2152
2153 OneD_Nloop = 0;
2154 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
2155 Gc_AN = M2G[Mc_AN];
2156 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2157 OneD_Nloop++;
2158 }
2159 }
2160
2161 OneD2Mc_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
2162 OneD2h_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
2163
2164 OneD_Nloop = 0;
2165 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
2166 Gc_AN = M2G[Mc_AN];
2167 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2168 OneD2Mc_AN[OneD_Nloop] = Mc_AN;
2169 OneD2h_AN[OneD_Nloop] = h_AN;
2170 OneD_Nloop++;
2171 }
2172 }
2173
2174 #pragma omp parallel shared(List_YOUSO,time_per_atom,HVNA3,Comp2Real,GL_Weight,Spe_ProductRF_Bessel,Spe_CrudeVNA_Bessel,GL_NormK,Spe_Num_Basis,Spe_MaxL_Basis,PAO_Nkmax,atv,Gxyz,WhatSpecies,ncn,natn,M2G,OneD2h_AN,OneD2Mc_AN,OneD_Nloop)
2175 {
2176 int OMPID,Nthrds,Nprocs,Nloop;
2177 int Mc_AN,h_AN,Gc_AN,Cwan,Gh_AN;
2178 int Rnh,Hwan,L0,Mul0,L1,Mul1,M0,M1,LL;
2179 int Lmax_Four_Int,i,j,k,l,m,num0,num1;
2180 double dx,dy,dz;
2181 double siT,coT,siP,coP;
2182 double Normk,kmin,kmax,Sk,Dk;
2183 double gant,r,theta,phi;
2184 double SH[2],dSHt[2],dSHp[2];
2185 double S_coordinate[3];
2186 double Stime_atom,Etime_atom;
2187 double sum,sumr,sj,sjp,tmp0,tmp1,tmp10;
2188 double **SphB,**SphBp;
2189 double *tmp_SphB,*tmp_SphBp;
2190
2191 dcomplex Ctmp0,Ctmp1,Ctmp2,Cpow;
2192 dcomplex Csum,Csumr,Csumt,Csump;
2193 dcomplex CsumS0,CsumSr,CsumSt,CsumSp;
2194 dcomplex CY,CYt,CYp,CY1,CYt1,CYp1;
2195 dcomplex **CmatS0;
2196 dcomplex **CmatSr;
2197 dcomplex **CmatSt;
2198 dcomplex **CmatSp;
2199 dcomplex ******TmpHNA;
2200 dcomplex ******TmpHNAr;
2201 dcomplex ******TmpHNAt;
2202 dcomplex ******TmpHNAp;
2203
2204 /* allocation of arrays */
2205
2206 TmpHNA = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
2207 for (i=0; i<(List_YOUSO[25]+1); i++){
2208 TmpHNA[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
2209 for (j=0; j<List_YOUSO[24]; j++){
2210 TmpHNA[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
2211 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2212 TmpHNA[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
2213 for (l=0; l<(List_YOUSO[25]+1); l++){
2214 TmpHNA[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
2215 for (m=0; m<List_YOUSO[24]; m++){
2216 TmpHNA[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2217 }
2218 }
2219 }
2220 }
2221 }
2222
2223 TmpHNAr = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
2224 for (i=0; i<(List_YOUSO[25]+1); i++){
2225 TmpHNAr[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
2226 for (j=0; j<List_YOUSO[24]; j++){
2227 TmpHNAr[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
2228 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2229 TmpHNAr[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
2230 for (l=0; l<(List_YOUSO[25]+1); l++){
2231 TmpHNAr[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
2232 for (m=0; m<List_YOUSO[24]; m++){
2233 TmpHNAr[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2234 }
2235 }
2236 }
2237 }
2238 }
2239
2240 TmpHNAt = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
2241 for (i=0; i<(List_YOUSO[25]+1); i++){
2242 TmpHNAt[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
2243 for (j=0; j<List_YOUSO[24]; j++){
2244 TmpHNAt[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
2245 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2246 TmpHNAt[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
2247 for (l=0; l<(List_YOUSO[25]+1); l++){
2248 TmpHNAt[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
2249 for (m=0; m<List_YOUSO[24]; m++){
2250 TmpHNAt[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2251 }
2252 }
2253 }
2254 }
2255 }
2256
2257 TmpHNAp = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
2258 for (i=0; i<(List_YOUSO[25]+1); i++){
2259 TmpHNAp[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
2260 for (j=0; j<List_YOUSO[24]; j++){
2261 TmpHNAp[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
2262 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2263 TmpHNAp[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
2264 for (l=0; l<(List_YOUSO[25]+1); l++){
2265 TmpHNAp[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
2266 for (m=0; m<List_YOUSO[24]; m++){
2267 TmpHNAp[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2268 }
2269 }
2270 }
2271 }
2272 }
2273
2274 CmatS0 = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
2275 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2276 CmatS0[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2277 }
2278
2279 CmatSr = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
2280 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2281 CmatSr[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2282 }
2283
2284 CmatSt = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
2285 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2286 CmatSt[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2287 }
2288
2289 CmatSp = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
2290 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2291 CmatSp[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2292 }
2293
2294 /* get info. on OpenMP */
2295
2296 OMPID = omp_get_thread_num();
2297 Nthrds = omp_get_num_threads();
2298 Nprocs = omp_get_num_procs();
2299
2300 /* one-dimensionalized loop */
2301
2302 for (Nloop=OMPID*OneD_Nloop/Nthrds; Nloop<(OMPID+1)*OneD_Nloop/Nthrds; Nloop++){
2303
2304 dtime(&Stime_atom);
2305
2306 /* get Mc_AN and h_AN */
2307
2308 Mc_AN = OneD2Mc_AN[Nloop];
2309 h_AN = OneD2h_AN[Nloop];
2310
2311 /* set data on Mc_AN */
2312
2313 Gc_AN = M2G[Mc_AN];
2314 Cwan = WhatSpecies[Gc_AN];
2315
2316 /* set data on h_AN */
2317
2318 Gh_AN = natn[Gc_AN][h_AN];
2319 Rnh = ncn[Gc_AN][h_AN];
2320 Hwan = WhatSpecies[Gh_AN];
2321
2322 dx = Gxyz[Gc_AN][1] - Gxyz[Gh_AN][1] - atv[Rnh][1];
2323 dy = Gxyz[Gc_AN][2] - Gxyz[Gh_AN][2] - atv[Rnh][2];
2324 dz = Gxyz[Gc_AN][3] - Gxyz[Gh_AN][3] - atv[Rnh][3];
2325
2326 xyz2spherical(dx,dy,dz,0.0,0.0,0.0,S_coordinate);
2327 r = S_coordinate[0];
2328 theta = S_coordinate[1];
2329 phi = S_coordinate[2];
2330
2331 /* for empty atoms or finite elemens basis */
2332
2333 if (r<1.0e-10) r = 1.0e-10;
2334
2335 /* precalculation of sin and cos */
2336
2337 siT = sin(theta);
2338 coT = cos(theta);
2339 siP = sin(phi);
2340 coP = cos(phi);
2341
2342 /****************************************************
2343 evaluate ovelap integrals <Phi_{LM,L'M'}|VNA>
2344 between VNA and PAO.
2345 ****************************************************/
2346 /****************************************************
2347 \int VNA(k)*Spe_ProductRF_Bessel(k)*jl(k*R) k^2 dk^3
2348 ****************************************************/
2349
2350 kmin = Radial_kmin;
2351 kmax = PAO_Nkmax;
2352 Sk = kmax + kmin;
2353 Dk = kmax - kmin;
2354
2355 for (L0=0; L0<=Spe_MaxL_Basis[Hwan]; L0++){
2356 for (Mul0=0; Mul0<Spe_Num_Basis[Hwan][L0]; Mul0++){
2357 for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
2358 for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
2359 for (M0=-L0; M0<=L0; M0++){
2360 for (M1=-L1; M1<=L1; M1++){
2361 TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
2362 TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
2363 TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
2364 TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
2365 }
2366 }
2367 }
2368 }
2369 }
2370 }
2371
2372 /* allocate SphB and SphBp */
2373
2374 Lmax_Four_Int = 2*Spe_MaxL_Basis[Hwan];
2375
2376 SphB = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
2377 for(LL=0; LL<(Lmax_Four_Int+3); LL++){
2378 SphB[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
2379 }
2380
2381 SphBp = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
2382 for(LL=0; LL<(Lmax_Four_Int+3); LL++){
2383 SphBp[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
2384 }
2385
2386 tmp_SphB = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
2387 tmp_SphBp = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
2388
2389 /* calculate SphB and SphBp */
2390 #ifdef kcomp
2391 #else
2392 #pragma forceinline recursive
2393 #endif
2394 for (i=0; i<GL_Mesh; i++){
2395 Normk = GL_NormK[i];
2396 Spherical_Bessel2(Normk*r,Lmax_Four_Int,tmp_SphB,tmp_SphBp);
2397 for(LL=0; LL<=Lmax_Four_Int; LL++){
2398 SphB[LL][i] = tmp_SphB[LL];
2399 SphBp[LL][i] = tmp_SphBp[LL];
2400 }
2401 }
2402
2403 free(tmp_SphB);
2404 free(tmp_SphBp);
2405
2406 /* loops for L0, Mul0, L1, and Mul1 */
2407
2408 for (L0=0; L0<=Spe_MaxL_Basis[Hwan]; L0++){
2409 for (Mul0=0; Mul0<Spe_Num_Basis[Hwan][L0]; Mul0++){
2410 for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
2411 for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
2412
2413 if (L0<=L1){
2414
2415 Lmax_Four_Int = 2*L1;
2416
2417 /* sum over LL */
2418
2419 for(LL=0; LL<=Lmax_Four_Int; LL++){
2420
2421 if (abs(L1-LL)<=L0 && L0<=(L1+LL) ){
2422
2423 sum = 0.0;
2424 sumr = 0.0;
2425
2426 /* Gauss-Legendre quadrature */
2427
2428 for (i=0; i<GL_Mesh; i++){
2429
2430 Normk = GL_NormK[i];
2431
2432 sj = SphB[LL][i];
2433 sjp = SphBp[LL][i];
2434
2435 tmp0 = Spe_CrudeVNA_Bessel[Cwan][i];
2436 tmp1 = Spe_ProductRF_Bessel[Hwan][L0][Mul0][L1][Mul1][LL][i];
2437 tmp10 = 0.50*Dk*tmp0*tmp1*GL_Weight[i]*Normk*Normk;
2438
2439 sum += tmp10*sj;
2440 sumr += tmp10*sjp*Normk;
2441 }
2442
2443 /* sum over m */
2444
2445 for (M0=-L0; M0<=L0; M0++){
2446 for (M1=-L1; M1<=L1; M1++){
2447
2448 Csum = Complex(0.0,0.0);
2449 Csumr = Complex(0.0,0.0);
2450 Csumt = Complex(0.0,0.0);
2451 Csump = Complex(0.0,0.0);
2452
2453 for(m=-LL; m<=LL; m++){
2454
2455 if ( (M1-m)==M0){
2456
2457 ComplexSH(LL,m,theta,phi,SH,dSHt,dSHp);
2458 CY = Complex(SH[0],SH[1]);
2459 CYt = Complex(dSHt[0],dSHt[1]);
2460 CYp = Complex(dSHp[0],dSHp[1]);
2461
2462 gant = pow(-1.0,(double)abs(m))*Gaunt(L0,M0,L1,M1,LL,-m);
2463
2464 /* S */
2465
2466 Ctmp2 = CRmul(CY,gant);
2467 Csum = Cadd(Csum,Ctmp2);
2468
2469 /* dS/dt */
2470
2471 Ctmp2 = CRmul(CYt,gant);
2472 Csumt = Cadd(Csumt,Ctmp2);
2473
2474 /* dS/dp */
2475
2476 Ctmp2 = CRmul(CYp,gant);
2477 Csump = Cadd(Csump,Ctmp2);
2478 }
2479
2480 }
2481
2482 /* S */
2483 Ctmp1 = CRmul(Csum,sum);
2484 TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
2485 = Cadd(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
2486
2487 /* dS/dr */
2488 Ctmp1 = CRmul(Csum,sumr);
2489 TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
2490 = Cadd(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
2491
2492 /* dS/dt */
2493 Ctmp1 = CRmul(Csumt,sum);
2494 TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
2495 = Cadd(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
2496
2497 /* dS/dp */
2498 Ctmp1 = CRmul(Csump,sum);
2499 TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
2500 = Cadd(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
2501 }
2502 }
2503 }
2504 } /* LL */
2505 } /* if (L0<=L1) */
2506 }
2507 }
2508 }
2509 }
2510
2511 /* free SphB and SphBp */
2512
2513 for(LL=0; LL<(Lmax_Four_Int+3); LL++){
2514 free(SphB[LL]);
2515 }
2516 free(SphB);
2517
2518 for(LL=0; LL<(Lmax_Four_Int+3); LL++){
2519 free(SphBp[LL]);
2520 }
2521 free(SphBp);
2522
2523 /* copy the upper part to the lower part */
2524
2525 for (L0=0; L0<=Spe_MaxL_Basis[Hwan]; L0++){
2526 for (Mul0=0; Mul0<Spe_Num_Basis[Hwan][L0]; Mul0++){
2527 for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
2528 for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
2529 if (L0<=L1){
2530 for (M0=-L0; M0<=L0; M0++){
2531 for (M1=-L1; M1<=L1; M1++){
2532
2533 TmpHNA[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
2534 = Conjg(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
2535
2536 TmpHNAr[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
2537 = Conjg(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
2538
2539 TmpHNAt[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
2540 = Conjg(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
2541
2542 TmpHNAp[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
2543 = Conjg(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
2544
2545 }
2546 }
2547 }
2548 }
2549 }
2550 }
2551 }
2552
2553 /****************************************************
2554 Complex to Real
2555 ****************************************************/
2556
2557 num0 = 0;
2558 for (L0=0; L0<=Spe_MaxL_Basis[Hwan]; L0++){
2559 for (Mul0=0; Mul0<Spe_Num_Basis[Hwan][L0]; Mul0++){
2560
2561 num1 = 0;
2562 for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
2563 for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
2564
2565 for (M0=-L0; M0<=L0; M0++){
2566 for (M1=-L1; M1<=L1; M1++){
2567
2568 CsumS0 = Complex(0.0,0.0);
2569 CsumSr = Complex(0.0,0.0);
2570 CsumSt = Complex(0.0,0.0);
2571 CsumSp = Complex(0.0,0.0);
2572
2573 for (k=-L0; k<=L0; k++){
2574
2575 Ctmp1 = Conjg(Comp2Real[L0][L0+M0][L0+k]);
2576
2577 /* S */
2578
2579 Ctmp0 = TmpHNA[L0][Mul0][L0+k][L1][Mul1][L1+M1];
2580 Ctmp2 = Cmul(Ctmp1,Ctmp0);
2581 CsumS0 = Cadd(CsumS0,Ctmp2);
2582
2583 /* dS/dr */
2584
2585 Ctmp0 = TmpHNAr[L0][Mul0][L0+k][L1][Mul1][L1+M1];
2586 Ctmp2 = Cmul(Ctmp1,Ctmp0);
2587 CsumSr = Cadd(CsumSr,Ctmp2);
2588
2589 /* dS/dt */
2590
2591 Ctmp0 = TmpHNAt[L0][Mul0][L0+k][L1][Mul1][L1+M1];
2592 Ctmp2 = Cmul(Ctmp1,Ctmp0);
2593 CsumSt = Cadd(CsumSt,Ctmp2);
2594
2595 /* dS/dp */
2596
2597 Ctmp0 = TmpHNAp[L0][Mul0][L0+k][L1][Mul1][L1+M1];
2598 Ctmp2 = Cmul(Ctmp1,Ctmp0);
2599 CsumSp = Cadd(CsumSp,Ctmp2);
2600
2601 }
2602
2603 CmatS0[L0+M0][L1+M1] = CsumS0;
2604 CmatSr[L0+M0][L1+M1] = CsumSr;
2605 CmatSt[L0+M0][L1+M1] = CsumSt;
2606 CmatSp[L0+M0][L1+M1] = CsumSp;
2607
2608 }
2609 }
2610
2611 for (M0=-L0; M0<=L0; M0++){
2612 for (M1=-L1; M1<=L1; M1++){
2613
2614 CsumS0 = Complex(0.0,0.0);
2615 CsumSr = Complex(0.0,0.0);
2616 CsumSt = Complex(0.0,0.0);
2617 CsumSp = Complex(0.0,0.0);
2618
2619 for (k=-L1; k<=L1; k++){
2620
2621 /* S */
2622
2623
2624 Ctmp1 = Cmul(CmatS0[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
2625 CsumS0 = Cadd(CsumS0,Ctmp1);
2626
2627 /* dS/dr */
2628
2629 Ctmp1 = Cmul(CmatSr[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
2630 CsumSr = Cadd(CsumSr,Ctmp1);
2631
2632 /* dS/dt */
2633
2634 Ctmp1 = Cmul(CmatSt[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
2635 CsumSt = Cadd(CsumSt,Ctmp1);
2636
2637 /* dS/dp */
2638
2639 Ctmp1 = Cmul(CmatSp[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
2640 CsumSp = Cadd(CsumSp,Ctmp1);
2641
2642 }
2643
2644 HVNA3[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*CsumS0.r;
2645
2646 if (h_AN!=0){
2647
2648 if (fabs(siT)<10e-14){
2649
2650 HVNA3[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2651 -8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r);
2652
2653 HVNA3[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2654 -8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r);
2655
2656 HVNA3[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2657 -8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
2658 }
2659
2660 else{
2661
2662 HVNA3[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2663 -8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r
2664 - siP/siT/r*CsumSp.r);
2665
2666 HVNA3[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2667 -8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r
2668 + coP/siT/r*CsumSp.r);
2669
2670 HVNA3[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2671 -8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
2672 }
2673 }
2674 else{
2675 HVNA3[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
2676 HVNA3[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
2677 HVNA3[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
2678 }
2679
2680 }
2681 }
2682
2683 num1 = num1 + 2*L1 + 1;
2684 }
2685 }
2686
2687 num0 = num0 + 2*L0 + 1;
2688 }
2689 }
2690
2691 dtime(&Etime_atom);
2692 time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
2693
2694 } /* Mc_AN */
2695
2696 /* freeing of arrays */
2697
2698 for (i=0; i<(List_YOUSO[25]+1); i++){
2699 for (j=0; j<List_YOUSO[24]; j++){
2700 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2701 for (l=0; l<(List_YOUSO[25]+1); l++){
2702 for (m=0; m<List_YOUSO[24]; m++){
2703 free(TmpHNA[i][j][k][l][m]);
2704 }
2705 free(TmpHNA[i][j][k][l]);
2706 }
2707 free(TmpHNA[i][j][k]);
2708 }
2709 free(TmpHNA[i][j]);
2710 }
2711 free(TmpHNA[i]);
2712 }
2713 free(TmpHNA);
2714
2715 for (i=0; i<(List_YOUSO[25]+1); i++){
2716 for (j=0; j<List_YOUSO[24]; j++){
2717 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2718 for (l=0; l<(List_YOUSO[25]+1); l++){
2719 for (m=0; m<List_YOUSO[24]; m++){
2720 free(TmpHNAr[i][j][k][l][m]);
2721 }
2722 free(TmpHNAr[i][j][k][l]);
2723 }
2724 free(TmpHNAr[i][j][k]);
2725 }
2726 free(TmpHNAr[i][j]);
2727 }
2728 free(TmpHNAr[i]);
2729 }
2730 free(TmpHNAr);
2731
2732 for (i=0; i<(List_YOUSO[25]+1); i++){
2733 for (j=0; j<List_YOUSO[24]; j++){
2734 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2735 for (l=0; l<(List_YOUSO[25]+1); l++){
2736 for (m=0; m<List_YOUSO[24]; m++){
2737 free(TmpHNAt[i][j][k][l][m]);
2738 }
2739 free(TmpHNAt[i][j][k][l]);
2740 }
2741 free(TmpHNAt[i][j][k]);
2742 }
2743 free(TmpHNAt[i][j]);
2744 }
2745 free(TmpHNAt[i]);
2746 }
2747 free(TmpHNAt);
2748
2749 for (i=0; i<(List_YOUSO[25]+1); i++){
2750 for (j=0; j<List_YOUSO[24]; j++){
2751 for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2752 for (l=0; l<(List_YOUSO[25]+1); l++){
2753 for (m=0; m<List_YOUSO[24]; m++){
2754 free(TmpHNAp[i][j][k][l][m]);
2755 }
2756 free(TmpHNAp[i][j][k][l]);
2757 }
2758 free(TmpHNAp[i][j][k]);
2759 }
2760 free(TmpHNAp[i][j]);
2761 }
2762 free(TmpHNAp[i]);
2763 }
2764 free(TmpHNAp);
2765
2766 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2767 free(CmatS0[i]);
2768 }
2769 free(CmatS0);
2770
2771 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2772 free(CmatSr[i]);
2773 }
2774 free(CmatSr);
2775
2776 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2777 free(CmatSt[i]);
2778 }
2779 free(CmatSt);
2780
2781 for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2782 free(CmatSp[i]);
2783 }
2784 free(CmatSp);
2785
2786 #pragma omp flush(HVNA3)
2787
2788 } /* #pragma omp parallel */
2789
2790 /****************************************************
2791 freeing of arrays:
2792 ****************************************************/
2793
2794 free(OneD2Mc_AN);
2795 free(OneD2h_AN);
2796
2797 /* for time */
2798 dtime(&TEtime);
2799 time0 = TEtime - TStime;
2800 return time0;
2801 }
2802
2803
2804 #define xmin 0.0
2805 #define asize_lmax 20
2806
2807 #ifdef kcomp
Spherical_Bessel2(double x,int lmax,double * sb,double * dsb)2808 static void Spherical_Bessel2( double x, int lmax, double *sb, double *dsb )
2809 #else
2810 inline void Spherical_Bessel2( double x, int lmax, double *sb, double *dsb )
2811 #endif
2812 {
2813 int m,n,nmax;
2814 double tsb[asize_lmax+10];
2815 double invx,vsb0,vsb1,vsb2,vsbi;
2816 double j0,j1,j0p,j1p,sf,tmp,si,co,ix,ix2;
2817
2818 if (x<0.0){
2819 printf("minus x is invalid for Spherical_Bessel\n");
2820 exit(0);
2821 }
2822
2823 /* find an appropriate nmax */
2824
2825 nmax = lmax + 3*x + 20;
2826 if (nmax<100) nmax = 100;
2827
2828 if (asize_lmax<(lmax+1)){
2829 printf("asize_lmax should be larger than %d in Spherical_Bessel.c\n",lmax+1);
2830 exit(0);
2831 }
2832
2833 /* if x is larger than xmin */
2834
2835 if ( xmin < x ){
2836
2837 invx = 1.0/x;
2838
2839 /* initial values */
2840
2841 vsb0 = 0.0;
2842 vsb1 = 1.0e-14;
2843
2844 /* downward recurrence from nmax-2 to lmax+2 */
2845
2846 for ( n=nmax-1; (lmax+2)<n; n-- ){
2847
2848 vsb2 = (2.0*n + 1.0)*invx*vsb1 - vsb0;
2849
2850 if (1.0e+250<vsb2){
2851 tmp = 1.0/vsb2;
2852 vsb2 *= tmp;
2853 vsb1 *= tmp;
2854 }
2855
2856 vsbi = vsb0;
2857 vsb0 = vsb1;
2858 vsb1 = vsb2;
2859 }
2860
2861 /* downward recurrence from lmax+1 to 0 */
2862
2863 n = lmax + 3;
2864 tsb[n-1] = vsb1;
2865 tsb[n ] = vsb0;
2866 tsb[n+1] = vsbi;
2867
2868 tmp = tsb[n-1];
2869 tsb[n-1] /= tmp;
2870 tsb[n ] /= tmp;
2871
2872 for ( n=lmax+2; 0<n; n-- ){
2873
2874 tsb[n-1] = (2.0*n + 1.0)*invx*tsb[n] - tsb[n+1];
2875
2876 if (1.0e+250<tsb[n-1]){
2877 tmp = tsb[n-1];
2878 for (m=n-1; m<=lmax+1; m++){
2879 tsb[m] /= tmp;
2880 }
2881 }
2882 }
2883
2884 /* normalization */
2885
2886 si = sin(x);
2887 co = cos(x);
2888 ix = 1.0/x;
2889 ix2 = ix*ix;
2890 j0 = si*ix;
2891 j1 = si*ix*ix - co*ix;
2892
2893 if (fabs(tsb[1])<fabs(tsb[0])) sf = j0/tsb[0];
2894 else sf = j1/tsb[1];
2895
2896 /* tsb to sb */
2897
2898 for ( n=0; n<=lmax+1; n++ ){
2899 sb[n] = tsb[n]*sf;
2900 }
2901
2902 /* derivative of sb */
2903
2904 dsb[0] = co*ix - si*ix*ix;
2905 for ( n=1; n<=lmax; n++ ){
2906 dsb[n] = ( (double)n*sb[n-1] - (double)(n+1.0)*sb[n+1] )/(2.0*(double)n + 1.0);
2907 }
2908
2909 }
2910
2911 /* if x is smaller than xmin */
2912
2913 else {
2914
2915 /* sb */
2916
2917 for ( n=0; n<=lmax; n++ ){
2918 sb[n] = 0.0;
2919 }
2920 sb[0] = 1.0;
2921
2922 /* derivative of sb */
2923
2924 dsb[0] = 0.0;
2925 for ( n=1; n<=lmax; n++ ){
2926 dsb[n] = ( (double)n*sb[n-1] - (double)(n+1.0)*sb[n+1] )/(2.0*(double)n + 1.0);
2927 }
2928 }
2929 }
2930