1 /**********************************************************************
2   Set_ProExpn_VNA.c:
3 
4      Set_ProExpn_VNA.c is a subroutine to calculate matrix elements and
5      the derivatives of VNA projector expansion in the momentum space.
6 
7   Log of Set_ProExpn_VNA.c:
8 
9      8/Apr/2004  Released by T.Ozaki
10 
11 ***********************************************************************/
12 
13 #define  measure_time   0
14 
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <math.h>
18 #include <string.h>
19 #include <time.h>
20 #include <sys/types.h>
21 #include <sys/times.h>
22 #include <sys/time.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include "openmx_common.h"
26 #include "mpi.h"
27 #include <omp.h>
28 
29 static double Set_ProExpn(double ****HVNA, Type_DS_VNA *****DS_VNA);
30 static double Set_VNA2(double ****HVNA, double *****HVNA2);
31 static double Set_VNA3(double *****HVNA3);
32 
33 #ifdef kcomp
34 static void Spherical_Bessel2( double x, int lmax, double *sb, double *dsb );
35 #else
36 inline void Spherical_Bessel2( double x, int lmax, double *sb, double *dsb );
37 #endif
38 
39 
Set_ProExpn_VNA(double **** HVNA,double ***** HVNA2,Type_DS_VNA ***** DS_VNA)40 double Set_ProExpn_VNA(double ****HVNA, double *****HVNA2, Type_DS_VNA *****DS_VNA)
41 {
42   double time1,time2,time3;
43 
44   /* separable form */
45 
46   time1 = Set_ProExpn(HVNA, DS_VNA);
47 
48   /* one-center (orbitals) but two-center integrals, <Phi_{LM,L'M'}|VNA> */
49 
50   time2 = Set_VNA2(HVNA, HVNA2);
51 
52   /* one-center (orbitals) but two-center integrals, <VNA|Phi_{LM,L'M'}> */
53 
54   time3 = Set_VNA3(HVNA3);
55 
56   if (measure_time){
57     printf("Time Set_ProExpn=%7.3f Set_VNA2=%7.3f Set_VNA3=%7.3f\n",time1,time2,time3);
58   }
59 
60   return time1+time2+time3;
61 }
62 
63 
64 
Set_ProExpn(double **** HVNA,Type_DS_VNA ***** DS_VNA)65 double Set_ProExpn(double ****HVNA, Type_DS_VNA *****DS_VNA)
66 {
67   /****************************************************
68    evaluate matrix elements of neutral atom potential
69    in the KB or Blochl separable form in momentum space.
70   ****************************************************/
71   static int firsttime=1;
72   int L2,L3,L,i,j,Mc_AN,Gc_AN,h_AN,Cwan,i1,j1;
73   int kk,Ls,n,j0,jg0,Mj_AN0,m,L1,GL,Mul1;
74   int tno0,tno1,tno2,p,q,po1,po,Original_Mc_AN;
75   int L0,Mul0,spe,Gh_AN,Hwan,fan,jg,k,kg,wakg,kl;
76   int size_SumNL0,size_TmpNL;
77   int Mc_AN2,Gc_AN2,num,size1,size2;
78   int *Snd_DS_VNA_Size,*Rcv_DS_VNA_Size;
79   double kmin,kmax,Sk,Dk,Normk,dmp;
80   Type_DS_VNA *tmp_array;
81   Type_DS_VNA *tmp_array2;
82   double tmp0,tmp3,tmp4,tmp5,tmp10;
83   double ****Bessel_Pro00;
84   double ****Bessel_Pro01;
85   double ene,sum,h,coe0,sj,sy,sjp,syp;
86   double rcutA,rcutB,rcut;
87   double TStime,TEtime;
88   double stime,etime;
89   double Stime_atom,Etime_atom;
90   int numprocs,myid,tag=999,ID,IDS,IDR;
91   int *VNA_List;
92   int *VNA_List2;
93   int Num_RVNA;
94   double **NLH;
95   double time0,time1,time2,time3,time4,time5;
96   dcomplex sum0,sum1,sum2;
97 
98   MPI_Status stat;
99   MPI_Request request;
100 
101   int OneD_Nloop,*OneD2Mc_AN,*OneD2h_AN;
102 
103   /* MPI */
104   MPI_Comm_size(mpi_comm_level1,&numprocs);
105   MPI_Comm_rank(mpi_comm_level1,&myid);
106 
107   dtime(&TStime);
108 
109   /****************************************************
110                  allocation of arrays:
111   ****************************************************/
112 
113   Num_RVNA = List_YOUSO[34]*(List_YOUSO[35] + 1);
114 
115   Snd_DS_VNA_Size = (int*)malloc(sizeof(int)*numprocs);
116   Rcv_DS_VNA_Size = (int*)malloc(sizeof(int)*numprocs);
117 
118   VNA_List  = (int*)malloc(sizeof(int)*(List_YOUSO[34]*(List_YOUSO[35] + 1)+2) );
119   VNA_List2 = (int*)malloc(sizeof(int)*(List_YOUSO[34]*(List_YOUSO[35] + 1)+2) );
120 
121   Bessel_Pro00 = (double****)malloc(sizeof(double***)*SpeciesNum);
122   for (spe=0; spe<SpeciesNum; spe++){
123     Bessel_Pro00[spe] = (double***)malloc(sizeof(double**)*(Spe_MaxL_Basis[spe]+1));
124     for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
125       Bessel_Pro00[spe][L0] = (double**)malloc(sizeof(double*)*Spe_Num_Basis[spe][L0]);
126       for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
127 	Bessel_Pro00[spe][L0][Mul0] = (double*)malloc(sizeof(double)*GL_Mesh);
128       }
129     }
130   }
131 
132   Bessel_Pro01 = (double****)malloc(sizeof(double***)*SpeciesNum);
133   for (spe=0; spe<SpeciesNum; spe++){
134     Bessel_Pro01[spe] = (double***)malloc(sizeof(double**)*(Spe_MaxL_Basis[spe]+1));
135     for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
136       Bessel_Pro01[spe][L0] = (double**)malloc(sizeof(double*)*Spe_Num_Basis[spe][L0]);
137       for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
138         Bessel_Pro01[spe][L0][Mul0] = (double*)malloc(sizeof(double)*GL_Mesh);
139       }
140     }
141   }
142 
143   size_TmpNL = (List_YOUSO[25]+1)*List_YOUSO[24]*
144                (2*(List_YOUSO[25]+1)+1)*Num_RVNA*(2*List_YOUSO[30]+1);
145   size_SumNL0 = (List_YOUSO[25]+1)*List_YOUSO[24]*(Num_RVNA+1);
146 
147   /* PrintMemory */
148   if (firsttime) {
149     PrintMemory("Set_ProExpn_VNA: SumNL0",sizeof(double)*size_SumNL0,NULL);
150     PrintMemory("Set_ProExpn_VNA: SumNLr0",sizeof(double)*size_SumNL0,NULL);
151     PrintMemory("Set_ProExpn_VNA: TmpNL", sizeof(dcomplex)*size_TmpNL,NULL);
152     PrintMemory("Set_ProExpn_VNA: TmpNLr",sizeof(dcomplex)*size_TmpNL,NULL);
153     PrintMemory("Set_ProExpn_VNA: TmpNLt",sizeof(dcomplex)*size_TmpNL,NULL);
154     PrintMemory("Set_ProExpn_VNA: TmpNLp",sizeof(dcomplex)*size_TmpNL,NULL);
155     firsttime=0;
156   }
157 
158   if (measure_time){
159     time1 = 0.0;
160     time2 = 0.0;
161     time3 = 0.0;
162     time4 = 0.0;
163     time5 = 0.0;
164   }
165 
166   /************************************************************
167    Spe_Num_RVPS[Hwan]     ->  Num_RVNA = List_YOUSO[34]*(List_YOUSO[35]+1)
168    Spe_VPS_List[Hwan][L]  ->  VNA_List[L] = 0,0,0,.., 1,1,1,.., 2,2,2,..,
169    List_YOUSO[19]         ->  Num_RVNA
170    List_YOUSO[30]         ->  List_YOUSO[35]
171   *************************************************************/
172 
173   L = 0;
174   for (i=0; i<=List_YOUSO[35]; i++){    /* max L */
175     for (j=0; j<List_YOUSO[34]; j++){   /* # of radial projectors */
176       VNA_List[L]  = i;
177       VNA_List2[L] = j;
178       L++;
179     }
180   }
181 
182   /************************************************************
183    pre-calculate the time consuming part in the 'time1' part
184   *************************************************************/
185 
186   kmin = Radial_kmin;
187   kmax = PAO_Nkmax;
188   Sk = kmax + kmin;
189   Dk = kmax - kmin;
190 
191   for (spe=0; spe<SpeciesNum; spe++){
192 
193     for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
194       for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
195 
196 #pragma omp parallel shared(Mul0,L0,spe,Bessel_Pro01,Bessel_Pro00,GL_Weight,Dk,GL_NormK) private(i,Normk,tmp10)
197 	{
198 
199 	  int OMPID,Nthrds,Nprocs;
200 
201 	  /* get info. on OpenMP */
202 
203 	  OMPID = omp_get_thread_num();
204 	  Nthrds = omp_get_num_threads();
205 	  Nprocs = omp_get_num_procs();
206 
207 	  for (i=OMPID*GL_Mesh/Nthrds; i<(OMPID+1)*GL_Mesh/Nthrds; i++){
208 
209 	    Normk = GL_NormK[i];
210 	    tmp10 = 0.50*Dk*GL_Weight[i]*Normk*Normk;
211 
212 	    Bessel_Pro00[spe][L0][Mul0][i] = RF_BesselF(spe,L0,Mul0,Normk)*tmp10;
213 	    Bessel_Pro01[spe][L0][Mul0][i] = Bessel_Pro00[spe][L0][Mul0][i]*Normk;
214 	  }
215 
216 #pragma omp flush(Bessel_Pro00,Bessel_Pro01)
217 
218 	} /* #pragma omp parallel */
219 
220       }
221     }
222   }
223 
224   /************************************************************
225     start the main calculation
226   *************************************************************/
227 
228   /* one-dimensionalize the Mc_AN and h_AN loops */
229 
230   OneD_Nloop = 0;
231   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
232     Gc_AN = M2G[Mc_AN];
233     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
234       OneD_Nloop++;
235     }
236   }
237 
238   OneD2Mc_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
239   OneD2h_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
240 
241   OneD_Nloop = 0;
242   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
243     Gc_AN = M2G[Mc_AN];
244     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
245       OneD2Mc_AN[OneD_Nloop] = Mc_AN;
246       OneD2h_AN[OneD_Nloop] = h_AN;
247       OneD_Nloop++;
248     }
249   }
250 
251 #pragma omp parallel shared(time_per_atom,DS_VNA,Comp2Real,Spe_VNA_Bessel,Bessel_Pro01,Bessel_Pro00,GL_NormK,List_YOUSO,VNA_List,Num_RVNA,Spe_Num_Basis,Spe_MaxL_Basis,PAO_Nkmax,atv,Gxyz,ncn,natn,Spe_Atom_Cut1,WhatSpecies,M2G,OneD2h_AN,OneD2Mc_AN,OneD_Nloop,time1,time2,time3)
252 
253   {
254     int i,j,k,l,m,num0,num1;
255     int Mc_AN,h_AN,Gc_AN,Cwan,Gh_AN;
256     int Rnh,Hwan,L0,Mul0,L,L1,M0,M1,LL,Mul1,Ls;
257     int OMPID,Nthrds,Nprocs,Nloop;
258     int Lmax,Lmax_Four_Int;
259     double Stime_atom,Etime_atom;
260     double rcutA,rcutB,rcut;
261     double dx,dy,dz;
262     double kmin,kmax,Sk,Dk;
263     double gant,SH[2],dSHt[2],dSHp[2];
264     double S_coordinate[3];
265     double r,theta,phi;
266     double Normk,tmp0,tmp1,tmp2;
267     double siT,coT,siP,coP;
268     double ***SumNL0;
269     double ***SumNLr0;
270     double *Bes00;
271     double *Bes01;
272     double tmp_SphB[30];
273     double tmp_SphBp[30];
274     double SphB[30][GL_Mesh];
275     double SphBp[30][GL_Mesh];
276     double stime,etime;
277 
278     dcomplex Ctmp1,Ctmp0,Ctmp2;
279     dcomplex CsumNL0,CsumNLr,CsumNLt,CsumNLp;
280     dcomplex CY,CYt,CYp,CY1,CYt1,CYp1,Cpow;
281     dcomplex *****TmpNL;
282     dcomplex *****TmpNLr;
283     dcomplex *****TmpNLt;
284     dcomplex *****TmpNLp;
285     dcomplex **CmatNL0;
286     dcomplex **CmatNLr;
287     dcomplex **CmatNLt;
288     dcomplex **CmatNLp;
289 
290     /* allocation of arrays */
291 
292     TmpNL = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
293     for (i=0; i<(List_YOUSO[25]+1); i++){
294       TmpNL[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
295       for (j=0; j<List_YOUSO[24]; j++){
296 	TmpNL[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
297 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
298 	  TmpNL[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
299 	  for (l=0; l<Num_RVNA; l++){
300 	    TmpNL[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
301 	  }
302 	}
303       }
304     }
305 
306     TmpNLr = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
307     for (i=0; i<(List_YOUSO[25]+1); i++){
308       TmpNLr[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
309       for (j=0; j<List_YOUSO[24]; j++){
310 	TmpNLr[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
311 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
312 	  TmpNLr[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
313 	  for (l=0; l<Num_RVNA; l++){
314 	    TmpNLr[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
315 	  }
316 	}
317       }
318     }
319 
320     TmpNLt = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
321     for (i=0; i<(List_YOUSO[25]+1); i++){
322       TmpNLt[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
323       for (j=0; j<List_YOUSO[24]; j++){
324 	TmpNLt[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
325 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
326 	  TmpNLt[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
327 	  for (l=0; l<Num_RVNA; l++){
328 	    TmpNLt[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
329 	  }
330 	}
331       }
332     }
333 
334     TmpNLp = (dcomplex*****)malloc(sizeof(dcomplex****)*(List_YOUSO[25]+1));
335     for (i=0; i<(List_YOUSO[25]+1); i++){
336       TmpNLp[i] = (dcomplex****)malloc(sizeof(dcomplex***)*List_YOUSO[24]);
337       for (j=0; j<List_YOUSO[24]; j++){
338 	TmpNLp[i][j] = (dcomplex***)malloc(sizeof(dcomplex**)*(2*(List_YOUSO[25]+1)+1));
339 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
340 	  TmpNLp[i][j][k] = (dcomplex**)malloc(sizeof(dcomplex*)*Num_RVNA);
341 	  for (l=0; l<Num_RVNA; l++){
342 	    TmpNLp[i][j][k][l] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
343 	  }
344 	}
345       }
346     }
347 
348     SumNL0 = (double***)malloc(sizeof(double**)*(List_YOUSO[25]+1));
349     for (i=0; i<(List_YOUSO[25]+1); i++){
350       SumNL0[i] = (double**)malloc(sizeof(double*)*List_YOUSO[24]);
351       for (j=0; j<List_YOUSO[24]; j++){
352 	SumNL0[i][j] = (double*)malloc(sizeof(double)*Num_RVNA);
353       }
354     }
355 
356     SumNLr0 = (double***)malloc(sizeof(double**)*(List_YOUSO[25]+1));
357     for (i=0; i<(List_YOUSO[25]+1); i++){
358       SumNLr0[i] = (double**)malloc(sizeof(double*)*List_YOUSO[24]);
359       for (j=0; j<List_YOUSO[24]; j++){
360 	SumNLr0[i][j] = (double*)malloc(sizeof(double)*Num_RVNA);
361       }
362     }
363 
364     CmatNL0 = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
365     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
366       CmatNL0[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
367     }
368 
369     CmatNLr = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
370     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
371       CmatNLr[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
372     }
373 
374     CmatNLt = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
375     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
376       CmatNLt[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
377     }
378 
379     CmatNLp = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
380     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
381       CmatNLp[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*List_YOUSO[35]+1));
382     }
383 
384     Bes00 = (double*)malloc(sizeof(double)*GL_Mesh);
385     Bes01 = (double*)malloc(sizeof(double)*GL_Mesh);
386 
387     /* get info. on OpenMP */
388 
389     OMPID = omp_get_thread_num();
390     Nthrds = omp_get_num_threads();
391     Nprocs = omp_get_num_procs();
392 
393     /* one-dimensionalized loop */
394 
395     for (Nloop=OMPID*OneD_Nloop/Nthrds; Nloop<(OMPID+1)*OneD_Nloop/Nthrds; Nloop++){
396 
397       dtime(&Stime_atom);
398 
399       /* get Mc_AN and h_AN */
400 
401       Mc_AN = OneD2Mc_AN[Nloop];
402       h_AN  = OneD2h_AN[Nloop];
403 
404       /* set data on Mc_AN */
405 
406       Gc_AN = M2G[Mc_AN];
407       Cwan = WhatSpecies[Gc_AN];
408       rcutA = Spe_Atom_Cut1[Cwan];
409 
410       /* set data on h_AN */
411 
412       Gh_AN = natn[Gc_AN][h_AN];
413       Rnh = ncn[Gc_AN][h_AN];
414       Hwan = WhatSpecies[Gh_AN];
415       rcutB = Spe_Atom_Cut1[Hwan];
416       rcut = rcutA + rcutB;
417 
418       dx = Gxyz[Gh_AN][1] + atv[Rnh][1] - Gxyz[Gc_AN][1];
419       dy = Gxyz[Gh_AN][2] + atv[Rnh][2] - Gxyz[Gc_AN][2];
420       dz = Gxyz[Gh_AN][3] + atv[Rnh][3] - Gxyz[Gc_AN][3];
421 
422       xyz2spherical(dx,dy,dz,0.0,0.0,0.0,S_coordinate);
423       r     = S_coordinate[0];
424       theta = S_coordinate[1];
425       phi   = S_coordinate[2];
426 
427       /* for empty atoms or finite elemens basis */
428 
429       if (r<1.0e-10) r = 1.0e-10;
430 
431       /* precalculation of sin and cos */
432 
433       siT = sin(theta);
434       coT = cos(theta);
435       siP = sin(phi);
436       coP = cos(phi);
437 
438       /****************************************************
439          evaluate ovelap integrals <chi0|P> between PAOs
440          and progectors of nonlocal VNA potentials.
441       ****************************************************/
442       /****************************************************
443               \int RL(k)*RL'(k)*jl(k*R) k^2 dk^3
444       ****************************************************/
445 
446       kmin = Radial_kmin;
447       kmax = PAO_Nkmax;
448       Sk = kmax + kmin;
449       Dk = kmax - kmin;
450 
451       for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
452         for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
453           for (L=0; L<Num_RVNA; L++){
454             L1 = VNA_List[L];
455             for (M0=-L0; M0<=L0; M0++){
456 	      for (M1=-L1; M1<=L1; M1++){
457 		TmpNL[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0);
458 		TmpNLr[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0);
459 		TmpNLt[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0);
460 		TmpNLp[L0][Mul0][L0+M0][L][L1+M1] = Complex(0.0,0.0);
461 	      }
462 	    }
463 	  }
464 	}
465       }
466 
467       Lmax = -10;
468       for (L=0; L<Num_RVNA; L++){
469 	if (Lmax<VNA_List[L]) Lmax = VNA_List[L];
470       }
471       if (Spe_MaxL_Basis[Cwan]<Lmax)
472 	Lmax_Four_Int = 2*Lmax;
473       else
474         Lmax_Four_Int = 2*Spe_MaxL_Basis[Cwan];
475 
476       /* calculate SphB and SphBp */
477 #ifdef kcomp
478 #else
479 #pragma forceinline recursive
480 #endif
481       for (i=0; i<GL_Mesh; i++){
482         Normk = GL_NormK[i];
483         Spherical_Bessel2(Normk*r,Lmax_Four_Int,tmp_SphB,tmp_SphBp);
484         for(LL=0; LL<=Lmax_Four_Int; LL++){
485           SphB[LL][i]  = tmp_SphB[LL];
486           SphBp[LL][i] = tmp_SphBp[LL];
487 	}
488       }
489 
490       /* LL loop */
491 
492       for(LL=0; LL<=Lmax_Four_Int; LL++){
493 
494 	for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
495 	  for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
496 	    for (L=0; L<Num_RVNA; L++){
497 	      SumNL0[L0][Mul0][L] = 0.0;
498 	      SumNLr0[L0][Mul0][L] = 0.0;
499 	    }
500 	  }
501 	}
502 
503         /* Gauss-Legendre quadrature */
504 
505         if (measure_time) dtime(&stime);
506 
507         for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
508 	  for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
509 
510             for (i=0; i<GL_Mesh; i++){
511               Bes00[i] = Bessel_Pro00[Cwan][L0][Mul0][i]*SphB[LL][i];
512               Bes01[i] = Bessel_Pro01[Cwan][L0][Mul0][i]*SphBp[LL][i];
513             }
514 
515             L = 0;
516             for (L1=0; L1<=List_YOUSO[35]; L1++){
517               for (Mul1=0; Mul1<List_YOUSO[34]; Mul1++){
518 
519                 tmp1 = 0.0;
520                 tmp2 = 0.0;
521 
522                 for (i=0; i<GL_Mesh; i++){
523                   tmp1 += Bes00[i]*Spe_VNA_Bessel[Hwan][L1][Mul1][i];
524                   tmp2 += Bes01[i]*Spe_VNA_Bessel[Hwan][L1][Mul1][i];
525 		}
526 
527 		SumNL0[L0][Mul0][L]  = tmp1;
528 		SumNLr0[L0][Mul0][L] = tmp2;
529 
530                 L++;
531 	      }
532 	    }
533 	  }
534 	}
535 
536         if (measure_time && OMPID==0){
537           dtime(&etime);
538           time1 += etime - stime;
539 	}
540 
541 	/* derivatives of "on site" */
542 
543 	if (h_AN==0){
544 	  for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
545 	    for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
546 	      for (L=0; L<Num_RVNA; L++){
547 		SumNLr0[L0][Mul0][L] = 0.0;
548 	      }
549 	    }
550 	  }
551 	}
552 
553 	/****************************************************
554          for "overlap",
555 	    sum_m 8*(-i)^{-L0+L1+l}*
556 	          C_{L0,-M0,L1,M1,l,m}*
557 	          \int RL(k)*RL'(k)*jl(k*R) k^2 dk^3,
558 	****************************************************/
559 
560         if (measure_time && OMPID==0) dtime(&stime);
561 
562 	for(m=-LL; m<=LL; m++){
563 
564 	  ComplexSH(LL,m,theta,phi,SH,dSHt,dSHp);
565 
566 	  SH[1]   = -SH[1];
567 	  dSHt[1] = -dSHt[1];
568 	  dSHp[1] = -dSHp[1];
569 
570 	  CY  = Complex(SH[0],SH[1]);
571 	  CYt = Complex(dSHt[0],dSHt[1]);
572 	  CYp = Complex(dSHp[0],dSHp[1]);
573 
574 	  for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
575 	    for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
576 	      for (L=0; L<Num_RVNA; L++){
577 
578 		L1 = VNA_List[L];
579 		Ls = -L0 + L1 + LL;
580 
581   	        if ( abs(L1-LL)<=L0 && L0<=(L1+LL) ){
582 
583 		  Cpow = Im_pow(-1,Ls);
584 		  CY1  = Cmul(Cpow,CY);
585 		  CYt1 = Cmul(Cpow,CYt);
586 		  CYp1 = Cmul(Cpow,CYp);
587 
588 		  for (M0=-L0; M0<=L0; M0++){
589 
590 		    M1 = M0 - m;
591 
592                     if (abs(M1)<=L1){
593 
594 		      gant = Gaunt(L0,M0,L1,M1,LL,m);
595 		      tmp2 = gant*SumNL0[L0][Mul0][L];
596 
597 		      /* S */
598 
599 		      TmpNL[L0][Mul0][L0+M0][L][L1+M1].r += CY1.r*tmp2;
600 		      TmpNL[L0][Mul0][L0+M0][L][L1+M1].i += CY1.i*tmp2;
601 
602 		      /* dS/dr */
603 
604 		      tmp0 = gant*SumNLr0[L0][Mul0][L];
605 
606 		      TmpNLr[L0][Mul0][L0+M0][L][L1+M1].r += CY1.r*tmp0;
607 		      TmpNLr[L0][Mul0][L0+M0][L][L1+M1].i += CY1.i*tmp0;
608 
609 		      /* dS/dt */
610 
611 		      TmpNLt[L0][Mul0][L0+M0][L][L1+M1].r += CYt1.r*tmp2;
612 		      TmpNLt[L0][Mul0][L0+M0][L][L1+M1].i += CYt1.i*tmp2;
613 
614 		      /* dS/dp */
615 
616 		      TmpNLp[L0][Mul0][L0+M0][L][L1+M1].r += CYp1.r*tmp2;
617 		      TmpNLp[L0][Mul0][L0+M0][L][L1+M1].i += CYp1.i*tmp2;
618 
619 		    }
620 		  }
621 		}
622 
623 	      }
624 	    }
625 	  }
626 	} /* m */
627 
628         if (measure_time && OMPID==0){
629           dtime(&etime);
630           time2 += etime - stime;
631 	}
632 
633       } /* LL */
634 
635       /****************************************************
636                         Complex to Real
637       ****************************************************/
638 
639       if (measure_time) dtime(&stime);
640 
641       num0 = 0;
642       for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
643 	for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
644 
645 	  num1 = 0;
646 	  for (L=0; L<Num_RVNA; L++){
647 	    L1 = VNA_List[L];
648 
649 	    for (M0=-L0; M0<=L0; M0++){
650 	      for (M1=-L1; M1<=L1; M1++){
651 
652 		CsumNL0 = Complex(0.0,0.0);
653 		CsumNLr = Complex(0.0,0.0);
654 		CsumNLt = Complex(0.0,0.0);
655 		CsumNLp = Complex(0.0,0.0);
656 
657 		for (k=-L0; k<=L0; k++){
658 
659 		  Ctmp1 = Conjg(Comp2Real[L0][L0+M0][L0+k]);
660 
661 		  /* S */
662 
663 		  Ctmp0 = TmpNL[L0][Mul0][L0+k][L][L1+M1];
664 		  Ctmp2 = Cmul(Ctmp1,Ctmp0);
665 		  CsumNL0 = Cadd(CsumNL0,Ctmp2);
666 
667 		  /* dS/dr */
668 
669 		  Ctmp0 = TmpNLr[L0][Mul0][L0+k][L][L1+M1];
670 		  Ctmp2 = Cmul(Ctmp1,Ctmp0);
671 		  CsumNLr = Cadd(CsumNLr,Ctmp2);
672 
673 		  /* dS/dt */
674 
675 		  Ctmp0 = TmpNLt[L0][Mul0][L0+k][L][L1+M1];
676 		  Ctmp2 = Cmul(Ctmp1,Ctmp0);
677 		  CsumNLt = Cadd(CsumNLt,Ctmp2);
678 
679 		  /* dS/dp */
680 
681 		  Ctmp0 = TmpNLp[L0][Mul0][L0+k][L][L1+M1];
682 		  Ctmp2 = Cmul(Ctmp1,Ctmp0);
683 		  CsumNLp = Cadd(CsumNLp,Ctmp2);
684 
685 		}
686 
687 		CmatNL0[L0+M0][L1+M1] = CsumNL0;
688 		CmatNLr[L0+M0][L1+M1] = CsumNLr;
689 		CmatNLt[L0+M0][L1+M1] = CsumNLt;
690 		CmatNLp[L0+M0][L1+M1] = CsumNLp;
691 
692 	      }
693 	    }
694 
695 	    for (M0=-L0; M0<=L0; M0++){
696 	      for (M1=-L1; M1<=L1; M1++){
697 
698 		CsumNL0 = Complex(0.0,0.0);
699 		CsumNLr = Complex(0.0,0.0);
700 		CsumNLt = Complex(0.0,0.0);
701 		CsumNLp = Complex(0.0,0.0);
702 
703 		for (k=-L1; k<=L1; k++){
704 
705 		  /* S */
706 
707 		  CsumNL0.r += (CmatNL0[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
708 				- CmatNL0[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);
709 
710 		  CsumNL0.i += (CmatNL0[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
711 				+ CmatNL0[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);
712 
713 		  /* dS/dr */
714 
715 		  CsumNLr.r += (CmatNLr[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
716 				- CmatNLr[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);
717 
718 		  CsumNLr.i += (CmatNLr[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
719 				+ CmatNLr[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);
720 
721 		  /* dS/dt */
722 
723 		  CsumNLt.r += (CmatNLt[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
724 				- CmatNLt[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);
725 
726 		  CsumNLt.i += (CmatNLt[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
727 				+ CmatNLt[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);
728 
729 		  /* dS/dp */
730 
731 		  CsumNLp.r += (CmatNLp[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].r
732 				- CmatNLp[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].i);
733 
734 		  CsumNLp.i += (CmatNLp[L0+M0][L1+k].r*Comp2Real[L1][L1+M1][L1+k].i
735 				+ CmatNLp[L0+M0][L1+k].i*Comp2Real[L1][L1+M1][L1+k].r);
736 
737 		}
738 
739 		DS_VNA[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*(Type_DS_VNA)CsumNL0.r;
740 
741 		if (h_AN!=0){
742 
743 		  if (fabs(siT)<10e-14){
744 
745 		    DS_VNA[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
746 		      -8.0*(Type_DS_VNA)(siT*coP*CsumNLr.r + coT*coP/r*CsumNLt.r);
747 
748 		    DS_VNA[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
749 		      -8.0*(Type_DS_VNA)(siT*siP*CsumNLr.r + coT*siP/r*CsumNLt.r);
750 
751 		    DS_VNA[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
752 		      -8.0*(Type_DS_VNA)(coT*CsumNLr.r - siT/r*CsumNLt.r);
753 		  }
754 
755 		  else{
756 
757 		    DS_VNA[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
758 		      -8.0*(Type_DS_VNA)(siT*coP*CsumNLr.r + coT*coP/r*CsumNLt.r
759 					- siP/siT/r*CsumNLp.r);
760 
761 		    DS_VNA[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
762 		      -8.0*(Type_DS_VNA)(siT*siP*CsumNLr.r + coT*siP/r*CsumNLt.r
763 					+ coP/siT/r*CsumNLp.r);
764 
765 		    DS_VNA[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
766 		      -8.0*(Type_DS_VNA)(coT*CsumNLr.r - siT/r*CsumNLt.r);
767 		  }
768 
769 		}
770 
771 		else{
772 		  DS_VNA[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
773 		  DS_VNA[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
774 		  DS_VNA[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
775 		}
776 
777 	      }
778 	    }
779 
780 	    num1 = num1 + 2*L1 + 1;
781 	  }
782 
783 	  num0 = num0 + 2*L0 + 1;
784 	} /* M0 */
785       } /* L0 */
786 
787       if (measure_time && OMPID==0){
788 	dtime(&etime);
789 	time3 += etime - stime;
790       }
791 
792       dtime(&Etime_atom);
793       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
794 
795     } /* Mc_AN */
796 
797     /* freeing of arrays */
798 
799     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
800       free(CmatNL0[i]);
801     }
802     free(CmatNL0);
803 
804     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
805       free(CmatNLr[i]);
806     }
807     free(CmatNLr);
808 
809     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
810       free(CmatNLt[i]);
811     }
812     free(CmatNLt);
813 
814     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
815       free(CmatNLp[i]);
816     }
817     free(CmatNLp);
818 
819     free(Bes00);
820     free(Bes01);
821 
822     for (i=0; i<(List_YOUSO[25]+1); i++){
823       for (j=0; j<List_YOUSO[24]; j++){
824 	free(SumNLr0[i][j]);
825       }
826       free(SumNLr0[i]);
827     }
828     free(SumNLr0);
829 
830     for (i=0; i<(List_YOUSO[25]+1); i++){
831       for (j=0; j<List_YOUSO[24]; j++){
832 	free(SumNL0[i][j]);
833       }
834       free(SumNL0[i]);
835     }
836     free(SumNL0);
837 
838     for (i=0; i<(List_YOUSO[25]+1); i++){
839       for (j=0; j<List_YOUSO[24]; j++){
840 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
841 	  for (l=0; l<Num_RVNA; l++){
842 	    free(TmpNLp[i][j][k][l]);
843 	  }
844           free(TmpNLp[i][j][k]);
845 	}
846         free(TmpNLp[i][j]);
847       }
848       free(TmpNLp[i]);
849     }
850     free(TmpNLp);
851 
852     for (i=0; i<(List_YOUSO[25]+1); i++){
853       for (j=0; j<List_YOUSO[24]; j++){
854 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
855 	  for (l=0; l<Num_RVNA; l++){
856 	    free(TmpNLt[i][j][k][l]);
857 	  }
858 	  free(TmpNLt[i][j][k]);
859 	}
860 	free(TmpNLt[i][j]);
861       }
862       free(TmpNLt[i]);
863     }
864     free(TmpNLt);
865 
866     for (i=0; i<(List_YOUSO[25]+1); i++){
867       for (j=0; j<List_YOUSO[24]; j++){
868 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
869 	  for (l=0; l<Num_RVNA; l++){
870 	    free(TmpNLr[i][j][k][l]);
871 	  }
872 	  free(TmpNLr[i][j][k]);
873 	}
874 	free(TmpNLr[i][j]);
875       }
876       free(TmpNLr[i]);
877     }
878     free(TmpNLr);
879 
880     for (i=0; i<(List_YOUSO[25]+1); i++){
881       for (j=0; j<List_YOUSO[24]; j++){
882 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
883 	  for (l=0; l<Num_RVNA; l++){
884 	    free(TmpNL[i][j][k][l]);
885 	  }
886 	  free(TmpNL[i][j][k]);
887 	}
888 	free(TmpNL[i][j]);
889       }
890       free(TmpNL[i]);
891     }
892     free(TmpNL);
893 
894 #pragma omp flush(DS_VNA)
895 
896   } /* #pragma omp parallel */
897 
898   /*******************************************************
899    *******************************************************
900      multiplying overlap integrals WITH COMMUNICATION
901 
902      MPI: communicate only for k=0
903      DS_VNA
904   *******************************************************
905   *******************************************************/
906 
907   MPI_Barrier(mpi_comm_level1);
908   if (measure_time) dtime(&stime);
909 
910   /* allocation of array */
911 
912   NLH = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
913   for (i=0; i<List_YOUSO[7]; i++){
914     NLH[i] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
915   }
916 
917   for (ID=0; ID<numprocs; ID++){
918     F_Snd_Num_WK[ID] = 0;
919     F_Rcv_Num_WK[ID] = 0;
920   }
921 
922   do {
923 
924     /***********************************
925           set the size of data
926     ************************************/
927 
928     for (ID=0; ID<numprocs; ID++){
929 
930       IDS = (myid + ID) % numprocs;
931       IDR = (myid - ID + numprocs) % numprocs;
932 
933       /* find the data size to send the block data */
934 
935       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ){
936 
937 	size1 = 0;
938 	n = F_Snd_Num_WK[IDS];
939 
940 	Mc_AN = Snd_MAN[IDS][n];
941 	Gc_AN = Snd_GAN[IDS][n];
942 	Cwan = WhatSpecies[Gc_AN];
943 	tno1 = Spe_Total_NO[Cwan];
944 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
945 	  Gh_AN = natn[Gc_AN][h_AN];
946 	  Hwan = WhatSpecies[Gh_AN];
947 	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
948 	  size1 += tno1*tno2;
949 	}
950 
951 	Snd_DS_VNA_Size[IDS] = size1;
952 	MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
953       }
954       else{
955 	Snd_DS_VNA_Size[IDS] = 0;
956       }
957 
958       /* receiving of the size of the data */
959 
960       if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ){
961 	MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
962 	Rcv_DS_VNA_Size[IDR] = size2;
963       }
964       else{
965 	Rcv_DS_VNA_Size[IDR] = 0;
966       }
967 
968       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) )  MPI_Wait(&request,&stat);
969 
970     } /* ID */
971 
972     /***********************************
973                data transfer
974     ************************************/
975 
976     for (ID=0; ID<numprocs; ID++){
977 
978       IDS = (myid + ID) % numprocs;
979       IDR = (myid - ID + numprocs) % numprocs;
980 
981       /******************************
982          sending of the data
983       ******************************/
984 
985       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ){
986 
987 	size1 = Snd_DS_VNA_Size[IDS];
988 
989 	/*
990 	printf("S myid=%2d ID=%2d IDS=%2d IDR=%2d  size1=%2d\n",myid,ID,IDS,IDR,size1);fflush(stdout);
991 	*/
992 
993 	/* allocation of the array */
994 
995 	tmp_array = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size1);
996 
997 	/* multidimentional array to the vector array */
998 
999 	num = 0;
1000 	n = F_Snd_Num_WK[IDS];
1001 
1002 	Mc_AN = Snd_MAN[IDS][n];
1003 	Gc_AN = Snd_GAN[IDS][n];
1004 	Cwan = WhatSpecies[Gc_AN];
1005 	tno1 = Spe_Total_NO[Cwan];
1006 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1007 	  Gh_AN = natn[Gc_AN][h_AN];
1008 	  Hwan = WhatSpecies[Gh_AN];
1009 	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
1010 
1011 	  for (i=0; i<tno1; i++){
1012 	    for (j=0; j<tno2; j++){
1013 	      tmp_array[num] = DS_VNA[0][Mc_AN][h_AN][i][j];
1014 	      num++;
1015 	    }
1016 	  }
1017 	}
1018 
1019 	MPI_Isend(&tmp_array[0], size1, MPI_Type_DS_VNA, IDS, tag, mpi_comm_level1, &request);
1020 
1021       }
1022 
1023       /******************************
1024         receiving of the block data
1025       ******************************/
1026 
1027       if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ){
1028 
1029 	size2 = Rcv_DS_VNA_Size[IDR];
1030 
1031 	/*
1032 	printf("R myid=%2d ID=%2d IDS=%2d IDR=%2d  size2=%2d\n",myid,ID,IDS,IDR,size2);fflush(stdout);
1033 	*/
1034 
1035 	tmp_array2 = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size2);
1036 	MPI_Recv(&tmp_array2[0], size2, MPI_Type_DS_VNA, IDR, tag, mpi_comm_level1, &stat);
1037 
1038 	/* store */
1039 
1040 	num = 0;
1041 	n = F_Rcv_Num_WK[IDR];
1042 	Original_Mc_AN = F_TopMAN[IDR] + n;
1043 
1044 	Gc_AN = Rcv_GAN[IDR][n];
1045 	Cwan = WhatSpecies[Gc_AN];
1046 	tno1 = Spe_Total_NO[Cwan];
1047 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1048 	  Gh_AN = natn[Gc_AN][h_AN];
1049 	  Hwan = WhatSpecies[Gh_AN];
1050 	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
1051 	  for (i=0; i<tno1; i++){
1052 	    for (j=0; j<tno2; j++){
1053 	      DS_VNA[0][Matomnum+1][h_AN][i][j] = tmp_array2[num];
1054 	      num++;
1055 	    }
1056 	  }
1057 	}
1058 
1059 	/* free tmp_array2 */
1060 	free(tmp_array2);
1061 
1062 	/*****************************************
1063               multiplying overlap integrals
1064 	*****************************************/
1065 
1066 	for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1067 
1068 	  dtime(&Stime_atom);
1069 
1070 	  Gc_AN = M2G[Mc_AN];
1071 	  Cwan = WhatSpecies[Gc_AN];
1072 	  fan = FNAN[Gc_AN];
1073 	  rcutA = Spe_Atom_Cut1[Cwan];
1074 
1075 	  n = F_Rcv_Num_WK[IDR];
1076 	  jg = Rcv_GAN[IDR][n];
1077 
1078 	  for (j0=0; j0<=fan; j0++){
1079 
1080 	    jg0 = natn[Gc_AN][j0];
1081 	    Mj_AN0 = F_G2M[jg0];
1082 
1083   	    po = 0;
1084 	    if (Original_Mc_AN==Mj_AN0){
1085 	      po = 1;
1086 	      j = j0;
1087 	    }
1088 
1089 	    if (po==1){
1090 
1091 	      Hwan = WhatSpecies[jg];
1092 	      rcutB = Spe_Atom_Cut1[Hwan];
1093 	      rcut = rcutA + rcutB;
1094 
1095 	      for (k=0; k<=fan; k++){
1096 
1097 		kg = natn[Gc_AN][k];
1098 		wakg = WhatSpecies[kg];
1099 		kl = RMI1[Mc_AN][j][k];
1100 
1101 		if (0<=kl){
1102 #pragma omp parallel for private(m, n, sum, L ,L1, GL, Mul1, ene, L2 ,tmp0, L3) default(shared) if(Spe_Total_NO[Cwan] > 8)
1103 		  for (m=0; m<Spe_Total_NO[Cwan]; m++){
1104 		    for (n=0; n<Spe_Total_NO[Hwan]; n++){
1105 
1106 		      sum = 0.0;
1107 		      L = 0;
1108 
1109 		      for (L1=0; L1<Num_RVNA; L1++){
1110 
1111 			GL = VNA_List[L1];
1112 			Mul1 = VNA_List2[L1];
1113 
1114 			ene = VNA_proj_ene[wakg][GL][Mul1];
1115 			L2 = 2*VNA_List[L1];
1116 
1117 			tmp0 = 0.0;
1118 
1119 			for (L3=0; L3<=L2; L3++){
1120 
1121 			  tmp0 += DS_VNA[0][Mc_AN][k][m][L]*DS_VNA[0][Matomnum+1][kl][n][L];
1122 
1123 			  L++;
1124 			}
1125 
1126 			sum += ene*tmp0;
1127 
1128 		      }
1129 
1130 		      if (k==0)  NLH[m][n]  = sum;
1131 		      else       NLH[m][n] += sum;
1132 
1133 		    } /* n */
1134 		  } /* m */
1135 		} /* if (0<=kl)*/
1136 	      } /* k */
1137 
1138 	      /****************************************************
1139                             NLH to HVNA
1140 	      ****************************************************/
1141 
1142 	      dmp = dampingF(rcut,Dis[Gc_AN][j]);
1143 
1144 	      for (i1=0; i1<Spe_Total_NO[Cwan]; i1++){
1145 		for (j1=0; j1<Spe_Total_NO[Hwan]; j1++){
1146 		  HVNA[Mc_AN][j][i1][j1] = dmp*NLH[i1][j1];
1147 		}
1148 	      }
1149 
1150 	    } /* if (po==1) */
1151 
1152 	  } /* j */
1153 
1154 	  dtime(&Etime_atom);
1155 	  time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1156 
1157 	} /* Mc_AN */
1158 
1159 	/********************************************
1160             increment of F_Rcv_Num_WK[IDR]
1161 	********************************************/
1162 
1163 	F_Rcv_Num_WK[IDR]++;
1164 
1165       }
1166 
1167       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) {
1168 	MPI_Wait(&request,&stat);
1169 	free(tmp_array);  /* freeing of array */
1170 
1171 	/********************************************
1172              increment of F_Snd_Num_WK[IDS]
1173 	********************************************/
1174 
1175 	F_Snd_Num_WK[IDS]++;
1176       }
1177 
1178     } /* ID */
1179 
1180     /*****************************************************
1181       check whether all the communications have finished
1182     *****************************************************/
1183 
1184     po = 0;
1185     for (ID=0; ID<numprocs; ID++){
1186 
1187       IDS = (myid + ID) % numprocs;
1188       IDR = (myid - ID + numprocs) % numprocs;
1189 
1190       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) po += F_Snd_Num[IDS]-F_Snd_Num_WK[IDS];
1191       if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ) po += F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR];
1192     }
1193 
1194   } while (po!=0);
1195 
1196   /* freeing of arrays */
1197 
1198   for (i=0; i<List_YOUSO[7]; i++){
1199     free(NLH[i]);
1200   }
1201   free(NLH);
1202 
1203   if (measure_time){
1204     dtime(&etime);
1205     time4 += etime - stime;
1206   }
1207 
1208   /*******************************************************
1209    *******************************************************
1210     multiplying overlap integrals WITHOUT COMMUNICATION
1211   *******************************************************
1212   *******************************************************/
1213 
1214   if (measure_time) dtime(&stime);
1215 
1216 #pragma omp parallel shared(List_YOUSO,time_per_atom,HVNA,Dis,DS_VNA,VNA_proj_ene,VNA_List2,VNA_List,Num_RVNA,Spe_Total_NO,RMI1,F_G2M,natn,Spe_Atom_Cut1,FNAN,WhatSpecies,M2G,Matomnum)
1217   {
1218 
1219     int OMPID,Nthrds,Nprocs;
1220     int Mc_AN,Gc_AN,Cwan,fan,Mj_AN,Hwan;
1221     int L,L1,GL,Mul1,L2,L3,i1,j1;
1222     int k,kg,wakg,kl,i,j,jg,m,n;
1223     double **NLH;
1224     double rcutA,rcutB,rcut,sum,ene,tmp0,dmp;
1225     double Stime_atom,Etime_atom;
1226 
1227     /* allocation of array */
1228 
1229     NLH = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
1230     for (i=0; i<List_YOUSO[7]; i++){
1231       NLH[i] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
1232     }
1233 
1234     /* get info. on OpenMP */
1235 
1236     OMPID = omp_get_thread_num();
1237     Nthrds = omp_get_num_threads();
1238     Nprocs = omp_get_num_procs();
1239 
1240     for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
1241 
1242       dtime(&Stime_atom);
1243 
1244       Gc_AN = M2G[Mc_AN];
1245       Cwan = WhatSpecies[Gc_AN];
1246       fan = FNAN[Gc_AN];
1247       rcutA = Spe_Atom_Cut1[Cwan];
1248 
1249       for (j=0; j<=fan; j++){
1250 
1251 	jg = natn[Gc_AN][j];
1252 	Mj_AN = F_G2M[jg];
1253 
1254 	if (Mj_AN<=Matomnum){
1255 
1256 	  Hwan = WhatSpecies[jg];
1257 	  rcutB = Spe_Atom_Cut1[Hwan];
1258 	  rcut = rcutA + rcutB;
1259 
1260 	  for (k=0; k<=fan; k++){
1261 
1262 	    kg = natn[Gc_AN][k];
1263 	    wakg = WhatSpecies[kg];
1264 	    kl = RMI1[Mc_AN][j][k];
1265 
1266 	    if (0<=kl){
1267 
1268 	      for (m=0; m<Spe_Total_NO[Cwan]; m++){
1269 		for (n=0; n<Spe_Total_NO[Hwan]; n++){
1270 
1271 		  sum = 0.0;
1272 		  L = 0;
1273 
1274 		  for (L1=0; L1<Num_RVNA; L1++){
1275 
1276 		    GL = VNA_List[L1];
1277 		    Mul1 = VNA_List2[L1];
1278 
1279 		    ene = VNA_proj_ene[wakg][GL][Mul1];
1280 		    L2 = 2*VNA_List[L1];
1281 
1282 		    tmp0 = 0.0;
1283 
1284 		    for (L3=0; L3<=L2; L3++){
1285 		      tmp0 += DS_VNA[0][Mc_AN][k][m][L]*DS_VNA[0][Mj_AN][kl][n][L];
1286 		      L++;
1287 		    }
1288 
1289 		    sum += ene*tmp0;
1290 
1291 		  }
1292 
1293 		  if (k==0)  NLH[m][n]  = sum;
1294 		  else       NLH[m][n] += sum;
1295 
1296 		}
1297 	      }
1298 	    }
1299 	  } /* k */
1300 
1301 	  /****************************************************
1302                             NLH to HVNA
1303 	  ****************************************************/
1304 
1305 	  dmp = dampingF(rcut,Dis[Gc_AN][j]);
1306 
1307 	  for (i1=0; i1<Spe_Total_NO[Cwan]; i1++){
1308 	    for (j1=0; j1<Spe_Total_NO[Hwan]; j1++){
1309 	      HVNA[Mc_AN][j][i1][j1] = dmp*NLH[i1][j1];
1310 	    }
1311 	  }
1312 
1313 	} /* if (Mj_AN<=Matomnum) */
1314       } /* j */
1315 
1316       dtime(&Etime_atom);
1317       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1318 
1319     } /* Mc_AN */
1320 
1321     /* freeing of arrays */
1322 
1323     for (i=0; i<List_YOUSO[7]; i++){
1324       free(NLH[i]);
1325     }
1326     free(NLH);
1327 
1328 #pragma omp flush(HVNA)
1329 
1330   } /* #pragma omp parallel */
1331 
1332   if (measure_time){
1333     dtime(&etime);
1334     time5 += etime - stime;
1335   }
1336 
1337   if (measure_time){
1338     printf("Set_ProExpn_VNA myid=%2d time1=%7.3f time2=%7.3f time3=%7.3f time4=%7.3f time5=%7.3f\n",
1339             myid,time1,time2,time3,time4,time5);fflush(stdout);
1340   }
1341 
1342   /****************************************************
1343                    freeing of arrays:
1344   ****************************************************/
1345 
1346   free(Snd_DS_VNA_Size);
1347   free(Rcv_DS_VNA_Size);
1348 
1349   free(VNA_List);
1350   free(VNA_List2);
1351 
1352   for (spe=0; spe<SpeciesNum; spe++){
1353     for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
1354       for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
1355         free(Bessel_Pro00[spe][L0][Mul0]);
1356       }
1357       free(Bessel_Pro00[spe][L0]);
1358     }
1359     free(Bessel_Pro00[spe]);
1360   }
1361   free(Bessel_Pro00);
1362 
1363   for (spe=0; spe<SpeciesNum; spe++){
1364     for (L0=0; L0<=Spe_MaxL_Basis[spe]; L0++){
1365       for (Mul0=0; Mul0<Spe_Num_Basis[spe][L0]; Mul0++){
1366         free(Bessel_Pro01[spe][L0][Mul0]);
1367       }
1368       free(Bessel_Pro01[spe][L0]);
1369     }
1370     free(Bessel_Pro01[spe]);
1371   }
1372   free(Bessel_Pro01);
1373 
1374   free(OneD2Mc_AN);
1375   free(OneD2h_AN);
1376 
1377   /* for time */
1378   dtime(&TEtime);
1379   time0 = TEtime - TStime;
1380 
1381   return time0;
1382 }
1383 
1384 
1385 
1386 
Set_VNA2(double **** HVNA,double ***** HVNA2)1387 double Set_VNA2(double ****HVNA, double *****HVNA2)
1388 {
1389   /****************************************************
1390    Evaluate matrix elements of one-center (orbitals)
1391    but two-center integrals for neutral atom potentials
1392    in the momentum space.
1393 
1394    <Phi_{LM,L'M'}|VNA>
1395   ****************************************************/
1396   static int firsttime=1;
1397   int L2,L3,L,GL,i,j;
1398   int k,kl,h_AN,Gh_AN,Rnh,Ls,n;
1399   int tno0,tno1,tno2,i1,j1,p;
1400   int Cwan,Hwan,fan,jg,kg,wakg,Lmax;
1401   int size_TmpHNA;
1402   int Mc_AN,Gc_AN,Mj_AN,num,size1,size2;
1403   double time0;
1404   double tmp2,tmp3,tmp4,tmp5;
1405   double TStime,TEtime;
1406   int Num_RVNA;
1407   /* for OpenMP */
1408   int OneD_Nloop,*OneD2Mc_AN,*OneD2h_AN;
1409 
1410   dtime(&TStime);
1411 
1412   /****************************************************
1413   allocation of arrays:
1414   ****************************************************/
1415 
1416   size_TmpHNA = (List_YOUSO[25]+1)*List_YOUSO[24]*
1417                 (2*(List_YOUSO[25]+1)+1)*
1418                 (List_YOUSO[25]+1)*List_YOUSO[24]*
1419                 (2*(List_YOUSO[25]+1)+1);
1420 
1421   /* PrintMemory */
1422   if (firsttime) {
1423     PrintMemory("Set_ProExpn_VNA: TmpHNA",sizeof(double)*size_TmpHNA,NULL);
1424     PrintMemory("Set_ProExpn_VNA: TmpHNAr",sizeof(double)*size_TmpHNA,NULL);
1425     PrintMemory("Set_ProExpn_VNA: TmpHNAt",sizeof(double)*size_TmpHNA,NULL);
1426     PrintMemory("Set_ProExpn_VNA: TmpHNAp",sizeof(double)*size_TmpHNA,NULL);
1427 
1428     firsttime=0;
1429   }
1430 
1431   /* one-dimensionalize the Mc_AN and h_AN loops */
1432 
1433   OneD_Nloop = 0;
1434   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1435     Gc_AN = M2G[Mc_AN];
1436     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1437       OneD_Nloop++;
1438     }
1439   }
1440 
1441   OneD2Mc_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
1442   OneD2h_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
1443 
1444   OneD_Nloop = 0;
1445   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1446     Gc_AN = M2G[Mc_AN];
1447     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1448       OneD2Mc_AN[OneD_Nloop] = Mc_AN;
1449       OneD2h_AN[OneD_Nloop] = h_AN;
1450       OneD_Nloop++;
1451     }
1452   }
1453 
1454 #pragma omp parallel shared(List_YOUSO,time_per_atom,HVNA2,Comp2Real,GL_Weight,Spe_ProductRF_Bessel,Spe_CrudeVNA_Bessel,GL_NormK,Spe_Num_Basis,Spe_MaxL_Basis,PAO_Nkmax,atv,Gxyz,WhatSpecies,ncn,natn,M2G,OneD2h_AN,OneD2Mc_AN,OneD_Nloop)
1455   {
1456     int OMPID,Nthrds,Nprocs,Nloop;
1457     int Mc_AN,h_AN,Gc_AN,Cwan,Gh_AN;
1458     int Rnh,Hwan,L0,Mul0,L1,Mul1,M0,M1,LL;
1459     int Lmax_Four_Int,i,j,k,l,m,num0,num1;
1460     double dx,dy,dz;
1461     double siT,coT,siP,coP;
1462     double Normk,kmin,kmax,Sk,Dk;
1463     double gant,r,theta,phi;
1464     double SH[2],dSHt[2],dSHp[2];
1465     double S_coordinate[3];
1466     double Stime_atom,Etime_atom;
1467     double sum,sumr,sj,sjp,tmp0,tmp1,tmp10;
1468     double **SphB,**SphBp;
1469     double *tmp_SphB,*tmp_SphBp;
1470 
1471     dcomplex Ctmp0,Ctmp1,Ctmp2,Cpow;
1472     dcomplex Csum,Csumr,Csumt,Csump;
1473     dcomplex CsumS0,CsumSr,CsumSt,CsumSp;
1474     dcomplex CY,CYt,CYp,CY1,CYt1,CYp1;
1475     dcomplex **CmatS0;
1476     dcomplex **CmatSr;
1477     dcomplex **CmatSt;
1478     dcomplex **CmatSp;
1479     dcomplex ******TmpHNA;
1480     dcomplex ******TmpHNAr;
1481     dcomplex ******TmpHNAt;
1482     dcomplex ******TmpHNAp;
1483 
1484     /* allocation of arrays */
1485 
1486     TmpHNA = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
1487     for (i=0; i<(List_YOUSO[25]+1); i++){
1488       TmpHNA[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
1489       for (j=0; j<List_YOUSO[24]; j++){
1490 	TmpHNA[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
1491 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1492 	  TmpHNA[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
1493 	  for (l=0; l<(List_YOUSO[25]+1); l++){
1494 	    TmpHNA[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
1495 	    for (m=0; m<List_YOUSO[24]; m++){
1496 	      TmpHNA[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1497 	    }
1498 	  }
1499 	}
1500       }
1501     }
1502 
1503     TmpHNAr = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
1504     for (i=0; i<(List_YOUSO[25]+1); i++){
1505       TmpHNAr[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
1506       for (j=0; j<List_YOUSO[24]; j++){
1507 	TmpHNAr[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
1508 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1509 	  TmpHNAr[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
1510 	  for (l=0; l<(List_YOUSO[25]+1); l++){
1511 	    TmpHNAr[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
1512 	    for (m=0; m<List_YOUSO[24]; m++){
1513 	      TmpHNAr[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1514 	    }
1515 	  }
1516 	}
1517       }
1518     }
1519 
1520     TmpHNAt = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
1521     for (i=0; i<(List_YOUSO[25]+1); i++){
1522       TmpHNAt[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
1523       for (j=0; j<List_YOUSO[24]; j++){
1524 	TmpHNAt[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
1525 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1526 	  TmpHNAt[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
1527 	  for (l=0; l<(List_YOUSO[25]+1); l++){
1528 	    TmpHNAt[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
1529 	    for (m=0; m<List_YOUSO[24]; m++){
1530 	      TmpHNAt[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1531 	    }
1532 	  }
1533 	}
1534       }
1535     }
1536 
1537     TmpHNAp = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
1538     for (i=0; i<(List_YOUSO[25]+1); i++){
1539       TmpHNAp[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
1540       for (j=0; j<List_YOUSO[24]; j++){
1541 	TmpHNAp[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
1542 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1543 	  TmpHNAp[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
1544 	  for (l=0; l<(List_YOUSO[25]+1); l++){
1545 	    TmpHNAp[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
1546 	    for (m=0; m<List_YOUSO[24]; m++){
1547 	      TmpHNAp[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1548 	    }
1549 	  }
1550 	}
1551       }
1552     }
1553 
1554     CmatS0 = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
1555     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
1556       CmatS0[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1557     }
1558 
1559     CmatSr = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
1560     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
1561       CmatSr[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1562     }
1563 
1564     CmatSt = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
1565     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
1566       CmatSt[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1567     }
1568 
1569     CmatSp = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
1570     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
1571       CmatSp[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
1572     }
1573 
1574     /* get info. on OpenMP */
1575 
1576     OMPID = omp_get_thread_num();
1577     Nthrds = omp_get_num_threads();
1578     Nprocs = omp_get_num_procs();
1579 
1580     /* one-dimensionalized loop */
1581 
1582     for (Nloop=OMPID*OneD_Nloop/Nthrds; Nloop<(OMPID+1)*OneD_Nloop/Nthrds; Nloop++){
1583 
1584       dtime(&Stime_atom);
1585 
1586       /* get Mc_AN and h_AN */
1587 
1588       Mc_AN = OneD2Mc_AN[Nloop];
1589       h_AN  = OneD2h_AN[Nloop];
1590 
1591       /* set data on Mc_AN */
1592 
1593       Gc_AN = M2G[Mc_AN];
1594       Cwan = WhatSpecies[Gc_AN];
1595 
1596       /* set data on h_AN */
1597 
1598       Gh_AN = natn[Gc_AN][h_AN];
1599       Rnh = ncn[Gc_AN][h_AN];
1600       Hwan = WhatSpecies[Gh_AN];
1601 
1602       dx = Gxyz[Gh_AN][1] + atv[Rnh][1] - Gxyz[Gc_AN][1];
1603       dy = Gxyz[Gh_AN][2] + atv[Rnh][2] - Gxyz[Gc_AN][2];
1604       dz = Gxyz[Gh_AN][3] + atv[Rnh][3] - Gxyz[Gc_AN][3];
1605 
1606       xyz2spherical(dx,dy,dz,0.0,0.0,0.0,S_coordinate);
1607       r     = S_coordinate[0];
1608       theta = S_coordinate[1];
1609       phi   = S_coordinate[2];
1610 
1611       /* for empty atoms or finite elemens basis */
1612 
1613       if (r<1.0e-10) r = 1.0e-10;
1614 
1615       /* precalculation of sin and cos */
1616 
1617       siT = sin(theta);
1618       coT = cos(theta);
1619       siP = sin(phi);
1620       coP = cos(phi);
1621 
1622       /****************************************************
1623          evaluate ovelap integrals <Phi_{LM,L'M'}|VNA>
1624          between VNA and PAO.
1625       ****************************************************/
1626       /****************************************************
1627        \int VNA(k)*Spe_ProductRF_Bessel(k)*jl(k*R) k^2 dk^3
1628       ****************************************************/
1629 
1630       kmin = Radial_kmin;
1631       kmax = PAO_Nkmax;
1632       Sk = kmax + kmin;
1633       Dk = kmax - kmin;
1634 
1635       for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
1636 	for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
1637 	  for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
1638 	    for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
1639 	      for (M0=-L0; M0<=L0; M0++){
1640 		for (M1=-L1; M1<=L1; M1++){
1641 		  TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
1642 		  TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
1643 		  TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
1644 		  TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
1645 		}
1646 	      }
1647 	    }
1648 	  }
1649 	}
1650       }
1651 
1652       /* allocate SphB and SphBp */
1653 
1654       Lmax_Four_Int = 2*Spe_MaxL_Basis[Cwan];
1655 
1656       SphB = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
1657       for(LL=0; LL<(Lmax_Four_Int+3); LL++){
1658 	SphB[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
1659       }
1660 
1661       SphBp = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
1662       for(LL=0; LL<(Lmax_Four_Int+3); LL++){
1663 	SphBp[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
1664       }
1665 
1666       tmp_SphB  = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
1667       tmp_SphBp = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
1668 
1669       /* calculate SphB and SphBp */
1670 #ifdef kcomp
1671 #else
1672 #pragma forceinline recursive
1673 #endif
1674       for (i=0; i<GL_Mesh; i++){
1675 	Normk = GL_NormK[i];
1676 	Spherical_Bessel2(Normk*r,Lmax_Four_Int,tmp_SphB,tmp_SphBp);
1677 	for(LL=0; LL<=Lmax_Four_Int; LL++){
1678 	  SphB[LL][i]  = tmp_SphB[LL];
1679 	  SphBp[LL][i] = tmp_SphBp[LL];
1680 	}
1681       }
1682 
1683       free(tmp_SphB);
1684       free(tmp_SphBp);
1685 
1686       /* loops for L0, Mul0, L1, and Mul1 */
1687 
1688       for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
1689 	for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
1690 	  for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
1691 	    for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
1692 
1693 	      if (L0<=L1){
1694 		Lmax_Four_Int = 2*L1;
1695 
1696 		/* sum over LL */
1697 
1698 		for(LL=0; LL<=Lmax_Four_Int; LL++){
1699 
1700 		  if (abs(L1-LL)<=L0 && L0<=(L1+LL) ){
1701 
1702 		    sum  = 0.0;
1703 		    sumr = 0.0;
1704 
1705 		    /* Gauss-Legendre quadrature */
1706 
1707 		    for (i=0; i<GL_Mesh; i++){
1708 
1709 		      Normk = GL_NormK[i];
1710 
1711 		      sj  =  SphB[LL][i];
1712 		      sjp = SphBp[LL][i];
1713 
1714 		      tmp0 = Spe_CrudeVNA_Bessel[Hwan][i];
1715 		      tmp1 = Spe_ProductRF_Bessel[Cwan][L0][Mul0][L1][Mul1][LL][i];
1716 		      tmp10 = 0.50*Dk*tmp0*tmp1*GL_Weight[i]*Normk*Normk;
1717 
1718 		      sum  += tmp10*sj;
1719 		      sumr += tmp10*sjp*Normk;
1720 		    }
1721 
1722 		    /* sum over m */
1723 
1724 		    for (M0=-L0; M0<=L0; M0++){
1725 		      for (M1=-L1; M1<=L1; M1++){
1726 
1727 			Csum = Complex(0.0,0.0);
1728 			Csumr = Complex(0.0,0.0);
1729 			Csumt = Complex(0.0,0.0);
1730 			Csump = Complex(0.0,0.0);
1731 
1732 			for(m=-LL; m<=LL; m++){
1733 
1734 			  if ( (M1-m)==M0){
1735 
1736 			    ComplexSH(LL,m,theta,phi,SH,dSHt,dSHp);
1737 			    CY  = Complex(SH[0],SH[1]);
1738 			    CYt = Complex(dSHt[0],dSHt[1]);
1739 			    CYp = Complex(dSHp[0],dSHp[1]);
1740 
1741 			    gant = pow(-1.0,(double)abs(m))*Gaunt(L0,M0,L1,M1,LL,-m);
1742 
1743 			    /* S */
1744 
1745 			    Ctmp2 = CRmul(CY,gant);
1746 			    Csum = Cadd(Csum,Ctmp2);
1747 
1748 			    /* dS/dt */
1749 
1750 			    Ctmp2 = CRmul(CYt,gant);
1751 			    Csumt = Cadd(Csumt,Ctmp2);
1752 
1753 			    /* dS/dp */
1754 
1755 			    Ctmp2 = CRmul(CYp,gant);
1756 			    Csump = Cadd(Csump,Ctmp2);
1757 			  }
1758 
1759 			}
1760 
1761 			/* S */
1762 			Ctmp1 = CRmul(Csum,sum);
1763 			TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
1764 			  = Cadd(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
1765 
1766 			/* dS/dr */
1767 			Ctmp1 = CRmul(Csum,sumr);
1768 			TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
1769 			  = Cadd(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
1770 
1771 			/* dS/dt */
1772 			Ctmp1 = CRmul(Csumt,sum);
1773 			TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
1774 			  = Cadd(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
1775 
1776 			/* dS/dp */
1777 			Ctmp1 = CRmul(Csump,sum);
1778 			TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
1779 			  = Cadd(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
1780 		      }
1781 		    }
1782 		  }
1783 		} /* LL */
1784 	      } /* if (L0<=L1) */
1785 	    }
1786 	  }
1787 	}
1788       }
1789 
1790       /* free SphB and SphBp */
1791 
1792       for(LL=0; LL<(Lmax_Four_Int+3); LL++){
1793 	free(SphB[LL]);
1794       }
1795       free(SphB);
1796 
1797       for(LL=0; LL<(Lmax_Four_Int+3); LL++){
1798 	free(SphBp[LL]);
1799       }
1800       free(SphBp);
1801 
1802       /* copy the upper part to the lower part */
1803 
1804       for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
1805 	for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
1806 	  for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
1807 	    for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
1808 	      if (L0<=L1){
1809 		for (M0=-L0; M0<=L0; M0++){
1810 		  for (M1=-L1; M1<=L1; M1++){
1811 
1812 		    TmpHNA[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
1813 		      = Conjg(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
1814 
1815 		    TmpHNAr[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
1816 		      = Conjg(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
1817 
1818 		    TmpHNAt[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
1819 		      = Conjg(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
1820 
1821 		    TmpHNAp[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
1822 		      = Conjg(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
1823 
1824 		  }
1825 		}
1826 	      }
1827 	    }
1828 	  }
1829 	}
1830       }
1831 
1832       /****************************************************
1833  	                 Complex to Real
1834       ****************************************************/
1835 
1836       num0 = 0;
1837       for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
1838 	for (Mul0=0; Mul0<Spe_Num_Basis[Cwan][L0]; Mul0++){
1839 
1840 	  num1 = 0;
1841 	  for (L1=0; L1<=Spe_MaxL_Basis[Cwan]; L1++){
1842 	    for (Mul1=0; Mul1<Spe_Num_Basis[Cwan][L1]; Mul1++){
1843 
1844 	      for (M0=-L0; M0<=L0; M0++){
1845 		for (M1=-L1; M1<=L1; M1++){
1846 
1847 		  CsumS0 = Complex(0.0,0.0);
1848 		  CsumSr = Complex(0.0,0.0);
1849 		  CsumSt = Complex(0.0,0.0);
1850 		  CsumSp = Complex(0.0,0.0);
1851 
1852 		  for (k=-L0; k<=L0; k++){
1853 
1854 		    Ctmp1 = Conjg(Comp2Real[L0][L0+M0][L0+k]);
1855 
1856 		    /* S */
1857 
1858 		    Ctmp0 = TmpHNA[L0][Mul0][L0+k][L1][Mul1][L1+M1];
1859 		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
1860 		    CsumS0 = Cadd(CsumS0,Ctmp2);
1861 
1862 		    /* dS/dr */
1863 
1864 		    Ctmp0 = TmpHNAr[L0][Mul0][L0+k][L1][Mul1][L1+M1];
1865 		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
1866 		    CsumSr = Cadd(CsumSr,Ctmp2);
1867 
1868 		    /* dS/dt */
1869 
1870 		    Ctmp0 = TmpHNAt[L0][Mul0][L0+k][L1][Mul1][L1+M1];
1871 		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
1872 		    CsumSt = Cadd(CsumSt,Ctmp2);
1873 
1874 		    /* dS/dp */
1875 
1876 		    Ctmp0 = TmpHNAp[L0][Mul0][L0+k][L1][Mul1][L1+M1];
1877 		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
1878 		    CsumSp = Cadd(CsumSp,Ctmp2);
1879 
1880 		  }
1881 
1882 		  CmatS0[L0+M0][L1+M1] = CsumS0;
1883 		  CmatSr[L0+M0][L1+M1] = CsumSr;
1884 		  CmatSt[L0+M0][L1+M1] = CsumSt;
1885 		  CmatSp[L0+M0][L1+M1] = CsumSp;
1886 
1887 		}
1888 	      }
1889 
1890 	      for (M0=-L0; M0<=L0; M0++){
1891 		for (M1=-L1; M1<=L1; M1++){
1892 
1893 		  CsumS0 = Complex(0.0,0.0);
1894 		  CsumSr = Complex(0.0,0.0);
1895 		  CsumSt = Complex(0.0,0.0);
1896 		  CsumSp = Complex(0.0,0.0);
1897 
1898 		  for (k=-L1; k<=L1; k++){
1899 
1900 		    /* S */
1901 
1902 		    Ctmp1 = Cmul(CmatS0[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
1903 		    CsumS0 = Cadd(CsumS0,Ctmp1);
1904 
1905 		    /* dS/dr */
1906 
1907 		    Ctmp1 = Cmul(CmatSr[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
1908 		    CsumSr = Cadd(CsumSr,Ctmp1);
1909 
1910 		    /* dS/dt */
1911 
1912 		    Ctmp1 = Cmul(CmatSt[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
1913 		    CsumSt = Cadd(CsumSt,Ctmp1);
1914 
1915 		    /* dS/dp */
1916 
1917 		    Ctmp1 = Cmul(CmatSp[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
1918 		    CsumSp = Cadd(CsumSp,Ctmp1);
1919 
1920 		  }
1921 
1922 		  HVNA2[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*CsumS0.r;
1923 
1924 		  if (h_AN!=0){
1925 
1926 		    if (fabs(siT)<10e-14){
1927 
1928 		      HVNA2[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1929 			-8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r);
1930 
1931 		      HVNA2[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1932 			-8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r);
1933 
1934 		      HVNA2[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1935 			-8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
1936 		    }
1937 
1938 		    else{
1939 
1940 		      HVNA2[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1941 			-8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r
1942 			     - siP/siT/r*CsumSp.r);
1943 
1944 		      HVNA2[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1945 			-8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r
1946 			     + coP/siT/r*CsumSp.r);
1947 
1948 		      HVNA2[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
1949 			-8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
1950 		    }
1951 		  }
1952 		  else{
1953 		    HVNA2[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
1954 		    HVNA2[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
1955 		    HVNA2[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
1956 		  }
1957 
1958 		}
1959 	      }
1960 
1961 	      num1 = num1 + 2*L1 + 1;
1962 	    }
1963 	  }
1964 
1965 	  num0 = num0 + 2*L0 + 1;
1966 	}
1967       }
1968 
1969       dtime(&Etime_atom);
1970       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1971 
1972     } /* Mc_AN */
1973 
1974     /* freeing of arrays */
1975 
1976     for (i=0; i<(List_YOUSO[25]+1); i++){
1977       for (j=0; j<List_YOUSO[24]; j++){
1978 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1979 	  for (l=0; l<(List_YOUSO[25]+1); l++){
1980 	    for (m=0; m<List_YOUSO[24]; m++){
1981 	      free(TmpHNA[i][j][k][l][m]);
1982 	    }
1983 	    free(TmpHNA[i][j][k][l]);
1984 	  }
1985 	  free(TmpHNA[i][j][k]);
1986 	}
1987 	free(TmpHNA[i][j]);
1988       }
1989       free(TmpHNA[i]);
1990     }
1991     free(TmpHNA);
1992 
1993     for (i=0; i<(List_YOUSO[25]+1); i++){
1994       for (j=0; j<List_YOUSO[24]; j++){
1995 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
1996 	  for (l=0; l<(List_YOUSO[25]+1); l++){
1997 	    for (m=0; m<List_YOUSO[24]; m++){
1998 	      free(TmpHNAr[i][j][k][l][m]);
1999 	    }
2000 	    free(TmpHNAr[i][j][k][l]);
2001 	  }
2002 	  free(TmpHNAr[i][j][k]);
2003 	}
2004 	free(TmpHNAr[i][j]);
2005       }
2006       free(TmpHNAr[i]);
2007     }
2008     free(TmpHNAr);
2009 
2010     for (i=0; i<(List_YOUSO[25]+1); i++){
2011       for (j=0; j<List_YOUSO[24]; j++){
2012 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2013 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2014 	    for (m=0; m<List_YOUSO[24]; m++){
2015 	      free(TmpHNAt[i][j][k][l][m]);
2016 	    }
2017 	    free(TmpHNAt[i][j][k][l]);
2018 	  }
2019 	  free(TmpHNAt[i][j][k]);
2020 	}
2021 	free(TmpHNAt[i][j]);
2022       }
2023       free(TmpHNAt[i]);
2024     }
2025     free(TmpHNAt);
2026 
2027     for (i=0; i<(List_YOUSO[25]+1); i++){
2028       for (j=0; j<List_YOUSO[24]; j++){
2029 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2030 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2031 	    for (m=0; m<List_YOUSO[24]; m++){
2032 	      free(TmpHNAp[i][j][k][l][m]);
2033 	    }
2034 	    free(TmpHNAp[i][j][k][l]);
2035 	  }
2036 	  free(TmpHNAp[i][j][k]);
2037 	}
2038 	free(TmpHNAp[i][j]);
2039       }
2040       free(TmpHNAp[i]);
2041     }
2042     free(TmpHNAp);
2043 
2044     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2045       free(CmatS0[i]);
2046     }
2047     free(CmatS0);
2048 
2049     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2050       free(CmatSr[i]);
2051     }
2052     free(CmatSr);
2053 
2054     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2055       free(CmatSt[i]);
2056     }
2057     free(CmatSt);
2058 
2059     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2060       free(CmatSp[i]);
2061     }
2062     free(CmatSp);
2063 
2064 #pragma omp flush(HVNA2)
2065 
2066   } /* #pragma omp parallel */
2067 
2068   /****************************************************
2069     HVNA[Mc_AN][0] = sum_{h_AN} HVNA2
2070   ****************************************************/
2071 
2072   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
2073 
2074     Gc_AN = M2G[Mc_AN];
2075     Cwan = WhatSpecies[Gc_AN];
2076     for (i=0; i<Spe_Total_NO[Cwan]; i++){
2077       for (j=0; j<Spe_Total_NO[Cwan]; j++){
2078 	HVNA[Mc_AN][0][i][j] = 0.0;
2079       }
2080     }
2081 
2082     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2083       for (i=0; i<Spe_Total_NO[Cwan]; i++){
2084         for (j=0; j<Spe_Total_NO[Cwan]; j++){
2085           HVNA[Mc_AN][0][i][j] += HVNA2[0][Mc_AN][h_AN][i][j];
2086 	}
2087       }
2088     }
2089   }
2090 
2091   /****************************************************
2092   freeing of arrays:
2093   ****************************************************/
2094 
2095   free(OneD2Mc_AN);
2096   free(OneD2h_AN);
2097 
2098   /* for time */
2099   dtime(&TEtime);
2100   time0 = TEtime - TStime;
2101   return time0;
2102 }
2103 
2104 
2105 
Set_VNA3(double ***** HVNA3)2106 double Set_VNA3(double *****HVNA3)
2107 {
2108   /****************************************************
2109    Evaluate matrix elements of one-center (orbitals)
2110    but two-center integrals for neutral atom potentials
2111    in the momentum space.
2112 
2113    <VNA|Phi_{LM,L'M'}>
2114   ****************************************************/
2115   static int firsttime=0; /* due to the same as in Set_VNA2 */
2116   int L2,L3,L,GL,i,j;
2117   int k,kl,h_AN,Gh_AN,Rnh,Ls,n;
2118   int tno0,tno1,tno2,i1,j1,p;
2119   int Cwan,Hwan,fan,jg,kg,wakg,Lmax;
2120   int size_TmpHNA;
2121   int Mc_AN,Gc_AN,Mj_AN,num,size1,size2;
2122   double time0;
2123   double tmp2,tmp3,tmp4,tmp5;
2124   double TStime,TEtime;
2125   int Num_RVNA;
2126 
2127   /* for OpenMP */
2128   int OneD_Nloop,*OneD2Mc_AN,*OneD2h_AN;
2129 
2130   dtime(&TStime);
2131 
2132   /****************************************************
2133   allocation of arrays:
2134   ****************************************************/
2135 
2136   size_TmpHNA = (List_YOUSO[25]+1)*List_YOUSO[24]*
2137                 (2*(List_YOUSO[25]+1)+1)*
2138                 (List_YOUSO[25]+1)*List_YOUSO[24]*
2139                 (2*(List_YOUSO[25]+1)+1);
2140 
2141   /* PrintMemory */
2142   if (firsttime) {
2143     PrintMemory("Set_ProExpn_VNA: TmpHNA",sizeof(double)*size_TmpHNA,NULL);
2144     PrintMemory("Set_ProExpn_VNA: TmpHNAr",sizeof(double)*size_TmpHNA,NULL);
2145     PrintMemory("Set_ProExpn_VNA: TmpHNAt",sizeof(double)*size_TmpHNA,NULL);
2146     PrintMemory("Set_ProExpn_VNA: TmpHNAp",sizeof(double)*size_TmpHNA,NULL);
2147 
2148     firsttime=0;
2149   }
2150 
2151   /* one-dimensionalize the Mc_AN and h_AN loops */
2152 
2153   OneD_Nloop = 0;
2154   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
2155     Gc_AN = M2G[Mc_AN];
2156     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2157       OneD_Nloop++;
2158     }
2159   }
2160 
2161   OneD2Mc_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
2162   OneD2h_AN = (int*)malloc(sizeof(int)*(OneD_Nloop+1));
2163 
2164   OneD_Nloop = 0;
2165   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
2166     Gc_AN = M2G[Mc_AN];
2167     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2168       OneD2Mc_AN[OneD_Nloop] = Mc_AN;
2169       OneD2h_AN[OneD_Nloop] = h_AN;
2170       OneD_Nloop++;
2171     }
2172   }
2173 
2174 #pragma omp parallel shared(List_YOUSO,time_per_atom,HVNA3,Comp2Real,GL_Weight,Spe_ProductRF_Bessel,Spe_CrudeVNA_Bessel,GL_NormK,Spe_Num_Basis,Spe_MaxL_Basis,PAO_Nkmax,atv,Gxyz,WhatSpecies,ncn,natn,M2G,OneD2h_AN,OneD2Mc_AN,OneD_Nloop)
2175   {
2176     int OMPID,Nthrds,Nprocs,Nloop;
2177     int Mc_AN,h_AN,Gc_AN,Cwan,Gh_AN;
2178     int Rnh,Hwan,L0,Mul0,L1,Mul1,M0,M1,LL;
2179     int Lmax_Four_Int,i,j,k,l,m,num0,num1;
2180     double dx,dy,dz;
2181     double siT,coT,siP,coP;
2182     double Normk,kmin,kmax,Sk,Dk;
2183     double gant,r,theta,phi;
2184     double SH[2],dSHt[2],dSHp[2];
2185     double S_coordinate[3];
2186     double Stime_atom,Etime_atom;
2187     double sum,sumr,sj,sjp,tmp0,tmp1,tmp10;
2188     double **SphB,**SphBp;
2189     double *tmp_SphB,*tmp_SphBp;
2190 
2191     dcomplex Ctmp0,Ctmp1,Ctmp2,Cpow;
2192     dcomplex Csum,Csumr,Csumt,Csump;
2193     dcomplex CsumS0,CsumSr,CsumSt,CsumSp;
2194     dcomplex CY,CYt,CYp,CY1,CYt1,CYp1;
2195     dcomplex **CmatS0;
2196     dcomplex **CmatSr;
2197     dcomplex **CmatSt;
2198     dcomplex **CmatSp;
2199     dcomplex ******TmpHNA;
2200     dcomplex ******TmpHNAr;
2201     dcomplex ******TmpHNAt;
2202     dcomplex ******TmpHNAp;
2203 
2204     /* allocation of arrays */
2205 
2206     TmpHNA = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
2207     for (i=0; i<(List_YOUSO[25]+1); i++){
2208       TmpHNA[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
2209       for (j=0; j<List_YOUSO[24]; j++){
2210 	TmpHNA[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
2211 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2212 	  TmpHNA[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
2213 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2214 	    TmpHNA[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
2215 	    for (m=0; m<List_YOUSO[24]; m++){
2216 	      TmpHNA[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2217 	    }
2218 	  }
2219 	}
2220       }
2221     }
2222 
2223     TmpHNAr = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
2224     for (i=0; i<(List_YOUSO[25]+1); i++){
2225       TmpHNAr[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
2226       for (j=0; j<List_YOUSO[24]; j++){
2227 	TmpHNAr[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
2228 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2229 	  TmpHNAr[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
2230 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2231 	    TmpHNAr[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
2232 	    for (m=0; m<List_YOUSO[24]; m++){
2233 	      TmpHNAr[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2234 	    }
2235 	  }
2236 	}
2237       }
2238     }
2239 
2240     TmpHNAt = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
2241     for (i=0; i<(List_YOUSO[25]+1); i++){
2242       TmpHNAt[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
2243       for (j=0; j<List_YOUSO[24]; j++){
2244 	TmpHNAt[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
2245 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2246 	  TmpHNAt[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
2247 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2248 	    TmpHNAt[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
2249 	    for (m=0; m<List_YOUSO[24]; m++){
2250 	      TmpHNAt[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2251 	    }
2252 	  }
2253 	}
2254       }
2255     }
2256 
2257     TmpHNAp = (dcomplex******)malloc(sizeof(dcomplex*****)*(List_YOUSO[25]+1));
2258     for (i=0; i<(List_YOUSO[25]+1); i++){
2259       TmpHNAp[i] = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[24]);
2260       for (j=0; j<List_YOUSO[24]; j++){
2261 	TmpHNAp[i][j] = (dcomplex****)malloc(sizeof(dcomplex***)*(2*(List_YOUSO[25]+1)+1));
2262 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2263 	  TmpHNAp[i][j][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(List_YOUSO[25]+1));
2264 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2265 	    TmpHNAp[i][j][k][l] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[24]);
2266 	    for (m=0; m<List_YOUSO[24]; m++){
2267 	      TmpHNAp[i][j][k][l][m] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2268 	    }
2269 	  }
2270 	}
2271       }
2272     }
2273 
2274     CmatS0 = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
2275     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2276       CmatS0[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2277     }
2278 
2279     CmatSr = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
2280     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2281       CmatSr[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2282     }
2283 
2284     CmatSt = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
2285     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2286       CmatSt[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2287     }
2288 
2289     CmatSp = (dcomplex**)malloc(sizeof(dcomplex*)*(2*(List_YOUSO[25]+1)+1));
2290     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2291       CmatSp[i] = (dcomplex*)malloc(sizeof(dcomplex)*(2*(List_YOUSO[25]+1)+1));
2292     }
2293 
2294     /* get info. on OpenMP */
2295 
2296     OMPID = omp_get_thread_num();
2297     Nthrds = omp_get_num_threads();
2298     Nprocs = omp_get_num_procs();
2299 
2300     /* one-dimensionalized loop */
2301 
2302     for (Nloop=OMPID*OneD_Nloop/Nthrds; Nloop<(OMPID+1)*OneD_Nloop/Nthrds; Nloop++){
2303 
2304       dtime(&Stime_atom);
2305 
2306       /* get Mc_AN and h_AN */
2307 
2308       Mc_AN = OneD2Mc_AN[Nloop];
2309       h_AN  = OneD2h_AN[Nloop];
2310 
2311       /* set data on Mc_AN */
2312 
2313       Gc_AN = M2G[Mc_AN];
2314       Cwan = WhatSpecies[Gc_AN];
2315 
2316       /* set data on h_AN */
2317 
2318       Gh_AN = natn[Gc_AN][h_AN];
2319       Rnh = ncn[Gc_AN][h_AN];
2320       Hwan = WhatSpecies[Gh_AN];
2321 
2322       dx = Gxyz[Gc_AN][1] - Gxyz[Gh_AN][1] - atv[Rnh][1];
2323       dy = Gxyz[Gc_AN][2] - Gxyz[Gh_AN][2] - atv[Rnh][2];
2324       dz = Gxyz[Gc_AN][3] - Gxyz[Gh_AN][3] - atv[Rnh][3];
2325 
2326       xyz2spherical(dx,dy,dz,0.0,0.0,0.0,S_coordinate);
2327       r     = S_coordinate[0];
2328       theta = S_coordinate[1];
2329       phi   = S_coordinate[2];
2330 
2331       /* for empty atoms or finite elemens basis */
2332 
2333       if (r<1.0e-10) r = 1.0e-10;
2334 
2335       /* precalculation of sin and cos */
2336 
2337       siT = sin(theta);
2338       coT = cos(theta);
2339       siP = sin(phi);
2340       coP = cos(phi);
2341 
2342       /****************************************************
2343          evaluate ovelap integrals <Phi_{LM,L'M'}|VNA>
2344          between VNA and PAO.
2345       ****************************************************/
2346       /****************************************************
2347        \int VNA(k)*Spe_ProductRF_Bessel(k)*jl(k*R) k^2 dk^3
2348       ****************************************************/
2349 
2350       kmin = Radial_kmin;
2351       kmax = PAO_Nkmax;
2352       Sk = kmax + kmin;
2353       Dk = kmax - kmin;
2354 
2355       for (L0=0; L0<=Spe_MaxL_Basis[Hwan]; L0++){
2356 	for (Mul0=0; Mul0<Spe_Num_Basis[Hwan][L0]; Mul0++){
2357 	  for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
2358 	    for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
2359 	      for (M0=-L0; M0<=L0; M0++){
2360 		for (M1=-L1; M1<=L1; M1++){
2361 		  TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
2362 		  TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
2363 		  TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
2364 		  TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1] = Complex(0.0,0.0);
2365 		}
2366 	      }
2367 	    }
2368 	  }
2369 	}
2370       }
2371 
2372       /* allocate SphB and SphBp */
2373 
2374       Lmax_Four_Int = 2*Spe_MaxL_Basis[Hwan];
2375 
2376       SphB = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
2377       for(LL=0; LL<(Lmax_Four_Int+3); LL++){
2378 	SphB[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
2379       }
2380 
2381       SphBp = (double**)malloc(sizeof(double*)*(Lmax_Four_Int+3));
2382       for(LL=0; LL<(Lmax_Four_Int+3); LL++){
2383 	SphBp[LL] = (double*)malloc(sizeof(double)*GL_Mesh);
2384       }
2385 
2386       tmp_SphB  = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
2387       tmp_SphBp = (double*)malloc(sizeof(double)*(Lmax_Four_Int+3));
2388 
2389       /* calculate SphB and SphBp */
2390 #ifdef kcomp
2391 #else
2392 #pragma forceinline recursive
2393 #endif
2394       for (i=0; i<GL_Mesh; i++){
2395 	Normk = GL_NormK[i];
2396 	Spherical_Bessel2(Normk*r,Lmax_Four_Int,tmp_SphB,tmp_SphBp);
2397 	for(LL=0; LL<=Lmax_Four_Int; LL++){
2398 	  SphB[LL][i]  = tmp_SphB[LL];
2399 	  SphBp[LL][i] = tmp_SphBp[LL];
2400 	}
2401       }
2402 
2403       free(tmp_SphB);
2404       free(tmp_SphBp);
2405 
2406       /* loops for L0, Mul0, L1, and Mul1 */
2407 
2408       for (L0=0; L0<=Spe_MaxL_Basis[Hwan]; L0++){
2409 	for (Mul0=0; Mul0<Spe_Num_Basis[Hwan][L0]; Mul0++){
2410 	  for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
2411 	    for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
2412 
2413 	      if (L0<=L1){
2414 
2415 		Lmax_Four_Int = 2*L1;
2416 
2417 		/* sum over LL */
2418 
2419 		for(LL=0; LL<=Lmax_Four_Int; LL++){
2420 
2421 		  if (abs(L1-LL)<=L0 && L0<=(L1+LL) ){
2422 
2423 		    sum  = 0.0;
2424 		    sumr = 0.0;
2425 
2426 		    /* Gauss-Legendre quadrature */
2427 
2428 		    for (i=0; i<GL_Mesh; i++){
2429 
2430 		      Normk = GL_NormK[i];
2431 
2432 		      sj  =  SphB[LL][i];
2433 		      sjp = SphBp[LL][i];
2434 
2435 		      tmp0 = Spe_CrudeVNA_Bessel[Cwan][i];
2436 		      tmp1 = Spe_ProductRF_Bessel[Hwan][L0][Mul0][L1][Mul1][LL][i];
2437 		      tmp10 = 0.50*Dk*tmp0*tmp1*GL_Weight[i]*Normk*Normk;
2438 
2439 		      sum  += tmp10*sj;
2440 		      sumr += tmp10*sjp*Normk;
2441 		    }
2442 
2443 		    /* sum over m */
2444 
2445 		    for (M0=-L0; M0<=L0; M0++){
2446 		      for (M1=-L1; M1<=L1; M1++){
2447 
2448 			Csum = Complex(0.0,0.0);
2449 			Csumr = Complex(0.0,0.0);
2450 			Csumt = Complex(0.0,0.0);
2451 			Csump = Complex(0.0,0.0);
2452 
2453 			for(m=-LL; m<=LL; m++){
2454 
2455 			  if ( (M1-m)==M0){
2456 
2457 			    ComplexSH(LL,m,theta,phi,SH,dSHt,dSHp);
2458 			    CY  = Complex(SH[0],SH[1]);
2459 			    CYt = Complex(dSHt[0],dSHt[1]);
2460 			    CYp = Complex(dSHp[0],dSHp[1]);
2461 
2462 			    gant = pow(-1.0,(double)abs(m))*Gaunt(L0,M0,L1,M1,LL,-m);
2463 
2464 			    /* S */
2465 
2466 			    Ctmp2 = CRmul(CY,gant);
2467 			    Csum = Cadd(Csum,Ctmp2);
2468 
2469 			    /* dS/dt */
2470 
2471 			    Ctmp2 = CRmul(CYt,gant);
2472 			    Csumt = Cadd(Csumt,Ctmp2);
2473 
2474 			    /* dS/dp */
2475 
2476 			    Ctmp2 = CRmul(CYp,gant);
2477 			    Csump = Cadd(Csump,Ctmp2);
2478 			  }
2479 
2480 			}
2481 
2482 			/* S */
2483 			Ctmp1 = CRmul(Csum,sum);
2484 			TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
2485 			  = Cadd(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
2486 
2487 			/* dS/dr */
2488 			Ctmp1 = CRmul(Csum,sumr);
2489 			TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
2490 			  = Cadd(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
2491 
2492 			/* dS/dt */
2493 			Ctmp1 = CRmul(Csumt,sum);
2494 			TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
2495 			  = Cadd(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
2496 
2497 			/* dS/dp */
2498 			Ctmp1 = CRmul(Csump,sum);
2499 			TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1]
2500 			  = Cadd(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1],Ctmp1);
2501 		      }
2502 		    }
2503 		  }
2504 		} /* LL */
2505 	      } /* if (L0<=L1) */
2506 	    }
2507 	  }
2508 	}
2509       }
2510 
2511       /* free SphB and SphBp */
2512 
2513       for(LL=0; LL<(Lmax_Four_Int+3); LL++){
2514 	free(SphB[LL]);
2515       }
2516       free(SphB);
2517 
2518       for(LL=0; LL<(Lmax_Four_Int+3); LL++){
2519 	free(SphBp[LL]);
2520       }
2521       free(SphBp);
2522 
2523       /* copy the upper part to the lower part */
2524 
2525       for (L0=0; L0<=Spe_MaxL_Basis[Hwan]; L0++){
2526 	for (Mul0=0; Mul0<Spe_Num_Basis[Hwan][L0]; Mul0++){
2527 	  for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
2528 	    for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
2529 	      if (L0<=L1){
2530 		for (M0=-L0; M0<=L0; M0++){
2531 		  for (M1=-L1; M1<=L1; M1++){
2532 
2533 		    TmpHNA[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
2534 		      = Conjg(TmpHNA[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
2535 
2536 		    TmpHNAr[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
2537 		      = Conjg(TmpHNAr[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
2538 
2539 		    TmpHNAt[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
2540 		      = Conjg(TmpHNAt[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
2541 
2542 		    TmpHNAp[L1][Mul1][L1+M1][L0][Mul0][L0+M0]
2543 		      = Conjg(TmpHNAp[L0][Mul0][L0+M0][L1][Mul1][L1+M1]);
2544 
2545 		  }
2546 		}
2547 	      }
2548 	    }
2549 	  }
2550 	}
2551       }
2552 
2553       /****************************************************
2554  	                 Complex to Real
2555       ****************************************************/
2556 
2557       num0 = 0;
2558       for (L0=0; L0<=Spe_MaxL_Basis[Hwan]; L0++){
2559 	for (Mul0=0; Mul0<Spe_Num_Basis[Hwan][L0]; Mul0++){
2560 
2561 	  num1 = 0;
2562 	  for (L1=0; L1<=Spe_MaxL_Basis[Hwan]; L1++){
2563 	    for (Mul1=0; Mul1<Spe_Num_Basis[Hwan][L1]; Mul1++){
2564 
2565 	      for (M0=-L0; M0<=L0; M0++){
2566 		for (M1=-L1; M1<=L1; M1++){
2567 
2568 		  CsumS0 = Complex(0.0,0.0);
2569 		  CsumSr = Complex(0.0,0.0);
2570 		  CsumSt = Complex(0.0,0.0);
2571 		  CsumSp = Complex(0.0,0.0);
2572 
2573 		  for (k=-L0; k<=L0; k++){
2574 
2575 		    Ctmp1 = Conjg(Comp2Real[L0][L0+M0][L0+k]);
2576 
2577 		    /* S */
2578 
2579 		    Ctmp0 = TmpHNA[L0][Mul0][L0+k][L1][Mul1][L1+M1];
2580 		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
2581 		    CsumS0 = Cadd(CsumS0,Ctmp2);
2582 
2583 		    /* dS/dr */
2584 
2585 		    Ctmp0 = TmpHNAr[L0][Mul0][L0+k][L1][Mul1][L1+M1];
2586 		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
2587 		    CsumSr = Cadd(CsumSr,Ctmp2);
2588 
2589 		    /* dS/dt */
2590 
2591 		    Ctmp0 = TmpHNAt[L0][Mul0][L0+k][L1][Mul1][L1+M1];
2592 		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
2593 		    CsumSt = Cadd(CsumSt,Ctmp2);
2594 
2595 		    /* dS/dp */
2596 
2597 		    Ctmp0 = TmpHNAp[L0][Mul0][L0+k][L1][Mul1][L1+M1];
2598 		    Ctmp2 = Cmul(Ctmp1,Ctmp0);
2599 		    CsumSp = Cadd(CsumSp,Ctmp2);
2600 
2601 		  }
2602 
2603 		  CmatS0[L0+M0][L1+M1] = CsumS0;
2604 		  CmatSr[L0+M0][L1+M1] = CsumSr;
2605 		  CmatSt[L0+M0][L1+M1] = CsumSt;
2606 		  CmatSp[L0+M0][L1+M1] = CsumSp;
2607 
2608 		}
2609 	      }
2610 
2611 	      for (M0=-L0; M0<=L0; M0++){
2612 		for (M1=-L1; M1<=L1; M1++){
2613 
2614 		  CsumS0 = Complex(0.0,0.0);
2615 		  CsumSr = Complex(0.0,0.0);
2616 		  CsumSt = Complex(0.0,0.0);
2617 		  CsumSp = Complex(0.0,0.0);
2618 
2619 		  for (k=-L1; k<=L1; k++){
2620 
2621 		    /* S */
2622 
2623 
2624 		    Ctmp1 = Cmul(CmatS0[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
2625 		    CsumS0 = Cadd(CsumS0,Ctmp1);
2626 
2627 		    /* dS/dr */
2628 
2629 		    Ctmp1 = Cmul(CmatSr[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
2630 		    CsumSr = Cadd(CsumSr,Ctmp1);
2631 
2632 		    /* dS/dt */
2633 
2634 		    Ctmp1 = Cmul(CmatSt[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
2635 		    CsumSt = Cadd(CsumSt,Ctmp1);
2636 
2637 		    /* dS/dp */
2638 
2639 		    Ctmp1 = Cmul(CmatSp[L0+M0][L1+k],Comp2Real[L1][L1+M1][L1+k]);
2640 		    CsumSp = Cadd(CsumSp,Ctmp1);
2641 
2642 		  }
2643 
2644 		  HVNA3[0][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 8.0*CsumS0.r;
2645 
2646 		  if (h_AN!=0){
2647 
2648 		    if (fabs(siT)<10e-14){
2649 
2650 		      HVNA3[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2651 			-8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r);
2652 
2653 		      HVNA3[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2654 			-8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r);
2655 
2656 		      HVNA3[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2657 			-8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
2658 		    }
2659 
2660 		    else{
2661 
2662 		      HVNA3[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2663 			-8.0*(siT*coP*CsumSr.r + coT*coP/r*CsumSt.r
2664 			     - siP/siT/r*CsumSp.r);
2665 
2666 		      HVNA3[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2667 			-8.0*(siT*siP*CsumSr.r + coT*siP/r*CsumSt.r
2668 			     + coP/siT/r*CsumSp.r);
2669 
2670 		      HVNA3[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] =
2671 			-8.0*(coT*CsumSr.r - siT/r*CsumSt.r);
2672 		    }
2673 		  }
2674 		  else{
2675 		    HVNA3[1][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
2676 		    HVNA3[2][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
2677 		    HVNA3[3][Mc_AN][h_AN][num0+L0+M0][num1+L1+M1] = 0.0;
2678 		  }
2679 
2680 		}
2681 	      }
2682 
2683 	      num1 = num1 + 2*L1 + 1;
2684 	    }
2685 	  }
2686 
2687 	  num0 = num0 + 2*L0 + 1;
2688 	}
2689       }
2690 
2691       dtime(&Etime_atom);
2692       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
2693 
2694     } /* Mc_AN */
2695 
2696     /* freeing of arrays */
2697 
2698     for (i=0; i<(List_YOUSO[25]+1); i++){
2699       for (j=0; j<List_YOUSO[24]; j++){
2700 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2701 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2702 	    for (m=0; m<List_YOUSO[24]; m++){
2703 	      free(TmpHNA[i][j][k][l][m]);
2704 	    }
2705 	    free(TmpHNA[i][j][k][l]);
2706 	  }
2707 	  free(TmpHNA[i][j][k]);
2708 	}
2709 	free(TmpHNA[i][j]);
2710       }
2711       free(TmpHNA[i]);
2712     }
2713     free(TmpHNA);
2714 
2715     for (i=0; i<(List_YOUSO[25]+1); i++){
2716       for (j=0; j<List_YOUSO[24]; j++){
2717 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2718 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2719 	    for (m=0; m<List_YOUSO[24]; m++){
2720 	      free(TmpHNAr[i][j][k][l][m]);
2721 	    }
2722 	    free(TmpHNAr[i][j][k][l]);
2723 	  }
2724 	  free(TmpHNAr[i][j][k]);
2725 	}
2726 	free(TmpHNAr[i][j]);
2727       }
2728       free(TmpHNAr[i]);
2729     }
2730     free(TmpHNAr);
2731 
2732     for (i=0; i<(List_YOUSO[25]+1); i++){
2733       for (j=0; j<List_YOUSO[24]; j++){
2734 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2735 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2736 	    for (m=0; m<List_YOUSO[24]; m++){
2737 	      free(TmpHNAt[i][j][k][l][m]);
2738 	    }
2739 	    free(TmpHNAt[i][j][k][l]);
2740 	  }
2741 	  free(TmpHNAt[i][j][k]);
2742 	}
2743 	free(TmpHNAt[i][j]);
2744       }
2745       free(TmpHNAt[i]);
2746     }
2747     free(TmpHNAt);
2748 
2749     for (i=0; i<(List_YOUSO[25]+1); i++){
2750       for (j=0; j<List_YOUSO[24]; j++){
2751 	for (k=0; k<(2*(List_YOUSO[25]+1)+1); k++){
2752 	  for (l=0; l<(List_YOUSO[25]+1); l++){
2753 	    for (m=0; m<List_YOUSO[24]; m++){
2754 	      free(TmpHNAp[i][j][k][l][m]);
2755 	    }
2756 	    free(TmpHNAp[i][j][k][l]);
2757 	  }
2758 	  free(TmpHNAp[i][j][k]);
2759 	}
2760 	free(TmpHNAp[i][j]);
2761       }
2762       free(TmpHNAp[i]);
2763     }
2764     free(TmpHNAp);
2765 
2766     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2767       free(CmatS0[i]);
2768     }
2769     free(CmatS0);
2770 
2771     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2772       free(CmatSr[i]);
2773     }
2774     free(CmatSr);
2775 
2776     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2777       free(CmatSt[i]);
2778     }
2779     free(CmatSt);
2780 
2781     for (i=0; i<(2*(List_YOUSO[25]+1)+1); i++){
2782       free(CmatSp[i]);
2783     }
2784     free(CmatSp);
2785 
2786 #pragma omp flush(HVNA3)
2787 
2788   } /* #pragma omp parallel */
2789 
2790   /****************************************************
2791   freeing of arrays:
2792   ****************************************************/
2793 
2794   free(OneD2Mc_AN);
2795   free(OneD2h_AN);
2796 
2797   /* for time */
2798   dtime(&TEtime);
2799   time0 = TEtime - TStime;
2800   return time0;
2801 }
2802 
2803 
2804 #define xmin  0.0
2805 #define asize_lmax 20
2806 
2807 #ifdef kcomp
Spherical_Bessel2(double x,int lmax,double * sb,double * dsb)2808 static void Spherical_Bessel2( double x, int lmax, double *sb, double *dsb )
2809 #else
2810 inline void Spherical_Bessel2( double x, int lmax, double *sb, double *dsb )
2811 #endif
2812 {
2813   int m,n,nmax;
2814   double tsb[asize_lmax+10];
2815   double invx,vsb0,vsb1,vsb2,vsbi;
2816   double j0,j1,j0p,j1p,sf,tmp,si,co,ix,ix2;
2817 
2818   if (x<0.0){
2819     printf("minus x is invalid for Spherical_Bessel\n");
2820     exit(0);
2821   }
2822 
2823   /* find an appropriate nmax */
2824 
2825   nmax = lmax + 3*x + 20;
2826   if (nmax<100) nmax = 100;
2827 
2828   if (asize_lmax<(lmax+1)){
2829     printf("asize_lmax should be larger than %d in Spherical_Bessel.c\n",lmax+1);
2830     exit(0);
2831   }
2832 
2833   /* if x is larger than xmin */
2834 
2835   if ( xmin < x ){
2836 
2837     invx = 1.0/x;
2838 
2839     /* initial values */
2840 
2841     vsb0 = 0.0;
2842     vsb1 = 1.0e-14;
2843 
2844     /* downward recurrence from nmax-2 to lmax+2 */
2845 
2846     for ( n=nmax-1; (lmax+2)<n; n-- ){
2847 
2848       vsb2 = (2.0*n + 1.0)*invx*vsb1 - vsb0;
2849 
2850       if (1.0e+250<vsb2){
2851         tmp = 1.0/vsb2;
2852         vsb2 *= tmp;
2853         vsb1 *= tmp;
2854       }
2855 
2856       vsbi = vsb0;
2857       vsb0 = vsb1;
2858       vsb1 = vsb2;
2859     }
2860 
2861     /* downward recurrence from lmax+1 to 0 */
2862 
2863     n = lmax + 3;
2864     tsb[n-1] = vsb1;
2865     tsb[n  ] = vsb0;
2866     tsb[n+1] = vsbi;
2867 
2868     tmp = tsb[n-1];
2869     tsb[n-1] /= tmp;
2870     tsb[n  ] /= tmp;
2871 
2872     for ( n=lmax+2; 0<n; n-- ){
2873 
2874       tsb[n-1] = (2.0*n + 1.0)*invx*tsb[n] - tsb[n+1];
2875 
2876       if (1.0e+250<tsb[n-1]){
2877         tmp = tsb[n-1];
2878         for (m=n-1; m<=lmax+1; m++){
2879           tsb[m] /= tmp;
2880         }
2881       }
2882     }
2883 
2884     /* normalization */
2885 
2886     si = sin(x);
2887     co = cos(x);
2888     ix = 1.0/x;
2889     ix2 = ix*ix;
2890     j0 = si*ix;
2891     j1 = si*ix*ix - co*ix;
2892 
2893     if (fabs(tsb[1])<fabs(tsb[0])) sf = j0/tsb[0];
2894     else                           sf = j1/tsb[1];
2895 
2896     /* tsb to sb */
2897 
2898     for ( n=0; n<=lmax+1; n++ ){
2899       sb[n] = tsb[n]*sf;
2900     }
2901 
2902     /* derivative of sb */
2903 
2904     dsb[0] = co*ix - si*ix*ix;
2905     for ( n=1; n<=lmax; n++ ){
2906       dsb[n] = ( (double)n*sb[n-1] - (double)(n+1.0)*sb[n+1] )/(2.0*(double)n + 1.0);
2907     }
2908 
2909   }
2910 
2911   /* if x is smaller than xmin */
2912 
2913   else {
2914 
2915     /* sb */
2916 
2917     for ( n=0; n<=lmax; n++ ){
2918       sb[n] = 0.0;
2919     }
2920     sb[0] = 1.0;
2921 
2922     /* derivative of sb */
2923 
2924     dsb[0] = 0.0;
2925     for ( n=1; n<=lmax; n++ ){
2926       dsb[n] = ( (double)n*sb[n-1] - (double)(n+1.0)*sb[n+1] )/(2.0*(double)n + 1.0);
2927     }
2928   }
2929 }
2930