1 /**********************************************************************
2   Band_DFT_Dosout.c:
3 
4      Band_DFT_Dosout.c is a subroutine to calculate the density of
5      states based on the band calculation.
6 
7   Log of Band_DFT_Dosout.c:
8 
9      12/May/2003  Released by H.Kino
10      25/Dec/2003  a non-collinear part (added by T.Ozaki)
11 
12 ***********************************************************************/
13 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <math.h>
18 #include <time.h>
19 #include "openmx_common.h"
20 #include "mpi.h"
21 #include <omp.h>
22 
23 #define  measure_time   0
24 
25 
26 
27 
28 static double Band_DFT_Dosout_Col(
29                            int knum_i, int knum_j, int knum_k,
30                            int SpinP_switch,
31                            double *****nh,
32                            double *****ImNL,
33                            double ****CntOLP);
34 
35 static double Band_DFT_DosoutGauss_Col(
36                            int knum_i, int knum_j, int knum_k,
37                            int SpinP_switch,
38                            double *****nh,
39                            double *****ImNL,
40                            double ****CntOLP);
41 
42 static double Band_DFT_Dosout_NonCol(
43                            int knum_i, int knum_j, int knum_k,
44                            int SpinP_switch,
45                            double *****nh,
46                            double *****ImNL,
47                            double ****CntOLP);
48 
49 static double Band_DFT_DosoutGauss_NonCol(
50                            int knum_i, int knum_j, int knum_k,
51                            int SpinP_switch,
52                            double *****nh,
53                            double *****ImNL,
54                            double ****CntOLP);
55 
56 
57 
Band_DFT_Dosout(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)58 double Band_DFT_Dosout( int knum_i, int knum_j, int knum_k,
59                         int SpinP_switch,
60                         double *****nh,
61                         double *****ImNL,
62                         double ****CntOLP)
63 {
64   static double time0;
65 
66 
67   if ( (SpinP_switch==0 || SpinP_switch==1) && Dos_fileout){
68 
69     time0 = Band_DFT_Dosout_Col( knum_i, knum_j, knum_k, SpinP_switch, nh, ImNL, CntOLP);
70   }
71 
72   else if ( (SpinP_switch==0 || SpinP_switch==1) && DosGauss_fileout){
73 
74     time0 = Band_DFT_DosoutGauss_Col( knum_i, knum_j, knum_k, SpinP_switch, nh, ImNL, CntOLP);
75   }
76 
77   else if (SpinP_switch==3 && Dos_fileout){
78 
79     time0 = Band_DFT_Dosout_NonCol( knum_i, knum_j, knum_k, SpinP_switch, nh, ImNL, CntOLP);
80   }
81 
82   else if (SpinP_switch==3 && DosGauss_fileout){
83 
84     time0 = Band_DFT_DosoutGauss_NonCol( knum_i, knum_j, knum_k, SpinP_switch, nh, ImNL, CntOLP);
85   }
86 
87   return time0;
88 }
89 
90 
91 
92 
93 
94 
95 
96 
Band_DFT_DosoutGauss_Col(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)97 static double Band_DFT_DosoutGauss_Col(
98                            int knum_i, int knum_j, int knum_k,
99                            int SpinP_switch,
100                            double *****nh,
101                            double *****ImNL,
102                            double ****CntOLP)
103 {
104   int i,j,k,spin,l,i1,j1,n1;
105   int n, wanA;
106   int *MP;
107   int MA_AN, GA_AN, tnoA,Anum, LB_AN;
108   int GB_AN, wanB, tnoB, Bnum, RnB;
109   int h_AN,tno0,tno1,Gh_AN,Hwan,Gc_AN,Cwan;
110   int l1,l2,l3,ik,Rn;
111   int kloop,kloop0,S_knum,E_knum,T_knum,num_kloop0;
112   int ie,iemin,iemax,n1min,iecenter,iewidth;
113   int MaxL,e1,s1;
114   int i_vec[10];
115 
116   double sum,sumi,u2,v2,uv,vu,tmp,pi2,xa,eg,x,de;
117   double factor,EV_cut0;
118   double FermiF1,FermiF2,x1,x2,diffF,max_x=50.0;
119   double kRn,si,co,Redum,Imdum,Redum2,Resum;
120   double TStime,TEtime,time0;
121   double OLP_eigen_cut=Threshold_OLP_Eigen;
122   double *****PDM;
123   double *****TPDM;
124   double *koS;
125   double **ko; int N_ko, i_ko[10];
126   dcomplex **H;  int N_H,  i_H[10];
127   dcomplex **S;  int N_S,  i_S[10];
128   double *M1,***EIGEN;
129   dcomplex **C;  int N_C,  i_C[10];
130   double *KGrids1, *KGrids2, *KGrids3;
131   double *SD;
132   dcomplex ***H2,**TmpM,**TmpS;
133   double *T_KGrids1,*T_KGrids2,*T_KGrids3;
134   int *Ti_KGrids1,*Tj_KGrids2,*Tk_KGrids3,*arpo;
135   dcomplex Ctmp1,Ctmp2;
136 
137   double ****Dummy_ImNL;
138   double snum_i, snum_j, snum_k;
139   double k1,k2,k3;
140   double Dos_Erange_Tmp[2];
141   double *r_energy;
142   double time1,time2,time3;
143   double time4,time5,time6;
144   double time7,time8,time9,time10;
145   double Stime1,Etime1;
146   float ***Dos;
147   float *DosVec;
148   char file_ev[YOUSO10],file_eig[YOUSO10];
149   FILE *fp_ev,*fp_eig;
150   char buf1[fp_bsize];          /* setvbuf */
151   char buf2[fp_bsize];          /* setvbuf */
152   int numprocs,myid,ID,ID1,ID2,tag;
153   /* for OpenMP */
154   int OMPID,Nthrds,Nprocs;
155 
156   MPI_Status stat;
157   MPI_Request request;
158 
159   /* MPI */
160   MPI_Comm_size(mpi_comm_level1,&numprocs);
161   MPI_Comm_rank(mpi_comm_level1,&myid);
162   MPI_Barrier(mpi_comm_level1);
163 
164   if (myid==Host_ID && 0<level_stdout){
165     printf("\n<Band_DFT_Dosout>\n"); fflush(stdout);
166   }
167 
168   dtime(&TStime);
169 
170   time1 = 0.0;
171   time2 = 0.0;
172   time3 = 0.0;
173   time4 = 0.0;
174   time5 = 0.0;
175   time6 = 0.0;
176   time7 = 0.0;
177   time8 = 0.0;
178   time9 = 0.0;
179   time10 = 0.0;
180 
181   /****************************************************
182              calculation of the array size
183   ****************************************************/
184 
185   n = 0;
186   for (i=1; i<=atomnum; i++){
187     wanA  = WhatSpecies[i];
188     n  += Spe_Total_CNO[wanA];
189   }
190 
191   /****************************************************
192    Allocation of arrays
193   ****************************************************/
194 
195   DosVec = (float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]);
196 
197   MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
198   arpo = (int*)malloc(sizeof(int)*numprocs);
199 
200   N_ko=2; i_ko[0]=List_YOUSO[23]; i_ko[1]=n+1;
201   ko=(double**)malloc_multidimarray("double",N_ko, i_ko);
202 
203   koS = (double*)malloc(sizeof(double)*(n+1));
204 
205   N_H=2; i_H[0]=i_H[1]=n+1;
206   H=(dcomplex**)malloc_multidimarray("dcomplex",N_H, i_H);
207 
208   N_S=2; i_S[0]=i_S[1]=n+1;
209   S=(dcomplex**)malloc_multidimarray("dcomplex",N_S, i_S);
210 
211   M1 = (double*)malloc(sizeof(double)*(n+1));
212 
213   N_C=2; i_C[0]=i_C[1]=n+1;
214   C=(dcomplex**)malloc_multidimarray("dcomplex",N_C, i_C);
215 
216   KGrids1 = (double*)malloc(sizeof(double)*knum_i);
217   KGrids2 = (double*)malloc(sizeof(double)*knum_j);
218   KGrids3 = (double*)malloc(sizeof(double)*knum_k);
219 
220   SD = (double*)malloc(sizeof(double)*(atomnum+1)*List_YOUSO[7]*2);
221 
222   TmpM = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
223   for (j=0; j<n+1; j++){
224     TmpM[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
225   }
226 
227   TmpS = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
228   for (j=0; j<n+1; j++){
229     TmpS[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
230   }
231 
232   H2 = (dcomplex***)malloc(sizeof(dcomplex**)*List_YOUSO[23]);
233   for (i=0; i<List_YOUSO[23]; i++){
234     H2[i] = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
235     for (j=0; j<n+1; j++){
236       H2[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
237     }
238   }
239 
240   /* for calculation partial density matrix */
241 
242   if (cal_partial_charge){
243 
244     PDM = (double*****)malloc(sizeof(double****)*2);
245     for (k=0; k<=1; k++){
246 
247       PDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
248       FNAN[0] = 0;
249 
250       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
251 
252 	if (Gc_AN==0){
253 	  tno0 = 1;
254 	}
255 	else{
256 	  Cwan = WhatSpecies[Gc_AN];
257 	  tno0 = Spe_Total_NO[Cwan];
258 	}
259 
260 	PDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
261 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
262 
263 	  if (Gc_AN==0){
264 	    tno1 = 1;
265 	  }
266 	  else{
267 	    Gh_AN = natn[Gc_AN][h_AN];
268 	    Hwan = WhatSpecies[Gh_AN];
269 	    tno1 = Spe_Total_NO[Hwan];
270 	  }
271 
272 	  PDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
273 	  for (i=0; i<tno0; i++){
274 	    PDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
275   	    for (j=0; j<tno1; j++) PDM[k][Gc_AN][h_AN][i][j] = 0.0;
276 	  }
277 	}
278       }
279     }
280 
281     TPDM = (double*****)malloc(sizeof(double****)*2);
282     for (k=0; k<=1; k++){
283 
284       TPDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
285       FNAN[0] = 0;
286 
287       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
288 
289 	if (Gc_AN==0){
290 	  tno0 = 1;
291 	}
292 	else{
293 	  Cwan = WhatSpecies[Gc_AN];
294 	  tno0 = Spe_Total_NO[Cwan];
295 	}
296 
297 	TPDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
298 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
299 
300 	  if (Gc_AN==0){
301 	    tno1 = 1;
302 	  }
303 	  else{
304 	    Gh_AN = natn[Gc_AN][h_AN];
305 	    Hwan = WhatSpecies[Gh_AN];
306 	    tno1 = Spe_Total_NO[Hwan];
307 	  }
308 
309 	  TPDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
310 	  for (i=0; i<tno0; i++){
311 	    TPDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
312   	    for (j=0; j<tno1; j++) TPDM[k][Gc_AN][h_AN][i][j] = 0.0;
313 	  }
314 	}
315       }
316     }
317   }
318 
319   /* allocate Dos */
320 
321   Dos = (float***)malloc(sizeof(float**)*DosGauss_Num_Mesh);
322   for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
323     Dos[ik] = (float**)malloc(sizeof(float*)*(SpinP_switch+1) );
324     for (spin=0; spin<=SpinP_switch; spin++) {
325       Dos[ik][spin] = (float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]);
326       for (i=0; i<(atomnum+1)*List_YOUSO[7]; i++)  Dos[ik][spin][i] = 0.0;
327     }
328   }
329 
330   /* allocate r_energy */
331 
332   r_energy = (double*)malloc(sizeof(double)*DosGauss_Num_Mesh);
333 
334   /* set up energies where DOS is calculated */
335 
336   Dos_Erange_Tmp[0] = Dos_Erange[0] + ChemP;
337   Dos_Erange_Tmp[1] = Dos_Erange[1] + ChemP;
338 
339   de = (Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])/(double)DosGauss_Num_Mesh;
340   for (i=0; i<DosGauss_Num_Mesh; i++) {
341     r_energy[i] = Dos_Erange_Tmp[0] + de*(double)i;
342   }
343 
344   /* no spin-orbit coupling */
345   if (SO_switch==0){
346     Dummy_ImNL = (double****)malloc(sizeof(double***)*1);
347     Dummy_ImNL[0] = (double***)malloc(sizeof(double**)*1);
348     Dummy_ImNL[0][0] = (double**)malloc(sizeof(double*)*1);
349     Dummy_ImNL[0][0][0] = (double*)malloc(sizeof(double)*1);
350   }
351 
352   snum_i = knum_i;
353   snum_j = knum_j;
354   snum_k = knum_k;
355 
356   /* set up  k-grids */
357 
358   for (i=0; i<knum_i; i++){
359     k1 = (double)i/snum_i + Shift_K_Point;
360     KGrids1[i]=k1;
361   }
362   for (i=0; i<knum_j; i++){
363     k1 = (double)i/snum_j - Shift_K_Point;
364     KGrids2[i]=k1;
365   }
366   for (i=0; i<knum_k; i++){
367     k1 = (double)i/snum_k + 2.0*Shift_K_Point;
368     KGrids3[i]=k1;
369   }
370 
371   if (myid==Host_ID && 0<level_stdout){
372     printf(" KGrids1: ");
373     for (i=0;i<=knum_i-1;i++) printf("%f ",KGrids1[i]);
374     printf("\n");
375     printf(" KGrids2: ");
376     for (i=0;i<=knum_j-1;i++) printf("%f ",KGrids2[i]);
377     printf("\n");
378     printf(" KGrids3: ");
379     for (i=0;i<=knum_k-1;i++) printf("%f ",KGrids3[i]);
380     printf("\n");
381   }
382 
383   /***********************************
384        one-dimentionalize for MPI
385   ************************************/
386 
387   T_knum = 0;
388   for (i=0; i<=(knum_i-1); i++){
389     for (j=0; j<=(knum_j-1); j++){
390       for (k=0; k<=(knum_k-1); k++){
391         T_knum++;
392       }
393     }
394   }
395 
396   T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
397   T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
398   T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
399 
400   Ti_KGrids1 = (int*)malloc(sizeof(int)*T_knum);
401   Tj_KGrids2 = (int*)malloc(sizeof(int)*T_knum);
402   Tk_KGrids3 = (int*)malloc(sizeof(int)*T_knum);
403 
404   EIGEN  = (double***)malloc(sizeof(double**)*List_YOUSO[23]);
405   for (i=0; i<List_YOUSO[23]; i++){
406     EIGEN[i] = (double**)malloc(sizeof(double*)*T_knum);
407     for (j=0; j<T_knum; j++){
408       EIGEN[i][j] = (double*)malloc(sizeof(double)*(n+1));
409     }
410   }
411 
412   /* set T_KGrid1,2,3 */
413 
414   T_knum = 0;
415   for (i=0; i<=(knum_i-1); i++){
416     for (j=0; j<=(knum_j-1); j++){
417       for (k=0; k<=(knum_k-1); k++){
418 
419 	T_KGrids1[T_knum] = KGrids1[i];
420 	T_KGrids2[T_knum] = KGrids2[j];
421 	T_KGrids3[T_knum] = KGrids3[k];
422 
423 	Ti_KGrids1[T_knum] = i;
424 	Tj_KGrids2[T_knum] = j;
425 	Tk_KGrids3[T_knum] = k;
426 
427         T_knum++;
428       }
429     }
430   }
431 
432   /* allocate k-points into proccessors */
433 
434   if (T_knum<=myid){
435     S_knum = -10;
436     E_knum = -100;
437     num_kloop0 = 1;
438   }
439   else if (T_knum<numprocs) {
440     S_knum = myid;
441     E_knum = myid;
442     num_kloop0 = 1;
443   }
444   else {
445     tmp = (double)T_knum/(double)numprocs;
446     num_kloop0 = (int)tmp + 1;
447 
448     S_knum = (int)((double)myid*(tmp+0.0001));
449     E_knum = (int)((double)(myid+1)*(tmp+0.0001)) - 1;
450     if (myid==(numprocs-1)) E_knum = T_knum - 1;
451     if (E_knum<0)           E_knum = 0;
452   }
453 
454   factor = 1.0/(double)(knum_i * knum_j * knum_k); /* normalization factor */
455   pi2 = sqrt(PI);
456 
457   /* for kloop */
458 
459   for (kloop0=0; kloop0<num_kloop0; kloop0++){
460 
461     kloop = kloop0 + S_knum;
462     arpo[myid] = -1;
463     if (S_knum<=kloop && kloop<=E_knum) arpo[myid] = kloop;
464     for (ID=0; ID<numprocs; ID++){
465       MPI_Bcast(&arpo[ID], 1, MPI_INT, ID, mpi_comm_level1);
466     }
467 
468     /* set S */
469 
470     for (ID=0; ID<numprocs; ID++){
471 
472       kloop = arpo[ID];
473 
474       if (measure_time==1) dtime(&Stime1);
475 
476       if (0<=kloop){
477 
478         k1 = T_KGrids1[kloop];
479         k2 = T_KGrids2[kloop];
480         k3 = T_KGrids3[kloop];
481 
482         Overlap_Band(ID,CntOLP,TmpM,MP,k1,k2,k3);
483         n = TmpM[0][0].r;
484 
485         if (myid==ID){
486 	  for (i1=1; i1<=n; i1++){
487 	    for (j1=1; j1<=n; j1++){
488 	      S[i1][j1].r = TmpM[i1][j1].r;
489 	      S[i1][j1].i = TmpM[i1][j1].i;
490 
491 	      TmpS[i1][j1].r = TmpM[i1][j1].r;
492 	      TmpS[i1][j1].i = TmpM[i1][j1].i;
493 	    }
494 	  }
495         }
496 
497       }
498 
499       if (measure_time==1){
500         dtime(&Etime1);
501         time1 += Etime1 - Stime1;
502       }
503     }
504 
505     kloop = arpo[myid];
506 
507     if (0<=kloop){
508 
509       if (measure_time==1) dtime(&Stime1);
510 
511       EigenBand_lapack(S,ko[0],n,n,1);
512 
513       if (measure_time==1){
514         dtime(&Etime1);
515         time2 += Etime1 - Stime1;
516       }
517 
518       if (3<=level_stdout && 0<=kloop){
519 	printf("  kloop %2d  k1 k2 k3 %10.6f %10.6f %10.6f\n",
520 	       kloop,T_KGrids1[kloop],T_KGrids2[kloop],T_KGrids3[kloop]);
521 	for (i1=1; i1<=n; i1++){
522 	  printf("  Eigenvalues of OLP  %2d  %15.12f\n",i1,ko[0][i1]);
523 	}
524       }
525 
526       /* minus eigenvalues to 1.0e-14 */
527 
528       for (l=1; l<=n; l++){
529         if (ko[0][l]<0.0) ko[0][l] = 1.0e-14;
530         koS[l] = ko[0][l];
531       }
532 
533       /* calculate S*1/sqrt(ko) */
534 
535       for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[0][l]);
536 
537       /* S * M1  */
538 
539       for (i1=1; i1<=n; i1++){
540 	for (j1=1; j1<=n; j1++){
541 	  S[i1][j1].r = S[i1][j1].r*M1[j1];
542 	  S[i1][j1].i = S[i1][j1].i*M1[j1];
543 	}
544       }
545 
546     } /* if (0<=kloop) */
547 
548     /* set H */
549 
550     for (spin=0; spin<=SpinP_switch; spin++){
551 
552       for (ID=0; ID<numprocs; ID++){
553 
554         kloop = arpo[ID];
555 
556         if (0<=kloop){
557           k1 = T_KGrids1[kloop];
558           k2 = T_KGrids2[kloop];
559           k3 = T_KGrids3[kloop];
560 
561           Hamiltonian_Band(ID, nh[spin], TmpM, MP, k1, k2, k3);
562 
563           if (myid==ID){
564 	    for (i1=1; i1<=n; i1++){
565 	      for (j1=1; j1<=n; j1++){
566 	        H[i1][j1].r = TmpM[i1][j1].r;
567 	        H[i1][j1].i = TmpM[i1][j1].i;
568 	      }
569 	    }
570           }
571 	}
572       }
573 
574       kloop = arpo[myid];
575 
576       if (0<=kloop){
577 
578         /****************************************************
579  	                 M1 * U^t * H * U * M1
580         ****************************************************/
581 
582         if (measure_time==1) dtime(&Stime1);
583 
584         /* transpose S */
585 
586 	for (i1=1; i1<=n; i1++){
587 	  for (j1=i1+1; j1<=n; j1++){
588 	    Ctmp1 = S[i1][j1];
589 	    Ctmp2 = S[j1][i1];
590 	    S[i1][j1] = Ctmp2;
591 	    S[j1][i1] = Ctmp1;
592 	  }
593 	}
594 
595         /* H * U * M1 */
596 
597 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
598 	{
599 
600 	  /* get info. on OpenMP */
601 
602 	  OMPID = omp_get_thread_num();
603 	  Nthrds = omp_get_num_threads();
604 	  Nprocs = omp_get_num_procs();
605 
606 	  for (i1=1+OMPID; i1<=n; i1+=Nthrds){
607 
608 	    for (j1=1; j1<=n; j1++){
609 
610 	      sum  = 0.0;
611 	      sumi = 0.0;
612 
613 	      for (l=1; l<=n; l++){
614 		sum  += H[i1][l].r*S[j1][l].r - H[i1][l].i*S[j1][l].i;
615 		sumi += H[i1][l].r*S[j1][l].i + H[i1][l].i*S[j1][l].r;
616 	      }
617 
618 	      C[j1][i1].r = sum;
619 	      C[j1][i1].i = sumi;
620 	    }
621 	  }
622 	} /* #pragma omp parallel */
623 
624         /* M1 * U^+ H * U * M1 */
625 
626 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
627 	{
628 
629 	  /* get info. on OpenMP */
630 
631 	  OMPID = omp_get_thread_num();
632 	  Nthrds = omp_get_num_threads();
633 	  Nprocs = omp_get_num_procs();
634 
635 	  for (i1=1+OMPID; i1<=n; i1+=Nthrds){
636 	    for (j1=1; j1<=n; j1++){
637 
638 	      sum  = 0.0;
639 	      sumi = 0.0;
640 
641 	      for (l=1; l<=n; l++){
642 		sum  +=  S[i1][l].r*C[j1][l].r + S[i1][l].i*C[j1][l].i;
643 		sumi +=  S[i1][l].r*C[j1][l].i - S[i1][l].i*C[j1][l].r;
644 	      }
645 	      H[i1][j1].r = sum;
646 	      H[i1][j1].i = sumi;
647 	    }
648 	  }
649 	} /* #pragma omp parallel */
650 
651         /* H to C */
652 
653 	for (i1=1; i1<=n; i1++){
654 	  for (j1=1; j1<=n; j1++){
655 	    C[i1][j1] = H[i1][j1];
656 	  }
657 	}
658 
659        /* penalty for ill-conditioning states */
660 
661 	EV_cut0 = Threshold_OLP_Eigen;
662 
663 	for (i1=1; i1<=n; i1++){
664 
665 	  if (koS[i1]<EV_cut0){
666 	    C[i1][i1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
667 	  }
668 
669 	  /* cutoff the interaction between the ill-conditioned state */
670 
671 	  if (1.0e+3<C[i1][i1].r){
672 	    for (j1=1; j1<=n; j1++){
673 	      C[i1][j1] = Complex(0.0,0.0);
674 	      C[j1][i1] = Complex(0.0,0.0);
675 	    }
676 	    C[i1][i1].r = 1.0e+4;
677 	  }
678 	}
679 
680         if (measure_time==1){
681           dtime(&Etime1);
682           time3 += Etime1 - Stime1;
683         }
684 
685         /* solve eigenvalue problem */
686 
687         if (measure_time==1) dtime(&Stime1);
688 
689 	n1 = n;
690         EigenBand_lapack(C,ko[spin],n1,n1,1);
691 
692 	for (i1=1; i1<=n; i1++) EIGEN[spin][kloop][i1] = ko[spin][i1];
693 
694         if (measure_time==1){
695           dtime(&Etime1);
696           time4 += Etime1 - Stime1;
697         }
698 
699         /****************************************************
700 	     transformation to the original eigen vectors.
701 	     NOTE JRCAT-244p and JAIST-2122p
702 	     C = U * lambda^{-1/2} * D
703         ****************************************************/
704 
705         if (measure_time==1) dtime(&Stime1);
706 
707 	/* transpose */
708 
709 	for (i1=1; i1<=n; i1++){
710 	  for (j1=i1+1; j1<=n; j1++){
711 	    Ctmp1 = S[i1][j1];
712 	    Ctmp2 = S[j1][i1];
713 	    S[i1][j1] = Ctmp2;
714 	    S[j1][i1] = Ctmp1;
715 	  }
716 	}
717 
718 	/* transpose */
719 
720 	for (i1=1; i1<=n; i1++){
721 	  for (j1=i1+1; j1<=n; j1++){
722 	    Ctmp1 = C[i1][j1];
723 	    Ctmp2 = C[j1][i1];
724 	    C[i1][j1] = Ctmp2;
725 	    C[j1][i1] = Ctmp1;
726 	  }
727 	}
728 
729         /* shift */
730 
731 	for (j1=1; j1<=n; j1++){
732 	  for (l=n; 1<=l; l--){
733    	    C[j1][l] = C[j1][l];
734 	  }
735 	}
736 
737 #pragma omp parallel shared(C,S,H2,n,n1,spin) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
738 	{
739 
740 	  /* get info. on OpenMP */
741 
742 	  OMPID = omp_get_thread_num();
743 	  Nthrds = omp_get_num_threads();
744 	  Nprocs = omp_get_num_procs();
745 
746 	  for (i1=1+OMPID; i1<=n; i1+=Nthrds){
747 	    for (j1=1; j1<=n1; j1++){
748 
749 	      sum  = 0.0;
750 	      sumi = 0.0;
751 
752 	      for (l=1; l<=n; l++){
753 		sum  +=  S[i1][l].r*C[j1][l].r - S[i1][l].i*C[j1][l].i;
754 		sumi +=  S[i1][l].r*C[j1][l].i + S[i1][l].i*C[j1][l].r;
755 	      }
756 
757 	      H2[spin][j1][i1].r = sum;
758 	      H2[spin][j1][i1].i = sumi;
759 
760 	    }
761 	  }
762 	} /* #pragma omp parallel */
763 
764         if (measure_time==1){
765           dtime(&Etime1);
766           time5 += Etime1 - Stime1;
767         }
768 
769       } /* if (0<=kloop) */
770     } /* spin */
771 
772     /****************************************************
773         store LDOS
774     ****************************************************/
775 
776     kloop = arpo[myid];
777 
778     if (0<=kloop){
779 
780       for (spin=0; spin<=SpinP_switch; spin++){
781 
782 	k1 = T_KGrids1[kloop];
783 	k2 = T_KGrids2[kloop];
784 	k3 = T_KGrids3[kloop];
785 
786 	i = Ti_KGrids1[kloop];
787 	j = Tj_KGrids2[kloop];
788 	k = Tk_KGrids3[kloop];
789 
790 	for (l=1; l<=n; l++){
791 
792 	  /* initialize */
793 
794 	  for (i1=0; i1<(atomnum+1)*List_YOUSO[7]; i1++){
795 	    SD[i1] = 0.0;
796 	  }
797 
798 	  /* calculate SD */
799 
800 #pragma omp parallel shared(TmpS,SD,spin,l,H2,MP,Spe_Total_CNO,WhatSpecies,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,GB_AN,wanB,tnoB,Bnum,i1,j1,u2,v2,uv,vu,Redum,Imdum)
801 	  {
802 
803 	    /* get info. on OpenMP */
804 
805 	    OMPID = omp_get_thread_num();
806 	    Nthrds = omp_get_num_threads();
807 	    Nprocs = omp_get_num_procs();
808 
809 	    for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
810 
811 	      wanA = WhatSpecies[GA_AN];
812 	      tnoA = Spe_Total_CNO[wanA];
813 	      Anum = MP[GA_AN];
814 
815 	      for (GB_AN=1; GB_AN<=atomnum; GB_AN++){
816 
817 		wanB = WhatSpecies[GB_AN];
818 		tnoB = Spe_Total_CNO[wanB];
819 		Bnum = MP[GB_AN];
820 
821 		for (i1=0; i1<tnoA; i1++){
822 		  for (j1=0; j1<tnoB; j1++){
823 
824 		    u2 = H2[spin][l][Anum+i1].r*H2[spin][l][Bnum+j1].r;
825 		    v2 = H2[spin][l][Anum+i1].i*H2[spin][l][Bnum+j1].i;
826 		    uv = H2[spin][l][Anum+i1].r*H2[spin][l][Bnum+j1].i;
827 		    vu = H2[spin][l][Anum+i1].i*H2[spin][l][Bnum+j1].r;
828 
829 		    Redum = (u2 + v2);
830 		    Imdum = (uv - vu);
831 
832 		    SD[Anum+i1] += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
833 
834 		  } /* j1 */
835 		} /* i1 */
836 	      } /* GB_AN */
837 	    } /* GA_AN */
838 
839 	  } /* #pragma omp parallel */
840 
841           /*  calculate contribution to Gaussian DOS */
842 
843 #pragma omp parallel shared(MP,Spe_Total_CNO,WhatSpecies,pi2,SD,factor,Dos,r_energy,DosGauss_Width,DosGauss_Num_Mesh,Dos_Erange_Tmp,l,kloop,spin,EIGEN,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,eg,x,iecenter,iewidth,iemin,iemax,ie,xa,i1)
844 	  {
845 
846 	    /* get info. on OpenMP */
847 
848 	    OMPID = omp_get_thread_num();
849 	    Nthrds = omp_get_num_threads();
850 	    Nprocs = omp_get_num_procs();
851 
852 	    for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
853 
854 	      wanA = WhatSpecies[GA_AN];
855 	      tnoA = Spe_Total_CNO[wanA];
856 	      Anum = MP[GA_AN];
857 
858 	      /*************************
859                   exp( -(x/a)^2 )
860                   a= DosGauss_Width
861 	          x=-3a : 3a is counted
862 	      **************************/
863 
864 	      eg = EIGEN[spin][kloop][l];
865 	      x = (eg-Dos_Erange_Tmp[0])/(Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])*(DosGauss_Num_Mesh-1);
866 	      iecenter = (int)x ;
867 
868 	      iewidth = DosGauss_Width*3.0/(Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])*(DosGauss_Num_Mesh-1)+3;
869 
870 	      iemin = iecenter - iewidth;
871 	      iemax = iecenter + iewidth;
872 
873 	      if (iemin<0) iemin=0;
874 	      if (iemax>=DosGauss_Num_Mesh) iemax=DosGauss_Num_Mesh-1;
875 
876 	      if ( 0<=iemin && iemin<DosGauss_Num_Mesh && 0<=iemax && iemax<DosGauss_Num_Mesh ) {
877 
878 		for (ie=iemin;ie<=iemax;ie++) {
879 		  xa = (eg-r_energy[ie])/DosGauss_Width;
880 
881 		  for (i1=0; i1<tnoA; i1++){
882 		    Dos[ie][spin][Anum+i1] += (float)(factor*SD[Anum+i1]* exp(-xa*xa)/(DosGauss_Width*pi2));
883 		  }
884 		}
885 	      }
886 
887 	    }   /* GA_AN */
888 
889 	  } /* #pragma omp parallel */
890 
891 	} /* l */
892       } /* spin */
893     } /* if (0<=kloop) */
894 
895     /*********************************************
896         calculation of partial density matrix
897     *********************************************/
898 
899     kloop = arpo[myid];
900 
901     if (0<=kloop && cal_partial_charge) {
902 
903       for (spin=0; spin<=SpinP_switch; spin++){
904 
905 	k1 = T_KGrids1[kloop];
906 	k2 = T_KGrids2[kloop];
907 	k3 = T_KGrids3[kloop];
908 
909 	for (l=1; l<=n; l++){
910 
911 	  x1 = (ko[spin][l] - ChemP)*Beta;
912 	  if (x1<=-max_x) x1 = -max_x;
913 	  if (max_x<=x1)  x1 = max_x;
914 	  FermiF1 = 1.0/(1.0 + exp(x1));
915 
916 	  x2 = (ko[spin][l] - (ChemP+ene_win_partial_charge))*Beta;
917 	  if (x2<=-max_x) x2 = -max_x;
918 	  if (max_x<=x2)  x2 = max_x;
919 	  FermiF2 = 1.0/(1.0 + exp(x2));
920 
921 	  diffF = fabs(FermiF1-FermiF2);
922 
923 	  for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
924 
925 	    wanA = WhatSpecies[GA_AN];
926 	    tnoA = Spe_Total_CNO[wanA];
927 	    Anum = MP[GA_AN];
928 
929 	    for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
930 	      GB_AN = natn[GA_AN][LB_AN];
931               Rn = ncn[GA_AN][LB_AN];
932 	      wanB = WhatSpecies[GB_AN];
933 	      tnoB = Spe_Total_CNO[wanB];
934 	      Bnum = MP[GB_AN];
935 
936 	      l1 = atv_ijk[Rn][1];
937 	      l2 = atv_ijk[Rn][2];
938 	      l3 = atv_ijk[Rn][3];
939 	      kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
940 
941 	      si = sin(2.0*PI*kRn);
942 	      co = cos(2.0*PI*kRn);
943 
944 	      for (i=0; i<tnoA; i++){
945 		for (j=0; j<tnoB; j++){
946 
947 		  u2 = H2[spin][l][Anum+i].r*H2[spin][l][Bnum+j].r;
948 		  v2 = H2[spin][l][Anum+i].i*H2[spin][l][Bnum+j].i;
949 		  uv = H2[spin][l][Anum+i].r*H2[spin][l][Bnum+j].i;
950 		  vu = H2[spin][l][Anum+i].i*H2[spin][l][Bnum+j].r;
951 		  tmp = co*(u2+v2) - si*(uv-vu);
952                   PDM[spin][GA_AN][LB_AN][i][j] += diffF*tmp;
953 		}
954 	      }
955 	    }
956 	  }
957 	}
958       }
959     }
960 
961     /****************************************************
962        MPI: EIGEN
963     ****************************************************/
964 
965     for (ID=0; ID<numprocs; ID++){
966 
967       kloop = arpo[ID];
968 
969       if (0<=kloop){
970         for (spin=0; spin<=SpinP_switch; spin++){
971           MPI_Bcast(&EIGEN[spin][kloop][0], n+1, MPI_DOUBLE, ID, mpi_comm_level1);
972 	}
973       }
974     }
975 
976   } /* kloop0 */
977 
978   /****************************************************
979      MPI: PDM
980   ****************************************************/
981 
982   if (cal_partial_charge) {
983 
984     /* MPI: PDM */
985 
986     for (spin=0; spin<=SpinP_switch; spin++){
987       for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
988 
989 	wanA = WhatSpecies[GA_AN];
990 	tnoA = Spe_Total_CNO[wanA];
991 
992 	for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
993 
994 	  GB_AN = natn[GA_AN][LB_AN];
995 	  wanB = WhatSpecies[GB_AN];
996 	  tnoB = Spe_Total_CNO[wanB];
997 
998 	  for (i=0; i<tnoA; i++){
999 
1000 	    MPI_Allreduce(&PDM[spin][GA_AN][LB_AN][i][0],
1001 			  &TPDM[spin][GA_AN][LB_AN][i][0],
1002 			  tnoB, MPI_DOUBLE, MPI_SUM,
1003 			  mpi_comm_level1);
1004 
1005 	    for (j=0; j<tnoB; j++){
1006               TPDM[spin][GA_AN][LB_AN][i][j] /= (double)T_knum;
1007 	    }
1008 	  }
1009 	}
1010       }
1011     }
1012 
1013     /* store TPDM to Partial_DM */
1014 
1015     for (spin=0; spin<=SpinP_switch; spin++){
1016       for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
1017 
1018         MA_AN = F_G2M[GA_AN];
1019 	wanA = WhatSpecies[GA_AN];
1020 	tnoA = Spe_Total_CNO[wanA];
1021 
1022         if (1<=MA_AN && MA_AN<=Matomnum){
1023 
1024 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1025 
1026 	    GB_AN = natn[GA_AN][LB_AN];
1027 	    wanB = WhatSpecies[GB_AN];
1028 	    tnoB = Spe_Total_CNO[wanB];
1029 
1030 	    for (i=0; i<tnoA; i++){
1031 	      for (j=0; j<tnoB; j++){
1032 		Partial_DM[spin][MA_AN][LB_AN][i][j] = TPDM[spin][GA_AN][LB_AN][i][j];
1033 	      }
1034 	    }
1035 	  }
1036 	}
1037       }
1038     }
1039   }
1040 
1041   /****************************************************************
1042                     write eigenvalues and eigenvectors
1043   ****************************************************************/
1044 
1045   if (measure_time==1) dtime(&Stime1);
1046 
1047   if (myid==Host_ID){
1048 
1049     sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
1050 
1051     if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
1052       printf("can not open a file %s\n",file_eig);
1053     }
1054 
1055     else{
1056 
1057       /* write *.Dos.val */
1058 
1059       printf("write eigenvalues\n");
1060 
1061       fprintf(fp_eig,"mode        7\n");
1062       fprintf(fp_eig,"NonCol      0\n");
1063       fprintf(fp_eig,"N           %d\n",n);
1064       fprintf(fp_eig,"Nspin       %d\n",SpinP_switch);
1065       fprintf(fp_eig,"Erange      %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
1066       fprintf(fp_eig,"Kgrid       %d %d %d\n",1,1,1);
1067       fprintf(fp_eig,"atomnum     %d\n",atomnum);
1068       fprintf(fp_eig,"<WhatSpecies\n");
1069       for (i=1; i<=atomnum; i++) {
1070         fprintf(fp_eig,"%d ",WhatSpecies[i]);
1071       }
1072       fprintf(fp_eig,"\nWhatSpecies>\n");
1073       fprintf(fp_eig,"SpeciesNum     %d\n",SpeciesNum);
1074       fprintf(fp_eig,"<Spe_Total_CNO\n");
1075       for (i=0;i<SpeciesNum;i++) {
1076         fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
1077       }
1078       fprintf(fp_eig,"\nSpe_Total_CNO>\n");
1079 
1080       MaxL = Supported_MaxL;
1081 
1082       fprintf(fp_eig,"MaxL           %d\n",Supported_MaxL);
1083       fprintf(fp_eig,"<Spe_Num_CBasis\n");
1084       for (i=0;i<SpeciesNum;i++) {
1085         for (l=0;l<=MaxL;l++) {
1086 	  fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
1087         }
1088         fprintf(fp_eig,"\n");
1089       }
1090       fprintf(fp_eig,"Spe_Num_CBasis>\n");
1091       fprintf(fp_eig,"ChemP       %lf\n",ChemP);
1092 
1093       fprintf(fp_eig,"irange      %d %d\n",0,DosGauss_Num_Mesh-1);
1094       fprintf(fp_eig,"<Eigenvalues\n");
1095       for (spin=0; spin<=SpinP_switch; spin++) {
1096         fprintf(fp_eig,"%d %d %d ",0,0,0);
1097 
1098         for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
1099           fprintf(fp_eig,"%lf ",r_energy[ik]);
1100 	}
1101         fprintf(fp_eig,"\n");
1102       }
1103       fprintf(fp_eig,"Eigenvalues>\n");
1104 
1105       fclose(fp_eig);
1106     }
1107 
1108   } /* if (myid==Host_ID) */
1109 
1110    /* write *.Dos.vec */
1111 
1112   if (myid==Host_ID){
1113     printf("write eigenvectors\n");
1114     sprintf(file_ev,"%s%s.Dos.vec",filepath,filename);
1115     if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
1116       printf("cannot open a file %s\n",file_ev);
1117     }
1118   }
1119 
1120   for (spin=0; spin<=SpinP_switch; spin++) {
1121     for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
1122 
1123       i_vec[0] = 0;
1124       i_vec[1] = 0;
1125       i_vec[2] = 0;
1126 
1127       MPI_Reduce(&Dos[ik][spin][1], &DosVec[1], n, MPI_FLOAT, MPI_SUM, Host_ID, mpi_comm_level1);
1128 
1129       if (myid==Host_ID){
1130         fwrite(i_vec,sizeof(int),3,fp_ev);
1131         fwrite(&DosVec[1],sizeof(float),n,fp_ev);
1132       }
1133     }
1134   }
1135 
1136   if (myid==Host_ID){
1137     fclose(fp_ev);
1138   }
1139 
1140   if (measure_time==1){
1141     dtime(&Etime1);
1142     time10 += Etime1 - Stime1;
1143   }
1144 
1145   if (measure_time==1){
1146     printf("myid=%4d  time1=%15.12f\n",myid,time1);
1147     printf("myid=%4d  time2=%15.12f\n",myid,time2);
1148     printf("myid=%4d  time3=%15.12f\n",myid,time3);
1149     printf("myid=%4d  time4=%15.12f\n",myid,time4);
1150     printf("myid=%4d  time5=%15.12f\n",myid,time5);
1151     printf("myid=%4d  time6=%15.12f\n",myid,time6);
1152     printf("myid=%4d  time7=%15.12f\n",myid,time7);
1153     printf("myid=%4d  time8=%15.12f\n",myid,time8);
1154     printf("myid=%4d  time9=%15.12f\n",myid,time9);
1155     printf("myid=%4d time10=%15.12f\n",myid,time10);
1156   }
1157 
1158   /****************************************************
1159           Fermi surface viewable by XCrysDen
1160   ****************************************************/
1161 
1162   if (myid==Host_ID){
1163 
1164     int ii,jj,kk;
1165     int ***TD2OD;
1166     double x0,y0,z0;
1167     char file_fermi[YOUSO10];
1168     FILE *fp;
1169 
1170     /* allocation of TD2OD */
1171 
1172     TD2OD = (int***)malloc(sizeof(int**)*knum_i);
1173     for (i=0; i<knum_i; i++){
1174       TD2OD[i] = (int**)malloc(sizeof(int*)*knum_j);
1175       for (j=0; j<knum_j; j++){
1176         TD2OD[i][j] = (int*)malloc(sizeof(int)*knum_k);
1177 	for (k=0; k<knum_k; k++) TD2OD[i][j][k] = 0;
1178       }
1179     }
1180 
1181     kloop = 0;
1182     for (i=0; i<knum_i; i++){
1183       for (j=0; j<knum_j; j++){
1184 	for (k=0; k<knum_k; k++){
1185           TD2OD[i][j][k] = kloop;
1186           kloop++;
1187 	}
1188       }
1189     }
1190 
1191     /* make bxsf files */
1192 
1193     for (spin=0; spin<=SpinP_switch; spin++){
1194 
1195       sprintf(file_fermi,"%s%s.FermiSurf%d.bxsf",filepath,filename,spin);
1196 
1197       if ((fp=fopen(file_fermi,"w")) != NULL){
1198 
1199 	fprintf(fp,"BEGIN_INFO\n");
1200 	fprintf(fp,"Fermi Energy: %15.12f\n",ChemP);
1201 	fprintf(fp,"END_INFO\n\n");
1202 
1203 	fprintf(fp,"BEGIN_BLOCK_BANDGRID_3D\n");
1204 	fprintf(fp,"comment_line\n");
1205 	fprintf(fp,"BEGIN_BANDGRID_3D_simple_example\n");
1206 	fprintf(fp,"%d\n",n);
1207 	fprintf(fp,"%d %d %d\n",knum_i+1,knum_j+1,knum_k+1);
1208 
1209 	fprintf(fp,"0.0 0.0 0.0\n");
1210 	fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[1][1],rtv[1][2],rtv[1][3]);
1211 	fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[2][1],rtv[2][2],rtv[2][3]);
1212 	fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[3][1],rtv[3][2],rtv[3][3]);
1213 
1214 	for (i1=1; i1<=n; i1++){
1215 
1216 	  fprintf(fp,"BAND:  %d\n",i1);
1217 
1218 	  for (i=0; i<=knum_i; i++){
1219             ii = i%knum_i;
1220 	    for (j=0; j<=knum_j; j++){
1221               jj = j%knum_j;
1222 	      for (k=0; k<=knum_k; k++){
1223                 kk = k%knum_k;
1224 		kloop = TD2OD[ii][jj][kk];
1225 		fprintf(fp,"%10.5f ",EIGEN[spin][kloop][i1]);
1226 	      }
1227 	      fprintf(fp,"\n");
1228 	    }
1229 
1230             if (i!=knum_i)  fprintf(fp,"\n");
1231 
1232 	  }
1233 	}
1234 
1235 	fprintf(fp,"END_BANDGRID_3D\n");
1236 	fprintf(fp,"END_BLOCK_BANDGRID_3D\n");
1237 
1238 	fclose(fp);
1239       }
1240       else{
1241         printf("Failure in saving bxsf files.\n");
1242       }
1243 
1244     } /* spin */
1245 
1246     /* free TD2OD */
1247     for (i=0; i<knum_i; i++){
1248       for (j=0; j<knum_j; j++){
1249         free(TD2OD[i][j]);
1250       }
1251       free(TD2OD[i]);
1252     }
1253     free(TD2OD);
1254 
1255   } /* if (myid==Host_ID) */
1256 
1257   /****************************************************
1258                        free arrays
1259   ****************************************************/
1260 
1261   free(DosVec);
1262 
1263   free(MP);
1264   free(arpo);
1265 
1266   for (i=0; i<i_ko[0]; i++){
1267     free(ko[i]);
1268   }
1269   free(ko);
1270 
1271   free(koS);
1272 
1273   for (i=0; i<i_H[0]; i++){
1274     free(H[i]);
1275   }
1276   free(H);
1277 
1278   for (i=0; i<i_S[0]; i++){
1279     free(S[i]);
1280   }
1281   free(S);
1282 
1283   free(M1);
1284 
1285   for (i=0; i<i_C[0]; i++){
1286     free(C[i]);
1287   }
1288   free(C);
1289 
1290   free(KGrids1); free(KGrids2);free(KGrids3);
1291 
1292   free(SD);
1293 
1294   for (j=0; j<n+1; j++){
1295     free(TmpM[j]);
1296   }
1297   free(TmpM);
1298 
1299   for (j=0; j<n+1; j++){
1300     free(TmpS[j]);
1301   }
1302   free(TmpS);
1303 
1304   for (i=0; i<List_YOUSO[23]; i++){
1305     for (j=0; j<n+1; j++){
1306       free(H2[i][j]);
1307     }
1308     free(H2[i]);
1309   }
1310   free(H2);
1311 
1312   /* partial density matrix */
1313 
1314   if (cal_partial_charge){
1315 
1316     /* PDM */
1317 
1318     for (k=0; k<=1; k++){
1319 
1320       FNAN[0] = 0;
1321 
1322       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
1323 
1324 	if (Gc_AN==0){
1325 	  tno0 = 1;
1326 	}
1327 	else{
1328 	  Cwan = WhatSpecies[Gc_AN];
1329 	  tno0 = Spe_Total_NO[Cwan];
1330 	}
1331 
1332 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1333 
1334 	  if (Gc_AN==0){
1335 	    tno1 = 1;
1336 	  }
1337 	  else{
1338 	    Gh_AN = natn[Gc_AN][h_AN];
1339 	    Hwan = WhatSpecies[Gh_AN];
1340 	    tno1 = Spe_Total_NO[Hwan];
1341 	  }
1342 
1343 	  for (i=0; i<tno0; i++){
1344 	    free(PDM[k][Gc_AN][h_AN][i]);
1345 	  }
1346           free(PDM[k][Gc_AN][h_AN]);
1347 	}
1348         free(PDM[k][Gc_AN]);
1349       }
1350       free(PDM[k]);
1351     }
1352     free(PDM);
1353 
1354     /* TPDM */
1355 
1356     for (k=0; k<=1; k++){
1357 
1358       FNAN[0] = 0;
1359 
1360       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
1361 
1362 	if (Gc_AN==0){
1363 	  tno0 = 1;
1364 	}
1365 	else{
1366 	  Cwan = WhatSpecies[Gc_AN];
1367 	  tno0 = Spe_Total_NO[Cwan];
1368 	}
1369 
1370 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1371 
1372 	  if (Gc_AN==0){
1373 	    tno1 = 1;
1374 	  }
1375 	  else{
1376 	    Gh_AN = natn[Gc_AN][h_AN];
1377 	    Hwan = WhatSpecies[Gh_AN];
1378 	    tno1 = Spe_Total_NO[Hwan];
1379 	  }
1380 
1381 	  for (i=0; i<tno0; i++){
1382 	    free(TPDM[k][Gc_AN][h_AN][i]);
1383 	  }
1384           free(TPDM[k][Gc_AN][h_AN]);
1385 	}
1386         free(TPDM[k][Gc_AN]);
1387       }
1388       free(TPDM[k]);
1389     }
1390     free(TPDM);
1391   }
1392 
1393   /* no spin-orbit coupling */
1394   if (SO_switch==0){
1395     free(Dummy_ImNL[0][0][0]);
1396     free(Dummy_ImNL[0][0]);
1397     free(Dummy_ImNL[0]);
1398     free(Dummy_ImNL);
1399   }
1400 
1401   free(T_KGrids1);
1402   free(T_KGrids2);
1403   free(T_KGrids3);
1404 
1405   free(Ti_KGrids1);
1406   free(Tj_KGrids2);
1407   free(Tk_KGrids3);
1408 
1409   for (i=0; i<List_YOUSO[23]; i++){
1410     for (j=0; j<T_knum; j++){
1411       free(EIGEN[i][j]);
1412     }
1413     free(EIGEN[i]);
1414   }
1415   free(EIGEN);
1416 
1417   for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
1418     for (spin=0; spin<=SpinP_switch; spin++) {
1419       free(Dos[ik][spin]);
1420     }
1421     free(Dos[ik]);
1422   }
1423   free(Dos);
1424 
1425   free(r_energy);
1426 
1427   /* for elapsed time */
1428 
1429   dtime(&TEtime);
1430   time0 = TEtime - TStime;
1431   return time0;
1432 
1433 }
1434 
1435 
1436 
1437 
1438 
1439 
1440 
Band_DFT_DosoutGauss_NonCol(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)1441 static double Band_DFT_DosoutGauss_NonCol(
1442                            int knum_i, int knum_j, int knum_k,
1443                            int SpinP_switch,
1444                            double *****nh,
1445                            double *****ImNL,
1446                            double ****CntOLP)
1447 {
1448   int i,j,k,spin,l,i1,j1,ik,n1;
1449   int n, wanA,ii1,jj1,m,n2;
1450   int *MP;
1451   int MA_AN, GA_AN, tnoA,Anum, LB_AN;
1452   int GB_AN, wanB, tnoB, Bnum, RnB;
1453   int l1,l2,l3;
1454 
1455   int ie,iemin,iemax,iemin0,iemax0,n1min,mn;
1456   int MaxL,e1,s1,T_knum,S_knum,E_knum;
1457   int kloop,kloop0,num_kloop0;
1458   int i_vec[10];
1459   int h_AN,tno0,tno1,Gh_AN,Hwan,Gc_AN,Cwan,Rn;
1460 
1461   double *****PDM;
1462   double *****TPDM;
1463   double EV_cut0;
1464   double FermiF1,FermiF2,x1,x2,diffF,max_x=50.0;
1465   double sum,sumi,u2,v2,uv,vu,tmp;
1466   double sum_r0,sum_i0,sum_r1,sum_i1;
1467   double d0,d1,d2,d3;
1468   double kRn,si,co,Redum,Imdum;
1469   double Redum2,Resum,Imdum2;
1470   double TStime,TEtime,time0;
1471   double OLP_eigen_cut=Threshold_OLP_Eigen;
1472   double theta,phi,sit,cot,sip,cop,de;
1473   double tmp1,tmp2,tmp3,eg,x,xa,pi2,factor;
1474   double *ko; int N_ko, i_ko[10];
1475   double *koS;
1476   double *r_energy;
1477   double Dos_Erange_Tmp[2];
1478   dcomplex **H;  int N_H,  i_H[10];
1479   dcomplex **S;  int N_S,  i_S[10];
1480   dcomplex Ctmp1,Ctmp2;
1481   double *M1,**EIGEN;
1482   dcomplex **C;  int N_C,  i_C[10];
1483   double *KGrids1, *KGrids2, *KGrids3;
1484   double *SD;
1485   dcomplex **TmpM,**TmpS;
1486   double *T_KGrids1,*T_KGrids2,*T_KGrids3;
1487   int *Ti_KGrids1,*Tj_KGrids2,*Tk_KGrids3,*arpo;
1488   int iecenter,iewidth;
1489 
1490   double *****Dummy_ImNL;
1491   float **Dos;
1492   float *DosVec;
1493   double snum_i, snum_j, snum_k;
1494   double k1,k2,k3;
1495 
1496   char file_ev[YOUSO10],file_eig[YOUSO10];
1497   FILE *fp_ev,*fp_eig;
1498   char buf1[fp_bsize];          /* setvbuf */
1499   char buf2[fp_bsize];          /* setvbuf */
1500   int numprocs,myid,ID,ID1,ID2,tag;
1501 
1502   /* for OpenMP */
1503   int OMPID,Nthrds,Nprocs;
1504 
1505   MPI_Status stat;
1506   MPI_Request request;
1507 
1508   /* MPI */
1509   MPI_Comm_size(mpi_comm_level1,&numprocs);
1510   MPI_Comm_rank(mpi_comm_level1,&myid);
1511   MPI_Barrier(mpi_comm_level1);
1512 
1513   if (myid==Host_ID && 0<level_stdout){
1514     printf("\n<Band_DFT_Dosout>\n"); fflush(stdout);
1515   }
1516 
1517   dtime(&TStime);
1518 
1519   /****************************************************
1520              calculation of the array size
1521   ****************************************************/
1522 
1523   n = 0;
1524   for (i=1; i<=atomnum; i++){
1525     wanA  = WhatSpecies[i];
1526     n  = n + Spe_Total_CNO[wanA];
1527   }
1528   n2 = 2*n + 2;
1529 
1530   /****************************************************
1531    Allocation of arrays
1532   ****************************************************/
1533 
1534   DosVec = (float*)malloc(sizeof(float)*2*(atomnum+1)*List_YOUSO[7]);
1535 
1536   MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
1537 
1538   arpo = (int*)malloc(sizeof(int)*numprocs);
1539 
1540   ko = (double*)malloc(sizeof(double)*n2);
1541   koS = (double*)malloc(sizeof(double)*(n+1));
1542 
1543   H = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1544   for (j=0; j<n2; j++){
1545     H[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1546   }
1547 
1548   S = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1549   for (i=0; i<n2; i++){
1550     S[i] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1551   }
1552 
1553   M1 = (double*)malloc(sizeof(double)*n2);
1554 
1555   C = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1556   for (j=0; j<n2; j++){
1557     C[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1558   }
1559 
1560   KGrids1 = (double*)malloc(sizeof(double)*knum_i);
1561   KGrids2 = (double*)malloc(sizeof(double)*knum_j);
1562   KGrids3 = (double*)malloc(sizeof(double)*knum_k);
1563 
1564   SD = (double*)malloc(sizeof(double)*(atomnum+1)*List_YOUSO[7]*2);
1565 
1566   TmpM = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1567   for (j=0; j<n2; j++){
1568     TmpM[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1569   }
1570 
1571   TmpS = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
1572   for (j=0; j<n+1; j++){
1573     TmpS[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
1574   }
1575 
1576   /* allocate Dos */
1577 
1578   Dos = (float**)malloc(sizeof(float*)*DosGauss_Num_Mesh);
1579   for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
1580     Dos[ik] = (float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]*2);
1581     for (i=0; i<(atomnum+1)*List_YOUSO[7]*2; i++)  Dos[ik][i] = 0.0;
1582   }
1583 
1584   /* allocate r_energy */
1585 
1586   r_energy = (double*)malloc(sizeof(double)*DosGauss_Num_Mesh);
1587 
1588   /* set up energies where DOS is calculated */
1589 
1590   Dos_Erange_Tmp[0] = Dos_Erange[0] + ChemP;
1591   Dos_Erange_Tmp[1] = Dos_Erange[1] + ChemP;
1592 
1593   de = (Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])/(double)DosGauss_Num_Mesh;
1594   for (i=0; i<DosGauss_Num_Mesh; i++) {
1595     r_energy[i] = Dos_Erange_Tmp[0] + de*(double)i;
1596   }
1597 
1598   /* non-spin-orbit coupling and non-LDA+U */
1599   if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
1600       && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
1601 
1602     Dummy_ImNL = (double*****)malloc(sizeof(double****)*1);
1603     Dummy_ImNL[0] = (double****)malloc(sizeof(double***)*1);
1604     Dummy_ImNL[0][0] = (double***)malloc(sizeof(double**)*1);
1605     Dummy_ImNL[0][0][0] = (double**)malloc(sizeof(double*)*1);
1606     Dummy_ImNL[0][0][0][0] = (double*)malloc(sizeof(double)*1);
1607   }
1608 
1609   /* for calculation partial density matrix */
1610 
1611   if (cal_partial_charge){
1612 
1613     PDM = (double*****)malloc(sizeof(double****)*2);
1614     for (k=0; k<=1; k++){
1615 
1616       PDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
1617       FNAN[0] = 0;
1618 
1619       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
1620 
1621 	if (Gc_AN==0){
1622 	  tno0 = 1;
1623 	}
1624 	else{
1625 	  Cwan = WhatSpecies[Gc_AN];
1626 	  tno0 = Spe_Total_NO[Cwan];
1627 	}
1628 
1629 	PDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
1630 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1631 
1632 	  if (Gc_AN==0){
1633 	    tno1 = 1;
1634 	  }
1635 	  else{
1636 	    Gh_AN = natn[Gc_AN][h_AN];
1637 	    Hwan = WhatSpecies[Gh_AN];
1638 	    tno1 = Spe_Total_NO[Hwan];
1639 	  }
1640 
1641 	  PDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
1642 	  for (i=0; i<tno0; i++){
1643 	    PDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
1644   	    for (j=0; j<tno1; j++) PDM[k][Gc_AN][h_AN][i][j] = 0.0;
1645 	  }
1646 	}
1647       }
1648     }
1649 
1650     TPDM = (double*****)malloc(sizeof(double****)*2);
1651     for (k=0; k<=1; k++){
1652 
1653       TPDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
1654       FNAN[0] = 0;
1655 
1656       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
1657 
1658 	if (Gc_AN==0){
1659 	  tno0 = 1;
1660 	}
1661 	else{
1662 	  Cwan = WhatSpecies[Gc_AN];
1663 	  tno0 = Spe_Total_NO[Cwan];
1664 	}
1665 
1666 	TPDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
1667 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1668 
1669 	  if (Gc_AN==0){
1670 	    tno1 = 1;
1671 	  }
1672 	  else{
1673 	    Gh_AN = natn[Gc_AN][h_AN];
1674 	    Hwan = WhatSpecies[Gh_AN];
1675 	    tno1 = Spe_Total_NO[Hwan];
1676 	  }
1677 
1678 	  TPDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
1679 	  for (i=0; i<tno0; i++){
1680 	    TPDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
1681   	    for (j=0; j<tno1; j++) TPDM[k][Gc_AN][h_AN][i][j] = 0.0;
1682 	  }
1683 	}
1684       }
1685     }
1686   }
1687 
1688   /****************************************************
1689     set up k-grids
1690   ****************************************************/
1691 
1692   snum_i = knum_i;
1693   snum_j = knum_j;
1694   snum_k = knum_k;
1695 
1696   for (i=0; i<knum_i; i++){
1697     k1 = (double)i/snum_i + Shift_K_Point;
1698     KGrids1[i]=k1;
1699   }
1700   for (i=0; i<knum_j; i++){
1701     k1 = (double)i/snum_j - Shift_K_Point;
1702     KGrids2[i]=k1;
1703   }
1704   for (i=0; i<knum_k; i++){
1705     k1 = (double)i/snum_k + 2.0*Shift_K_Point;
1706     KGrids3[i]=k1;
1707   }
1708 
1709   if (myid==Host_ID && 0<level_stdout){
1710     printf(" KGrids1: ");
1711     for (i=0;i<=knum_i-1;i++) printf("%f ",KGrids1[i]);
1712     printf("\n");
1713     printf(" KGrids2: ");
1714     for (i=0;i<=knum_j-1;i++) printf("%f ",KGrids2[i]);
1715     printf("\n");
1716     printf(" KGrids3: ");
1717     for (i=0;i<=knum_k-1;i++) printf("%f ",KGrids3[i]);
1718     printf("\n");
1719   }
1720 
1721   /***********************************
1722        one-dimentionalize for MPI
1723   ************************************/
1724 
1725   T_knum = 0;
1726   for (i=0; i<=(knum_i-1); i++){
1727     for (j=0; j<=(knum_j-1); j++){
1728       for (k=0; k<=(knum_k-1); k++){
1729         T_knum++;
1730       }
1731     }
1732   }
1733 
1734   T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
1735   T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
1736   T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
1737 
1738   Ti_KGrids1 = (int*)malloc(sizeof(int)*T_knum);
1739   Tj_KGrids2 = (int*)malloc(sizeof(int)*T_knum);
1740   Tk_KGrids3 = (int*)malloc(sizeof(int)*T_knum);
1741 
1742   EIGEN = (double**)malloc(sizeof(double*)*T_knum);
1743   for (j=0; j<T_knum; j++){
1744     EIGEN[j] = (double*)malloc(sizeof(double)*n2);
1745   }
1746 
1747   /* set T_KGrid1,2,3 */
1748 
1749   T_knum = 0;
1750   for (i=0; i<=(knum_i-1); i++){
1751     for (j=0; j<=(knum_j-1); j++){
1752       for (k=0; k<=(knum_k-1); k++){
1753 
1754 	T_KGrids1[T_knum] = KGrids1[i];
1755 	T_KGrids2[T_knum] = KGrids2[j];
1756 	T_KGrids3[T_knum] = KGrids3[k];
1757 
1758 	Ti_KGrids1[T_knum] = i;
1759 	Tj_KGrids2[T_knum] = j;
1760 	Tk_KGrids3[T_knum] = k;
1761 
1762         T_knum++;
1763       }
1764     }
1765   }
1766 
1767   /* allocate k-points into proccessors */
1768 
1769   if (T_knum<=myid){
1770     S_knum = -10;
1771     E_knum = -100;
1772     num_kloop0 = 1;
1773   }
1774   else if (T_knum<numprocs) {
1775     S_knum = myid;
1776     E_knum = myid;
1777     num_kloop0 = 1;
1778   }
1779   else {
1780     tmp = (double)T_knum/(double)numprocs;
1781     num_kloop0 = (int)tmp + 1;
1782 
1783     S_knum = (int)((double)myid*(tmp+0.0001));
1784     E_knum = (int)((double)(myid+1)*(tmp+0.0001)) - 1;
1785     if (myid==(numprocs-1)) E_knum = T_knum - 1;
1786     if (E_knum<0)           E_knum = 0;
1787   }
1788 
1789   factor = 1.0/(double)(knum_i * knum_j * knum_k); /* normalization factor */
1790   pi2 = sqrt(PI);
1791 
1792   /* for kloop */
1793 
1794   for (kloop0=0; kloop0<num_kloop0; kloop0++){
1795 
1796     kloop = kloop0 + S_knum;
1797     arpo[myid] = -1;
1798     if (S_knum<=kloop && kloop<=E_knum) arpo[myid] = kloop;
1799     for (ID=0; ID<numprocs; ID++){
1800       MPI_Bcast(&arpo[ID], 1, MPI_INT, ID, mpi_comm_level1);
1801     }
1802 
1803     /* set S */
1804 
1805     for (ID=0; ID<numprocs; ID++){
1806 
1807       kloop = arpo[ID];
1808 
1809       if (0<=kloop){
1810 
1811         k1 = T_KGrids1[kloop];
1812         k2 = T_KGrids2[kloop];
1813         k3 = T_KGrids3[kloop];
1814 
1815         Overlap_Band(ID,CntOLP,TmpM,MP,k1,k2,k3);
1816         n = TmpM[0][0].r;
1817 
1818         if (myid==ID){
1819 	  for (i1=1; i1<=n; i1++){
1820 	    for (j1=1; j1<=n; j1++){
1821 	      S[i1][j1].r = TmpM[i1][j1].r;
1822 	      S[i1][j1].i = TmpM[i1][j1].i;
1823 
1824 	      TmpS[i1][j1].r = TmpM[i1][j1].r;
1825 	      TmpS[i1][j1].i = TmpM[i1][j1].i;
1826 	    }
1827 	  }
1828         }
1829 
1830       }
1831     }
1832 
1833     kloop = arpo[myid];
1834 
1835     if (0<=kloop){
1836 
1837       EigenBand_lapack(S,ko,n,n,1);
1838 
1839       if (2<=level_stdout && 0<=kloop){
1840 	printf("  kloop %2d  k1 k2 k3 %10.6f %10.6f %10.6f\n",
1841 	       kloop,T_KGrids1[kloop],T_KGrids2[kloop],T_KGrids3[kloop]);
1842 	for (i1=1; i1<=n; i1++){
1843 	  printf("  Eigenvalues of OLP  %2d  %15.12f\n",i1,ko[i1]);
1844 	}
1845       }
1846 
1847       /* minus eigenvalues to 1.0e-14 */
1848       for (l=1; l<=n; l++){
1849         if (ko[l]<0.0){
1850           ko[l] = 1.0e-14;
1851 
1852           if (2<=level_stdout){
1853   	    printf("found an eigenvalue smaller than %10.8f of OLP kloop=%2d\n",
1854                            Threshold_OLP_Eigen,kloop);
1855 	  }
1856 	}
1857 
1858         koS[l] = ko[l];
1859       }
1860 
1861       /* calculate S*1/sqrt(ko) */
1862 
1863       for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[l]);
1864 
1865       /* S * M1  */
1866 
1867       for (i1=1; i1<=n; i1++){
1868 	for (j1=1; j1<=n; j1++){
1869 	  S[i1][j1].r = S[i1][j1].r*M1[j1];
1870 	  S[i1][j1].i = S[i1][j1].i*M1[j1];
1871 	}
1872       }
1873 
1874     }
1875 
1876     /****************************************************
1877        make a full Hamiltonian matrix
1878     ****************************************************/
1879 
1880     for (ID=0; ID<numprocs; ID++){
1881 
1882       kloop = arpo[ID];
1883 
1884       if (0<=kloop){
1885         k1 = T_KGrids1[kloop];
1886         k2 = T_KGrids2[kloop];
1887         k3 = T_KGrids3[kloop];
1888 
1889         /* non-spin-orbit coupling and non-LDA+U */
1890         if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
1891             && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)
1892           Hamiltonian_Band_NC(ID, nh, Dummy_ImNL, TmpM, MP, k1, k2, k3);
1893 
1894         /* spin-orbit coupling or LDA+U */
1895         else
1896           Hamiltonian_Band_NC(ID, nh,       ImNL, TmpM, MP, k1, k2, k3);
1897 
1898 	if (myid==ID){
1899 	  for (i1=1; i1<=2*n; i1++){
1900 	    for (j1=1; j1<=2*n; j1++){
1901 	      H[i1][j1].r = TmpM[i1][j1].r;
1902 	      H[i1][j1].i = TmpM[i1][j1].i;
1903 	    }
1904 	  }
1905 	}
1906       }
1907     }
1908 
1909     kloop = arpo[myid];
1910 
1911     if (0<=kloop){
1912 
1913       /****************************************************
1914 	               M1 * U^t * H * U * M1
1915       ****************************************************/
1916 
1917       /* transpose S */
1918 
1919       for (i1=1; i1<=n; i1++){
1920 	for (j1=i1+1; j1<=n; j1++){
1921 	  Ctmp1 = S[i1][j1];
1922 	  Ctmp2 = S[j1][i1];
1923 	  S[i1][j1] = Ctmp2;
1924 	  S[j1][i1] = Ctmp1;
1925 	}
1926       }
1927 
1928       /* H * U * M1 */
1929 
1930 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,jj1,j1,sum_r0,sum_i0,sum_r1,sum_i1,l)
1931       {
1932 	/* get info. on OpenMP */
1933 
1934 	OMPID = omp_get_thread_num();
1935 	Nthrds = omp_get_num_threads();
1936 	Nprocs = omp_get_num_procs();
1937 
1938 	for (i1=1+OMPID; i1<=2*n; i1+=Nthrds){
1939 
1940 	  jj1 = 1;
1941 	  for (j1=1; j1<=n; j1++){
1942 
1943 	    sum_r0 = 0.0;
1944 	    sum_i0 = 0.0;
1945 
1946 	    sum_r1 = 0.0;
1947 	    sum_i1 = 0.0;
1948 
1949 	    for (l=1; l<=n; l++){
1950 
1951 	      sum_r0 += H[i1][l  ].r*S[j1][l].r - H[i1][l  ].i*S[j1][l].i;
1952 	      sum_i0 += H[i1][l  ].r*S[j1][l].i + H[i1][l  ].i*S[j1][l].r;
1953 
1954 	      sum_r1 += H[i1][l+n].r*S[j1][l].r - H[i1][l+n].i*S[j1][l].i;
1955 	      sum_i1 += H[i1][l+n].r*S[j1][l].i + H[i1][l+n].i*S[j1][l].r;
1956 	    }
1957 
1958 	    C[jj1][i1].r = sum_r0;
1959 	    C[jj1][i1].i = sum_i0;  jj1++;
1960 
1961 	    C[jj1][i1].r = sum_r1;
1962 	    C[jj1][i1].i = sum_i1;  jj1++;
1963 
1964 	  }
1965 	}
1966 
1967       } /* #pragma omp parallel */
1968 
1969       /* M1 * U^+ H * U * M1 */
1970 
1971 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,ii1,j1,sum_r0,sum_i0,sum_r1,sum_i1,l)
1972       {
1973 	/* get info. on OpenMP */
1974 
1975 	OMPID = omp_get_thread_num();
1976 	Nthrds = omp_get_num_threads();
1977 	Nprocs = omp_get_num_procs();
1978 
1979 	for (j1=1+OMPID; j1<=2*n; j1+=Nthrds){
1980 
1981 	  ii1 = 1;
1982 	  for (i1=1; i1<=n; i1++){
1983 
1984 	    sum_r0 = 0.0;
1985 	    sum_i0 = 0.0;
1986 
1987 	    sum_r1 = 0.0;
1988 	    sum_i1 = 0.0;
1989 
1990 	    for (l=1; l<=n; l++){
1991 
1992 	      sum_r0 +=  S[i1][l].r*C[j1][l  ].r + S[i1][l].i*C[j1][l  ].i;
1993 	      sum_i0 +=  S[i1][l].r*C[j1][l  ].i - S[i1][l].i*C[j1][l  ].r;
1994 
1995 	      sum_r1 +=  S[i1][l].r*C[j1][l+n].r + S[i1][l].i*C[j1][l+n].i;
1996 	      sum_i1 +=  S[i1][l].r*C[j1][l+n].i - S[i1][l].i*C[j1][l+n].r;
1997 	    }
1998 
1999 	    H[ii1][j1].r = sum_r0;
2000 	    H[ii1][j1].i = sum_i0; ii1++;
2001 
2002 	    H[ii1][j1].r = sum_r1;
2003 	    H[ii1][j1].i = sum_i1; ii1++;
2004 
2005 	  }
2006 	}
2007 
2008       } /* #pragma omp parallel */
2009 
2010       /* H to C */
2011 
2012       for (i1=1; i1<=2*n; i1++){
2013 	for (j1=1; j1<=2*n; j1++){
2014 	  C[i1][j1].r = H[i1][j1].r;
2015 	  C[i1][j1].i = H[i1][j1].i;
2016 	}
2017       }
2018 
2019       /* penalty for ill-conditioning states */
2020 
2021       EV_cut0 = Threshold_OLP_Eigen;
2022 
2023       for (i1=1; i1<=n; i1++){
2024 
2025 	if (koS[i1]<EV_cut0){
2026 	  C[2*i1-1][2*i1-1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
2027 	  C[2*i1  ][2*i1  ].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
2028 	}
2029 
2030 	/* cutoff the interaction between the ill-conditioned state */
2031 
2032 	if (1.0e+3<C[2*i1-1][2*i1-1].r){
2033 	  for (j1=1; j1<=2*n; j1++){
2034 	    C[2*i1-1][j1    ] = Complex(0.0,0.0);
2035 	    C[j1    ][2*i1-1] = Complex(0.0,0.0);
2036 	    C[2*i1  ][j1    ] = Complex(0.0,0.0);
2037 	    C[j1    ][2*i1  ] = Complex(0.0,0.0);
2038 	  }
2039 	  C[2*i1-1][2*i1-1] = Complex(1.0e+4,0.0);
2040 	  C[2*i1  ][2*i1  ] = Complex(1.0e+4,0.0);
2041 	}
2042       }
2043 
2044       /* solve eigenvalue problem */
2045 
2046       n1 = 2*n;
2047       EigenBand_lapack(C,ko,n1,n1,1);
2048 
2049       for (i1=1; i1<=2*n; i1++) EIGEN[kloop][i1] = ko[i1];
2050 
2051       /****************************************************
2052 	   Transformation to the original eigenvectors.
2053 	   NOTE JRCAT-244p and JAIST-2122p
2054 	   C = U * lambda^{-1/2} * D
2055       ****************************************************/
2056 
2057       /* transpose */
2058       for (i1=1; i1<=2*n; i1++){
2059 	for (j1=i1+1; j1<=2*n; j1++){
2060 	  Ctmp1 = C[i1][j1];
2061 	  Ctmp2 = C[j1][i1];
2062 	  C[i1][j1] = Ctmp2;
2063 	  C[j1][i1] = Ctmp1;
2064 	}
2065       }
2066 
2067       /* transpose */
2068       for (i1=1; i1<=n; i1++){
2069 	for (j1=i1+1; j1<=n; j1++){
2070 	  Ctmp1 = S[i1][j1];
2071 	  Ctmp2 = S[j1][i1];
2072 	  S[i1][j1] = Ctmp2;
2073 	  S[j1][i1] = Ctmp1;
2074 	}
2075       }
2076 
2077 #pragma omp parallel shared(C,S,H,n,n1) private(OMPID,Nthrds,Nprocs,i1,j1,l1,sum_r0,sum_i0,sum_r1,sum_i1,l)
2078       {
2079 	/* get info. on OpenMP */
2080 
2081 	OMPID = omp_get_thread_num();
2082 	Nthrds = omp_get_num_threads();
2083 	Nprocs = omp_get_num_procs();
2084 
2085 	for (j1=1+OMPID; j1<=n1; j1+=Nthrds){
2086 	  for (i1=1; i1<=n; i1++){
2087 
2088 	    sum_r0 = 0.0;
2089 	    sum_i0 = 0.0;
2090 
2091 	    sum_r1 = 0.0;
2092 	    sum_i1 = 0.0;
2093 
2094 	    l1 = 1;
2095 	    for (l=1; l<=n; l++){
2096 
2097 	      sum_r0 += S[i1][l].r*C[j1][l1].r
2098 		       -S[i1][l].i*C[j1][l1].i;
2099 	      sum_i0 += S[i1][l].r*C[j1][l1].i
2100 		       +S[i1][l].i*C[j1][l1].r; l1++;
2101 
2102 	      sum_r1 += S[i1][l].r*C[j1][l1].r
2103 		       -S[i1][l].i*C[j1][l1].i;
2104 	      sum_i1 += S[i1][l].r*C[j1][l1].i
2105 	 	       +S[i1][l].i*C[j1][l1].r; l1++;
2106 	    }
2107 
2108 	    H[j1][i1  ].r = sum_r0;
2109 	    H[j1][i1  ].i = sum_i0;
2110 
2111 	    H[j1][i1+n].r = sum_r1;
2112 	    H[j1][i1+n].i = sum_i1;
2113 	  }
2114 	}
2115 
2116       } /* #pragma omp parallel */
2117 
2118     } /* if (0<=kloop) */
2119 
2120     /****************************************************
2121         store LDOS
2122     ****************************************************/
2123 
2124     kloop = arpo[myid];
2125 
2126     if (0<=kloop){
2127 
2128       k1 = T_KGrids1[kloop];
2129       k2 = T_KGrids2[kloop];
2130       k3 = T_KGrids3[kloop];
2131 
2132       i = Ti_KGrids1[kloop];
2133       j = Tj_KGrids2[kloop];
2134       k = Tk_KGrids3[kloop];
2135 
2136       for (l=1; l<=2*n; l++){
2137 
2138 	/* calculate SD */
2139 
2140 #pragma omp parallel shared(TmpS,SD,H,l,MP,Spe_Total_CNO,WhatSpecies,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,theta,phi,sit,cot,sip,cop,d0,d1,d2,d3,GB_AN,wanB,tnoB,Bnum,i1,j1,u2,v2,uv,vu,Redum,Imdum,tmp1,tmp2,tmp3)
2141 	{
2142 
2143 	  /* get info. on OpenMP */
2144 
2145 	  OMPID = omp_get_thread_num();
2146 	  Nthrds = omp_get_num_threads();
2147 	  Nprocs = omp_get_num_procs();
2148 
2149 	  for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
2150 
2151 	    wanA = WhatSpecies[GA_AN];
2152 	    tnoA = Spe_Total_CNO[wanA];
2153 	    Anum = MP[GA_AN];
2154 	    theta = Angle0_Spin[GA_AN];
2155 	    phi   = Angle1_Spin[GA_AN];
2156 	    sit = sin(theta);
2157 	    cot = cos(theta);
2158 	    sip = sin(phi);
2159 	    cop = cos(phi);
2160 
2161 	    for (i1=0; i1<tnoA; i1++){
2162 
2163 	      d0 = 0.0;
2164 	      d1 = 0.0;
2165 	      d2 = 0.0;
2166 	      d3 = 0.0;
2167 
2168 	      for (GB_AN=1; GB_AN<=atomnum; GB_AN++){
2169 
2170 		wanB = WhatSpecies[GB_AN];
2171 		tnoB = Spe_Total_CNO[wanB];
2172 		Bnum = MP[GB_AN];
2173 
2174 		for (j1=0; j1<tnoB; j1++){
2175 
2176 		  /* Re11 */
2177 		  u2 = H[l][Anum+i1].r*H[l][Bnum+j1].r;
2178 		  v2 = H[l][Anum+i1].i*H[l][Bnum+j1].i;
2179 		  uv = H[l][Anum+i1].r*H[l][Bnum+j1].i;
2180 		  vu = H[l][Anum+i1].i*H[l][Bnum+j1].r;
2181 		  Redum = (u2 + v2);
2182 		  Imdum = (uv - vu);
2183 		  d0 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
2184 
2185 		  /* Re22 */
2186 		  u2 = H[l][Anum+i1+n].r*H[l][Bnum+j1+n].r;
2187 		  v2 = H[l][Anum+i1+n].i*H[l][Bnum+j1+n].i;
2188 		  uv = H[l][Anum+i1+n].r*H[l][Bnum+j1+n].i;
2189 		  vu = H[l][Anum+i1+n].i*H[l][Bnum+j1+n].r;
2190 		  Redum = (u2 + v2);
2191 		  Imdum = (uv - vu);
2192 		  d1 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
2193 
2194 		  /* Re12 */
2195 		  u2 = H[l][Anum+i1].r*H[l][Bnum+j1+n].r;
2196 		  v2 = H[l][Anum+i1].i*H[l][Bnum+j1+n].i;
2197 		  uv = H[l][Anum+i1].r*H[l][Bnum+j1+n].i;
2198 		  vu = H[l][Anum+i1].i*H[l][Bnum+j1+n].r;
2199 		  Redum = (u2 + v2);
2200 		  Imdum = (uv - vu);
2201 		  d2 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
2202 
2203 		  /* Im12
2204 		     conjugate complex of Im12 due to difference in the definition
2205 		     between density matrix and charge density
2206 		  */
2207 
2208 		  d3 += -(Imdum*TmpS[Anum+i1][Bnum+j1].r + Redum*TmpS[Anum+i1][Bnum+j1].i);
2209 
2210 		} /* j1 */
2211 	      } /* GB_AN */
2212 
2213 	      tmp1 = 0.5*(d0 + d1);
2214 	      tmp2 = 0.5*cot*(d0 - d1);
2215 	      tmp3 = (d2*cop - d3*sip)*sit;
2216 
2217 	      SD[2*(Anum-1)+i1]      = tmp1 + tmp2 + tmp3;
2218 	      SD[2*(Anum-1)+tnoA+i1] = tmp1 - tmp2 - tmp3;
2219 
2220 	    } /* i1 */
2221 	  } /* GA_AN */
2222 
2223 	} /* #pragma omp parallel */
2224 
2225           /*  calculate contribution to Gaussian DOS */
2226 
2227 	for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
2228 
2229 	  wanA = WhatSpecies[GA_AN];
2230 	  tnoA = Spe_Total_CNO[wanA];
2231 	  Anum = MP[GA_AN];
2232 
2233 	  /*************************
2234              exp( -(x/a)^2 )
2235              a= DosGauss_Width
2236              x=-3a : 3a is counted
2237 	  **************************/
2238 
2239 	  eg = EIGEN[kloop][l];
2240 	  x = (eg-Dos_Erange_Tmp[0])/(Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])*(DosGauss_Num_Mesh-1);
2241 	  iecenter = (int)x ;
2242 
2243 	  iewidth = DosGauss_Width*3.0/(Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])*(DosGauss_Num_Mesh-1)+3;
2244 
2245 	  iemin = iecenter - iewidth;
2246 	  iemax = iecenter + iewidth;
2247 
2248 	  if (iemin<0) iemin=0;
2249 	  if (iemax>=DosGauss_Num_Mesh) iemax=DosGauss_Num_Mesh-1;
2250 
2251 	  if ( 0<=iemin && iemin<DosGauss_Num_Mesh && 0<=iemax && iemax<DosGauss_Num_Mesh ) {
2252 
2253 	    for (ie=iemin;ie<=iemax;ie++) {
2254 	      xa = (eg-r_energy[ie])/DosGauss_Width;
2255 
2256 	      for (i1=0; i1<tnoA; i1++){
2257 		Dos[ie][2*(Anum-1)+i1     ] += (float)(factor*SD[2*(Anum-1)+i1     ]*exp(-xa*xa)/(DosGauss_Width*pi2));
2258 		Dos[ie][2*(Anum-1)+tnoA+i1] += (float)(factor*SD[2*(Anum-1)+tnoA+i1]*exp(-xa*xa)/(DosGauss_Width*pi2));
2259 	      }
2260 	    }
2261 	  }
2262 
2263 	} /* GA_AN */
2264       } /* l */
2265     } /* if (0<=kloop) */
2266 
2267     /*********************************************
2268         calculation of partial density matrix
2269     *********************************************/
2270 
2271     kloop = arpo[myid];
2272 
2273     if (0<=kloop && cal_partial_charge) {
2274 
2275       k1 = T_KGrids1[kloop];
2276       k2 = T_KGrids2[kloop];
2277       k3 = T_KGrids3[kloop];
2278 
2279       for (l=1; l<=2*n; l++){
2280 
2281 	x1 = (ko[l] - ChemP)*Beta;
2282 	if (x1<=-max_x) x1 = -max_x;
2283 	if (max_x<=x1)  x1 = max_x;
2284 	FermiF1 = 1.0/(1.0 + exp(x1));
2285 
2286 	x2 = (ko[l] - (ChemP+ene_win_partial_charge))*Beta;
2287 	if (x2<=-max_x) x2 = -max_x;
2288 	if (max_x<=x2)  x2 = max_x;
2289 	FermiF2 = 1.0/(1.0 + exp(x2));
2290 
2291 	diffF = fabs(FermiF1-FermiF2);
2292 
2293 	for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
2294 
2295 	  wanA = WhatSpecies[GA_AN];
2296 	  tnoA = Spe_Total_CNO[wanA];
2297 	  Anum = MP[GA_AN];
2298 
2299 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
2300 	    GB_AN = natn[GA_AN][LB_AN];
2301 	    Rn = ncn[GA_AN][LB_AN];
2302 	    wanB = WhatSpecies[GB_AN];
2303 	    tnoB = Spe_Total_CNO[wanB];
2304 	    Bnum = MP[GB_AN];
2305 
2306 	    l1 = atv_ijk[Rn][1];
2307 	    l2 = atv_ijk[Rn][2];
2308 	    l3 = atv_ijk[Rn][3];
2309 	    kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
2310 
2311 	    si = sin(2.0*PI*kRn);
2312 	    co = cos(2.0*PI*kRn);
2313 
2314 	    for (i=0; i<tnoA; i++){
2315 	      for (j=0; j<tnoB; j++){
2316 
2317 		/* Re11 */
2318 		u2 = H[l][Anum+i].r*H[l][Bnum+j].r;
2319 		v2 = H[l][Anum+i].i*H[l][Bnum+j].i;
2320 		uv = H[l][Anum+i].r*H[l][Bnum+j].i;
2321 		vu = H[l][Anum+i].i*H[l][Bnum+j].r;
2322 		tmp = co*(u2+v2) - si*(uv-vu);
2323 		PDM[0][GA_AN][LB_AN][i][j] += diffF*tmp;
2324 
2325 		/* Re22 */
2326 		u2 = H[l][Anum+i+n].r*H[l][Bnum+j+n].r;
2327 		v2 = H[l][Anum+i+n].i*H[l][Bnum+j+n].i;
2328 		uv = H[l][Anum+i+n].r*H[l][Bnum+j+n].i;
2329 		vu = H[l][Anum+i+n].i*H[l][Bnum+j+n].r;
2330 		tmp = co*(u2+v2) - si*(uv-vu);
2331 		PDM[1][GA_AN][LB_AN][i][j] += diffF*tmp;
2332 
2333 	      }
2334 	    }
2335 	  }
2336 	}
2337       }
2338     }
2339 
2340     /****************************************************
2341        MPI: EIGEN
2342     ****************************************************/
2343 
2344     for (ID=0; ID<numprocs; ID++){
2345 
2346       kloop = arpo[ID];
2347 
2348       if (0<=kloop){
2349         MPI_Bcast(&EIGEN[kloop][0], 2*n+1, MPI_DOUBLE, ID, mpi_comm_level1);
2350       }
2351     }
2352 
2353   } /* kloop0 */
2354 
2355   /****************************************************
2356      MPI: PDM
2357   ****************************************************/
2358 
2359   if (cal_partial_charge) {
2360 
2361     /* MPI: PDM */
2362 
2363     for (spin=0; spin<=1; spin++){
2364       for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
2365 
2366 	wanA = WhatSpecies[GA_AN];
2367 	tnoA = Spe_Total_CNO[wanA];
2368 
2369 	for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
2370 
2371 	  GB_AN = natn[GA_AN][LB_AN];
2372 	  wanB = WhatSpecies[GB_AN];
2373 	  tnoB = Spe_Total_CNO[wanB];
2374 
2375 	  for (i=0; i<tnoA; i++){
2376 
2377 	    MPI_Allreduce(&PDM[spin][GA_AN][LB_AN][i][0],
2378 			  &TPDM[spin][GA_AN][LB_AN][i][0],
2379 			  tnoB, MPI_DOUBLE, MPI_SUM,
2380 			  mpi_comm_level1);
2381 
2382 	    for (j=0; j<tnoB; j++){
2383               TPDM[spin][GA_AN][LB_AN][i][j] /= (double)T_knum;
2384 	    }
2385 	  }
2386 	}
2387       }
2388     }
2389 
2390     /* store TPDM to Partial_DM */
2391 
2392     for (spin=0; spin<=1; spin++){
2393       for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
2394 
2395         MA_AN = F_G2M[GA_AN];
2396 	wanA = WhatSpecies[GA_AN];
2397 	tnoA = Spe_Total_CNO[wanA];
2398 
2399         if (1<=MA_AN && MA_AN<=Matomnum){
2400 
2401 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
2402 
2403 	    GB_AN = natn[GA_AN][LB_AN];
2404 	    wanB = WhatSpecies[GB_AN];
2405 	    tnoB = Spe_Total_CNO[wanB];
2406 
2407 	    for (i=0; i<tnoA; i++){
2408 	      for (j=0; j<tnoB; j++){
2409 		Partial_DM[spin][MA_AN][LB_AN][i][j] = TPDM[spin][GA_AN][LB_AN][i][j];
2410 	      }
2411 	    }
2412 	  }
2413 	}
2414       }
2415     }
2416   }
2417 
2418 
2419 
2420 
2421 
2422 
2423   /****************************************************************
2424                     write eigenvalues and eigenvectors
2425   ****************************************************************/
2426 
2427   if (myid==Host_ID){
2428 
2429     sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
2430 
2431     if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
2432       printf("can not open a file %s\n",file_eig);
2433     }
2434 
2435     else{
2436 
2437       /* write *.Dos.val */
2438 
2439       printf("write eigenvalues\n");
2440 
2441       fprintf(fp_eig,"mode        7\n");
2442       fprintf(fp_eig,"NonCol      1\n");
2443       fprintf(fp_eig,"N           %d\n",n);
2444       fprintf(fp_eig,"Nspin       %d\n",1);  /* switch to 1 */
2445       fprintf(fp_eig,"Erange      %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
2446       fprintf(fp_eig,"Kgrid       %d %d %d\n",1,1,1);
2447       fprintf(fp_eig,"atomnum     %d\n",atomnum);
2448       fprintf(fp_eig,"<WhatSpecies\n");
2449       for (i=1; i<=atomnum; i++) {
2450         fprintf(fp_eig,"%d ",WhatSpecies[i]);
2451       }
2452       fprintf(fp_eig,"\nWhatSpecies>\n");
2453       fprintf(fp_eig,"SpeciesNum     %d\n",SpeciesNum);
2454       fprintf(fp_eig,"<Spe_Total_CNO\n");
2455       for (i=0;i<SpeciesNum;i++) {
2456         fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
2457       }
2458       fprintf(fp_eig,"\nSpe_Total_CNO>\n");
2459 
2460       MaxL = Supported_MaxL;
2461 
2462       fprintf(fp_eig,"MaxL           %d\n",Supported_MaxL);
2463       fprintf(fp_eig,"<Spe_Num_CBasis\n");
2464       for (i=0;i<SpeciesNum;i++) {
2465         for (l=0;l<=MaxL;l++) {
2466 	  fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
2467         }
2468         fprintf(fp_eig,"\n");
2469       }
2470       fprintf(fp_eig,"Spe_Num_CBasis>\n");
2471       fprintf(fp_eig,"ChemP       %lf\n",ChemP);
2472 
2473       fprintf(fp_eig,"<SpinAngle\n");
2474       for (i=1; i<=atomnum; i++) {
2475         fprintf(fp_eig,"%lf %lf\n",Angle0_Spin[i],Angle1_Spin[i]);
2476       }
2477       fprintf(fp_eig,"SpinAngle>\n");
2478 
2479       fprintf(fp_eig,"irange      %d %d\n",0,DosGauss_Num_Mesh-1);
2480       fprintf(fp_eig,"<Eigenvalues\n");
2481       for (spin=0; spin<=1; spin++) {
2482         fprintf(fp_eig,"%d %d %d ",0,0,0);
2483 
2484         for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
2485           fprintf(fp_eig,"%lf ",r_energy[ik]);
2486 	}
2487         fprintf(fp_eig,"\n");
2488       }
2489       fprintf(fp_eig,"Eigenvalues>\n");
2490 
2491       fclose(fp_eig);
2492     }
2493 
2494   } /* if (myid==Host_ID) */
2495 
2496   if (myid==Host_ID){
2497 
2498     /* write *.Dos.vec */
2499     printf("write eigenvectors\n");
2500     sprintf(file_ev,"%s%s.Dos.vec",filepath,filename);
2501     if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
2502       printf("can not open a file %s\n",file_ev);
2503     }
2504   }
2505 
2506   for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
2507 
2508     i_vec[0] = 0;
2509     i_vec[1] = 0;
2510     i_vec[2] = 0;
2511 
2512     MPI_Reduce(&Dos[ik][0], &DosVec[0], 2*n, MPI_FLOAT, MPI_SUM, Host_ID, mpi_comm_level1);
2513 
2514     if (myid==Host_ID){
2515       fwrite(i_vec,sizeof(int),3,fp_ev);
2516       fwrite(&DosVec[0],sizeof(float),2*n,fp_ev);
2517     }
2518 
2519   }
2520 
2521   if (myid==Host_ID){
2522     fclose(fp_ev);
2523   }
2524 
2525   /****************************************************
2526           Fermi surface viewable by XCrysDen
2527   ****************************************************/
2528 
2529   if (myid==Host_ID){
2530 
2531     int ii,jj,kk;
2532     int ***TD2OD;
2533     double x0,y0,z0;
2534     char file_fermi[YOUSO10];
2535     FILE *fp;
2536 
2537     /* allocation of TD2OD */
2538 
2539     TD2OD = (int***)malloc(sizeof(int**)*knum_i);
2540     for (i=0; i<knum_i; i++){
2541       TD2OD[i] = (int**)malloc(sizeof(int*)*knum_j);
2542       for (j=0; j<knum_j; j++){
2543         TD2OD[i][j] = (int*)malloc(sizeof(int)*knum_k);
2544 	for (k=0; k<knum_k; k++) TD2OD[i][j][k] = 0;
2545       }
2546     }
2547 
2548     kloop = 0;
2549     for (i=0; i<knum_i; i++){
2550       for (j=0; j<knum_j; j++){
2551 	for (k=0; k<knum_k; k++){
2552           TD2OD[i][j][k] = kloop;
2553           kloop++;
2554 	}
2555       }
2556     }
2557 
2558     /* make bxsf files */
2559 
2560     sprintf(file_fermi,"%s%s.FermiSurf.bxsf",filepath,filename);
2561 
2562     if ((fp=fopen(file_fermi,"w")) != NULL){
2563 
2564       fprintf(fp,"BEGIN_INFO\n");
2565       fprintf(fp,"Fermi Energy: %15.12f\n",ChemP);
2566       fprintf(fp,"END_INFO\n\n");
2567 
2568       fprintf(fp,"BEGIN_BLOCK_BANDGRID_3D\n");
2569       fprintf(fp,"comment_line\n");
2570       fprintf(fp,"BEGIN_BANDGRID_3D_simple_example\n");
2571       fprintf(fp,"%d\n",2*n);
2572       fprintf(fp,"%d %d %d\n",knum_i+1,knum_j+1,knum_k+1);
2573 
2574       fprintf(fp,"0.0 0.0 0.0\n");
2575       fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[1][1],rtv[1][2],rtv[1][3]);
2576       fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[2][1],rtv[2][2],rtv[2][3]);
2577       fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[3][1],rtv[3][2],rtv[3][3]);
2578 
2579       for (i1=1; i1<=2*n; i1++){
2580 
2581 	fprintf(fp,"BAND:  %d\n",i1);
2582 
2583 	for (i=0; i<=knum_i; i++){
2584 	  ii = i%knum_i;
2585 	  for (j=0; j<=knum_j; j++){
2586 	    jj = j%knum_j;
2587 	    for (k=0; k<=knum_k; k++){
2588 	      kk = k%knum_k;
2589 	      kloop = TD2OD[ii][jj][kk];
2590 	      fprintf(fp,"%10.5f ",EIGEN[kloop][i1]);
2591 	    }
2592 	    fprintf(fp,"\n");
2593 	  }
2594 
2595 	  if (i!=knum_i)  fprintf(fp,"\n");
2596 
2597 	}
2598       }
2599 
2600       fprintf(fp,"END_BANDGRID_3D\n");
2601       fprintf(fp,"END_BLOCK_BANDGRID_3D\n");
2602 
2603       fclose(fp);
2604     }
2605     else{
2606       printf("Failure in saving bxsf files.\n");
2607     }
2608 
2609     /* free TD2OD */
2610     for (i=0; i<knum_i; i++){
2611       for (j=0; j<knum_j; j++){
2612         free(TD2OD[i][j]);
2613       }
2614       free(TD2OD[i]);
2615     }
2616     free(TD2OD);
2617 
2618   } /* if (myid==Host_ID) */
2619 
2620   /****************************************************
2621                        free arrays
2622   ****************************************************/
2623 
2624   free(DosVec);
2625 
2626   free(MP);
2627 
2628   free(arpo);
2629 
2630   free(ko);
2631   free(koS);
2632 
2633   for (j=0; j<n2; j++){
2634     free(H[j]);
2635   }
2636   free(H);
2637 
2638   for (i=0; i<n2; i++){
2639     free(S[i]);
2640   }
2641   free(S);
2642 
2643   free(M1);
2644 
2645   for (j=0; j<n2; j++){
2646     free(C[j]);
2647   }
2648   free(C);
2649 
2650   free(KGrids1); free(KGrids2);free(KGrids3);
2651 
2652   free(SD);
2653 
2654   for (j=0; j<n2; j++){
2655     free(TmpM[j]);
2656   }
2657   free(TmpM);
2658 
2659   for (j=0; j<n+1; j++){
2660     free(TmpS[j]);
2661   }
2662   free(TmpS);
2663 
2664   for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
2665     free(Dos[ik]);
2666   }
2667   free(Dos);
2668 
2669   free(r_energy);
2670 
2671   /* non-spin-orbit coupling and non-LDA+U */
2672   if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
2673       && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
2674     free(Dummy_ImNL[0][0][0][0]);
2675     free(Dummy_ImNL[0][0][0]);
2676     free(Dummy_ImNL[0][0]);
2677     free(Dummy_ImNL[0]);
2678     free(Dummy_ImNL);
2679   }
2680 
2681   /* partial density matrix */
2682 
2683   if (cal_partial_charge){
2684 
2685     /* PDM */
2686 
2687     for (k=0; k<=1; k++){
2688 
2689       FNAN[0] = 0;
2690 
2691       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
2692 
2693 	if (Gc_AN==0){
2694 	  tno0 = 1;
2695 	}
2696 	else{
2697 	  Cwan = WhatSpecies[Gc_AN];
2698 	  tno0 = Spe_Total_NO[Cwan];
2699 	}
2700 
2701 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2702 
2703 	  if (Gc_AN==0){
2704 	    tno1 = 1;
2705 	  }
2706 	  else{
2707 	    Gh_AN = natn[Gc_AN][h_AN];
2708 	    Hwan = WhatSpecies[Gh_AN];
2709 	    tno1 = Spe_Total_NO[Hwan];
2710 	  }
2711 
2712 	  for (i=0; i<tno0; i++){
2713 	    free(PDM[k][Gc_AN][h_AN][i]);
2714 	  }
2715           free(PDM[k][Gc_AN][h_AN]);
2716 	}
2717         free(PDM[k][Gc_AN]);
2718       }
2719       free(PDM[k]);
2720     }
2721     free(PDM);
2722 
2723     /* TPDM */
2724 
2725     for (k=0; k<=1; k++){
2726 
2727       FNAN[0] = 0;
2728 
2729       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
2730 
2731 	if (Gc_AN==0){
2732 	  tno0 = 1;
2733 	}
2734 	else{
2735 	  Cwan = WhatSpecies[Gc_AN];
2736 	  tno0 = Spe_Total_NO[Cwan];
2737 	}
2738 
2739 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2740 
2741 	  if (Gc_AN==0){
2742 	    tno1 = 1;
2743 	  }
2744 	  else{
2745 	    Gh_AN = natn[Gc_AN][h_AN];
2746 	    Hwan = WhatSpecies[Gh_AN];
2747 	    tno1 = Spe_Total_NO[Hwan];
2748 	  }
2749 
2750 	  for (i=0; i<tno0; i++){
2751 	    free(TPDM[k][Gc_AN][h_AN][i]);
2752 	  }
2753           free(TPDM[k][Gc_AN][h_AN]);
2754 	}
2755         free(TPDM[k][Gc_AN]);
2756       }
2757       free(TPDM[k]);
2758     }
2759     free(TPDM);
2760   }
2761 
2762   free(T_KGrids1);
2763   free(T_KGrids2);
2764   free(T_KGrids3);
2765 
2766   free(Ti_KGrids1);
2767   free(Tj_KGrids2);
2768   free(Tk_KGrids3);
2769 
2770   for (j=0; j<T_knum; j++){
2771     free(EIGEN[j]);
2772   }
2773   free(EIGEN);
2774 
2775   /* for elapsed time */
2776 
2777   dtime(&TEtime);
2778   time0 = TEtime - TStime;
2779   return time0;
2780 }
2781 
2782 
2783 
2784 
2785 
2786 
2787 
Band_DFT_Dosout_Col(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)2788 static double Band_DFT_Dosout_Col(
2789                            int knum_i, int knum_j, int knum_k,
2790                            int SpinP_switch,
2791                            double *****nh,
2792                            double *****ImNL,
2793                            double ****CntOLP)
2794 {
2795   int i,j,k,spin,l,i1,j1,n1;
2796   int n, wanA;
2797   int *MP;
2798   int MA_AN, GA_AN, tnoA,Anum, LB_AN;
2799   int GB_AN, wanB, tnoB, Bnum, RnB;
2800   int l1,l2,l3,Rn;
2801   int kloop,kloop0,S_knum,E_knum,T_knum,num_kloop0;
2802   int ie,iemin,iemax,iemin0,iemax0,n1min;
2803   int MaxL,e1,s1;
2804   int h_AN,tno0,tno1,Gh_AN,Hwan,Gc_AN,Cwan;
2805   int i_vec[10];
2806   double sum,sumi,u2,v2,uv,vu,tmp;
2807   double FermiF1,FermiF2,x1,x2,diffF,max_x=50.0;
2808   double kRn,si,co,Redum,Imdum,Redum2,Resum;
2809   double TStime,TEtime,time0;
2810   double OLP_eigen_cut=Threshold_OLP_Eigen;
2811   double EV_cut0;
2812   double *****PDM;
2813   double *****TPDM;
2814   double *koS;
2815   double **ko; int N_ko, i_ko[10];
2816   dcomplex **H;  int N_H,  i_H[10];
2817   dcomplex **S;  int N_S,  i_S[10];
2818   double *M1,***EIGEN;
2819   dcomplex **C;  int N_C,  i_C[10];
2820   double *KGrids1, *KGrids2, *KGrids3;
2821   float *SD; int N_SD, i_SD[10];
2822   dcomplex ***H2,**TmpM,**TmpS;
2823   double *T_KGrids1,*T_KGrids2,*T_KGrids3;
2824   int *Ti_KGrids1,*Tj_KGrids2,*Tk_KGrids3,*arpo;
2825   int *num_allocated_k;
2826   dcomplex Ctmp1,Ctmp2;
2827   double ****Dummy_ImNL;
2828   double snum_i, snum_j, snum_k;
2829   double k1,k2,k3;
2830   char file_ev[YOUSO10],file_eig[YOUSO10];
2831   char file_ev0[YOUSO10];
2832   FILE *fp_ev0,*fp_ev,*fp_eig;
2833   char buf1[fp_bsize];          /* setvbuf */
2834   char buf2[fp_bsize];          /* setvbuf */
2835   int numprocs,myid,ID,ID1,ID2,tag;
2836 
2837   /* for OpenMP */
2838   int OMPID,Nthrds,Nprocs;
2839 
2840   MPI_Status stat;
2841   MPI_Request request;
2842 
2843   /* MPI */
2844   MPI_Comm_size(mpi_comm_level1,&numprocs);
2845   MPI_Comm_rank(mpi_comm_level1,&myid);
2846   MPI_Barrier(mpi_comm_level1);
2847 
2848   if (myid==Host_ID && 0<level_stdout){
2849     printf("\n<Band_DFT_Dosout>\n"); fflush(stdout);
2850   }
2851 
2852   dtime(&TStime);
2853 
2854   /****************************************************
2855              calculation of the array size
2856   ****************************************************/
2857 
2858   n = 0;
2859   for (i=1; i<=atomnum; i++){
2860     wanA  = WhatSpecies[i];
2861     n  += Spe_Total_CNO[wanA];
2862   }
2863 
2864   /****************************************************
2865    Allocation of arrays
2866   ****************************************************/
2867 
2868   MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
2869   arpo = (int*)malloc(sizeof(int)*numprocs);
2870 
2871   num_allocated_k = (int*)malloc(sizeof(int)*numprocs);
2872 
2873   N_ko=2; i_ko[0]=List_YOUSO[23]; i_ko[1]=n+1;
2874   ko=(double**)malloc_multidimarray("double",N_ko, i_ko);
2875 
2876   koS = (double*)malloc(sizeof(double)*(n+1));
2877 
2878   N_H=2; i_H[0]=i_H[1]=n+1;
2879   H=(dcomplex**)malloc_multidimarray("dcomplex",N_H, i_H);
2880 
2881   N_S=2; i_S[0]=i_S[1]=n+1;
2882   S=(dcomplex**)malloc_multidimarray("dcomplex",N_S, i_S);
2883 
2884   M1 = (double*)malloc(sizeof(double)*(n+1));
2885 
2886   N_C=2; i_C[0]=i_C[1]=n+1;
2887   C=(dcomplex**)malloc_multidimarray("dcomplex",N_C, i_C);
2888 
2889   KGrids1 = (double*)malloc(sizeof(double)*knum_i);
2890   KGrids2 = (double*)malloc(sizeof(double)*knum_j);
2891   KGrids3 = (double*)malloc(sizeof(double)*knum_k);
2892 
2893   SD=(float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]);
2894 
2895   TmpM = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
2896   for (j=0; j<n+1; j++){
2897     TmpM[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
2898   }
2899 
2900   TmpS = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
2901   for (j=0; j<n+1; j++){
2902     TmpS[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
2903   }
2904 
2905   H2 = (dcomplex***)malloc(sizeof(dcomplex**)*List_YOUSO[23]);
2906   for (i=0; i<List_YOUSO[23]; i++){
2907     H2[i] = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
2908     for (j=0; j<n+1; j++){
2909       H2[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
2910     }
2911   }
2912 
2913   /* for calculation partial density matrix */
2914 
2915   if (cal_partial_charge){
2916 
2917     PDM = (double*****)malloc(sizeof(double****)*2);
2918     for (k=0; k<=1; k++){
2919 
2920       PDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
2921       FNAN[0] = 0;
2922 
2923       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
2924 
2925 	if (Gc_AN==0){
2926 	  tno0 = 1;
2927 	}
2928 	else{
2929 	  Cwan = WhatSpecies[Gc_AN];
2930 	  tno0 = Spe_Total_NO[Cwan];
2931 	}
2932 
2933 	PDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
2934 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2935 
2936 	  if (Gc_AN==0){
2937 	    tno1 = 1;
2938 	  }
2939 	  else{
2940 	    Gh_AN = natn[Gc_AN][h_AN];
2941 	    Hwan = WhatSpecies[Gh_AN];
2942 	    tno1 = Spe_Total_NO[Hwan];
2943 	  }
2944 
2945 	  PDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
2946 	  for (i=0; i<tno0; i++){
2947 	    PDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
2948   	    for (j=0; j<tno1; j++) PDM[k][Gc_AN][h_AN][i][j] = 0.0;
2949 	  }
2950 	}
2951       }
2952     }
2953 
2954     TPDM = (double*****)malloc(sizeof(double****)*2);
2955     for (k=0; k<=1; k++){
2956 
2957       TPDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
2958       FNAN[0] = 0;
2959 
2960       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
2961 
2962 	if (Gc_AN==0){
2963 	  tno0 = 1;
2964 	}
2965 	else{
2966 	  Cwan = WhatSpecies[Gc_AN];
2967 	  tno0 = Spe_Total_NO[Cwan];
2968 	}
2969 
2970 	TPDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
2971 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2972 
2973 	  if (Gc_AN==0){
2974 	    tno1 = 1;
2975 	  }
2976 	  else{
2977 	    Gh_AN = natn[Gc_AN][h_AN];
2978 	    Hwan = WhatSpecies[Gh_AN];
2979 	    tno1 = Spe_Total_NO[Hwan];
2980 	  }
2981 
2982 	  TPDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
2983 	  for (i=0; i<tno0; i++){
2984 	    TPDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
2985   	    for (j=0; j<tno1; j++) TPDM[k][Gc_AN][h_AN][i][j] = 0.0;
2986 	  }
2987 	}
2988       }
2989     }
2990   }
2991 
2992   /* no spin-orbit coupling */
2993   if (SO_switch==0){
2994     Dummy_ImNL = (double****)malloc(sizeof(double***)*1);
2995     Dummy_ImNL[0] = (double***)malloc(sizeof(double**)*1);
2996     Dummy_ImNL[0][0] = (double**)malloc(sizeof(double*)*1);
2997     Dummy_ImNL[0][0][0] = (double*)malloc(sizeof(double)*1);
2998   }
2999 
3000   snum_i = knum_i;
3001   snum_j = knum_j;
3002   snum_k = knum_k;
3003 
3004   /* set up k-grids */
3005   for (i=0; i<knum_i; i++){
3006     k1 = (double)i/snum_i + Shift_K_Point;
3007     KGrids1[i]=k1;
3008   }
3009   for (i=0; i<knum_j; i++){
3010     k1 = (double)i/snum_j - Shift_K_Point;
3011     KGrids2[i]=k1;
3012   }
3013   for (i=0; i<knum_k; i++){
3014     k1 = (double)i/snum_k + 2.0*Shift_K_Point;
3015     KGrids3[i]=k1;
3016   }
3017 
3018   if (myid==Host_ID && 0<level_stdout){
3019     printf(" KGrids1: ");
3020     for (i=0;i<=knum_i-1;i++) printf("%f ",KGrids1[i]);
3021     printf("\n");
3022     printf(" KGrids2: ");
3023     for (i=0;i<=knum_j-1;i++) printf("%f ",KGrids2[i]);
3024     printf("\n");
3025     printf(" KGrids3: ");
3026     for (i=0;i<=knum_k-1;i++) printf("%f ",KGrids3[i]);
3027     printf("\n");
3028   }
3029 
3030   /***********************************
3031        one-dimentionalize for MPI
3032   ************************************/
3033 
3034   T_knum = 0;
3035   for (i=0; i<=(knum_i-1); i++){
3036     for (j=0; j<=(knum_j-1); j++){
3037       for (k=0; k<=(knum_k-1); k++){
3038         T_knum++;
3039       }
3040     }
3041   }
3042 
3043   T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
3044   T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
3045   T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
3046 
3047   Ti_KGrids1 = (int*)malloc(sizeof(int)*T_knum);
3048   Tj_KGrids2 = (int*)malloc(sizeof(int)*T_knum);
3049   Tk_KGrids3 = (int*)malloc(sizeof(int)*T_knum);
3050 
3051   EIGEN  = (double***)malloc(sizeof(double**)*List_YOUSO[23]);
3052   for (i=0; i<List_YOUSO[23]; i++){
3053     EIGEN[i] = (double**)malloc(sizeof(double*)*T_knum);
3054     for (j=0; j<T_knum; j++){
3055       EIGEN[i][j] = (double*)malloc(sizeof(double)*(n+1));
3056     }
3057   }
3058 
3059   /* set T_KGrid1,2,3 */
3060 
3061   T_knum = 0;
3062   for (i=0; i<=(knum_i-1); i++){
3063     for (j=0; j<=(knum_j-1); j++){
3064       for (k=0; k<=(knum_k-1); k++){
3065 
3066 	T_KGrids1[T_knum] = KGrids1[i];
3067 	T_KGrids2[T_knum] = KGrids2[j];
3068 	T_KGrids3[T_knum] = KGrids3[k];
3069 
3070 	Ti_KGrids1[T_knum] = i;
3071 	Tj_KGrids2[T_knum] = j;
3072 	Tk_KGrids3[T_knum] = k;
3073 
3074         T_knum++;
3075       }
3076     }
3077   }
3078 
3079   /* allocate k-points into proccessors */
3080 
3081   if (T_knum<=myid){
3082     S_knum = -10;
3083     E_knum = -100;
3084     num_kloop0 = 1;
3085     num_allocated_k[myid] = 0;
3086   }
3087   else if (T_knum<numprocs) {
3088     S_knum = myid;
3089     E_knum = myid;
3090     num_kloop0 = 1;
3091     num_allocated_k[myid] = 1;
3092   }
3093   else {
3094     tmp = (double)T_knum/(double)numprocs;
3095     num_kloop0 = (int)tmp + 1;
3096 
3097     S_knum = (int)((double)myid*(tmp+0.0001));
3098     E_knum = (int)((double)(myid+1)*(tmp+0.0001)) - 1;
3099     if (myid==(numprocs-1)) E_knum = T_knum - 1;
3100     if (E_knum<0)           E_knum = 0;
3101     num_allocated_k[myid] = E_knum - S_knum + 1;
3102   }
3103 
3104   for (ID=0; ID<numprocs; ID++){
3105     MPI_Bcast(&num_allocated_k[ID], 1, MPI_INT, ID, mpi_comm_level1);
3106   }
3107 
3108   /****************************************************************
3109                       find iemin and iemax
3110   *****************************************************************/
3111 
3112   iemin=n; iemax=1; n1min=1;
3113 
3114   k1 = 0.0;
3115   k2 = 0.0;
3116   k3 = 0.0;
3117 
3118   Overlap_Band(Host_ID,CntOLP,S,MP,k1,k2,k3);
3119 
3120   if (myid==Host_ID){
3121 
3122     n = S[0][0].r;
3123     EigenBand_lapack(S,ko[0],n,n,1);
3124 
3125     if (3<=level_stdout){
3126       printf("  k1 k2 k3 %10.6f %10.6f %10.6f\n",k1,k2,k3);
3127       for (i1=1; i1<=n; i1++){
3128 	printf("  Eigenvalues of OLP  %2d  %15.12f\n",i1,ko[0][i1]);
3129       }
3130     }
3131 
3132     /* minus eigenvalues to 1.0e-14 */
3133 
3134     for (l=1; l<=n; l++){
3135       if (ko[0][l]<0.0) ko[0][l] = 1.0e-14;
3136       koS[l] = ko[0][l];
3137     }
3138 
3139     /* calculate S*1/sqrt(ko) */
3140 
3141     for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[0][l]);
3142 
3143     /* S * M1  */
3144 
3145     for (i1=1; i1<=n; i1++){
3146       for (j1=1; j1<=n; j1++){
3147 	S[i1][j1].r = S[i1][j1].r*M1[j1];
3148 	S[i1][j1].i = S[i1][j1].i*M1[j1];
3149       }
3150     }
3151 
3152   } /* if (myid==Host_ID) */
3153 
3154   spin = 0;
3155 
3156   /****************************************************
3157                make a full Hamiltonian matrix
3158   ****************************************************/
3159 
3160   Hamiltonian_Band(Host_ID, nh[spin], H, MP, k1, k2, k3);
3161 
3162   if (myid==Host_ID){
3163 
3164     /****************************************************
3165                     M1 * U^t * H * U * M1
3166     ****************************************************/
3167 
3168     /* transpose S */
3169 
3170     if (spin==0){
3171       for (i1=1; i1<=n; i1++){
3172 	for (j1=i1+1; j1<=n; j1++){
3173 	  Ctmp1 = S[i1][j1];
3174 	  Ctmp2 = S[j1][i1];
3175 	  S[i1][j1] = Ctmp2;
3176 	  S[j1][i1] = Ctmp1;
3177 	}
3178       }
3179     }
3180 
3181     /* H * U * M1 */
3182 
3183     for (i1=1; i1<=n; i1++){
3184       for (j1=1; j1<=n; j1++){
3185 	sum = 0.0; sumi=0.0;
3186 	for (l=1; l<=n; l++){
3187 	  sum  += H[i1][l].r*S[j1][l].r - H[i1][l].i*S[j1][l].i;
3188 	  sumi += H[i1][l].r*S[j1][l].i + H[i1][l].i*S[j1][l].r;
3189 	}
3190 
3191 	C[j1][i1].r = sum;
3192 	C[j1][i1].i = sumi;
3193       }
3194     }
3195 
3196     /* M1 * U^+ H * U * M1 */
3197 
3198     for (i1=1; i1<=n; i1++){
3199       for (j1=1; j1<=n; j1++){
3200 	sum = 0.0;
3201 	sumi=0.0;
3202 	for (l=1; l<=n; l++){
3203 	  sum  +=  S[i1][l].r*C[j1][l].r + S[i1][l].i*C[j1][l].i;
3204 	  sumi +=  S[i1][l].r*C[j1][l].i - S[i1][l].i*C[j1][l].r;
3205 	}
3206 	H[i1][j1].r = sum;
3207 	H[i1][j1].i = sumi;
3208       }
3209     }
3210 
3211     /* H to C */
3212 
3213     for (i1=1; i1<=n; i1++){
3214       for (j1=1; j1<=n; j1++){
3215 	C[i1][j1] = H[i1][j1];
3216       }
3217     }
3218 
3219     /* penalty for ill-conditioning states */
3220 
3221     EV_cut0 = Threshold_OLP_Eigen;
3222 
3223     for (i1=1; i1<=n; i1++){
3224 
3225       if (koS[i1]<EV_cut0){
3226 	C[i1][i1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
3227       }
3228 
3229       /* cutoff the interaction between the ill-conditioned state */
3230 
3231       if (1.0e+3<C[i1][i1].r){
3232 	for (j1=1; j1<=n; j1++){
3233 	  C[i1][j1] = Complex(0.0,0.0);
3234 	  C[j1][i1] = Complex(0.0,0.0);
3235 	}
3236 	C[i1][i1].r = 1.0e+4;
3237       }
3238     }
3239 
3240     /* solve eigenvalue problem */
3241 
3242     n1 = n;
3243     EigenBand_lapack(C,ko[spin],n1,n1,1);
3244 
3245     if (n1min<n1) n1min=n1;
3246 
3247     iemin0=1;
3248     for (i1=1;i1<=n1;i1++) {
3249       if (ko[spin][i1]>(ChemP+Dos_Erange[0]) ) {
3250 	iemin0=i1-1;
3251 	break;
3252       }
3253     }
3254     if (iemin0<1) iemin0=1;
3255 
3256     iemax0=n1;
3257     for (i1=iemin0;i1<=n1;i1++) {
3258       if (ko[spin][i1]>(ChemP+Dos_Erange[1]) ) {
3259 	iemax0=i1;
3260 	break;
3261       }
3262     }
3263     if (iemax0>n1) iemax0=n1;
3264 
3265     if (iemin>iemin0) iemin=iemin0;
3266     if (iemax<iemax0) iemax=iemax0;
3267 
3268   } /* if (myid==Host_ID) */
3269 
3270   /* add a buffer to iemin and iemax */
3271 
3272   iemin -= 5;
3273   iemax += 5;
3274 
3275   if (iemin<1) iemin = 1;
3276   if (n<iemax) iemax = n;
3277 
3278    /* partial density matrix for STM simulation */
3279   if (cal_partial_charge){
3280     iemin = 1;
3281     iemax = n;
3282   }
3283 
3284   if (myid==Host_ID){
3285     if (n1min<iemax) iemax=n1min;
3286     printf(" iemin and iemax= %d %d\n",iemin,iemax);
3287   }
3288 
3289   /* MPI, iemin, iemax */
3290   MPI_Barrier(mpi_comm_level1);
3291   MPI_Bcast(&iemin, 1, MPI_INT, Host_ID, mpi_comm_level1);
3292   MPI_Bcast(&iemax, 1, MPI_INT, Host_ID, mpi_comm_level1);
3293 
3294   /****************************************************************
3295                      eigenvalues and eigenvectors
3296    ****************************************************************/
3297 
3298   if (myid==Host_ID){
3299 
3300     sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
3301     if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
3302 
3303 #ifdef xt3
3304       setvbuf(fp_eig,buf1,_IOFBF,fp_bsize);  /* setvbuf */
3305 #endif
3306 
3307       printf("can not open a file %s\n",file_eig);
3308     }
3309 
3310     if ( fp_eig==NULL ) {
3311       goto Finishing;
3312     }
3313   }
3314 
3315   sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,myid);
3316   if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
3317     printf("cannot open a file %s\n",file_ev);
3318   }
3319   if ( fp_ev==NULL ) {
3320     goto Finishing;
3321   }
3322 
3323   if (myid==Host_ID){
3324 
3325     if (fp_eig) {
3326       fprintf(fp_eig,"mode        1\n");
3327       fprintf(fp_eig,"NonCol      0\n");
3328       fprintf(fp_eig,"N           %d\n",n);
3329       fprintf(fp_eig,"Nspin       %d\n",SpinP_switch);
3330       fprintf(fp_eig,"Erange      %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
3331       fprintf(fp_eig,"irange      %d %d\n",iemin,iemax);
3332       fprintf(fp_eig,"Kgrid       %d %d %d\n",knum_i,knum_j,knum_k);
3333       fprintf(fp_eig,"atomnum     %d\n",atomnum);
3334       fprintf(fp_eig,"<WhatSpecies\n");
3335       for (i=1;i<=atomnum;i++) {
3336         fprintf(fp_eig,"%d ",WhatSpecies[i]);
3337       }
3338       fprintf(fp_eig,"\nWhatSpecies>\n");
3339       fprintf(fp_eig,"SpeciesNum     %d\n",SpeciesNum);
3340       fprintf(fp_eig,"<Spe_Total_CNO\n");
3341       for (i=0;i<SpeciesNum;i++) {
3342         fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
3343       }
3344       fprintf(fp_eig,"\nSpe_Total_CNO>\n");
3345 
3346       MaxL=Supported_MaxL;
3347 
3348       fprintf(fp_eig,"MaxL           %d\n",Supported_MaxL);
3349       fprintf(fp_eig,"<Spe_Num_CBasis\n");
3350       for (i=0;i<SpeciesNum;i++) {
3351         for (l=0;l<=MaxL;l++) {
3352           fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
3353         }
3354         fprintf(fp_eig,"\n");
3355       }
3356       fprintf(fp_eig,"Spe_Num_CBasis>\n");
3357 
3358       fprintf(fp_eig,"ChemP       %lf\n",ChemP);
3359 
3360       fprintf(fp_eig,"<Eigenvalues\n");
3361 
3362     }
3363     if (fp_eig) {
3364       printf(" write eigenvalues\n");
3365     }
3366     if (fp_ev) {
3367       printf(" write eigenvectors\n");
3368     }
3369   }
3370 
3371   /* for kloop */
3372 
3373   for (kloop0=0; kloop0<num_kloop0; kloop0++){
3374 
3375     kloop = kloop0 + S_knum;
3376     arpo[myid] = -1;
3377     if (S_knum<=kloop && kloop<=E_knum) arpo[myid] = kloop;
3378     for (ID=0; ID<numprocs; ID++){
3379       MPI_Bcast(&arpo[ID], 1, MPI_INT, ID, mpi_comm_level1);
3380     }
3381 
3382     /* set S */
3383 
3384     for (ID=0; ID<numprocs; ID++){
3385 
3386       kloop = arpo[ID];
3387 
3388       if (0<=kloop){
3389 
3390         k1 = T_KGrids1[kloop];
3391         k2 = T_KGrids2[kloop];
3392         k3 = T_KGrids3[kloop];
3393 
3394         Overlap_Band(ID,CntOLP,TmpM,MP,k1,k2,k3);
3395         n = TmpM[0][0].r;
3396 
3397         if (myid==ID){
3398 	  for (i1=1; i1<=n; i1++){
3399 	    for (j1=1; j1<=n; j1++){
3400 	      S[i1][j1].r = TmpM[i1][j1].r;
3401 	      S[i1][j1].i = TmpM[i1][j1].i;
3402 
3403 	      TmpS[i1][j1].r = TmpM[i1][j1].r;
3404 	      TmpS[i1][j1].i = TmpM[i1][j1].i;
3405 	    }
3406 	  }
3407         }
3408 
3409       }
3410     }
3411 
3412     kloop = arpo[myid];
3413 
3414     if (0<=kloop){
3415 
3416       EigenBand_lapack(S,ko[0],n,n,1);
3417 
3418       if (3<=level_stdout && 0<=kloop){
3419 	printf("  kloop %2d  k1 k2 k3 %10.6f %10.6f %10.6f\n",
3420 	       kloop,T_KGrids1[kloop],T_KGrids2[kloop],T_KGrids3[kloop]);
3421 	for (i1=1; i1<=n; i1++){
3422 	  printf("  Eigenvalues of OLP  %2d  %15.12f\n",i1,ko[0][i1]);
3423 	}
3424       }
3425 
3426       /* minus eigenvalues to 1.0e-14 */
3427 
3428       for (l=1; l<=n; l++){
3429 	if (ko[0][l]<0.0) ko[0][l] = 1.0e-14;
3430 	koS[l] = ko[0][l];
3431       }
3432 
3433       /* calculate S*1/sqrt(ko) */
3434 
3435       for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[0][l]);
3436 
3437       /* S * M1  */
3438 
3439       for (i1=1; i1<=n; i1++){
3440 	for (j1=1; j1<=n; j1++){
3441 	  S[i1][j1].r = S[i1][j1].r*M1[j1];
3442 	  S[i1][j1].i = S[i1][j1].i*M1[j1];
3443 	}
3444       }
3445 
3446     } /* if (0<=kloop) */
3447 
3448     /* set H */
3449 
3450     for (spin=0; spin<=SpinP_switch; spin++){
3451 
3452       for (ID=0; ID<numprocs; ID++){
3453 
3454         kloop = arpo[ID];
3455 
3456         if (0<=kloop){
3457           k1 = T_KGrids1[kloop];
3458           k2 = T_KGrids2[kloop];
3459           k3 = T_KGrids3[kloop];
3460 
3461           Hamiltonian_Band(ID, nh[spin], TmpM, MP, k1, k2, k3);
3462 
3463           if (myid==ID){
3464 	    for (i1=1; i1<=n; i1++){
3465 	      for (j1=1; j1<=n; j1++){
3466 	        H[i1][j1].r = TmpM[i1][j1].r;
3467 	        H[i1][j1].i = TmpM[i1][j1].i;
3468 	      }
3469 	    }
3470           }
3471 	}
3472       }
3473 
3474       kloop = arpo[myid];
3475 
3476       if (0<=kloop){
3477 
3478         /****************************************************
3479  	                 M1 * U^t * H * U * M1
3480         ****************************************************/
3481 
3482         /* transpose S */
3483 
3484 	for (i1=1; i1<=n; i1++){
3485 	  for (j1=i1+1; j1<=n; j1++){
3486 	    Ctmp1 = S[i1][j1];
3487 	    Ctmp2 = S[j1][i1];
3488 	    S[i1][j1] = Ctmp2;
3489 	    S[j1][i1] = Ctmp1;
3490 	  }
3491 	}
3492 
3493         /* H * U * M1 */
3494 
3495 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
3496 	{
3497 
3498 	  /* get info. on OpenMP */
3499 
3500 	  OMPID = omp_get_thread_num();
3501 	  Nthrds = omp_get_num_threads();
3502 	  Nprocs = omp_get_num_procs();
3503 
3504 	  for (i1=1+OMPID; i1<=n; i1+=Nthrds){
3505 
3506 	    for (j1=1; j1<=n; j1++){
3507 
3508 	      sum  = 0.0;
3509 	      sumi = 0.0;
3510 
3511 	      for (l=1; l<=n; l++){
3512 		sum  += H[i1][l].r*S[j1][l].r - H[i1][l].i*S[j1][l].i;
3513 		sumi += H[i1][l].r*S[j1][l].i + H[i1][l].i*S[j1][l].r;
3514 	      }
3515 
3516 	      C[j1][i1].r = sum;
3517 	      C[j1][i1].i = sumi;
3518 	    }
3519 	  }
3520 	} /* #pragma omp parallel */
3521 
3522         /* M1 * U^+ H * U * M1 */
3523 
3524 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
3525 	{
3526 
3527 	  /* get info. on OpenMP */
3528 
3529 	  OMPID = omp_get_thread_num();
3530 	  Nthrds = omp_get_num_threads();
3531 	  Nprocs = omp_get_num_procs();
3532 
3533 	  for (i1=1+OMPID; i1<=n; i1+=Nthrds){
3534 	    for (j1=1; j1<=n; j1++){
3535 
3536 	      sum  = 0.0;
3537 	      sumi = 0.0;
3538 
3539 	      for (l=1; l<=n; l++){
3540 		sum  +=  S[i1][l].r*C[j1][l].r + S[i1][l].i*C[j1][l].i;
3541 		sumi +=  S[i1][l].r*C[j1][l].i - S[i1][l].i*C[j1][l].r;
3542 	      }
3543 	      H[i1][j1].r = sum;
3544 	      H[i1][j1].i = sumi;
3545 	    }
3546 	  }
3547 	} /* #pragma omp parallel */
3548 
3549         /* H to C */
3550 
3551 	for (i1=1; i1<=n; i1++){
3552 	  for (j1=1; j1<=n; j1++){
3553 	    C[i1][j1] = H[i1][j1];
3554 	  }
3555 	}
3556 
3557 	/* penalty for ill-conditioning states */
3558 
3559 	EV_cut0 = Threshold_OLP_Eigen;
3560 
3561 	for (i1=1; i1<=n; i1++){
3562 
3563 	  if (koS[i1]<EV_cut0){
3564 	    C[i1][i1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
3565 	  }
3566 
3567 	  /* cutoff the interaction between the ill-conditioned state */
3568 
3569 	  if (1.0e+3<C[i1][i1].r){
3570 	    for (j1=1; j1<=n; j1++){
3571 	      C[i1][j1] = Complex(0.0,0.0);
3572 	      C[j1][i1] = Complex(0.0,0.0);
3573 	    }
3574 	    C[i1][i1].r = 1.0e+4;
3575 	  }
3576 	}
3577 
3578         /* solve eigenvalue problem */
3579 
3580 	n1 = n;
3581         EigenBand_lapack(C,ko[spin],n1,n1,1);
3582 
3583 	for (i1=1; i1<=n; i1++) EIGEN[spin][kloop][i1] = ko[spin][i1];
3584 
3585         /****************************************************
3586 	     transformation to the original eigen vectors.
3587 	     NOTE JRCAT-244p and JAIST-2122p
3588 	     C = U * lambda^{-1/2} * D
3589         ****************************************************/
3590 
3591 	/* transpose */
3592 
3593 	for (i1=1; i1<=n; i1++){
3594 	  for (j1=i1+1; j1<=n; j1++){
3595 	    Ctmp1 = S[i1][j1];
3596 	    Ctmp2 = S[j1][i1];
3597 	    S[i1][j1] = Ctmp2;
3598 	    S[j1][i1] = Ctmp1;
3599 	  }
3600 	}
3601 
3602 	/* transpose */
3603 
3604 	for (i1=1; i1<=n; i1++){
3605 	  for (j1=i1+1; j1<=n; j1++){
3606 	    Ctmp1 = C[i1][j1];
3607 	    Ctmp2 = C[j1][i1];
3608 	    C[i1][j1] = Ctmp2;
3609 	    C[j1][i1] = Ctmp1;
3610 	  }
3611 	}
3612 
3613         /* shift */
3614 
3615 	for (j1=1; j1<=n; j1++){
3616 	  for (l=n; 1<=l; l--){
3617    	    C[j1][l] = C[j1][l];
3618 	  }
3619 	}
3620 
3621 #pragma omp parallel shared(C,S,H2,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
3622 	{
3623 
3624 	  /* get info. on OpenMP */
3625 
3626 	  OMPID = omp_get_thread_num();
3627 	  Nthrds = omp_get_num_threads();
3628 	  Nprocs = omp_get_num_procs();
3629 
3630 	  for (i1=1+OMPID; i1<=n; i1+=Nthrds){
3631 	    for (j1=1; j1<=n1; j1++){
3632 
3633 	      sum  = 0.0;
3634 	      sumi = 0.0;
3635 
3636 	      for (l=1; l<=n; l++){
3637 		sum  +=  S[i1][l].r*C[j1][l].r - S[i1][l].i*C[j1][l].i;
3638 		sumi +=  S[i1][l].r*C[j1][l].i + S[i1][l].i*C[j1][l].r;
3639 	      }
3640 
3641 	      H2[spin][j1][i1].r = sum;
3642 	      H2[spin][j1][i1].i = sumi;
3643 
3644 	    }
3645 	  }
3646 	} /* #pragma omp parallel */
3647 
3648       } /* if (0<=kloop) */
3649     } /* spin */
3650 
3651     /****************************************************
3652         store LDOS
3653     ****************************************************/
3654 
3655     kloop = arpo[myid];
3656 
3657     if (0<=kloop){
3658 
3659       for (spin=0; spin<=SpinP_switch; spin++){
3660 
3661 	k1 = T_KGrids1[kloop];
3662 	k2 = T_KGrids2[kloop];
3663 	k3 = T_KGrids3[kloop];
3664 
3665 	i = Ti_KGrids1[kloop];
3666 	j = Tj_KGrids2[kloop];
3667 	k = Tk_KGrids3[kloop];
3668 
3669 	for (l=iemin; l<=iemax; l++){
3670 
3671 	  /* initialize */
3672 
3673 	  for (i1=0; i1<(atomnum+1)*List_YOUSO[7]; i1++){
3674 	    SD[i1] = 0.0;
3675 	  }
3676 
3677 	  /* calculate SD */
3678 
3679 #pragma omp parallel shared(TmpS,SD,spin,l,H2,MP,Spe_Total_CNO,WhatSpecies,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,GB_AN,wanB,tnoB,Bnum,i1,j1,u2,v2,uv,vu,Redum,Imdum)
3680 	  {
3681 
3682 	    /* get info. on OpenMP */
3683 
3684 	    OMPID = omp_get_thread_num();
3685 	    Nthrds = omp_get_num_threads();
3686 	    Nprocs = omp_get_num_procs();
3687 
3688 	    for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
3689 
3690 	      wanA = WhatSpecies[GA_AN];
3691 	      tnoA = Spe_Total_CNO[wanA];
3692 	      Anum = MP[GA_AN];
3693 
3694 	      for (GB_AN=1; GB_AN<=atomnum; GB_AN++){
3695 
3696 		wanB = WhatSpecies[GB_AN];
3697 		tnoB = Spe_Total_CNO[wanB];
3698 		Bnum = MP[GB_AN];
3699 
3700 		for (i1=0; i1<tnoA; i1++){
3701 		  for (j1=0; j1<tnoB; j1++){
3702 
3703 		    u2 = H2[spin][l][Anum+i1].r*H2[spin][l][Bnum+j1].r;
3704 		    v2 = H2[spin][l][Anum+i1].i*H2[spin][l][Bnum+j1].i;
3705 		    uv = H2[spin][l][Anum+i1].r*H2[spin][l][Bnum+j1].i;
3706 		    vu = H2[spin][l][Anum+i1].i*H2[spin][l][Bnum+j1].r;
3707 
3708 		    Redum = (u2 + v2);
3709 		    Imdum = (uv - vu);
3710 
3711 		    SD[Anum+i1] += (float)(Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i);
3712 
3713 		  } /* j1 */
3714 		} /* i1 */
3715 	      } /* GB_AN */
3716 	    } /* GA_AN */
3717 
3718 	  } /* #pragma omp parallel */
3719 
3720 	  /*********************************************
3721                    writing a binary file
3722 	  *********************************************/
3723 
3724 	  i_vec[0] = Ti_KGrids1[kloop];
3725 	  i_vec[1] = Tj_KGrids2[kloop];
3726 	  i_vec[2] = Tk_KGrids3[kloop];
3727 
3728 	  fwrite(i_vec,sizeof(int),3,fp_ev);
3729 	  fwrite(&SD[1],sizeof(float),n,fp_ev);
3730 
3731 	} /* l */
3732       } /* spin */
3733     } /* if (0<=kloop) */
3734 
3735     /*********************************************
3736         calculation of partial density matrix
3737     *********************************************/
3738 
3739     kloop = arpo[myid];
3740 
3741     if (0<=kloop && cal_partial_charge) {
3742 
3743       for (spin=0; spin<=SpinP_switch; spin++){
3744 
3745 	k1 = T_KGrids1[kloop];
3746 	k2 = T_KGrids2[kloop];
3747 	k3 = T_KGrids3[kloop];
3748 
3749 	for (l=iemin; l<=iemax; l++){
3750 
3751 	  x1 = (ko[spin][l] - ChemP)*Beta;
3752 	  if (x1<=-max_x) x1 = -max_x;
3753 	  if (max_x<=x1)  x1 = max_x;
3754 	  FermiF1 = 1.0/(1.0 + exp(x1));
3755 
3756 	  x2 = (ko[spin][l] - (ChemP+ene_win_partial_charge))*Beta;
3757 	  if (x2<=-max_x) x2 = -max_x;
3758 	  if (max_x<=x2)  x2 = max_x;
3759 	  FermiF2 = 1.0/(1.0 + exp(x2));
3760 
3761 	  diffF = fabs(FermiF1-FermiF2);
3762 
3763 	  for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
3764 
3765 	    wanA = WhatSpecies[GA_AN];
3766 	    tnoA = Spe_Total_CNO[wanA];
3767 	    Anum = MP[GA_AN];
3768 
3769 	    for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
3770 	      GB_AN = natn[GA_AN][LB_AN];
3771               Rn = ncn[GA_AN][LB_AN];
3772 	      wanB = WhatSpecies[GB_AN];
3773 	      tnoB = Spe_Total_CNO[wanB];
3774 	      Bnum = MP[GB_AN];
3775 
3776 	      l1 = atv_ijk[Rn][1];
3777 	      l2 = atv_ijk[Rn][2];
3778 	      l3 = atv_ijk[Rn][3];
3779 	      kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
3780 
3781 	      si = sin(2.0*PI*kRn);
3782 	      co = cos(2.0*PI*kRn);
3783 
3784 	      for (i=0; i<tnoA; i++){
3785 		for (j=0; j<tnoB; j++){
3786 
3787 		  u2 = H2[spin][l][Anum+i].r*H2[spin][l][Bnum+j].r;
3788 		  v2 = H2[spin][l][Anum+i].i*H2[spin][l][Bnum+j].i;
3789 		  uv = H2[spin][l][Anum+i].r*H2[spin][l][Bnum+j].i;
3790 		  vu = H2[spin][l][Anum+i].i*H2[spin][l][Bnum+j].r;
3791 		  tmp = co*(u2+v2) - si*(uv-vu);
3792                   PDM[spin][GA_AN][LB_AN][i][j] += diffF*tmp;
3793 		}
3794 	      }
3795 	    }
3796 	  }
3797 	}
3798       }
3799     }
3800 
3801     /****************************************************
3802        MPI: EIGEN
3803     ****************************************************/
3804 
3805     for (ID=0; ID<numprocs; ID++){
3806 
3807       kloop = arpo[ID];
3808 
3809       if (0<=kloop){
3810         for (spin=0; spin<=SpinP_switch; spin++){
3811           MPI_Bcast(&EIGEN[spin][kloop][0], n+1, MPI_DOUBLE, ID, mpi_comm_level1);
3812 	}
3813       }
3814     }
3815 
3816   } /* kloop0        */
3817 
3818   /****************************************************
3819      MPI: PDM
3820   ****************************************************/
3821 
3822   if (cal_partial_charge) {
3823 
3824     /* MPI: PDM */
3825 
3826     for (spin=0; spin<=SpinP_switch; spin++){
3827       for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
3828 
3829 	wanA = WhatSpecies[GA_AN];
3830 	tnoA = Spe_Total_CNO[wanA];
3831 
3832 	for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
3833 
3834 	  GB_AN = natn[GA_AN][LB_AN];
3835 	  wanB = WhatSpecies[GB_AN];
3836 	  tnoB = Spe_Total_CNO[wanB];
3837 
3838 	  for (i=0; i<tnoA; i++){
3839 
3840 	    MPI_Allreduce(&PDM[spin][GA_AN][LB_AN][i][0],
3841 			  &TPDM[spin][GA_AN][LB_AN][i][0],
3842 			  tnoB, MPI_DOUBLE, MPI_SUM,
3843 			  mpi_comm_level1);
3844 
3845 	    for (j=0; j<tnoB; j++){
3846               TPDM[spin][GA_AN][LB_AN][i][j] /= (double)T_knum;
3847 	    }
3848 	  }
3849 	}
3850       }
3851     }
3852 
3853     /* store TPDM to Partial_DM */
3854 
3855     for (spin=0; spin<=SpinP_switch; spin++){
3856       for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
3857 
3858         MA_AN = F_G2M[GA_AN];
3859 	wanA = WhatSpecies[GA_AN];
3860 	tnoA = Spe_Total_CNO[wanA];
3861 
3862         if (1<=MA_AN && MA_AN<=Matomnum){
3863 
3864 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
3865 
3866 	    GB_AN = natn[GA_AN][LB_AN];
3867 	    wanB = WhatSpecies[GB_AN];
3868 	    tnoB = Spe_Total_CNO[wanB];
3869 
3870 	    for (i=0; i<tnoA; i++){
3871 	      for (j=0; j<tnoB; j++){
3872 		Partial_DM[spin][MA_AN][LB_AN][i][j] = TPDM[spin][GA_AN][LB_AN][i][j];
3873 	      }
3874 	    }
3875 	  }
3876 	}
3877       }
3878     }
3879   }
3880 
3881   /****************************************************
3882      write: EIGEN
3883   ****************************************************/
3884 
3885   if (myid==Host_ID){
3886     if (fp_eig) {
3887 
3888       for (kloop=0; kloop<T_knum; kloop++){
3889 	for (spin=0; spin<=SpinP_switch; spin++){
3890 
3891           i = Ti_KGrids1[kloop];
3892           j = Tj_KGrids2[kloop];
3893           k = Tk_KGrids3[kloop];
3894 
3895 	  fprintf(fp_eig,"%d %d %d ",i,j,k);
3896 	  for (ie=iemin;ie<=iemax;ie++) {
3897 	    fprintf(fp_eig,"%lf ",EIGEN[spin][kloop][ie]);
3898 	  }
3899 	  fprintf(fp_eig,"\n");
3900 
3901 	}
3902       }
3903 
3904       fprintf(fp_eig,"Eigenvalues>\n");
3905     }
3906   }
3907 
3908 Finishing:
3909 
3910   if (myid==Host_ID){
3911     if (fp_eig) {
3912       fclose(fp_eig);
3913     }
3914   }
3915 
3916   if (fp_ev) {
3917     fclose(fp_ev);
3918   }
3919 
3920   /****************************************************
3921      merge *.Dos.vec#
3922   ****************************************************/
3923 
3924   MPI_Barrier(mpi_comm_level1);
3925 
3926   if (myid==Host_ID){
3927 
3928     sprintf(file_ev0,"%s%s.Dos.vec",filepath,filename);
3929     if ( (fp_ev0=fopen(file_ev0,"w"))==NULL ) {
3930       printf("cannot open a file %s\n",file_ev0);
3931     }
3932 
3933     for (ID=0; ID<numprocs; ID++){
3934 
3935       sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,ID);
3936 
3937       if ( 1<=num_allocated_k[ID] ){
3938 
3939 	if ( (fp_ev=fopen(file_ev,"r"))==NULL ) {
3940 	  printf("cannot open a file %s\n",file_ev);
3941 	}
3942 
3943         for (k=0; k<num_allocated_k[ID]; k++){
3944 	  for (spin=0; spin<=SpinP_switch; spin++){
3945    	    for (l=iemin; l<=iemax; l++){
3946 	      fread( i_vec,  sizeof(int),  3,fp_ev);
3947 	      fwrite(i_vec,  sizeof(int),  3,fp_ev0);
3948 	      fread( &SD[1], sizeof(float),n,fp_ev);
3949 	      fwrite(&SD[1], sizeof(float),n,fp_ev0);
3950 	    }
3951 	  }
3952 	}
3953 
3954 	fclose(fp_ev);
3955       }
3956     }
3957 
3958     fclose(fp_ev0);
3959   }
3960 
3961   MPI_Barrier(mpi_comm_level1);
3962 
3963   /* delete files */
3964   if (myid==Host_ID){
3965     for (ID=0; ID<numprocs; ID++){
3966       sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,ID);
3967       remove(file_ev);
3968     }
3969   }
3970 
3971   /****************************************************
3972           Fermi surface viewable by XCrysDen
3973   ****************************************************/
3974 
3975   if (myid==Host_ID){
3976 
3977     int ii,jj,kk;
3978     int ***TD2OD;
3979     double x0,y0,z0;
3980     char file_fermi[YOUSO10];
3981     FILE *fp;
3982 
3983     /* allocation of TD2OD */
3984 
3985     TD2OD = (int***)malloc(sizeof(int**)*knum_i);
3986     for (i=0; i<knum_i; i++){
3987       TD2OD[i] = (int**)malloc(sizeof(int*)*knum_j);
3988       for (j=0; j<knum_j; j++){
3989         TD2OD[i][j] = (int*)malloc(sizeof(int)*knum_k);
3990 	for (k=0; k<knum_k; k++) TD2OD[i][j][k] = 0;
3991       }
3992     }
3993 
3994     kloop = 0;
3995     for (i=0; i<knum_i; i++){
3996       for (j=0; j<knum_j; j++){
3997 	for (k=0; k<knum_k; k++){
3998           TD2OD[i][j][k] = kloop;
3999           kloop++;
4000 	}
4001       }
4002     }
4003 
4004     /* make bxsf files */
4005 
4006     for (spin=0; spin<=SpinP_switch; spin++){
4007 
4008       sprintf(file_fermi,"%s%s.FermiSurf%d.bxsf",filepath,filename,spin);
4009 
4010       if ((fp=fopen(file_fermi,"w")) != NULL){
4011 
4012 	fprintf(fp,"BEGIN_INFO\n");
4013 	fprintf(fp,"Fermi Energy: %15.12f\n",ChemP);
4014 	fprintf(fp,"END_INFO\n\n");
4015 
4016 	fprintf(fp,"BEGIN_BLOCK_BANDGRID_3D\n");
4017 	fprintf(fp,"comment_line\n");
4018 	fprintf(fp,"BEGIN_BANDGRID_3D_simple_example\n");
4019 	fprintf(fp,"%d\n",n);
4020 	fprintf(fp,"%d %d %d\n",knum_i+1,knum_j+1,knum_k+1);
4021 
4022 	fprintf(fp,"0.0 0.0 0.0\n");
4023 	fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[1][1],rtv[1][2],rtv[1][3]);
4024 	fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[2][1],rtv[2][2],rtv[2][3]);
4025 	fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[3][1],rtv[3][2],rtv[3][3]);
4026 
4027 	for (i1=1; i1<=n; i1++){
4028 
4029 	  fprintf(fp,"BAND:  %d\n",i1);
4030 
4031 	  for (i=0; i<=knum_i; i++){
4032             ii = i%knum_i;
4033 	    for (j=0; j<=knum_j; j++){
4034               jj = j%knum_j;
4035 	      for (k=0; k<=knum_k; k++){
4036                 kk = k%knum_k;
4037 		kloop = TD2OD[ii][jj][kk];
4038 		fprintf(fp,"%10.5f ",EIGEN[spin][kloop][i1]);
4039 	      }
4040 	      fprintf(fp,"\n");
4041 	    }
4042 
4043             if (i!=knum_i)  fprintf(fp,"\n");
4044 
4045 	  }
4046 	}
4047 
4048 	fprintf(fp,"END_BANDGRID_3D\n");
4049 	fprintf(fp,"END_BLOCK_BANDGRID_3D\n");
4050 
4051 	fclose(fp);
4052       }
4053       else{
4054         printf("Failure in saving bxsf files.\n");
4055       }
4056 
4057     } /* spin */
4058 
4059     /* free TD2OD */
4060     for (i=0; i<knum_i; i++){
4061       for (j=0; j<knum_j; j++){
4062         free(TD2OD[i][j]);
4063       }
4064       free(TD2OD[i]);
4065     }
4066     free(TD2OD);
4067 
4068   } /* if (myid==Host_ID) */
4069 
4070   /****************************************************
4071                        free arrays
4072   ****************************************************/
4073 
4074   free(MP);
4075   free(arpo);
4076   free(num_allocated_k);
4077 
4078   for (i=0; i<i_ko[0]; i++){
4079     free(ko[i]);
4080   }
4081   free(ko);
4082 
4083   free(koS);
4084 
4085   for (i=0; i<i_H[0]; i++){
4086     free(H[i]);
4087   }
4088   free(H);
4089 
4090   for (i=0; i<i_S[0]; i++){
4091     free(S[i]);
4092   }
4093   free(S);
4094 
4095   free(M1);
4096 
4097   for (i=0; i<i_C[0]; i++){
4098     free(C[i]);
4099   }
4100   free(C);
4101 
4102   free(KGrids1); free(KGrids2);free(KGrids3);
4103 
4104   free(SD);
4105 
4106   for (j=0; j<n+1; j++){
4107     free(TmpM[j]);
4108   }
4109   free(TmpM);
4110 
4111   for (j=0; j<n+1; j++){
4112     free(TmpS[j]);
4113   }
4114   free(TmpS);
4115 
4116   for (i=0; i<List_YOUSO[23]; i++){
4117     for (j=0; j<n+1; j++){
4118       free(H2[i][j]);
4119     }
4120     free(H2[i]);
4121   }
4122   free(H2);
4123 
4124   /* partial density matrix */
4125 
4126   if (cal_partial_charge){
4127 
4128     /* PDM */
4129 
4130     for (k=0; k<=1; k++){
4131 
4132       FNAN[0] = 0;
4133 
4134       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
4135 
4136 	if (Gc_AN==0){
4137 	  tno0 = 1;
4138 	}
4139 	else{
4140 	  Cwan = WhatSpecies[Gc_AN];
4141 	  tno0 = Spe_Total_NO[Cwan];
4142 	}
4143 
4144 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4145 
4146 	  if (Gc_AN==0){
4147 	    tno1 = 1;
4148 	  }
4149 	  else{
4150 	    Gh_AN = natn[Gc_AN][h_AN];
4151 	    Hwan = WhatSpecies[Gh_AN];
4152 	    tno1 = Spe_Total_NO[Hwan];
4153 	  }
4154 
4155 	  for (i=0; i<tno0; i++){
4156 	    free(PDM[k][Gc_AN][h_AN][i]);
4157 	  }
4158           free(PDM[k][Gc_AN][h_AN]);
4159 	}
4160         free(PDM[k][Gc_AN]);
4161       }
4162       free(PDM[k]);
4163     }
4164     free(PDM);
4165 
4166     /* TPDM */
4167 
4168     for (k=0; k<=1; k++){
4169 
4170       FNAN[0] = 0;
4171 
4172       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
4173 
4174 	if (Gc_AN==0){
4175 	  tno0 = 1;
4176 	}
4177 	else{
4178 	  Cwan = WhatSpecies[Gc_AN];
4179 	  tno0 = Spe_Total_NO[Cwan];
4180 	}
4181 
4182 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4183 
4184 	  if (Gc_AN==0){
4185 	    tno1 = 1;
4186 	  }
4187 	  else{
4188 	    Gh_AN = natn[Gc_AN][h_AN];
4189 	    Hwan = WhatSpecies[Gh_AN];
4190 	    tno1 = Spe_Total_NO[Hwan];
4191 	  }
4192 
4193 	  for (i=0; i<tno0; i++){
4194 	    free(TPDM[k][Gc_AN][h_AN][i]);
4195 	  }
4196           free(TPDM[k][Gc_AN][h_AN]);
4197 	}
4198         free(TPDM[k][Gc_AN]);
4199       }
4200       free(TPDM[k]);
4201     }
4202     free(TPDM);
4203   }
4204 
4205   /* no spin-orbit coupling */
4206   if (SO_switch==0){
4207     free(Dummy_ImNL[0][0][0]);
4208     free(Dummy_ImNL[0][0]);
4209     free(Dummy_ImNL[0]);
4210     free(Dummy_ImNL);
4211   }
4212 
4213   free(T_KGrids1);
4214   free(T_KGrids2);
4215   free(T_KGrids3);
4216 
4217   free(Ti_KGrids1);
4218   free(Tj_KGrids2);
4219   free(Tk_KGrids3);
4220 
4221   for (i=0; i<List_YOUSO[23]; i++){
4222     for (j=0; j<T_knum; j++){
4223       free(EIGEN[i][j]);
4224     }
4225     free(EIGEN[i]);
4226   }
4227   free(EIGEN);
4228 
4229   /* for elapsed time */
4230 
4231   dtime(&TEtime);
4232   time0 = TEtime - TStime;
4233   return time0;
4234 
4235 }
4236 
4237 
4238 
4239 
4240 
4241 
4242 
Band_DFT_Dosout_NonCol(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)4243 static double Band_DFT_Dosout_NonCol(
4244                            int knum_i, int knum_j, int knum_k,
4245                            int SpinP_switch,
4246                            double *****nh,
4247                            double *****ImNL,
4248                            double ****CntOLP)
4249 {
4250   int i,j,k,spin,l,i1,j1,n1;
4251   int n, wanA,ii1,jj1,m,n2;
4252   int *MP;
4253   int MA_AN, GA_AN, tnoA,Anum, LB_AN;
4254   int GB_AN, wanB, tnoB, Bnum, RnB;
4255   int l1,l2,l3;
4256 
4257   int ie,iemin,iemax,iemin0,iemax0,n1min,mn;
4258   int MaxL,e1,s1,T_knum,S_knum,E_knum;
4259   int kloop,kloop0,num_kloop0;
4260   int i_vec[10];
4261   int h_AN,tno0,tno1,Gh_AN,Hwan,Gc_AN,Cwan,Rn;
4262 
4263   double *****PDM;
4264   double *****TPDM;
4265   double EV_cut0;
4266   double FermiF1,FermiF2,x1,x2,diffF,max_x=50.0;
4267   double sum_r0,sum_i0,sum_r1,sum_i1;
4268   double sum,sumi,u2,v2,uv,vu,tmp;
4269   double kRn,si,co,Redum,Imdum,Redum2,Resum,Imdum2;
4270   double theta,phi,sit,cot,sip,cop,tmp1,tmp2,tmp3;
4271   double TStime,TEtime,time0;
4272   double OLP_eigen_cut=Threshold_OLP_Eigen;
4273 
4274   double *ko; int N_ko, i_ko[10];
4275   double *koS;
4276   double d0,d1,d2,d3;
4277   dcomplex **H;  int N_H,  i_H[10];
4278   dcomplex **S;  int N_S,  i_S[10];
4279   dcomplex **TmpS;
4280   dcomplex Ctmp1,Ctmp2;
4281   double *M1,**EIGEN;
4282   dcomplex **C;  int N_C,  i_C[10];
4283   double *KGrids1, *KGrids2, *KGrids3;
4284   float *SD; int N_SD, i_SD[10];
4285   dcomplex **TmpM;
4286   double *T_KGrids1,*T_KGrids2,*T_KGrids3;
4287   int *Ti_KGrids1,*Tj_KGrids2,*Tk_KGrids3,*arpo;
4288   int *num_allocated_k;
4289 
4290   double *****Dummy_ImNL;
4291 
4292   double snum_i, snum_j, snum_k;
4293   double k1,k2,k3;
4294 
4295   char file_ev[YOUSO10],file_ev0[YOUSO10],file_eig[YOUSO10];
4296   FILE *fp_ev,*fp_ev0,*fp_eig;
4297 
4298   char buf1[fp_bsize];          /* setvbuf */
4299   char buf2[fp_bsize];          /* setvbuf */
4300   int numprocs,myid,ID,ID1,ID2,tag;
4301 
4302   /* for OpenMP */
4303   int OMPID,Nthrds,Nprocs;
4304 
4305   MPI_Status stat;
4306   MPI_Request request;
4307 
4308   /* MPI */
4309   MPI_Comm_size(mpi_comm_level1,&numprocs);
4310   MPI_Comm_rank(mpi_comm_level1,&myid);
4311   MPI_Barrier(mpi_comm_level1);
4312 
4313   if (myid==Host_ID && 0<level_stdout){
4314     printf("\n<Band_DFT_Dosout>\n"); fflush(stdout);
4315   }
4316 
4317   dtime(&TStime);
4318 
4319   /****************************************************
4320              calculation of the array size
4321   ****************************************************/
4322 
4323   n = 0;
4324   for (i=1; i<=atomnum; i++){
4325     wanA  = WhatSpecies[i];
4326     n  = n + Spe_Total_CNO[wanA];
4327   }
4328   n2 = 2*n + 2;
4329 
4330   /****************************************************
4331    Allocation of arrays
4332   ****************************************************/
4333 
4334   MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
4335 
4336   arpo = (int*)malloc(sizeof(int)*numprocs);
4337   num_allocated_k = (int*)malloc(sizeof(int)*numprocs);
4338 
4339   ko = (double*)malloc(sizeof(double)*n2);
4340   koS = (double*)malloc(sizeof(double)*(n+1));
4341 
4342   H = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
4343   for (j=0; j<n2; j++){
4344     H[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
4345   }
4346 
4347   S = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
4348   for (i=0; i<(n+1); i++){
4349     S[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
4350   }
4351 
4352   TmpS = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
4353   for (i=0; i<(n+1); i++){
4354     TmpS[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
4355   }
4356 
4357   M1 = (double*)malloc(sizeof(double)*n2);
4358 
4359   C = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
4360   for (j=0; j<n2; j++){
4361     C[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
4362   }
4363 
4364   KGrids1 = (double*)malloc(sizeof(double)*knum_i);
4365   KGrids2 = (double*)malloc(sizeof(double)*knum_j);
4366   KGrids3 = (double*)malloc(sizeof(double)*knum_k);
4367 
4368   SD = (float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]*2);
4369 
4370   TmpM = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
4371   for (j=0; j<n2; j++){
4372     TmpM[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
4373   }
4374 
4375   /* non-spin-orbit coupling and non-LDA+U */
4376   if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
4377        && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
4378     Dummy_ImNL = (double*****)malloc(sizeof(double****)*1);
4379     Dummy_ImNL[0] = (double****)malloc(sizeof(double***)*1);
4380     Dummy_ImNL[0][0] = (double***)malloc(sizeof(double**)*1);
4381     Dummy_ImNL[0][0][0] = (double**)malloc(sizeof(double*)*1);
4382     Dummy_ImNL[0][0][0][0] = (double*)malloc(sizeof(double)*1);
4383   }
4384 
4385   /* for calculation partial density matrix */
4386 
4387   if (cal_partial_charge){
4388 
4389     PDM = (double*****)malloc(sizeof(double****)*2);
4390     for (k=0; k<=1; k++){
4391 
4392       PDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
4393       FNAN[0] = 0;
4394 
4395       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
4396 
4397 	if (Gc_AN==0){
4398 	  tno0 = 1;
4399 	}
4400 	else{
4401 	  Cwan = WhatSpecies[Gc_AN];
4402 	  tno0 = Spe_Total_NO[Cwan];
4403 	}
4404 
4405 	PDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
4406 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4407 
4408 	  if (Gc_AN==0){
4409 	    tno1 = 1;
4410 	  }
4411 	  else{
4412 	    Gh_AN = natn[Gc_AN][h_AN];
4413 	    Hwan = WhatSpecies[Gh_AN];
4414 	    tno1 = Spe_Total_NO[Hwan];
4415 	  }
4416 
4417 	  PDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
4418 	  for (i=0; i<tno0; i++){
4419 	    PDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
4420   	    for (j=0; j<tno1; j++) PDM[k][Gc_AN][h_AN][i][j] = 0.0;
4421 	  }
4422 	}
4423       }
4424     }
4425 
4426     TPDM = (double*****)malloc(sizeof(double****)*2);
4427     for (k=0; k<=1; k++){
4428 
4429       TPDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
4430       FNAN[0] = 0;
4431 
4432       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
4433 
4434 	if (Gc_AN==0){
4435 	  tno0 = 1;
4436 	}
4437 	else{
4438 	  Cwan = WhatSpecies[Gc_AN];
4439 	  tno0 = Spe_Total_NO[Cwan];
4440 	}
4441 
4442 	TPDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
4443 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4444 
4445 	  if (Gc_AN==0){
4446 	    tno1 = 1;
4447 	  }
4448 	  else{
4449 	    Gh_AN = natn[Gc_AN][h_AN];
4450 	    Hwan = WhatSpecies[Gh_AN];
4451 	    tno1 = Spe_Total_NO[Hwan];
4452 	  }
4453 
4454 	  TPDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
4455 	  for (i=0; i<tno0; i++){
4456 	    TPDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
4457   	    for (j=0; j<tno1; j++) TPDM[k][Gc_AN][h_AN][i][j] = 0.0;
4458 	  }
4459 	}
4460       }
4461     }
4462   }
4463 
4464   /****************************************************
4465     set up k-grids
4466   ****************************************************/
4467 
4468   snum_i = knum_i;
4469   snum_j = knum_j;
4470   snum_k = knum_k;
4471 
4472   for (i=0; i<knum_i; i++){
4473     k1 = (double)i/snum_i + Shift_K_Point;
4474     KGrids1[i]=k1;
4475   }
4476   for (i=0; i<knum_j; i++){
4477     k1 = (double)i/snum_j - Shift_K_Point;
4478     KGrids2[i]=k1;
4479   }
4480   for (i=0; i<knum_k; i++){
4481     k1 = (double)i/snum_k + 2.0*Shift_K_Point;
4482     KGrids3[i]=k1;
4483   }
4484 
4485   if (myid==Host_ID && 0<level_stdout){
4486     printf(" KGrids1: ");
4487     for (i=0;i<=knum_i-1;i++) printf("%f ",KGrids1[i]);
4488     printf("\n");
4489     printf(" KGrids2: ");
4490     for (i=0;i<=knum_j-1;i++) printf("%f ",KGrids2[i]);
4491     printf("\n");
4492     printf(" KGrids3: ");
4493     for (i=0;i<=knum_k-1;i++) printf("%f ",KGrids3[i]);
4494     printf("\n");
4495   }
4496 
4497   /***********************************
4498        one-dimentionalize for MPI
4499   ************************************/
4500 
4501   T_knum = 0;
4502   for (i=0; i<=(knum_i-1); i++){
4503     for (j=0; j<=(knum_j-1); j++){
4504       for (k=0; k<=(knum_k-1); k++){
4505         T_knum++;
4506       }
4507     }
4508   }
4509 
4510   T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
4511   T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
4512   T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
4513 
4514   Ti_KGrids1 = (int*)malloc(sizeof(int)*T_knum);
4515   Tj_KGrids2 = (int*)malloc(sizeof(int)*T_knum);
4516   Tk_KGrids3 = (int*)malloc(sizeof(int)*T_knum);
4517 
4518   EIGEN = (double**)malloc(sizeof(double*)*T_knum);
4519   for (j=0; j<T_knum; j++){
4520     EIGEN[j] = (double*)malloc(sizeof(double)*n2);
4521   }
4522 
4523   /* set T_KGrid1,2,3 */
4524 
4525   T_knum = 0;
4526   for (i=0; i<=(knum_i-1); i++){
4527     for (j=0; j<=(knum_j-1); j++){
4528       for (k=0; k<=(knum_k-1); k++){
4529 
4530 	T_KGrids1[T_knum] = KGrids1[i];
4531 	T_KGrids2[T_knum] = KGrids2[j];
4532 	T_KGrids3[T_knum] = KGrids3[k];
4533 
4534 	Ti_KGrids1[T_knum] = i;
4535 	Tj_KGrids2[T_knum] = j;
4536 	Tk_KGrids3[T_knum] = k;
4537 
4538         T_knum++;
4539       }
4540     }
4541   }
4542 
4543   /* allocate k-points into proccessors */
4544 
4545   if (T_knum<=myid){
4546     S_knum = -10;
4547     E_knum = -100;
4548     num_kloop0 = 1;
4549     num_allocated_k[myid] = 0;
4550   }
4551   else if (T_knum<numprocs) {
4552     S_knum = myid;
4553     E_knum = myid;
4554     num_kloop0 = 1;
4555     num_allocated_k[myid] = 1;
4556   }
4557   else {
4558     tmp = (double)T_knum/(double)numprocs;
4559     num_kloop0 = (int)tmp + 1;
4560 
4561     S_knum = (int)((double)myid*(tmp+0.0001));
4562     E_knum = (int)((double)(myid+1)*(tmp+0.0001)) - 1;
4563     if (myid==(numprocs-1)) E_knum = T_knum - 1;
4564     if (E_knum<0)           E_knum = 0;
4565     num_allocated_k[myid] = E_knum - S_knum + 1;
4566   }
4567 
4568   for (ID=0; ID<numprocs; ID++){
4569     MPI_Bcast(&num_allocated_k[ID], 1, MPI_INT, ID, mpi_comm_level1);
4570   }
4571 
4572   /****************************************************************
4573                       find iemin and iemax
4574   *****************************************************************/
4575 
4576   iemin=n2; iemax=1; n1min=1;
4577 
4578   k1 = 0.0;
4579   k2 = 0.0;
4580   k3 = 0.0;
4581 
4582   Overlap_Band(Host_ID,CntOLP,S,MP,k1,k2,k3);
4583 
4584   if (myid==Host_ID){
4585 
4586     n = S[0][0].r;
4587     EigenBand_lapack(S,ko,n,n,1);
4588 
4589     if (2<=level_stdout){
4590       printf("  k1 k2 k3 %10.6f %10.6f %10.6f\n",k1,k2,k3);
4591       for (i1=1; i1<=n; i1++){
4592 	printf("  Eigenvalues of OLP  %2d  %15.12f\n",i1,ko[i1]);
4593       }
4594     }
4595 
4596     /* minus eigenvalues to 1.0e-14 */
4597     for (l=1; l<=n; l++){
4598       if (ko[l]<0.0){
4599 	ko[l] = 1.0e-14;
4600 
4601 	if (2<=level_stdout){
4602 	  printf("found an eigenvalue smaller than %10.8f of OLP kloop=%2d\n",
4603 		 Threshold_OLP_Eigen,kloop);
4604 	}
4605       }
4606 
4607       koS[l] = ko[l];
4608     }
4609 
4610     /* calculate S*1/sqrt(ko) */
4611 
4612     for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[l]);
4613 
4614     /* S * M1  */
4615 
4616     for (i1=1; i1<=n; i1++){
4617       for (j1=1; j1<=n; j1++){
4618 	S[i1][j1].r = S[i1][j1].r*M1[j1];
4619 	S[i1][j1].i = S[i1][j1].i*M1[j1];
4620       }
4621     }
4622 
4623   } /* if (myid==Host_ID) */
4624 
4625   /****************************************************
4626              make a full Hamiltonian matrix
4627   ****************************************************/
4628 
4629   /* non-spin-orbit coupling and non-LDA+U */
4630   if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
4631       && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)
4632     Hamiltonian_Band_NC(Host_ID, nh, Dummy_ImNL, H, MP, k1, k2, k3);
4633 
4634   /* spin-orbit coupling or LDA+U */
4635   else
4636     Hamiltonian_Band_NC(Host_ID, nh, ImNL, H, MP, k1, k2, k3);
4637 
4638   if (myid==Host_ID){
4639 
4640     /****************************************************
4641                     M1 * U^+ * H * U * M1
4642     ****************************************************/
4643 
4644     /* transpose S */
4645 
4646     for (i1=1; i1<=n; i1++){
4647       for (j1=i1+1; j1<=n; j1++){
4648 	Ctmp1 = S[i1][j1];
4649 	Ctmp2 = S[j1][i1];
4650 	S[i1][j1] = Ctmp2;
4651 	S[j1][i1] = Ctmp1;
4652       }
4653     }
4654 
4655     /* H * U * M1 */
4656 
4657     for (i1=1; i1<=2*n; i1++){
4658 
4659       jj1 = 1;
4660       for (j1=1; j1<=n; j1++){
4661 
4662 	sum_r0 = 0.0;
4663 	sum_i0 = 0.0;
4664 
4665 	sum_r1 = 0.0;
4666 	sum_i1 = 0.0;
4667 
4668 	for (l=1; l<=n; l++){
4669 
4670 	  sum_r0 += H[i1][l  ].r*S[j1][l].r - H[i1][l  ].i*S[j1][l].i;
4671 	  sum_i0 += H[i1][l  ].r*S[j1][l].i + H[i1][l  ].i*S[j1][l].r;
4672 
4673 	  sum_r1 += H[i1][l+n].r*S[j1][l].r - H[i1][l+n].i*S[j1][l].i;
4674 	  sum_i1 += H[i1][l+n].r*S[j1][l].i + H[i1][l+n].i*S[j1][l].r;
4675 	}
4676 
4677 	C[jj1][i1].r = sum_r0;
4678 	C[jj1][i1].i = sum_i0;  jj1++;
4679 
4680 	C[jj1][i1].r = sum_r1;
4681 	C[jj1][i1].i = sum_i1;  jj1++;
4682 
4683       }
4684     }
4685 
4686     /* M1 * U^+ H * U * M1 */
4687 
4688     for (j1=1; j1<=2*n; j1++){
4689 
4690       ii1 = 1;
4691       for (i1=1; i1<=n; i1++){
4692 
4693 	sum_r0 = 0.0;
4694 	sum_i0 = 0.0;
4695 
4696 	sum_r1 = 0.0;
4697 	sum_i1 = 0.0;
4698 
4699 	for (l=1; l<=n; l++){
4700 
4701 	  sum_r0 +=  S[i1][l].r*C[j1][l  ].r + S[i1][l].i*C[j1][l  ].i;
4702 	  sum_i0 +=  S[i1][l].r*C[j1][l  ].i - S[i1][l].i*C[j1][l  ].r;
4703 
4704 	  sum_r1 +=  S[i1][l].r*C[j1][l+n].r + S[i1][l].i*C[j1][l+n].i;
4705 	  sum_i1 +=  S[i1][l].r*C[j1][l+n].i - S[i1][l].i*C[j1][l+n].r;
4706 	}
4707 
4708 	H[ii1][j1].r = sum_r0;
4709 	H[ii1][j1].i = sum_i0; ii1++;
4710 
4711 	H[ii1][j1].r = sum_r1;
4712 	H[ii1][j1].i = sum_i1; ii1++;
4713 
4714       }
4715     }
4716 
4717     /* H to C */
4718 
4719     for (i1=1; i1<=2*n; i1++){
4720       for (j1=1; j1<=2*n; j1++){
4721 	C[i1][j1].r = H[i1][j1].r;
4722 	C[i1][j1].i = H[i1][j1].i;
4723       }
4724     }
4725 
4726     /* penalty for ill-conditioning states */
4727 
4728     EV_cut0 = Threshold_OLP_Eigen;
4729 
4730     for (i1=1; i1<=n; i1++){
4731 
4732       if (koS[i1]<EV_cut0){
4733 	C[2*i1-1][2*i1-1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
4734 	C[2*i1  ][2*i1  ].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
4735       }
4736 
4737       /* cutoff the interaction between the ill-conditioned state */
4738 
4739       if (1.0e+3<C[2*i1-1][2*i1-1].r){
4740 	for (j1=1; j1<=2*n; j1++){
4741 	  C[2*i1-1][j1    ] = Complex(0.0,0.0);
4742 	  C[j1    ][2*i1-1] = Complex(0.0,0.0);
4743 	  C[2*i1  ][j1    ] = Complex(0.0,0.0);
4744 	  C[j1    ][2*i1  ] = Complex(0.0,0.0);
4745 	}
4746 	C[2*i1-1][2*i1-1] = Complex(1.0e+4,0.0);
4747 	C[2*i1  ][2*i1  ] = Complex(1.0e+4,0.0);
4748       }
4749     }
4750 
4751     /* solve eigenvalue problem */
4752 
4753     n1 = 2*n;
4754     EigenBand_lapack(C,ko,n1,n1,1);
4755 
4756     if (n1min<n1) n1min=n1;
4757 
4758     iemin0=1;
4759     for (i1=1;i1<=n1;i1++) {
4760       if (ko[i1]> (ChemP+Dos_Erange[0])) {
4761 	iemin0=i1-1;
4762 	break;
4763       }
4764     }
4765     if (iemin0<1) iemin0=1;
4766 
4767     iemax0=n1;
4768     for (i1=iemin0;i1<=n1;i1++) {
4769       if (ko[i1]> (ChemP+Dos_Erange[1])) {
4770 	iemax0=i1;
4771 	break;
4772       }
4773     }
4774     if (iemax0>n1) iemax0=n1;
4775 
4776     if (iemin>iemin0) iemin=iemin0;
4777     if (iemax<iemax0) iemax=iemax0;
4778 
4779   }   /* if (myid==Host_ID) */
4780 
4781   /* add a buffer to iemin and iemax */
4782 
4783   iemin -= 5;
4784   iemax += 5;
4785 
4786   if (iemin<1)  iemin = 1;
4787   if ((2*n)<iemax) iemax = 2*n;
4788 
4789    /* partial density matrix for STM simulation */
4790   if (cal_partial_charge){
4791     iemin = 1;
4792     iemax = 2*n;
4793   }
4794 
4795   if (myid==Host_ID){
4796     if (n1min<iemax) iemax=n1min;
4797     printf(" iemin and iemax= %d %d\n",iemin,iemax);fflush(stdout);
4798   }
4799 
4800   /* MPI, iemin, iemax */
4801   MPI_Barrier(mpi_comm_level1);
4802   MPI_Bcast(&iemin, 1, MPI_INT, Host_ID, mpi_comm_level1);
4803   MPI_Bcast(&iemax, 1, MPI_INT, Host_ID, mpi_comm_level1);
4804 
4805   /****************************************************************
4806                      eigenvalues and eigenvectors
4807    ****************************************************************/
4808 
4809   if (myid==Host_ID){
4810 
4811     sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
4812     if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
4813 
4814 #ifdef xt3
4815       setvbuf(fp_eig,buf1,_IOFBF,fp_bsize);  /* setvbuf */
4816 #endif
4817 
4818       printf("cannot open a file %s\n",file_eig);
4819     }
4820 
4821     if ( fp_eig==NULL ) {
4822       goto Finishing;
4823     }
4824   }
4825 
4826   sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,myid);
4827   if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
4828     printf("cannot open a file %s\n",file_ev);
4829   }
4830   if ( fp_ev==NULL ) {
4831     goto Finishing;
4832   }
4833 
4834   if (myid==Host_ID){
4835 
4836     if (fp_eig) {
4837       fprintf(fp_eig,"mode        1\n");
4838       fprintf(fp_eig,"NonCol      1\n");
4839       fprintf(fp_eig,"N           %d\n",n);
4840       fprintf(fp_eig,"Nspin       %d\n",1); /* switch to 1 */
4841       fprintf(fp_eig,"Erange      %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
4842       fprintf(fp_eig,"Kgrid       %d %d %d\n",knum_i,knum_j,knum_k);
4843       fprintf(fp_eig,"atomnum     %d\n",atomnum);
4844       fprintf(fp_eig,"<WhatSpecies\n");
4845       for (i=1;i<=atomnum;i++) {
4846         fprintf(fp_eig,"%d ",WhatSpecies[i]);
4847       }
4848       fprintf(fp_eig,"\nWhatSpecies>\n");
4849       fprintf(fp_eig,"SpeciesNum     %d\n",SpeciesNum);
4850       fprintf(fp_eig,"<Spe_Total_CNO\n");
4851       for (i=0;i<SpeciesNum;i++) {
4852         fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
4853       }
4854       fprintf(fp_eig,"\nSpe_Total_CNO>\n");
4855 
4856       MaxL=Supported_MaxL;
4857 
4858       fprintf(fp_eig,"MaxL           %d\n",Supported_MaxL);
4859       fprintf(fp_eig,"<Spe_Num_CBasis\n");
4860       for (i=0;i<SpeciesNum;i++) {
4861         for (l=0;l<=MaxL;l++) {
4862           fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
4863         }
4864         fprintf(fp_eig,"\n");
4865       }
4866       fprintf(fp_eig,"Spe_Num_CBasis>\n");
4867 
4868       fprintf(fp_eig,"ChemP       %lf\n",ChemP);
4869 
4870       fprintf(fp_eig,"<SpinAngle\n");
4871       for (i=1; i<=atomnum; i++) {
4872         fprintf(fp_eig,"%lf %lf\n",Angle0_Spin[i],Angle1_Spin[i]);
4873       }
4874       fprintf(fp_eig,"SpinAngle>\n");
4875 
4876       printf(" write eigenvalues\n");
4877       printf(" write eigenvectors\n");
4878     }
4879   }
4880 
4881   /* for kloop */
4882 
4883   for (kloop0=0; kloop0<num_kloop0; kloop0++){
4884 
4885     kloop = kloop0 + S_knum;
4886     arpo[myid] = -1;
4887     if (S_knum<=kloop && kloop<=E_knum) arpo[myid] = kloop;
4888     for (ID=0; ID<numprocs; ID++){
4889       MPI_Bcast(&arpo[ID], 1, MPI_INT, ID, mpi_comm_level1);
4890     }
4891 
4892     /* set S */
4893 
4894     for (ID=0; ID<numprocs; ID++){
4895 
4896       kloop = arpo[ID];
4897 
4898       if (0<=kloop){
4899 
4900         k1 = T_KGrids1[kloop];
4901         k2 = T_KGrids2[kloop];
4902         k3 = T_KGrids3[kloop];
4903 
4904         Overlap_Band(ID,CntOLP,TmpM,MP,k1,k2,k3);
4905         n = TmpM[0][0].r;
4906 
4907         if (myid==ID){
4908 	  for (i1=1; i1<=n; i1++){
4909 	    for (j1=1; j1<=n; j1++){
4910 	      S[i1][j1].r = TmpM[i1][j1].r;
4911 	      S[i1][j1].i = TmpM[i1][j1].i;
4912 
4913 	      TmpS[i1][j1].r = TmpM[i1][j1].r;
4914 	      TmpS[i1][j1].i = TmpM[i1][j1].i;
4915 	    }
4916 	  }
4917         }
4918 
4919       }
4920     }
4921 
4922     kloop = arpo[myid];
4923 
4924     if (0<=kloop){
4925 
4926       EigenBand_lapack(S,ko,n,n,1);
4927 
4928       if (2<=level_stdout && 0<=kloop){
4929 	printf("  kloop %2d  k1 k2 k3 %10.6f %10.6f %10.6f\n",
4930 	       kloop,T_KGrids1[kloop],T_KGrids2[kloop],T_KGrids3[kloop]);
4931 	for (i1=1; i1<=n; i1++){
4932 	  printf("  Eigenvalues of OLP  %2d  %15.12f\n",i1,ko[i1]);
4933 	}
4934       }
4935 
4936       /* minus eigenvalues to 1.0e-14 */
4937       for (l=1; l<=n; l++){
4938 	if (ko[l]<0.0){
4939 	  ko[l] = 1.0e-14;
4940 
4941 	  if (2<=level_stdout){
4942 	    printf("found an eigenvalue smaller than %10.8f of OLP kloop=%2d\n",
4943 		   Threshold_OLP_Eigen,kloop);
4944 	  }
4945 	}
4946 
4947 	koS[l] = ko[l];
4948       }
4949 
4950       /* calculate S*1/sqrt(ko) */
4951 
4952       for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[l]);
4953 
4954       /* S * M1  */
4955 
4956       for (i1=1; i1<=n; i1++){
4957 	for (j1=1; j1<=n; j1++){
4958 	  S[i1][j1].r = S[i1][j1].r*M1[j1];
4959 	  S[i1][j1].i = S[i1][j1].i*M1[j1];
4960 	}
4961       }
4962 
4963     }
4964 
4965     /****************************************************
4966        make a full Hamiltonian matrix
4967     ****************************************************/
4968 
4969     for (ID=0; ID<numprocs; ID++){
4970 
4971       kloop = arpo[ID];
4972 
4973       if (0<=kloop){
4974         k1 = T_KGrids1[kloop];
4975         k2 = T_KGrids2[kloop];
4976         k3 = T_KGrids3[kloop];
4977 
4978         /* non-spin-orbit coupling and non-LDA+U */
4979         if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
4980             && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)
4981           Hamiltonian_Band_NC(ID, nh, Dummy_ImNL, TmpM, MP, k1, k2, k3);
4982 
4983         /* spin-orbit coupling or LDA+U */
4984         else
4985           Hamiltonian_Band_NC(ID, nh,       ImNL, TmpM, MP, k1, k2, k3);
4986 
4987 	if (myid==ID){
4988 	  for (i1=1; i1<=2*n; i1++){
4989 	    for (j1=1; j1<=2*n; j1++){
4990 	      H[i1][j1].r = TmpM[i1][j1].r;
4991 	      H[i1][j1].i = TmpM[i1][j1].i;
4992 	    }
4993 	  }
4994 	}
4995       }
4996     }
4997 
4998     kloop = arpo[myid];
4999 
5000     if (0<=kloop){
5001 
5002       /****************************************************
5003 	               M1 * U^t * H * U * M1
5004       ****************************************************/
5005 
5006       /* transpose S */
5007 
5008       for (i1=1; i1<=n; i1++){
5009 	for (j1=i1+1; j1<=n; j1++){
5010 	  Ctmp1 = S[i1][j1];
5011 	  Ctmp2 = S[j1][i1];
5012 	  S[i1][j1] = Ctmp2;
5013 	  S[j1][i1] = Ctmp1;
5014 	}
5015       }
5016 
5017       /* H * U * M1 */
5018 
5019 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,jj1,j1,sum_r0,sum_i0,sum_r1,sum_i1,l)
5020       {
5021 	/* get info. on OpenMP */
5022 
5023 	OMPID = omp_get_thread_num();
5024 	Nthrds = omp_get_num_threads();
5025 	Nprocs = omp_get_num_procs();
5026 
5027 	for (i1=1+OMPID; i1<=2*n; i1+=Nthrds){
5028 
5029 	  jj1 = 1;
5030 	  for (j1=1; j1<=n; j1++){
5031 
5032 	    sum_r0 = 0.0;
5033 	    sum_i0 = 0.0;
5034 
5035 	    sum_r1 = 0.0;
5036 	    sum_i1 = 0.0;
5037 
5038 	    for (l=1; l<=n; l++){
5039 
5040 	      sum_r0 += H[i1][l  ].r*S[j1][l].r - H[i1][l  ].i*S[j1][l].i;
5041 	      sum_i0 += H[i1][l  ].r*S[j1][l].i + H[i1][l  ].i*S[j1][l].r;
5042 
5043 	      sum_r1 += H[i1][l+n].r*S[j1][l].r - H[i1][l+n].i*S[j1][l].i;
5044 	      sum_i1 += H[i1][l+n].r*S[j1][l].i + H[i1][l+n].i*S[j1][l].r;
5045 	    }
5046 
5047 	    C[jj1][i1].r = sum_r0;
5048 	    C[jj1][i1].i = sum_i0;  jj1++;
5049 
5050 	    C[jj1][i1].r = sum_r1;
5051 	    C[jj1][i1].i = sum_i1;  jj1++;
5052 
5053 	  }
5054 	}
5055 
5056       } /* #pragma omp parallel */
5057 
5058       /* M1 * U^+ H * U * M1 */
5059 
5060 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,ii1,j1,sum_r0,sum_i0,sum_r1,sum_i1,l)
5061       {
5062 	/* get info. on OpenMP */
5063 
5064 	OMPID = omp_get_thread_num();
5065 	Nthrds = omp_get_num_threads();
5066 	Nprocs = omp_get_num_procs();
5067 
5068 	for (j1=1+OMPID; j1<=2*n; j1+=Nthrds){
5069 
5070 	  ii1 = 1;
5071 	  for (i1=1; i1<=n; i1++){
5072 
5073 	    sum_r0 = 0.0;
5074 	    sum_i0 = 0.0;
5075 
5076 	    sum_r1 = 0.0;
5077 	    sum_i1 = 0.0;
5078 
5079 	    for (l=1; l<=n; l++){
5080 
5081 	      sum_r0 +=  S[i1][l].r*C[j1][l  ].r + S[i1][l].i*C[j1][l  ].i;
5082 	      sum_i0 +=  S[i1][l].r*C[j1][l  ].i - S[i1][l].i*C[j1][l  ].r;
5083 
5084 	      sum_r1 +=  S[i1][l].r*C[j1][l+n].r + S[i1][l].i*C[j1][l+n].i;
5085 	      sum_i1 +=  S[i1][l].r*C[j1][l+n].i - S[i1][l].i*C[j1][l+n].r;
5086 	    }
5087 
5088 	    H[ii1][j1].r = sum_r0;
5089 	    H[ii1][j1].i = sum_i0; ii1++;
5090 
5091 	    H[ii1][j1].r = sum_r1;
5092 	    H[ii1][j1].i = sum_i1; ii1++;
5093 
5094 	  }
5095 	}
5096 
5097       } /* #pragma omp parallel */
5098 
5099       /* H to C */
5100 
5101       for (i1=1; i1<=2*n; i1++){
5102 	for (j1=1; j1<=2*n; j1++){
5103 	  C[i1][j1].r = H[i1][j1].r;
5104 	  C[i1][j1].i = H[i1][j1].i;
5105 	}
5106       }
5107 
5108       /* penalty for ill-conditioning states */
5109 
5110       EV_cut0 = Threshold_OLP_Eigen;
5111 
5112       for (i1=1; i1<=n; i1++){
5113 
5114 	if (koS[i1]<EV_cut0){
5115 	  C[2*i1-1][2*i1-1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
5116 	  C[2*i1  ][2*i1  ].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
5117 	}
5118 
5119 	/* cutoff the interaction between the ill-conditioned state */
5120 
5121 	if (1.0e+3<C[2*i1-1][2*i1-1].r){
5122 	  for (j1=1; j1<=2*n; j1++){
5123 	    C[2*i1-1][j1    ] = Complex(0.0,0.0);
5124 	    C[j1    ][2*i1-1] = Complex(0.0,0.0);
5125 	    C[2*i1  ][j1    ] = Complex(0.0,0.0);
5126 	    C[j1    ][2*i1  ] = Complex(0.0,0.0);
5127 	  }
5128 	  C[2*i1-1][2*i1-1] = Complex(1.0e+4,0.0);
5129 	  C[2*i1  ][2*i1  ] = Complex(1.0e+4,0.0);
5130 	}
5131       }
5132 
5133       /* solve eigenvalue problem */
5134 
5135       n1 = 2*n;
5136       EigenBand_lapack(C,ko,n1,n1,1);
5137 
5138       for (i1=1; i1<=2*n; i1++) EIGEN[kloop][i1] = ko[i1];
5139 
5140       /****************************************************
5141 	   Transformation to the original eigenvectors.
5142 	   NOTE JRCAT-244p and JAIST-2122p
5143 	   C = U * lambda^{-1/2} * D
5144       ****************************************************/
5145 
5146       /* transpose */
5147       for (i1=1; i1<=2*n; i1++){
5148 	for (j1=i1+1; j1<=2*n; j1++){
5149 	  Ctmp1 = C[i1][j1];
5150 	  Ctmp2 = C[j1][i1];
5151 	  C[i1][j1] = Ctmp2;
5152 	  C[j1][i1] = Ctmp1;
5153 	}
5154       }
5155 
5156       /* transpose */
5157       for (i1=1; i1<=n; i1++){
5158 	for (j1=i1+1; j1<=n; j1++){
5159 	  Ctmp1 = S[i1][j1];
5160 	  Ctmp2 = S[j1][i1];
5161 	  S[i1][j1] = Ctmp2;
5162 	  S[j1][i1] = Ctmp1;
5163 	}
5164       }
5165 
5166 #pragma omp parallel shared(C,S,H,n,n1) private(OMPID,Nthrds,Nprocs,i1,j1,l1,sum_r0,sum_i0,sum_r1,sum_i1,l)
5167       {
5168 	/* get info. on OpenMP */
5169 
5170 	OMPID = omp_get_thread_num();
5171 	Nthrds = omp_get_num_threads();
5172 	Nprocs = omp_get_num_procs();
5173 
5174 	for (j1=1+OMPID; j1<=n1; j1+=Nthrds){
5175 	  for (i1=1; i1<=n; i1++){
5176 
5177 	    sum_r0 = 0.0;
5178 	    sum_i0 = 0.0;
5179 
5180 	    sum_r1 = 0.0;
5181 	    sum_i1 = 0.0;
5182 
5183 	    l1 = 1;
5184 	    for (l=1; l<=n; l++){
5185 
5186 	      sum_r0 += S[i1][l].r*C[j1][l1].r
5187 		       -S[i1][l].i*C[j1][l1].i;
5188 	      sum_i0 += S[i1][l].r*C[j1][l1].i
5189 		       +S[i1][l].i*C[j1][l1].r; l1++;
5190 
5191 	      sum_r1 += S[i1][l].r*C[j1][l1].r
5192 		       -S[i1][l].i*C[j1][l1].i;
5193 	      sum_i1 += S[i1][l].r*C[j1][l1].i
5194 	 	       +S[i1][l].i*C[j1][l1].r; l1++;
5195 	    }
5196 
5197 	    H[j1][i1  ].r = sum_r0;
5198 	    H[j1][i1  ].i = sum_i0;
5199 
5200 	    H[j1][i1+n].r = sum_r1;
5201 	    H[j1][i1+n].i = sum_i1;
5202 	  }
5203 	}
5204 
5205       } /* #pragma omp parallel */
5206 
5207     } /* if (0<=kloop) */
5208 
5209     /****************************************************
5210         store LDOS
5211     ****************************************************/
5212 
5213     kloop = arpo[myid];
5214 
5215     if (0<=kloop){
5216 
5217       k1 = T_KGrids1[kloop];
5218       k2 = T_KGrids2[kloop];
5219       k3 = T_KGrids3[kloop];
5220 
5221       i = Ti_KGrids1[kloop];
5222       j = Tj_KGrids2[kloop];
5223       k = Tk_KGrids3[kloop];
5224 
5225       for (l=iemin; l<=iemax; l++){
5226 
5227 	/* calculate SD */
5228 
5229 #pragma omp parallel shared(TmpS,SD,H,l,MP,Spe_Total_CNO,WhatSpecies,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,theta,phi,sit,cot,sip,cop,d0,d1,d2,d3,GB_AN,wanB,tnoB,Bnum,i1,j1,u2,v2,uv,vu,Redum,Imdum,tmp1,tmp2,tmp3)
5230 	{
5231 
5232 	  /* get info. on OpenMP */
5233 
5234 	  OMPID = omp_get_thread_num();
5235 	  Nthrds = omp_get_num_threads();
5236 	  Nprocs = omp_get_num_procs();
5237 
5238 	  for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
5239 
5240 	    wanA = WhatSpecies[GA_AN];
5241 	    tnoA = Spe_Total_CNO[wanA];
5242 	    Anum = MP[GA_AN];
5243 	    theta = Angle0_Spin[GA_AN];
5244 	    phi   = Angle1_Spin[GA_AN];
5245 	    sit = sin(theta);
5246 	    cot = cos(theta);
5247 	    sip = sin(phi);
5248 	    cop = cos(phi);
5249 
5250             for (i1=0; i1<tnoA; i1++){
5251 
5252 	      d0 = 0.0;
5253 	      d1 = 0.0;
5254 	      d2 = 0.0;
5255 	      d3 = 0.0;
5256 
5257 	      for (GB_AN=1; GB_AN<=atomnum; GB_AN++){
5258 
5259 		wanB = WhatSpecies[GB_AN];
5260 		tnoB = Spe_Total_CNO[wanB];
5261 		Bnum = MP[GB_AN];
5262 
5263 		for (j1=0; j1<tnoB; j1++){
5264 
5265 		  /* Re11 */
5266 		  u2 = H[l][Anum+i1].r*H[l][Bnum+j1].r;
5267 		  v2 = H[l][Anum+i1].i*H[l][Bnum+j1].i;
5268 		  uv = H[l][Anum+i1].r*H[l][Bnum+j1].i;
5269 		  vu = H[l][Anum+i1].i*H[l][Bnum+j1].r;
5270 		  Redum = (u2 + v2);
5271 		  Imdum = (uv - vu);
5272                   d0 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
5273 
5274 		  /* Re22 */
5275 		  u2 = H[l][Anum+i1+n].r*H[l][Bnum+j1+n].r;
5276 		  v2 = H[l][Anum+i1+n].i*H[l][Bnum+j1+n].i;
5277 		  uv = H[l][Anum+i1+n].r*H[l][Bnum+j1+n].i;
5278 		  vu = H[l][Anum+i1+n].i*H[l][Bnum+j1+n].r;
5279 		  Redum = (u2 + v2);
5280 		  Imdum = (uv - vu);
5281                   d1 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
5282 
5283 		  /* Re12 */
5284 		  u2 = H[l][Anum+i1].r*H[l][Bnum+j1+n].r;
5285 		  v2 = H[l][Anum+i1].i*H[l][Bnum+j1+n].i;
5286 		  uv = H[l][Anum+i1].r*H[l][Bnum+j1+n].i;
5287 		  vu = H[l][Anum+i1].i*H[l][Bnum+j1+n].r;
5288 		  Redum = (u2 + v2);
5289 		  Imdum = (uv - vu);
5290                   d2 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
5291 
5292 		  /* Im12
5293 		     conjugate complex of Im12 due to difference in the definition
5294 		     between density matrix and charge density
5295 		  */
5296 
5297                   d3 += -(Imdum*TmpS[Anum+i1][Bnum+j1].r + Redum*TmpS[Anum+i1][Bnum+j1].i);
5298 
5299 		} /* j1 */
5300 	      } /* GB_AN */
5301 
5302 	      tmp1 = 0.5*(d0 + d1);
5303 	      tmp2 = 0.5*cot*(d0 - d1);
5304 	      tmp3 = (d2*cop - d3*sip)*sit;
5305 
5306 	      SD[2*(Anum-1)+i1]      = (float)(tmp1 + tmp2 + tmp3);
5307 	      SD[2*(Anum-1)+tnoA+i1] = (float)(tmp1 - tmp2 - tmp3);
5308 
5309 	    } /* i1 */
5310 	  } /* GA_AN */
5311 
5312 	} /* #pragma omp parallel */
5313 
5314 	/*********************************************
5315                  writing a binary file
5316 	*********************************************/
5317 
5318 	i_vec[0] = i;
5319 	i_vec[1] = j;
5320 	i_vec[2] = k;
5321 
5322 	fwrite(i_vec,sizeof(int),3,fp_ev);
5323 	fwrite(&SD[0],sizeof(float),2*n,fp_ev);
5324 
5325       } /* l */
5326     } /* if (kloop<=0) */
5327 
5328     /*********************************************
5329         calculation of partial density matrix
5330     *********************************************/
5331 
5332     kloop = arpo[myid];
5333 
5334     if (0<=kloop && cal_partial_charge) {
5335 
5336       k1 = T_KGrids1[kloop];
5337       k2 = T_KGrids2[kloop];
5338       k3 = T_KGrids3[kloop];
5339 
5340       for (l=iemin; l<=iemax; l++){
5341 
5342 	x1 = (ko[l] - ChemP)*Beta;
5343 	if (x1<=-max_x) x1 = -max_x;
5344 	if (max_x<=x1)  x1 = max_x;
5345 	FermiF1 = 1.0/(1.0 + exp(x1));
5346 
5347 	x2 = (ko[l] - (ChemP+ene_win_partial_charge))*Beta;
5348 	if (x2<=-max_x) x2 = -max_x;
5349 	if (max_x<=x2)  x2 = max_x;
5350 	FermiF2 = 1.0/(1.0 + exp(x2));
5351 
5352 	diffF = fabs(FermiF1-FermiF2);
5353 
5354 	for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
5355 
5356 	  wanA = WhatSpecies[GA_AN];
5357 	  tnoA = Spe_Total_CNO[wanA];
5358 	  Anum = MP[GA_AN];
5359 
5360 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
5361 	    GB_AN = natn[GA_AN][LB_AN];
5362 	    Rn = ncn[GA_AN][LB_AN];
5363 	    wanB = WhatSpecies[GB_AN];
5364 	    tnoB = Spe_Total_CNO[wanB];
5365 	    Bnum = MP[GB_AN];
5366 
5367 	    l1 = atv_ijk[Rn][1];
5368 	    l2 = atv_ijk[Rn][2];
5369 	    l3 = atv_ijk[Rn][3];
5370 	    kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
5371 
5372 	    si = sin(2.0*PI*kRn);
5373 	    co = cos(2.0*PI*kRn);
5374 
5375 	    for (i=0; i<tnoA; i++){
5376 	      for (j=0; j<tnoB; j++){
5377 
5378 		/* Re11 */
5379 		u2 = H[l][Anum+i].r*H[l][Bnum+j].r;
5380 		v2 = H[l][Anum+i].i*H[l][Bnum+j].i;
5381 		uv = H[l][Anum+i].r*H[l][Bnum+j].i;
5382 		vu = H[l][Anum+i].i*H[l][Bnum+j].r;
5383 		tmp = co*(u2+v2) - si*(uv-vu);
5384 		PDM[0][GA_AN][LB_AN][i][j] += diffF*tmp;
5385 
5386 		/* Re22 */
5387 		u2 = H[l][Anum+i+n].r*H[l][Bnum+j+n].r;
5388 		v2 = H[l][Anum+i+n].i*H[l][Bnum+j+n].i;
5389 		uv = H[l][Anum+i+n].r*H[l][Bnum+j+n].i;
5390 		vu = H[l][Anum+i+n].i*H[l][Bnum+j+n].r;
5391 		tmp = co*(u2+v2) - si*(uv-vu);
5392 		PDM[1][GA_AN][LB_AN][i][j] += diffF*tmp;
5393 
5394 	      }
5395 	    }
5396 	  }
5397 	}
5398       }
5399     }
5400 
5401     /****************************************************
5402        MPI: EIGEN
5403     ****************************************************/
5404 
5405     for (ID=0; ID<numprocs; ID++){
5406 
5407       kloop = arpo[ID];
5408 
5409       if (0<=kloop){
5410         MPI_Bcast(&EIGEN[kloop][0], 2*n+1, MPI_DOUBLE, ID, mpi_comm_level1);
5411       }
5412     }
5413 
5414   } /* kloop0 */
5415 
5416   /****************************************************
5417      MPI:
5418 
5419      PDM
5420   ****************************************************/
5421 
5422   if (cal_partial_charge) {
5423 
5424     /* MPI: PDM */
5425 
5426     for (spin=0; spin<=1; spin++){
5427       for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
5428 
5429 	wanA = WhatSpecies[GA_AN];
5430 	tnoA = Spe_Total_CNO[wanA];
5431 
5432 	for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
5433 
5434 	  GB_AN = natn[GA_AN][LB_AN];
5435 	  wanB = WhatSpecies[GB_AN];
5436 	  tnoB = Spe_Total_CNO[wanB];
5437 
5438 	  for (i=0; i<tnoA; i++){
5439 
5440 	    MPI_Allreduce(&PDM[spin][GA_AN][LB_AN][i][0],
5441 			  &TPDM[spin][GA_AN][LB_AN][i][0],
5442 			  tnoB, MPI_DOUBLE, MPI_SUM,
5443 			  mpi_comm_level1);
5444 
5445 	    for (j=0; j<tnoB; j++){
5446               TPDM[spin][GA_AN][LB_AN][i][j] /= (double)T_knum;
5447 	    }
5448 	  }
5449 	}
5450       }
5451     }
5452 
5453     /* store TPDM to Partial_DM */
5454 
5455     for (spin=0; spin<=1; spin++){
5456       for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
5457 
5458         MA_AN = F_G2M[GA_AN];
5459 	wanA = WhatSpecies[GA_AN];
5460 	tnoA = Spe_Total_CNO[wanA];
5461 
5462         if (1<=MA_AN && MA_AN<=Matomnum){
5463 
5464 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
5465 
5466 	    GB_AN = natn[GA_AN][LB_AN];
5467 	    wanB = WhatSpecies[GB_AN];
5468 	    tnoB = Spe_Total_CNO[wanB];
5469 
5470 	    for (i=0; i<tnoA; i++){
5471 	      for (j=0; j<tnoB; j++){
5472 		Partial_DM[spin][MA_AN][LB_AN][i][j] = TPDM[spin][GA_AN][LB_AN][i][j];
5473 	      }
5474 	    }
5475 	  }
5476 	}
5477       }
5478     }
5479   }
5480 
5481   /****************************************************
5482      write: EIGEN
5483   ****************************************************/
5484 
5485   if (myid==Host_ID){
5486     if (fp_eig) {
5487 
5488       fprintf(fp_eig,"irange      %d %d\n",iemin,iemax);
5489       fprintf(fp_eig,"<Eigenvalues\n");
5490 
5491       for (kloop=0; kloop<T_knum; kloop++){
5492         for (spin=0; spin<=1; spin++){
5493 
5494           i = Ti_KGrids1[kloop];
5495           j = Tj_KGrids2[kloop];
5496           k = Tk_KGrids3[kloop];
5497 
5498           fprintf(fp_eig,"%d %d %d ",i,j,k);
5499 	  for (ie=iemin; ie<=iemax; ie++) {
5500 	    fprintf(fp_eig,"%lf ",EIGEN[kloop][ie]);
5501 	  }
5502 	  fprintf(fp_eig,"\n");
5503 
5504 	}
5505       }
5506 
5507       fprintf(fp_eig,"Eigenvalues>\n");
5508 
5509     } /* fp_eig */
5510   } /* if (myid==Host_ID) */
5511 
5512 Finishing:
5513 
5514   if (myid==Host_ID){
5515     if (fp_eig) {
5516       fclose(fp_eig);
5517     }
5518   }
5519 
5520   if (fp_ev) {
5521     fclose(fp_ev);
5522   }
5523 
5524   /****************************************************
5525      merge *.Dos.vec#
5526   ****************************************************/
5527 
5528   MPI_Barrier(mpi_comm_level1);
5529 
5530   if (myid==Host_ID){
5531 
5532     sprintf(file_ev0,"%s%s.Dos.vec",filepath,filename);
5533     if ( (fp_ev0=fopen(file_ev0,"w"))==NULL ) {
5534       printf("cannot open a file %s\n",file_ev0);
5535     }
5536 
5537     for (ID=0; ID<numprocs; ID++){
5538 
5539       sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,ID);
5540 
5541       if ( 1<=num_allocated_k[ID] ){
5542 
5543 	if ( (fp_ev=fopen(file_ev,"r"))==NULL ) {
5544 	  printf("cannot open a file %s\n",file_ev);
5545 	}
5546 
5547         for (k=0; k<num_allocated_k[ID]; k++){
5548 	  for (l=iemin; l<=iemax; l++){
5549 	    fread( i_vec,  sizeof(int),  3,fp_ev);
5550 	    fwrite(i_vec,  sizeof(int),  3,fp_ev0);
5551 	    fread( &SD[0], sizeof(float),2*n,fp_ev);
5552 	    fwrite(&SD[0], sizeof(float),2*n,fp_ev0);
5553 	  }
5554 	}
5555 
5556 	fclose(fp_ev);
5557       }
5558     }
5559 
5560     fclose(fp_ev0);
5561   }
5562 
5563   MPI_Barrier(mpi_comm_level1);
5564 
5565   /* delete files */
5566   if (myid==Host_ID){
5567     for (ID=0; ID<numprocs; ID++){
5568       sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,ID);
5569       remove(file_ev);
5570     }
5571   }
5572 
5573   /****************************************************
5574           Fermi surface viewable by XCrysDen
5575   ****************************************************/
5576 
5577   if (myid==Host_ID){
5578 
5579     int ii,jj,kk;
5580     int ***TD2OD;
5581     double x0,y0,z0;
5582     char file_fermi[YOUSO10];
5583     FILE *fp;
5584 
5585     /* allocation of TD2OD */
5586 
5587     TD2OD = (int***)malloc(sizeof(int**)*knum_i);
5588     for (i=0; i<knum_i; i++){
5589       TD2OD[i] = (int**)malloc(sizeof(int*)*knum_j);
5590       for (j=0; j<knum_j; j++){
5591         TD2OD[i][j] = (int*)malloc(sizeof(int)*knum_k);
5592 	for (k=0; k<knum_k; k++) TD2OD[i][j][k] = 0;
5593       }
5594     }
5595 
5596     kloop = 0;
5597     for (i=0; i<knum_i; i++){
5598       for (j=0; j<knum_j; j++){
5599 	for (k=0; k<knum_k; k++){
5600           TD2OD[i][j][k] = kloop;
5601           kloop++;
5602 	}
5603       }
5604     }
5605 
5606     /* make bxsf files */
5607 
5608     sprintf(file_fermi,"%s%s.FermiSurf.bxsf",filepath,filename);
5609 
5610     if ((fp=fopen(file_fermi,"w")) != NULL){
5611 
5612       fprintf(fp,"BEGIN_INFO\n");
5613       fprintf(fp,"Fermi Energy: %15.12f\n",ChemP);
5614       fprintf(fp,"END_INFO\n\n");
5615 
5616       fprintf(fp,"BEGIN_BLOCK_BANDGRID_3D\n");
5617       fprintf(fp,"comment_line\n");
5618       fprintf(fp,"BEGIN_BANDGRID_3D_simple_example\n");
5619       fprintf(fp,"%d\n",2*n);
5620       fprintf(fp,"%d %d %d\n",knum_i+1,knum_j+1,knum_k+1);
5621 
5622       fprintf(fp,"0.0 0.0 0.0\n");
5623       fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[1][1],rtv[1][2],rtv[1][3]);
5624       fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[2][1],rtv[2][2],rtv[2][3]);
5625       fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[3][1],rtv[3][2],rtv[3][3]);
5626 
5627       for (i1=1; i1<=2*n; i1++){
5628 
5629 	fprintf(fp,"BAND:  %d\n",i1);
5630 
5631 	for (i=0; i<=knum_i; i++){
5632 	  ii = i%knum_i;
5633 	  for (j=0; j<=knum_j; j++){
5634 	    jj = j%knum_j;
5635 	    for (k=0; k<=knum_k; k++){
5636 	      kk = k%knum_k;
5637 	      kloop = TD2OD[ii][jj][kk];
5638 	      fprintf(fp,"%10.5f ",EIGEN[kloop][i1]);
5639 	    }
5640 	    fprintf(fp,"\n");
5641 	  }
5642 
5643 	  if (i!=knum_i)  fprintf(fp,"\n");
5644 
5645 	}
5646       }
5647 
5648       fprintf(fp,"END_BANDGRID_3D\n");
5649       fprintf(fp,"END_BLOCK_BANDGRID_3D\n");
5650 
5651       fclose(fp);
5652     }
5653     else{
5654       printf("Failure in saving bxsf files.\n");
5655     }
5656 
5657     /* free TD2OD */
5658     for (i=0; i<knum_i; i++){
5659       for (j=0; j<knum_j; j++){
5660         free(TD2OD[i][j]);
5661       }
5662       free(TD2OD[i]);
5663     }
5664     free(TD2OD);
5665 
5666   } /* if (myid==Host_ID) */
5667 
5668   /****************************************************
5669                        free arrays
5670   ****************************************************/
5671 
5672   free(MP);
5673   free(arpo);
5674   free(num_allocated_k);
5675 
5676   free(ko);
5677   free(koS);
5678 
5679   for (j=0; j<n2; j++){
5680     free(H[j]);
5681   }
5682   free(H);
5683 
5684   free(SD);
5685 
5686   for (i=0; i<(n+1); i++){
5687     free(TmpS[i]);
5688   }
5689   free(TmpS);
5690 
5691   free(M1);
5692 
5693   for (j=0; j<n2; j++){
5694     free(C[j]);
5695   }
5696   free(C);
5697 
5698   free(KGrids1); free(KGrids2);free(KGrids3);
5699 
5700   for (j=0; j<n2; j++){
5701     free(TmpM[j]);
5702   }
5703   free(TmpM);
5704 
5705   /* partial density matrix */
5706 
5707   if (cal_partial_charge){
5708 
5709     /* PDM */
5710 
5711     for (k=0; k<=1; k++){
5712 
5713       FNAN[0] = 0;
5714 
5715       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
5716 
5717 	if (Gc_AN==0){
5718 	  tno0 = 1;
5719 	}
5720 	else{
5721 	  Cwan = WhatSpecies[Gc_AN];
5722 	  tno0 = Spe_Total_NO[Cwan];
5723 	}
5724 
5725 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
5726 
5727 	  if (Gc_AN==0){
5728 	    tno1 = 1;
5729 	  }
5730 	  else{
5731 	    Gh_AN = natn[Gc_AN][h_AN];
5732 	    Hwan = WhatSpecies[Gh_AN];
5733 	    tno1 = Spe_Total_NO[Hwan];
5734 	  }
5735 
5736 	  for (i=0; i<tno0; i++){
5737 	    free(PDM[k][Gc_AN][h_AN][i]);
5738 	  }
5739           free(PDM[k][Gc_AN][h_AN]);
5740 	}
5741         free(PDM[k][Gc_AN]);
5742       }
5743       free(PDM[k]);
5744     }
5745     free(PDM);
5746 
5747     /* TPDM */
5748 
5749     for (k=0; k<=1; k++){
5750 
5751       FNAN[0] = 0;
5752 
5753       for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
5754 
5755 	if (Gc_AN==0){
5756 	  tno0 = 1;
5757 	}
5758 	else{
5759 	  Cwan = WhatSpecies[Gc_AN];
5760 	  tno0 = Spe_Total_NO[Cwan];
5761 	}
5762 
5763 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
5764 
5765 	  if (Gc_AN==0){
5766 	    tno1 = 1;
5767 	  }
5768 	  else{
5769 	    Gh_AN = natn[Gc_AN][h_AN];
5770 	    Hwan = WhatSpecies[Gh_AN];
5771 	    tno1 = Spe_Total_NO[Hwan];
5772 	  }
5773 
5774 	  for (i=0; i<tno0; i++){
5775 	    free(TPDM[k][Gc_AN][h_AN][i]);
5776 	  }
5777           free(TPDM[k][Gc_AN][h_AN]);
5778 	}
5779         free(TPDM[k][Gc_AN]);
5780       }
5781       free(TPDM[k]);
5782     }
5783     free(TPDM);
5784   }
5785 
5786   /* non-spin-orbit coupling and non-LDA+U */
5787   if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
5788       && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
5789     free(Dummy_ImNL[0][0][0][0]);
5790     free(Dummy_ImNL[0][0][0]);
5791     free(Dummy_ImNL[0][0]);
5792     free(Dummy_ImNL[0]);
5793     free(Dummy_ImNL);
5794   }
5795 
5796   free(T_KGrids1);
5797   free(T_KGrids2);
5798   free(T_KGrids3);
5799 
5800   free(Ti_KGrids1);
5801   free(Tj_KGrids2);
5802   free(Tk_KGrids3);
5803 
5804   for (j=0; j<T_knum; j++){
5805     free(EIGEN[j]);
5806   }
5807   free(EIGEN);
5808 
5809   /* for elapsed time */
5810 
5811   dtime(&TEtime);
5812   time0 = TEtime - TStime;
5813   return time0;
5814 }
5815