1 /**********************************************************************
2   DFT.c:
3 
4      DFT.c is a subroutine to perform self-consistent calculations
5      within LDA or GGA.
6 
7   Log of DFT.c:
8 
9      22/Nov/2001  Released by T.Ozaki
10 
11 ***********************************************************************/
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <math.h>
16 #include <time.h>
17 #include "openmx_common.h"
18 #include "Inputtools.h"
19 #include "mpi.h"
20 
21 
22 #include "tran_prototypes.h"
23 
24 /*---------- added by TOYODA 15/FEB/2010 */
25 #include "exx_debug.h"
26 #include "exx_rhox.h"
27 /*---------- until here */
28 
29 
30 static void Output_Energies_Forces(FILE *fp);
31 static double dUele;
32 void Read_SCF_keywords();
33 
34 
DFT(int MD_iter,int Cnt_Now)35 double DFT(int MD_iter, int Cnt_Now)
36 {
37   static int firsttime=1;
38   double ECE[20],Eele0[2],Eele1[2];
39   double pUele,ChemP_e0[2];
40   double Norm1,Norm2,Norm3,Norm4,Norm5;
41   double TotalE;
42   double S_coordinate[3];
43   double tmp,tmp0;
44   int Cnt_kind,Calc_CntOrbital_ON,spin,spinmax,m;
45   int SCF_iter,SCF_iter_shift,SCF_MAX;
46   int i,j,k,fft_charge_flag,M1N;
47   int orbitalOpt_iter,LSCF_iter,OrbOpt_end;
48   int n,n2,wanA,ETemp_controller;
49   double time0,time1,time2,time3,time4;
50   double time5,time6,time7,time8,time9;
51   double time10,time11,time12,time13;
52   double time14,time15,etime;
53   double x,y,z;
54   int po3,po,TRAN_Poisson_flag2;
55   int  SucceedReadingDMfile,My_SucceedReadingDMfile;
56   char file_DFTSCF[YOUSO10] = ".DFTSCF";
57   char file_OrbOpt[YOUSO10] = ".OrbOpt";
58   char operate[200];
59   char *s_vec[20];
60   double TStime,TEtime;
61   FILE *fp_DFTSCF;
62   FILE *fp_OrbOpt;
63 
64   double ***Cluster_ReCoes;
65   double **Cluster_ko;
66 
67   double *ReVk;
68   double *ImVk;
69   double *ReRhoAtomk;
70   double *ImRhoAtomk;
71   double ***ReRhok;
72   double ***ImRhok;
73   double **Residual_ReRhok;
74   double **Residual_ImRhok;
75 
76   /* for Band_DFT_Col */
77 
78   int ii,ij,ik;
79   int T_knum,size_H1;
80   int *MP;
81   int *order_GA;
82   int *My_NZeros;
83   int *SP_NZeros;
84   int *SP_Atoms;
85   double *ko;
86   double *koS;
87   dcomplex **H_Band_Col;
88   dcomplex **S_Band;
89   dcomplex **C_Band_Col;
90   dcomplex *BLAS_S;
91   double *H1_Band_Col;
92   double *S1_Band_Col;
93   double *CDM1_Band_Col;
94   double *EDM1_Band_Col;
95 
96   int ***k_op;
97   int *T_k_op;
98   int **T_k_ID;
99   double *T_KGrids1,*T_KGrids2,*T_KGrids3;
100   double ***EIGEN_Band_Col;
101 
102   int numprocs0,myid0;
103   int numprocs1,myid1;
104 
105   int Num_Comm_World1;
106   int Num_Comm_World2;
107 
108   int myworld1;
109   int myworld2;
110 
111   int *NPROCS_ID1;
112   int *Comm_World1;
113   int *NPROCS_WD1;
114   int *Comm_World_StartID1;
115   MPI_Comm *MPI_CommWD1;
116 
117   int *NPROCS_ID2;
118   int *NPROCS_WD2;
119   int *Comm_World2;
120   int *Comm_World_StartID2;
121   MPI_Comm *MPI_CommWD2;
122 
123   /* for orbital optimization */
124   double ****His_CntCoes,****His_D_CntCoes;
125   double ****His_CntCoes_Species,****His_D_CntCoes_Species;
126   double **OrbOpt_Hessian,*His_OrbOpt_Etot;
127   int dim_H,al,p,wan;
128 
129   /* BLACS and PBLAS by ScaLAPACK */
130 
131 #ifdef scalapack
132   int numprocs2,myid2;
133   int ZERO=0, ONE=1;
134   int info;
135   dcomplex *Hs_Band_Col,*Ss_Band_Col,*Cs_Band_Col;
136   double *Hs_Cluster,*Ss_Cluster,*Cs_Cluster;
137   int nblk_m;
138 #endif
139 
140   dtime(&TStime);
141 
142   /* MPI */
143 
144   MPI_Comm_size(mpi_comm_level1,&numprocs0);
145   MPI_Comm_rank(mpi_comm_level1,&myid0);
146 
147   /*******************************************************
148    allocation of arrays for Cluster and Band methods
149   *******************************************************/
150 
151   /* band and collinear calculation */
152 
153   if (Solver==3 && SpinP_switch<=1){
154 
155     n = 0;
156     for (i=1; i<=atomnum; i++){
157       wanA = WhatSpecies[i];
158       n += Spe_Total_CNO[wanA];
159     }
160     n2 = n + 2;
161 
162     MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
163     order_GA = (int*)malloc(sizeof(int)*(List_YOUSO[1]+1));
164 
165     My_NZeros = (int*)malloc(sizeof(int)*numprocs0);
166     SP_NZeros = (int*)malloc(sizeof(int)*numprocs0);
167     SP_Atoms = (int*)malloc(sizeof(int)*numprocs0);
168 
169     ko = (double*)malloc(sizeof(double)*(n+1));
170     koS = (double*)malloc(sizeof(double)*(n+1));
171 
172     H_Band_Col = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
173     for (j=0; j<n+1; j++){
174       H_Band_Col[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
175     }
176 
177     /* BLACS and PBLAS by ADO */
178 
179 #ifndef scalapack
180     S_Band = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
181     for (i=0; i<n+1; i++){
182       S_Band[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
183     }
184 
185     C_Band_Col = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
186     for (j=0; j<n+1; j++){
187       C_Band_Col[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
188     }
189 
190     BLAS_S = (dcomplex*)malloc(sizeof(dcomplex)*n*n);
191 #endif
192 
193     /* find size_H1 */
194 
195     size_H1 = Get_OneD_HS_Col(0, H[0], &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
196 
197     H1_Band_Col   = (double*)malloc(sizeof(double)*size_H1);
198     S1_Band_Col   = (double*)malloc(sizeof(double)*size_H1);
199     CDM1_Band_Col = (double*)malloc(sizeof(double)*size_H1);
200     EDM1_Band_Col = (double*)malloc(sizeof(double)*size_H1);
201 
202     k_op = (int***)malloc(sizeof(int**)*Kspace_grid1);
203     for (i=0;i<Kspace_grid1; i++) {
204       k_op[i] = (int**)malloc(sizeof(int*)*Kspace_grid2);
205       for (j=0;j<Kspace_grid2; j++) {
206 	k_op[i][j] = (int*)malloc(sizeof(int)*Kspace_grid3);
207       }
208     }
209 
210     for (i=0; i<Kspace_grid1; i++) {
211       for (j=0; j<Kspace_grid2; j++) {
212 	for (k=0; k<Kspace_grid3; k++) {
213 	  k_op[i][j][k] = -999;
214 	}
215       }
216     }
217 
218     for (i=0; i<Kspace_grid1; i++) {
219       for (j=0; j<Kspace_grid2; j++) {
220 	for (k=0; k<Kspace_grid3; k++) {
221 
222 	  if ( k_op[i][j][k]==-999 ) {
223 	    k_inversion(i,j,k,Kspace_grid1,Kspace_grid2,Kspace_grid3,&ii,&ij,&ik);
224 	    if ( i==ii && j==ij && k==ik ) {
225 	      k_op[i][j][k]    = 1;
226 	    }
227 
228 	    else {
229 	      k_op[i][j][k]    = 2;
230 	      k_op[ii][ij][ik] = 0;
231 	    }
232 	  }
233 	} /* k */
234       } /* j */
235     } /* i */
236 
237     /* find T_knum */
238 
239     T_knum = 0;
240     for (i=0; i<Kspace_grid1; i++) {
241       for (j=0; j<Kspace_grid2; j++) {
242 	for (k=0; k<Kspace_grid3; k++) {
243 	  if (0<k_op[i][j][k]){
244 	    T_knum++;
245 	  }
246 	}
247       }
248     }
249 
250     /* Monkhorst-Pack k-points */
251     if (way_of_kpoint==2) T_knum = num_non_eq_kpt;
252 
253     T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
254     T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
255     T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
256     T_k_op    = (int*)malloc(sizeof(int)*T_knum);
257 
258     T_k_ID    = (int**)malloc(sizeof(int*)*2);
259     for (i=0; i<2; i++){
260       T_k_ID[i] = (int*)malloc(sizeof(int)*T_knum);
261     }
262 
263     EIGEN_Band_Col  = (double***)malloc(sizeof(double**)*List_YOUSO[23]);
264     for (i=0; i<List_YOUSO[23]; i++){
265       EIGEN_Band_Col[i] = (double**)malloc(sizeof(double*)*T_knum);
266       for (j=0; j<T_knum; j++){
267 	EIGEN_Band_Col[i][j] = (double*)malloc(sizeof(double)*(n+1));
268 	for (k=0; k<(n+1); k++) EIGEN_Band_Col[i][j][k] = 1.0e+5;
269       }
270     }
271 
272     /***********************************************
273       allocation of arrays for the first world
274       and
275       make the first level worlds
276     ***********************************************/
277 
278     Num_Comm_World1 = SpinP_switch + 1;
279 
280     NPROCS_ID1 = (int*)malloc(sizeof(int)*numprocs0);
281     Comm_World1 = (int*)malloc(sizeof(int)*numprocs0);
282     NPROCS_WD1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
283     Comm_World_StartID1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
284     MPI_CommWD1 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World1);
285 
286     Make_Comm_Worlds(mpi_comm_level1, myid0, numprocs0, Num_Comm_World1, &myworld1, MPI_CommWD1,
287 		     NPROCS_ID1, Comm_World1, NPROCS_WD1, Comm_World_StartID1);
288 
289     MPI_Comm_size(MPI_CommWD1[myworld1],&numprocs1);
290     MPI_Comm_rank(MPI_CommWD1[myworld1],&myid1);
291 
292     /***********************************************
293         allocation of arrays for the second world
294         and
295         make the second level worlds
296     ***********************************************/
297 
298 #ifndef scalapack
299     if (T_knum<=numprocs1){
300 
301       Num_Comm_World2 = T_knum;
302 
303       NPROCS_ID2 = (int*)malloc(sizeof(int)*numprocs1);
304       Comm_World2 = (int*)malloc(sizeof(int)*numprocs1);
305       NPROCS_WD2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
306       Comm_World_StartID2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
307       MPI_CommWD2 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World2);
308 
309       Make_Comm_Worlds(MPI_CommWD1[myworld1], myid1, numprocs1, Num_Comm_World2, &myworld2, MPI_CommWD2,
310 		       NPROCS_ID2, Comm_World2, NPROCS_WD2, Comm_World_StartID2);
311     }
312 #endif
313 
314 #ifdef scalapack
315 
316     if (T_knum<=numprocs1){
317 
318       Num_Comm_World2 = T_knum;
319 
320       NPROCS_ID2 = (int*)malloc(sizeof(int)*numprocs1);
321       Comm_World2 = (int*)malloc(sizeof(int)*numprocs1);
322       NPROCS_WD2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
323       Comm_World_StartID2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
324       MPI_CommWD2 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World2);
325 
326       Make_Comm_Worlds(MPI_CommWD1[myworld1], myid1, numprocs1, Num_Comm_World2,
327 		       &myworld2, MPI_CommWD2, NPROCS_ID2, Comm_World2,
328 		       NPROCS_WD2, Comm_World_StartID2);
329 
330 
331       MPI_Comm_size(MPI_CommWD2[myworld2],&numprocs2);
332       MPI_Comm_rank(MPI_CommWD2[myworld2],&myid2);
333 
334       np_cols = (int)(sqrt((float)numprocs2));
335       do{
336 	if((numprocs2%np_cols)==0) break;
337 	np_cols--;
338       } while (np_cols>=2);
339       np_rows = numprocs2/np_cols;
340 
341       nblk_m = NBLK;
342       while((nblk_m*np_rows>n || nblk_m*np_cols>n) && (nblk_m > 1)){
343 	nblk_m /= 2;
344       }
345       if(nblk_m<1) nblk_m = 1;
346 
347       MPI_Allreduce(&nblk_m,&nblk,1,MPI_INT,MPI_MIN,mpi_comm_level1);
348 
349       my_prow = myid2/np_cols;
350       my_pcol = myid2%np_cols;
351 
352       na_rows = numroc_(&n, &nblk, &my_prow, &ZERO, &np_rows);
353       na_cols = numroc_(&n, &nblk, &my_pcol, &ZERO, &np_cols );
354 
355       bhandle2 = Csys2blacs_handle(MPI_CommWD2[myworld2]);
356       ictxt2 = bhandle2;
357       Cblacs_gridinit(&ictxt2, "Row", np_rows, np_cols);
358 
359       Ss_Band_Col = (dcomplex*)malloc(sizeof(dcomplex)*na_rows*na_cols);
360       Hs_Band_Col = (dcomplex*)malloc(sizeof(dcomplex)*na_rows*na_cols);
361 
362       MPI_Allreduce(&na_rows,&na_rows_max,1,MPI_INT,MPI_MAX,MPI_CommWD2[myworld2]);
363       MPI_Allreduce(&na_cols,&na_cols_max,1,MPI_INT,MPI_MAX,MPI_CommWD2[myworld2]);
364       Cs_Band_Col = (dcomplex*)malloc(sizeof(dcomplex)*na_rows_max*na_cols_max);
365 
366       descinit_(descS, &n,   &n,   &nblk,  &nblk,  &ZERO, &ZERO, &ictxt2, &na_rows,  &info);
367       descinit_(descH, &n,   &n,   &nblk,  &nblk,  &ZERO, &ZERO, &ictxt2, &na_rows,  &info);
368       descinit_(descC, &n,   &n,   &nblk,  &nblk,  &ZERO, &ZERO, &ictxt2, &na_rows,  &info);
369 
370       /*
371       printf("myid0=%d myid2=%d np2=%d my_pr=%d my_pc=%d na_r=%d na_c=%d np_r=%d np_c=%d nblk=%d T_k=%d ictxt2=%d n=%d\n",
372 	     myid0,myid2,numprocs2,my_prow,my_pcol,na_rows,na_cols,np_rows,np_cols,nblk,T_knum,ictxt2,n);
373       */
374     }
375 
376     else {
377 
378       Num_Comm_World2 = numprocs1;
379 
380       NPROCS_ID2 = (int*)malloc(sizeof(int)*numprocs1);
381       Comm_World2 = (int*)malloc(sizeof(int)*numprocs1);
382       NPROCS_WD2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
383       Comm_World_StartID2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
384       MPI_CommWD2 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World2);
385 
386       Make_Comm_Worlds(MPI_CommWD1[myworld1], myid1, numprocs1, Num_Comm_World2,
387 		       &myworld2, MPI_CommWD2,
388 		       NPROCS_ID2, Comm_World2, NPROCS_WD2, Comm_World_StartID2);
389 
390 
391       MPI_Comm_size(MPI_CommWD2[myworld2],&numprocs2);
392       MPI_Comm_rank(MPI_CommWD2[myworld2],&myid2);
393 
394       np_cols = (int)(sqrt((float)numprocs2));
395       do{
396 	if((numprocs2%np_cols)==0) break;
397 	np_cols--;
398       } while (np_cols>=2);
399       np_rows = numprocs2/np_cols;
400 
401       nblk_m = NBLK;
402       while((nblk_m*np_rows>n || nblk_m*np_cols>n) && (nblk_m > 1)){
403 	nblk_m /= 2;
404       }
405       if(nblk_m<1) nblk_m = 1;
406 
407       MPI_Allreduce(&nblk_m,&nblk,1,MPI_INT,MPI_MIN,mpi_comm_level1);
408 
409       my_prow = myid2/np_cols;
410       my_pcol = myid2%np_cols;
411 
412       na_rows = numroc_(&n, &nblk, &my_prow, &ZERO, &np_rows);
413       na_cols = numroc_(&n, &nblk, &my_pcol, &ZERO, &np_cols );
414 
415       bhandle2 = Csys2blacs_handle(MPI_CommWD2[myworld2]);
416       ictxt2 = bhandle2;
417       Cblacs_gridinit(&ictxt2, "Row", np_rows, np_cols);
418 
419       Ss_Band_Col = (dcomplex*)malloc(sizeof(dcomplex)*na_rows*na_cols);
420       Hs_Band_Col = (dcomplex*)malloc(sizeof(dcomplex)*na_rows*na_cols);
421 
422       MPI_Allreduce(&na_rows,&na_rows_max,1,MPI_INT,MPI_MAX,MPI_CommWD2[myworld2]);
423       MPI_Allreduce(&na_cols,&na_cols_max,1,MPI_INT,MPI_MAX,MPI_CommWD2[myworld2]);
424       Cs_Band_Col = (dcomplex*)malloc(sizeof(dcomplex)*na_rows_max*na_cols_max);
425 
426       descinit_(descS, &n,   &n,   &nblk,  &nblk,  &ZERO, &ZERO, &ictxt2, &na_rows,  &info);
427       descinit_(descH, &n,   &n,   &nblk,  &nblk,  &ZERO, &ZERO, &ictxt2, &na_rows,  &info);
428       descinit_(descC, &n,   &n,   &nblk,  &nblk,  &ZERO, &ZERO, &ictxt2, &na_rows,  &info);
429 
430       /*
431       printf("myid0=%d myid2=%d np2=%d my_pr=%d my_pc=%d na_r=%d na_c=%d np_r=%d np_c=%d nblk=%d T_k=%d ictxt2=%d n=%d\n",
432 	     myid0,myid2,numprocs2,my_prow,my_pcol,na_rows,na_cols,np_rows,np_cols,nblk,T_knum,ictxt2,n);
433       */
434     }
435 
436 #endif
437 
438   } /* if (Solver==3 && SpinP_switch<=1) */
439 
440   /* band and non-collinear calculation */
441 
442   else if (Solver==3 && SpinP_switch==3){
443 
444     n = 0;
445     for (i=1; i<=atomnum; i++){
446       wanA = WhatSpecies[i];
447       n += Spe_Total_CNO[wanA];
448     }
449     n2 = n + 2;
450 
451     koS = (double*)malloc(sizeof(double)*(n+1));
452 
453     S_Band = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
454     for (i=0; i<n+1; i++){
455       S_Band[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
456     }
457   }
458 
459   /* revised by Y. Xiao for Noncollinear NEGF calculations*/
460   if (Solver==4 && SpinP_switch==3 ){
461 
462     n = 0;
463     for (i=1; i<=atomnum; i++){
464       wanA = WhatSpecies[i];
465       n += Spe_Total_CNO[wanA];
466     }
467     n2 = n + 2;
468 
469     koS = (double*)malloc(sizeof(double)*(n+1));
470 
471     S_Band = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
472     for (i=0; i<n+1; i++){
473       S_Band[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
474     }
475   }  /* until here by Y. Xiao for Noncollinear NEGF calculations*/
476 
477   /* cluster */
478 
479 #ifndef scalapack
480 
481   if (Solver==2){
482 
483     n = 0;
484     for (i=1; i<=atomnum; i++){
485       wanA  = WhatSpecies[i];
486       n += Spe_Total_CNO[wanA];
487     }
488     n2 = n + 2;
489 
490     if ( SpinP_switch==0 || SpinP_switch==1 ){
491 
492       Cluster_ReCoes = (double***)malloc(sizeof(double**)*List_YOUSO[23]);
493       for (i=0; i<List_YOUSO[23]; i++){
494 	Cluster_ReCoes[i] = (double**)malloc(sizeof(double*)*n2);
495 	for (j=0; j<n2; j++){
496 	  Cluster_ReCoes[i][j] = (double*)malloc(sizeof(double)*n2);
497 	}
498       }
499     }
500 
501     Cluster_ko = (double**)malloc(sizeof(double*)*List_YOUSO[23]);
502     for (i=0; i<List_YOUSO[23]; i++){
503       Cluster_ko[i] = (double*)malloc(sizeof(double)*n2);
504     }
505 
506   }
507 
508 #endif
509 
510 #ifdef scalapack
511 
512   if (Solver==2){
513 
514     n = 0;
515     for (i=1; i<=atomnum; i++){
516       wanA  = WhatSpecies[i];
517       n += Spe_Total_CNO[wanA];
518     }
519     n2 = n + 2;
520 
521     if ( SpinP_switch==0 || SpinP_switch==1 ){
522 
523       Cluster_ReCoes = (double***)malloc(sizeof(double**)*List_YOUSO[23]);
524       for (i=0; i<List_YOUSO[23]; i++){
525 	Cluster_ReCoes[i] = (double**)malloc(sizeof(double*)*n2);
526 	for (j=0; j<n2; j++){
527 	  Cluster_ReCoes[i][j] = (double*)malloc(sizeof(double)*n2);
528 	}
529       }
530     }
531 
532     Cluster_ko = (double**)malloc(sizeof(double*)*List_YOUSO[23]);
533     for (i=0; i<List_YOUSO[23]; i++){
534       Cluster_ko[i] = (double*)malloc(sizeof(double)*n2);
535     }
536 
537 
538     if ( SpinP_switch==0 || SpinP_switch==1 ){
539 
540       /***********************************************
541         allocation of arrays for the first world
542       ***********************************************/
543 
544       Num_Comm_World1 = SpinP_switch + 1;
545 
546       NPROCS_ID1 = (int*)malloc(sizeof(int)*numprocs0);
547       Comm_World1 = (int*)malloc(sizeof(int)*numprocs0);
548       NPROCS_WD1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
549       Comm_World_StartID1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
550       MPI_CommWD1 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World1);
551 
552       /***********************************************
553              make the first level worlds
554       ***********************************************/
555 
556       Make_Comm_Worlds(mpi_comm_level1, myid0, numprocs0, Num_Comm_World1,
557 		       &myworld1, MPI_CommWD1, NPROCS_ID1, Comm_World1,
558 		       NPROCS_WD1, Comm_World_StartID1);
559 
560       /***********************************************
561          for pallalel calculations in myworld1
562       ***********************************************/
563 
564       MPI_Comm_size(MPI_CommWD1[myworld1],&numprocs1);
565       MPI_Comm_rank(MPI_CommWD1[myworld1],&myid1);
566 
567       np_cols = (int)(sqrt((float)numprocs1));
568       do{
569 	if((numprocs1%np_cols)==0) break;
570 	np_cols--;
571       } while (np_cols>=2);
572       np_rows = numprocs1/np_cols;
573 
574       nblk_m = NBLK;
575       while((nblk_m*np_rows>n || nblk_m*np_cols>n) && (nblk_m > 1)){
576 	nblk_m /= 2;
577       }
578       if(nblk_m<1) nblk_m = 1;
579 
580       MPI_Allreduce(&nblk_m,&nblk,1,MPI_INT,MPI_MIN,mpi_comm_level1);
581 
582       my_prow = myid1/np_cols;
583       my_pcol = myid1%np_cols;
584 
585       na_rows = numroc_(&n, &nblk, &my_prow, &ZERO, &np_rows);
586       na_cols = numroc_(&n, &nblk, &my_pcol, &ZERO, &np_cols );
587 
588       bhandle1 = Csys2blacs_handle(MPI_CommWD1[myworld1]);
589       ictxt1 = bhandle1;
590       Cblacs_gridinit(&ictxt1, "Row", np_rows, np_cols);
591 
592       Ss_Cluster = (double*)malloc(sizeof(double)*na_rows*na_cols);
593       Hs_Cluster = (double*)malloc(sizeof(double)*na_rows*na_cols);
594 
595       MPI_Allreduce(&na_rows,&na_rows_max,1,MPI_INT,MPI_MAX,MPI_CommWD1[myworld1]);
596       MPI_Allreduce(&na_cols,&na_cols_max,1,MPI_INT,MPI_MAX,MPI_CommWD1[myworld1]);
597       Cs_Cluster = (double*)malloc(sizeof(double)*na_rows_max*na_cols_max);
598 
599       descinit_(descS, &n,   &n,   &nblk,  &nblk,  &ZERO, &ZERO, &ictxt1, &na_rows,  &info);
600       descinit_(descH, &n,   &n,   &nblk,  &nblk,  &ZERO, &ZERO, &ictxt1, &na_rows,  &info);
601       descinit_(descC, &n,   &n,   &nblk,  &nblk,  &ZERO, &ZERO, &ictxt1, &na_rows,  &info);
602 
603       /*
604       printf("myid0=%d myid1=%d np1=%d my_pr=%d my_pc=%d na_r=%d na_c=%d np_r=%d np_c=%d nblk=%d ictxt1=%d, spin=%d myworld1=%d n=%d\n",
605   	      myid0,myid1,numprocs1,my_prow,my_pcol,na_rows,na_cols,np_rows,np_cols,nblk,ictxt1,SpinP_switch,myworld1,n);
606       */
607     }
608 
609   }
610 
611 #endif
612 
613   /***************************************
614    allocation of arrays for data on grid
615   ***************************************/
616 
617   ReVk = (double*)malloc(sizeof(double)*My_Max_NumGridB);
618   for (i=0; i<My_Max_NumGridB; i++) ReVk[i] = 0.0;
619 
620   ImVk = (double*)malloc(sizeof(double)*My_Max_NumGridB);
621   for (i=0; i<My_Max_NumGridB; i++) ImVk[i] = 0.0;
622 
623   if ( Mixing_switch==3 || Mixing_switch==4 ){
624 
625     if      (SpinP_switch==0)  spinmax = 1;
626     else if (SpinP_switch==1)  spinmax = 2;
627     else if (SpinP_switch==3)  spinmax = 3;
628 
629     ReRhoAtomk = (double*)malloc(sizeof(double)*My_Max_NumGridB);
630     for (i=0; i<My_Max_NumGridB; i++) ReRhoAtomk[i] = 0.0;
631 
632     ImRhoAtomk = (double*)malloc(sizeof(double)*My_Max_NumGridB);
633     for (i=0; i<My_Max_NumGridB; i++) ImRhoAtomk[i] = 0.0;
634 
635     ReRhok = (double***)malloc(sizeof(double**)*List_YOUSO[38]);
636     for (m=0; m<List_YOUSO[38]; m++){
637       ReRhok[m] = (double**)malloc(sizeof(double*)*spinmax);
638       for (spin=0; spin<spinmax; spin++){
639 	ReRhok[m][spin] = (double*)malloc(sizeof(double)*My_Max_NumGridB);
640 	for (i=0; i<My_Max_NumGridB; i++) ReRhok[m][spin][i] = 0.0;
641       }
642     }
643 
644     ImRhok = (double***)malloc(sizeof(double**)*List_YOUSO[38]);
645     for (m=0; m<List_YOUSO[38]; m++){
646       ImRhok[m] = (double**)malloc(sizeof(double*)*spinmax);
647       for (spin=0; spin<spinmax; spin++){
648 	ImRhok[m][spin] = (double*)malloc(sizeof(double)*My_Max_NumGridB);
649 	for (i=0; i<My_Max_NumGridB; i++) ImRhok[m][spin][i] = 0.0;
650       }
651     }
652 
653     Residual_ReRhok = (double**)malloc(sizeof(double*)*spinmax);
654     for (spin=0; spin<spinmax; spin++){
655       Residual_ReRhok[spin] = (double*)malloc(sizeof(double)*My_NumGridB_CB*List_YOUSO[38]);
656       for (i=0; i<My_NumGridB_CB*List_YOUSO[38]; i++) Residual_ReRhok[spin][i] = 0.0;
657     }
658 
659     Residual_ImRhok = (double**)malloc(sizeof(double*)*spinmax);
660     for (spin=0; spin<spinmax; spin++){
661       Residual_ImRhok[spin] = (double*)malloc(sizeof(double)*My_NumGridB_CB*List_YOUSO[38]);
662       for (i=0; i<My_NumGridB_CB*List_YOUSO[38]; i++) Residual_ImRhok[spin][i] = 0.0;
663     }
664   }
665 
666   /*************************************************
667       allocation of arrays for orbital optimization
668   *************************************************/
669 
670   if (Cnt_switch==1){
671 
672     His_CntCoes = (double****)malloc(sizeof(double***)*(orbitalOpt_History+1));
673     for (k=0; k<(orbitalOpt_History+1); k++){
674       His_CntCoes[k] = (double***)malloc(sizeof(double**)*(Matomnum+1));
675       for (i=0; i<=Matomnum; i++){
676 	His_CntCoes[k][i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
677 	for (j=0; j<List_YOUSO[7]; j++){
678 	  His_CntCoes[k][i][j] = (double*)malloc(sizeof(double)*List_YOUSO[24]);
679 	}
680       }
681     }
682 
683     His_D_CntCoes = (double****)malloc(sizeof(double***)*(orbitalOpt_History+1));
684     for (k=0; k<(orbitalOpt_History+1); k++){
685       His_D_CntCoes[k] = (double***)malloc(sizeof(double**)*(Matomnum+1));
686       for (i=0; i<=Matomnum; i++){
687 	His_D_CntCoes[k][i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
688 	for (j=0; j<List_YOUSO[7]; j++){
689 	  His_D_CntCoes[k][i][j] = (double*)malloc(sizeof(double)*List_YOUSO[24]);
690 	}
691       }
692     }
693 
694     His_CntCoes_Species = (double****)malloc(sizeof(double***)*(orbitalOpt_History+1));
695     for (k=0; k<(orbitalOpt_History+1); k++){
696       His_CntCoes_Species[k] = (double***)malloc(sizeof(double**)*(SpeciesNum+1));
697       for (i=0; i<=SpeciesNum; i++){
698 	His_CntCoes_Species[k][i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
699 	for (j=0; j<List_YOUSO[7]; j++){
700 	  His_CntCoes_Species[k][i][j] = (double*)malloc(sizeof(double)*List_YOUSO[24]);
701 	}
702       }
703     }
704 
705     His_D_CntCoes_Species = (double****)malloc(sizeof(double***)*(orbitalOpt_History+1));
706     for (k=0; k<(orbitalOpt_History+1); k++){
707       His_D_CntCoes_Species[k] = (double***)malloc(sizeof(double**)*(SpeciesNum+1));
708       for (i=0; i<=SpeciesNum; i++){
709 	His_D_CntCoes_Species[k][i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
710 	for (j=0; j<List_YOUSO[7]; j++){
711 	  His_D_CntCoes_Species[k][i][j] = (double*)malloc(sizeof(double)*List_YOUSO[24]);
712 	}
713       }
714     }
715 
716     dim_H = 0;
717     for (wan=0; wan<SpeciesNum; wan++){
718       for (al=0; al<Spe_Total_CNO[wan]; al++){
719 	for (p=0; p<Spe_Specified_Num[wan][al]; p++){
720 	  dim_H++;
721 	}
722       }
723     }
724 
725     OrbOpt_Hessian = (double**)malloc(sizeof(double*)*(dim_H+2));
726     for (i=0; i<(dim_H+2); i++){
727       OrbOpt_Hessian[i] = (double*)malloc(sizeof(double)*(dim_H+2));
728     }
729 
730     His_OrbOpt_Etot = (double*)malloc(sizeof(double)*(orbitalOpt_History+2));
731   }
732 
733   /* PrintMemory */
734   if (firsttime) {
735 
736     PrintMemory("DFT: ReVk",sizeof(double)*My_Max_NumGridB,NULL);
737     PrintMemory("DFT: ImVk",sizeof(double)*My_Max_NumGridB,NULL);
738 
739     if (Mixing_switch==3 || Mixing_switch==4){
740       PrintMemory("DFT: ReRhoAtomk",sizeof(double)*My_Max_NumGridB,NULL);
741       PrintMemory("DFT: ImRhoAtomk",sizeof(double)*My_Max_NumGridB,NULL);
742       PrintMemory("DFT: ReRhok",sizeof(double)*My_Max_NumGridB*List_YOUSO[38]*spinmax,NULL);
743       PrintMemory("DFT: ImRhok",sizeof(double)*My_Max_NumGridB*List_YOUSO[38]*spinmax,NULL);
744       PrintMemory("DFT: Residual_ReRhok", sizeof(double)*My_NumGridB_CB*List_YOUSO[38]*spinmax,NULL);
745       PrintMemory("DFT: Residual_ImRhok", sizeof(double)*My_NumGridB_CB*List_YOUSO[38]*spinmax,NULL);
746     }
747 
748     if (Cnt_switch==1){
749 
750       PrintMemory("DFT: His_CntCoes",
751 		  sizeof(double)*(orbitalOpt_History+1)*(Matomnum+1)*List_YOUSO[7]*List_YOUSO[24],NULL);
752       PrintMemory("DFT: His_D_CntCoes",
753 		  sizeof(double)*(orbitalOpt_History+1)*(Matomnum+1)*List_YOUSO[7]*List_YOUSO[24],NULL);
754       PrintMemory("DFT: His_CntCoes_Species",
755 		  sizeof(double)*(orbitalOpt_History+1)*(SpeciesNum+1)*List_YOUSO[7]*List_YOUSO[24],NULL);
756       PrintMemory("DFT: His_D_CntCoes_Species",
757 		  sizeof(double)*(orbitalOpt_History+1)*(SpeciesNum+1)*List_YOUSO[7]*List_YOUSO[24],NULL);
758     }
759 
760     /* turn off firsttime flag */
761     firsttime=0;
762   }
763 
764   /****************************************************
765     print some informations to the standard output
766     and initialize times
767   ****************************************************/
768 
769   if (Cnt_switch==1 && Cnt_Now==1 && MYID_MPI_COMM_WORLD==Host_ID && 0<level_stdout){
770     printf("\n*******************************************************\n");      fflush(stdout);
771     printf("             Orbital optimization                      \n");        fflush(stdout);
772     printf("             SCF calculation at MD =%2d                \n",MD_iter);fflush(stdout);
773     printf("*******************************************************\n\n");      fflush(stdout);
774   }
775   else if (MYID_MPI_COMM_WORLD==Host_ID  && 0<level_stdout){
776     printf("\n*******************************************************\n");      fflush(stdout);
777     printf("             SCF calculation at MD =%2d                \n",MD_iter);fflush(stdout);
778     printf("*******************************************************\n\n");      fflush(stdout);
779   }
780 
781   fnjoint(filepath,filename,file_DFTSCF);
782   fnjoint(filepath,filename,file_OrbOpt);
783 
784   /* initialize */
785 
786   time3  = 0.0;
787   time4  = 0.0;
788   time5  = 0.0;
789   time6  = 0.0;
790   time7  = 0.0;
791   time8  = 0.0;
792   time10 = 0.0;
793   time11 = 0.0;
794   time12 = 0.0;
795   time13 = 0.0;
796   time14 = 0.0;
797   time15 = 0.0;
798 
799   for (i=0; i<20; i++) ECE[i] = 0.0;
800 
801   fft_charge_flag = 0;
802   TRAN_Poisson_flag2 = 0;
803 
804   /****************************************************
805          Calculations of overlap and Hamiltonian
806          matrices for DFTSCF
807   ****************************************************/
808 
809   if (MYID_MPI_COMM_WORLD==Host_ID  && 0<level_stdout){
810     printf("<MD=%2d>  Calculation of the overlap matrix\n",MD_iter);fflush(stdout);
811   }
812 
813   time1 = Set_OLP_Kin(OLP,H0);
814 
815   if (MYID_MPI_COMM_WORLD==Host_ID && 0<level_stdout){
816     printf("<MD=%2d>  Calculation of the nonlocal matrix\n",MD_iter);fflush(stdout);
817   }
818 
819   time2 = Set_Nonlocal(HNL,DS_NL);
820 
821   if (ProExpn_VNA==1){
822     if (MYID_MPI_COMM_WORLD==Host_ID && 0<level_stdout){
823       printf("<MD=%2d>  Calculation of the VNA projector matrix\n",MD_iter);fflush(stdout);
824     }
825     time12 = Set_ProExpn_VNA(HVNA, HVNA2, DS_VNA);
826   }
827 
828   /*---------- added by TOYODA 06/Jan/2010 */
829   g_exx_switch = 0;
830   if (5==XC_switch) {
831     g_exx_switch = 1;
832 
833     EXX_on_OpenMX_Init(mpi_comm_level1);
834     EXX_Initial_DM(g_exx, g_exx_DM[0]);
835   }
836   /*---------- until here */
837 
838   /* SCF loop */
839 
840   if (Cnt_switch==1 && Cnt_Now==1){
841     SCF_MAX = orbitalOpt_SCF*(orbitalOpt_MD+1)-1;
842     Oopt_NormD[1] = 1.0e+5;
843   }
844   else{
845     SCF_MAX = DFTSCF_loop;
846   }
847 
848   orbitalOpt_iter = 1;
849   OrbOpt_end = 0;
850   SCF_iter = 0;
851   LSCF_iter = 0;
852   ETemp_controller = 0;
853   SCF_iter_shift = 0;
854   NormRD[0] = 100.0;
855 
856   SCF_RENZOKU = -1;
857   po = 0;
858   pUele  = 100.0;
859   Norm1 = 100.0;
860   Norm2 = 100.0;
861   Norm3 = 100.0;
862   Norm4 = 100.0;
863 
864   /*****************************************************
865               read contraction coefficients
866   *****************************************************/
867 
868   if ( Cnt_switch==1 && MD_iter!=1 ) File_CntCoes("read");
869 
870   /****************************************************
871             Start self consistent calculation
872   ****************************************************/
873 
874   do {
875 
876     SCF_iter++;
877     LSCF_iter++;
878 
879     /*****************************************************
880                          print stdout
881     *****************************************************/
882 
883     if (MYID_MPI_COMM_WORLD==Host_ID && 0<level_stdout){
884       if (Cnt_switch==1 && Cnt_Now==1){
885         printf("\n***************** Orbital optimization **************\n");fflush(stdout);
886         printf("    MD=%2d  orbitalOpt_iter=%2d  G-SCF=%2d  L-SCF=%2d  \n",
887                MD_iter,orbitalOpt_iter,SCF_iter,LSCF_iter);fflush(stdout);
888       }
889       else{
890         printf("\n******************* MD=%2d  SCF=%2d *******************\n",
891                MD_iter,SCF_iter);fflush(stdout);
892       }
893       fflush(stdout);
894     }
895 
896     /*****************************************************
897      setting of densities, potentials and KS Hamiltonian
898     *****************************************************/
899     /****************************************************
900      if (SCF==1)
901     ****************************************************/
902 
903     if (SCF_iter==1){
904 
905       if (MD_iter!=1) Mixing_weight = Max_Mixing_weight;
906 
907       if (Cnt_switch==0 || (Cnt_switch==1 && Cnt_Now==1)) Cnt_kind = 0;
908       else if (Cnt_switch==1)                             Cnt_kind = 1;
909       else                                                Cnt_kind = 0;
910 
911       time9 = Set_Aden_Grid();
912 
913       if (Solver==4) {
914 
915         /*****************************************************
916                           set grid for NEGF
917 	*****************************************************/
918 
919 	TRAN_Set_Electrode_Grid(mpi_comm_level1,
920                                 &TRAN_Poisson_flag2,
921 				Grid_Origin, tv, Left_tv, Right_tv, gtv,
922 				Ngrid1, Ngrid2, Ngrid3);
923 
924 	/* revised by Y. Xiao for Noncollinear NEGF calculations */
925 	if (SpinP_switch<2) {
926 	  TRAN_Allocate_Lead_Region(mpi_comm_level1);
927 	  TRAN_Allocate_Cregion(mpi_comm_level1, SpinP_switch,atomnum,WhatSpecies,Spe_Total_CNO);
928 	}
929 	else {
930 	  TRAN_Allocate_Lead_Region_NC(mpi_comm_level1);
931 	  TRAN_Allocate_Cregion_NC(mpi_comm_level1, SpinP_switch,atomnum,WhatSpecies,Spe_Total_CNO);
932 	} /* until here by Y. Xiao for Noncollinear NEGF calculations*/
933 
934         /*****************************************************
935                          add density from Leads
936  	*****************************************************/
937 
938 	TRAN_Add_Density_Lead(mpi_comm_level1,
939 			      SpinP_switch, Ngrid1, Ngrid2, Ngrid3,
940 			      My_NumGridB_AB, Density_Grid_B);
941 
942 	TRAN_Add_ADensity_Lead(mpi_comm_level1,
943  	 		       SpinP_switch, Ngrid1, Ngrid2, Ngrid3,
944 			       My_NumGridB_AB, ADensity_Grid_B);
945       }
946 
947       time10 += Set_Orbitals_Grid(Cnt_kind);
948 
949       /******************************************************
950                         read restart files
951       ******************************************************/
952 
953       SucceedReadingDMfile = 0;
954       if (Scf_RestartFromFile==1 && ((Cnt_switch==1 && Cnt_Now!=1) || Cnt_switch==0 ) ) {
955 
956 	My_SucceedReadingDMfile = RestartFileDFT("read",MD_iter,&Uele,H,CntH,&etime);
957         time13 += etime;
958         MPI_Barrier(mpi_comm_level1);
959 	MPI_Allreduce(&My_SucceedReadingDMfile, &SucceedReadingDMfile,
960 		      1, MPI_INT, MPI_PROD, mpi_comm_level1);
961 
962         if (MYID_MPI_COMM_WORLD==Host_ID && 0<level_stdout){
963           if (SucceedReadingDMfile==1){
964             printf("<Restart>  Found restart files\n");          fflush(stdout);
965 	  }
966           else{
967             printf("<Restart>  Could not find restart files\n"); fflush(stdout);
968 	  }
969 	}
970 
971         /***********************************************************************
972          If reading the restart files is terminated, the densities on grid are
973          partially overwritten. So, Set_Aden_Grid() is called once more.
974         ***********************************************************************/
975 
976         if (SucceedReadingDMfile==0){
977 
978           time9 += Set_Aden_Grid();
979 
980           if (Solver==4) {
981 	    TRAN_Add_ADensity_Lead(mpi_comm_level1,
982 				   SpinP_switch, Ngrid1, Ngrid2, Ngrid3,
983  		  	           My_NumGridB_AB, ADensity_Grid_B);
984 	  }
985         }
986       }
987 
988       /*****************************************************
989        FFT of the initial density for k-space charge mixing
990       *****************************************************/
991 
992       if ( (Mixing_switch==3 || Mixing_switch==4) && SCF_iter==1) {
993 
994         /* non-spin polarization */
995 	if (SpinP_switch==0){
996           if (Solver!=4 || TRAN_Poisson_flag2==2){
997             time15 += FFT_Density(1,ReRhok[1][0],ImRhok[1][0]);
998 	  }
999           else{
1000             time15 += FFT2D_Density(1,ReRhok[1][0],ImRhok[1][0]);
1001 	  }
1002 	}
1003 
1004 	/* collinear spin polarization */
1005 	else if (SpinP_switch==1) {
1006 	  if (Solver!=4 || TRAN_Poisson_flag2==2){
1007             time15 += FFT_Density(1,ReRhok[1][0],ImRhok[1][0]);
1008             time15 += FFT_Density(2,ReRhok[1][1],ImRhok[1][1]);
1009 	  }
1010           else{
1011             time15 += FFT2D_Density(1,ReRhok[1][0],ImRhok[1][0]);
1012             time15 += FFT2D_Density(2,ReRhok[1][1],ImRhok[1][1]);
1013           }
1014 	}
1015 
1016 	/* non-collinear spin polarization */
1017 	else if (SpinP_switch==3) {
1018           if (Solver!=4 || TRAN_Poisson_flag2==2){
1019   	    time15 += FFT_Density(1,ReRhok[1][0],ImRhok[1][0]);
1020 	    time15 += FFT_Density(2,ReRhok[1][1],ImRhok[1][1]);
1021 	    time15 += FFT_Density(4,ReRhok[1][2],ImRhok[1][2]);
1022 	  }
1023           else{
1024   	    time15 += FFT2D_Density(1,ReRhok[1][0],ImRhok[1][0]);
1025 	    time15 += FFT2D_Density(2,ReRhok[1][1],ImRhok[1][1]);
1026 	    time15 += FFT2D_Density(4,ReRhok[1][2],ImRhok[1][2]);
1027           }
1028 	}
1029       }
1030 
1031       if (SpinP_switch==3) diagonalize_nc_density();
1032 
1033       /* In case the restart file is found */
1034 
1035       if (SucceedReadingDMfile && Cnt_switch==0) {
1036 
1037 	if (Solver!=4 && ESM_switch==0)      time4 += Poisson(1,ReVk,ImVk);
1038         else if (Solver!=4 && ESM_switch!=0) time4 += Poisson_ESM(1,ReVk,ImVk); /* added by Ohwaki */
1039         else                                 time4 += TRAN_Poisson(ReVk,ImVk);
1040 
1041 	/*
1042 	  Although the matrix of Hamiltonian is read from files, it would be better to
1043 	  reconstruct the matrix using charge density read from files in order to avoid
1044 	  abrupt change of NormRD at the second SCF step.
1045 	  Also, if (Correct_Position_flag), then the reconstruction has to be done
1046 	  regardless of the above reason.
1047 	*/
1048 
1049         if (Correct_Position_flag ||
1050               (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
1051                && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0) /* non-spin-orbit coupling and non-LDA+U */
1052 	    ){
1053 
1054   	  time3 += Set_Hamiltonian("nostdout",SCF_iter,SucceedReadingDMfile,Cnt_kind,H0,HNL,DM[0],H);
1055 	}
1056 
1057       }
1058 
1059       /* failure of reading restart files */
1060       else{
1061 
1062 	if (Solver==4) time4 += TRAN_Poisson(ReVk,ImVk);
1063 	time3 += Set_Hamiltonian("nostdout",SCF_iter,SucceedReadingDMfile,Cnt_kind,H0,HNL,DM[0],H);
1064       }
1065 
1066       if (Cnt_switch==1 && Cnt_Now==1){
1067         if (MD_iter==1) Initial_CntCoes2(H,OLP);
1068 
1069 	Contract_Hamiltonian(H,CntH,OLP,CntOLP);
1070         if (SO_switch==1) Contract_iHNL(iHNL,iCntHNL);
1071       }
1072 
1073       /* switch a restart flag on for proceeding MD steps */
1074       if (MD_switch!=0 && Scf_RestartFromFile!=-1) Scf_RestartFromFile = 1;
1075 
1076     } /* end of if (SCF_iter==1) */
1077 
1078     /****************************************************
1079      if (SCF!=1)
1080     ****************************************************/
1081 
1082     else {
1083 
1084       if (Cnt_switch==0 || (Cnt_switch==1 && Cnt_Now==1)) Cnt_kind = 0;
1085       else if (Cnt_switch==1)                             Cnt_kind = 1;
1086       else                                                Cnt_kind = 0;
1087 
1088       if (Solver!=4 && ESM_switch==0)       time4 += Poisson(fft_charge_flag,ReVk,ImVk);
1089       else if (Solver!=4 && ESM_switch!=0)  time4 += Poisson_ESM(fft_charge_flag,ReVk,ImVk); /* added by Ohwaki */
1090       else                                  time4 += TRAN_Poisson(ReVk,ImVk);
1091 
1092       /* construct matrix elements for LDA+U or Zeeman term, added by MJ */
1093       Eff_Hub_Pot(SCF_iter, OLP[0]) ;
1094 
1095       time3  += Set_Hamiltonian("stdout",SCF_iter,SucceedReadingDMfile,Cnt_kind,H0,HNL,DM[0],H);
1096 
1097       if (Cnt_switch==1 && Cnt_Now==1){
1098 	Contract_Hamiltonian(H,CntH,OLP,CntOLP);
1099 	if (SO_switch==1) Contract_iHNL(iHNL,iCntHNL);
1100       }
1101 
1102     }
1103 
1104     /****************************************************
1105      In case of RMM-DIISH (Mixing_switch==5), the mixing
1106      of Hamiltonian matrix elements is performed before
1107      the eigenvalue problem.
1108     ****************************************************/
1109 
1110     if ( Mixing_switch==5
1111        || ( Cnt_switch!=1 && SucceedReadingDMfile!=1
1112        && (Mixing_switch==1 || Mixing_switch==6)
1113        && (LSCF_iter-SCF_iter_shift)<=(Pulay_SCF/2))
1114        && Solver!=4 ){
1115 
1116       time6  += Mixing_H(MD_iter, LSCF_iter-SCF_iter_shift, SCF_iter-SCF_iter_shift);
1117     }
1118 
1119     /****************************************************
1120                 Solve the eigenvalue problem
1121     ****************************************************/
1122 
1123     s_vec[0]="Recursion";     s_vec[1]="Cluster"; s_vec[2]="Band";
1124     s_vec[3]="NEGF";          s_vec[4]="DC";      s_vec[5]="GDC";
1125     s_vec[6]="Cluster-DIIS";  s_vec[7]="Krylov";  s_vec[8]="Cluster2";
1126     s_vec[9]="EC";
1127 
1128     if (MYID_MPI_COMM_WORLD==Host_ID && 0<level_stdout){
1129       printf("<%s>  Solving the eigenvalue problem...\n",s_vec[Solver-1]);fflush(stdout);
1130     }
1131 
1132     if (Cnt_switch==0){
1133 
1134       switch (Solver) {
1135 
1136       case 1:
1137 	/* not supported */
1138 	break;
1139 
1140       case 2:
1141 
1142 #ifndef scalapack
1143 
1144         /*---------- modified by TOYODA 08/JAN/2010 */
1145 
1146         if (g_exx_switch) {
1147           time5 += Cluster_DFT("scf",LSCF_iter,SpinP_switch,
1148 			       Cluster_ReCoes,Cluster_ko, H,iHNL,OLP[0],DM[0],EDM,
1149 			       g_exx,g_exx_DM[0],g_exx_U,Eele0,Eele1);
1150         } else {
1151 
1152 	  time5 += Cluster_DFT("scf",LSCF_iter,SpinP_switch,
1153 			       Cluster_ReCoes,Cluster_ko, H,iHNL,OLP[0],DM[0],EDM,
1154 			       NULL,NULL,NULL,Eele0,Eele1);
1155         }
1156         /*---------- until here */
1157 
1158 #endif
1159 
1160 #ifdef scalapack
1161 
1162         /*---------- modified by TOYODA 08/JAN/2010 */
1163         if (g_exx_switch) {
1164 
1165           time5 += Cluster_DFT_ScaLAPACK("scf",LSCF_iter,SpinP_switch,
1166 			       Cluster_ReCoes,Cluster_ko, H,iHNL,OLP[0],DM[0],EDM,
1167 			       g_exx,g_exx_DM[0],g_exx_U,Eele0,Eele1,
1168 			       myworld1,NPROCS_ID1,Comm_World1,NPROCS_WD1,
1169 			       Comm_World_StartID1,MPI_CommWD1,
1170 			       Ss_Cluster,Cs_Cluster,Hs_Cluster);
1171 
1172         } else {
1173 
1174 	  time5 += Cluster_DFT_ScaLAPACK("scf",LSCF_iter,SpinP_switch,
1175 			       Cluster_ReCoes,Cluster_ko, H,iHNL,OLP[0],DM[0],EDM,
1176 			       NULL,NULL,NULL,Eele0,Eele1,
1177 			       myworld1,NPROCS_ID1,Comm_World1,NPROCS_WD1,
1178 			       Comm_World_StartID1,MPI_CommWD1,
1179 			       Ss_Cluster,Cs_Cluster,Hs_Cluster);
1180 
1181         }
1182         /*---------- until here */
1183 
1184 #endif
1185 
1186 	break;
1187 
1188       case 3:
1189 
1190 	if (SpinP_switch<=1)
1191 
1192 #ifndef scalapack
1193 
1194           /*---------- modified by TOYODA 15/FEB/2010 */
1195           if (5==XC_switch) {
1196             time5 += Band_DFT_Col(LSCF_iter,
1197                                   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1198 	    			  SpinP_switch,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1,
1199                                   MP,order_GA,ko,koS,EIGEN_Band_Col,
1200                                   H1_Band_Col,S1_Band_Col,
1201                                   CDM1_Band_Col,EDM1_Band_Col,
1202                                   H_Band_Col,S_Band,C_Band_Col,
1203                                   BLAS_S,
1204                                   k_op,T_k_op,T_k_ID,
1205                                   T_KGrids1,T_KGrids2,T_KGrids3,
1206 				  myworld1,
1207 				  NPROCS_ID1,
1208 				  Comm_World1,
1209 				  NPROCS_WD1,
1210 				  Comm_World_StartID1,
1211 				  MPI_CommWD1,
1212 				  myworld2,
1213 				  NPROCS_ID2,
1214  			          NPROCS_WD2,
1215  			          Comm_World2,
1216 				  Comm_World_StartID2,
1217 				  MPI_CommWD2,
1218                                   g_exx,
1219                                   g_exx_DM[0],
1220                                   g_exx_U
1221                                   );
1222           } else {
1223 
1224             time5 += Band_DFT_Col(LSCF_iter,
1225                                   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1226 	    			  SpinP_switch,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1,
1227                                   MP,order_GA,ko,koS,EIGEN_Band_Col,
1228                                   H1_Band_Col,S1_Band_Col,
1229                                   CDM1_Band_Col,EDM1_Band_Col,
1230                                   H_Band_Col,S_Band,C_Band_Col,
1231                                   BLAS_S,
1232                                   k_op,T_k_op,T_k_ID,
1233                                   T_KGrids1,T_KGrids2,T_KGrids3,
1234 				  myworld1,
1235 				  NPROCS_ID1,
1236 				  Comm_World1,
1237 				  NPROCS_WD1,
1238 				  Comm_World_StartID1,
1239 				  MPI_CommWD1,
1240 				  myworld2,
1241 				  NPROCS_ID2,
1242 				  NPROCS_WD2,
1243 				  Comm_World2,
1244 				  Comm_World_StartID2,
1245 				  MPI_CommWD2,
1246                                   NULL, NULL, NULL);
1247         }
1248 	/*---------- until here */
1249 
1250 #endif
1251 
1252 #ifdef scalapack
1253 
1254           /*---------- modified by TOYODA 15/FEB/2010 */
1255           if (5==XC_switch) {
1256             time5 += Band_DFT_Col_ScaLAPACK(LSCF_iter,
1257                                   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1258 	    			  SpinP_switch,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1,
1259                                   MP,order_GA,ko,koS,EIGEN_Band_Col,
1260                                   H1_Band_Col,S1_Band_Col,
1261                                   CDM1_Band_Col,EDM1_Band_Col,
1262                                   H_Band_Col,Ss_Band_Col,Cs_Band_Col,
1263                                   Hs_Band_Col,
1264                                   k_op,T_k_op,T_k_ID,
1265                                   T_KGrids1,T_KGrids2,T_KGrids3,
1266 				  myworld1,
1267 				  NPROCS_ID1,
1268 				  Comm_World1,
1269 				  NPROCS_WD1,
1270 				  Comm_World_StartID1,
1271 				  MPI_CommWD1,
1272 				  myworld2,
1273 				  NPROCS_ID2,
1274  			          NPROCS_WD2,
1275  			          Comm_World2,
1276 				  Comm_World_StartID2,
1277 				  MPI_CommWD2,
1278                                   g_exx,
1279                                   g_exx_DM[0],
1280                                   g_exx_U
1281                                   );
1282           } else {
1283 
1284             time5 += Band_DFT_Col_ScaLAPACK(LSCF_iter,
1285                                   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1286 	    			  SpinP_switch,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1,
1287                                   MP,order_GA,ko,koS,EIGEN_Band_Col,
1288                                   H1_Band_Col,S1_Band_Col,
1289                                   CDM1_Band_Col,EDM1_Band_Col,
1290                                   H_Band_Col,Ss_Band_Col,Cs_Band_Col,
1291                                   Hs_Band_Col,
1292                                   k_op,T_k_op,T_k_ID,
1293                                   T_KGrids1,T_KGrids2,T_KGrids3,
1294 				  myworld1,
1295 				  NPROCS_ID1,
1296 				  Comm_World1,
1297 				  NPROCS_WD1,
1298 				  Comm_World_StartID1,
1299 				  MPI_CommWD1,
1300 				  myworld2,
1301 				  NPROCS_ID2,
1302 				  NPROCS_WD2,
1303 				  Comm_World2,
1304 				  Comm_World_StartID2,
1305 				  MPI_CommWD2,
1306                                   NULL, NULL, NULL);
1307           }
1308 	/*---------- until here */
1309 
1310 #endif
1311 
1312 	else
1313 	  time5 += Band_DFT_NonCol(LSCF_iter,
1314 				   koS,S_Band,
1315 				   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1316 				   SpinP_switch,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1);
1317 	break;
1318 
1319       case 4:
1320 	/* revised by Y. Xiao for Noncollinear NEGF calculations*/
1321 	if (SpinP_switch<2) {
1322 	  time5 += TRAN_DFT(mpi_comm_level1, SucceedReadingDMfile,
1323 			    level_stdout, LSCF_iter,SpinP_switch,H,iHNL,OLP[0],
1324 			    atomnum,Matomnum,WhatSpecies, Spe_Total_CNO, FNAN, natn,ncn,
1325 			    M2G, G2ID, F_G2M, atv_ijk, List_YOUSO,
1326 			    DM[0],EDM,TRAN_DecMulP,Eele0,Eele1,ChemP_e0);
1327 	}
1328 	else {
1329 	  time5 += TRAN_DFT_NC(mpi_comm_level1, SucceedReadingDMfile,
1330 			       level_stdout, LSCF_iter,SpinP_switch,H,iHNL,OLP[0],
1331 			       atomnum,Matomnum,WhatSpecies, Spe_Total_CNO, FNAN, natn,ncn,
1332 			       M2G, G2ID, F_G2M, atv_ijk, List_YOUSO,koS,S_Band,
1333 			       DM[0],iDM[0],EDM,TRAN_DecMulP,Eele0,Eele1,ChemP_e0);
1334 	} /* until here by Y. Xiao for Noncollinear NEGF calculations*/
1335 
1336 	break;
1337 
1338       case 5:
1339 	time5 += Divide_Conquer("scf",LSCF_iter,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1);
1340 	break;
1341 
1342       case 6:
1343 	/* not supported */
1344 	break;
1345 
1346       case 7:
1347 	break;
1348 
1349       case 8:
1350 	time5 += Krylov("scf",LSCF_iter,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1);
1351 	break;
1352 
1353       case 9:
1354 	time5 += Cluster_DFT_ON2("scf",LSCF_iter,SpinP_switch,Cluster_ReCoes,Cluster_ko,
1355 				 H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1);
1356 	break;
1357 
1358       case 10:
1359 	time5 += EC("scf",LSCF_iter,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1);
1360 	break;
1361 
1362       }
1363 
1364     }
1365     else{
1366 
1367       switch (Solver) {
1368 
1369       case 1:
1370 	/* not supported */
1371 	break;
1372 
1373       case 2:
1374 
1375 #ifndef scalapack
1376 
1377         /*---------- modified by TOYODA 08/JAN/2010 */
1378         if (g_exx_switch) {
1379           time5 += Cluster_DFT("scf",LSCF_iter,SpinP_switch,
1380                                Cluster_ReCoes,Cluster_ko, CntH,iCntHNL,CntOLP[0],DM[0],EDM,
1381                                g_exx,g_exx_DM[0],g_exx_U,Eele0,Eele1);
1382         }
1383         else {
1384           time5 += Cluster_DFT("scf",LSCF_iter,SpinP_switch,
1385                                Cluster_ReCoes,Cluster_ko, CntH,iCntHNL,CntOLP[0],DM[0],EDM,
1386                                NULL,NULL,NULL,Eele0,Eele1);
1387         }
1388         /*---------- until here */
1389 
1390 #endif
1391 
1392 #ifdef scalapack
1393 
1394         /*---------- modified by TOYODA 08/JAN/2010 */
1395         if (g_exx_switch) {
1396           time5 += Cluster_DFT_ScaLAPACK("scf",LSCF_iter,SpinP_switch,
1397                                Cluster_ReCoes,Cluster_ko, CntH,iCntHNL,CntOLP[0],DM[0],EDM,
1398                                g_exx,g_exx_DM[0],g_exx_U,Eele0,Eele1,
1399 			       myworld1,NPROCS_ID1,Comm_World1,NPROCS_WD1,
1400 			       Comm_World_StartID1,MPI_CommWD1,
1401 			       Ss_Cluster,Cs_Cluster,Hs_Cluster);
1402 
1403         }
1404         else {
1405           time5 += Cluster_DFT_ScaLAPACK("scf",LSCF_iter,SpinP_switch,
1406                                Cluster_ReCoes,Cluster_ko, CntH,iCntHNL,CntOLP[0],DM[0],EDM,
1407                                NULL,NULL,NULL,Eele0,Eele1,
1408 			       myworld1,NPROCS_ID1,Comm_World1,NPROCS_WD1,
1409 			       Comm_World_StartID1,MPI_CommWD1,
1410 			       Ss_Cluster,Cs_Cluster,Hs_Cluster);
1411 
1412         }
1413         /*---------- until here */
1414 
1415 #endif
1416 
1417 	break;
1418 
1419       case 3:
1420 
1421 	if (SpinP_switch<=1)
1422 
1423 #ifndef scalapack
1424 
1425           /*---------- modified by TOYODA 15/FEB/2010 */
1426           if (5==XC_switch) {
1427             time5 += Band_DFT_Col(LSCF_iter,
1428                                   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1429 	                          SpinP_switch,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1,
1430                                   MP,order_GA,ko,koS,EIGEN_Band_Col,
1431                                   H1_Band_Col,S1_Band_Col,
1432                                   CDM1_Band_Col,EDM1_Band_Col,
1433                                   H_Band_Col,S_Band,C_Band_Col,
1434                                   BLAS_S,
1435                                   k_op,T_k_op,T_k_ID,
1436                                   T_KGrids1,T_KGrids2,T_KGrids3,
1437 		  	          myworld1,
1438 		  	          NPROCS_ID1,
1439 			          Comm_World1,
1440 			          NPROCS_WD1,
1441   			          Comm_World_StartID1,
1442 				  MPI_CommWD1,
1443 				  myworld2,
1444 				  NPROCS_ID2,
1445 				  NPROCS_WD2,
1446 				  Comm_World2,
1447 				  Comm_World_StartID2,
1448 				  MPI_CommWD2,
1449                                   g_exx,
1450                                   g_exx_DM[0],
1451                                   g_exx_U
1452                                   );
1453           }
1454           else {
1455 
1456             time5 += Band_DFT_Col(LSCF_iter,
1457                                   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1458 	                          SpinP_switch,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1,
1459                                   MP,order_GA,ko,koS,EIGEN_Band_Col,
1460                                   H1_Band_Col,S1_Band_Col,
1461                                   CDM1_Band_Col,EDM1_Band_Col,
1462                                   H_Band_Col,S_Band,C_Band_Col,
1463                                   BLAS_S,
1464                                   k_op,T_k_op,T_k_ID,
1465                                   T_KGrids1,T_KGrids2,T_KGrids3,
1466 		  	          myworld1,
1467 		  	          NPROCS_ID1,
1468 			          Comm_World1,
1469 			          NPROCS_WD1,
1470   			          Comm_World_StartID1,
1471 				  MPI_CommWD1,
1472 				  myworld2,
1473 				  NPROCS_ID2,
1474 				  NPROCS_WD2,
1475 				  Comm_World2,
1476 				  Comm_World_StartID2,
1477 				  MPI_CommWD2,
1478                                   NULL,
1479                                   NULL,
1480                                   NULL
1481                                   );
1482 
1483           }
1484         /*---------- until here */
1485 
1486 #endif
1487 
1488 #ifdef scalapack
1489 
1490           /*---------- modified by TOYODA 15/FEB/2010 */
1491           if (5==XC_switch) {
1492             time5 += Band_DFT_Col_ScaLAPACK(LSCF_iter,
1493                                   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1494 	                          SpinP_switch,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1,
1495                                   MP,order_GA,ko,koS,EIGEN_Band_Col,
1496                                   H1_Band_Col,S1_Band_Col,
1497                                   CDM1_Band_Col,EDM1_Band_Col,
1498                                   H_Band_Col,Ss_Band_Col,Cs_Band_Col,
1499                                   Hs_Band_Col,
1500                                   k_op,T_k_op,T_k_ID,
1501                                   T_KGrids1,T_KGrids2,T_KGrids3,
1502 		  	          myworld1,
1503 		  	          NPROCS_ID1,
1504 			          Comm_World1,
1505 			          NPROCS_WD1,
1506   			          Comm_World_StartID1,
1507 				  MPI_CommWD1,
1508 				  myworld2,
1509 				  NPROCS_ID2,
1510 				  NPROCS_WD2,
1511 				  Comm_World2,
1512 				  Comm_World_StartID2,
1513 				  MPI_CommWD2,
1514                                   g_exx,
1515                                   g_exx_DM[0],
1516                                   g_exx_U
1517                                   );
1518           }
1519           else {
1520 
1521             time5 += Band_DFT_Col_ScaLAPACK(LSCF_iter,
1522                                   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1523 	                          SpinP_switch,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1,
1524                                   MP,order_GA,ko,koS,EIGEN_Band_Col,
1525                                   H1_Band_Col,S1_Band_Col,
1526                                   CDM1_Band_Col,EDM1_Band_Col,
1527                                   H_Band_Col,Ss_Band_Col,Cs_Band_Col,
1528                                   Hs_Band_Col,
1529                                   k_op,T_k_op,T_k_ID,
1530                                   T_KGrids1,T_KGrids2,T_KGrids3,
1531 		  	          myworld1,
1532 		  	          NPROCS_ID1,
1533 			          Comm_World1,
1534 			          NPROCS_WD1,
1535   			          Comm_World_StartID1,
1536 				  MPI_CommWD1,
1537 				  myworld2,
1538 				  NPROCS_ID2,
1539 				  NPROCS_WD2,
1540 				  Comm_World2,
1541 				  Comm_World_StartID2,
1542 				  MPI_CommWD2,
1543                                   NULL,
1544                                   NULL,
1545                                   NULL
1546                                   );
1547 
1548           }
1549         /*---------- until here */
1550 
1551 #endif
1552 
1553 	else
1554 	  time5 += Band_DFT_NonCol(LSCF_iter,
1555 				   koS,S_Band,
1556 				   Kspace_grid1,Kspace_grid2,Kspace_grid3,
1557 				   SpinP_switch,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1);
1558 	break;
1559 
1560       case 4:
1561 	/* revised by Y. Xiao for Noncollinear NEGF calculations*/
1562 	if (SpinP_switch<2) {
1563 	  time5 += TRAN_DFT(mpi_comm_level1, SucceedReadingDMfile,
1564 			    level_stdout, LSCF_iter,SpinP_switch,H,iHNL,OLP[0],
1565 			    atomnum,Matomnum,WhatSpecies, Spe_Total_CNO, FNAN, natn,ncn,
1566 			    M2G, G2ID, F_G2M, atv_ijk, List_YOUSO,
1567 			    DM[0],EDM,TRAN_DecMulP,Eele0,Eele1,ChemP_e0);
1568 	}
1569 	else {
1570 	  time5 += TRAN_DFT_NC(mpi_comm_level1, SucceedReadingDMfile,
1571 			       level_stdout, LSCF_iter,SpinP_switch,H,iHNL,OLP[0],
1572 			       atomnum,Matomnum,WhatSpecies, Spe_Total_CNO, FNAN, natn,ncn,
1573 			       M2G, G2ID, F_G2M, atv_ijk, List_YOUSO,koS,S_Band,
1574 			       DM[0],iDM[0],EDM,TRAN_DecMulP,Eele0,Eele1,ChemP_e0);
1575 	}  /* until here by Y. Xiao for Noncollinear NEGF calculations*/
1576 
1577 	break;
1578 
1579       case 5:
1580 	time5 += Divide_Conquer("scf",LSCF_iter,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1);
1581 	break;
1582 
1583       case 6:
1584 	/* not supported */
1585 	break;
1586 
1587       case 7:
1588 	break;
1589 
1590       case 8:
1591 	time5 += Krylov("scf",LSCF_iter,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1);
1592 	break;
1593 
1594       case 9:
1595 	time5 += Cluster_DFT_ON2("scf",LSCF_iter,SpinP_switch,Cluster_ReCoes,Cluster_ko,
1596 				 CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1);
1597 	break;
1598 
1599       case 10:
1600 	time5 += EC("scf",LSCF_iter,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1);
1601 	break;
1602 
1603       }
1604     }
1605 
1606     Uele_OS0 = Eele0[0];
1607     Uele_OS1 = Eele0[1];
1608     Uele_IS0 = Eele1[0];
1609     Uele_IS1 = Eele1[1];
1610     Uele = Uele_IS0 + Uele_IS1;
1611 
1612     /*****************************************************
1613                    Orbital magnetic moment
1614     *****************************************************/
1615 
1616     if (SpinP_switch==3) Orbital_Moment("non");
1617 
1618     /*****************************************************
1619                       Mulliken charge
1620     *****************************************************/
1621 
1622     time14 += Mulliken_Charge("stdout");
1623 
1624     /*****************************************************
1625                     check SCF-convergence
1626     *****************************************************/
1627 
1628     if (Solver==4) {
1629       dUele = 1.0; /* do not calculate Uele */
1630     }
1631     else {
1632 
1633       if (SCF_iter==1)
1634         dUele = 1.0;
1635       else
1636         dUele = fabs(Uele - pUele);
1637     }
1638 
1639     if (
1640 	2<=SCF_iter &&
1641 	((dUele<SCF_Criterion && NormRD[0]<(10*SCF_Criterion) && Cnt_switch==0)   ||
1642 	 (dUele<SCF_Criterion && Cnt_switch==1 && Cnt_Now!=1)                     ||
1643 	 (dUele<SCF_Criterion && Cnt_switch==1 && Cnt_Now==1 && OrbOpt_end==1))
1644 	) po = 1;
1645 
1646     /*****************************************************
1647                      orbital optimization
1648     *****************************************************/
1649 
1650     if ( Cnt_switch==1 && Cnt_Now==1 && OrbOpt_end!=1 &&
1651 	 ( LSCF_iter%orbitalOpt_SCF==0 ||
1652 	   ( dUele<SCF_Criterion && NormRD[0]<1.0e-7 )
1653 	   )
1654 	 ){
1655 
1656       /* calculate the total energy */
1657 
1658       /*
1659 	time10 += Set_Orbitals_Grid(1);
1660 	time11 += Set_Density_Grid(1, 0, DM[0]);
1661 	time7 += Force(H0,DS_NL,OLP,DM[0],EDM);
1662 	time8 += Total_Energy(MD_iter,DM[0],ECE);
1663 	time10 += Set_Orbitals_Grid(0);
1664 
1665 	TotalE = 0.0;
1666 	for (i=0; i<=12; i++){
1667 	TotalE += ECE[i];
1668 	}
1669 	printf("orbitalOpt_iter=%3d Etot=%15.12f\n",orbitalOpt_iter,TotalE);
1670       */
1671 
1672       /* optimization of contraction coefficients */
1673 
1674       if (Opt_Contraction(orbitalOpt_iter,
1675                           TotalE,
1676                           H,OLP,DM[0],EDM,
1677                           His_CntCoes,His_D_CntCoes,
1678                           His_CntCoes_Species,His_D_CntCoes_Species,
1679                           His_OrbOpt_Etot,OrbOpt_Hessian)<orbitalOpt_criterion){
1680 
1681         SCF_MAX = SCF_iter + orbitalOpt_SCF - 1;
1682         OrbOpt_end = 1;
1683       }
1684 
1685       /* save the information at iteration */
1686       outputfile1(3,MD_iter,orbitalOpt_iter,0,SCF_iter,file_OrbOpt,ChemP_e0);
1687 
1688       orbitalOpt_iter++;
1689       LSCF_iter = 0;
1690       SCF_iter_shift = 0;
1691       ETemp_controller = 0;
1692 
1693       if ( (orbitalOpt_SCF*(orbitalOpt_MD+1)-1) < SCF_iter) po = 1;
1694       if (orbitalOpt_MD < orbitalOpt_iter){
1695         SCF_MAX = SCF_iter + orbitalOpt_SCF - 1;
1696         OrbOpt_end = 1;
1697       }
1698     }
1699 
1700     /*****************************************************
1701                    mixing of density matrices
1702     *****************************************************/
1703 
1704     if (SCF_iter==1 || LSCF_iter==0)          Calc_CntOrbital_ON = 1;
1705     else                                      Calc_CntOrbital_ON = 0;
1706 
1707     /********************************************************
1708       control the electric temperature
1709       for accelerating SCF convergence
1710     ********************************************************/
1711 
1712     if (Solver!=4 && SCF_Control_Temp==1){
1713 
1714       Norm5 = Norm4;
1715       Norm4 = Norm3;
1716       Norm3 = Norm2;
1717       Norm2 = Norm1;
1718       Norm1 = NormRD[0];
1719       tmp = (Norm1+Norm2+Norm3+Norm4+Norm5)/5.0;
1720       tmp0 = sqrt(fabs(NormRD[0]));
1721 
1722       if      (0.01<tmp0 && tmp<Norm1 && 6<LSCF_iter && ETemp_controller==0){
1723         E_Temp = 10.0*Original_E_Temp;
1724         n = LSCF_iter - Pulay_SCF + 2;
1725         if (0<=n && n<LSCF_iter) SCF_iter_shift = n;
1726         ETemp_controller = 1;
1727       }
1728       else if (tmp0<=0.01 && ETemp_controller<=1){
1729         E_Temp = 1.0*Original_E_Temp;
1730         n = LSCF_iter - Pulay_SCF + 2;
1731         if (0<=n && n<LSCF_iter) SCF_iter_shift = n;
1732         ETemp_controller = 2;
1733       }
1734 
1735       /* update Beta */
1736       Beta = 1.0/kB/E_Temp;
1737     }
1738 
1739     /********************************************************
1740      simple, RMM-DIIS, or GR-Pulay mixing for density matrix
1741     ********************************************************/
1742 
1743     if (    Mixing_switch==0
1744 	 || Mixing_switch==1
1745 	 || Mixing_switch==2
1746          || Mixing_switch==6 ){
1747 
1748       time6  += Mixing_DM(MD_iter,
1749 			  LSCF_iter-SCF_iter_shift,
1750 			  SCF_iter-SCF_iter_shift,
1751 			  SucceedReadingDMfile,
1752 			  ReRhok,ImRhok,
1753 			  Residual_ReRhok,Residual_ImRhok,
1754 			  ReVk,ImVk,ReRhoAtomk,ImRhoAtomk);
1755 
1756       time11 += Set_Density_Grid(Cnt_kind,Calc_CntOrbital_ON,DM[0]);
1757 
1758       if (Solver==4) {
1759 	TRAN_Add_Density_Lead(mpi_comm_level1,
1760 			      SpinP_switch, Ngrid1, Ngrid2, Ngrid3,
1761 			      My_NumGridB_AB, Density_Grid_B);
1762       }
1763 
1764       if (SpinP_switch==3) diagonalize_nc_density();
1765 
1766       fft_charge_flag = 1;
1767     }
1768 
1769     /********************************************************
1770         Kerker or RMM-DIISK mixing for density in k-space
1771     ********************************************************/
1772 
1773     else if (Mixing_switch==3 || Mixing_switch==4) {
1774 
1775       time11 += Set_Density_Grid(Cnt_kind,Calc_CntOrbital_ON,DM[0]);
1776 
1777       if (Solver==4) {
1778 	TRAN_Add_Density_Lead(mpi_comm_level1,
1779 			      SpinP_switch, Ngrid1, Ngrid2, Ngrid3,
1780 			      My_NumGridB_AB, Density_Grid_B);
1781       }
1782 
1783       /* non-spin polarization */
1784       if (SpinP_switch==0){
1785 	if (Solver!=4 || TRAN_Poisson_flag2==2) time15 += FFT_Density(1,ReRhok[0][0],ImRhok[0][0]);
1786 	else                                    time15 += FFT2D_Density(1,ReRhok[0][0],ImRhok[0][0]);
1787       }
1788 
1789       /* collinear spin polarization */
1790       else if (SpinP_switch==1) {
1791 	if (Solver!=4 || TRAN_Poisson_flag2==2){
1792 	  time15 += FFT_Density(1,ReRhok[0][0],ImRhok[0][0]);
1793 	  time15 += FFT_Density(2,ReRhok[0][1],ImRhok[0][1]);
1794 	}
1795 	else{
1796 	  time15 += FFT2D_Density(1,ReRhok[0][0],ImRhok[0][0]);
1797 	  time15 += FFT2D_Density(2,ReRhok[0][1],ImRhok[0][1]);
1798 	}
1799       }
1800 
1801       /* non-collinear spin polarization */
1802       else if (SpinP_switch==3) {
1803 	if (Solver!=4 || TRAN_Poisson_flag2==2){
1804 	  time15 += FFT_Density(1,ReRhok[0][0],ImRhok[0][0]);
1805 	  time15 += FFT_Density(2,ReRhok[0][1],ImRhok[0][1]);
1806 	  time15 += FFT_Density(4,ReRhok[0][2],ImRhok[0][2]);
1807 	}
1808 	else{
1809 	  time15 += FFT2D_Density(1,ReRhok[0][0],ImRhok[0][0]);
1810 	  time15 += FFT2D_Density(2,ReRhok[0][1],ImRhok[0][1]);
1811 	  time15 += FFT2D_Density(4,ReRhok[0][2],ImRhok[0][2]);
1812 	}
1813       }
1814 
1815       if (SCF_iter==1){
1816 	if (Solver!=4 || TRAN_Poisson_flag2==2)  time15 += FFT_Density(3,ReRhoAtomk,ImRhoAtomk);
1817 	else                                     time15 += FFT2D_Density(3,ReRhoAtomk,ImRhoAtomk);
1818       }
1819 
1820       time6 += Mixing_DM(MD_iter,
1821 			 LSCF_iter-SCF_iter_shift,
1822 			 SCF_iter-SCF_iter_shift,
1823 			 SucceedReadingDMfile,
1824 			 ReRhok,ImRhok,
1825 			 Residual_ReRhok,Residual_ImRhok,
1826 			 ReVk,ImVk,ReRhoAtomk,ImRhoAtomk);
1827 
1828       if (SpinP_switch==3) diagonalize_nc_density();
1829 
1830       fft_charge_flag = 0;
1831 
1832     }
1833 
1834     /**********************************************************
1835      RMM-DIISH which is RMM-DIIS mixing method for Hamiltonian
1836     **********************************************************/
1837 
1838     else if (Mixing_switch==5){
1839 
1840       time11 += Set_Density_Grid(Cnt_kind,Calc_CntOrbital_ON,DM[0]);
1841 
1842       if (Solver==4) {
1843 	TRAN_Add_Density_Lead(mpi_comm_level1,
1844 			      SpinP_switch, Ngrid1, Ngrid2, Ngrid3,
1845 			      My_NumGridB_AB, Density_Grid_B);
1846       }
1847 
1848       if (SpinP_switch==3) diagonalize_nc_density();
1849 
1850       fft_charge_flag = 1;
1851     }
1852 
1853     else{
1854       printf("unknown scf.Mixing.Type\n");fflush(stdout);
1855       MPI_Finalize();
1856       exit(0);
1857     }
1858 
1859     /*****************************************************
1860         calculate occupation number for LDA+U
1861         and calculate the effective potential
1862         for LDA+U, constraint for spin, Zeeman term for
1863         spin, Zeeman term for orbital
1864     *****************************************************/
1865 
1866     if (  Hub_U_switch==1
1867 	  || 1<=Constraint_NCS_switch
1868 	  || Zeeman_NCS_switch==1
1869 	  || Zeeman_NCO_switch==1){
1870 
1871       Occupation_Number_LDA_U(SCF_iter, SucceedReadingDMfile, dUele, ECE, "stdout");
1872     }
1873 
1874     /*****************************************************
1875                        print informations
1876     *****************************************************/
1877 
1878     if (MYID_MPI_COMM_WORLD==Host_ID){
1879 
1880       /* spin non-collinear */
1881       if (SpinP_switch==3){
1882 
1883         if (0<level_stdout){
1884           printf("<DFT>  Total Spin Moment    (muB) %12.9f   Angles %14.9f  %14.9f\n",
1885 		 2.0*Total_SpinS,Total_SpinAngle0/PI*180.0,
1886 		 Total_SpinAngle1/PI*180.0); fflush(stdout);
1887           printf("<DFT>  Total Orbital Moment (muB) %12.9f   Angles %14.9f  %14.9f\n",
1888 		 Total_OrbitalMoment,
1889 		 Total_OrbitalMomentAngle0/PI*180.0,
1890 		 Total_OrbitalMomentAngle1/PI*180.0); fflush(stdout);
1891 	}
1892 
1893         x = 2.0*Total_SpinS*sin(Total_SpinAngle0)*cos(Total_SpinAngle1);
1894         y = 2.0*Total_SpinS*sin(Total_SpinAngle0)*sin(Total_SpinAngle1);
1895         z = 2.0*Total_SpinS*cos(Total_SpinAngle0);
1896 
1897         x += Total_OrbitalMoment*sin(Total_OrbitalMomentAngle0)*cos(Total_OrbitalMomentAngle1);
1898         y += Total_OrbitalMoment*sin(Total_OrbitalMomentAngle0)*sin(Total_OrbitalMomentAngle1);
1899         z += Total_OrbitalMoment*cos(Total_OrbitalMomentAngle0);
1900 
1901         xyz2spherical( x,y,z, 0.0,0.0,0.0, S_coordinate );
1902 
1903         if (0<level_stdout){
1904           printf("<DFT>  Total Moment         (muB) %12.9f   Angles %14.9f  %14.9f\n",
1905                  S_coordinate[0],S_coordinate[1]/PI*180.0,S_coordinate[2]/PI*180.0);
1906 	  fflush(stdout);
1907 	}
1908 
1909       }
1910       /* spin collinear */
1911       else{
1912         if (0<level_stdout){
1913           printf("<DFT>  Total Spin Moment (muB) = %15.12f\n",2.0*Total_SpinS);fflush(stdout);
1914 	}
1915       }
1916 
1917       if (Mixing_switch==0 && 0<level_stdout){
1918         printf("<DFT>  Mixing_weight=%15.12f SCF_RENZOKU=%2d\n",
1919 	       Mixing_weight,SCF_RENZOKU);fflush(stdout);
1920       }
1921       else{
1922         if (0<level_stdout){
1923           printf("<DFT>  Mixing_weight=%15.12f\n",Mixing_weight);fflush(stdout);
1924 	}
1925       }
1926 
1927       if (0<level_stdout){
1928         printf("<DFT>  Uele   =%18.12f  dUele     =%17.12f\n",Uele,dUele);fflush(stdout);
1929         printf("<DFT>  NormRD =%18.12f  Criterion =%17.12f\n",
1930                sqrt(fabs(NormRD[0])),SCF_Criterion);fflush(stdout);
1931       }
1932       else if (0==level_stdout){
1933         printf("<DFT> MD=%4d SCF=%4d   NormRD =%18.12f  Criterion =%17.12f\n",
1934                MD_iter,SCF_iter,sqrt(fabs(NormRD[0])),SCF_Criterion);fflush(stdout);
1935       }
1936 
1937     }
1938 
1939     if (Solver==4) {
1940 
1941       /* check whether the SCF converges or not */
1942 
1943       if ( sqrt(fabs(NormRD[0]))< SCF_Criterion ) {
1944 
1945 	po = 1;
1946 
1947 	/* printf("TRAN  NormRD < SCF_Criterion, SCF is satisfied \n");	*/
1948       }
1949     }
1950 
1951     outputfile1(1,MD_iter,orbitalOpt_iter,Cnt_Now,SCF_iter,file_DFTSCF,ChemP_e0);
1952 
1953     /*****************************************************
1954                          Uele -> pUele
1955     *****************************************************/
1956 
1957     pUele = Uele;
1958 
1959     /*****************************************************
1960             on-the-fly adjustment of Pulay_SCF
1961     *****************************************************/
1962 
1963     if (1<MD_iter && SucceedReadingDMfile==1 && Scf_RestartFromFile!=-1
1964         && Pulay_SCF==Pulay_SCF_original && SCF_iter<Pulay_SCF_original && NormRD[0]<0.01 ){
1965 
1966       Pulay_SCF = SCF_iter + 1;
1967     }
1968 
1969     /*************************************************************
1970      If there is a proper file, change parameters controlling the
1971      SCF iteration, based on keywords described in the file.
1972     **************************************************************/
1973 
1974     Read_SCF_keywords();
1975     if (Cnt_switch==0) SCF_MAX = DFTSCF_loop;
1976 
1977     /************************************************************************
1978                               end of SCF calculation
1979     ************************************************************************/
1980 
1981   } while (po==0 && SCF_iter<SCF_MAX);
1982 
1983   /*****************************************************
1984           making of the input data for TranMain
1985   *****************************************************/
1986 
1987   /* revised by Y. Xiao for Noncollinear NEGF calculations */ /* iHNL is outputed */
1988   TRAN_Output_Trans_HS( mpi_comm_level1, Solver, SpinP_switch, ChemP, H, iHNL, OLP, H0,
1989                         atomnum, SpeciesNum, WhatSpecies,
1990                         Spe_Total_CNO, FNAN, natn, ncn, G2ID, atv_ijk,
1991                         Max_FSNAN, ScaleSize, F_G2M, TCpyCell, List_YOUSO,
1992                         filepath,filename,"tranb");
1993 
1994   /*****************************************************
1995               save contraction coefficients
1996   *****************************************************/
1997 
1998   if (Cnt_switch==1) File_CntCoes("write");
1999 
2000   /*****************************************************
2001               save orbital magnetic moment
2002   *****************************************************/
2003 
2004   if (SpinP_switch==3) Orbital_Moment("write");
2005 
2006   /*****************************************************
2007               save Mulliken charge
2008   *****************************************************/
2009 
2010   time14 += Mulliken_Charge("write");
2011 
2012   /*****************************************************
2013             save occupation number in LDA+U
2014   *****************************************************/
2015 
2016   /* ---- added by MJ */
2017   if (  Hub_U_switch==1
2018 	|| 1<=Constraint_NCS_switch
2019 	|| Zeeman_NCS_switch==1
2020 	|| Zeeman_NCO_switch==1){
2021 
2022     Occupation_Number_LDA_U(SCF_iter, SucceedReadingDMfile, dUele, ECE, "write");
2023 
2024   }
2025 
2026   /*****************************************************
2027             Natural Bond Orbital (NBO) Analysis
2028 
2029     NBO_switch = 1 : for Cluster
2030     NBO_switch = 2 : for Krylov
2031     NBO_switch = 3 : for Band
2032     NBO_switch = 4 : for Band (at designated k-points)
2033   *****************************************************/
2034 
2035   if (NBO_switch == 1){
2036     if (Solver == 2){
2037       Calc_NAO_Cluster(DM[0]);
2038     }
2039     else{
2040       if (MYID_MPI_COMM_WORLD == Host_ID){
2041 	printf("# NBO ERROR: Please check combination of calc. types of SCF and NAO\n");
2042       }
2043     }
2044   }
2045 
2046   if (NBO_switch == 2){
2047     if (Solver == 8){
2048       Calc_NAO_Krylov(H,OLP[0],DM[0]);
2049     }
2050     else{
2051       if (MYID_MPI_COMM_WORLD == Host_ID){
2052 	printf("# NBO ERROR: Please check combination of calc. types of SCF and NAO\n");
2053       }
2054     }
2055   }
2056 
2057   /*
2058   if (NBO_switch == 3 || NBO_switch == 4){
2059 
2060     int Nkpoint_NAO;
2061     double **kpoint_NAO;
2062 
2063     if      (NBO_switch == 3) Nkpoint_NAO = T_knum;
2064     else if (NBO_switch == 4) Nkpoint_NAO = NAO_Nkpoint;
2065 
2066     kpoint_NAO = (double**)malloc(sizeof(double*)*(Nkpoint_NAO+1));
2067     for (i=0; i<(Nkpoint_NAO+1); i++){
2068       kpoint_NAO[i] = (double*)malloc(sizeof(double)*4);
2069     }
2070 
2071     if (NBO_switch == 3){
2072       for (i=0; i<Nkpoint_NAO; i++){
2073         kpoint_NAO[i][1] = T_KGrids1[i];
2074         kpoint_NAO[i][2] = T_KGrids2[i];
2075         kpoint_NAO[i][3] = T_KGrids3[i];
2076       }
2077     }
2078     else if (NBO_switch == 4){
2079       for (i=0; i<Nkpoint_NAO; i++){
2080 	kpoint_NAO[i][1] = NAO_kpoint[i][1];
2081 	kpoint_NAO[i][2] = NAO_kpoint[i][2];
2082 	kpoint_NAO[i][3] = NAO_kpoint[i][3];
2083       }
2084     }
2085 
2086     Calc_NAO_Band(Nkpoint_NAO, kpoint_NAO, SpinP_switch, H, OLP[0]);
2087 
2088     for (i=0; i<(Nkpoint_NAO+1); i++){
2089       free(kpoint_NAO[i]);
2090     }
2091     free(kpoint_NAO);
2092 
2093   }
2094   */
2095 
2096   /*****************************************************
2097                       Band dispersion
2098   *****************************************************/
2099 
2100   if ( Band_disp_switch==1 && Band_Nkpath>0 && (Solver==3 || PeriodicGamma_flag==1) ){
2101     if (Cnt_switch==0){
2102       Band_DFT_kpath(Band_Nkpath, Band_N_perpath,
2103                      Band_kpath, Band_kname,
2104                      SpinP_switch,H,iHNL,OLP[0]);
2105     }
2106     else {
2107       Band_DFT_kpath(Band_Nkpath, Band_N_perpath,
2108                      Band_kpath, Band_kname,
2109                      SpinP_switch,CntH,iCntHNL,CntOLP[0]);
2110     }
2111   }
2112 
2113   /******************************************************
2114           calculation of density of states (DOS)
2115   ******************************************************/
2116 
2117   if ( Dos_fileout || DosGauss_fileout) {
2118 
2119     if ( Solver==2 && PeriodicGamma_flag==0 ){  /* cluster */
2120 
2121       if ( Cnt_switch==0 ){
2122 
2123         if (Opticalconductivity_fileout==1){
2124   	  Cluster_DFT_Dosout( SpinP_switch, H, iHNL, OLP[0] );
2125 	}
2126         else {
2127 
2128 #ifndef scalapack
2129 
2130           /*---------- modified by TOYODA 08/JAN/2010 */
2131           if (g_exx_switch) {
2132             time5 += Cluster_DFT("dos",LSCF_iter,SpinP_switch,
2133 				 Cluster_ReCoes,Cluster_ko, H,iHNL,OLP[0],DM[0],EDM,
2134 				 g_exx,g_exx_DM[0],g_exx_U,Eele0,Eele1);
2135           } else {
2136             time5 += Cluster_DFT("dos",LSCF_iter,SpinP_switch,
2137 				 Cluster_ReCoes,Cluster_ko, H,iHNL,OLP[0],DM[0],EDM,
2138 				 NULL,NULL,NULL,Eele0,Eele1);
2139           }
2140           /*---------- until here */
2141 #endif
2142 
2143 #ifdef scalapack
2144 
2145           /*---------- modified by TOYODA 08/JAN/2010 */
2146           if (g_exx_switch) {
2147             time5 += Cluster_DFT_ScaLAPACK("dos",LSCF_iter,SpinP_switch,
2148 				 Cluster_ReCoes,Cluster_ko, H,iHNL,OLP[0],DM[0],EDM,
2149 				 g_exx,g_exx_DM[0],g_exx_U,Eele0,Eele1,
2150 				 myworld1,NPROCS_ID1,Comm_World1,NPROCS_WD1,
2151 				 Comm_World_StartID1,MPI_CommWD1,
2152 				 Ss_Cluster,Cs_Cluster,Hs_Cluster);
2153           } else {
2154             time5 += Cluster_DFT_ScaLAPACK("dos",LSCF_iter,SpinP_switch,
2155 				 Cluster_ReCoes,Cluster_ko, H,iHNL,OLP[0],DM[0],EDM,
2156 				 NULL,NULL,NULL,Eele0,Eele1,
2157 				 myworld1,NPROCS_ID1,Comm_World1,NPROCS_WD1,
2158 				 Comm_World_StartID1,MPI_CommWD1,
2159 				 Ss_Cluster,Cs_Cluster,Hs_Cluster);
2160 
2161           }
2162           /*---------- until here */
2163 
2164 #endif
2165 	}
2166       }
2167       else {
2168 
2169         if (Opticalconductivity_fileout==1){
2170  	  Cluster_DFT_Dosout( SpinP_switch, CntH, iCntHNL, CntOLP[0] );
2171 	}
2172         else {
2173 
2174 #ifndef scalapack
2175 
2176           /*---------- modifined by TOYODA 08/JAN/2010 */
2177           if (g_exx_switch) {
2178 
2179             time5 += Cluster_DFT("dos",LSCF_iter,SpinP_switch,
2180 				 Cluster_ReCoes,Cluster_ko,CntH,iCntHNL,CntOLP[0],DM[0],EDM,
2181 				 g_exx,g_exx_DM[0],g_exx_U,Eele0,Eele1);
2182           } else {
2183             time5 += Cluster_DFT("dos",LSCF_iter,SpinP_switch,
2184 				 Cluster_ReCoes,Cluster_ko,CntH,iCntHNL,CntOLP[0],DM[0],EDM,
2185 				 NULL,NULL,NULL,Eele0,Eele1);
2186           }
2187           /*---------- until here */
2188 #endif
2189 
2190 #ifdef scalapack
2191 
2192           /*---------- modifined by TOYODA 08/JAN/2010 */
2193           if (g_exx_switch) {
2194 
2195             time5 += Cluster_DFT_ScaLAPACK("dos",LSCF_iter,SpinP_switch,
2196 				 Cluster_ReCoes,Cluster_ko,CntH,iCntHNL,CntOLP[0],DM[0],EDM,
2197 				 g_exx,g_exx_DM[0],g_exx_U,Eele0,Eele1,
2198 				 myworld1,NPROCS_ID1,Comm_World1,NPROCS_WD1,
2199 				 Comm_World_StartID1,MPI_CommWD1,
2200 				 Ss_Cluster,Cs_Cluster,Hs_Cluster);
2201 
2202           } else {
2203             time5 += Cluster_DFT_ScaLAPACK("dos",LSCF_iter,SpinP_switch,
2204 				 Cluster_ReCoes,Cluster_ko,CntH,iCntHNL,CntOLP[0],DM[0],EDM,
2205 				 NULL,NULL,NULL,Eele0,Eele1,
2206 				 myworld1,NPROCS_ID1,Comm_World1,NPROCS_WD1,
2207 				 Comm_World_StartID1,MPI_CommWD1,
2208 				 Ss_Cluster,Cs_Cluster,Hs_Cluster);
2209 
2210           }
2211           /*---------- until here */
2212 #endif
2213 
2214         }
2215       }
2216     }
2217 
2218     if ( Solver==3 || PeriodicGamma_flag==1 ){  /* band */
2219       if (Cnt_switch==0){
2220 
2221 	Band_DFT_Dosout( Dos_Kgrid[0], Dos_Kgrid[1], Dos_Kgrid[2],
2222 			 SpinP_switch, H, iHNL, OLP[0] );
2223       }
2224       else {
2225 	Band_DFT_Dosout( Dos_Kgrid[0], Dos_Kgrid[1], Dos_Kgrid[2],
2226 			 SpinP_switch, CntH, iCntHNL, CntOLP[0] );
2227       }
2228     }
2229 
2230     if (Solver==4){ /* NEGF */
2231 
2232       TRAN_DFT_Dosout( mpi_comm_level1, level_stdout, LSCF_iter,SpinP_switch,H,iHNL,OLP[0],
2233 		       atomnum,Matomnum,WhatSpecies, Spe_Total_CNO, FNAN, natn, ncn,
2234 		       M2G, G2ID, atv_ijk, List_YOUSO, Spe_Num_CBasis, SpeciesNum, filename, filepath,
2235 		       DM[0],EDM,Eele0,Eele1 );
2236 
2237     }
2238 
2239     if ( Solver==5 ){  /* divide-conquer */
2240       if (Cnt_switch==0){
2241         time5 += Divide_Conquer("dos",LSCF_iter,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1);
2242       }
2243       else {
2244         time5 += Divide_Conquer("dos",LSCF_iter,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1);
2245       }
2246     }
2247 
2248     if ( Solver==8 ){  /* Krylov subspace method */
2249       if (Cnt_switch==0){
2250         time5 += Krylov("dos",LSCF_iter,H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1);
2251       }
2252       else {
2253         time5 += Krylov("dos",LSCF_iter,CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1);
2254       }
2255     }
2256   }
2257 
2258   /*****************************************************
2259          Calc. of Bloch waves at given k-points
2260   *****************************************************/
2261 
2262   if (MO_fileout==1 && MO_Nkpoint>0 && (Solver==3 || PeriodicGamma_flag==1) ) {
2263     if (Cnt_switch==0){
2264       Band_DFT_MO(MO_Nkpoint, MO_kpoint, SpinP_switch, H, iHNL, OLP[0]);
2265     }
2266     else {
2267       Band_DFT_MO(MO_Nkpoint, MO_kpoint, SpinP_switch, CntH, iCntHNL, CntOLP[0]);
2268     }
2269   }
2270 
2271   /*****************************************************
2272        write a restart file if SpinP_switch!=3
2273   *****************************************************/
2274 
2275   if (SpinP_switch!=3){
2276     RestartFileDFT("write",MD_iter,&Uele,H,CntH,&etime);
2277     time13 += etime;
2278     MPI_Barrier(mpi_comm_level1);
2279   }
2280 
2281   /****************************************************
2282       Note on the force calculation.
2283       If you first call Total_Energy.c before
2284       Force.c, then the force calculation will fail.
2285   ****************************************************/
2286 
2287   if (MYID_MPI_COMM_WORLD==Host_ID && 0<level_stdout){
2288     printf("<MD=%2d>  Force calculation\n",MD_iter);fflush(stdout);
2289   }
2290 
2291   if (Cnt_switch==1 && Cnt_Now==1) time10 += Set_Orbitals_Grid(1);
2292   time11 += Set_Density_Grid(1, 0, DM[0]);
2293 
2294   if (Solver==4) {
2295     TRAN_Add_Density_Lead(mpi_comm_level1,
2296 			  SpinP_switch, Ngrid1, Ngrid2, Ngrid3,
2297  	                  My_NumGridB_AB, Density_Grid_B);
2298   }
2299 
2300   /* to save mixing densities between the previous and current ones
2301      do charge mixing */
2302 
2303   if (Mixing_switch==3 || Mixing_switch==4) {
2304 
2305     /* non-spin polarization */
2306     if (SpinP_switch==0){
2307       if (Solver!=4 || TRAN_Poisson_flag2==2)  time15 += FFT_Density(1,ReRhok[0][0],ImRhok[0][0]);
2308       else                                     time15 += FFT2D_Density(1,ReRhok[0][0],ImRhok[0][0]);
2309     }
2310 
2311     /* collinear spin polarization */
2312     else if (SpinP_switch==1) {
2313       if (Solver!=4 || TRAN_Poisson_flag2==2){
2314         time15 += FFT_Density(1,ReRhok[0][0],ImRhok[0][0]);
2315         time15 += FFT_Density(2,ReRhok[0][1],ImRhok[0][1]);
2316       }
2317       else{
2318         time15 += FFT2D_Density(1,ReRhok[0][0],ImRhok[0][0]);
2319         time15 += FFT2D_Density(2,ReRhok[0][1],ImRhok[0][1]);
2320       }
2321     }
2322     /* non-collinear spin polarization */
2323     else if (SpinP_switch==3) {
2324       if (Solver!=4 || TRAN_Poisson_flag2==2){
2325         time15 += FFT_Density(1,ReRhok[0][0],ImRhok[0][0]);
2326         time15 += FFT_Density(2,ReRhok[0][1],ImRhok[0][1]);
2327         time15 += FFT_Density(4,ReRhok[0][2],ImRhok[0][2]);
2328       }
2329       else{
2330         time15 += FFT2D_Density(1,ReRhok[0][0],ImRhok[0][0]);
2331         time15 += FFT2D_Density(2,ReRhok[0][1],ImRhok[0][1]);
2332         time15 += FFT2D_Density(4,ReRhok[0][2],ImRhok[0][2]);
2333       }
2334     }
2335 
2336     time6 += Mixing_DM(1,
2337                        LSCF_iter-SCF_iter_shift,
2338                        SCF_iter-SCF_iter_shift,
2339                        SucceedReadingDMfile,
2340 		       ReRhok,ImRhok,
2341 		       Residual_ReRhok,Residual_ImRhok,
2342 		       ReVk,ImVk,ReRhoAtomk,ImRhoAtomk);
2343   }
2344 
2345   /* write a restart file, it is important to save
2346      charge density before calling diagonalize_nc_density */
2347 
2348   if (SpinP_switch==3){
2349     RestartFileDFT("write",MD_iter,&Uele,H,CntH,&etime);
2350     time13 += etime;
2351     MPI_Barrier(mpi_comm_level1);
2352   }
2353 
2354   if (SpinP_switch==3) diagonalize_nc_density();
2355 
2356   /****************************************************
2357    if (Solver==9), calculate the energy density matrix
2358   ****************************************************/
2359 
2360   if (Solver==9){
2361 
2362     if (Cnt_switch==0){
2363       time5 += Cluster_DFT_ON2("force",LSCF_iter,SpinP_switch,Cluster_ReCoes,Cluster_ko,
2364 			       H,iHNL,OLP[0],DM[0],EDM,Eele0,Eele1);
2365     }
2366     else{
2367 
2368       time5 += Cluster_DFT_ON2("force",LSCF_iter,SpinP_switch,Cluster_ReCoes,Cluster_ko,
2369 			       CntH,iCntHNL,CntOLP[0],DM[0],EDM,Eele0,Eele1);
2370     }
2371   }
2372 
2373   /****************************************************
2374                calculation of forces
2375   ****************************************************/
2376 
2377   if (!orbitalOpt_Force_Skip) time7 += Force(H0,DS_NL,OLP,DM[0],EDM);
2378 
2379   if (scf_stress_flag){
2380     Stress(H0,DS_NL,OLP,DM[0],EDM);
2381   }
2382 
2383   /*
2384   if (SpinP_switch==3) {
2385     Stress_NC(H0,DS_NL,OLP,DM[0],EDM);
2386   }
2387   else{
2388     Stress(H0,DS_NL,OLP,DM[0],EDM);
2389   }
2390   */
2391 
2392   /****************************************************
2393      set Pulay_SCF = Pulay_SCF_original in anycase
2394   ****************************************************/
2395 
2396   Pulay_SCF = Pulay_SCF_original;
2397 
2398   /****************************************************
2399      if the SCF iteration did not converge,
2400      set Scf_RestartFromFile = 0,
2401   ****************************************************/
2402 
2403   if (po==0 && Scf_RestartFromFile==1){
2404     Scf_RestartFromFile = 0;
2405   }
2406 
2407   /****************************************************
2408                calculate the total energy
2409   ****************************************************/
2410 
2411   if (MYID_MPI_COMM_WORLD==Host_ID && 0<level_stdout){
2412     printf("<MD=%2d>  Total Energy\n",MD_iter);fflush(stdout);
2413   }
2414 
2415   if (!orbitalOpt_Force_Skip) time8 += Total_Energy(MD_iter,DM[0],ECE);
2416 
2417   Uatom  = 0.0;
2418   Ucore  = ECE[0];
2419   UH0    = ECE[1];
2420   Ukin   = ECE[2];
2421   Una    = ECE[3];
2422   Unl    = ECE[4];
2423   UH1    = ECE[5];
2424   Uxc0   = ECE[6];
2425   Uxc1   = ECE[7];
2426   Uhub   = ECE[8];
2427   Ucs    = ECE[9];
2428   Uzs    = ECE[10];
2429   Uzo    = ECE[11];
2430   Uef    = ECE[12];
2431   UvdW   = ECE[13];
2432 
2433   Utot  = Ucore + UH0 + Ukin + Una + Unl + UH1
2434          + Uxc0 + Uxc1 + Uhub + Ucs + Uzs + Uzo + Uef + UvdW;
2435 
2436   /*---------- added by TOYODA 20/JAN/2010 */
2437   if (g_exx_switch) { Utot += g_exx_U[0] + g_exx_U[1]; }
2438   /*---------- until here */
2439 
2440   /* elapsed time */
2441   dtime(&TEtime);
2442   time0 = TEtime - TStime;
2443 
2444   if (MYID_MPI_COMM_WORLD==Host_ID && 0<level_stdout){
2445 
2446     printf("\n*******************************************************\n");
2447     printf("                Total Energy (Hartree) at MD =%2d        \n",MD_iter);
2448     printf("*******************************************************\n\n");
2449 
2450     printf("  Uele  = %20.12f\n\n",Uele);
2451     printf("  Ukin  = %20.12f\n",Ukin);
2452     printf("  UH0   = %20.12f\n",UH0);
2453     printf("  UH1   = %20.12f\n",UH1);
2454     printf("  Una   = %20.12f\n",Una);
2455     printf("  Unl   = %20.12f\n",Unl);
2456     printf("  Uxc0  = %20.12f\n",Uxc0);
2457     printf("  Uxc1  = %20.12f\n",Uxc1);
2458     /*---------- added by TOYODA 20/JAN/2010 */
2459     if (g_exx_switch) {
2460       printf("  Uexx0 = %20.12f\n",g_exx_U[0]);
2461       printf("  Uexx1 = %20.12f\n",g_exx_U[1]);
2462     }
2463     printf("  Ucore = %20.12f\n",Ucore);
2464     printf("  Uhub  = %20.12f\n",Uhub);
2465     printf("  Ucs   = %20.12f\n",Ucs);
2466     printf("  Uzs   = %20.12f\n",Uzs);
2467     printf("  Uzo   = %20.12f\n",Uzo);
2468     printf("  Uef   = %20.12f\n",Uef);
2469     printf("  UvdW  = %20.12f\n",UvdW);
2470     printf("  Utot  = %20.12f\n\n",Utot);
2471 
2472     printf("  Note:\n\n");
2473     /*---------- modified by TOYODA 20/JAN/2010 */
2474     if (g_exx_switch) {
2475       printf("  Utot = Ukin+UH0+UH1+Una+Unl+Uxc0+Uxc1+Uexx0+Uexx1");
2476       printf("+Ucore+Uhub+Ucs+Uzs+Uzo+Uef+UvdW\n\n");
2477     } else {
2478       printf("  Utot = Ukin+UH0+UH1+Una+Unl+Uxc0+Uxc1");
2479       printf("+Ucore+Uhub+Ucs+Uzs+Uzo+Uef+UvdW\n\n");
2480     }
2481     /*---------- until here */
2482     printf("  Uele:   band energy\n");
2483     printf("  Ukin:   kinetic energy\n");
2484     printf("  UH0:    electric part of screened Coulomb energy\n");
2485     printf("  UH1:    difference electron-electron Coulomb energy\n");
2486     printf("  Una:    neutral atom potential energy\n");
2487     printf("  Unl:    non-local potential energy\n");
2488     printf("  Uxc0:   exchange-correlation energy for alpha spin\n");
2489     printf("  Uxc1:   exchange-correlation energy for beta spin\n");
2490     printf("  Ucore:  core-core Coulomb energy\n");
2491     printf("  Uhub:   LDA+U energy\n");
2492     printf("  Ucs:    constraint energy for spin orientation\n");
2493     printf("  Uzs:    Zeeman term for spin magnetic moment\n");
2494     printf("  Uzo:    Zeeman term for orbital magnetic moment\n");
2495     printf("  Uef:    electric energy by electric field\n");
2496     printf("  UvdW:   semi-empirical vdW energy \n\n"); /* okuno */
2497     printf("  (see also PRB 72, 045121(2005) for the energy contributions)\n\n");
2498 
2499     printf("\n*******************************************************\n");
2500     printf("           Computational times (s) at MD =%2d            \n",MD_iter);
2501     printf("*******************************************************\n\n");
2502 
2503     printf("  DFT in total      = %10.5f\n\n",time0);
2504     printf("  Set_OLP_Kin       = %10.5f\n",time1);
2505     printf("  Set_Nonlocal      = %10.5f\n",time2);
2506     printf("  Set_ProExpn_VNA   = %10.5f\n",time12);
2507     printf("  Set_Hamiltonian   = %10.5f\n",time3);
2508     printf("  Poisson           = %10.5f\n",time4);
2509     printf("  diagonalization   = %10.5f\n",time5);
2510     printf("  Mixing_DM         = %10.5f\n",time6);
2511     printf("  Force             = %10.5f\n",time7);
2512     printf("  Total_Energy      = %10.5f\n",time8);
2513     printf("  Set_Aden_Grid     = %10.5f\n",time9);
2514     printf("  Set_Orbitals_Grid = %10.5f\n",time10);
2515     printf("  Set_Density_Grid  = %10.5f\n",time11);
2516     printf("  RestartFileDFT    = %10.5f\n",time13);
2517     printf("  Mulliken_Charge   = %10.5f\n",time14);
2518     printf("  FFT(2D)_Density   = %10.5f\n",time15);
2519 
2520     /*---------- added by TOYODA 18/FEB/2010 */
2521     if (g_exx_switch) { EXX_Time_Report(); }
2522     /*---------- until here */
2523   }
2524 
2525   outputfile1(2,MD_iter,0,0,SCF_iter,file_DFTSCF,ChemP_e0);
2526 
2527   /****************************************************
2528                      updating CompTime
2529   ****************************************************/
2530 
2531   /* The Sum of Atomic Energy */
2532   /* if (iter==1) Atomic_Energy();  */
2533   /* Output_Energies_Forces(fp_DFTSCF); */
2534 
2535   CompTime[myid0][5]  += time1;  /* Set_OLP_Kin       */
2536   CompTime[myid0][6]  += time2;  /* Set_Nonlocal      */
2537   CompTime[myid0][7]  += time3;  /* Set_Hamiltonian   */
2538   CompTime[myid0][8]  += time4;  /* Poisson           */
2539   CompTime[myid0][9]  += time5;  /* diagonalization   */
2540   CompTime[myid0][10] += time6;  /* Mixing_DM         */
2541   CompTime[myid0][11] += time7;  /* Force             */
2542   CompTime[myid0][12] += time8;  /* Total_Energy      */
2543   CompTime[myid0][13] += time9;  /* Set_Aden_Grid     */
2544   CompTime[myid0][14] += time10; /* Set_Orbitals_Grid */
2545   CompTime[myid0][15] += time11; /* Set_Density_Grid  */
2546   CompTime[myid0][16] += time12; /* Set_ProExpn_VNA   */
2547   CompTime[myid0][17] += time13; /* RestartFileDFT    */
2548   CompTime[myid0][18] += time14; /* Mulliken_Charge   */
2549   CompTime[myid0][19] += time15; /* FFT(2D)_Density   */
2550 
2551   /* added by Chi-Cheng for unfolding */
2552   /*****************************************************
2553          Calc. of unfolded weight at given k-points
2554   *****************************************************/
2555 
2556   if (unfold_electronic_band==1 && unfold_Nkpoint>0 && (Solver==2 || Solver==3) ) {
2557     if (Cnt_switch==0){
2558       Unfolding_Bands(unfold_Nkpoint, unfold_kpoint, SpinP_switch, H, iHNL, OLP[0]);
2559     }
2560     else {
2561       Unfolding_Bands(unfold_Nkpoint, unfold_kpoint, SpinP_switch, CntH, iCntHNL, CntOLP[0]);
2562     }
2563   }
2564   /* end for unfolding */
2565 
2566   /*********************************************************
2567    freeing of arrays
2568   *********************************************************/
2569 
2570   /*---------- added by TOYODA 06/Jan/2010 */
2571   if (g_exx_switch) {
2572     if (g_exx_switch_output_DM && Host_ID==myid1) {
2573       EXX_Output_DM(g_exx, g_exx_DM[0]);
2574     }
2575     EXX_on_OpenMX_Free();
2576   }
2577   /*---------- until here */
2578 
2579   free(ReVk);
2580   free(ImVk);
2581 
2582   if ( Mixing_switch==3 || Mixing_switch==4 ){
2583 
2584     if      (SpinP_switch==0)  spinmax = 1;
2585     else if (SpinP_switch==1)  spinmax = 2;
2586     else if (SpinP_switch==3)  spinmax = 3;
2587 
2588     free(ReRhoAtomk);
2589     free(ImRhoAtomk);
2590 
2591     for (m=0; m<List_YOUSO[38]; m++){
2592       for (spin=0; spin<spinmax; spin++){
2593 	free(ReRhok[m][spin]);
2594       }
2595       free(ReRhok[m]);
2596     }
2597     free(ReRhok);
2598 
2599     for (m=0; m<List_YOUSO[38]; m++){
2600       for (spin=0; spin<spinmax; spin++){
2601 	free(ImRhok[m][spin]);
2602       }
2603       free(ImRhok[m]);
2604     }
2605     free(ImRhok);
2606 
2607     for (spin=0; spin<spinmax; spin++){
2608       free(Residual_ReRhok[spin]);
2609     }
2610     free(Residual_ReRhok);
2611 
2612     for (spin=0; spin<spinmax; spin++){
2613       free(Residual_ImRhok[spin]);
2614     }
2615     free(Residual_ImRhok);
2616   }
2617 
2618   /*************************************************
2619       allocation of arrays for orbital optimization
2620   *************************************************/
2621 
2622   if (Cnt_switch==1){
2623 
2624     for (k=0; k<(orbitalOpt_History+1); k++){
2625       for (i=0; i<=Matomnum; i++){
2626 	for (j=0; j<List_YOUSO[7]; j++){
2627 	  free(His_CntCoes[k][i][j]);
2628 	}
2629 	free(His_CntCoes[k][i]);
2630       }
2631       free(His_CntCoes[k]);
2632     }
2633     free(His_CntCoes);
2634 
2635     for (k=0; k<(orbitalOpt_History+1); k++){
2636       for (i=0; i<=Matomnum; i++){
2637 	for (j=0; j<List_YOUSO[7]; j++){
2638 	  free(His_D_CntCoes[k][i][j]);
2639 	}
2640 	free(His_D_CntCoes[k][i]);
2641       }
2642       free(His_D_CntCoes[k]);
2643     }
2644     free(His_D_CntCoes);
2645 
2646     for (k=0; k<(orbitalOpt_History+1); k++){
2647       for (i=0; i<=SpeciesNum; i++){
2648 	for (j=0; j<List_YOUSO[7]; j++){
2649 	  free(His_CntCoes_Species[k][i][j]);
2650 	}
2651 	free(His_CntCoes_Species[k][i]);
2652       }
2653       free(His_CntCoes_Species[k]);
2654     }
2655     free(His_CntCoes_Species);
2656 
2657     for (k=0; k<(orbitalOpt_History+1); k++){
2658       for (i=0; i<=SpeciesNum; i++){
2659 	for (j=0; j<List_YOUSO[7]; j++){
2660 	  free(His_D_CntCoes_Species[k][i][j]);
2661 	}
2662 	free(His_D_CntCoes_Species[k][i]);
2663       }
2664       free(His_D_CntCoes_Species[k]);
2665     }
2666     free(His_D_CntCoes_Species);
2667 
2668     dim_H = 0;
2669     for (wan=0; wan<SpeciesNum; wan++){
2670       for (al=0; al<Spe_Total_CNO[wan]; al++){
2671 	for (p=0; p<Spe_Specified_Num[wan][al]; p++){
2672 	  dim_H++;
2673 	}
2674       }
2675     }
2676 
2677     for (i=0; i<(dim_H+2); i++){
2678       free(OrbOpt_Hessian[i]);
2679     }
2680     free(OrbOpt_Hessian);
2681 
2682     free(His_OrbOpt_Etot);
2683   }
2684 
2685   /* band and collinear calculation */
2686 
2687   if (Solver==3 && SpinP_switch<=1){
2688 
2689     n = 0;
2690     for (i=1; i<=atomnum; i++){
2691       wanA  = WhatSpecies[i];
2692       n += Spe_Total_CNO[wanA];
2693     }
2694     n2 = n + 2;
2695 
2696     free(MP);
2697     free(order_GA);
2698 
2699     free(My_NZeros);
2700     free(SP_NZeros);
2701     free(SP_Atoms);
2702 
2703     free(ko);
2704     free(koS);
2705 
2706     for (j=0; j<n+1; j++){
2707       free(H_Band_Col[j]);
2708     }
2709     free(H_Band_Col);
2710 
2711 #ifndef scalapack
2712 
2713     for (i=0; i<n+1; i++){
2714       free(S_Band[i]);
2715     }
2716     free(S_Band);
2717 
2718     for (j=0; j<n+1; j++){
2719       free(C_Band_Col[j]);
2720     }
2721     free(C_Band_Col);
2722 
2723     free(BLAS_S);
2724 
2725 #endif
2726 
2727 #ifdef scalapack
2728 
2729     free(Ss_Band_Col);
2730     free(Hs_Band_Col);
2731     free(Cs_Band_Col);
2732 
2733 #endif
2734 
2735     free(H1_Band_Col);
2736     free(S1_Band_Col);
2737     free(CDM1_Band_Col);
2738     free(EDM1_Band_Col);
2739 
2740     for (i=0; i<Kspace_grid1; i++) {
2741       for (j=0; j<Kspace_grid2; j++) {
2742 	free(k_op[i][j]);
2743       }
2744       free(k_op[i]);
2745     }
2746     free(k_op);
2747 
2748     free(T_KGrids1);
2749     free(T_KGrids2);
2750     free(T_KGrids3);
2751     free(T_k_op);
2752 
2753     for (i=0; i<2; i++){
2754       free(T_k_ID[i]);
2755     }
2756     free(T_k_ID);
2757 
2758     for (i=0; i<List_YOUSO[23]; i++){
2759       for (j=0; j<T_knum; j++){
2760 	free(EIGEN_Band_Col[i][j]);
2761       }
2762       free(EIGEN_Band_Col[i]);
2763     }
2764     free(EIGEN_Band_Col);
2765 
2766     /* freeing of arrays for the second world */
2767 
2768 #ifndef scalapack
2769 
2770     if (T_knum<=numprocs1){
2771 
2772       if (Num_Comm_World2<=numprocs1){
2773         MPI_Comm_free(&MPI_CommWD2[myworld2]);
2774       }
2775 
2776       free(MPI_CommWD2);
2777       free(Comm_World_StartID2);
2778       free(NPROCS_WD2);
2779       free(Comm_World2);
2780       free(NPROCS_ID2);
2781     }
2782 
2783 #endif
2784 
2785 #ifdef scalapack
2786 
2787     MPI_Comm_free(&MPI_CommWD2[myworld2]);
2788     free(MPI_CommWD2);
2789     free(Comm_World_StartID2);
2790     free(NPROCS_WD2);
2791     free(Comm_World2);
2792     free(NPROCS_ID2);
2793 
2794     Cfree_blacs_system_handle(bhandle2);
2795     Cblacs_gridexit(ictxt2);
2796 
2797 #endif
2798 
2799     /* freeing of arrays for the first world */
2800 
2801     if (Num_Comm_World1<=numprocs0){
2802       MPI_Comm_free(&MPI_CommWD1[myworld1]);
2803     }
2804 
2805     free(MPI_CommWD1);
2806     free(Comm_World_StartID1);
2807     free(NPROCS_WD1);
2808     free(Comm_World1);
2809     free(NPROCS_ID1);
2810   }
2811 
2812   /* band and non-collinear calculation */
2813 
2814   else if (Solver==3 && SpinP_switch==3){
2815 
2816     n = 0;
2817     for (i=1; i<=atomnum; i++){
2818       wanA = WhatSpecies[i];
2819       n += Spe_Total_CNO[wanA];
2820     }
2821     n2 = n + 2;
2822 
2823     free(koS);
2824 
2825     for (i=0; i<n+1; i++){
2826       free(S_Band[i]);
2827     }
2828     free(S_Band);
2829   }
2830 
2831   /* cluster */
2832 
2833   if (Solver==2){
2834 
2835     n = 0;
2836     for (i=1; i<=atomnum; i++){
2837       wanA  = WhatSpecies[i];
2838       n += Spe_Total_CNO[wanA];
2839     }
2840     n2 = n + 2;
2841 
2842     if ( SpinP_switch==0 || SpinP_switch==1 ){
2843       for (i=0; i<List_YOUSO[23]; i++){
2844 	for (j=0; j<n2; j++){
2845 	  free(Cluster_ReCoes[i][j]);
2846 	}
2847         free(Cluster_ReCoes[i]);
2848       }
2849       free(Cluster_ReCoes);
2850     }
2851 
2852     for (i=0; i<List_YOUSO[23]; i++){
2853       free(Cluster_ko[i]);
2854     }
2855     free(Cluster_ko);
2856 
2857 #ifdef scalapack
2858 
2859     /* freeing of arrays for the first world */
2860 
2861     if ( SpinP_switch==0 || SpinP_switch==1 ){
2862 
2863       if (Num_Comm_World1<=numprocs0){
2864 	MPI_Comm_free(&MPI_CommWD1[myworld1]);
2865       }
2866 
2867       free(NPROCS_ID1);
2868       free(Comm_World1);
2869       free(NPROCS_WD1);
2870       free(Comm_World_StartID1);
2871       free(MPI_CommWD1);
2872 
2873       free(Hs_Cluster);
2874       free(Ss_Cluster);
2875       free(Cs_Cluster);
2876 
2877       Cfree_blacs_system_handle(bhandle1);
2878       Cblacs_gridexit(ictxt1);
2879 
2880     }
2881 
2882 #endif
2883 
2884   }
2885 
2886   if (Solver==4)  {
2887     /* revised by Y. Xiao for Noncollinear NEGF calculations */
2888     if (SpinP_switch<2) {
2889       TRAN_Deallocate_Lead_Region();
2890       TRAN_Deallocate_Cregion( SpinP_switch );
2891     } else {
2892       TRAN_Deallocate_Lead_Region_NC();
2893       TRAN_Deallocate_Cregion_NC( SpinP_switch );
2894     }
2895     TRAN_Deallocate_Electrode_Grid(Ngrid2);
2896   } /* if (Solver==4) */
2897 
2898   /* freeing koS and S_Band */
2899   if (Solver==4 && SpinP_switch==3){
2900     n = 0;
2901     for (i=1; i<=atomnum; i++){
2902       wanA = WhatSpecies[i];
2903       n += Spe_Total_CNO[wanA];
2904     }
2905     n2 = n + 2;
2906 
2907     free(koS);
2908 
2909     for (i=0; i<n+1; i++){
2910       free(S_Band[i]);
2911     }
2912     free(S_Band);
2913   }
2914   /* until here by Y. Xiao for Noncollinear NEGF calculations */
2915 
2916   MPI_Barrier(MPI_COMM_WORLD1);
2917 
2918   return time0;
2919 }
2920 
2921 
2922 
2923 
Read_SCF_keywords()2924 void Read_SCF_keywords()
2925 {
2926   int po,myid;
2927   char fname[YOUSO10];
2928   FILE *fp;
2929 
2930   sprintf(fname,"%s%s_SCF_keywords",filepath,filename);
2931 
2932   if ((fp = fopen(fname,"r")) != NULL){
2933     po = 1;
2934   }
2935   else {
2936     po = 0;
2937   }
2938 
2939   if (po==1){
2940 
2941     MPI_Comm_rank(MPI_COMM_WORLD1,&myid);
2942 
2943     if (myid==Host_ID){
2944       printf("\n The keywords for SCF iteration are renewed by %s.\n",fname);fflush(stdout);
2945     }
2946 
2947     /* open the file */
2948 
2949     input_open(fname);
2950 
2951     /* scf.maxIter */
2952 
2953     input_int("scf.maxIter",&DFTSCF_loop,40);
2954     if (DFTSCF_loop<0) {
2955       MPI_Finalize();
2956       exit(0);
2957     }
2958 
2959     /* scf.Min.Mixing.Weight */
2960 
2961     input_double("scf.Min.Mixing.Weight",&Min_Mixing_weight,(double)0.001);
2962 
2963     /* scf.Max.Mixing.Weight */
2964 
2965     input_double("scf.Max.Mixing.Weight",&Max_Mixing_weight,(double)0.4);
2966 
2967     /* scf.Kerker.factor */
2968 
2969     input_double("scf.Kerker.factor",&Kerker_factor,(double)4.0);
2970 
2971     /* scf.Mixing.StartPulay */
2972 
2973     input_int("scf.Mixing.StartPulay",&Pulay_SCF,6);
2974 
2975     /* scf.criterion */
2976 
2977     input_double("scf.criterion",&SCF_Criterion,(double)1.0e-6);
2978 
2979     /* close the file */
2980 
2981     input_close();
2982   }
2983 
2984 }
2985 
2986 
2987 
2988 
2989 
2990 
2991 
2992 
Output_Energies_Forces(FILE * fp)2993 void Output_Energies_Forces(FILE *fp)
2994 {
2995   int ct_AN;
2996   double sumx,sumy,sumz;
2997   double AEx,AEy,AEz;
2998 
2999   fprintf(fp,"\n");
3000   fprintf(fp,"***********************************************************\n");
3001   fprintf(fp,"***********************************************************\n");
3002   fprintf(fp,"                     Energies (Hartree)                    \n");
3003   fprintf(fp,"***********************************************************\n");
3004   fprintf(fp,"***********************************************************\n");
3005   fprintf(fp,"    Uatom                         = %20.14f\n",Uatom);
3006   fprintf(fp,"    Uele for   up-spin (OS)       = %20.14f\n",Uele_OS0);
3007   fprintf(fp,"    Uele for   up-spin (IS)       = %20.14f\n",Uele_IS0);
3008   fprintf(fp,"    Uele for down-spin (OS)       = %20.14f\n",Uele_OS1);
3009   fprintf(fp,"    Uele for down-spin (IS)       = %20.14f\n",Uele_IS1);
3010   fprintf(fp,"    Uxc for up-spin               = %20.14f\n",Uxc0);
3011   fprintf(fp,"    Uxc for down-spin             = %20.14f\n",Uxc1);
3012   fprintf(fp,"    UH0                           = %20.14f\n",UH0);
3013   fprintf(fp,"    UH1                           = %20.14f\n",UH1);
3014   fprintf(fp,"    UH2                           = %20.14f\n",UH2);
3015   fprintf(fp,"    Ucore                         = %20.14f\n",Ucore);
3016   fprintf(fp,"    Udc  (Uxc+UH0+UH1+UH2+Ucore)  = %20.14f\n",Udc);
3017   fprintf(fp,"    Utot (Uele+Udc)               = %20.14f\n",Utot);
3018   fprintf(fp,"    Ucoh (Utot-Uatom)             = %20.14f\n",Ucoh);
3019 
3020   fprintf(fp,"\n");
3021   fprintf(fp,"***********************************************************\n");
3022   fprintf(fp,"***********************************************************\n");
3023   fprintf(fp,"                  Energies/atom (Hartree)                  \n");
3024   fprintf(fp,"***********************************************************\n");
3025   fprintf(fp,"***********************************************************\n");
3026   fprintf(fp,"    Uatom                         = %20.14f\n",Uatom/atomnum);
3027   fprintf(fp,"    Uele for   up-spin (OS)       = %20.14f\n",Uele_OS0/atomnum);
3028   fprintf(fp,"    Uele for   up-spin (IS)       = %20.14f\n",Uele_IS0/atomnum);
3029   fprintf(fp,"    Uele for down-spin (OS)       = %20.14f\n",Uele_OS1/atomnum);
3030   fprintf(fp,"    Uele for down-spin (IS)       = %20.14f\n",Uele_IS1/atomnum);
3031   fprintf(fp,"    Uxc for up-spin               = %20.14f\n",Uxc0/atomnum);
3032   fprintf(fp,"    Uxc for down-spin             = %20.14f\n",Uxc1/atomnum);
3033   fprintf(fp,"    UH0                           = %20.14f\n",UH0/atomnum);
3034   fprintf(fp,"    UH1                           = %20.14f\n",UH1/atomnum);
3035   fprintf(fp,"    UH2                           = %20.14f\n",UH2/atomnum);
3036   fprintf(fp,"    Ucore                         = %20.14f\n",Ucore/atomnum);
3037   fprintf(fp,"    Udc  (Uxc+UH0+UH1+UH2+Ucore)  = %20.14f\n",Udc/atomnum);
3038   fprintf(fp,"    Utot (Uele+Udc)               = %20.14f\n",Utot/atomnum);
3039   fprintf(fp,"    Ucoh (Utot-Uatom)             = %20.14f\n",Ucoh/atomnum);
3040 
3041   fprintf(fp,"\n");
3042   fprintf(fp,"***********************************************************\n");
3043   fprintf(fp,"***********************************************************\n");
3044   fprintf(fp,"               Force on Atom (Hartree/bohr)                \n");
3045   fprintf(fp,"***********************************************************\n");
3046   fprintf(fp,"***********************************************************\n");
3047   fprintf(fp,"\n");
3048 
3049   fprintf(fp,"                   Fx            Fy            Fz\n");
3050 
3051   sumx = 0.0;
3052   sumy = 0.0;
3053   sumz = 0.0;
3054 
3055   for (ct_AN=1; ct_AN<=atomnum; ct_AN++){
3056     fprintf(fp,"   %i %i        %12.9f  %12.9f  %12.9f\n",
3057             ct_AN,WhatSpecies[ct_AN],
3058             -Gxyz[ct_AN][17],-Gxyz[ct_AN][18],-Gxyz[ct_AN][19]);
3059 
3060     sumx = sumx - Gxyz[ct_AN][17];
3061     sumy = sumy - Gxyz[ct_AN][18];
3062     sumz = sumz - Gxyz[ct_AN][19];
3063   }
3064   fprintf(fp,"\n");
3065   fprintf(fp,"   Sum of F   %12.9f  %12.9f  %12.9f\n",sumx,sumy,sumz);
3066 
3067   /****************************************************
3068                    Correction of Force
3069   ****************************************************/
3070 
3071   AEx = sumx/(double)atomnum;
3072   AEy = sumy/(double)atomnum;
3073   AEz = sumz/(double)atomnum;
3074 
3075   fprintf(fp,"\n");
3076   fprintf(fp,"***********************************************************\n");
3077   fprintf(fp,"***********************************************************\n");
3078   fprintf(fp,"            Corrected Force on Atom (Hartree/bohr)         \n");
3079   fprintf(fp,"***********************************************************\n");
3080   fprintf(fp,"***********************************************************\n");
3081   fprintf(fp,"\n");
3082 
3083   fprintf(fp,"                   Fx            Fy            Fz\n");
3084 
3085   sumx = 0.0;
3086   sumy = 0.0;
3087   sumz = 0.0;
3088 
3089   for (ct_AN=1; ct_AN<=atomnum; ct_AN++){
3090     Gxyz[ct_AN][17] = Gxyz[ct_AN][17] + AEx;
3091     Gxyz[ct_AN][18] = Gxyz[ct_AN][18] + AEy;
3092     Gxyz[ct_AN][19] = Gxyz[ct_AN][19] + AEz;
3093 
3094     fprintf(fp,"   %i %i        %12.9f  %12.9f  %12.9f\n",
3095             ct_AN,WhatSpecies[ct_AN],
3096             -Gxyz[ct_AN][17],-Gxyz[ct_AN][18],-Gxyz[ct_AN][19]);
3097 
3098     sumx = sumx - Gxyz[ct_AN][17];
3099     sumy = sumy - Gxyz[ct_AN][18];
3100     sumz = sumz - Gxyz[ct_AN][19];
3101   }
3102   fprintf(fp,"\n");
3103   fprintf(fp,"   Sum of F   %12.9f  %12.9f  %12.9f\n",sumx,sumy,sumz);
3104 
3105 }
3106