1 /**********************************************************************
2   Divide_Conquer.c:
3 
4      Divide_Conquer.c is a subroutine to perform a divide and conquer
5      method for eigenvalue problem
6 
7   Log of Divide_Conquer.c:
8 
9      10/Dec/2003  Released by T.Ozaki
10 
11 ***********************************************************************/
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <math.h>
17 #include <time.h>
18 #include "openmx_common.h"
19 #include "mpi.h"
20 #include <omp.h>
21 
22 #define  measure_time   0
23 
24 
25 
26 static double DC_Col(char *mode,
27 		     int SCF_iter,
28 		     double *****Hks, double ****OLP0,
29 		     double *****CDM,
30 		     double *****EDM,
31 		     double Eele0[2], double Eele1[2]);
32 
33 static double DC_NonCol(char *mode,
34 			int SCF_iter,
35 			double *****Hks,
36 			double *****ImNL,
37 			double ****OLP0,
38 			double *****CDM,
39 			double *****EDM,
40 			double Eele0[2],
41 			double Eele1[2]);
42 
43 static void Save_DOS_Col(double ******Residues, double ****OLP0, double ***EVal, int *Msize);
44 static void Save_DOS_NonCol(dcomplex ******Residues, double ****OLP0, double **EVal, int *Msize);
45 
46 
Divide_Conquer(char * mode,int SCF_iter,double ***** Hks,double ***** ImNL,double **** OLP0,double ***** CDM,double ***** EDM,double Eele0[2],double Eele1[2])47 double Divide_Conquer(char *mode,
48                       int SCF_iter,
49                       double *****Hks,
50                       double *****ImNL,
51                       double ****OLP0,
52                       double *****CDM,
53                       double *****EDM,
54                       double Eele0[2], double Eele1[2])
55 {
56   double time0;
57 
58   /****************************************************
59          collinear without spin-orbit coupling
60   ****************************************************/
61 
62   if ( (SpinP_switch==0 || SpinP_switch==1) && SO_switch==0 ){
63     time0 = DC_Col(mode,SCF_iter, Hks, OLP0, CDM, EDM, Eele0, Eele1);
64   }
65 
66   /****************************************************
67          collinear with spin-orbit coupling
68   ****************************************************/
69 
70   else if ( (SpinP_switch==0 || SpinP_switch==1) && SO_switch==1 ){
71     printf("Spin-orbit coupling is not supported for collinear DFT calculations.\n");
72     MPI_Finalize();
73     exit(1);
74   }
75 
76   /****************************************************
77    non-collinear with and without spin-orbit coupling
78   ****************************************************/
79 
80   else if (SpinP_switch==3){
81     time0 = DC_NonCol(mode,SCF_iter, Hks, ImNL, OLP0, CDM, EDM, Eele0, Eele1);
82   }
83 
84   return time0;
85 }
86 
87 
88 
89 
90 
91 
92 
DC_Col(char * mode,int SCF_iter,double ***** Hks,double **** OLP0,double ***** CDM,double ***** EDM,double Eele0[2],double Eele1[2])93 static double DC_Col(char *mode,
94 		     int SCF_iter,
95 		     double *****Hks, double ****OLP0,
96 		     double *****CDM,
97 		     double *****EDM,
98 		     double Eele0[2], double Eele1[2])
99 {
100   static int firsttime=1;
101   int Mc_AN,Gc_AN,i,Gi,wan,wanA,wanB,Anum;
102   int size1,size2,num,NUM,NUM1,n2,Cwan,Hwan;
103   int ih,ig,ian,j,kl,jg,jan,Bnum,m,n,spin;
104   int l,i1,j1,P_min,m_size;
105   int po,loopN,tno1,tno2,h_AN,Gh_AN;
106   int MA_AN,GA_AN,GB_AN,tnoA,tnoB,k;
107   double My_TZ,TZ,sum,FermiF,time0;
108   double tmp1,tmp2;
109   double My_Num_State,Num_State,x,Dnum;
110   double TStime,TEtime;
111   double My_Eele0[2],My_Eele1[2];
112   double max_x=30.0;
113   double ChemP_MAX,ChemP_MIN,spin_degeneracy;
114   double **S_DC,***H_DC,*ko,*M1;
115   double **C;
116   double ***EVal;
117   double ******Residues;
118   double ***PDOS_DC;
119   int *MP,*Msize;
120   double *tmp_array;
121   double *tmp_array2;
122   int *Snd_H_Size,*Rcv_H_Size;
123   int *Snd_S_Size,*Rcv_S_Size;
124   int numprocs,myid,ID,IDS,IDR,tag=999;
125   double Stime_atom, Etime_atom;
126   double OLP_eigen_cut=Threshold_OLP_Eigen;
127   double stime, etime;
128   double time1,time2,time3,time4,time5,time6;
129 
130   double sum1,sum2,sum3,sum4;
131   int i1s, j1s;
132 
133   MPI_Status stat;
134   MPI_Request request;
135 
136   /* for OpenMP */
137   int OMPID,Nthrds,Nprocs;
138 
139   /* MPI */
140   MPI_Comm_size(mpi_comm_level1,&numprocs);
141   MPI_Comm_rank(mpi_comm_level1,&myid);
142 
143   dtime(&TStime);
144 
145   if (measure_time){
146     time1 = 0.0;
147     time2 = 0.0;
148     time3 = 0.0;
149     time4 = 0.0;
150     time5 = 0.0;
151     time6 = 0.0;
152   }
153 
154   /****************************************************
155     allocation of arrays:
156 
157     int MP[List_YOUSO[2]];
158     int Msize[Matomnum+1];
159     double EVal[SpinP_switch+1][Matomnum+1][n2];
160   ****************************************************/
161 
162   Snd_H_Size = (int*)malloc(sizeof(int)*numprocs);
163   Rcv_H_Size = (int*)malloc(sizeof(int)*numprocs);
164   Snd_S_Size = (int*)malloc(sizeof(int)*numprocs);
165   Rcv_S_Size = (int*)malloc(sizeof(int)*numprocs);
166 
167   m_size = 0;
168   Msize = (int*)malloc(sizeof(int)*(Matomnum+1));
169 
170   EVal = (double***)malloc(sizeof(double**)*(SpinP_switch+1));
171   for (spin=0; spin<=SpinP_switch; spin++){
172     EVal[spin] = (double**)malloc(sizeof(double*)*(Matomnum+1));
173 
174 
175     for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
176 
177       if (Mc_AN==0){
178         Gc_AN = 0;
179         FNAN[0] = 1;
180         SNAN[0] = 0;
181         n2 = 1;
182         Msize[Mc_AN] = 1;
183       }
184       else{
185 
186         Gc_AN = M2G[Mc_AN];
187         Anum = 1;
188         for (i=0; i<=(FNAN[Gc_AN]+SNAN[Gc_AN]); i++){
189           Gi = natn[Gc_AN][i];
190           wanA = WhatSpecies[Gi];
191           Anum += Spe_Total_CNO[wanA];
192         }
193         NUM = Anum - 1;
194         Msize[Mc_AN] = NUM;
195         n2 = NUM + 3;
196       }
197 
198       m_size += n2;
199       EVal[spin][Mc_AN] = (double*)malloc(sizeof(double)*n2);
200 
201     }
202   }
203 
204   if (firsttime)
205   PrintMemory("Divide_Conquer: EVal",sizeof(double)*m_size,NULL);
206 
207   if (2<=level_stdout){
208     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
209         printf("<DC> myid=%i Mc_AN=%2d Gc_AN=%2d Msize=%3d\n",
210         myid,Mc_AN,M2G[Mc_AN],Msize[Mc_AN]);
211     }
212   }
213 
214   /****************************************************
215     allocation of arrays:
216 
217     double Residues[SpinP_switch+1]
218                    [Matomnum+1]
219                    [FNAN[Gc_AN]+1]
220                    [Spe_Total_CNO[Gc_AN]]
221                    [Spe_Total_CNO[Gh_AN]]
222                    [NUM2]
223      To reduce the memory size, the size of NUM2 is
224      needed to be found in the loop.
225   ****************************************************/
226 
227   m_size = 0;
228   Residues = (double******)malloc(sizeof(double*****)*(SpinP_switch+1));
229   for (spin=0; spin<=SpinP_switch; spin++){
230     Residues[spin] = (double*****)malloc(sizeof(double****)*(Matomnum+1));
231     for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
232 
233       if (Mc_AN==0){
234         Gc_AN = 0;
235         FNAN[0] = 1;
236         tno1 = 1;
237         n2 = 1;
238       }
239       else{
240         Gc_AN = M2G[Mc_AN];
241         wanA = WhatSpecies[Gc_AN];
242         tno1 = Spe_Total_CNO[wanA];
243         n2 = Msize[Mc_AN] + 2;
244       }
245 
246       Residues[spin][Mc_AN] =
247            (double****)malloc(sizeof(double***)*(FNAN[Gc_AN]+1));
248 
249       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
250 
251         if (Mc_AN==0){
252           tno2 = 1;
253         }
254         else {
255           Gh_AN = natn[Gc_AN][h_AN];
256           wanB = WhatSpecies[Gh_AN];
257           tno2 = Spe_Total_CNO[wanB];
258         }
259 
260         Residues[spin][Mc_AN][h_AN] = (double***)malloc(sizeof(double**)*tno1);
261         for (i=0; i<tno1; i++){
262           Residues[spin][Mc_AN][h_AN][i] = (double**)malloc(sizeof(double*)*tno2);
263           for (j=0; j<tno2; j++){
264             Residues[spin][Mc_AN][h_AN][i][j] = (double*)malloc(sizeof(double)*n2);
265 	  }
266         }
267 
268         m_size += tno1*tno2*n2;
269       }
270     }
271   }
272 
273   if (firsttime)
274   PrintMemory("Divide_Conquer: Residues",sizeof(double)*m_size,NULL);
275 
276   /****************************************************
277     allocation of arrays:
278 
279     double PDOS[SpinP_switch+1]
280                [Matomnum+1]
281                [NUM]
282   ****************************************************/
283 
284   m_size = 0;
285   PDOS_DC = (double***)malloc(sizeof(double**)*(SpinP_switch+1));
286   for (spin=0; spin<=SpinP_switch; spin++){
287     PDOS_DC[spin] = (double**)malloc(sizeof(double*)*(Matomnum+1));
288     for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
289 
290       if (Mc_AN==0)  n2 = 1;
291       else           n2 = Msize[Mc_AN] + 2;
292 
293       m_size += n2;
294       PDOS_DC[spin][Mc_AN] = (double*)malloc(sizeof(double)*n2);
295     }
296   }
297 
298   if (firsttime)
299   PrintMemory("Divide_Conquer: PDOS_DC",sizeof(double)*m_size,NULL);
300 
301   /****************************************************
302    MPI
303 
304    Hks
305   ****************************************************/
306 
307   /***********************************
308              set data size
309   ************************************/
310 
311   for (ID=0; ID<numprocs; ID++){
312 
313     IDS = (myid + ID) % numprocs;
314     IDR = (myid - ID + numprocs) % numprocs;
315 
316     if (ID!=0){
317       tag = 999;
318 
319       /* find data size to send block data */
320       if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
321 
322         size1 = 0;
323         for (spin=0; spin<=SpinP_switch; spin++){
324           for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
325             Mc_AN = Snd_MAN[IDS][n];
326             Gc_AN = Snd_GAN[IDS][n];
327             Cwan = WhatSpecies[Gc_AN];
328             tno1 = Spe_Total_CNO[Cwan];
329             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
330               Gh_AN = natn[Gc_AN][h_AN];
331               Hwan = WhatSpecies[Gh_AN];
332               tno2 = Spe_Total_CNO[Hwan];
333               size1 += tno1*tno2;
334 	    }
335           }
336 	}
337 
338         Snd_H_Size[IDS] = size1;
339         MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
340       }
341       else{
342         Snd_H_Size[IDS] = 0;
343       }
344 
345       /* receiving of size of data */
346 
347       if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
348 
349         MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
350         Rcv_H_Size[IDR] = size2;
351       }
352       else{
353         Rcv_H_Size[IDR] = 0;
354       }
355 
356       if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0) MPI_Wait(&request,&stat);
357     }
358     else{
359       Snd_H_Size[IDS] = 0;
360       Rcv_H_Size[IDR] = 0;
361     }
362   }
363 
364   /***********************************
365              data transfer
366   ************************************/
367 
368   tag = 999;
369   for (ID=0; ID<numprocs; ID++){
370 
371     IDS = (myid + ID) % numprocs;
372     IDR = (myid - ID + numprocs) % numprocs;
373 
374     if (ID!=0){
375 
376       /*****************************
377               sending of data
378       *****************************/
379 
380       if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
381 
382         size1 = Snd_H_Size[IDS];
383 
384         /* allocation of array */
385 
386         tmp_array = (double*)malloc(sizeof(double)*size1);
387 
388         /* multidimentional array to vector array */
389 
390         num = 0;
391         for (spin=0; spin<=SpinP_switch; spin++){
392           for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
393             Mc_AN = Snd_MAN[IDS][n];
394             Gc_AN = Snd_GAN[IDS][n];
395             Cwan = WhatSpecies[Gc_AN];
396             tno1 = Spe_Total_CNO[Cwan];
397             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
398               Gh_AN = natn[Gc_AN][h_AN];
399               Hwan = WhatSpecies[Gh_AN];
400               tno2 = Spe_Total_CNO[Hwan];
401               for (i=0; i<tno1; i++){
402                 for (j=0; j<tno2; j++){
403                   tmp_array[num] = Hks[spin][Mc_AN][h_AN][i][j];
404                   num++;
405                 }
406               }
407 	    }
408           }
409 	}
410 
411         MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
412 
413       }
414 
415       /*****************************
416          receiving of block data
417       *****************************/
418 
419       if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
420 
421         size2 = Rcv_H_Size[IDR];
422 
423         /* allocation of array */
424         tmp_array2 = (double*)malloc(sizeof(double)*size2);
425 
426         MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
427 
428         num = 0;
429         for (spin=0; spin<=SpinP_switch; spin++){
430           Mc_AN = S_TopMAN[IDR] - 1;  /* S_TopMAN should be used. */
431           for (n=0; n<(F_Rcv_Num[IDR]+S_Rcv_Num[IDR]); n++){
432             Mc_AN++;
433             Gc_AN = Rcv_GAN[IDR][n];
434             Cwan = WhatSpecies[Gc_AN];
435             tno1 = Spe_Total_CNO[Cwan];
436 
437             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
438               Gh_AN = natn[Gc_AN][h_AN];
439               Hwan = WhatSpecies[Gh_AN];
440               tno2 = Spe_Total_CNO[Hwan];
441               for (i=0; i<tno1; i++){
442                 for (j=0; j<tno2; j++){
443                   Hks[spin][Mc_AN][h_AN][i][j] = tmp_array2[num];
444                   num++;
445 		}
446 	      }
447 	    }
448 	  }
449 	}
450 
451         /* freeing of array */
452         free(tmp_array2);
453       }
454 
455       if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
456         MPI_Wait(&request,&stat);
457         free(tmp_array); /* freeing of array */
458       }
459     }
460   }
461 
462   /****************************************************
463    MPI
464 
465    OLP0
466   ****************************************************/
467 
468   /***********************************
469              set data size
470   ************************************/
471 
472   if (SCF_iter<=2){
473 
474     for (ID=0; ID<numprocs; ID++){
475 
476       IDS = (myid + ID) % numprocs;
477       IDR = (myid - ID + numprocs) % numprocs;
478 
479       if (ID!=0){
480         tag = 999;
481 
482         /* find data size to send block data */
483         if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
484           size1 = 0;
485           for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
486             Mc_AN = Snd_MAN[IDS][n];
487             Gc_AN = Snd_GAN[IDS][n];
488             Cwan = WhatSpecies[Gc_AN];
489             tno1 = Spe_Total_CNO[Cwan];
490             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
491               Gh_AN = natn[Gc_AN][h_AN];
492               Hwan = WhatSpecies[Gh_AN];
493               tno2 = Spe_Total_CNO[Hwan];
494               for (i=0; i<tno1; i++){
495                 for (j=0; j<tno2; j++){
496                   size1++;
497                 }
498               }
499 	    }
500           }
501 
502           Snd_S_Size[IDS] = size1;
503           MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
504         }
505         else{
506           Snd_S_Size[IDS] = 0;
507         }
508 
509         /* receiving of size of data */
510 
511         if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
512           MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
513           Rcv_S_Size[IDR] = size2;
514         }
515         else{
516           Rcv_S_Size[IDR] = 0;
517         }
518 
519         if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0) MPI_Wait(&request,&stat);
520       }
521       else{
522         Snd_S_Size[IDS] = 0;
523         Rcv_S_Size[IDR] = 0;
524       }
525     }
526 
527     /***********************************
528                data transfer
529     ************************************/
530 
531     tag = 999;
532     for (ID=0; ID<numprocs; ID++){
533 
534       IDS = (myid + ID) % numprocs;
535       IDR = (myid - ID + numprocs) % numprocs;
536 
537       if (ID!=0){
538 
539         /*****************************
540                 sending of data
541         *****************************/
542 
543         if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
544 
545           size1 = Snd_S_Size[IDS];
546 
547           /* allocation of array */
548 
549           tmp_array = (double*)malloc(sizeof(double)*size1);
550 
551           /* multidimentional array to vector array */
552 
553           num = 0;
554 
555           for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
556             Mc_AN = Snd_MAN[IDS][n];
557             Gc_AN = Snd_GAN[IDS][n];
558             Cwan = WhatSpecies[Gc_AN];
559             tno1 = Spe_Total_CNO[Cwan];
560             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
561               Gh_AN = natn[Gc_AN][h_AN];
562               Hwan = WhatSpecies[Gh_AN];
563               tno2 = Spe_Total_CNO[Hwan];
564               for (i=0; i<tno1; i++){
565                 for (j=0; j<tno2; j++){
566                   tmp_array[num] = OLP0[Mc_AN][h_AN][i][j];
567                   num++;
568                 }
569               }
570 	    }
571           }
572 
573           MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
574         }
575 
576         /*****************************
577            receiving of block data
578         *****************************/
579 
580         if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
581 
582           size2 = Rcv_S_Size[IDR];
583 
584          /* allocation of array */
585           tmp_array2 = (double*)malloc(sizeof(double)*size2);
586 
587           MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
588 
589           num = 0;
590           Mc_AN = S_TopMAN[IDR] - 1; /* S_TopMAN should be used. */
591           for (n=0; n<(F_Rcv_Num[IDR]+S_Rcv_Num[IDR]); n++){
592             Mc_AN++;
593             Gc_AN = Rcv_GAN[IDR][n];
594             Cwan = WhatSpecies[Gc_AN];
595             tno1 = Spe_Total_CNO[Cwan];
596 
597             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
598               Gh_AN = natn[Gc_AN][h_AN];
599               Hwan = WhatSpecies[Gh_AN];
600               tno2 = Spe_Total_CNO[Hwan];
601               for (i=0; i<tno1; i++){
602                 for (j=0; j<tno2; j++){
603                   OLP0[Mc_AN][h_AN][i][j] = tmp_array2[num];
604                   num++;
605    	        }
606 	      }
607 	    }
608 	  }
609 
610           /* freeing of array */
611           free(tmp_array2);
612 
613         }
614 
615         if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
616           MPI_Wait(&request,&stat);
617           free(tmp_array); /* freeing of array */
618 	}
619       }
620     }
621   }
622 
623   /****************************************************
624             find the total number of electrons
625   ****************************************************/
626 
627   My_TZ = 0.0;
628   for (i=1; i<=Matomnum; i++){
629     Gc_AN = M2G[i];
630     wan = WhatSpecies[Gc_AN];
631     My_TZ = My_TZ + Spe_Core_Charge[wan];
632   }
633 
634   /* MPI, My_TZ */
635 
636   MPI_Barrier(mpi_comm_level1);
637   MPI_Allreduce(&My_TZ, &TZ, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
638 
639   /****************************************************
640       Setting of Hamiltonian and overlap matrices
641 
642          MP indicates the starting position of
643               atom i in arraies H and S
644   ****************************************************/
645 
646 #pragma omp parallel shared(OLP_eigen_cut,List_YOUSO,Etime_atom,time_per_atom,time3,Residues,EVal,time2,time1,S12,level_stdout,SpinP_switch,Hks,OLP0,SCF_iter,RMI1,S_G2M,Spe_Total_CNO,natn,FNAN,SNAN,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Stime_atom,Gc_AN,wan,Anum,i,j,MP,Gi,wanA,NUM,NUM1,n2,spin,S_DC,H_DC,ko,M1,C,ig,ian,ih,kl,jg,jan,Bnum,m,n,stime,P_min,l,i1,j1,etime,tmp1,tmp2,sum1,sum2,sum3,sum4,j1s,sum,tno1,h_AN,Gh_AN,wanB,tno2)
647   {
648 
649     /* get info. on OpenMP */
650 
651     OMPID = omp_get_thread_num();
652     Nthrds = omp_get_num_threads();
653     Nprocs = omp_get_num_procs();
654 
655     /* allocation of arrays */
656 
657     MP = (int*)malloc(sizeof(int)*List_YOUSO[2]);
658 
659     /* start of the Mc_AN loop which is parallelized by OpenMP */
660 
661     for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){
662 
663       dtime(&Stime_atom);
664 
665       Gc_AN = M2G[Mc_AN];
666       wan = WhatSpecies[Gc_AN];
667 
668       /***********************************************
669        find the size of matrix for the atom Mc_AN
670                  and set the MP vector
671 
672         Note:
673          MP indicates the starting position of
674               atom i in arraies H and S
675       ***********************************************/
676 
677       Anum = 1;
678       for (i=0; i<=(FNAN[Gc_AN]+SNAN[Gc_AN]); i++){
679 	MP[i] = Anum;
680 	Gi = natn[Gc_AN][i];
681 	wanA = WhatSpecies[Gi];
682 	Anum += Spe_Total_CNO[wanA];
683       }
684       NUM = Anum - 1;
685       n2 = NUM + 3;
686 
687       /***********************************************
688        allocation of arrays:
689 
690        double S_DC[n2][n2];
691        double H_DC[SpinP_switch+1][n2][n2];
692        double ko[n2];
693       ***********************************************/
694 
695       S_DC = (double**)malloc(sizeof(double*)*n2);
696       for (i=0; i<n2; i++){
697 	S_DC[i] = (double*)malloc(sizeof(double)*n2);
698       }
699 
700       H_DC = (double***)malloc(sizeof(double**)*(SpinP_switch+1));
701       for (spin=0; spin<=SpinP_switch; spin++){
702 	H_DC[spin] = (double**)malloc(sizeof(double*)*n2);
703 	for (i=0; i<n2; i++){
704 	  H_DC[spin][i] = (double*)malloc(sizeof(double)*n2);
705 	}
706       }
707 
708       ko = (double*)malloc(sizeof(double)*n2);
709       M1 = (double*)malloc(sizeof(double)*n2);
710 
711       C = (double**)malloc(sizeof(double*)*n2);
712       for (i=0; i<n2; i++){
713 	C[i] = (double*)malloc(sizeof(double)*n2);
714       }
715 
716       /***********************************************
717        construct cluster full matrices of Hamiltonian
718              and overlap for the atom Mc_AN
719       ***********************************************/
720 
721       for (i=0; i<=(FNAN[Gc_AN]+SNAN[Gc_AN]); i++){
722 	ig = natn[Gc_AN][i];
723 	ian = Spe_Total_CNO[WhatSpecies[ig]];
724 	Anum = MP[i];
725 	ih = S_G2M[ig];
726 
727 	for (j=0; j<=(FNAN[Gc_AN]+SNAN[Gc_AN]); j++){
728 
729 	  kl = RMI1[Mc_AN][i][j];
730 	  jg = natn[Gc_AN][j];
731 	  jan = Spe_Total_CNO[WhatSpecies[jg]];
732 	  Bnum = MP[j];
733 
734 	  if (0<=kl){
735 
736 	    if (SCF_iter<=2){
737 	      for (m=0; m<ian; m++){
738 		for (n=0; n<jan; n++){
739 		  S_DC[Anum+m][Bnum+n] = OLP0[ih][kl][m][n];
740 		}
741 	      }
742 	    }
743 
744 	    for (spin=0; spin<=SpinP_switch; spin++){
745 	      for (m=0; m<ian; m++){
746 		for (n=0; n<jan; n++){
747 		  H_DC[spin][Anum+m][Bnum+n] = Hks[spin][ih][kl][m][n];
748 		}
749 	      }
750 	    }
751 	  }
752 
753 	  else{
754 
755 	    if (SCF_iter<=2){
756 	      for (m=0; m<ian; m++){
757 		for (n=0; n<jan; n++){
758 		  S_DC[Anum+m][Bnum+n] = 0.0;
759 		}
760 	      }
761 	    }
762 
763 	    for (spin=0; spin<=SpinP_switch; spin++){
764 	      for (m=0; m<ian; m++){
765 		for (n=0; n<jan; n++){
766 		  H_DC[spin][Anum+m][Bnum+n] = 0.0;
767 		}
768 	      }
769 	    }
770 	  }
771 	}
772       }
773 
774       /****************************************************
775      Solve the generalized eigenvalue problem
776      HC = SCE
777 
778      1) diagonalize S
779      2) search negative eigenvalues of S
780       ****************************************************/
781 
782       if (SCF_iter<=2){
783 
784 	if (measure_time) dtime(&stime);
785 
786 	Eigen_lapack(S_DC,ko,NUM,NUM);
787 
788 	/***********************************************
789               Searching of negative eigenvalues
790 	************************************************/
791 
792 	P_min = 1;
793 	for (l=1; l<=NUM; l++){
794 
795 	  if (ko[l]<OLP_eigen_cut){
796 	    P_min = l + 1;
797 	    if (3<=level_stdout){
798 	      printf("<DC>  Negative EV of OLP %2d %15.12f\n",l,ko[l]);
799 	    }
800 	  }
801 	}
802 
803 	S12[Mc_AN][0][0] = P_min;
804 
805 	for (l=1; l<P_min; l++)     M1[l] = 0.0;
806 	for (l=P_min; l<=NUM; l++)  M1[l] = 1.0/sqrt(ko[l]);
807 
808 	for (i1=1; i1<=NUM; i1++){
809 	  for (j1=1; j1<=NUM; j1++){
810 	    S_DC[i1][j1]       = S_DC[i1][j1]*M1[j1];
811 	    S12[Mc_AN][i1][j1] = S_DC[i1][j1];
812 	  }
813 	}
814 
815 	if (measure_time){
816 	  dtime(&etime);
817 	  time1 += etime - stime;
818 	}
819 
820       }
821 
822       else{
823 
824 	P_min = (int)S12[Mc_AN][0][0];
825 
826 	for (i1=1; i1<=NUM; i1++){
827 	  for (j1=1; j1<=NUM; j1++){
828 	    S_DC[i1][j1] = S12[Mc_AN][i1][j1];
829 	  }
830 	}
831       }
832 
833       /***********************************************
834         transform Hamiltonian matrix
835       ************************************************/
836 
837       for (spin=0; spin<=SpinP_switch; spin++){
838 
839 	if (measure_time) dtime(&stime);
840 
841 	/* transpose S */
842 	for (i1=1; i1<=NUM; i1++){
843 	  for (j1=i1+1; j1<=NUM; j1++){
844 	    tmp1 = S_DC[i1][j1];
845 	    tmp2 = S_DC[j1][i1];
846 	    S_DC[i1][j1] = tmp2;
847 	    S_DC[j1][i1] = tmp1;
848 	  }
849 	}
850 
851 	/* H * U * M1 */
852 
853 	for (j1=1; j1<=NUM-3; j1=j1+4){
854 	  for (i1=1; i1<=NUM; i1++){
855 	    sum1 = 0.0;
856 	    sum2 = 0.0;
857 	    sum3 = 0.0;
858 	    sum4 = 0.0;
859 	    for (l=1; l<=NUM; l++){
860 	      sum1 += H_DC[spin][i1][l]*S_DC[j1][l];
861 	      sum2 += H_DC[spin][i1][l]*S_DC[j1+1][l];
862 	      sum3 += H_DC[spin][i1][l]*S_DC[j1+2][l];
863 	      sum4 += H_DC[spin][i1][l]*S_DC[j1+3][l];
864 	    }
865 	    C[j1][i1] = sum1;
866 	    C[j1+1][i1] = sum2;
867 	    C[j1+2][i1] = sum3;
868 	    C[j1+3][i1] = sum4;
869 	  }
870 	}
871 
872 	j1s =  NUM - NUM%4 + 1;
873 	for (j1=j1s; j1<=NUM; j1++){
874 	  for (i1=1; i1<=NUM; i1++){
875 	    sum = 0.0;
876 	    for (l=1; l<=NUM; l++){
877 	      sum += H_DC[spin][i1][l]*S_DC[j1][l];
878 	    }
879 	    C[j1][i1] = sum;
880 	  }
881 	}
882 
883 	/* M1 * U^+ H * U * M1 */
884 
885 	for (j1=1; j1<=NUM-3; j1=j1+4){
886 	  for (i1=1; i1<=NUM; i1++){
887 	    sum1 = 0.0;
888 	    sum2 = 0.0;
889 	    sum3 = 0.0;
890 	    sum4 = 0.0;
891 	    for (l=1; l<=NUM; l++){
892 	      sum1 += S_DC[i1][l]*C[j1  ][l];
893 	      sum2 += S_DC[i1][l]*C[j1+1][l];
894 	      sum3 += S_DC[i1][l]*C[j1+2][l];
895 	      sum4 += S_DC[i1][l]*C[j1+3][l];
896 	    }
897 	    H_DC[spin][j1  ][i1] = sum1;
898 	    H_DC[spin][j1+1][i1] = sum2;
899 	    H_DC[spin][j1+2][i1] = sum3;
900 	    H_DC[spin][j1+3][i1] = sum4;
901 	  }
902 	}
903 	j1s =  NUM - NUM%4 + 1;
904 	for (j1=j1s; j1<=NUM; j1++){
905 	  for (i1=1; i1<=NUM; i1++){
906 	    sum1 = 0.0;
907 	    for (l=1; l<=NUM; l++){
908 	      sum1 += S_DC[i1][l]*C[j1][l];
909 	    }
910 	    H_DC[spin][j1][i1] = sum1;
911 	  }
912 	}
913 
914 	/* H_DC to C (transposition) */
915 
916 	for (i1=P_min; i1<=NUM; i1++){
917 	  for (j1=P_min; j1<=NUM; j1++){
918 	    C[j1-(P_min-1)][i1-(P_min-1)] = H_DC[spin][i1][j1];
919 	  }
920 	}
921 
922 	/***********************************************
923          diagonalize the trasformed Hamiltonian matrix
924 	************************************************/
925 
926 	NUM1 = NUM - (P_min - 1);
927 	Eigen_lapack(C,ko,NUM1,NUM1);
928 
929 	/*
930         for (i=1; i<=NUM1; i++){
931           printf("DCQ1 i=%2d ko=%16.12f\n",i,ko[i]);
932         }
933         MPI_Finalize();
934         exit(0);
935 	*/
936 
937 	/* C to H (transposition) */
938 
939 	for (i1=1; i1<=NUM; i1++){
940 	  for (j1=1; j1<=NUM1; j1++){
941 	    H_DC[spin][j1][i1] = C[i1][j1];
942 	  }
943 	}
944 
945 	/***********************************************
946          transformation to the original eigen vectors.
947                         NOTE 244P
948 	***********************************************/
949 
950 	/* transpose */
951 
952 	for (i1=1; i1<=NUM; i1++){
953 	  for (j1=i1+1; j1<=NUM; j1++){
954 	    tmp1 = S_DC[i1][j1];
955 	    tmp2 = S_DC[j1][i1];
956 	    S_DC[i1][j1] = tmp2;
957 	    S_DC[j1][i1] = tmp1;
958 	  }
959 	}
960 
961 	for (j1=1; j1<=NUM1; j1++){
962 	  for (l=NUM; P_min<=l; l--){
963 	    H_DC[spin][j1][l] = H_DC[spin][j1][l-(P_min-1)];
964 	  }
965 	}
966 
967 	for (j1=1; j1<=NUM-3; j1=j1+4){
968 	  for (i1=1; i1<=NUM; i1++){
969 	    sum1 = 0.0;
970 	    sum2 = 0.0;
971 	    sum3 = 0.0;
972 	    sum4 = 0.0;
973 	    for (l=P_min; l<=NUM; l++){
974 	      sum1 += S_DC[i1][l]*H_DC[spin][j1  ][l];
975 	      sum2 += S_DC[i1][l]*H_DC[spin][j1+1][l];
976 	      sum3 += S_DC[i1][l]*H_DC[spin][j1+2][l];
977 	      sum4 += S_DC[i1][l]*H_DC[spin][j1+3][l];
978 	    }
979 	    C[i1][j1  ] = sum1;
980 	    C[i1][j1+1] = sum2;
981 	    C[i1][j1+2] = sum3;
982 	    C[i1][j1+3] = sum4;
983 	  }
984 	}
985 	j1s =  NUM - NUM%4 + 1;
986 	for (j1=j1s; j1<=NUM; j1++){
987 	  for (i1=1; i1<=NUM; i1++){
988 	    sum = 0.0;
989 	    for (l=P_min; l<=NUM; l++){
990 	      sum += S_DC[i1][l]*H_DC[spin][j1][l];
991 	    }
992 	    C[i1][j1] = sum;
993 	  }
994 	}
995 
996 	if (measure_time){
997 	  dtime(&etime);
998 	  time2 += etime - stime;
999 	}
1000 
1001 	/*
1002 	  for (i=1; i<=NUM1; i++){
1003 	  printf("ko %15.12f 1.0\n",ko[i]);
1004 	  }
1005 
1006 	  for (i=1; i<=NUM; i++){
1007 	  for (j=1; j<=NUM; j++){
1008 	  printf("%15.12f ",C[i][j]);
1009 	  }
1010 	  printf("\n");
1011 	  }
1012 	*/
1013 
1014 	/*
1015 	  MPI_Finalize();
1016 	  exit(0);
1017 	*/
1018 
1019 	/***********************************************
1020            store eigenvalues and residues of poles
1021 	***********************************************/
1022 
1023 	if (measure_time) dtime(&stime);
1024 
1025 	for (i1=1; i1<=NUM; i1++){
1026 	  EVal[spin][Mc_AN][i1-1] = 1000.0;
1027 	}
1028 	for (i1=1; i1<=NUM1; i1++){
1029 	  EVal[spin][Mc_AN][i1-1] = ko[i1];
1030 	}
1031 
1032 	wanA = WhatSpecies[Gc_AN];
1033 	tno1 = Spe_Total_CNO[wanA];
1034 
1035 	for (i=0; i<tno1; i++){
1036 	  for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1037 	    Gh_AN = natn[Gc_AN][h_AN];
1038 	    wanB = WhatSpecies[Gh_AN];
1039 	    tno2 = Spe_Total_CNO[wanB];
1040 	    Bnum = MP[h_AN];
1041 	    for (j=0; j<tno2; j++){
1042 	      for (i1=1; i1<=NUM1; i1++){
1043 		Residues[spin][Mc_AN][h_AN][i][j][i1-1] = C[1+i][i1]*C[Bnum+j][i1];
1044 	      }
1045 	    }
1046 	  }
1047 	}
1048 
1049 	if (measure_time){
1050 	  dtime(&etime);
1051 	  time3 += etime - stime;
1052 	}
1053 
1054       } /* end of spin */
1055 
1056       /****************************************************
1057                         free arrays
1058       ****************************************************/
1059 
1060       for (i=0; i<n2; i++){
1061 	free(S_DC[i]);
1062       }
1063       free(S_DC);
1064 
1065       for (spin=0; spin<=SpinP_switch; spin++){
1066 	for (i=0; i<n2; i++){
1067 	  free(H_DC[spin][i]);
1068 	}
1069 	free(H_DC[spin]);
1070       }
1071       free(H_DC);
1072 
1073       free(ko);
1074       free(M1);
1075 
1076       for (i=0; i<n2; i++){
1077 	free(C[i]);
1078       }
1079       free(C);
1080 
1081       dtime(&Etime_atom);
1082       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1083 
1084     } /* end of Mc_AN */
1085 
1086     /* freeing of arrays */
1087 
1088     free(MP);
1089 
1090   } /* #pragma omp parallel */
1091 
1092   if ( strcasecmp(mode,"scf")==0 ){
1093 
1094     /****************************************************
1095               calculate projected DOS
1096     ****************************************************/
1097 
1098     if (measure_time) dtime(&stime);
1099 
1100 #pragma omp parallel shared(FNAN,time_per_atom,Residues,OLP0,natn,PDOS_DC,Msize,Spe_Total_CNO,WhatSpecies,M2G,Matomnum,SpinP_switch) private(OMPID,Nthrds,Nprocs,Mc_AN,spin,Stime_atom,Etime_atom,Gc_AN,wanA,tno1,i1,i,h_AN,Gh_AN,wanB,tno2,j,tmp1)
1101     {
1102 
1103       /* get info. on OpenMP */
1104 
1105       OMPID = omp_get_thread_num();
1106       Nthrds = omp_get_num_threads();
1107       Nprocs = omp_get_num_procs();
1108 
1109       for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){
1110 	for (spin=0; spin<=SpinP_switch; spin++){
1111 
1112 	  dtime(&Stime_atom);
1113 
1114 	  Gc_AN = M2G[Mc_AN];
1115 	  wanA = WhatSpecies[Gc_AN];
1116 	  tno1 = Spe_Total_CNO[wanA];
1117 
1118 	  for (i1=0; i1<Msize[Mc_AN]; i1++){
1119 	    PDOS_DC[spin][Mc_AN][i1] = 0.0;
1120 	  }
1121 
1122 	  for (i=0; i<tno1; i++){
1123 	    for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1124 	      Gh_AN = natn[Gc_AN][h_AN];
1125 	      wanB = WhatSpecies[Gh_AN];
1126 	      tno2 = Spe_Total_CNO[wanB];
1127 	      for (j=0; j<tno2; j++){
1128 
1129 		tmp1 = OLP0[Mc_AN][h_AN][i][j];
1130 		for (i1=0; i1<Msize[Mc_AN]; i1++){
1131 		  PDOS_DC[spin][Mc_AN][i1] += Residues[spin][Mc_AN][h_AN][i][j][i1]*tmp1;
1132 		}
1133 	      }
1134 	    }
1135 	  }
1136 
1137 	  dtime(&Etime_atom);
1138 	  time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1139 
1140 	  /*
1141 	    for (i1=0; i1<Msize[Mc_AN]; i1++){
1142 	    printf("%4d  %18.15f\n",i1,PDOS_DC[spin][Mc_AN][i1]);
1143 	    }
1144 	  */
1145 
1146 	}
1147       }
1148 
1149     } /* #pragma omp parallel */
1150 
1151     if (measure_time){
1152       dtime(&etime);
1153       time4 += etime - stime;
1154     }
1155 
1156     /****************************************************
1157                    find chemical potential
1158     ****************************************************/
1159 
1160     if (measure_time) dtime(&stime);
1161 
1162     po = 0;
1163     loopN = 0;
1164 
1165     ChemP_MAX = 30.0;
1166     ChemP_MIN =-30.0;
1167     if      (SpinP_switch==0) spin_degeneracy = 2.0;
1168     else if (SpinP_switch==1) spin_degeneracy = 1.0;
1169 
1170     do {
1171       ChemP = 0.50*(ChemP_MAX + ChemP_MIN);
1172 
1173       My_Num_State = 0.0;
1174       for (spin=0; spin<=SpinP_switch; spin++){
1175 	for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1176 
1177 	  dtime(&Stime_atom);
1178 
1179 	  Gc_AN = M2G[Mc_AN];
1180 
1181 	  for (i=0; i<Msize[Mc_AN]; i++){
1182 	    x = (EVal[spin][Mc_AN][i] - ChemP)*Beta;
1183 	    if (x<=-max_x) x = -max_x;
1184 	    if (max_x<=x)  x = max_x;
1185 	    FermiF = 1.0/(1.0 + exp(x));
1186 	    My_Num_State += spin_degeneracy*FermiF*PDOS_DC[spin][Mc_AN][i];
1187 	  }
1188 
1189 	  dtime(&Etime_atom);
1190 	  time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1191 	}
1192       }
1193 
1194       /* MPI, My_Num_State */
1195 
1196       MPI_Barrier(mpi_comm_level1);
1197       MPI_Allreduce(&My_Num_State, &Num_State, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
1198 
1199       Dnum = (TZ - Num_State) - system_charge;
1200       if (0.0<=Dnum) ChemP_MIN = ChemP;
1201       else           ChemP_MAX = ChemP;
1202       if (fabs(Dnum)<1.0e-13) po = 1;
1203 
1204 
1205       if (myid==Host_ID && 2<=level_stdout){
1206 	printf("ChemP=%15.12f TZ=%15.12f Num_state=%15.12f\n",ChemP,TZ,Num_State);
1207       }
1208 
1209       loopN++;
1210     }
1211     while (po==0 && loopN<1000);
1212 
1213     /****************************************************
1214         eigenenergy by summing up eigenvalues
1215     ****************************************************/
1216 
1217     My_Eele0[0] = 0.0;
1218     My_Eele0[1] = 0.0;
1219     for (spin=0; spin<=SpinP_switch; spin++){
1220       for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1221 
1222 	dtime(&Stime_atom);
1223 
1224 	Gc_AN = M2G[Mc_AN];
1225 	for (i=0; i<Msize[Mc_AN]; i++){
1226 	  x = (EVal[spin][Mc_AN][i] - ChemP)*Beta;
1227 	  if (x<=-max_x) x = -max_x;
1228 	  if (max_x<=x)  x = max_x;
1229 	  FermiF = 1.0/(1.0 + exp(x));
1230 	  My_Eele0[spin] += FermiF*EVal[spin][Mc_AN][i]*PDOS_DC[spin][Mc_AN][i];
1231 	}
1232 
1233 	dtime(&Etime_atom);
1234 	time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1235       }
1236     }
1237 
1238     /* MPI, My_Eele0 */
1239     for (spin=0; spin<=SpinP_switch; spin++){
1240       MPI_Barrier(mpi_comm_level1);
1241       MPI_Allreduce(&My_Eele0[spin], &Eele0[spin], 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
1242     }
1243 
1244     if (SpinP_switch==0){
1245       Eele0[1] = Eele0[0];
1246     }
1247 
1248     if (measure_time){
1249       dtime(&etime);
1250       time5 += etime - stime;
1251     }
1252 
1253     /****************************************************
1254        calculate density and energy density matrices
1255     ****************************************************/
1256 
1257     if (measure_time) dtime(&stime);
1258 
1259     for (spin=0; spin<=SpinP_switch; spin++){
1260       for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1261 	Gc_AN = M2G[Mc_AN];
1262 	wanA = WhatSpecies[Gc_AN];
1263 	tno1 = Spe_Total_CNO[wanA];
1264 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1265 	  Gh_AN = natn[Gc_AN][h_AN];
1266 	  wanB = WhatSpecies[Gh_AN];
1267 	  tno2 = Spe_Total_CNO[wanB];
1268 	  for (i=0; i<tno1; i++){
1269 	    for (j=0; j<tno2; j++){
1270 	      CDM[spin][Mc_AN][h_AN][i][j] = 0.0;
1271 	      EDM[spin][Mc_AN][h_AN][i][j] = 0.0;
1272 	    }
1273 	  }
1274 	}
1275       }
1276     }
1277 
1278 #pragma omp parallel shared(FNAN,time_per_atom,EDM,CDM,Residues,natn,max_x,Beta,ChemP,EVal,Msize,Spe_Total_CNO,WhatSpecies,M2G,SpinP_switch,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,spin,Stime_atom,Gc_AN,wanA,tno1,i1,x,FermiF,h_AN,Gh_AN,wanB,tno2,i,j,tmp1,Etime_atom)
1279     {
1280 
1281       /* get info. on OpenMP */
1282 
1283       OMPID = omp_get_thread_num();
1284       Nthrds = omp_get_num_threads();
1285       Nprocs = omp_get_num_procs();
1286 
1287       for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){
1288 	for (spin=0; spin<=SpinP_switch; spin++){
1289 
1290 	  dtime(&Stime_atom);
1291 
1292 	  Gc_AN = M2G[Mc_AN];
1293 	  wanA = WhatSpecies[Gc_AN];
1294 	  tno1 = Spe_Total_CNO[wanA];
1295 
1296 	  for (i1=0; i1<Msize[Mc_AN]; i1++){
1297 	    x = (EVal[spin][Mc_AN][i1] - ChemP)*Beta;
1298 	    if (x<=-max_x) x = -max_x;
1299 	    if (max_x<=x)  x = max_x;
1300 	    FermiF = 1.0/(1.0 + exp(x));
1301 
1302 	    for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1303 	      Gh_AN = natn[Gc_AN][h_AN];
1304 	      wanB = WhatSpecies[Gh_AN];
1305 	      tno2 = Spe_Total_CNO[wanB];
1306 	      for (i=0; i<tno1; i++){
1307 		for (j=0; j<tno2; j++){
1308 		  tmp1 = FermiF*Residues[spin][Mc_AN][h_AN][i][j][i1];
1309 		  CDM[spin][Mc_AN][h_AN][i][j] += tmp1;
1310 		  EDM[spin][Mc_AN][h_AN][i][j] += tmp1*EVal[spin][Mc_AN][i1];
1311 		}
1312 	      }
1313 	    }
1314 	  }
1315 
1316 	  dtime(&Etime_atom);
1317 	  time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1318 	}
1319       }
1320 
1321     } /* #pragma omp parallel */
1322 
1323     /****************************************************
1324                      bond energies
1325     ****************************************************/
1326 
1327     My_Eele1[0] = 0.0;
1328     My_Eele1[1] = 0.0;
1329     for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
1330       GA_AN = M2G[MA_AN];
1331       wanA = WhatSpecies[GA_AN];
1332       tnoA = Spe_Total_CNO[wanA];
1333 
1334       for (j=0; j<=FNAN[GA_AN]; j++){
1335 	GB_AN = natn[GA_AN][j];
1336 	wanB = WhatSpecies[GB_AN];
1337 	tnoB = Spe_Total_CNO[wanB];
1338 
1339 	for (k=0; k<tnoA; k++){
1340 	  for (l=0; l<tnoB; l++){
1341 	    for (spin=0; spin<=SpinP_switch; spin++){
1342 	      My_Eele1[spin] += CDM[spin][MA_AN][j][k][l]*Hks[spin][MA_AN][j][k][l];
1343 	    }
1344 	  }
1345 	}
1346 
1347       }
1348     }
1349 
1350     /* MPI, My_Eele1 */
1351     MPI_Barrier(mpi_comm_level1);
1352     for (spin=0; spin<=SpinP_switch; spin++){
1353       MPI_Allreduce(&My_Eele1[spin], &Eele1[spin], 1, MPI_DOUBLE,
1354 		    MPI_SUM, mpi_comm_level1);
1355     }
1356 
1357     if (SpinP_switch==0){
1358       Eele1[1] = Eele1[0];
1359     }
1360 
1361     if (3<=level_stdout && myid==Host_ID){
1362       printf("Eele00=%15.12f Eele01=%15.12f\n",Eele0[0],Eele0[1]);
1363       printf("Eele10=%15.12f Eele11=%15.12f\n",Eele1[0],Eele1[1]);
1364     }
1365 
1366     if (measure_time){
1367       dtime(&etime);
1368       time6 += etime - stime;
1369     }
1370 
1371   } /* if ( strcasecmp(mode,"scf")==0 ) */
1372 
1373   else if ( strcasecmp(mode,"dos")==0 ){
1374     Save_DOS_Col(Residues,OLP0,EVal,Msize);
1375   }
1376 
1377   if (measure_time){
1378     printf("Divide_Conquer myid=%2d time1=%7.3f time2=%7.3f time3=%7.3f time4=%7.3f time5=%7.3f time6=%7.3f\n",
1379             myid,time1,time2,time3,time4,time5,time6);fflush(stdout);
1380   }
1381 
1382   /****************************************************
1383     freeing of arrays:
1384 
1385   ****************************************************/
1386 
1387   free(Snd_H_Size);
1388   free(Rcv_H_Size);
1389 
1390   free(Snd_S_Size);
1391   free(Rcv_S_Size);
1392 
1393   free(Msize);
1394 
1395   for (spin=0; spin<=SpinP_switch; spin++){
1396     for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
1397       free(EVal[spin][Mc_AN]);
1398     }
1399     free(EVal[spin]);
1400   }
1401   free(EVal);
1402 
1403   for (spin=0; spin<=SpinP_switch; spin++){
1404     for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
1405 
1406       if (Mc_AN==0){
1407         Gc_AN = 0;
1408         FNAN[0] = 1;
1409         tno1 = 1;
1410       }
1411       else{
1412         Gc_AN = M2G[Mc_AN];
1413         wanA = WhatSpecies[Gc_AN];
1414         tno1 = Spe_Total_CNO[wanA];
1415       }
1416 
1417       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1418         if (Mc_AN==0){
1419           tno2 = 1;
1420         }
1421         else {
1422           Gh_AN = natn[Gc_AN][h_AN];
1423           wanB = WhatSpecies[Gh_AN];
1424           tno2 = Spe_Total_CNO[wanB];
1425         }
1426 
1427         for (i=0; i<tno1; i++){
1428           for (j=0; j<tno2; j++){
1429             free(Residues[spin][Mc_AN][h_AN][i][j]);
1430 	  }
1431           free(Residues[spin][Mc_AN][h_AN][i]);
1432         }
1433         free(Residues[spin][Mc_AN][h_AN]);
1434       }
1435       free(Residues[spin][Mc_AN]);
1436     }
1437     free(Residues[spin]);
1438   }
1439   free(Residues);
1440 
1441   for (spin=0; spin<=SpinP_switch; spin++){
1442     for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
1443       free(PDOS_DC[spin][Mc_AN]);
1444     }
1445     free(PDOS_DC[spin]);
1446   }
1447   free(PDOS_DC);
1448 
1449   /* for time */
1450   dtime(&TEtime);
1451   time0 = TEtime - TStime;
1452 
1453   /* for PrintMemory */
1454   firsttime=0;
1455 
1456   return time0;
1457 }
1458 
1459 
1460 
1461 
1462 
1463 
1464 
1465 
DC_NonCol(char * mode,int SCF_iter,double ***** Hks,double ***** ImNL,double **** OLP0,double ***** CDM,double ***** EDM,double Eele0[2],double Eele1[2])1466 static double DC_NonCol(char *mode,
1467 			int SCF_iter,
1468 			double *****Hks,
1469 			double *****ImNL,
1470 			double ****OLP0,
1471 			double *****CDM,
1472 			double *****EDM,
1473 			double Eele0[2],
1474 			double Eele1[2])
1475 {
1476   static int firsttime=1;
1477   int Mc_AN,Gc_AN,i,Gi,wan,wanA,wanB,Anum;
1478   int size1,size2,num,NUM,NUM1,n2,Cwan,Hwan;
1479   int ih,ig,ian,j,kl,jg,jan,Bnum,m,n,spin,so;
1480   int l,i1,j1,P_min,m_size;
1481   int po,loopN,tno1,tno2,h_AN,Gh_AN;
1482   int k,ii1,jj1,k1,l1,ompi;
1483   double My_TZ,TZ,sum,FermiF,time0;
1484   double My_Num_State,Num_State,x,Dnum;
1485   double sum_r,sum_i,tmp1,tmp2;
1486   double sum1_r,sum1_i,sum2_r,sum2_i;
1487   double TStime,TEtime;
1488   double My_Eele0[2],My_Eele1[2];
1489   double max_x=50.0;
1490   double ChemP_MAX,ChemP_MIN,spin_degeneracy;
1491   double **S_DC,*ko,*M1;
1492   dcomplex **C,**H_DC;
1493   double **EVal;
1494   dcomplex ******Residues;
1495   double **PDOS_DC;
1496   int *MP,*Msize;
1497   double *tmp_array;
1498   double *tmp_array2;
1499   double *omp_tmp;
1500   int *Snd_H_Size,*Rcv_H_Size;
1501   int *Snd_iHNL_Size,*Rcv_iHNL_Size;
1502   int *Snd_S_Size,*Rcv_S_Size;
1503   int numprocs,myid,ID,IDS,IDR,tag=999;
1504   double Stime_atom, Etime_atom;
1505   double OLP_eigen_cut=Threshold_OLP_Eigen;
1506 
1507   MPI_Status stat;
1508   MPI_Request request;
1509 
1510   /* for OpenMP */
1511   int OMPID,Nthrds,Nprocs,Nthrds0;
1512 
1513   /* MPI */
1514   MPI_Comm_size(mpi_comm_level1,&numprocs);
1515   MPI_Comm_rank(mpi_comm_level1,&myid);
1516 
1517   dtime(&TStime);
1518 
1519   /****************************************************
1520     allocation of arrays:
1521 
1522     int MP[List_YOUSO[2]];
1523     int Msize[Matomnum+1];
1524     double EVal[Matomnum+1][n2];
1525   ****************************************************/
1526 
1527   Snd_H_Size = (int*)malloc(sizeof(int)*numprocs);
1528   Rcv_H_Size = (int*)malloc(sizeof(int)*numprocs);
1529   Snd_iHNL_Size = (int*)malloc(sizeof(int)*numprocs);
1530   Rcv_iHNL_Size = (int*)malloc(sizeof(int)*numprocs);
1531   Snd_S_Size = (int*)malloc(sizeof(int)*numprocs);
1532   Rcv_S_Size = (int*)malloc(sizeof(int)*numprocs);
1533 
1534   m_size = 0;
1535   Msize = (int*)malloc(sizeof(int)*(Matomnum+1));
1536 
1537   EVal = (double**)malloc(sizeof(double*)*(Matomnum+1));
1538 
1539   for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
1540 
1541     if (Mc_AN==0){
1542       Gc_AN = 0;
1543       FNAN[0] = 1;
1544       SNAN[0] = 0;
1545       n2 = 1;
1546       Msize[Mc_AN] = 1;
1547     }
1548     else{
1549       Gc_AN = M2G[Mc_AN];
1550       Anum = 1;
1551       for (i=0; i<=(FNAN[Gc_AN]+SNAN[Gc_AN]); i++){
1552 	Gi = natn[Gc_AN][i];
1553 	wanA = WhatSpecies[Gi];
1554 	Anum = Anum + Spe_Total_CNO[wanA];
1555       }
1556       NUM = Anum - 1;
1557       Msize[Mc_AN] = NUM;
1558       n2 = 2*NUM + 3;
1559     }
1560 
1561     m_size += n2;
1562 
1563     EVal[Mc_AN] = (double*)malloc(sizeof(double)*n2);
1564   }
1565 
1566   if (firsttime)
1567   PrintMemory("Divide_Conquer: EVal",sizeof(double)*m_size,NULL);
1568 
1569   if (2<=level_stdout){
1570     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1571         printf("<DC> myid=%i Mc_AN=%2d Gc_AN=%2d Msize=%3d\n",
1572         myid,Mc_AN,M2G[Mc_AN],Msize[Mc_AN]);
1573     }
1574   }
1575 
1576   /****************************************************
1577     allocation of arrays:
1578 
1579     dcomplex Residues[3]
1580                      [Matomnum+1]
1581                      [FNAN[Gc_AN]+1]
1582                      [Spe_Total_CNO[Gc_AN]]
1583                      [Spe_Total_CNO[Gh_AN]]
1584                      [n2]
1585      To reduce the memory size, the size of NUM2
1586      should be found in the loop.
1587   ****************************************************/
1588 
1589   m_size = 0;
1590 
1591   Residues = (dcomplex******)malloc(sizeof(dcomplex*****)*3);
1592   for (spin=0; spin<3; spin++){
1593     Residues[spin] = (dcomplex*****)malloc(sizeof(dcomplex****)*(Matomnum+1));
1594     for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
1595 
1596       if (Mc_AN==0){
1597 	Gc_AN = 0;
1598 	FNAN[0] = 1;
1599 	tno1 = 1;
1600 	n2 = 1;
1601       }
1602       else{
1603         Gc_AN = M2G[Mc_AN];
1604 	wanA = WhatSpecies[Gc_AN];
1605 	tno1 = Spe_Total_CNO[wanA];
1606 	n2 = 2*Msize[Mc_AN] + 2;
1607       }
1608 
1609       Residues[spin][Mc_AN] = (dcomplex****)malloc(sizeof(dcomplex***)*(FNAN[Gc_AN]+1));
1610 
1611       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1612 
1613 	if (Mc_AN==0){
1614 	  tno2 = 1;
1615 	}
1616 	else {
1617 	  Gh_AN = natn[Gc_AN][h_AN];
1618 	  wanB = WhatSpecies[Gh_AN];
1619 	  tno2 = Spe_Total_CNO[wanB];
1620 	}
1621 
1622 	Residues[spin][Mc_AN][h_AN] = (dcomplex***)malloc(sizeof(dcomplex**)*tno1);
1623 	for (i=0; i<tno1; i++){
1624 	  Residues[spin][Mc_AN][h_AN][i] = (dcomplex**)malloc(sizeof(dcomplex*)*tno2);
1625 	  for (j=0; j<tno2; j++){
1626 	    Residues[spin][Mc_AN][h_AN][i][j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1627 	  }
1628 	}
1629 
1630 	m_size += tno1*tno2*n2;
1631       }
1632     }
1633   }
1634 
1635   if (firsttime)
1636   PrintMemory("Divide_Conquer: Residues",sizeof(dcomplex)*m_size,NULL);
1637 
1638   /****************************************************
1639     allocation of arrays:
1640 
1641     double PDOS_DC[Matomnum+1]
1642                   [n2]
1643   ****************************************************/
1644 
1645   m_size = 0;
1646 
1647   PDOS_DC = (double**)malloc(sizeof(double*)*(Matomnum+1));
1648   for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
1649 
1650     if (Mc_AN==0) n2 = 1;
1651     else          n2 = 2*Msize[Mc_AN] + 2;
1652 
1653     m_size += n2;
1654     PDOS_DC[Mc_AN] = (double*)malloc(sizeof(double)*n2);
1655   }
1656 
1657   if (firsttime)
1658   PrintMemory("Divide_Conquer: PDOS_DC",sizeof(double)*m_size,NULL);
1659 
1660   /* allocation of a temporal array for OpenMP */
1661 
1662 #pragma omp parallel shared(Nthrds0)
1663   {
1664     Nthrds0 = omp_get_num_threads();
1665   }
1666 
1667   omp_tmp = (double*)malloc(sizeof(double)*Nthrds0);
1668 
1669   /****************************************************
1670    MPI
1671 
1672    Hks
1673   ****************************************************/
1674 
1675   /***********************************
1676             set data size
1677   ************************************/
1678 
1679   for (ID=0; ID<numprocs; ID++){
1680 
1681     IDS = (myid + ID) % numprocs;
1682     IDR = (myid - ID + numprocs) % numprocs;
1683 
1684     if (ID!=0){
1685       tag = 999;
1686 
1687       /* find data size to send block data */
1688       if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
1689 
1690         size1 = 0;
1691         for (spin=0; spin<=SpinP_switch; spin++){
1692           for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
1693             Mc_AN = Snd_MAN[IDS][n];
1694             Gc_AN = Snd_GAN[IDS][n];
1695             Cwan = WhatSpecies[Gc_AN];
1696             tno1 = Spe_Total_CNO[Cwan];
1697             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1698               Gh_AN = natn[Gc_AN][h_AN];
1699               Hwan = WhatSpecies[Gh_AN];
1700               tno2 = Spe_Total_CNO[Hwan];
1701               for (i=0; i<tno1; i++){
1702                 for (j=0; j<tno2; j++){
1703                   size1++;
1704                 }
1705               }
1706 	    }
1707           }
1708 	}
1709 
1710         Snd_H_Size[IDS] = size1;
1711         MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
1712       }
1713       else{
1714         Snd_H_Size[IDS] = 0;
1715       }
1716 
1717       /* receiving of size of data */
1718 
1719       if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
1720         MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
1721         Rcv_H_Size[IDR] = size2;
1722       }
1723       else{
1724         Rcv_H_Size[IDR] = 0;
1725       }
1726 
1727       if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0) MPI_Wait(&request,&stat);
1728 
1729     }
1730     else{
1731       Snd_H_Size[IDS] = 0;
1732       Rcv_H_Size[IDR] = 0;
1733     }
1734   }
1735 
1736   /***********************************
1737              data transfer
1738   ************************************/
1739 
1740   tag = 999;
1741   for (ID=0; ID<numprocs; ID++){
1742 
1743     IDS = (myid + ID) % numprocs;
1744     IDR = (myid - ID + numprocs) % numprocs;
1745 
1746     if (ID!=0){
1747 
1748       /*****************************
1749               sending of data
1750       *****************************/
1751 
1752       if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
1753 
1754         size1 = Snd_H_Size[IDS];
1755 
1756         /* allocation of array */
1757 
1758         tmp_array = (double*)malloc(sizeof(double)*size1);
1759 
1760         /* multidimentional array to vector array */
1761 
1762         num = 0;
1763         for (spin=0; spin<=SpinP_switch; spin++){
1764           for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
1765             Mc_AN = Snd_MAN[IDS][n];
1766             Gc_AN = Snd_GAN[IDS][n];
1767             Cwan = WhatSpecies[Gc_AN];
1768             tno1 = Spe_Total_CNO[Cwan];
1769             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1770               Gh_AN = natn[Gc_AN][h_AN];
1771               Hwan = WhatSpecies[Gh_AN];
1772               tno2 = Spe_Total_CNO[Hwan];
1773               for (i=0; i<tno1; i++){
1774                 for (j=0; j<tno2; j++){
1775                   tmp_array[num] = Hks[spin][Mc_AN][h_AN][i][j];
1776                   num++;
1777                 }
1778               }
1779 	    }
1780           }
1781 	}
1782 
1783         MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
1784 
1785       }
1786 
1787       /*****************************
1788          receiving of block data
1789       *****************************/
1790 
1791       if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
1792 
1793         size2 = Rcv_H_Size[IDR];
1794 
1795         /* allocation of array */
1796         tmp_array2 = (double*)malloc(sizeof(double)*size2);
1797 
1798         MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
1799 
1800         num = 0;
1801         for (spin=0; spin<=SpinP_switch; spin++){
1802           Mc_AN = S_TopMAN[IDR] - 1;  /* S_TopMAN should be used. */
1803           for (n=0; n<(F_Rcv_Num[IDR]+S_Rcv_Num[IDR]); n++){
1804             Mc_AN++;
1805             Gc_AN = Rcv_GAN[IDR][n];
1806             Cwan = WhatSpecies[Gc_AN];
1807             tno1 = Spe_Total_CNO[Cwan];
1808 
1809             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1810               Gh_AN = natn[Gc_AN][h_AN];
1811               Hwan = WhatSpecies[Gh_AN];
1812               tno2 = Spe_Total_CNO[Hwan];
1813               for (i=0; i<tno1; i++){
1814                 for (j=0; j<tno2; j++){
1815                   Hks[spin][Mc_AN][h_AN][i][j] = tmp_array2[num];
1816                   num++;
1817 		}
1818 	      }
1819 	    }
1820 	  }
1821 	}
1822 
1823         /* freeing of array */
1824         free(tmp_array2);
1825       }
1826 
1827       if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
1828         MPI_Wait(&request,&stat);
1829         free(tmp_array); /* freeing of array */
1830       }
1831     }
1832   }
1833 
1834   /****************************************************
1835    MPI
1836 
1837    ImNL
1838   ****************************************************/
1839 
1840   /***********************************
1841              set data size
1842   ************************************/
1843 
1844   /* spin-orbit coupling or LDA+U */
1845 
1846   if (SO_switch==1 || Hub_U_switch==1 || 1<=Constraint_NCS_switch
1847       || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1){
1848 
1849     for (ID=0; ID<numprocs; ID++){
1850 
1851       IDS = (myid + ID) % numprocs;
1852       IDR = (myid - ID + numprocs) % numprocs;
1853 
1854       if (ID!=0){
1855 	tag = 999;
1856 
1857 	/* find data size to send block data */
1858 	if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
1859 
1860 	  size1 = 0;
1861 	  for (so=0; so<List_YOUSO[5]; so++){
1862 	    for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
1863 	      Mc_AN = Snd_MAN[IDS][n];
1864 	      Gc_AN = Snd_GAN[IDS][n];
1865 	      Cwan = WhatSpecies[Gc_AN];
1866 	      tno1 = Spe_Total_CNO[Cwan];
1867 	      for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1868 		Gh_AN = natn[Gc_AN][h_AN];
1869 		Hwan = WhatSpecies[Gh_AN];
1870 		tno2 = Spe_Total_CNO[Hwan];
1871 		for (i=0; i<tno1; i++){
1872 		  for (j=0; j<tno2; j++){
1873 		    size1++;
1874 		  }
1875 		}
1876 	      }
1877 	    }
1878 	  }
1879 
1880 	  Snd_iHNL_Size[IDS] = size1;
1881 	  MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
1882 	}
1883 	else{
1884 	  Snd_iHNL_Size[IDS] = 0;
1885 	}
1886 
1887 	/* receiving of size of data */
1888 
1889 	if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
1890 	  MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
1891 	  Rcv_iHNL_Size[IDR] = size2;
1892 	}
1893 	else{
1894 	  Rcv_iHNL_Size[IDR] = 0;
1895 	}
1896 
1897 	if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0) MPI_Wait(&request,&stat);
1898 
1899       }
1900       else{
1901 	Snd_iHNL_Size[IDS] = 0;
1902 	Rcv_iHNL_Size[IDR] = 0;
1903       }
1904     }
1905 
1906     /***********************************
1907              data transfer
1908   ************************************/
1909 
1910     tag = 999;
1911     for (ID=0; ID<numprocs; ID++){
1912 
1913       IDS = (myid + ID) % numprocs;
1914       IDR = (myid - ID + numprocs) % numprocs;
1915 
1916       if (ID!=0){
1917 
1918 	/*****************************
1919 				      sending of data
1920 	*****************************/
1921 
1922 	if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
1923 
1924 	  size1 = Snd_iHNL_Size[IDS];
1925 
1926 	  /* allocation of array */
1927 
1928 	  tmp_array = (double*)malloc(sizeof(double)*size1);
1929 
1930 	  /* multidimentional array to vector array */
1931 
1932 	  num = 0;
1933 	  for (so=0; so<List_YOUSO[5]; so++){
1934 	    for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
1935 	      Mc_AN = Snd_MAN[IDS][n];
1936 	      Gc_AN = Snd_GAN[IDS][n];
1937 	      Cwan = WhatSpecies[Gc_AN];
1938 	      tno1 = Spe_Total_CNO[Cwan];
1939 	      for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1940 		Gh_AN = natn[Gc_AN][h_AN];
1941 		Hwan = WhatSpecies[Gh_AN];
1942 		tno2 = Spe_Total_CNO[Hwan];
1943 		for (i=0; i<tno1; i++){
1944 		  for (j=0; j<tno2; j++){
1945 		    tmp_array[num] = ImNL[so][Mc_AN][h_AN][i][j];
1946 		    num++;
1947 		  }
1948 		}
1949 	      }
1950 	    }
1951 	  }
1952 
1953 	  MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
1954 
1955 	}
1956 
1957 	/*****************************
1958 	   receiving of block data
1959 	*****************************/
1960 
1961 	if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
1962 
1963 	  size2 = Rcv_iHNL_Size[IDR];
1964 
1965 	  /* allocation of array */
1966 	  tmp_array2 = (double*)malloc(sizeof(double)*size2);
1967 
1968 	  MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
1969 
1970 	  num = 0;
1971 	  for (so=0; so<List_YOUSO[5]; so++){
1972 	    Mc_AN = S_TopMAN[IDR] - 1;  /* S_TopMAN should be used. */
1973 	    for (n=0; n<(F_Rcv_Num[IDR]+S_Rcv_Num[IDR]); n++){
1974 	      Mc_AN++;
1975 	      Gc_AN = Rcv_GAN[IDR][n];
1976 	      Cwan = WhatSpecies[Gc_AN];
1977 	      tno1 = Spe_Total_CNO[Cwan];
1978 
1979 	      for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1980 		Gh_AN = natn[Gc_AN][h_AN];
1981 		Hwan = WhatSpecies[Gh_AN];
1982 		tno2 = Spe_Total_CNO[Hwan];
1983 		for (i=0; i<tno1; i++){
1984 		  for (j=0; j<tno2; j++){
1985 		    ImNL[so][Mc_AN][h_AN][i][j] = tmp_array2[num];
1986 		    num++;
1987 		  }
1988 		}
1989 	      }
1990 	    }
1991 	  }
1992 
1993 	  /* freeing of array */
1994 	  free(tmp_array2);
1995 	}
1996 
1997 	if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
1998 	  MPI_Wait(&request,&stat);
1999 	  free(tmp_array); /* freeing of array */
2000 	}
2001       }
2002     }
2003   }
2004 
2005   /****************************************************
2006    MPI
2007 
2008    OLP0
2009   ****************************************************/
2010 
2011   /***********************************
2012              set data size
2013   ************************************/
2014 
2015   if (SCF_iter<=2){
2016 
2017     for (ID=0; ID<numprocs; ID++){
2018 
2019       IDS = (myid + ID) % numprocs;
2020       IDR = (myid - ID + numprocs) % numprocs;
2021 
2022       if (ID!=0){
2023         tag = 999;
2024 
2025         /* find data size to send block data */
2026         if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
2027           size1 = 0;
2028           for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
2029             Mc_AN = Snd_MAN[IDS][n];
2030             Gc_AN = Snd_GAN[IDS][n];
2031             Cwan = WhatSpecies[Gc_AN];
2032             tno1 = Spe_Total_CNO[Cwan];
2033             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2034               Gh_AN = natn[Gc_AN][h_AN];
2035               Hwan = WhatSpecies[Gh_AN];
2036               tno2 = Spe_Total_CNO[Hwan];
2037               for (i=0; i<tno1; i++){
2038                 for (j=0; j<tno2; j++){
2039                   size1++;
2040                 }
2041               }
2042 	    }
2043           }
2044 
2045           Snd_S_Size[IDS] = size1;
2046           MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
2047         }
2048         else{
2049           Snd_S_Size[IDS] = 0;
2050         }
2051 
2052         /* receiving of size of data */
2053 
2054         if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
2055           MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
2056           Rcv_S_Size[IDR] = size2;
2057         }
2058         else{
2059           Rcv_S_Size[IDR] = 0;
2060         }
2061 
2062         if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0) MPI_Wait(&request,&stat);
2063       }
2064       else{
2065         Snd_S_Size[IDS] = 0;
2066         Rcv_S_Size[IDR] = 0;
2067       }
2068     }
2069 
2070     /***********************************
2071                data transfer
2072     ************************************/
2073 
2074     tag = 999;
2075     for (ID=0; ID<numprocs; ID++){
2076 
2077       IDS = (myid + ID) % numprocs;
2078       IDR = (myid - ID + numprocs) % numprocs;
2079 
2080       if (ID!=0){
2081 
2082         /*****************************
2083                 sending of data
2084         *****************************/
2085 
2086         if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
2087 
2088           size1 = Snd_S_Size[IDS];
2089 
2090           /* allocation of array */
2091 
2092           tmp_array = (double*)malloc(sizeof(double)*size1);
2093 
2094           /* multidimentional array to vector array */
2095 
2096           num = 0;
2097 
2098           for (n=0; n<(F_Snd_Num[IDS]+S_Snd_Num[IDS]); n++){
2099             Mc_AN = Snd_MAN[IDS][n];
2100             Gc_AN = Snd_GAN[IDS][n];
2101             Cwan = WhatSpecies[Gc_AN];
2102             tno1 = Spe_Total_CNO[Cwan];
2103             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2104               Gh_AN = natn[Gc_AN][h_AN];
2105               Hwan = WhatSpecies[Gh_AN];
2106               tno2 = Spe_Total_CNO[Hwan];
2107               for (i=0; i<tno1; i++){
2108                 for (j=0; j<tno2; j++){
2109                   tmp_array[num] = OLP0[Mc_AN][h_AN][i][j];
2110                   num++;
2111                 }
2112               }
2113 	    }
2114           }
2115 
2116           MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
2117         }
2118 
2119         /*****************************
2120            receiving of block data
2121         *****************************/
2122 
2123         if ((F_Rcv_Num[IDR]+S_Rcv_Num[IDR])!=0){
2124 
2125           size2 = Rcv_S_Size[IDR];
2126 
2127          /* allocation of array */
2128           tmp_array2 = (double*)malloc(sizeof(double)*size2);
2129 
2130           MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
2131 
2132           num = 0;
2133           Mc_AN = S_TopMAN[IDR] - 1; /* S_TopMAN should be used. */
2134           for (n=0; n<(F_Rcv_Num[IDR]+S_Rcv_Num[IDR]); n++){
2135             Mc_AN++;
2136             Gc_AN = Rcv_GAN[IDR][n];
2137             Cwan = WhatSpecies[Gc_AN];
2138             tno1 = Spe_Total_CNO[Cwan];
2139 
2140             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2141               Gh_AN = natn[Gc_AN][h_AN];
2142               Hwan = WhatSpecies[Gh_AN];
2143               tno2 = Spe_Total_CNO[Hwan];
2144               for (i=0; i<tno1; i++){
2145                 for (j=0; j<tno2; j++){
2146                   OLP0[Mc_AN][h_AN][i][j] = tmp_array2[num];
2147                   num++;
2148    	        }
2149 	      }
2150 	    }
2151 	  }
2152 
2153           /* freeing of array */
2154           free(tmp_array2);
2155         }
2156 
2157         if ((F_Snd_Num[IDS]+S_Snd_Num[IDS])!=0){
2158           MPI_Wait(&request,&stat);
2159           free(tmp_array); /* freeing of array */
2160 	}
2161       }
2162     }
2163   }
2164 
2165   /****************************************************
2166             find the total number of electrons
2167   ****************************************************/
2168 
2169   My_TZ = 0.0;
2170   for (i=1; i<=Matomnum; i++){
2171     Gc_AN = M2G[i];
2172     wan = WhatSpecies[Gc_AN];
2173     My_TZ = My_TZ + Spe_Core_Charge[wan];
2174   }
2175 
2176   /* MPI, My_TZ */
2177 
2178   MPI_Barrier(mpi_comm_level1);
2179   MPI_Allreduce(&My_TZ, &TZ, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
2180 
2181   /****************************************************
2182       Setting of Hamiltonian and overlap matrices
2183 
2184          MP indicates the starting position of
2185               atom i in arraies H and S
2186   ****************************************************/
2187 
2188 #pragma omp parallel shared(List_YOUSO,time_per_atom,Residues,EVal,S12,OLP_eigen_cut,ImNL,Hks,Zeeman_NCO_switch,Zeeman_NCS_switch,Constraint_NCS_switch,Hub_U_switch,SO_switch,OLP0,SCF_iter,RMI1,S_G2M,Spe_Total_CNO,natn,SNAN,FNAN,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Etime_atom,Stime_atom,Gc_AN,wan,Anum,i,MP,Gi,wanA,NUM,n2,S_DC,H_DC,ko,M1,C,ig,ian,ih,j,kl,jg,jan,Bnum,m,n,P_min,l,i1,j1,tmp1,tmp2,k,jj1,k1,sum_r,sum_i,l1,sum1_r,sum1_i,sum2_r,sum2_i,ii1,NUM1,tno1,h_AN,Gh_AN,wanB,tno2)
2189   {
2190 
2191     /* get info. on OpenMP */
2192 
2193     OMPID = omp_get_thread_num();
2194     Nthrds = omp_get_num_threads();
2195     Nprocs = omp_get_num_procs();
2196 
2197     /* allocation of arrays */
2198 
2199     MP = (int*)malloc(sizeof(int)*List_YOUSO[2]);
2200 
2201     /* start of the Mc_AN loop which is parallelized by OpenMP */
2202 
2203     for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){
2204 
2205       dtime(&Stime_atom);
2206 
2207       Gc_AN = M2G[Mc_AN];
2208       wan = WhatSpecies[Gc_AN];
2209 
2210       /***********************************************
2211       find the size of matrix for the atom Mc_AN
2212                 and set the MP vector
2213 
2214      Note:
2215          MP indicates the starting position of
2216               atom i in arraies H and S
2217       ***********************************************/
2218 
2219       Anum = 1;
2220       for (i=0; i<=(FNAN[Gc_AN]+SNAN[Gc_AN]); i++){
2221 	MP[i] = Anum;
2222 	Gi = natn[Gc_AN][i];
2223 	wanA = WhatSpecies[Gi];
2224 	Anum = Anum + Spe_Total_CNO[wanA];
2225       }
2226       NUM = Anum - 1;
2227 
2228       n2 = 2*NUM + 3;
2229 
2230       /***********************************************
2231        allocation of arrays:
2232 
2233        double   S_DC[NUM+2][NUM+2];
2234        dcomplex H_DC[n2][n2];
2235        double   ko[n2];
2236        double   M1[n2];
2237        dcomplex C[n2][n2];
2238       ***********************************************/
2239 
2240       S_DC = (double**)malloc(sizeof(double*)*(NUM+2));
2241       for (i=0; i<(NUM+2); i++){
2242 	S_DC[i] = (double*)malloc(sizeof(double)*(NUM+2));
2243       }
2244 
2245       H_DC = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
2246       for (i=0; i<n2; i++){
2247 	H_DC[i] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
2248       }
2249 
2250       ko = (double*)malloc(sizeof(double)*n2);
2251       M1 = (double*)malloc(sizeof(double)*n2);
2252 
2253       C = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
2254       for (i=0; i<n2; i++){
2255 	C[i] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
2256       }
2257 
2258       /***********************************************
2259        construct cluster full matrices of Hamiltonian
2260                and overlap for the atom Mc_AN
2261       ***********************************************/
2262 
2263       for (i=0; i<=(FNAN[Gc_AN]+SNAN[Gc_AN]); i++){
2264 	ig = natn[Gc_AN][i];
2265 	ian = Spe_Total_CNO[WhatSpecies[ig]];
2266 	Anum = MP[i];
2267 	ih = S_G2M[ig];
2268 
2269 	for (j=0; j<=(FNAN[Gc_AN]+SNAN[Gc_AN]); j++){
2270 
2271 	  kl = RMI1[Mc_AN][i][j];
2272 	  jg = natn[Gc_AN][j];
2273 	  jan = Spe_Total_CNO[WhatSpecies[jg]];
2274 	  Bnum = MP[j];
2275 
2276 	  if (0<=kl){
2277 
2278 	    if (SCF_iter<=2){
2279 	      for (m=0; m<ian; m++){
2280 		for (n=0; n<jan; n++){
2281 		  S_DC[Anum+m][Bnum+n] = OLP0[ih][kl][m][n];
2282 		}
2283 	      }
2284 	    }
2285 
2286 	    /* non-spin-orbit coupling and non-LDA+U */
2287 	    if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
2288 		&& Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
2289 	      for (m=0; m<ian; m++){
2290 		for (n=0; n<jan; n++){
2291 		  H_DC[Anum+m    ][Bnum+n    ].r =  Hks[0][ih][kl][m][n];
2292 		  H_DC[Anum+m    ][Bnum+n    ].i =  0.0;
2293 		  H_DC[Anum+m+NUM][Bnum+n+NUM].r =  Hks[1][ih][kl][m][n];
2294 		  H_DC[Anum+m+NUM][Bnum+n+NUM].i =  0.0;
2295 		  H_DC[Anum+m    ][Bnum+n+NUM].r =  Hks[2][ih][kl][m][n];
2296 		  H_DC[Anum+m    ][Bnum+n+NUM].i =  Hks[3][ih][kl][m][n];
2297 		  H_DC[Bnum+n+NUM][Anum+m    ].r =  H_DC[Anum+m    ][Bnum+n+NUM].r;
2298 		  H_DC[Bnum+n+NUM][Anum+m    ].i = -H_DC[Anum+m    ][Bnum+n+NUM].i;
2299 		}
2300 	      }
2301 	    }
2302 
2303 	    /* spin-orbit coupling or LDA+U */
2304 	    else {
2305 	      for (m=0; m<ian; m++){
2306 		for (n=0; n<jan; n++){
2307 		  H_DC[Anum+m    ][Bnum+n    ].r =  Hks[0][ih][kl][m][n];
2308 		  H_DC[Anum+m    ][Bnum+n    ].i =  ImNL[0][ih][kl][m][n];
2309 		  H_DC[Anum+m+NUM][Bnum+n+NUM].r =  Hks[1][ih][kl][m][n];
2310 		  H_DC[Anum+m+NUM][Bnum+n+NUM].i =  ImNL[1][ih][kl][m][n];
2311 		  H_DC[Anum+m    ][Bnum+n+NUM].r =  Hks[2][ih][kl][m][n];
2312 		  H_DC[Anum+m    ][Bnum+n+NUM].i =  Hks[3][ih][kl][m][n] + ImNL[2][ih][kl][m][n];
2313 		  H_DC[Bnum+n+NUM][Anum+m    ].r =  H_DC[Anum+m    ][Bnum+n+NUM].r;
2314 		  H_DC[Bnum+n+NUM][Anum+m    ].i = -H_DC[Anum+m    ][Bnum+n+NUM].i;
2315 		}
2316 	      }
2317 	    }
2318 
2319 	  }
2320 
2321 	  else{
2322 
2323 	    if (SCF_iter<=2){
2324 	      for (m=0; m<ian; m++){
2325 		for (n=0; n<jan; n++){
2326 		  S_DC[Anum+m][Bnum+n] = 0.0;
2327 		}
2328 	      }
2329 	    }
2330 
2331 	    for (m=0; m<ian; m++){
2332 	      for (n=0; n<jan; n++){
2333 		H_DC[Anum+m    ][Bnum+n    ].r = 0.0;
2334 		H_DC[Anum+m    ][Bnum+n    ].i = 0.0;
2335 		H_DC[Anum+m+NUM][Bnum+n+NUM].r = 0.0;
2336 		H_DC[Anum+m+NUM][Bnum+n+NUM].i = 0.0;
2337 		H_DC[Anum+m    ][Bnum+n+NUM].r = 0.0;
2338 		H_DC[Anum+m    ][Bnum+n+NUM].i = 0.0;
2339 		H_DC[Bnum+n+NUM][Anum+m    ].r = 0.0;
2340 		H_DC[Bnum+n+NUM][Anum+m    ].i = 0.0;
2341 	      }
2342 	    }
2343 
2344 	  }
2345 	}
2346       }
2347 
2348       /****************************************************
2349        Solve the generalized eigenvalue problem
2350        HC = SCE
2351 
2352        1) diagonalize S
2353        2) search negative eigenvalues of S
2354       ****************************************************/
2355 
2356       if (SCF_iter<=2){
2357 
2358 	Eigen_lapack(S_DC, ko, NUM, NUM);
2359 
2360 	/***********************************************
2361              Searching of negative eigenvalues
2362 	************************************************/
2363 
2364 	P_min = 1;
2365 	for (l=1; l<=NUM; l++){
2366 	  if (ko[l]<OLP_eigen_cut){
2367 	    P_min = l + 1;
2368 	    if (3<=level_stdout){
2369 	      printf("<DC>  Negative EV of OLP %2d %15.12f\n",l,ko[l]);
2370 	    }
2371 	  }
2372 	}
2373 
2374 	S12[Mc_AN][0][0] = P_min;
2375 
2376 	for (l=1; l<P_min; l++)     M1[l] = 0.0;
2377 	for (l=P_min; l<=NUM; l++)  M1[l] = 1.0/sqrt(ko[l]);
2378 
2379 	for (i1=1; i1<=NUM; i1++){
2380 	  for (j1=1; j1<=NUM; j1++){
2381 	    S_DC[i1][j1]       = S_DC[i1][j1]*M1[j1];
2382 	    S12[Mc_AN][i1][j1] = S_DC[i1][j1];
2383 	  }
2384 	}
2385       }
2386 
2387       else{
2388 
2389 	P_min = (int)S12[Mc_AN][0][0];
2390 
2391 	for (i1=1; i1<=NUM; i1++){
2392 	  for (j1=1; j1<=NUM; j1++){
2393 	    S_DC[i1][j1] = S12[Mc_AN][i1][j1];
2394 	  }
2395 	}
2396       }
2397 
2398       /***********************************************
2399              transform Hamiltonian matrix
2400       ************************************************/
2401 
2402       /* transpose S */
2403 
2404       for (i1=1; i1<=NUM; i1++){
2405 	for (j1=i1+1; j1<=NUM; j1++){
2406 	  tmp1 = S_DC[i1][j1];
2407 	  tmp2 = S_DC[j1][i1];
2408 	  S_DC[i1][j1] = tmp2;
2409 	  S_DC[j1][i1] = tmp1;
2410 	}
2411       }
2412 
2413       /* H * U * M1 */
2414 
2415       for (j1=P_min; j1<=NUM; j1++){
2416 	for (k=0; k<=1; k++){
2417 	  jj1 = 2*j1 - P_min + k;
2418 	  k1 = k*NUM;
2419 
2420 	  for (i1=1; i1<=2*NUM; i1++){
2421 
2422 	    sum_r = 0.0;
2423 	    sum_i = 0.0;
2424 
2425 	    for (l=1; l<=NUM; l++){
2426 	      l1 = k1 + l;
2427 	      sum_r += H_DC[i1][l1].r*S_DC[j1][l];
2428 	      sum_i += H_DC[i1][l1].i*S_DC[j1][l];
2429 	    }
2430 
2431 	    C[jj1][i1].r = sum_r;
2432 	    C[jj1][i1].i = sum_i;
2433 	  }
2434 	}
2435       }
2436 
2437       /* M1 * U^+ H * U * M1 */
2438 
2439       for (j1=1; j1<=2*NUM; j1++){
2440 	for (i1=P_min; i1<=NUM; i1++){
2441 
2442 	  sum1_r = 0.0;
2443 	  sum1_i = 0.0;
2444 	  sum2_r = 0.0;
2445 	  sum2_i = 0.0;
2446 
2447 	  for (l=1; l<=NUM; l++){
2448 	    sum1_r += S_DC[i1][l]*C[j1][l].r;
2449 	    sum1_i += S_DC[i1][l]*C[j1][l].i;
2450 	  }
2451 
2452 	  for (l=NUM+1; l<=2*NUM; l++){
2453 	    l1 = l - NUM;
2454 	    sum2_r += S_DC[i1][l1]*C[j1][l].r;
2455 	    sum2_i += S_DC[i1][l1]*C[j1][l].i;
2456 	  }
2457 
2458 	  ii1 = 2*i1 - P_min;
2459 	  H_DC[j1][ii1].r = sum1_r;
2460 	  H_DC[j1][ii1].i = sum1_i;
2461 
2462 	  ii1 = 2*i1 - P_min + 1;
2463 	  H_DC[j1][ii1].r = sum2_r;
2464 	  H_DC[j1][ii1].i = sum2_i;
2465 	}
2466       }
2467 
2468       /* H to C (transposition) */
2469 
2470       for (i1=P_min; i1<=2*NUM; i1++){
2471 	for (j1=P_min; j1<=2*NUM; j1++){
2472 	  C[j1-(P_min-1)][i1-(P_min-1)].r = H_DC[i1][j1].r;
2473 	  C[j1-(P_min-1)][i1-(P_min-1)].i = H_DC[i1][j1].i;
2474 	}
2475       }
2476 
2477       /***********************************************
2478      diagonalize the trasformed Hamiltonian matrix
2479       ************************************************/
2480 
2481       NUM1 = 2*NUM - (P_min - 1);
2482       EigenBand_lapack(C, ko, NUM1, NUM1, 1);
2483 
2484       /* C to H (transposition) */
2485 
2486       for (i1=1; i1<=NUM1; i1++){
2487 	for (j1=1; j1<=NUM1; j1++){
2488 	  H_DC[j1][i1].r = C[i1][j1].r;
2489 	  H_DC[j1][i1].i = C[i1][j1].i;
2490 	}
2491       }
2492 
2493       /***********************************************
2494       transformation to the original eigen vectors.
2495       NOTE 244P    C = U * lambda^{-1/2} * D
2496       ***********************************************/
2497 
2498       /* transpose S */
2499 
2500       for (i1=1; i1<=NUM; i1++){
2501 	for (j1=i1+1; j1<=NUM; j1++){
2502 	  tmp1 = S_DC[i1][j1];
2503 	  tmp2 = S_DC[j1][i1];
2504 	  S_DC[i1][j1] = tmp2;
2505 	  S_DC[j1][i1] = tmp1;
2506 	}
2507       }
2508 
2509       for (j1=1; j1<=NUM1; j1++){
2510 	for (k=0; k<=1; k++){
2511 	  for (i1=1; i1<=NUM; i1++){
2512 	    sum_r = 0.0;
2513 	    sum_i = 0.0;
2514 	    for (l=P_min; l<=NUM; l++){
2515 	      sum_r += S_DC[i1][l]*H_DC[j1][2*(l-P_min)+1+k].r;
2516 	      sum_i += S_DC[i1][l]*H_DC[j1][2*(l-P_min)+1+k].i;
2517 	    }
2518 	    C[i1+k*NUM][j1].r = sum_r;
2519 	    C[i1+k*NUM][j1].i = sum_i;
2520 	  }
2521 	}
2522       }
2523 
2524       /***********************************************
2525         store eigenvalues and residues of poles
2526       ***********************************************/
2527 
2528       for (i1=1; i1<=2*NUM; i1++){
2529 	EVal[Mc_AN][i1-1] = 1000.0;
2530       }
2531       for (i1=1; i1<=NUM1; i1++){
2532 	EVal[Mc_AN][i1-1] = ko[i1];
2533       }
2534 
2535       wanA = WhatSpecies[Gc_AN];
2536       tno1 = Spe_Total_CNO[wanA];
2537 
2538       for (i=0; i<tno1; i++){
2539 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2540 	  Gh_AN = natn[Gc_AN][h_AN];
2541 	  wanB = WhatSpecies[Gh_AN];
2542 	  tno2 = Spe_Total_CNO[wanB];
2543 	  Bnum = MP[h_AN];
2544 	  for (j=0; j<tno2; j++){
2545 	    for (i1=1; i1<=NUM1; i1++){
2546 
2547 	      /* Re11 */
2548 	      Residues[0][Mc_AN][h_AN][i][j][i1-1].r = C[1+i    ][i1].r*C[Bnum+j    ][i1].r
2549 		                                      +C[1+i    ][i1].i*C[Bnum+j    ][i1].i;
2550 
2551 	      /* Re22 */
2552 	      Residues[1][Mc_AN][h_AN][i][j][i1-1].r = C[1+i+NUM][i1].r*C[Bnum+j+NUM][i1].r
2553 		                                      +C[1+i+NUM][i1].i*C[Bnum+j+NUM][i1].i;
2554 
2555 	      /* Re12 */
2556 	      Residues[2][Mc_AN][h_AN][i][j][i1-1].r = C[1+i    ][i1].r*C[Bnum+j+NUM][i1].r
2557 		                                      +C[1+i    ][i1].i*C[Bnum+j+NUM][i1].i;
2558 
2559 	      /* Im12 */
2560 	      Residues[2][Mc_AN][h_AN][i][j][i1-1].i = C[1+i    ][i1].r*C[Bnum+j+NUM][i1].i
2561 		                                      -C[1+i    ][i1].i*C[Bnum+j+NUM][i1].r;
2562 
2563 	      /* spin-orbit coupling or LDA+U */
2564 	      if ( SO_switch==1 || Hub_U_switch==1 || 1<=Constraint_NCS_switch
2565 		   || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1){
2566 
2567 		/* Im11 */
2568 		Residues[0][Mc_AN][h_AN][i][j][i1-1].i = C[1+i    ][i1].r*C[Bnum+j    ][i1].i
2569 	 	                                        -C[1+i    ][i1].i*C[Bnum+j    ][i1].r;
2570 		/* Im22 */
2571 		Residues[1][Mc_AN][h_AN][i][j][i1-1].i = C[1+i+NUM][i1].r*C[Bnum+j+NUM][i1].i
2572 		                                        -C[1+i+NUM][i1].i*C[Bnum+j+NUM][i1].r;
2573 	      }
2574 
2575 	    }
2576 	  }
2577 	}
2578       }
2579 
2580       /****************************************************
2581                         free arrays
2582       ****************************************************/
2583 
2584       for (i=0; i<(NUM+2); i++){
2585 	free(S_DC[i]);
2586       }
2587       free(S_DC);
2588 
2589       for (i=0; i<n2; i++){
2590 	free(H_DC[i]);
2591       }
2592       free(H_DC);
2593 
2594       free(ko);
2595       free(M1);
2596 
2597       for (i=0; i<n2; i++){
2598 	free(C[i]);
2599       }
2600       free(C);
2601 
2602       dtime(&Etime_atom);
2603       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
2604 
2605       /*
2606 	for (i1=1; i1<=NUM1; i1++){
2607 	printf("Mc_AN=%2d i1=%2d EVal=%15.12f\n",Mc_AN,i1,EVal[Mc_AN][i1-1]);
2608 	}
2609       */
2610 
2611     } /* end of Mc_AN */
2612 
2613     /* freeing of arrays */
2614 
2615     free(MP);
2616 
2617   } /* #pragma omp parallel */
2618 
2619   if ( strcasecmp(mode,"scf")==0 ){
2620 
2621     /****************************************************
2622               calculate projected DOS
2623     ****************************************************/
2624 
2625 #pragma omp parallel shared(time_per_atom,Residues,OLP0,natn,PDOS_DC,Msize,Spe_Total_CNO,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Stime_atom,Etime_atom,Gc_AN,wanA,tno1,i1,i,h_AN,Gh_AN,wanB,tno2,j,tmp1)
2626     {
2627 
2628       /* get info. on OpenMP */
2629 
2630       OMPID = omp_get_thread_num();
2631       Nthrds = omp_get_num_threads();
2632       Nprocs = omp_get_num_procs();
2633 
2634       for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){
2635 
2636 	dtime(&Stime_atom);
2637 
2638 	Gc_AN = M2G[Mc_AN];
2639 	wanA = WhatSpecies[Gc_AN];
2640 	tno1 = Spe_Total_CNO[wanA];
2641 
2642 	for (i1=0; i1<2*Msize[Mc_AN]; i1++){
2643 	  PDOS_DC[Mc_AN][i1] = 0.0;
2644 	}
2645 
2646 	for (i=0; i<tno1; i++){
2647 	  for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2648 	    Gh_AN = natn[Gc_AN][h_AN];
2649 	    wanB = WhatSpecies[Gh_AN];
2650 	    tno2 = Spe_Total_CNO[wanB];
2651 	    for (j=0; j<tno2; j++){
2652 
2653 	      tmp1 = OLP0[Mc_AN][h_AN][i][j];
2654 	      for (i1=0; i1<2*Msize[Mc_AN]; i1++){
2655 		PDOS_DC[Mc_AN][i1] += ( Residues[0][Mc_AN][h_AN][i][j][i1].r
2656 					+ Residues[1][Mc_AN][h_AN][i][j][i1].r )*tmp1;
2657 	      }
2658 
2659 	    }
2660 	  }
2661 	}
2662 
2663 	dtime(&Etime_atom);
2664 	time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
2665       }
2666 
2667     } /* #pragma omp parallel */
2668 
2669     /****************************************************
2670     find chemical potential
2671     ****************************************************/
2672 
2673     po = 0;
2674     loopN = 0;
2675 
2676     ChemP_MAX = 15.0;
2677     ChemP_MIN =-15.0;
2678 
2679     do {
2680       ChemP = 0.50*(ChemP_MAX + ChemP_MIN);
2681 
2682 #pragma omp parallel shared(omp_tmp,max_x,ChemP,Beta,EVal,Msize,Matomnum,M2G) private(OMPID,Nthrds,Nprocs,Mc_AN,Gc_AN,i,x,FermiF)
2683       {
2684 
2685 	/* get info. on OpenMP */
2686 
2687 	OMPID = omp_get_thread_num();
2688 	Nthrds = omp_get_num_threads();
2689 	Nprocs = omp_get_num_procs();
2690 
2691 	omp_tmp[OMPID] = 0.0;
2692 
2693 	for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){
2694 
2695 	  Gc_AN = M2G[Mc_AN];
2696 
2697 	  for (i=0; i<2*Msize[Mc_AN]; i++){
2698 	    x = (EVal[Mc_AN][i] - ChemP)*Beta;
2699 	    if (x<=-max_x) x = -max_x;
2700 	    if (max_x<=x)  x = max_x;
2701 	    FermiF = 1.0/(1.0 + exp(x));
2702 	    omp_tmp[OMPID] += FermiF*PDOS_DC[Mc_AN][i];
2703 	  }
2704 
2705 	}
2706 
2707       } /* #pragma omp parallel */
2708 
2709       My_Num_State = 0.0;
2710       for (ompi=0; ompi<Nthrds0; ompi++){
2711 	My_Num_State += omp_tmp[ompi];
2712       }
2713 
2714       /* MPI, My_Num_State */
2715 
2716       MPI_Barrier(mpi_comm_level1);
2717       MPI_Allreduce(&My_Num_State, &Num_State, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
2718 
2719       Dnum = (TZ - Num_State) - system_charge;
2720       if (0.0<=Dnum) ChemP_MIN = ChemP;
2721       else           ChemP_MAX = ChemP;
2722       if (fabs(Dnum)<1.0e-11) po = 1;
2723 
2724       if (myid==Host_ID && 2<=level_stdout){
2725 	printf("ChemP=%15.12f TZ=%15.12f Num_state=%15.12f\n",ChemP,TZ,Num_State);
2726       }
2727 
2728       loopN++;
2729     }
2730     while (po==0 && loopN<1000);
2731 
2732     /****************************************************
2733         eigenenergy by summing up eigenvalues
2734     ****************************************************/
2735 
2736     My_Eele0[0] = 0.0;
2737     My_Eele0[1] = 0.0;
2738 
2739 #pragma omp parallel shared(PDOS_DC,Beta,ChemP,max_x,EVal,Msize,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Gc_AN,i,x,FermiF)
2740     {
2741 
2742       /* get info. on OpenMP */
2743 
2744       OMPID = omp_get_thread_num();
2745       Nthrds = omp_get_num_threads();
2746       Nprocs = omp_get_num_procs();
2747 
2748       omp_tmp[OMPID] = 0.0;
2749 
2750       for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){
2751 
2752 	Gc_AN = M2G[Mc_AN];
2753 	for (i=0; i<2*Msize[Mc_AN]; i++){
2754 	  x = (EVal[Mc_AN][i] - ChemP)*Beta;
2755 	  if (x<=-max_x) x = -max_x;
2756 	  if (max_x<=x)  x = max_x;
2757 	  FermiF = 1.0/(1.0 + exp(x));
2758 	  omp_tmp[OMPID] += FermiF*EVal[Mc_AN][i]*PDOS_DC[Mc_AN][i];
2759 	}
2760       }
2761 
2762     } /* #pragma omp parallel */
2763 
2764     for (ompi=0; ompi<Nthrds0; ompi++){
2765       My_Eele0[0] += omp_tmp[ompi];
2766     }
2767 
2768     /* MPI, My_Eele0 */
2769 
2770     MPI_Barrier(mpi_comm_level1);
2771     MPI_Allreduce(&My_Eele0[0], &Eele0[0], 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
2772     MPI_Allreduce(&My_Eele0[1], &Eele0[1], 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
2773 
2774     /****************************************************
2775       calculate density and energy density matrices
2776 
2777         CDM[0]  Re alpha alpha density matrix
2778         CDM[1]  Re beta  beta  density matrix
2779         CDM[2]  Re alpha beta  density matrix
2780         CDM[3]  Im alpha beta  density matrix
2781         iDM[0][0]  Im alpha alpha density matrix
2782         iDM[0][1]  Im beta  beta  density matrix
2783 
2784         EDM[0]  Re alpha alpha energy density matrix
2785         EDM[1]  Re beta  beta  energy density matrix
2786         EDM[2]  Re alpha beta  energy density matrix
2787         EDM[3]  Im alpha beta  energy density matrix
2788     ****************************************************/
2789 
2790     for (spin=0; spin<=SpinP_switch; spin++){
2791       for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
2792 	Gc_AN = M2G[Mc_AN];
2793 	wanA = WhatSpecies[Gc_AN];
2794 	tno1 = Spe_Total_CNO[wanA];
2795 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2796 	  Gh_AN = natn[Gc_AN][h_AN];
2797 	  wanB = WhatSpecies[Gh_AN];
2798 	  tno2 = Spe_Total_CNO[wanB];
2799 
2800 	  for (i=0; i<tno1; i++){
2801 	    for (j=0; j<tno2; j++){
2802 	      CDM[spin][Mc_AN][h_AN][i][j] = 0.0;
2803 	      EDM[spin][Mc_AN][h_AN][i][j] = 0.0;
2804 	    }
2805 	  }
2806 
2807 	  /* spin-orbit coupling or LDA+U */
2808 	  if ( (SO_switch==1 || Hub_U_switch==1 || 1<=Constraint_NCS_switch
2809 		|| Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1) && spin==0 ){
2810 	    for (i=0; i<tno1; i++){
2811 	      for (j=0; j<tno2; j++){
2812 		iDM[0][0][Mc_AN][h_AN][i][j] = 0.0;
2813 		iDM[0][1][Mc_AN][h_AN][i][j] = 0.0;
2814 	      }
2815 	    }
2816 	  }
2817 
2818 	}
2819       }
2820     }
2821 
2822 #pragma omp parallel shared(iDM,Zeeman_NCO_switch,Zeeman_NCS_switch,Constraint_NCS_switch,Hub_U_switch,SO_switch,time_per_atom,EDM,CDM,Residues,natn,max_x,Beta,ChemP,EVal,Msize,Spe_Total_CNO,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Stime_atom,Gc_AN,wanA,tno1,i1,x,FermiF,h_AN,Gh_AN,wanB,tno2,i,j,tmp1,Etime_atom)
2823     {
2824 
2825       /* get info. on OpenMP */
2826 
2827       OMPID = omp_get_thread_num();
2828       Nthrds = omp_get_num_threads();
2829       Nprocs = omp_get_num_procs();
2830 
2831       for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){
2832 
2833 	dtime(&Stime_atom);
2834 
2835 	Gc_AN = M2G[Mc_AN];
2836 	wanA = WhatSpecies[Gc_AN];
2837 	tno1 = Spe_Total_CNO[wanA];
2838 
2839 	for (i1=0; i1<2*Msize[Mc_AN]; i1++){
2840 	  x = (EVal[Mc_AN][i1] - ChemP)*Beta;
2841 	  if (x<=-max_x) x = -max_x;
2842 	  if (max_x<=x)  x = max_x;
2843 	  FermiF = 1.0/(1.0 + exp(x));
2844 
2845 	  for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2846 	    Gh_AN = natn[Gc_AN][h_AN];
2847 	    wanB = WhatSpecies[Gh_AN];
2848 	    tno2 = Spe_Total_CNO[wanB];
2849 	    for (i=0; i<tno1; i++){
2850 	      for (j=0; j<tno2; j++){
2851 
2852 		tmp1 = FermiF*Residues[0][Mc_AN][h_AN][i][j][i1].r;
2853 
2854 		CDM[0][Mc_AN][h_AN][i][j] += tmp1;
2855 		EDM[0][Mc_AN][h_AN][i][j] += tmp1*EVal[Mc_AN][i1];
2856 
2857 		tmp1 = FermiF*Residues[1][Mc_AN][h_AN][i][j][i1].r;
2858 
2859 		CDM[1][Mc_AN][h_AN][i][j] += tmp1;
2860 		EDM[1][Mc_AN][h_AN][i][j] += tmp1*EVal[Mc_AN][i1];
2861 
2862 		tmp1 = FermiF*Residues[2][Mc_AN][h_AN][i][j][i1].r;
2863 
2864 		CDM[2][Mc_AN][h_AN][i][j] += tmp1;
2865 		EDM[2][Mc_AN][h_AN][i][j] += tmp1*EVal[Mc_AN][i1];
2866 
2867 		tmp1 = FermiF*Residues[2][Mc_AN][h_AN][i][j][i1].i;
2868 
2869 		CDM[3][Mc_AN][h_AN][i][j] += tmp1;
2870 		EDM[3][Mc_AN][h_AN][i][j] += tmp1*EVal[Mc_AN][i1];
2871 
2872 		/* spin-orbit coupling or LDA+U */
2873 		if (SO_switch==1 || Hub_U_switch==1 || 1<=Constraint_NCS_switch
2874 		    || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1){
2875 		  iDM[0][0][Mc_AN][h_AN][i][j] += FermiF*Residues[0][Mc_AN][h_AN][i][j][i1].i;
2876 		  iDM[0][1][Mc_AN][h_AN][i][j] += FermiF*Residues[1][Mc_AN][h_AN][i][j][i1].i;
2877 		}
2878 
2879 	      }
2880 	    }
2881 	  }
2882 	}
2883 
2884       } /* #pragma omp parallel */
2885 
2886       dtime(&Etime_atom);
2887       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
2888     }
2889 
2890   } /* if ( strcasecmp(mode,"scf")==0 ) */
2891 
2892   else if ( strcasecmp(mode,"dos")==0 ){
2893     Save_DOS_NonCol(Residues,OLP0,EVal,Msize);
2894   }
2895 
2896   /****************************************************
2897     freeing of arrays:
2898 
2899   ****************************************************/
2900 
2901   free(Snd_H_Size);
2902   free(Rcv_H_Size);
2903 
2904   free(Snd_iHNL_Size);
2905   free(Rcv_iHNL_Size);
2906 
2907   free(Snd_S_Size);
2908   free(Rcv_S_Size);
2909 
2910   free(Msize);
2911 
2912   for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
2913     free(EVal[Mc_AN]);
2914   }
2915   free(EVal);
2916 
2917   for (spin=0; spin<3; spin++){
2918     for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
2919 
2920       if (Mc_AN==0){
2921 	Gc_AN = 0;
2922 	FNAN[0] = 1;
2923 	tno1 = 1;
2924       }
2925       else{
2926         Gc_AN = M2G[Mc_AN];
2927 	wanA = WhatSpecies[Gc_AN];
2928 	tno1 = Spe_Total_CNO[wanA];
2929       }
2930 
2931       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2932 	if (Mc_AN==0){
2933 	  tno2 = 1;
2934 	}
2935 	else {
2936 	  Gh_AN = natn[Gc_AN][h_AN];
2937 	  wanB = WhatSpecies[Gh_AN];
2938 	  tno2 = Spe_Total_CNO[wanB];
2939 	}
2940 
2941 	for (i=0; i<tno1; i++){
2942 	  for (j=0; j<tno2; j++){
2943 	    free(Residues[spin][Mc_AN][h_AN][i][j]);
2944 	  }
2945 	  free(Residues[spin][Mc_AN][h_AN][i]);
2946 	}
2947 	free(Residues[spin][Mc_AN][h_AN]);
2948       }
2949       free(Residues[spin][Mc_AN]);
2950     }
2951     free(Residues[spin]);
2952   }
2953   free(Residues);
2954 
2955   for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
2956     free(PDOS_DC[Mc_AN]);
2957   }
2958   free(PDOS_DC);
2959 
2960   free(omp_tmp);
2961 
2962   /* for time */
2963   dtime(&TEtime);
2964   time0 = TEtime - TStime;
2965 
2966   /* for PrintMemory */
2967   firsttime=0;
2968 
2969   return time0;
2970 }
2971 
2972 
2973 
Save_DOS_Col(double ****** Residues,double **** OLP0,double *** EVal,int * Msize)2974 void Save_DOS_Col(double ******Residues, double ****OLP0, double ***EVal, int *Msize)
2975 {
2976   int spin,Mc_AN,wanA,Gc_AN,tno1;
2977   int i1,i,j,MaxL,l,h_AN,Gh_AN,wanB,tno2;
2978   double Stime_atom,Etime_atom;
2979   double sum;
2980   int i_vec[10];
2981   char file_eig[YOUSO10],file_ev[YOUSO10];
2982   FILE *fp_eig, *fp_ev;
2983   int numprocs,myid,ID,tag;
2984 
2985   /* for OpenMP */
2986   int OMPID,Nthrds,Nprocs;
2987 
2988   /* MPI */
2989 
2990   MPI_Comm_size(mpi_comm_level1,&numprocs);
2991   MPI_Comm_rank(mpi_comm_level1,&myid);
2992 
2993   /* open file pointers */
2994 
2995   if (myid==Host_ID){
2996 
2997     sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
2998     if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
2999       printf("cannot open a file %s\n",file_eig);
3000     }
3001   }
3002 
3003   sprintf(file_ev, "%s%s.Dos.vec%i",filepath,filename,myid);
3004   if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
3005     printf("cannot open a file %s\n",file_ev);
3006   }
3007 
3008   /****************************************************
3009                    save *.Dos.vec
3010   ****************************************************/
3011 
3012   for (spin=0; spin<=SpinP_switch; spin++){
3013     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
3014 
3015       dtime(&Stime_atom);
3016 
3017       Gc_AN = M2G[Mc_AN];
3018       wanA = WhatSpecies[Gc_AN];
3019       tno1 = Spe_Total_CNO[wanA];
3020 
3021       fprintf(fp_ev,"<AN%dAN%d\n",Gc_AN,spin);
3022       fprintf(fp_ev,"%d\n",Msize[Mc_AN]);
3023 
3024       for (i1=0; i1<Msize[Mc_AN]; i1++){
3025 
3026 	fprintf(fp_ev,"%4d  %10.6f  ",i1,EVal[spin][Mc_AN][i1]);
3027 
3028 	for (i=0; i<tno1; i++){
3029 
3030 	  sum = 0.0;
3031 	  for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
3032 	    Gh_AN = natn[Gc_AN][h_AN];
3033 	    wanB = WhatSpecies[Gh_AN];
3034 	    tno2 = Spe_Total_CNO[wanB];
3035 	    for (j=0; j<tno2; j++){
3036 	      sum += Residues[spin][Mc_AN][h_AN][i][j][i1]*
3037 		               OLP0[Mc_AN][h_AN][i][j];
3038 	    }
3039 	  }
3040 
3041 	  fprintf(fp_ev,"%8.5f",sum);
3042 	}
3043 
3044 	fprintf(fp_ev,"\n");
3045       }
3046 
3047       fprintf(fp_ev,"AN%dAN%d>\n",Gc_AN,spin);
3048 
3049       dtime(&Etime_atom);
3050       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
3051     }
3052   }
3053 
3054   /****************************************************
3055                    save *.Dos.val
3056   ****************************************************/
3057 
3058   if (myid==Host_ID){
3059 
3060     fprintf(fp_eig,"mode        5\n");
3061     fprintf(fp_eig,"NonCol      0\n");
3062     /*      fprintf(fp_eig,"N           %d\n",n); */
3063     fprintf(fp_eig,"Nspin       %d\n",SpinP_switch);
3064     fprintf(fp_eig,"Erange      %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
3065     fprintf(fp_eig,"atomnum     %d\n",atomnum);
3066 
3067     fprintf(fp_eig,"<WhatSpecies\n");
3068     for (i=1;i<=atomnum;i++) {
3069       fprintf(fp_eig,"%d ",WhatSpecies[i]);
3070     }
3071     fprintf(fp_eig,"\nWhatSpecies>\n");
3072 
3073     fprintf(fp_eig,"SpeciesNum     %d\n",SpeciesNum);
3074     fprintf(fp_eig,"<Spe_Total_CNO\n");
3075     for (i=0;i<SpeciesNum;i++) {
3076       fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
3077     }
3078     fprintf(fp_eig,"\nSpe_Total_CNO>\n");
3079 
3080     MaxL=Supported_MaxL;
3081     fprintf(fp_eig,"MaxL           %d\n",Supported_MaxL);
3082     fprintf(fp_eig,"<Spe_Num_CBasis\n");
3083     for (i=0;i<SpeciesNum;i++) {
3084       for (l=0;l<=MaxL;l++) {
3085 	fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
3086       }
3087       fprintf(fp_eig,"\n");
3088     }
3089     fprintf(fp_eig,"Spe_Num_CBasis>\n");
3090     fprintf(fp_eig,"ChemP       %lf\n",ChemP);
3091   }
3092 
3093   /* close file pointers */
3094 
3095   if (myid==Host_ID){
3096     if (fp_eig) fclose(fp_eig);
3097   }
3098 
3099   if (fp_ev)  fclose(fp_ev);
3100 }
3101 
3102 
3103 
Save_DOS_NonCol(dcomplex ****** Residues,double **** OLP0,double ** EVal,int * Msize)3104 void Save_DOS_NonCol(dcomplex ******Residues, double ****OLP0, double **EVal, int *Msize)
3105 {
3106   int spin,Mc_AN,wanA,Gc_AN,tno1;
3107   int i1,i,j,MaxL,l,h_AN,Gh_AN,wanB,tno2;
3108   double Stime_atom,Etime_atom;
3109   double tmp1,tmp2,tmp3,sum,SDup,SDdn;
3110   double Re11,Re22,Re12,Im12;
3111   double theta,phi,sit,cot,sip,cop;
3112   int i_vec[10];
3113   char file_eig[YOUSO10],file_ev[YOUSO10];
3114   FILE *fp_eig, *fp_ev;
3115   int numprocs,myid,ID,tag;
3116 
3117   /* for OpenMP */
3118   int OMPID,Nthrds,Nprocs;
3119 
3120   /* MPI */
3121 
3122   MPI_Comm_size(mpi_comm_level1,&numprocs);
3123   MPI_Comm_rank(mpi_comm_level1,&myid);
3124 
3125   /* open file pointers */
3126 
3127   if (myid==Host_ID){
3128 
3129     sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
3130     if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
3131       printf("cannot open a file %s\n",file_eig);
3132     }
3133   }
3134 
3135   sprintf(file_ev, "%s%s.Dos.vec%i",filepath,filename,myid);
3136   if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
3137     printf("cannot open a file %s\n",file_ev);
3138   }
3139 
3140   /****************************************************
3141                    save *.Dos.vec
3142   ****************************************************/
3143 
3144   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
3145 
3146     dtime(&Stime_atom);
3147 
3148     Gc_AN = M2G[Mc_AN];
3149     wanA = WhatSpecies[Gc_AN];
3150     tno1 = Spe_Total_CNO[wanA];
3151 
3152     theta = Angle0_Spin[Gc_AN];
3153     phi   = Angle1_Spin[Gc_AN];
3154 
3155     sit = sin(theta);
3156     cot = cos(theta);
3157     sip = sin(phi);
3158     cop = cos(phi);
3159 
3160     fprintf(fp_ev,"<AN%d\n",Gc_AN);
3161     fprintf(fp_ev,"%d %d\n",2*Msize[Mc_AN],2*Msize[Mc_AN]);
3162 
3163     for (i1=0; i1<2*Msize[Mc_AN]; i1++){
3164 
3165       fprintf(fp_ev,"%4d  %10.6f %10.6f ",i1,EVal[Mc_AN][i1],EVal[Mc_AN][i1]);
3166 
3167       for (i=0; i<tno1; i++){
3168 
3169 	Re11 = 0.0;
3170 	Re22 = 0.0;
3171 	Re12 = 0.0;
3172 	Im12 = 0.0;
3173 
3174 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
3175 
3176 	  Gh_AN = natn[Gc_AN][h_AN];
3177 	  wanB = WhatSpecies[Gh_AN];
3178 	  tno2 = Spe_Total_CNO[wanB];
3179 
3180 	  for (j=0; j<tno2; j++){
3181 
3182 	    Re11 += Residues[0][Mc_AN][h_AN][i][j][i1].r*
3183 	                   OLP0[Mc_AN][h_AN][i][j];
3184 
3185 	    Re22 += Residues[1][Mc_AN][h_AN][i][j][i1].r*
3186 	                   OLP0[Mc_AN][h_AN][i][j];
3187 
3188 	    Re12 += Residues[2][Mc_AN][h_AN][i][j][i1].r*
3189  	                   OLP0[Mc_AN][h_AN][i][j];
3190 
3191 	    Im12 += Residues[2][Mc_AN][h_AN][i][j][i1].i*
3192 	                   OLP0[Mc_AN][h_AN][i][j];
3193 
3194 	  }
3195 	}
3196 
3197 	tmp1 = 0.5*(Re11 + Re22);
3198 	tmp2 = 0.5*cot*(Re11 - Re22);
3199 	tmp3 = (Re12*cop - Im12*sip)*sit;
3200 
3201 	SDup = tmp1 + tmp2 + tmp3;
3202 	SDdn = tmp1 - tmp2 - tmp3;
3203 
3204 	fprintf(fp_ev,"%8.5f %8.5f ",SDup,SDdn);
3205       }
3206       fprintf(fp_ev,"\n");
3207     }
3208 
3209     fprintf(fp_ev,"AN%d>\n",Gc_AN);
3210 
3211     dtime(&Etime_atom);
3212     time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
3213   }
3214 
3215   /****************************************************
3216                    save *.Dos.val
3217   ****************************************************/
3218 
3219   if (myid==Host_ID){
3220 
3221     fprintf(fp_eig,"mode        5\n");
3222     fprintf(fp_eig,"NonCol      1\n");
3223     /*      fprintf(fp_eig,"N           %d\n",n); */
3224     fprintf(fp_eig,"Nspin       %d\n",1);  /* switch to 1 */
3225     fprintf(fp_eig,"Erange      %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
3226     fprintf(fp_eig,"atomnum     %d\n",atomnum);
3227 
3228     fprintf(fp_eig,"<WhatSpecies\n");
3229     for (i=1;i<=atomnum;i++) {
3230       fprintf(fp_eig,"%d ",WhatSpecies[i]);
3231     }
3232     fprintf(fp_eig,"\nWhatSpecies>\n");
3233 
3234     fprintf(fp_eig,"SpeciesNum     %d\n",SpeciesNum);
3235     fprintf(fp_eig,"<Spe_Total_CNO\n");
3236     for (i=0;i<SpeciesNum;i++) {
3237       fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
3238     }
3239     fprintf(fp_eig,"\nSpe_Total_CNO>\n");
3240 
3241     MaxL=Supported_MaxL;
3242     fprintf(fp_eig,"MaxL           %d\n",Supported_MaxL);
3243     fprintf(fp_eig,"<Spe_Num_CBasis\n");
3244     for (i=0;i<SpeciesNum;i++) {
3245       for (l=0;l<=MaxL;l++) {
3246 	fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
3247       }
3248       fprintf(fp_eig,"\n");
3249     }
3250     fprintf(fp_eig,"Spe_Num_CBasis>\n");
3251     fprintf(fp_eig,"ChemP       %lf\n",ChemP);
3252 
3253     fprintf(fp_eig,"<SpinAngle\n");
3254     for (i=1; i<=atomnum; i++) {
3255       fprintf(fp_eig,"%lf %lf\n",Angle0_Spin[i],Angle1_Spin[i]);
3256     }
3257     fprintf(fp_eig,"SpinAngle>\n");
3258   }
3259 
3260   /* close file pointers */
3261 
3262   if (myid==Host_ID){
3263     if (fp_eig) fclose(fp_eig);
3264   }
3265 
3266   if (fp_ev)  fclose(fp_ev);
3267 
3268 }
3269 
3270