1 /**********************************************************************
2   DIIS_Mixing_Rhok.c:
3 
4      DIIS_Mixing_Rhok.c is a subroutine to achieve self-consistent field
5      using the direct inversion in the iterative subspace in k-space.
6 
7   Log of DIIS_Mixing_Rhok.c:
8 
9      3/Jan/2005  Released by T.Ozaki
10 
11 ***********************************************************************/
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <math.h>
16 #include <string.h>
17 #include "openmx_common.h"
18 #include "tran_prototypes.h"
19 #include "tran_variables.h"
20 #include "lapack_prototypes.h"
21 #include "mpi.h"
22 #include <omp.h>
23 
24 
25 #define  measure_time   0
26 #define  maxima_step  10000000.0
27 
28 #ifdef MIN
29 #undef MIN
30 #endif
31 #define MIN(a,b) ((a)<(b))?  (a):(b)
32 
33 
34 static void Inverse(int n, double **a, double **ia);
35 static void Complex_Inverse(int n, double **a, double **ia, double **b, double **ib);
36 
37 
38 static void DIIS_Mixing_Rhok_Normal(int SCF_iter,
39 			      double Mix_wgt,
40 			      double ***ReRhok,
41 			      double ***ImRhok,
42 			      double **Residual_ReRhok,
43 			      double **Residual_ImRhok,
44 			      double *ReVk,
45 			      double *ImVk,
46 			      double *ReRhoAtomk,
47 			      double *ImRhoAtomk);
48 
49 
50 static void DIIS_Mixing_Rhok_NEGF(int SCF_iter,
51 				  double Mix_wgt,
52 				  double ***ReRhok,
53 				  double ***ImRhok,
54 				  double **Residual_ReRhok,
55 				  double **Residual_ImRhok,
56 				  double *ReVk,
57 				  double *ImVk,
58 				  double *ReRhoAtomk,
59 				  double *ImRhoAtomk);
60 
DIIS_Mixing_Rhok(int SCF_iter,double Mix_wgt,double *** ReRhok,double *** ImRhok,double ** Residual_ReRhok,double ** Residual_ImRhok,double * ReVk,double * ImVk,double * ReRhoAtomk,double * ImRhoAtomk)61 void DIIS_Mixing_Rhok(int SCF_iter,
62                       double Mix_wgt,
63                       double ***ReRhok,
64                       double ***ImRhok,
65                       double **Residual_ReRhok,
66                       double **Residual_ImRhok,
67                       double *ReVk,
68                       double *ImVk,
69                       double *ReRhoAtomk,
70                       double *ImRhoAtomk)
71 {
72 
73   if (Solver!=4 || TRAN_Poisson_flag==2){
74 
75     DIIS_Mixing_Rhok_Normal(
76 			    SCF_iter,
77 			    Mix_wgt,
78 			    ReRhok,
79 			    ImRhok,
80 			    Residual_ReRhok,
81 			    Residual_ImRhok,
82 			    ReVk,
83 			    ImVk,
84 			    ReRhoAtomk,
85 			    ImRhoAtomk);
86   }
87 
88   else if (Solver==4){
89 
90     DIIS_Mixing_Rhok_NEGF(
91 			  SCF_iter,
92 			  Mix_wgt,
93 			  ReRhok,
94 			  ImRhok,
95 			  Residual_ReRhok,
96 			  Residual_ImRhok,
97 			  ReVk,
98 			  ImVk,
99 			  ReRhoAtomk,
100 			  ImRhoAtomk);
101   }
102 
103 }
104 
105 
106 
107 
DIIS_Mixing_Rhok_Normal(int SCF_iter,double Mix_wgt,double *** ReRhok,double *** ImRhok,double ** Residual_ReRhok,double ** Residual_ImRhok,double * ReVk,double * ImVk,double * ReRhoAtomk,double * ImRhoAtomk)108 void DIIS_Mixing_Rhok_Normal(int SCF_iter,
109 			     double Mix_wgt,
110 			     double ***ReRhok,
111 			     double ***ImRhok,
112 			     double **Residual_ReRhok,
113 			     double **Residual_ImRhok,
114 			     double *ReVk,
115 			     double *ImVk,
116 			     double *ReRhoAtomk,
117 			     double *ImRhoAtomk)
118 {
119   static int firsttime=1;
120   int spin,Mc_AN,Gc_AN,wan1,TNO1,h_AN,Gh_AN;
121   int wan2,TNO2,i,j,k,NumMix,NumSlide;
122   int SCFi,SCFj,tno1,tno0,Cwan,Hwan,k1,k2,k3,kk2;
123   int pSCF_iter,size_Re_OptRhok,size_Kerker_weight;
124   int spinmax,MN,flag_nan,refiter,po3,po4,refiter2;
125   int N2D,GN,GNs,N3[4],BN_AB,p0,p1,M,N,K;
126   double time1,time2,time3,time4,time5;
127   double Stime1,Etime1,scale;
128   double *alden,*alden0,*gradF;
129   double **A,**IA;
130   double *My_NMat,*NMat;
131   double alpha,beta;
132   double tmp0,itmp0,tmp1;
133   double im1,im2,re1,re2;
134   double OptRNorm,My_OptRNorm,sum0,sum1;
135   double dum1,dum2,bunsi,bunbo,sum,coef_OptRho;
136   double Gx,Gy,Gz,G2,G12,G22,G32,F,F0;
137   double Min_Weight;
138   double Max_Weight;
139   double G0,G02,G02p,weight,wgt0,wgt1;
140   double sk1,sk2,sk3;
141   double **Re_OptRhok;
142   double **Im_OptRhok;
143   double *Kerker_weight;
144   int numprocs,myid;
145   char nanchar[300];
146   /* for OpenMP */
147   int OMPID,Nthrds,Nprocs,Nloop,Nthrds0;
148 
149   /* MPI */
150   MPI_Comm_size(mpi_comm_level1,&numprocs);
151   MPI_Comm_rank(mpi_comm_level1,&myid);
152 
153   /* find an optimum G0 */
154 
155   G12 = rtv[1][1]*rtv[1][1] + rtv[1][2]*rtv[1][2] + rtv[1][3]*rtv[1][3];
156   G22 = rtv[2][1]*rtv[2][1] + rtv[2][2]*rtv[2][2] + rtv[2][3]*rtv[2][3];
157   G32 = rtv[3][1]*rtv[3][1] + rtv[3][2]*rtv[3][2] + rtv[3][3]*rtv[3][3];
158 
159   if (G12<G22) G0 = G12;
160   else         G0 = G22;
161   if (G32<G0)  G0 = G32;
162 
163   G0 = sqrt(G0);
164   G02 = Kerker_factor*Kerker_factor*G0*G0;
165   G02p = (0.01*Kerker_factor*G0)*(0.01*Kerker_factor*G0);
166 
167   if      (SpinP_switch==0)  spinmax = 1;
168   else if (SpinP_switch==1)  spinmax = 2;
169   else if (SpinP_switch==3)  spinmax = 3;
170 
171   /****************************************************
172     allocation of arrays:
173   ****************************************************/
174 
175   alden = (double*)malloc(sizeof(double)*List_YOUSO[38]);
176   alden0 = (double*)malloc(sizeof(double)*List_YOUSO[38]);
177   gradF = (double*)malloc(sizeof(double)*List_YOUSO[38]);
178 
179   My_NMat = (double*)malloc(sizeof(double)*List_YOUSO[38]*List_YOUSO[38]);
180   NMat = (double*)malloc(sizeof(double)*List_YOUSO[38]*List_YOUSO[38]);
181 
182   A = (double**)malloc(sizeof(double*)*List_YOUSO[38]);
183   for (i=0; i<List_YOUSO[38]; i++){
184     A[i] = (double*)malloc(sizeof(double)*List_YOUSO[38]);
185   }
186 
187   IA = (double**)malloc(sizeof(double*)*List_YOUSO[38]);
188   for (i=0; i<List_YOUSO[38]; i++){
189     IA[i] = (double*)malloc(sizeof(double)*List_YOUSO[38]);
190   }
191 
192   /* Re_OptRhok and Im_OptRhok */
193 
194   size_Re_OptRhok = spinmax*My_NumGridB_CB;
195 
196   Re_OptRhok = (double**)malloc(sizeof(double*)*spinmax);
197   for (spin=0; spin<spinmax; spin++){
198     Re_OptRhok[spin] = (double*)malloc(sizeof(double)*My_NumGridB_CB);
199   }
200 
201   Im_OptRhok = (double**)malloc(sizeof(double*)*spinmax);
202   for (spin=0; spin<spinmax; spin++){
203     Im_OptRhok[spin] = (double*)malloc(sizeof(double)*My_NumGridB_CB);
204   }
205 
206   size_Kerker_weight = My_NumGridB_CB;
207   Kerker_weight = (double*)malloc(sizeof(double)*My_NumGridB_CB);
208 
209   /***********************************
210             set Kerker_weight
211   ************************************/
212 
213   if (measure_time==1) dtime(&Stime1);
214 
215   N2D = Ngrid3*Ngrid2;
216   GNs = ((myid*N2D+numprocs-1)/numprocs)*Ngrid1;
217 
218   for (k=0; k<My_NumGridB_CB; k++){
219 
220     /* get k3, k2, and k1 */
221 
222     GN = k + GNs;
223     k3 = GN/(Ngrid2*Ngrid1);
224     k2 = (GN - k3*Ngrid2*Ngrid1)/Ngrid1;
225     k1 = GN - k3*Ngrid2*Ngrid1 - k2*Ngrid1;
226 
227     if (k1<Ngrid1/2) sk1 = (double)k1;
228     else             sk1 = (double)(k1 - Ngrid1);
229 
230     if (k2<Ngrid2/2) sk2 = (double)k2;
231     else             sk2 = (double)(k2 - Ngrid2);
232 
233     if (k3<Ngrid3/2) sk3 = (double)k3;
234     else             sk3 = (double)(k3 - Ngrid3);
235 
236     Gx = sk1*rtv[1][1] + sk2*rtv[2][1] + sk3*rtv[3][1];
237     Gy = sk1*rtv[1][2] + sk2*rtv[2][2] + sk3*rtv[3][2];
238     Gz = sk1*rtv[1][3] + sk2*rtv[2][3] + sk3*rtv[3][3];
239 
240     G2 = Gx*Gx + Gy*Gy + Gz*Gz;
241 
242     if (k1==0 && k2==0 && k3==0)  weight = 0.5;
243     else                          weight = (G2 + G02)/(G2 + G02p);
244 
245     /*
246     if (SCF_iter==10){
247 
248     printf("%18.13f %18.13f %18.13f\n",
249            G2,
250            ReRhok[0][0][k]-ReRhok[1][0][k],
251            ImRhok[0][0][k]-ImRhok[1][0][k]);
252     }
253     */
254 
255     Kerker_weight[k] = sqrt(weight);
256 
257   } /* k */
258 
259   if (measure_time==1){
260     dtime(&Etime1);
261     time1 = Etime1 - Stime1;
262   }
263 
264   /****************************************************
265      start calc.
266   ****************************************************/
267 
268   if (SCF_iter==1){
269     /* memory calc. done, only 1st iteration */
270     if (firsttime) {
271       PrintMemory("DIIS_Mixing_Rhok: Re_OptRhok",sizeof(double)*size_Re_OptRhok,NULL);
272       PrintMemory("DIIS_Mixing_Rhok: Im_OptRhok",sizeof(double)*size_Re_OptRhok,NULL);
273       PrintMemory("DIIS_Mixing_Rhok: Kerker_weight",sizeof(double)*size_Kerker_weight,NULL);
274       firsttime=0;
275     }
276 
277     Kerker_Mixing_Rhok(0,1.00,ReRhok,ImRhok,
278                        Residual_ReRhok,Residual_ImRhok,
279                        ReVk,ImVk,ReRhoAtomk,ImRhoAtomk);
280   }
281 
282   else if (SCF_iter==2){
283 
284     if (measure_time==1) dtime(&Stime1);
285 
286     /* construct residual rho */
287 
288     for (spin=0; spin<spinmax; spin++){
289       for (k=0; k<My_NumGridB_CB; k++){
290 
291 	Residual_ReRhok[spin][2*My_NumGridB_CB+k] = (ReRhok[0][spin][k] - ReRhok[1][spin][k])*Kerker_weight[k];
292 	Residual_ImRhok[spin][2*My_NumGridB_CB+k] = (ImRhok[0][spin][k] - ImRhok[1][spin][k])*Kerker_weight[k];
293 
294       } /* k */
295     } /* spin */
296 
297     if (measure_time==1){
298       dtime(&Etime1);
299       time2 = Etime1 - Stime1;
300     }
301 
302     /* rho1 to rho2 */
303 
304     for (spin=0; spin<spinmax; spin++){
305       for (k=0; k<My_NumGridB_CB; k++){
306 	ReRhok[2][spin][k] = ReRhok[1][spin][k];
307 	ImRhok[2][spin][k] = ImRhok[1][spin][k];
308       }
309     }
310 
311     Kerker_Mixing_Rhok(0,Mixing_weight,ReRhok,ImRhok,
312                        Residual_ReRhok,Residual_ImRhok,
313                        ReVk,ImVk,ReRhoAtomk,ImRhoAtomk);
314   }
315 
316   else {
317 
318     /****************************************************
319                    construct residual rho1
320     ****************************************************/
321 
322     if (measure_time==1) dtime(&Stime1);
323 
324     for (spin=0; spin<spinmax; spin++){
325       for (k=0; k<My_NumGridB_CB; k++){
326 
327 	Residual_ReRhok[spin][My_NumGridB_CB+k] = (ReRhok[0][spin][k] - ReRhok[1][spin][k])*Kerker_weight[k];
328 	Residual_ImRhok[spin][My_NumGridB_CB+k] = (ImRhok[0][spin][k] - ImRhok[1][spin][k])*Kerker_weight[k];
329 
330       } /* k */
331     } /* spin */
332 
333     if (measure_time==1){
334       dtime(&Etime1);
335       time3 = Etime1 - Stime1;
336     }
337 
338     if ((SCF_iter-1)<Num_Mixing_pDM){
339       NumMix   = SCF_iter - 1;
340       NumSlide = NumMix + 1;
341     }
342     else{
343       NumMix   = Num_Mixing_pDM;
344       NumSlide = NumMix;
345     }
346 
347     /****************************************************
348                     alpha from residual rho
349     ****************************************************/
350 
351     if (measure_time==1) dtime(&Stime1);
352 
353     /* calculation of the norm matrix for the residual vectors */
354 
355     for (i=0; i<List_YOUSO[38]*List_YOUSO[38]; i++) My_NMat[i] = 0.0;
356 
357     for (spin=0; spin<spinmax; spin++){
358 
359       M = NumMix + 1;
360       N = NumMix + 1;
361       K = My_NumGridB_CB;
362       alpha = 1.0;
363       beta = 1.0;
364 
365       F77_NAME(dgemm,DGEMM)( "T","N", &M, &N, &K,
366                              &alpha,
367                              Residual_ReRhok[spin], &K,
368                              Residual_ReRhok[spin], &K,
369                              &beta,
370                              My_NMat,
371                              &M);
372 
373       F77_NAME(dgemm,DGEMM)( "T","N", &M, &N, &K,
374                              &alpha,
375                              Residual_ImRhok[spin], &K,
376                              Residual_ImRhok[spin], &K,
377                              &beta,
378                              My_NMat,
379                              &M);
380     }
381 
382     MPI_Allreduce(My_NMat, NMat, (NumMix+1)*(NumMix+1),
383                   MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
384 
385     for (i=0; i<NumMix; i++){
386       for (j=0; j<NumMix; j++){
387         A[i][j] = NMat[(i+1)*(NumMix+1)+(j+1)];
388       }
389     }
390 
391     if (measure_time==1){
392       dtime(&Etime1);
393       time4 = Etime1 - Stime1;
394     }
395 
396     /* store NormRD */
397 
398     for (i=4; 1<=i; i--){
399       NormRD[i] = NormRD[i-1];
400       History_Uele[i] = History_Uele[i-1];
401     }
402     NormRD[0] = A[0][0]/(double)Ngrid1/(double)Ngrid2/(double)Ngrid3/(double)spinmax;
403     History_Uele[0] = Uele;
404 
405     /* set matrix elements on lammda */
406 
407     for (SCFi=1; SCFi<=NumMix; SCFi++){
408        A[SCFi-1][NumMix] = -1.0;
409        A[NumMix][SCFi-1] = -1.0;
410     }
411     A[NumMix][NumMix] = 0.0;
412 
413     /* solve the linear equation */
414 
415     Inverse(NumMix,A,IA);
416 
417     for (SCFi=1; SCFi<=NumMix; SCFi++){
418        alden[SCFi] = -IA[SCFi-1][NumMix];
419     }
420 
421     /*************************************
422       check "nan", "NaN", "inf" or "Inf"
423     *************************************/
424 
425     flag_nan = 0;
426     for (SCFi=1; SCFi<=NumMix; SCFi++){
427 
428       sprintf(nanchar,"%8.4f",alden[SCFi]);
429       if (strstr(nanchar,"nan")!=NULL || strstr(nanchar,"NaN")!=NULL
430        || strstr(nanchar,"inf")!=NULL || strstr(nanchar,"Inf")!=NULL){
431 
432         flag_nan = 1;
433       }
434     }
435 
436     if (flag_nan==1){
437       for (SCFi=1; SCFi<=NumMix; SCFi++){
438         alden[SCFi] = 0.0;
439       }
440       alden[1] = 0.1;
441       alden[2] = 0.9;
442     }
443 
444     /****************************************************
445               calculate optimized residual rho
446     ****************************************************/
447 
448     if (measure_time==1) dtime(&Stime1);
449 
450     for (spin=0; spin<spinmax; spin++){
451       for (k=0; k<My_NumGridB_CB; k++){
452         Re_OptRhok[spin][k] = 0.0;
453         Im_OptRhok[spin][k] = 0.0;
454       }
455     }
456 
457     /* Pulay mixing for residual Rhok */
458 
459     for (pSCF_iter=1; pSCF_iter<=NumMix; pSCF_iter++){
460 
461       tmp0 = alden[pSCF_iter];
462       p0 = pSCF_iter*My_NumGridB_CB;
463 
464 #pragma omp parallel shared(spinmax,My_NumGridB_CB,Re_OptRhok,Im_OptRhok,tmp0,p0,Residual_ReRhok,Residual_ImRhok)
465       {
466 
467         int OMPID,Nthrds,k;
468 
469 	OMPID = omp_get_thread_num();
470 	Nthrds = omp_get_num_threads();
471 
472 	if (spinmax==1){
473 	  for (k=My_NumGridB_CB*OMPID/Nthrds; k<My_NumGridB_CB*(OMPID+1)/Nthrds; k++){
474 	    Re_OptRhok[0][k] += tmp0*Residual_ReRhok[0][p0+k];
475 	    Im_OptRhok[0][k] += tmp0*Residual_ImRhok[0][p0+k];
476 	  } /* k */
477 	}
478 
479 	else if (spinmax==2){
480 	  for (k=My_NumGridB_CB*OMPID/Nthrds; k<My_NumGridB_CB*(OMPID+1)/Nthrds; k++){
481 	    Re_OptRhok[0][k] += tmp0*Residual_ReRhok[0][p0+k];
482 	    Im_OptRhok[0][k] += tmp0*Residual_ImRhok[0][p0+k];
483 	    Re_OptRhok[1][k] += tmp0*Residual_ReRhok[1][p0+k];
484 	    Im_OptRhok[1][k] += tmp0*Residual_ImRhok[1][p0+k];
485 	  } /* k */
486 	}
487 
488 	else if (spinmax==3){
489 	  for (k=My_NumGridB_CB*OMPID/Nthrds; k<My_NumGridB_CB*(OMPID+1)/Nthrds; k++){
490 	    Re_OptRhok[0][k] += tmp0*Residual_ReRhok[0][p0+k];
491 	    Im_OptRhok[0][k] += tmp0*Residual_ImRhok[0][p0+k];
492 	    Re_OptRhok[1][k] += tmp0*Residual_ReRhok[1][p0+k];
493 	    Im_OptRhok[1][k] += tmp0*Residual_ImRhok[1][p0+k];
494 	    Re_OptRhok[2][k] += tmp0*Residual_ReRhok[2][p0+k];
495 	    Im_OptRhok[2][k] += tmp0*Residual_ImRhok[2][p0+k];
496 	  } /* k */
497 	}
498 
499       } /* #pragma omp parallel */
500 
501     } /* pSCF_iter */
502 
503     if      (1.0e-2<=NormRD[0])                        coef_OptRho = 0.5;
504     else if (1.0e-4<=NormRD[0]  && NormRD[0]<1.0e-2)   coef_OptRho = 0.6;
505     else if (1.0e-6<=NormRD[0]  && NormRD[0]<1.0e-4)   coef_OptRho = 0.7;
506     else if (1.0e-8<=NormRD[0]  && NormRD[0]<1.0e-6)   coef_OptRho = 0.8;
507     else if (1.0e-10<=NormRD[0] && NormRD[0]<1.0e-8)   coef_OptRho = 0.9;
508     else                                               coef_OptRho = 1.0;
509 
510     /****************************************************
511                         mixing of rho
512     ****************************************************/
513 
514     /****************************************************
515        Pulay mixing
516     ****************************************************/
517 
518     if ( SCF_iter%EveryPulay_SCF==0 ) {
519 
520       /* reduce Mixing_weight so that charge sloshing can be avoided at the next SCF */
521 
522       Mixing_weight = 0.1*Max_Mixing_weight;
523       if (Mixing_weight<Min_Mixing_weight) Mixing_weight = Min_Mixing_weight;
524 
525       /* initial ReRhok and ImRhok */
526 
527       for (spin=0; spin<spinmax; spin++){
528         for (k=0; k<My_NumGridB_CB; k++){
529           ReRhok[0][spin][k] = 0.0;
530 	  ImRhok[0][spin][k] = 0.0;
531 	}
532       }
533 
534       /* Pulay mixing */
535 
536       for (pSCF_iter=1; pSCF_iter<=NumMix; pSCF_iter++){
537 
538 	tmp0 =  alden[pSCF_iter];
539 
540 #pragma omp parallel shared(tmp0,pSCF_iter,spinmax,My_NumGridB_CB,ReRhok,ImRhok)
541 	{
542 
543           int OMPID,Nthrds,k;
544 
545 	  OMPID = omp_get_thread_num();
546 	  Nthrds = omp_get_num_threads();
547 
548 	  if (spinmax==1){
549   	    for (k=My_NumGridB_CB*OMPID/Nthrds; k<My_NumGridB_CB*(OMPID+1)/Nthrds; k++){
550 	      ReRhok[0][0][k] += tmp0*ReRhok[pSCF_iter][0][k];
551 	      ImRhok[0][0][k] += tmp0*ImRhok[pSCF_iter][0][k];
552 	    } /* k */
553 	  }
554 
555 	  else if (spinmax==2){
556   	    for (k=My_NumGridB_CB*OMPID/Nthrds; k<My_NumGridB_CB*(OMPID+1)/Nthrds; k++){
557 	      ReRhok[0][0][k] += tmp0*ReRhok[pSCF_iter][0][k];
558 	      ImRhok[0][0][k] += tmp0*ImRhok[pSCF_iter][0][k];
559 	      ReRhok[0][1][k] += tmp0*ReRhok[pSCF_iter][1][k];
560 	      ImRhok[0][1][k] += tmp0*ImRhok[pSCF_iter][1][k];
561 	    } /* k */
562 	  }
563 
564 	  else if (spinmax==3){
565   	    for (k=My_NumGridB_CB*OMPID/Nthrds; k<My_NumGridB_CB*(OMPID+1)/Nthrds; k++){
566 	      ReRhok[0][0][k] += tmp0*ReRhok[pSCF_iter][0][k];
567 	      ImRhok[0][0][k] += tmp0*ImRhok[pSCF_iter][0][k];
568 	      ReRhok[0][1][k] += tmp0*ReRhok[pSCF_iter][1][k];
569 	      ImRhok[0][1][k] += tmp0*ImRhok[pSCF_iter][1][k];
570 	      ReRhok[0][2][k] += tmp0*ReRhok[pSCF_iter][2][k];
571 	      ImRhok[0][2][k] += tmp0*ImRhok[pSCF_iter][2][k];
572 	    } /* k */
573 	  }
574 
575 	} /* #pragma omp parallel */
576 
577       } /* pSCF_iter */
578 
579       /* Correction by optimized residual rho */
580 
581       for (spin=0; spin<spinmax; spin++){
582 
583         for (k=0; k<My_NumGridB_CB; k++){
584 
585  	  if (1.0<NormRD[0])
586   	    weight = 1.0/(Kerker_weight[k]*Kerker_weight[k]);
587           else
588             weight = 1.0/Kerker_weight[k];
589 
590 	  ReRhok[0][spin][k] += coef_OptRho*Re_OptRhok[spin][k]*weight;
591 	  ImRhok[0][spin][k] += coef_OptRho*Im_OptRhok[spin][k]*weight;
592 
593 	  /* correction to large changing components */
594 
595           tmp0 = ReRhok[0][spin][k] - ReRhok[1][spin][k];
596           tmp1 = ImRhok[0][spin][k] - ImRhok[1][spin][k];
597 
598           if ( maxima_step<(fabs(tmp0)+fabs(tmp1)) ){
599             ReRhok[0][spin][k] = sgn(tmp0)*maxima_step + ReRhok[1][spin][k];
600             ImRhok[0][spin][k] = sgn(tmp1)*maxima_step + ImRhok[1][spin][k];
601           }
602 
603 	} /* k */
604       } /* spin */
605 
606       if (measure_time==1){
607         dtime(&Etime1);
608         time5 = Etime1 - Stime1;
609       }
610 
611     }
612 
613     /****************************************************
614        Kerker mixing
615     ****************************************************/
616 
617     else {
618 
619       /* find an optimum mixing weight */
620 
621       Min_Weight = Min_Mixing_weight;
622       Max_Weight = Max_Mixing_weight;
623 
624       if ((int)sgn(History_Uele[0]-History_Uele[1])
625 	  ==(int)sgn(History_Uele[1]-History_Uele[2])
626 	  && NormRD[0]<NormRD[1]){
627 
628 	/* tmp0 = 1.6*Mixing_weight; */
629 
630 	tmp0 = NormRD[1]/(largest(NormRD[1]-NormRD[0],10e-10))*Mixing_weight;
631 
632 	if (tmp0<Max_Weight){
633 	  if (Min_Weight<tmp0){
634 	    Mixing_weight = tmp0;
635 	  }
636 	  else{
637 	    Mixing_weight = Min_Weight;
638 	  }
639 	}
640 	else{
641 	  Mixing_weight = Max_Weight;
642 	  SCF_RENZOKU++;
643 	}
644       }
645 
646       else if ((int)sgn(History_Uele[0]-History_Uele[1])
647 	       ==(int)sgn(History_Uele[1]-History_Uele[2])
648 	       && NormRD[1]<NormRD[0]){
649 
650 	tmp0 = NormRD[1]/(largest(NormRD[1]+NormRD[0],10e-10))*Mixing_weight;
651 
652 	/* tmp0 = Mixing_weight/1.6; */
653 
654 	if (tmp0<Max_Weight){
655 	  if (Min_Weight<tmp0)
656 	    Mixing_weight = tmp0;
657 	  else
658 	    Mixing_weight = Min_Weight;
659 	}
660 	else
661 	  Mixing_weight = Max_Weight;
662 
663 	SCF_RENZOKU = -1;
664       }
665 
666       else if ((int)sgn(History_Uele[0]-History_Uele[1])
667 	       !=(int)sgn(History_Uele[1]-History_Uele[2])
668 	       && NormRD[0]<NormRD[1]){
669 
670 	/* tmp0 = Mixing_weight*1.2; */
671 
672 	tmp0 = NormRD[1]/(largest(NormRD[1]-NormRD[0],10e-10))*Mixing_weight;
673 
674 	if (tmp0<Max_Weight){
675 	  if (Min_Weight<tmp0)
676 	    Mixing_weight = tmp0;
677 	  else
678 	    Mixing_weight = Min_Weight;
679 	}
680 	else{
681 	  Mixing_weight = Max_Weight;
682 	  SCF_RENZOKU++;
683 	}
684       }
685 
686       else if ((int)sgn(History_Uele[0]-History_Uele[1])
687 	       !=(int)sgn(History_Uele[1]-History_Uele[2])
688 	       && NormRD[1]<NormRD[0]){
689 
690 	/* tmp0 = Mixing_weight/2.0; */
691 
692 	tmp0 = NormRD[1]/(largest(NormRD[1]+NormRD[0],10e-10))*Mixing_weight;
693 
694 	if (tmp0<Max_Weight){
695 	  if (Min_Weight<tmp0)
696 	    Mixing_weight = tmp0;
697 	  else
698 	    Mixing_weight = Min_Weight;
699 	}
700 	else
701 	  Mixing_weight = Max_Weight;
702 
703 	SCF_RENZOKU = -1;
704       }
705 
706       Mix_wgt = Mixing_weight;
707 
708       for (spin=0; spin<spinmax; spin++){
709         for (k=0; k<My_NumGridB_CB; k++){
710 
711 	  weight = 1.0/(Kerker_weight[k]*Kerker_weight[k]);
712 	  wgt0  = Mix_wgt*weight;
713 	  wgt1 =  1.0 - wgt0;
714 
715 	  ReRhok[0][spin][k] = wgt0*ReRhok[0][spin][k] + wgt1*ReRhok[1][spin][k];
716 	  ImRhok[0][spin][k] = wgt0*ImRhok[0][spin][k] + wgt1*ImRhok[1][spin][k];
717 
718 	} /* k */
719       } /* spin */
720     } /* else */
721 
722     /****************************************************
723                          shift of rho
724     ****************************************************/
725 
726     for (pSCF_iter=(List_YOUSO[38]-1); 0<pSCF_iter; pSCF_iter--){
727 
728       for (spin=0; spin<spinmax; spin++){
729         for (k=0; k<My_NumGridB_CB; k++){
730 	  ReRhok[pSCF_iter][spin][k] = ReRhok[pSCF_iter-1][spin][k];
731 	  ImRhok[pSCF_iter][spin][k] = ImRhok[pSCF_iter-1][spin][k];
732 	}
733       }
734     }
735 
736     /****************************************************
737                     shift of residual rho
738     ****************************************************/
739 
740     for (pSCF_iter=(List_YOUSO[38]-1); 0<pSCF_iter; pSCF_iter--){
741 
742       p0 = pSCF_iter*My_NumGridB_CB;
743       p1 = (pSCF_iter-1)*My_NumGridB_CB;
744 
745       for (spin=0; spin<spinmax; spin++){
746         for (k=0; k<My_NumGridB_CB; k++){
747           Residual_ReRhok[spin][p0+k] = Residual_ReRhok[spin][p1+k];
748           Residual_ImRhok[spin][p0+k] = Residual_ImRhok[spin][p1+k];
749 	}
750       }
751     }
752   } /* else */
753 
754   /****************************************************
755         find the charge density in real space
756   ****************************************************/
757 
758   tmp0 = 1.0/(double)Ngrid1/(double)Ngrid2/(double)Ngrid3;
759 
760   for (spin=0; spin<spinmax; spin++){
761 
762     for (k=0; k<My_NumGridB_CB; k++){
763       ReVk[k] = ReRhok[0][spin][k];
764       ImVk[k] = ImRhok[0][spin][k];
765     }
766 
767     if (spin==0 || spin==1){
768       Get_Value_inReal(0,Density_Grid_B[spin],Density_Grid_B[spin],ReVk,ImVk);
769 
770       for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
771 	Density_Grid_B[spin][BN_AB] = Density_Grid_B[spin][BN_AB]*tmp0;
772       }
773     }
774 
775     else if (spin==2){
776 
777       Get_Value_inReal(1,Density_Grid_B[2],Density_Grid_B[3],ReVk,ImVk);
778 
779       for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
780 	Density_Grid_B[2][BN_AB] = Density_Grid_B[2][BN_AB]*tmp0;
781 	Density_Grid_B[3][BN_AB] = Density_Grid_B[3][BN_AB]*tmp0;
782       }
783     }
784   }
785 
786   /******************************************************
787              MPI: from the partitions B to D
788   ******************************************************/
789 
790   Density_Grid_Copy_B2D();
791 
792   /****************************************************
793       set ReV2 and ImV2 which are used in Poisson.c
794   ****************************************************/
795 
796   if (SpinP_switch==0){
797     for (k=0; k<My_NumGridB_CB; k++){
798       ReVk[k] = 2.0*ReRhok[0][0][k] - ReRhoAtomk[k];
799       ImVk[k] = 2.0*ImRhok[0][0][k] - ImRhoAtomk[k];
800     }
801   }
802 
803   else {
804     for (k=0; k<My_NumGridB_CB; k++){
805       ReVk[k] = ReRhok[0][0][k] + ReRhok[0][1][k] - ReRhoAtomk[k];
806       ImVk[k] = ImRhok[0][0][k] + ImRhok[0][1][k] - ImRhoAtomk[k];
807     }
808   }
809 
810   if (measure_time==1){
811     printf("time1=%12.6f time2=%12.6f time3=%12.6f time4=%12.6f time5=%12.6f\n",
812             time1,time2,time3,time4,time5);
813   }
814 
815   /****************************************************
816     freeing of arrays:
817   ****************************************************/
818 
819   free(alden);
820   free(alden0);
821   free(gradF);
822 
823   free(My_NMat);
824   free(NMat);
825 
826   for (i=0; i<List_YOUSO[38]; i++){
827     free(A[i]);
828   }
829   free(A);
830 
831   for (i=0; i<List_YOUSO[38]; i++){
832     free(IA[i]);
833   }
834   free(IA);
835 
836   for (spin=0; spin<spinmax; spin++){
837     free(Re_OptRhok[spin]);
838   }
839   free(Re_OptRhok);
840 
841   for (spin=0; spin<spinmax; spin++){
842     free(Im_OptRhok[spin]);
843   }
844   free(Im_OptRhok);
845 
846   free(Kerker_weight);
847 }
848 
849 
850 
DIIS_Mixing_Rhok_NEGF(int SCF_iter,double Mix_wgt,double *** ReRhok,double *** ImRhok,double ** Residual_ReRhok,double ** Residual_ImRhok,double * ReVk,double * ImVk,double * ReRhoAtomk,double * ImRhoAtomk)851 void DIIS_Mixing_Rhok_NEGF(int SCF_iter,
852 			   double Mix_wgt,
853 			   double ***ReRhok,
854 			   double ***ImRhok,
855 			   double **Residual_ReRhok,
856 			   double **Residual_ImRhok,
857 			   double *ReVk,
858 			   double *ImVk,
859 			   double *ReRhoAtomk,
860 			   double *ImRhoAtomk)
861 {
862   static int firsttime=1;
863   int spin,Mc_AN,Gc_AN,wan1,TNO1,h_AN,Gh_AN;
864   int wan2,TNO2,i,j,k,NumMix,NumSlide;
865   int SCFi,SCFj,tno1,tno0,Cwan,Hwan,n1,k1,k2,k3,kk2;
866   int pSCF_iter,size_Re_OptRhok,size_Kerker_weight;
867   int spinmax,MN,flag_nan,M,N,K;
868   int N2D,GN,GNs,N3[4],BN_AB,p0,p1;
869   double time1,time2,time3,time4,time5;
870   double Stime1,Etime1;
871   double *alden,alpha,beta;
872   double **A,**My_A,**IA,*My_A_thrds;
873   double *My_NMat,*NMat;
874   double tmp0,itmp0,tmp1,im1,im2,re1,re2;
875   double OptRNorm,My_OptRNorm,x,wt;
876   double dum1,dum2,bunsi,bunbo,sum,coef_OptRho;
877   double Gx,Gy,Gz,G2,G12,G22,G32;
878   double Min_Weight,Q,sumL,sumR;
879   double Max_Weight;
880   double G0,G02,G02p,weight,wgt0,wgt1;
881   double sk1,sk2,sk3;
882   double **Re_OptRhok;
883   double **Im_OptRhok;
884   double *Kerker_weight;
885   int numprocs,myid;
886   char nanchar[300];
887   /* for OpenMP */
888   int OMPID,Nthrds,Nprocs,Nloop,Nthrds0;
889 
890   /* MPI */
891   MPI_Comm_size(mpi_comm_level1,&numprocs);
892   MPI_Comm_rank(mpi_comm_level1,&myid);
893 
894   /* find an optimum G0 */
895 
896   G22 = rtv[2][1]*rtv[2][1] + rtv[2][2]*rtv[2][2] + rtv[2][3]*rtv[2][3];
897   G32 = rtv[3][1]*rtv[3][1] + rtv[3][2]*rtv[3][2] + rtv[3][3]*rtv[3][3];
898 
899   if (G22<G32) G0 = G22;
900   else         G0 = G32;
901 
902   G0 = sqrt(G0);
903   G02 = Kerker_factor*Kerker_factor*G0*G0;
904   G02p = (0.1*Kerker_factor*G0)*(0.1*Kerker_factor*G0);
905 
906   if      (SpinP_switch==0)  spinmax = 1;
907   else if (SpinP_switch==1)  spinmax = 2;
908   else if (SpinP_switch==3)  spinmax = 3;
909 
910   /****************************************************
911     allocation of arrays:
912   ****************************************************/
913 
914   alden = (double*)malloc(sizeof(double)*List_YOUSO[38]);
915 
916   My_NMat = (double*)malloc(sizeof(double)*List_YOUSO[38]*List_YOUSO[38]);
917   NMat = (double*)malloc(sizeof(double)*List_YOUSO[38]*List_YOUSO[38]);
918 
919   A = (double**)malloc(sizeof(double*)*List_YOUSO[38]);
920   for (i=0; i<List_YOUSO[38]; i++){
921     A[i] = (double*)malloc(sizeof(double)*List_YOUSO[38]);
922   }
923 
924   My_A = (double**)malloc(sizeof(double*)*List_YOUSO[38]);
925   for (i=0; i<List_YOUSO[38]; i++){
926     My_A[i] = (double*)malloc(sizeof(double)*List_YOUSO[38]);
927   }
928 
929   IA = (double**)malloc(sizeof(double*)*List_YOUSO[38]);
930   for (i=0; i<List_YOUSO[38]; i++){
931     IA[i] = (double*)malloc(sizeof(double)*List_YOUSO[38]);
932   }
933 
934   /* Re_OptRhok and Im_OptRhok */
935 
936   size_Re_OptRhok = spinmax*My_NumGridB_CB;
937 
938   Re_OptRhok = (double**)malloc(sizeof(double*)*spinmax);
939   for (spin=0; spin<spinmax; spin++){
940     Re_OptRhok[spin] = (double*)malloc(sizeof(double)*My_NumGridB_CB);
941     for (k=0; k<My_NumGridB_CB; k++) Re_OptRhok[spin][k] = 0.0;
942   }
943 
944   Im_OptRhok = (double**)malloc(sizeof(double*)*spinmax);
945   for (spin=0; spin<spinmax; spin++){
946     Im_OptRhok[spin] = (double*)malloc(sizeof(double)*My_NumGridB_CB);
947     for (k=0; k<My_NumGridB_CB; k++) Im_OptRhok[spin][k] = 0.0;
948   }
949 
950   size_Kerker_weight = My_NumGridB_CB;
951   Kerker_weight = (double*)malloc(sizeof(double)*My_NumGridB_CB);
952 
953   /***********************************
954             set Kerker_weight
955   ************************************/
956 
957   N2D = Ngrid3*Ngrid2;
958   GNs = ((myid*N2D+numprocs-1)/numprocs)*Ngrid1;
959 
960   if (measure_time==1) dtime(&Stime1);
961 
962   for (k=0; k<My_NumGridB_CB; k+=Ngrid1){
963 
964     /* get k3 and k2 */
965 
966     GN = k + GNs;
967     k3 = GN/(Ngrid2*Ngrid1);
968     k2 = (GN - k3*Ngrid2*Ngrid1)/Ngrid1;
969 
970     if (k2<Ngrid2/2)  sk2 = (double)k2;
971     else              sk2 = (double)(k2 - Ngrid2);
972 
973     if (k3<Ngrid3/2)  sk3 = (double)k3;
974     else              sk3 = (double)(k3 - Ngrid3);
975 
976     Gx = sk2*rtv[2][1] + sk3*rtv[3][1];
977     Gy = sk2*rtv[2][2] + sk3*rtv[3][2];
978     Gz = sk2*rtv[2][3] + sk3*rtv[3][3];
979     G2 = Gx*Gx + Gy*Gy + Gz*Gz;
980 
981     for (k1=0; k1<Ngrid1; k1++){
982 
983       if (k2==0 && k3==0){
984 
985 	sumL = 0.0;
986 	sumR = 0.0;
987 
988 	if (SpinP_switch==0){
989 
990 	  for (n1=0; n1<k1; n1++){
991 	    sumL += 2.0*ReRhok[0][0][k+n1];
992 	  }
993 
994 	  for (n1=k1+1; n1<Ngrid1; n1++){
995 	    sumR += 2.0*ReRhok[0][0][k+n1];
996 	  }
997 	}
998 
999 	else if (SpinP_switch==1 || SpinP_switch==3){
1000 
1001 	  for (n1=0; n1<k1; n1++){
1002 	    sumL += ReRhok[0][0][k+n1] + ReRhok[0][1][k+n1];
1003 	  }
1004 
1005 	  for (n1=k1+1; n1<Ngrid1; n1++){
1006 	    sumR += ReRhok[0][0][k+n1] + ReRhok[0][1][k+n1];
1007 	  }
1008 	}
1009 
1010 	Q = 4.0*fabs(sumL - sumR)*GridVol + 1.0;
1011 
1012       }
1013       else {
1014 	Q = 1.0;
1015       }
1016 
1017       weight = (G2 + G02)/(G2 + G02p)*Q;
1018       Kerker_weight[k+k1] = sqrt(weight);
1019 
1020     } /* k1 */
1021   } /* k */
1022 
1023   if (measure_time==1){
1024     dtime(&Etime1);
1025     time1 = Etime1 - Stime1;
1026   }
1027 
1028   /****************************************************
1029      start calc.
1030   ****************************************************/
1031 
1032   if (SCF_iter==1){
1033     /* memory calc. done, only 1st iteration */
1034     if (firsttime) {
1035       PrintMemory("DIIS_Mixing_Rhok: Re_OptRhok",sizeof(double)*size_Re_OptRhok,NULL);
1036       PrintMemory("DIIS_Mixing_Rhok: Im_OptRhok",sizeof(double)*size_Re_OptRhok,NULL);
1037       PrintMemory("DIIS_Mixing_Rhok: Kerker_weight",sizeof(double)*size_Kerker_weight,NULL);
1038       firsttime=0;
1039     }
1040 
1041     Kerker_Mixing_Rhok(0, 1.0, ReRhok, ImRhok,
1042                        Residual_ReRhok, Residual_ImRhok,
1043                        ReVk,ImVk, ReRhoAtomk, ImRhoAtomk);
1044   }
1045 
1046   else if (SCF_iter==2){
1047 
1048     if (measure_time==1) dtime(&Stime1);
1049 
1050     /* construct residual rho */
1051 
1052     for (spin=0; spin<spinmax; spin++){
1053       for (k=0; k<My_NumGridB_CB; k++){
1054 
1055 	Residual_ReRhok[spin][2*My_NumGridB_CB+k] = (ReRhok[0][spin][k] - ReRhok[1][spin][k])*Kerker_weight[k];
1056 	Residual_ImRhok[spin][2*My_NumGridB_CB+k] = (ImRhok[0][spin][k] - ImRhok[1][spin][k])*Kerker_weight[k];
1057 
1058       } /* k */
1059     }
1060 
1061     if (measure_time==1){
1062       dtime(&Etime1);
1063       time2 = Etime1 - Stime1;
1064     }
1065 
1066     /* rho1 to rho2 */
1067 
1068     for (spin=0; spin<spinmax; spin++){
1069       for (k=0; k<My_NumGridB_CB; k++){
1070 	ReRhok[2][spin][k] = ReRhok[1][spin][k];
1071 	ImRhok[2][spin][k] = ImRhok[1][spin][k];
1072       }
1073     }
1074 
1075     Kerker_Mixing_Rhok(0, Mixing_weight, ReRhok, ImRhok,
1076                        Residual_ReRhok, Residual_ImRhok,
1077                        ReVk,ImVk, ReRhoAtomk, ImRhoAtomk);
1078   }
1079 
1080   else {
1081 
1082     /****************************************************
1083                    construct residual rho1
1084     ****************************************************/
1085 
1086     if (measure_time==1) dtime(&Stime1);
1087 
1088     for (spin=0; spin<spinmax; spin++){
1089       for (k=0; k<My_NumGridB_CB; k++){
1090 
1091 	Residual_ReRhok[spin][My_NumGridB_CB+k] = (ReRhok[0][spin][k] - ReRhok[1][spin][k])*Kerker_weight[k];
1092 	Residual_ImRhok[spin][My_NumGridB_CB+k] = (ImRhok[0][spin][k] - ImRhok[1][spin][k])*Kerker_weight[k];
1093       }
1094     }
1095 
1096     if (measure_time==1){
1097       dtime(&Etime1);
1098       time3 = Etime1 - Stime1;
1099     }
1100 
1101     if ((SCF_iter-1)<Num_Mixing_pDM){
1102       NumMix   = SCF_iter - 1;
1103       NumSlide = NumMix + 1;
1104     }
1105     else{
1106       NumMix   = Num_Mixing_pDM;
1107       NumSlide = NumMix;
1108     }
1109 
1110     /****************************************************
1111                     alpha from residual rho
1112     ****************************************************/
1113 
1114     if (measure_time==1) dtime(&Stime1);
1115 
1116     /* calculation of the norm matrix for the residual vectors */
1117 
1118     for (i=0; i<List_YOUSO[38]*List_YOUSO[38]; i++) My_NMat[i] = 0.0;
1119 
1120     for (spin=0; spin<spinmax; spin++){
1121 
1122       M = NumMix + 1;
1123       N = NumMix + 1;
1124       K = My_NumGridB_CB;
1125       alpha = 1.0;
1126       beta = 1.0;
1127 
1128       F77_NAME(dgemm,DGEMM)( "T","N", &M, &N, &K,
1129                              &alpha,
1130                              Residual_ReRhok[spin], &K,
1131                              Residual_ReRhok[spin], &K,
1132                              &beta,
1133                              My_NMat,
1134                              &M);
1135 
1136       F77_NAME(dgemm,DGEMM)( "T","N", &M, &N, &K,
1137                              &alpha,
1138                              Residual_ImRhok[spin], &K,
1139                              Residual_ImRhok[spin], &K,
1140                              &beta,
1141                              My_NMat,
1142                              &M);
1143     }
1144 
1145     MPI_Allreduce(My_NMat, NMat, (NumMix+1)*(NumMix+1),
1146                   MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
1147 
1148     for (i=0; i<NumMix; i++){
1149       for (j=0; j<NumMix; j++){
1150         A[i][j] = NMat[(i+1)*(NumMix+1)+(j+1)];
1151       }
1152     }
1153 
1154     /**********************************
1155               original code
1156     **********************************/
1157 
1158     /*
1159     for (SCFi=1; SCFi<=NumMix; SCFi++){
1160       for (SCFj=SCFi; SCFj<=NumMix; SCFj++){
1161 
1162         tmp0 = 0.0;
1163 	for (spin=0; spin<spinmax; spin++){
1164           for (k=0; k<My_NumGridB_CB; k++){
1165 
1166 	    re1 = Residual_ReRhok[spin][SCFi*My_NumGridB_CB+k];
1167 	    im1 = Residual_ImRhok[spin][SCFi*My_NumGridB_CB+k];
1168 
1169 	    re2 = Residual_ReRhok[spin][SCFj*My_NumGridB_CB+k];
1170 	    im2 = Residual_ImRhok[spin][SCFj*My_NumGridB_CB+k];
1171 
1172 	    tmp0 += re1*re2 + im1*im2;
1173 	  }
1174 	}
1175 
1176 	My_A[SCFi][SCFj] = tmp0;
1177 
1178       }
1179     }
1180 
1181     for (SCFi=1; SCFi<=NumMix; SCFi++){
1182       for (SCFj=SCFi; SCFj<=NumMix; SCFj++){
1183         MPI_Allreduce(&My_A[SCFi][SCFj], &A[SCFi-1][SCFj-1],
1184                       1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);
1185         A[SCFj-1][SCFi-1] = A[SCFi-1][SCFj-1];
1186       }
1187     }
1188     */
1189 
1190     if (measure_time==1){
1191       dtime(&Etime1);
1192       time4 = Etime1 - Stime1;
1193     }
1194 
1195     /* store NormRD */
1196 
1197     for (i=4; 1<=i; i--){
1198       NormRD[i] = NormRD[i-1];
1199       History_Uele[i] = History_Uele[i-1];
1200     }
1201     NormRD[0] = A[0][0];
1202     History_Uele[0] = Uele;
1203 
1204     /* solve the linear equation */
1205 
1206     for (SCFi=1; SCFi<=NumMix; SCFi++){
1207        A[SCFi-1][NumMix] = -1.0;
1208        A[NumMix][SCFi-1] = -1.0;
1209     }
1210     A[NumMix][NumMix] = 0.0;
1211 
1212     Inverse(NumMix,A,IA);
1213 
1214     for (SCFi=1; SCFi<=NumMix; SCFi++){
1215        alden[SCFi] = -IA[SCFi-1][NumMix];
1216     }
1217 
1218     /*************************************
1219       check "nan", "NaN", "inf" or "Inf"
1220     *************************************/
1221 
1222     flag_nan = 0;
1223     for (SCFi=1; SCFi<=NumMix; SCFi++){
1224 
1225       sprintf(nanchar,"%8.4f",alden[SCFi]);
1226       if (strstr(nanchar,"nan")!=NULL || strstr(nanchar,"NaN")!=NULL
1227        || strstr(nanchar,"inf")!=NULL || strstr(nanchar,"Inf")!=NULL){
1228 
1229         flag_nan = 1;
1230       }
1231     }
1232 
1233     if (flag_nan==1){
1234       for (SCFi=1; SCFi<=NumMix; SCFi++){
1235         alden[SCFi] = 0.0;
1236       }
1237       alden[1] = 0.1;
1238       alden[2] = 0.9;
1239     }
1240 
1241     /****************************************************
1242               calculate optimized residual rho
1243     ****************************************************/
1244 
1245     if (measure_time==1) dtime(&Stime1);
1246 
1247     for (pSCF_iter=1; pSCF_iter<=NumMix; pSCF_iter++){
1248 
1249       tmp0 =  alden[pSCF_iter];
1250       p0 = pSCF_iter*My_NumGridB_CB;
1251 
1252       for (spin=0; spin<spinmax; spin++){
1253         for (k=0; k<My_NumGridB_CB; k++){
1254 
1255 	  /* Pulay mixing */
1256 
1257 	  Re_OptRhok[spin][k] += tmp0*Residual_ReRhok[spin][p0+k];
1258 	  Im_OptRhok[spin][k] += tmp0*Residual_ImRhok[spin][p0+k];
1259 
1260 	} /* k */
1261       } /* spin */
1262     } /* pSCF_iter */
1263 
1264     if      (1.0e-2<=NormRD[0])                        coef_OptRho = 0.5;
1265     else if (1.0e-4<=NormRD[0]  && NormRD[0]<1.0e-2)   coef_OptRho = 0.6;
1266     else if (1.0e-6<=NormRD[0]  && NormRD[0]<1.0e-4)   coef_OptRho = 0.7;
1267     else if (1.0e-8<=NormRD[0]  && NormRD[0]<1.0e-6)   coef_OptRho = 0.8;
1268     else if (1.0e-10<=NormRD[0] && NormRD[0]<1.0e-8)   coef_OptRho = 0.9;
1269     else                                               coef_OptRho = 1.0;
1270 
1271     /****************************************************
1272                         mixing of rho
1273     ****************************************************/
1274 
1275     /****************************************************
1276        Pulay mixing
1277     ****************************************************/
1278 
1279     if ( SCF_iter%EveryPulay_SCF==0 ) {
1280 
1281       /* reduce Mixing_weight so that charge sloshing can be avoided at the next SCF */
1282 
1283       Mixing_weight = 0.1*Max_Mixing_weight;
1284       if (Mixing_weight<Min_Mixing_weight) Mixing_weight = Min_Mixing_weight;
1285 
1286       /* initial ReRhok and ImRhok */
1287 
1288       for (spin=0; spin<spinmax; spin++){
1289         for (k=0; k<My_NumGridB_CB; k++){
1290           ReRhok[0][spin][k] = 0.0;
1291           ImRhok[0][spin][k] = 0.0;
1292 	}
1293       }
1294 
1295       /* Pulay mixing */
1296 
1297       for (pSCF_iter=1; pSCF_iter<=NumMix; pSCF_iter++){
1298 
1299 	tmp0 =  alden[pSCF_iter];
1300 
1301         for (spin=0; spin<spinmax; spin++){
1302           for (k=0; k<My_NumGridB_CB; k++){
1303 
1304 	    ReRhok[0][spin][k] += tmp0*ReRhok[pSCF_iter][spin][k];
1305 	    ImRhok[0][spin][k] += tmp0*ImRhok[pSCF_iter][spin][k];
1306 
1307 	  } /* k */
1308 	} /* spin */
1309       } /* pSCF_iter */
1310 
1311       /* Correction by optimized residual rho */
1312 
1313       for (spin=0; spin<spinmax; spin++){
1314         for (k=0; k<My_NumGridB_CB; k++){
1315 
1316 	  weight = 1.0/(Kerker_weight[k]*Kerker_weight[k]);
1317 
1318 	  ReRhok[0][spin][k] += coef_OptRho*Re_OptRhok[spin][k]*weight;
1319 	  ImRhok[0][spin][k] += coef_OptRho*Im_OptRhok[spin][k]*weight;
1320 
1321 	  /* correction to large changing components */
1322 
1323           tmp0 = ReRhok[0][spin][k] - ReRhok[1][spin][k];
1324           tmp1 = ImRhok[0][spin][k] - ImRhok[1][spin][k];
1325 
1326           if ( maxima_step<(fabs(tmp0)+fabs(tmp1)) ){
1327             ReRhok[0][spin][k] = sgn(tmp0)*maxima_step + ReRhok[1][spin][k];
1328             ImRhok[0][spin][k] = sgn(tmp1)*maxima_step + ImRhok[1][spin][k];
1329           }
1330 
1331 	} /* k */
1332       } /* spin */
1333 
1334       if (measure_time==1){
1335         dtime(&Etime1);
1336         time5 = Etime1 - Stime1;
1337       }
1338 
1339     }
1340 
1341     /****************************************************
1342        Kerker mixing
1343     ****************************************************/
1344 
1345     else {
1346 
1347       /* find an optimum mixing weight */
1348 
1349       Min_Weight = Min_Mixing_weight;
1350       Max_Weight = Max_Mixing_weight;
1351 
1352       if ((int)sgn(History_Uele[0]-History_Uele[1])
1353 	  ==(int)sgn(History_Uele[1]-History_Uele[2])
1354 	  && NormRD[0]<NormRD[1]){
1355 
1356 	/* tmp0 = 1.6*Mixing_weight; */
1357 
1358 	tmp0 = NormRD[1]/(largest(NormRD[1]-NormRD[0],10e-10))*Mixing_weight;
1359 
1360 	if (tmp0<Max_Weight){
1361 	  if (Min_Weight<tmp0){
1362 	    Mixing_weight = tmp0;
1363 	  }
1364 	  else{
1365 	    Mixing_weight = Min_Weight;
1366 	  }
1367 	}
1368 	else{
1369 	  Mixing_weight = Max_Weight;
1370 	  SCF_RENZOKU++;
1371 	}
1372       }
1373 
1374       else if ((int)sgn(History_Uele[0]-History_Uele[1])
1375 	       ==(int)sgn(History_Uele[1]-History_Uele[2])
1376 	       && NormRD[1]<NormRD[0]){
1377 
1378 	tmp0 = NormRD[1]/(largest(NormRD[1]+NormRD[0],10e-10))*Mixing_weight;
1379 
1380 	/* tmp0 = Mixing_weight/1.6; */
1381 
1382 	if (tmp0<Max_Weight){
1383 	  if (Min_Weight<tmp0)
1384 	    Mixing_weight = tmp0;
1385 	  else
1386 	    Mixing_weight = Min_Weight;
1387 	}
1388 	else
1389 	  Mixing_weight = Max_Weight;
1390 
1391 	SCF_RENZOKU = -1;
1392       }
1393 
1394       else if ((int)sgn(History_Uele[0]-History_Uele[1])
1395 	       !=(int)sgn(History_Uele[1]-History_Uele[2])
1396 	       && NormRD[0]<NormRD[1]){
1397 
1398 	/* tmp0 = Mixing_weight*1.2; */
1399 
1400 	tmp0 = NormRD[1]/(largest(NormRD[1]-NormRD[0],10e-10))*Mixing_weight;
1401 
1402 	if (tmp0<Max_Weight){
1403 	  if (Min_Weight<tmp0)
1404 	    Mixing_weight = tmp0;
1405 	  else
1406 	    Mixing_weight = Min_Weight;
1407 	}
1408 	else{
1409 	  Mixing_weight = Max_Weight;
1410 	  SCF_RENZOKU++;
1411 	}
1412       }
1413 
1414       else if ((int)sgn(History_Uele[0]-History_Uele[1])
1415 	       !=(int)sgn(History_Uele[1]-History_Uele[2])
1416 	       && NormRD[1]<NormRD[0]){
1417 
1418 	/* tmp0 = Mixing_weight/2.0; */
1419 
1420 	tmp0 = NormRD[1]/(largest(NormRD[1]+NormRD[0],10e-10))*Mixing_weight;
1421 
1422 	if (tmp0<Max_Weight){
1423 	  if (Min_Weight<tmp0)
1424 	    Mixing_weight = tmp0;
1425 	  else
1426 	    Mixing_weight = Min_Weight;
1427 	}
1428 	else
1429 	  Mixing_weight = Max_Weight;
1430 
1431 	SCF_RENZOKU = -1;
1432       }
1433 
1434       /* Kerer mixing */
1435 
1436       Mix_wgt = Mixing_weight;
1437 
1438       for (spin=0; spin<spinmax; spin++){
1439         for (k=0; k<My_NumGridB_CB; k++){
1440 
1441 	  weight = 1.0/(Kerker_weight[k]*Kerker_weight[k]);
1442 	  wgt0  = Mix_wgt*weight;
1443 	  wgt1 =  1.0 - wgt0;
1444 
1445 	  ReRhok[0][spin][k] = wgt0*ReRhok[0][spin][k] + wgt1*ReRhok[1][spin][k];
1446 	  ImRhok[0][spin][k] = wgt0*ImRhok[0][spin][k] + wgt1*ImRhok[1][spin][k];
1447 
1448 	} /* k */
1449       } /* spin */
1450     } /* else */
1451 
1452     /****************************************************
1453                         shift of rho
1454     ****************************************************/
1455 
1456     for (pSCF_iter=(List_YOUSO[38]-1); 0<pSCF_iter; pSCF_iter--){
1457       for (spin=0; spin<spinmax; spin++){
1458         for (k=0; k<My_NumGridB_CB; k++){
1459 	  ReRhok[pSCF_iter][spin][k] = ReRhok[pSCF_iter-1][spin][k];
1460 	  ImRhok[pSCF_iter][spin][k] = ImRhok[pSCF_iter-1][spin][k];
1461 	}
1462       }
1463     }
1464 
1465     /****************************************************
1466                     shift of residual rho
1467     ****************************************************/
1468 
1469     for (pSCF_iter=(List_YOUSO[38]-1); 0<pSCF_iter; pSCF_iter--){
1470 
1471       p0 = pSCF_iter*My_NumGridB_CB;
1472       p1 = (pSCF_iter-1)*My_NumGridB_CB;
1473 
1474       for (spin=0; spin<spinmax; spin++){
1475         for (k=0; k<My_NumGridB_CB; k++){
1476 	  Residual_ReRhok[spin][p0+k] = Residual_ReRhok[spin][p1+k];
1477 	  Residual_ImRhok[spin][p0+k] = Residual_ImRhok[spin][p1+k];
1478 	}
1479       }
1480     }
1481 
1482   } /* else */
1483 
1484   /****************************************************
1485         find the charge density in real space
1486   ****************************************************/
1487 
1488   for (spin=0; spin<spinmax; spin++){
1489 
1490     for (k=0; k<My_NumGridB_CB; k++){
1491       ReVk[k] = ReRhok[0][spin][k];
1492       ImVk[k] = ImRhok[0][spin][k];
1493     }
1494 /* revised by Y. Xiao for Noncollinear NEGF calculations */
1495   if (spin==0 || spin==1) {
1496     Get_Value_inReal2D(0, Density_Grid_B[spin], NULL, ReVk, ImVk);
1497   } else {
1498     Get_Value_inReal2D(1, Density_Grid_B[2], Density_Grid_B[3], ReVk, ImVk);
1499   }
1500 /* until here by Y. Xiao for Noncollinear NEGF calculations */
1501 
1502 /*    Get_Value_inReal2D(0, Density_Grid_B[spin], NULL, ReVk, ImVk); */
1503   }
1504 
1505   /******************************************************
1506              MPI: from the partitions B to D
1507   ******************************************************/
1508 
1509   Density_Grid_Copy_B2D();
1510 
1511   if (measure_time==1){
1512     printf("time1=%12.6f time2=%12.6f time3=%12.6f time4=%12.6f time5=%12.6f\n",
1513             time1,time2,time3,time4,time5);
1514   }
1515 
1516   /****************************************************
1517     freeing of arrays:
1518   ****************************************************/
1519 
1520   free(alden);
1521 
1522   for (i=0; i<List_YOUSO[38]; i++){
1523     free(A[i]);
1524   }
1525   free(A);
1526 
1527   for (i=0; i<List_YOUSO[38]; i++){
1528     free(My_A[i]);
1529   }
1530   free(My_A);
1531 
1532   for (i=0; i<List_YOUSO[38]; i++){
1533     free(IA[i]);
1534   }
1535   free(IA);
1536 
1537   free(My_NMat);
1538   free(NMat);
1539 
1540   for (spin=0; spin<spinmax; spin++){
1541     free(Re_OptRhok[spin]);
1542   }
1543   free(Re_OptRhok);
1544 
1545   for (spin=0; spin<spinmax; spin++){
1546     free(Im_OptRhok[spin]);
1547   }
1548   free(Im_OptRhok);
1549 
1550   free(Kerker_weight);
1551 }
1552 
1553 
1554 
1555 
1556 
1557 
1558 
1559 
1560 
1561 
1562 
1563 
1564 
1565 
Inverse(int n,double ** a,double ** ia)1566 void Inverse(int n, double **a, double **ia)
1567 {
1568   int method_flag=1;
1569 
1570   if (method_flag==0){
1571 
1572   /****************************************************
1573                   LU decomposition
1574                       0 to n
1575    NOTE:
1576    This routine does not consider the reduction of rank
1577   ****************************************************/
1578 
1579   int i,j,k;
1580   double w;
1581   double *x,*y;
1582   double **da;
1583 
1584   /***************************************************
1585     allocation of arrays:
1586 
1587      x[List_YOUSO[38]]
1588      y[List_YOUSO[38]]
1589      da[List_YOUSO[38]][List_YOUSO[38]]
1590   ***************************************************/
1591 
1592   x = (double*)malloc(sizeof(double)*List_YOUSO[38]);
1593   y = (double*)malloc(sizeof(double)*List_YOUSO[38]);
1594   for (i=0; i<List_YOUSO[38]; i++){
1595     x[i] = 0.0;
1596     y[i] = 0.0;
1597   }
1598 
1599   da = (double**)malloc(sizeof(double*)*List_YOUSO[38]);
1600   for (i=0; i<List_YOUSO[38]; i++){
1601     da[i] = (double*)malloc(sizeof(double)*List_YOUSO[38]);
1602     for (j=0; j<List_YOUSO[38]; j++){
1603       da[i][j] = 0.0;
1604     }
1605   }
1606 
1607   /* start calc. */
1608 
1609   if (n==-1){
1610     for (i=0; i<List_YOUSO[38]; i++){
1611       for (j=0; j<List_YOUSO[38]; j++){
1612 	a[i][j] = 0.0;
1613       }
1614     }
1615   }
1616   else{
1617     for (i=0; i<=n; i++){
1618       for (j=0; j<=n; j++){
1619 	da[i][j] = a[i][j];
1620       }
1621     }
1622 
1623     /****************************************************
1624                      LU factorization
1625     ****************************************************/
1626 
1627     for (k=0; k<=n-1; k++){
1628       w = 1.0/a[k][k];
1629       for (i=k+1; i<=n; i++){
1630 	a[i][k] = w*a[i][k];
1631 	for (j=k+1; j<=n; j++){
1632 	  a[i][j] = a[i][j] - a[i][k]*a[k][j];
1633 	}
1634       }
1635     }
1636 
1637     for (k=0; k<=n; k++){
1638 
1639       /****************************************************
1640                              Ly = b
1641       ****************************************************/
1642 
1643       for (i=0; i<=n; i++){
1644 	if (i==k)
1645 	  y[i] = 1.0;
1646 	else
1647 	  y[i] = 0.0;
1648 	for (j=0; j<=i-1; j++){
1649 	  y[i] = y[i] - a[i][j]*y[j];
1650 	}
1651       }
1652 
1653       /****************************************************
1654                              Ux = y
1655       ****************************************************/
1656 
1657       for (i=n; 0<=i; i--){
1658 	x[i] = y[i];
1659 	for (j=n; (i+1)<=j; j--){
1660 	  x[i] = x[i] - a[i][j]*x[j];
1661 	}
1662 	x[i] = x[i]/a[i][i];
1663 	ia[i][k] = x[i];
1664       }
1665     }
1666 
1667     for (i=0; i<=n; i++){
1668       for (j=0; j<=n; j++){
1669 	a[i][j] = da[i][j];
1670       }
1671     }
1672   }
1673 
1674   /***************************************************
1675     freeing of arrays:
1676 
1677      x[List_YOUSO[38]]
1678      y[List_YOUSO[38]]
1679      da[List_YOUSO[38]][List_YOUSO[38]]
1680   ***************************************************/
1681 
1682   free(x);
1683   free(y);
1684 
1685   for (i=0; i<List_YOUSO[38]; i++){
1686     free(da[i]);
1687   }
1688   free(da);
1689 
1690   }
1691 
1692   else if (method_flag==1){
1693 
1694     int i,j,M,N,LDA,INFO;
1695     int *IPIV,LWORK;
1696     double *A,*WORK;
1697 
1698     A = (double*)malloc(sizeof(double)*(n+2)*(n+2));
1699     WORK = (double*)malloc(sizeof(double)*(n+2));
1700     IPIV = (int*)malloc(sizeof(int)*(n+2));
1701 
1702     for (i=0; i<=n; i++){
1703       for (j=0; j<=n; j++){
1704         A[i*(n+1)+j] = a[i][j];
1705       }
1706     }
1707 
1708     M = n + 1;
1709     N = M;
1710     LDA = M;
1711     LWORK = M;
1712 
1713     F77_NAME(dgetrf,DGETRF)( &M, &N, A, &LDA, IPIV, &INFO);
1714     F77_NAME(dgetri,DGETRI)( &N, A, &LDA, IPIV, WORK, &LWORK, &INFO);
1715 
1716     for (i=0; i<=n; i++){
1717       for (j=0; j<=n; j++){
1718         ia[i][j] = A[i*(n+1)+j];
1719       }
1720     }
1721 
1722     free(A);
1723     free(WORK);
1724     free(IPIV);
1725   }
1726 
1727   else if (method_flag==2){
1728 
1729     int N,i,j,k;
1730     double *A,*B,*ko;
1731     double sum;
1732 
1733     N = n + 1;
1734 
1735     A = (double*)malloc(sizeof(double)*(N+2)*(N+2));
1736     B = (double*)malloc(sizeof(double)*(N+2)*(N+2));
1737     ko = (double*)malloc(sizeof(double)*(N+2));
1738 
1739     /*
1740     printf("N=%2d\n",N);
1741 
1742     N = 3;
1743 
1744     a[0][0] =-1.2; a[0][1] =-1.0; a[0][2] = -0.0;
1745     a[1][0] =-1.0; a[1][1] =-1.2; a[1][2] = -1.0;
1746     a[2][0] =-0.0; a[2][1] =-1.0; a[2][2] = -1.2;
1747     */
1748 
1749     for (i=0; i<N; i++){
1750       for (j=0; j<N; j++){
1751         A[j*N+i] = a[i][j];
1752       }
1753     }
1754 
1755     Eigen_lapack3(A, ko, N, N);
1756 
1757     /*
1758     printf("ko\n");
1759     for (i=0; i<N; i++){
1760       printf("ko i=%2d %15.12f\n",i,ko[i]);
1761     }
1762 
1763     printf("A\n");
1764     for (i=0; i<N; i++){
1765       for (j=0; j<N; j++){
1766         printf("%10.5f ",A[i*N+j]);
1767       }
1768       printf("\n");
1769     }
1770     */
1771 
1772     for (i=0; i<N; i++){
1773       /*
1774       printf("i=%2d ko=%18.15f iko=%18.15f\n",i,ko[i],1.0/ko[i]);
1775       */
1776 
1777       ko[i] = 1.0/ko[i];
1778     }
1779 
1780     for (i=0; i<N; i++){
1781       for (j=0; j<N; j++){
1782         B[i*N+j] = A[i*N+j]*ko[i];
1783       }
1784     }
1785 
1786     /*
1787     printf("A*ko\n");
1788     for (i=0; i<N; i++){
1789       for (j=0; j<N; j++){
1790         printf("%10.5f ",B[i*N+j]);
1791       }
1792       printf("\n");
1793     }
1794     */
1795 
1796     for (i=0; i<N; i++){
1797       for (j=0; j<N; j++){
1798         ia[i][j] = 0.0;
1799       }
1800     }
1801 
1802     for (i=0; i<N; i++){
1803       for (j=0; j<N; j++){
1804         sum = 0.0;
1805 	for (k=0; k<N; k++){
1806 	  sum += A[k*N+i]*B[k*N+j];
1807 	}
1808         ia[i][j] = sum;
1809       }
1810     }
1811 
1812     /*
1813     printf("ia\n");
1814     for (i=0; i<N; i++){
1815       for (j=0; j<N; j++){
1816         printf("%10.5f ",ia[i][j]);
1817       }
1818       printf("\n");
1819     }
1820 
1821     MPI_Finalize();
1822     exit(0);
1823     */
1824 
1825     free(A);
1826     free(B);
1827     free(ko);
1828   }
1829 
1830 
1831 }
1832 
1833 
Inverse2(int n,double ** a,double ** ia)1834 void Inverse2(int n, double **a, double **ia)
1835 {
1836   int N,i,j,k;
1837   double *A,*B,*ko;
1838   double sum;
1839 
1840   N = n + 1;
1841 
1842   A = (double*)malloc(sizeof(double)*(N+2)*(N+2));
1843   B = (double*)malloc(sizeof(double)*(N+2)*(N+2));
1844   ko = (double*)malloc(sizeof(double)*(N+2));
1845 
1846   /*
1847     printf("N=%2d\n",N);
1848 
1849     N = 3;
1850 
1851     a[0][0] =-1.2; a[0][1] =-1.0; a[0][2] = -0.0;
1852     a[1][0] =-1.0; a[1][1] =-1.2; a[1][2] = -1.0;
1853     a[2][0] =-0.0; a[2][1] =-1.0; a[2][2] = -1.2;
1854   */
1855 
1856   for (i=0; i<N; i++){
1857     for (j=0; j<N; j++){
1858       A[j*N+i] = a[i][j];
1859     }
1860   }
1861 
1862   Eigen_lapack3(A, ko, N, N);
1863 
1864   /*
1865     printf("ko\n");
1866     for (i=0; i<N; i++){
1867     printf("ko i=%2d %15.12f\n",i,ko[i]);
1868     }
1869 
1870     printf("A\n");
1871     for (i=0; i<N; i++){
1872     for (j=0; j<N; j++){
1873     printf("%10.5f ",A[i*N+j]);
1874     }
1875     printf("\n");
1876     }
1877   */
1878 
1879   for (i=0; i<N; i++){
1880 
1881     printf("i=%2d ko=%18.15f iko=%18.15f\n",i,ko[i],1.0/ko[i]);
1882 
1883     ko[i] = 1.0/ko[i];
1884   }
1885 
1886   for (i=0; i<N; i++){
1887     for (j=0; j<N; j++){
1888       B[i*N+j] = A[i*N+j]*ko[i];
1889     }
1890   }
1891 
1892   /*
1893     printf("A*ko\n");
1894     for (i=0; i<N; i++){
1895     for (j=0; j<N; j++){
1896     printf("%10.5f ",B[i*N+j]);
1897     }
1898     printf("\n");
1899     }
1900   */
1901 
1902   for (i=0; i<N; i++){
1903     for (j=0; j<N; j++){
1904       ia[i][j] = 0.0;
1905     }
1906   }
1907 
1908   for (i=0; i<N; i++){
1909     for (j=0; j<N; j++){
1910       sum = 0.0;
1911       for (k=0; k<N; k++){
1912 	sum += A[k*N+i]*B[k*N+j];
1913       }
1914       ia[i][j] = sum;
1915     }
1916   }
1917 
1918   /*
1919     printf("ia\n");
1920     for (i=0; i<N; i++){
1921     for (j=0; j<N; j++){
1922     printf("%10.5f ",ia[i][j]);
1923     }
1924     printf("\n");
1925     }
1926 
1927     MPI_Finalize();
1928     exit(0);
1929   */
1930 
1931   free(A);
1932   free(B);
1933   free(ko);
1934 
1935 }
1936 
1937 
1938 
1939 
1940 
Complex_Inverse(int n,double ** a,double ** ia,double ** b,double ** ib)1941 void Complex_Inverse(int n, double **a, double **ia, double **b, double **ib)
1942 {
1943   /****************************************************
1944                   LU decomposition
1945                       0 to n
1946    NOTE:
1947    This routine does not consider the reduction of rank
1948   ****************************************************/
1949 
1950   int i,j,k;
1951   double w2,re,im;
1952   dcomplex w;
1953   dcomplex *x,*y;
1954   dcomplex **da;
1955 
1956   /***************************************************
1957     allocation of arrays:
1958 
1959      x[List_YOUSO[38]]
1960      y[List_YOUSO[38]]
1961      da[List_YOUSO[38]][List_YOUSO[38]]
1962   ***************************************************/
1963 
1964   x = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[38]);
1965   y = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[38]);
1966 
1967   da = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[38]);
1968   for (i=0; i<List_YOUSO[38]; i++){
1969     da[i] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[38]);
1970   }
1971 
1972   /* start calc. */
1973 
1974   if (n==-1){
1975     for (i=0; i<List_YOUSO[38]; i++){
1976       for (j=0; j<List_YOUSO[38]; j++){
1977  	 a[i][j] = 0.0;
1978 	ia[i][j] = 0.0;
1979       }
1980     }
1981   }
1982 
1983   else {
1984 
1985     for (i=0; i<=n; i++){
1986       for (j=0; j<=n; j++){
1987 	da[i][j].r =  a[i][j];
1988 	da[i][j].i = ia[i][j];
1989       }
1990     }
1991 
1992     /****************************************************
1993                      LU factorization
1994     ****************************************************/
1995 
1996     for (k=0; k<=n-1; k++){
1997 
1998       w.r =  a[k][k];
1999       w.i = ia[k][k];
2000       w2 = 1.0/(w.r*w.r + w.i*w.i);
2001 
2002       for (i=k+1; i<=n; i++){
2003 
2004 	re = w2*(w.r* a[i][k] + w.i*ia[i][k]);
2005         im = w2*(w.r*ia[i][k] - w.i* a[i][k]);
2006 	a[i][k] = re;
2007        ia[i][k] = im;
2008 
2009 	for (j=k+1; j<=n; j++){
2010  	   re =  a[i][j] - (a[i][k]* a[k][j] - ia[i][k]*ia[k][j]);
2011 	   im = ia[i][j] - (a[i][k]*ia[k][j] + ia[i][k]* a[k][j]);
2012  	   a[i][j] = re;
2013 	  ia[i][j] = im;
2014 	}
2015       }
2016     }
2017 
2018     for (k=0; k<=n; k++){
2019 
2020       /****************************************************
2021                              Ly = b
2022       ****************************************************/
2023 
2024       for (i=0; i<=n; i++){
2025 	if (i==k){
2026 	  y[i].r = 1.0;
2027 	  y[i].i = 0.0;
2028 	}
2029 	else{
2030 	  y[i].r = 0.0;
2031 	  y[i].i = 0.0;
2032 	}
2033 
2034 	for (j=0; j<=i-1; j++){
2035    	  y[i].r = y[i].r - (a[i][j]*y[j].r - ia[i][j]*y[j].i);
2036 	  y[i].i = y[i].i - (a[i][j]*y[j].i + ia[i][j]*y[j].r);
2037 	}
2038       }
2039 
2040       /****************************************************
2041                              Ux = y
2042       ****************************************************/
2043 
2044       for (i=n; 0<=i; i--){
2045 	x[i] = y[i];
2046 	for (j=n; (i+1)<=j; j--){
2047 	  x[i].r = x[i].r - (a[i][j]*x[j].r - ia[i][j]*x[j].i);
2048 	  x[i].i = x[i].i - (a[i][j]*x[j].i + ia[i][j]*x[j].r);
2049 	}
2050 
2051         w.r =  a[i][i];
2052         w.i = ia[i][i];
2053         w2 = 1.0/(w.r*w.r + w.i*w.i);
2054 
2055 	re = w2*(w.r*x[i].r + w.i*x[i].i);
2056         im = w2*(w.r*x[i].i - w.i*x[i].r);
2057 	x[i].r = re;
2058 	x[i].i = im;
2059 
2060          b[i][k] = x[i].r;
2061         ib[i][k] = x[i].i;
2062       }
2063     }
2064 
2065     for (i=0; i<=n; i++){
2066       for (j=0; j<=n; j++){
2067 	 a[i][j] = da[i][j].r;
2068 	ia[i][j] = da[i][j].i;
2069       }
2070     }
2071   }
2072 
2073   /***************************************************
2074     freeing of arrays:
2075 
2076      x[List_YOUSO[38]]
2077      y[List_YOUSO[38]]
2078      da[List_YOUSO[38]][List_YOUSO[38]]
2079   ***************************************************/
2080 
2081   free(x);
2082   free(y);
2083 
2084   for (i=0; i<List_YOUSO[38]; i++){
2085     free(da[i]);
2086   }
2087   free(da);
2088 }
2089