1 /**********************************************************************
2   Force.c:
3 
4      Force.c is a subroutine to calculate force on atoms.
5 
6   Log of Force.c:
7 
8      22/Nov/2001  Released by T. Ozaki
9      18/Apr/2013  Force3() modified by A.M. Ito
10 
11 ***********************************************************************/
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <math.h>
16 #include <time.h>
17 #include "openmx_common.h"
18 #include "mpi.h"
19 #include <omp.h>
20 
21 #define  measure_time   0
22 
23 
24 static void dH_U_full(int Mc_AN, int h_AN, int q_AN,
25 		      double *****OLP, double ****v_eff,
26 		      double ***Hx, double ***Hy, double ***Hz);
27 
28 static void dH_U_NC_full(int Mc_AN, int h_AN, int q_AN,
29 			 double *****OLP, dcomplex *****NC_v_eff,
30 			 dcomplex ****Hx, dcomplex ****Hy, dcomplex ****Hz);
31 
32 static void dHNL(int where_flag,
33 		 int Mc_AN, int h_AN, int q_AN,
34 		 double ******DS_NL1,
35 		 dcomplex ***Hx, dcomplex ***Hy, dcomplex ***Hz);
36 
37 static void dHVNA(int where_flag, int Mc_AN, int h_AN, int q_AN,
38                   Type_DS_VNA *****DS_VNA1,
39                   double *****TmpHVNA2, double *****TmpHVNA3,
40                   double **Hx, double **Hy, double **Hz);
41 
42 
43 static void dHNL_SO(
44 	     double *sumx0r,
45 	     double *sumy0r,
46 	     double *sumz0r,
47 	     double *sumx1r,
48 	     double *sumy1r,
49 	     double *sumz1r,
50 	     double *sumx2r,
51 	     double *sumy2r,
52 	     double *sumz2r,
53 	     double *sumx0i,
54 	     double *sumy0i,
55 	     double *sumz0i,
56 	     double *sumx1i,
57 	     double *sumy1i,
58 	     double *sumz1i,
59 	     double *sumx2i,
60 	     double *sumy2i,
61 	     double *sumz2i,
62              double fugou,
63 	     double PFp,
64 	     double PFm,
65 	     double ene_p,
66 	     double ene_m,
67 	     int l2, int *l,
68 	     int Mc_AN, int k,  int m,
69 	     int Mj_AN, int kl, int n,
70 	     double ******DS_NL1);
71 
72 static void MPI_OLP(double *****OLP1);
73 static void Force3();
74 static void Force4();
75 static void Force4B(double *****CDM0);
76 
77 static void Force_HNL(double *****CDM0, double *****iDM0);
78 
79 
Force(double ***** H0,double ****** DS_NL,double ***** OLP,double ***** CDM,double ***** EDM)80 double Force(double *****H0,
81              double ******DS_NL,
82              double *****OLP,
83              double *****CDM,
84              double *****EDM)
85 {
86   static int firsttime=1;
87   int Nc,GNc,GRc,Cwan,s1,s2,BN_AB;
88   int Mc_AN,Gc_AN,MNc,start_q_AN;
89   double x,y,z,dx,dy,dz,tmp0,tmp1,tmp2,tmp3;
90   double xx,r2,tot_den;
91   double sumx,sumy,sumz,r,dege,pref;
92   int i,j,k,l,Hwan,Qwan,so,p0,q,q0;
93   int h_AN,Gh_AN,q_AN,Gq_AN;
94   int ian,jan,kl,spin,spinmax,al,be,p,size_CDM0,size_iDM0;
95   int tno0,tno1,tno2,Mh_AN,Mq_AN,n,num,size1,size2;
96   int wanA,wanB,Gc_BN;
97   int XC_P_switch;
98   double time0;
99   double dum,dge;
100   double dEx,dEy,dEz;
101   double Cxyz[4];
102   double *Fx,*Fy,*Fz;
103   dcomplex ***Hx;
104   dcomplex ***Hy;
105   dcomplex ***Hz;
106   double ***HUx;
107   double ***HUy;
108   double ***HUz;
109   dcomplex ****NC_HUx;
110   dcomplex ****NC_HUy;
111   dcomplex ****NC_HUz;
112   double **HVNAx;
113   double **HVNAy;
114   double **HVNAz;
115   double *****CDM0;
116   double *****iDM0;
117   double *tmp_array;
118   double *tmp_array2;
119   double Re00x,Re00y,Re00z;
120   double Re11x,Re11y,Re11z;
121   double Re01x,Re01y,Re01z;
122   double Im00x,Im00y,Im00z;
123   double Im11x,Im11y,Im11z;
124   double Im01x,Im01y,Im01z;
125   int *Snd_CDM0_Size,*Rcv_CDM0_Size;
126   int *Snd_iDM0_Size,*Rcv_iDM0_Size;
127   double TStime,TEtime;
128   int numprocs,myid,tag=999,ID,IDS,IDR;
129   double Stime_atom, Etime_atom;
130   /* for OpenMP */
131   int OMPID,Nthrds,Nprocs;
132   double stime,etime;
133 
134   MPI_Status stat;
135   MPI_Request request;
136 
137   /* MPI */
138   MPI_Comm_size(mpi_comm_level1,&numprocs);
139   MPI_Comm_rank(mpi_comm_level1,&myid);
140 
141   MPI_Barrier(mpi_comm_level1);
142   dtime(&TStime);
143 
144   /****************************************************
145    allocation of arrays:
146   ****************************************************/
147 
148   Fx = (double*)malloc(sizeof(double)*(Matomnum+1));
149   Fy = (double*)malloc(sizeof(double)*(Matomnum+1));
150   Fz = (double*)malloc(sizeof(double)*(Matomnum+1));
151 
152   HVNAx = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
153   for (j=0; j<List_YOUSO[7]; j++){
154     HVNAx[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
155   }
156 
157   HVNAy = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
158   for (j=0; j<List_YOUSO[7]; j++){
159     HVNAy[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
160   }
161 
162   HVNAz = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
163   for (j=0; j<List_YOUSO[7]; j++){
164     HVNAz[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
165   }
166 
167   /* CDM0 */
168   size_CDM0 = 0;
169   CDM0 = (double*****)malloc(sizeof(double****)*(SpinP_switch+1));
170   for (k=0; k<=SpinP_switch; k++){
171     CDM0[k] = (double****)malloc(sizeof(double***)*(Matomnum+MatomnumF+1));
172     FNAN[0] = 0;
173     for (Mc_AN=0; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){
174 
175       if (Mc_AN==0){
176         Gc_AN = 0;
177         tno0 = 1;
178       }
179       else{
180         Gc_AN = F_M2G[Mc_AN];
181         Cwan = WhatSpecies[Gc_AN];
182         tno0 = Spe_Total_CNO[Cwan];
183       }
184 
185       CDM0[k][Mc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
186       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
187 
188         if (Mc_AN==0){
189           tno1 = 1;
190         }
191         else{
192           Gh_AN = natn[Gc_AN][h_AN];
193           Hwan = WhatSpecies[Gh_AN];
194           tno1 = Spe_Total_CNO[Hwan];
195         }
196 
197         CDM0[k][Mc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
198         for (i=0; i<tno0; i++){
199           CDM0[k][Mc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
200           size_CDM0 += tno1;
201         }
202       }
203     }
204   }
205 
206   Snd_CDM0_Size = (int*)malloc(sizeof(int)*numprocs);
207   Rcv_CDM0_Size = (int*)malloc(sizeof(int)*numprocs);
208 
209   /* iDM0 */
210 
211   if ( SO_switch==1 || (Hub_U_switch==1 && SpinP_switch==3) || 1<=Constraint_NCS_switch
212        || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1 ){
213 
214     size_iDM0 = 0;
215     iDM0 = (double*****)malloc(sizeof(double****)*2);
216     for (k=0; k<2; k++){
217       iDM0[k] = (double****)malloc(sizeof(double***)*(Matomnum+MatomnumF+1));
218       FNAN[0] = 0;
219       for (Mc_AN=0; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){
220 
221 	if (Mc_AN==0){
222 	  Gc_AN = 0;
223 	  tno0 = 1;
224 	}
225 	else{
226 	  Gc_AN = F_M2G[Mc_AN];
227 	  Cwan = WhatSpecies[Gc_AN];
228 	  tno0 = Spe_Total_CNO[Cwan];
229 	}
230 
231 	iDM0[k][Mc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
232 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
233 
234 	  if (Mc_AN==0){
235 	    tno1 = 1;
236 	  }
237 	  else{
238 	    Gh_AN = natn[Gc_AN][h_AN];
239 	    Hwan = WhatSpecies[Gh_AN];
240 	    tno1 = Spe_Total_CNO[Hwan];
241 	  }
242 
243 	  iDM0[k][Mc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
244 	  for (i=0; i<tno0; i++){
245 	    iDM0[k][Mc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
246 	    size_iDM0 += tno1;
247 	  }
248 	}
249       }
250     }
251 
252     Snd_iDM0_Size = (int*)malloc(sizeof(int)*numprocs);
253     Rcv_iDM0_Size = (int*)malloc(sizeof(int)*numprocs);
254   }
255 
256   /****************************************************
257                       PrintMemory
258   ****************************************************/
259 
260   if (firsttime) {
261     PrintMemory("Force: Hx",sizeof(dcomplex)*List_YOUSO[7]*List_YOUSO[7],NULL);
262     PrintMemory("Force: Hy",sizeof(dcomplex)*List_YOUSO[7]*List_YOUSO[7],NULL);
263     PrintMemory("Force: Hz",sizeof(dcomplex)*List_YOUSO[7]*List_YOUSO[7],NULL);
264     PrintMemory("Force: CDM0",sizeof(double)*size_CDM0,NULL);
265     if ( SO_switch==1 || (Hub_U_switch==1 && SpinP_switch==3) || 1<=Constraint_NCS_switch
266          || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1){
267       PrintMemory("Force: iDM0",sizeof(double)*size_iDM0,NULL);
268     }
269     firsttime=0;
270   }
271 
272   /****************************************************
273     CDM to CDM0
274   ****************************************************/
275 
276   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
277 
278     Gc_AN = M2G[Mc_AN];
279     Cwan = WhatSpecies[Gc_AN];
280     tno1 = Spe_Total_CNO[Cwan];
281 
282     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
283 
284       Gh_AN = natn[Gc_AN][h_AN];
285       Hwan = WhatSpecies[Gh_AN];
286       tno2 = Spe_Total_CNO[Hwan];
287 
288       for (spin=0; spin<=SpinP_switch; spin++){
289         for (i=0; i<tno1; i++){
290 	  for (j=0; j<tno2; j++){
291             CDM0[spin][Mc_AN][h_AN][i][j] = CDM[spin][Mc_AN][h_AN][i][j];
292 	  }
293         }
294       }
295     }
296   }
297 
298   /****************************************************
299     iDM to iDM0
300   ****************************************************/
301 
302   if ( SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1 && SpinP_switch==3) || 1<=Constraint_NCS_switch
303        || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1){
304 
305     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
306 
307       Gc_AN = M2G[Mc_AN];
308       Cwan = WhatSpecies[Gc_AN];
309       tno1 = Spe_Total_CNO[Cwan];
310 
311       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
312 
313 	Gh_AN = natn[Gc_AN][h_AN];
314 	Hwan = WhatSpecies[Gh_AN];
315 	tno2 = Spe_Total_CNO[Hwan];
316 
317 	for (i=0; i<tno1; i++){
318 	  for (j=0; j<tno2; j++){
319 	    iDM0[0][Mc_AN][h_AN][i][j] = iDM[0][0][Mc_AN][h_AN][i][j];
320 	    iDM0[1][Mc_AN][h_AN][i][j] = iDM[0][1][Mc_AN][h_AN][i][j];
321 	  }
322 	}
323       }
324     }
325   }
326 
327   /****************************************************
328    MPI:
329 
330    CDM0
331   ****************************************************/
332 
333   /***********************************
334              set data size
335   ************************************/
336 
337   for (ID=0; ID<numprocs; ID++){
338 
339     IDS = (myid + ID) % numprocs;
340     IDR = (myid - ID + numprocs) % numprocs;
341 
342     if (ID!=0){
343       tag = 999;
344 
345       /* find data size to send block data */
346       if (F_Snd_Num[IDS]!=0){
347 
348         size1 = 0;
349         for (spin=0; spin<=SpinP_switch; spin++){
350           for (n=0; n<F_Snd_Num[IDS]; n++){
351             Mc_AN = Snd_MAN[IDS][n];
352             Gc_AN = Snd_GAN[IDS][n];
353             Cwan = WhatSpecies[Gc_AN];
354             tno1 = Spe_Total_CNO[Cwan];
355             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
356               Gh_AN = natn[Gc_AN][h_AN];
357               Hwan = WhatSpecies[Gh_AN];
358               tno2 = Spe_Total_CNO[Hwan];
359               for (i=0; i<tno1; i++){
360                 for (j=0; j<tno2; j++){
361                   size1++;
362                 }
363               }
364 	    }
365           }
366 	}
367 
368         Snd_CDM0_Size[IDS] = size1;
369         MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
370       }
371       else{
372         Snd_CDM0_Size[IDS] = 0;
373       }
374 
375       /* receiving of size of data */
376 
377       if (F_Rcv_Num[IDR]!=0){
378         MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
379         Rcv_CDM0_Size[IDR] = size2;
380       }
381       else{
382         Rcv_CDM0_Size[IDR] = 0;
383       }
384 
385       if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
386 
387     }
388     else{
389       Snd_CDM0_Size[IDS] = 0;
390       Rcv_CDM0_Size[IDR] = 0;
391     }
392   }
393 
394   /***********************************
395              data transfer
396   ************************************/
397 
398   tag = 999;
399   for (ID=0; ID<numprocs; ID++){
400 
401     IDS = (myid + ID) % numprocs;
402     IDR = (myid - ID + numprocs) % numprocs;
403 
404     if (ID!=0){
405 
406       /*****************************
407               sending of data
408       *****************************/
409 
410       if (F_Snd_Num[IDS]!=0){
411 
412         size1 = Snd_CDM0_Size[IDS];
413 
414         /* allocation of array */
415 
416         tmp_array = (double*)malloc(sizeof(double)*size1);
417 
418         /* multidimentional array to vector array */
419 
420         num = 0;
421         for (spin=0; spin<=SpinP_switch; spin++){
422           for (n=0; n<F_Snd_Num[IDS]; n++){
423             Mc_AN = Snd_MAN[IDS][n];
424             Gc_AN = Snd_GAN[IDS][n];
425             Cwan = WhatSpecies[Gc_AN];
426             tno1 = Spe_Total_CNO[Cwan];
427             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
428               Gh_AN = natn[Gc_AN][h_AN];
429               Hwan = WhatSpecies[Gh_AN];
430               tno2 = Spe_Total_CNO[Hwan];
431               for (i=0; i<tno1; i++){
432                 for (j=0; j<tno2; j++){
433                   tmp_array[num] = CDM[spin][Mc_AN][h_AN][i][j];
434                   num++;
435                 }
436               }
437 	    }
438           }
439 	}
440 
441         MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
442 
443       }
444 
445       /*****************************
446          receiving of block data
447       *****************************/
448 
449       if (F_Rcv_Num[IDR]!=0){
450 
451         size2 = Rcv_CDM0_Size[IDR];
452 
453         /* allocation of array */
454         tmp_array2 = (double*)malloc(sizeof(double)*size2);
455 
456         MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
457 
458         num = 0;
459         for (spin=0; spin<=SpinP_switch; spin++){
460           Mc_AN = F_TopMAN[IDR] - 1;
461           for (n=0; n<F_Rcv_Num[IDR]; n++){
462             Mc_AN++;
463             Gc_AN = Rcv_GAN[IDR][n];
464             Cwan = WhatSpecies[Gc_AN];
465             tno1 = Spe_Total_CNO[Cwan];
466 
467             for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
468               Gh_AN = natn[Gc_AN][h_AN];
469               Hwan = WhatSpecies[Gh_AN];
470               tno2 = Spe_Total_CNO[Hwan];
471               for (i=0; i<tno1; i++){
472                 for (j=0; j<tno2; j++){
473                   CDM0[spin][Mc_AN][h_AN][i][j] = tmp_array2[num];
474                   num++;
475 		}
476 	      }
477 	    }
478 	  }
479 	}
480 
481         /* freeing of array */
482         free(tmp_array2);
483       }
484 
485       if (F_Snd_Num[IDS]!=0){
486         MPI_Wait(&request,&stat);
487         free(tmp_array); /* freeing of array */
488       }
489 
490     }
491   }
492 
493   /****************************************************
494    MPI:
495 
496    iDM0
497   ****************************************************/
498 
499   if ( SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1 && SpinP_switch==3) || 1<=Constraint_NCS_switch
500       || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1){
501 
502     /***********************************
503                 set data size
504     ************************************/
505 
506     for (ID=0; ID<numprocs; ID++){
507 
508       IDS = (myid + ID) % numprocs;
509       IDR = (myid - ID + numprocs) % numprocs;
510 
511       if (ID!=0){
512 	tag = 999;
513 
514 	/* find data size to send block data */
515 	if (F_Snd_Num[IDS]!=0){
516 
517 	  size1 = 0;
518 	  for (so=0; so<2; so++){
519 	    for (n=0; n<F_Snd_Num[IDS]; n++){
520 	      Mc_AN = Snd_MAN[IDS][n];
521 	      Gc_AN = Snd_GAN[IDS][n];
522 	      Cwan = WhatSpecies[Gc_AN];
523 	      tno1 = Spe_Total_CNO[Cwan];
524 	      for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
525 		Gh_AN = natn[Gc_AN][h_AN];
526 		Hwan = WhatSpecies[Gh_AN];
527 		tno2 = Spe_Total_CNO[Hwan];
528 		for (i=0; i<tno1; i++){
529 		  for (j=0; j<tno2; j++){
530 		    size1++;
531 		  }
532 		}
533 	      }
534 	    }
535 	  }
536 
537 	  Snd_iDM0_Size[IDS] = size1;
538 	  MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
539 
540 	}
541 	else{
542 	  Snd_iDM0_Size[IDS] = 0;
543 	}
544 
545 	/* receiving of size of data */
546 
547 	if (F_Rcv_Num[IDR]!=0){
548 	  MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
549 	  Rcv_iDM0_Size[IDR] = size2;
550 	}
551 	else{
552 	  Rcv_iDM0_Size[IDR] = 0;
553 	}
554 
555 	if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
556 
557       }
558       else{
559 	Snd_iDM0_Size[IDS] = 0;
560 	Rcv_iDM0_Size[IDR] = 0;
561       }
562     }
563 
564     /***********************************
565                data transfer
566     ************************************/
567 
568     tag = 999;
569     for (ID=0; ID<numprocs; ID++){
570 
571       IDS = (myid + ID) % numprocs;
572       IDR = (myid - ID + numprocs) % numprocs;
573 
574       if (ID!=0){
575 
576 	/*****************************
577 	      sending of data
578 	*****************************/
579 
580 	if (F_Snd_Num[IDS]!=0){
581 
582 	  size1 = Snd_iDM0_Size[IDS];
583 
584 	  /* allocation of array */
585 
586 	  tmp_array = (double*)malloc(sizeof(double)*size1);
587 
588 	  /* multidimentional array to vector array */
589 
590 	  num = 0;
591 	  for (so=0; so<2; so++){
592 	    for (n=0; n<F_Snd_Num[IDS]; n++){
593 	      Mc_AN = Snd_MAN[IDS][n];
594 	      Gc_AN = Snd_GAN[IDS][n];
595 	      Cwan = WhatSpecies[Gc_AN];
596 	      tno1 = Spe_Total_CNO[Cwan];
597 	      for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
598 		Gh_AN = natn[Gc_AN][h_AN];
599 		Hwan = WhatSpecies[Gh_AN];
600 		tno2 = Spe_Total_CNO[Hwan];
601 		for (i=0; i<tno1; i++){
602 		  for (j=0; j<tno2; j++){
603 		    tmp_array[num] = iDM[0][so][Mc_AN][h_AN][i][j];
604 		    num++;
605 		  }
606 		}
607 	      }
608 	    }
609 	  }
610 
611 	  MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
612 
613 	}
614 
615 	/*****************************
616           receiving of block data
617 	*****************************/
618 
619 	if (F_Rcv_Num[IDR]!=0){
620 
621 	  size2 = Rcv_iDM0_Size[IDR];
622 
623 	  /* allocation of array */
624 	  tmp_array2 = (double*)malloc(sizeof(double)*size2);
625 
626 	  MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
627 
628 	  num = 0;
629 	  for (so=0; so<2; so++){
630 	    Mc_AN = F_TopMAN[IDR] - 1;
631 	    for (n=0; n<F_Rcv_Num[IDR]; n++){
632 	      Mc_AN++;
633 	      Gc_AN = Rcv_GAN[IDR][n];
634 	      Cwan = WhatSpecies[Gc_AN];
635 	      tno1 = Spe_Total_CNO[Cwan];
636 
637 	      for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
638 		Gh_AN = natn[Gc_AN][h_AN];
639 		Hwan = WhatSpecies[Gh_AN];
640 		tno2 = Spe_Total_CNO[Hwan];
641 		for (i=0; i<tno1; i++){
642 		  for (j=0; j<tno2; j++){
643 		    iDM0[so][Mc_AN][h_AN][i][j] = tmp_array2[num];
644 		    num++;
645 		  }
646 		}
647 	      }
648 	    }
649 	  }
650 
651 	  /* freeing of array */
652 	  free(tmp_array2);
653 	}
654 
655 	if (F_Snd_Num[IDS]!=0){
656 	  MPI_Wait(&request,&stat);
657 	  free(tmp_array); /* freeing of array */
658 	}
659 
660       }
661     }
662 
663   } /* if ( SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1 && SpinP_switch==3)
664      || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1) */
665 
666   /****************************************************
667                       #1 of force
668 
669               -\int \delta V_H drho_a/dx dr
670                          and
671                 force induced from PCC
672               +\int V_XC drho_pcc/dx dr
673 
674   ****************************************************/
675 
676   if (myid==Host_ID && 0<level_stdout){
677     printf("  Force calculation #1\n");fflush(stdout);
678   }
679 
680   dtime(&stime);
681 
682   /****************************************************
683    set RefVxc_Grid, where the CA-LDA exchange-correlation
684    functional is alway used.
685   ****************************************************/
686 
687   XC_P_switch = 1;
688   for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
689     tot_den = ADensity_Grid_B[BN_AB] + ADensity_Grid_B[BN_AB];
690     if (PCC_switch==1) {
691       tot_den += PCCDensity_Grid_B[0][BN_AB] + PCCDensity_Grid_B[1][BN_AB];
692     }
693     RefVxc_Grid_B[BN_AB] = XC_Ceperly_Alder(tot_den,XC_P_switch);
694   }
695 
696   Data_Grid_Copy_B2C_1( RefVxc_Grid_B,  RefVxc_Grid  );
697   Data_Grid_Copy_B2C_1( dVHart_Grid_B,  dVHart_Grid  );
698   Data_Grid_Copy_B2C_2( Vxc_Grid_B,     Vxc_Grid     );
699   Data_Grid_Copy_B2C_2( Density_Grid_B, Density_Grid );
700 
701 #pragma omp parallel shared(myid,Spe_OpenCore_flag,Spe_Atomic_PCC,Spe_VPS_RV,Spe_VPS_XV,Spe_Num_Mesh_VPS,Spe_PAO_RV,Spe_Atomic_Den,Spe_PAO_XV,Spe_Num_Mesh_PAO,time_per_atom,level_stdout,GridVol,Vxc_Grid,RefVxc_Grid,SpinP_switch,F_Vxc_flag,PCC_switch,dVHart_Grid,F_dVHart_flag,Gxyz,atv,MGridListAtom,CellListAtom,GridListAtom,GridN_Atom,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Stime_atom,Etime_atom,Gc_AN,Cwan,sumx,sumy,sumz,Nc,GNc,GRc,MNc,Cxyz,x,y,z,dx,dy,dz,r,r2,tmp0,tmp1,tmp2,xx)
702   {
703 
704     /* get info. on OpenMP */
705 
706     OMPID = omp_get_thread_num();
707     Nthrds = omp_get_num_threads();
708     Nprocs = omp_get_num_procs();
709 
710     for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
711 
712       dtime(&Stime_atom);
713 
714       Gc_AN = M2G[Mc_AN];
715       Cwan = WhatSpecies[Gc_AN];
716 
717       sumx = 0.0;
718       sumy = 0.0;
719       sumz = 0.0;
720 
721       for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
722 
723 	GNc = GridListAtom[Mc_AN][Nc];
724 	GRc = CellListAtom[Mc_AN][Nc];
725 	MNc = MGridListAtom[Mc_AN][Nc];
726 
727 	Get_Grid_XYZ(GNc,Cxyz);
728 	x = Cxyz[1] + atv[GRc][1];
729 	y = Cxyz[2] + atv[GRc][2];
730 	z = Cxyz[3] + atv[GRc][3];
731 
732 	dx = Gxyz[Gc_AN][1] - x;
733 	dy = Gxyz[Gc_AN][2] - y;
734 	dz = Gxyz[Gc_AN][3] - z;
735         r2 = dx*dx + dy*dy + dz*dz;
736 	r = sqrt(r2);
737         xx = 0.5*log(r2);
738 
739 	/* for empty atoms */
740 	if (r<1.0e-10) r = 1.0e-10;
741 
742 	if (1.0e-14<r){
743 
744 	  tmp0 = Dr_KumoF( Spe_Num_Mesh_PAO[Cwan], xx, r,
745                            Spe_PAO_XV[Cwan], Spe_PAO_RV[Cwan], Spe_Atomic_Den[Cwan]);
746 
747 	  tmp1 = dVHart_Grid[MNc]*tmp0/r*F_dVHart_flag;
748 	  sumx += tmp1*dx;
749 	  sumy += tmp1*dy;
750 	  sumz += tmp1*dz;
751 
752           /* contribution of Exc^(0) */
753 
754 	  tmp1 = RefVxc_Grid[MNc]*tmp0/r*F_Vxc_flag;
755 	  sumx += tmp1*dx;
756 	  sumy += tmp1*dy;
757 	  sumz += tmp1*dz;
758 
759 	  /* partial core correction */
760 	  if (PCC_switch==1){
761 
762 	    tmp0 = 0.5*F_Vxc_flag*Dr_KumoF( Spe_Num_Mesh_VPS[Cwan], xx, r,
763                                             Spe_VPS_XV[Cwan], Spe_VPS_RV[Cwan], Spe_Atomic_PCC[Cwan]);
764 
765 	    if (SpinP_switch==0){
766               tmp2 = 2.0*Vxc_Grid[0][MNc];
767 	    }
768 	    else {
769               if (Spe_OpenCore_flag[Cwan]==0){
770                 tmp2 = Vxc_Grid[0][MNc] + Vxc_Grid[1][MNc];
771 	      }
772               else if (Spe_OpenCore_flag[Cwan]==1){
773                 tmp2 = 2.0*Vxc_Grid[0][MNc];
774 	      }
775               else if (Spe_OpenCore_flag[Cwan]==-1){
776                 tmp2 = 2.0*Vxc_Grid[1][MNc];
777 	      }
778 	    }
779 
780 	    tmp1 = tmp2*tmp0/r;
781 	    sumx -= tmp1*dx;
782 	    sumy -= tmp1*dy;
783 	    sumz -= tmp1*dz;
784 
785             /* contribution of Exc^(0) */
786 
787 	    tmp2 = 2.0*RefVxc_Grid[MNc];
788 	    tmp1 = tmp2*tmp0/r;
789 	    sumx += tmp1*dx;
790 	    sumy += tmp1*dy;
791 	    sumz += tmp1*dz;
792 	  }
793 	}
794       }
795 
796       Gxyz[Gc_AN][17] = -sumx*GridVol;
797       Gxyz[Gc_AN][18] = -sumy*GridVol;
798       Gxyz[Gc_AN][19] = -sumz*GridVol;
799 
800       if (2<=level_stdout){
801 	printf("<Force>  force(1) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
802 	       myid,Mc_AN,Gc_AN,-sumx*GridVol,-sumy*GridVol,-sumz*GridVol);fflush(stdout);
803       }
804 
805       dtime(&Etime_atom);
806       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
807     }
808 
809   } /* #pragma omp parallel */
810 
811   dtime(&etime);
812   if(myid==0 && measure_time){
813     printf("Time for force#1=%18.5f\n",etime-stime);fflush(stdout);
814   }
815 
816   /****************************************************
817      added by T.Ohwaki
818 
819                       #1' of force
820      contribution from an artificial wall applied
821      in the ESM method so that atoms cannot go beyond
822      the boundary of the unit cell along the a-axis.
823   ****************************************************/
824 
825   if (ESM_switch!=0){
826 
827     double fx,xb,x0,x,a;
828 
829     xb = Grid_Origin[1] + tv[1][1];
830     a = ESM_wall_height/pow(1.89,3.0);
831 
832     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
833 
834       Gc_AN = M2G[Mc_AN];
835       x = Gxyz[Gc_AN][1];
836       x0 = xb - ESM_wall_position;
837       dx = x - x0;
838 
839       if (0.0<dx){
840         fx = 3.0*a*dx*dx;
841       }
842       else {
843         fx = 0.0;
844       }
845 
846       Gxyz[Gc_AN][17] += fx;
847 
848       /*
849       printf("Gc_AN=%2d fx=%15.12f\n",Gc_AN,fx);fflush(stdout);
850       */
851 
852       /* add an artifical force if required. */
853 
854       if(Arti_Force==1){
855         if(Gc_AN==1) Gxyz[1][17] += Arti_Grad;
856         if(myid==0) printf("    adding force at the proc. 'Force #1' \n");
857       }
858     }
859   }
860 
861   /****************************************************
862    contraction
863 
864    H0
865    OLP
866   ****************************************************/
867 
868   MPI_Barrier(mpi_comm_level1);
869 
870   if (Cnt_switch==1){
871 
872     Cont_Matrix0(H0[0],CntH0[0]);
873     Cont_Matrix0(H0[1],CntH0[1]);
874     Cont_Matrix0(H0[2],CntH0[2]);
875     Cont_Matrix0(H0[3],CntH0[3]);
876 
877     Cont_Matrix0(OLP[0],CntOLP[0]);
878     Cont_Matrix0(OLP[1],CntOLP[1]);
879     Cont_Matrix0(OLP[2],CntOLP[2]);
880     Cont_Matrix0(OLP[3],CntOLP[3]);
881   }
882 
883   if ( Hub_U_switch==1 && Hub_U_occupation==1 ){
884     MPI_OLP(OLP);
885   }
886 
887   MPI_Barrier(mpi_comm_level1);
888 
889   /****************************************************
890                       #2 of force
891 
892                    kinetic operator
893   ****************************************************/
894 
895   dtime(&stime);
896 
897   if (myid==Host_ID && 0<level_stdout){
898     printf("  Force calculation #2\n");fflush(stdout);
899   }
900 
901 #pragma omp parallel shared(time_per_atom,Gxyz,myid,level_stdout,iDM0,CDM0,CntH0,H0,F_Kin_flag,NC_v_eff,v_eff,OLP,Hub_U_occupation,Cnt_switch,F_NL_flag,List_YOUSO,RMI1,Zeeman_NCO_switch,Zeeman_NCS_switch,Constraint_NCS_switch,F_U_flag,Hub_U_switch,SO_switch,SpinP_switch,Spe_Total_CNO,F_G2M,natn,FNAN,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Stime_atom,Etime_atom,Gc_AN,Cwan,dEx,dEy,dEz,h_AN,Gh_AN,Mh_AN,Hwan,ian,start_q_AN,q_AN,Gq_AN,Mq_AN,Qwan,jan,kl,so,i,j,k,Hx,Hy,Hz,HUx,HUy,HUz,NC_HUx,NC_HUy,NC_HUz,s1,s2,pref,spinmax,spin)
902   {
903 
904     /* allocation of arrays */
905 
906     Hx = (dcomplex***)malloc(sizeof(dcomplex**)*3);
907     for (i=0; i<3; i++){
908       Hx[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
909       for (j=0; j<List_YOUSO[7]; j++){
910 	Hx[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
911       }
912     }
913 
914     Hy = (dcomplex***)malloc(sizeof(dcomplex**)*3);
915     for (i=0; i<3; i++){
916       Hy[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
917       for (j=0; j<List_YOUSO[7]; j++){
918 	Hy[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
919       }
920     }
921 
922     Hz = (dcomplex***)malloc(sizeof(dcomplex**)*3);
923     for (i=0; i<3; i++){
924       Hz[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
925       for (j=0; j<List_YOUSO[7]; j++){
926 	Hz[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
927       }
928     }
929 
930     if (   (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)
931 	   && (Hub_U_occupation==1 || Hub_U_occupation==2)
932 	   && SpinP_switch!=3 ){
933 
934       HUx = (double***)malloc(sizeof(double**)*3);
935       for (i=0; i<3; i++){
936 	HUx[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
937 	for (j=0; j<List_YOUSO[7]; j++){
938 	  HUx[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
939 	}
940       }
941 
942       HUy = (double***)malloc(sizeof(double**)*3);
943       for (i=0; i<3; i++){
944 	HUy[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
945 	for (j=0; j<List_YOUSO[7]; j++){
946 	  HUy[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
947 	}
948       }
949 
950       HUz = (double***)malloc(sizeof(double**)*3);
951       for (i=0; i<3; i++){
952 	HUz[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
953 	for (j=0; j<List_YOUSO[7]; j++){
954 	  HUz[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
955 	}
956       }
957     }
958 
959     if (   (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)
960 	   && (Hub_U_occupation==1 || Hub_U_occupation==2)
961 	   && SpinP_switch==3 ){
962 
963       NC_HUx = (dcomplex****)malloc(sizeof(dcomplex***)*2);
964       for (i=0; i<2; i++){
965 	NC_HUx[i] = (dcomplex***)malloc(sizeof(dcomplex**)*2);
966 	for (j=0; j<2; j++){
967 	  NC_HUx[i][j] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
968 	  for (k=0; k<List_YOUSO[7]; k++){
969 	    NC_HUx[i][j][k] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
970 	  }
971 	}
972       }
973 
974       NC_HUy = (dcomplex****)malloc(sizeof(dcomplex***)*2);
975       for (i=0; i<2; i++){
976 	NC_HUy[i] = (dcomplex***)malloc(sizeof(dcomplex**)*2);
977 	for (j=0; j<2; j++){
978 	  NC_HUy[i][j] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
979 	  for (k=0; k<List_YOUSO[7]; k++){
980 	    NC_HUy[i][j][k] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
981 	  }
982 	}
983       }
984 
985       NC_HUz = (dcomplex****)malloc(sizeof(dcomplex***)*2);
986       for (i=0; i<2; i++){
987 	NC_HUz[i] = (dcomplex***)malloc(sizeof(dcomplex**)*2);
988 	for (j=0; j<2; j++){
989 	  NC_HUz[i][j] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
990 	  for (k=0; k<List_YOUSO[7]; k++){
991 	    NC_HUz[i][j][k] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
992 	  }
993 	}
994       }
995     }
996 
997     /* get info. on OpenMP */
998 
999     OMPID = omp_get_thread_num();
1000     Nthrds = omp_get_num_threads();
1001     Nprocs = omp_get_num_procs();
1002 
1003     for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
1004 
1005       dtime(&Stime_atom);
1006 
1007       Gc_AN = M2G[Mc_AN];
1008       Cwan = WhatSpecies[Gc_AN];
1009 
1010       dEx = 0.0;
1011       dEy = 0.0;
1012       dEz = 0.0;
1013 
1014       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1015 
1016 	Gh_AN = natn[Gc_AN][h_AN];
1017 	Mh_AN = F_G2M[Gh_AN];
1018 	Hwan = WhatSpecies[Gh_AN];
1019 	ian = Spe_Total_CNO[Hwan];
1020 
1021 	if ( SpinP_switch==3 && (SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1)
1022 	 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1) )
1023 	  start_q_AN = 0;
1024 	else
1025 	  start_q_AN = h_AN;
1026 
1027 	for (q_AN=start_q_AN; q_AN<=FNAN[Gc_AN]; q_AN++){
1028 
1029 	  Gq_AN = natn[Gc_AN][q_AN];
1030 	  Mq_AN = F_G2M[Gq_AN];
1031 	  Qwan = WhatSpecies[Gq_AN];
1032 	  jan = Spe_Total_CNO[Qwan];
1033 	  kl = RMI1[Mc_AN][h_AN][q_AN];
1034 
1035 	  if (0<=kl){
1036 
1037 	    for (so=0; so<3; so++){
1038 	      for (i=0; i<List_YOUSO[7]; i++){
1039 		for (j=0; j<List_YOUSO[7]; j++){
1040 		  Hx[so][i][j] = Complex(0.0,0.0);
1041 		  Hy[so][i][j] = Complex(0.0,0.0);
1042 		  Hz[so][i][j] = Complex(0.0,0.0);
1043 		}
1044 	      }
1045 	    }
1046 
1047 	    /****************************************************
1048              Contribution from LDA+U with 'full'treatment for
1049              counting the occupation number
1050 	    ****************************************************/
1051 
1052 	    if ( (Hub_U_switch==1 && F_U_flag==1) || 1<=Constraint_NCS_switch
1053                || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1 ){
1054 
1055 	      /* full treatment and collinear case */
1056 
1057 	      if (Hub_U_occupation==1 && SpinP_switch!=3){
1058 
1059 		/* initialize HUx, HUy, and HUz */
1060 
1061 		for (so=0; so<3; so++){
1062 		  for (i=0; i<List_YOUSO[7]; i++){
1063 		    for (j=0; j<List_YOUSO[7]; j++){
1064 		      HUx[so][i][j] = 0.0;
1065 		      HUy[so][i][j] = 0.0;
1066 		      HUz[so][i][j] = 0.0;
1067 		    }
1068 		  }
1069 		}
1070 
1071 		dH_U_full(Mc_AN,h_AN,q_AN,OLP,v_eff,HUx,HUy,HUz);
1072 
1073 		/* add the contribution to Hx, Hy, and Hz */
1074 
1075 		if (SpinP_switch==0) spinmax = 0;
1076 		else                 spinmax = 1;
1077 
1078 		for (spin=0; spin<=spinmax; spin++){
1079 		  for (i=0; i<ian; i++){
1080 		    for (j=0; j<jan; j++){
1081 		      Hx[spin][i][j].r += HUx[spin][i][j];
1082 		      Hy[spin][i][j].r += HUy[spin][i][j];
1083 		      Hz[spin][i][j].r += HUz[spin][i][j];
1084 		    }
1085 		  }
1086 		}
1087 	      }
1088 
1089 	      /* full treatment and non-collinear case */
1090 
1091 	      else if (Hub_U_occupation==1 && SpinP_switch==3){
1092 
1093 		/* initialize NC_HUx, NC_HUy, and NC_HUz */
1094 
1095 		for (s1=0; s1<2; s1++){
1096 		  for (s2=0; s2<2; s2++){
1097 		    for (i=0; i<List_YOUSO[7]; i++){
1098 		      for (j=0; j<List_YOUSO[7]; j++){
1099 			NC_HUx[s1][s2][i][j] = Complex(0.0,0.0);
1100 			NC_HUy[s1][s2][i][j] = Complex(0.0,0.0);
1101 			NC_HUz[s1][s2][i][j] = Complex(0.0,0.0);
1102 		      }
1103 		    }
1104 		  }
1105 		}
1106 
1107 		dH_U_NC_full(Mc_AN,h_AN,q_AN,OLP,NC_v_eff,NC_HUx,NC_HUy,NC_HUz);
1108 
1109 		/******************************************************
1110                        add the contribution to Hx, Hy, and Hz
1111 
1112                        Hx[0] 00
1113                        Hx[1] 11
1114                        Hx[2] 01
1115 		******************************************************/
1116 
1117 		for (i=0; i<ian; i++){
1118 		  for (j=0; j<jan; j++){
1119 
1120 		    Hx[0][i][j].r += NC_HUx[0][0][i][j].r;
1121 		    Hy[0][i][j].r += NC_HUy[0][0][i][j].r;
1122 		    Hz[0][i][j].r += NC_HUz[0][0][i][j].r;
1123 
1124 		    Hx[1][i][j].r += NC_HUx[1][1][i][j].r;
1125 		    Hy[1][i][j].r += NC_HUy[1][1][i][j].r;
1126 		    Hz[1][i][j].r += NC_HUz[1][1][i][j].r;
1127 
1128 		    Hx[2][i][j].r += NC_HUx[0][1][i][j].r;
1129 		    Hy[2][i][j].r += NC_HUy[0][1][i][j].r;
1130 		    Hz[2][i][j].r += NC_HUz[0][1][i][j].r;
1131 
1132 		    Hx[0][i][j].i += NC_HUx[0][0][i][j].i;
1133 		    Hy[0][i][j].i += NC_HUy[0][0][i][j].i;
1134 		    Hz[0][i][j].i += NC_HUz[0][0][i][j].i;
1135 
1136 		    Hx[1][i][j].i += NC_HUx[1][1][i][j].i;
1137 		    Hy[1][i][j].i += NC_HUy[1][1][i][j].i;
1138 		    Hz[1][i][j].i += NC_HUz[1][1][i][j].i;
1139 
1140 		    Hx[2][i][j].i += NC_HUx[0][1][i][j].i;
1141 		    Hy[2][i][j].i += NC_HUy[0][1][i][j].i;
1142 		    Hz[2][i][j].i += NC_HUz[0][1][i][j].i;
1143 		  }
1144 		}
1145 	      }
1146 
1147 	    }
1148 
1149 	    /****************************************************
1150                                H0 = dKinetic
1151 	    ****************************************************/
1152 
1153 	    if (F_Kin_flag==1){
1154 
1155 	      /* in case of no obital optimization */
1156 
1157 	      if (Cnt_switch==0){
1158 		if (h_AN==0){
1159 		  for (i=0; i<ian; i++){
1160 		    for (j=0; j<jan; j++){
1161 		      Hx[0][i][j].r += H0[1][Mc_AN][q_AN][i][j];
1162 		      Hy[0][i][j].r += H0[2][Mc_AN][q_AN][i][j];
1163 		      Hz[0][i][j].r += H0[3][Mc_AN][q_AN][i][j];
1164 
1165 		      Hx[1][i][j].r += H0[1][Mc_AN][q_AN][i][j];
1166 		      Hy[1][i][j].r += H0[2][Mc_AN][q_AN][i][j];
1167 		      Hz[1][i][j].r += H0[3][Mc_AN][q_AN][i][j];
1168 		    }
1169 		  }
1170 		}
1171 
1172 		else if (h_AN!=0 && q_AN==0){
1173 		  for (i=0; i<ian; i++){
1174 		    for (j=0; j<jan; j++){
1175 		      Hx[0][i][j].r += H0[1][Mc_AN][h_AN][j][i];
1176 		      Hy[0][i][j].r += H0[2][Mc_AN][h_AN][j][i];
1177 		      Hz[0][i][j].r += H0[3][Mc_AN][h_AN][j][i];
1178 
1179 		      Hx[1][i][j].r += H0[1][Mc_AN][h_AN][j][i];
1180 		      Hy[1][i][j].r += H0[2][Mc_AN][h_AN][j][i];
1181 		      Hz[1][i][j].r += H0[3][Mc_AN][h_AN][j][i];
1182 		    }
1183 		  }
1184 		}
1185 	      }
1186 
1187 	      /* in case of obital optimization */
1188 
1189 	      else{
1190 
1191 		if (h_AN==0){
1192 		  for (i=0; i<ian; i++){
1193 		    for (j=0; j<jan; j++){
1194 
1195 		      Hx[0][i][j].r += CntH0[1][Mc_AN][q_AN][i][j];
1196 		      Hy[0][i][j].r += CntH0[2][Mc_AN][q_AN][i][j];
1197 		      Hz[0][i][j].r += CntH0[3][Mc_AN][q_AN][i][j];
1198 
1199 		      Hx[1][i][j].r += CntH0[1][Mc_AN][q_AN][i][j];
1200 		      Hy[1][i][j].r += CntH0[2][Mc_AN][q_AN][i][j];
1201 		      Hz[1][i][j].r += CntH0[3][Mc_AN][q_AN][i][j];
1202 
1203 		    }
1204 		  }
1205 		}
1206 
1207 		else if (h_AN!=0 && q_AN==0){
1208 		  for (i=0; i<ian; i++){
1209 		    for (j=0; j<jan; j++){
1210 
1211 		      Hx[0][i][j].r += CntH0[1][Mc_AN][h_AN][j][i];
1212 		      Hy[0][i][j].r += CntH0[2][Mc_AN][h_AN][j][i];
1213 		      Hz[0][i][j].r += CntH0[3][Mc_AN][h_AN][j][i];
1214 
1215 		      Hx[1][i][j].r += CntH0[1][Mc_AN][h_AN][j][i];
1216 		      Hy[1][i][j].r += CntH0[2][Mc_AN][h_AN][j][i];
1217 		      Hz[1][i][j].r += CntH0[3][Mc_AN][h_AN][j][i];
1218 
1219 		    }
1220 		  }
1221 		}
1222 	      }
1223 
1224             } /* if F_Kin_flag */
1225 
1226 	    /****************************************************
1227                               \sum rho*dH
1228 	    ****************************************************/
1229 
1230 	    /* non-spin polarization */
1231 
1232 	    if (SpinP_switch==0){
1233 
1234               if (q_AN==h_AN) pref = 2.0;
1235               else            pref = 4.0;
1236 
1237 	      for (i=0; i<Spe_Total_CNO[Hwan]; i++){
1238 		for (j=0; j<Spe_Total_CNO[Qwan]; j++){
1239 		  dEx += pref*CDM0[0][Mh_AN][kl][i][j]*Hx[0][i][j].r;
1240 		  dEy += pref*CDM0[0][Mh_AN][kl][i][j]*Hy[0][i][j].r;
1241 		  dEz += pref*CDM0[0][Mh_AN][kl][i][j]*Hz[0][i][j].r;
1242 		}
1243 	      }
1244 	    }
1245 
1246 	    /* collinear spin polarized or non-colliear without SO and LDA+U */
1247 
1248 	    else if (SpinP_switch==1 || (SpinP_switch==3 && SO_switch==0 && Hub_U_switch==0
1249 		 && Constraint_NCS_switch==0 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)){
1250 
1251               if (q_AN==h_AN) pref = 1.0;
1252               else            pref = 2.0;
1253 
1254 	      for (i=0; i<Spe_Total_CNO[Hwan]; i++){
1255 		for (j=0; j<Spe_Total_CNO[Qwan]; j++){
1256 
1257 		  dEx += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hx[0][i][j].r
1258 		               + CDM0[1][Mh_AN][kl][i][j]*Hx[1][i][j].r);
1259 		  dEy += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hy[0][i][j].r
1260 		               + CDM0[1][Mh_AN][kl][i][j]*Hy[1][i][j].r);
1261 		  dEz += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hz[0][i][j].r
1262 		               + CDM0[1][Mh_AN][kl][i][j]*Hz[1][i][j].r);
1263 		}
1264 	      }
1265 	    }
1266 
1267 	    /* spin collinear with spin-orbit coupling */
1268 
1269 	    else if ( SpinP_switch==1 && SO_switch==1 ){
1270 	      printf("Spin-orbit coupling is not supported for collinear DFT calculations.\n");fflush(stdout);
1271 	      MPI_Finalize();
1272 	      exit(1);
1273 	    }
1274 
1275 	    /* spin non-collinear with spin-orbit coupling or with LDA+U */
1276 
1277 	    else if ( SpinP_switch==3 && (SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1)
1278 		  || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1) ){
1279 
1280 	      for (i=0; i<Spe_Total_CNO[Hwan]; i++){
1281 		for (j=0; j<Spe_Total_CNO[Qwan]; j++){
1282 
1283 		  dEx +=   CDM0[0][Mh_AN][kl][i][j]*Hx[0][i][j].r
1284 		         - iDM0[0][Mh_AN][kl][i][j]*Hx[0][i][j].i
1285 		         + CDM0[1][Mh_AN][kl][i][j]*Hx[1][i][j].r
1286 		         - iDM0[1][Mh_AN][kl][i][j]*Hx[1][i][j].i
1287 		     + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hx[2][i][j].r
1288 		     - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hx[2][i][j].i;
1289 
1290 		  dEy +=   CDM0[0][Mh_AN][kl][i][j]*Hy[0][i][j].r
1291 		         - iDM0[0][Mh_AN][kl][i][j]*Hy[0][i][j].i
1292 		         + CDM0[1][Mh_AN][kl][i][j]*Hy[1][i][j].r
1293 		         - iDM0[1][Mh_AN][kl][i][j]*Hy[1][i][j].i
1294 		     + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hy[2][i][j].r
1295 		     - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hy[2][i][j].i;
1296 
1297 		  dEz +=   CDM0[0][Mh_AN][kl][i][j]*Hz[0][i][j].r
1298 		         - iDM0[0][Mh_AN][kl][i][j]*Hz[0][i][j].i
1299 		         + CDM0[1][Mh_AN][kl][i][j]*Hz[1][i][j].r
1300 		         - iDM0[1][Mh_AN][kl][i][j]*Hz[1][i][j].i
1301 		     + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hz[2][i][j].r
1302 		     - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hz[2][i][j].i;
1303 
1304 		}
1305 	      }
1306 	    }
1307 
1308 	  }  /* if (0<=kl) */
1309 	}  /* q_AN */
1310       }  /* h_AN */
1311 
1312       /****************************************************
1313                         #2 of Force
1314       ****************************************************/
1315 
1316       if (2<=level_stdout){
1317 	printf("<Force>  force(2) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
1318 	       myid,Mc_AN,Gc_AN,dEx,dEy,dEz);fflush(stdout);
1319       }
1320 
1321       Gxyz[Gc_AN][17] += dEx;
1322       Gxyz[Gc_AN][18] += dEy;
1323       Gxyz[Gc_AN][19] += dEz;
1324 
1325       dtime(&Etime_atom);
1326       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1327 
1328     } /* Mc_AN */
1329 
1330     /* freeing of arrays */
1331 
1332     for (i=0; i<3; i++){
1333       for (j=0; j<List_YOUSO[7]; j++){
1334 	free(Hx[i][j]);
1335       }
1336       free(Hx[i]);
1337     }
1338     free(Hx);
1339 
1340     for (i=0; i<3; i++){
1341       for (j=0; j<List_YOUSO[7]; j++){
1342 	free(Hy[i][j]);
1343       }
1344       free(Hy[i]);
1345     }
1346     free(Hy);
1347 
1348     for (i=0; i<3; i++){
1349       for (j=0; j<List_YOUSO[7]; j++){
1350 	free(Hz[i][j]);
1351       }
1352       free(Hz[i]);
1353     }
1354     free(Hz);
1355 
1356     if (   (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)
1357 	   && (Hub_U_occupation==1 || Hub_U_occupation==2)
1358 	   && SpinP_switch!=3 ){
1359 
1360       for (i=0; i<3; i++){
1361 	for (j=0; j<List_YOUSO[7]; j++){
1362 	  free(HUx[i][j]);
1363 	}
1364 	free(HUx[i]);
1365       }
1366       free(HUx);
1367 
1368       for (i=0; i<3; i++){
1369 	for (j=0; j<List_YOUSO[7]; j++){
1370 	  free(HUy[i][j]);
1371 	}
1372 	free(HUy[i]);
1373       }
1374       free(HUy);
1375 
1376       for (i=0; i<3; i++){
1377 	for (j=0; j<List_YOUSO[7]; j++){
1378 	  free(HUz[i][j]);
1379 	}
1380 	free(HUz[i]);
1381       }
1382       free(HUz);
1383     }
1384 
1385     if (   (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)
1386 	   && (Hub_U_occupation==1 || Hub_U_occupation==2)
1387 	   && SpinP_switch==3 ){
1388 
1389       for (i=0; i<2; i++){
1390 	for (j=0; j<2; j++){
1391 	  for (k=0; k<List_YOUSO[7]; k++){
1392 	    free(NC_HUx[i][j][k]);
1393 	  }
1394 	  free(NC_HUx[i][j]);
1395 	}
1396 	free(NC_HUx[i]);
1397       }
1398       free(NC_HUx);
1399 
1400       for (i=0; i<2; i++){
1401 	for (j=0; j<2; j++){
1402 	  for (k=0; k<List_YOUSO[7]; k++){
1403 	    free(NC_HUy[i][j][k]);
1404 	  }
1405 	  free(NC_HUy[i][j]);
1406 	}
1407 	free(NC_HUy[i]);
1408       }
1409       free(NC_HUy);
1410 
1411       for (i=0; i<2; i++){
1412 	for (j=0; j<2; j++){
1413 	  for (k=0; k<List_YOUSO[7]; k++){
1414 	    free(NC_HUz[i][j][k]);
1415 	  }
1416 	  free(NC_HUz[i][j]);
1417 	}
1418 	free(NC_HUz[i]);
1419       }
1420       free(NC_HUz);
1421     }
1422 
1423   } /* #pragma omp parallel */
1424 
1425   dtime(&etime);
1426   if(myid==0 && measure_time){
1427     printf("Time for force#2=%18.5f\n",etime-stime);fflush(stdout);
1428   }
1429 
1430   /****************************************************
1431                       #3 of Force
1432 
1433                dn/dx * (VNA + dVH + Vxc)
1434             or
1435                dn/dx * (dVH + Vxc)
1436   ****************************************************/
1437 
1438   dtime(&stime);
1439 
1440   if (myid==Host_ID && 0<level_stdout){
1441     printf("  Force calculation #3\n");fflush(stdout);
1442   }
1443 
1444   Force3();
1445 
1446   dtime(&etime);
1447   if(myid==0 && measure_time){
1448     printf("Time for force#3=%18.5f\n",etime-stime);fflush(stdout);
1449   }
1450 
1451   /****************************************************
1452                       #4 of Force
1453 
1454        Force4:   n * dVNA/dx
1455        Force4B:  from separable VNA projectors
1456   ****************************************************/
1457 
1458   dtime(&stime);
1459 
1460   if (myid==Host_ID && 0<level_stdout){
1461     printf("  Force calculation #4\n");fflush(stdout);
1462   }
1463 
1464   if (ProExpn_VNA==0 && F_VNA_flag==1){
1465     Force4();
1466   }
1467   else if (ProExpn_VNA==1 && F_VNA_flag==1){
1468     Force4B(CDM0);
1469   }
1470 
1471   dtime(&etime);
1472   if(myid==0 && measure_time){
1473     printf("Time for force#4=%18.5f\n",etime-stime);fflush(stdout);
1474   }
1475 
1476   /****************************************************
1477                       #5 of Force
1478 
1479                Contribution from overlap
1480   ****************************************************/
1481 
1482   dtime(&stime);
1483 
1484   if (myid==Host_ID && 0<level_stdout){
1485     printf("  Force calculation #5\n");fflush(stdout);
1486   }
1487 
1488   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1489     Fx[Mc_AN] = 0.0;
1490     Fy[Mc_AN] = 0.0;
1491     Fz[Mc_AN] = 0.0;
1492   }
1493 
1494 #pragma omp parallel shared(time_per_atom,Fx,Fy,Fz,CntOLP,OLP,Cnt_switch,EDM,SpinP_switch,Spe_Total_CNO,natn,FNAN,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Stime_atom,Etime_atom,Gc_AN,Cwan,h_AN,Gh_AN,Hwan,i,j,dum,dx,dy,dz)
1495   {
1496 
1497     /* get info. on OpenMP */
1498 
1499     OMPID = omp_get_thread_num();
1500     Nthrds = omp_get_num_threads();
1501     Nprocs = omp_get_num_procs();
1502 
1503     for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
1504 
1505       dtime(&Stime_atom);
1506 
1507       Gc_AN = M2G[Mc_AN];
1508       Cwan = WhatSpecies[Gc_AN];
1509 
1510       for (h_AN=1; h_AN<=FNAN[Gc_AN]; h_AN++){
1511 
1512 	Gh_AN = natn[Gc_AN][h_AN];
1513 	Hwan = WhatSpecies[Gh_AN];
1514 
1515 	for (i=0; i<Spe_Total_CNO[Cwan]; i++){
1516 	  for (j=0; j<Spe_Total_CNO[Hwan]; j++){
1517 
1518 	    if (SpinP_switch==0){
1519 	      dum = 2.0*EDM[0][Mc_AN][h_AN][i][j];
1520 	    }
1521 	    else if (SpinP_switch==1 || SpinP_switch==3){
1522 	      dum = EDM[0][Mc_AN][h_AN][i][j] + EDM[1][Mc_AN][h_AN][i][j];
1523 	    }
1524 
1525 	    if (Cnt_switch==0){
1526 	      dx = dum*OLP[1][Mc_AN][h_AN][i][j];
1527 	      dy = dum*OLP[2][Mc_AN][h_AN][i][j];
1528 	      dz = dum*OLP[3][Mc_AN][h_AN][i][j];
1529 	    }
1530 	    else{
1531 	      dx = dum*CntOLP[1][Mc_AN][h_AN][i][j];
1532 	      dy = dum*CntOLP[2][Mc_AN][h_AN][i][j];
1533 	      dz = dum*CntOLP[3][Mc_AN][h_AN][i][j];
1534 	    }
1535 
1536 	    Fx[Mc_AN] = Fx[Mc_AN] - 2.0*dx;
1537 	    Fy[Mc_AN] = Fy[Mc_AN] - 2.0*dy;
1538 	    Fz[Mc_AN] = Fz[Mc_AN] - 2.0*dz;
1539 
1540 	  }
1541 	}
1542       }
1543 
1544       dtime(&Etime_atom);
1545       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1546     }
1547 
1548   } /* #pragma omp parallel */
1549 
1550   dtime(&etime);
1551   if(myid==0 && measure_time){
1552     printf("Time for force#5=%18.5f\n",etime-stime);fflush(stdout);
1553   }
1554 
1555   /****************************************************
1556                   add #5 of Force
1557   ****************************************************/
1558 
1559   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1560 
1561     Gc_AN = M2G[Mc_AN];
1562 
1563     Gxyz[Gc_AN][17] += Fx[Mc_AN];
1564     Gxyz[Gc_AN][18] += Fy[Mc_AN];
1565     Gxyz[Gc_AN][19] += Fz[Mc_AN];
1566 
1567     if (2<=level_stdout){
1568       printf("<Force>  force(5) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
1569              myid,Mc_AN,Gc_AN,Fx[Mc_AN],Fy[Mc_AN],Fz[Mc_AN]);fflush(stdout);
1570     }
1571   }
1572 
1573   /****************************************************************
1574    In case that the dual representation is used for evaluation of
1575    the occupation number in the LDA+U method, the following force
1576    term is added.
1577   ****************************************************************/
1578 
1579   if (   (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)
1580 	 && (Hub_U_occupation==1 || Hub_U_occupation==2)
1581 	 && SpinP_switch!=3 ){
1582 
1583     HUx = (double***)malloc(sizeof(double**)*3);
1584     for (i=0; i<3; i++){
1585       HUx[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
1586       for (j=0; j<List_YOUSO[7]; j++){
1587 	HUx[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
1588       }
1589     }
1590 
1591     HUy = (double***)malloc(sizeof(double**)*3);
1592     for (i=0; i<3; i++){
1593       HUy[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
1594       for (j=0; j<List_YOUSO[7]; j++){
1595 	HUy[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
1596       }
1597     }
1598 
1599     HUz = (double***)malloc(sizeof(double**)*3);
1600     for (i=0; i<3; i++){
1601       HUz[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
1602       for (j=0; j<List_YOUSO[7]; j++){
1603 	HUz[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
1604       }
1605     }
1606   }
1607 
1608   if ( (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)
1609         && F_U_flag==1 && Hub_U_occupation==2){
1610 
1611     if (myid==Host_ID)  printf("  Force calculation for LDA_U with dual\n");fflush(stdout);
1612 
1613     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1614       Fx[Mc_AN] = 0.0;
1615       Fy[Mc_AN] = 0.0;
1616       Fz[Mc_AN] = 0.0;
1617     }
1618 
1619     /****************************************************
1620       if (SpinP_switch!=3)
1621 
1622       collinear case
1623     ****************************************************/
1624 
1625     if (SpinP_switch!=3){
1626 
1627       if (SpinP_switch==0){
1628 	spinmax = 0;
1629 	dege = 2.0;
1630       }
1631       else{
1632 	spinmax = 1;
1633 	dege = 1.0;
1634       }
1635 
1636       for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1637 
1638 	dtime(&Stime_atom);
1639 
1640 	Gc_AN = M2G[Mc_AN];
1641 	Cwan = WhatSpecies[Gc_AN];
1642 
1643 	for (spin=0; spin<=spinmax; spin++){
1644 
1645 	  for (h_AN=1; h_AN<=FNAN[Gc_AN]; h_AN++){
1646 
1647 	    Gh_AN = natn[Gc_AN][h_AN];
1648 	    Mh_AN = F_G2M[Gh_AN];
1649 	    Hwan = WhatSpecies[Gh_AN];
1650 
1651 	    /* non-orbital optimization */
1652 
1653 	    if (Cnt_switch==0){
1654 
1655 	      for (i=0; i<Spe_Total_NO[Cwan]; i++){
1656 		for (j=0; j<Spe_Total_NO[Hwan]; j++){
1657 
1658 		  tmp1 = 0.0;
1659 		  tmp2 = 0.0;
1660 		  tmp3 = 0.0;
1661 
1662 		  for (k=0; k<Spe_Total_NO[Cwan]; k++){
1663 		    tmp1 += v_eff[spin][Mc_AN][i][k]*OLP[1][Mc_AN][h_AN][k][j];
1664 		    tmp2 += v_eff[spin][Mc_AN][i][k]*OLP[2][Mc_AN][h_AN][k][j];
1665 		    tmp3 += v_eff[spin][Mc_AN][i][k]*OLP[3][Mc_AN][h_AN][k][j];
1666 		  }
1667 
1668 		  for (k=0; k<Spe_Total_NO[Hwan]; k++){
1669 		    tmp1 += v_eff[spin][Mh_AN][k][j]*OLP[1][Mc_AN][h_AN][i][k];
1670 		    tmp2 += v_eff[spin][Mh_AN][k][j]*OLP[2][Mc_AN][h_AN][i][k];
1671 		    tmp3 += v_eff[spin][Mh_AN][k][j]*OLP[3][Mc_AN][h_AN][i][k];
1672 		  }
1673 
1674 		  dx = tmp1*dege*CDM[spin][Mc_AN][h_AN][i][j];
1675 		  dy = tmp2*dege*CDM[spin][Mc_AN][h_AN][i][j];
1676 		  dz = tmp3*dege*CDM[spin][Mc_AN][h_AN][i][j];
1677 
1678 		  Fx[Mc_AN] += dx;
1679 		  Fy[Mc_AN] += dy;
1680 		  Fz[Mc_AN] += dz;
1681 		}
1682 	      }
1683 	    }
1684 
1685 	    /* orbital optimization */
1686 
1687 	    else if (Cnt_switch==1){
1688 
1689 	      /* HUx, HUy, HUz for primitive orbital */
1690 
1691 	      for (i=0; i<Spe_Total_NO[Cwan]; i++){
1692 		for (j=0; j<Spe_Total_NO[Hwan]; j++){
1693 
1694 		  tmp1 = 0.0;
1695 		  tmp2 = 0.0;
1696 		  tmp3 = 0.0;
1697 
1698 		  for (k=0; k<Spe_Total_NO[Cwan]; k++){
1699 		    tmp1 += v_eff[spin][Mc_AN][i][k]*OLP[1][Mc_AN][h_AN][k][j];
1700 		    tmp2 += v_eff[spin][Mc_AN][i][k]*OLP[2][Mc_AN][h_AN][k][j];
1701 		    tmp3 += v_eff[spin][Mc_AN][i][k]*OLP[3][Mc_AN][h_AN][k][j];
1702 		  }
1703 
1704 		  for (k=0; k<Spe_Total_NO[Hwan]; k++){
1705 		    tmp1 += v_eff[spin][Mh_AN][k][j]*OLP[1][Mc_AN][h_AN][i][k];
1706 		    tmp2 += v_eff[spin][Mh_AN][k][j]*OLP[2][Mc_AN][h_AN][i][k];
1707 		    tmp3 += v_eff[spin][Mh_AN][k][j]*OLP[3][Mc_AN][h_AN][i][k];
1708 		  }
1709 
1710 		  HUx[0][i][j] = tmp1;
1711 		  HUy[0][i][j] = tmp2;
1712 		  HUz[0][i][j] = tmp3;
1713 		}
1714 	      }
1715 
1716 	      /* contract HUx, HUy, HUz */
1717 
1718 	      for (al=0; al<Spe_Total_CNO[Cwan]; al++){
1719 		for (be=0; be<Spe_Total_CNO[Hwan]; be++){
1720 
1721 		  tmp1 = 0.0;
1722 		  tmp2 = 0.0;
1723 		  tmp3 = 0.0;
1724 
1725 		  for (p=0; p<Spe_Specified_Num[Cwan][al]; p++){
1726 		    p0 = Spe_Trans_Orbital[Cwan][al][p];
1727 		    for (q=0; q<Spe_Specified_Num[Hwan][be]; q++){
1728 		      q0 = Spe_Trans_Orbital[Hwan][be][q];
1729 		      tmp0 = CntCoes[Mc_AN][al][p]*CntCoes[Mh_AN][be][q];
1730 		      tmp1 += tmp0*HUx[0][p0][q0];
1731 		      tmp2 += tmp0*HUy[0][p0][q0];
1732 		      tmp3 += tmp0*HUz[0][p0][q0];
1733 		    }
1734 		  }
1735 
1736 		  dx = tmp1*dege*CDM[spin][Mc_AN][h_AN][al][be];
1737 		  dy = tmp2*dege*CDM[spin][Mc_AN][h_AN][al][be];
1738 		  dz = tmp3*dege*CDM[spin][Mc_AN][h_AN][al][be];
1739 
1740 		  Fx[Mc_AN] += dx;
1741 		  Fy[Mc_AN] += dy;
1742 		  Fz[Mc_AN] += dz;
1743 		}
1744 	      }
1745 
1746 	    }
1747 
1748 	  }
1749 	}
1750 
1751 	dtime(&Etime_atom);
1752 	time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1753       }
1754 
1755     }
1756 
1757     /****************************************************
1758       if (SpinP_switch==3)
1759 
1760       spin non-collinear
1761     ****************************************************/
1762 
1763     else {
1764 
1765       for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1766 
1767 	dtime(&Stime_atom);
1768 
1769 	Gc_AN = M2G[Mc_AN];
1770 	Cwan = WhatSpecies[Gc_AN];
1771 
1772 	for (h_AN=1; h_AN<=FNAN[Gc_AN]; h_AN++){
1773 
1774 	  Gh_AN = natn[Gc_AN][h_AN];
1775 	  Mh_AN = F_G2M[Gh_AN];
1776 	  Hwan = WhatSpecies[Gh_AN];
1777 
1778           kl = RMI1[Mc_AN][h_AN][0];
1779 
1780 	  for (i=0; i<Spe_Total_NO[Cwan]; i++){
1781 	    for (j=0; j<Spe_Total_NO[Hwan]; j++){
1782 
1783 	      Re00x = 0.0;  Re00y = 0.0;   Re00z = 0.0;
1784 	      Re11x = 0.0;  Re11y = 0.0;   Re11z = 0.0;
1785 	      Re01x = 0.0;  Re01y = 0.0;   Re01z = 0.0;
1786 
1787 	      Im00x = 0.0;  Im00y = 0.0;   Im00z = 0.0;
1788 	      Im11x = 0.0;  Im11y = 0.0;   Im11z = 0.0;
1789 	      Im01x = 0.0;  Im01y = 0.0;   Im01z = 0.0;
1790 
1791 	      for (k=0; k<Spe_Total_NO[Cwan]; k++){
1792 
1793 		Re00x += NC_v_eff[0][0][Mc_AN][i][k].r * OLP[1][Mc_AN][h_AN][k][j];
1794 		Re00y += NC_v_eff[0][0][Mc_AN][i][k].r * OLP[2][Mc_AN][h_AN][k][j];
1795 		Re00z += NC_v_eff[0][0][Mc_AN][i][k].r * OLP[3][Mc_AN][h_AN][k][j];
1796 
1797 		Re11x += NC_v_eff[1][1][Mc_AN][i][k].r * OLP[1][Mc_AN][h_AN][k][j];
1798 		Re11y += NC_v_eff[1][1][Mc_AN][i][k].r * OLP[2][Mc_AN][h_AN][k][j];
1799 		Re11z += NC_v_eff[1][1][Mc_AN][i][k].r * OLP[3][Mc_AN][h_AN][k][j];
1800 
1801 		Re01x += NC_v_eff[0][1][Mc_AN][i][k].r * OLP[1][Mc_AN][h_AN][k][j];
1802 		Re01y += NC_v_eff[0][1][Mc_AN][i][k].r * OLP[2][Mc_AN][h_AN][k][j];
1803 		Re01z += NC_v_eff[0][1][Mc_AN][i][k].r * OLP[3][Mc_AN][h_AN][k][j];
1804 
1805 		Im00x += NC_v_eff[0][0][Mc_AN][i][k].i * OLP[1][Mc_AN][h_AN][k][j];
1806 		Im00y += NC_v_eff[0][0][Mc_AN][i][k].i * OLP[2][Mc_AN][h_AN][k][j];
1807 		Im00z += NC_v_eff[0][0][Mc_AN][i][k].i * OLP[3][Mc_AN][h_AN][k][j];
1808 
1809 		Im11x += NC_v_eff[1][1][Mc_AN][i][k].i * OLP[1][Mc_AN][h_AN][k][j];
1810 		Im11y += NC_v_eff[1][1][Mc_AN][i][k].i * OLP[2][Mc_AN][h_AN][k][j];
1811 		Im11z += NC_v_eff[1][1][Mc_AN][i][k].i * OLP[3][Mc_AN][h_AN][k][j];
1812 
1813 		Im01x += NC_v_eff[0][1][Mc_AN][i][k].i * OLP[1][Mc_AN][h_AN][k][j];
1814 		Im01y += NC_v_eff[0][1][Mc_AN][i][k].i * OLP[2][Mc_AN][h_AN][k][j];
1815 		Im01z += NC_v_eff[0][1][Mc_AN][i][k].i * OLP[3][Mc_AN][h_AN][k][j];
1816 
1817 	      }
1818 
1819 	      for (k=0; k<Spe_Total_NO[Hwan]; k++){
1820 
1821 		Re00x += NC_v_eff[0][0][Mh_AN][k][j].r * OLP[1][Mc_AN][h_AN][i][k];
1822 		Re00y += NC_v_eff[0][0][Mh_AN][k][j].r * OLP[2][Mc_AN][h_AN][i][k];
1823 		Re00z += NC_v_eff[0][0][Mh_AN][k][j].r * OLP[3][Mc_AN][h_AN][i][k];
1824 
1825 		Re11x += NC_v_eff[1][1][Mh_AN][k][j].r * OLP[1][Mc_AN][h_AN][i][k];
1826 		Re11y += NC_v_eff[1][1][Mh_AN][k][j].r * OLP[2][Mc_AN][h_AN][i][k];
1827 		Re11z += NC_v_eff[1][1][Mh_AN][k][j].r * OLP[3][Mc_AN][h_AN][i][k];
1828 
1829 		Re01x += NC_v_eff[0][1][Mh_AN][k][j].r * OLP[1][Mc_AN][h_AN][i][k];
1830 		Re01y += NC_v_eff[0][1][Mh_AN][k][j].r * OLP[2][Mc_AN][h_AN][i][k];
1831 		Re01z += NC_v_eff[0][1][Mh_AN][k][j].r * OLP[3][Mc_AN][h_AN][i][k];
1832 
1833 		Im00x += NC_v_eff[0][0][Mh_AN][k][j].i * OLP[1][Mc_AN][h_AN][i][k];
1834 		Im00y += NC_v_eff[0][0][Mh_AN][k][j].i * OLP[2][Mc_AN][h_AN][i][k];
1835 		Im00z += NC_v_eff[0][0][Mh_AN][k][j].i * OLP[3][Mc_AN][h_AN][i][k];
1836 
1837 		Im11x += NC_v_eff[1][1][Mh_AN][k][j].i * OLP[1][Mc_AN][h_AN][i][k];
1838 		Im11y += NC_v_eff[1][1][Mh_AN][k][j].i * OLP[2][Mc_AN][h_AN][i][k];
1839 		Im11z += NC_v_eff[1][1][Mh_AN][k][j].i * OLP[3][Mc_AN][h_AN][i][k];
1840 
1841 		Im01x += NC_v_eff[0][1][Mh_AN][k][j].i * OLP[1][Mc_AN][h_AN][i][k];
1842 		Im01y += NC_v_eff[0][1][Mh_AN][k][j].i * OLP[2][Mc_AN][h_AN][i][k];
1843 		Im01z += NC_v_eff[0][1][Mh_AN][k][j].i * OLP[3][Mc_AN][h_AN][i][k];
1844 
1845 	      }
1846 
1847 	      dx =      Re00x*CDM0[0][Mc_AN][h_AN][i][j]
1848                   +     Re11x*CDM0[1][Mc_AN][h_AN][i][j]
1849                   + 2.0*Re01x*CDM0[2][Mc_AN][h_AN][i][j]
1850                   -     Im00x*iDM0[0][Mc_AN][h_AN][i][j]
1851                   -     Im11x*iDM0[1][Mc_AN][h_AN][i][j]
1852 	  	  - 2.0*Im01x*CDM0[3][Mc_AN][h_AN][i][j];
1853 
1854 	      dy =      Re00y*CDM0[0][Mc_AN][h_AN][i][j]
1855                   +     Re11y*CDM0[1][Mc_AN][h_AN][i][j]
1856                   + 2.0*Re01y*CDM0[2][Mc_AN][h_AN][i][j]
1857                   -     Im00y*iDM0[0][Mc_AN][h_AN][i][j]
1858                   -     Im11y*iDM0[1][Mc_AN][h_AN][i][j]
1859 	  	  - 2.0*Im01y*CDM0[3][Mc_AN][h_AN][i][j];
1860 
1861 	      dz =      Re00z*CDM0[0][Mc_AN][h_AN][i][j]
1862                   +     Re11z*CDM0[1][Mc_AN][h_AN][i][j]
1863                   + 2.0*Re01z*CDM0[2][Mc_AN][h_AN][i][j]
1864                   -     Im00z*iDM0[0][Mc_AN][h_AN][i][j]
1865                   -     Im11z*iDM0[1][Mc_AN][h_AN][i][j]
1866 	  	  - 2.0*Im01z*CDM0[3][Mc_AN][h_AN][i][j];
1867 
1868 	      Fx[Mc_AN] += 0.5*dx;
1869 	      Fy[Mc_AN] += 0.5*dy;
1870 	      Fz[Mc_AN] += 0.5*dz;
1871 
1872 	      Re00x = 0.0;  Re00y = 0.0;   Re00z = 0.0;
1873 	      Re11x = 0.0;  Re11y = 0.0;   Re11z = 0.0;
1874 	      Re01x = 0.0;  Re01y = 0.0;   Re01z = 0.0;
1875 
1876 	      Im00x = 0.0;  Im00y = 0.0;   Im00z = 0.0;
1877 	      Im11x = 0.0;  Im11y = 0.0;   Im11z = 0.0;
1878 	      Im01x = 0.0;  Im01y = 0.0;   Im01z = 0.0;
1879 
1880 	      for (k=0; k<Spe_Total_NO[Hwan]; k++){
1881 
1882 		Re00x += NC_v_eff[0][0][Mh_AN][j][k].r * OLP[1][Mc_AN][h_AN][i][k];
1883 		Re00y += NC_v_eff[0][0][Mh_AN][j][k].r * OLP[2][Mc_AN][h_AN][i][k];
1884 		Re00z += NC_v_eff[0][0][Mh_AN][j][k].r * OLP[3][Mc_AN][h_AN][i][k];
1885 
1886 		Re11x += NC_v_eff[1][1][Mh_AN][j][k].r * OLP[1][Mc_AN][h_AN][i][k];
1887 		Re11y += NC_v_eff[1][1][Mh_AN][j][k].r * OLP[2][Mc_AN][h_AN][i][k];
1888 		Re11z += NC_v_eff[1][1][Mh_AN][j][k].r * OLP[3][Mc_AN][h_AN][i][k];
1889 
1890 		Re01x += NC_v_eff[0][1][Mh_AN][j][k].r * OLP[1][Mc_AN][h_AN][i][k];
1891 		Re01y += NC_v_eff[0][1][Mh_AN][j][k].r * OLP[2][Mc_AN][h_AN][i][k];
1892 		Re01z += NC_v_eff[0][1][Mh_AN][j][k].r * OLP[3][Mc_AN][h_AN][i][k];
1893 
1894 		Im00x += NC_v_eff[0][0][Mh_AN][j][k].i * OLP[1][Mc_AN][h_AN][i][k];
1895 		Im00y += NC_v_eff[0][0][Mh_AN][j][k].i * OLP[2][Mc_AN][h_AN][i][k];
1896 		Im00z += NC_v_eff[0][0][Mh_AN][j][k].i * OLP[3][Mc_AN][h_AN][i][k];
1897 
1898 		Im11x += NC_v_eff[1][1][Mh_AN][j][k].i * OLP[1][Mc_AN][h_AN][i][k];
1899 		Im11y += NC_v_eff[1][1][Mh_AN][j][k].i * OLP[2][Mc_AN][h_AN][i][k];
1900 		Im11z += NC_v_eff[1][1][Mh_AN][j][k].i * OLP[3][Mc_AN][h_AN][i][k];
1901 
1902 		Im01x += NC_v_eff[0][1][Mh_AN][j][k].i * OLP[1][Mc_AN][h_AN][i][k];
1903 		Im01y += NC_v_eff[0][1][Mh_AN][j][k].i * OLP[2][Mc_AN][h_AN][i][k];
1904 		Im01z += NC_v_eff[0][1][Mh_AN][j][k].i * OLP[3][Mc_AN][h_AN][i][k];
1905 
1906 	      }
1907 
1908 	      for (k=0; k<Spe_Total_NO[Cwan]; k++){
1909 
1910 		Re00x += NC_v_eff[0][0][Mc_AN][k][i].r * OLP[1][Mc_AN][h_AN][k][j];
1911 		Re00y += NC_v_eff[0][0][Mc_AN][k][i].r * OLP[2][Mc_AN][h_AN][k][j];
1912 		Re00z += NC_v_eff[0][0][Mc_AN][k][i].r * OLP[3][Mc_AN][h_AN][k][j];
1913 
1914 		Re11x += NC_v_eff[1][1][Mc_AN][k][i].r * OLP[1][Mc_AN][h_AN][k][j];
1915 		Re11y += NC_v_eff[1][1][Mc_AN][k][i].r * OLP[2][Mc_AN][h_AN][k][j];
1916 		Re11z += NC_v_eff[1][1][Mc_AN][k][i].r * OLP[3][Mc_AN][h_AN][k][j];
1917 
1918 		Re01x += NC_v_eff[0][1][Mc_AN][k][i].r * OLP[1][Mc_AN][h_AN][k][j];
1919 		Re01y += NC_v_eff[0][1][Mc_AN][k][i].r * OLP[2][Mc_AN][h_AN][k][j];
1920 		Re01z += NC_v_eff[0][1][Mc_AN][k][i].r * OLP[3][Mc_AN][h_AN][k][j];
1921 
1922 		Im00x += NC_v_eff[0][0][Mc_AN][k][i].i * OLP[1][Mc_AN][h_AN][k][j];
1923 		Im00y += NC_v_eff[0][0][Mc_AN][k][i].i * OLP[2][Mc_AN][h_AN][k][j];
1924 		Im00z += NC_v_eff[0][0][Mc_AN][k][i].i * OLP[3][Mc_AN][h_AN][k][j];
1925 
1926 		Im11x += NC_v_eff[1][1][Mc_AN][k][i].i * OLP[1][Mc_AN][h_AN][k][j];
1927 		Im11y += NC_v_eff[1][1][Mc_AN][k][i].i * OLP[2][Mc_AN][h_AN][k][j];
1928 		Im11z += NC_v_eff[1][1][Mc_AN][k][i].i * OLP[3][Mc_AN][h_AN][k][j];
1929 
1930 		Im01x += NC_v_eff[0][1][Mc_AN][k][i].i * OLP[1][Mc_AN][h_AN][k][j];
1931 		Im01y += NC_v_eff[0][1][Mc_AN][k][i].i * OLP[2][Mc_AN][h_AN][k][j];
1932 		Im01z += NC_v_eff[0][1][Mc_AN][k][i].i * OLP[3][Mc_AN][h_AN][k][j];
1933 
1934 	      }
1935 
1936 	      dx =      Re00x*CDM0[0][Mh_AN][kl][j][i]
1937                   +     Re11x*CDM0[1][Mh_AN][kl][j][i]
1938                   + 2.0*Re01x*CDM0[2][Mh_AN][kl][j][i]
1939                   -     Im00x*iDM0[0][Mh_AN][kl][j][i]
1940                   -     Im11x*iDM0[1][Mh_AN][kl][j][i]
1941 	  	  - 2.0*Im01x*CDM0[3][Mh_AN][kl][j][i];
1942 
1943 	      dy =      Re00y*CDM0[0][Mh_AN][kl][j][i]
1944                   +     Re11y*CDM0[1][Mh_AN][kl][j][i]
1945                   + 2.0*Re01y*CDM0[2][Mh_AN][kl][j][i]
1946                   -     Im00y*iDM0[0][Mh_AN][kl][j][i]
1947                   -     Im11y*iDM0[1][Mh_AN][kl][j][i]
1948 	  	  - 2.0*Im01y*CDM0[3][Mh_AN][kl][j][i];
1949 
1950 	      dz =      Re00z*CDM0[0][Mh_AN][kl][j][i]
1951                   +     Re11z*CDM0[1][Mh_AN][kl][j][i]
1952                   + 2.0*Re01z*CDM0[2][Mh_AN][kl][j][i]
1953                   -     Im00z*iDM0[0][Mh_AN][kl][j][i]
1954                   -     Im11z*iDM0[1][Mh_AN][kl][j][i]
1955 	  	  - 2.0*Im01z*CDM0[3][Mh_AN][kl][j][i];
1956 
1957 	      Fx[Mc_AN] += 0.5*dx;
1958 	      Fy[Mc_AN] += 0.5*dy;
1959 	      Fz[Mc_AN] += 0.5*dz;
1960 
1961 	    }
1962 	  }
1963 	}
1964 
1965 	dtime(&Etime_atom);
1966 	time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
1967       }
1968     }
1969 
1970     /****************************************************
1971       add the contribution
1972     ****************************************************/
1973 
1974     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
1975 
1976       Gc_AN = M2G[Mc_AN];
1977 
1978       Gxyz[Gc_AN][17] += Fx[Mc_AN];
1979       Gxyz[Gc_AN][18] += Fy[Mc_AN];
1980       Gxyz[Gc_AN][19] += Fz[Mc_AN];
1981 
1982       if (2<=level_stdout){
1983 	printf("<Force>  force(LDA_U_dual) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
1984 	       myid,Mc_AN,Gc_AN,Fx[Mc_AN],Fy[Mc_AN],Fz[Mc_AN]);fflush(stdout);
1985       }
1986     }
1987 
1988   } /* if ( (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)
1989        && F_U_flag==1 && Hub_U_occupation==2) */
1990 
1991   /****************************************************
1992                  Force arising from HNL
1993   ****************************************************/
1994 
1995   Force_HNL(CDM0, iDM0);
1996 
1997   /****************************************************
1998    freeing of arrays:
1999   ****************************************************/
2000 
2001   if (   (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)
2002 	 && (Hub_U_occupation==1 || Hub_U_occupation==2)
2003 	 && SpinP_switch!=3 ){
2004 
2005     for (i=0; i<3; i++){
2006       for (j=0; j<List_YOUSO[7]; j++){
2007 	free(HUx[i][j]);
2008       }
2009       free(HUx[i]);
2010     }
2011     free(HUx);
2012 
2013     for (i=0; i<3; i++){
2014       for (j=0; j<List_YOUSO[7]; j++){
2015 	free(HUy[i][j]);
2016       }
2017       free(HUy[i]);
2018     }
2019     free(HUy);
2020 
2021     for (i=0; i<3; i++){
2022       for (j=0; j<List_YOUSO[7]; j++){
2023 	free(HUz[i][j]);
2024       }
2025       free(HUz[i]);
2026     }
2027     free(HUz);
2028   }
2029 
2030   free(Fx);
2031   free(Fy);
2032   free(Fz);
2033 
2034   for (j=0; j<List_YOUSO[7]; j++){
2035     free(HVNAx[j]);
2036   }
2037   free(HVNAx);
2038 
2039   for (j=0; j<List_YOUSO[7]; j++){
2040     free(HVNAy[j]);
2041   }
2042   free(HVNAy);
2043 
2044   for (j=0; j<List_YOUSO[7]; j++){
2045     free(HVNAz[j]);
2046   }
2047   free(HVNAz);
2048 
2049   /* CDM0 */
2050   for (k=0; k<=SpinP_switch; k++){
2051     FNAN[0] = 0;
2052     for (Mc_AN=0; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){
2053 
2054       if (Mc_AN==0){
2055         Gc_AN = 0;
2056         tno0 = 1;
2057       }
2058       else{
2059         Gc_AN = F_M2G[Mc_AN];
2060         Cwan = WhatSpecies[Gc_AN];
2061         tno0 = Spe_Total_CNO[Cwan];
2062       }
2063 
2064       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2065 
2066         if (Mc_AN==0){
2067           tno1 = 1;
2068         }
2069         else{
2070           Gh_AN = natn[Gc_AN][h_AN];
2071           Hwan = WhatSpecies[Gh_AN];
2072           tno1 = Spe_Total_CNO[Hwan];
2073         }
2074 
2075         for (i=0; i<tno0; i++){
2076           free(CDM0[k][Mc_AN][h_AN][i]);
2077         }
2078         free(CDM0[k][Mc_AN][h_AN]);
2079       }
2080       free(CDM0[k][Mc_AN]);
2081     }
2082     free(CDM0[k]);
2083   }
2084   free(CDM0);
2085 
2086   free(Snd_CDM0_Size);
2087   free(Rcv_CDM0_Size);
2088 
2089   /* iDM0 */
2090   if ( SO_switch==1 || (Hub_U_switch==1 && SpinP_switch==3) || 1<=Constraint_NCS_switch
2091       || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1 ){
2092 
2093     for (k=0; k<2; k++){
2094 
2095       FNAN[0] = 0;
2096       for (Mc_AN=0; Mc_AN<=(Matomnum+MatomnumF); Mc_AN++){
2097 
2098 	if (Mc_AN==0){
2099 	  Gc_AN = 0;
2100 	  tno0 = 1;
2101 	}
2102 	else{
2103 	  Gc_AN = F_M2G[Mc_AN];
2104 	  Cwan = WhatSpecies[Gc_AN];
2105 	  tno0 = Spe_Total_CNO[Cwan];
2106 	}
2107 
2108 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2109 
2110 	  if (Mc_AN==0){
2111 	    tno1 = 1;
2112 	  }
2113 	  else{
2114 	    Gh_AN = natn[Gc_AN][h_AN];
2115 	    Hwan = WhatSpecies[Gh_AN];
2116 	    tno1 = Spe_Total_CNO[Hwan];
2117 	  }
2118 
2119 	  for (i=0; i<tno0; i++){
2120 	    free(iDM0[k][Mc_AN][h_AN][i]);
2121 	  }
2122           free(iDM0[k][Mc_AN][h_AN]);
2123 	}
2124         free(iDM0[k][Mc_AN]);
2125       }
2126       free(iDM0[k]);
2127     }
2128     free(iDM0);
2129 
2130     free(Snd_iDM0_Size);
2131     free(Rcv_iDM0_Size);
2132   }
2133 
2134   /* for time */
2135 
2136   MPI_Barrier(mpi_comm_level1);
2137   dtime(&TEtime);
2138   time0 = TEtime - TStime;
2139 
2140   return time0;
2141 }
2142 
2143 
2144 
Force3()2145 void Force3()
2146 {
2147   /****************************************************
2148                       #3 of Force
2149 
2150                dn/dx * (VNA + dVH + Vxc)
2151            or
2152                dn/dx * (dVH + Vxc)
2153   ****************************************************/
2154   /* for OpenMP */
2155 
2156   /* MPI */
2157   int numprocs,myid;
2158   MPI_Comm_size(mpi_comm_level1,&numprocs);
2159   MPI_Comm_rank(mpi_comm_level1,&myid);
2160 
2161   /**********************************************************
2162               main loop for calculation of force #3
2163   **********************************************************/
2164   /* shared memory for force */
2165 
2166   double*** dChi0 = (double***)malloc(sizeof(double**)*Max_GridN_Atom);
2167   {
2168 	double** p2 = (double**)malloc(sizeof(double*)*Max_GridN_Atom*List_YOUSO[7]);
2169 	double* p = (double*)malloc(sizeof(double)*Max_GridN_Atom*List_YOUSO[7]*3);
2170 	int Nc;
2171 	for (Nc=0; Nc<Max_GridN_Atom; Nc++){
2172 	  dChi0[Nc] = p2;
2173 	  p2 += List_YOUSO[7];
2174 	  int i;
2175 	  for (i=0; i<List_YOUSO[7]; i++){
2176 	    dChi0[Nc][i] = p;
2177 	    p += 3;
2178 	  }
2179 	}
2180   }
2181 
2182   int gNthrds;
2183 #pragma omp parallel
2184   {
2185      gNthrds = omp_get_num_threads();
2186   }
2187 
2188   double* ai_sh_sum = (double*)malloc(sizeof(double)*3*gNthrds);
2189 
2190 
2191 #pragma omp parallel
2192   {
2193 
2194     /* get info. on OpenMP */
2195 
2196     int OMPID = omp_get_thread_num();
2197     int Nthrds = omp_get_num_threads();
2198 
2199     /* allocation of arrays */
2200 
2201     double** dorbs0 = (double**)malloc(sizeof(double*)*4);
2202     {
2203       int i;
2204       for (i=0; i<4; i++){
2205 	dorbs0[i] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
2206       }
2207     }
2208     double* orbs1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
2209 
2210     double*** dDen_Grid = (double***)malloc(sizeof(double**)*Max_GridN_Atom);
2211     {
2212       double** p2 = (double**)malloc(sizeof(double*)*Max_GridN_Atom*(SpinP_switch+1));
2213       double* p = (double*)malloc(sizeof(double)*Max_GridN_Atom*(SpinP_switch+1)*3);
2214       int Nc;
2215       for (Nc=0; Nc<Max_GridN_Atom; Nc++){
2216 	dDen_Grid[Nc] = p2;
2217 	p2 += (SpinP_switch+1);
2218 	int i;
2219 	for (i=0; i<(SpinP_switch+1); i++){
2220 	  dDen_Grid[Nc][i] = p;
2221 	  p += 3;
2222 	}
2223       }
2224     }
2225     /* allocated as shared memory */
2226 /*
2227 #pragma omp master
2228     {
2229       dChi0 = (double***)malloc(sizeof(double**)*Max_GridN_Atom);
2230       {
2231 	double** p2 = (double**)malloc(sizeof(double*)*Max_GridN_Atom*List_YOUSO[7]);
2232 	double* p = (double*)malloc(sizeof(double)*Max_GridN_Atom*List_YOUSO[7]*3);
2233 	int Nc;
2234 	for (Nc=0; Nc<Max_GridN_Atom; Nc++){
2235 	  dChi0[Nc] = p2;
2236 	  p2 += List_YOUSO[7];
2237 	  int i;
2238 	  for (i=0; i<List_YOUSO[7]; i++){
2239 	    dChi0[Nc][i] = p;
2240 	    p += 3;
2241 	  }
2242 	}
2243       }
2244 
2245       ai_sh_sum = (double*)malloc(sizeof(double)*3*Nthrds);
2246     }
2247 #pragma omp barrier
2248 */
2249     int Mc_AN;
2250     for (Mc_AN = 1; Mc_AN <= Matomnum; Mc_AN++){
2251 
2252       int Gc_AN = M2G[Mc_AN];
2253       int Cwan = WhatSpecies[Gc_AN];
2254       int NO0 = Spe_Total_CNO[Cwan];
2255 
2256       /***********************************
2257   	         calc dOrb0
2258       ***********************************/
2259 
2260       int Nc;
2261 #pragma omp for
2262       for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
2263 
2264 	int GNc = GridListAtom[Mc_AN][Nc];
2265 	int GRc = CellListAtom[Mc_AN][Nc];
2266 	int MNc = MGridListAtom[Mc_AN][Nc];
2267 
2268 	double Cxyz[4];
2269 	Get_Grid_XYZ(GNc,Cxyz);
2270 	double x = Cxyz[1] + atv[GRc][1];
2271 	double y = Cxyz[2] + atv[GRc][2];
2272 	double z = Cxyz[3] + atv[GRc][3];
2273 	double dx = x - Gxyz[Gc_AN][1];
2274 	double dy = y - Gxyz[Gc_AN][2];
2275 	double dz = z - Gxyz[Gc_AN][3];
2276 
2277 	if (Cnt_switch==0){
2278 	  Get_dOrbitals(Cwan,dx,dy,dz,dorbs0);
2279 	}else{
2280 	  Get_Cnt_dOrbitals(Mc_AN,dx,dy,dz,dorbs0);
2281 	}
2282 
2283 	int k;
2284 	for (k=0; k<3; k++){
2285 	  int i;
2286 	  for (i=0; i<NO0; i++){
2287 	    dChi0[Nc][i][k] = dorbs0[k+1][i];
2288 	  }
2289 	}
2290       }/* Nc */
2291 
2292       /***********************************
2293                 calc dDen_Grid
2294       ***********************************/
2295 
2296       /* initialize */
2297 
2298       /* AITUNE  this loop must not be parallelized by omp */
2299       for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
2300 	int i;
2301 	for (i=0; i<=SpinP_switch; i++){
2302 	  int k;
2303 	  for (k=0; k<3; k++){
2304 	    dDen_Grid[Nc][i][k] = 0.0;
2305 	  }
2306 	}
2307       }/* Nc */
2308 
2309 
2310       int h_AN;
2311       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2312 
2313 	int Gh_AN = natn[Gc_AN][h_AN];
2314 	int Mh_AN = F_G2M[Gh_AN];
2315 	int Rnh = ncn[Gc_AN][h_AN];
2316 	int Hwan = WhatSpecies[Gh_AN];
2317 	int NO1 = Spe_Total_CNO[Hwan];
2318 
2319 	int Nog;
2320 #pragma omp for
2321 	for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]; Nog++){
2322 
2323 	  int Nc = GListTAtoms1[Mc_AN][h_AN][Nog];
2324 	  int Nh = GListTAtoms2[Mc_AN][h_AN][Nog];
2325 
2326 	  /*
2327 	  double const * const * ai_dorbs0 = dChi0[Nc];
2328 	  */
2329 
2330 	  double** const ai_dorbs0 = dChi0[Nc];
2331 
2332 	  /* set orbs1 */
2333 
2334 	  if (G2ID[Gh_AN]==myid){
2335 	    int j;
2336 	    for (j=0; j<NO1; j++){
2337 	      orbs1[j] = Orbs_Grid[Mh_AN][Nh][j];
2338 	    }
2339 	  }
2340 	  else{
2341 	    int j;
2342 	    for (j=0; j<NO1; j++) {
2343 	      orbs1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog][j];
2344 	    }
2345 	  }
2346 
2347 
2348 	  int spin;
2349 	  for (spin=0; spin<=SpinP_switch; spin++){
2350 
2351 	    double tmpx = 0.0;
2352 	    double tmpy = 0.0;
2353 	    double tmpz = 0.0;
2354 
2355 	    int i;
2356 	    for (i=0; i<NO0; i++){
2357 	      double tmp0 = 0.0;
2358 	      int j;
2359 	      for (j=0; j<NO1; j++){
2360 		tmp0 += orbs1[j]*DM[0][spin][Mc_AN][h_AN][i][j];
2361 	      }
2362 
2363 	      tmpx += ai_dorbs0[i][0]*tmp0;
2364 	      tmpy += ai_dorbs0[i][1]*tmp0;
2365 	      tmpz += ai_dorbs0[i][2]*tmp0;
2366 	    }
2367 
2368 	    /* due to difference in the definition between density matrix and density */
2369 	    /* AITUNE
2370 	       the sign of the case spin==3 is negative but the negative sign is
2371 	       cancell in the "calc force #3" section. */
2372 
2373 	    if (spin==3){
2374 	      dDen_Grid[Nc][spin][0] -= tmpx;
2375 	      dDen_Grid[Nc][spin][1] -= tmpy;
2376 	      dDen_Grid[Nc][spin][2] -= tmpz;
2377 	    }else{
2378 	      dDen_Grid[Nc][spin][0] += tmpx;
2379 	      dDen_Grid[Nc][spin][1] += tmpy;
2380 	      dDen_Grid[Nc][spin][2] += tmpz;
2381 	    }
2382 
2383 	  }/* spin */
2384 	}/* Nog */
2385       }/* h_AN */
2386 
2387       /***********************************
2388                  calc force #3
2389       ***********************************/
2390 
2391       /* spin collinear */
2392       double sumx = 0.0;
2393       double sumy = 0.0;
2394       double sumz = 0.0;
2395 
2396       if (SpinP_switch==0 || SpinP_switch==1){
2397 
2398 	int spin;
2399 	for (spin=0; spin<=SpinP_switch; spin++){
2400 	  int Nc;
2401 	  for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
2402 
2403 	    int MNc = MGridListAtom[Mc_AN][Nc];
2404 
2405 	    double Vpt;
2406 	    if (0<=MNc){
2407 	      if ( E_Field_switch==1 ){
2408 
2409 		if (ProExpn_VNA==0){
2410 
2411 		  Vpt = F_dVHart_flag*dVHart_Grid[MNc]
2412 		    + F_Vxc_flag*Vxc_Grid[spin][MNc]
2413 		    + F_VNA_flag*VNA_Grid[MNc]
2414 		    + F_VEF_flag*VEF_Grid[MNc];
2415 
2416 		}else{
2417 
2418 		  Vpt = F_dVHart_flag*dVHart_Grid[MNc]
2419 		    + F_Vxc_flag*Vxc_Grid[spin][MNc]
2420 		    + F_VEF_flag*VEF_Grid[MNc];
2421 
2422 		}
2423 
2424 	      }else{
2425 		if (ProExpn_VNA==0){
2426 
2427 		  Vpt = F_dVHart_flag*dVHart_Grid[MNc]
2428 		    + F_Vxc_flag*Vxc_Grid[spin][MNc]
2429 		    + F_VNA_flag*VNA_Grid[MNc];
2430 
2431 		}else{
2432 
2433 		  Vpt = F_dVHart_flag*dVHart_Grid[MNc]
2434 		    + F_Vxc_flag*Vxc_Grid[spin][MNc];
2435 
2436 		}
2437 	      }
2438 	    }else{
2439 	      Vpt = 0.0;
2440 	    }
2441 
2442 	    sumx += dDen_Grid[Nc][spin][0]*Vpt;
2443 	    sumy += dDen_Grid[Nc][spin][1]*Vpt;
2444 	    sumz += dDen_Grid[Nc][spin][2]*Vpt;
2445 
2446 	  }
2447 	}
2448 
2449 	if (SpinP_switch==0){
2450 	  sumx = 4.0*sumx;
2451 	  sumy = 4.0*sumy;
2452 	  sumz = 4.0*sumz;
2453 	}else if (SpinP_switch==1){
2454 	  sumx = 2.0*sumx;
2455 	  sumy = 2.0*sumy;
2456 	  sumz = 2.0*sumz;
2457 	}
2458 
2459 
2460       }else if (SpinP_switch==3){
2461 
2462 	/* spin non-collinear */
2463 
2464 	int Nc;
2465 	for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
2466 
2467 	  int MNc = MGridListAtom[Mc_AN][Nc];
2468 
2469 	  double ReVpt11;
2470 	  double ReVpt22;
2471 	  double ReVpt21;
2472 	  double ImVpt21;
2473 
2474 	  if (0<=MNc){
2475 	    if ( E_Field_switch==1 ){
2476 
2477 	      if (ProExpn_VNA==0){
2478 
2479 		ReVpt11 = F_dVHart_flag*dVHart_Grid[MNc]
2480 		  + F_Vxc_flag*Vxc_Grid[0][MNc]
2481 		  + F_VNA_flag*VNA_Grid[MNc]
2482 		  + F_VEF_flag*VEF_Grid[MNc];
2483 
2484 		ReVpt22 = F_dVHart_flag*dVHart_Grid[MNc]
2485 		  + F_Vxc_flag*Vxc_Grid[1][MNc]
2486 		  + F_VNA_flag*VNA_Grid[MNc]
2487 		  + F_VEF_flag*VEF_Grid[MNc];
2488 
2489 		ReVpt21 =  F_Vxc_flag*Vxc_Grid[2][MNc];
2490 		ImVpt21 = -F_Vxc_flag*Vxc_Grid[3][MNc];
2491 	      }else{
2492 
2493 		ReVpt11 = F_dVHart_flag*dVHart_Grid[MNc]
2494 		  + F_Vxc_flag*Vxc_Grid[0][MNc]
2495 		  + F_VEF_flag*VEF_Grid[MNc];
2496 
2497 		ReVpt22 = F_dVHart_flag*dVHart_Grid[MNc]
2498 		  + F_Vxc_flag*Vxc_Grid[1][MNc]
2499 		  + F_VEF_flag*VEF_Grid[MNc];
2500 
2501 		ReVpt21 =  F_Vxc_flag*Vxc_Grid[2][MNc];
2502 		ImVpt21 = -F_Vxc_flag*Vxc_Grid[3][MNc];
2503 	      }
2504 
2505 	    }else{
2506 
2507 	      if (ProExpn_VNA==0){
2508 
2509 		ReVpt11 = F_dVHart_flag*dVHart_Grid[MNc]
2510 		  + F_Vxc_flag*Vxc_Grid[0][MNc]
2511 		  + F_VNA_flag*VNA_Grid[MNc];
2512 
2513 		ReVpt22 = F_dVHart_flag*dVHart_Grid[MNc]
2514 		  + F_Vxc_flag*Vxc_Grid[1][MNc]
2515 		  + F_VNA_flag*VNA_Grid[MNc];
2516 
2517 		ReVpt21 =  F_Vxc_flag*Vxc_Grid[2][MNc];
2518 		ImVpt21 = -F_Vxc_flag*Vxc_Grid[3][MNc];
2519 
2520 	      }else{
2521 
2522 		ReVpt11 = F_dVHart_flag*dVHart_Grid[MNc] + F_Vxc_flag*Vxc_Grid[0][MNc];
2523 		ReVpt22 = F_dVHart_flag*dVHart_Grid[MNc] + F_Vxc_flag*Vxc_Grid[1][MNc];
2524 
2525 		ReVpt21 =  F_Vxc_flag*Vxc_Grid[2][MNc];
2526 		ImVpt21 = -F_Vxc_flag*Vxc_Grid[3][MNc];
2527 	      }
2528 
2529 	    }
2530 	  }else{
2531 	    ReVpt11 = 0.0;
2532 	    ReVpt22 = 0.0;
2533 	    ReVpt21 = 0.0;
2534 	    ImVpt21 = 0.0;
2535 	  }
2536 
2537 	  sumx +=      dDen_Grid[Nc][0][0]*ReVpt11;
2538 	  sumx +=      dDen_Grid[Nc][1][0]*ReVpt22;
2539 	  sumx +=  2.0*dDen_Grid[Nc][2][0]*ReVpt21;
2540 	  sumx += -2.0*dDen_Grid[Nc][3][0]*ImVpt21;
2541 	  /* AITUNE  sign is opposite by tune in the above section */
2542 
2543 
2544 	  sumy +=      dDen_Grid[Nc][0][1]*ReVpt11;
2545 	  sumy +=      dDen_Grid[Nc][1][1]*ReVpt22;
2546 	  sumy +=  2.0*dDen_Grid[Nc][2][1]*ReVpt21;
2547 	  sumy += -2.0*dDen_Grid[Nc][3][1]*ImVpt21;
2548 	  /* AITUNE  sign is opposite by tune in the above section */
2549 
2550 
2551 	  sumz +=      dDen_Grid[Nc][0][2]*ReVpt11;
2552 	  sumz +=      dDen_Grid[Nc][1][2]*ReVpt22;
2553 	  sumz +=  2.0*dDen_Grid[Nc][2][2]*ReVpt21;
2554 	  sumz += -2.0*dDen_Grid[Nc][3][2]*ImVpt21;
2555 	  /* AITUNE  sign is opposite by tune in the above section */
2556 
2557 	}
2558 
2559 	sumx = 2.0*sumx;
2560 	sumy = 2.0*sumy;
2561 	sumz = 2.0*sumz;
2562 
2563       }
2564 
2565       /* gather sumx, sumy, and sumz into Gxyz[Gc_AN][17,18,19] */
2566 
2567       ai_sh_sum[OMPID*3  ] = sumx*GridVol;
2568       ai_sh_sum[OMPID*3+1] = sumy*GridVol;
2569       ai_sh_sum[OMPID*3+2] = sumz*GridVol;
2570 
2571 #pragma omp barrier
2572 #pragma omp master
2573       {
2574 	int t;
2575 	for (t = 0; t < Nthrds*3; t+=3){
2576 	  Gxyz[Gc_AN][17] += ai_sh_sum[t];
2577 	  Gxyz[Gc_AN][18] += ai_sh_sum[t+1];
2578 	  Gxyz[Gc_AN][19] += ai_sh_sum[t+2];
2579 	}
2580       }
2581 
2582 
2583       if (2<=level_stdout){
2584 	printf("<Force>  force(3) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
2585 	       myid,Mc_AN,Gc_AN,sumx*GridVol,sumy*GridVol,sumz*GridVol);fflush(stdout);
2586       }
2587 
2588 
2589     } /* Mc_AN */
2590 
2591     /* freeing of arrays */
2592 
2593     free(dDen_Grid[0][0]);
2594     free(dDen_Grid[0]);
2595     free(dDen_Grid);
2596 
2597     free(orbs1);
2598 
2599     int i;
2600     for (i=0; i<4; i++){
2601       free(dorbs0[i]);
2602     }
2603     free(dorbs0);
2604 
2605   } /* #pragma omp parallel */
2606 
2607   /* free */
2608   free(dChi0[0][0]);
2609   free(dChi0[0]);
2610   free(dChi0);
2611   free(ai_sh_sum);
2612 }
2613 
2614 
2615 
Force3_org3665()2616 void Force3_org3665()
2617 {
2618   /****************************************************
2619                       #3 of Force
2620 
2621                dn/dx * (VNA + dVH + Vxc)
2622            or
2623                dn/dx * (dVH + Vxc)
2624   ****************************************************/
2625 
2626   int Mc_AN,Gc_AN,Cwan,Hwan,NO0,NO1;
2627   int i,j,k,Nc,Nh,GNc,GRc,MNc,GNh,GRh;
2628   int h_AN,Gh_AN,Mh_AN,Rnh,spin,Nog;
2629   double ***dDen_Grid;
2630   double sum,tmp0,r,dx,dy,dz;
2631   double sumx,sumy,sumz;
2632   double x,y,z,x1,y1,z1,Vpt;
2633   double Cxyz[4];
2634   double **dorbs0,*orbs1,***dChi0;
2635   double ReVpt11,ReVpt22,ReVpt21,ImVpt21;
2636   int numprocs,myid,tag=999,ID,IDS,IDR;
2637   /* for OpenMP */
2638   int OMPID,Nthrds,Nprocs;
2639 
2640   /* MPI */
2641   MPI_Comm_size(mpi_comm_level1,&numprocs);
2642   MPI_Comm_rank(mpi_comm_level1,&myid);
2643 
2644   /**********************************************************
2645               main loop for calculation of force #3
2646   **********************************************************/
2647 
2648 #pragma omp parallel shared(Orbs_Grid_FNAN,G2ID,myid,level_stdout,GridVol,F_VEF_flag,VEF_Grid,F_VNA_flag,VNA_Grid,F_Vxc_flag,Vxc_Grid,dVHart_Grid,F_dVHart_flag,ProExpn_VNA,E_Field_switch,DM,Orbs_Grid,GListTAtoms2,GListTAtoms1,NumOLG,ncn,F_G2M,natn,FNAN,Max_GridN_Atom,SpinP_switch,List_YOUSO,Cnt_switch,Gxyz,atv,MGridListAtom,CellListAtom,GridListAtom,GridN_Atom,Spe_Total_CNO,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Gc_AN,Cwan,NO0,Nc,GNc,GRc,MNc,Cxyz,x,y,z,dx,dy,dz,dorbs0,orbs1,dDen_Grid,dChi0,i,k,h_AN,Gh_AN,Mh_AN,Rnh,Hwan,NO1,spin,Nog,Nh,j,sum,tmp0,sumx,sumy,sumz,Vpt,ReVpt11,ReVpt22,ReVpt21,ImVpt21)
2649   {
2650 
2651     /* allocation of arrays */
2652 
2653     dorbs0 = (double**)malloc(sizeof(double*)*4);
2654     for (i=0; i<4; i++){
2655       dorbs0[i] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
2656     }
2657 
2658     orbs1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
2659 
2660     dDen_Grid = (double***)malloc(sizeof(double**)*(SpinP_switch+1));
2661     for (i=0; i<(SpinP_switch+1); i++){
2662       dDen_Grid[i] = (double**)malloc(sizeof(double*)*3);
2663       for (k=0; k<3; k++){
2664 	dDen_Grid[i][k] = (double*)malloc(sizeof(double)*Max_GridN_Atom);
2665       }
2666     }
2667 
2668     dChi0 = (double***)malloc(sizeof(double**)*3);
2669     for (k=0; k<3; k++){
2670       dChi0[k] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
2671       for (i=0; i<List_YOUSO[7]; i++){
2672 	dChi0[k][i] = (double*)malloc(sizeof(double)*Max_GridN_Atom);
2673       }
2674     }
2675 
2676     /* get info. on OpenMP */
2677 
2678     OMPID = omp_get_thread_num();
2679     Nthrds = omp_get_num_threads();
2680     Nprocs = omp_get_num_procs();
2681 
2682     for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
2683 
2684       Gc_AN = M2G[Mc_AN];
2685       Cwan = WhatSpecies[Gc_AN];
2686       NO0 = Spe_Total_CNO[Cwan];
2687 
2688       /***********************************
2689                calc dOrb0
2690       ***********************************/
2691 
2692       for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
2693 
2694 	GNc = GridListAtom[Mc_AN][Nc];
2695 	GRc = CellListAtom[Mc_AN][Nc];
2696 	MNc = MGridListAtom[Mc_AN][Nc];
2697 
2698 	Get_Grid_XYZ(GNc,Cxyz);
2699 	x = Cxyz[1] + atv[GRc][1];
2700 	y = Cxyz[2] + atv[GRc][2];
2701 	z = Cxyz[3] + atv[GRc][3];
2702 	dx = x - Gxyz[Gc_AN][1];
2703 	dy = y - Gxyz[Gc_AN][2];
2704 	dz = z - Gxyz[Gc_AN][3];
2705 
2706 	if (Cnt_switch==0)
2707 	  Get_dOrbitals(Cwan,dx,dy,dz,dorbs0);
2708 	else
2709 	  Get_Cnt_dOrbitals(Mc_AN,dx,dy,dz,dorbs0);
2710 
2711 	for (k=0; k<3; k++){
2712 	  for (i=0; i<NO0; i++){
2713 	    dChi0[k][i][Nc] = dorbs0[k+1][i];
2714 	  }
2715 	}
2716       }
2717 
2718       /***********************************
2719               calc dDen_Grid
2720       ***********************************/
2721 
2722       /* initialize */
2723       for (i=0; i<=SpinP_switch; i++){
2724 	for (k=0; k<3; k++){
2725 	  for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
2726 	    dDen_Grid[i][k][Nc] = 0.0;
2727 	  }
2728 	}
2729       }
2730 
2731       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2732 
2733 	Gh_AN = natn[Gc_AN][h_AN];
2734 	Mh_AN = F_G2M[Gh_AN];
2735 	Rnh = ncn[Gc_AN][h_AN];
2736 	Hwan = WhatSpecies[Gh_AN];
2737 	NO1 = Spe_Total_CNO[Hwan];
2738 
2739 	for (spin=0; spin<=SpinP_switch; spin++){
2740 	  for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]; Nog++){
2741 
2742 	    Nc = GListTAtoms1[Mc_AN][h_AN][Nog];
2743 	    Nh = GListTAtoms2[Mc_AN][h_AN][Nog];
2744 
2745 	    for (k=0; k<3; k++){
2746 	      for (i=0; i<NO0; i++){
2747 		dorbs0[k][i] = dChi0[k][i][Nc];
2748 	      }
2749 	    }
2750 
2751 	    /* set orbs1 */
2752 
2753             if (G2ID[Gh_AN]==myid){
2754   	      for (j=0; j<NO1; j++) orbs1[j] = Orbs_Grid[Mh_AN][Nh][j];/* AITUNE */
2755 	    }
2756             else{
2757   	      for (j=0; j<NO1; j++) orbs1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog][j];/* AITUNE */
2758             }
2759 
2760 	    for (k=0; k<3; k++){
2761 	      sum = 0.0;
2762 	      for (i=0; i<NO0; i++){
2763 		tmp0 = 0.0;
2764 		for (j=0; j<NO1; j++){
2765 		  tmp0 += orbs1[j]*DM[0][spin][Mc_AN][h_AN][i][j];
2766 		}
2767 		sum += dorbs0[k][i]*tmp0;
2768 	      }
2769 
2770 	      /* due to difference in the definition between density matrix and density */
2771 	      if (spin==3)
2772 		dDen_Grid[spin][k][Nc] -= sum;
2773 	      else
2774 		dDen_Grid[spin][k][Nc] += sum;
2775 
2776 	    }
2777 	  }
2778 	}
2779       }
2780 
2781       /***********************************
2782                calc force #3
2783       ***********************************/
2784 
2785       /* spin collinear */
2786 
2787       if (SpinP_switch==0 || SpinP_switch==1){
2788 
2789 	sumx = 0.0;
2790 	sumy = 0.0;
2791 	sumz = 0.0;
2792 
2793 	for (spin=0; spin<=SpinP_switch; spin++){
2794 	  for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
2795 
2796 	    MNc = MGridListAtom[Mc_AN][Nc];
2797 
2798 	    if (0<=MNc){
2799 	      if ( E_Field_switch==1 ){
2800 
2801 		if (ProExpn_VNA==0){
2802 
2803 		  Vpt = F_dVHart_flag*dVHart_Grid[MNc]
2804                       + F_Vxc_flag*Vxc_Grid[spin][MNc]
2805                       + F_VNA_flag*VNA_Grid[MNc]
2806                       + F_VEF_flag*VEF_Grid[MNc];
2807 
2808 		}
2809 		else{
2810 
2811 		  Vpt = F_dVHart_flag*dVHart_Grid[MNc]
2812                       + F_Vxc_flag*Vxc_Grid[spin][MNc]
2813                       + F_VEF_flag*VEF_Grid[MNc];
2814 
2815 		}
2816 
2817 	      }
2818 	      else{
2819 		if (ProExpn_VNA==0){
2820 
2821 		  Vpt = F_dVHart_flag*dVHart_Grid[MNc]
2822                       + F_Vxc_flag*Vxc_Grid[spin][MNc]
2823                       + F_VNA_flag*VNA_Grid[MNc];
2824 
2825 		}
2826 		else{
2827 
2828 		  Vpt = F_dVHart_flag*dVHart_Grid[MNc]
2829                       + F_Vxc_flag*Vxc_Grid[spin][MNc];
2830 
2831 		}
2832 	      }
2833 	    }
2834 	    else
2835 	      Vpt = 0.0;
2836 
2837 	    sumx += dDen_Grid[spin][0][Nc]*Vpt;
2838 	    sumy += dDen_Grid[spin][1][Nc]*Vpt;
2839 	    sumz += dDen_Grid[spin][2][Nc]*Vpt;
2840 
2841 	  }
2842 	}
2843 
2844 	if (SpinP_switch==0){
2845 	  sumx = 4.0*sumx;
2846 	  sumy = 4.0*sumy;
2847 	  sumz = 4.0*sumz;
2848 	}
2849 	else if (SpinP_switch==1){
2850 	  sumx = 2.0*sumx;
2851 	  sumy = 2.0*sumy;
2852 	  sumz = 2.0*sumz;
2853 	}
2854       }
2855 
2856       /* spin non-collinear */
2857 
2858       else if (SpinP_switch==3){
2859 
2860 	sumx = 0.0;
2861 	sumy = 0.0;
2862 	sumz = 0.0;
2863 
2864 	for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
2865 
2866 	  MNc = MGridListAtom[Mc_AN][Nc];
2867 
2868 	  if (0<=MNc){
2869 	    if ( E_Field_switch==1 ){
2870 
2871 	      if (ProExpn_VNA==0){
2872 
2873 		ReVpt11 = F_dVHart_flag*dVHart_Grid[MNc]
2874 		        + F_Vxc_flag*Vxc_Grid[0][MNc]
2875 		        + F_VNA_flag*VNA_Grid[MNc]
2876 		        + F_VEF_flag*VEF_Grid[MNc];
2877 
2878 		ReVpt22 = F_dVHart_flag*dVHart_Grid[MNc]
2879   		        + F_Vxc_flag*Vxc_Grid[1][MNc]
2880 		        + F_VNA_flag*VNA_Grid[MNc]
2881 		        + F_VEF_flag*VEF_Grid[MNc];
2882 
2883 		ReVpt21 =  F_Vxc_flag*Vxc_Grid[2][MNc];
2884 		ImVpt21 = -F_Vxc_flag*Vxc_Grid[3][MNc];
2885 	      }
2886 	      else{
2887 
2888 		ReVpt11 = F_dVHart_flag*dVHart_Grid[MNc]
2889 	 	        + F_Vxc_flag*Vxc_Grid[0][MNc]
2890 		        + F_VEF_flag*VEF_Grid[MNc];
2891 
2892 		ReVpt22 = F_dVHart_flag*dVHart_Grid[MNc]
2893 		        + F_Vxc_flag*Vxc_Grid[1][MNc]
2894 		        + F_VEF_flag*VEF_Grid[MNc];
2895 
2896 		ReVpt21 =  F_Vxc_flag*Vxc_Grid[2][MNc];
2897 		ImVpt21 = -F_Vxc_flag*Vxc_Grid[3][MNc];
2898 	      }
2899 
2900 	    }
2901 	    else{
2902 
2903 	      if (ProExpn_VNA==0){
2904 
2905 		ReVpt11 = F_dVHart_flag*dVHart_Grid[MNc]
2906 		        + F_Vxc_flag*Vxc_Grid[0][MNc]
2907 		        + F_VNA_flag*VNA_Grid[MNc];
2908 
2909 		ReVpt22 = F_dVHart_flag*dVHart_Grid[MNc]
2910 		        + F_Vxc_flag*Vxc_Grid[1][MNc]
2911 		        + F_VNA_flag*VNA_Grid[MNc];
2912 
2913 		ReVpt21 =  F_Vxc_flag*Vxc_Grid[2][MNc];
2914 		ImVpt21 = -F_Vxc_flag*Vxc_Grid[3][MNc];
2915 	      }
2916 	      else{
2917 
2918 		ReVpt11 = F_dVHart_flag*dVHart_Grid[MNc] + F_Vxc_flag*Vxc_Grid[0][MNc];
2919 		ReVpt22 = F_dVHart_flag*dVHart_Grid[MNc] + F_Vxc_flag*Vxc_Grid[1][MNc];
2920 
2921 		ReVpt21 =  F_Vxc_flag*Vxc_Grid[2][MNc];
2922 		ImVpt21 = -F_Vxc_flag*Vxc_Grid[3][MNc];
2923 	      }
2924 
2925 	    }
2926 	  }
2927 	  else{
2928 	    ReVpt11 = 0.0;
2929 	    ReVpt22 = 0.0;
2930 	    ReVpt21 = 0.0;
2931 	    ImVpt21 = 0.0;
2932 	  }
2933 
2934 	  sumx +=      dDen_Grid[0][0][Nc]*ReVpt11;
2935 	  sumx +=      dDen_Grid[1][0][Nc]*ReVpt22;
2936 	  sumx +=  2.0*dDen_Grid[2][0][Nc]*ReVpt21;
2937 	  sumx += -2.0*dDen_Grid[3][0][Nc]*ImVpt21;
2938 
2939 	  sumy +=      dDen_Grid[0][1][Nc]*ReVpt11;
2940 	  sumy +=      dDen_Grid[1][1][Nc]*ReVpt22;
2941 	  sumy +=  2.0*dDen_Grid[2][1][Nc]*ReVpt21;
2942 	  sumy += -2.0*dDen_Grid[3][1][Nc]*ImVpt21;
2943 
2944 	  sumz +=      dDen_Grid[0][2][Nc]*ReVpt11;
2945 	  sumz +=      dDen_Grid[1][2][Nc]*ReVpt22;
2946 	  sumz +=  2.0*dDen_Grid[2][2][Nc]*ReVpt21;
2947 	  sumz += -2.0*dDen_Grid[3][2][Nc]*ImVpt21;
2948 
2949 	}
2950 
2951 	sumx = 2.0*sumx;
2952 	sumy = 2.0*sumy;
2953 	sumz = 2.0*sumz;
2954 
2955       }
2956 
2957       Gxyz[Gc_AN][17] += sumx*GridVol;
2958       Gxyz[Gc_AN][18] += sumy*GridVol;
2959       Gxyz[Gc_AN][19] += sumz*GridVol;
2960 
2961       if (2<=level_stdout){
2962 	printf("<Force>  force(3) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
2963 	       myid,Mc_AN,Gc_AN,sumx*GridVol,sumy*GridVol,sumz*GridVol);fflush(stdout);
2964       }
2965 
2966     } /* Mc_AN */
2967 
2968     /* freeing of arrays */
2969 
2970     for (k=0; k<3; k++){
2971       for (i=0; i<List_YOUSO[7]; i++){
2972 	free(dChi0[k][i]);
2973       }
2974       free(dChi0[k]);
2975     }
2976     free(dChi0);
2977 
2978     for (i=0; i<(SpinP_switch+1); i++){
2979       for (k=0; k<3; k++){
2980 	free(dDen_Grid[i][k]);
2981       }
2982       free(dDen_Grid[i]);
2983     }
2984     free(dDen_Grid);
2985 
2986     free(orbs1);
2987 
2988     for (i=0; i<4; i++){
2989       free(dorbs0[i]);
2990     }
2991     free(dorbs0);
2992 
2993   } /* #pragma omp parallel */
2994 
2995   /* free */
2996 
2997 }
2998 
2999 
3000 
3001 
Force4()3002 void Force4()
3003 {
3004   /****************************************************
3005                       #4 of Force
3006 
3007                       n * dVNA/dx
3008   ****************************************************/
3009 
3010   int Mc_AN,Gc_AN,Cwan,Hwan,NO0,NO1;
3011   int i,j,k,Nc,Nh,GNc,GRc,MNc;
3012   int h_AN,Gh_AN,Mh_AN,Rnh,spin,Nog;
3013   double sum,tmp0,r,dx,dy,dz;
3014   double dvx,dvy,dvz;
3015   double sumx,sumy,sumz;
3016   double x,y,z,den;
3017   double Cxyz[4];
3018 
3019   /**********************************************************
3020               main loop for calculation of force #4
3021   **********************************************************/
3022 
3023   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
3024 
3025     Gc_AN = M2G[Mc_AN];
3026     Cwan = WhatSpecies[Gc_AN];
3027     NO0 = Spe_Total_CNO[Cwan];
3028 
3029     /***********************************
3030                  summation
3031     ***********************************/
3032 
3033     sumx = 0.0;
3034     sumy = 0.0;
3035     sumz = 0.0;
3036 
3037     for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
3038 
3039       GNc = GridListAtom[Mc_AN][Nc];
3040       GRc = CellListAtom[Mc_AN][Nc];
3041       MNc = MGridListAtom[Mc_AN][Nc];
3042 
3043       Get_Grid_XYZ(GNc,Cxyz);
3044       x = Cxyz[1] + atv[GRc][1];
3045       y = Cxyz[2] + atv[GRc][2];
3046       z = Cxyz[3] + atv[GRc][3];
3047       dx = Gxyz[Gc_AN][1] - x;
3048       dy = Gxyz[Gc_AN][2] - y;
3049       dz = Gxyz[Gc_AN][3] - z;
3050       r = sqrt(dx*dx + dy*dy + dz*dz);
3051 
3052       /* for empty atoms or finite elemens basis */
3053       if (r<1.0e-10) r = 1.0e-10;
3054 
3055       if (1.0e-14<r){
3056         tmp0 = Dr_VNAF(Cwan,r);
3057         dvx = tmp0*dx/r;
3058         dvy = tmp0*dy/r;
3059         dvz = tmp0*dz/r;
3060       }
3061       else{
3062         dvx = 0.0;
3063         dvy = 0.0;
3064         dvz = 0.0;
3065       }
3066 
3067       den = Density_Grid[0][MNc] + Density_Grid[1][MNc];
3068       sumx += den*dvx;
3069       sumy += den*dvy;
3070       sumz += den*dvz;
3071     }
3072 
3073     Gxyz[Gc_AN][17] += sumx*GridVol;
3074     Gxyz[Gc_AN][18] += sumy*GridVol;
3075     Gxyz[Gc_AN][19] += sumz*GridVol;
3076 
3077     /*
3078     if (2<=level_stdout){
3079       printf("<Force>  force(4) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
3080               myid,Mc_AN,Gc_AN,sumx*GridVol,sumy*GridVol,sumz*GridVol);fflush(stdout);
3081     }
3082     */
3083 
3084   }
3085 }
3086 
3087 
3088 
3089 
3090 
Force_HNL(double ***** CDM0,double ***** iDM0)3091 void Force_HNL(double *****CDM0, double *****iDM0)
3092 {
3093   /****************************************************
3094                   Force arising from HNL
3095   ****************************************************/
3096 
3097   int Mc_AN,Gc_AN,Cwan,i,j,h_AN,q_AN,Mq_AN,start_q_AN;
3098   int jan,kl,km,kl1,Qwan,Gq_AN,Gh_AN,Mh_AN,Hwan,ian;
3099   int l1,l2,l3,l,LL,Mul1,tno0,ncp,so;
3100   int tno1,tno2,size1,size2,n,kk,num,po,po1,po2;
3101   int numprocs,myid,tag=999,ID,IDS,IDR;
3102   int **S_array,**R_array;
3103   int S_comm_flag,R_comm_flag;
3104   int SA_num,q,Sc_AN,GSc_AN,smul;
3105   int Sc_wan,Sh_AN,GSh_AN,Sh_wan;
3106   int Sh_AN2,fan,jg,j0,jg0,Mj_AN0;
3107   int Original_Mc_AN;
3108 
3109   double rcutA,rcutB,rcut;
3110   double dEx,dEy,dEz,ene,pref;
3111   double Stime_atom, Etime_atom;
3112   dcomplex ***Hx,***Hy,***Hz;
3113   dcomplex ***Hx0,***Hy0,***Hz0;
3114   dcomplex ***Hx1,***Hy1,***Hz1;
3115   int *Snd_DS_NL_Size,*Rcv_DS_NL_Size;
3116   int *Indicator;
3117   double *tmp_array;
3118   double *tmp_array2;
3119 
3120   /* for OpenMP */
3121   int OMPID,Nthrds,Nthrds0,Nprocs,Nloop,ODNloop;
3122   int *OneD2h_AN,*OneD2q_AN;
3123   double *dEx_threads;
3124   double *dEy_threads;
3125   double *dEz_threads;
3126   double stime,etime;
3127   double stime1,etime1;
3128 
3129   MPI_Status stat;
3130   MPI_Request request;
3131 
3132   /* MPI */
3133 
3134   MPI_Comm_size(mpi_comm_level1,&numprocs);
3135   MPI_Comm_rank(mpi_comm_level1,&myid);
3136 
3137   dtime(&stime);
3138 
3139   /****************************
3140        allocation of arrays
3141   *****************************/
3142 
3143   Indicator = (int*)malloc(sizeof(int)*numprocs);
3144 
3145   S_array = (int**)malloc(sizeof(int*)*numprocs);
3146   for (ID=0; ID<numprocs; ID++){
3147     S_array[ID] = (int*)malloc(sizeof(int)*3);
3148   }
3149 
3150   R_array = (int**)malloc(sizeof(int*)*numprocs);
3151   for (ID=0; ID<numprocs; ID++){
3152     R_array[ID] = (int*)malloc(sizeof(int)*3);
3153   }
3154 
3155   Snd_DS_NL_Size = (int*)malloc(sizeof(int)*numprocs);
3156   Rcv_DS_NL_Size = (int*)malloc(sizeof(int)*numprocs);
3157 
3158   /* initialize the temporal array storing the force contribution */
3159 
3160   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
3161     Gc_AN = F_M2G[Mc_AN];
3162     Gxyz[Gc_AN][41] = 0.0;
3163     Gxyz[Gc_AN][42] = 0.0;
3164     Gxyz[Gc_AN][43] = 0.0;
3165   }
3166 
3167   /*************************************************************
3168                     contraction of DS_NL
3169      Note: DS_NL is overwritten by CntDS_NL in Cont_Matrix1().
3170   *************************************************************/
3171 
3172   if (Cnt_switch==1){
3173     for (so=0; so<(SO_switch+1); so++){
3174       Cont_Matrix1(DS_NL[so][0],CntDS_NL[so][0]);
3175       Cont_Matrix1(DS_NL[so][1],CntDS_NL[so][1]);
3176       Cont_Matrix1(DS_NL[so][2],CntDS_NL[so][2]);
3177       Cont_Matrix1(DS_NL[so][3],CntDS_NL[so][3]);
3178     }
3179   }
3180 
3181   /*****************************************}**********************
3182       THE FIRST CASE:
3183       In case of I=i or I=j
3184       for d [ \sum_k <i|k>ek<k|j> ]/dRI
3185   ****************************************************************/
3186 
3187   /*******************************************************
3188    *******************************************************
3189        multiplying overlap integrals WITH COMMUNICATION
3190 
3191        In case of I=i or I=j
3192        for d [ \sum_k <i|k>ek<k|j> ]/dRI
3193    *******************************************************
3194    *******************************************************/
3195 
3196   MPI_Barrier(mpi_comm_level1);
3197   dtime(&stime);
3198 
3199   Hx0 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3200   for (i=0; i<3; i++){
3201     Hx0[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3202     for (j=0; j<List_YOUSO[7]; j++){
3203       Hx0[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3204     }
3205   }
3206 
3207   Hy0 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3208   for (i=0; i<3; i++){
3209     Hy0[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3210     for (j=0; j<List_YOUSO[7]; j++){
3211       Hy0[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3212     }
3213   }
3214 
3215   Hz0 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3216   for (i=0; i<3; i++){
3217     Hz0[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3218     for (j=0; j<List_YOUSO[7]; j++){
3219       Hz0[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3220     }
3221   }
3222 
3223   Hx1 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3224   for (i=0; i<3; i++){
3225     Hx1[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3226     for (j=0; j<List_YOUSO[7]; j++){
3227       Hx1[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3228     }
3229   }
3230 
3231   Hy1 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3232   for (i=0; i<3; i++){
3233     Hy1[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3234     for (j=0; j<List_YOUSO[7]; j++){
3235       Hy1[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3236     }
3237   }
3238 
3239   Hz1 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3240   for (i=0; i<3; i++){
3241     Hz1[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3242     for (j=0; j<List_YOUSO[7]; j++){
3243       Hz1[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3244     }
3245   }
3246 
3247   for (ID=0; ID<numprocs; ID++){
3248     F_Snd_Num_WK[ID] = 0;
3249     F_Rcv_Num_WK[ID] = 0;
3250   }
3251 
3252   do {
3253 
3254     /***********************************
3255             set the size of data
3256     ************************************/
3257 
3258     for (ID=0; ID<numprocs; ID++){
3259 
3260       IDS = (myid + ID) % numprocs;
3261       IDR = (myid - ID + numprocs) % numprocs;
3262 
3263       /* find the data size to send the block data */
3264 
3265       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ){
3266 
3267 	size1 = 0;
3268 	n = F_Snd_Num_WK[IDS];
3269 
3270 	Mc_AN = Snd_MAN[IDS][n];
3271 	Gc_AN = Snd_GAN[IDS][n];
3272 	Cwan = WhatSpecies[Gc_AN];
3273 	tno1 = Spe_Total_NO[Cwan];
3274 
3275 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
3276 	  Gh_AN = natn[Gc_AN][h_AN];
3277 	  Hwan = WhatSpecies[Gh_AN];
3278 	  tno2 = Spe_Total_VPS_Pro[Hwan];
3279 	  size1 += (VPS_j_dependency[Hwan]+1)*tno1*tno2;
3280 	}
3281 
3282 	Snd_DS_NL_Size[IDS] = size1;
3283 	MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
3284       }
3285       else{
3286 	Snd_DS_NL_Size[IDS] = 0;
3287       }
3288 
3289       /* receiving of the size of the data */
3290 
3291       if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ){
3292 	MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
3293 	Rcv_DS_NL_Size[IDR] = size2;
3294       }
3295       else{
3296 	Rcv_DS_NL_Size[IDR] = 0;
3297       }
3298 
3299       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) )  MPI_Wait(&request,&stat);
3300 
3301     } /* ID */
3302 
3303     /***********************************
3304                data transfer
3305     ************************************/
3306 
3307     for (ID=0; ID<numprocs; ID++){
3308 
3309       IDS = (myid + ID) % numprocs;
3310       IDR = (myid - ID + numprocs) % numprocs;
3311 
3312       /******************************
3313             sending of the data
3314       ******************************/
3315 
3316       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ){
3317 
3318 	size1 = Snd_DS_NL_Size[IDS];
3319 
3320 	/* allocation of the array */
3321 
3322 	tmp_array = (double*)malloc(sizeof(double)*size1);
3323 
3324 	/* multidimentional array to the vector array */
3325 
3326 	num = 0;
3327 	n = F_Snd_Num_WK[IDS];
3328 
3329 	Mc_AN = Snd_MAN[IDS][n];
3330 	Gc_AN = Snd_GAN[IDS][n];
3331 	Cwan = WhatSpecies[Gc_AN];
3332 	tno1 = Spe_Total_NO[Cwan];
3333 
3334 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
3335 	  Gh_AN = natn[Gc_AN][h_AN];
3336 	  Hwan = WhatSpecies[Gh_AN];
3337 	  tno2 = Spe_Total_VPS_Pro[Hwan];
3338 
3339 	  for (so=0; so<=VPS_j_dependency[Hwan]; so++){
3340 	    for (i=0; i<tno1; i++){
3341 	      for (j=0; j<tno2; j++){
3342 		tmp_array[num] = DS_NL[so][0][Mc_AN][h_AN][i][j];
3343 		num++;
3344 	      }
3345 	    }
3346 	  }
3347 	}
3348 
3349 	MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
3350       }
3351 
3352       /******************************
3353         receiving of the block data
3354       ******************************/
3355 
3356       if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ){
3357 
3358 	size2 = Rcv_DS_NL_Size[IDR];
3359 	tmp_array2 = (double*)malloc(sizeof(double)*size2);
3360 	MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
3361 
3362 	/* store */
3363 
3364 	num = 0;
3365 	n = F_Rcv_Num_WK[IDR];
3366 	Original_Mc_AN = F_TopMAN[IDR] + n;
3367 
3368 	Gc_AN = Rcv_GAN[IDR][n];
3369 	Cwan = WhatSpecies[Gc_AN];
3370 	tno1 = Spe_Total_NO[Cwan];
3371 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
3372 	  Gh_AN = natn[Gc_AN][h_AN];
3373 	  Hwan = WhatSpecies[Gh_AN];
3374 	  tno2 = Spe_Total_VPS_Pro[Hwan];
3375 
3376 	  for (so=0; so<=VPS_j_dependency[Hwan]; so++){
3377 	    for (i=0; i<tno1; i++){
3378 	      for (j=0; j<tno2; j++){
3379 		DS_NL[so][0][Matomnum+1][h_AN][i][j] = tmp_array2[num];
3380 		num++;
3381 	      }
3382 	    }
3383 	  }
3384 	}
3385 
3386 	/* free tmp_array2 */
3387 	free(tmp_array2);
3388 
3389 	/*****************************************************************
3390                            multiplying overlap integrals
3391 	*****************************************************************/
3392 
3393 	for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
3394 
3395 	  dtime(&Stime_atom);
3396 
3397 	  dEx = 0.0;
3398 	  dEy = 0.0;
3399 	  dEz = 0.0;
3400 
3401 	  Gc_AN = M2G[Mc_AN];
3402 	  Cwan = WhatSpecies[Gc_AN];
3403 	  fan = FNAN[Gc_AN];
3404 
3405 	  h_AN = 0;
3406 	  Gh_AN = natn[Gc_AN][h_AN];
3407 	  Mh_AN = F_G2M[Gh_AN];
3408 	  Hwan = WhatSpecies[Gh_AN];
3409 	  ian = Spe_Total_CNO[Hwan];
3410 
3411 	  n = F_Rcv_Num_WK[IDR];
3412 	  jg = Rcv_GAN[IDR][n];
3413 
3414 	  for (j0=0; j0<=fan; j0++){
3415 
3416 	    jg0 = natn[Gc_AN][j0];
3417 	    Mj_AN0 = F_G2M[jg0];
3418 
3419 	    po2 = 0;
3420 	    if (Original_Mc_AN==Mj_AN0){
3421 	      po2 = 1;
3422 	      q_AN = j0;
3423 	    }
3424 
3425 	    if (po2==1){
3426 
3427 	      Gq_AN = natn[Gc_AN][q_AN];
3428 	      Mq_AN = F_G2M[Gq_AN];
3429 	      Qwan = WhatSpecies[Gq_AN];
3430 	      jan = Spe_Total_CNO[Qwan];
3431 	      kl = RMI1[Mc_AN][h_AN][q_AN];
3432 
3433  	      dHNL(0,Mc_AN,h_AN,q_AN,DS_NL,Hx0,Hy0,Hz0);
3434 
3435 	      /* contribution of force = Trace(CDM0*dH) */
3436 	      /* spin non-polarization */
3437 
3438 	      if (SpinP_switch==0){
3439 
3440                 if (q_AN==h_AN) pref = 2.0;
3441                 else            pref = 4.0;
3442 
3443 		for (i=0; i<ian; i++){
3444 		  for (j=0; j<jan; j++){
3445 
3446 		    dEx += pref*CDM0[0][Mh_AN][kl][i][j]*Hx0[0][i][j].r;
3447 		    dEy += pref*CDM0[0][Mh_AN][kl][i][j]*Hy0[0][i][j].r;
3448 		    dEz += pref*CDM0[0][Mh_AN][kl][i][j]*Hz0[0][i][j].r;
3449 		  }
3450 		}
3451 	      }
3452 
3453 	      /* collinear spin polarized or non-colliear without SO and LDA+U */
3454 
3455 	      else if (SpinP_switch==1 || (SpinP_switch==3 && SO_switch==0 && Hub_U_switch==0
3456 		   && Constraint_NCS_switch==0 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)){
3457 
3458 		if (q_AN==h_AN) pref = 1.0;
3459 		else            pref = 2.0;
3460 
3461 		for (i=0; i<Spe_Total_CNO[Hwan]; i++){
3462 		  for (j=0; j<Spe_Total_CNO[Qwan]; j++){
3463 
3464 		    dEx += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hx0[0][i][j].r
3465 			         + CDM0[1][Mh_AN][kl][i][j]*Hx0[1][i][j].r);
3466 		    dEy += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hy0[0][i][j].r
3467 				 + CDM0[1][Mh_AN][kl][i][j]*Hy0[1][i][j].r);
3468 		    dEz += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hz0[0][i][j].r
3469 				 + CDM0[1][Mh_AN][kl][i][j]*Hz0[1][i][j].r);
3470 		  }
3471 		}
3472 	      }
3473 
3474 	      /* spin non-collinear with spin-orbit coupling or with LDA+U */
3475 
3476 	      else if ( SpinP_switch==3 && (SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1)
3477                      || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)){
3478 
3479                 if (q_AN==h_AN){
3480 
3481 		  for (i=0; i<Spe_Total_CNO[Hwan]; i++){
3482 		    for (j=0; j<Spe_Total_CNO[Qwan]; j++){
3483 
3484 		      dEx +=
3485 			  CDM0[0][Mh_AN][kl][i][j]*Hx0[0][i][j].r
3486 			- iDM0[0][Mh_AN][kl][i][j]*Hx0[0][i][j].i
3487 			+ CDM0[1][Mh_AN][kl][i][j]*Hx0[1][i][j].r
3488 			- iDM0[1][Mh_AN][kl][i][j]*Hx0[1][i][j].i
3489 		    + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hx0[2][i][j].r
3490 		    - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hx0[2][i][j].i;
3491 
3492 		      dEy +=
3493 			  CDM0[0][Mh_AN][kl][i][j]*Hy0[0][i][j].r
3494 			- iDM0[0][Mh_AN][kl][i][j]*Hy0[0][i][j].i
3495 			+ CDM0[1][Mh_AN][kl][i][j]*Hy0[1][i][j].r
3496 			- iDM0[1][Mh_AN][kl][i][j]*Hy0[1][i][j].i
3497 		    + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hy0[2][i][j].r
3498 		    - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hy0[2][i][j].i;
3499 
3500 		      dEz +=
3501 			  CDM0[0][Mh_AN][kl][i][j]*Hz0[0][i][j].r
3502 			- iDM0[0][Mh_AN][kl][i][j]*Hz0[0][i][j].i
3503 			+ CDM0[1][Mh_AN][kl][i][j]*Hz0[1][i][j].r
3504 			- iDM0[1][Mh_AN][kl][i][j]*Hz0[1][i][j].i
3505 		    + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hz0[2][i][j].r
3506 		    - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hz0[2][i][j].i;
3507 
3508 		    }
3509 		  }
3510 		}
3511 
3512                 else {
3513 
3514 		  for (i=0; i<Spe_Total_CNO[Hwan]; i++){  /* Hwan */
3515 		    for (j=0; j<Spe_Total_CNO[Qwan]; j++){ /* Qwan  */
3516 
3517 		      dEx +=
3518 			  CDM0[0][Mh_AN][kl ][i][j]*Hx0[0][i][j].r
3519 			- iDM0[0][Mh_AN][kl ][i][j]*Hx0[0][i][j].i
3520 			+ CDM0[1][Mh_AN][kl ][i][j]*Hx0[1][i][j].r
3521 			- iDM0[1][Mh_AN][kl ][i][j]*Hx0[1][i][j].i
3522 		    + 2.0*CDM0[2][Mh_AN][kl ][i][j]*Hx0[2][i][j].r
3523 		    - 2.0*CDM0[3][Mh_AN][kl ][i][j]*Hx0[2][i][j].i;
3524 
3525 		      dEy +=
3526 			  CDM0[0][Mh_AN][kl ][i][j]*Hy0[0][i][j].r
3527 			- iDM0[0][Mh_AN][kl ][i][j]*Hy0[0][i][j].i
3528 			+ CDM0[1][Mh_AN][kl ][i][j]*Hy0[1][i][j].r
3529 			- iDM0[1][Mh_AN][kl ][i][j]*Hy0[1][i][j].i
3530 	 	    + 2.0*CDM0[2][Mh_AN][kl ][i][j]*Hy0[2][i][j].r
3531 		    - 2.0*CDM0[3][Mh_AN][kl ][i][j]*Hy0[2][i][j].i;
3532 
3533 		      dEz +=
3534 			  CDM0[0][Mh_AN][kl ][i][j]*Hz0[0][i][j].r
3535 			- iDM0[0][Mh_AN][kl ][i][j]*Hz0[0][i][j].i
3536 			+ CDM0[1][Mh_AN][kl ][i][j]*Hz0[1][i][j].r
3537 			- iDM0[1][Mh_AN][kl ][i][j]*Hz0[1][i][j].i
3538 	 	    + 2.0*CDM0[2][Mh_AN][kl ][i][j]*Hz0[2][i][j].r
3539 		    - 2.0*CDM0[3][Mh_AN][kl ][i][j]*Hz0[2][i][j].i;
3540 
3541 		    } /* j */
3542 		  } /* i */
3543 
3544 		  dHNL(0,Mc_AN,q_AN,h_AN,DS_NL,Hx1,Hy1,Hz1);
3545 		  kl1 = RMI1[Mc_AN][q_AN][h_AN];
3546 
3547 		  for (i=0; i<Spe_Total_CNO[Qwan]; i++){ /* Qwan */
3548 		    for (j=0; j<Spe_Total_CNO[Hwan]; j++){ /* Hwan */
3549 
3550 		      dEx +=
3551 			  CDM0[0][Mq_AN][kl1][i][j]*Hx1[0][i][j].r
3552 			- iDM0[0][Mq_AN][kl1][i][j]*Hx1[0][i][j].i
3553 			+ CDM0[1][Mq_AN][kl1][i][j]*Hx1[1][i][j].r
3554 			- iDM0[1][Mq_AN][kl1][i][j]*Hx1[1][i][j].i
3555 		    + 2.0*CDM0[2][Mq_AN][kl1][i][j]*Hx1[2][i][j].r
3556 		    - 2.0*CDM0[3][Mq_AN][kl1][i][j]*Hx1[2][i][j].i;
3557 
3558 		      dEy +=
3559 			  CDM0[0][Mq_AN][kl1][i][j]*Hy1[0][i][j].r
3560 			- iDM0[0][Mq_AN][kl1][i][j]*Hy1[0][i][j].i
3561 			+ CDM0[1][Mq_AN][kl1][i][j]*Hy1[1][i][j].r
3562 			- iDM0[1][Mq_AN][kl1][i][j]*Hy1[1][i][j].i
3563 		    + 2.0*CDM0[2][Mq_AN][kl1][i][j]*Hy1[2][i][j].r
3564 		    - 2.0*CDM0[3][Mq_AN][kl1][i][j]*Hy1[2][i][j].i;
3565 
3566 		      dEz +=
3567 			  CDM0[0][Mq_AN][kl1][i][j]*Hz1[0][i][j].r
3568 			- iDM0[0][Mq_AN][kl1][i][j]*Hz1[0][i][j].i
3569 			+ CDM0[1][Mq_AN][kl1][i][j]*Hz1[1][i][j].r
3570 			- iDM0[1][Mq_AN][kl1][i][j]*Hz1[1][i][j].i
3571 		    + 2.0*CDM0[2][Mq_AN][kl1][i][j]*Hz1[2][i][j].r
3572 		    - 2.0*CDM0[3][Mq_AN][kl1][i][j]*Hz1[2][i][j].i;
3573 
3574 		    } /* j */
3575 		  } /* i */
3576 
3577                 }
3578 	      }
3579 
3580 	    } /* if (po2==1) */
3581 	  } /* j0 */
3582 
3583 	  /* force from #4B */
3584 
3585 	  Gxyz[Gc_AN][41] += dEx;
3586 	  Gxyz[Gc_AN][42] += dEy;
3587 	  Gxyz[Gc_AN][43] += dEz;
3588 
3589 	  /* timing */
3590 	  dtime(&Etime_atom);
3591 	  time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
3592 
3593 	} /* Mc_AN */
3594 
3595 	  /********************************************
3596             increment of F_Rcv_Num_WK[IDR]
3597 	  ********************************************/
3598 
3599 	F_Rcv_Num_WK[IDR]++;
3600 
3601       } /* if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) */
3602 
3603       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) {
3604 
3605 	MPI_Wait(&request,&stat);
3606 	free(tmp_array);  /* freeing of array */
3607 
3608 	/********************************************
3609              increment of F_Snd_Num_WK[IDS]
3610 	********************************************/
3611 
3612 	F_Snd_Num_WK[IDS]++;
3613       }
3614 
3615     } /* ID */
3616 
3617     /*****************************************************
3618       check whether all the communications have finished
3619     *****************************************************/
3620 
3621     po = 0;
3622     for (ID=0; ID<numprocs; ID++){
3623 
3624       IDS = (myid + ID) % numprocs;
3625       IDR = (myid - ID + numprocs) % numprocs;
3626 
3627       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) po += F_Snd_Num[IDS]-F_Snd_Num_WK[IDS];
3628       if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ) po += F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR];
3629     }
3630 
3631   } while (po!=0);
3632 
3633   for (i=0; i<3; i++){
3634     for (j=0; j<List_YOUSO[7]; j++){
3635       free(Hx0[i][j]);
3636     }
3637     free(Hx0[i]);
3638   }
3639   free(Hx0);
3640 
3641   for (i=0; i<3; i++){
3642     for (j=0; j<List_YOUSO[7]; j++){
3643       free(Hy0[i][j]);
3644     }
3645     free(Hy0[i]);
3646   }
3647   free(Hy0);
3648 
3649   for (i=0; i<3; i++){
3650     for (j=0; j<List_YOUSO[7]; j++){
3651       free(Hz0[i][j]);
3652     }
3653     free(Hz0[i]);
3654   }
3655   free(Hz0);
3656 
3657   for (i=0; i<3; i++){
3658     for (j=0; j<List_YOUSO[7]; j++){
3659       free(Hx1[i][j]);
3660     }
3661     free(Hx1[i]);
3662   }
3663   free(Hx1);
3664 
3665   for (i=0; i<3; i++){
3666     for (j=0; j<List_YOUSO[7]; j++){
3667       free(Hy1[i][j]);
3668     }
3669     free(Hy1[i]);
3670   }
3671   free(Hy1);
3672 
3673   for (i=0; i<3; i++){
3674     for (j=0; j<List_YOUSO[7]; j++){
3675       free(Hz1[i][j]);
3676     }
3677     free(Hz1[i]);
3678   }
3679   free(Hz1);
3680 
3681   dtime(&etime);
3682   if(myid==0 && measure_time){
3683     printf("Time for part1 of force_NL=%18.5f\n",etime-stime);fflush(stdout);
3684   }
3685 
3686   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
3687     Gc_AN = M2G[Mc_AN];
3688 
3689     if (2<=level_stdout){
3690       printf("<Force>  force(HNL1) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
3691 	     myid,Mc_AN,Gc_AN,Gxyz[Gc_AN][41],Gxyz[Gc_AN][42],Gxyz[Gc_AN][43]);fflush(stdout);
3692     }
3693   }
3694 
3695   /*******************************************************
3696    *******************************************************
3697      THE FIRST CASE:
3698      multiplying overlap integrals WITHOUT COMMUNICATION
3699 
3700      In case of I=i or I=j
3701      for d [ \sum_k <i|k>ek<k|j> ]/dRI
3702    *******************************************************
3703    *******************************************************/
3704 
3705   dtime(&stime);
3706 
3707 #pragma omp parallel shared(time_per_atom,Gxyz,CDM0,SpinP_switch,SO_switch,Hub_U_switch,F_U_flag,Constraint_NCS_switch,Zeeman_NCS_switch,Zeeman_NCO_switch,DS_NL,RMI1,FNAN,Spe_Total_CNO,WhatSpecies,F_G2M,natn,M2G,Matomnum,List_YOUSO,F_NL_flag) private(Hx0,Hy0,Hz0,Hx1,Hy1,Hz1,OMPID,Nthrds,Nprocs,Mc_AN,Stime_atom,Etime_atom,dEx,dEy,dEz,Gc_AN,h_AN,Gh_AN,Mh_AN,Hwan,ian,q_AN,Gq_AN,Mq_AN,Qwan,jan,kl,kl1,i,j,kk,pref)
3708   {
3709 
3710     /* allocation of array */
3711 
3712     Hx0 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3713     for (i=0; i<3; i++){
3714       Hx0[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3715       for (j=0; j<List_YOUSO[7]; j++){
3716 	Hx0[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3717       }
3718     }
3719 
3720     Hy0 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3721     for (i=0; i<3; i++){
3722       Hy0[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3723       for (j=0; j<List_YOUSO[7]; j++){
3724 	Hy0[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3725       }
3726     }
3727 
3728     Hz0 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3729     for (i=0; i<3; i++){
3730       Hz0[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3731       for (j=0; j<List_YOUSO[7]; j++){
3732 	Hz0[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3733       }
3734     }
3735 
3736     Hx1 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3737     for (i=0; i<3; i++){
3738       Hx1[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3739       for (j=0; j<List_YOUSO[7]; j++){
3740 	Hx1[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3741       }
3742     }
3743 
3744     Hy1 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3745     for (i=0; i<3; i++){
3746       Hy1[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3747       for (j=0; j<List_YOUSO[7]; j++){
3748 	Hy1[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3749       }
3750     }
3751 
3752     Hz1 = (dcomplex***)malloc(sizeof(dcomplex**)*3);
3753     for (i=0; i<3; i++){
3754       Hz1[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
3755       for (j=0; j<List_YOUSO[7]; j++){
3756 	Hz1[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
3757       }
3758     }
3759 
3760     /* get info. on OpenMP */
3761 
3762     OMPID = omp_get_thread_num();
3763     Nthrds = omp_get_num_threads();
3764     Nprocs = omp_get_num_procs();
3765 
3766     for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
3767 
3768       dtime(&Stime_atom);
3769 
3770       dEx = 0.0;
3771       dEy = 0.0;
3772       dEz = 0.0;
3773 
3774       Gc_AN = M2G[Mc_AN];
3775       h_AN = 0;
3776       Gh_AN = natn[Gc_AN][h_AN];
3777       Mh_AN = F_G2M[Gh_AN];
3778       Hwan = WhatSpecies[Gh_AN];
3779       ian = Spe_Total_CNO[Hwan];
3780 
3781       for (q_AN=0; q_AN<=FNAN[Gc_AN]; q_AN++){
3782 
3783 	Gq_AN = natn[Gc_AN][q_AN];
3784 	Mq_AN = F_G2M[Gq_AN];
3785 
3786 	if (Mq_AN<=Matomnum){
3787 
3788 	  Qwan = WhatSpecies[Gq_AN];
3789 	  jan = Spe_Total_CNO[Qwan];
3790 	  kl = RMI1[Mc_AN][h_AN][q_AN];
3791 
3792           dHNL(0,Mc_AN,h_AN,q_AN,DS_NL,Hx0,Hy0,Hz0);
3793 
3794 	  if (SpinP_switch==0){
3795 
3796             if (q_AN==h_AN) pref = 2.0;
3797             else            pref = 4.0;
3798 
3799 	    for (i=0; i<ian; i++){
3800 	      for (j=0; j<jan; j++){
3801 
3802 		dEx += pref*CDM0[0][Mh_AN][kl][i][j]*Hx0[0][i][j].r;
3803 		dEy += pref*CDM0[0][Mh_AN][kl][i][j]*Hy0[0][i][j].r;
3804 		dEz += pref*CDM0[0][Mh_AN][kl][i][j]*Hz0[0][i][j].r;
3805 	      }
3806 	    }
3807 	  }
3808 
3809           /* collinear spin polarized or non-colliear without SO and LDA+U */
3810 
3811 	  else if (SpinP_switch==1 || (SpinP_switch==3 && SO_switch==0 && Hub_U_switch==0
3812 	        && Constraint_NCS_switch==0 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)){
3813 
3814 	    if (q_AN==h_AN) pref = 1.0;
3815 	    else            pref = 2.0;
3816 
3817 	    for (i=0; i<ian; i++){
3818 	      for (j=0; j<jan; j++){
3819 
3820 		dEx += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hx0[0][i][j].r
3821 			     + CDM0[1][Mh_AN][kl][i][j]*Hx0[1][i][j].r);
3822 		dEy += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hy0[0][i][j].r
3823 			     + CDM0[1][Mh_AN][kl][i][j]*Hy0[1][i][j].r);
3824 		dEz += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hz0[0][i][j].r
3825 			     + CDM0[1][Mh_AN][kl][i][j]*Hz0[1][i][j].r);
3826 	      }
3827 	    }
3828 	  }
3829 
3830 	  /* spin non-collinear with spin-orbit coupling or with LDA+U */
3831 
3832 	  else if ( SpinP_switch==3 && (SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1)
3833 		|| 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)){
3834 
3835             if (q_AN==h_AN){
3836 
3837 	      for (i=0; i<Spe_Total_CNO[Hwan]; i++){
3838 		for (j=0; j<Spe_Total_CNO[Qwan]; j++){
3839 
3840 		  dEx +=
3841                       CDM0[0][Mh_AN][kl][i][j]*Hx0[0][i][j].r
3842 		    - iDM0[0][Mh_AN][kl][i][j]*Hx0[0][i][j].i
3843 		    + CDM0[1][Mh_AN][kl][i][j]*Hx0[1][i][j].r
3844 		    - iDM0[1][Mh_AN][kl][i][j]*Hx0[1][i][j].i
3845 	        + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hx0[2][i][j].r
3846 		- 2.0*CDM0[3][Mh_AN][kl][i][j]*Hx0[2][i][j].i;
3847 
3848 		  dEy +=
3849                       CDM0[0][Mh_AN][kl][i][j]*Hy0[0][i][j].r
3850 		    - iDM0[0][Mh_AN][kl][i][j]*Hy0[0][i][j].i
3851 		    + CDM0[1][Mh_AN][kl][i][j]*Hy0[1][i][j].r
3852 		    - iDM0[1][Mh_AN][kl][i][j]*Hy0[1][i][j].i
3853 		+ 2.0*CDM0[2][Mh_AN][kl][i][j]*Hy0[2][i][j].r
3854 	        - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hy0[2][i][j].i;
3855 
3856 		  dEz +=
3857                       CDM0[0][Mh_AN][kl][i][j]*Hz0[0][i][j].r
3858 		    - iDM0[0][Mh_AN][kl][i][j]*Hz0[0][i][j].i
3859 		    + CDM0[1][Mh_AN][kl][i][j]*Hz0[1][i][j].r
3860 		    - iDM0[1][Mh_AN][kl][i][j]*Hz0[1][i][j].i
3861 	        + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hz0[2][i][j].r
3862 		- 2.0*CDM0[3][Mh_AN][kl][i][j]*Hz0[2][i][j].i;
3863 
3864 		}
3865 	      }
3866             }
3867 
3868             else{
3869 
3870               for (i=0; i<Spe_Total_CNO[Hwan]; i++){  /* Hwan */
3871 		for (j=0; j<Spe_Total_CNO[Qwan]; j++){ /* Qwan  */
3872 
3873 		  dEx +=
3874                       CDM0[0][Mh_AN][kl ][i][j]*Hx0[0][i][j].r
3875 		    - iDM0[0][Mh_AN][kl ][i][j]*Hx0[0][i][j].i
3876 		    + CDM0[1][Mh_AN][kl ][i][j]*Hx0[1][i][j].r
3877 		    - iDM0[1][Mh_AN][kl ][i][j]*Hx0[1][i][j].i
3878 		+ 2.0*CDM0[2][Mh_AN][kl ][i][j]*Hx0[2][i][j].r
3879                 - 2.0*CDM0[3][Mh_AN][kl ][i][j]*Hx0[2][i][j].i;
3880 
3881 		  dEy +=
3882                       CDM0[0][Mh_AN][kl ][i][j]*Hy0[0][i][j].r
3883 		    - iDM0[0][Mh_AN][kl ][i][j]*Hy0[0][i][j].i
3884 		    + CDM0[1][Mh_AN][kl ][i][j]*Hy0[1][i][j].r
3885 		    - iDM0[1][Mh_AN][kl ][i][j]*Hy0[1][i][j].i
3886 		+ 2.0*CDM0[2][Mh_AN][kl ][i][j]*Hy0[2][i][j].r
3887                 - 2.0*CDM0[3][Mh_AN][kl ][i][j]*Hy0[2][i][j].i;
3888 
3889 		  dEz +=
3890                       CDM0[0][Mh_AN][kl ][i][j]*Hz0[0][i][j].r
3891 		    - iDM0[0][Mh_AN][kl ][i][j]*Hz0[0][i][j].i
3892 		    + CDM0[1][Mh_AN][kl ][i][j]*Hz0[1][i][j].r
3893 		    - iDM0[1][Mh_AN][kl ][i][j]*Hz0[1][i][j].i
3894 	        + 2.0*CDM0[2][Mh_AN][kl ][i][j]*Hz0[2][i][j].r
3895                 - 2.0*CDM0[3][Mh_AN][kl ][i][j]*Hz0[2][i][j].i;
3896 
3897 		} /* j */
3898 	      } /* i */
3899 
3900               dHNL(0,Mc_AN,q_AN,h_AN,DS_NL,Hx1,Hy1,Hz1);
3901        	      kl1 = RMI1[Mc_AN][q_AN][h_AN];
3902 
3903 	      for (i=0; i<Spe_Total_CNO[Qwan]; i++){ /* Qwan */
3904 		for (j=0; j<Spe_Total_CNO[Hwan]; j++){ /* Hwan */
3905 
3906 		  dEx +=
3907 		      CDM0[0][Mq_AN][kl1][i][j]*Hx1[0][i][j].r
3908 		    - iDM0[0][Mq_AN][kl1][i][j]*Hx1[0][i][j].i
3909 		    + CDM0[1][Mq_AN][kl1][i][j]*Hx1[1][i][j].r
3910 		    - iDM0[1][Mq_AN][kl1][i][j]*Hx1[1][i][j].i
3911 		+ 2.0*CDM0[2][Mq_AN][kl1][i][j]*Hx1[2][i][j].r
3912 		- 2.0*CDM0[3][Mq_AN][kl1][i][j]*Hx1[2][i][j].i;
3913 
3914 		  dEy +=
3915                       CDM0[0][Mq_AN][kl1][i][j]*Hy1[0][i][j].r
3916 		    - iDM0[0][Mq_AN][kl1][i][j]*Hy1[0][i][j].i
3917 		    + CDM0[1][Mq_AN][kl1][i][j]*Hy1[1][i][j].r
3918 		    - iDM0[1][Mq_AN][kl1][i][j]*Hy1[1][i][j].i
3919 	        + 2.0*CDM0[2][Mq_AN][kl1][i][j]*Hy1[2][i][j].r
3920 	        - 2.0*CDM0[3][Mq_AN][kl1][i][j]*Hy1[2][i][j].i;
3921 
3922 		  dEz +=
3923                       CDM0[0][Mq_AN][kl1][i][j]*Hz1[0][i][j].r
3924 		    - iDM0[0][Mq_AN][kl1][i][j]*Hz1[0][i][j].i
3925 		    + CDM0[1][Mq_AN][kl1][i][j]*Hz1[1][i][j].r
3926 		    - iDM0[1][Mq_AN][kl1][i][j]*Hz1[1][i][j].i
3927 	        + 2.0*CDM0[2][Mq_AN][kl1][i][j]*Hz1[2][i][j].r
3928 		- 2.0*CDM0[3][Mq_AN][kl1][i][j]*Hz1[2][i][j].i;
3929 
3930 		} /* j */
3931 	      } /* i */
3932 
3933 	    }
3934 	  }
3935 	}
3936       }
3937 
3938       /* force from #4B */
3939 
3940       if (F_NL_flag==1){
3941         Gxyz[Gc_AN][41] += dEx;
3942         Gxyz[Gc_AN][42] += dEy;
3943         Gxyz[Gc_AN][43] += dEz;
3944       }
3945 
3946       /* timing */
3947       dtime(&Etime_atom);
3948       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
3949 
3950     } /* Mc_AN */
3951 
3952     /* freeing of array */
3953 
3954     for (i=0; i<3; i++){
3955       for (j=0; j<List_YOUSO[7]; j++){
3956 	free(Hx0[i][j]);
3957       }
3958       free(Hx0[i]);
3959     }
3960     free(Hx0);
3961 
3962     for (i=0; i<3; i++){
3963       for (j=0; j<List_YOUSO[7]; j++){
3964 	free(Hy0[i][j]);
3965       }
3966       free(Hy0[i]);
3967     }
3968     free(Hy0);
3969 
3970     for (i=0; i<3; i++){
3971       for (j=0; j<List_YOUSO[7]; j++){
3972 	free(Hz0[i][j]);
3973       }
3974       free(Hz0[i]);
3975     }
3976     free(Hz0);
3977 
3978     for (i=0; i<3; i++){
3979       for (j=0; j<List_YOUSO[7]; j++){
3980 	free(Hx1[i][j]);
3981       }
3982       free(Hx1[i]);
3983     }
3984     free(Hx1);
3985 
3986     for (i=0; i<3; i++){
3987       for (j=0; j<List_YOUSO[7]; j++){
3988 	free(Hy1[i][j]);
3989       }
3990       free(Hy1[i]);
3991     }
3992     free(Hy1);
3993 
3994     for (i=0; i<3; i++){
3995       for (j=0; j<List_YOUSO[7]; j++){
3996 	free(Hz1[i][j]);
3997       }
3998       free(Hz1[i]);
3999     }
4000     free(Hz1);
4001 
4002   } /* #pragma omp parallel */
4003 
4004   dtime(&etime);
4005   if(myid==0 && measure_time){
4006     printf("Time for part2 of force_NL=%18.5f\n",etime-stime);fflush(stdout);
4007   }
4008 
4009   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
4010     Gc_AN = M2G[Mc_AN];
4011 
4012     if (2<=level_stdout){
4013       printf("<Force>  force(HNL2) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
4014 	     myid,Mc_AN,Gc_AN,Gxyz[Gc_AN][41],Gxyz[Gc_AN][42],Gxyz[Gc_AN][43]);fflush(stdout);
4015     }
4016   }
4017 
4018   /*************************************************************
4019      THE SECOND CASE:
4020      In case of I=k with I!=i and I!=j
4021      d [ \sum_k <i|k>ek<k|j> ]/dRI
4022   *************************************************************/
4023 
4024   /************************************************************
4025      MPI communication of DS_NL whose basis part is not located
4026      on own site but projector part is located on own site.
4027   ************************************************************/
4028 
4029   MPI_Barrier(mpi_comm_level1);
4030   dtime(&stime);
4031 
4032   for (ID=0; ID<numprocs; ID++) Indicator[ID] = 0;
4033 
4034   for (Mc_AN=1; Mc_AN<=Max_Matomnum; Mc_AN++){
4035 
4036     if (Mc_AN<=Matomnum)  Gc_AN = M2G[Mc_AN];
4037     else                  Gc_AN = 0;
4038 
4039     for (ID=0; ID<numprocs; ID++){
4040 
4041       IDS = (myid + ID) % numprocs;
4042       IDR = (myid - ID + numprocs) % numprocs;
4043 
4044       i = Indicator[IDS];
4045       po = 0;
4046 
4047       Gh_AN = Pro_Snd_GAtom[IDS][i];
4048 
4049       if (Gh_AN!=0){
4050 
4051 	/* find the range with the same global atomic number */
4052 
4053 	do {
4054 
4055 	  i++;
4056 	  if (Gh_AN!=Pro_Snd_GAtom[IDS][i]) po = 1;
4057 	} while(po==0);
4058 
4059 	i--;
4060 	SA_num = i - Indicator[IDS] + 1;
4061 
4062 	/* find the data size to send the block data */
4063 
4064 	size1 = 0;
4065 	for (q=Indicator[IDS]; q<=(Indicator[IDS]+SA_num-1); q++){
4066 
4067 	  Sc_AN = Pro_Snd_MAtom[IDS][q];
4068 	  GSc_AN = F_M2G[Sc_AN];
4069 	  Sc_wan = WhatSpecies[GSc_AN];
4070 	  tno1 = Spe_Total_CNO[Sc_wan];
4071 
4072 	  Sh_AN = Pro_Snd_LAtom[IDS][q];
4073 	  GSh_AN = natn[GSc_AN][Sh_AN];
4074 	  Sh_wan = WhatSpecies[GSh_AN];
4075 	  tno2 = Spe_Total_VPS_Pro[Sh_wan];
4076           smul = (VPS_j_dependency[Sh_wan]+1);
4077 
4078 	  size1 += smul*4*tno1*tno2;
4079 	  size1 += 3;
4080 	}
4081 
4082       } /* if (Gh_AN!=0) */
4083 
4084       else {
4085 	SA_num = 0;
4086 	size1 = 0;
4087       }
4088 
4089       S_array[IDS][0] = Gh_AN;
4090       S_array[IDS][1] = SA_num;
4091       S_array[IDS][2] = size1;
4092 
4093       if (ID!=0){
4094 	MPI_Isend(&S_array[IDS][0], 3, MPI_INT, IDS, tag, mpi_comm_level1, &request);
4095 	MPI_Recv( &R_array[IDR][0], 3, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
4096 	MPI_Wait(&request,&stat);
4097       }
4098       else {
4099 	R_array[myid][0] = S_array[myid][0];
4100 	R_array[myid][1] = S_array[myid][1];
4101 	R_array[myid][2] = S_array[myid][2];
4102       }
4103 
4104       if (R_array[IDR][0]==Gc_AN) R_comm_flag = 1;
4105       else                        R_comm_flag = 0;
4106 
4107       if (ID!=0){
4108 	MPI_Isend(&R_comm_flag, 1, MPI_INT, IDR, tag, mpi_comm_level1, &request);
4109 	MPI_Recv( &S_comm_flag, 1, MPI_INT, IDS, tag, mpi_comm_level1, &stat);
4110 	MPI_Wait(&request,&stat);
4111       }
4112       else{
4113 	S_comm_flag = R_comm_flag;
4114       }
4115 
4116       /*****************************************
4117                     send the data
4118       *****************************************/
4119 
4120       /* if (S_comm_flag==1) then, send data to IDS */
4121 
4122       if (S_comm_flag==1){
4123 
4124 	/* allocate tmp_array */
4125 
4126 	tmp_array = (double*)malloc(sizeof(double)*size1);
4127 
4128 	/* multidimentional array to vector array */
4129 
4130 	num = 0;
4131 
4132 	for (q=Indicator[IDS]; q<=(Indicator[IDS]+SA_num-1); q++){
4133 
4134 	  Sc_AN = Pro_Snd_MAtom[IDS][q];
4135 	  GSc_AN = F_M2G[Sc_AN];
4136 	  Sc_wan = WhatSpecies[GSc_AN];
4137 	  tno1 = Spe_Total_CNO[Sc_wan];
4138 
4139 	  Sh_AN = Pro_Snd_LAtom[IDS][q];
4140 	  GSh_AN = natn[GSc_AN][Sh_AN];
4141 	  Sh_wan = WhatSpecies[GSh_AN];
4142 	  tno2 = Spe_Total_VPS_Pro[Sh_wan];
4143 	  Sh_AN2 = Pro_Snd_LAtom2[IDS][q];
4144 
4145 	  tmp_array[num] = (double)Sc_AN;  num++;
4146 	  tmp_array[num] = (double)Sh_AN;  num++;
4147 	  tmp_array[num] = (double)Sh_AN2; num++;
4148 
4149 	  for (so=0; so<=VPS_j_dependency[Sh_wan]; so++){
4150 	    for (kk=0; kk<=3; kk++){
4151 	      for (i=0; i<tno1; i++){
4152 		for (j=0; j<tno2; j++){
4153 		  tmp_array[num] = DS_NL[so][kk][Sc_AN][Sh_AN][i][j];
4154 		  num++;
4155 		}
4156 	      }
4157 	    }
4158 	  }
4159 	}
4160 
4161 	if (ID!=0){
4162 	  MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
4163 	}
4164 
4165 	/* update Indicator[IDS] */
4166 
4167 	Indicator[IDS] += SA_num;
4168 
4169       } /* if (S_comm_flag==1) */
4170 
4171       /*****************************************
4172                    receive the data
4173       *****************************************/
4174 
4175       /* if (R_comm_flag==1) then, receive the data from IDR */
4176 
4177       if (R_comm_flag==1){
4178 
4179 	size2 = R_array[IDR][2];
4180 	tmp_array2 = (double*)malloc(sizeof(double)*size2);
4181 
4182 	if (ID!=0){
4183 	  MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
4184 	}
4185 	else{
4186 	  for (i=0; i<size2; i++) tmp_array2[i] = tmp_array[i];
4187 	}
4188 
4189 	/* store */
4190 
4191 	num = 0;
4192 
4193 	for (n=0; n<R_array[IDR][1]; n++){
4194 
4195 	  Sc_AN  = (int)tmp_array2[num]; num++;
4196 	  Sh_AN  = (int)tmp_array2[num]; num++;
4197 	  Sh_AN2 = (int)tmp_array2[num]; num++;
4198 
4199 	  GSc_AN = natn[Gc_AN][Sh_AN2];
4200 	  Sc_wan = WhatSpecies[GSc_AN];
4201 	  tno1 = Spe_Total_CNO[Sc_wan];
4202 
4203 	  GSh_AN = natn[GSc_AN][Sh_AN];
4204 	  Sh_wan = WhatSpecies[GSh_AN];
4205 	  tno2 = Spe_Total_VPS_Pro[Sh_wan];
4206 
4207 	  for (so=0; so<=VPS_j_dependency[Sh_wan]; so++){
4208 	    for (kk=0; kk<=3; kk++){
4209 	      for (i=0; i<tno1; i++){
4210 		for (j=0; j<tno2; j++){
4211 		  DS_NL[so][kk][Matomnum+1][Sh_AN2][i][j] = tmp_array2[num];
4212 		  num++;
4213 		}
4214 	      }
4215 	    }
4216 	  }
4217 	}
4218 
4219 	/* free tmp_array2 */
4220 	free(tmp_array2);
4221 
4222       } /* if (R_comm_flag==1) */
4223 
4224       if (S_comm_flag==1){
4225 	if (ID!=0) MPI_Wait(&request,&stat);
4226 	free(tmp_array);  /* freeing of array */
4227       }
4228 
4229     } /* ID */
4230 
4231     if (Mc_AN<=Matomnum){
4232 
4233       /* get Nthrds0 */
4234 #pragma omp parallel shared(Nthrds0)
4235       {
4236 	Nthrds0 = omp_get_num_threads();
4237       }
4238 
4239       /* allocation of arrays */
4240       dEx_threads = (double*)malloc(sizeof(double)*Nthrds0);
4241       dEy_threads = (double*)malloc(sizeof(double)*Nthrds0);
4242       dEz_threads = (double*)malloc(sizeof(double)*Nthrds0);
4243 
4244       for (Nloop=0; Nloop<Nthrds0; Nloop++){
4245 	dEx_threads[Nloop] = 0.0;
4246 	dEy_threads[Nloop] = 0.0;
4247 	dEz_threads[Nloop] = 0.0;
4248       }
4249 
4250       /* one-dimensionalize the h_AN and q_AN loops */
4251 
4252       OneD2h_AN = (int*)malloc(sizeof(int)*(FNAN[Gc_AN]+1)*(FNAN[Gc_AN]+2));
4253       OneD2q_AN = (int*)malloc(sizeof(int)*(FNAN[Gc_AN]+1)*(FNAN[Gc_AN]+2));
4254 
4255       ODNloop = 0;
4256       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4257 
4258 	if ( SpinP_switch==3 && (SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1)
4259 	 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)
4260          || (Solver==5 || Solver==8) )
4261 	  start_q_AN = 0;
4262 	else
4263 	  start_q_AN = h_AN;
4264 
4265 	for (q_AN=start_q_AN; q_AN<=FNAN[Gc_AN]; q_AN++){
4266 
4267 	  kl = RMI1[Mc_AN][h_AN][q_AN];
4268 
4269 	  if (0<=kl){
4270 	    OneD2h_AN[ODNloop] = h_AN;
4271 	    OneD2q_AN[ODNloop] = q_AN;
4272 	    ODNloop++;
4273 	  }
4274 	}
4275       }
4276 
4277 #pragma omp parallel shared(ODNloop,OneD2h_AN,OneD2q_AN,Mc_AN,Gc_AN,dEx_threads,dEy_threads,dEz_threads,CDM0,SpinP_switch,SO_switch,Hub_U_switch,Constraint_NCS_switch,Zeeman_NCS_switch,Zeeman_NCO_switch,DS_NL,RMI1,Spe_Total_CNO,WhatSpecies,F_G2M,natn,FNAN,List_YOUSO,Solver,F_NL_flag,F_U_flag) private(OMPID,Nthrds,Nprocs,Hx,Hy,Hz,i,j,h_AN,Gh_AN,Mh_AN,Hwan,ian,q_AN,Gq_AN,Mq_AN,Qwan,jan,kl,km,Nloop,pref)
4278       {
4279 
4280 	/* allocation of arrays */
4281 
4282 	Hx = (dcomplex***)malloc(sizeof(dcomplex**)*3);
4283 	for (i=0; i<3; i++){
4284 	  Hx[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
4285 	  for (j=0; j<List_YOUSO[7]; j++){
4286 	    Hx[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
4287 	  }
4288 	}
4289 
4290 	Hy = (dcomplex***)malloc(sizeof(dcomplex**)*3);
4291 	for (i=0; i<3; i++){
4292 	  Hy[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
4293 	  for (j=0; j<List_YOUSO[7]; j++){
4294 	    Hy[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
4295 	  }
4296 	}
4297 
4298 	Hz = (dcomplex***)malloc(sizeof(dcomplex**)*3);
4299 	for (i=0; i<3; i++){
4300 	  Hz[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]);
4301 	  for (j=0; j<List_YOUSO[7]; j++){
4302 	    Hz[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*List_YOUSO[7]);
4303 	  }
4304 	}
4305 
4306 	/* get info. on OpenMP */
4307 
4308 	OMPID = omp_get_thread_num();
4309 	Nthrds = omp_get_num_threads();
4310 	Nprocs = omp_get_num_procs();
4311 
4312 	for (Nloop=OMPID*ODNloop/Nthrds; Nloop<(OMPID+1)*ODNloop/Nthrds; Nloop++){
4313 
4314 	  /* get h_AN and q_AN */
4315 
4316 	  h_AN = OneD2h_AN[Nloop];
4317 	  q_AN = OneD2q_AN[Nloop];
4318 
4319 	  /* set informations on h_AN */
4320 
4321 	  Gh_AN = natn[Gc_AN][h_AN];
4322 	  Mh_AN = F_G2M[Gh_AN];
4323 	  Hwan = WhatSpecies[Gh_AN];
4324 	  ian = Spe_Total_CNO[Hwan];
4325 
4326 	  /* set informations on q_AN */
4327 
4328 	  Gq_AN = natn[Gc_AN][q_AN];
4329 	  Mq_AN = F_G2M[Gq_AN];
4330 	  Qwan = WhatSpecies[Gq_AN];
4331 	  jan = Spe_Total_CNO[Qwan];
4332 	  kl = RMI1[Mc_AN][h_AN][q_AN];
4333           km = RMI1[Mc_AN][q_AN][h_AN];
4334 
4335 	  if (0<=kl){
4336 
4337             dHNL(1,Mc_AN,h_AN,q_AN,DS_NL,Hx,Hy,Hz);
4338 
4339 	    /* contribution of force = Trace(CDM0*dH) */
4340 
4341 	    /* spin non-polarization */
4342 
4343 	    if (SpinP_switch==0){
4344 
4345               if (Solver==5 || Solver==8){
4346 	        pref = 2.0;
4347               }
4348               else {
4349 	        if (q_AN==h_AN) pref = 2.0;
4350   	        else            pref = 4.0;
4351               }
4352 
4353 	      for (i=0; i<ian; i++){
4354 		for (j=0; j<jan; j++){
4355 		  dEx_threads[OMPID] += pref*CDM0[0][Mh_AN][kl][i][j]*Hx[0][i][j].r;
4356 		  dEy_threads[OMPID] += pref*CDM0[0][Mh_AN][kl][i][j]*Hy[0][i][j].r;
4357 		  dEz_threads[OMPID] += pref*CDM0[0][Mh_AN][kl][i][j]*Hz[0][i][j].r;
4358 		}
4359 	      }
4360 
4361 	    }
4362 
4363             /* collinear spin polarized or non-colliear without SO and LDA+U */
4364 
4365 	    else if (SpinP_switch==1 || (SpinP_switch==3 && SO_switch==0 && Hub_U_switch==0
4366 	          && Constraint_NCS_switch==0 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)){
4367 
4368               if (Solver==5 || Solver==8){
4369 	        pref = 1.0;
4370               }
4371               else {
4372 	        if (q_AN==h_AN) pref = 1.0;
4373   	        else            pref = 2.0;
4374               }
4375 
4376 	      for (i=0; i<ian; i++){
4377 		for (j=0; j<jan; j++){
4378 
4379 		  dEx_threads[OMPID] += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hx[0][i][j].r
4380 					      + CDM0[1][Mh_AN][kl][i][j]*Hx[1][i][j].r);
4381 		  dEy_threads[OMPID] += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hy[0][i][j].r
4382 					      + CDM0[1][Mh_AN][kl][i][j]*Hy[1][i][j].r);
4383 		  dEz_threads[OMPID] += pref*(  CDM0[0][Mh_AN][kl][i][j]*Hz[0][i][j].r
4384 					      + CDM0[1][Mh_AN][kl][i][j]*Hz[1][i][j].r);
4385 
4386 		}
4387 	      }
4388 	    }
4389 
4390 	    /* spin non-collinear with spin-orbit coupling or with LDA+U */
4391 
4392 	    else if ( SpinP_switch==3 && (SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1)
4393  		   || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1)){
4394 
4395 	      pref = 1.0;
4396 
4397 	      for (i=0; i<ian; i++){
4398 	        for (j=0; j<jan; j++){
4399 
4400 		  dEx_threads[OMPID] +=
4401 	        pref*(CDM0[0][Mh_AN][kl][i][j]*Hx[0][i][j].r
4402 		    - iDM0[0][Mh_AN][kl][i][j]*Hx[0][i][j].i
4403 		    + CDM0[1][Mh_AN][kl][i][j]*Hx[1][i][j].r
4404 		    - iDM0[1][Mh_AN][kl][i][j]*Hx[1][i][j].i
4405  	        + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hx[2][i][j].r
4406 	        - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hx[2][i][j].i);
4407 
4408 		  dEy_threads[OMPID] +=
4409 		pref*(CDM0[0][Mh_AN][kl][i][j]*Hy[0][i][j].r
4410 	            - iDM0[0][Mh_AN][kl][i][j]*Hy[0][i][j].i
4411 		    + CDM0[1][Mh_AN][kl][i][j]*Hy[1][i][j].r
4412 		    - iDM0[1][Mh_AN][kl][i][j]*Hy[1][i][j].i
4413 	        + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hy[2][i][j].r
4414 	        - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hy[2][i][j].i);
4415 
4416 		  dEz_threads[OMPID] +=
4417 		pref*(CDM0[0][Mh_AN][kl][i][j]*Hz[0][i][j].r
4418 		    - iDM0[0][Mh_AN][kl][i][j]*Hz[0][i][j].i
4419 		    + CDM0[1][Mh_AN][kl][i][j]*Hz[1][i][j].r
4420 		    - iDM0[1][Mh_AN][kl][i][j]*Hz[1][i][j].i
4421 	        + 2.0*CDM0[2][Mh_AN][kl][i][j]*Hz[2][i][j].r
4422 	        - 2.0*CDM0[3][Mh_AN][kl][i][j]*Hz[2][i][j].i);
4423 
4424 		}
4425 	      }
4426 	    }
4427 
4428 	  } /* if (0<=kl) */
4429 	} /* Nloop */
4430 
4431         /* freeing of arrays */
4432 
4433 	for (i=0; i<3; i++){
4434 	  for (j=0; j<List_YOUSO[7]; j++){
4435 	    free(Hx[i][j]);
4436 	  }
4437 	  free(Hx[i]);
4438 	}
4439 	free(Hx);
4440 
4441 	for (i=0; i<3; i++){
4442 	  for (j=0; j<List_YOUSO[7]; j++){
4443 	    free(Hy[i][j]);
4444 	  }
4445 	  free(Hy[i]);
4446 	}
4447 	free(Hy);
4448 
4449 	for (i=0; i<3; i++){
4450 	  for (j=0; j<List_YOUSO[7]; j++){
4451 	    free(Hz[i][j]);
4452 	  }
4453 	  free(Hz[i]);
4454 	}
4455 	free(Hz);
4456 
4457       } /* #pragma omp parallel */
4458 
4459       /* sum of dEx_threads */
4460 
4461       dEx = 0.0;
4462       dEy = 0.0;
4463       dEz = 0.0;
4464 
4465       if (F_NL_flag==1){
4466         for (Nloop=0; Nloop<Nthrds0; Nloop++){
4467 	  dEx += dEx_threads[Nloop];
4468 	  dEy += dEy_threads[Nloop];
4469 	  dEz += dEz_threads[Nloop];
4470         }
4471 
4472         /* force from #4B */
4473 
4474         Gxyz[Gc_AN][41] += dEx;
4475         Gxyz[Gc_AN][42] += dEy;
4476         Gxyz[Gc_AN][43] += dEz;
4477       }
4478 
4479       if (2<=level_stdout){
4480         printf("<Force>  force(HNL3) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
4481 	       myid,Mc_AN,Gc_AN,dEx,dEy,dEz);fflush(stdout);
4482       }
4483 
4484       /* freeing of array */
4485       free(OneD2q_AN);
4486       free(OneD2h_AN);
4487       free(dEx_threads);
4488       free(dEy_threads);
4489       free(dEz_threads);
4490 
4491     } /* if (Mc_AN<=Matomnum) */
4492 
4493   } /* Mc_AN */
4494 
4495   dtime(&etime);
4496   if(myid==0 && measure_time){
4497     printf("Time for part3 of force_NL=%18.5f\n",etime-stime);fflush(stdout);
4498   }
4499 
4500   /********************************************************
4501     adding Gxyz[Gc_AN][41,42,43] to Gxyz[Gc_AN][17,18,19]
4502   ********************************************************/
4503 
4504   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
4505     Gc_AN = M2G[Mc_AN];
4506 
4507     if (2<=level_stdout){
4508       printf("<Force>  force(HNL) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
4509 	     myid,Mc_AN,Gc_AN,Gxyz[Gc_AN][41],Gxyz[Gc_AN][42],Gxyz[Gc_AN][43]);fflush(stdout);
4510     }
4511 
4512     Gxyz[Gc_AN][17] += Gxyz[Gc_AN][41];
4513     Gxyz[Gc_AN][18] += Gxyz[Gc_AN][42];
4514     Gxyz[Gc_AN][19] += Gxyz[Gc_AN][43];
4515   }
4516 
4517   /***********************************
4518             freeing of arrays
4519   ************************************/
4520 
4521   free(Indicator);
4522 
4523   for (ID=0; ID<numprocs; ID++){
4524     free(S_array[ID]);
4525   }
4526   free(S_array);
4527 
4528   for (ID=0; ID<numprocs; ID++){
4529     free(R_array[ID]);
4530   }
4531   free(R_array);
4532 
4533   free(Snd_DS_NL_Size);
4534   free(Rcv_DS_NL_Size);
4535 }
4536 
4537 
4538 
4539 
4540 
Force4B(double ***** CDM0)4541 void Force4B(double *****CDM0)
4542 {
4543   /****************************************************
4544                       #4 of Force
4545 
4546             by the projector expansion of VNA
4547   ****************************************************/
4548 
4549   int Mc_AN,Gc_AN,Cwan,i,j,h_AN,q_AN,start_q_AN,Mq_AN;
4550   int jan,kl,Qwan,Gq_AN,Gh_AN,Mh_AN,Hwan,ian;
4551   int l1,l2,l3,l,LL,Mul1,Num_RVNA,tno0,ncp;
4552   int tno1,tno2,size1,size2,n,kk,num,po,po1,po2;
4553   int numprocs,myid,tag=999,ID,IDS,IDR;
4554   int **S_array,**R_array;
4555   int S_comm_flag,R_comm_flag;
4556   int SA_num,q,Sc_AN,GSc_AN;
4557   int Sc_wan,Sh_AN,GSh_AN,Sh_wan;
4558   int Sh_AN2,fan,jg,j0,jg0,Mj_AN0;
4559   int Original_Mc_AN;
4560 
4561   double rcutA,rcutB,rcut;
4562   double dEx,dEy,dEz,ene,pref;
4563   double Stime_atom, Etime_atom;
4564   double **HVNAx,**HVNAy,**HVNAz;
4565   int *VNA_List;
4566   int *VNA_List2;
4567   int *Snd_DS_VNA_Size,*Rcv_DS_VNA_Size;
4568   int *Indicator;
4569   Type_DS_VNA *tmp_array;
4570   Type_DS_VNA *tmp_array2;
4571 
4572   /* for OpenMP */
4573   int OMPID,Nthrds,Nthrds0,Nprocs,Nloop,ODNloop;
4574   int *OneD2h_AN,*OneD2q_AN;
4575   double *dEx_threads;
4576   double *dEy_threads;
4577   double *dEz_threads;
4578   double stime,etime;
4579   double stime1,etime1;
4580 
4581   MPI_Status stat;
4582   MPI_Request request;
4583 
4584   /* MPI */
4585 
4586   MPI_Comm_size(mpi_comm_level1,&numprocs);
4587   MPI_Comm_rank(mpi_comm_level1,&myid);
4588 
4589   dtime(&stime);
4590 
4591   /****************************
4592        allocation of arrays
4593   *****************************/
4594 
4595   Indicator = (int*)malloc(sizeof(int)*numprocs);
4596 
4597   S_array = (int**)malloc(sizeof(int*)*numprocs);
4598   for (ID=0; ID<numprocs; ID++){
4599     S_array[ID] = (int*)malloc(sizeof(int)*3);
4600   }
4601 
4602   R_array = (int**)malloc(sizeof(int*)*numprocs);
4603   for (ID=0; ID<numprocs; ID++){
4604     R_array[ID] = (int*)malloc(sizeof(int)*3);
4605   }
4606 
4607   Snd_DS_VNA_Size = (int*)malloc(sizeof(int)*numprocs);
4608   Rcv_DS_VNA_Size = (int*)malloc(sizeof(int)*numprocs);
4609 
4610   VNA_List  = (int*)malloc(sizeof(int)*(List_YOUSO[34]*(List_YOUSO[35] + 1)+2) );
4611   VNA_List2 = (int*)malloc(sizeof(int)*(List_YOUSO[34]*(List_YOUSO[35] + 1)+2) );
4612 
4613   /* initialize the temporal array storing the force contribution */
4614 
4615   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
4616     Gc_AN = F_M2G[Mc_AN];
4617     Gxyz[Gc_AN][41] = 0.0;
4618     Gxyz[Gc_AN][42] = 0.0;
4619     Gxyz[Gc_AN][43] = 0.0;
4620   }
4621 
4622   /*************************************************************
4623                  contraction of DS_VNA and HVNA2
4624   *************************************************************/
4625 
4626   if (Cnt_switch==1 && ProExpn_VNA==1){
4627 
4628     Cont_Matrix2(DS_VNA[0],CntDS_VNA[0]);
4629     Cont_Matrix2(DS_VNA[1],CntDS_VNA[1]);
4630     Cont_Matrix2(DS_VNA[2],CntDS_VNA[2]);
4631     Cont_Matrix2(DS_VNA[3],CntDS_VNA[3]);
4632 
4633     Cont_Matrix3(HVNA2[1],CntHVNA2[1]);
4634     Cont_Matrix3(HVNA2[2],CntHVNA2[2]);
4635     Cont_Matrix3(HVNA2[3],CntHVNA2[3]);
4636 
4637     Cont_Matrix4(HVNA3[1],CntHVNA3[1]);
4638     Cont_Matrix4(HVNA3[2],CntHVNA3[2]);
4639     Cont_Matrix4(HVNA3[3],CntHVNA3[3]);
4640   }
4641 
4642   /*************************************************************
4643                   make VNA_List and VNA_List2
4644   *************************************************************/
4645 
4646   l = 0;
4647   for (i=0; i<=List_YOUSO[35]; i++){     /* max L */
4648     for (j=0; j<List_YOUSO[34]; j++){    /* # of radial projectors */
4649       VNA_List[l]  = i;
4650       VNA_List2[l] = j;
4651       l++;
4652     }
4653   }
4654 
4655   Num_RVNA = List_YOUSO[34]*(List_YOUSO[35] + 1);
4656 
4657   dtime(&etime);
4658   if(myid==0 && measure_time){
4659     printf("Time for part1 of force#4=%18.5f\n",etime-stime);fflush(stdout);
4660   }
4661 
4662   /*****************************************************
4663    if orbital optimization
4664    copy CntDS_VNA[0] into DS_VNA[0]
4665   *****************************************************/
4666 
4667   if (Cnt_switch==1){
4668 
4669     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
4670 
4671       Gc_AN = F_M2G[Mc_AN];
4672       Cwan = WhatSpecies[Gc_AN];
4673       tno0 = Spe_Total_CNO[Cwan];
4674 
4675       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4676 
4677 	Gh_AN = natn[Gc_AN][h_AN];
4678 	Hwan = WhatSpecies[Gh_AN];
4679 
4680 	for (i=0; i<tno0; i++){
4681 
4682 	  l = 0;
4683 	  for (l1=0; l1<Num_RVNA; l1++){
4684 
4685 	    l2 = 2*VNA_List[l1];
4686 	    for (l3=0; l3<=l2; l3++){
4687 	      DS_VNA[0][Mc_AN][h_AN][i][l] = CntDS_VNA[0][Mc_AN][h_AN][i][l];
4688 	      l++;
4689 	    }
4690 	  }
4691 	}
4692       }
4693     }
4694   }
4695 
4696   /*****************************************************
4697      (1) pre-multiplying DS_VNA[kk] with ene
4698      (2) copy DS_VNA[kk] or CntDS_VNA[kk] into DS_VNA[kk]
4699   *****************************************************/
4700 
4701   dtime(&stime);
4702 
4703 #pragma omp parallel shared(CntDS_VNA,DS_VNA,Cnt_switch,VNA_proj_ene,VNA_List2,VNA_List,Num_RVNA,natn,FNAN,Spe_Total_CNO,WhatSpecies,F_M2G,Matomnum) private(kk,OMPID,Nthrds,Nprocs,Gc_AN,Cwan,tno0,Mc_AN,h_AN,Gh_AN,Hwan,i,l,l1,LL,Mul1,ene,l2,l3)
4704   {
4705 
4706     /* get info. on OpenMP */
4707 
4708     OMPID = omp_get_thread_num();
4709     Nthrds = omp_get_num_threads();
4710     Nprocs = omp_get_num_procs();
4711 
4712     for (kk=1; kk<=3; kk++){
4713       for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
4714 
4715 	Gc_AN = F_M2G[Mc_AN];
4716 	Cwan = WhatSpecies[Gc_AN];
4717 	tno0 = Spe_Total_CNO[Cwan];
4718 
4719 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4720 
4721 	  Gh_AN = natn[Gc_AN][h_AN];
4722 	  Hwan = WhatSpecies[Gh_AN];
4723 
4724 	  for (i=0; i<tno0; i++){
4725 
4726 	    l = 0;
4727 	    for (l1=0; l1<Num_RVNA; l1++){
4728 
4729 	      LL   = VNA_List[l1];
4730 	      Mul1 = VNA_List2[l1];
4731 
4732 	      ene = VNA_proj_ene[Hwan][LL][Mul1];
4733 	      l2 = 2*VNA_List[l1];
4734 
4735 	      if (Cnt_switch==0){
4736 		for (l3=0; l3<=l2; l3++){
4737 		  DS_VNA[kk][Mc_AN][h_AN][i][l] = ene*DS_VNA[kk][Mc_AN][h_AN][i][l];
4738 		  l++;
4739 		}
4740 	      }
4741 
4742 	      else{
4743 		for (l3=0; l3<=l2; l3++){
4744 		  DS_VNA[kk][Mc_AN][h_AN][i][l] = ene*CntDS_VNA[kk][Mc_AN][h_AN][i][l];
4745 		  l++;
4746 		}
4747 	      }
4748 	    }
4749 	  }
4750 
4751 	} /* h_AN */
4752       } /* Mc_AN */
4753     } /* kk */
4754 
4755   } /* #pragma omp parallel */
4756 
4757   dtime(&etime);
4758   if(myid==0 && measure_time){
4759     printf("Time for part2 of force#4=%18.5f\n",etime-stime);fflush(stdout);
4760   }
4761 
4762   /*****************************************}**********************
4763       THE FIRST CASE:
4764       In case of I=i or I=j
4765       for d [ \sum_k <i|k>ek<k|j> ]/dRI
4766   ****************************************************************/
4767 
4768   /*******************************************************
4769    *******************************************************
4770       multiplying overlap integrals WITH COMMUNICATION
4771    *******************************************************
4772    *******************************************************/
4773 
4774   MPI_Barrier(mpi_comm_level1);
4775   dtime(&stime);
4776 
4777   for (ID=0; ID<numprocs; ID++){
4778     F_Snd_Num_WK[ID] = 0;
4779     F_Rcv_Num_WK[ID] = 0;
4780   }
4781 
4782   do {
4783 
4784     /***********************************
4785              set the size of data
4786     ************************************/
4787 
4788     for (ID=0; ID<numprocs; ID++){
4789 
4790       IDS = (myid + ID) % numprocs;
4791       IDR = (myid - ID + numprocs) % numprocs;
4792 
4793       /* find the data size to send the block data */
4794 
4795       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ){
4796 
4797 	size1 = 0;
4798 	n = F_Snd_Num_WK[IDS];
4799 
4800 	Mc_AN = Snd_MAN[IDS][n];
4801 	Gc_AN = Snd_GAN[IDS][n];
4802 	Cwan = WhatSpecies[Gc_AN];
4803 	tno1 = Spe_Total_NO[Cwan];
4804 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4805 	  Gh_AN = natn[Gc_AN][h_AN];
4806 	  Hwan = WhatSpecies[Gh_AN];
4807 	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
4808 	  size1 += tno1*tno2;
4809 	}
4810 
4811 	Snd_DS_VNA_Size[IDS] = size1;
4812 	MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
4813       }
4814       else{
4815 	Snd_DS_VNA_Size[IDS] = 0;
4816       }
4817 
4818       /* receiving of the size of the data */
4819 
4820       if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ){
4821 	MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
4822 	Rcv_DS_VNA_Size[IDR] = size2;
4823       }
4824       else{
4825 	Rcv_DS_VNA_Size[IDR] = 0;
4826       }
4827 
4828       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) )  MPI_Wait(&request,&stat);
4829 
4830     } /* ID */
4831 
4832       /***********************************
4833                 data transfer
4834       ************************************/
4835 
4836     for (ID=0; ID<numprocs; ID++){
4837 
4838       IDS = (myid + ID) % numprocs;
4839       IDR = (myid - ID + numprocs) % numprocs;
4840 
4841       /******************************
4842              sending of the data
4843       ******************************/
4844 
4845       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ){
4846 
4847 	size1 = Snd_DS_VNA_Size[IDS];
4848 
4849 	/* allocation of the array */
4850 
4851 	tmp_array = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size1);
4852 
4853 	/* multidimentional array to the vector array */
4854 
4855 	num = 0;
4856 	n = F_Snd_Num_WK[IDS];
4857 
4858 	Mc_AN = Snd_MAN[IDS][n];
4859 	Gc_AN = Snd_GAN[IDS][n];
4860 	Cwan = WhatSpecies[Gc_AN];
4861 	tno1 = Spe_Total_NO[Cwan];
4862 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4863 	  Gh_AN = natn[Gc_AN][h_AN];
4864 	  Hwan = WhatSpecies[Gh_AN];
4865 	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
4866 
4867 	  for (i=0; i<tno1; i++){
4868 	    for (j=0; j<tno2; j++){
4869 	      tmp_array[num] = DS_VNA[0][Mc_AN][h_AN][i][j];
4870 	      num++;
4871 	    }
4872 	  }
4873 	}
4874 
4875 	MPI_Isend(&tmp_array[0], size1, MPI_Type_DS_VNA, IDS, tag, mpi_comm_level1, &request);
4876       }
4877 
4878       /******************************
4879           receiving of the block data
4880       ******************************/
4881 
4882       if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ){
4883 
4884 	size2 = Rcv_DS_VNA_Size[IDR];
4885 	tmp_array2 = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size2);
4886 	MPI_Recv(&tmp_array2[0], size2, MPI_Type_DS_VNA, IDR, tag, mpi_comm_level1, &stat);
4887 
4888 	/* store */
4889 
4890 	num = 0;
4891 	n = F_Rcv_Num_WK[IDR];
4892 	Original_Mc_AN = F_TopMAN[IDR] + n;
4893 	Gc_AN = Rcv_GAN[IDR][n];
4894 	Cwan = WhatSpecies[Gc_AN];
4895 	tno1 = Spe_Total_NO[Cwan];
4896 
4897 	for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4898 
4899 	  Gh_AN = natn[Gc_AN][h_AN];
4900 	  Hwan = WhatSpecies[Gh_AN];
4901 	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
4902 
4903 	  for (i=0; i<tno1; i++){
4904 	    for (j=0; j<tno2; j++){
4905 	      DS_VNA[0][Matomnum+1][h_AN][i][j] = tmp_array2[num];
4906 	      num++;
4907 	    }
4908 	  }
4909 	}
4910 
4911 	/* free tmp_array2 */
4912 	free(tmp_array2);
4913 
4914 	/*****************************************
4915                multiplying overlap integrals
4916 	*****************************************/
4917 
4918 #pragma omp parallel shared(List_YOUSO,time_per_atom,Gxyz,CDM0,SpinP_switch,CntHVNA2,HVNA2,DS_VNA,Cnt_switch,RMI1,Original_Mc_AN,IDR,Rcv_GAN,F_Rcv_Num_WK,Spe_Total_CNO,F_G2M,natn,FNAN,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Stime_atom,Etime_atom,dEx,dEy,dEz,Gc_AN,Mc_AN,Cwan,fan,h_AN,Gh_AN,Mh_AN,Hwan,ian,n,jg,j0,jg0,Mj_AN0,po2,q_AN,Gq_AN,Mq_AN,Qwan,jan,kl,HVNAx,HVNAy,HVNAz,i,j)
4919 	{
4920 
4921 	  /* allocation of array */
4922 
4923 	  HVNAx = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
4924 	  for (j=0; j<List_YOUSO[7]; j++){
4925 	    HVNAx[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
4926 	  }
4927 
4928 	  HVNAy = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
4929 	  for (j=0; j<List_YOUSO[7]; j++){
4930 	    HVNAy[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
4931 	  }
4932 
4933 	  HVNAz = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
4934 	  for (j=0; j<List_YOUSO[7]; j++){
4935 	    HVNAz[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
4936 	  }
4937 
4938 	  /* get info. on OpenMP */
4939 
4940 	  OMPID = omp_get_thread_num();
4941 	  Nthrds = omp_get_num_threads();
4942 	  Nprocs = omp_get_num_procs();
4943 
4944 	  for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
4945 
4946 	    dtime(&Stime_atom);
4947 
4948 	    dEx = 0.0;
4949 	    dEy = 0.0;
4950 	    dEz = 0.0;
4951 
4952 	    Gc_AN = M2G[Mc_AN];
4953 	    Cwan = WhatSpecies[Gc_AN];
4954 	    fan = FNAN[Gc_AN];
4955 
4956 	    h_AN = 0;
4957 	    Gh_AN = natn[Gc_AN][h_AN];
4958 	    Mh_AN = F_G2M[Gh_AN];
4959 	    Hwan = WhatSpecies[Gh_AN];
4960 	    ian = Spe_Total_CNO[Hwan];
4961 
4962 	    n = F_Rcv_Num_WK[IDR];
4963 	    jg = Rcv_GAN[IDR][n];
4964 
4965 	    for (j0=0; j0<=fan; j0++){
4966 
4967 	      jg0 = natn[Gc_AN][j0];
4968 	      Mj_AN0 = F_G2M[jg0];
4969 
4970 	      po2 = 0;
4971 	      if (Original_Mc_AN==Mj_AN0){
4972 		po2 = 1;
4973 		q_AN = j0;
4974 	      }
4975 
4976 	      if (po2==1){
4977 
4978 		Gq_AN = natn[Gc_AN][q_AN];
4979 		Mq_AN = F_G2M[Gq_AN];
4980 		Qwan = WhatSpecies[Gq_AN];
4981 		jan = Spe_Total_CNO[Qwan];
4982 		kl = RMI1[Mc_AN][h_AN][q_AN];
4983 
4984 		if (Cnt_switch==0) {
4985 		  dHVNA(0,Mc_AN,h_AN,q_AN,DS_VNA,HVNA2,HVNA3,HVNAx,HVNAy,HVNAz);
4986 		}
4987 		else {
4988 		  dHVNA(0,Mc_AN,h_AN,q_AN,DS_VNA,CntHVNA2,CntHVNA3,HVNAx,HVNAy,HVNAz);
4989 		}
4990 
4991 		/* contribution of force = Trace(CDM0*dH) */
4992 		/* spin non-polarization */
4993 
4994 		if (SpinP_switch==0){
4995 
4996 		  for (i=0; i<ian; i++){
4997 		    for (j=0; j<jan; j++){
4998 		      if (q_AN==h_AN){
4999 
5000 			dEx += 2.0*CDM0[0][Mh_AN][kl][i][j]*HVNAx[i][j];
5001 			dEy += 2.0*CDM0[0][Mh_AN][kl][i][j]*HVNAy[i][j];
5002 			dEz += 2.0*CDM0[0][Mh_AN][kl][i][j]*HVNAz[i][j];
5003 		      }
5004 		      else{
5005 			dEx += 4.0*CDM0[0][Mh_AN][kl][i][j]*HVNAx[i][j];
5006 			dEy += 4.0*CDM0[0][Mh_AN][kl][i][j]*HVNAy[i][j];
5007 			dEz += 4.0*CDM0[0][Mh_AN][kl][i][j]*HVNAz[i][j];
5008 		      }
5009 		    }
5010 		  }
5011 		}
5012 
5013 		/* else */
5014 
5015 		else{
5016 
5017 		  for (i=0; i<ian; i++){
5018 		    for (j=0; j<jan; j++){
5019 		      if (q_AN==h_AN){
5020 			dEx += (  CDM0[0][Mh_AN][kl][i][j]
5021 			        + CDM0[1][Mh_AN][kl][i][j] )*HVNAx[i][j];
5022 			dEy += (  CDM0[0][Mh_AN][kl][i][j]
5023 			        + CDM0[1][Mh_AN][kl][i][j] )*HVNAy[i][j];
5024 			dEz += (  CDM0[0][Mh_AN][kl][i][j]
5025 				+ CDM0[1][Mh_AN][kl][i][j] )*HVNAz[i][j];
5026 		      }
5027 		      else{
5028 			dEx += 2.0*(  CDM0[0][Mh_AN][kl][i][j]
5029 				    + CDM0[1][Mh_AN][kl][i][j] )*HVNAx[i][j];
5030 			dEy += 2.0*(  CDM0[0][Mh_AN][kl][i][j]
5031 				    + CDM0[1][Mh_AN][kl][i][j] )*HVNAy[i][j];
5032 			dEz += 2.0*(  CDM0[0][Mh_AN][kl][i][j]
5033 				    + CDM0[1][Mh_AN][kl][i][j] )*HVNAz[i][j];
5034 		      }
5035 		    }
5036 		  }
5037 		}
5038 
5039 	      } /* if (po2==1) */
5040 	    } /* j0 */
5041 
5042 	      /* force from #4B */
5043 
5044 	    Gxyz[Gc_AN][41] += dEx;
5045 	    Gxyz[Gc_AN][42] += dEy;
5046 	    Gxyz[Gc_AN][43] += dEz;
5047 
5048 	    /* timing */
5049 	    dtime(&Etime_atom);
5050 	    time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
5051 
5052 	  } /* Mc_AN */
5053 
5054 	    /* freeing of array */
5055 
5056 	  for (j=0; j<List_YOUSO[7]; j++){
5057 	    free(HVNAx[j]);
5058 	  }
5059 	  free(HVNAx);
5060 
5061 	  for (j=0; j<List_YOUSO[7]; j++){
5062 	    free(HVNAy[j]);
5063 	  }
5064 	  free(HVNAy);
5065 
5066 	  for (j=0; j<List_YOUSO[7]; j++){
5067 	    free(HVNAz[j]);
5068 	  }
5069 	  free(HVNAz);
5070 
5071 	} /* #pragma omp parallel */
5072 
5073 	  /********************************************
5074             increment of F_Rcv_Num_WK[IDR]
5075 	  ********************************************/
5076 
5077 	F_Rcv_Num_WK[IDR]++;
5078 
5079       } /* if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) */
5080 
5081       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) {
5082 
5083 	MPI_Wait(&request,&stat);
5084 	free(tmp_array);  /* freeing of array */
5085 
5086 	/********************************************
5087              increment of F_Snd_Num_WK[IDS]
5088 	********************************************/
5089 
5090 	F_Snd_Num_WK[IDS]++;
5091       }
5092 
5093     } /* ID */
5094 
5095       /*****************************************************
5096         check whether all the communications have finished
5097       *****************************************************/
5098 
5099     po = 0;
5100     for (ID=0; ID<numprocs; ID++){
5101 
5102       IDS = (myid + ID) % numprocs;
5103       IDR = (myid - ID + numprocs) % numprocs;
5104 
5105       if ( 0<(F_Snd_Num[IDS]-F_Snd_Num_WK[IDS]) ) po += F_Snd_Num[IDS]-F_Snd_Num_WK[IDS];
5106       if ( 0<(F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR]) ) po += F_Rcv_Num[IDR]-F_Rcv_Num_WK[IDR];
5107     }
5108 
5109   } while (po!=0);
5110 
5111   dtime(&etime);
5112   if(myid==0 && measure_time){
5113     printf("Time for part3 of force#4=%18.5f\n",etime-stime);fflush(stdout);
5114   }
5115 
5116   /*******************************************************
5117    *******************************************************
5118       THE FIRST CASE:
5119       multiplying overlap integrals WITHOUT COMMUNICATION
5120    *******************************************************
5121    *******************************************************/
5122 
5123   dtime(&stime);
5124 
5125 #pragma omp parallel shared(time_per_atom,Gxyz,CDM0,SpinP_switch,CntHVNA2,HVNA2,DS_VNA,Cnt_switch,RMI1,FNAN,Spe_Total_CNO,WhatSpecies,F_G2M,natn,M2G,Matomnum,List_YOUSO) private(HVNAx,HVNAy,HVNAz,OMPID,Nthrds,Nprocs,Mc_AN,Stime_atom,Etime_atom,dEx,dEy,dEz,Gc_AN,h_AN,Gh_AN,Mh_AN,Hwan,ian,q_AN,Gq_AN,Mq_AN,Qwan,jan,kl,i,j,kk)
5126   {
5127 
5128     /* allocation of array */
5129 
5130     HVNAx = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
5131     for (j=0; j<List_YOUSO[7]; j++){
5132       HVNAx[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
5133     }
5134 
5135     HVNAy = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
5136     for (j=0; j<List_YOUSO[7]; j++){
5137       HVNAy[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
5138     }
5139 
5140     HVNAz = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
5141     for (j=0; j<List_YOUSO[7]; j++){
5142       HVNAz[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
5143     }
5144 
5145     /* get info. on OpenMP */
5146 
5147     OMPID = omp_get_thread_num();
5148     Nthrds = omp_get_num_threads();
5149     Nprocs = omp_get_num_procs();
5150 
5151     for (Mc_AN=(OMPID*Matomnum/Nthrds+1); Mc_AN<((OMPID+1)*Matomnum/Nthrds+1); Mc_AN++){
5152 
5153       dtime(&Stime_atom);
5154 
5155       dEx = 0.0;
5156       dEy = 0.0;
5157       dEz = 0.0;
5158 
5159       Gc_AN = M2G[Mc_AN];
5160       h_AN = 0;
5161       Gh_AN = natn[Gc_AN][h_AN];
5162       Mh_AN = F_G2M[Gh_AN];
5163       Hwan = WhatSpecies[Gh_AN];
5164       ian = Spe_Total_CNO[Hwan];
5165 
5166       for (q_AN=h_AN; q_AN<=FNAN[Gc_AN]; q_AN++){
5167 
5168 	Gq_AN = natn[Gc_AN][q_AN];
5169 	Mq_AN = F_G2M[Gq_AN];
5170 
5171 	if (Mq_AN<=Matomnum){
5172 
5173 	  Qwan = WhatSpecies[Gq_AN];
5174 	  jan = Spe_Total_CNO[Qwan];
5175 	  kl = RMI1[Mc_AN][h_AN][q_AN];
5176 
5177 	  if (Cnt_switch==0) {
5178 	    dHVNA(0,Mc_AN,h_AN,q_AN,DS_VNA,HVNA2,HVNA3,HVNAx,HVNAy,HVNAz);
5179 	  }
5180 	  else {
5181 	    dHVNA(0,Mc_AN,h_AN,q_AN,DS_VNA,CntHVNA2,CntHVNA3,HVNAx,HVNAy,HVNAz);
5182 	  }
5183 
5184 	  if (SpinP_switch==0){
5185 
5186 	    for (i=0; i<ian; i++){
5187 	      for (j=0; j<jan; j++){
5188 		if (q_AN==h_AN){
5189 		  dEx += 2.0*CDM0[0][Mh_AN][kl][i][j]*HVNAx[i][j];
5190 		  dEy += 2.0*CDM0[0][Mh_AN][kl][i][j]*HVNAy[i][j];
5191 		  dEz += 2.0*CDM0[0][Mh_AN][kl][i][j]*HVNAz[i][j];
5192 		}
5193 		else{
5194 		  dEx += 4.0*CDM0[0][Mh_AN][kl][i][j]*HVNAx[i][j];
5195 		  dEy += 4.0*CDM0[0][Mh_AN][kl][i][j]*HVNAy[i][j];
5196 		  dEz += 4.0*CDM0[0][Mh_AN][kl][i][j]*HVNAz[i][j];
5197 		}
5198 
5199 	      }
5200 	    }
5201 	  }
5202 
5203 	  /* else */
5204 
5205 	  else{
5206 
5207 	    for (i=0; i<ian; i++){
5208 	      for (j=0; j<jan; j++){
5209 		if (q_AN==h_AN){
5210 		  dEx += (  CDM0[0][Mh_AN][kl][i][j]
5211 			  + CDM0[1][Mh_AN][kl][i][j] )*HVNAx[i][j];
5212 		  dEy += (  CDM0[0][Mh_AN][kl][i][j]
5213 			  + CDM0[1][Mh_AN][kl][i][j] )*HVNAy[i][j];
5214 		  dEz += (  CDM0[0][Mh_AN][kl][i][j]
5215 			  + CDM0[1][Mh_AN][kl][i][j] )*HVNAz[i][j];
5216 		}
5217 		else{
5218 		  dEx += 2.0*(  CDM0[0][Mh_AN][kl][i][j]
5219 			      + CDM0[1][Mh_AN][kl][i][j] )*HVNAx[i][j];
5220 		  dEy += 2.0*(  CDM0[0][Mh_AN][kl][i][j]
5221 			      + CDM0[1][Mh_AN][kl][i][j] )*HVNAy[i][j];
5222 		  dEz += 2.0*(  CDM0[0][Mh_AN][kl][i][j]
5223 			      + CDM0[1][Mh_AN][kl][i][j] )*HVNAz[i][j];
5224 		}
5225 	      }
5226 	    }
5227 	  }
5228 	}
5229       }
5230 
5231       /* force from #4B */
5232 
5233       Gxyz[Gc_AN][41] += dEx;
5234       Gxyz[Gc_AN][42] += dEy;
5235       Gxyz[Gc_AN][43] += dEz;
5236 
5237       /* timing */
5238       dtime(&Etime_atom);
5239       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
5240 
5241     } /* Mc_AN */
5242 
5243       /* freeing of array */
5244 
5245     for (j=0; j<List_YOUSO[7]; j++){
5246       free(HVNAx[j]);
5247     }
5248     free(HVNAx);
5249 
5250     for (j=0; j<List_YOUSO[7]; j++){
5251       free(HVNAy[j]);
5252     }
5253     free(HVNAy);
5254 
5255     for (j=0; j<List_YOUSO[7]; j++){
5256       free(HVNAz[j]);
5257     }
5258     free(HVNAz);
5259 
5260   } /* #pragma omp parallel */
5261 
5262   dtime(&etime);
5263   if(myid==0 && measure_time){
5264     printf("Time for part4 of force#4=%18.5f\n",etime-stime);fflush(stdout);
5265   }
5266 
5267   /*************************************************************
5268      THE SECOND CASE:
5269      In case of I=k with I!=i and I!=j
5270      d [ \sum_k <i|k>ek<k|j> ]/dRI
5271   *************************************************************/
5272 
5273   /************************************************************
5274      MPI communication of DS_VNA whose basis part is not located
5275      on own site but projector part is located on own site.
5276   ************************************************************/
5277 
5278   MPI_Barrier(mpi_comm_level1);
5279   dtime(&stime);
5280 
5281   for (ID=0; ID<numprocs; ID++) Indicator[ID] = 0;
5282 
5283   for (Mc_AN=1; Mc_AN<=Max_Matomnum; Mc_AN++){
5284 
5285     dtime(&Stime_atom);
5286 
5287     dtime(&stime1);
5288 
5289     if (Mc_AN<=Matomnum)  Gc_AN = M2G[Mc_AN];
5290     else                  Gc_AN = 0;
5291 
5292     for (ID=0; ID<numprocs; ID++){
5293 
5294       IDS = (myid + ID) % numprocs;
5295       IDR = (myid - ID + numprocs) % numprocs;
5296 
5297       i = Indicator[IDS];
5298       po = 0;
5299 
5300       Gh_AN = Pro_Snd_GAtom[IDS][i];
5301 
5302       if (Gh_AN!=0){
5303 
5304 	/* find the range with the same global atomic number */
5305 
5306 	do {
5307 
5308 	  i++;
5309 	  if (Gh_AN!=Pro_Snd_GAtom[IDS][i]) po = 1;
5310 	} while(po==0);
5311 
5312 	i--;
5313 	SA_num = i - Indicator[IDS] + 1;
5314 
5315 	/* find the data size to send the block data */
5316 
5317 	size1 = 0;
5318 	for (q=Indicator[IDS]; q<=(Indicator[IDS]+SA_num-1); q++){
5319 
5320 	  Sc_AN = Pro_Snd_MAtom[IDS][q];
5321 	  GSc_AN = F_M2G[Sc_AN];
5322 	  Sc_wan = WhatSpecies[GSc_AN];
5323 	  tno1 = Spe_Total_CNO[Sc_wan];
5324 	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
5325 	  size1 += 4*tno1*tno2;
5326 	  size1 += 3;
5327 	}
5328 
5329       } /* if (Gh_AN!=0) */
5330 
5331       else {
5332 	SA_num = 0;
5333 	size1 = 0;
5334       }
5335 
5336       S_array[IDS][0] = Gh_AN;
5337       S_array[IDS][1] = SA_num;
5338       S_array[IDS][2] = size1;
5339 
5340       if (ID!=0){
5341 	MPI_Isend(&S_array[IDS][0], 3, MPI_INT, IDS, tag, mpi_comm_level1, &request);
5342 	MPI_Recv( &R_array[IDR][0], 3, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
5343 	MPI_Wait(&request,&stat);
5344       }
5345       else {
5346 	R_array[myid][0] = S_array[myid][0];
5347 	R_array[myid][1] = S_array[myid][1];
5348 	R_array[myid][2] = S_array[myid][2];
5349       }
5350 
5351       if (R_array[IDR][0]==Gc_AN) R_comm_flag = 1;
5352       else                        R_comm_flag = 0;
5353 
5354       if (ID!=0){
5355 	MPI_Isend(&R_comm_flag, 1, MPI_INT, IDR, tag, mpi_comm_level1, &request);
5356 	MPI_Recv( &S_comm_flag, 1, MPI_INT, IDS, tag, mpi_comm_level1, &stat);
5357 	MPI_Wait(&request,&stat);
5358       }
5359       else{
5360 	S_comm_flag = R_comm_flag;
5361       }
5362 
5363       /*****************************************
5364                        send the data
5365       *****************************************/
5366 
5367       /* if (S_comm_flag==1) then, send data to IDS */
5368 
5369       if (S_comm_flag==1){
5370 
5371 	/* allocate tmp_array */
5372 
5373 	tmp_array = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size1);
5374 
5375 	/* multidimentional array to vector array */
5376 
5377 	num = 0;
5378 
5379 	for (q=Indicator[IDS]; q<=(Indicator[IDS]+SA_num-1); q++){
5380 
5381 	  Sc_AN = Pro_Snd_MAtom[IDS][q];
5382 	  GSc_AN = F_M2G[Sc_AN];
5383 	  Sc_wan = WhatSpecies[GSc_AN];
5384 	  tno1 = Spe_Total_CNO[Sc_wan];
5385 
5386 	  Sh_AN = Pro_Snd_LAtom[IDS][q];
5387 	  GSh_AN = natn[GSc_AN][Sh_AN];
5388 	  Sh_wan = WhatSpecies[GSh_AN];
5389 	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
5390 
5391 	  Sh_AN2 = Pro_Snd_LAtom2[IDS][q];
5392 
5393 	  tmp_array[num] = (Type_DS_VNA)Sc_AN;  num++;
5394 	  tmp_array[num] = (Type_DS_VNA)Sh_AN;  num++;
5395 	  tmp_array[num] = (Type_DS_VNA)Sh_AN2; num++;
5396 
5397 	  for (kk=0; kk<=3; kk++){
5398 	    for (i=0; i<tno1; i++){
5399 	      for (j=0; j<tno2; j++){
5400 		tmp_array[num] = DS_VNA[kk][Sc_AN][Sh_AN][i][j];
5401 		num++;
5402 	      }
5403 	    }
5404 	  }
5405 	}
5406 
5407 	if (ID!=0){
5408 	  MPI_Isend(&tmp_array[0], size1, MPI_Type_DS_VNA, IDS, tag, mpi_comm_level1, &request);
5409 	}
5410 
5411 	/* update Indicator[IDS] */
5412 
5413 	Indicator[IDS] += SA_num;
5414 
5415       } /* if (S_comm_flag==1) */
5416 
5417         /*****************************************
5418                      receive the data
5419 	*****************************************/
5420 
5421         /* if (R_comm_flag==1) then, receive the data from IDR */
5422 
5423       if (R_comm_flag==1){
5424 
5425 	size2 = R_array[IDR][2];
5426 	tmp_array2 = (Type_DS_VNA*)malloc(sizeof(Type_DS_VNA)*size2);
5427 
5428 	if (ID!=0){
5429 	  MPI_Recv(&tmp_array2[0], size2, MPI_Type_DS_VNA, IDR, tag, mpi_comm_level1, &stat);
5430 	}
5431 	else{
5432 	  for (i=0; i<size2; i++) tmp_array2[i] = tmp_array[i];
5433 	}
5434 
5435 	/* store */
5436 
5437 	num = 0;
5438 
5439 	for (n=0; n<R_array[IDR][1]; n++){
5440 
5441 	  Sc_AN  = (int)tmp_array2[num]; num++;
5442 	  Sh_AN  = (int)tmp_array2[num]; num++;
5443 	  Sh_AN2 = (int)tmp_array2[num]; num++;
5444 
5445 	  GSc_AN = natn[Gc_AN][Sh_AN2];
5446 	  Sc_wan = WhatSpecies[GSc_AN];
5447 
5448 	  tno1 = Spe_Total_CNO[Sc_wan];
5449 	  tno2 = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
5450 
5451 	  for (kk=0; kk<=3; kk++){
5452 	    for (i=0; i<tno1; i++){
5453 	      for (j=0; j<tno2; j++){
5454 		DS_VNA[kk][Matomnum+1][Sh_AN2][i][j] = tmp_array2[num];
5455 		num++;
5456 	      }
5457 	    }
5458 	  }
5459 	}
5460 
5461 	/* free tmp_array2 */
5462 	free(tmp_array2);
5463 
5464       } /* if (R_comm_flag==1) */
5465 
5466       if (S_comm_flag==1){
5467 	if (ID!=0) MPI_Wait(&request,&stat);
5468 	free(tmp_array);  /* freeing of array */
5469       }
5470 
5471     } /* ID */
5472 
5473     dtime(&etime1);
5474     if(myid==0 && measure_time){
5475       printf("Time for part5A of force#4=%18.5f\n",etime1-stime1);fflush(stdout);
5476     }
5477 
5478     dtime(&stime1);
5479 
5480     if (Mc_AN<=Matomnum){
5481 
5482       /* get Nthrds0 */
5483 #pragma omp parallel shared(Nthrds0)
5484       {
5485 	Nthrds0 = omp_get_num_threads();
5486       }
5487 
5488       /* allocation of arrays */
5489       dEx_threads = (double*)malloc(sizeof(double)*Nthrds0);
5490       dEy_threads = (double*)malloc(sizeof(double)*Nthrds0);
5491       dEz_threads = (double*)malloc(sizeof(double)*Nthrds0);
5492 
5493       for (Nloop=0; Nloop<Nthrds0; Nloop++){
5494 	dEx_threads[Nloop] = 0.0;
5495 	dEy_threads[Nloop] = 0.0;
5496 	dEz_threads[Nloop] = 0.0;
5497       }
5498 
5499       /* one-dimensionalize the h_AN and q_AN loops */
5500 
5501       OneD2h_AN = (int*)malloc(sizeof(int)*(FNAN[Gc_AN]+1)*(FNAN[Gc_AN]+2));
5502       OneD2q_AN = (int*)malloc(sizeof(int)*(FNAN[Gc_AN]+1)*(FNAN[Gc_AN]+2));
5503 
5504       ODNloop = 0;
5505       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
5506 
5507 	if ( Solver==5 || Solver==8 )
5508 	  start_q_AN = 0;
5509 	else
5510 	  start_q_AN = h_AN;
5511 
5512 	for (q_AN=start_q_AN; q_AN<=FNAN[Gc_AN]; q_AN++){
5513 
5514 	  kl = RMI1[Mc_AN][h_AN][q_AN];
5515 
5516 	  if (0<=kl){
5517 	    OneD2h_AN[ODNloop] = h_AN;
5518 	    OneD2q_AN[ODNloop] = q_AN;
5519 	    ODNloop++;
5520 	  }
5521 	}
5522       }
5523 
5524 #pragma omp parallel shared(ODNloop,OneD2h_AN,OneD2q_AN,Mc_AN,Gc_AN,dEx_threads,dEy_threads,dEz_threads,CDM0,SpinP_switch,CntHVNA2,HVNA2,DS_VNA,Cnt_switch,RMI1,Spe_Total_CNO,WhatSpecies,F_G2M,natn,FNAN,List_YOUSO,Solver) private(OMPID,Nthrds,Nprocs,HVNAx,HVNAy,HVNAz,i,j,h_AN,Gh_AN,Mh_AN,Hwan,ian,q_AN,Gq_AN,Mq_AN,Qwan,jan,kl,Nloop,pref)
5525       {
5526 
5527 	/* allocation of arrays */
5528 
5529 	HVNAx = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
5530 	for (j=0; j<List_YOUSO[7]; j++){
5531 	  HVNAx[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
5532 	}
5533 
5534 	HVNAy = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
5535 	for (j=0; j<List_YOUSO[7]; j++){
5536 	  HVNAy[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
5537 	}
5538 
5539 	HVNAz = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
5540 	for (j=0; j<List_YOUSO[7]; j++){
5541 	  HVNAz[j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
5542 	}
5543 
5544 	/* get info. on OpenMP */
5545 
5546 	OMPID = omp_get_thread_num();
5547 	Nthrds = omp_get_num_threads();
5548 	Nprocs = omp_get_num_procs();
5549 
5550 	for (Nloop=OMPID*ODNloop/Nthrds; Nloop<(OMPID+1)*ODNloop/Nthrds; Nloop++){
5551 
5552 	  /* get h_AN and q_AN */
5553 
5554 	  h_AN = OneD2h_AN[Nloop];
5555 	  q_AN = OneD2q_AN[Nloop];
5556 
5557 	  /* set informations on h_AN */
5558 
5559 	  Gh_AN = natn[Gc_AN][h_AN];
5560 	  Mh_AN = F_G2M[Gh_AN];
5561 	  Hwan = WhatSpecies[Gh_AN];
5562 	  ian = Spe_Total_CNO[Hwan];
5563 
5564 	  /* set informations on q_AN */
5565 
5566 	  Gq_AN = natn[Gc_AN][q_AN];
5567 	  Mq_AN = F_G2M[Gq_AN];
5568 	  Qwan = WhatSpecies[Gq_AN];
5569 	  jan = Spe_Total_CNO[Qwan];
5570 	  kl = RMI1[Mc_AN][h_AN][q_AN];
5571 
5572 	  if (0<=kl){
5573 
5574 	    if (Cnt_switch==0)
5575 	      dHVNA(1,Mc_AN,h_AN,q_AN,DS_VNA,HVNA2,HVNA3,HVNAx,HVNAy,HVNAz);
5576 	    else
5577 	      dHVNA(1,Mc_AN,h_AN,q_AN,DS_VNA,CntHVNA2,CntHVNA3,HVNAx,HVNAy,HVNAz);
5578 
5579 	    /* contribution of force = Trace(CDM0*dH) */
5580 
5581 	    /* spin non-polarization */
5582 
5583 	    if (SpinP_switch==0){
5584 
5585               if (Solver==5 || Solver==8){
5586 	        pref = 2.0;
5587               }
5588               else {
5589 	        if (q_AN==h_AN) pref = 2.0;
5590   	        else            pref = 4.0;
5591               }
5592 
5593 	      for (i=0; i<ian; i++){
5594 		for (j=0; j<jan; j++){
5595 		  dEx_threads[OMPID] += pref*CDM0[0][Mh_AN][kl][i][j]*HVNAx[i][j];
5596 		  dEy_threads[OMPID] += pref*CDM0[0][Mh_AN][kl][i][j]*HVNAy[i][j];
5597 		  dEz_threads[OMPID] += pref*CDM0[0][Mh_AN][kl][i][j]*HVNAz[i][j];
5598 		}
5599 	      }
5600 	    }
5601 
5602 	    /* else */
5603 
5604 	    else{
5605 
5606               if (Solver==5 || Solver==8){
5607 	        pref = 1.0;
5608               }
5609               else {
5610 	        if (q_AN==h_AN) pref = 1.0;
5611   	        else            pref = 2.0;
5612               }
5613 
5614 	      for (i=0; i<ian; i++){
5615 		for (j=0; j<jan; j++){
5616 		  dEx_threads[OMPID] += pref*(   CDM0[0][Mh_AN][kl][i][j]
5617 			 	               + CDM0[1][Mh_AN][kl][i][j] )*HVNAx[i][j];
5618 		  dEy_threads[OMPID] += pref*(   CDM0[0][Mh_AN][kl][i][j]
5619 					       + CDM0[1][Mh_AN][kl][i][j] )*HVNAy[i][j];
5620 		  dEz_threads[OMPID] += pref*(   CDM0[0][Mh_AN][kl][i][j]
5621 					       + CDM0[1][Mh_AN][kl][i][j] )*HVNAz[i][j];
5622 		}
5623 	      }
5624 	    }
5625 
5626 	  } /* if (0<=kl) */
5627 
5628 	} /* Nloop */
5629 
5630 	  /* freeing of arrays */
5631 
5632 	for (j=0; j<List_YOUSO[7]; j++){
5633 	  free(HVNAx[j]);
5634 	}
5635 	free(HVNAx);
5636 
5637 	for (j=0; j<List_YOUSO[7]; j++){
5638 	  free(HVNAy[j]);
5639 	}
5640 	free(HVNAy);
5641 
5642 	for (j=0; j<List_YOUSO[7]; j++){
5643 	  free(HVNAz[j]);
5644 	}
5645 	free(HVNAz);
5646 
5647       } /* #pragma omp parallel */
5648 
5649 	/* sum of dEx_threads */
5650 
5651       dEx = 0.0;
5652       dEy = 0.0;
5653       dEz = 0.0;
5654 
5655       for (Nloop=0; Nloop<Nthrds0; Nloop++){
5656 	dEx += dEx_threads[Nloop];
5657 	dEy += dEy_threads[Nloop];
5658 	dEz += dEz_threads[Nloop];
5659       }
5660 
5661       /* force from #4B */
5662 
5663       Gxyz[Gc_AN][41] += dEx;
5664       Gxyz[Gc_AN][42] += dEy;
5665       Gxyz[Gc_AN][43] += dEz;
5666 
5667       /* timing */
5668       dtime(&Etime_atom);
5669       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
5670 
5671       /* freeing of array */
5672       free(OneD2q_AN);
5673       free(OneD2h_AN);
5674       free(dEx_threads);
5675       free(dEy_threads);
5676       free(dEz_threads);
5677 
5678     } /* if (Mc_AN<=Matomnum) */
5679 
5680     dtime(&etime1);
5681     if(myid==0 && measure_time){
5682       printf("Time for part5B of force#4=%18.5f\n",etime1-stime1);fflush(stdout);
5683     }
5684 
5685   } /* Mc_AN */
5686 
5687   dtime(&etime);
5688   if(myid==0 && measure_time){
5689     printf("Time for part5 of force#4=%18.5f\n",etime-stime);fflush(stdout);
5690   }
5691 
5692   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
5693     Gc_AN = M2G[Mc_AN];
5694 
5695     if (2<=level_stdout){
5696       printf("<Force>  force(4B) myid=%2d  Mc_AN=%2d Gc_AN=%2d  %15.12f %15.12f %15.12f\n",
5697 	     myid,Mc_AN,Gc_AN,Gxyz[Gc_AN][41],Gxyz[Gc_AN][42],Gxyz[Gc_AN][43]);fflush(stdout);
5698     }
5699 
5700     Gxyz[Gc_AN][17] += Gxyz[Gc_AN][41];
5701     Gxyz[Gc_AN][18] += Gxyz[Gc_AN][42];
5702     Gxyz[Gc_AN][19] += Gxyz[Gc_AN][43];
5703   }
5704 
5705   /***********************************
5706             freeing of arrays
5707   ************************************/
5708 
5709   free(Indicator);
5710 
5711   for (ID=0; ID<numprocs; ID++){
5712     free(S_array[ID]);
5713   }
5714   free(S_array);
5715 
5716   for (ID=0; ID<numprocs; ID++){
5717     free(R_array[ID]);
5718   }
5719   free(R_array);
5720 
5721   free(Snd_DS_VNA_Size);
5722   free(Rcv_DS_VNA_Size);
5723 
5724   free(VNA_List);
5725   free(VNA_List2);
5726 
5727 }
5728 
5729 
5730 
5731 
dHNL(int where_flag,int Mc_AN,int h_AN,int q_AN,double ****** DS_NL1,dcomplex *** Hx,dcomplex *** Hy,dcomplex *** Hz)5732 void dHNL(int where_flag,
5733           int Mc_AN, int h_AN, int q_AN,
5734           double ******DS_NL1,
5735           dcomplex ***Hx, dcomplex ***Hy, dcomplex ***Hz)
5736 {
5737   int i,j,k,m,n,l,kg,kan,so,deri_kind;
5738   int ig,ian,jg,jan,kl,kl1,kl2;
5739   int wakg,l1,l2,l3,Gc_AN,Mi_AN,Mi_AN2,Mj_AN,Mj_AN2;
5740   int Rni,Rnj,somax;
5741   double PF[2],sumx,sumy,sumz,ene,dmp,deri_dmp;
5742   double tmpx,tmpy,tmpz,tmp,r;
5743   double x0,y0,z0,x1,y1,z1,dx,dy,dz;
5744   double rcuti,rcutj,rcut;
5745   double PFp,PFm,ene_p,ene_m;
5746   dcomplex sumx0,sumy0,sumz0;
5747   dcomplex sumx1,sumy1,sumz1;
5748   dcomplex sumx2,sumy2,sumz2;
5749 
5750   /****************************************************
5751    start calc.
5752   ****************************************************/
5753 
5754   Gc_AN = M2G[Mc_AN];
5755   ig = natn[Gc_AN][h_AN];
5756   Rni = ncn[Gc_AN][h_AN];
5757   Mi_AN = F_G2M[ig];
5758   ian = Spe_Total_CNO[WhatSpecies[ig]];
5759   rcuti = Spe_Atom_Cut1[WhatSpecies[ig]];
5760 
5761   jg = natn[Gc_AN][q_AN];
5762   Rnj = ncn[Gc_AN][q_AN];
5763   Mj_AN = F_G2M[jg];
5764   jan = Spe_Total_CNO[WhatSpecies[jg]];
5765   rcutj = Spe_Atom_Cut1[WhatSpecies[jg]];
5766 
5767   rcut = rcuti + rcutj;
5768   kl = RMI1[Mc_AN][h_AN][q_AN];
5769   dmp = dampingF(rcut,Dis[ig][kl]);
5770 
5771   for (so=0; so<3; so++){
5772     for (i=0; i<List_YOUSO[7]; i++){
5773       for (j=0; j<List_YOUSO[7]; j++){
5774 	Hx[so][i][j] = Complex(0.0,0.0);
5775 	Hy[so][i][j] = Complex(0.0,0.0);
5776 	Hz[so][i][j] = Complex(0.0,0.0);
5777       }
5778     }
5779   }
5780 
5781   if (h_AN==0){
5782 
5783     /****************************************************
5784                           dH*ep*H
5785     ****************************************************/
5786 
5787     for (k=0; k<=FNAN[Gc_AN]; k++){
5788 
5789       kg = natn[Gc_AN][k];
5790       wakg = WhatSpecies[kg];
5791       kan = Spe_Total_VPS_Pro[wakg];
5792       kl = RMI1[Mc_AN][q_AN][k];
5793 
5794       /****************************************************
5795                    l-dependent non-local part
5796       ****************************************************/
5797 
5798       if (0<=kl && VPS_j_dependency[wakg]==0 && where_flag==0){
5799 
5800 	for (m=0; m<ian; m++){
5801 	  for (n=0; n<jan; n++){
5802 
5803 	    sumx = 0.0;
5804 	    sumy = 0.0;
5805 	    sumz = 0.0;
5806 
5807 	    l = 0;
5808 	    for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
5809 
5810 	      ene = Spe_VNLE[0][wakg][l1-1];
5811 	      if      (Spe_VPS_List[wakg][l1]==0) l2 = 0;
5812 	      else if (Spe_VPS_List[wakg][l1]==1) l2 = 2;
5813 	      else if (Spe_VPS_List[wakg][l1]==2) l2 = 4;
5814 	      else if (Spe_VPS_List[wakg][l1]==3) l2 = 6;
5815 
5816               if (Mj_AN<=Matomnum) Mj_AN2 = Mj_AN;
5817               else                 Mj_AN2 = Matomnum + 1;
5818 
5819 	      for (l3=0; l3<=l2; l3++){
5820 		sumx += ene*DS_NL1[0][1][Mc_AN][k][m][l]*DS_NL1[0][0][Mj_AN2][kl][n][l];
5821 		sumy += ene*DS_NL1[0][2][Mc_AN][k][m][l]*DS_NL1[0][0][Mj_AN2][kl][n][l];
5822 		sumz += ene*DS_NL1[0][3][Mc_AN][k][m][l]*DS_NL1[0][0][Mj_AN2][kl][n][l];
5823 		l++;
5824 	      }
5825 	    }
5826 
5827 	    Hx[0][m][n].r += sumx;
5828 	    Hy[0][m][n].r += sumy;
5829 	    Hz[0][m][n].r += sumz;
5830 
5831 	    Hx[1][m][n].r += sumx;
5832 	    Hy[1][m][n].r += sumy;
5833 	    Hz[1][m][n].r += sumz;
5834 
5835 	  } /* n */
5836 	} /* m */
5837 
5838       } /* if */
5839 
5840       /****************************************************
5841                    j-dependent non-local part
5842       ****************************************************/
5843 
5844       else if ( 0<=kl && VPS_j_dependency[wakg]==1 && where_flag==0 ){
5845 
5846 	for (m=0; m<ian; m++){
5847 	  for (n=0; n<jan; n++){
5848 
5849 	    sumx0 = Complex(0.0,0.0);
5850 	    sumy0 = Complex(0.0,0.0);
5851 	    sumz0 = Complex(0.0,0.0);
5852 
5853 	    sumx1 = Complex(0.0,0.0);
5854 	    sumy1 = Complex(0.0,0.0);
5855 	    sumz1 = Complex(0.0,0.0);
5856 
5857 	    sumx2 = Complex(0.0,0.0);
5858 	    sumy2 = Complex(0.0,0.0);
5859 	    sumz2 = Complex(0.0,0.0);
5860 
5861             if (Mj_AN<=Matomnum) Mj_AN2 = Mj_AN;
5862             else                 Mj_AN2 = Matomnum + 1;
5863 
5864 	    l = 0;
5865 	    for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
5866 
5867 	      ene_p = Spe_VNLE[0][wakg][l1-1];
5868 	      ene_m = Spe_VNLE[1][wakg][l1-1];
5869 
5870 	      if      (Spe_VPS_List[wakg][l1]==0) { l2=0; PFp=1.0;     PFm=0.0;     }
5871 	      else if (Spe_VPS_List[wakg][l1]==1) { l2=2; PFp=2.0/3.0; PFm=1.0/3.0; }
5872 	      else if (Spe_VPS_List[wakg][l1]==2) { l2=4; PFp=3.0/5.0; PFm=2.0/5.0; }
5873 	      else if (Spe_VPS_List[wakg][l1]==3) { l2=6; PFp=4.0/7.0; PFm=3.0/7.0; }
5874 
5875 	      dHNL_SO(&sumx0.r,&sumy0.r,&sumz0.r,
5876                       &sumx1.r,&sumy1.r,&sumz1.r,
5877                       &sumx2.r,&sumy2.r,&sumz2.r,
5878                       &sumx0.i,&sumy0.i,&sumz0.i,
5879                       &sumx1.i,&sumy1.i,&sumz1.i,
5880                       &sumx2.i,&sumy2.i,&sumz2.i,
5881                       1.0,
5882                       PFp, PFm,
5883                       ene_p,ene_m,
5884                       l2, &l,
5885                       Mc_AN ,k, m,
5886                       Mj_AN2,kl,n,
5887                       DS_NL1);
5888 	    }
5889 
5890             if (q_AN==0){
5891 
5892 	      l = 0;
5893 	      for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
5894 
5895 		ene_p = Spe_VNLE[0][wakg][l1-1];
5896 		ene_m = Spe_VNLE[1][wakg][l1-1];
5897 
5898 		if      (Spe_VPS_List[wakg][l1]==0) { l2=0; PFp=1.0;     PFm=0.0;     }
5899 		else if (Spe_VPS_List[wakg][l1]==1) { l2=2; PFp=2.0/3.0; PFm=1.0/3.0; }
5900 		else if (Spe_VPS_List[wakg][l1]==2) { l2=4; PFp=3.0/5.0; PFm=2.0/5.0; }
5901 		else if (Spe_VPS_List[wakg][l1]==3) { l2=6; PFp=4.0/7.0; PFm=3.0/7.0; }
5902 
5903 		dHNL_SO(&sumx0.r,&sumy0.r,&sumz0.r,
5904 			&sumx1.r,&sumy1.r,&sumz1.r,
5905 			&sumx2.r,&sumy2.r,&sumz2.r,
5906 			&sumx0.i,&sumy0.i,&sumz0.i,
5907 			&sumx1.i,&sumy1.i,&sumz1.i,
5908                         &sumx2.i,&sumy2.i,&sumz2.i,
5909 			-1.0,
5910 			PFp, PFm,
5911 			ene_p,ene_m,
5912 			l2, &l,
5913 			Mj_AN2, kl, n,
5914 			Mc_AN,  k,  m,
5915 			DS_NL1);
5916 
5917 	      }
5918 	    }
5919 
5920 	    Hx[0][m][n].r += sumx0.r;     /* up-up */
5921 	    Hy[0][m][n].r += sumy0.r;     /* up-up */
5922 	    Hz[0][m][n].r += sumz0.r;     /* up-up */
5923 
5924 	    Hx[1][m][n].r += sumx1.r;     /* dn-dn */
5925 	    Hy[1][m][n].r += sumy1.r;     /* dn-dn */
5926 	    Hz[1][m][n].r += sumz1.r;     /* dn-dn */
5927 
5928 	    Hx[2][m][n].r += sumx2.r;     /* up-dn */
5929 	    Hy[2][m][n].r += sumy2.r;     /* up-dn */
5930 	    Hz[2][m][n].r += sumz2.r;     /* up-dn */
5931 
5932 	    Hx[0][m][n].i += sumx0.i;     /* up-up */
5933 	    Hy[0][m][n].i += sumy0.i;     /* up-up */
5934 	    Hz[0][m][n].i += sumz0.i;     /* up-up */
5935 
5936 	    Hx[1][m][n].i += sumx1.i;     /* dn-dn */
5937 	    Hy[1][m][n].i += sumy1.i;     /* dn-dn */
5938 	    Hz[1][m][n].i += sumz1.i;     /* dn-dn */
5939 
5940 	    Hx[2][m][n].i += sumx2.i;     /* up-dn */
5941 	    Hy[2][m][n].i += sumy2.i;     /* up-dn */
5942 	    Hz[2][m][n].i += sumz2.i;     /* up-dn */
5943 
5944 	  }
5945 	}
5946       }
5947 
5948     } /* k */
5949 
5950     /****************************************************
5951                            H*ep*dH
5952     ****************************************************/
5953 
5954     /* h_AN==0 && q_AN==0 */
5955 
5956     if (q_AN==0 && VPS_j_dependency[wakg]==0){
5957 
5958       for (m=0; m<ian; m++){
5959         for (n=m; n<jan; n++){
5960 
5961           tmpx = Hx[0][m][n].r + Hx[0][n][m].r;
5962           Hx[0][m][n].r = tmpx;
5963           Hx[0][n][m].r = tmpx;
5964           Hx[1][m][n].r = tmpx;
5965           Hx[1][n][m].r = tmpx;
5966 
5967           tmpy = Hy[0][m][n].r + Hy[0][n][m].r;
5968           Hy[0][m][n].r = tmpy;
5969           Hy[0][n][m].r = tmpy;
5970           Hy[1][m][n].r = tmpy;
5971           Hy[1][n][m].r = tmpy;
5972 
5973           tmpz = Hz[0][m][n].r + Hz[0][n][m].r;
5974           Hz[0][m][n].r = tmpz;
5975           Hz[0][n][m].r = tmpz;
5976           Hz[1][m][n].r = tmpz;
5977           Hz[1][n][m].r = tmpz;
5978         }
5979       }
5980     }
5981 
5982     else if (where_flag==1){
5983 
5984       kg = natn[Gc_AN][0];
5985       wakg = WhatSpecies[kg];
5986       kan = Spe_Total_VPS_Pro[wakg];
5987       kl = RMI1[Mc_AN][q_AN][0];
5988 
5989       /****************************************************
5990                    l-dependent non-local part
5991       ****************************************************/
5992 
5993       if (VPS_j_dependency[wakg]==0){
5994 
5995 	for (m=0; m<ian; m++){
5996 	  for (n=0; n<jan; n++){
5997 
5998 	    sumx = 0.0;
5999 	    sumy = 0.0;
6000 	    sumz = 0.0;
6001 
6002             if (Mj_AN<=Matomnum){
6003               Mj_AN2 = Mj_AN;
6004  	      kl2 = RMI1[Mc_AN][q_AN][0];
6005 	    }
6006             else{
6007               Mj_AN2 = Matomnum + 1;
6008 	      kl2 = RMI1[Mc_AN][0][q_AN];
6009 	    }
6010 
6011 	    l = 0;
6012 	    for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
6013 
6014 	      ene = Spe_VNLE[0][wakg][l1-1];
6015 	      if      (Spe_VPS_List[wakg][l1]==0) l2 = 0;
6016 	      else if (Spe_VPS_List[wakg][l1]==1) l2 = 2;
6017 	      else if (Spe_VPS_List[wakg][l1]==2) l2 = 4;
6018 	      else if (Spe_VPS_List[wakg][l1]==3) l2 = 6;
6019 
6020 	      for (l3=0; l3<=l2; l3++){
6021 
6022 		sumx -= ene*DS_NL1[0][0][Mc_AN][0][m][l]*DS_NL1[0][1][Mj_AN2][kl2][n][l];
6023 		sumy -= ene*DS_NL1[0][0][Mc_AN][0][m][l]*DS_NL1[0][2][Mj_AN2][kl2][n][l];
6024 		sumz -= ene*DS_NL1[0][0][Mc_AN][0][m][l]*DS_NL1[0][3][Mj_AN2][kl2][n][l];
6025 		l++;
6026 	      }
6027 	    }
6028 
6029 	    Hx[0][m][n].r += sumx;
6030 	    Hy[0][m][n].r += sumy;
6031 	    Hz[0][m][n].r += sumz;
6032 
6033 	    Hx[1][m][n].r += sumx;
6034 	    Hy[1][m][n].r += sumy;
6035 	    Hz[1][m][n].r += sumz;
6036 	  }
6037 	}
6038       }
6039 
6040       /****************************************************
6041                    j-dependent non-local part
6042       ****************************************************/
6043 
6044       else if ( VPS_j_dependency[wakg]==1 ){
6045 
6046 	for (m=0; m<ian; m++){
6047 	  for (n=0; n<jan; n++){
6048 
6049 	    sumx0 = Complex(0.0,0.0);
6050 	    sumy0 = Complex(0.0,0.0);
6051 	    sumz0 = Complex(0.0,0.0);
6052 
6053 	    sumx1 = Complex(0.0,0.0);
6054 	    sumy1 = Complex(0.0,0.0);
6055 	    sumz1 = Complex(0.0,0.0);
6056 
6057 	    sumx2 = Complex(0.0,0.0);
6058 	    sumy2 = Complex(0.0,0.0);
6059 	    sumz2 = Complex(0.0,0.0);
6060 
6061             if (Mj_AN<=Matomnum){
6062               Mj_AN2 = Mj_AN;
6063  	      kl2 = RMI1[Mc_AN][q_AN][0];
6064 	    }
6065             else{
6066               Mj_AN2 = Matomnum + 1;
6067 	      kl2 = RMI1[Mc_AN][0][q_AN];
6068 	    }
6069 
6070 	    l = 0;
6071 	    for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
6072 
6073 	      ene_p = Spe_VNLE[0][wakg][l1-1];
6074 	      ene_m = Spe_VNLE[1][wakg][l1-1];
6075 
6076 	      if      (Spe_VPS_List[wakg][l1]==0) { l2=0; PFp=1.0;     PFm=0.0;     }
6077 	      else if (Spe_VPS_List[wakg][l1]==1) { l2=2; PFp=2.0/3.0; PFm=1.0/3.0; }
6078 	      else if (Spe_VPS_List[wakg][l1]==2) { l2=4; PFp=3.0/5.0; PFm=2.0/5.0; }
6079 	      else if (Spe_VPS_List[wakg][l1]==3) { l2=6; PFp=4.0/7.0; PFm=3.0/7.0; }
6080 
6081        	      /* 1 */
6082 
6083 	      dHNL_SO(&sumx0.r,&sumy0.r,&sumz0.r,
6084                       &sumx1.r,&sumy1.r,&sumz1.r,
6085                       &sumx2.r,&sumy2.r,&sumz2.r,
6086                       &sumx0.i,&sumy0.i,&sumz0.i,
6087                       &sumx1.i,&sumy1.i,&sumz1.i,
6088                       &sumx2.i,&sumy2.i,&sumz2.i,
6089                       -1.0,
6090                       PFp, PFm,
6091                       -ene_p,-ene_m,
6092                       l2, &l,
6093                       Mj_AN2,kl2,n,
6094                       Mc_AN, 0,  m,
6095                       DS_NL1);
6096 	    }
6097 
6098 	    Hx[0][m][n].r += sumx0.r;     /* up-up */
6099 	    Hy[0][m][n].r += sumy0.r;     /* up-up */
6100 	    Hz[0][m][n].r += sumz0.r;     /* up-up */
6101 
6102 	    Hx[1][m][n].r += sumx1.r;     /* dn-dn */
6103 	    Hy[1][m][n].r += sumy1.r;     /* dn-dn */
6104 	    Hz[1][m][n].r += sumz1.r;     /* dn-dn */
6105 
6106 	    Hx[2][m][n].r += sumx2.r;     /* up-dn */
6107 	    Hy[2][m][n].r += sumy2.r;     /* up-dn */
6108 	    Hz[2][m][n].r += sumz2.r;     /* up-dn */
6109 
6110 	    Hx[0][m][n].i += sumx0.i;     /* up-up */
6111 	    Hy[0][m][n].i += sumy0.i;     /* up-up */
6112 	    Hz[0][m][n].i += sumz0.i;     /* up-up */
6113 
6114 	    Hx[1][m][n].i += sumx1.i;     /* dn-dn */
6115 	    Hy[1][m][n].i += sumy1.i;     /* dn-dn */
6116 	    Hz[1][m][n].i += sumz1.i;     /* dn-dn */
6117 
6118 	    Hx[2][m][n].i += sumx2.i;     /* up-dn */
6119 	    Hy[2][m][n].i += sumy2.i;     /* up-dn */
6120 	    Hz[2][m][n].i += sumz2.i;     /* up-dn */
6121 
6122 	  }
6123 	}
6124       }
6125 
6126     }
6127 
6128   } /* if (h_AN==0) */
6129 
6130 
6131   else if (where_flag==0){
6132 
6133     /****************************************************
6134        H*ep*dH
6135 
6136        if (h_AN!=0 && where_flag==0)
6137        This happens
6138        only if
6139        ( SpinP_switch==3
6140          &&
6141          (SO_switch==1 || (Hub_U_switch==1 && F_U_flag==1)
6142   	   || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1
6143            || Zeeman_NCO_switch==1)
6144          &&
6145          q_AN==0
6146        )
6147     ****************************************************/
6148 
6149     for (k=0; k<=FNAN[Gc_AN]; k++){
6150 
6151       kg = natn[Gc_AN][k];
6152       wakg = WhatSpecies[kg];
6153       kan = Spe_Total_VPS_Pro[wakg];
6154       kl = RMI1[Mc_AN][h_AN][k];
6155 
6156       if (Mi_AN<=Matomnum) Mi_AN2 = Mi_AN;
6157       else                 Mi_AN2 = Matomnum + 1;
6158 
6159       if (0<=kl && VPS_j_dependency[wakg]==1){
6160 
6161 	for (m=0; m<ian; m++){
6162 	  for (n=0; n<jan; n++){
6163 
6164 	    sumx0 = Complex(0.0,0.0);
6165 	    sumy0 = Complex(0.0,0.0);
6166 	    sumz0 = Complex(0.0,0.0);
6167 
6168 	    sumx1 = Complex(0.0,0.0);
6169 	    sumy1 = Complex(0.0,0.0);
6170 	    sumz1 = Complex(0.0,0.0);
6171 
6172 	    sumx2 = Complex(0.0,0.0);
6173 	    sumy2 = Complex(0.0,0.0);
6174 	    sumz2 = Complex(0.0,0.0);
6175 
6176 	    l = 0;
6177 	    for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
6178 
6179 	      ene_p = Spe_VNLE[0][wakg][l1-1];
6180 	      ene_m = Spe_VNLE[1][wakg][l1-1];
6181 
6182 	      if      (Spe_VPS_List[wakg][l1]==0) { l2=0; PFp=1.0;     PFm=0.0;     }
6183 	      else if (Spe_VPS_List[wakg][l1]==1) { l2=2; PFp=2.0/3.0; PFm=1.0/3.0; }
6184 	      else if (Spe_VPS_List[wakg][l1]==2) { l2=4; PFp=3.0/5.0; PFm=2.0/5.0; }
6185 	      else if (Spe_VPS_List[wakg][l1]==3) { l2=6; PFp=4.0/7.0; PFm=3.0/7.0; }
6186 
6187 	      dHNL_SO(&sumx0.r,&sumy0.r,&sumz0.r,
6188                       &sumx1.r,&sumy1.r,&sumz1.r,
6189                       &sumx2.r,&sumy2.r,&sumz2.r,
6190                       &sumx0.i,&sumy0.i,&sumz0.i,
6191                       &sumx1.i,&sumy1.i,&sumz1.i,
6192                       &sumx2.i,&sumy2.i,&sumz2.i,
6193                       -1.0,
6194                       PFp, PFm,
6195                       ene_p, ene_m,
6196                       l2, &l,
6197                       Mj_AN,  k,  n,
6198                       Mi_AN2, kl, m,
6199                       DS_NL1);
6200 	    }
6201 
6202 	    Hx[0][m][n].r += sumx0.r;     /* up-up */
6203 	    Hy[0][m][n].r += sumy0.r;     /* up-up */
6204 	    Hz[0][m][n].r += sumz0.r;     /* up-up */
6205 
6206 	    Hx[1][m][n].r += sumx1.r;     /* dn-dn */
6207 	    Hy[1][m][n].r += sumy1.r;     /* dn-dn */
6208 	    Hz[1][m][n].r += sumz1.r;     /* dn-dn */
6209 
6210 	    Hx[2][m][n].r += sumx2.r;     /* up-dn */
6211 	    Hy[2][m][n].r += sumy2.r;     /* up-dn */
6212 	    Hz[2][m][n].r += sumz2.r;     /* up-dn */
6213 
6214 	    Hx[0][m][n].i += sumx0.i;     /* up-up */
6215 	    Hy[0][m][n].i += sumy0.i;     /* up-up */
6216 	    Hz[0][m][n].i += sumz0.i;     /* up-up */
6217 
6218 	    Hx[1][m][n].i += sumx1.i;     /* dn-dn */
6219 	    Hy[1][m][n].i += sumy1.i;     /* dn-dn */
6220 	    Hz[1][m][n].i += sumz1.i;     /* dn-dn */
6221 
6222 	    Hx[2][m][n].i += sumx2.i;     /* up-dn */
6223 	    Hy[2][m][n].i += sumy2.i;     /* up-dn */
6224 	    Hz[2][m][n].i += sumz2.i;     /* up-dn */
6225 
6226 	  }
6227 	}
6228       }
6229 
6230     }
6231 
6232   }
6233 
6234   /* if (h_AN!=0 && where_flag==1) */
6235 
6236   else {
6237 
6238     /****************************************************
6239                            dH*ep*H
6240     ****************************************************/
6241 
6242     kg = natn[Gc_AN][0];
6243     wakg = WhatSpecies[kg];
6244     kan = Spe_Total_VPS_Pro[wakg];
6245     kl1 = RMI1[Mc_AN][0][h_AN];
6246     kl2 = RMI1[Mc_AN][0][q_AN];
6247 
6248     /****************************************************
6249                    l-dependent non-local part
6250     ****************************************************/
6251 
6252     if (VPS_j_dependency[wakg]==0){
6253 
6254       for (m=0; m<ian; m++){
6255 	for (n=0; n<jan; n++){
6256 
6257 	  sumx = 0.0;
6258 	  sumy = 0.0;
6259 	  sumz = 0.0;
6260 
6261 	  l = 0;
6262 	  for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
6263 
6264 	    ene = Spe_VNLE[0][wakg][l1-1];
6265 	    if      (Spe_VPS_List[wakg][l1]==0) l2 = 0;
6266 	    else if (Spe_VPS_List[wakg][l1]==1) l2 = 2;
6267 	    else if (Spe_VPS_List[wakg][l1]==2) l2 = 4;
6268 	    else if (Spe_VPS_List[wakg][l1]==3) l2 = 6;
6269 
6270 	    for (l3=0; l3<=l2; l3++){
6271 	      sumx -= ene*DS_NL1[0][1][Matomnum+1][kl1][m][l]*DS_NL1[0][0][Matomnum+1][kl2][n][l];
6272 	      sumy -= ene*DS_NL1[0][2][Matomnum+1][kl1][m][l]*DS_NL1[0][0][Matomnum+1][kl2][n][l];
6273 	      sumz -= ene*DS_NL1[0][3][Matomnum+1][kl1][m][l]*DS_NL1[0][0][Matomnum+1][kl2][n][l];
6274 	      l++;
6275 	    }
6276 	  }
6277 
6278 	  Hx[0][m][n].r = sumx;
6279 	  Hy[0][m][n].r = sumy;
6280 	  Hz[0][m][n].r = sumz;
6281 
6282 	  Hx[1][m][n].r = sumx;
6283 	  Hy[1][m][n].r = sumy;
6284 	  Hz[1][m][n].r = sumz;
6285 
6286 	  Hx[2][m][n].r = 0.0;
6287 	  Hy[2][m][n].r = 0.0;
6288 	  Hz[2][m][n].r = 0.0;
6289 
6290 	  Hx[0][m][n].i = 0.0;
6291 	  Hy[0][m][n].i = 0.0;
6292 	  Hz[0][m][n].i = 0.0;
6293 
6294 	  Hx[1][m][n].i = 0.0;
6295 	  Hy[1][m][n].i = 0.0;
6296 	  Hz[1][m][n].i = 0.0;
6297 
6298 	  Hx[2][m][n].i = 0.0;
6299 	  Hy[2][m][n].i = 0.0;
6300 	  Hz[2][m][n].i = 0.0;
6301 
6302 	}
6303       }
6304     }
6305 
6306     /****************************************************
6307                  j-dependent non-local part
6308     ****************************************************/
6309 
6310     else if ( VPS_j_dependency[wakg]==1 ){
6311 
6312       for (m=0; m<ian; m++){
6313 	for (n=0; n<jan; n++){
6314 
6315 	  sumx0 = Complex(0.0,0.0);
6316 	  sumy0 = Complex(0.0,0.0);
6317 	  sumz0 = Complex(0.0,0.0);
6318 
6319 	  sumx1 = Complex(0.0,0.0);
6320 	  sumy1 = Complex(0.0,0.0);
6321 	  sumz1 = Complex(0.0,0.0);
6322 
6323           sumx2 = Complex(0.0,0.0);
6324 	  sumy2 = Complex(0.0,0.0);
6325 	  sumz2 = Complex(0.0,0.0);
6326 
6327 	  l = 0;
6328 	  for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
6329 
6330 	    ene_p = Spe_VNLE[0][wakg][l1-1];
6331 	    ene_m = Spe_VNLE[1][wakg][l1-1];
6332 
6333 	    if      (Spe_VPS_List[wakg][l1]==0) { l2=0; PFp=1.0;     PFm=0.0;     }
6334 	    else if (Spe_VPS_List[wakg][l1]==1) { l2=2; PFp=2.0/3.0; PFm=1.0/3.0; }
6335 	    else if (Spe_VPS_List[wakg][l1]==2) { l2=4; PFp=3.0/5.0; PFm=2.0/5.0; }
6336 	    else if (Spe_VPS_List[wakg][l1]==3) { l2=6; PFp=4.0/7.0; PFm=3.0/7.0; }
6337 
6338    	    /* 2 */
6339 
6340 	    dHNL_SO(&sumx0.r,&sumy0.r,&sumz0.r,
6341 		    &sumx1.r,&sumy1.r,&sumz1.r,
6342                     &sumx2.r,&sumy2.r,&sumz2.r,
6343 		    &sumx0.i,&sumy0.i,&sumz0.i,
6344 		    &sumx1.i,&sumy1.i,&sumz1.i,
6345                     &sumx2.i,&sumy2.i,&sumz2.i,
6346                     1.0,
6347 		    PFp, PFm,
6348 		    -ene_p,-ene_m,
6349 		    l2, &l,
6350 		    Matomnum+1, kl1,m,
6351 		    Matomnum+1, kl2,n,
6352 		    DS_NL1);
6353 	  }
6354 
6355 	  Hx[0][m][n].r = sumx0.r;     /* up-up */
6356 	  Hy[0][m][n].r = sumy0.r;     /* up-up */
6357 	  Hz[0][m][n].r = sumz0.r;     /* up-up */
6358 
6359 	  Hx[1][m][n].r = sumx1.r;     /* dn-dn */
6360 	  Hy[1][m][n].r = sumy1.r;     /* dn-dn */
6361 	  Hz[1][m][n].r = sumz1.r;     /* dn-dn */
6362 
6363 	  Hx[2][m][n].r = sumx2.r;     /* up-dn */
6364 	  Hy[2][m][n].r = sumy2.r;     /* up-dn */
6365 	  Hz[2][m][n].r = sumz2.r;     /* up-dn */
6366 
6367 	  Hx[0][m][n].i = sumx0.i;     /* up-up */
6368 	  Hy[0][m][n].i = sumy0.i;     /* up-up */
6369 	  Hz[0][m][n].i = sumz0.i;     /* up-up */
6370 
6371 	  Hx[1][m][n].i = sumx1.i;     /* dn-dn */
6372 	  Hy[1][m][n].i = sumy1.i;     /* dn-dn */
6373 	  Hz[1][m][n].i = sumz1.i;     /* dn-dn */
6374 
6375 	  Hx[2][m][n].i = sumx2.i;     /* up-dn */
6376 	  Hy[2][m][n].i = sumy2.i;     /* up-dn */
6377 	  Hz[2][m][n].i = sumz2.i;     /* up-dn */
6378 
6379 	}
6380       }
6381     }
6382 
6383     /****************************************************
6384                            H*ep*dH
6385     ****************************************************/
6386 
6387     if (q_AN!=0) {
6388 
6389       kg = natn[Gc_AN][0];
6390       wakg = WhatSpecies[kg];
6391       kan = Spe_Total_VPS_Pro[wakg];
6392       kl1 = RMI1[Mc_AN][0][h_AN];
6393       kl2 = RMI1[Mc_AN][0][q_AN];
6394 
6395       /****************************************************
6396                      l-dependent non-local part
6397       ****************************************************/
6398 
6399       if (VPS_j_dependency[wakg]==0){
6400 
6401 	for (m=0; m<ian; m++){
6402 	  for (n=0; n<jan; n++){
6403 
6404 	    sumx = 0.0;
6405 	    sumy = 0.0;
6406 	    sumz = 0.0;
6407 
6408 	    l = 0;
6409 	    for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
6410 
6411 	      ene = Spe_VNLE[0][wakg][l1-1];
6412 	      if      (Spe_VPS_List[wakg][l1]==0) l2 = 0;
6413 	      else if (Spe_VPS_List[wakg][l1]==1) l2 = 2;
6414 	      else if (Spe_VPS_List[wakg][l1]==2) l2 = 4;
6415 	      else if (Spe_VPS_List[wakg][l1]==3) l2 = 6;
6416 
6417 	      for (l3=0; l3<=l2; l3++){
6418 		sumx -= ene*DS_NL1[0][0][Matomnum+1][kl1][m][l]*DS_NL1[0][1][Matomnum+1][kl2][n][l];
6419 		sumy -= ene*DS_NL1[0][0][Matomnum+1][kl1][m][l]*DS_NL1[0][2][Matomnum+1][kl2][n][l];
6420 		sumz -= ene*DS_NL1[0][0][Matomnum+1][kl1][m][l]*DS_NL1[0][3][Matomnum+1][kl2][n][l];
6421 		l++;
6422 	      }
6423 	    }
6424 
6425 	    Hx[0][m][n].r += sumx;
6426 	    Hy[0][m][n].r += sumy;
6427 	    Hz[0][m][n].r += sumz;
6428 
6429 	    Hx[1][m][n].r += sumx;
6430 	    Hy[1][m][n].r += sumy;
6431 	    Hz[1][m][n].r += sumz;
6432 	  }
6433 	}
6434       }
6435 
6436       /****************************************************
6437                     j-dependent non-local part
6438       ****************************************************/
6439 
6440       else if ( VPS_j_dependency[wakg]==1 ){
6441 
6442 	for (m=0; m<ian; m++){
6443 	  for (n=0; n<jan; n++){
6444 
6445 	    sumx0 = Complex(0.0,0.0);
6446 	    sumy0 = Complex(0.0,0.0);
6447 	    sumz0 = Complex(0.0,0.0);
6448 
6449 	    sumx1 = Complex(0.0,0.0);
6450 	    sumy1 = Complex(0.0,0.0);
6451 	    sumz1 = Complex(0.0,0.0);
6452 
6453 	    sumx2 = Complex(0.0,0.0);
6454 	    sumy2 = Complex(0.0,0.0);
6455 	    sumz2 = Complex(0.0,0.0);
6456 
6457 	    l = 0;
6458 	    for (l1=1; l1<=Spe_Num_RVPS[wakg]; l1++){
6459 
6460  	      ene_p = Spe_VNLE[0][wakg][l1-1];
6461 	      ene_m = Spe_VNLE[1][wakg][l1-1];
6462 
6463 	      if      (Spe_VPS_List[wakg][l1]==0) { l2=0; PFp=1.0;     PFm=0.0;     }
6464 	      else if (Spe_VPS_List[wakg][l1]==1) { l2=2; PFp=2.0/3.0; PFm=1.0/3.0; }
6465 	      else if (Spe_VPS_List[wakg][l1]==2) { l2=4; PFp=3.0/5.0; PFm=2.0/5.0; }
6466 	      else if (Spe_VPS_List[wakg][l1]==3) { l2=6; PFp=4.0/7.0; PFm=3.0/7.0; }
6467 
6468 	      /* 4 */
6469 
6470 	      dHNL_SO(&sumx0.r,&sumy0.r,&sumz0.r,
6471 		      &sumx1.r,&sumy1.r,&sumz1.r,
6472                       &sumx2.r,&sumy2.r,&sumz2.r,
6473 		      &sumx0.i,&sumy0.i,&sumz0.i,
6474 		      &sumx1.i,&sumy1.i,&sumz1.i,
6475                       &sumx2.i,&sumy2.i,&sumz2.i,
6476                       -1.0,
6477 		      PFp, PFm,
6478 		      -ene_p,-ene_m,
6479 		      l2, &l,
6480 		      Matomnum+1, kl2,n,
6481 		      Matomnum+1, kl1,m,
6482 		      DS_NL1);
6483 	    }
6484 
6485 	    Hx[0][m][n].r += sumx0.r;     /* up-up */
6486 	    Hy[0][m][n].r += sumy0.r;     /* up-up */
6487 	    Hz[0][m][n].r += sumz0.r;     /* up-up */
6488 
6489 	    Hx[1][m][n].r += sumx1.r;     /* dn-dn */
6490 	    Hy[1][m][n].r += sumy1.r;     /* dn-dn */
6491 	    Hz[1][m][n].r += sumz1.r;     /* dn-dn */
6492 
6493 	    Hx[2][m][n].r += sumx2.r;     /* up-dn */
6494 	    Hy[2][m][n].r += sumy2.r;     /* up-dn */
6495 	    Hz[2][m][n].r += sumz2.r;     /* up-dn */
6496 
6497 	    Hx[0][m][n].i += sumx0.i;     /* up-up */
6498 	    Hy[0][m][n].i += sumy0.i;     /* up-up */
6499 	    Hz[0][m][n].i += sumz0.i;     /* up-up */
6500 
6501 	    Hx[1][m][n].i += sumx1.i;     /* dn-dn */
6502 	    Hy[1][m][n].i += sumy1.i;     /* dn-dn */
6503 	    Hz[1][m][n].i += sumz1.i;     /* dn-dn */
6504 
6505 	    Hx[2][m][n].i += sumx2.i;     /* up-dn */
6506 	    Hy[2][m][n].i += sumy2.i;     /* up-dn */
6507 	    Hz[2][m][n].i += sumz2.i;     /* up-dn */
6508 
6509 	  }
6510 	}
6511       }
6512 
6513     }
6514 
6515   } /* else */
6516 
6517   /****************************************************
6518                contribution by dampingF
6519   ****************************************************/
6520 
6521   /* Qij * dH/dx  */
6522 
6523   for (so=0; so<3; so++){
6524     for (m=0; m<ian; m++){
6525       for (n=0; n<jan; n++){
6526 
6527         Hx[so][m][n].r = dmp*Hx[so][m][n].r;
6528         Hy[so][m][n].r = dmp*Hy[so][m][n].r;
6529         Hz[so][m][n].r = dmp*Hz[so][m][n].r;
6530 
6531         Hx[so][m][n].i = dmp*Hx[so][m][n].i;
6532         Hy[so][m][n].i = dmp*Hy[so][m][n].i;
6533         Hz[so][m][n].i = dmp*Hz[so][m][n].i;
6534       }
6535     }
6536   }
6537 
6538   /* dQij/dx * H */
6539 
6540   if ( (h_AN==0 && q_AN!=0) || (h_AN!=0 && q_AN==0) ){
6541 
6542     if      (h_AN==0)   kl = q_AN;
6543     else if (q_AN==0)   kl = h_AN;
6544 
6545     if      (SpinP_switch==0) somax = 0;
6546     else if (SpinP_switch==1) somax = 1;
6547     else if (SpinP_switch==3) somax = 2;
6548 
6549     r = Dis[Gc_AN][kl];
6550 
6551     if (rcut<=r) {
6552       deri_dmp = 0.0;
6553       tmp = 0.0;
6554     }
6555     else {
6556       deri_dmp = deri_dampingF(rcut,r);
6557       tmp = deri_dmp/dmp;
6558     }
6559 
6560     x0 = Gxyz[ig][1] + atv[Rni][1];
6561     y0 = Gxyz[ig][2] + atv[Rni][2];
6562     z0 = Gxyz[ig][3] + atv[Rni][3];
6563 
6564     x1 = Gxyz[jg][1] + atv[Rnj][1];
6565     y1 = Gxyz[jg][2] + atv[Rnj][2];
6566     z1 = Gxyz[jg][3] + atv[Rnj][3];
6567 
6568     /* for empty atoms or finite elemens basis */
6569     if (r<1.0e-10) r = 1.0e-10;
6570 
6571     if (h_AN==0 && q_AN!=0){
6572       dx = tmp*(x0-x1)/r;
6573       dy = tmp*(y0-y1)/r;
6574       dz = tmp*(z0-z1)/r;
6575     }
6576 
6577     else if (h_AN!=0 && q_AN==0){
6578       dx = tmp*(x1-x0)/r;
6579       dy = tmp*(y1-y0)/r;
6580       dz = tmp*(z1-z0)/r;
6581     }
6582 
6583     if (SpinP_switch==0 || SpinP_switch==1){
6584 
6585       if (h_AN==0){
6586         for (so=0; so<=somax; so++){
6587 	  for (m=0; m<ian; m++){
6588 	    for (n=0; n<jan; n++){
6589 	      Hx[so][m][n].r += HNL[so][Mc_AN][kl][m][n]*dx;
6590 	      Hy[so][m][n].r += HNL[so][Mc_AN][kl][m][n]*dy;
6591 	      Hz[so][m][n].r += HNL[so][Mc_AN][kl][m][n]*dz;
6592 	    }
6593 	  }
6594         }
6595       }
6596 
6597       else if (q_AN==0){
6598         for (so=0; so<=somax; so++){
6599 	  for (m=0; m<ian; m++){
6600 	    for (n=0; n<jan; n++){
6601 	      Hx[so][m][n].r += HNL[so][Mc_AN][kl][n][m]*dx;
6602 	      Hy[so][m][n].r += HNL[so][Mc_AN][kl][n][m]*dy;
6603 	      Hz[so][m][n].r += HNL[so][Mc_AN][kl][n][m]*dz;
6604 	    }
6605 	  }
6606         }
6607       }
6608     }
6609 
6610     else if (SpinP_switch==3){
6611 
6612       if (h_AN==0){
6613         for (so=0; so<=somax; so++){
6614 	  for (m=0; m<ian; m++){
6615 	    for (n=0; n<jan; n++){
6616 	      Hx[so][m][n].r +=  HNL[so][Mc_AN][kl][m][n]*dx;
6617 	      Hy[so][m][n].r +=  HNL[so][Mc_AN][kl][m][n]*dy;
6618 	      Hz[so][m][n].r +=  HNL[so][Mc_AN][kl][m][n]*dz;
6619 	    }
6620 	  }
6621         }
6622       }
6623 
6624       else if (q_AN==0){
6625         for (so=0; so<=somax; so++){
6626 	  for (m=0; m<ian; m++){
6627 	    for (n=0; n<jan; n++){
6628 	      Hx[so][m][n].r +=  HNL[so][Mc_AN][kl][n][m]*dx;
6629 	      Hy[so][m][n].r +=  HNL[so][Mc_AN][kl][n][m]*dy;
6630 	      Hz[so][m][n].r +=  HNL[so][Mc_AN][kl][n][m]*dz;
6631 	    }
6632 	  }
6633         }
6634       }
6635 
6636       if (SO_switch==1){
6637 
6638         if (h_AN==0){
6639 	  for (so=0; so<=somax; so++){
6640 	    for (m=0; m<ian; m++){
6641 	      for (n=0; n<jan; n++){
6642 		Hx[so][m][n].i += iHNL[so][Mc_AN][kl][m][n]*dx;
6643 		Hy[so][m][n].i += iHNL[so][Mc_AN][kl][m][n]*dy;
6644 		Hz[so][m][n].i += iHNL[so][Mc_AN][kl][m][n]*dz;
6645 	      }
6646 	    }
6647 	  }
6648 	}
6649 
6650         else if (q_AN==0){
6651 	  for (so=0; so<=somax; so++){
6652 	    for (m=0; m<ian; m++){
6653 	      for (n=0; n<jan; n++){
6654 		Hx[so][m][n].i += iHNL[so][Mc_AN][kl][n][m]*dx;
6655 		Hy[so][m][n].i += iHNL[so][Mc_AN][kl][n][m]*dy;
6656 		Hz[so][m][n].i += iHNL[so][Mc_AN][kl][n][m]*dz;
6657 	      }
6658 	    }
6659 	  }
6660 	}
6661 
6662       }
6663     }
6664   }
6665 
6666 }
6667 
6668 
6669 
6670 
6671 
dHVNA(int where_flag,int Mc_AN,int h_AN,int q_AN,Type_DS_VNA ***** DS_VNA1,double ***** TmpHVNA2,double ***** TmpHVNA3,double ** Hx,double ** Hy,double ** Hz)6672 void dHVNA(int where_flag, int Mc_AN, int h_AN, int q_AN,
6673            Type_DS_VNA *****DS_VNA1,
6674            double *****TmpHVNA2, double *****TmpHVNA3,
6675            double **Hx, double **Hy, double **Hz)
6676 {
6677   int i,j,k,m,n,l,kg,kan,so,deri_kind;
6678   int ig,ian,jg,jan,kl,kl1,kl2,Rni,Rnj;
6679   int wakg,l1,l2,l3,Gc_AN,Mi_AN,Mj_AN,Mj_AN2,num_projectors;
6680   double sumx,sumy,sumz,ene,rcuti,rcutj,rcut;
6681   double tmpx,tmpy,tmpz,dmp,deri_dmp,tmp;
6682   double dx,dy,dz,x0,y0,z0,x1,y1,z1,r;
6683   double PFp,PFm,ene_p,ene_m;
6684   double sumx0,sumy0,sumz0;
6685   double sumx1,sumy1,sumz1;
6686   double sumx2,sumy2,sumz2;
6687   int L,LL,Mul1,Num_RVNA;
6688 
6689   Num_RVNA = List_YOUSO[34]*(List_YOUSO[35] + 1);
6690   num_projectors = (List_YOUSO[35]+1)*(List_YOUSO[35]+1)*List_YOUSO[34];
6691 
6692   /****************************************************
6693    start calc.
6694   ****************************************************/
6695 
6696   Gc_AN = M2G[Mc_AN];
6697   ig = natn[Gc_AN][h_AN];
6698   Rni = ncn[Gc_AN][h_AN];
6699   Mi_AN = F_G2M[ig];
6700   ian = Spe_Total_CNO[WhatSpecies[ig]];
6701   rcuti = Spe_Atom_Cut1[WhatSpecies[ig]];
6702 
6703   jg = natn[Gc_AN][q_AN];
6704   Rnj = ncn[Gc_AN][q_AN];
6705   Mj_AN = F_G2M[jg];
6706   jan = Spe_Total_CNO[WhatSpecies[jg]];
6707   rcutj = Spe_Atom_Cut1[WhatSpecies[jg]];
6708 
6709   rcut = rcuti + rcutj;
6710   kl = RMI1[Mc_AN][h_AN][q_AN];
6711   dmp = dampingF(rcut,Dis[ig][kl]);
6712 
6713   for (m=0; m<ian; m++){
6714     for (n=0; n<jan; n++){
6715       Hx[m][n] = 0.0;
6716       Hy[m][n] = 0.0;
6717       Hz[m][n] = 0.0;
6718     }
6719   }
6720 
6721   /****************************************************
6722     two-center integral with orbitals on one-center
6723 
6724     in case of h_AN==0 && q_AN==0
6725   ****************************************************/
6726 
6727   if (h_AN==0 && q_AN==0 && where_flag==0){
6728 
6729     for (k=1; k<=FNAN[Gc_AN]; k++){
6730       for (m=0; m<ian; m++){
6731         for (n=0; n<jan; n++){
6732 	  Hx[m][n] += TmpHVNA2[1][Mc_AN][k][m][n];
6733 	  Hy[m][n] += TmpHVNA2[2][Mc_AN][k][m][n];
6734 	  Hz[m][n] += TmpHVNA2[3][Mc_AN][k][m][n];
6735         }
6736       }
6737     }
6738   }
6739 
6740   /****************************************************
6741     two-center integral with orbitals on one-center
6742 
6743     in case of h_AN==q_AN && h_AN!=0
6744   ****************************************************/
6745 
6746   else if (h_AN==q_AN && h_AN!=0){
6747 
6748     kl = RMI1[Mc_AN][h_AN][0];
6749 
6750     for (m=0; m<ian; m++){
6751       for (n=0; n<jan; n++){
6752 
6753 	Hx[m][n] = -TmpHVNA3[1][Mc_AN][h_AN][m][n];
6754 	Hy[m][n] = -TmpHVNA3[2][Mc_AN][h_AN][m][n];
6755 	Hz[m][n] = -TmpHVNA3[3][Mc_AN][h_AN][m][n];
6756       }
6757     }
6758   }
6759 
6760   /****************************************************
6761              two and three center integrals
6762              with orbitals on two-center
6763   ****************************************************/
6764 
6765   else{
6766 
6767     if (h_AN==0){
6768 
6769       /****************************************************
6770 			   dH*ep*H
6771       ****************************************************/
6772 
6773       for (k=0; k<=FNAN[Gc_AN]; k++){
6774 
6775 	kg = natn[Gc_AN][k];
6776 	wakg = WhatSpecies[kg];
6777 	kl = RMI1[Mc_AN][q_AN][k];
6778 
6779 	/****************************************************
6780 			     non-local part
6781 	****************************************************/
6782 
6783 	if (0<=kl && where_flag==0){
6784 
6785           if (Mj_AN<=Matomnum) Mj_AN2 = Mj_AN;
6786           else                 Mj_AN2 = Matomnum+1;
6787 
6788 	  for (m=0; m<ian; m++){
6789 	    for (n=0; n<jan; n++){
6790 
6791 	      sumx = 0.0;
6792 	      sumy = 0.0;
6793 	      sumz = 0.0;
6794 
6795 	      for (l=0; l<num_projectors; l++){
6796 		sumx += DS_VNA1[1][Mc_AN][k][m][l]*DS_VNA1[0][Mj_AN2][kl][n][l];
6797 		sumy += DS_VNA1[2][Mc_AN][k][m][l]*DS_VNA1[0][Mj_AN2][kl][n][l];
6798 		sumz += DS_VNA1[3][Mc_AN][k][m][l]*DS_VNA1[0][Mj_AN2][kl][n][l];
6799 	      }
6800 
6801 	      Hx[m][n] += sumx;
6802 	      Hy[m][n] += sumy;
6803 	      Hz[m][n] += sumz;
6804 
6805 	    } /* n */
6806 	  } /* m */
6807 
6808 	} /* if */
6809 
6810       } /* k */
6811 
6812       /****************************************************
6813  		  	     H*ep*dH
6814       ****************************************************/
6815 
6816       /* non-local part */
6817 
6818       if (q_AN==0){
6819 
6820 	for (m=0; m<ian; m++){
6821 	  for (n=m; n<jan; n++){
6822 
6823             tmpx = Hx[m][n] + Hx[n][m];
6824 	    Hx[m][n] = tmpx;
6825 	    Hx[n][m] = tmpx;
6826 
6827             tmpy = Hy[m][n] + Hy[n][m];
6828 	    Hy[m][n] = tmpy;
6829 	    Hy[n][m] = tmpy;
6830 
6831             tmpz = Hz[m][n] + Hz[n][m];
6832 	    Hz[m][n] = tmpz;
6833 	    Hz[n][m] = tmpz;
6834 	  }
6835 	}
6836 
6837       }
6838 
6839       else if (where_flag==1) {
6840 
6841 	kg = natn[Gc_AN][0];
6842 	wakg = WhatSpecies[kg];
6843 
6844 	/****************************************************
6845 			     non-local part
6846 	****************************************************/
6847 
6848 	for (m=0; m<ian; m++){
6849 	  for (n=0; n<jan; n++){
6850 
6851 	    sumx = 0.0;
6852 	    sumy = 0.0;
6853 	    sumz = 0.0;
6854 
6855             if (Mj_AN<=Matomnum){
6856               Mj_AN2 = Mj_AN;
6857  	      kl = RMI1[Mc_AN][q_AN][0];
6858 	    }
6859             else{
6860               Mj_AN2 = Matomnum+1;
6861 	      kl = RMI1[Mc_AN][0][q_AN];
6862             }
6863 
6864 	    for (l=0; l<num_projectors; l++){
6865 	      sumx -= DS_VNA1[0][Mc_AN][0][m][l]*DS_VNA1[1][Mj_AN2][kl][n][l];
6866 	      sumy -= DS_VNA1[0][Mc_AN][0][m][l]*DS_VNA1[2][Mj_AN2][kl][n][l];
6867 	      sumz -= DS_VNA1[0][Mc_AN][0][m][l]*DS_VNA1[3][Mj_AN2][kl][n][l];
6868 	    }
6869 
6870 	    Hx[m][n] += sumx;
6871 	    Hy[m][n] += sumy;
6872 	    Hz[m][n] += sumz;
6873 
6874 	  }
6875 	}
6876 
6877       }
6878 
6879     } /* if (h_AN==0) */
6880 
6881     else {
6882 
6883       /****************************************************
6884 			   dH*ep*H
6885       ****************************************************/
6886 
6887       kg = natn[Gc_AN][0];
6888       wakg = WhatSpecies[kg];
6889       kl1 = RMI1[Mc_AN][0][h_AN];
6890       kl2 = RMI1[Mc_AN][0][q_AN];
6891 
6892       /****************************************************
6893  		         non-local part
6894       ****************************************************/
6895 
6896       for (m=0; m<ian; m++){
6897 	for (n=0; n<jan; n++){
6898 
6899 	  sumx = 0.0;
6900 	  sumy = 0.0;
6901 	  sumz = 0.0;
6902 
6903           for (l=0; l<num_projectors; l++){
6904 	    sumx -= DS_VNA1[1][Matomnum+1][kl1][m][l]*DS_VNA1[0][Matomnum+1][kl2][n][l];
6905 	    sumy -= DS_VNA1[2][Matomnum+1][kl1][m][l]*DS_VNA1[0][Matomnum+1][kl2][n][l];
6906 	    sumz -= DS_VNA1[3][Matomnum+1][kl1][m][l]*DS_VNA1[0][Matomnum+1][kl2][n][l];
6907 	  }
6908 
6909 	  Hx[m][n] = sumx;
6910 	  Hy[m][n] = sumy;
6911 	  Hz[m][n] = sumz;
6912 	}
6913       }
6914 
6915       /****************************************************
6916 			   H*ep*dH
6917       ****************************************************/
6918 
6919       if (q_AN!=0){
6920 
6921 	kg = natn[Gc_AN][0];
6922 	wakg = WhatSpecies[kg];
6923 	kl1 = RMI1[Mc_AN][0][h_AN];
6924 	kl2 = RMI1[Mc_AN][0][q_AN];
6925 
6926 	/****************************************************
6927 			    non-local part
6928 	****************************************************/
6929 
6930 	for (m=0; m<ian; m++){
6931 	  for (n=0; n<jan; n++){
6932 
6933 	    sumx = 0.0;
6934 	    sumy = 0.0;
6935 	    sumz = 0.0;
6936 
6937 	    for (l=0; l<num_projectors; l++){
6938 	      sumx -= DS_VNA1[0][Matomnum+1][kl1][m][l]*DS_VNA1[1][Matomnum+1][kl2][n][l];
6939 	      sumy -= DS_VNA1[0][Matomnum+1][kl1][m][l]*DS_VNA1[2][Matomnum+1][kl2][n][l];
6940 	      sumz -= DS_VNA1[0][Matomnum+1][kl1][m][l]*DS_VNA1[3][Matomnum+1][kl2][n][l];
6941 	    }
6942 
6943 	    Hx[m][n] += sumx;
6944 	    Hy[m][n] += sumy;
6945 	    Hz[m][n] += sumz;
6946 
6947 	  }
6948 	}
6949       }
6950 
6951     }
6952   }
6953 
6954   /****************************************************
6955                 contribution by dampingF
6956   ****************************************************/
6957 
6958   /* Qij * dH/dx  */
6959 
6960   for (m=0; m<ian; m++){
6961     for (n=0; n<jan; n++){
6962       Hx[m][n] = dmp*Hx[m][n];
6963       Hy[m][n] = dmp*Hy[m][n];
6964       Hz[m][n] = dmp*Hz[m][n];
6965     }
6966   }
6967 
6968   /* dQij/dx * H */
6969 
6970   if ( (h_AN==0 && q_AN!=0) || (h_AN!=0 && q_AN==0) ){
6971 
6972     if      (h_AN==0)   kl = q_AN;
6973     else if (q_AN==0)   kl = h_AN;
6974 
6975     r = Dis[Gc_AN][kl];
6976 
6977     if (rcut<=r) {
6978       deri_dmp = 0.0;
6979       tmp = 0.0;
6980     }
6981     else {
6982       deri_dmp = deri_dampingF(rcut,r);
6983       tmp = deri_dmp/dmp;
6984     }
6985 
6986     x0 = Gxyz[ig][1] + atv[Rni][1];
6987     x1 = Gxyz[jg][1] + atv[Rnj][1];
6988 
6989     y0 = Gxyz[ig][2] + atv[Rni][2];
6990     y1 = Gxyz[jg][2] + atv[Rnj][2];
6991 
6992     z0 = Gxyz[ig][3] + atv[Rni][3];
6993     z1 = Gxyz[jg][3] + atv[Rnj][3];
6994 
6995     /* for empty atoms or finite elemens basis */
6996     if (r<1.0e-10) r = 1.0e-10;
6997 
6998     if ( h_AN==0 ){
6999       dx = tmp*(x0-x1)/r;
7000       dy = tmp*(y0-y1)/r;
7001       dz = tmp*(z0-z1)/r;
7002     }
7003 
7004     else if ( q_AN==0 ){
7005       dx = tmp*(x1-x0)/r;
7006       dy = tmp*(y1-y0)/r;
7007       dz = tmp*(z1-z0)/r;
7008     }
7009 
7010     if (h_AN==0){
7011       for (m=0; m<ian; m++){
7012         for (n=0; n<jan; n++){
7013 	  Hx[m][n] += HVNA[Mc_AN][kl][m][n]*dx;
7014 	  Hy[m][n] += HVNA[Mc_AN][kl][m][n]*dy;
7015 	  Hz[m][n] += HVNA[Mc_AN][kl][m][n]*dz;
7016         }
7017       }
7018     }
7019 
7020     else if (q_AN==0){
7021       for (m=0; m<ian; m++){
7022         for (n=0; n<jan; n++){
7023 	  Hx[m][n] += HVNA[Mc_AN][kl][n][m]*dx;
7024 	  Hy[m][n] += HVNA[Mc_AN][kl][n][m]*dy;
7025 	  Hz[m][n] += HVNA[Mc_AN][kl][n][m]*dz;
7026         }
7027       }
7028     }
7029   }
7030 
7031 }
7032 
7033 
7034 
7035 
7036 
7037 
7038 
7039 
7040 
7041 
7042 
dHNL_SO(double * sumx0r,double * sumy0r,double * sumz0r,double * sumx1r,double * sumy1r,double * sumz1r,double * sumx2r,double * sumy2r,double * sumz2r,double * sumx0i,double * sumy0i,double * sumz0i,double * sumx1i,double * sumy1i,double * sumz1i,double * sumx2i,double * sumy2i,double * sumz2i,double fugou,double PFp,double PFm,double ene_p,double ene_m,int l2,int * l,int Mc_AN,int k,int m,int Mj_AN,int kl,int n,double ****** DS_NL1)7043 void dHNL_SO(
7044 	     double *sumx0r,
7045 	     double *sumy0r,
7046 	     double *sumz0r,
7047 	     double *sumx1r,
7048 	     double *sumy1r,
7049 	     double *sumz1r,
7050 	     double *sumx2r,
7051 	     double *sumy2r,
7052 	     double *sumz2r,
7053 	     double *sumx0i,
7054 	     double *sumy0i,
7055 	     double *sumz0i,
7056 	     double *sumx1i,
7057 	     double *sumy1i,
7058 	     double *sumz1i,
7059 	     double *sumx2i,
7060 	     double *sumy2i,
7061 	     double *sumz2i,
7062              double fugou,
7063 	     double PFp,
7064 	     double PFm,
7065 	     double ene_p,
7066 	     double ene_m,
7067 	     int l2, int *l,
7068 	     int Mc_AN, int k,  int m,
7069 	     int Mj_AN, int kl, int n,
7070 	     double ******DS_NL1)
7071 {
7072 
7073   int l3,i;
7074   double tmpx,tmpy,tmpz;
7075   double tmp0,tmp1,tmp2;
7076   double tmp3,tmp4,tmp5,tmp6;
7077   double deri[4];
7078 
7079   /****************************************************
7080     off-diagonal contribution to up-dn matrix
7081     for spin non-collinear
7082   ****************************************************/
7083 
7084   if (SpinP_switch==3){
7085 
7086     /* p */
7087     if (l2==2){
7088 
7089       /* real contribution of l+1/2 to off diagonal up-down matrix */
7090       tmpx =
7091         fugou*
7092         ( ene_p/3.0*DS_NL1[0][1][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7093          -ene_p/3.0*DS_NL1[0][1][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l  ] );
7094 
7095       tmpy =
7096         fugou*
7097         ( ene_p/3.0*DS_NL1[0][2][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7098          -ene_p/3.0*DS_NL1[0][2][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l  ] );
7099 
7100       tmpz =
7101         fugou*
7102         ( ene_p/3.0*DS_NL1[0][3][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7103          -ene_p/3.0*DS_NL1[0][3][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l  ] );
7104 
7105       *sumx2r += tmpx;
7106       *sumy2r += tmpy;
7107       *sumz2r += tmpz;
7108 
7109       /* imaginary contribution of l+1/2 to off diagonal up-down matrix */
7110 
7111       tmpx =
7112         fugou*
7113         ( -ene_p/3.0*DS_NL1[0][1][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7114           +ene_p/3.0*DS_NL1[0][1][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+1] );
7115 
7116       tmpy =
7117         fugou*
7118         ( -ene_p/3.0*DS_NL1[0][2][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7119           +ene_p/3.0*DS_NL1[0][2][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+1] );
7120 
7121       tmpz =
7122         fugou*
7123         ( -ene_p/3.0*DS_NL1[0][3][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7124           +ene_p/3.0*DS_NL1[0][3][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+1] );
7125 
7126       *sumx2i += tmpx;
7127       *sumy2i += tmpy;
7128       *sumz2i += tmpz;
7129 
7130        /* real contribution of l-1/2 for to diagonal up-down matrix */
7131 
7132       tmpx =
7133         fugou*
7134         ( ene_m/3.0*DS_NL1[1][1][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7135          -ene_m/3.0*DS_NL1[1][1][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l  ] );
7136 
7137       tmpy =
7138         fugou*
7139         ( ene_m/3.0*DS_NL1[1][2][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7140          -ene_m/3.0*DS_NL1[1][2][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l  ] );
7141 
7142       tmpz =
7143         fugou*
7144         ( ene_m/3.0*DS_NL1[1][3][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7145          -ene_m/3.0*DS_NL1[1][3][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l  ] );
7146 
7147       *sumx2r -= tmpx;
7148       *sumy2r -= tmpy;
7149       *sumz2r -= tmpz;
7150 
7151        /* imaginary contribution of l-1/2 to off diagonal up-down matrix */
7152 
7153       tmpx =
7154         fugou*
7155         ( -ene_m/3.0*DS_NL1[1][1][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7156           +ene_m/3.0*DS_NL1[1][1][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+1] );
7157 
7158       tmpy =
7159         fugou*
7160         ( -ene_m/3.0*DS_NL1[1][2][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7161           +ene_m/3.0*DS_NL1[1][2][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+1] );
7162 
7163       tmpz =
7164         fugou*
7165         ( -ene_m/3.0*DS_NL1[1][3][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7166           +ene_m/3.0*DS_NL1[1][3][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+1] );
7167 
7168       *sumx2i -= tmpx;
7169       *sumy2i -= tmpy;
7170       *sumz2i -= tmpz;
7171 
7172     }
7173 
7174     /* d */
7175     if (l2==4){
7176 
7177        tmp0 = sqrt(3.0);
7178        tmp1 = ene_p/5.0;
7179        tmp2 = tmp0*tmp1;
7180 
7181        /* real contribution of l+1/2 to off diagonal up-down matrix */
7182 
7183        for (i=1; i<=3; i++){
7184          deri[i] =
7185             fugou*
7186             ( -tmp2*DS_NL1[0][i][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7187               +tmp2*DS_NL1[0][i][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l  ]
7188               +tmp1*DS_NL1[0][i][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7189               -tmp1*DS_NL1[0][i][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7190               +tmp1*DS_NL1[0][i][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7191               -tmp1*DS_NL1[0][i][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+2] );
7192        }
7193        *sumx2r += deri[1];
7194        *sumy2r += deri[2];
7195        *sumz2r += deri[3];
7196 
7197        /* imaginary contribution of l+1/2 to off diagonal up-down matrix */
7198 
7199        for (i=1; i<=3; i++){
7200          deri[i] =
7201             fugou*
7202             ( +tmp2*DS_NL1[0][i][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7203               -tmp2*DS_NL1[0][i][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l  ]
7204               +tmp1*DS_NL1[0][i][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7205               -tmp1*DS_NL1[0][i][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7206               -tmp1*DS_NL1[0][i][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7207               +tmp1*DS_NL1[0][i][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+2] );
7208        }
7209        *sumx2i += deri[1];
7210        *sumy2i += deri[2];
7211        *sumz2i += deri[3];
7212 
7213        /* real contribution of l-1/2 for to diagonal up-down matrix */
7214 
7215        tmp1 = ene_m/5.0;
7216        tmp2 = tmp0*tmp1;
7217 
7218        for (i=1; i<=3; i++){
7219          deri[i] =
7220             fugou*
7221             ( -tmp2*DS_NL1[1][i][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7222               +tmp2*DS_NL1[1][i][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l  ]
7223               +tmp1*DS_NL1[1][i][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7224               -tmp1*DS_NL1[1][i][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7225               +tmp1*DS_NL1[1][i][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7226               -tmp1*DS_NL1[1][i][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+2] );
7227        }
7228        *sumx2r -= deri[1];
7229        *sumy2r -= deri[2];
7230        *sumz2r -= deri[3];
7231 
7232        /* imaginary contribution of l-1/2 to off diagonal up-down matrix */
7233 
7234        for (i=1; i<=3; i++){
7235          deri[i] =
7236             fugou*
7237 	    ( +tmp2*DS_NL1[1][i][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7238               -tmp2*DS_NL1[1][i][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l  ]
7239               +tmp1*DS_NL1[1][i][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7240               -tmp1*DS_NL1[1][i][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7241               -tmp1*DS_NL1[1][i][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7242 	      +tmp1*DS_NL1[1][i][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+2] );
7243        }
7244        *sumx2i -= deri[1];
7245        *sumy2i -= deri[2];
7246        *sumz2i -= deri[3];
7247     }
7248 
7249     /* f */
7250     if (l2==6){
7251 
7252       /* real contribution of l+1/2 to off diagonal up-down matrix */
7253 
7254       tmp0 = sqrt(6.0);
7255       tmp1 = sqrt(3.0/2.0);
7256       tmp2 = sqrt(5.0/2.0);
7257 
7258       tmp3 = ene_p/7.0;
7259       tmp4 = tmp1*tmp3; /* sqrt(3.0/2.0) */
7260       tmp5 = tmp2*tmp3; /* sqrt(5.0/2.0) */
7261       tmp6 = tmp0*tmp3; /* sqrt(6.0)     */
7262 
7263        for (i=1; i<=3; i++){
7264          deri[i] =
7265 	   fugou*
7266              ( -tmp6*DS_NL1[0][i][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7267                +tmp6*DS_NL1[0][i][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l  ]
7268                -tmp5*DS_NL1[0][i][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7269                +tmp5*DS_NL1[0][i][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7270                -tmp5*DS_NL1[0][i][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7271 	       +tmp5*DS_NL1[0][i][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7272                -tmp4*DS_NL1[0][i][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+5]
7273 	       +tmp4*DS_NL1[0][i][Mc_AN][k][m][*l+5]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7274                -tmp4*DS_NL1[0][i][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+6]
7275                +tmp4*DS_NL1[0][i][Mc_AN][k][m][*l+6]*DS_NL1[0][0][Mj_AN][kl][n][*l+4] );
7276        }
7277        *sumx2r += deri[1];
7278        *sumy2r += deri[2];
7279        *sumz2r += deri[3];
7280 
7281        /* imaginary contribution of l+1/2 to off diagonal up-down matrix */
7282 
7283        for (i=1; i<=3; i++){
7284          deri[i] =
7285 	   fugou*
7286 	     ( +tmp6*DS_NL1[0][i][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7287                -tmp6*DS_NL1[0][i][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l  ]
7288                +tmp5*DS_NL1[0][i][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7289                -tmp5*DS_NL1[0][i][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7290                -tmp5*DS_NL1[0][i][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7291 	       +tmp5*DS_NL1[0][i][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7292                +tmp4*DS_NL1[0][i][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+6]
7293 	       -tmp4*DS_NL1[0][i][Mc_AN][k][m][*l+6]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7294                -tmp4*DS_NL1[0][i][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+5]
7295                +tmp4*DS_NL1[0][i][Mc_AN][k][m][*l+5]*DS_NL1[0][0][Mj_AN][kl][n][*l+4] );
7296        }
7297        *sumx2i += deri[1];
7298        *sumy2i += deri[2];
7299        *sumz2i += deri[3];
7300 
7301        /* real contribution of l-1/2 for to diagonal up-down matrix */
7302 
7303        tmp3 = ene_m/7.0;
7304        tmp4 = tmp1*tmp3; /* sqrt(3.0/2.0) */
7305        tmp5 = tmp2*tmp3; /* sqrt(5.0/2.0) */
7306        tmp6 = tmp0*tmp3; /* sqrt(6.0)     */
7307 
7308        for (i=1; i<=3; i++){
7309          deri[i] =
7310 	   fugou*
7311 	     ( -tmp6*DS_NL1[1][i][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7312                +tmp6*DS_NL1[1][i][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l  ]
7313                -tmp5*DS_NL1[1][i][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7314                +tmp5*DS_NL1[1][i][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7315                -tmp5*DS_NL1[1][i][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7316 	       +tmp5*DS_NL1[1][i][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7317                -tmp4*DS_NL1[1][i][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+5]
7318 	       +tmp4*DS_NL1[1][i][Mc_AN][k][m][*l+5]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7319                -tmp4*DS_NL1[1][i][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+6]
7320                +tmp4*DS_NL1[1][i][Mc_AN][k][m][*l+6]*DS_NL1[1][0][Mj_AN][kl][n][*l+4] );
7321        }
7322        *sumx2r -= deri[1];
7323        *sumy2r -= deri[2];
7324        *sumz2r -= deri[3];
7325 
7326        /* imaginary contribution of l-1/2 to off diagonal up-down matrix */
7327 
7328        for (i=1; i<=3; i++){
7329          deri[i] =
7330             fugou*
7331 	      ( +tmp6*DS_NL1[1][i][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7332                 -tmp6*DS_NL1[1][i][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l  ]
7333                 +tmp5*DS_NL1[1][i][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7334                 -tmp5*DS_NL1[1][i][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7335                 -tmp5*DS_NL1[1][i][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7336 	        +tmp5*DS_NL1[1][i][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7337                 +tmp4*DS_NL1[1][i][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+6]
7338 	        -tmp4*DS_NL1[1][i][Mc_AN][k][m][*l+6]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7339                 -tmp4*DS_NL1[1][i][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+5]
7340                 +tmp4*DS_NL1[1][i][Mc_AN][k][m][*l+5]*DS_NL1[1][0][Mj_AN][kl][n][*l+4] );
7341        }
7342        *sumx2i -= deri[1];
7343        *sumy2i -= deri[2];
7344        *sumz2i -= deri[3];
7345 
7346     }
7347 
7348   }
7349 
7350   /****************************************************
7351       off-diagonal contribution on up-up and dn-dn
7352   ****************************************************/
7353 
7354   /* p */
7355   if (l2==2){
7356 
7357     tmpx =
7358        fugou*
7359        ( ene_p/3.0*DS_NL1[0][1][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7360         -ene_p/3.0*DS_NL1[0][1][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l  ] );
7361 
7362     tmpy =
7363        fugou*
7364        ( ene_p/3.0*DS_NL1[0][2][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7365         -ene_p/3.0*DS_NL1[0][2][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l  ] );
7366 
7367     tmpz =
7368        fugou*
7369        ( ene_p/3.0*DS_NL1[0][3][Mc_AN][k][m][*l  ]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7370         -ene_p/3.0*DS_NL1[0][3][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l  ] );
7371 
7372     /* contribution of l+1/2 for up spin */
7373     *sumx0i += -tmpx;
7374     *sumy0i += -tmpy;
7375     *sumz0i += -tmpz;
7376 
7377     /* contribution of l+1/2 for down spin */
7378     *sumx1i += tmpx;
7379     *sumy1i += tmpy;
7380     *sumz1i += tmpz;
7381 
7382     tmpx =
7383        fugou*
7384        ( ene_m/3.0*DS_NL1[1][1][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7385         -ene_m/3.0*DS_NL1[1][1][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l  ] );
7386 
7387     tmpy =
7388        fugou*
7389        ( ene_m/3.0*DS_NL1[1][2][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7390         -ene_m/3.0*DS_NL1[1][2][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l  ] );
7391 
7392     tmpz =
7393        fugou*
7394        ( ene_m/3.0*DS_NL1[1][3][Mc_AN][k][m][*l  ]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7395         -ene_m/3.0*DS_NL1[1][3][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l  ] );
7396 
7397     /* contribution of l-1/2 for up spin */
7398     *sumx0i += tmpx;
7399     *sumy0i += tmpy;
7400     *sumz0i += tmpz;
7401 
7402     /* contribution of l+1/2 for down spin */
7403     *sumx1i += -tmpx;
7404     *sumy1i += -tmpy;
7405     *sumz1i += -tmpz;
7406   }
7407 
7408   /* d */
7409   else if (l2==4){
7410 
7411     tmpx =
7412        fugou*
7413        (
7414        ene_p*2.0/5.0*DS_NL1[0][1][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7415       -ene_p*2.0/5.0*DS_NL1[0][1][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7416       +ene_p*1.0/5.0*DS_NL1[0][1][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7417       -ene_p*1.0/5.0*DS_NL1[0][1][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+3] );
7418 
7419     tmpy =
7420        fugou*
7421        (
7422        ene_p*2.0/5.0*DS_NL1[0][2][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7423       -ene_p*2.0/5.0*DS_NL1[0][2][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7424       +ene_p*1.0/5.0*DS_NL1[0][2][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7425       -ene_p*1.0/5.0*DS_NL1[0][2][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+3] );
7426 
7427     tmpz =
7428        fugou*
7429        (
7430        ene_p*2.0/5.0*DS_NL1[0][3][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7431       -ene_p*2.0/5.0*DS_NL1[0][3][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7432       +ene_p*1.0/5.0*DS_NL1[0][3][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7433       -ene_p*1.0/5.0*DS_NL1[0][3][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+3] );
7434 
7435     /* contribution of l+1/2 for up spin */
7436     *sumx0i += -tmpx;
7437     *sumy0i += -tmpy;
7438     *sumz0i += -tmpz;
7439 
7440     /* contribution of l+1/2 for down spin */
7441     *sumx1i += tmpx;
7442     *sumy1i += tmpy;
7443     *sumz1i += tmpz;
7444 
7445     tmpx =
7446        fugou*
7447        (
7448        ene_m*2.0/5.0*DS_NL1[1][1][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7449       -ene_m*2.0/5.0*DS_NL1[1][1][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7450       +ene_m*1.0/5.0*DS_NL1[1][1][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7451       -ene_m*1.0/5.0*DS_NL1[1][1][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+3] );
7452 
7453     tmpy =
7454        fugou*
7455        (
7456        ene_m*2.0/5.0*DS_NL1[1][2][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7457       -ene_m*2.0/5.0*DS_NL1[1][2][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7458       +ene_m*1.0/5.0*DS_NL1[1][2][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7459       -ene_m*1.0/5.0*DS_NL1[1][2][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+3] );
7460 
7461     tmpz =
7462        fugou*
7463        (
7464        ene_m*2.0/5.0*DS_NL1[1][3][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7465       -ene_m*2.0/5.0*DS_NL1[1][3][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7466       +ene_m*1.0/5.0*DS_NL1[1][3][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7467       -ene_m*1.0/5.0*DS_NL1[1][3][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+3] );
7468 
7469     /* contribution of l-1/2 for up spin */
7470     *sumx0i += tmpx;
7471     *sumy0i += tmpy;
7472     *sumz0i += tmpz;
7473 
7474     /* contribution of l-1/2 for down spin */
7475     *sumx1i += -tmpx;
7476     *sumy1i += -tmpy;
7477     *sumz1i += -tmpz;
7478 
7479   }
7480 
7481   /* f */
7482   else if (l2==6){
7483 
7484     tmpx =
7485        fugou*
7486        (
7487        ene_p*1.0/7.0*DS_NL1[0][1][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7488       -ene_p*1.0/7.0*DS_NL1[0][1][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7489       +ene_p*2.0/7.0*DS_NL1[0][1][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7490       -ene_p*2.0/7.0*DS_NL1[0][1][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7491       +ene_p*3.0/7.0*DS_NL1[0][1][Mc_AN][k][m][*l+5]*DS_NL1[0][0][Mj_AN][kl][n][*l+6]
7492       -ene_p*3.0/7.0*DS_NL1[0][1][Mc_AN][k][m][*l+6]*DS_NL1[0][0][Mj_AN][kl][n][*l+5] );
7493 
7494     tmpy =
7495        fugou*
7496        (
7497        ene_p*1.0/7.0*DS_NL1[0][2][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7498       -ene_p*1.0/7.0*DS_NL1[0][2][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7499       +ene_p*2.0/7.0*DS_NL1[0][2][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7500       -ene_p*2.0/7.0*DS_NL1[0][2][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7501       +ene_p*3.0/7.0*DS_NL1[0][2][Mc_AN][k][m][*l+5]*DS_NL1[0][0][Mj_AN][kl][n][*l+6]
7502       -ene_p*3.0/7.0*DS_NL1[0][2][Mc_AN][k][m][*l+6]*DS_NL1[0][0][Mj_AN][kl][n][*l+5] );
7503 
7504     tmpz =
7505        fugou*
7506        (
7507        ene_p*1.0/7.0*DS_NL1[0][3][Mc_AN][k][m][*l+1]*DS_NL1[0][0][Mj_AN][kl][n][*l+2]
7508       -ene_p*1.0/7.0*DS_NL1[0][3][Mc_AN][k][m][*l+2]*DS_NL1[0][0][Mj_AN][kl][n][*l+1]
7509       +ene_p*2.0/7.0*DS_NL1[0][3][Mc_AN][k][m][*l+3]*DS_NL1[0][0][Mj_AN][kl][n][*l+4]
7510       -ene_p*2.0/7.0*DS_NL1[0][3][Mc_AN][k][m][*l+4]*DS_NL1[0][0][Mj_AN][kl][n][*l+3]
7511       +ene_p*3.0/7.0*DS_NL1[0][3][Mc_AN][k][m][*l+5]*DS_NL1[0][0][Mj_AN][kl][n][*l+6]
7512       -ene_p*3.0/7.0*DS_NL1[0][3][Mc_AN][k][m][*l+6]*DS_NL1[0][0][Mj_AN][kl][n][*l+5] );
7513 
7514     /* contribution of l+1/2 for up spin */
7515     *sumx0i += -tmpx;
7516     *sumy0i += -tmpy;
7517     *sumz0i += -tmpz;
7518 
7519     /* contribution of l+1/2 for down spin */
7520     *sumx1i += tmpx;
7521     *sumy1i += tmpy;
7522     *sumz1i += tmpz;
7523 
7524     tmpx =
7525        fugou*
7526        (
7527        ene_m*1.0/7.0*DS_NL1[1][1][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7528       -ene_m*1.0/7.0*DS_NL1[1][1][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7529       +ene_m*2.0/7.0*DS_NL1[1][1][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7530       -ene_m*2.0/7.0*DS_NL1[1][1][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7531       +ene_m*3.0/7.0*DS_NL1[1][1][Mc_AN][k][m][*l+5]*DS_NL1[1][0][Mj_AN][kl][n][*l+6]
7532       -ene_m*3.0/7.0*DS_NL1[1][1][Mc_AN][k][m][*l+6]*DS_NL1[1][0][Mj_AN][kl][n][*l+5] );
7533 
7534     tmpy =
7535        fugou*
7536        (
7537        ene_m*1.0/7.0*DS_NL1[1][2][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7538       -ene_m*1.0/7.0*DS_NL1[1][2][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7539       +ene_m*2.0/7.0*DS_NL1[1][2][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7540       -ene_m*2.0/7.0*DS_NL1[1][2][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7541       +ene_m*3.0/7.0*DS_NL1[1][2][Mc_AN][k][m][*l+5]*DS_NL1[1][0][Mj_AN][kl][n][*l+6]
7542       -ene_m*3.0/7.0*DS_NL1[1][2][Mc_AN][k][m][*l+6]*DS_NL1[1][0][Mj_AN][kl][n][*l+5] );
7543 
7544     tmpz =
7545        fugou*
7546        (
7547        ene_m*1.0/7.0*DS_NL1[1][3][Mc_AN][k][m][*l+1]*DS_NL1[1][0][Mj_AN][kl][n][*l+2]
7548       -ene_m*1.0/7.0*DS_NL1[1][3][Mc_AN][k][m][*l+2]*DS_NL1[1][0][Mj_AN][kl][n][*l+1]
7549       +ene_m*2.0/7.0*DS_NL1[1][3][Mc_AN][k][m][*l+3]*DS_NL1[1][0][Mj_AN][kl][n][*l+4]
7550       -ene_m*2.0/7.0*DS_NL1[1][3][Mc_AN][k][m][*l+4]*DS_NL1[1][0][Mj_AN][kl][n][*l+3]
7551       +ene_m*3.0/7.0*DS_NL1[1][3][Mc_AN][k][m][*l+5]*DS_NL1[1][0][Mj_AN][kl][n][*l+6]
7552       -ene_m*3.0/7.0*DS_NL1[1][3][Mc_AN][k][m][*l+6]*DS_NL1[1][0][Mj_AN][kl][n][*l+5] );
7553 
7554     /* contribution of l-1/2 for up spin */
7555     *sumx0i += tmpx;
7556     *sumy0i += tmpy;
7557     *sumz0i += tmpz;
7558 
7559     /* contribution of l-1/2 for down spin */
7560     *sumx1i += -tmpx;
7561     *sumy1i += -tmpy;
7562     *sumz1i += -tmpz;
7563   }
7564 
7565   /****************************************************
7566       diagonal contribution on up-up and dn-dn
7567   ****************************************************/
7568 
7569   for (l3=0; l3<=l2; l3++){
7570 
7571     /* VNL for j=l+1/2 */
7572 
7573     tmpx = PFp*ene_p*DS_NL1[0][1][Mc_AN][k][m][*l]*DS_NL1[0][0][Mj_AN][kl][n][*l];
7574     tmpy = PFp*ene_p*DS_NL1[0][2][Mc_AN][k][m][*l]*DS_NL1[0][0][Mj_AN][kl][n][*l];
7575     tmpz = PFp*ene_p*DS_NL1[0][3][Mc_AN][k][m][*l]*DS_NL1[0][0][Mj_AN][kl][n][*l];
7576 
7577     *sumx0r += tmpx;
7578     *sumy0r += tmpy;
7579     *sumz0r += tmpz;
7580 
7581     *sumx1r += tmpx;
7582     *sumy1r += tmpy;
7583     *sumz1r += tmpz;
7584 
7585     /* VNL for j=l-1/2 */
7586 
7587     tmpx = PFm*ene_m*DS_NL1[1][1][Mc_AN][k][m][*l]*DS_NL1[1][0][Mj_AN][kl][n][*l];
7588     tmpy = PFm*ene_m*DS_NL1[1][2][Mc_AN][k][m][*l]*DS_NL1[1][0][Mj_AN][kl][n][*l];
7589     tmpz = PFm*ene_m*DS_NL1[1][3][Mc_AN][k][m][*l]*DS_NL1[1][0][Mj_AN][kl][n][*l];
7590 
7591     *sumx0r += tmpx;
7592     *sumy0r += tmpy;
7593     *sumz0r += tmpz;
7594 
7595     *sumx1r += tmpx;
7596     *sumy1r += tmpy;
7597     *sumz1r += tmpz;
7598 
7599     *l = *l + 1;
7600   }
7601 }
7602 
7603 
7604 
7605 
7606 
dH_U_full(int Mc_AN,int h_AN,int q_AN,double ***** OLP,double **** v_eff,double *** Hx,double *** Hy,double *** Hz)7607 void dH_U_full(int Mc_AN, int h_AN, int q_AN,
7608                double *****OLP, double ****v_eff,
7609                double ***Hx, double ***Hy, double ***Hz)
7610 {
7611   int i,j,k,m,n,kg,kan,so,deri_kind,Mk_AN;
7612   int ig,ian,jg,jan,kl,kl1,kl2,spin,spinmax;
7613   int wakg,l1,l2,l3,Gc_AN,Mi_AN,Mj_AN;
7614   int Rwan,Lwan,p,p0;
7615   double PF[2],sumx,sumy,sumz,ene;
7616   double tmpx,tmpy,tmpz;
7617   double Lsum0,Lsum1,Lsum2,Lsum3;
7618   double Rsum0,Rsum1,Rsum2,Rsum3;
7619   double PFp,PFm,ene_p,ene_m;
7620   double ***Hx2,***Hy2,***Hz2;
7621   double sumx0,sumy0,sumz0;
7622   double sumx1,sumy1,sumz1;
7623   double sumx2,sumy2,sumz2;
7624 
7625   /****************************************************
7626    allocation of arrays:
7627 
7628    double Hx2[3][List_YOUSO[7]][List_YOUSO[7]];
7629    double Hy2[3][List_YOUSO[7]][List_YOUSO[7]];
7630    double Hz2[3][List_YOUSO[7]][List_YOUSO[7]];
7631   ****************************************************/
7632 
7633   Hx2 = (double***)malloc(sizeof(double**)*3);
7634   for (i=0; i<3; i++){
7635     Hx2[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
7636     for (j=0; j<List_YOUSO[7]; j++){
7637       Hx2[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
7638     }
7639   }
7640 
7641   Hy2 = (double***)malloc(sizeof(double**)*3);
7642   for (i=0; i<3; i++){
7643     Hy2[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
7644     for (j=0; j<List_YOUSO[7]; j++){
7645       Hy2[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
7646     }
7647   }
7648 
7649   Hz2 = (double***)malloc(sizeof(double**)*3);
7650   for (i=0; i<3; i++){
7651     Hz2[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
7652     for (j=0; j<List_YOUSO[7]; j++){
7653       Hz2[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
7654     }
7655   }
7656 
7657   /****************************************************
7658    start calc.
7659   ****************************************************/
7660 
7661   if (SpinP_switch==0) spinmax = 0;
7662   else                 spinmax = 1;
7663 
7664   Gc_AN = M2G[Mc_AN];
7665   ig = natn[Gc_AN][h_AN];
7666   Lwan = WhatSpecies[ig];
7667   Mi_AN = F_G2M[ig]; /* F_G2M should be used */
7668   ian = Spe_Total_CNO[Lwan];
7669   jg = natn[Gc_AN][q_AN];
7670   Rwan = WhatSpecies[jg];
7671   Mj_AN = F_G2M[jg]; /* F_G2M should be used */
7672   jan = Spe_Total_CNO[Rwan];
7673 
7674   if (h_AN==0){
7675 
7676     /****************************************************
7677                           dS*ep*S
7678     ****************************************************/
7679 
7680     for (k=0; k<=FNAN[Gc_AN]; k++){
7681 
7682       kg = natn[Gc_AN][k];
7683       Mk_AN = F_G2M[kg];  /* F_G2M should be used */
7684       wakg = WhatSpecies[kg];
7685       kan = Spe_Total_NO[wakg];
7686       kl = RMI1[Mc_AN][q_AN][k];
7687 
7688       /****************************************************
7689                   derivative at h_AN (=Mc_AN)
7690       ****************************************************/
7691 
7692       if (0<=kl){
7693 
7694 	for (m=0; m<ian; m++){
7695 	  for (n=0; n<jan; n++){
7696 
7697             for (spin=0; spin<=spinmax; spin++){
7698 
7699 	      sumx = 0.0;
7700 	      sumy = 0.0;
7701 	      sumz = 0.0;
7702 
7703               if (Cnt_switch==0){
7704 
7705 		for (l1=0; l1<kan; l1++){
7706 		  for (l2=0; l2<kan; l2++){
7707 		    ene = v_eff[spin][Mk_AN][l1][l2];
7708 		    sumx += ene*OLP[1][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
7709 		    sumy += ene*OLP[2][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
7710 		    sumz += ene*OLP[3][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
7711 		  }
7712 		}
7713 
7714 	      }
7715 
7716               else if (Cnt_switch==1){
7717 
7718 		for (l1=0; l1<kan; l1++){
7719 		  for (l2=0; l2<kan; l2++){
7720 		    Lsum1 = 0.0;
7721 		    Lsum2 = 0.0;
7722 		    Lsum3 = 0.0;
7723 		    for (p=0; p<Spe_Specified_Num[Lwan][m]; p++){
7724 		      p0 = Spe_Trans_Orbital[Lwan][m][p];
7725 		      Lsum1 += CntCoes[Mc_AN][m][p]*OLP[1][Mc_AN][k][p0][l1];
7726 		      Lsum2 += CntCoes[Mc_AN][m][p]*OLP[2][Mc_AN][k][p0][l1];
7727 		      Lsum3 += CntCoes[Mc_AN][m][p]*OLP[3][Mc_AN][k][p0][l1];
7728 		    }
7729 
7730 		    Rsum0 = 0.0;
7731 		    for (p=0; p<Spe_Specified_Num[Rwan][n]; p++){
7732 		      p0 = Spe_Trans_Orbital[Rwan][n][p];
7733 		      Rsum0 += CntCoes[Mj_AN][n][p]*OLP[0][Mj_AN][kl][p0][l2];
7734 		    }
7735 
7736 		    ene = v_eff[spin][Mk_AN][l1][l2];
7737 		    sumx += ene*Lsum1*Rsum0;
7738 		    sumy += ene*Lsum2*Rsum0;
7739 		    sumz += ene*Lsum3*Rsum0;
7740 		  }
7741 		}
7742 
7743 	      }
7744 
7745 	      if (k==0){
7746 		Hx[spin][m][n] = sumx;
7747 		Hy[spin][m][n] = sumy;
7748 		Hz[spin][m][n] = sumz;
7749 
7750 		Hx[2][m][n] = 0.0;
7751 		Hy[2][m][n] = 0.0;
7752 		Hz[2][m][n] = 0.0;
7753 	      }
7754 	      else {
7755 		Hx[spin][m][n] += sumx;
7756 		Hy[spin][m][n] += sumy;
7757 		Hz[spin][m][n] += sumz;
7758 	      }
7759 	    }
7760 	  }
7761 	}
7762       } /* if */
7763     } /* k */
7764 
7765     /****************************************************
7766                           S*ep*dS
7767     ****************************************************/
7768 
7769     if (q_AN==0){
7770       for (m=0; m<ian; m++){
7771         for (n=0; n<jan; n++){
7772           Hx2[0][m][n] = Hx[0][m][n];
7773           Hy2[0][m][n] = Hy[0][m][n];
7774           Hz2[0][m][n] = Hz[0][m][n];
7775 
7776           Hx2[1][m][n] = Hx[1][m][n];
7777           Hy2[1][m][n] = Hy[1][m][n];
7778           Hz2[1][m][n] = Hz[1][m][n];
7779         }
7780       }
7781       for (m=0; m<ian; m++){
7782         for (n=0; n<jan; n++){
7783           Hx[0][m][n] = Hx2[0][m][n] + Hx2[0][n][m];
7784           Hy[0][m][n] = Hy2[0][m][n] + Hy2[0][n][m];
7785           Hz[0][m][n] = Hz2[0][m][n] + Hz2[0][n][m];
7786 
7787           Hx[1][m][n] = Hx2[1][m][n] + Hx2[1][n][m];
7788           Hy[1][m][n] = Hy2[1][m][n] + Hy2[1][n][m];
7789           Hz[1][m][n] = Hz2[1][m][n] + Hz2[1][n][m];
7790         }
7791       }
7792     }
7793 
7794     else {
7795 
7796       kg = natn[Gc_AN][0];
7797       Mk_AN = F_G2M[kg]; /* F_G2M should be used */
7798       wakg = WhatSpecies[kg];
7799       kan = Spe_Total_NO[wakg];
7800       kl = RMI1[Mc_AN][q_AN][0];
7801 
7802       /****************************************************
7803                         derivative at k=0
7804       ****************************************************/
7805 
7806       for (m=0; m<ian; m++){
7807 	for (n=0; n<jan; n++){
7808 
7809           for (spin=0; spin<=spinmax; spin++){
7810 
7811   	    sumx = 0.0;
7812 	    sumy = 0.0;
7813 	    sumz = 0.0;
7814 
7815             if (Cnt_switch==0){
7816 
7817 	      for (l1=0; l1<kan; l1++){
7818   	        for (l2=0; l2<kan; l2++){
7819 		  ene = v_eff[spin][Mk_AN][l1][l2];
7820 		  sumx -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[1][Mj_AN][kl][n][l2];
7821 		  sumy -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[2][Mj_AN][kl][n][l2];
7822 		  sumz -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[3][Mj_AN][kl][n][l2];
7823 		}
7824 	      }
7825 
7826 	    }
7827 
7828             else if (Cnt_switch==1){
7829 
7830               for (l1=0; l1<kan; l1++){
7831                 for (l2=0; l2<kan; l2++){
7832 
7833 		  Lsum0 = 0.0;
7834 
7835 		  for (p=0; p<Spe_Specified_Num[Lwan][m]; p++){
7836 		    p0 = Spe_Trans_Orbital[Lwan][m][p];
7837 		    Lsum0 += CntCoes[Mc_AN][m][p]*OLP[0][Mc_AN][0][p0][l1];
7838 		  }
7839 
7840 		  Rsum1 = 0.0;
7841 		  Rsum2 = 0.0;
7842 		  Rsum3 = 0.0;
7843 
7844 		  for (p=0; p<Spe_Specified_Num[Rwan][n]; p++){
7845 		    p0 = Spe_Trans_Orbital[Rwan][n][p];
7846 		    Rsum1 += CntCoes[Mj_AN][n][p]*OLP[1][Mj_AN][kl][p0][l2];
7847 		    Rsum2 += CntCoes[Mj_AN][n][p]*OLP[2][Mj_AN][kl][p0][l2];
7848 		    Rsum3 += CntCoes[Mj_AN][n][p]*OLP[3][Mj_AN][kl][p0][l2];
7849 		  }
7850 
7851 		  ene = v_eff[spin][Mk_AN][l1][l2];
7852 		  sumx -= ene*Lsum0*Rsum1;
7853 		  sumy -= ene*Lsum0*Rsum2;
7854 		  sumz -= ene*Lsum0*Rsum3;
7855 		}
7856 	      }
7857 	    }
7858 
7859 
7860 	    Hx[spin][m][n] += sumx;
7861 	    Hy[spin][m][n] += sumy;
7862 	    Hz[spin][m][n] += sumz;
7863 	  }
7864 	}
7865       }
7866     }
7867 
7868   } /* if (h_AN==0) */
7869 
7870   else {
7871 
7872     /****************************************************
7873                            dS*ep*S
7874     ****************************************************/
7875 
7876     kg = natn[Gc_AN][0];
7877     Mk_AN = F_G2M[kg]; /* F_G2M should be used */
7878     wakg = WhatSpecies[kg];
7879     kan = Spe_Total_NO[wakg];
7880     kl1 = RMI1[Mc_AN][h_AN][0];
7881     kl2 = RMI1[Mc_AN][q_AN][0];
7882 
7883     for (m=0; m<ian; m++){
7884       for (n=0; n<jan; n++){
7885 
7886         for (spin=0; spin<=spinmax; spin++){
7887 
7888    	  sumx = 0.0;
7889 	  sumy = 0.0;
7890 	  sumz = 0.0;
7891 
7892           if (Cnt_switch==0){
7893 
7894             for (l1=0; l1<kan; l1++){
7895               for (l2=0; l2<kan; l2++){
7896 		ene = v_eff[spin][Mk_AN][l1][l2];
7897 		sumx -= ene*OLP[1][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
7898 		sumy -= ene*OLP[2][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
7899 		sumz -= ene*OLP[3][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
7900 	      }
7901 	    }
7902 	  }
7903 
7904           else if (Cnt_switch==1){
7905 
7906             for (l1=0; l1<kan; l1++){
7907               for (l2=0; l2<kan; l2++){
7908 
7909 		Lsum1 = 0.0;
7910 		Lsum2 = 0.0;
7911 		Lsum3 = 0.0;
7912 		for (p=0; p<Spe_Specified_Num[Lwan][m]; p++){
7913 		  p0 = Spe_Trans_Orbital[Lwan][m][p];
7914 		  Lsum1 += CntCoes[Mi_AN][m][p]*OLP[1][Mi_AN][kl1][p0][l1];
7915 		  Lsum2 += CntCoes[Mi_AN][m][p]*OLP[2][Mi_AN][kl1][p0][l1];
7916 		  Lsum3 += CntCoes[Mi_AN][m][p]*OLP[3][Mi_AN][kl1][p0][l1];
7917 		}
7918 
7919 		Rsum0 = 0.0;
7920 		for (p=0; p<Spe_Specified_Num[Rwan][n]; p++){
7921 		  p0 = Spe_Trans_Orbital[Rwan][n][p];
7922 		  Rsum0 += CntCoes[Mj_AN][n][p]*OLP[0][Mj_AN][kl2][p0][l2];
7923 		}
7924 
7925 		ene = v_eff[spin][Mk_AN][l1][l2];
7926 		sumx -= ene*Lsum1*Rsum0;
7927 		sumy -= ene*Lsum2*Rsum0;
7928 		sumz -= ene*Lsum3*Rsum0;
7929 	      }
7930 	    }
7931 
7932 	  }
7933 
7934 
7935 	  Hx[spin][m][n] = sumx;
7936 	  Hy[spin][m][n] = sumy;
7937 	  Hz[spin][m][n] = sumz;
7938 
7939 	  Hx[2][m][n] = 0.0;
7940 	  Hy[2][m][n] = 0.0;
7941 	  Hz[2][m][n] = 0.0;
7942 	}
7943       }
7944     }
7945 
7946     /****************************************************
7947                            S*ep*dS
7948     ****************************************************/
7949 
7950     if (q_AN==0){
7951 
7952       for (k=0; k<=FNAN[Gc_AN]; k++){
7953         kg = natn[Gc_AN][k];
7954         Mk_AN = F_G2M[kg]; /* F_G2M should be used */
7955         wakg = WhatSpecies[kg];
7956         kan = Spe_Total_NO[wakg];
7957         kl1 = RMI1[Mc_AN][h_AN][k];
7958         kl2 = RMI1[Mc_AN][q_AN][k];
7959 
7960         if (0<=kl1){
7961 
7962 	  for (m=0; m<ian; m++){
7963 	    for (n=0; n<jan; n++){
7964 
7965               for (spin=0; spin<=spinmax; spin++){
7966 
7967 	        sumx = 0.0;
7968 	        sumy = 0.0;
7969 	        sumz = 0.0;
7970 
7971                 if (Cnt_switch==0){
7972 
7973 		  for (l1=0; l1<kan; l1++){
7974 		    for (l2=0; l2<kan; l2++){
7975 		      ene = v_eff[spin][Mk_AN][l1][l2];
7976 		      sumx += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
7977 		      sumy += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
7978 		      sumz += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
7979 		    }
7980 		  }
7981 
7982 		}
7983 
7984                 else if (Cnt_switch==1){
7985 
7986                   for (l1=0; l1<kan; l1++){
7987                     for (l2=0; l2<kan; l2++){
7988 
7989 		      Lsum0 = 0.0;
7990 
7991 		      for (p=0; p<Spe_Specified_Num[Lwan][m]; p++){
7992 			p0 = Spe_Trans_Orbital[Lwan][m][p];
7993 			Lsum0 += CntCoes[Mi_AN][m][p]*OLP[0][Mi_AN][kl1][p0][l1];
7994 		      }
7995 
7996 		      Rsum1 = 0.0;
7997 		      Rsum2 = 0.0;
7998 		      Rsum3 = 0.0;
7999 
8000 		      for (p=0; p<Spe_Specified_Num[Rwan][n]; p++){
8001 			p0 = Spe_Trans_Orbital[Rwan][n][p];
8002 			Rsum1 += CntCoes[Mj_AN][n][p]*OLP[1][Mj_AN][kl2][p0][l2];
8003 			Rsum2 += CntCoes[Mj_AN][n][p]*OLP[2][Mj_AN][kl2][p0][l2];
8004 			Rsum3 += CntCoes[Mj_AN][n][p]*OLP[3][Mj_AN][kl2][p0][l2];
8005 		      }
8006 
8007 		      ene = v_eff[spin][Mk_AN][l1][l2];
8008 		      sumx += ene*Lsum0*Rsum1;
8009 		      sumy += ene*Lsum0*Rsum2;
8010 		      sumz += ene*Lsum0*Rsum3;
8011 
8012 		    }
8013 		  }
8014 		}
8015 
8016 		Hx[spin][m][n] += sumx;
8017 		Hy[spin][m][n] += sumy;
8018 		Hz[spin][m][n] += sumz;
8019 	      }
8020 	    }
8021 	  }
8022 	}
8023 
8024       }
8025     } /* if (q_AN==0) */
8026 
8027     else {
8028 
8029       kg = natn[Gc_AN][0];
8030       Mk_AN = F_G2M[kg]; /* F_G2M should be used */
8031       wakg = WhatSpecies[kg];
8032       kan = Spe_Total_NO[wakg];
8033       kl1 = RMI1[Mc_AN][h_AN][0];
8034       kl2 = RMI1[Mc_AN][q_AN][0];
8035 
8036       for (m=0; m<ian; m++){
8037 	for (n=0; n<jan; n++){
8038 
8039           for (spin=0; spin<=spinmax; spin++){
8040 
8041  	    sumx = 0.0;
8042 	    sumy = 0.0;
8043 	    sumz = 0.0;
8044 
8045             if (Cnt_switch==0){
8046 
8047               for (l1=0; l1<kan; l1++){
8048                 for (l2=0; l2<kan; l2++){
8049 		  ene = v_eff[spin][Mk_AN][l1][l2];
8050 		  sumx -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8051 		  sumy -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8052 		  sumz -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8053 		}
8054 	      }
8055 	    }
8056 
8057             else if (Cnt_switch==1){
8058 
8059               for (l1=0; l1<kan; l1++){
8060                 for (l2=0; l2<kan; l2++){
8061 
8062 		  Lsum0 = 0.0;
8063 
8064 		  for (p=0; p<Spe_Specified_Num[Lwan][m]; p++){
8065 		    p0 = Spe_Trans_Orbital[Lwan][m][p];
8066 		    Lsum0 += CntCoes[Mi_AN][m][p]*OLP[0][Mi_AN][kl1][p0][l1];
8067 		  }
8068 
8069 		  Rsum1 = 0.0;
8070 		  Rsum2 = 0.0;
8071 		  Rsum3 = 0.0;
8072 
8073 		  for (p=0; p<Spe_Specified_Num[Rwan][n]; p++){
8074 		    p0 = Spe_Trans_Orbital[Rwan][n][p];
8075 		    Rsum1 += CntCoes[Mj_AN][n][p]*OLP[1][Mj_AN][kl2][p0][l2];
8076 		    Rsum2 += CntCoes[Mj_AN][n][p]*OLP[2][Mj_AN][kl2][p0][l2];
8077 		    Rsum3 += CntCoes[Mj_AN][n][p]*OLP[3][Mj_AN][kl2][p0][l2];
8078 		  }
8079 
8080 		  ene = v_eff[spin][Mk_AN][l1][l2];
8081 		  sumx -= ene*Lsum0*Rsum1;
8082 		  sumy -= ene*Lsum0*Rsum2;
8083 		  sumz -= ene*Lsum0*Rsum3;
8084 		}
8085 	      }
8086 	    }
8087 
8088 	    Hx[spin][m][n] += sumx;
8089 	    Hy[spin][m][n] += sumy;
8090 	    Hz[spin][m][n] += sumz;
8091 	  }
8092 	}
8093       }
8094     }
8095   }
8096 
8097   /****************************************************
8098    freeing of arrays:
8099 
8100    double Hx2[3][List_YOUSO[7]][List_YOUSO[7]];
8101    double Hy2[3][List_YOUSO[7]][List_YOUSO[7]];
8102    double Hz2[3][List_YOUSO[7]][List_YOUSO[7]];
8103   ****************************************************/
8104 
8105   for (i=0; i<3; i++){
8106     for (j=0; j<List_YOUSO[7]; j++){
8107       free(Hx2[i][j]);
8108     }
8109     free(Hx2[i]);
8110   }
8111   free(Hx2);
8112 
8113   for (i=0; i<3; i++){
8114     for (j=0; j<List_YOUSO[7]; j++){
8115       free(Hy2[i][j]);
8116     }
8117     free(Hy2[i]);
8118   }
8119   free(Hy2);
8120 
8121   for (i=0; i<3; i++){
8122     for (j=0; j<List_YOUSO[7]; j++){
8123       free(Hz2[i][j]);
8124     }
8125     free(Hz2[i]);
8126   }
8127   free(Hz2);
8128 }
8129 
8130 
8131 
8132 
8133 
8134 
8135 
dH_U_NC_full(int Mc_AN,int h_AN,int q_AN,double ***** OLP,dcomplex ***** NC_v_eff,dcomplex **** Hx,dcomplex **** Hy,dcomplex **** Hz)8136 void dH_U_NC_full(int Mc_AN, int h_AN, int q_AN,
8137                   double *****OLP, dcomplex *****NC_v_eff,
8138                   dcomplex ****Hx, dcomplex ****Hy, dcomplex ****Hz)
8139 {
8140   int i,j,k,m,n,kg,kan,so,deri_kind,Mk_AN;
8141   int ig,ian,jg,jan,kl,kl1,kl2,spin;
8142   int wakg,l1,l2,l3,Gc_AN,Mi_AN,Mj_AN;
8143   int Rwan,Lwan,p,p0,s1,s2;
8144   double PF[2],sumx,sumy,sumz,ene;
8145   double tmpx,tmpy,tmpz;
8146   double Lsum0,Lsum1,Lsum2,Lsum3;
8147   double Rsum0,Rsum1,Rsum2,Rsum3;
8148   double PFp,PFm,ene_p,ene_m;
8149   double Re00x,Re00y,Re00z;
8150   double Re11x,Re11y,Re11z;
8151   double Re01x,Re01y,Re01z;
8152   double Re10x,Re10y,Re10z;
8153   double Im00x,Im00y,Im00z;
8154   double Im11x,Im11y,Im11z;
8155   double Im01x,Im01y,Im01z;
8156   double Im10x,Im10y,Im10z;
8157 
8158   /****************************************************
8159    start calc.
8160   ****************************************************/
8161 
8162   Gc_AN = M2G[Mc_AN];
8163   ig = natn[Gc_AN][h_AN];
8164   Lwan = WhatSpecies[ig];
8165   Mi_AN = F_G2M[ig]; /* F_G2M should be used */
8166   ian = Spe_Total_CNO[Lwan];
8167   jg = natn[Gc_AN][q_AN];
8168   Rwan = WhatSpecies[jg];
8169   Mj_AN = F_G2M[jg]; /* F_G2M should be used */
8170   jan = Spe_Total_CNO[Rwan];
8171 
8172   if (h_AN==0){
8173 
8174     /****************************************************
8175                           dS*ep*S
8176     ****************************************************/
8177 
8178     for (k=0; k<=FNAN[Gc_AN]; k++){
8179 
8180       kg = natn[Gc_AN][k];
8181       Mk_AN = F_G2M[kg];  /* F_G2M should be used */
8182       wakg = WhatSpecies[kg];
8183       kan = Spe_Total_NO[wakg];
8184       kl = RMI1[Mc_AN][q_AN][k];
8185 
8186       /****************************************************
8187                   derivative at h_AN (=Mc_AN)
8188       ****************************************************/
8189 
8190       if (0<=kl){
8191 
8192 	for (m=0; m<ian; m++){
8193 	  for (n=0; n<jan; n++){
8194 
8195 	    Re00x = 0.0;     Re00y = 0.0;     Re00z = 0.0;
8196 	    Re11x = 0.0;     Re11y = 0.0;     Re11z = 0.0;
8197 	    Re01x = 0.0;     Re01y = 0.0;     Re01z = 0.0;
8198 	    Re10x = 0.0;     Re10y = 0.0;     Re10z = 0.0;
8199 
8200 	    Im00x = 0.0;     Im00y = 0.0;     Im00z = 0.0;
8201 	    Im11x = 0.0;     Im11y = 0.0;     Im11z = 0.0;
8202 	    Im01x = 0.0;     Im01y = 0.0;     Im01z = 0.0;
8203 	    Im10x = 0.0;     Im10y = 0.0;     Im10z = 0.0;
8204 
8205 	    for (l1=0; l1<kan; l1++){
8206 	      for (l2=0; l2<kan; l2++){
8207 
8208 		ene = NC_v_eff[0][0][Mk_AN][l1][l2].r;
8209 		Re00x += ene*OLP[1][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8210 		Re00y += ene*OLP[2][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8211 		Re00z += ene*OLP[3][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8212 
8213 		ene = NC_v_eff[1][1][Mk_AN][l1][l2].r;
8214 		Re11x += ene*OLP[1][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8215 		Re11y += ene*OLP[2][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8216 		Re11z += ene*OLP[3][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8217 
8218 		ene = NC_v_eff[0][1][Mk_AN][l1][l2].r;
8219 		Re01x += ene*OLP[1][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8220 		Re01y += ene*OLP[2][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8221 		Re01z += ene*OLP[3][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8222 
8223 		ene = NC_v_eff[1][0][Mk_AN][l1][l2].r;
8224 		Re10x += ene*OLP[1][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8225 		Re10y += ene*OLP[2][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8226 		Re10z += ene*OLP[3][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8227 
8228 		ene = NC_v_eff[0][0][Mk_AN][l1][l2].i;
8229 		Im00x += ene*OLP[1][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8230 		Im00y += ene*OLP[2][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8231 		Im00z += ene*OLP[3][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8232 
8233 		ene = NC_v_eff[1][1][Mk_AN][l1][l2].i;
8234 		Im11x += ene*OLP[1][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8235 		Im11y += ene*OLP[2][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8236 		Im11z += ene*OLP[3][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8237 
8238 		ene = NC_v_eff[0][1][Mk_AN][l1][l2].i;
8239 		Im01x += ene*OLP[1][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8240 		Im01y += ene*OLP[2][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8241 		Im01z += ene*OLP[3][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8242 
8243 		ene = NC_v_eff[1][0][Mk_AN][l1][l2].i;
8244 		Im10x += ene*OLP[1][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8245 		Im10y += ene*OLP[2][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8246 		Im10z += ene*OLP[3][Mc_AN][k][m][l1]*OLP[0][Mj_AN][kl][n][l2];
8247 
8248 	      }
8249 	    }
8250 
8251 
8252 	    if (k==0){
8253 	      Hx[0][0][m][n] = Complex(Re00x,Im00x);
8254 	      Hy[0][0][m][n] = Complex(Re00y,Im00y);
8255 	      Hz[0][0][m][n] = Complex(Re00z,Im00z);
8256 
8257 	      Hx[1][1][m][n] = Complex(Re11x,Im11x);
8258 	      Hy[1][1][m][n] = Complex(Re11y,Im11y);
8259 	      Hz[1][1][m][n] = Complex(Re11z,Im11z);
8260 
8261 	      Hx[0][1][m][n] = Complex(Re01x,Im01x);
8262 	      Hy[0][1][m][n] = Complex(Re01y,Im01y);
8263 	      Hz[0][1][m][n] = Complex(Re01z,Im01z);
8264 
8265 	      Hx[1][0][m][n] = Complex(Re10x,Im10x);
8266 	      Hy[1][0][m][n] = Complex(Re10y,Im10y);
8267 	      Hz[1][0][m][n] = Complex(Re10z,Im10z);
8268 	    }
8269 	    else{
8270 
8271 	      Hx[0][0][m][n].r += Re00x;  Hx[0][0][m][n].i += Im00x;
8272 	      Hy[0][0][m][n].r += Re00y;  Hy[0][0][m][n].i += Im00y;
8273 	      Hz[0][0][m][n].r += Re00z;  Hz[0][0][m][n].i += Im00z;
8274 
8275 	      Hx[1][1][m][n].r += Re11x;  Hx[1][1][m][n].i += Im11x;
8276 	      Hy[1][1][m][n].r += Re11y;  Hy[1][1][m][n].i += Im11y;
8277 	      Hz[1][1][m][n].r += Re11z;  Hz[1][1][m][n].i += Im11z;
8278 
8279 	      Hx[0][1][m][n].r += Re01x;  Hx[0][1][m][n].i += Im01x;
8280 	      Hy[0][1][m][n].r += Re01y;  Hy[0][1][m][n].i += Im01y;
8281 	      Hz[0][1][m][n].r += Re01z;  Hz[0][1][m][n].i += Im01z;
8282 
8283 	      Hx[1][0][m][n].r += Re10x;  Hx[1][0][m][n].i += Im10x;
8284 	      Hy[1][0][m][n].r += Re10y;  Hy[1][0][m][n].i += Im10y;
8285 	      Hz[1][0][m][n].r += Re10z;  Hz[1][0][m][n].i += Im10z;
8286 	    }
8287 
8288 	  } /* n */
8289 	} /* m */
8290       } /* if */
8291     } /* k */
8292 
8293     /****************************************************
8294                           S*ep*dS
8295     ****************************************************/
8296 
8297     /* ????? */
8298 
8299     if (q_AN==0){
8300 
8301       for (s1=0; s1<2; s1++){
8302 	for (s2=0; s2<2; s2++){
8303 	  for (m=0; m<ian; m++){
8304 	    for (n=0; n<jan; n++){
8305 
8306 	      Hx[s1][s2][m][n].r = 2.0*Hx[s1][s2][m][n].r;
8307 	      Hy[s1][s2][m][n].r = 2.0*Hy[s1][s2][m][n].r;
8308 	      Hz[s1][s2][m][n].r = 2.0*Hz[s1][s2][m][n].r;
8309 
8310 	      Hx[s1][s2][m][n].i = 2.0*Hx[s1][s2][m][n].i;
8311 	      Hy[s1][s2][m][n].i = 2.0*Hy[s1][s2][m][n].i;
8312 	      Hz[s1][s2][m][n].i = 2.0*Hz[s1][s2][m][n].i;
8313 	    }
8314 	  }
8315 	}
8316       }
8317     }
8318 
8319     else {
8320 
8321       kg = natn[Gc_AN][0];
8322       Mk_AN = F_G2M[kg]; /* F_G2M should be used */
8323       wakg = WhatSpecies[kg];
8324       kan = Spe_Total_NO[wakg];
8325       kl = RMI1[Mc_AN][q_AN][0];
8326 
8327       /****************************************************
8328                         derivative at k=0
8329       ****************************************************/
8330 
8331       for (m=0; m<ian; m++){
8332 	for (n=0; n<jan; n++){
8333 
8334 	  Re00x = 0.0;     Re00y = 0.0;     Re00z = 0.0;
8335 	  Re11x = 0.0;     Re11y = 0.0;     Re11z = 0.0;
8336 	  Re01x = 0.0;     Re01y = 0.0;     Re01z = 0.0;
8337 	  Re10x = 0.0;     Re10y = 0.0;     Re10z = 0.0;
8338 
8339 	  Im00x = 0.0;     Im00y = 0.0;     Im00z = 0.0;
8340 	  Im11x = 0.0;     Im11y = 0.0;     Im11z = 0.0;
8341 	  Im01x = 0.0;     Im01y = 0.0;     Im01z = 0.0;
8342 	  Im10x = 0.0;     Im10y = 0.0;     Im10z = 0.0;
8343 
8344 	  for (l1=0; l1<kan; l1++){
8345 	    for (l2=0; l2<kan; l2++){
8346 
8347 	      ene = NC_v_eff[0][0][Mk_AN][l1][l2].r;
8348 	      Re00x -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[1][Mj_AN][kl][n][l2];
8349 	      Re00y -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[2][Mj_AN][kl][n][l2];
8350 	      Re00z -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[3][Mj_AN][kl][n][l2];
8351 
8352 	      ene = NC_v_eff[1][1][Mk_AN][l1][l2].r;
8353 	      Re11x -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[1][Mj_AN][kl][n][l2];
8354 	      Re11y -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[2][Mj_AN][kl][n][l2];
8355 	      Re11z -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[3][Mj_AN][kl][n][l2];
8356 
8357 	      ene = NC_v_eff[0][1][Mk_AN][l1][l2].r;
8358 	      Re01x -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[1][Mj_AN][kl][n][l2];
8359 	      Re01y -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[2][Mj_AN][kl][n][l2];
8360 	      Re01z -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[3][Mj_AN][kl][n][l2];
8361 
8362 	      ene = NC_v_eff[1][0][Mk_AN][l1][l2].r;
8363 	      Re10x -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[1][Mj_AN][kl][n][l2];
8364 	      Re10y -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[2][Mj_AN][kl][n][l2];
8365 	      Re10z -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[3][Mj_AN][kl][n][l2];
8366 
8367 	      ene = NC_v_eff[0][0][Mk_AN][l1][l2].i;
8368 	      Im00x -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[1][Mj_AN][kl][n][l2];
8369 	      Im00y -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[2][Mj_AN][kl][n][l2];
8370 	      Im00z -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[3][Mj_AN][kl][n][l2];
8371 
8372 	      ene = NC_v_eff[1][1][Mk_AN][l1][l2].i;
8373 	      Im11x -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[1][Mj_AN][kl][n][l2];
8374 	      Im11y -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[2][Mj_AN][kl][n][l2];
8375 	      Im11z -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[3][Mj_AN][kl][n][l2];
8376 
8377 	      ene = NC_v_eff[0][1][Mk_AN][l1][l2].i;
8378 	      Im01x -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[1][Mj_AN][kl][n][l2];
8379 	      Im01y -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[2][Mj_AN][kl][n][l2];
8380 	      Im01z -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[3][Mj_AN][kl][n][l2];
8381 
8382 	      ene = NC_v_eff[1][0][Mk_AN][l1][l2].i;
8383 	      Im10x -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[1][Mj_AN][kl][n][l2];
8384 	      Im10y -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[2][Mj_AN][kl][n][l2];
8385 	      Im10z -= ene*OLP[0][Mc_AN][0][m][l1]*OLP[3][Mj_AN][kl][n][l2];
8386 
8387 	    }
8388 	  }
8389 
8390 	  Hx[0][0][m][n].r += Re00x;  Hx[0][0][m][n].i += Im00x;
8391 	  Hy[0][0][m][n].r += Re00y;  Hy[0][0][m][n].i += Im00y;
8392 	  Hz[0][0][m][n].r += Re00z;  Hz[0][0][m][n].i += Im00z;
8393 
8394 	  Hx[1][1][m][n].r += Re11x;  Hx[1][1][m][n].i += Im11x;
8395 	  Hy[1][1][m][n].r += Re11y;  Hy[1][1][m][n].i += Im11y;
8396 	  Hz[1][1][m][n].r += Re11z;  Hz[1][1][m][n].i += Im11z;
8397 
8398 	  Hx[0][1][m][n].r += Re01x;  Hx[0][1][m][n].i += Im01x;
8399 	  Hy[0][1][m][n].r += Re01y;  Hy[0][1][m][n].i += Im01y;
8400 	  Hz[0][1][m][n].r += Re01z;  Hz[0][1][m][n].i += Im01z;
8401 
8402 	  Hx[1][0][m][n].r += Re10x;  Hx[1][0][m][n].i += Im10x;
8403 	  Hy[1][0][m][n].r += Re10y;  Hy[1][0][m][n].i += Im10y;
8404 	  Hz[1][0][m][n].r += Re10z;  Hz[1][0][m][n].i += Im10z;
8405 	}
8406       }
8407     }
8408 
8409   } /* if (h_AN==0) */
8410 
8411   else {
8412 
8413     /****************************************************
8414                            dS*ep*S
8415     ****************************************************/
8416 
8417     kg = natn[Gc_AN][0];
8418     Mk_AN = F_G2M[kg]; /* F_G2M should be used */
8419     wakg = WhatSpecies[kg];
8420     kan = Spe_Total_NO[wakg];
8421     kl1 = RMI1[Mc_AN][h_AN][0];
8422     kl2 = RMI1[Mc_AN][q_AN][0];
8423 
8424     for (m=0; m<ian; m++){
8425       for (n=0; n<jan; n++){
8426 
8427 	Re00x = 0.0;     Re00y = 0.0;     Re00z = 0.0;
8428 	Re11x = 0.0;     Re11y = 0.0;     Re11z = 0.0;
8429 	Re01x = 0.0;     Re01y = 0.0;     Re01z = 0.0;
8430 	Re10x = 0.0;     Re10y = 0.0;     Re10z = 0.0;
8431 
8432 	Im00x = 0.0;     Im00y = 0.0;     Im00z = 0.0;
8433 	Im11x = 0.0;     Im11y = 0.0;     Im11z = 0.0;
8434 	Im01x = 0.0;     Im01y = 0.0;     Im01z = 0.0;
8435 	Im10x = 0.0;     Im10y = 0.0;     Im10z = 0.0;
8436 
8437 	for (l1=0; l1<kan; l1++){
8438 	  for (l2=0; l2<kan; l2++){
8439 
8440 	    ene = NC_v_eff[0][0][Mk_AN][l1][l2].r;
8441 	    Re00x -= ene*OLP[1][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8442 	    Re00y -= ene*OLP[2][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8443 	    Re00z -= ene*OLP[3][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8444 
8445 	    ene = NC_v_eff[1][1][Mk_AN][l1][l2].r;
8446 	    Re11x -= ene*OLP[1][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8447 	    Re11y -= ene*OLP[2][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8448 	    Re11z -= ene*OLP[3][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8449 
8450 	    ene = NC_v_eff[0][1][Mk_AN][l1][l2].r;
8451 	    Re01x -= ene*OLP[1][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8452 	    Re01y -= ene*OLP[2][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8453 	    Re01z -= ene*OLP[3][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8454 
8455 	    ene = NC_v_eff[1][0][Mk_AN][l1][l2].r;
8456 	    Re10x -= ene*OLP[1][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8457 	    Re10y -= ene*OLP[2][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8458 	    Re10z -= ene*OLP[3][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8459 
8460 	    ene = NC_v_eff[0][0][Mk_AN][l1][l2].i;
8461 	    Im00x -= ene*OLP[1][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8462 	    Im00y -= ene*OLP[2][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8463 	    Im00z -= ene*OLP[3][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8464 
8465 	    ene = NC_v_eff[1][1][Mk_AN][l1][l2].i;
8466 	    Im11x -= ene*OLP[1][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8467 	    Im11y -= ene*OLP[2][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8468 	    Im11z -= ene*OLP[3][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8469 
8470 	    ene = NC_v_eff[0][1][Mk_AN][l1][l2].i;
8471 	    Im01x -= ene*OLP[1][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8472 	    Im01y -= ene*OLP[2][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8473 	    Im01z -= ene*OLP[3][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8474 
8475 	    ene = NC_v_eff[1][0][Mk_AN][l1][l2].i;
8476 	    Im10x -= ene*OLP[1][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8477 	    Im10y -= ene*OLP[2][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8478 	    Im10z -= ene*OLP[3][Mi_AN][kl1][m][l1]*OLP[0][Mj_AN][kl2][n][l2];
8479 
8480 	  }
8481 	}
8482 
8483 	Hx[0][0][m][n] = Complex(Re00x,Im00x);
8484 	Hy[0][0][m][n] = Complex(Re00y,Im00y);
8485 	Hz[0][0][m][n] = Complex(Re00z,Im00z);
8486 
8487 	Hx[1][1][m][n] = Complex(Re11x,Im11x);
8488 	Hy[1][1][m][n] = Complex(Re11y,Im11y);
8489 	Hz[1][1][m][n] = Complex(Re11z,Im11z);
8490 
8491 	Hx[0][1][m][n] = Complex(Re01x,Im01x);
8492 	Hy[0][1][m][n] = Complex(Re01y,Im01y);
8493 	Hz[0][1][m][n] = Complex(Re01z,Im01z);
8494 
8495 	Hx[1][0][m][n] = Complex(Re10x,Im10x);
8496 	Hy[1][0][m][n] = Complex(Re10y,Im10y);
8497 	Hz[1][0][m][n] = Complex(Re10z,Im10z);
8498       }
8499     }
8500 
8501     /****************************************************
8502                            S*ep*dS
8503     ****************************************************/
8504 
8505     if (q_AN==0){
8506 
8507       for (k=0; k<=FNAN[Gc_AN]; k++){
8508         kg = natn[Gc_AN][k];
8509         Mk_AN = F_G2M[kg]; /* F_G2M should be used */
8510         wakg = WhatSpecies[kg];
8511         kan = Spe_Total_NO[wakg];
8512         kl1 = RMI1[Mc_AN][h_AN][k];
8513         kl2 = RMI1[Mc_AN][q_AN][k];
8514 
8515         if (0<=kl1){
8516 
8517 	  for (m=0; m<ian; m++){
8518 	    for (n=0; n<jan; n++){
8519 
8520 	      Re00x = 0.0;     Re00y = 0.0;     Re00z = 0.0;
8521 	      Re11x = 0.0;     Re11y = 0.0;     Re11z = 0.0;
8522 	      Re01x = 0.0;     Re01y = 0.0;     Re01z = 0.0;
8523 	      Re10x = 0.0;     Re10y = 0.0;     Re10z = 0.0;
8524 
8525 	      Im00x = 0.0;     Im00y = 0.0;     Im00z = 0.0;
8526 	      Im11x = 0.0;     Im11y = 0.0;     Im11z = 0.0;
8527 	      Im01x = 0.0;     Im01y = 0.0;     Im01z = 0.0;
8528 	      Im10x = 0.0;     Im10y = 0.0;     Im10z = 0.0;
8529 
8530 	      for (l1=0; l1<kan; l1++){
8531 		for (l2=0; l2<kan; l2++){
8532 
8533 		  ene = NC_v_eff[0][0][Mk_AN][l1][l2].r;
8534 		  Re00x += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8535 		  Re00y += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8536 		  Re00z += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8537 
8538 		  ene = NC_v_eff[1][1][Mk_AN][l1][l2].r;
8539 		  Re11x += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8540 		  Re11y += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8541 		  Re11z += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8542 
8543 		  ene = NC_v_eff[0][1][Mk_AN][l1][l2].r;
8544 		  Re01x += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8545 		  Re01y += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8546 		  Re01z += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8547 
8548 		  ene = NC_v_eff[1][0][Mk_AN][l1][l2].r;
8549 		  Re10x += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8550 		  Re10y += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8551 		  Re10z += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8552 
8553 		  ene = NC_v_eff[0][0][Mk_AN][l1][l2].i;
8554 		  Im00x += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8555 		  Im00y += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8556 		  Im00z += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8557 
8558 		  ene = NC_v_eff[1][1][Mk_AN][l1][l2].i;
8559 		  Im11x += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8560 		  Im11y += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8561 		  Im11z += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8562 
8563 		  ene = NC_v_eff[0][1][Mk_AN][l1][l2].i;
8564 		  Im01x += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8565 		  Im01y += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8566 		  Im01z += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8567 
8568 		  ene = NC_v_eff[1][0][Mk_AN][l1][l2].i;
8569 		  Im10x += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8570 		  Im10y += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8571 		  Im10z += ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8572 
8573 		}
8574 	      }
8575 
8576 	      Hx[0][0][m][n].r += Re00x;  Hx[0][0][m][n].i += Im00x;
8577 	      Hy[0][0][m][n].r += Re00y;  Hy[0][0][m][n].i += Im00y;
8578 	      Hz[0][0][m][n].r += Re00z;  Hz[0][0][m][n].i += Im00z;
8579 
8580 	      Hx[1][1][m][n].r += Re11x;  Hx[1][1][m][n].i += Im11x;
8581 	      Hy[1][1][m][n].r += Re11y;  Hy[1][1][m][n].i += Im11y;
8582 	      Hz[1][1][m][n].r += Re11z;  Hz[1][1][m][n].i += Im11z;
8583 
8584 	      Hx[0][1][m][n].r += Re01x;  Hx[0][1][m][n].i += Im01x;
8585 	      Hy[0][1][m][n].r += Re01y;  Hy[0][1][m][n].i += Im01y;
8586 	      Hz[0][1][m][n].r += Re01z;  Hz[0][1][m][n].i += Im01z;
8587 
8588 	      Hx[1][0][m][n].r += Re10x;  Hx[1][0][m][n].i += Im10x;
8589 	      Hy[1][0][m][n].r += Re10y;  Hy[1][0][m][n].i += Im10y;
8590 	      Hz[1][0][m][n].r += Re10z;  Hz[1][0][m][n].i += Im10z;
8591 
8592 	    }
8593 	  }
8594 	}
8595 
8596       }
8597     } /* if (q_AN==0) */
8598 
8599     else {
8600 
8601       kg = natn[Gc_AN][0];
8602       Mk_AN = F_G2M[kg]; /* F_G2M should be used */
8603       wakg = WhatSpecies[kg];
8604       kan = Spe_Total_NO[wakg];
8605       kl1 = RMI1[Mc_AN][h_AN][0];
8606       kl2 = RMI1[Mc_AN][q_AN][0];
8607 
8608       for (m=0; m<ian; m++){
8609 	for (n=0; n<jan; n++){
8610 
8611 	  Re00x = 0.0;     Re00y = 0.0;     Re00z = 0.0;
8612 	  Re11x = 0.0;     Re11y = 0.0;     Re11z = 0.0;
8613 	  Re01x = 0.0;     Re01y = 0.0;     Re01z = 0.0;
8614 	  Re10x = 0.0;     Re10y = 0.0;     Re10z = 0.0;
8615 
8616 	  Im00x = 0.0;     Im00y = 0.0;     Im00z = 0.0;
8617 	  Im11x = 0.0;     Im11y = 0.0;     Im11z = 0.0;
8618 	  Im01x = 0.0;     Im01y = 0.0;     Im01z = 0.0;
8619 	  Im10x = 0.0;     Im10y = 0.0;     Im10z = 0.0;
8620 
8621 	  for (l1=0; l1<kan; l1++){
8622 	    for (l2=0; l2<kan; l2++){
8623 
8624 	      ene = NC_v_eff[0][0][Mk_AN][l1][l2].r;
8625 	      Re00x -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8626 	      Re00y -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8627 	      Re00z -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8628 
8629 	      ene = NC_v_eff[1][1][Mk_AN][l1][l2].r;
8630 	      Re11x -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8631 	      Re11y -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8632 	      Re11z -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8633 
8634 	      ene = NC_v_eff[0][1][Mk_AN][l1][l2].r;
8635 	      Re01x -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8636 	      Re01y -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8637 	      Re01z -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8638 
8639 	      ene = NC_v_eff[1][0][Mk_AN][l1][l2].r;
8640 	      Re10x -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8641 	      Re10y -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8642 	      Re10z -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8643 
8644 	      ene = NC_v_eff[0][0][Mk_AN][l1][l2].i;
8645 	      Im00x -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8646 	      Im00y -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8647 	      Im00z -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8648 
8649 	      ene = NC_v_eff[1][1][Mk_AN][l1][l2].i;
8650 	      Im11x -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8651 	      Im11y -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8652 	      Im11z -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8653 
8654 	      ene = NC_v_eff[0][1][Mk_AN][l1][l2].i;
8655 	      Im01x -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8656 	      Im01y -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8657 	      Im01z -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8658 
8659 	      ene = NC_v_eff[1][0][Mk_AN][l1][l2].i;
8660 	      Im10x -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[1][Mj_AN][kl2][n][l2];
8661 	      Im10y -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[2][Mj_AN][kl2][n][l2];
8662 	      Im10z -= ene*OLP[0][Mi_AN][kl1][m][l1]*OLP[3][Mj_AN][kl2][n][l2];
8663 
8664 	    }
8665 	  }
8666 
8667 	  Hx[0][0][m][n].r += Re00x;  Hx[0][0][m][n].i += Im00x;
8668 	  Hy[0][0][m][n].r += Re00y;  Hy[0][0][m][n].i += Im00y;
8669 	  Hz[0][0][m][n].r += Re00z;  Hz[0][0][m][n].i += Im00z;
8670 
8671 	  Hx[1][1][m][n].r += Re11x;  Hx[1][1][m][n].i += Im11x;
8672 	  Hy[1][1][m][n].r += Re11y;  Hy[1][1][m][n].i += Im11y;
8673 	  Hz[1][1][m][n].r += Re11z;  Hz[1][1][m][n].i += Im11z;
8674 
8675 	  Hx[0][1][m][n].r += Re01x;  Hx[0][1][m][n].i += Im01x;
8676 	  Hy[0][1][m][n].r += Re01y;  Hy[0][1][m][n].i += Im01y;
8677 	  Hz[0][1][m][n].r += Re01z;  Hz[0][1][m][n].i += Im01z;
8678 
8679 	  Hx[1][0][m][n].r += Re10x;  Hx[1][0][m][n].i += Im10x;
8680 	  Hy[1][0][m][n].r += Re10y;  Hy[1][0][m][n].i += Im10y;
8681 	  Hz[1][0][m][n].r += Re10z;  Hz[1][0][m][n].i += Im10z;
8682 
8683 	}
8684       }
8685     }
8686   }
8687 
8688 }
8689 
8690 
8691 
8692 
8693 
8694 
8695 
8696 
8697 
8698 
MPI_OLP(double ***** OLP1)8699 void MPI_OLP(double *****OLP1)
8700 {
8701   int i,j,h_AN,Gh_AN,Hwan,n;
8702   int tno1,tno2,Mc_AN,Gc_AN,Cwan;
8703   int num,k,size1,size2;
8704   double *tmp_array;
8705   double *tmp_array2;
8706   int *Snd_S_Size,*Rcv_S_Size;
8707   int numprocs,myid,ID,IDS,IDR,tag=999;
8708 
8709   MPI_Status stat;
8710   MPI_Request request;
8711 
8712   /* MPI */
8713   MPI_Comm_size(mpi_comm_level1,&numprocs);
8714   MPI_Comm_rank(mpi_comm_level1,&myid);
8715 
8716   /****************************************************
8717     allocation of arrays:
8718   ****************************************************/
8719 
8720   Snd_S_Size = (int*)malloc(sizeof(int)*numprocs);
8721   Rcv_S_Size = (int*)malloc(sizeof(int)*numprocs);
8722 
8723   /******************************************************************
8724    MPI
8725 
8726    OLP[1], OLP[2], and OLP[3]
8727 
8728    note:
8729 
8730    OLP is used in DC and GDC method, where overlap integrals
8731    of Matomnum+MatomnumF+MatomnumS+1 are stored.
8732    However, overlap integrals of Matomnum+MatomnumF+1 are
8733    stored in Force.c. So, F_TopMAN should be used to refer
8734    overlap integrals in Force.c, while S_TopMAN should be
8735    used in DC and GDC routines.
8736 
8737    Although OLP is used in Eff_Hub_Pot.c, the usage is
8738    consistent with that of DC and GDC routines by the following
8739    reason:
8740 
8741    DC or GDC:      OLP + Spe_Total_NO   if no orbital optimization
8742                 CntOLP + Spe_Total_CNO  if orbital optimization
8743 
8744    Eff_Hub_Pot:    OLP + Spe_Total_NO   always since the U-potential
8745                                         affects to primitive orbital
8746 
8747    If no orbital optimization, both the usages are consistent.
8748    If orbital optimization, CntOLP and OLP are used in DC(GDC) and
8749    Eff_Hub_Pot.c, respectively. Therefore, there is no conflict.
8750   *******************************************************************/
8751 
8752   /***********************************
8753              set data size
8754   ************************************/
8755 
8756   for (ID=0; ID<numprocs; ID++){
8757 
8758     IDS = (myid + ID) % numprocs;
8759     IDR = (myid - ID + numprocs) % numprocs;
8760 
8761     if (ID!=0){
8762       tag = 999;
8763 
8764       /* find data size to send block data */
8765       if (F_Snd_Num[IDS]!=0){
8766 	size1 = 0;
8767 	for (n=0; n<F_Snd_Num[IDS]; n++){
8768 	  Mc_AN = Snd_MAN[IDS][n];
8769 	  Gc_AN = Snd_GAN[IDS][n];
8770 	  Cwan = WhatSpecies[Gc_AN];
8771 	  tno1 = Spe_Total_NO[Cwan];
8772 	  for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
8773 	    Gh_AN = natn[Gc_AN][h_AN];
8774 	    Hwan = WhatSpecies[Gh_AN];
8775 	    tno2 = Spe_Total_NO[Hwan];
8776             size1 += 4*tno1*tno2;
8777 	  }
8778 	}
8779 
8780 	Snd_S_Size[IDS] = size1;
8781 	MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
8782       }
8783       else{
8784 	Snd_S_Size[IDS] = 0;
8785       }
8786 
8787       /* receiving of size of data */
8788 
8789       if (F_Rcv_Num[IDR]!=0){
8790 	MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
8791 	Rcv_S_Size[IDR] = size2;
8792       }
8793       else{
8794 	Rcv_S_Size[IDR] = 0;
8795       }
8796 
8797       if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
8798     }
8799     else{
8800       Snd_S_Size[IDS] = 0;
8801       Rcv_S_Size[IDR] = 0;
8802     }
8803   }
8804 
8805   /***********************************
8806                data transfer
8807   ************************************/
8808 
8809   tag = 999;
8810   for (ID=0; ID<numprocs; ID++){
8811 
8812     IDS = (myid + ID) % numprocs;
8813     IDR = (myid - ID + numprocs) % numprocs;
8814 
8815     if (ID!=0){
8816 
8817       /*****************************
8818                 sending of data
8819       *****************************/
8820 
8821       if (F_Snd_Num[IDS]!=0){
8822 
8823 	size1 = Snd_S_Size[IDS];
8824 
8825 	/* allocation of array */
8826 
8827 	tmp_array = (double*)malloc(sizeof(double)*size1);
8828 
8829 	/* multidimentional array to vector array */
8830 
8831 	num = 0;
8832 
8833         for (k=0; k<=3; k++){
8834   	  for (n=0; n<F_Snd_Num[IDS]; n++){
8835 	    Mc_AN = Snd_MAN[IDS][n];
8836 	    Gc_AN = Snd_GAN[IDS][n];
8837 	    Cwan = WhatSpecies[Gc_AN];
8838 	    tno1 = Spe_Total_NO[Cwan];
8839 	    for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
8840 	      Gh_AN = natn[Gc_AN][h_AN];
8841 	      Hwan = WhatSpecies[Gh_AN];
8842 	      tno2 = Spe_Total_NO[Hwan];
8843 	      for (i=0; i<tno1; i++){
8844 		for (j=0; j<tno2; j++){
8845 		  tmp_array[num] = OLP1[k][Mc_AN][h_AN][i][j];
8846 		  num++;
8847 		}
8848 	      }
8849 	    }
8850 	  }
8851 	}
8852 
8853 	MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request);
8854       }
8855 
8856       /*****************************
8857            receiving of block data
8858       *****************************/
8859 
8860       if (F_Rcv_Num[IDR]!=0){
8861 
8862 	size2 = Rcv_S_Size[IDR];
8863 
8864 	/* allocation of array */
8865 	tmp_array2 = (double*)malloc(sizeof(double)*size2);
8866 
8867 	MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
8868 
8869 	num = 0;
8870 
8871         for (k=0; k<=3; k++){
8872 	  Mc_AN = F_TopMAN[IDR] - 1; /* F_TopMAN should be used. */
8873   	  for (n=0; n<F_Rcv_Num[IDR]; n++){
8874 	    Mc_AN++;
8875 	    Gc_AN = Rcv_GAN[IDR][n];
8876 	    Cwan = WhatSpecies[Gc_AN];
8877 	    tno1 = Spe_Total_NO[Cwan];
8878 
8879 	    for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
8880 	      Gh_AN = natn[Gc_AN][h_AN];
8881 	      Hwan = WhatSpecies[Gh_AN];
8882 	      tno2 = Spe_Total_NO[Hwan];
8883 	      for (i=0; i<tno1; i++){
8884 		for (j=0; j<tno2; j++){
8885 		  OLP1[k][Mc_AN][h_AN][i][j] = tmp_array2[num];
8886 		  num++;
8887 		}
8888 	      }
8889 	    }
8890 	  }
8891 	}
8892 
8893 	/* freeing of array */
8894 	free(tmp_array2);
8895       }
8896 
8897       if (F_Snd_Num[IDS]!=0){
8898 	MPI_Wait(&request,&stat);
8899 	free(tmp_array); /* freeing of array */
8900       }
8901     }
8902   }
8903 
8904   /****************************************************
8905     freeing of arrays:
8906   ****************************************************/
8907 
8908   free(Snd_S_Size);
8909   free(Rcv_S_Size);
8910 }
8911