/********************************************************************** Force.c: Force.c is a subroutine to calculate force on atoms. Log of Force.c: 22/Nov/2001 Released by T. Ozaki 18/Apr/2013 Force3() modified by A.M. Ito ***********************************************************************/ #include #include #include #include #include "openmx_common.h" #include "mpi.h" #include #define measure_time 0 static void dH_U_full(int Mc_AN, int h_AN, int q_AN, double *****OLP, double ****v_eff, double ***Hx, double ***Hy, double ***Hz); static void dH_U_NC_full(int Mc_AN, int h_AN, int q_AN, double *****OLP, dcomplex *****NC_v_eff, dcomplex ****Hx, dcomplex ****Hy, dcomplex ****Hz); static void dHNL(int where_flag, int Mc_AN, int h_AN, int q_AN, double ******DS_NL1, dcomplex ***Hx, dcomplex ***Hy, dcomplex ***Hz); static void dHVNA(int where_flag, int Mc_AN, int h_AN, int q_AN, Type_DS_VNA *****DS_VNA1, double *****TmpHVNA2, double *****TmpHVNA3, double **Hx, double **Hy, double **Hz); static void dHNL_SO( double *sumx0r, double *sumy0r, double *sumz0r, double *sumx1r, double *sumy1r, double *sumz1r, double *sumx2r, double *sumy2r, double *sumz2r, double *sumx0i, double *sumy0i, double *sumz0i, double *sumx1i, double *sumy1i, double *sumz1i, double *sumx2i, double *sumy2i, double *sumz2i, double fugou, double PFp, double PFm, double ene_p, double ene_m, int l2, int *l, int Mc_AN, int k, int m, int Mj_AN, int kl, int n, double ******DS_NL1); static void MPI_OLP(double *****OLP1); static void Force3(); static void Force4(); static void Force4B(double *****CDM0); static void Force_HNL(double *****CDM0, double *****iDM0); double Force(double *****H0, double ******DS_NL, double *****OLP, double *****CDM, double *****EDM) { static int firsttime=1; int Nc,GNc,GRc,Cwan,s1,s2,BN_AB; int Mc_AN,Gc_AN,MNc,start_q_AN; double x,y,z,dx,dy,dz,tmp0,tmp1,tmp2,tmp3; double xx,r2,tot_den; double sumx,sumy,sumz,r,dege,pref; int i,j,k,l,Hwan,Qwan,so,p0,q,q0; int h_AN,Gh_AN,q_AN,Gq_AN; int ian,jan,kl,spin,spinmax,al,be,p,size_CDM0,size_iDM0; int tno0,tno1,tno2,Mh_AN,Mq_AN,n,num,size1,size2; int wanA,wanB,Gc_BN; int XC_P_switch; double time0; double dum,dge; double dEx,dEy,dEz; double Cxyz[4]; double *Fx,*Fy,*Fz; dcomplex ***Hx; dcomplex ***Hy; dcomplex ***Hz; double ***HUx; double ***HUy; double ***HUz; dcomplex ****NC_HUx; dcomplex ****NC_HUy; dcomplex ****NC_HUz; double **HVNAx; double **HVNAy; double **HVNAz; double *****CDM0; double *****iDM0; double *tmp_array; double *tmp_array2; double Re00x,Re00y,Re00z; double Re11x,Re11y,Re11z; double Re01x,Re01y,Re01z; double Im00x,Im00y,Im00z; double Im11x,Im11y,Im11z; double Im01x,Im01y,Im01z; int *Snd_CDM0_Size,*Rcv_CDM0_Size; int *Snd_iDM0_Size,*Rcv_iDM0_Size; double TStime,TEtime; int numprocs,myid,tag=999,ID,IDS,IDR; double Stime_atom, Etime_atom; /* for OpenMP */ int OMPID,Nthrds,Nprocs; double stime,etime; MPI_Status stat; MPI_Request request; /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); MPI_Barrier(mpi_comm_level1); dtime(&TStime); /**************************************************** allocation of arrays: ****************************************************/ Fx = (double*)malloc(sizeof(double)*(Matomnum+1)); Fy = (double*)malloc(sizeof(double)*(Matomnum+1)); Fz = (double*)malloc(sizeof(double)*(Matomnum+1)); HVNAx = (double**)malloc(sizeof(double*)*List_YOUSO[7]); for (j=0; j force(1) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,-sumx*GridVol,-sumy*GridVol,-sumz*GridVol);fflush(stdout); } dtime(&Etime_atom); time_per_atom[Gc_AN] += Etime_atom - Stime_atom; } } /* #pragma omp parallel */ dtime(&etime); if(myid==0 && measure_time){ printf("Time for force#1=%18.5f\n",etime-stime);fflush(stdout); } /**************************************************** added by T.Ohwaki #1' of force contribution from an artificial wall applied in the ESM method so that atoms cannot go beyond the boundary of the unit cell along the a-axis. ****************************************************/ if (ESM_switch!=0){ double fx,xb,x0,x,a; xb = Grid_Origin[1] + tv[1][1]; a = ESM_wall_height/pow(1.89,3.0); for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; x = Gxyz[Gc_AN][1]; x0 = xb - ESM_wall_position; dx = x - x0; if (0.0 force(2) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,dEx,dEy,dEz);fflush(stdout); } Gxyz[Gc_AN][17] += dEx; Gxyz[Gc_AN][18] += dEy; Gxyz[Gc_AN][19] += dEz; dtime(&Etime_atom); time_per_atom[Gc_AN] += Etime_atom - Stime_atom; } /* Mc_AN */ /* freeing of arrays */ for (i=0; i<3; i++){ for (j=0; j force(5) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,Fx[Mc_AN],Fy[Mc_AN],Fz[Mc_AN]);fflush(stdout); } } /**************************************************************** In case that the dual representation is used for evaluation of the occupation number in the LDA+U method, the following force term is added. ****************************************************************/ if ( (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1) && (Hub_U_occupation==1 || Hub_U_occupation==2) && SpinP_switch!=3 ){ HUx = (double***)malloc(sizeof(double**)*3); for (i=0; i<3; i++){ HUx[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]); for (j=0; j force(LDA_U_dual) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,Fx[Mc_AN],Fy[Mc_AN],Fz[Mc_AN]);fflush(stdout); } } } /* if ( (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1) && F_U_flag==1 && Hub_U_occupation==2) */ /**************************************************** Force arising from HNL ****************************************************/ Force_HNL(CDM0, iDM0); /**************************************************** freeing of arrays: ****************************************************/ if ( (Hub_U_switch==1 || 1<=Constraint_NCS_switch || Zeeman_NCS_switch==1 || Zeeman_NCO_switch==1) && (Hub_U_occupation==1 || Hub_U_occupation==2) && SpinP_switch!=3 ){ for (i=0; i<3; i++){ for (j=0; j force(3) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,sumx*GridVol,sumy*GridVol,sumz*GridVol);fflush(stdout); } } /* Mc_AN */ /* freeing of arrays */ free(dDen_Grid[0][0]); free(dDen_Grid[0]); free(dDen_Grid); free(orbs1); int i; for (i=0; i<4; i++){ free(dorbs0[i]); } free(dorbs0); } /* #pragma omp parallel */ /* free */ free(dChi0[0][0]); free(dChi0[0]); free(dChi0); free(ai_sh_sum); } void Force3_org3665() { /**************************************************** #3 of Force dn/dx * (VNA + dVH + Vxc) or dn/dx * (dVH + Vxc) ****************************************************/ int Mc_AN,Gc_AN,Cwan,Hwan,NO0,NO1; int i,j,k,Nc,Nh,GNc,GRc,MNc,GNh,GRh; int h_AN,Gh_AN,Mh_AN,Rnh,spin,Nog; double ***dDen_Grid; double sum,tmp0,r,dx,dy,dz; double sumx,sumy,sumz; double x,y,z,x1,y1,z1,Vpt; double Cxyz[4]; double **dorbs0,*orbs1,***dChi0; double ReVpt11,ReVpt22,ReVpt21,ImVpt21; int numprocs,myid,tag=999,ID,IDS,IDR; /* for OpenMP */ int OMPID,Nthrds,Nprocs; /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); /********************************************************** main loop for calculation of force #3 **********************************************************/ #pragma omp parallel shared(Orbs_Grid_FNAN,G2ID,myid,level_stdout,GridVol,F_VEF_flag,VEF_Grid,F_VNA_flag,VNA_Grid,F_Vxc_flag,Vxc_Grid,dVHart_Grid,F_dVHart_flag,ProExpn_VNA,E_Field_switch,DM,Orbs_Grid,GListTAtoms2,GListTAtoms1,NumOLG,ncn,F_G2M,natn,FNAN,Max_GridN_Atom,SpinP_switch,List_YOUSO,Cnt_switch,Gxyz,atv,MGridListAtom,CellListAtom,GridListAtom,GridN_Atom,Spe_Total_CNO,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Gc_AN,Cwan,NO0,Nc,GNc,GRc,MNc,Cxyz,x,y,z,dx,dy,dz,dorbs0,orbs1,dDen_Grid,dChi0,i,k,h_AN,Gh_AN,Mh_AN,Rnh,Hwan,NO1,spin,Nog,Nh,j,sum,tmp0,sumx,sumy,sumz,Vpt,ReVpt11,ReVpt22,ReVpt21,ImVpt21) { /* allocation of arrays */ dorbs0 = (double**)malloc(sizeof(double*)*4); for (i=0; i<4; i++){ dorbs0[i] = (double*)malloc(sizeof(double)*List_YOUSO[7]); } orbs1 = (double*)malloc(sizeof(double)*List_YOUSO[7]); dDen_Grid = (double***)malloc(sizeof(double**)*(SpinP_switch+1)); for (i=0; i<(SpinP_switch+1); i++){ dDen_Grid[i] = (double**)malloc(sizeof(double*)*3); for (k=0; k<3; k++){ dDen_Grid[i][k] = (double*)malloc(sizeof(double)*Max_GridN_Atom); } } dChi0 = (double***)malloc(sizeof(double**)*3); for (k=0; k<3; k++){ dChi0[k] = (double**)malloc(sizeof(double*)*List_YOUSO[7]); for (i=0; i force(3) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,sumx*GridVol,sumy*GridVol,sumz*GridVol);fflush(stdout); } } /* Mc_AN */ /* freeing of arrays */ for (k=0; k<3; k++){ for (i=0; i force(4) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,sumx*GridVol,sumy*GridVol,sumz*GridVol);fflush(stdout); } */ } } void Force_HNL(double *****CDM0, double *****iDM0) { /**************************************************** Force arising from HNL ****************************************************/ int Mc_AN,Gc_AN,Cwan,i,j,h_AN,q_AN,Mq_AN,start_q_AN; int jan,kl,km,kl1,Qwan,Gq_AN,Gh_AN,Mh_AN,Hwan,ian; int l1,l2,l3,l,LL,Mul1,tno0,ncp,so; int tno1,tno2,size1,size2,n,kk,num,po,po1,po2; int numprocs,myid,tag=999,ID,IDS,IDR; int **S_array,**R_array; int S_comm_flag,R_comm_flag; int SA_num,q,Sc_AN,GSc_AN,smul; int Sc_wan,Sh_AN,GSh_AN,Sh_wan; int Sh_AN2,fan,jg,j0,jg0,Mj_AN0; int Original_Mc_AN; double rcutA,rcutB,rcut; double dEx,dEy,dEz,ene,pref; double Stime_atom, Etime_atom; dcomplex ***Hx,***Hy,***Hz; dcomplex ***Hx0,***Hy0,***Hz0; dcomplex ***Hx1,***Hy1,***Hz1; int *Snd_DS_NL_Size,*Rcv_DS_NL_Size; int *Indicator; double *tmp_array; double *tmp_array2; /* for OpenMP */ int OMPID,Nthrds,Nthrds0,Nprocs,Nloop,ODNloop; int *OneD2h_AN,*OneD2q_AN; double *dEx_threads; double *dEy_threads; double *dEz_threads; double stime,etime; double stime1,etime1; MPI_Status stat; MPI_Request request; /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); dtime(&stime); /**************************** allocation of arrays *****************************/ Indicator = (int*)malloc(sizeof(int)*numprocs); S_array = (int**)malloc(sizeof(int*)*numprocs); for (ID=0; IDek ]/dRI ****************************************************************/ /******************************************************* ******************************************************* multiplying overlap integrals WITH COMMUNICATION In case of I=i or I=j for d [ \sum_k ek ]/dRI ******************************************************* *******************************************************/ MPI_Barrier(mpi_comm_level1); dtime(&stime); Hx0 = (dcomplex***)malloc(sizeof(dcomplex**)*3); for (i=0; i<3; i++){ Hx0[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]); for (j=0; j force(HNL1) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,Gxyz[Gc_AN][41],Gxyz[Gc_AN][42],Gxyz[Gc_AN][43]);fflush(stdout); } } /******************************************************* ******************************************************* THE FIRST CASE: multiplying overlap integrals WITHOUT COMMUNICATION In case of I=i or I=j for d [ \sum_k ek ]/dRI ******************************************************* *******************************************************/ dtime(&stime); #pragma omp parallel shared(time_per_atom,Gxyz,CDM0,SpinP_switch,SO_switch,Hub_U_switch,F_U_flag,Constraint_NCS_switch,Zeeman_NCS_switch,Zeeman_NCO_switch,DS_NL,RMI1,FNAN,Spe_Total_CNO,WhatSpecies,F_G2M,natn,M2G,Matomnum,List_YOUSO,F_NL_flag) private(Hx0,Hy0,Hz0,Hx1,Hy1,Hz1,OMPID,Nthrds,Nprocs,Mc_AN,Stime_atom,Etime_atom,dEx,dEy,dEz,Gc_AN,h_AN,Gh_AN,Mh_AN,Hwan,ian,q_AN,Gq_AN,Mq_AN,Qwan,jan,kl,kl1,i,j,kk,pref) { /* allocation of array */ Hx0 = (dcomplex***)malloc(sizeof(dcomplex**)*3); for (i=0; i<3; i++){ Hx0[i] = (dcomplex**)malloc(sizeof(dcomplex*)*List_YOUSO[7]); for (j=0; j force(HNL2) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,Gxyz[Gc_AN][41],Gxyz[Gc_AN][42],Gxyz[Gc_AN][43]);fflush(stdout); } } /************************************************************* THE SECOND CASE: In case of I=k with I!=i and I!=j d [ \sum_k ek ]/dRI *************************************************************/ /************************************************************ MPI communication of DS_NL whose basis part is not located on own site but projector part is located on own site. ************************************************************/ MPI_Barrier(mpi_comm_level1); dtime(&stime); for (ID=0; ID force(HNL3) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,dEx,dEy,dEz);fflush(stdout); } /* freeing of array */ free(OneD2q_AN); free(OneD2h_AN); free(dEx_threads); free(dEy_threads); free(dEz_threads); } /* if (Mc_AN<=Matomnum) */ } /* Mc_AN */ dtime(&etime); if(myid==0 && measure_time){ printf("Time for part3 of force_NL=%18.5f\n",etime-stime);fflush(stdout); } /******************************************************** adding Gxyz[Gc_AN][41,42,43] to Gxyz[Gc_AN][17,18,19] ********************************************************/ for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){ Gc_AN = M2G[Mc_AN]; if (2<=level_stdout){ printf(" force(HNL) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,Gxyz[Gc_AN][41],Gxyz[Gc_AN][42],Gxyz[Gc_AN][43]);fflush(stdout); } Gxyz[Gc_AN][17] += Gxyz[Gc_AN][41]; Gxyz[Gc_AN][18] += Gxyz[Gc_AN][42]; Gxyz[Gc_AN][19] += Gxyz[Gc_AN][43]; } /*********************************** freeing of arrays ************************************/ free(Indicator); for (ID=0; IDek ]/dRI ****************************************************************/ /******************************************************* ******************************************************* multiplying overlap integrals WITH COMMUNICATION ******************************************************* *******************************************************/ MPI_Barrier(mpi_comm_level1); dtime(&stime); for (ID=0; IDek ]/dRI *************************************************************/ /************************************************************ MPI communication of DS_VNA whose basis part is not located on own site but projector part is located on own site. ************************************************************/ MPI_Barrier(mpi_comm_level1); dtime(&stime); for (ID=0; ID force(4B) myid=%2d Mc_AN=%2d Gc_AN=%2d %15.12f %15.12f %15.12f\n", myid,Mc_AN,Gc_AN,Gxyz[Gc_AN][41],Gxyz[Gc_AN][42],Gxyz[Gc_AN][43]);fflush(stdout); } Gxyz[Gc_AN][17] += Gxyz[Gc_AN][41]; Gxyz[Gc_AN][18] += Gxyz[Gc_AN][42]; Gxyz[Gc_AN][19] += Gxyz[Gc_AN][43]; } /*********************************** freeing of arrays ************************************/ free(Indicator); for (ID=0; ID