1! 2! Copyright (C) 2002-2009 Quantum ESPRESSO group 3! This file is distributed under the terms of the 4! GNU General Public License. See the file `License' 5! in the root directory of the present distribution, 6! or http://www.gnu.org/copyleft/gpl.txt . 7! 8 9 10!----------------------------------------------------------------------- 11 subroutine calcmt( nrlx, fdiag, zmat, fmat ) 12!----------------------------------------------------------------------- 13! 14! constructs fmat=z0^t.fdiag.z0 zmat = z0^t 15! 16 USE kinds, ONLY: DP 17 use electrons_base, ONLY: nudx, nspin, nupdwn, iupdwn, nx => nbspx 18 USE cp_main_variables, ONLY: idesc 19 USE mp, ONLY: mp_sum, mp_bcast 20 21 implicit none 22 23 include 'laxlib.fh' 24 25 integer, intent(in) :: nrlx 26 real(DP) :: zmat( nrlx, nudx, nspin ), fmat( nrlx, nudx, nspin ) 27 ! NOTE: zmat and fmat are distributed by row across processors 28 real(dp) :: fdiag( nx ) ! fdiag is replicated 29 30 integer :: iss, nss, istart, i, j, k, ii, jj, kk 31 integer :: np_rot, me_rot, nrl, comm_rot, ip, nrl_ip 32 33 real(DP), ALLOCATABLE :: mtmp(:,:) 34 real(DP) :: f_z0t 35 36 37 call start_clock('calcmt') 38 39 fmat = 0.0d0 40 41 DO iss = 1, nspin 42 43 nss = nupdwn( iss ) 44 istart = iupdwn( iss ) 45 np_rot = idesc( LAX_DESC_NPR, iss ) * idesc( LAX_DESC_NPC, iss ) 46 me_rot = idesc( LAX_DESC_MYPE, iss ) 47 nrl = idesc( LAX_DESC_NRL, iss ) 48 comm_rot = idesc( LAX_DESC_COMM, iss ) 49 50 IF( idesc( LAX_DESC_ACTIVE_NODE, iss ) > 0 ) THEN 51 52 ALLOCATE( mtmp( MAXVAL(idesc(LAX_DESC_NRLX, :)), nudx ) ) 53 54 DO ip = 1, np_rot 55 56 IF( me_rot == ( ip - 1 ) ) THEN 57 mtmp = zmat(:,:,iss) 58 END IF 59 nrl_ip = ldim_cyclic( nss, np_rot, ip - 1 ) 60 CALL mp_bcast( mtmp , ip - 1 , comm_rot ) 61 62 DO j = 1, nss 63 ii = ip 64 DO i = 1, nrl_ip 65 f_z0t = fdiag( j + istart - 1 ) * mtmp( i, j ) 66 DO k = 1, nrl 67 fmat( k, ii, iss ) = fmat( k, ii, iss )+ zmat( k, j, iss ) * f_z0t 68 END DO 69 ii = ii + np_rot 70 END DO 71 END DO 72 73 END DO 74 75 DEALLOCATE( mtmp ) 76 77 END IF 78 79 END DO 80 81 call stop_clock('calcmt') 82 83 RETURN 84 END SUBROUTINE calcmt 85 86 87!----------------------------------------------------------------------- 88 subroutine rotate( nrlx, z0, c0, bec, c0diag, becdiag ) 89!----------------------------------------------------------------------- 90 use kinds, only: dp 91 use electrons_base, only: nudx, nspin, nupdwn, iupdwn, nx => nbspx, n => nbsp 92 use uspp_param, only: nh, upf 93 use uspp, only :nkb, nkbus, qq_nt, indv_ijkb0 94 use gvecw, only: ngw 95 use ions_base, only: nat, ityp 96 USE cp_main_variables, ONLY: idesc 97 USE cp_interfaces, ONLY: protate 98 99 implicit none 100 include 'laxlib.fh' 101 integer, intent(in) :: nrlx 102 real(kind=DP) z0( nrlx, nudx, nspin ) 103 real(kind=DP) bec( nkb, n ), becdiag( nkb, n ) 104 complex(kind=DP) c0( ngw, nx ), c0diag( ngw, nx ) 105 integer :: np_rot, me_rot, nrl, comm_rot 106 integer iss, nss, istart 107 108 CALL start_clock( 'rotate' ) 109 110 DO iss = 1, nspin 111 istart = iupdwn( iss ) 112 nss = nupdwn( iss ) 113 np_rot = idesc( LAX_DESC_NPR, iss ) * idesc( LAX_DESC_NPC, iss ) 114 me_rot = idesc( LAX_DESC_MYPE, iss ) 115 nrl = idesc( LAX_DESC_NRL, iss ) 116 comm_rot = idesc( LAX_DESC_COMM, iss ) 117 CALL protate ( c0, bec, c0diag, becdiag, ngw, nss, istart, z0(:,:,iss), nrl, & 118 ityp, nat, indv_ijkb0, nh, np_rot, me_rot, comm_rot ) 119 END DO 120 121 CALL stop_clock( 'rotate' ) 122 return 123 end subroutine rotate 124 125 126 subroutine minparabola(ene0,dene0,ene1,passop,passo,stima) 127!this subroutines finds the minimum of a quadratic real function 128 129 use kinds, only : dp 130 131 implicit none 132 real(dp) ene0,dene0,ene1,passop,passo,stima 133 real(dp) a,b,c!a*x^2+b*x+c 134 135 c=ene0 136 b=dene0 137 a=(ene1-b*passop-c)/(passop**2.d0) 138 139 passo = -b/(2.d0*a) 140 if( a.lt.0.d0) then 141 if(ene1.lt.ene0) then 142 passo=passop 143 else 144 passo=0.5d0*passop 145 endif 146 endif 147 148 149 stima=a*passo**2.d0+b*passo+c 150 151 152 return 153 end subroutine minparabola 154 155 156subroutine pc2(a,beca,b,becb) 157 158! this function applies the operator Pc 159 160! this subroutine applies the Pc operator 161! a input :unperturbed wavefunctions 162! b input :first order wavefunctions 163! b output:b_i =b_i-a_j><a_j|S|b_i> 164 165 use kinds, only: dp 166 use ions_base, only: na, nsp, nat, ityp 167 use io_global, only: stdout 168 use mp_global, only: intra_bgrp_comm 169 use gvecw, only: ngw 170 use constants, only: pi, fpi 171 use gvect, only: gstart 172 use mp, only: mp_sum 173 use electrons_base, only: n => nbsp, ispin, nupdwn, iupdwn, nspin 174 use uspp_param, only: nh, upf 175 use uspp, only :nkb, nkbus, indv_ijkb0 176 use uspp, only :qq_nt 177 178 179 implicit none 180 181 complex(kind=DP) a(ngw,n), b(ngw,n) 182 183 real(kind=DP) beca(nkb,n),becb(nkb,n) 184! local variables 185 integer is, iv, jv, ia, inl, jnl, i, j,ig 186 real(kind=DP) sca 187 real(DP), allocatable :: bectmp(:,:) 188 real(DP), allocatable :: qq_tmp(:,:), qqb_tmp(:,:) 189 complex(DP), allocatable :: zbectmp(:,:) 190 integer :: nl_max 191 integer :: nss,iss, istart 192 193 logical :: mat_par=.true.!if true uses parallel routines 194 195 CALL start_clock( 'pc2' ) 196 197 do iss= 1, nspin 198 nss= nupdwn( iss ) 199 istart= iupdwn( iss ) 200 201 allocate(bectmp(nss,nss)) 202 allocate(zbectmp(nss,nss)) 203 bectmp(:,:)=0.d0 204 205 call zgemm('C','N',nss,nss,ngw,(1.d0,0.d0),a(:,istart),ngw,b(:,istart),ngw,(0.d0,0.d0),zbectmp,nss) 206 207 do j=1,nss 208 do i=1,nss 209 bectmp(i,j)=2.d0*dble(zbectmp(i,j)) 210 if(gstart==2) bectmp(i,j)=bectmp(i,j)-DBLE(a(1,j))*DBLE(b(1,i)) 211 212 enddo 213 enddo 214 deallocate(zbectmp) 215 call mp_sum( bectmp(:,:), intra_bgrp_comm) 216 if(nkbus >= 0) then 217 218 nl_max=0 219 do is=1,nsp 220 nl_max=nl_max+nh(is)*na(is) 221 enddo 222 allocate (qq_tmp(nl_max,nl_max)) 223 allocate (qqb_tmp(nl_max,nss)) 224 qq_tmp(:,:)=0.d0 225 do ia=1,nat 226 is = ityp(ia) 227 IF( upf(is)%tvanp ) THEN 228 do iv=1,nh(is) 229 do jv=1,nh(is) 230 inl = indv_ijkb0(ia) + iv 231 jnl = indv_ijkb0(ia) + jv 232 qq_tmp(inl,jnl)=qq_nt(iv,jv,is) 233 enddo 234 enddo 235 ENDIF 236 enddo 237 if(.not. mat_par) then 238 call dgemm('N','N',nl_max,nss,nl_max,1.d0,qq_tmp,nl_max,becb(:,istart),nkb,0.d0,qqb_tmp,nl_max) 239 call dgemm('T','N',nss,nss,nl_max,1.d0,beca(:,istart),nkb,qqb_tmp,nl_max,1.d0,bectmp,nss) 240 else 241 call para_dgemm & 242& ('N','N',nl_max,nss,nl_max,1.d0,qq_tmp,nl_max,becb(:,istart),nkb,0.d0,qqb_tmp,nl_max, intra_bgrp_comm) 243 call para_dgemm & 244&('T','N',nss,nss,nl_max,1.d0,beca(:,istart),nkb,qqb_tmp,nl_max,1.d0,bectmp,nss, intra_bgrp_comm) 245 endif 246 deallocate(qq_tmp,qqb_tmp) 247 endif 248 allocate(zbectmp(nss,nss)) 249 do i=1,nss 250 do j=1,nss 251 zbectmp(i,j)=CMPLX(bectmp(i,j),0.d0,kind=dp) 252 enddo 253 enddo 254 call zgemm('N','N',ngw,nss,nss,(-1.d0,0.d0),a(:,istart),ngw,zbectmp,nss,(1.d0,0.d0),b(:,istart),ngw) 255 deallocate(zbectmp) 256 call dgemm('N','N',nkb,nss,nss,1.0d0,beca(:,istart),nkb,bectmp,nss,1.0d0,becb(:,istart),nkb) 257 deallocate(bectmp) 258 enddo!on spin 259 CALL stop_clock( 'pc2' ) 260 return 261 end subroutine pc2 262 263 264 subroutine pcdaga2(a,as ,b ) 265 266! this function applies the operator Pc 267 268! this subroutine applies the Pc^dagerr operator 269! a input :unperturbed wavefunctions 270! b input :first order wavefunctions 271! b output:b_i =b_i - S|a_j><a_j|b_i> 272 273 use kinds 274 use io_global, only: stdout 275 use mp_global, only: intra_bgrp_comm 276 use gvecw, only: ngw 277 use constants, only: pi, fpi 278 use gvect, only: gstart 279 use mp, only: mp_sum 280 use electrons_base, only: n => nbsp, ispin 281 282 implicit none 283 284 complex(dp) a(ngw,n), b(ngw,n), as(ngw,n) 285 ! local variables 286 integer is, iv, jv, ia, inl, jnl, i, j,ig 287 real(dp) sca 288 real(DP), allocatable:: scar(:) 289 ! 290 call start_clock('pcdaga2') 291 allocate(scar(n)) 292 do j=1,n 293 do i=1,n 294 sca=0.0d0 295 if(ispin(i) == ispin(j)) then 296 if (gstart==2) b(1,i) = CMPLX(dble(b(1,i)),0.0d0,kind=dp) 297 do ig=1,ngw !loop on g vectors 298 sca=sca+DBLE(CONJG(a(ig,j))*b(ig,i)) 299 enddo 300 sca = sca*2.0d0 !2. for real weavefunctions 301 if (gstart==2) sca = sca - dble(a(1,j))*dble(b(1,i)) 302 endif 303 scar(i) = sca 304 enddo 305 306 307 call mp_sum( scar, intra_bgrp_comm ) 308 309 310 do i=1,n 311 if(ispin(i) == ispin(j)) then 312 sca = scar(i) 313 do ig=1,ngw 314 b(ig,i)=b(ig,i)-sca*as(ig,j) 315 enddo 316 ! this to prevent numerical errors 317 if (gstart==2) b(1,i) = CMPLX(dble(b(1,i)),0.0d0,kind=dp) 318 endif 319 enddo 320 enddo 321 deallocate(scar) 322 call stop_clock('pcdaga2') 323 return 324 end subroutine pcdaga2 325 326 subroutine set_x_minus1(betae,m_minus1,ema0bg,use_ema) 327 328! this function calculates the factors for the inverse of the US K matrix 329! it takes care of the preconditioning 330 331 use kinds, only: dp 332 use ions_base, only: nsp, nat, ityp 333 use io_global, only: stdout 334 use mp_global, only: intra_bgrp_comm 335 use gvecw, only: ngw 336 use constants, only: pi, fpi 337 use gvect, only: gstart 338 use mp, only: mp_sum, mp_bcast 339 use electrons_base, only: n => nbsp, ispin 340 use uspp_param, only: nh, upf 341 use uspp, only :nkb,qq_nt,nkbus, indv_ijkb0 342 use io_global, ONLY: ionode, ionode_id 343 344 implicit none 345 346 complex(DP) :: betae(ngw,nkb) 347 real(DP) :: m_minus1(nkb,nkb) 348 real(DP) :: ema0bg(ngw) 349 logical :: use_ema 350 351 352! local variables 353 real(DP),allocatable :: q_matrix(:,:), b_matrix(:,:),c_matrix(:,:) 354 integer is, iv, jv, ia, inl, jnl, i, j, k,ig, js, ja 355 real(DP) sca 356 integer info, lwork 357 integer, allocatable :: ipiv(:) 358 real(dp),allocatable :: work(:) 359 360 call start_clock('set_x_minus1') 361 allocate(ipiv(nkb)) 362 allocate(work(nkb)) 363 364 lwork=nkb 365 366 allocate(q_matrix(nkb,nkb),c_matrix(nkb,nkb)) 367!construct q matrix 368 q_matrix(:,:) = 0.d0 369 370 do ia=1,nat 371 is = ityp(ia) 372 IF( upf(is)%tvanp ) THEN 373 do iv=1,nh(is) 374 do jv=1,nh(is) 375 inl = indv_ijkb0(ia) + iv 376 jnl = indv_ijkb0(ia) + jv 377 q_matrix(inl,jnl)= qq_nt(iv,jv,is) 378 enddo 379 enddo 380 END IF 381 enddo 382 383!construct b matrix 384! m_minus1 used to be b matrix 385 m_minus1(:,:) = 0.d0 386 do ia=1,nat 387 is = ityp(ia) 388 IF( upf(is)%tvanp ) THEN 389 do iv=1,nh(is) 390 do ja=1,nat 391 js=ityp(ja) 392 IF( upf(js)%tvanp ) THEN 393 do jv=1,nh(js) 394 inl = indv_ijkb0(ia) + iv 395 jnl = indv_ijkb0(ja) + jv 396 sca=0.d0 397 if (use_ema) then 398 ! k_minus case 399 do ig=1,ngw !loop on g vectors 400 sca=sca+ema0bg(ig)*DBLE(CONJG(betae(ig,inl))*betae(ig,jnl)) 401 enddo 402 sca = sca*2.0d0 !2. for real weavefunctions 403 if (gstart==2) sca = sca - ema0bg(1)*DBLE(CONJG(betae(1,inl))*betae(1,jnl)) 404 else 405 ! s_minus case 406 do ig=1,ngw !loop on g vectors 407 sca=sca+DBLE(CONJG(betae(ig,inl))*betae(ig,jnl)) 408 enddo 409 sca = sca*2.0d0 !2. for real weavefunctions 410 if (gstart==2) sca = sca - DBLE(CONJG(betae(1,inl))*betae(1,jnl)) 411 endif 412 m_minus1(inl,jnl)=sca 413 enddo 414 END IF 415 enddo 416 enddo 417 END IF 418 enddo 419 call mp_sum( m_minus1, intra_bgrp_comm ) 420 421!calculate -(1+QB)**(-1) * Q 422 CALL dgemm('N','N',nkb,nkb,nkb,1.0d0,q_matrix,nkb,m_minus1,nkb,0.0d0,c_matrix,nkb) 423 424 do i=1,nkb 425 c_matrix(i,i)=c_matrix(i,i)+1.d0 426 enddo 427 428 if(ionode) then 429 call dgetrf(nkb,nkb,c_matrix,nkb,ipiv,info) 430 if(info .ne. 0) write(stdout,*) 'set_k_minus1 Problem with dgetrf :', info 431 call dgetri(nkb,c_matrix,nkb,ipiv,work,lwork,info) 432 if(info .ne. 0) write(stdout,*) 'set_k_minus1 Problem with dgetri :', info 433 endif 434 call mp_bcast( c_matrix, ionode_id, intra_bgrp_comm ) 435 436 437 CALL dgemm('N','N',nkb,nkb,nkb,-1.0d0,c_matrix,nkb,q_matrix,nkb,0.0d0,m_minus1,nkb) 438 439 deallocate(q_matrix,c_matrix) 440 deallocate(ipiv,work) 441 call stop_clock('set_x_minus1') 442 return 443 end subroutine set_x_minus1 444! 445 subroutine xminus1(c0,betae,ema0bg,beck,m_minus1,do_k) 446! if (do_k) then 447!----------------------------------------------------------------------- 448! input: c0 , bec=<c0|beta>, betae=|beta> 449! computes the matrix phi (with the old positions) 450! where |phi> = K^{-1}|c0> 451! else 452!----------------------------------------------------------------------- 453! input: c0 , bec=<c0|beta>, betae=|beta> 454! computes the matrix phi (with the old positions) 455! where |phi> = s^{-1}|c0> 456! endif 457 use kinds, only: dp 458 use ions_base, only: nsp, nat, ityp 459 use io_global, only: stdout 460 use mp_global, only: intra_bgrp_comm 461 use uspp_param, only: nh, upf 462 use uspp, only :nkb, nkbus, qq_nt, indv_ijkb0 463 use electrons_base, only: n => nbsp 464 use gvecw, only: ngw 465 use constants, only: pi, fpi 466 use mp, only: mp_sum 467 use gvect, only: gstart 468! 469 implicit none 470 complex(dp) c0(ngw,n), betae(ngw,nkb) 471 real(dp) beck(nkb,n), ema0bg(ngw) 472 real(DP) :: m_minus1(nkb,nkb) 473 logical :: do_k 474! local variables 475 complex(dp), allocatable :: phi(:,:) 476 real(dp) , allocatable :: qtemp(:,:) 477 integer is, iv, jv, ia, inl, jnl, i, j, js, ja,ig 478 real(dp) becktmp 479 480 481 logical :: mat_par=.true.!if true uses parallel routines 482 483 call start_clock('xminus1') 484 485 if (nkbus.gt.0) then 486!calculates beck 487 if (do_k) then 488 beck(:,:) = 0.d0 489 do ia=1,nat 490 is=ityp(ia) 491 IF( upf(is)%tvanp ) THEN 492 do iv=1,nh(is) 493 inl = indv_ijkb0(ia) + iv 494 do i=1,n 495 becktmp = 0.0d0 496 do ig=1,ngw 497 becktmp=becktmp+ema0bg(ig)*DBLE(CONJG(betae(ig,inl))*c0(ig,i)) 498 enddo 499 becktmp = becktmp*2.0d0 500 if (gstart==2) becktmp = becktmp-ema0bg(1)*DBLE(CONJG(betae(1,inl))*c0(1,i)) 501 beck(inl,i) = beck(inl,i) + becktmp 502 enddo 503 enddo 504 END IF 505 enddo 506 call mp_sum( beck, intra_bgrp_comm ) 507 endif 508! 509! 510 allocate(phi(ngw,n)) 511 allocate(qtemp(nkb,n)) 512 phi(1:ngw,1:n) = 0.0d0 513 qtemp(:,:) = 0.0d0 514 if(.not.mat_par) then 515 call dgemm( 'N', 'N', nkb, n, nkb, 1.0d0, m_minus1,nkb , & 516 beck, nkb, 0.0d0, qtemp,nkb ) 517 else 518 call para_dgemm( 'N', 'N', nkb, n, nkb, 1.0d0, m_minus1,nkb , & 519 beck, nkb, 0.0d0, qtemp,nkb,intra_bgrp_comm ) 520 endif 521 522!NB nkb is the total number of US projectors 523! it works because the first pseudos are the vanderbilt's ones 524 525 CALL dgemm( 'N', 'N', 2*ngw, n, nkb, 1.0d0, betae, 2*ngw, & 526 qtemp, nkb, 0.0d0, phi, 2*ngw ) 527 if (do_k) then 528 do j=1,n 529 do ig=1,ngw 530 c0(ig,j)=(phi(ig,j)+c0(ig,j))*ema0bg(ig) 531 end do 532 end do 533 else 534 do j=1,n 535 do i=1,ngw 536 c0(i,j)=(phi(i,j)+c0(i,j)) 537 end do 538 end do 539 endif 540 deallocate(qtemp,phi) 541 542 else 543 if (do_k) then 544 do j=1,n 545 do ig=1,ngw 546 c0(ig,j)=c0(ig,j)*ema0bg(ig) 547 end do 548 end do 549 endif 550 endif 551 call stop_clock('xminus1') 552 return 553 end subroutine xminus1 554 555 SUBROUTINE emass_precond_tpa( ema0bg, tpiba2, emaec ) 556 use kinds, ONLY : dp 557 use gvecw, ONLY : g2kin,ngw 558 IMPLICIT NONE 559 REAL(DP), INTENT(OUT) :: ema0bg(ngw) 560 REAL(DP), INTENT(IN) :: tpiba2, emaec 561 INTEGER :: i 562 563 real(DP) :: x 564 565 call start_clock('emass_p_tpa') 566 do i = 1, ngw 567 568 x=0.5d0*tpiba2*g2kin(i)/emaec 569 ema0bg(i) = 1.d0/(1.d0+(16.d0*x**4)/(27.d0+18.d0*x+12.d0*x**2+8.d0*x**3)) 570 end do 571 call stop_clock('emass_p_tpa') 572 RETURN 573 END SUBROUTINE emass_precond_tpa 574 575 subroutine ave_kin( c, ngwx, n, ene_ave ) 576!this subroutine calculates the average kinetic energy of 577!each state , to be used for preconditioning 578 579 580 USE kinds, ONLY: DP 581 USE constants, ONLY: pi, fpi 582 USE gvecw, ONLY: ngw 583 USE gvect, ONLY: gstart 584 USE gvecw, ONLY: g2kin 585 USE mp, ONLY: mp_sum 586 USE mp_global, ONLY: intra_bgrp_comm 587 USE cell_base, ONLY: tpiba2 588 589 IMPLICIT NONE 590 591 592 ! input 593 594 INTEGER, INTENT(IN) :: ngwx, n 595 COMPLEX(kind=DP), INTENT(IN) :: c( ngwx, n ) 596 REAL(kind=DP), INTENT(out) :: ene_ave(n)!average kinetic energy to be calculated 597 ! 598 ! local 599 600 INTEGER :: ig, i 601 602 ! 603 DO i=1,n 604 ene_ave(i)=0.d0 605 DO ig=gstart,ngw 606 ene_ave(i)=ene_ave(i)+DBLE(CONJG(c(ig,i))*c(ig,i))*g2kin(ig) 607 END DO 608 END DO 609 610 611 CALL mp_sum( ene_ave(1:n), intra_bgrp_comm ) 612 ene_ave(:)=ene_ave(:)*tpiba2 613 614 RETURN 615 END subroutine ave_kin 616 617 618 619 subroutine xminus1_state(c0,betae,ema0bg,beck,m_minus1,do_k,ave_kin) 620! if (do_k) then 621!----------------------------------------------------------------------- 622! input: c0 , bec=<c0|beta>, betae=|beta> 623! computes the matrix phi (with the old positions) 624! where |phi> = K^{-1}|c0> 625! else 626!----------------------------------------------------------------------- 627! input: c0 , bec=<c0|beta>, betae=|beta> 628! computes the matrix phi (with the old positions) 629! where |phi> = s^{-1}|c0> 630! endif 631!adapted for state by state 632 use kinds, only: dp 633 use ions_base, only: nat, ityp 634 use io_global, only: stdout 635 use mp_global, only: intra_bgrp_comm 636 use uspp_param, only: nh, upf 637 use uspp, only :nkb, nkbus, qq_nt, indv_ijkb0 638 use electrons_base, only: n => nbsp 639 use gvecw, only: ngw 640 use constants, only: pi, fpi 641 use mp, only: mp_sum 642 use gvect, only: gstart 643 USE gvecw, ONLY: g2kin 644 USE cell_base, ONLY: tpiba2 645 646 647! 648 implicit none 649 complex(dp) c0(ngw,n), betae(ngw,nkb) 650 real(dp) beck(nkb,n), ema0bg(ngw) 651 real(DP) :: m_minus1(nkb,nkb) 652 logical :: do_k 653 real(kind=DP) :: ave_kin(n)!average kinetic energy per state 654! local variables 655 complex(dp), allocatable :: phi(:,:) 656 real(dp) , allocatable :: qtemp(:,:) 657 integer is, iv, jv, ia, inl, jnl, i, j, js, ja,ig 658 real(dp) becktmp 659 real(kind=DP) :: prec_fact, x 660 661 662 call start_clock('xminus1') 663 if (nkbus.gt.0) then 664!calculates beck 665 if (do_k) then 666 beck(:,:) = 0.d0 667 668 do ia=1,nat 669 is = ityp(ia) 670 IF( upf(is)%tvanp ) THEN 671 do iv=1,nh(is) 672 inl = indv_ijkb0(ia) + iv 673 do i=1,n 674 becktmp = 0.0d0 675 do ig=1,ngw 676 becktmp=becktmp+ema0bg(ig)*DBLE(CONJG(betae(ig,inl))*c0(ig,i)) 677 enddo 678 becktmp = becktmp*2.0d0 679 if (gstart==2) becktmp = becktmp-ema0bg(1)*DBLE(CONJG(betae(1,inl))*c0(1,i)) 680 beck(inl,i) = beck(inl,i) + becktmp 681 enddo 682 enddo 683 END IF 684 enddo 685 call mp_sum( beck, intra_bgrp_comm ) 686 endif 687! 688! 689 allocate(phi(ngw,n)) 690 allocate(qtemp(nkb,n)) 691 phi(1:ngw,1:n) = 0.0d0 692 qtemp(:,:) = 0.0d0 693 call dgemm( 'N', 'N', nkb, n, nkb, 1.0d0, m_minus1,nkb , & 694 beck, nkb, 0.0d0, qtemp,nkb ) 695 696 697 698!NB nkb is the total number of US projectors, it works because the first pseudos are the vanderbilt's ones 699 700 CALL dgemm( 'N', 'N', 2*ngw, n, nkb, 1.0d0, betae, 2*ngw, & 701 qtemp, nkb, 0.0d0, phi, 2*ngw ) 702 if (do_k) then 703 do j=1,n 704 do ig=1,ngw 705 x=tpiba2*g2kin(i)/ave_kin(j) 706 prec_fact = 1.d0/(1.d0+(16.d0*x**4)/(27.d0+18.d0*x+12.d0*x**2+8.d0*x**3)) 707 c0(ig,j)=c0(ig,j)*prec_fact 708 !c0(ig,j)=(phi(ig,j)+c0(ig,j))*ema0bg(ig) 709 end do 710 end do 711 else 712 do j=1,n 713 do i=1,ngw 714 c0(i,j)=(phi(i,j)+c0(i,j)) 715 end do 716 end do 717 endif 718 deallocate(qtemp,phi) 719 720 else 721 if (do_k) then 722 do j=1,n 723 do ig=1,ngw 724 x=tpiba2*g2kin(ig)/ave_kin(j) 725 prec_fact = 1.d0/(1.d0+(16.d0*x**4)/(27.d0+18.d0*x+12.d0*x**2+8.d0*x**3)) 726 c0(ig,j)=c0(ig,j)*prec_fact 727 end do 728 end do 729 endif 730 endif 731 call stop_clock('xminus1') 732 return 733 end subroutine xminus1_state 734! 735! ... some simple routines for parallel linear algebra (the matrices are 736! ... always replicated on all the cpus) 737! 738! ... written by carlo sbraccia ( 2006 ) 739! 740!---------------------------------------------------------------------------- 741SUBROUTINE para_dgemm( transa, transb, m, n, k, & 742 alpha, a, lda, b, ldb, beta, c, ldc, comm ) 743 !---------------------------------------------------------------------------- 744 ! 745 ! ... trivial parallelization (splitting matrix B by columns) of dgemm 746 ! 747 USE kinds, ONLY : DP 748 ! 749 IMPLICIT NONE 750 ! 751 CHARACTER(LEN=1), INTENT(IN) :: transa, transb 752 INTEGER, INTENT(IN) :: m, n, k 753 REAL(DP), INTENT(IN) :: alpha, beta 754 INTEGER, INTENT(IN) :: lda, ldb, ldc 755 REAL(DP), INTENT(INOUT) :: a(lda,*), b(ldb,*), c(ldc,*) 756 INTEGER, INTENT(IN) :: comm 757 ! 758 ! ... quick return if possible 759 ! 760 IF ( m == 0 .OR. n == 0 .OR. & 761 ( ( alpha == 0.0_DP .OR. k == 0 ) .AND. beta == 1.0_DP ) ) RETURN 762 ! 763!write(*,*) 'DEBUG: para_dgemm' 764 ! 765 CALL rep_matmul_drv( transa, transb, m, n, k, & 766 alpha, a, lda, b, ldb, beta, c, ldc, comm ) 767 RETURN 768 ! 769END SUBROUTINE para_dgemm 770