1! 2! Copyright (C) 2003-2015 Quantum ESPRESSO group 3! This file is distributed under the terms of the 4! GNU General Public License. See the file `License' 5! in the root directory of the present distribution, 6! or http://www.gnu.org/copyleft/gpl.txt . 7! 8#define ZERO ( 0.D0, 0.D0 ) 9#define ONE ( 1.D0, 0.D0 ) 10! 11! 12!---------------------------------------------------------------------------- 13SUBROUTINE regterg( h_psi, s_psi, uspp, g_psi, & 14 npw, npwx, nvec, nvecx, evc, ethr, & 15 e, btype, notcnv, lrot, dav_iter, nhpsi ) 16 !---------------------------------------------------------------------------- 17 ! 18 ! ... iterative solution of the eigenvalue problem: 19 ! 20 ! ... ( H - e S ) * evc = 0 21 ! 22 ! ... where H is an hermitean operator, e is a real scalar, 23 ! ... S is an uspp matrix, evc is a complex vector 24 ! ... (real wavefunctions with only half plane waves stored) 25 26 USE util_param, ONLY : DP, stdout 27 USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, & 28 nbgrp, my_bgrp_id, me_bgrp, root_bgrp 29 USE mp_bands_util, ONLY : gstart 30 USE mp, ONLY : mp_sum, mp_bcast 31 ! 32 IMPLICIT NONE 33 ! 34 include 'laxlib.fh' 35 ! 36 INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx 37 ! dimension of the matrix to be diagonalized 38 ! leading dimension of matrix evc, as declared in the calling pgm unit 39 ! integer number of searched low-lying roots 40 ! maximum dimension of the reduced basis set 41 ! (the basis set is refreshed when its dimension would exceed nvecx) 42 COMPLEX(DP), INTENT(INOUT) :: evc(npwx,nvec) 43 ! evc contains the refined estimates of the eigenvectors 44 REAL(DP), INTENT(IN) :: ethr 45 ! energy threshold for convergence: root improvement is stopped, 46 ! when two consecutive estimates of the root differ by less than ethr. 47 LOGICAL, INTENT(IN) :: uspp 48 ! if .FALSE. : S|psi> not needed 49 INTEGER, INTENT(IN) :: btype(nvec) 50 ! band type ( 1 = occupied, 0 = empty ) 51 LOGICAL, INTENT(IN) :: lrot 52 ! .TRUE. if the wfc have already been rotated 53 REAL(DP), INTENT(OUT) :: e(nvec) 54 ! contains the estimated roots. 55 INTEGER, INTENT(OUT) :: dav_iter, notcnv 56 ! integer number of iterations performed 57 ! number of unconverged roots 58 INTEGER, INTENT(OUT) :: nhpsi 59 ! number of individual Hpsi made 60 ! 61 ! ... LOCAL variables 62 ! 63 INTEGER, PARAMETER :: maxter = 20 64 ! maximum number of iterations 65 ! 66 INTEGER :: kter, nbase, np, npw2, npwx2, n, m, nb1, nbn 67 ! counter on iterations 68 ! dimension of the reduced basis 69 ! counter on the reduced basis vectors 70 ! do-loop counters 71 ! counter on the bands 72 INTEGER :: n_start, n_end, my_n 73 INTEGER :: ierr 74 REAL(DP), ALLOCATABLE :: hr(:,:), sr(:,:), vr(:,:), ew(:) 75 ! Hamiltonian on the reduced basis 76 ! S matrix on the reduced basis 77 ! eigenvectors of the Hamiltonian 78 ! eigenvalues of the reduced hamiltonian 79 COMPLEX(DP), ALLOCATABLE :: psi(:,:), hpsi(:,:), spsi(:,:) 80 ! work space, contains psi 81 ! the product of H and psi 82 ! the product of S and psi 83 LOGICAL, ALLOCATABLE :: conv(:) 84 ! true if the root is converged 85 REAL(DP) :: empty_ethr 86 ! threshold for empty bands 87 ! 88 REAL(DP), EXTERNAL :: ddot 89 ! 90 EXTERNAL h_psi, s_psi, g_psi 91 ! h_psi(npwx,npw,nvec,psi,hpsi) 92 ! calculates H|psi> 93 ! s_psi(npwx,npw,nvec,psi,spsi) 94 ! calculates S|psi> (if needed) 95 ! Vectors psi,hpsi,spsi are dimensioned (npwx,nvec) 96 ! g_psi(npwx,npw,notcnv,psi,e) 97 ! calculates (diag(h)-e)^-1 * psi, diagonal approx. to (h-e)^-1*psi 98 ! the first nvec columns contain the trial eigenvectors 99 ! 100 CALL start_clock( 'regterg' ) !; write(6,*) 'enter regterg' ; FLUSH(6) 101 ! 102 IF ( nvec > nvecx / 2 ) CALL errore( 'regter', 'nvecx is too small', 1 ) 103 ! 104 IF ( gstart == -1 ) CALL errore( 'regter', 'gstart variable not initialized', 1 ) 105 ! 106 ! ... threshold for empty bands 107 ! 108 empty_ethr = MAX( ( ethr * 5.D0 ), 1.D-5 ) 109 ! 110 npw2 = 2*npw 111 npwx2 = 2*npwx 112 ! 113 ALLOCATE( psi( npwx, nvecx ), STAT=ierr ) 114 IF( ierr /= 0 ) & 115 CALL errore( 'regterg ',' cannot allocate psi ', ABS(ierr) ) 116 ALLOCATE( hpsi( npwx, nvecx ), STAT=ierr ) 117 IF( ierr /= 0 ) & 118 CALL errore( 'regterg ',' cannot allocate hpsi ', ABS(ierr) ) 119 ! 120 IF ( uspp ) THEN 121 ALLOCATE( spsi( npwx, nvecx ), STAT=ierr ) 122 IF( ierr /= 0 ) & 123 CALL errore( ' regterg ',' cannot allocate spsi ', ABS(ierr) ) 124 END IF 125 ! 126 ALLOCATE( sr( nvecx, nvecx ), STAT=ierr ) 127 IF( ierr /= 0 ) & 128 CALL errore( 'regterg ',' cannot allocate sr ', ABS(ierr) ) 129 ALLOCATE( hr( nvecx, nvecx ), STAT=ierr ) 130 IF( ierr /= 0 ) & 131 CALL errore( 'regterg ',' cannot allocate hr ', ABS(ierr) ) 132 ALLOCATE( vr( nvecx, nvecx ), STAT=ierr ) 133 IF( ierr /= 0 ) & 134 CALL errore( 'regterg ',' cannot allocate vr ', ABS(ierr) ) 135 ALLOCATE( ew( nvecx ), STAT=ierr ) 136 IF( ierr /= 0 ) & 137 CALL errore( 'regterg ',' cannot allocate ew ', ABS(ierr) ) 138 ALLOCATE( conv( nvec ), STAT=ierr ) 139 IF( ierr /= 0 ) & 140 CALL errore( 'regterg ',' cannot allocate conv ', ABS(ierr) ) 141 ! 142 notcnv = nvec 143 nbase = nvec 144 conv = .FALSE. 145 ! 146 IF ( uspp ) spsi = ZERO 147 ! 148 hpsi = ZERO 149 psi = ZERO 150 psi(:,1:nvec) = evc(:,1:nvec) 151 ! ... set Im[ psi(G=0) ] - needed for numerical stability 152 IF ( gstart == 2 ) psi(1,1:nvec) = CMPLX( DBLE( psi(1,1:nvec) ), 0.D0 ,kind=DP) 153 ! 154 ! ... hpsi contains h times the basis vectors 155 ! 156 CALL h_psi( npwx, npw, nvec, psi, hpsi ) ; nhpsi = nvec 157 ! 158 ! ... spsi contains s times the basis vectors 159 ! 160 IF ( uspp ) CALL s_psi( npwx, npw, nvec, psi, spsi ) 161 ! 162 ! ... hr contains the projection of the hamiltonian onto the reduced 163 ! ... space vr contains the eigenvectors of hr 164 ! 165 CALL start_clock( 'regterg:init' ) 166 hr(:,:) = 0.D0 167 sr(:,:) = 0.D0 168 vr(:,:) = 0.D0 169 ! 170 CALL divide(inter_bgrp_comm,nbase,n_start,n_end) 171 my_n = n_end - n_start + 1; !write (*,*) nbase,n_start,n_end 172 if (n_start .le. n_end) & 173 CALL DGEMM( 'T','N', nbase, my_n, npw2, 2.D0 , psi, npwx2, hpsi(1,n_start), npwx2, 0.D0, hr(1,n_start), nvecx ) 174 IF ( gstart == 2 ) CALL DGER( nbase, my_n, -1.D0, psi, npwx2, hpsi(1,n_start), npwx2, hr(1,n_start), nvecx ) 175 CALL mp_sum( hr( :, 1:nbase ), inter_bgrp_comm ) 176 ! 177 CALL mp_sum( hr( :, 1:nbase ), intra_bgrp_comm ) 178 ! 179 IF ( uspp ) THEN 180 ! 181 if (n_start .le. n_end) & 182 CALL DGEMM( 'T','N', nbase, my_n, npw2, 2.D0, psi, npwx2, spsi(1,n_start), npwx2, 0.D0, sr(1,n_start), nvecx ) 183 IF ( gstart == 2 ) CALL DGER( nbase, my_n, -1.D0, psi, npwx2, spsi(1,n_start), npwx2, sr(1,n_start), nvecx ) 184 ! 185 ELSE 186 ! 187 if (n_start .le. n_end) & 188 CALL DGEMM( 'T','N', nbase, my_n, npw2, 2.D0, psi, npwx2, psi(1,n_start), npwx2, 0.D0, sr(1,n_start), nvecx ) 189 IF ( gstart == 2 ) CALL DGER( nbase, my_n, -1.D0, psi, npwx2, psi(1,n_start), npwx2, sr(1,n_start), nvecx ) 190 ! 191 END IF 192 CALL mp_sum( sr( :, 1:nbase ), inter_bgrp_comm ) 193 ! 194 CALL mp_sum( sr( :, 1:nbase ), intra_bgrp_comm ) 195 CALL stop_clock( 'regterg:init' ) 196 ! 197 IF ( lrot ) THEN 198 ! 199 DO n = 1, nbase 200 ! 201 e(n) = hr(n,n) 202 vr(n,n) = 1.D0 203 ! 204 END DO 205 ! 206 ELSE 207 ! 208 ! ... diagonalize the reduced hamiltonian 209 ! 210 CALL start_clock( 'regterg:diag' ) 211 IF( my_bgrp_id == root_bgrp_id ) THEN 212 CALL diaghg( nbase, nvec, hr, sr, nvecx, ew, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) 213 END IF 214 IF( nbgrp > 1 ) THEN 215 CALL mp_bcast( vr, root_bgrp_id, inter_bgrp_comm ) 216 CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) 217 ENDIF 218 CALL stop_clock( 'regterg:diag' ) 219 ! 220 e(1:nvec) = ew(1:nvec) 221 ! 222 END IF 223 ! 224 ! ... iterate 225 ! 226 iterate: DO kter = 1, maxter 227 ! 228 dav_iter = kter ; !write(*,*) kter, notcnv, conv 229 ! 230 CALL start_clock( 'regterg:update' ) 231 ! 232 np = 0 233 ! 234 DO n = 1, nvec 235 ! 236 IF ( .NOT. conv(n) ) THEN 237 ! 238 ! ... this root not yet converged ... 239 ! 240 np = np + 1 241 ! 242 ! ... reorder eigenvectors so that coefficients for unconverged 243 ! ... roots come first. This allows to use quick matrix-matrix 244 ! ... multiplications to set a new basis vector (see below) 245 ! 246 IF ( np /= n ) vr(:,np) = vr(:,n) 247 ! 248 ! ... for use in g_psi 249 ! 250 ew(nbase+np) = e(n) 251 ! 252 END IF 253 ! 254 END DO 255 ! 256 nb1 = nbase + 1 257 ! 258 ! ... expand the basis set with new basis vectors ( H - e*S )|psi> ... 259 ! 260 CALL divide(inter_bgrp_comm,nbase,n_start,n_end) 261 my_n = n_end - n_start + 1; !write (*,*) nbase,n_start,n_end 262 psi(:,nb1:nbase+notcnv)=ZERO 263 IF ( uspp ) THEN 264 ! 265 if (n_start .le. n_end) & 266 CALL DGEMM( 'N','N', npw2, notcnv, my_n, 1.D0, spsi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, psi(1,nb1), npwx2 ) 267 ! 268 ELSE 269 ! 270 if (n_start .le. n_end) & 271 CALL DGEMM( 'N','N', npw2, notcnv, my_n, 1.D0, psi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, psi(1,nb1), npwx2 ) 272 ! 273 END IF 274! NB: must not call mp_sum over inter_bgrp_comm here because it is done later to the full correction 275 ! 276 DO np = 1, notcnv 277 ! 278 psi(:,nbase+np) = - ew(nbase+np) * psi(:,nbase+np) 279 ! 280 END DO 281 ! 282 if (n_start .le. n_end) & 283 CALL DGEMM( 'N','N', npw2, notcnv, my_n, 1.D0, hpsi(1,n_start), npwx2, vr(n_start,1), nvecx, 1.D0, psi(1,nb1), npwx2 ) 284 CALL mp_sum( psi(:,nb1:nbase+notcnv), inter_bgrp_comm ) 285 ! 286 CALL stop_clock( 'regterg:update' ) 287 ! 288 ! ... approximate inverse iteration 289 ! 290 CALL g_psi( npwx, npw, notcnv, 1, psi(1,nb1), ew(nb1) ) 291 ! 292 ! ... "normalize" correction vectors psi(:,nb1:nbase+notcnv) in 293 ! ... order to improve numerical stability of subspace diagonalization 294 ! ... (rdiaghg) ew is used as work array : 295 ! 296 ! ... ew = <psi_i|psi_i>, i = nbase + 1, nbase + notcnv 297 ! 298 DO n = 1, notcnv 299 ! 300 nbn = nbase + n 301 ! 302 ew(n) = 2.D0 * ddot( npw2, psi(1,nbn), 1, psi(1,nbn), 1 ) 303 IF ( gstart == 2 ) ew(n) = ew(n) - psi(1,nbn) * psi(1,nbn) 304 ! 305 END DO 306 ! 307 CALL mp_sum( ew( 1:notcnv ), intra_bgrp_comm ) 308 ! 309 DO n = 1, notcnv 310 ! 311 psi(:,nbase+n) = psi(:,nbase+n) / SQRT( ew(n) ) 312 ! ... set Im[ psi(G=0) ] - needed for numerical stability 313 IF ( gstart == 2 ) psi(1,nbase+n) = CMPLX( DBLE(psi(1,nbase+n)), 0.D0 ,kind=DP) 314 ! 315 END DO 316 ! 317 ! ... here compute the hpsi and spsi of the new functions 318 ! 319 CALL h_psi( npwx, npw, notcnv, psi(1,nb1), hpsi(1,nb1) ) ; nhpsi = nhpsi + notcnv 320 ! 321 IF ( uspp ) CALL s_psi( npwx, npw, notcnv, psi(1,nb1), spsi(1,nb1) ) 322 ! 323 ! ... update the reduced hamiltonian 324 ! 325 CALL start_clock( 'regterg:overlap' ) 326 ! 327 hr( :, nb1:nb1+notcnv-1 )=0.d0 328 CALL divide(inter_bgrp_comm,nbase+notcnv,n_start,n_end) 329 my_n = n_end - n_start + 1; !write (*,*) nbase+notcnv,n_start,n_end 330 CALL DGEMM( 'T','N', my_n, notcnv, npw2, 2.D0, psi(1,n_start), npwx2, hpsi(1,nb1), npwx2, 0.D0, hr(n_start,nb1), nvecx ) 331 IF ( gstart == 2 ) CALL DGER( my_n, notcnv, -1.D0, psi(1,n_start), npwx2, hpsi(1,nb1), npwx2, hr(n_start,nb1), nvecx ) 332 CALL mp_sum( hr( :, nb1:nb1+notcnv-1 ), inter_bgrp_comm ) 333 ! 334 CALL mp_sum( hr( :, nb1:nb1+notcnv-1 ), intra_bgrp_comm ) 335 ! 336 sr( :, nb1:nb1+notcnv-1 )=0.d0 337 CALL divide(inter_bgrp_comm,nbase+notcnv,n_start,n_end) 338 my_n = n_end - n_start + 1; !write (*,*) nbase+notcnv,n_start,n_end 339 IF ( uspp ) THEN 340 ! 341 CALL DGEMM( 'T','N', my_n, notcnv, npw2, 2.D0, psi(1,n_start), npwx2, spsi(1,nb1), npwx2, 0.D0, sr(n_start,nb1), nvecx ) 342 IF ( gstart == 2 ) CALL DGER( my_n, notcnv, -1.D0, psi(1,n_start), npwx2, spsi(1,nb1), npwx2, sr(n_start,nb1), nvecx ) 343 ! 344 ELSE 345 ! 346 CALL DGEMM( 'T','N', my_n, notcnv, npw2, 2.D0, psi(1,n_start), npwx2, psi(1,nb1), npwx2, 0.D0, sr(n_start,nb1) , nvecx ) 347 IF ( gstart == 2 ) CALL DGER( my_n, notcnv, -1.D0, psi(1,n_start), npwx2, psi(1,nb1), npwx2, sr(n_start,nb1), nvecx ) 348 ! 349 END IF 350 CALL mp_sum( sr( :, nb1:nb1+notcnv-1 ), inter_bgrp_comm ) 351 ! 352 CALL mp_sum( sr( :, nb1:nb1+notcnv-1 ), intra_bgrp_comm ) 353 ! 354 CALL stop_clock( 'regterg:overlap' ) 355 ! 356 nbase = nbase + notcnv 357 ! 358 DO n = 1, nbase 359 ! 360 DO m = n + 1, nbase 361 ! 362 hr(m,n) = hr(n,m) 363 sr(m,n) = sr(n,m) 364 ! 365 END DO 366 ! 367 END DO 368 ! 369 ! ... diagonalize the reduced hamiltonian 370 ! 371 CALL start_clock( 'regterg:diag' ) 372 IF( my_bgrp_id == root_bgrp_id ) THEN 373 CALL diaghg( nbase, nvec, hr, sr, nvecx, ew, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) 374 END IF 375 IF( nbgrp > 1 ) THEN 376 CALL mp_bcast( vr, root_bgrp_id, inter_bgrp_comm ) 377 CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) 378 ENDIF 379 CALL stop_clock( 'regterg:diag' ) 380 ! 381 ! ... test for convergence 382 ! 383 WHERE( btype(1:nvec) == 1 ) 384 ! 385 conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < ethr ) ) 386 ! 387 ELSEWHERE 388 ! 389 conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < empty_ethr ) ) 390 ! 391 END WHERE 392 ! ... next line useful for band parallelization of exact exchange 393 IF ( nbgrp > 1 ) CALL mp_bcast(conv,root_bgrp_id,inter_bgrp_comm) 394 ! 395 notcnv = COUNT( .NOT. conv(:) ) 396 ! 397 e(1:nvec) = ew(1:nvec) 398 ! 399 ! ... if overall convergence has been achieved, or the dimension of 400 ! ... the reduced basis set is becoming too large, or in any case if 401 ! ... we are at the last iteration refresh the basis set. i.e. replace 402 ! ... the first nvec elements with the current estimate of the 403 ! ... eigenvectors; set the basis dimension to nvec. 404 ! 405 IF ( notcnv == 0 .OR. & 406 nbase+notcnv > nvecx .OR. dav_iter == maxter ) THEN 407 ! 408 CALL start_clock( 'regterg:last' ) 409 ! 410 evc = ZERO 411 CALL divide(inter_bgrp_comm,nbase,n_start,n_end) 412 my_n = n_end - n_start + 1; !write (*,*) nbase,n_start,n_end 413 CALL DGEMM( 'N','N', npw2, nvec, my_n, 1.D0, psi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, evc, npwx2 ) 414 CALL mp_sum( evc, inter_bgrp_comm ) 415 ! 416 IF ( notcnv == 0 ) THEN 417 ! 418 ! ... all roots converged: return 419 ! 420 CALL stop_clock( 'regterg:last' ) 421 ! 422 EXIT iterate 423 ! 424 ELSE IF ( dav_iter == maxter ) THEN 425 ! 426 ! ... last iteration, some roots not converged: return 427 ! 428 !!!WRITE( stdout, '(5X,"WARNING: ",I5, & 429 !!! & " eigenvalues not converged in regterg")' ) notcnv 430 ! 431 CALL stop_clock( 'regterg:last' ) 432 ! 433 EXIT iterate 434 ! 435 END IF 436 ! 437 ! ... refresh psi, H*psi and S*psi 438 ! 439 psi(:,1:nvec) = evc(:,1:nvec) 440 ! 441 IF ( uspp ) THEN 442 ! 443 psi(:,nvec+1:nvec+nvec) = ZERO 444 CALL DGEMM( 'N','N', npw2, nvec, my_n, 1.D0, spsi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, psi(1,nvec+1), npwx2 ) 445 CALL mp_sum( psi(:,nvec+1:nvec+nvec), inter_bgrp_comm ) 446 ! 447 spsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec) 448 ! 449 END IF 450 ! 451 psi(:,nvec+1:nvec+nvec) = ZERO 452 CALL DGEMM( 'N','N', npw2, nvec, my_n, 1.D0, hpsi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, psi(1,nvec+1), npwx2 ) 453 CALL mp_sum( psi(:,nvec+1:nvec+nvec), inter_bgrp_comm ) 454 ! 455 hpsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec) 456 ! 457 ! ... refresh the reduced hamiltonian 458 ! 459 nbase = nvec 460 ! 461 hr(:,1:nbase) = 0.D0 462 sr(:,1:nbase) = 0.D0 463 vr(:,1:nbase) = 0.D0 464 ! 465 DO n = 1, nbase 466 ! 467 hr(n,n) = e(n) 468 sr(n,n) = 1.D0 469 vr(n,n) = 1.D0 470 ! 471 END DO 472 ! 473 CALL stop_clock( 'regterg:last' ) 474 ! 475 END IF 476 ! 477 END DO iterate 478 ! 479 DEALLOCATE( conv ) 480 DEALLOCATE( ew ) 481 DEALLOCATE( vr ) 482 DEALLOCATE( hr ) 483 DEALLOCATE( sr ) 484 ! 485 IF ( uspp ) DEALLOCATE( spsi ) 486 ! 487 DEALLOCATE( hpsi ) 488 DEALLOCATE( psi ) 489 ! 490 CALL stop_clock( 'regterg' ) 491 !call print_clock( 'regterg' ) 492 !call print_clock( 'regterg:init' ) 493 !call print_clock( 'regterg:diag' ) 494 !call print_clock( 'regterg:update' ) 495 !call print_clock( 'regterg:overlap' ) 496 !call print_clock( 'regterg:last' ) 497 ! 498 RETURN 499 ! 500END SUBROUTINE regterg 501! 502! 503! Subroutine with distributed matrixes 504! (written by Carlo Cavazzoni) 505! 506!---------------------------------------------------------------------------- 507SUBROUTINE pregterg(h_psi, s_psi, uspp, g_psi, & 508 npw, npwx, nvec, nvecx, evc, ethr, & 509 e, btype, notcnv, lrot, dav_iter, nhpsi ) 510 !---------------------------------------------------------------------------- 511 ! 512 ! ... iterative solution of the eigenvalue problem: 513 ! 514 ! ... ( H - e S ) * evc = 0 515 ! 516 ! ... where H is an hermitean operator, e is a real scalar, 517 ! ... S is an uspp matrix, evc is a complex vector 518 ! ... (real wavefunctions with only half plane waves stored) 519 ! 520 USE util_param, ONLY : DP, stdout 521 USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, nbgrp, my_bgrp_id 522 USE mp_bands_util, ONLY : gstart 523 USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum 524 ! 525 IMPLICIT NONE 526 ! 527 include 'laxlib.fh' 528 ! 529 INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx 530 ! dimension of the matrix to be diagonalized 531 ! leading dimension of matrix evc, as declared in the calling pgm unit 532 ! integer number of searched low-lying roots 533 ! maximum dimension of the reduced basis set 534 ! (the basis set is refreshed when its dimension would exceed nvecx) 535 COMPLEX(DP), INTENT(INOUT) :: evc(npwx,nvec) 536 ! evc contains the refined estimates of the eigenvectors 537 REAL(DP), INTENT(IN) :: ethr 538 ! energy threshold for convergence: root improvement is stopped, 539 ! when two consecutive estimates of the root differ by less than ethr. 540 LOGICAL, INTENT(IN) :: uspp 541 ! if .FALSE. : S|psi> not needed 542 INTEGER, INTENT(IN) :: btype(nvec) 543 ! band type ( 1 = occupied, 0 = empty ) 544 LOGICAL, INTENT(IN) :: lrot 545 ! .TRUE. if the wfc have already be rotated 546 REAL(DP), INTENT(OUT) :: e(nvec) 547 ! contains the estimated roots. 548 INTEGER, INTENT(OUT) :: dav_iter, notcnv 549 ! integer number of iterations performed 550 ! number of unconverged roots 551 INTEGER, INTENT(OUT) :: nhpsi 552 ! number of individual Hpsi made 553 ! 554 ! ... LOCAL variables 555 ! 556 INTEGER, PARAMETER :: maxter = 20 557 ! maximum number of iterations 558 ! 559 INTEGER :: kter, nbase, np, n, m, nb1 560 ! counter on iterations 561 ! dimension of the reduced basis 562 ! counter on the reduced basis vectors 563 ! do-loop counters 564 INTEGER :: ierr 565 REAL(DP), ALLOCATABLE :: ew(:) 566 REAL(DP), ALLOCATABLE :: hl(:,:), sl(:,:), vl(:,:) 567 ! Hamiltonian on the reduced basis 568 ! S matrix on the reduced basis 569 ! eigenvectors of the Hamiltonian 570 ! eigenvalues of the reduced hamiltonian 571 COMPLEX(DP), ALLOCATABLE :: psi(:,:), hpsi(:,:), spsi(:,:) 572 ! work space, contains psi 573 ! the product of H and psi 574 ! the product of S and psi 575 LOGICAL, ALLOCATABLE :: conv(:) 576 ! true if the root is converged 577 REAL(DP) :: empty_ethr 578 ! threshold for empty bands 579 INTEGER :: npw2, npwx2 580 INTEGER :: idesc(LAX_DESC_SIZE), idesc_old(LAX_DESC_SIZE) 581 INTEGER, ALLOCATABLE :: irc_ip( : ) 582 INTEGER, ALLOCATABLE :: nrc_ip( : ) 583 INTEGER, ALLOCATABLE :: rank_ip( :, : ) 584 ! matrix distribution descriptors 585 INTEGER :: nx 586 ! maximum local block dimension 587 LOGICAL :: la_proc 588 ! flag to distinguish procs involved in linear algebra 589 INTEGER, ALLOCATABLE :: notcnv_ip( : ) 590 INTEGER, ALLOCATABLE :: ic_notcnv( : ) 591 ! 592 INTEGER :: np_ortho(2), ortho_parent_comm 593 LOGICAL :: do_distr_diag_inside_bgrp 594 ! 595 REAL(DP), EXTERNAL :: ddot 596 ! 597 EXTERNAL h_psi, s_psi, g_psi 598 ! h_psi(npwx,npw,nvec,psi,hpsi) 599 ! calculates H|psi> 600 ! s_psi(npwx,npw,nvec,psi,spsi) 601 ! calculates S|psi> (if needed) 602 ! Vectors psi,hpsi,spsi are dimensioned (npwx,nvec) 603 ! g_psi(npwx,npw,notcnv,psi,e) 604 ! calculates (diag(h)-e)^-1 * psi, diagonal approx. to (h-e)^-1*psi 605 ! the first nvec columns contain the trial eigenvectors 606 ! 607 ! 608 CALL start_clock( 'regterg' ) !; write(6,*) 'enter pregterg' ; FLUSH(6) 609 ! 610 CALL laxlib_getval( np_ortho = np_ortho, ortho_parent_comm = ortho_parent_comm, & 611 do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) 612 ! 613 IF ( nvec > nvecx / 2 ) CALL errore( 'pregter', 'nvecx is too small', 1 ) 614 ! 615 IF ( gstart == -1 ) CALL errore( 'pregter', 'gstart variable not initialized', 1 ) 616 ! 617 ! ... threshold for empty bands 618 ! 619 empty_ethr = MAX( ( ethr * 5.D0 ), 1.D-5 ) 620 ! 621 ALLOCATE( psi( npwx, nvecx ), STAT=ierr ) 622 IF( ierr /= 0 ) & 623 CALL errore( 'pregterg ',' cannot allocate psi ', ABS(ierr) ) 624 ! 625 ALLOCATE( hpsi( npwx, nvecx ), STAT=ierr ) 626 IF( ierr /= 0 ) & 627 CALL errore( 'pregterg ',' cannot allocate hpsi ', ABS(ierr) ) 628 ! 629 IF ( uspp ) THEN 630 ALLOCATE( spsi( npwx, nvecx ), STAT=ierr ) 631 IF( ierr /= 0 ) & 632 CALL errore( 'pregterg ',' cannot allocate spsi ', ABS(ierr) ) 633 END IF 634 ! 635 ! ... Initialize the matrix descriptor 636 ! 637 ALLOCATE( ic_notcnv( np_ortho(2) ), STAT=ierr ) 638 IF( ierr /= 0 ) & 639 CALL errore( 'pregterg ',' cannot allocate ic_notcnv ', ABS(ierr) ) 640 ! 641 ALLOCATE( notcnv_ip( np_ortho(2) ), STAT=ierr ) 642 IF( ierr /= 0 ) & 643 CALL errore( 'pregterg ',' cannot allocate notcnv_ip ', ABS(ierr) ) 644 ! 645 CALL desc_init( nvec, nx, la_proc, idesc, rank_ip, irc_ip, nrc_ip ) 646 ! 647 IF( la_proc ) THEN 648 ! 649 ! only procs involved in the diagonalization need to allocate local 650 ! matrix block. 651 ! 652 ALLOCATE( vl( nx , nx ), STAT=ierr ) 653 IF( ierr /= 0 ) & 654 CALL errore( 'pregterg ',' cannot allocate vl ', ABS(ierr) ) 655 ! 656 ALLOCATE( sl( nx , nx ), STAT=ierr ) 657 IF( ierr /= 0 ) & 658 CALL errore( 'pregterg ',' cannot allocate sl ', ABS(ierr) ) 659 ! 660 ALLOCATE( hl( nx , nx ), STAT=ierr ) 661 IF( ierr /= 0 ) & 662 CALL errore( 'pregterg ',' cannot allocate hl ', ABS(ierr) ) 663 ! 664 ELSE 665 ! 666 ALLOCATE( vl( 1 , 1 ), STAT=ierr ) 667 IF( ierr /= 0 ) & 668 CALL errore( 'pregterg ',' cannot allocate vl ', ABS(ierr) ) 669 ! 670 ALLOCATE( sl( 1 , 1 ), STAT=ierr ) 671 IF( ierr /= 0 ) & 672 CALL errore( 'pregterg ',' cannot allocate sl ', ABS(ierr) ) 673 ! 674 ALLOCATE( hl( 1 , 1 ), STAT=ierr ) 675 IF( ierr /= 0 ) & 676 CALL errore( 'pregterg ',' cannot allocate hl ', ABS(ierr) ) 677 ! 678 END IF 679 ! 680 ALLOCATE( ew( nvecx ), STAT=ierr ) 681 IF( ierr /= 0 ) & 682 CALL errore( 'pregterg ',' cannot allocate ew ', ABS(ierr) ) 683 ! 684 ALLOCATE( conv( nvec ), STAT=ierr ) 685 IF( ierr /= 0 ) & 686 CALL errore( 'pregterg ',' cannot allocate conv ', ABS(ierr) ) 687 ! 688 npw2 = 2*npw 689 npwx2 = 2*npwx 690 notcnv = nvec 691 nbase = nvec 692 conv = .FALSE. 693 ! 694 IF ( uspp ) spsi = ZERO 695 ! 696 hpsi = ZERO 697 psi = ZERO 698 psi(:,1:nvec) = evc(:,1:nvec) 699 ! ... set Im[ psi(G=0) ] - needed for numerical stability 700 IF ( gstart == 2 ) psi(1,1:nvec) = CMPLX( DBLE( psi(1,1:nvec) ), 0.D0 ,kind=DP) 701 ! 702 ! ... hpsi contains h times the basis vectors 703 ! 704 CALL h_psi( npwx, npw, nvec, psi, hpsi ) ; nhpsi = nvec 705 ! 706 IF ( uspp ) CALL s_psi( npwx, npw, nvec, psi, spsi ) 707 ! 708 ! ... hl contains the projection of the hamiltonian onto the reduced 709 ! ... space, vl contains the eigenvectors of hl. Remember hl, vl and sl 710 ! ... are all distributed across processors, global replicated matrixes 711 ! ... here are never allocated 712 ! 713 CALL start_clock( 'regterg:init' ) 714 715 CALL compute_distmat( hl, psi, hpsi ) 716 ! 717 IF ( uspp ) THEN 718 ! 719 CALL compute_distmat( sl, psi, spsi ) 720 ! 721 ELSE 722 ! 723 CALL compute_distmat( sl, psi, psi ) 724 ! 725 END IF 726 CALL stop_clock( 'regterg:init' ) 727 ! 728 IF ( lrot ) THEN 729 ! 730 CALL set_e_from_h() 731 ! 732 CALL set_to_identity( vl, idesc ) 733 ! 734 ELSE 735 ! 736 ! ... diagonalize the reduced hamiltonian 737 ! Calling block parallel algorithm 738 ! 739 CALL start_clock( 'regterg:diag' ) 740 IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm 741 ! only the first bgrp performs the diagonalization 742 IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc ) 743 IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups 744 CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) 745 CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) 746 ENDIF 747 ELSE 748 CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc ) 749 END IF 750 CALL stop_clock( 'regterg:diag' ) 751 ! 752 e(1:nvec) = ew(1:nvec) 753 ! 754 END IF 755 ! 756 ! ... iterate 757 ! 758 iterate: DO kter = 1, maxter 759 ! 760 dav_iter = kter ; !write(*,*) kter, notcnv, conv 761 ! 762 CALL start_clock( 'regterg:update' ) 763 ! 764 CALL reorder_v() 765 ! 766 nb1 = nbase + 1 767 ! 768 ! ... expand the basis set with new basis vectors ( H - e*S )|psi> ... 769 ! 770 CALL hpsi_dot_v() 771 ! 772 CALL stop_clock( 'regterg:update' ) 773 ! 774 ! ... approximate inverse iteration 775 ! 776 CALL g_psi( npwx, npw, notcnv, 1, psi(1,nb1), ew(nb1) ) 777 ! 778 ! ... "normalize" correction vectors psi(:,nb1:nbase+notcnv) in 779 ! ... order to improve numerical stability of subspace diagonalization 780 ! ... (rdiaghg) ew is used as work array : 781 ! 782 ! ... ew = <psi_i|psi_i>, i = nbase + 1, nbase + notcnv 783 ! 784 DO n = 1, notcnv 785 ! 786 ew(n) = 2.D0 * ddot( npw2, psi(1,nbase+n), 1, psi(1,nbase+n), 1 ) 787 ! 788 IF ( gstart == 2 ) ew(n) = ew(n) - psi(1,nbase+n) * psi(1,nbase+n) 789 ! 790 END DO 791 ! 792 CALL mp_sum( ew( 1:notcnv ), intra_bgrp_comm ) 793 ! 794 DO n = 1, notcnv 795 ! 796 psi(:,nbase+n) = psi(:,nbase+n) / SQRT( ew(n) ) 797 ! ... set Im[ psi(G=0) ] - needed for numerical stability 798 IF ( gstart == 2 ) psi(1,nbase+n) = CMPLX( DBLE(psi(1,nbase+n)), 0.D0 ,kind=DP) 799 ! 800 END DO 801 ! 802 ! ... here compute the hpsi and spsi of the new functions 803 ! 804 CALL h_psi( npwx, npw, notcnv, psi(1,nb1), hpsi(1,nb1) ) ; nhpsi = nhpsi + notcnv 805 ! 806 IF ( uspp ) CALL s_psi( npwx, npw, notcnv, psi(1,nb1), spsi(1,nb1) ) 807 ! 808 ! ... update the reduced hamiltonian 809 ! 810 CALL start_clock( 'regterg:overlap' ) 811 ! 812 ! we need to save the old descriptor in order to redistribute matrices 813 ! 814 idesc_old = idesc 815 ! 816 ! ... RE-Initialize the matrix descriptor 817 ! 818 CALL desc_init( nbase+notcnv, nx, la_proc, idesc, rank_ip, irc_ip, nrc_ip ) 819 ! 820 IF( la_proc ) THEN 821 822 ! redistribute hl and sl (see dsqmred), since the dimension of the subspace has changed 823 ! 824 vl = hl 825 DEALLOCATE( hl ) 826 ALLOCATE( hl( nx , nx ), STAT=ierr ) 827 IF( ierr /= 0 ) & 828 CALL errore( 'pregterg ',' cannot allocate hl ', ABS(ierr) ) 829 830 CALL laxlib_dsqmred( nbase, vl, idesc_old(LAX_DESC_NRCX), idesc_old, nbase+notcnv, hl, nx, idesc ) 831 832 vl = sl 833 DEALLOCATE( sl ) 834 ALLOCATE( sl( nx , nx ), STAT=ierr ) 835 IF( ierr /= 0 ) & 836 CALL errore( 'pregterg ',' cannot allocate sl ', ABS(ierr) ) 837 838 CALL laxlib_dsqmred( nbase, vl, idesc_old(LAX_DESC_NRCX), idesc_old, nbase+notcnv, sl, nx, idesc ) 839 840 DEALLOCATE( vl ) 841 ALLOCATE( vl( nx , nx ), STAT=ierr ) 842 IF( ierr /= 0 ) & 843 CALL errore( 'pregterg ',' cannot allocate vl ', ABS(ierr) ) 844 845 END IF 846 ! 847 ! 848 CALL update_distmat( hl, psi, hpsi ) 849 ! 850 IF ( uspp ) THEN 851 ! 852 CALL update_distmat( sl, psi, spsi ) 853 ! 854 ELSE 855 ! 856 CALL update_distmat( sl, psi, psi ) 857 ! 858 END IF 859 ! 860 CALL stop_clock( 'regterg:overlap' ) 861 ! 862 nbase = nbase + notcnv 863 ! 864 ! ... diagonalize the reduced hamiltonian 865 ! Call block parallel algorithm 866 ! 867 CALL start_clock( 'regterg:diag' ) 868 IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm 869 ! only the first bgrp performs the diagonalization 870 IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc ) 871 IF( nbgrp > 1 ) THEN ! results must be brodcast to the other bnd groups 872 CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) 873 CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) 874 ENDIF 875 ELSE 876 CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc ) 877 END IF 878 CALL stop_clock( 'regterg:diag' ) 879 ! 880 ! ... test for convergence 881 ! 882 WHERE( btype(1:nvec) == 1 ) 883 ! 884 conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < ethr ) ) 885 ! 886 ELSEWHERE 887 ! 888 conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < empty_ethr ) ) 889 ! 890 END WHERE 891 ! ... next line useful for band parallelization of exact exchange 892 IF ( nbgrp > 1 ) CALL mp_bcast(conv,root_bgrp_id,inter_bgrp_comm) 893 ! 894 notcnv = COUNT( .NOT. conv(:) ) 895 ! 896 e(1:nvec) = ew(1:nvec) 897 ! 898 ! ... if overall convergence has been achieved, or the dimension of 899 ! ... the reduced basis set is becoming too large, or in any case if 900 ! ... we are at the last iteration refresh the basis set. i.e. replace 901 ! ... the first nvec elements with the current estimate of the 902 ! ... eigenvectors; set the basis dimension to nvec. 903 ! 904 IF ( notcnv == 0 .OR. nbase+notcnv > nvecx .OR. dav_iter == maxter ) THEN 905 ! 906 CALL start_clock( 'regterg:last' ) 907 ! 908 CALL refresh_evc() 909 ! 910 IF ( notcnv == 0 ) THEN 911 ! 912 ! ... all roots converged: return 913 ! 914 CALL stop_clock( 'regterg:last' ) 915 ! 916 EXIT iterate 917 ! 918 ELSE IF ( dav_iter == maxter ) THEN 919 ! 920 ! ... last iteration, some roots not converged: return 921 ! 922 !!!WRITE( stdout, '(5X,"WARNING: ",I5, & 923 !!! & " eigenvalues not converged in regterg")' ) notcnv 924 ! 925 CALL stop_clock( 'regterg:last' ) 926 ! 927 EXIT iterate 928 ! 929 END IF 930 ! 931 ! ... refresh psi, H*psi and S*psi 932 ! 933 psi(:,1:nvec) = evc(:,1:nvec) 934 ! 935 IF ( uspp ) THEN 936 ! 937 CALL refresh_spsi() 938 ! 939 END IF 940 ! 941 CALL refresh_hpsi() 942 ! 943 ! ... refresh the reduced hamiltonian 944 ! 945 nbase = nvec 946 ! 947 CALL desc_init( nvec, nx, la_proc, idesc, rank_ip, irc_ip, nrc_ip ) 948 ! 949 IF( la_proc ) THEN 950 ! 951 ! note that nx has been changed by desc_init 952 ! we need to re-alloc with the new size. 953 ! 954 DEALLOCATE( vl, hl, sl ) 955 ALLOCATE( vl( nx, nx ), STAT=ierr ) 956 IF( ierr /= 0 ) & 957 CALL errore( 'pregterg ',' cannot allocate vl ', ABS(ierr) ) 958 ALLOCATE( hl( nx, nx ), STAT=ierr ) 959 IF( ierr /= 0 ) & 960 CALL errore( 'pregterg ',' cannot allocate hl ', ABS(ierr) ) 961 ALLOCATE( sl( nx, nx ), STAT=ierr ) 962 IF( ierr /= 0 ) & 963 CALL errore( 'pregterg ',' cannot allocate sl ', ABS(ierr) ) 964 ! 965 END IF 966 ! 967 CALL set_h_from_e( ) 968 ! 969 CALL set_to_identity( vl, idesc ) 970 CALL set_to_identity( sl, idesc ) 971 ! 972 CALL stop_clock( 'regterg:last' ) 973 ! 974 END IF 975 ! 976 END DO iterate 977 ! 978 DEALLOCATE( vl, hl, sl ) 979 ! 980 DEALLOCATE( rank_ip ) 981 DEALLOCATE( irc_ip ) 982 DEALLOCATE( nrc_ip ) 983 DEALLOCATE( ic_notcnv ) 984 DEALLOCATE( notcnv_ip ) 985 DEALLOCATE( conv ) 986 DEALLOCATE( ew ) 987 ! 988 IF ( uspp ) DEALLOCATE( spsi ) 989 ! 990 DEALLOCATE( hpsi ) 991 DEALLOCATE( psi ) 992 ! 993 CALL stop_clock( 'regterg' ) 994 !call print_clock( 'regterg' ) 995 !call print_clock( 'regterg:init' ) 996 !call print_clock( 'regterg:diag' ) 997 !call print_clock( 'regterg:update' ) 998 !call print_clock( 'regterg:overlap' ) 999 !call print_clock( 'regterg:last' ) 1000 1001 ! 1002 RETURN 1003 ! 1004 ! 1005CONTAINS 1006 ! 1007 ! 1008 SUBROUTINE set_to_identity( distmat, idesc ) 1009 INTEGER, INTENT(IN) :: idesc(LAX_DESC_SIZE) 1010 REAL(DP), INTENT(OUT) :: distmat(:,:) 1011 INTEGER :: i 1012 distmat = 0_DP 1013 IF( idesc(LAX_DESC_MYC) == idesc(LAX_DESC_MYR) .AND. idesc(LAX_DESC_ACTIVE_NODE) > 0 ) THEN 1014 DO i = 1, idesc(LAX_DESC_NC) 1015 distmat( i, i ) = 1_DP 1016 END DO 1017 END IF 1018 RETURN 1019 END SUBROUTINE set_to_identity 1020 ! 1021 ! 1022 SUBROUTINE reorder_v() 1023 ! 1024 INTEGER :: ipc, ipr 1025 INTEGER :: nc, ic 1026 INTEGER :: nl, npl 1027 ! 1028 np = 0 1029 ! 1030 notcnv_ip = 0 1031 ! 1032 n = 0 1033 ! 1034 DO ipc = 1, idesc(LAX_DESC_NPC) 1035 ! 1036 nc = nrc_ip( ipc ) 1037 ic = irc_ip( ipc ) 1038 ! 1039 npl = 0 1040 ! 1041 IF( ic <= nvec ) THEN 1042 ! 1043 DO nl = 1, min( nvec - ic + 1, nc ) 1044 ! 1045 n = n + 1 1046 ! 1047 IF ( .NOT. conv(n) ) THEN 1048 ! 1049 ! ... this root not yet converged ... 1050 ! 1051 np = np + 1 1052 npl = npl + 1 1053 IF( npl == 1 ) ic_notcnv( ipc ) = np 1054 ! 1055 ! ... reorder eigenvectors so that coefficients for unconverged 1056 ! ... roots come first. This allows to use quick matrix-matrix 1057 ! ... multiplications to set a new basis vector (see below) 1058 ! 1059 notcnv_ip( ipc ) = notcnv_ip( ipc ) + 1 1060 ! 1061 IF ( npl /= nl ) THEN 1062 IF( la_proc .AND. idesc(LAX_DESC_MYC) == ipc-1 ) THEN 1063 vl( :, npl) = vl( :, nl ) 1064 END IF 1065 END IF 1066 ! 1067 ! ... for use in g_psi 1068 ! 1069 ew(nbase+np) = e(n) 1070 ! 1071 END IF 1072 ! 1073 END DO 1074 ! 1075 END IF 1076 ! 1077 END DO 1078 ! 1079 END SUBROUTINE reorder_v 1080 ! 1081 ! 1082 SUBROUTINE hpsi_dot_v() 1083 ! 1084 INTEGER :: ipc, ipr 1085 INTEGER :: nr, nc, ir, ic, notcl, root, np 1086 REAL(DP), ALLOCATABLE :: vtmp( :, : ) 1087 COMPLEX(DP), ALLOCATABLE :: ptmp( :, : ) 1088 REAL(DP) :: beta 1089 1090 ALLOCATE( vtmp( nx, nx ) ) 1091 ALLOCATE( ptmp( npwx, nx ) ) 1092 1093 DO ipc = 1, idesc(LAX_DESC_NPC) 1094 ! 1095 IF( notcnv_ip( ipc ) > 0 ) THEN 1096 1097 notcl = notcnv_ip( ipc ) 1098 ic = ic_notcnv( ipc ) 1099 1100 beta = 0.0d0 1101 1102 DO ipr = 1, idesc(LAX_DESC_NPR) 1103 ! 1104 nr = nrc_ip( ipr ) 1105 ir = irc_ip( ipr ) 1106 ! 1107 root = rank_ip( ipr, ipc ) 1108 1109 IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN 1110 vtmp(:,1:notcl) = vl(:,1:notcl) 1111 END IF 1112 1113 CALL mp_bcast( vtmp(:,1:notcl), root, ortho_parent_comm ) 1114 ! 1115 IF ( uspp ) THEN 1116 ! 1117 CALL DGEMM( 'N', 'N', npw2, notcl, nr, 1.D0, & 1118 spsi( 1, ir ), npwx2, vtmp, nx, beta, psi(1,nb1+ic-1), npwx2 ) 1119 ! 1120 ELSE 1121 ! 1122 CALL DGEMM( 'N', 'N', npw2, notcl, nr, 1.D0, & 1123 psi( 1, ir ), npwx2, vtmp, nx, beta, psi(1,nb1+ic-1), npwx2 ) 1124 ! 1125 END IF 1126 ! 1127 CALL DGEMM( 'N', 'N', npw2, notcl, nr, 1.D0, & 1128 hpsi( 1, ir ), npwx2, vtmp, nx, beta, ptmp, npwx2 ) 1129 1130 beta = 1.0d0 1131 1132 END DO 1133 1134 DO np = 1, notcl 1135 ! 1136 psi(1:npw,nbase+np+ic-1) = ptmp(1:npw,np) - ew(nbase+np+ic-1) * psi(1:npw,nbase+np+ic-1) 1137 ! 1138 END DO 1139 ! 1140 END IF 1141 ! 1142 END DO 1143 1144 1145 DEALLOCATE( vtmp ) 1146 DEALLOCATE( ptmp ) 1147 1148 RETURN 1149 END SUBROUTINE hpsi_dot_v 1150 ! 1151 ! 1152 SUBROUTINE refresh_evc( ) 1153 ! 1154 INTEGER :: ipc, ipr 1155 INTEGER :: nr, nc, ir, ic, root 1156 REAL(DP), ALLOCATABLE :: vtmp( :, : ) 1157 REAL(DP) :: beta 1158 1159 ALLOCATE( vtmp( nx, nx ) ) 1160 ! 1161 DO ipc = 1, idesc(LAX_DESC_NPC) 1162 ! 1163 nc = nrc_ip( ipc ) 1164 ic = irc_ip( ipc ) 1165 ! 1166 IF( ic <= nvec ) THEN 1167 ! 1168 nc = min( nc, nvec - ic + 1 ) 1169 ! 1170 beta = 0.0d0 1171 1172 DO ipr = 1, idesc(LAX_DESC_NPR) 1173 ! 1174 nr = nrc_ip( ipr ) 1175 ir = irc_ip( ipr ) 1176 ! 1177 root = rank_ip( ipr, ipc ) 1178 1179 IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN 1180 ! 1181 ! this proc sends his block 1182 ! 1183 CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm ) 1184 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, & 1185 psi(1,ir), npwx2, vl, nx, beta, evc(1,ic), npwx2 ) 1186 ELSE 1187 ! 1188 ! all other procs receive 1189 ! 1190 CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm ) 1191 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, & 1192 psi(1,ir), npwx2, vtmp, nx, beta, evc(1,ic), npwx2 ) 1193 END IF 1194 ! 1195 1196 beta = 1.0d0 1197 1198 END DO 1199 ! 1200 END IF 1201 ! 1202 END DO 1203 ! 1204 DEALLOCATE( vtmp ) 1205 1206 RETURN 1207 END SUBROUTINE refresh_evc 1208 ! 1209 ! 1210 SUBROUTINE refresh_spsi( ) 1211 ! 1212 INTEGER :: ipc, ipr 1213 INTEGER :: nr, nc, ir, ic, root 1214 REAL(DP), ALLOCATABLE :: vtmp( :, : ) 1215 REAL(DP) :: beta 1216 1217 ALLOCATE( vtmp( nx, nx ) ) 1218 ! 1219 DO ipc = 1, idesc(LAX_DESC_NPC) 1220 ! 1221 nc = nrc_ip( ipc ) 1222 ic = irc_ip( ipc ) 1223 ! 1224 IF( ic <= nvec ) THEN 1225 ! 1226 nc = min( nc, nvec - ic + 1 ) 1227 ! 1228 beta = 0_DP 1229 ! 1230 DO ipr = 1, idesc(LAX_DESC_NPR) 1231 ! 1232 nr = nrc_ip( ipr ) 1233 ir = irc_ip( ipr ) 1234 ! 1235 root = rank_ip( ipr, ipc ) 1236 1237 IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN 1238 ! 1239 ! this proc sends his block 1240 ! 1241 CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm ) 1242 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, & 1243 spsi(1,ir), npwx2, vl, nx, beta, psi(1,nvec+ic), npwx2 ) 1244 ELSE 1245 ! 1246 ! all other procs receive 1247 ! 1248 CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm ) 1249 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, & 1250 spsi(1,ir), npwx2, vtmp, nx, beta, psi(1,nvec+ic), npwx2 ) 1251 END IF 1252 ! 1253 beta = 1_DP 1254 1255 END DO 1256 ! 1257 END IF 1258 ! 1259 END DO 1260 ! 1261 spsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec) 1262 ! 1263 DEALLOCATE( vtmp ) 1264 1265 RETURN 1266 END SUBROUTINE refresh_spsi 1267 ! 1268 ! 1269 ! 1270 SUBROUTINE refresh_hpsi( ) 1271 ! 1272 INTEGER :: ipc, ipr 1273 INTEGER :: nr, nc, ir, ic, root 1274 REAL(DP), ALLOCATABLE :: vtmp( :, : ) 1275 REAL(DP) :: beta 1276 1277 ALLOCATE( vtmp( nx, nx ) ) 1278 ! 1279 DO ipc = 1, idesc(LAX_DESC_NPC) 1280 ! 1281 nc = nrc_ip( ipc ) 1282 ic = irc_ip( ipc ) 1283 ! 1284 IF( ic <= nvec ) THEN 1285 ! 1286 nc = min( nc, nvec - ic + 1 ) 1287 ! 1288 beta = 0.0d0 1289 ! 1290 DO ipr = 1, idesc(LAX_DESC_NPR) 1291 ! 1292 nr = nrc_ip( ipr ) 1293 ir = irc_ip( ipr ) 1294 ! 1295 root = rank_ip( ipr, ipc ) 1296 1297 IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN 1298 ! 1299 ! this proc sends his block 1300 ! 1301 CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm ) 1302 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, & 1303 hpsi(1,ir), npwx2, vl, nx, beta, psi(1,nvec+ic), npwx2 ) 1304 ELSE 1305 ! 1306 ! all other procs receive 1307 ! 1308 CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm ) 1309 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, & 1310 hpsi(1,ir), npwx2, vtmp, nx, beta, psi(1,nvec+ic), npwx2 ) 1311 END IF 1312 ! 1313 beta = 1.0d0 1314 1315 END DO 1316 ! 1317 END IF 1318 ! 1319 END DO 1320 ! 1321 DEALLOCATE( vtmp ) 1322 1323 hpsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec) 1324 1325 RETURN 1326 END SUBROUTINE refresh_hpsi 1327 ! 1328 ! 1329 1330 SUBROUTINE compute_distmat( dm, v, w ) 1331 ! 1332 ! This subroutine compute <vi|wj> and store the 1333 ! result in distributed matrix dm 1334 ! 1335 INTEGER :: ipc, ipr 1336 INTEGER :: nr, nc, ir, ic, root 1337 REAL(DP), INTENT(OUT) :: dm( :, : ) 1338 COMPLEX(DP) :: v(:,:), w(:,:) 1339 REAL(DP), ALLOCATABLE :: work( :, : ) 1340 ! 1341 ALLOCATE( work( nx, nx ) ) 1342 ! 1343 work = 0.0d0 1344 ! 1345 DO ipc = 1, idesc(LAX_DESC_NPC) ! loop on column procs 1346 ! 1347 nc = nrc_ip( ipc ) 1348 ic = irc_ip( ipc ) 1349 ! 1350 DO ipr = 1, ipc ! use symmetry for the loop on row procs 1351 ! 1352 nr = nrc_ip( ipr ) 1353 ir = irc_ip( ipr ) 1354 ! 1355 ! rank of the processor for which this block (ipr,ipc) is destinated 1356 ! 1357 root = rank_ip( ipr, ipc ) 1358 1359 ! use blas subs. on the matrix block 1360 1361 CALL DGEMM( 'T', 'N', nr, nc, npw2, 2.D0 , & 1362 v(1,ir), npwx2, w(1,ic), npwx2, 0.D0, work, nx ) 1363 1364 IF ( gstart == 2 ) & 1365 CALL DGER( nr, nc, -1.D0, v(1,ir), npwx2, w(1,ic), npwx2, work, nx ) 1366 1367 ! accumulate result on dm of root proc. 1368 1369 CALL mp_root_sum( work, dm, root, ortho_parent_comm ) 1370 1371 END DO 1372 ! 1373 END DO 1374 IF (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp 1375 ! 1376 CALL laxlib_dsqmsym( nbase, dm, nx, idesc ) 1377 ! 1378 DEALLOCATE( work ) 1379 ! 1380 RETURN 1381 END SUBROUTINE compute_distmat 1382 ! 1383 ! 1384 SUBROUTINE update_distmat( dm, v, w ) 1385 ! 1386 INTEGER :: ipc, ipr 1387 INTEGER :: nr, nc, ir, ic, root, icc, ii 1388 REAL(DP) :: dm( :, : ) 1389 COMPLEX(DP) :: v(:,:), w(:,:) 1390 REAL(DP), ALLOCATABLE :: vtmp( :, : ) 1391 1392 ALLOCATE( vtmp( nx, nx ) ) 1393 ! 1394 vtmp = 0.0d0 1395 ! 1396 DO ipc = 1, idesc(LAX_DESC_NPC) 1397 ! 1398 nc = nrc_ip( ipc ) 1399 ic = irc_ip( ipc ) 1400 ! 1401 IF( ic+nc-1 >= nb1 ) THEN 1402 1403 nc = MIN( nc, ic+nc-1 - nb1 + 1 ) 1404 IF( ic >= nb1 ) THEN 1405 ii = ic 1406 icc = 1 1407 ELSE 1408 ii = nb1 1409 icc = nb1-ic+1 1410 END IF 1411 1412 DO ipr = 1, ipc ! desc%npr use symmetry 1413 ! 1414 nr = nrc_ip( ipr ) 1415 ir = irc_ip( ipr ) 1416 ! 1417 root = rank_ip( ipr, ipc ) 1418 1419 CALL DGEMM( 'T', 'N', nr, nc, npw2, 2.D0, v( 1, ir ), & 1420 npwx2, w(1,ii), npwx2, 0.D0, vtmp, nx ) 1421 ! 1422 IF ( gstart == 2 ) & 1423 CALL DGER( nr, nc, -1.D0, v( 1, ir ), npwx2, w(1,ii), npwx2, vtmp, nx ) 1424 IF (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) vtmp = vtmp/nbgrp 1425 1426 IF( (idesc(LAX_DESC_ACTIVE_NODE) > 0) .AND. & 1427 (ipr-1 == idesc(LAX_DESC_MYR)) .AND. (ipc-1 == idesc(LAX_DESC_MYC)) ) THEN 1428 CALL mp_root_sum( vtmp(:,1:nc), dm(:,icc:icc+nc-1), root, ortho_parent_comm ) 1429 ELSE 1430 CALL mp_root_sum( vtmp(:,1:nc), dm, root, ortho_parent_comm ) 1431 END IF 1432 1433 1434 END DO 1435 ! 1436 END IF 1437 ! 1438 END DO 1439 ! 1440 CALL laxlib_dsqmsym( nbase+notcnv, dm, nx, idesc ) 1441 ! 1442 DEALLOCATE( vtmp ) 1443 RETURN 1444 END SUBROUTINE update_distmat 1445 ! 1446 ! 1447 ! 1448 SUBROUTINE set_e_from_h() 1449 INTEGER :: nc, ic, i 1450 e(1:nbase) = 0.0d0 1451 IF( idesc(LAX_DESC_MYC) == idesc(LAX_DESC_MYR) .AND. la_proc ) THEN 1452 nc = idesc(LAX_DESC_NC) 1453 ic = idesc(LAX_DESC_IC) 1454 DO i = 1, nc 1455 e( i + ic - 1 ) = hl( i, i ) 1456 END DO 1457 END IF 1458 CALL mp_sum( e(1:nbase), ortho_parent_comm ) 1459 RETURN 1460 END SUBROUTINE set_e_from_h 1461 ! 1462 SUBROUTINE set_h_from_e() 1463 INTEGER :: nc, ic, i 1464 IF( la_proc ) THEN 1465 hl = 0.0d0 1466 IF( idesc(LAX_DESC_MYC) == idesc(LAX_DESC_MYR) ) THEN 1467 nc = idesc(LAX_DESC_NC) 1468 ic = idesc(LAX_DESC_IC) 1469 DO i = 1, nc 1470 hl(i,i) = e( i + ic - 1 ) 1471 END DO 1472 END IF 1473 END IF 1474 RETURN 1475 END SUBROUTINE set_h_from_e 1476 ! 1477END SUBROUTINE pregterg 1478