1! 2! Copyright (C) 2001-2015 Quantum ESPRESSO group 3! This file is distributed under the terms of the 4! GNU General Public License. See the file `License' 5! in the root directory of the present distribution, 6! or http://www.gnu.org/copyleft/gpl.txt . 7! 8#define ZERO ( 0.D0, 0.D0 ) 9#define ONE ( 1.D0, 0.D0 ) 10! 11!---------------------------------------------------------------------------- 12SUBROUTINE cegterg( h_psi, s_psi, uspp, g_psi, & 13 npw, npwx, nvec, nvecx, npol, evc, ethr, & 14 e, btype, notcnv, lrot, dav_iter, nhpsi ) 15 !---------------------------------------------------------------------------- 16 ! 17 ! ... iterative solution of the eigenvalue problem: 18 ! 19 ! ... ( H - e S ) * evc = 0 20 ! 21 ! ... where H is an hermitean operator, e is a real scalar, 22 ! ... S is an overlap matrix, evc is a complex vector 23 ! 24 USE util_param, ONLY : DP 25 USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id,& 26 nbgrp, my_bgrp_id, me_bgrp, root_bgrp 27 USE mp, ONLY : mp_sum, mp_gather, mp_bcast, mp_size,& 28 mp_type_create_column_section, mp_type_free 29 ! 30 IMPLICIT NONE 31 ! 32 include 'laxlib.fh' 33 ! 34 INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx, npol 35 ! dimension of the matrix to be diagonalized 36 ! leading dimension of matrix evc, as declared in the calling pgm unit 37 ! integer number of searched low-lying roots 38 ! maximum dimension of the reduced basis set : 39 ! (the basis set is refreshed when its dimension would exceed nvecx) 40 ! umber of spin polarizations 41 INTEGER, PARAMETER :: blocksize = 256 42 INTEGER :: numblock 43 ! chunking parameters 44 COMPLEX(DP), INTENT(INOUT) :: evc(npwx*npol,nvec) 45 ! evc contains the refined estimates of the eigenvectors 46 REAL(DP), INTENT(IN) :: ethr 47 ! energy threshold for convergence : 48 ! root improvement is stopped, when two consecutive estimates of the root 49 ! differ by less than ethr. 50 LOGICAL, INTENT(IN) :: uspp 51 ! if .FALSE. : do not calculate S|psi> 52 INTEGER, INTENT(IN) :: btype(nvec) 53 ! band type ( 1 = occupied, 0 = empty ) 54 LOGICAL, INTENT(IN) :: lrot 55 ! .TRUE. if the wfc have already been rotated 56 REAL(DP), INTENT(OUT) :: e(nvec) 57 ! contains the estimated roots. 58 INTEGER, INTENT(OUT) :: dav_iter, notcnv 59 ! integer number of iterations performed 60 ! number of unconverged roots 61 INTEGER, INTENT(OUT) :: nhpsi 62 ! total number of indivitual hpsi 63 ! 64 ! ... LOCAL variables 65 ! 66 INTEGER, PARAMETER :: maxter = 20 67 ! maximum number of iterations 68 ! 69 INTEGER :: kter, nbase, np, kdim, kdmx, n, m, ipol, nb1, nbn 70 ! counter on iterations 71 ! dimension of the reduced basis 72 ! counter on the reduced basis vectors 73 ! adapted npw and npwx 74 ! do-loop counters 75 INTEGER :: n_start, n_end, my_n 76 INTEGER :: column_section_type 77 ! defines a column section for communication 78 INTEGER :: ierr 79 COMPLEX(DP), ALLOCATABLE :: hc(:,:), sc(:,:), vc(:,:) 80 ! Hamiltonian on the reduced basis 81 ! S matrix on the reduced basis 82 ! the eigenvectors of the Hamiltonian 83 REAL(DP), ALLOCATABLE :: ew(:) 84 ! eigenvalues of the reduced hamiltonian 85 COMPLEX(DP), ALLOCATABLE :: psi(:,:), hpsi(:,:), spsi(:,:) 86 ! work space, contains psi 87 ! the product of H and psi 88 ! the product of S and psi 89 LOGICAL, ALLOCATABLE :: conv(:) 90 ! true if the root is converged 91 REAL(DP) :: empty_ethr 92 ! threshold for empty bands 93 INTEGER, ALLOCATABLE :: recv_counts(:), displs(:) 94 ! receive counts and memory offsets 95 ! 96 REAL(DP), EXTERNAL :: ddot 97 ! 98 EXTERNAL h_psi, s_psi, g_psi 99 ! h_psi(npwx,npw,nvec,psi,hpsi) 100 ! calculates H|psi> 101 ! s_psi(npwx,npw,nvec,spsi) 102 ! calculates S|psi> (if needed) 103 ! Vectors psi,hpsi,spsi are dimensioned (npwx*npol,nvec) 104 ! g_psi(npwx,npw,notcnv,psi,e) 105 ! calculates (diag(h)-e)^-1 * psi, diagonal approx. to (h-e)^-1*psi 106 ! the first nvec columns contain the trial eigenvectors 107 ! 108 nhpsi = 0 109 CALL start_clock( 'cegterg' ); !write(*,*) 'start cegterg' ; FLUSH(6) 110 ! 111 IF ( nvec > nvecx / 2 ) CALL errore( 'cegterg', 'nvecx is too small', 1 ) 112 ! 113 ! ... threshold for empty bands 114 ! 115 empty_ethr = MAX( ( ethr * 5.D0 ), 1.D-5 ) 116 ! 117 IF ( npol == 1 ) THEN 118 ! 119 kdim = npw 120 kdmx = npwx 121 ! 122 ELSE 123 ! 124 kdim = npwx*npol 125 kdmx = npwx*npol 126 ! 127 END IF 128 ! 129 ! compute the number of chuncks 130 numblock = (npw+blocksize-1)/blocksize 131 ! 132 ALLOCATE( psi( npwx*npol, nvecx ), STAT=ierr ) 133 IF( ierr /= 0 ) & 134 CALL errore( ' cegterg ',' cannot allocate psi ', ABS(ierr) ) 135 ALLOCATE( hpsi( npwx*npol, nvecx ), STAT=ierr ) 136 IF( ierr /= 0 ) & 137 CALL errore( ' cegterg ',' cannot allocate hpsi ', ABS(ierr) ) 138 ! 139 IF ( uspp ) THEN 140 ALLOCATE( spsi( npwx*npol, nvecx ), STAT=ierr ) 141 IF( ierr /= 0 ) & 142 CALL errore( ' cegterg ',' cannot allocate spsi ', ABS(ierr) ) 143 END IF 144 ! 145 ALLOCATE( sc( nvecx, nvecx ), STAT=ierr ) 146 IF( ierr /= 0 ) & 147 CALL errore( ' cegterg ',' cannot allocate sc ', ABS(ierr) ) 148 ALLOCATE( hc( nvecx, nvecx ), STAT=ierr ) 149 IF( ierr /= 0 ) & 150 CALL errore( ' cegterg ',' cannot allocate hc ', ABS(ierr) ) 151 ALLOCATE( vc( nvecx, nvecx ), STAT=ierr ) 152 IF( ierr /= 0 ) & 153 CALL errore( ' cegterg ',' cannot allocate vc ', ABS(ierr) ) 154 ALLOCATE( ew( nvecx ), STAT=ierr ) 155 IF( ierr /= 0 ) & 156 CALL errore( ' cegterg ',' cannot allocate ew ', ABS(ierr) ) 157 ALLOCATE( conv( nvec ), STAT=ierr ) 158 IF( ierr /= 0 ) & 159 CALL errore( ' cegterg ',' cannot allocate conv ', ABS(ierr) ) 160 ALLOCATE( recv_counts(mp_size(inter_bgrp_comm)), displs(mp_size(inter_bgrp_comm)) ) 161 ! 162 notcnv = nvec 163 nbase = nvec 164 conv = .FALSE. 165 ! 166 CALL threaded_memcpy(psi, evc, nvec*npol*npwx*2) 167 ! 168 ! ... hpsi contains h times the basis vectors 169 ! 170 CALL h_psi( npwx, npw, nvec, psi, hpsi ) ; nhpsi = nhpsi + nvec 171 ! 172 ! ... spsi contains s times the basis vectors 173 ! 174 IF ( uspp ) CALL s_psi( npwx, npw, nvec, psi, spsi ) 175 ! 176 ! ... hc contains the projection of the hamiltonian onto the reduced 177 ! ... space vc contains the eigenvectors of hc 178 ! 179 CALL start_clock( 'cegterg:init' ) 180 ! 181 CALL divide_all(inter_bgrp_comm,nbase,n_start,n_end,recv_counts,displs) 182 CALL mp_type_create_column_section(sc(1,1), 0, nbase, nvecx, column_section_type) 183 my_n = n_end - n_start + 1; !write (*,*) nbase,n_start,n_end 184 ! 185 if (n_start .le. n_end) & 186 CALL ZGEMM( 'C','N', nbase, my_n, kdim, ONE, psi, kdmx, hpsi(1,n_start), kdmx, ZERO, hc(1,n_start), nvecx ) 187 ! 188 if (n_start .le. n_end) CALL mp_sum( hc( 1:nbase, n_start:n_end ), intra_bgrp_comm ) 189 CALL mp_gather( hc, column_section_type, recv_counts, displs, root_bgrp_id, inter_bgrp_comm ) 190 ! 191 IF ( uspp ) THEN 192 ! 193 if (n_start .le. n_end) & 194 CALL ZGEMM( 'C','N', nbase, my_n, kdim, ONE, psi, kdmx, spsi(1,n_start), kdmx, & 195 ZERO, sc(1,n_start), nvecx ) 196 ! 197 ELSE 198 ! 199 if (n_start .le. n_end) & 200 CALL ZGEMM( 'C','N', nbase, my_n, kdim, ONE, psi, kdmx, psi(1,n_start), kdmx, & 201 ZERO, sc(1,n_start), nvecx ) 202 ! 203 END IF 204 ! 205 if (n_start .le. n_end) CALL mp_sum( sc( 1:nbase, n_start:n_end ), intra_bgrp_comm ) 206 CALL mp_gather( sc, column_section_type, recv_counts, displs, root_bgrp_id, inter_bgrp_comm ) 207 ! 208 CALL mp_type_free( column_section_type ) 209 ! 210 DO n = 1, nbase 211 ! 212 ! ... the diagonal of hc and sc must be strictly real 213 ! 214 hc(n,n) = CMPLX( REAL( hc(n,n) ), 0.D0 ,kind=DP) 215 sc(n,n) = CMPLX( REAL( sc(n,n) ), 0.D0 ,kind=DP) 216 ! 217 DO m = n + 1, nbase 218 ! 219 hc(n,m) = CONJG( hc(m,n) ) 220 sc(n,m) = CONJG( sc(m,n) ) 221 ! 222 END DO 223 ! 224 END DO 225 ! 226 CALL stop_clock( 'cegterg:init' ) 227 ! 228 IF ( lrot ) THEN 229 ! 230 vc(1:nbase,1:nbase) = ZERO 231 ! 232 DO n = 1, nbase 233 ! 234 e(n) = REAL( hc(n,n) ) 235 ! 236 vc(n,n) = ONE 237 ! 238 END DO 239 ! 240 CALL mp_bcast( e, root_bgrp_id, inter_bgrp_comm ) 241 ! 242 ELSE 243 ! 244 ! ... diagonalize the reduced hamiltonian 245 ! 246 CALL start_clock( 'cegterg:diag' ) 247 IF( my_bgrp_id == root_bgrp_id ) THEN 248 CALL diaghg( nbase, nvec, hc, sc, nvecx, ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) 249 END IF 250 IF( nbgrp > 1 ) THEN 251 CALL mp_bcast( vc, root_bgrp_id, inter_bgrp_comm ) 252 CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) 253 ENDIF 254 CALL stop_clock( 'cegterg:diag' ) 255 ! 256 e(1:nvec) = ew(1:nvec) 257 ! 258 END IF 259 ! 260 ! ... iterate 261 ! 262 iterate: DO kter = 1, maxter 263 ! 264 dav_iter = kter ; !write(*,*) kter, notcnv, conv 265 ! 266 CALL start_clock( 'cegterg:update' ) 267 ! 268 np = 0 269 ! 270 DO n = 1, nvec 271 ! 272 IF ( .NOT. conv(n) ) THEN 273 ! 274 ! ... this root not yet converged ... 275 ! 276 np = np + 1 277 ! 278 ! ... reorder eigenvectors so that coefficients for unconverged 279 ! ... roots come first. This allows to use quick matrix-matrix 280 ! ... multiplications to set a new basis vector (see below) 281 ! 282 IF ( np /= n ) vc(:,np) = vc(:,n) 283 ! 284 ! ... for use in g_psi 285 ! 286 ew(nbase+np) = e(n) 287 ! 288 END IF 289 ! 290 END DO 291 ! 292 nb1 = nbase + 1 293 ! 294 ! ... expand the basis set with new basis vectors ( H - e*S )|psi> ... 295 ! 296 CALL divide(inter_bgrp_comm,nbase,n_start,n_end) 297 my_n = n_end - n_start + 1; !write (*,*) nbase,n_start,n_end 298 IF ( uspp ) THEN 299 ! 300 if (n_start .le. n_end) & 301 CALL ZGEMM( 'N','N', kdim, notcnv, my_n, ONE, spsi(1,n_start), kdmx, vc(n_start,1), nvecx, & 302 ZERO, psi(1,nb1), kdmx ) 303 ! 304 ELSE 305 ! 306 if (n_start .le. n_end) & 307 CALL ZGEMM( 'N','N', kdim, notcnv, my_n, ONE, psi(1,n_start), kdmx, vc(n_start,1), nvecx, & 308 ZERO, psi(1,nb1), kdmx ) 309 ! 310 END IF 311! NB: must not call mp_sum over inter_bgrp_comm here because it is done later to the full correction 312 ! 313 !$omp parallel do collapse(3) 314 DO n = 1, notcnv 315 DO ipol = 1, npol 316 DO m = 1, numblock 317 psi( (m-1)*blocksize+(ipol-1)*npwx+1: & 318 MIN(npw, m*blocksize)+(ipol-1)*npwx,nbase+n) = & 319 - ew(nbase+n) * & 320 psi( (m-1)*blocksize+(ipol-1)*npwx+1: & 321 MIN(npw, m*blocksize)+(ipol-1)*npwx,nbase+n) 322 END DO 323 END DO 324 END DO 325 !$omp end parallel do 326 ! 327 if (n_start .le. n_end) & 328 CALL ZGEMM( 'N','N', kdim, notcnv, my_n, ONE, hpsi(1,n_start), kdmx, vc(n_start,1), nvecx, & 329 ONE, psi(1,nb1), kdmx ) 330 CALL mp_sum( psi(:,nb1:nbase+notcnv), inter_bgrp_comm ) 331 ! 332 ! clean up garbage if there is any 333 IF (npw < npwx) psi(npw+1:npwx,nb1:nbase+notcnv) = ZERO 334 IF (npol == 2) psi(npwx+npw+1:2*npwx,nb1:nbase+notcnv) = ZERO 335 ! 336 CALL stop_clock( 'cegterg:update' ) 337 ! 338 ! ... approximate inverse iteration 339 ! 340 CALL g_psi( npwx, npw, notcnv, npol, psi(1,nb1), ew(nb1) ) 341 ! 342 ! ... "normalize" correction vectors psi(:,nb1:nbase+notcnv) in 343 ! ... order to improve numerical stability of subspace diagonalization 344 ! ... (cdiaghg) ew is used as work array : 345 ! 346 ! ... ew = <psi_i|psi_i>, i = nbase + 1, nbase + notcnv 347 ! 348 DO n = 1, notcnv 349 ! 350 nbn = nbase + n 351 ! 352 IF ( npol == 1 ) THEN 353 ! 354 ew(n) = ddot( 2*npw, psi(1,nbn), 1, psi(1,nbn), 1 ) 355 ! 356 ELSE 357 ! 358 ew(n) = ddot( 2*npw, psi(1,nbn), 1, psi(1,nbn), 1 ) + & 359 ddot( 2*npw, psi(npwx+1,nbn), 1, psi(npwx+1,nbn), 1 ) 360 ! 361 END IF 362 ! 363 END DO 364 ! 365 CALL mp_sum( ew( 1:notcnv ), intra_bgrp_comm ) 366 ! 367 !$omp parallel do collapse(3) 368 DO n = 1, notcnv 369 DO ipol = 1, npol 370 DO m = 1, numblock 371 psi( (m-1)*blocksize+(ipol-1)*npwx+1: & 372 MIN(npw, m*blocksize)+(ipol-1)*npwx,nbase+n) = & 373 psi( (m-1)*blocksize+(ipol-1)*npwx+1: & 374 MIN(npw, m*blocksize)+(ipol-1)*npwx,nbase+n) / & 375 SQRT( ew(n) ) 376 END DO 377 END DO 378 END DO 379 !$omp end parallel do 380 ! 381 ! ... here compute the hpsi and spsi of the new functions 382 ! 383 CALL h_psi( npwx, npw, notcnv, psi(1,nb1), hpsi(1,nb1) ) ; nhpsi = nhpsi + notcnv 384 ! 385 IF ( uspp ) CALL s_psi( npwx, npw, notcnv, psi(1,nb1), spsi(1,nb1) ) 386 ! 387 ! ... update the reduced hamiltonian 388 ! 389 CALL start_clock( 'cegterg:overlap' ) 390 ! 391 CALL divide_all(inter_bgrp_comm,nbase+notcnv,n_start,n_end,recv_counts,displs) 392 CALL mp_type_create_column_section(sc(1,1), nbase, notcnv, nvecx, column_section_type) 393 my_n = n_end - n_start + 1; !write (*,*) nbase+notcnv,n_start,n_end 394 ! 395 CALL ZGEMM( 'C','N', notcnv, my_n, kdim, ONE, hpsi(1,nb1), kdmx, psi(1,n_start), kdmx, & 396 ZERO, hc(nb1,n_start), nvecx ) 397 ! 398 if (n_start .le. n_end) CALL mp_sum( hc( nb1:nbase+notcnv, n_start:n_end ), intra_bgrp_comm ) 399 CALL mp_gather( hc, column_section_type, recv_counts, displs, root_bgrp_id, inter_bgrp_comm ) 400 ! 401 CALL divide(inter_bgrp_comm,nbase+notcnv,n_start,n_end) 402 my_n = n_end - n_start + 1; !write (*,*) nbase+notcnv,n_start,n_end 403 IF ( uspp ) THEN 404 ! 405 CALL ZGEMM( 'C','N', notcnv, my_n, kdim, ONE, spsi(1,nb1), kdmx, psi(1,n_start), kdmx, & 406 ZERO, sc(nb1,n_start), nvecx ) 407 ! 408 ELSE 409 ! 410 CALL ZGEMM( 'C','N', notcnv, my_n, kdim, ONE, psi(1,nb1), kdmx, psi(1,n_start), kdmx, & 411 ZERO, sc(nb1,n_start), nvecx ) 412 ! 413 END IF 414 ! 415 if (n_start .le. n_end) CALL mp_sum( sc( nb1:nbase+notcnv, n_start:n_end ), intra_bgrp_comm ) 416 CALL mp_gather( sc, column_section_type, recv_counts, displs, root_bgrp_id, inter_bgrp_comm ) 417 ! 418 CALL mp_type_free( column_section_type ) 419 ! 420 CALL stop_clock( 'cegterg:overlap' ) 421 ! 422 nbase = nbase + notcnv 423 ! 424 DO n = 1, nbase 425 ! 426 ! ... the diagonal of hc and sc must be strictly real 427 ! 428 IF( n>=nb1 ) THEN 429 hc(n,n) = CMPLX( REAL( hc(n,n) ), 0.D0 ,kind=DP) 430 sc(n,n) = CMPLX( REAL( sc(n,n) ), 0.D0 ,kind=DP) 431 ENDIF 432 ! 433 DO m = MAX(n+1,nb1), nbase 434 ! 435 hc(n,m) = CONJG( hc(m,n) ) 436 sc(n,m) = CONJG( sc(m,n) ) 437 ! 438 END DO 439 ! 440 END DO 441 ! 442 ! ... diagonalize the reduced hamiltonian 443 ! 444 CALL start_clock( 'cegterg:diag' ) 445 IF( my_bgrp_id == root_bgrp_id ) THEN 446 CALL diaghg( nbase, nvec, hc, sc, nvecx, ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) 447 END IF 448 IF( nbgrp > 1 ) THEN 449 CALL mp_bcast( vc, root_bgrp_id, inter_bgrp_comm ) 450 CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) 451 ENDIF 452 CALL stop_clock( 'cegterg:diag' ) 453 ! 454 ! ... test for convergence 455 ! 456 WHERE( btype(1:nvec) == 1 ) 457 ! 458 conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < ethr ) ) 459 ! 460 ELSEWHERE 461 ! 462 conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < empty_ethr ) ) 463 ! 464 END WHERE 465 ! ... next line useful for band parallelization of exact exchange 466 IF ( nbgrp > 1 ) CALL mp_bcast(conv,root_bgrp_id,inter_bgrp_comm) 467 ! 468 notcnv = COUNT( .NOT. conv(:) ) 469 ! 470 e(1:nvec) = ew(1:nvec) 471 ! 472 ! ... if overall convergence has been achieved, or the dimension of 473 ! ... the reduced basis set is becoming too large, or in any case if 474 ! ... we are at the last iteration refresh the basis set. i.e. replace 475 ! ... the first nvec elements with the current estimate of the 476 ! ... eigenvectors; set the basis dimension to nvec. 477 ! 478 IF ( notcnv == 0 .OR. & 479 nbase+notcnv > nvecx .OR. dav_iter == maxter ) THEN 480 ! 481 CALL start_clock( 'cegterg:last' ) 482 ! 483 CALL divide(inter_bgrp_comm,nbase,n_start,n_end) 484 my_n = n_end - n_start + 1; !write (*,*) nbase,n_start,n_end 485 CALL ZGEMM( 'N','N', kdim, nvec, my_n, ONE, psi(1,n_start), kdmx, vc(n_start,1), nvecx, & 486 ZERO, evc, kdmx ) 487 CALL mp_sum( evc, inter_bgrp_comm ) 488 ! 489 IF ( notcnv == 0 ) THEN 490 ! 491 ! ... all roots converged: return 492 ! 493 CALL stop_clock( 'cegterg:last' ) 494 ! 495 EXIT iterate 496 ! 497 ELSE IF ( dav_iter == maxter ) THEN 498 ! 499 ! ... last iteration, some roots not converged: return 500 ! 501 !!!WRITE( stdout, '(5X,"WARNING: ",I5, & 502 !!! & " eigenvalues not converged")' ) notcnv 503 ! 504 CALL stop_clock( 'cegterg:last' ) 505 ! 506 EXIT iterate 507 ! 508 END IF 509 ! 510 ! ... refresh psi, H*psi and S*psi 511 ! 512 CALL threaded_memcpy(psi, evc, nvec*npol*npwx*2) 513 ! 514 IF ( uspp ) THEN 515 ! 516 CALL ZGEMM( 'N','N', kdim, nvec, my_n, ONE, spsi(1,n_start), kdmx, vc(n_start,1), nvecx, & 517 ZERO, psi(1,nvec+1), kdmx) 518 CALL threaded_memcpy(spsi, psi(1,nvec+1), nvec*npol*npwx*2) 519 CALL mp_sum( spsi(:,1:nvec), inter_bgrp_comm ) 520 ! 521 END IF 522 ! 523 CALL ZGEMM( 'N','N', kdim, nvec, my_n, ONE, hpsi(1,n_start), kdmx, vc(n_start,1), nvecx, & 524 ZERO, psi(1,nvec+1), kdmx ) 525 CALL threaded_memcpy(hpsi, psi(1,nvec+1), nvec*npol*npwx*2) 526 CALL mp_sum( hpsi(:,1:nvec), inter_bgrp_comm ) 527 ! 528 ! ... refresh the reduced hamiltonian 529 ! 530 nbase = nvec 531 ! 532 hc(1:nbase,1:nbase) = ZERO 533 sc(1:nbase,1:nbase) = ZERO 534 vc(1:nbase,1:nbase) = ZERO 535 ! 536 DO n = 1, nbase 537 ! 538 hc(n,n) = CMPLX( e(n), 0.0_DP ,kind=DP) 539 ! 540 sc(n,n) = ONE 541 vc(n,n) = ONE 542 ! 543 END DO 544 ! 545 CALL stop_clock( 'cegterg:last' ) 546 ! 547 END IF 548 ! 549 END DO iterate 550 ! 551 DEALLOCATE( recv_counts ) 552 DEALLOCATE( displs ) 553 DEALLOCATE( conv ) 554 DEALLOCATE( ew ) 555 DEALLOCATE( vc ) 556 DEALLOCATE( hc ) 557 DEALLOCATE( sc ) 558 ! 559 IF ( uspp ) DEALLOCATE( spsi ) 560 ! 561 DEALLOCATE( hpsi ) 562 DEALLOCATE( psi ) 563 ! 564 CALL stop_clock( 'cegterg' ); !write(*,*) 'stop cegterg' ; FLUSH(6) 565 !call print_clock( 'cegterg' ) 566 !call print_clock( 'cegterg:init' ) 567 !call print_clock( 'cegterg:diag' ) 568 !call print_clock( 'cegterg:update' ) 569 !call print_clock( 'cegterg:overlap' ) 570 !call print_clock( 'cegterg:last' ) 571 ! 572 RETURN 573 ! 574END SUBROUTINE cegterg 575 576! 577! Subroutine with distributed matrixes 578! (written by Carlo Cavazzoni) 579! 580!---------------------------------------------------------------------------- 581SUBROUTINE pcegterg(h_psi, s_psi, uspp, g_psi, & 582 npw, npwx, nvec, nvecx, npol, evc, ethr, & 583 e, btype, notcnv, lrot, dav_iter, nhpsi ) 584 !---------------------------------------------------------------------------- 585 ! 586 ! ... iterative solution of the eigenvalue problem: 587 ! 588 ! ... ( H - e S ) * evc = 0 589 ! 590 ! ... where H is an hermitean operator, e is a real scalar, 591 ! ... S is an uspp matrix, evc is a complex vector 592 ! 593 USE util_param, ONLY : DP, stdout 594 USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, nbgrp, my_bgrp_id 595 USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum, mp_barrier, & 596 mp_size, mp_type_free, mp_allgather 597 ! 598 IMPLICIT NONE 599 ! 600 include 'laxlib.fh' 601 ! 602 INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx, npol 603 ! dimension of the matrix to be diagonalized 604 ! leading dimension of matrix evc, as declared in the calling pgm unit 605 ! integer number of searched low-lying roots 606 ! maximum dimension of the reduced basis set 607 ! (the basis set is refreshed when its dimension would exceed nvecx) 608 ! number of spin polarizations 609 INTEGER, PARAMETER :: blocksize = 256 610 INTEGER :: numblock 611 ! chunking parameters 612 COMPLEX(DP), INTENT(INOUT) :: evc(npwx*npol,nvec) 613 ! evc contains the refined estimates of the eigenvectors 614 REAL(DP), INTENT(IN) :: ethr 615 ! energy threshold for convergence: root improvement is stopped, 616 ! when two consecutive estimates of the root differ by less than ethr. 617 LOGICAL, INTENT(IN) :: uspp 618 ! if .FALSE. : S|psi> not needed 619 INTEGER, INTENT(IN) :: btype(nvec) 620 ! band type ( 1 = occupied, 0 = empty ) 621 LOGICAL, INTENT(IN) :: lrot 622 ! .TRUE. if the wfc have already been rotated 623 REAL(DP), INTENT(OUT) :: e(nvec) 624 ! contains the estimated roots. 625 INTEGER, INTENT(OUT) :: dav_iter, notcnv 626 ! integer number of iterations performed 627 ! number of unconverged roots 628 INTEGER, INTENT(OUT) :: nhpsi 629 ! total number of indivitual hpsi 630 ! 631 ! ... LOCAL variables 632 ! 633 INTEGER, PARAMETER :: maxter = 20 634 ! maximum number of iterations 635 ! 636 INTEGER :: kter, nbase, np, kdim, kdmx, n, m, ipol, nb1, nbn 637 ! counter on iterations 638 ! dimension of the reduced basis 639 ! counter on the reduced basis vectors 640 ! do-loop counters 641 INTEGER :: ierr 642 REAL(DP), ALLOCATABLE :: ew(:) 643 COMPLEX(DP), ALLOCATABLE :: hl(:,:), sl(:,:), vl(:,:) 644 ! Hamiltonian on the reduced basis 645 ! S matrix on the reduced basis 646 ! eigenvectors of the Hamiltonian 647 ! eigenvalues of the reduced hamiltonian 648 COMPLEX(DP), ALLOCATABLE :: psi(:,:), hpsi(:,:), spsi(:,:) 649 ! work space, contains psi 650 ! the product of H and psi 651 ! the product of S and psi 652 LOGICAL, ALLOCATABLE :: conv(:) 653 ! true if the root is converged 654 REAL(DP) :: empty_ethr 655 ! threshold for empty bands 656 INTEGER :: idesc(LAX_DESC_SIZE), idesc_old(LAX_DESC_SIZE) 657 INTEGER, ALLOCATABLE :: irc_ip( : ) 658 INTEGER, ALLOCATABLE :: nrc_ip( : ) 659 INTEGER, ALLOCATABLE :: rank_ip( :, : ) 660 ! matrix distribution descriptors 661 INTEGER :: nx 662 ! maximum local block dimension 663 LOGICAL :: la_proc 664 ! flag to distinguish procs involved in linear algebra 665 INTEGER, ALLOCATABLE :: notcnv_ip( : ) 666 INTEGER, ALLOCATABLE :: ic_notcnv( : ) 667 ! 668 INTEGER :: np_ortho(2), ortho_parent_comm 669 LOGICAL :: do_distr_diag_inside_bgrp 670 ! 671 REAL(DP), EXTERNAL :: ddot 672 ! 673 EXTERNAL h_psi, s_psi, g_psi 674 ! h_psi(npwx,npw,nvec,psi,hpsi) 675 ! calculates H|psi> 676 ! s_psi(npwx,npw,nvec,psi,spsi) 677 ! calculates S|psi> (if needed) 678 ! Vectors psi,hpsi,spsi are dimensioned (npwx,nvec) 679 ! g_psi(npwx,npw,notcnv,psi,e) 680 ! calculates (diag(h)-e)^-1 * psi, diagonal approx. to (h-e)^-1*psi 681 ! the first nvec columns contain the trial eigenvectors 682 ! 683 nhpsi = 0 684 CALL start_clock( 'cegterg' ) 685 ! 686 CALL laxlib_getval( np_ortho = np_ortho, ortho_parent_comm = ortho_parent_comm, & 687 do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) 688 ! 689 IF ( nvec > nvecx / 2 ) CALL errore( 'pcegterg', 'nvecx is too small', 1 ) 690 ! 691 ! ... threshold for empty bands 692 ! 693 empty_ethr = MAX( ( ethr * 5.D0 ), 1.D-5 ) 694 ! 695 IF ( npol == 1 ) THEN 696 ! 697 kdim = npw 698 kdmx = npwx 699 ! 700 ELSE 701 ! 702 kdim = npwx*npol 703 kdmx = npwx*npol 704 ! 705 END IF 706 ! 707 ! compute the number of chuncks 708 numblock = (npw+blocksize-1)/blocksize 709 ! 710 ALLOCATE( psi( npwx*npol, nvecx ), STAT=ierr ) 711 IF( ierr /= 0 ) & 712 CALL errore( ' pcegterg ',' cannot allocate psi ', ABS(ierr) ) 713 ! 714 ALLOCATE( hpsi( npwx*npol, nvecx ), STAT=ierr ) 715 IF( ierr /= 0 ) & 716 CALL errore( ' pcegterg ',' cannot allocate hpsi ', ABS(ierr) ) 717 ! 718 IF ( uspp ) THEN 719 ALLOCATE( spsi( npwx*npol, nvecx ), STAT=ierr ) 720 IF( ierr /= 0 ) & 721 CALL errore( ' pcegterg ',' cannot allocate spsi ', ABS(ierr) ) 722 END IF 723 ! 724 ! ... Initialize the matrix descriptor 725 ! 726 ALLOCATE( ic_notcnv( np_ortho(2) ), STAT=ierr ) 727 IF( ierr /= 0 ) & 728 CALL errore( ' pcegterg ',' cannot allocate ic_notcnv ', ABS(ierr) ) 729 ! 730 ALLOCATE( notcnv_ip( np_ortho(2) ), STAT=ierr ) 731 IF( ierr /= 0 ) & 732 CALL errore( ' pcegterg ',' cannot allocate notcnv_ip ', ABS(ierr) ) 733 ! 734 CALL desc_init( nvec, nx, la_proc, idesc, rank_ip, irc_ip, nrc_ip ) 735 ! 736 IF( la_proc ) THEN 737 ! 738 ! only procs involved in the diagonalization need to allocate local 739 ! matrix block. 740 ! 741 ALLOCATE( vl( nx , nx ), STAT=ierr ) 742 IF( ierr /= 0 ) & 743 CALL errore( ' pcegterg ',' cannot allocate vl ', ABS(ierr) ) 744 ! 745 ALLOCATE( sl( nx , nx ), STAT=ierr ) 746 IF( ierr /= 0 ) & 747 CALL errore( ' pcegterg ',' cannot allocate sl ', ABS(ierr) ) 748 ! 749 ALLOCATE( hl( nx , nx ), STAT=ierr ) 750 IF( ierr /= 0 ) & 751 CALL errore( ' pcegterg ',' cannot allocate hl ', ABS(ierr) ) 752 ! 753 ELSE 754 ! 755 ALLOCATE( vl( 1 , 1 ), STAT=ierr ) 756 IF( ierr /= 0 ) & 757 CALL errore( ' pcegterg ',' cannot allocate vl ', ABS(ierr) ) 758 ! 759 ALLOCATE( sl( 1 , 1 ), STAT=ierr ) 760 IF( ierr /= 0 ) & 761 CALL errore( ' pcegterg ',' cannot allocate sl ', ABS(ierr) ) 762 ! 763 ALLOCATE( hl( 1 , 1 ), STAT=ierr ) 764 IF( ierr /= 0 ) & 765 CALL errore( ' pcegterg ',' cannot allocate hl ', ABS(ierr) ) 766 ! 767 END IF 768 ! 769 ALLOCATE( ew( nvecx ), STAT=ierr ) 770 IF( ierr /= 0 ) & 771 CALL errore( ' pcegterg ',' cannot allocate ew ', ABS(ierr) ) 772 ! 773 ALLOCATE( conv( nvec ), STAT=ierr ) 774 IF( ierr /= 0 ) & 775 CALL errore( ' pcegterg ',' cannot allocate conv ', ABS(ierr) ) 776 ! 777 notcnv = nvec 778 nbase = nvec 779 conv = .FALSE. 780 ! 781 CALL threaded_memcpy(psi, evc, nvec*npol*npwx*2) 782 ! 783 ! ... hpsi contains h times the basis vectors 784 ! 785 CALL h_psi( npwx, npw, nvec, psi, hpsi ) ; nhpsi = nhpsi + nvec 786 ! 787 IF ( uspp ) CALL s_psi( npwx, npw, nvec, psi, spsi ) 788 ! 789 ! ... hl contains the projection of the hamiltonian onto the reduced 790 ! ... space, vl contains the eigenvectors of hl. Remember hl, vl and sl 791 ! ... are all distributed across processors, global replicated matrixes 792 ! ... here are never allocated 793 ! 794 CALL start_clock( 'cegterg:init' ) 795 796 CALL compute_distmat( hl, psi, hpsi ) 797 ! 798 IF ( uspp ) THEN 799 ! 800 CALL compute_distmat( sl, psi, spsi ) 801 ! 802 ELSE 803 ! 804 CALL compute_distmat( sl, psi, psi ) 805 ! 806 END IF 807 CALL stop_clock( 'cegterg:init' ) 808 ! 809 IF ( lrot ) THEN 810 ! 811 CALL set_e_from_h() 812 ! 813 CALL set_to_identity( vl, idesc ) 814 ! 815 ELSE 816 ! 817 ! ... diagonalize the reduced hamiltonian 818 ! Calling block parallel algorithm 819 ! 820 CALL start_clock( 'cegterg:diag' ) 821 IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm 822 ! only the first bgrp performs the diagonalization 823 IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc ) 824 IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups 825 CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) 826 CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) 827 ENDIF 828 ELSE 829 CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc ) 830 END IF 831 CALL stop_clock( 'cegterg:diag' ) 832 ! 833 e(1:nvec) = ew(1:nvec) 834 ! 835 END IF 836 ! 837 ! ... iterate 838 ! 839 iterate: DO kter = 1, maxter 840 ! 841 dav_iter = kter ; !write(*,*) kter, notcnv, conv 842 ! 843 CALL start_clock( 'cegterg:update' ) 844 ! 845 CALL reorder_v() 846 ! 847 nb1 = nbase + 1 848 ! 849 ! ... expand the basis set with new basis vectors ( H - e*S )|psi> ... 850 ! 851 CALL hpsi_dot_v() 852 ! 853 CALL stop_clock( 'cegterg:update' ) 854 ! 855 ! ... approximate inverse iteration 856 ! 857 CALL g_psi( npwx, npw, notcnv, npol, psi(1,nb1), ew(nb1) ) 858 ! 859 ! ... "normalize" correction vectors psi(:,nb1:nbase+notcnv) in 860 ! ... order to improve numerical stability of subspace diagonalization 861 ! ... (cdiaghg) ew is used as work array : 862 ! 863 ! ... ew = <psi_i|psi_i>, i = nbase + 1, nbase + notcnv 864 ! 865 DO n = 1, notcnv 866 ! 867 nbn = nbase + n 868 ! 869 IF ( npol == 1 ) THEN 870 ! 871 ew(n) = ddot( 2*npw, psi(1,nbn), 1, psi(1,nbn), 1 ) 872 ! 873 ELSE 874 ! 875 ew(n) = ddot( 2*npw, psi(1,nbn), 1, psi(1,nbn), 1 ) + & 876 ddot( 2*npw, psi(npwx+1,nbn), 1, psi(npwx+1,nbn), 1 ) 877 ! 878 END IF 879 ! 880 END DO 881 ! 882 CALL mp_sum( ew( 1:notcnv ), intra_bgrp_comm ) 883 ! 884 !$omp parallel do collapse(3) 885 DO n = 1, notcnv 886 DO ipol = 1, npol 887 DO m = 1, numblock 888 psi( (m-1)*blocksize+(ipol-1)*npwx+1: & 889 MIN(npw, m*blocksize)+(ipol-1)*npwx,nbase+n) = & 890 psi( (m-1)*blocksize+(ipol-1)*npwx+1: & 891 MIN(npw, m*blocksize)+(ipol-1)*npwx,nbase+n) / & 892 SQRT( ew(n) ) 893 END DO 894 END DO 895 END DO 896 !$omp end parallel do 897 ! 898 ! ... here compute the hpsi and spsi of the new functions 899 ! 900 CALL h_psi( npwx, npw, notcnv, psi(1,nb1), hpsi(1,nb1) ) ; nhpsi = nhpsi + notcnv 901 ! 902 IF ( uspp ) CALL s_psi( npwx, npw, notcnv, psi(1,nb1), spsi(1,nb1) ) 903 ! 904 ! ... update the reduced hamiltonian 905 ! 906 CALL start_clock( 'cegterg:overlap' ) 907 ! 908 ! we need to save the old descriptor in order to redistribute matrices 909 ! 910 idesc_old = idesc 911 ! 912 ! ... RE-Initialize the matrix descriptor 913 ! 914 CALL desc_init( nbase+notcnv, nx, la_proc, idesc, rank_ip, irc_ip, nrc_ip ) 915 ! 916 IF( la_proc ) THEN 917 918 ! redistribute hl and sl (see dsqmred), since the dimension of the subspace has changed 919 ! 920 vl = hl 921 DEALLOCATE( hl ) 922 ALLOCATE( hl( nx , nx ), STAT=ierr ) 923 IF( ierr /= 0 ) & 924 CALL errore( ' pcegterg ',' cannot allocate hl ', ABS(ierr) ) 925 926 CALL laxlib_zsqmred( nbase, vl, idesc_old(LAX_DESC_NRCX), idesc_old, nbase+notcnv, hl, nx, idesc ) 927 928 vl = sl 929 DEALLOCATE( sl ) 930 ALLOCATE( sl( nx , nx ), STAT=ierr ) 931 IF( ierr /= 0 ) & 932 CALL errore( ' pcegterg ',' cannot allocate sl ', ABS(ierr) ) 933 934 CALL laxlib_zsqmred( nbase, vl, idesc_old(LAX_DESC_NRCX), idesc_old, nbase+notcnv, sl, nx, idesc ) 935 936 DEALLOCATE( vl ) 937 ALLOCATE( vl( nx , nx ), STAT=ierr ) 938 IF( ierr /= 0 ) & 939 CALL errore( ' pcegterg ',' cannot allocate vl ', ABS(ierr) ) 940 941 END IF 942 ! 943 ! 944 CALL update_distmat( hl, psi, hpsi ) 945 ! 946 IF ( uspp ) THEN 947 ! 948 CALL update_distmat( sl, psi, spsi ) 949 ! 950 ELSE 951 ! 952 CALL update_distmat( sl, psi, psi ) 953 ! 954 END IF 955 ! 956 CALL stop_clock( 'cegterg:overlap' ) 957 ! 958 nbase = nbase + notcnv 959 ! 960 ! ... diagonalize the reduced hamiltonian 961 ! Call block parallel algorithm 962 ! 963 CALL start_clock( 'cegterg:diag' ) 964 IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm 965 ! only the first bgrp performs the diagonalization 966 IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc ) 967 IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups 968 CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) 969 CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) 970 ENDIF 971 ELSE 972 CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc ) 973 END IF 974 CALL stop_clock( 'cegterg:diag' ) 975 ! 976 ! ... test for convergence 977 ! 978 WHERE( btype(1:nvec) == 1 ) 979 ! 980 conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < ethr ) ) 981 ! 982 ELSEWHERE 983 ! 984 conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < empty_ethr ) ) 985 ! 986 END WHERE 987 ! ... next line useful for band parallelization of exact exchange 988 IF ( nbgrp > 1 ) CALL mp_bcast(conv,root_bgrp_id,inter_bgrp_comm) 989 ! 990 notcnv = COUNT( .NOT. conv(:) ) 991 ! 992 e(1:nvec) = ew(1:nvec) 993 ! 994 ! ... if overall convergence has been achieved, or the dimension of 995 ! ... the reduced basis set is becoming too large, or in any case if 996 ! ... we are at the last iteration refresh the basis set. i.e. replace 997 ! ... the first nvec elements with the current estimate of the 998 ! ... eigenvectors; set the basis dimension to nvec. 999 ! 1000 IF ( notcnv == 0 .OR. nbase+notcnv > nvecx .OR. dav_iter == maxter ) THEN 1001 ! 1002 CALL start_clock( 'cegterg:last' ) 1003 ! 1004 CALL refresh_evc() 1005 ! 1006 IF ( notcnv == 0 ) THEN 1007 ! 1008 ! ... all roots converged: return 1009 ! 1010 CALL stop_clock( 'cegterg:last' ) 1011 ! 1012 EXIT iterate 1013 ! 1014 ELSE IF ( dav_iter == maxter ) THEN 1015 ! 1016 ! ... last iteration, some roots not converged: return 1017 ! 1018 !!!WRITE( stdout, '(5X,"WARNING: ",I5, & 1019 !!! & " eigenvalues not converged")' ) notcnv 1020 ! 1021 CALL stop_clock( 'cegterg:last' ) 1022 ! 1023 EXIT iterate 1024 ! 1025 END IF 1026 ! 1027 ! ... refresh psi, H*psi and S*psi 1028 ! 1029 CALL threaded_memcpy(psi, evc, nvec*npol*npwx*2) 1030 ! 1031 IF ( uspp ) THEN 1032 ! 1033 CALL refresh_spsi() 1034 ! 1035 END IF 1036 ! 1037 CALL refresh_hpsi() 1038 ! 1039 ! ... refresh the reduced hamiltonian 1040 ! 1041 nbase = nvec 1042 ! 1043 CALL desc_init( nvec, nx, la_proc, idesc, rank_ip, irc_ip, nrc_ip ) 1044 ! 1045 IF( la_proc ) THEN 1046 ! 1047 ! note that nx has been changed by desc_init 1048 ! we need to re-alloc with the new size. 1049 ! 1050 DEALLOCATE( vl, hl, sl ) 1051 ALLOCATE( vl( nx, nx ), STAT=ierr ) 1052 IF( ierr /= 0 ) & 1053 CALL errore( ' pcegterg ',' cannot allocate vl ', ABS(ierr) ) 1054 ALLOCATE( hl( nx, nx ), STAT=ierr ) 1055 IF( ierr /= 0 ) & 1056 CALL errore( ' pcegterg ',' cannot allocate hl ', ABS(ierr) ) 1057 ALLOCATE( sl( nx, nx ), STAT=ierr ) 1058 IF( ierr /= 0 ) & 1059 CALL errore( ' pcegterg ',' cannot allocate sl ', ABS(ierr) ) 1060 ! 1061 END IF 1062 ! 1063 CALL set_h_from_e( ) 1064 ! 1065 CALL set_to_identity( vl, idesc ) 1066 CALL set_to_identity( sl, idesc ) 1067 ! 1068 CALL stop_clock( 'cegterg:last' ) 1069 ! 1070 END IF 1071 ! 1072 END DO iterate 1073 ! 1074 DEALLOCATE( vl, hl, sl ) 1075 ! 1076 DEALLOCATE( rank_ip ) 1077 DEALLOCATE( irc_ip ) 1078 DEALLOCATE( nrc_ip ) 1079 DEALLOCATE( ic_notcnv ) 1080 DEALLOCATE( notcnv_ip ) 1081 DEALLOCATE( conv ) 1082 DEALLOCATE( ew ) 1083 ! 1084 IF ( uspp ) DEALLOCATE( spsi ) 1085 ! 1086 DEALLOCATE( hpsi ) 1087 DEALLOCATE( psi ) 1088 ! 1089 CALL stop_clock( 'cegterg' ) 1090 !call print_clock( 'cegterg' ) 1091 !call print_clock( 'cegterg:init' ) 1092 !call print_clock( 'cegterg:diag' ) 1093 !call print_clock( 'cegterg:update' ) 1094 !call print_clock( 'cegterg:overlap' ) 1095 !call print_clock( 'cegterg:last' ) 1096 ! 1097 RETURN 1098 ! 1099 ! 1100CONTAINS 1101 ! 1102 SUBROUTINE set_to_identity( distmat, idesc ) 1103 INTEGER, INTENT(IN) :: idesc(LAX_DESC_SIZE) 1104 COMPLEX(DP), INTENT(OUT) :: distmat(:,:) 1105 INTEGER :: i 1106 distmat = ( 0_DP , 0_DP ) 1107 IF( idesc(LAX_DESC_MYC) == idesc(LAX_DESC_MYR) .AND. idesc(LAX_DESC_ACTIVE_NODE) > 0 ) THEN 1108 DO i = 1, idesc(LAX_DESC_NC) 1109 distmat( i, i ) = ( 1_DP , 0_DP ) 1110 END DO 1111 END IF 1112 RETURN 1113 END SUBROUTINE set_to_identity 1114 ! 1115 ! 1116 SUBROUTINE reorder_v() 1117 ! 1118 INTEGER :: ipc 1119 INTEGER :: nc, ic 1120 INTEGER :: nl, npl 1121 ! 1122 np = 0 1123 ! 1124 notcnv_ip = 0 1125 ! 1126 n = 0 1127 ! 1128 DO ipc = 1, idesc(LAX_DESC_NPC) 1129 ! 1130 nc = nrc_ip( ipc ) 1131 ic = irc_ip( ipc ) 1132 ! 1133 npl = 0 1134 ! 1135 IF( ic <= nvec ) THEN 1136 ! 1137 DO nl = 1, min( nvec - ic + 1, nc ) 1138 ! 1139 n = n + 1 1140 ! 1141 IF ( .NOT. conv(n) ) THEN 1142 ! 1143 ! ... this root not yet converged ... 1144 ! 1145 np = np + 1 1146 npl = npl + 1 1147 IF( npl == 1 ) ic_notcnv( ipc ) = np 1148 ! 1149 ! ... reorder eigenvectors so that coefficients for unconverged 1150 ! ... roots come first. This allows to use quick matrix-matrix 1151 ! ... multiplications to set a new basis vector (see below) 1152 ! 1153 notcnv_ip( ipc ) = notcnv_ip( ipc ) + 1 1154 ! 1155 IF ( npl /= nl ) THEN 1156 IF( la_proc .AND. idesc(LAX_DESC_MYC) == ipc-1 ) THEN 1157 vl( :, npl) = vl( :, nl ) 1158 END IF 1159 END IF 1160 ! 1161 ! ... for use in g_psi 1162 ! 1163 ew(nbase+np) = e(n) 1164 ! 1165 END IF 1166 ! 1167 END DO 1168 ! 1169 END IF 1170 ! 1171 END DO 1172 ! 1173 END SUBROUTINE reorder_v 1174 ! 1175 ! 1176 SUBROUTINE hpsi_dot_v() 1177 ! 1178 INTEGER :: ipc, ipr 1179 INTEGER :: nr, ir, ic, notcl, root, np, ipol, ib 1180 COMPLEX(DP), ALLOCATABLE :: vtmp( :, : ) 1181 COMPLEX(DP), ALLOCATABLE :: ptmp( :, : ) 1182 COMPLEX(DP) :: beta 1183 1184 ALLOCATE( vtmp( nx, nx ) ) 1185 ALLOCATE( ptmp( npwx*npol, nx ) ) 1186 1187 DO ipc = 1, idesc(LAX_DESC_NPC) 1188 ! 1189 IF( notcnv_ip( ipc ) > 0 ) THEN 1190 1191 notcl = notcnv_ip( ipc ) 1192 ic = ic_notcnv( ipc ) 1193 1194 beta = ZERO 1195 1196 DO ipr = 1, idesc(LAX_DESC_NPR) 1197 ! 1198 nr = nrc_ip( ipr ) 1199 ir = irc_ip( ipr ) 1200 ! 1201 root = rank_ip( ipr, ipc ) 1202 1203 IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN 1204 vtmp(:,1:notcl) = vl(:,1:notcl) 1205 END IF 1206 1207 CALL mp_bcast( vtmp(:,1:notcl), root, ortho_parent_comm ) 1208 ! 1209 IF ( uspp ) THEN 1210 ! 1211 CALL ZGEMM( 'N', 'N', kdim, notcl, nr, ONE, & 1212 spsi(1, ir), kdmx, vtmp, nx, beta, psi(1,nb1+ic-1), kdmx ) 1213 ! 1214 ELSE 1215 ! 1216 CALL ZGEMM( 'N', 'N', kdim, notcl, nr, ONE, & 1217 psi(1, ir), kdmx, vtmp, nx, beta, psi(1,nb1+ic-1), kdmx ) 1218 ! 1219 END IF 1220 ! 1221 CALL ZGEMM( 'N', 'N', kdim, notcl, nr, ONE, & 1222 hpsi(1, ir), kdmx, vtmp, nx, beta, ptmp, kdmx ) 1223 1224 beta = ONE 1225 1226 END DO 1227 1228 !$omp parallel do collapse(3) 1229 DO np = 1, notcl 1230 DO ipol = 1, npol 1231 DO ib = 1, numblock 1232 ! 1233 psi( (ib-1)*blocksize+(ipol-1)*npwx+1: & 1234 MIN(npw, ib*blocksize)+(ipol-1)*npwx,nbase+np+ic-1) = & 1235 ptmp((ib-1)*blocksize+(ipol-1)*npwx+1: & 1236 MIN(npw, ib*blocksize)+(ipol-1)*npwx,np) - & 1237 ew(nbase+np+ic-1) * psi((ib-1)*blocksize+(ipol-1)*npwx+1:& 1238 MIN(npw, ib*blocksize)+(ipol-1)*npwx,nbase+np+ic-1) 1239 ! 1240 END DO 1241 END DO 1242 END DO 1243 !$omp end parallel do 1244 ! 1245 ! clean up garbage if there is any 1246 IF (npw < npwx) psi(npw+1:npwx,nbase+ic:nbase+notcl+ic-1) = ZERO 1247 IF (npol == 2) psi(npwx+npw+1:2*npwx,nbase+ic:nbase+notcl+ic-1) = ZERO 1248 ! 1249 END IF 1250 ! 1251 END DO 1252 1253 DEALLOCATE( vtmp ) 1254 DEALLOCATE( ptmp ) 1255 1256 RETURN 1257 END SUBROUTINE hpsi_dot_v 1258 ! 1259 ! 1260 SUBROUTINE refresh_evc( ) 1261 ! 1262 INTEGER :: ipc, ipr 1263 INTEGER :: nr, nc, ir, ic, root 1264 COMPLEX(DP), ALLOCATABLE :: vtmp( :, : ) 1265 COMPLEX(DP) :: beta 1266 1267 ALLOCATE( vtmp( nx, nx ) ) 1268 ! 1269 DO ipc = 1, idesc(LAX_DESC_NPC) 1270 ! 1271 nc = nrc_ip( ipc ) 1272 ic = irc_ip( ipc ) 1273 ! 1274 IF( ic <= nvec ) THEN 1275 ! 1276 nc = min( nc, nvec - ic + 1 ) 1277 ! 1278 beta = ZERO 1279 1280 DO ipr = 1, idesc(LAX_DESC_NPR) 1281 ! 1282 nr = nrc_ip( ipr ) 1283 ir = irc_ip( ipr ) 1284 ! 1285 root = rank_ip( ipr, ipc ) 1286 1287 IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN 1288 ! 1289 ! this proc sends his block 1290 ! 1291 CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm ) 1292 CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, & 1293 psi(1,ir), kdmx, vl, nx, beta, evc(1,ic), kdmx ) 1294 ELSE 1295 ! 1296 ! all other procs receive 1297 ! 1298 CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm ) 1299 CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, & 1300 psi(1,ir), kdmx, vtmp, nx, beta, evc(1,ic), kdmx ) 1301 END IF 1302 ! 1303 1304 beta = ONE 1305 1306 END DO 1307 ! 1308 END IF 1309 ! 1310 END DO 1311 ! 1312 DEALLOCATE( vtmp ) 1313 1314 RETURN 1315 END SUBROUTINE refresh_evc 1316 ! 1317 ! 1318 SUBROUTINE refresh_spsi( ) 1319 ! 1320 INTEGER :: ipc, ipr 1321 INTEGER :: nr, nc, ir, ic, root 1322 COMPLEX(DP), ALLOCATABLE :: vtmp( :, : ) 1323 COMPLEX(DP) :: beta 1324 1325 ALLOCATE( vtmp( nx, nx ) ) 1326 ! 1327 DO ipc = 1, idesc(LAX_DESC_NPC) 1328 ! 1329 nc = nrc_ip( ipc ) 1330 ic = irc_ip( ipc ) 1331 ! 1332 IF( ic <= nvec ) THEN 1333 ! 1334 nc = min( nc, nvec - ic + 1 ) 1335 ! 1336 beta = ZERO 1337 ! 1338 DO ipr = 1, idesc(LAX_DESC_NPR) 1339 ! 1340 nr = nrc_ip( ipr ) 1341 ir = irc_ip( ipr ) 1342 ! 1343 root = rank_ip( ipr, ipc ) 1344 1345 IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN 1346 ! 1347 ! this proc sends his block 1348 ! 1349 CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm ) 1350 CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, & 1351 spsi(1,ir), kdmx, vl, nx, beta, psi(1,nvec+ic), kdmx ) 1352 ELSE 1353 ! 1354 ! all other procs receive 1355 ! 1356 CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm ) 1357 CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, & 1358 spsi(1,ir), kdmx, vtmp, nx, beta, psi(1,nvec+ic), kdmx ) 1359 END IF 1360 ! 1361 beta = ONE 1362 1363 END DO 1364 ! 1365 END IF 1366 ! 1367 END DO 1368 ! 1369 CALL threaded_memcpy(spsi, psi(1,nvec+1), nvec*npol*npwx*2) 1370 ! 1371 DEALLOCATE( vtmp ) 1372 1373 RETURN 1374 END SUBROUTINE refresh_spsi 1375 ! 1376 ! 1377 ! 1378 SUBROUTINE refresh_hpsi( ) 1379 ! 1380 INTEGER :: ipc, ipr 1381 INTEGER :: nr, nc, ir, ic, root 1382 COMPLEX(DP), ALLOCATABLE :: vtmp( :, : ) 1383 COMPLEX(DP) :: beta 1384 1385 ALLOCATE( vtmp( nx, nx ) ) 1386 ! 1387 DO ipc = 1, idesc(LAX_DESC_NPC) 1388 ! 1389 nc = nrc_ip( ipc ) 1390 ic = irc_ip( ipc ) 1391 ! 1392 IF( ic <= nvec ) THEN 1393 ! 1394 nc = min( nc, nvec - ic + 1 ) 1395 ! 1396 beta = ZERO 1397 ! 1398 DO ipr = 1, idesc(LAX_DESC_NPR) 1399 ! 1400 nr = nrc_ip( ipr ) 1401 ir = irc_ip( ipr ) 1402 ! 1403 root = rank_ip( ipr, ipc ) 1404 1405 IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN 1406 ! 1407 ! this proc sends his block 1408 ! 1409 CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm ) 1410 CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, & 1411 hpsi(1,ir), kdmx, vl, nx, beta, psi(1,nvec+ic), kdmx ) 1412 ELSE 1413 ! 1414 ! all other procs receive 1415 ! 1416 CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm ) 1417 CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, & 1418 hpsi(1,ir), kdmx, vtmp, nx, beta, psi(1,nvec+ic), kdmx ) 1419 END IF 1420 ! 1421 beta = ONE 1422 1423 END DO 1424 ! 1425 END IF 1426 ! 1427 END DO 1428 ! 1429 DEALLOCATE( vtmp ) 1430 ! 1431 CALL threaded_memcpy(hpsi, psi(1,nvec+1), nvec*npol*npwx*2) 1432 ! 1433 RETURN 1434 END SUBROUTINE refresh_hpsi 1435 ! 1436 ! 1437 SUBROUTINE compute_distmat( dm, v, w ) 1438 ! 1439 ! This subroutine compute <vi|wj> and store the 1440 ! result in distributed matrix dm 1441 ! 1442 INTEGER :: ipc, ipr 1443 INTEGER :: nr, nc, ir, ic, root 1444 COMPLEX(DP), INTENT(OUT) :: dm( :, : ) 1445 COMPLEX(DP) :: v(:,:), w(:,:) 1446 COMPLEX(DP), ALLOCATABLE :: work( :, : ) 1447 ! 1448 ALLOCATE( work( nx, nx ) ) 1449 ! 1450 work = ZERO 1451 ! 1452 ! Only upper triangle is computed, then the matrix is hermitianized 1453 ! 1454 DO ipc = 1, idesc(LAX_DESC_NPC) ! loop on column procs 1455 ! 1456 nc = nrc_ip( ipc ) 1457 ic = irc_ip( ipc ) 1458 ! 1459 DO ipr = 1, ipc ! idesc(LAX_DESC_NPR) ! ipc ! use symmetry for the loop on row procs 1460 ! 1461 nr = nrc_ip( ipr ) 1462 ir = irc_ip( ipr ) 1463 ! 1464 ! rank of the processor for which this block (ipr,ipc) is destinated 1465 ! 1466 root = rank_ip( ipr, ipc ) 1467 1468 ! use blas subs. on the matrix block 1469 1470 CALL ZGEMM( 'C', 'N', nr, nc, kdim, ONE , & 1471 v(1,ir), kdmx, w(1,ic), kdmx, ZERO, work, nx ) 1472 1473 ! accumulate result on dm of root proc. 1474 1475 CALL mp_root_sum( work, dm, root, ortho_parent_comm ) 1476 1477 END DO 1478 ! 1479 END DO 1480 if (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp 1481 ! 1482 ! The matrix is hermitianized using upper triangle 1483 ! 1484 CALL laxlib_zsqmher( nbase, dm, nx, idesc ) 1485 ! 1486 DEALLOCATE( work ) 1487 ! 1488 RETURN 1489 END SUBROUTINE compute_distmat 1490 ! 1491 ! 1492 SUBROUTINE update_distmat( dm, v, w ) 1493 ! 1494 INTEGER :: ipc, ipr 1495 INTEGER :: nr, nc, ir, ic, root, icc, ii 1496 COMPLEX(DP) :: dm( :, : ) 1497 COMPLEX(DP) :: v(:,:), w(:,:) 1498 COMPLEX(DP), ALLOCATABLE :: vtmp( :, : ) 1499 1500 ALLOCATE( vtmp( nx, nx ) ) 1501 ! 1502 vtmp = ZERO 1503 ! 1504 DO ipc = 1, idesc(LAX_DESC_NPC) 1505 ! 1506 nc = nrc_ip( ipc ) 1507 ic = irc_ip( ipc ) 1508 ! 1509 IF( ic+nc-1 >= nb1 ) THEN 1510 ! 1511 nc = MIN( nc, ic+nc-1 - nb1 + 1 ) 1512 IF( ic >= nb1 ) THEN 1513 ii = ic 1514 icc = 1 1515 ELSE 1516 ii = nb1 1517 icc = nb1-ic+1 1518 END IF 1519 ! 1520 ! icc to nc is the local index of the unconverged bands 1521 ! ii is the global index of the first unconverged bands 1522 ! 1523 DO ipr = 1, ipc ! idesc(LAX_DESC_NPR) use symmetry 1524 ! 1525 nr = nrc_ip( ipr ) 1526 ir = irc_ip( ipr ) 1527 ! 1528 root = rank_ip( ipr, ipc ) 1529 1530 CALL ZGEMM( 'C', 'N', nr, nc, kdim, ONE, v(1, ir), & 1531 kdmx, w(1,ii), kdmx, ZERO, vtmp, nx ) 1532 IF (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) vtmp = vtmp/nbgrp 1533 ! 1534 IF( (idesc(LAX_DESC_ACTIVE_NODE) > 0) .AND. & 1535 (ipr-1 == idesc(LAX_DESC_MYR)) .AND. (ipc-1 == idesc(LAX_DESC_MYC)) ) THEN 1536 CALL mp_root_sum( vtmp(:,1:nc), dm(:,icc:icc+nc-1), root, ortho_parent_comm ) 1537 ELSE 1538 CALL mp_root_sum( vtmp(:,1:nc), dm, root, ortho_parent_comm ) 1539 END IF 1540 1541 END DO 1542 ! 1543 END IF 1544 ! 1545 END DO 1546 ! 1547 CALL laxlib_zsqmher( nbase+notcnv, dm, nx, idesc ) 1548 ! 1549 DEALLOCATE( vtmp ) 1550 RETURN 1551 END SUBROUTINE update_distmat 1552 ! 1553 ! 1554 SUBROUTINE set_e_from_h() 1555 INTEGER :: nc, ic, i 1556 e(1:nbase) = 0_DP 1557 IF( idesc(LAX_DESC_MYC) == idesc(LAX_DESC_MYR) .AND. la_proc ) THEN 1558 nc = idesc(LAX_DESC_NC) 1559 ic = idesc(LAX_DESC_IC) 1560 DO i = 1, nc 1561 e( i + ic - 1 ) = REAL( hl( i, i ) ) 1562 END DO 1563 END IF 1564 CALL mp_sum( e(1:nbase), ortho_parent_comm ) 1565 RETURN 1566 END SUBROUTINE set_e_from_h 1567 ! 1568 SUBROUTINE set_h_from_e() 1569 INTEGER :: nc, ic, i 1570 IF( la_proc ) THEN 1571 hl = ZERO 1572 IF( idesc(LAX_DESC_MYC) == idesc(LAX_DESC_MYR) ) THEN 1573 nc = idesc(LAX_DESC_NC) 1574 ic = idesc(LAX_DESC_IC) 1575 DO i = 1, nc 1576 hl(i,i) = CMPLX( e( i + ic - 1 ), 0_DP ,kind=DP) 1577 END DO 1578 END IF 1579 END IF 1580 RETURN 1581 END SUBROUTINE set_h_from_e 1582 ! 1583END SUBROUTINE pcegterg 1584