1!
2! Copyright (C) 2003-2015 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8#define ZERO ( 0.D0, 0.D0 )
9#define ONE  ( 1.D0, 0.D0 )
10!
11!
12!----------------------------------------------------------------------------
13SUBROUTINE regterg(  h_psi, s_psi, uspp, g_psi, &
14                    npw, npwx, nvec, nvecx, evc, ethr, &
15                    e, btype, notcnv, lrot, dav_iter, nhpsi )
16  !----------------------------------------------------------------------------
17  !
18  ! ... iterative solution of the eigenvalue problem:
19  !
20  ! ... ( H - e S ) * evc = 0
21  !
22  ! ... where H is an hermitean operator, e is a real scalar,
23  ! ... S is an uspp matrix, evc is a complex vector
24  ! ... (real wavefunctions with only half plane waves stored)
25
26  USE util_param,    ONLY : DP, stdout
27  USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, &
28          nbgrp, my_bgrp_id, me_bgrp, root_bgrp
29  USE mp_bands_util, ONLY : gstart
30  USE mp,            ONLY : mp_sum, mp_bcast
31  !
32  IMPLICIT NONE
33  !
34  include 'laxlib.fh'
35  !
36  INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx
37    ! dimension of the matrix to be diagonalized
38    ! leading dimension of matrix evc, as declared in the calling pgm unit
39    ! integer number of searched low-lying roots
40    ! maximum dimension of the reduced basis set
41    !    (the basis set is refreshed when its dimension would exceed nvecx)
42  COMPLEX(DP), INTENT(INOUT) :: evc(npwx,nvec)
43    !  evc   contains the  refined estimates of the eigenvectors
44  REAL(DP), INTENT(IN) :: ethr
45    ! energy threshold for convergence: root improvement is stopped,
46    ! when two consecutive estimates of the root differ by less than ethr.
47  LOGICAL, INTENT(IN) :: uspp
48    ! if .FALSE. : S|psi> not needed
49  INTEGER, INTENT(IN) :: btype(nvec)
50    ! band type ( 1 = occupied, 0 = empty )
51  LOGICAL, INTENT(IN) :: lrot
52    ! .TRUE. if the wfc have already been rotated
53  REAL(DP), INTENT(OUT) :: e(nvec)
54    ! contains the estimated roots.
55  INTEGER, INTENT(OUT) :: dav_iter, notcnv
56    ! integer  number of iterations performed
57    ! number of unconverged roots
58  INTEGER, INTENT(OUT) :: nhpsi
59    ! number of individual Hpsi made
60  !
61  ! ... LOCAL variables
62  !
63  INTEGER, PARAMETER :: maxter = 20
64    ! maximum number of iterations
65  !
66  INTEGER :: kter, nbase, np, npw2, npwx2, n, m, nb1, nbn
67    ! counter on iterations
68    ! dimension of the reduced basis
69    ! counter on the reduced basis vectors
70    ! do-loop counters
71    ! counter on the bands
72  INTEGER :: n_start, n_end, my_n
73  INTEGER :: ierr
74  REAL(DP), ALLOCATABLE :: hr(:,:), sr(:,:), vr(:,:), ew(:)
75    ! Hamiltonian on the reduced basis
76    ! S matrix on the reduced basis
77    ! eigenvectors of the Hamiltonian
78    ! eigenvalues of the reduced hamiltonian
79  COMPLEX(DP), ALLOCATABLE :: psi(:,:), hpsi(:,:), spsi(:,:)
80    ! work space, contains psi
81    ! the product of H and psi
82    ! the product of S and psi
83  LOGICAL, ALLOCATABLE :: conv(:)
84    ! true if the root is converged
85  REAL(DP) :: empty_ethr
86    ! threshold for empty bands
87  !
88  REAL(DP), EXTERNAL :: ddot
89  !
90  EXTERNAL  h_psi, s_psi, g_psi
91    ! h_psi(npwx,npw,nvec,psi,hpsi)
92    !     calculates H|psi>
93    ! s_psi(npwx,npw,nvec,psi,spsi)
94    !     calculates S|psi> (if needed)
95    !     Vectors psi,hpsi,spsi are dimensioned (npwx,nvec)
96    ! g_psi(npwx,npw,notcnv,psi,e)
97    !    calculates (diag(h)-e)^-1 * psi, diagonal approx. to (h-e)^-1*psi
98    !    the first nvec columns contain the trial eigenvectors
99  !
100  CALL start_clock( 'regterg' ) !; write(6,*) 'enter regterg' ; FLUSH(6)
101  !
102  IF ( nvec > nvecx / 2 ) CALL errore( 'regter', 'nvecx is too small', 1 )
103  !
104  IF ( gstart == -1 ) CALL errore( 'regter', 'gstart variable not initialized', 1 )
105  !
106  ! ... threshold for empty bands
107  !
108  empty_ethr = MAX( ( ethr * 5.D0 ), 1.D-5 )
109  !
110  npw2  = 2*npw
111  npwx2  = 2*npwx
112  !
113  ALLOCATE( psi(  npwx, nvecx ), STAT=ierr )
114  IF( ierr /= 0 ) &
115     CALL errore( 'regterg ',' cannot allocate psi ', ABS(ierr) )
116  ALLOCATE( hpsi( npwx, nvecx ), STAT=ierr )
117  IF( ierr /= 0 ) &
118     CALL errore( 'regterg ',' cannot allocate hpsi ', ABS(ierr) )
119  !
120  IF ( uspp ) THEN
121     ALLOCATE( spsi( npwx, nvecx ), STAT=ierr )
122     IF( ierr /= 0 ) &
123        CALL errore( ' regterg ',' cannot allocate spsi ', ABS(ierr) )
124  END IF
125  !
126  ALLOCATE( sr( nvecx, nvecx ), STAT=ierr )
127  IF( ierr /= 0 ) &
128     CALL errore( 'regterg ',' cannot allocate sr ', ABS(ierr) )
129  ALLOCATE( hr( nvecx, nvecx ), STAT=ierr )
130  IF( ierr /= 0 ) &
131     CALL errore( 'regterg ',' cannot allocate hr ', ABS(ierr) )
132  ALLOCATE( vr( nvecx, nvecx ), STAT=ierr )
133  IF( ierr /= 0 ) &
134     CALL errore( 'regterg ',' cannot allocate vr ', ABS(ierr) )
135  ALLOCATE( ew( nvecx ), STAT=ierr )
136  IF( ierr /= 0 ) &
137     CALL errore( 'regterg ',' cannot allocate ew ', ABS(ierr) )
138  ALLOCATE( conv( nvec ), STAT=ierr )
139  IF( ierr /= 0 ) &
140     CALL errore( 'regterg ',' cannot allocate conv ', ABS(ierr) )
141  !
142  notcnv = nvec
143  nbase  = nvec
144  conv   = .FALSE.
145  !
146  IF ( uspp ) spsi = ZERO
147  !
148  hpsi = ZERO
149  psi  = ZERO
150  psi(:,1:nvec) = evc(:,1:nvec)
151  ! ... set Im[ psi(G=0) ] -  needed for numerical stability
152  IF ( gstart == 2 ) psi(1,1:nvec) = CMPLX( DBLE( psi(1,1:nvec) ), 0.D0 ,kind=DP)
153  !
154  ! ... hpsi contains h times the basis vectors
155  !
156  CALL h_psi( npwx, npw, nvec, psi, hpsi )  ; nhpsi = nvec
157  !
158  ! ... spsi contains s times the basis vectors
159  !
160  IF ( uspp ) CALL s_psi( npwx, npw, nvec, psi, spsi )
161  !
162  ! ... hr contains the projection of the hamiltonian onto the reduced
163  ! ... space vr contains the eigenvectors of hr
164  !
165  CALL start_clock( 'regterg:init' )
166  hr(:,:) = 0.D0
167  sr(:,:) = 0.D0
168  vr(:,:) = 0.D0
169  !
170  CALL divide(inter_bgrp_comm,nbase,n_start,n_end)
171  my_n = n_end - n_start + 1; !write (*,*) nbase,n_start,n_end
172  if (n_start .le. n_end) &
173  CALL DGEMM( 'T','N', nbase, my_n, npw2, 2.D0 , psi, npwx2, hpsi(1,n_start), npwx2, 0.D0, hr(1,n_start), nvecx )
174  IF ( gstart == 2 ) CALL DGER( nbase, my_n, -1.D0, psi, npwx2, hpsi(1,n_start), npwx2, hr(1,n_start), nvecx )
175  CALL mp_sum( hr( :, 1:nbase ), inter_bgrp_comm )
176  !
177  CALL mp_sum( hr( :, 1:nbase ), intra_bgrp_comm )
178  !
179  IF ( uspp ) THEN
180     !
181     if (n_start .le. n_end) &
182     CALL DGEMM( 'T','N', nbase, my_n, npw2, 2.D0, psi, npwx2, spsi(1,n_start), npwx2, 0.D0, sr(1,n_start), nvecx )
183     IF ( gstart == 2 ) CALL DGER( nbase, my_n, -1.D0, psi, npwx2, spsi(1,n_start), npwx2, sr(1,n_start), nvecx )
184     !
185  ELSE
186     !
187     if (n_start .le. n_end) &
188     CALL DGEMM( 'T','N', nbase, my_n, npw2, 2.D0, psi, npwx2, psi(1,n_start), npwx2, 0.D0, sr(1,n_start), nvecx )
189     IF ( gstart == 2 ) CALL DGER( nbase, my_n, -1.D0, psi, npwx2, psi(1,n_start), npwx2, sr(1,n_start), nvecx )
190     !
191  END IF
192  CALL mp_sum( sr( :, 1:nbase ), inter_bgrp_comm )
193  !
194  CALL mp_sum( sr( :, 1:nbase ), intra_bgrp_comm )
195  CALL stop_clock( 'regterg:init' )
196  !
197  IF ( lrot ) THEN
198     !
199     DO n = 1, nbase
200        !
201        e(n) = hr(n,n)
202        vr(n,n) = 1.D0
203        !
204     END DO
205     !
206  ELSE
207     !
208     ! ... diagonalize the reduced hamiltonian
209     !
210     CALL start_clock( 'regterg:diag' )
211     IF( my_bgrp_id == root_bgrp_id ) THEN
212        CALL diaghg( nbase, nvec, hr, sr, nvecx, ew, vr, me_bgrp, root_bgrp, intra_bgrp_comm )
213     END IF
214     IF( nbgrp > 1 ) THEN
215        CALL mp_bcast( vr, root_bgrp_id, inter_bgrp_comm )
216        CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm )
217     ENDIF
218     CALL stop_clock( 'regterg:diag' )
219     !
220     e(1:nvec) = ew(1:nvec)
221     !
222  END IF
223  !
224  ! ... iterate
225  !
226  iterate: DO kter = 1, maxter
227     !
228     dav_iter = kter ; !write(*,*) kter, notcnv, conv
229     !
230     CALL start_clock( 'regterg:update' )
231     !
232     np = 0
233     !
234     DO n = 1, nvec
235        !
236        IF ( .NOT. conv(n) ) THEN
237           !
238           ! ... this root not yet converged ...
239           !
240           np = np + 1
241           !
242           ! ... reorder eigenvectors so that coefficients for unconverged
243           ! ... roots come first. This allows to use quick matrix-matrix
244           ! ... multiplications to set a new basis vector (see below)
245           !
246           IF ( np /= n ) vr(:,np) = vr(:,n)
247           !
248           ! ... for use in g_psi
249           !
250           ew(nbase+np) = e(n)
251           !
252        END IF
253        !
254     END DO
255     !
256     nb1 = nbase + 1
257     !
258     ! ... expand the basis set with new basis vectors ( H - e*S )|psi> ...
259     !
260     CALL divide(inter_bgrp_comm,nbase,n_start,n_end)
261     my_n = n_end - n_start + 1; !write (*,*) nbase,n_start,n_end
262     psi(:,nb1:nbase+notcnv)=ZERO
263     IF ( uspp ) THEN
264        !
265        if (n_start .le. n_end) &
266        CALL DGEMM( 'N','N', npw2, notcnv, my_n, 1.D0, spsi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, psi(1,nb1), npwx2 )
267        !
268     ELSE
269        !
270        if (n_start .le. n_end) &
271        CALL DGEMM( 'N','N', npw2, notcnv, my_n, 1.D0, psi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, psi(1,nb1), npwx2 )
272        !
273     END IF
274! NB: must not call mp_sum over inter_bgrp_comm here because it is done later to the full correction
275     !
276     DO np = 1, notcnv
277        !
278        psi(:,nbase+np) = - ew(nbase+np) * psi(:,nbase+np)
279        !
280     END DO
281     !
282     if (n_start .le. n_end) &
283     CALL DGEMM( 'N','N', npw2, notcnv, my_n, 1.D0, hpsi(1,n_start), npwx2, vr(n_start,1), nvecx, 1.D0, psi(1,nb1), npwx2 )
284     CALL mp_sum( psi(:,nb1:nbase+notcnv), inter_bgrp_comm )
285     !
286     CALL stop_clock( 'regterg:update' )
287     !
288     ! ... approximate inverse iteration
289     !
290     CALL g_psi( npwx, npw, notcnv, 1, psi(1,nb1), ew(nb1) )
291     !
292     ! ... "normalize" correction vectors psi(:,nb1:nbase+notcnv) in
293     ! ... order to improve numerical stability of subspace diagonalization
294     ! ... (rdiaghg) ew is used as work array :
295     !
296     ! ...         ew = <psi_i|psi_i>,  i = nbase + 1, nbase + notcnv
297     !
298     DO n = 1, notcnv
299        !
300        nbn = nbase + n
301        !
302        ew(n) = 2.D0 * ddot( npw2, psi(1,nbn), 1, psi(1,nbn), 1 )
303        IF ( gstart == 2 ) ew(n) = ew(n) - psi(1,nbn) * psi(1,nbn)
304        !
305     END DO
306     !
307     CALL mp_sum( ew( 1:notcnv ), intra_bgrp_comm )
308     !
309     DO n = 1, notcnv
310        !
311        psi(:,nbase+n) = psi(:,nbase+n) / SQRT( ew(n) )
312        ! ... set Im[ psi(G=0) ] -  needed for numerical stability
313        IF ( gstart == 2 ) psi(1,nbase+n) = CMPLX( DBLE(psi(1,nbase+n)), 0.D0 ,kind=DP)
314        !
315     END DO
316     !
317     ! ... here compute the hpsi and spsi of the new functions
318     !
319     CALL h_psi( npwx, npw, notcnv, psi(1,nb1), hpsi(1,nb1) ) ; nhpsi = nhpsi + notcnv
320     !
321     IF ( uspp ) CALL s_psi( npwx, npw, notcnv, psi(1,nb1), spsi(1,nb1) )
322     !
323     ! ... update the reduced hamiltonian
324     !
325     CALL start_clock( 'regterg:overlap' )
326     !
327     hr( :, nb1:nb1+notcnv-1 )=0.d0
328     CALL divide(inter_bgrp_comm,nbase+notcnv,n_start,n_end)
329     my_n = n_end - n_start + 1; !write (*,*) nbase+notcnv,n_start,n_end
330     CALL DGEMM( 'T','N', my_n, notcnv, npw2, 2.D0, psi(1,n_start), npwx2, hpsi(1,nb1), npwx2, 0.D0, hr(n_start,nb1), nvecx )
331     IF ( gstart == 2 ) CALL DGER( my_n, notcnv, -1.D0, psi(1,n_start), npwx2, hpsi(1,nb1), npwx2, hr(n_start,nb1), nvecx )
332     CALL mp_sum( hr( :, nb1:nb1+notcnv-1 ), inter_bgrp_comm )
333     !
334     CALL mp_sum( hr( :, nb1:nb1+notcnv-1 ), intra_bgrp_comm )
335     !
336     sr( :, nb1:nb1+notcnv-1 )=0.d0
337     CALL divide(inter_bgrp_comm,nbase+notcnv,n_start,n_end)
338     my_n = n_end - n_start + 1; !write (*,*) nbase+notcnv,n_start,n_end
339     IF ( uspp ) THEN
340        !
341        CALL DGEMM( 'T','N', my_n, notcnv, npw2, 2.D0, psi(1,n_start), npwx2, spsi(1,nb1), npwx2, 0.D0, sr(n_start,nb1), nvecx )
342        IF ( gstart == 2 ) CALL DGER( my_n, notcnv, -1.D0, psi(1,n_start), npwx2, spsi(1,nb1), npwx2, sr(n_start,nb1), nvecx )
343        !
344     ELSE
345        !
346        CALL DGEMM( 'T','N', my_n, notcnv, npw2, 2.D0, psi(1,n_start), npwx2, psi(1,nb1), npwx2, 0.D0, sr(n_start,nb1) , nvecx )
347        IF ( gstart == 2 ) CALL DGER( my_n, notcnv, -1.D0, psi(1,n_start), npwx2, psi(1,nb1), npwx2, sr(n_start,nb1), nvecx )
348        !
349     END IF
350     CALL mp_sum( sr( :, nb1:nb1+notcnv-1 ), inter_bgrp_comm )
351     !
352     CALL mp_sum( sr( :, nb1:nb1+notcnv-1 ), intra_bgrp_comm  )
353     !
354     CALL stop_clock( 'regterg:overlap' )
355     !
356     nbase = nbase + notcnv
357     !
358     DO n = 1, nbase
359        !
360        DO m = n + 1, nbase
361           !
362           hr(m,n) = hr(n,m)
363           sr(m,n) = sr(n,m)
364           !
365        END DO
366        !
367     END DO
368     !
369     ! ... diagonalize the reduced hamiltonian
370     !
371     CALL start_clock( 'regterg:diag' )
372     IF( my_bgrp_id == root_bgrp_id ) THEN
373        CALL diaghg( nbase, nvec, hr, sr, nvecx, ew, vr, me_bgrp, root_bgrp, intra_bgrp_comm )
374     END IF
375     IF( nbgrp > 1 ) THEN
376        CALL mp_bcast( vr, root_bgrp_id, inter_bgrp_comm )
377        CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm )
378     ENDIF
379     CALL stop_clock( 'regterg:diag' )
380     !
381     ! ... test for convergence
382     !
383     WHERE( btype(1:nvec) == 1 )
384        !
385        conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < ethr ) )
386        !
387     ELSEWHERE
388        !
389        conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < empty_ethr ) )
390        !
391     END WHERE
392     ! ... next line useful for band parallelization of exact exchange
393     IF ( nbgrp > 1 ) CALL mp_bcast(conv,root_bgrp_id,inter_bgrp_comm)
394     !
395     notcnv = COUNT( .NOT. conv(:) )
396     !
397     e(1:nvec) = ew(1:nvec)
398     !
399     ! ... if overall convergence has been achieved, or the dimension of
400     ! ... the reduced basis set is becoming too large, or in any case if
401     ! ... we are at the last iteration refresh the basis set. i.e. replace
402     ! ... the first nvec elements with the current estimate of the
403     ! ... eigenvectors;  set the basis dimension to nvec.
404     !
405     IF ( notcnv == 0 .OR. &
406          nbase+notcnv > nvecx .OR. dav_iter == maxter ) THEN
407        !
408        CALL start_clock( 'regterg:last' )
409        !
410        evc = ZERO
411        CALL divide(inter_bgrp_comm,nbase,n_start,n_end)
412        my_n = n_end - n_start + 1; !write (*,*) nbase,n_start,n_end
413        CALL DGEMM( 'N','N', npw2, nvec, my_n, 1.D0, psi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, evc, npwx2 )
414        CALL mp_sum( evc, inter_bgrp_comm )
415        !
416        IF ( notcnv == 0 ) THEN
417           !
418           ! ... all roots converged: return
419           !
420           CALL stop_clock( 'regterg:last' )
421           !
422           EXIT iterate
423           !
424        ELSE IF ( dav_iter == maxter ) THEN
425           !
426           ! ... last iteration, some roots not converged: return
427           !
428           !!!WRITE( stdout, '(5X,"WARNING: ",I5, &
429           !!!     &   " eigenvalues not converged in regterg")' ) notcnv
430           !
431           CALL stop_clock( 'regterg:last' )
432           !
433           EXIT iterate
434           !
435        END IF
436        !
437        ! ... refresh psi, H*psi and S*psi
438        !
439        psi(:,1:nvec) = evc(:,1:nvec)
440        !
441        IF ( uspp ) THEN
442           !
443           psi(:,nvec+1:nvec+nvec) = ZERO
444           CALL DGEMM( 'N','N', npw2, nvec, my_n, 1.D0, spsi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, psi(1,nvec+1), npwx2 )
445           CALL mp_sum( psi(:,nvec+1:nvec+nvec), inter_bgrp_comm )
446           !
447           spsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec)
448           !
449        END IF
450        !
451        psi(:,nvec+1:nvec+nvec) = ZERO
452        CALL DGEMM( 'N','N', npw2, nvec, my_n, 1.D0, hpsi(1,n_start), npwx2, vr(n_start,1), nvecx, 0.D0, psi(1,nvec+1), npwx2 )
453        CALL mp_sum( psi(:,nvec+1:nvec+nvec), inter_bgrp_comm )
454        !
455        hpsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec)
456        !
457        ! ... refresh the reduced hamiltonian
458        !
459        nbase = nvec
460        !
461        hr(:,1:nbase) = 0.D0
462        sr(:,1:nbase) = 0.D0
463        vr(:,1:nbase) = 0.D0
464        !
465        DO n = 1, nbase
466           !
467           hr(n,n) = e(n)
468           sr(n,n) = 1.D0
469           vr(n,n) = 1.D0
470           !
471        END DO
472        !
473        CALL stop_clock( 'regterg:last' )
474        !
475     END IF
476     !
477  END DO iterate
478  !
479  DEALLOCATE( conv )
480  DEALLOCATE( ew )
481  DEALLOCATE( vr )
482  DEALLOCATE( hr )
483  DEALLOCATE( sr )
484  !
485  IF ( uspp ) DEALLOCATE( spsi )
486  !
487  DEALLOCATE( hpsi )
488  DEALLOCATE( psi )
489  !
490  CALL stop_clock( 'regterg' )
491  !call print_clock( 'regterg' )
492  !call print_clock( 'regterg:init' )
493  !call print_clock( 'regterg:diag' )
494  !call print_clock( 'regterg:update' )
495  !call print_clock( 'regterg:overlap' )
496  !call print_clock( 'regterg:last' )
497  !
498  RETURN
499  !
500END SUBROUTINE regterg
501!
502!
503!  Subroutine with distributed matrixes
504!  (written by Carlo Cavazzoni)
505!
506!----------------------------------------------------------------------------
507SUBROUTINE pregterg(h_psi, s_psi, uspp, g_psi, &
508                    npw, npwx, nvec, nvecx, evc, ethr, &
509                    e, btype, notcnv, lrot, dav_iter, nhpsi )
510  !----------------------------------------------------------------------------
511  !
512  ! ... iterative solution of the eigenvalue problem:
513  !
514  ! ... ( H - e S ) * evc = 0
515  !
516  ! ... where H is an hermitean operator, e is a real scalar,
517  ! ... S is an uspp matrix, evc is a complex vector
518  ! ... (real wavefunctions with only half plane waves stored)
519  !
520  USE util_param,        ONLY : DP, stdout
521  USE mp_bands_util,     ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, nbgrp, my_bgrp_id
522  USE mp_bands_util,     ONLY : gstart
523  USE mp,                ONLY : mp_bcast, mp_root_sum, mp_sum
524  !
525  IMPLICIT NONE
526  !
527  include 'laxlib.fh'
528  !
529  INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx
530    ! dimension of the matrix to be diagonalized
531    ! leading dimension of matrix evc, as declared in the calling pgm unit
532    ! integer number of searched low-lying roots
533    ! maximum dimension of the reduced basis set
534    !    (the basis set is refreshed when its dimension would exceed nvecx)
535  COMPLEX(DP), INTENT(INOUT) :: evc(npwx,nvec)
536    !  evc   contains the  refined estimates of the eigenvectors
537  REAL(DP), INTENT(IN) :: ethr
538    ! energy threshold for convergence: root improvement is stopped,
539    ! when two consecutive estimates of the root differ by less than ethr.
540  LOGICAL, INTENT(IN) :: uspp
541    ! if .FALSE. : S|psi> not needed
542  INTEGER, INTENT(IN) :: btype(nvec)
543    ! band type ( 1 = occupied, 0 = empty )
544  LOGICAL, INTENT(IN) :: lrot
545    ! .TRUE. if the wfc have already be rotated
546  REAL(DP), INTENT(OUT) :: e(nvec)
547    ! contains the estimated roots.
548  INTEGER, INTENT(OUT) :: dav_iter, notcnv
549    ! integer  number of iterations performed
550    ! number of unconverged roots
551  INTEGER, INTENT(OUT) :: nhpsi
552    ! number of individual Hpsi made
553  !
554  ! ... LOCAL variables
555  !
556  INTEGER, PARAMETER :: maxter = 20
557    ! maximum number of iterations
558  !
559  INTEGER :: kter, nbase, np, n, m, nb1
560    ! counter on iterations
561    ! dimension of the reduced basis
562    ! counter on the reduced basis vectors
563    ! do-loop counters
564  INTEGER :: ierr
565  REAL(DP), ALLOCATABLE :: ew(:)
566  REAL(DP), ALLOCATABLE :: hl(:,:), sl(:,:), vl(:,:)
567    ! Hamiltonian on the reduced basis
568    ! S matrix on the reduced basis
569    ! eigenvectors of the Hamiltonian
570    ! eigenvalues of the reduced hamiltonian
571  COMPLEX(DP), ALLOCATABLE :: psi(:,:), hpsi(:,:), spsi(:,:)
572    ! work space, contains psi
573    ! the product of H and psi
574    ! the product of S and psi
575  LOGICAL, ALLOCATABLE :: conv(:)
576    ! true if the root is converged
577  REAL(DP) :: empty_ethr
578    ! threshold for empty bands
579  INTEGER :: npw2, npwx2
580  INTEGER :: idesc(LAX_DESC_SIZE), idesc_old(LAX_DESC_SIZE)
581  INTEGER, ALLOCATABLE :: irc_ip( : )
582  INTEGER, ALLOCATABLE :: nrc_ip( : )
583  INTEGER, ALLOCATABLE :: rank_ip( :, : )
584    ! matrix distribution descriptors
585  INTEGER :: nx
586    ! maximum local block dimension
587  LOGICAL :: la_proc
588    ! flag to distinguish procs involved in linear algebra
589  INTEGER, ALLOCATABLE :: notcnv_ip( : )
590  INTEGER, ALLOCATABLE :: ic_notcnv( : )
591  !
592  INTEGER :: np_ortho(2), ortho_parent_comm
593  LOGICAL :: do_distr_diag_inside_bgrp
594  !
595  REAL(DP), EXTERNAL :: ddot
596  !
597  EXTERNAL  h_psi, s_psi, g_psi
598    ! h_psi(npwx,npw,nvec,psi,hpsi)
599    !     calculates H|psi>
600    ! s_psi(npwx,npw,nvec,psi,spsi)
601    !     calculates S|psi> (if needed)
602    !     Vectors psi,hpsi,spsi are dimensioned (npwx,nvec)
603    ! g_psi(npwx,npw,notcnv,psi,e)
604    !    calculates (diag(h)-e)^-1 * psi, diagonal approx. to (h-e)^-1*psi
605    !    the first nvec columns contain the trial eigenvectors
606  !
607  !
608  CALL start_clock( 'regterg' ) !; write(6,*) 'enter pregterg' ; FLUSH(6)
609  !
610  CALL laxlib_getval( np_ortho = np_ortho, ortho_parent_comm = ortho_parent_comm, &
611    do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp )
612  !
613  IF ( nvec > nvecx / 2 ) CALL errore( 'pregter', 'nvecx is too small', 1 )
614  !
615  IF ( gstart == -1 ) CALL errore( 'pregter', 'gstart variable not initialized', 1 )
616  !
617  ! ... threshold for empty bands
618  !
619  empty_ethr = MAX( ( ethr * 5.D0 ), 1.D-5 )
620  !
621  ALLOCATE( psi(  npwx, nvecx ), STAT=ierr )
622  IF( ierr /= 0 ) &
623     CALL errore( 'pregterg ',' cannot allocate psi ', ABS(ierr) )
624  !
625  ALLOCATE( hpsi( npwx, nvecx ), STAT=ierr )
626  IF( ierr /= 0 ) &
627     CALL errore( 'pregterg ',' cannot allocate hpsi ', ABS(ierr) )
628  !
629  IF ( uspp ) THEN
630     ALLOCATE( spsi( npwx, nvecx ), STAT=ierr )
631     IF( ierr /= 0 ) &
632        CALL errore( 'pregterg ',' cannot allocate spsi ', ABS(ierr) )
633  END IF
634  !
635  ! ... Initialize the matrix descriptor
636  !
637  ALLOCATE( ic_notcnv( np_ortho(2) ), STAT=ierr )
638  IF( ierr /= 0 ) &
639     CALL errore( 'pregterg ',' cannot allocate ic_notcnv ', ABS(ierr) )
640  !
641  ALLOCATE( notcnv_ip( np_ortho(2) ), STAT=ierr )
642  IF( ierr /= 0 ) &
643     CALL errore( 'pregterg ',' cannot allocate notcnv_ip ', ABS(ierr) )
644  !
645  CALL desc_init( nvec, nx, la_proc, idesc, rank_ip, irc_ip, nrc_ip  )
646  !
647  IF( la_proc ) THEN
648     !
649     ! only procs involved in the diagonalization need to allocate local
650     ! matrix block.
651     !
652     ALLOCATE( vl( nx , nx ), STAT=ierr )
653     IF( ierr /= 0 ) &
654        CALL errore( 'pregterg ',' cannot allocate vl ', ABS(ierr) )
655     !
656     ALLOCATE( sl( nx , nx ), STAT=ierr )
657     IF( ierr /= 0 ) &
658        CALL errore( 'pregterg ',' cannot allocate sl ', ABS(ierr) )
659     !
660     ALLOCATE( hl( nx , nx ), STAT=ierr )
661     IF( ierr /= 0 ) &
662        CALL errore( 'pregterg ',' cannot allocate hl ', ABS(ierr) )
663     !
664  ELSE
665     !
666     ALLOCATE( vl( 1 , 1 ), STAT=ierr )
667     IF( ierr /= 0 ) &
668        CALL errore( 'pregterg ',' cannot allocate vl ', ABS(ierr) )
669     !
670     ALLOCATE( sl( 1 , 1 ), STAT=ierr )
671     IF( ierr /= 0 ) &
672        CALL errore( 'pregterg ',' cannot allocate sl ', ABS(ierr) )
673     !
674     ALLOCATE( hl( 1 , 1 ), STAT=ierr )
675     IF( ierr /= 0 ) &
676        CALL errore( 'pregterg ',' cannot allocate hl ', ABS(ierr) )
677     !
678  END IF
679  !
680  ALLOCATE( ew( nvecx ), STAT=ierr )
681  IF( ierr /= 0 ) &
682     CALL errore( 'pregterg ',' cannot allocate ew ', ABS(ierr) )
683  !
684  ALLOCATE( conv( nvec ), STAT=ierr )
685  IF( ierr /= 0 ) &
686     CALL errore( 'pregterg ',' cannot allocate conv ', ABS(ierr) )
687  !
688  npw2  = 2*npw
689  npwx2  = 2*npwx
690  notcnv = nvec
691  nbase  = nvec
692  conv   = .FALSE.
693  !
694  IF ( uspp ) spsi = ZERO
695  !
696  hpsi = ZERO
697  psi  = ZERO
698  psi(:,1:nvec) = evc(:,1:nvec)
699  ! ... set Im[ psi(G=0) ] -  needed for numerical stability
700  IF ( gstart == 2 ) psi(1,1:nvec) = CMPLX( DBLE( psi(1,1:nvec) ), 0.D0 ,kind=DP)
701  !
702  ! ... hpsi contains h times the basis vectors
703  !
704  CALL h_psi( npwx, npw, nvec, psi, hpsi )  ; nhpsi = nvec
705  !
706  IF ( uspp ) CALL s_psi( npwx, npw, nvec, psi, spsi )
707  !
708  ! ... hl contains the projection of the hamiltonian onto the reduced
709  ! ... space, vl contains the eigenvectors of hl. Remember hl, vl and sl
710  ! ... are all distributed across processors, global replicated matrixes
711  ! ... here are never allocated
712  !
713  CALL start_clock( 'regterg:init' )
714
715  CALL compute_distmat( hl, psi, hpsi )
716  !
717  IF ( uspp ) THEN
718     !
719     CALL compute_distmat( sl, psi, spsi )
720     !
721  ELSE
722     !
723     CALL compute_distmat( sl, psi, psi )
724     !
725  END IF
726  CALL stop_clock( 'regterg:init' )
727  !
728  IF ( lrot ) THEN
729     !
730     CALL set_e_from_h()
731     !
732     CALL set_to_identity( vl, idesc )
733     !
734  ELSE
735     !
736     ! ... diagonalize the reduced hamiltonian
737     !     Calling block parallel algorithm
738     !
739     CALL start_clock( 'regterg:diag' )
740     IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm
741        ! only the first bgrp performs the diagonalization
742        IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc )
743        IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups
744           CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm )
745           CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm )
746        ENDIF
747     ELSE
748        CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc )
749     END IF
750     CALL stop_clock( 'regterg:diag' )
751     !
752     e(1:nvec) = ew(1:nvec)
753     !
754  END IF
755  !
756  ! ... iterate
757  !
758  iterate: DO kter = 1, maxter
759     !
760     dav_iter = kter ; !write(*,*) kter, notcnv, conv
761     !
762     CALL start_clock( 'regterg:update' )
763     !
764     CALL reorder_v()
765     !
766     nb1 = nbase + 1
767     !
768     ! ... expand the basis set with new basis vectors ( H - e*S )|psi> ...
769     !
770     CALL hpsi_dot_v()
771     !
772     CALL stop_clock( 'regterg:update' )
773     !
774     ! ... approximate inverse iteration
775     !
776     CALL g_psi( npwx, npw, notcnv, 1, psi(1,nb1), ew(nb1) )
777     !
778     ! ... "normalize" correction vectors psi(:,nb1:nbase+notcnv) in
779     ! ... order to improve numerical stability of subspace diagonalization
780     ! ... (rdiaghg) ew is used as work array :
781     !
782     ! ...         ew = <psi_i|psi_i>,  i = nbase + 1, nbase + notcnv
783     !
784     DO n = 1, notcnv
785        !
786        ew(n) = 2.D0 * ddot( npw2, psi(1,nbase+n), 1, psi(1,nbase+n), 1 )
787        !
788        IF ( gstart == 2 ) ew(n) = ew(n) - psi(1,nbase+n) * psi(1,nbase+n)
789        !
790     END DO
791     !
792     CALL mp_sum( ew( 1:notcnv ), intra_bgrp_comm )
793     !
794     DO n = 1, notcnv
795        !
796        psi(:,nbase+n) = psi(:,nbase+n) / SQRT( ew(n) )
797        ! ... set Im[ psi(G=0) ] -  needed for numerical stability
798        IF ( gstart == 2 ) psi(1,nbase+n) = CMPLX( DBLE(psi(1,nbase+n)), 0.D0 ,kind=DP)
799        !
800     END DO
801     !
802     ! ... here compute the hpsi and spsi of the new functions
803     !
804     CALL h_psi( npwx, npw, notcnv, psi(1,nb1), hpsi(1,nb1) ) ; nhpsi = nhpsi + notcnv
805     !
806     IF ( uspp ) CALL s_psi( npwx, npw, notcnv, psi(1,nb1), spsi(1,nb1) )
807     !
808     ! ... update the reduced hamiltonian
809     !
810     CALL start_clock( 'regterg:overlap' )
811     !
812     ! we need to save the old descriptor in order to redistribute matrices
813     !
814     idesc_old = idesc
815     !
816     ! ... RE-Initialize the matrix descriptor
817     !
818     CALL desc_init( nbase+notcnv, nx, la_proc, idesc, rank_ip, irc_ip, nrc_ip  )
819     !
820     IF( la_proc ) THEN
821
822        !  redistribute hl and sl (see dsqmred), since the dimension of the subspace has changed
823        !
824        vl = hl
825        DEALLOCATE( hl )
826        ALLOCATE( hl( nx , nx ), STAT=ierr )
827        IF( ierr /= 0 ) &
828           CALL errore( 'pregterg ',' cannot allocate hl ', ABS(ierr) )
829
830        CALL laxlib_dsqmred( nbase, vl, idesc_old(LAX_DESC_NRCX), idesc_old, nbase+notcnv, hl, nx, idesc )
831
832        vl = sl
833        DEALLOCATE( sl )
834        ALLOCATE( sl( nx , nx ), STAT=ierr )
835        IF( ierr /= 0 ) &
836           CALL errore( 'pregterg ',' cannot allocate sl ', ABS(ierr) )
837
838        CALL laxlib_dsqmred( nbase, vl, idesc_old(LAX_DESC_NRCX), idesc_old, nbase+notcnv, sl, nx, idesc )
839
840        DEALLOCATE( vl )
841        ALLOCATE( vl( nx , nx ), STAT=ierr )
842        IF( ierr /= 0 ) &
843           CALL errore( 'pregterg ',' cannot allocate vl ', ABS(ierr) )
844
845     END IF
846     !
847     !
848     CALL update_distmat( hl, psi, hpsi )
849     !
850     IF ( uspp ) THEN
851        !
852        CALL update_distmat( sl, psi, spsi )
853        !
854     ELSE
855        !
856        CALL update_distmat( sl, psi, psi )
857        !
858     END IF
859     !
860     CALL stop_clock( 'regterg:overlap' )
861     !
862     nbase = nbase + notcnv
863     !
864     ! ... diagonalize the reduced hamiltonian
865     !     Call block parallel algorithm
866     !
867     CALL start_clock( 'regterg:diag' )
868     IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm
869        ! only the first bgrp performs the diagonalization
870        IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc )
871        IF( nbgrp > 1 ) THEN ! results must be brodcast to the other bnd groups
872           CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm )
873           CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm )
874        ENDIF
875     ELSE
876        CALL pdiaghg( nbase, hl, sl, nx, ew, vl, idesc )
877     END IF
878     CALL stop_clock( 'regterg:diag' )
879     !
880     ! ... test for convergence
881     !
882     WHERE( btype(1:nvec) == 1 )
883        !
884        conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < ethr ) )
885        !
886     ELSEWHERE
887        !
888        conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < empty_ethr ) )
889        !
890     END WHERE
891     ! ... next line useful for band parallelization of exact exchange
892     IF ( nbgrp > 1 ) CALL mp_bcast(conv,root_bgrp_id,inter_bgrp_comm)
893     !
894     notcnv = COUNT( .NOT. conv(:) )
895     !
896     e(1:nvec) = ew(1:nvec)
897     !
898     ! ... if overall convergence has been achieved, or the dimension of
899     ! ... the reduced basis set is becoming too large, or in any case if
900     ! ... we are at the last iteration refresh the basis set. i.e. replace
901     ! ... the first nvec elements with the current estimate of the
902     ! ... eigenvectors;  set the basis dimension to nvec.
903     !
904     IF ( notcnv == 0 .OR. nbase+notcnv > nvecx .OR. dav_iter == maxter ) THEN
905        !
906        CALL start_clock( 'regterg:last' )
907        !
908        CALL refresh_evc()
909        !
910        IF ( notcnv == 0 ) THEN
911           !
912           ! ... all roots converged: return
913           !
914           CALL stop_clock( 'regterg:last' )
915           !
916           EXIT iterate
917           !
918        ELSE IF ( dav_iter == maxter ) THEN
919           !
920           ! ... last iteration, some roots not converged: return
921           !
922           !!!WRITE( stdout, '(5X,"WARNING: ",I5, &
923           !!!     &   " eigenvalues not converged in regterg")' ) notcnv
924           !
925           CALL stop_clock( 'regterg:last' )
926           !
927           EXIT iterate
928           !
929        END IF
930        !
931        ! ... refresh psi, H*psi and S*psi
932        !
933        psi(:,1:nvec) = evc(:,1:nvec)
934        !
935        IF ( uspp ) THEN
936           !
937           CALL refresh_spsi()
938           !
939        END IF
940        !
941        CALL refresh_hpsi()
942        !
943        ! ... refresh the reduced hamiltonian
944        !
945        nbase = nvec
946        !
947        CALL desc_init( nvec, nx, la_proc, idesc, rank_ip, irc_ip, nrc_ip  )
948        !
949        IF( la_proc ) THEN
950           !
951           ! note that nx has been changed by desc_init
952           ! we need to re-alloc with the new size.
953           !
954           DEALLOCATE( vl, hl, sl )
955           ALLOCATE( vl( nx, nx ), STAT=ierr )
956           IF( ierr /= 0 ) &
957              CALL errore( 'pregterg ',' cannot allocate vl ', ABS(ierr) )
958           ALLOCATE( hl( nx, nx ), STAT=ierr )
959           IF( ierr /= 0 ) &
960              CALL errore( 'pregterg ',' cannot allocate hl ', ABS(ierr) )
961           ALLOCATE( sl( nx, nx ), STAT=ierr )
962           IF( ierr /= 0 ) &
963              CALL errore( 'pregterg ',' cannot allocate sl ', ABS(ierr) )
964           !
965        END IF
966        !
967        CALL set_h_from_e( )
968        !
969        CALL set_to_identity( vl, idesc )
970        CALL set_to_identity( sl, idesc )
971        !
972        CALL stop_clock( 'regterg:last' )
973        !
974     END IF
975     !
976  END DO iterate
977  !
978  DEALLOCATE( vl, hl, sl )
979  !
980  DEALLOCATE( rank_ip )
981  DEALLOCATE( irc_ip )
982  DEALLOCATE( nrc_ip )
983  DEALLOCATE( ic_notcnv )
984  DEALLOCATE( notcnv_ip )
985  DEALLOCATE( conv )
986  DEALLOCATE( ew )
987  !
988  IF ( uspp ) DEALLOCATE( spsi )
989  !
990  DEALLOCATE( hpsi )
991  DEALLOCATE( psi )
992  !
993  CALL stop_clock( 'regterg' )
994  !call print_clock( 'regterg' )
995  !call print_clock( 'regterg:init' )
996  !call print_clock( 'regterg:diag' )
997  !call print_clock( 'regterg:update' )
998  !call print_clock( 'regterg:overlap' )
999  !call print_clock( 'regterg:last' )
1000
1001  !
1002  RETURN
1003  !
1004  !
1005CONTAINS
1006  !
1007  !
1008  SUBROUTINE set_to_identity( distmat, idesc )
1009     INTEGER, INTENT(IN)  :: idesc(LAX_DESC_SIZE)
1010     REAL(DP), INTENT(OUT) :: distmat(:,:)
1011     INTEGER :: i
1012     distmat = 0_DP
1013     IF( idesc(LAX_DESC_MYC) == idesc(LAX_DESC_MYR) .AND. idesc(LAX_DESC_ACTIVE_NODE) > 0 ) THEN
1014        DO i = 1, idesc(LAX_DESC_NC)
1015           distmat( i, i ) = 1_DP
1016        END DO
1017     END IF
1018     RETURN
1019  END SUBROUTINE set_to_identity
1020  !
1021  !
1022  SUBROUTINE reorder_v()
1023     !
1024     INTEGER :: ipc, ipr
1025     INTEGER :: nc, ic
1026     INTEGER :: nl, npl
1027     !
1028     np = 0
1029     !
1030     notcnv_ip = 0
1031     !
1032     n = 0
1033     !
1034     DO ipc = 1, idesc(LAX_DESC_NPC)
1035        !
1036        nc = nrc_ip( ipc )
1037        ic = irc_ip( ipc )
1038        !
1039        npl = 0
1040        !
1041        IF( ic <= nvec ) THEN
1042           !
1043           DO nl = 1, min( nvec - ic + 1, nc )
1044              !
1045              n  = n  + 1
1046              !
1047              IF ( .NOT. conv(n) ) THEN
1048                 !
1049                 ! ... this root not yet converged ...
1050                 !
1051                 np  = np  + 1
1052                 npl = npl + 1
1053                 IF( npl == 1 ) ic_notcnv( ipc ) = np
1054                 !
1055                 ! ... reorder eigenvectors so that coefficients for unconverged
1056                 ! ... roots come first. This allows to use quick matrix-matrix
1057                 ! ... multiplications to set a new basis vector (see below)
1058                 !
1059                 notcnv_ip( ipc ) = notcnv_ip( ipc ) + 1
1060                 !
1061                 IF ( npl /= nl ) THEN
1062                    IF( la_proc .AND. idesc(LAX_DESC_MYC) == ipc-1 ) THEN
1063                       vl( :, npl) = vl( :, nl )
1064                    END IF
1065                 END IF
1066                 !
1067                 ! ... for use in g_psi
1068                 !
1069                 ew(nbase+np) = e(n)
1070                 !
1071              END IF
1072              !
1073           END DO
1074           !
1075        END IF
1076        !
1077     END DO
1078     !
1079  END SUBROUTINE reorder_v
1080  !
1081  !
1082  SUBROUTINE hpsi_dot_v()
1083     !
1084     INTEGER :: ipc, ipr
1085     INTEGER :: nr, nc, ir, ic, notcl, root, np
1086     REAL(DP), ALLOCATABLE :: vtmp( :, : )
1087     COMPLEX(DP), ALLOCATABLE :: ptmp( :, : )
1088     REAL(DP) :: beta
1089
1090     ALLOCATE( vtmp( nx, nx ) )
1091     ALLOCATE( ptmp( npwx, nx ) )
1092
1093     DO ipc = 1, idesc(LAX_DESC_NPC)
1094        !
1095        IF( notcnv_ip( ipc ) > 0 ) THEN
1096
1097           notcl = notcnv_ip( ipc )
1098           ic    = ic_notcnv( ipc )
1099
1100           beta = 0.0d0
1101
1102           DO ipr = 1, idesc(LAX_DESC_NPR)
1103              !
1104              nr = nrc_ip( ipr )
1105              ir = irc_ip( ipr )
1106              !
1107              root = rank_ip( ipr, ipc )
1108
1109              IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN
1110                 vtmp(:,1:notcl) = vl(:,1:notcl)
1111              END IF
1112
1113              CALL mp_bcast( vtmp(:,1:notcl), root, ortho_parent_comm )
1114              !
1115              IF ( uspp ) THEN
1116                 !
1117                 CALL DGEMM( 'N', 'N', npw2, notcl, nr, 1.D0, &
1118                    spsi( 1, ir ), npwx2, vtmp, nx, beta, psi(1,nb1+ic-1), npwx2 )
1119                 !
1120              ELSE
1121                 !
1122                 CALL DGEMM( 'N', 'N', npw2, notcl, nr, 1.D0, &
1123                    psi( 1, ir ), npwx2, vtmp, nx, beta, psi(1,nb1+ic-1), npwx2 )
1124                 !
1125              END IF
1126              !
1127              CALL DGEMM( 'N', 'N', npw2, notcl, nr, 1.D0, &
1128                      hpsi( 1, ir ), npwx2, vtmp, nx, beta, ptmp, npwx2 )
1129
1130              beta = 1.0d0
1131
1132           END DO
1133
1134           DO np = 1, notcl
1135              !
1136              psi(1:npw,nbase+np+ic-1) = ptmp(1:npw,np) - ew(nbase+np+ic-1) * psi(1:npw,nbase+np+ic-1)
1137              !
1138           END DO
1139           !
1140        END IF
1141        !
1142     END DO
1143
1144
1145     DEALLOCATE( vtmp )
1146     DEALLOCATE( ptmp )
1147
1148     RETURN
1149  END SUBROUTINE hpsi_dot_v
1150  !
1151  !
1152  SUBROUTINE refresh_evc( )
1153     !
1154     INTEGER :: ipc, ipr
1155     INTEGER :: nr, nc, ir, ic, root
1156     REAL(DP), ALLOCATABLE :: vtmp( :, : )
1157     REAL(DP) :: beta
1158
1159     ALLOCATE( vtmp( nx, nx ) )
1160     !
1161     DO ipc = 1, idesc(LAX_DESC_NPC)
1162        !
1163        nc = nrc_ip( ipc )
1164        ic = irc_ip( ipc )
1165        !
1166        IF( ic <= nvec ) THEN
1167           !
1168           nc = min( nc, nvec - ic + 1 )
1169           !
1170           beta = 0.0d0
1171
1172           DO ipr = 1, idesc(LAX_DESC_NPR)
1173              !
1174              nr = nrc_ip( ipr )
1175              ir = irc_ip( ipr )
1176              !
1177              root = rank_ip( ipr, ipc )
1178
1179              IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN
1180                 !
1181                 !  this proc sends his block
1182                 !
1183                 CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm )
1184                 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
1185                          psi(1,ir), npwx2, vl, nx, beta, evc(1,ic), npwx2 )
1186              ELSE
1187                 !
1188                 !  all other procs receive
1189                 !
1190                 CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm )
1191                 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
1192                          psi(1,ir), npwx2, vtmp, nx, beta, evc(1,ic), npwx2 )
1193              END IF
1194              !
1195
1196              beta = 1.0d0
1197
1198           END DO
1199           !
1200        END IF
1201        !
1202     END DO
1203     !
1204     DEALLOCATE( vtmp )
1205
1206     RETURN
1207  END SUBROUTINE refresh_evc
1208  !
1209  !
1210  SUBROUTINE refresh_spsi( )
1211     !
1212     INTEGER :: ipc, ipr
1213     INTEGER :: nr, nc, ir, ic, root
1214     REAL(DP), ALLOCATABLE :: vtmp( :, : )
1215     REAL(DP) :: beta
1216
1217     ALLOCATE( vtmp( nx, nx ) )
1218     !
1219     DO ipc = 1, idesc(LAX_DESC_NPC)
1220        !
1221        nc = nrc_ip( ipc )
1222        ic = irc_ip( ipc )
1223        !
1224        IF( ic <= nvec ) THEN
1225           !
1226           nc = min( nc, nvec - ic + 1 )
1227           !
1228           beta = 0_DP
1229           !
1230           DO ipr = 1, idesc(LAX_DESC_NPR)
1231              !
1232              nr = nrc_ip( ipr )
1233              ir = irc_ip( ipr )
1234              !
1235              root = rank_ip( ipr, ipc )
1236
1237              IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN
1238                 !
1239                 !  this proc sends his block
1240                 !
1241                 CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm )
1242                 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
1243                          spsi(1,ir), npwx2, vl, nx, beta, psi(1,nvec+ic), npwx2 )
1244              ELSE
1245                 !
1246                 !  all other procs receive
1247                 !
1248                 CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm )
1249                 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
1250                          spsi(1,ir), npwx2, vtmp, nx, beta, psi(1,nvec+ic), npwx2 )
1251              END IF
1252              !
1253              beta = 1_DP
1254
1255           END DO
1256           !
1257        END IF
1258        !
1259     END DO
1260     !
1261     spsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec)
1262     !
1263     DEALLOCATE( vtmp )
1264
1265     RETURN
1266  END SUBROUTINE refresh_spsi
1267  !
1268  !
1269  !
1270  SUBROUTINE refresh_hpsi( )
1271     !
1272     INTEGER :: ipc, ipr
1273     INTEGER :: nr, nc, ir, ic, root
1274     REAL(DP), ALLOCATABLE :: vtmp( :, : )
1275     REAL(DP) :: beta
1276
1277     ALLOCATE( vtmp( nx, nx ) )
1278     !
1279     DO ipc = 1, idesc(LAX_DESC_NPC)
1280        !
1281        nc = nrc_ip( ipc )
1282        ic = irc_ip( ipc )
1283        !
1284        IF( ic <= nvec ) THEN
1285           !
1286           nc = min( nc, nvec - ic + 1 )
1287           !
1288           beta = 0.0d0
1289           !
1290           DO ipr = 1, idesc(LAX_DESC_NPR)
1291              !
1292              nr = nrc_ip( ipr )
1293              ir = irc_ip( ipr )
1294              !
1295              root = rank_ip( ipr, ipc )
1296
1297              IF( ipr-1 == idesc(LAX_DESC_MYR) .AND. ipc-1 == idesc(LAX_DESC_MYC) .AND. la_proc ) THEN
1298                 !
1299                 !  this proc sends his block
1300                 !
1301                 CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm )
1302                 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
1303                          hpsi(1,ir), npwx2, vl, nx, beta, psi(1,nvec+ic), npwx2 )
1304              ELSE
1305                 !
1306                 !  all other procs receive
1307                 !
1308                 CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm )
1309                 CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
1310                          hpsi(1,ir), npwx2, vtmp, nx, beta, psi(1,nvec+ic), npwx2 )
1311              END IF
1312              !
1313              beta = 1.0d0
1314
1315           END DO
1316           !
1317        END IF
1318        !
1319     END DO
1320     !
1321     DEALLOCATE( vtmp )
1322
1323     hpsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec)
1324
1325     RETURN
1326  END SUBROUTINE refresh_hpsi
1327  !
1328  !
1329
1330  SUBROUTINE compute_distmat( dm, v, w )
1331     !
1332     !  This subroutine compute <vi|wj> and store the
1333     !  result in distributed matrix dm
1334     !
1335     INTEGER :: ipc, ipr
1336     INTEGER :: nr, nc, ir, ic, root
1337     REAL(DP), INTENT(OUT) :: dm( :, : )
1338     COMPLEX(DP) :: v(:,:), w(:,:)
1339     REAL(DP), ALLOCATABLE :: work( :, : )
1340     !
1341     ALLOCATE( work( nx, nx ) )
1342     !
1343     work = 0.0d0
1344     !
1345     DO ipc = 1, idesc(LAX_DESC_NPC) !  loop on column procs
1346        !
1347        nc = nrc_ip( ipc )
1348        ic = irc_ip( ipc )
1349        !
1350        DO ipr = 1, ipc ! use symmetry for the loop on row procs
1351           !
1352           nr = nrc_ip( ipr )
1353           ir = irc_ip( ipr )
1354           !
1355           !  rank of the processor for which this block (ipr,ipc) is destinated
1356           !
1357           root = rank_ip( ipr, ipc )
1358
1359           ! use blas subs. on the matrix block
1360
1361           CALL DGEMM( 'T', 'N', nr, nc, npw2, 2.D0 , &
1362                       v(1,ir), npwx2, w(1,ic), npwx2, 0.D0, work, nx )
1363
1364           IF ( gstart == 2 ) &
1365              CALL DGER( nr, nc, -1.D0, v(1,ir), npwx2, w(1,ic), npwx2, work, nx )
1366
1367           ! accumulate result on dm of root proc.
1368
1369           CALL mp_root_sum( work, dm, root, ortho_parent_comm )
1370
1371        END DO
1372        !
1373     END DO
1374     IF (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp
1375     !
1376     CALL laxlib_dsqmsym( nbase, dm, nx, idesc )
1377     !
1378     DEALLOCATE( work )
1379     !
1380     RETURN
1381  END SUBROUTINE compute_distmat
1382  !
1383  !
1384  SUBROUTINE update_distmat( dm, v, w )
1385     !
1386     INTEGER :: ipc, ipr
1387     INTEGER :: nr, nc, ir, ic, root, icc, ii
1388     REAL(DP)    :: dm( :, : )
1389     COMPLEX(DP) :: v(:,:), w(:,:)
1390     REAL(DP), ALLOCATABLE :: vtmp( :, : )
1391
1392     ALLOCATE( vtmp( nx, nx ) )
1393     !
1394     vtmp = 0.0d0
1395     !
1396     DO ipc = 1, idesc(LAX_DESC_NPC)
1397        !
1398        nc = nrc_ip( ipc )
1399        ic = irc_ip( ipc )
1400        !
1401        IF( ic+nc-1 >= nb1 ) THEN
1402
1403           nc = MIN( nc, ic+nc-1 - nb1 + 1 )
1404           IF( ic >= nb1 ) THEN
1405              ii = ic
1406              icc = 1
1407           ELSE
1408              ii = nb1
1409              icc = nb1-ic+1
1410           END IF
1411
1412           DO ipr = 1, ipc ! desc%npr use symmetry
1413              !
1414              nr = nrc_ip( ipr )
1415              ir = irc_ip( ipr )
1416              !
1417              root = rank_ip( ipr, ipc )
1418
1419              CALL DGEMM( 'T', 'N', nr, nc, npw2, 2.D0, v( 1, ir ), &
1420                          npwx2, w(1,ii), npwx2, 0.D0, vtmp, nx )
1421              !
1422              IF ( gstart == 2 ) &
1423                 CALL DGER( nr, nc, -1.D0, v( 1, ir ), npwx2, w(1,ii), npwx2, vtmp, nx )
1424              IF (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) vtmp = vtmp/nbgrp
1425
1426              IF(  (idesc(LAX_DESC_ACTIVE_NODE) > 0) .AND. &
1427                   (ipr-1 == idesc(LAX_DESC_MYR)) .AND. (ipc-1 == idesc(LAX_DESC_MYC)) ) THEN
1428                 CALL mp_root_sum( vtmp(:,1:nc), dm(:,icc:icc+nc-1), root, ortho_parent_comm )
1429              ELSE
1430                 CALL mp_root_sum( vtmp(:,1:nc), dm, root, ortho_parent_comm )
1431              END IF
1432
1433
1434           END DO
1435           !
1436        END IF
1437        !
1438     END DO
1439     !
1440     CALL laxlib_dsqmsym( nbase+notcnv, dm, nx, idesc )
1441     !
1442     DEALLOCATE( vtmp )
1443     RETURN
1444  END SUBROUTINE update_distmat
1445  !
1446  !
1447  !
1448  SUBROUTINE set_e_from_h()
1449     INTEGER :: nc, ic, i
1450     e(1:nbase) = 0.0d0
1451     IF( idesc(LAX_DESC_MYC) == idesc(LAX_DESC_MYR) .AND. la_proc ) THEN
1452        nc = idesc(LAX_DESC_NC)
1453        ic = idesc(LAX_DESC_IC)
1454        DO i = 1, nc
1455           e( i + ic - 1 ) = hl( i, i )
1456        END DO
1457     END IF
1458     CALL mp_sum( e(1:nbase), ortho_parent_comm )
1459     RETURN
1460  END SUBROUTINE set_e_from_h
1461  !
1462  SUBROUTINE set_h_from_e()
1463     INTEGER :: nc, ic, i
1464     IF( la_proc ) THEN
1465        hl = 0.0d0
1466        IF( idesc(LAX_DESC_MYC) == idesc(LAX_DESC_MYR) ) THEN
1467           nc = idesc(LAX_DESC_NC)
1468           ic = idesc(LAX_DESC_IC)
1469           DO i = 1, nc
1470              hl(i,i) = e( i + ic - 1 )
1471           END DO
1472        END IF
1473     END IF
1474     RETURN
1475  END SUBROUTINE set_h_from_e
1476  !
1477END SUBROUTINE pregterg
1478