1!
2! Copyright (C) 2002-2006 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8! define __VERBOSE to print a message after each eigenvalue is computed
9!----------------------------------------------------------------------------
10SUBROUTINE rcgdiagg( hs_1psi, s_1psi, precondition, &
11                     npwx, npw, nbnd, psi, e, btype, &
12                     ethr, maxter, reorder, notconv, avg_iter )
13  !----------------------------------------------------------------------------
14  !
15  ! ... "poor man" iterative diagonalization of a complex hermitian matrix
16  ! ... through preconditioned conjugate gradient algorithm
17  ! ... Band-by-band algorithm with minimal use of memory
18  ! ... Calls hs_1psi and s_1psi to calculate H|psi> + S|psi> and S|psi>
19  ! ... Works for generalized eigenvalue problem (US pseudopotentials) as well
20  !
21  USE util_param,     ONLY : DP
22  USE mp_bands_util,  ONLY : intra_bgrp_comm, inter_bgrp_comm, gstart
23  USE mp,             ONLY : mp_sum
24#if defined(__VERBOSE)
25  USE util_param,     ONLY : stdout
26#endif
27  !
28  IMPLICIT NONE
29  !
30  ! ... Mathematical constants
31  !
32  REAL(DP), PARAMETER :: pi     = 3.14159265358979323846_DP
33  !
34  ! ... I/O variables
35  !
36  INTEGER,     INTENT(IN)    :: npwx, npw, nbnd, maxter
37  INTEGER,     INTENT(IN)    :: btype(nbnd)
38  REAL(DP),    INTENT(IN)    :: precondition(npw), ethr
39  COMPLEX(DP), INTENT(INOUT) :: psi(npwx,nbnd)
40  REAL(DP),    INTENT(INOUT) :: e(nbnd)
41  INTEGER,     INTENT(OUT)   :: notconv
42  REAL(DP),    INTENT(OUT)   :: avg_iter
43  !
44  ! ... local variables
45  !
46  INTEGER                  :: i, j, m, m_start, m_end, iter, moved
47  REAL(DP),    ALLOCATABLE :: lagrange(:)
48  COMPLEX(DP), ALLOCATABLE :: hpsi(:), spsi(:), g(:), cg(:), &
49                              scg(:), ppsi(:), g0(:)
50  REAL(DP)                 :: psi_norm, a0, b0, gg0, gamma, gg, gg1, &
51                              cg0, e0, es(2)
52  REAL(DP)                 :: theta, cost, sint, cos2t, sin2t
53  LOGICAL                  :: reorder
54  INTEGER                  :: npw2, npwx2
55  REAL(DP)                 :: empty_ethr, ethr_m
56  !
57  ! ... external functions
58  !
59  REAL (DP), EXTERNAL :: ddot
60  EXTERNAL  hs_1psi,    s_1psi
61  ! hs_1psi( npwx, npw, psi, hpsi, spsi )
62  ! s_1psi( npwx, npw, psi, spsi )
63  !
64  !
65  CALL start_clock( 'rcgdiagg' )
66  !
67  IF ( gstart == -1 ) CALL errore( 'regter', 'gstart variable not initialized', 1 )
68  !
69  empty_ethr = MAX( ( ethr * 5.D0 ), 1.D-5 )
70  !
71  npw2 = 2 * npw
72  npwx2 = 2 * npwx
73  !
74  ALLOCATE( spsi( npwx ) )
75  ALLOCATE( scg(  npwx ) )
76  ALLOCATE( hpsi( npwx ) )
77  ALLOCATE( g(    npwx ) )
78  ALLOCATE( cg(   npwx ) )
79  ALLOCATE( g0(   npwx ) )
80  ALLOCATE( ppsi( npwx ) )
81  !
82  ALLOCATE( lagrange( nbnd ) )
83  !
84  avg_iter = 0.D0
85  notconv  = 0
86  moved    = 0
87  !
88  ! ... every eigenfunction is calculated separately
89  !
90  DO m = 1, nbnd
91
92     IF ( btype(m) == 1 ) THEN
93        !
94        ethr_m = ethr
95        !
96     ELSE
97        !
98        ethr_m = empty_ethr
99        !
100     END IF
101     !
102     ! ... calculate S|psi>
103     !
104     CALL s_1psi( npwx, npw, psi(1,m), spsi )
105     !
106     ! ... orthogonalize starting eigenfunction to those already calculated
107     !
108     CALL start_clock( 'cg:ortho' )
109     call divide(inter_bgrp_comm,m,m_start,m_end); !write(*,*) m,m_start,m_end
110     lagrange = 0.d0
111     if(m_start.le.m_end) &
112     CALL DGEMV( 'T', npw2, m_end-m_start+1, 2.D0, psi(1,m_start), npwx2, spsi, 1, 0.D0, lagrange(m_start), 1 )
113     CALL mp_sum( lagrange( 1:m ), inter_bgrp_comm )
114     IF ( gstart == 2 ) lagrange(1:m) = lagrange(1:m) - psi(1,1:m) * spsi(1)
115     !
116     CALL mp_sum( lagrange( 1:m ), intra_bgrp_comm )
117     !
118     psi_norm = lagrange(m)
119     !
120     DO j = 1, m - 1
121        !
122        psi(:,m)  = psi(:,m) - lagrange(j) * psi(:,j)
123        !
124        psi_norm = psi_norm - lagrange(j)**2
125        !
126     END DO
127     !
128     psi_norm = SQRT( psi_norm )
129     !
130     psi(:,m) = psi(:,m) / psi_norm
131     ! ... set Im[ psi(G=0) ] -  needed for numerical stability
132     IF ( gstart == 2 ) psi(1,m) = CMPLX( DBLE(psi(1,m)), 0.D0 ,kind=DP)
133     CALL stop_clock( 'cg:ortho' )
134     !
135     ! ... calculate starting gradient (|hpsi> = H|psi>) ...
136     !
137     CALL hs_1psi( npwx, npw, psi(1,m), hpsi, spsi )
138     !
139     ! ... and starting eigenvalue (e = <y|PHP|y> = <psi|H|psi>)
140     !
141     ! ... NB:  ddot(2*npw,a,1,b,1) = DBLE( zdotc(npw,a,1,b,1) )
142     !
143     e(m) = 2.D0 * ddot( npw2, psi(1,m), 1, hpsi, 1 )
144     IF ( gstart == 2 ) e(m) = e(m) - psi(1,m) * hpsi(1)
145     !
146     CALL mp_sum( e(m), intra_bgrp_comm )
147     !
148     ! ... start iteration for this band
149     !
150     iterate: DO iter = 1, maxter
151        !
152        ! ... calculate  P (PHP)|y>
153        ! ... ( P = preconditioning matrix, assumed diagonal )
154        !
155        g(1:npw)    = hpsi(1:npw) / precondition(:)
156        ppsi(1:npw) = spsi(1:npw) / precondition(:)
157        !
158        ! ... ppsi is now S P(P^2)|y> = S P^2|psi>)
159        !
160        es(1) = 2.D0 * ddot( npw2, spsi(1), 1, g(1), 1 )
161        es(2) = 2.D0 * ddot( npw2, spsi(1), 1, ppsi(1), 1 )
162        !
163        IF ( gstart == 2 ) THEN
164           !
165           es(1) = es(1) - spsi(1) * g(1)
166           es(2) = es(2) - spsi(1) * ppsi(1)
167           !
168        END IF
169        !
170        CALL mp_sum( es , intra_bgrp_comm )
171        !
172        es(1) = es(1) / es(2)
173        !
174        g(:) = g(:) - es(1) * ppsi(:)
175        !
176        ! ... e1 = <y| S P^2 PHP|y> / <y| S S P^2|y>  ensures that
177        ! ... <g| S P^2|y> = 0
178        !
179        ! ... orthogonalize to lowest eigenfunctions (already calculated)
180        !
181        ! ... scg is used as workspace
182        !
183        CALL s_1psi( npwx, npw, g(1), scg(1) )
184        !
185        CALL start_clock( 'cg:ortho' )
186        lagrange(1:m-1) = 0.d0
187        call divide(inter_bgrp_comm,m-1,m_start,m_end); !write(*,*) m-1,m_start,m_end
188        if(m_start.le.m_end) &
189        CALL DGEMV( 'T', npw2, m_end-m_start+1, 2.D0, psi(1,m_start), npw2, scg, 1, 0.D0, lagrange(m_start), 1 )
190        CALL mp_sum( lagrange( 1:m-1 ), inter_bgrp_comm )
191        IF ( gstart == 2 ) lagrange(1:m-1) = lagrange(1:m-1) - psi(1,1:m-1) * scg(1)
192        !
193        CALL mp_sum( lagrange( 1:m-1 ), intra_bgrp_comm )
194        !
195        DO j = 1, ( m - 1 )
196           !
197           g(:)   = g(:)   - lagrange(j) * psi(:,j)
198           scg(:) = scg(:) - lagrange(j) * psi(:,j)
199           !
200        END DO
201        CALL stop_clock( 'cg:ortho' )
202        !
203        IF ( iter /= 1 ) THEN
204           !
205           ! ... gg1 is <g(n+1)|S|g(n)> (used in Polak-Ribiere formula)
206           !
207           gg1 = 2.D0 * ddot( npw2, g(1), 1, g0(1), 1 )
208           IF ( gstart == 2 ) gg1 = gg1 - g(1) * g0(1)
209           !
210           CALL mp_sum(  gg1 , intra_bgrp_comm )
211           !
212        END IF
213        !
214        ! ... gg is <g(n+1)|S|g(n+1)>
215        !
216        g0(:) = scg(:)
217        !
218        g0(1:npw) = g0(1:npw) * precondition(:)
219        !
220        gg = 2.D0 * ddot( npw2, g(1), 1, g0(1), 1 )
221        IF ( gstart == 2 ) gg = gg - g(1) * g0(1)
222        !
223        CALL mp_sum(  gg , intra_bgrp_comm )
224        !
225        IF ( iter == 1 ) THEN
226           !
227           ! ... starting iteration, the conjugate gradient |cg> = |g>
228           !
229           gg0 = gg
230           !
231           cg(:) = g(:)
232           !
233        ELSE
234           !
235           ! ... |cg(n+1)> = |g(n+1)> + gamma(n) * |cg(n)>
236           !
237           ! ... Polak-Ribiere formula :
238           !
239           gamma = ( gg - gg1 ) / gg0
240           gg0   = gg
241           !
242           cg(:) = cg(:) * gamma
243           cg(:) = g + cg(:)
244           !
245           ! ... The following is needed because <y(n+1)| S P^2 |cg(n+1)>
246           ! ... is not 0. In fact :
247           ! ... <y(n+1)| S P^2 |cg(n)> = sin(theta)*<cg(n)|S|cg(n)>
248           !
249           psi_norm = gamma * cg0 * sint
250           !
251           cg(:) = cg(:) - psi_norm * psi(:,m)
252           !
253        END IF
254        !
255        ! ... |cg> contains now the conjugate gradient
256        ! ... set Im[ cg(G=0) ] -  needed for numerical stability
257        IF ( gstart == 2 ) cg(1) = CMPLX( DBLE(cg(1)), 0.D0 ,kind=DP)
258        !
259        ! ... |scg> is S|cg>
260        !
261        CALL hs_1psi( npwx, npw, cg(1), ppsi(1), scg(1) )
262        !
263        cg0 = 2.D0 * ddot( npw2, cg(1), 1, scg(1), 1 )
264        IF ( gstart == 2 ) cg0 = cg0 - cg(1) * scg(1)
265        !
266        CALL mp_sum(  cg0 , intra_bgrp_comm )
267        !
268        cg0 = SQRT( cg0 )
269        !
270        ! ... |ppsi> contains now HP|cg>
271        ! ... minimize <y(t)|PHP|y(t)> , where :
272        ! ...                         |y(t)> = cos(t)|y> + sin(t)/cg0 |cg>
273        ! ... Note that  <y|P^2S|y> = 1, <y|P^2S|cg> = 0 ,
274        ! ...           <cg|P^2S|cg> = cg0^2
275        ! ... so that the result is correctly normalized :
276        ! ...                           <y(t)|P^2S|y(t)> = 1
277        !
278        a0 = 4.D0 * ddot( npw2, psi(1,m), 1, ppsi(1), 1 )
279        IF ( gstart == 2 ) a0 = a0 - 2.D0 * psi(1,m) * ppsi(1)
280        !
281        a0 = a0 / cg0
282        !
283        CALL mp_sum(  a0 , intra_bgrp_comm )
284        !
285        b0 = 2.D0 * ddot( npw2, cg(1), 1, ppsi(1), 1 )
286        IF ( gstart == 2 ) b0 = b0 - cg(1) * ppsi(1)
287        !
288        b0 = b0 / cg0**2
289        !
290        CALL mp_sum(  b0 , intra_bgrp_comm )
291        !
292        e0 = e(m)
293        !
294        theta = 0.5D0 * ATAN( a0 / ( e0 - b0 ) )
295        !
296        cost = COS( theta )
297        sint = SIN( theta )
298        !
299        cos2t = cost*cost - sint*sint
300        sin2t = 2.D0*cost*sint
301        !
302        es(1) = 0.5D0 * (   ( e0 - b0 ) * cos2t + a0 * sin2t + e0 + b0 )
303        es(2) = 0.5D0 * ( - ( e0 - b0 ) * cos2t - a0 * sin2t + e0 + b0 )
304        !
305        ! ... there are two possible solutions, choose the minimum
306        !
307        IF ( es(2) < es(1) ) THEN
308           !
309           theta = theta + 0.5D0 * pi
310           !
311           cost = COS( theta )
312           sint = SIN( theta )
313           !
314        END IF
315        !
316        ! ... new estimate of the eigenvalue
317        !
318        e(m) = MIN( es(1), es(2) )
319        !
320        ! ... upgrade |psi>
321        !
322        psi(:,m) = cost * psi(:,m) + sint / cg0 * cg(:)
323        !
324        ! ... here one could test convergence on the energy
325        !
326        IF ( ABS( e(m) - e0 ) < ethr_m ) EXIT iterate
327        !
328        ! ... upgrade H|psi> and S|psi>
329        !
330        spsi(:) = cost * spsi(:) + sint / cg0 * scg(:)
331        !
332        hpsi(:) = cost * hpsi(:) + sint / cg0 * ppsi(:)
333        !
334     END DO iterate
335     !
336#if defined(__VERBOSE)
337     IF ( iter >= maxter ) THEN
338        WRITE(stdout,'("e(",i4,") = ",f12.6," eV  (not converged after ",i3,&
339                     & " iterations)")') m, e(m)*13.6058, iter
340     ELSE
341        WRITE(stdout,'("e(",i4,") = ",f12.6," eV  (",i3," iterations)")') &
342         m, e(m)*13.6058, iter
343     END IF
344     FLUSH (stdout)
345#endif
346     IF ( iter >= maxter ) notconv = notconv + 1
347     !
348     avg_iter = avg_iter + iter + 1
349     !
350     ! ... reorder eigenvalues if they are not in the right order
351     ! ... ( this CAN and WILL happen in not-so-special cases )
352     !
353     IF ( m > 1 .AND. reorder ) THEN
354        !
355        IF ( e(m) - e(m-1) < - 2.D0 * ethr_m ) THEN
356           !
357           ! ... if the last calculated eigenvalue is not the largest...
358           !
359           DO i = m - 2, 1, - 1
360              !
361              IF ( e(m) - e(i) > 2.D0 * ethr_m ) EXIT
362              !
363           END DO
364           !
365           i = i + 1
366           !
367           moved = moved + 1
368           !
369           ! ... last calculated eigenvalue should be in the
370           ! ... i-th position: reorder
371           !
372           e0 = e(m)
373           !
374           ppsi(:) = psi(:,m)
375           !
376           DO j = m, i + 1, - 1
377              !
378              e(j) = e(j-1)
379              !
380              psi(:,j) = psi(:,j-1)
381              !
382           END DO
383           !
384           e(i) = e0
385           !
386           psi(:,i) = ppsi(:)
387           !
388           ! ... this procedure should be good if only a few inversions occur,
389           ! ... extremely inefficient if eigenvectors are often in bad order
390           ! ... ( but this should not happen )
391           !
392        END IF
393        !
394     END IF
395     !
396  END DO
397  !
398  avg_iter = avg_iter / DBLE( nbnd )
399  !
400  DEALLOCATE( lagrange )
401  DEALLOCATE( ppsi )
402  DEALLOCATE( g0 )
403  DEALLOCATE( cg )
404  DEALLOCATE( g )
405  DEALLOCATE( hpsi )
406  DEALLOCATE( scg )
407  DEALLOCATE( spsi )
408  !
409  CALL stop_clock( 'rcgdiagg' )
410  !
411  RETURN
412  !
413END SUBROUTINE rcgdiagg
414