1subroutine lanczos_nonhermitian(j, npwx_npol, nbnd_occ, nksq, qj, Aqj, Sqj, qjold, n_ipol, u, alpha, beta, gamma, zeta)
2   !
3   !! Bi-Orthogonal Lanczos algorithm
4   !!
5   !! $$ g(w) =\sum_j (u,q_j){q_j,(w-A)^(-1)q}
6   !! Algorithm 1 in "Computer Physics Communications 185 (2014) 2080-2089"
7   !!
8   !! this subroutine generates alpha, beta and gamma coefficients (the
9   !! tridiagonal matrix elements), z = (u,q_j) elements. And update
10   !! Lanczos vectors
11   !
12   USE kinds,                    ONLY : dp
13   !
14   INTEGER, INTENT(IN)               :: j
15   !! iteration index
16   INTEGER, INTENT(IN)               :: npwx_npol
17   !! firts dimension of qj, Aqj, SAqj, qjold, u in qe npwx*npol
18   INTEGER, INTENT(IN)               :: nbnd_occ
19   !! second dimension of qj, Aqj, SAqj, qjold, n_ipol, u in qe nbnd
20   INTEGER, INTENT(IN)               :: nksq
21   !! third dimension of qj, Aqj, SAqj, qjold, n_ipol, u in qe nksq
22   INTEGER, INTENT(IN)               :: n_ipol
23   !! polarization, forth dimension of u and dimension of zeta
24   COMPLEX(kind=dp), INTENT(IN)      :: Aqj(npwx_npol,nbnd_occ,nksq, 2)
25   !! operator applied to qj vector
26   COMPLEX(kind=dp), INTENT(IN)      :: u(npwx_npol,nbnd_occ,nksq,n_ipol)
27   !! second lanczos vector, in qe d0psi (q0psi2 for eels)
28   COMPLEX(kind=dp), INTENT(IN)      :: Sqj(npwx_npol,nbnd_occ,nksq)
29   !! S operator applied to qj vector only for USPP, otherwise a copy of qj
30   COMPLEX(kind=dp), INTENT(INOUT)   :: qj(npwx_npol,nbnd_occ,nksq, 2)
31   !! qj vector, become qj+1 vector
32   COMPLEX(kind=dp), INTENT(INOUT)   :: qjold(npwx_npol,nbnd_occ,nksq, 2)
33   !! qj-1 vector, become qj vector
34   REAL(kind=dp),    INTENT(OUT)     :: alpha
35   !! diagonal cofficient of the tridiagonal matrix
36   REAL(kind=dp),    INTENT(OUT)     :: beta
37   !! lower coefficient of the tridiagonal matrix
38   REAL(kind=dp),    INTENT(OUT)     :: gamma
39   !! upper coefficient of the tridiagonal matrix
40   COMPLEX(kind=dp), INTENT(OUT)     :: zeta(n_ipol)
41   !! (u,q_j) products
42   !
43   COMPLEX(kind=dp),EXTERNAL :: lr_dot
44   !
45   INTEGER                :: size_evc, ip
46   !
47   size_evc = npwx_npol*nbnd_occ*nksq
48   !
49   ! By construction <p|Lq>=0 should be 0, forcing this both conserves
50   ! resources and increases stability.
51   !
52   alpha = 0.0d0
53   !
54   ! Orthogonality requirement: <v|\bar{L}|v> = 1
55   !
56   beta = dble(lr_dot(qj(:,:,:,1), Sqj(:,:,:)))
57   !
58   IF ( beta<0.0d0 ) THEN
59      !
60      beta = sqrt(-beta)
61      gamma = -beta
62      !
63   ELSEIF ( beta>0.0d0 ) THEN
64      !
65      ! X. Ge: Actually, this is the only case in the pseudo-Hermitian
66      ! algorithm.
67      !
68      beta = sqrt(beta)
69      gamma = beta
70      !
71   ENDIF
72   !
73   ! Renormalize q(i) and Lq(i), also p(i) and Lp(i) in the non-Hermitian case
74   !
75   CALL zscal(size_evc,cmplx(1.0d0/beta,0.0d0,kind=dp),qj(1,1,1,1),1)
76   CALL zscal(size_evc,cmplx(1.0d0/beta,0.0d0,kind=dp),Aqj(1,1,1,1),1)
77   !
78   CALL zscal(size_evc,cmplx(1.0d0/gamma,0.0d0,kind=dp),qj(1,1,1,2),1)
79   CALL zscal(size_evc,cmplx(1.0d0/gamma,0.0d0,kind=dp),Aqj(1,1,1,2),1)
80   !
81   ! Calculation of zeta coefficients.
82   ! See Eq.(35) in Malcioglu et al., Comput. Phys. Commun. 182, 1744 (2011).
83   !
84   IF (mod(j,2)==0) THEN
85      !
86      DO ip = 1, n_ipol
87         !
88         ! Optics: In the ultrasoft case, the S operator was already
89         ! applied to d0psi, so we have <S*d0psi|evc1>.
90         !
91         zeta(ip) = lr_dot(u(:,:,:,ip),qj(:,:,:,1))
92         !
93      ENDDO
94      !
95   ELSE
96      !
97      DO ip = 1, n_ipol
98         !
99         zeta(ip) = (0.0d0,0.0d0)
100         !
101      ENDDO
102      !
103   ENDIF
104   !
105   ! X. Ge: q(i+1) = Lq(i) - beta(i)*q(i-1);
106   ! Renormalization will be done in the begining of the next iteration.
107   ! In the non-Hermitian case, similar operation needs to be done also for p(i).
108   !
109   CALL zaxpy(size_evc,-cmplx(gamma,0.0d0,kind=dp),qjold(1,1,1,1),1,Aqj(1,1,1,1),1)
110   CALL zaxpy(size_evc,-cmplx(beta,0.0d0,kind=dp),qjold(1,1,1,2),1,Aqj(1,1,1,2),1)
111   !
112   ! X. Ge: Throw away q(i-1), and make q(i+1) to be the current vector,
113   ! be ready for the next iteration. Aqj will be free again after this
114   ! step
115   !
116   CALL zcopy(size_evc,qj(1,1,1,1),1,qjold(1,1,1,1),1)    ! qjold = qj
117   CALL zcopy(size_evc,Aqj(1,1,1,1),1,qj(1,1,1,1),1)      ! qj = Aqj
118   !
119   CALL zcopy(size_evc,qj(1,1,1,2),1,qjold(1,1,1,2),1)    ! qjold = qj
120   CALL zcopy(size_evc,Aqj(1,1,1,2),1,qj(1,1,1,2),1)      ! qj = Aqj
121   !
122end subroutine lanczos_nonhermitian
123