1!
2! Copyright (C) 2013-2015 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8! Written by Lorenzo Paulatto (2012-2013)
9! Gamma-only tricks by Simon Binnie
10! G-space code based on addusdens.f90 and compute_becsum.f90,
11! modified by Paolo Giannozzi (2015) for speed
12! Real space code based on realus.f90
13!-----------------------------------------------------------------------
14MODULE us_exx
15  !-----------------------------------------------------------------------
16  !! It contains most of the USPP+EXX code (Ultra-Soft PP + Exact Exchange).
17  !
18  ! Notes:
19  ! * compute_becxx is still in exx.f90 as it uses plenty of global variables
20  !   from there;
21  ! * some tests and loops are done directly in exx.f90;
22  ! * PAW specific parts are in paw_exx.f90;
23  ! * USPP terms in G-space are now computed on the "custom" grid (cutoff
24  !   ecutfock), the same used in exx.f90, instead of the "smooth" grid
25  !   (cutoff 4*ecutwfc). Not sure what happens when the two do not coincide,
26  !   but I expect things to work. Previously the "smooth" grid was used,
27  !   but this lead to erratic problems with non-coincident shell ordering.
28  !
29  USE kinds,   ONLY : DP
30  USE becmod,  ONLY : bec_type, calbec, ALLOCATE_bec_type, DEALLOCATE_bec_type
31  !
32  IMPLICIT NONE
33  !
34  SAVE
35  !
36  TYPE(bec_type), ALLOCATABLE :: becxx(:)
37  !! \(\langle\beta_I|\phi_j,k\rangle\), with the wfcs from \(\textrm{exxbuff}\)
38  TYPE(bec_type), ALLOCATABLE :: becxx0(:)
39  !! \(\langle\beta_I|\phi_j,k\rangle\), on the reduced k-points, local to pools. The visible index
40  !! is k; while I and J are inside bec_type.
41  !
42  !COMPLEX(DP),ALLOCATABLE :: becxx_gamma(:,:)
43  ! gamma only version of becxx%r. two bands stored per stripe.
44  COMPLEX(DP), ALLOCATABLE :: qgm(:,:)
45  !! used in \(\textrm{addusxx_g}\) and \(\textrm{newdxx_g}\). Pre-computed
46  !! projectors.
47  INTEGER, ALLOCATABLE :: nij_type(:)
48  !! (ih,jh) pairs offset for all atom types.
49  !
50 CONTAINS ! ~~+~~---//--~~~-+
51  !
52  !-------------------------------------------------------------------------
53  FUNCTION bexg_merge( w, m, n, imin, imax, i )
54    !-----------------------------------------------------------------------
55    !! Used at Gamma point when number of bands is odd, especially for band
56    !! parallelisation when band group is odd. Returns:
57    !! \begin{equation}\notag
58    !! \begin{split}
59    !!   &w(i)+i\ w(i+1),\qquad & \text{if}\quad \text{imin}\leq i<\text{imax} \\
60    !!   &w(i), & \text{if}\quad i=\text{imax} \\
61    !!   &i\ w(i+1), & \text{if}\quad i=\text{imin}-1 \\
62    !!   &0, &\text{otherwise}
63    !! \end{split}
64    !! \end{equation}
65    !
66    INTEGER,  INTENT(IN) :: m, n
67    INTEGER,  INTENT(IN) :: imin, imax, i
68    REAL(DP), INTENT(IN) :: w(m,n)
69    COMPLEX(DP) :: bexg_merge(m)
70    !
71    bexg_merge = (0._dp, 0._dp)
72    IF (imin<=i .AND. i<imax) THEN
73      bexg_merge = CMPLX(w(:,i),w(:,i+1), KIND=DP)
74    ELSEIF (i==imax) THEN
75      bexg_merge = CMPLX(w(:,i),   0._dp, KIND=DP)
76    ELSEIF (i+1==imin) THEN
77      bexg_merge = CMPLX( 0._dp,w(:,i+1), KIND=DP)
78    ELSE
79      bexg_merge = CMPLX( 0._dp,   0._dp, KIND=DP)
80    ENDIF
81    !
82    RETURN
83    !
84  END FUNCTION bexg_merge
85  !
86  !
87  !-----------------------------------------------------------------------
88  SUBROUTINE qvan_init( ngms, xkq, xk )
89    !-----------------------------------------------------------------------
90    !! Allocate and store augmentation charges in G space Q(G) for USPP.
91    !
92    USE cell_base,           ONLY : tpiba
93    USE ions_base,           ONLY : ntyp => nsp
94    USE uspp_param,          ONLY : upf, nh, lmaxq
95    USE gvect,               ONLY : g
96    !
97    IMPLICIT NONE
98    !
99    REAL(DP), INTENT(IN) :: xkq(3)
100    !! coordintes of k+q point
101    REAL(DP), INTENT(IN) :: xk(3)
102    !! coordinates of k-point
103    INTEGER, INTENT(IN)  :: ngms
104    !! number of G-space points in this process
105    !
106    ! ... local variables
107    !
108    REAL(DP), ALLOCATABLE :: ylmk0(:,:), qmod(:), q(:,:), qq(:)
109    INTEGER  :: nij, ijh, ig, nt, ih, jh
110    !
111    CALL start_clock( 'qvan_init' )
112    !
113    ! nij = number of (ih,jh) pairs for all atom types
114    !
115    ALLOCATE( nij_type(ntyp) )
116    nij = 0
117    DO nt = 1, ntyp
118       nij_type(nt) = nij
119       IF ( upf(nt)%tvanp ) nij = nij + (nh(nt)*(nh(nt)+1))/2
120    ENDDO
121    ALLOCATE( qgm(ngms,nij) )
122    !
123    ALLOCATE( ylmk0(ngms,lmaxq*lmaxq), qmod(ngms) )
124    ALLOCATE( q(3,ngms), qq(ngms) )
125    DO ig = 1, ngms
126       q(:,ig) = xk(:) - xkq(:) + g(:,ig)
127       qq(ig)  = SUM(q(:,ig)**2)
128       qmod(ig)= SQRT(qq(ig))*tpiba
129    ENDDO
130    CALL ylmr2( lmaxq*lmaxq, ngms, q, qq, ylmk0 )
131    DEALLOCATE( qq, q )
132    !
133    ! ijh = position of (ih,jh) pairs for atom type nt
134    !
135    ijh = 0
136    DO nt = 1, ntyp
137       IF ( upf(nt)%tvanp ) THEN
138          DO ih = 1, nh(nt)
139             DO jh = ih, nh(nt)
140                ijh = ijh + 1
141                CALL qvan2( ngms, ih, jh, nt, qmod, qgm(1,ijh), ylmk0 )
142             ENDDO
143          ENDDO
144       ENDIF
145    ENDDO
146    DEALLOCATE( qmod, ylmk0 )
147    CALL stop_clock( 'qvan_init' )
148    !
149  END SUBROUTINE qvan_init
150  !
151  !
152  !-----------------------------------------------------------------------
153  SUBROUTINE qvan_clean()
154    !-----------------------------------------------------------------------
155    !! Deallocates qvan variables.
156    !
157    DEALLOCATE( qgm )
158    DEALLOCATE( nij_type )
159    !
160  END SUBROUTINE qvan_clean
161  !
162  !
163  !-----------------------------------------------------------------------
164  SUBROUTINE addusxx_g( dfftt, rhoc, xkq, xk, flag, becphi_c, becpsi_c, &
165                        becphi_r, becpsi_r )
166    !-----------------------------------------------------------------------
167    !! Add US contribution to \(\text{rhoc}\) for hybrid functionals:
168    !
169    !! * \(\text{flag}\) = 'c': add complex contribution;
170    !! * \(\text{flag}\) = 'r': add real contribution to the real part of
171    !!   \(\text{rhoc}\);
172    !! * \(\text{flag}\) = 'i': add real contribution to the imaginary part.
173    !
174    !! The two latter cases are used together with gamma tricks to store contributions
175    !! from two bands into the real and the imaginary part separately.
176    !
177    USE constants,           ONLY : tpi
178    USE ions_base,           ONLY : nat, ntyp => nsp, ityp, tau
179    USE uspp,                ONLY : nkb, vkb,  okvan, indv_ijkb0, ijtoh
180    USE uspp_param,          ONLY : upf, nh, nhm, lmaxq
181    USE gvect,               ONLY : g, eigts1, eigts2, eigts3, mill, gstart
182    USE control_flags,       ONLY : gamma_only
183    USE fft_types,           ONLY : fft_type_descriptor
184    !
185    IMPLICIT NONE
186    !
187    TYPE(fft_type_descriptor), INTENT(IN) :: dfftt
188    !! In input it gets a slice of \(\langle\text{beta}|\text{left}\rangle\)
189    !! and \(\langle\text{beta}|\text{right}\rangle\) only for this
190    !! \(\text{kpoint}\) and this \(\text{band}\).
191    COMPLEX(DP), INTENT(INOUT) :: rhoc(dfftt%nnr)
192    !! charge density.
193    COMPLEX(DP), INTENT(IN), OPTIONAL :: becphi_c(nkb)
194    !! \(\langle\beta_I|\phi_j,k\rangle\) - complex contr.
195    COMPLEX(DP), INTENT(IN), OPTIONAL :: becpsi_c(nkb)
196    REAL(DP), INTENT(IN), OPTIONAL :: becphi_r(nkb)
197    REAL(DP), INTENT(IN), OPTIONAL :: becpsi_r(nkb)
198    REAL(DP), INTENT(IN) :: xkq(3)
199    !!  coordintes of k+q point
200    REAL(DP), INTENT(IN) :: xk(3)
201    !!  coordintes of k point
202    CHARACTER(LEN=1), INTENT(IN) :: flag
203    !! see main comment
204    !
205    ! ... local variables
206    !
207    COMPLEX(DP),ALLOCATABLE :: aux1(:), aux2(:), eigqts(:)
208    INTEGER :: ngms, ikb, jkb, ijkb0, ih, jh, na, nt, ig, nij, ijh
209    COMPLEX(DP) :: becfac_c
210    REAL(DP) :: arg, becfac_r
211    LOGICAL :: add_complex, add_real, add_imaginary
212    ! cache blocking parameters
213    INTEGER, PARAMETER :: blocksize = 256
214    INTEGER :: iblock, numblock, realblocksize, offset
215    !
216    IF (.NOT.okvan) RETURN
217    CALL start_clock( 'addusxx' )
218    !
219    ngms = dfftt%ngm
220    add_complex = ( flag=='c' .OR. flag=='C' )
221    add_real    = ( flag=='r' .OR. flag=='R' )
222    add_imaginary=( flag=='i' .OR. flag=='I' )
223    IF ( .NOT. (add_complex .OR. add_real .OR. add_imaginary) ) &
224       CALL errore('addusxx_g', 'called with incorrect flag: '//flag, 1 )
225    IF ( .NOT. gamma_only .AND. ( add_real .OR. add_imaginary) ) &
226       CALL errore('addusxx_g', 'need gamma tricks for this flag: '//flag, 2 )
227    IF ( gamma_only .AND. add_complex ) &
228       CALL errore('addusxx_g', 'gamma trick not good for this flag: '//flag, 3 )
229    IF ( (add_complex  .AND.(.NOT. PRESENT(becphi_c).OR..NOT.PRESENT(becpsi_c))) .OR. &
230         (add_real     .AND.(.NOT. PRESENT(becphi_r).OR..NOT.PRESENT(becpsi_r))) .OR. &
231         (add_imaginary.AND.(.NOT. PRESENT(becphi_r).OR..NOT.PRESENT(becpsi_r))) )    &
232       CALL errore('addusxx_g', 'called with incorrect arguments', 2 )
233    !
234    ALLOCATE( eigqts(nat) )
235    !
236    DO na = 1, nat
237      arg = tpi* SUM( (xk(:) - xkq(:))*tau(:,na) )
238      eigqts(na) = CMPLX( COS(arg), -SIN(arg), kind=DP)
239    ENDDO
240    !
241    ! setting cache blocking size
242    numblock  = (ngms+blocksize-1)/blocksize
243    !
244    !$omp parallel private(aux1,aux2,nij,offset,realblocksize,ijkb0,ikb,jkb)
245    !
246    ALLOCATE( aux1(blocksize), aux2(blocksize) )
247    !
248    DO nt = 1, ntyp
249       !
250       IF ( upf(nt)%tvanp ) THEN
251          !
252          nij = nij_type(nt)
253          !
254          !$omp do
255          DO iblock = 1, numblock
256             !
257             DO na = 1, nat
258                !
259                IF (ityp(na) /= nt) CYCLE
260                !
261                offset = (iblock-1)*blocksize
262                realblocksize = MIN(ngms-offset,blocksize)
263                !
264                ! ijkb0 points to the manifold of beta functions for atom na
265                !
266                ijkb0 = indv_ijkb0(na)
267                !
268                aux2(:) = (0.0_dp, 0.0_dp)
269                DO ih = 1, nh(nt)
270                   ikb = ijkb0 + ih
271                   aux1(:) = (0.0_dp, 0.0_dp)
272                   DO jh = 1, nh(nt)
273                      jkb = ijkb0 + jh
274                      IF ( add_complex ) THEN
275                         aux1(1:realblocksize) = aux1(1:realblocksize)                &
276                             + qgm(offset+1:offset+realblocksize,nij+ijtoh(ih,jh,nt)) &
277                             * becpsi_c(jkb)
278                      ELSE
279                         aux1(1:realblocksize) = aux1(1:realblocksize)                &
280                             + qgm(offset+1:offset+realblocksize,nij+ijtoh(ih,jh,nt)) &
281                             * becpsi_r(jkb)
282                      ENDIF
283                   ENDDO
284                   IF ( add_complex ) THEN
285                      aux2(1:realblocksize) = aux2(1:realblocksize) &
286                                            + aux1(1:realblocksize)*CONJG(becphi_c(ikb))
287                   ELSE
288                      aux2(1:realblocksize) = aux2(1:realblocksize) &
289                                            + aux1(1:realblocksize)*becphi_r(ikb)
290                   ENDIF
291                ENDDO
292                !
293                aux2(1:realblocksize) = aux2(1:realblocksize) * eigqts(na) * &
294                         eigts1(mill(1,offset+1:offset+realblocksize), na) * &
295                         eigts2(mill(2,offset+1:offset+realblocksize), na) * &
296                         eigts3(mill(3,offset+1:offset+realblocksize), na)
297                !
298                IF ( add_complex ) THEN
299                   rhoc(dfftt%nl(offset+1:offset+realblocksize)) =           &
300                               rhoc(dfftt%nl(offset+1:offset+realblocksize)) &
301                               + aux2(1:realblocksize)
302                !
303                ELSEIF ( add_real ) THEN
304                   rhoc(dfftt%nl(offset+1:offset+realblocksize)) =           &
305                               rhoc(dfftt%nl(offset+1:offset+realblocksize)) &
306                               + aux2(1:realblocksize)
307                   !
308                   IF ( gstart==2 .AND. iblock==1 ) aux2(1) = (0.0_dp,0.0_dp)
309                   rhoc(dfftt%nlm(offset+1:offset+realblocksize)) =          &
310                              rhoc(dfftt%nlm(offset+1:offset+realblocksize)) &
311                              + CONJG(aux2(1:realblocksize))
312                   !
313                ELSEIF ( add_imaginary ) THEN
314                   rhoc(dfftt%nl(offset+1:offset+realblocksize)) =           &
315                              rhoc(dfftt%nl(offset+1:offset+realblocksize))  &
316                              + (0.0_dp,1.0_dp) * aux2(1:realblocksize)
317                   !
318                   IF ( gstart==2 .AND. iblock==1 ) aux2(1) = (0.0_dp,0.0_dp)
319                   rhoc(dfftt%nlm(offset+1:offset+realblocksize)) =          &
320                              rhoc(dfftt%nlm(offset+1:offset+realblocksize)) &
321                              + (0.0_dp,1.0_dp)* CONJG(aux2(1:realblocksize))
322                ENDIF
323             ENDDO ! nat
324          ENDDO   ! block
325          !$omp end do nowait
326          !
327       ENDIF
328       !
329    ENDDO   ! nt
330    !
331    DEALLOCATE( aux2, aux1 )
332    !
333    !$omp end parallel
334    !
335    DEALLOCATE( eigqts )
336    !
337    CALL stop_clock( 'addusxx' )
338    !
339    RETURN
340    !
341  END SUBROUTINE addusxx_g
342  !
343  !
344  !-----------------------------------------------------------------------
345  SUBROUTINE newdxx_g( dfftt, vc, xkq, xk, flag, deexx, becphi_r, becphi_c )
346    !-----------------------------------------------------------------------
347    !! This subroutine computes some sort of EXX contribution to the non-local
348    !! part of the hamiltonian:
349    !! \[ \alpha_{Ii} = \int \sum_{Jj} Q_{IJ}(r) V^{i,j}_\text{Fock}
350    !! \langle\beta_J|\phi_j\rangle dr^3 \]
351    !! The actual contribution will be (summed outside):
352    !! \[ H = H+\sum_I |\beta_I\rangle \alpha_{Ii} \]
353    !! Three cases for \(\text{iflag}\):
354    !
355    !! * flag = 'c': \(V(G)\) is contained in complex array vc;
356    !! * flag = 'r': \(V(G)=v_1(G)+i\ v_2(G)\): select \(v_1(G)\);
357    !! * flag = 'i': \(V(G)=v_1(G)+i\ v_2(G)\): select \(v_2(G)\).
358    !
359    !! The two latter cases are used together with gamma tricks.
360    !
361    USE constants,      ONLY : tpi
362    USE ions_base,      ONLY : nat, ntyp => nsp, ityp, tau
363    USE uspp,           ONLY : nkb, vkb,  okvan, indv_ijkb0, ijtoh
364    USE uspp_param,     ONLY : upf, nh, nhm, lmaxq
365    USE gvect,          ONLY : gg, g, gstart, eigts1, eigts2, eigts3, mill
366    USE cell_base,      ONLY : omega
367    USE control_flags,  ONLY : gamma_only
368    USE fft_types,      ONLY : fft_type_descriptor
369    !
370    IMPLICIT NONE
371    !
372    TYPE ( fft_type_descriptor ), INTENT(IN) :: dfftt
373    COMPLEX(DP), INTENT(IN) :: vc(dfftt%nnr)
374    ! In input I get a slice of <beta|left> and <beta|right>
375    ! only for this kpoint and this band
376    COMPLEX(DP), INTENT(IN), OPTIONAL :: becphi_c(nkb)
377    REAL(DP),    INTENT(IN), OPTIONAL :: becphi_r(nkb)
378    COMPLEX(DP), INTENT(INOUT) :: deexx(nkb)
379    REAL(DP), INTENT(IN) :: xk(3), xkq(3)
380    CHARACTER(LEN=1), INTENT(IN) :: flag
381    !
382    ! ... local variables
383    !
384    INTEGER :: ngms, ig, ikb, jkb, ijkb0, ih, jh, na, ina, nt, nij
385    REAL(DP) :: fact
386    !
387    COMPLEX(DP),ALLOCATABLE :: auxvc(:), &  ! vc in order of |g|
388                               eigqts(:), aux1(:), aux2(:)
389    COMPLEX(DP) :: fp, fm
390    REAL(DP) :: arg
391    LOGICAL :: add_complex, add_real, add_imaginary
392    ! cache blocking parameters
393    INTEGER, PARAMETER :: blocksize = 256
394    INTEGER :: iblock, numblock, realblocksize, offset
395    !
396    IF (.NOT.okvan) RETURN
397    !
398    ngms = dfftt%ngm
399    add_complex = ( flag=='c' .OR. flag=='C' )
400    add_real    = ( flag=='r' .OR. flag=='R' )
401    add_imaginary=( flag=='i' .OR. flag=='I' )
402    IF ( .NOT. (add_complex .OR. add_real .OR. add_imaginary) ) &
403       CALL errore('newdxx_g', 'called with incorrect flag: '//flag, 1 )
404    IF ( .NOT. gamma_only .AND. ( add_real .OR. add_imaginary) ) &
405       CALL errore('newdxx_g', 'need gamma tricks for this flag: '//flag, 2 )
406    IF ( gamma_only .AND. add_complex ) &
407       CALL errore('newdxx_g', 'gamma trick not good for this flag: '//flag, 3 )
408    IF ( ( add_complex  .AND..NOT. PRESENT(becphi_c) ) .OR. &
409         ( add_real     .AND..NOT. PRESENT(becphi_r) ) .OR. &
410         ( add_imaginary.AND..NOT. PRESENT(becphi_r) ) )    &
411       CALL errore('newdxx_g', 'called with incorrect arguments', 2 )
412    !
413    CALL start_clock( 'newdxx' )
414    !
415    ALLOCATE( auxvc(ngms) )
416    ALLOCATE(eigqts(nat))
417    !
418    DO na = 1, nat
419      arg = tpi* SUM( (xk(:) - xkq(:))*tau(:,na) )
420      eigqts(na) = CMPLX( COS(arg), -SIN(arg), kind=DP)
421    ENDDO
422    !
423    ! reindex just once at the beginning
424    ! select real or imaginary part if so desired
425    ! fact=2 to account for G and -G components
426    !
427    IF ( add_complex ) THEN
428       auxvc(1:ngms) = vc(dfftt%nl(1:ngms) )
429       fact=omega
430    ELSEIF ( add_real ) THEN
431       DO ig = 1, ngms
432          fp = (vc(dfftt%nl(ig)) + vc(dfftt%nlm(ig)))/2.0_dp
433          fm = (vc(dfftt%nl(ig)) - vc(dfftt%nlm(ig)))/2.0_dp
434          auxvc(ig) = CMPLX( DBLE(fp), AIMAG(fm), KIND=dp)
435       ENDDO
436       fact=2.0_dp*omega
437    ELSEIF ( add_imaginary ) THEN
438       DO ig = 1, ngms
439          fp = (vc(dfftt%nl(ig)) + vc(dfftt%nlm(ig)))/2.0_dp
440          fm = (vc(dfftt%nl(ig)) - vc(dfftt%nlm(ig)))/2.0_dp
441          auxvc(ig) = CMPLX( AIMAG(fp), -DBLE(fm), KIND=dp)
442       ENDDO
443       fact=2.0_dp*omega
444    ENDIF
445    !
446    ! setting cache blocking size
447    numblock  = (ngms+blocksize-1)/blocksize
448    !
449    !$omp parallel private(aux1,aux2,nij,offset,realblocksize,ijkb0,ikb,jkb,nt)
450    !
451    ALLOCATE( aux1(blocksize), aux2(blocksize) )
452    !
453    ! Note: cache blocking is more efficient when atoms are grouped by
454    ! specie (see history). However, it requires atom sorting info available
455    ! in ion_base which requires consistent fixing.
456    !
457    DO iblock = 1, numblock
458       !
459       offset = (iblock-1)*blocksize
460       realblocksize = MIN(ngms-offset,blocksize)
461       !
462       !$omp do
463       DO na = 1, nat
464          !
465          nt = ityp(na)
466          !
467          IF ( upf(nt)%tvanp ) THEN
468             !
469             nij = nij_type(nt)
470             !
471             ! ijkb0 points to the manifold of beta functions for atom na
472             !
473             ijkb0 = indv_ijkb0(na)
474             !
475             aux2(1:realblocksize) = CONJG( auxvc(offset+1:offset+realblocksize) ) * eigqts(na) * &
476                        eigts1(mill(1,offset+1:offset+realblocksize), na) * &
477                        eigts2(mill(2,offset+1:offset+realblocksize), na) * &
478                        eigts3(mill(3,offset+1:offset+realblocksize), na)
479             DO ih = 1, nh(nt)
480                ikb = ijkb0 + ih
481                aux1(:) = (0.0_dp, 0.0_dp)
482                DO jh = 1, nh(nt)
483                   jkb = ijkb0 + jh
484                   IF ( gamma_only ) THEN
485                      aux1(1:realblocksize) = aux1(1:realblocksize) + becphi_r(jkb) * &
486                           CONJG( qgm(offset+1:offset+realblocksize,nij+ijtoh(ih,jh,nt)) )
487                   ELSE
488                      aux1(1:realblocksize) = aux1(1:realblocksize) + becphi_c(jkb) * &
489                           CONJG( qgm(offset+1:offset+realblocksize,nij+ijtoh(ih,jh,nt)) )
490                   ENDIF
491                ENDDO
492                !
493                deexx(ikb) = deexx(ikb) + fact*dot_product( aux2(1:realblocksize),aux1(1:realblocksize))
494                IF( gamma_only .AND. gstart == 2 .AND. iblock == 1 ) &
495                     deexx(ikb) =  deexx(ikb) - omega * CONJG(aux2(1))*aux1(1)
496             ENDDO
497          ENDIF
498          !
499       ENDDO ! nat
500       !$omp end do nowait
501       !
502    ENDDO ! block
503    !
504    DEALLOCATE( aux2, aux1 )
505    !
506    !$omp end parallel
507    !
508    DEALLOCATE( eigqts, auxvc )
509    CALL stop_clock( 'newdxx' )
510    !
511    RETURN
512    !
513  END SUBROUTINE newdxx_g
514  !
515  !
516  !-------------------------------------------------------------------------------
517  SUBROUTINE add_nlxx_pot( lda, hpsi, xkp, npwp, igkp, deexx, eps_occ, exxalfa )
518    !----------------------------------------------------------------------------
519    !! This subroutine computes some sort of EXX contribution to the non-local
520    !! part of the hamiltonian:
521    !! \[ \alpha_{Ii} = \int \sum_{Jj} Q_{IJ}(r) V^{i,j}_\text{Fock}
522    !!    \langle\beta_J|\phi_j\rangle d^3(r) \]
523    !! The actual contribution will be (summed outside):
524    !! \[ H = H+\sum_I |\beta_I\rangle \alpha_{Ii} \]
525    !
526    USE ions_base,           ONLY : nat, ntyp => nsp, ityp
527    USE uspp,                ONLY : nkb, okvan,indv_ijkb0
528    USE uspp_param,          ONLY : upf, nh
529    USE wvfct,               ONLY : nbnd, npwx
530    USE control_flags,       ONLY : gamma_only
531    !
532    IMPLICIT NONE
533    !
534    ! ... In input I get a slice of <beta|left> and <beta|right> only for this
535    ! kpoint and this band
536    !
537    INTEGER, INTENT(IN) :: lda
538    !! leading dimension of hpsi
539    COMPLEX(DP), INTENT(INOUT) :: hpsi(lda)!*npol)
540    !! the hamiltonian
541    COMPLEX(DP), INTENT(IN) :: deexx(nkb)
542    !! \[ \int \sum_J Q_{IJ} \langle\beta_J|\phi_i\rangle dr^3 \]
543    REAL(DP), INTENT(IN) :: xkp(3)
544    !! current k point
545    REAL(DP), INTENT(IN) :: exxalfa
546    !! fraction of ex. exchange to add
547    REAL(DP), INTENT(IN) :: eps_occ
548    !! skip band where occupation is less than this
549    INTEGER, INTENT(IN) :: npwp, igkp(npwp)
550    !
551    ! ... local variables
552    !
553    INTEGER :: ikb, ijkb0, ih, na, np
554    INTEGER :: ig
555    COMPLEX(DP), ALLOCATABLE :: vkbp(:,:)  ! the <beta_I| function
556    !
557    CALL start_clock( 'nlxx_pot' )
558    !
559    IF (.NOT. okvan) RETURN
560    !
561    ! These are beta functions for k-point "xkp" with indices "igkp"
562    ! Possibly already available in the calling routines vexx, since
563    ! xkp and igkp are the "current" k-point and indices in hpsi
564    !
565    ALLOCATE( vkbp(npwx,nkb) )
566    CALL init_us_2( npwp, igkp, xkp, vkbp )
567    !
568    DO np = 1, ntyp
569      ONLY_FOR_USPP : &
570      IF ( upf(np)%tvanp ) THEN
571          DO na = 1, nat
572            IF (ityp(na)==np) THEN
573              DO ih = 1, nh(np)
574                ikb = indv_ijkb0(na) + ih
575                !
576                IF (ABS(deexx(ikb)) < eps_occ) CYCLE
577                !
578                IF (gamma_only) THEN
579                   DO ig = 1, npwp
580                      hpsi(ig) = hpsi(ig) - exxalfa*DBLE(deexx(ikb))*vkbp(ig,ikb)
581                   ENDDO
582                ELSE
583                   DO ig = 1, npwp
584                      hpsi(ig) = hpsi(ig) - exxalfa*deexx(ikb)*vkbp(ig,ikb)
585                   ENDDO
586                ENDIF
587              ENDDO
588            ENDIF
589          ENDDO ! nat
590      ENDIF &
591      ONLY_FOR_USPP
592    ENDDO
593    !
594    DEALLOCATE( vkbp )
595    CALL stop_clock( 'nlxx_pot' )
596    !
597    RETURN
598    !
599  END SUBROUTINE add_nlxx_pot
600  !
601  !
602  !------------------------------------------------------------------------
603  SUBROUTINE addusxx_r( rho, becphi, becpsi )
604    !------------------------------------------------------------------------
605    !! This routine adds to the two wavefunctions density (in real space)
606    !! the part that is due to the US augmentation.
607    !! NOTE: the density in this case is NOT real and NOT normalized to 1,
608    !!       except when (bec-)\(\text{phi}\) and (bec-)\(\text{psi}\) are equal,
609    !!       or with gamma tricks.
610    !
611    !! With gamma tricks: input rho must contain contributions from band 1
612    !! in real part, from band 2 in imaginary part. Call routine twice:
613    !
614    !! * with \(\text{becphi}=\langle\beta|\phi(1)\rangle\) (real);
615    !! * then with \(\text{becphi}=-i\langle\beta|\phi(2)\rangle\) (imaginary).
616    !
617    USE ions_base,        ONLY : nat, ityp
618    USE cell_base,        ONLY : omega
619    USE uspp,             ONLY : okvan, nkb, ijtoh, indv_ijkb0
620    USE uspp_param,       ONLY : upf, nh
621    USE realus,           ONLY : tabxx
622    !
623    IMPLICIT NONE
624    !
625    COMPLEX(DP), INTENT(INOUT) :: rho(:)
626    !! charge density
627    COMPLEX(DP), INTENT(IN) :: becphi(nkb)
628    !! see main comment
629    COMPLEX(DP), INTENT(IN) :: becpsi(nkb)
630    !! see main comment
631    !
632    ! ... local variables
633    !
634    INTEGER :: ia, nt, ir, irb, ih, jh, mbia
635    INTEGER :: ikb, jkb
636    !
637    IF ( .NOT. okvan ) RETURN
638    !
639    CALL start_clock( 'addusxx' )
640    !
641    DO ia = 1, nat
642      !
643      mbia = tabxx(ia)%maxbox
644      IF ( mbia == 0 ) CYCLE
645      !
646      nt = ityp(ia)
647      IF ( .NOT. upf(nt)%tvanp ) CYCLE
648      !
649      DO ih = 1, nh(nt)
650        DO jh = 1, nh(nt)
651          ikb = indv_ijkb0(ia) + ih
652          jkb = indv_ijkb0(ia) + jh
653          DO ir = 1, mbia
654            irb = tabxx(ia)%box(ir)
655            rho(irb) = rho(irb) + tabxx(ia)%qr(ir,ijtoh(ih,jh,nt)) &
656                                  *CONJG(becphi(ikb))*becpsi(jkb)
657          ENDDO
658        ENDDO
659      ENDDO
660      !
661    ENDDO
662    !
663    CALL stop_clock( 'addusxx' )
664    !
665    RETURN
666    !
667  END SUBROUTINE addusxx_r
668  !
669  !
670  !------------------------------------------------------------------------
671  SUBROUTINE newdxx_r( dfftt, vr, becphi, deexx )
672    !------------------------------------------------------------------------
673    !! This routine computes the integral of the perturbed potential with
674    !! the Q function in real space.
675    !
676    USE cell_base,          ONLY : omega
677    USE ions_base,          ONLY : nat, ityp
678    USE uspp_param,         ONLY : upf, nh, nhm
679    USE uspp,               ONLY : nkb, ijtoh, indv_ijkb0
680    USE noncollin_module,   ONLY : nspin_mag
681    USE fft_types,          ONLY : fft_type_descriptor
682    USE realus,             ONLY : tabxx
683    !
684    IMPLICIT NONE
685    !
686    ! ... In input I get a slice of <beta|left> and <beta|right>
687    !
688    TYPE(fft_type_descriptor), INTENT(IN) :: dfftt
689    !! ...
690    COMPLEX(DP), INTENT(IN) :: vr(:)
691    !! the potential
692    COMPLEX(DP), INTENT(IN) :: becphi(nkb)
693    !! ...
694    COMPLEX(DP), INTENT(INOUT) :: deexx(nkb)
695    !! contribution to integral
696    !
697    ! ... local variables
698    !
699    INTEGER :: ia, ih, jh, ir, nt
700    INTEGER :: mbia
701    INTEGER :: ikb, jkb, ijkb0
702    REAL(DP) :: domega
703    COMPLEX(DP) :: aux
704    !
705    CALL start_clock( 'newdxx' )
706    !
707    domega = omega/(dfftt%nr1 *dfftt%nr2 *dfftt%nr3)
708    !
709    DO ia = 1, nat
710      !
711      mbia = tabxx(ia)%maxbox
712      IF ( mbia == 0 ) CYCLE
713      !
714      nt = ityp(ia)
715      IF ( .NOT. upf(nt)%tvanp ) CYCLE
716      !
717      DO ih = 1, nh(nt)
718        DO jh = 1, nh(nt)
719          ijkb0 = indv_ijkb0(ia)
720          ikb = ijkb0 + ih
721          jkb = ijkb0 + jh
722          !
723          aux = 0._dp
724          DO ir = 1, mbia
725              aux = aux + tabxx(ia)%qr(ir,ijtoh(ih,jh,nt))*vr(tabxx(ia)%box(ir))
726          ENDDO
727          deexx(ikb) = deexx(ikb) + becphi(jkb)*domega*aux
728          !
729        ENDDO
730      ENDDO
731      !
732    ENDDO
733    !
734    CALL stop_clock( 'newdxx' )
735    !
736  END SUBROUTINE newdxx_r
737  !
738  !
739  !------------------------------------------------------------------------
740  SUBROUTINE store_becxx0( ik, becp )
741    !------------------------------------------------------------------------
742    !! In the EXX case with ultrasoft or PAW, a copy of \(\text{becp}\) will be
743    !! saved in a global variable to be rotated later.
744    !
745    USE klist,    ONLY : nks
746    USE uspp,     ONLY : nkb
747    USE becmod,   ONLY : bec_type, allocate_bec_type, beccopy
748    USE wvfct,    ONLY : nbnd
749    USE funct,    ONLY : dft_is_hybrid
750    USE uspp,     ONLY : okvan
751    USE mp_bands, ONLY : inter_bgrp_comm
752
753    !
754    IMPLICIT NONE
755    !
756    INTEGER,INTENT(IN) :: ik
757    TYPE(bec_type),INTENT(IN) :: becp
758    INTEGER :: jk
759    !
760    IF (.NOT. okvan) RETURN
761    IF (.NOT. dft_is_hybrid()) RETURN
762    !
763    IF(.NOT. ALLOCATED(becxx0)) THEN
764      ALLOCATE( becxx0(nks) )
765      DO jk = 1,nks
766        CALL allocate_bec_type( nkb, nbnd, becxx0(jk) )
767      ENDDO
768    ENDIF
769    !
770    IF (ik<1 .OR. ik>nks) CALL errore( "store_becxx0", "unexpected ik", 1 )
771    !
772    CALL beccopy( becp, becxx0(ik), nkb, nbnd, inter_bgrp_comm )
773    !
774  END SUBROUTINE
775  !
776  !
777  !-----------------------------------------------------------------------
778  SUBROUTINE rotate_becxx( nkqs, index_xk, index_sym, xkq_collect )
779    !-----------------------------------------------------------------------
780    !! Collect \(\text{becxx0}\) (the product \(\langle\beta|\psi\rangle\) on
781    !! the irreducible wedge) among pools, then rotate them to reconstruct the
782    !! k+q grid. In order to have the necessary data in \(\text{becxx0}\), you
783    !! must call \(\texttt{store_becxx0}\) in \(\texttt{sum_bands}\).
784    !
785    USE kinds,                ONLY : DP
786    USE wvfct,                ONLY : nbnd
787    USE uspp,                 ONLY : nkb, okvan
788    USE becmod,               ONLY : calbec, allocate_bec_type, &
789                                     deallocate_bec_type, bec_type
790    ! USE us_exx,              ONLY : becp_rotate_k, becxx0, becxx
791    USE klist,                ONLY : xk, nkstot, nks
792    USE io_global,            ONLY : stdout
793    USE mp,                   ONLY : mp_bcast, mp_sum
794    USE mp_pools,             ONLY : me_pool, my_pool_id, root_pool, &
795                                     inter_pool_comm, intra_pool_comm
796    USE control_flags,        ONLY : gamma_only
797    USE noncollin_module,     ONLY : nspin_lsda, noncolin
798    !
799    ! USE cell_base,           ONLY : at, bg
800    ! USE symm_base,           ONLY : s
801    !
802    IMPLICIT NONE
803    !
804    INTEGER, INTENT(IN) :: nkqs
805    !! number of points in the k+q grid
806    INTEGER, INTENT(IN) :: index_xk(nspin_lsda*nkqs)
807    !! idx of k-point that can be rotated to k+q
808    INTEGER, INTENT(IN) :: index_sym(nspin_lsda*nkqs)
809    !! sym.op taking k to k+q
810    REAL(DP), INTENT(IN):: xkq_collect(3,nspin_lsda*nkqs)
811    !! the list of k+q points
812    !
813    ! ... local variables
814    !
815    INTEGER :: ikq, ik, ik_global
816    INTEGER :: isym, sgn_sym, ibnd
817    TYPE(bec_type), ALLOCATABLE :: becxx0_global(:)
818    REAL(dp), ALLOCATABLE :: xk_collect(:,:)
819    INTEGER :: bec_working_pool !, current_root
820    !
821    INTEGER, EXTERNAL :: local_kpoint_index
822    ! REAL(DP) :: xk_cryst(3), sxk(3)
823    !
824    IF (.NOT. okvan) RETURN
825    IF (noncolin) CALL errore( "rotate_becxx", "Ultrasoft/PAW+EXX+noncolin &
826                               &not yet implemented", 1 )
827
828    IF(.not.ALLOCATED(becxx0))THEN
829      CALL infomsg("rotate_becxx","skipping rotation of <beta|psi>, this will only work for open_grid")
830      RETURN
831    ENDIF
832    !
833    !
834    CALL start_clock( 'becxx' )
835    !
836    ! becxx will contain the products <beta|psi> for all the wfcs in the k+q grid
837    IF (.NOT. ALLOCATED(becxx)) THEN
838      ALLOCATE( becxx(nkqs) )
839      DO ikq = 1,nkqs
840        CALL allocate_bec_type( nkb, nbnd, becxx(ikq) )
841      ENDDO
842    ENDIF
843    !
844    IF (gamma_only) THEN
845      ! In the Gamma-only case, only one k-point, no need to rotate anything
846      ! I copy over instead of using pointer of stuff, because it makes the
847      ! rest much simpler (we may have spin)
848      DO ik = 1, nks
849        becxx(ik)%r = becxx0(ik)%r
850      ENDDO
851      CALL stop_clock( 'becxx' )
852      RETURN
853    ENDIF
854    !
855    ALLOCATE( xk_collect(3,nkstot) )
856    CALL poolcollect( 3, nks, xk, nkstot, xk_collect )
857    !
858    ! ... becxx0_global is a temporary array that will collect the <beta|psi>
859    ! products from all the pools
860    ALLOCATE( becxx0_global(nkstot) )
861    DO ik_global = 1,nkstot
862      CALL allocate_bec_type( nkb, nbnd, becxx0_global(ik_global) )
863    ENDDO
864    !
865    ! ... Collect in a smart way (i.e. without doing all-to-all sum and bcast)
866    !
867!     WRITE(100001, *) "---------------------------------"
868    DO ik_global = 1, nkstot
869      ik = local_kpoint_index(nkstot, ik_global)
870      IF (ik > 0) THEN
871        becxx0_global(ik_global)%k = becxx0(ik)%k
872        ! ... my_pool_id is also the index of the possible roots when doing an mp_bcast with
873        ! inter_pool_comm, this is confusing but logical
874        bec_working_pool = my_pool_id
875      ELSE
876        bec_working_pool = 0
877      ENDIF
878      !CALL mp_sum( current_root, inter_pool_comm )
879      CALL mp_sum( bec_working_pool, inter_pool_comm )
880      IF ( me_pool == root_pool ) &
881        CALL mp_bcast( becxx0_global(ik_global)%k, bec_working_pool, inter_pool_comm )
882      ! No need to broadcast inside the pool which had the data already
883      IF (my_pool_id /= bec_working_pool ) &
884        CALL mp_bcast( becxx0_global(ik_global)%k, root_pool, intra_pool_comm )
885    ENDDO
886    !
887    ! ... Use symmetry rotation to generate the missing <beta|psi>
888    DO ikq = 1, nkqs
889      !
890      isym    = ABS(index_sym(ikq))
891      sgn_sym = SIGN(1, index_sym(ikq))
892!       WRITE(100001, '(a,i6)') "ikq", ikq
893!       WRITE(100001, '(a,6i6)') "sym", isym, sgn_sym
894      CALL becp_rotate_k( becxx0_global(index_xk(ikq))%k, becxx(ikq)%k, isym, sgn_sym, &
895                          xk_collect(:,index_xk(ikq)), xkq_collect(:,ikq) )
896!
897!         xk_cryst(:) = sgn_sym * xk_collect(:,index_xk(ikq))
898!         CALL cryst_to_cart(1, xk_cryst, at, -1)
899!         ! rotate with this sym.op.
900!         sxk(:) = s(:,1,isym)*xk_cryst(1) + &
901!                  s(:,2,isym)*xk_cryst(2) + &
902!                  s(:,3,isym)*xk_cryst(3)
903!         CALL cryst_to_cart(1, sxk, bg, +1)
904!
905!       WRITE(100001, '(a,3f12.6)') "xk_0", xk_collect(:,index_xk(ikq))
906!       WRITE(100001, '(a,3f12.6)') "xk_r", sxk
907!       WRITE(100001, '(a,3f12.6,5x,1f12.6)') "xk_f", xkq_collect(:,ikq),&
908!         SUM(xkq_collect(:,ikq)-sxk)
909!       DO ibnd = 1, 1
910!       !DO ik = 1, nkb
911!       WRITE(100001, '(a,i6)') "bnd", ibnd
912!       WRITE(100001, '(a,99(2f12.5,3x))') "before", becxx0_global(index_xk(ikq))%k(:,ibnd)
913!       WRITE(100001, '(a,99(2f12.5,3x))') "after ", becxx(ikq)%k(:, ibnd)
914!       !ENDDO
915!       ENDDO
916    ENDDO
917    !
918    ! Cleanup
919    DEALLOCATE( xk_collect )
920    DO ik = 1, nkstot
921        CALL deallocate_bec_type( becxx0_global(ik) )
922    ENDDO
923    DEALLOCATE( becxx0_global )
924    !
925    CALL stop_clock( 'becxx' )
926    !
927  END SUBROUTINE rotate_becxx
928  !
929  !
930  !------------------------------------------------------------------------
931  SUBROUTINE becp_rotate_k( becp0, becp, isym, sgn_sym, xk0, xk )
932    !------------------------------------------------------------------------
933    !! Rotate \(\langle\beta|\psi_k\rangle\) for every bands with symmetry
934    !! operation \(\text{isym}\).
935    !! If \(\text{sgn_sym} < 0\), the initial matrix was computed for
936    !! \(-\text{xk0}\).
937    !! This subroutine comes from \(\texttt{PAW_symmetrize}\).
938    !
939    USE kinds,        ONLY : DP
940    USE constants,    ONLY : tpi
941    USE io_global,    ONLY : stdout
942    USE ions_base,    ONLY : tau, nat, ityp
943    USE symm_base,    ONLY : irt, d1, d2, d3, s, nsym
944    USE uspp,         ONLY : nkb, indv_ijkb0, nhtolm, nhtol
945    USE uspp_param,   ONLY : nh, upf
946    USE wvfct,        ONLY : nbnd
947    USE becmod,       ONLY : allocate_bec_type, is_allocated_bec_type
948    !
949    USE cell_base,    ONLY : at, bg
950    !
951    IMPLICIT NONE
952    !
953    COMPLEX(DP), INTENT(IN)  :: becp0(nkb,nbnd)
954    !! \(\langle\beta|\psi_k\rangle\)
955    COMPLEX(DP), INTENT(OUT) :: becp(nkb,nbnd)
956    !! Rotated \(\langle\beta|\psi_k\rangle\)
957    INTEGER, INTENT(IN) :: isym
958    !! symmetry operation
959    INTEGER, INTENT(IN) :: sgn_sym
960    !! If negative, the initial matrix was computed for \(-\text{xk0}\)
961    REAL(DP), INTENT(IN) :: xk0(3)
962    REAL(DP), INTENT(IN) :: xk(3)
963    !
964    ! ... local variables
965    !
966    INTEGER :: ia, mykey, ia_s, ia_e
967                                     ! atoms counters and indexes
968    INTEGER :: ibnd, nt              ! counters on spin, atom-type
969    INTEGER :: ma                    ! atom symmetric to na
970    INTEGER :: ih, ikb, oh, okb      ! counters for augmentation channels
971    INTEGER :: lm_i, l_i,m_i, m_o
972    REAL(DP) :: tau_phase !dtau(3), rtau(3),
973    COMPLEX(DP) :: tau_fact
974    LOGICAL :: do_r, do_k
975    !
976    REAL(DP), TARGET :: d0(1,1,48)
977    TYPE symmetrization_tensor
978        REAL(DP), POINTER :: d(:,:,:)
979    END TYPE symmetrization_tensor
980    TYPE(symmetrization_tensor) :: D(0:3)
981    !
982    REAL(DP) :: xau(3,nat), rau(3,nat)
983    !
984    IF ( isym == 1 ) THEN
985      IF (sgn_sym > 0) THEN
986        becp = becp0
987      ELSE
988        becp = CONJG(becp0)
989      ENDIF
990      RETURN
991    ENDIF
992    !
993    ! d_matrix are now done in setup.f90
994    !CALL d_matrix(d1,d2,d3)
995    d0(1,1,:) = 1._dp
996    D(0)%d => d0 ! d0(1,1,48)
997    D(1)%d => d1 ! d1(3,3,48)
998    D(2)%d => d2 ! d2(5,5,48)
999    D(3)%d => d3 ! d3(7,7,48)
1000    !
1001    IF (ABS(sgn_sym) /= 1) CALL errore( "becp_rotate", "sign must be +1 or -1", 1 )
1002    !
1003    CALL start_clock( 'becp_rotate' )
1004    !
1005    xau = tau
1006    CALL cryst_to_cart( nat, xau, bg, -1 )
1007    !
1008    DO ia = 1,nat
1009        ! rau = rotated atom coordinates
1010        rau(:,ia) = s(1,:,isym) * xau(1,ia) + &
1011                    s(2,:,isym) * xau(2,ia) + &
1012                    s(3,:,isym) * xau(3,ia)
1013    ENDDO
1014    !
1015    CALL cryst_to_cart( nat, rau, at, +1 )
1016!     DO ia = 1,nat
1017!       WRITE(100001, '(a,3f12.6)') "tau_0", tau(:,ia)
1018!       WRITE(100001, '(a,3f12.6)') "tau_r", rau(:,ia)
1019!       ma = irt(isym,ia)
1020!       WRITE(100001, '(a,3f12.6,5x,1f12.6)') "tau_f", tau(:,ma), SUM(rau(:,ia)-tau(:,ma))
1021!     ENDDO
1022
1023    becp = 0._dp
1024    !DO ibnd = 1, nbnd
1025    DO ia = 1,nat !ia_s, ia_e
1026      nt = ityp(ia)
1027      ma = irt(isym,ia)
1028      ! We have two phases to keep in account, one comes from translating
1029      !  <beta_I| -> <beta_sI+R| (I is the atom position)
1030      ! the other from the wavefunction
1031      !  |psi_k> -> |psi_sk+G> .
1032      tau_phase = -tpi*( sgn_sym*SUM(tau(:,ia)*xk0) - SUM(tau(:,ma)*xk) )
1033      !tau_phase = -tpi*( sgn_sym*SUM(tau(:,ia)*xk0) - SUM(rau(:,ia)*xk) )
1034      tau_fact = CMPLX(COS(tau_phase), SIN(tau_phase), KIND=DP)
1035!       tau_fact = 1._dp
1036      !WRITE(stdout,'(2(3f10.4,2x),5x,1f12.6,2x,2f12.6)') &
1037      !  tau(:,ia), tau(:,ma), tau_phase, tau_fact
1038      !
1039      !IF ( .NOT. upf(nt)%tvanp ) CYCLE
1040      !
1041      DO ih = 1, nh(nt)
1042          !
1043          lm_i  = nhtolm(ih,nt)
1044          l_i   = nhtol(ih,nt)
1045          m_i   = lm_i - l_i**2
1046          ikb = indv_ijkb0(ma) + ih
1047!           print*, "doing", ikb, ma, l_i, lm_i
1048          !
1049          DO m_o = 1, 2*l_i +1
1050              oh = ih - m_i + m_o
1051              okb = indv_ijkb0(ia) + oh
1052!               WRITE(*,'(a,5i4,2f10.3)') "okb", okb, oh, ih, m_i, m_o, &
1053!                                               D(l_i)%d(m_o,m_i, isym), &
1054!                                               D(l_i)%d(m_i,m_o, isym)
1055              IF (sgn_sym > 0) THEN
1056                becp(ikb, :) = becp(ikb, :) &
1057                    + D(l_i)%d(m_o,m_i, isym) * tau_fact*becp0(okb, :)
1058              ELSE
1059                becp(ikb, :) = becp(ikb, :) &
1060                    + D(l_i)%d(m_o,m_i, isym) * tau_fact*CONJG(becp0(okb, :))
1061              ENDIF
1062          ENDDO ! m_o
1063          !ENDDO ! isym
1064          !
1065      ENDDO ! ih
1066    ENDDO ! nat
1067!     print*, "STOP", nkb
1068!     stop 1
1069    !
1070    !ENDDO ! ibnd
1071    !
1072    CALL stop_clock( 'becp_rotate' )
1073    !
1074  END SUBROUTINE becp_rotate_k
1075#if defined (__UNUSED_SUBROUTINE_LEAVE_FOR_TESTING)
1076! Note: in order to work, this subroutine must be place in exx.f90,
1077! as it uses a load of global variables from there and we cannot have a cross
1078! dependency between this file an that
1079  !
1080  !-----------------------------------------------------------------------
1081  SUBROUTINE compute_becxx( )
1082    !-----------------------------------------------------------------------
1083    !! It prepares the necessary quantities, then calls \(\texttt{calbec}\) to
1084    !! compute \(\langle\beta_I|\phi_j,k+q\rangle\) and store it in
1085    !! \(\text{becxx(ikq)}\).
1086    !! This must be called AFTER \(\texttt{exxbuff}\) and \(\texttt{xkq_collected}\)
1087    !! are done (i.e. at the end of \(\texttt{exxinit}\)).
1088    !
1089    USE kinds,                ONLY : DP
1090    USE wvfct,                ONLY : npwx, nbnd
1091    USE gvect,                ONLY : g, ngm
1092    USE gvecw,                ONLY : gcutw
1093    USE uspp,                 ONLY : nkb, okvan
1094    USE becmod,               ONLY : calbec
1095    USE fft_interfaces,       ONLY : fwfft
1096    USE control_flags,        ONLY : gamma_only
1097    USE fft_interfaces,       ONLY : fwfft, invfft
1098    USE us_exx,               ONLY : becxx
1099    !
1100    IMPLICIT NONE
1101    !
1102    INTEGER, EXTERNAL :: n_plane_waves
1103    INTEGER :: npwq, npwx_, ibnd, ikq, j, h_ibnd, ibnd_loop_start
1104    INTEGER, ALLOCATABLE :: igkq(:)       !  order of wavefunctions at k+q[+G]
1105    INTEGER, ALLOCATABLE :: ngkq(:)       !  number of plane waves at k+q[+G]
1106    COMPLEX(DP), ALLOCATABLE :: vkbq(:,:) ! |beta_I>
1107    COMPLEX(DP), ALLOCATABLE :: evcq(:,:) ! |psi_j,k> in g-space
1108    COMPLEX(DP), ALLOCATABLE :: phi(:)    ! aux space for fwfft
1109    REAL(DP), ALLOCATABLE :: gk(:)        ! work space
1110    COMPLEX(DP) :: fp, fm
1111    !
1112    IF (.NOT. okvan) RETURN
1113    !
1114    CALL start_clock( 'becxx' )
1115    !
1116    ! ... Find maximum number of plane waves npwq among the entire grid of k and
1117    ! of k+q points - needed if plane waves are distributed (if not, the number
1118    ! of plane waves for each k+q point is the same as for the k-point that is
1119    ! equivalent by symmetry)
1120    !
1121    ALLOCATE( ngkq(nkqs) )
1122    npwq = n_plane_waves( gcutw, nkqs, xkq_collect, g, ngm )
1123    npwq = MAX(npwx, npwq)
1124    !
1125    ! ... Dirty trick to prevent gk_sort from stopping with an error message:
1126    ! set npwx to max value now, reset it to original value later
1127    ! (better solution: gk_sort should check actual array dimension, not npwx)
1128    !
1129    npwx_= npwx
1130    npwx = npwq
1131    !
1132    ALLOCATE( gk(npwq), igkq(npwq) )
1133    ALLOCATE( vkbq(npwq,nkb)  )
1134    ALLOCATE( evcq(npwq,nbnd) )
1135    ALLOCATE( phi(dfftt%nnr)  )
1136    !
1137!     WRITE(100002, *) "---------------------------------"
1138    DO ikq = 1, nkqs
1139      !
1140      ! prepare the g-vectors mapping
1141      CALL gk_sort( xkq_collect(:,ikq), ngm, g, gcutw, ngkq(ikq), igkq, gk )
1142      ! prepare the |beta> function at k+q
1143      CALL init_us_2( ngkq(ikq), igkq, xkq_collect(:,ikq), vkbq )
1144      !
1145      ! take rotated phi to G space
1146      IF (gamma_only) THEN
1147         !
1148         h_ibnd = ibnd_start / 2
1149         !
1150         IF (MOD(ibnd_start,2)==0) THEN
1151            h_ibnd = h_ibnd - 1
1152            ibnd_loop_start = ibnd_start - 1
1153         ELSE
1154            ibnd_loop_start = ibnd_start
1155         ENDIF
1156         !
1157         DO ibnd = ibnd_loop_start, ibnd_end, 2
1158            h_ibnd = h_ibnd + 1
1159            phi(:) = exxbuff(:,h_ibnd,ikq)
1160            CALL fwfft( 'Wave', phi, dfftt )
1161            IF (ibnd < ibnd_end) THEN
1162               ! two ffts at the same time
1163               DO j = 1, ngkq(ikq)
1164                  fp = (phi(dfftt%nl(j)) + phi(dfftt%nlm(j)))*0.5d0
1165                  fm = (phi(dfftt%nl(j)) - phi(dfftt%nlm(j)))*0.5d0
1166                  evcq(j,ibnd)   = CMPLX( DBLE(fp), AIMAG(fm),KIND=DP)
1167                  evcq(j,ibnd+1) = CMPLX(AIMAG(fp),- DBLE(fm),KIND=DP)
1168               ENDDO
1169            ELSE
1170               DO j = 1, ngkq(ikq)
1171                  evcq(j,ibnd) = phi(dfftt%nl(j))
1172               ENDDO
1173            ENDIF
1174         ENDDO
1175      ELSE
1176         DO ibnd = ibnd_start,ibnd_end
1177            phi(:) = exxbuff(:,ibnd,ikq)
1178            CALL fwfft( 'Wave', phi, dfftt )
1179            DO j = 1, ngkq(ikq)
1180               evcq(j,ibnd) = phi(dfftt%nl(igkq(j)))
1181            ENDDO
1182         ENDDO
1183      ENDIF
1184      !
1185      ! compute <beta_I|psi_j> at this k+q point, for all bands
1186      ! and all projectors
1187      !
1188      CALL calbec( ngkq(ikq), vkbq, evcq, becxx(ikq), nbnd )
1189      !
1190!       WRITE(100002, '(a,i6)') "ikq", ikq
1191!       DO ibnd = 1, 1
1192!       !DO ik = 1, nkb
1193!       WRITE(100002, '(a,i6)') "bnd", ibnd
1194!       WRITE(100002, '(a,99(2f12.5,3x))') "after ", becxx(ikq)%k(:, ibnd)
1195!       !ENDDO
1196!       ENDDO
1197    ENDDO
1198    !
1199    DEALLOCATE( phi, evcq, vkbq, igkq, gk, ngkq )
1200    ! suite of the dirty trick: reset npwx to its original value
1201    npwx = npwx_
1202    !
1203    CALL stop_clock( 'becxx' )
1204    !
1205  END SUBROUTINE compute_becxx
1206  !
1207#endif
1208!
1209END MODULE us_exx
1210