1!
2! Copyright (C) 2011-2014 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8!-------------------------------------------------------------------------
9      SUBROUTINE s_wfc(nwfc,becwfc,betae,wfc,swfc)
10!-----------------------------------------------------------------------
11!
12!     input: wfc, becwfc=<wfc|beta>, betae=|beta>
13!     output: swfc=S|wfc>
14!
15      USE kinds, ONLY: DP
16      USE ions_base, ONLY: nat, ityp
17      USE uspp, ONLY: nkb, nkbus, qq_nt, indv_ijkb0
18      USE uspp_param, ONLY: nh, upf
19      USE gvecw, ONLY: ngw
20      IMPLICIT NONE
21! input
22      INTEGER, INTENT(in)     :: nwfc
23      COMPLEX(DP), INTENT(in) :: betae(ngw,nkb),                   &
24     &                             wfc(ngw,nwfc)
25      REAL(DP), INTENT(in)    :: becwfc(nkb,nwfc)
26! output
27      COMPLEX(DP), INTENT(out):: swfc(ngw,nwfc)
28! local
29      INTEGER :: is, iv, jv, ia, inl, jnl, i
30      REAL(DP) :: qtemp(nkb,nwfc)
31!
32      swfc = wfc
33!
34      IF (nkbus > 0) THEN
35         qtemp=0.d0
36         DO ia=1,nat
37            is=ityp(ia)
38            IF( upf(is)%tvanp ) THEN
39               DO iv=1,nh(is)
40                  DO jv=1,nh(is)
41                     IF(ABS(qq_nt(iv,jv,is)).GT.1.e-5) THEN
42                        inl = indv_ijkb0(ia) + iv
43                        jnl = indv_ijkb0(ia) + jv
44                        DO i=1,nwfc
45                           qtemp(inl,i) = qtemp(inl,i) + qq_nt(iv,jv,is)*becwfc(jnl,i)
46                        END DO
47                     ENDIF
48                  END DO
49               END DO
50            END IF
51         END DO
52!
53         CALL dgemm &
54              ('N','N',2*ngw,nwfc,nkb,1.0d0,betae,2*ngw,&
55               qtemp,nkb,1.0d0,swfc,2*ngw)
56!
57      END IF
58!
59      RETURN
60      END SUBROUTINE s_wfc
61!-----------------------------------------------------------------------
62      subroutine ldaU_init
63!-----------------------------------------------------------------------
64!
65      use ldaU_cp,          ONLY: nwfcU, lda_plus_u, Hubbard_U
66      use ldaU_cp,          ONLY: Hubbard_lmax, Hubbard_l, ldmx, ns, vupsi
67      use ions_base,        only: nsp, atm, nat
68      use gvecw,            only: ngw
69      use electrons_base,   only: nspin, nx => nbspx
70      USE uspp_param,       ONLY: upf
71      !
72      implicit none
73      integer is, nb, l
74      integer, external :: set_hubbard_l
75
76      IF ( .NOT.lda_plus_u ) RETURN
77      ! FIXME: wasteful allocation, should be removed
78      allocate(vupsi(ngw,nx))
79      vupsi=(0.0d0,0.0d0)
80
81      Hubbard_lmax = -1
82      do is=1,nsp
83         if (Hubbard_U(is).ne.0.d0) then
84            Hubbard_l(is) = set_hubbard_l( upf(is)%psd )
85            Hubbard_lmax = max(Hubbard_lmax,Hubbard_l(is))
86            write (6,*) ' HUBBARD L FOR TYPE ',atm(is),' IS ', Hubbard_l(is)
87         else
88            Hubbard_l(is) = -1
89         end if
90      end do
91      write (6,*) ' MAXIMUM HUBBARD L IS ', Hubbard_lmax
92      if (Hubbard_lmax.eq.-1) call errore                            &
93     &        ('setup','lda_plus_u calculation but Hubbard_l not set',1)
94      !
95      ldmx = 2 * Hubbard_lmax + 1
96      allocate(ns(ldmx,ldmx,nspin,nat))
97      !
98      return
99      end subroutine ldaU_init
100!
101!-----------------------------------------------------------------------
102      subroutine new_ns( c, eigr, betae, hpsi, forceh )
103!-----------------------------------------------------------------------
104!
105! This routine computes the on site occupation numbers of the Hubbard ions.
106! It also calculates the contribution of the Hubbard Hamiltonian to the
107! electronic potential and to the forces acting on ions.
108!
109      use kinds,              ONLY: DP
110      use control_flags,      ONLY: tfor, tprnfor
111      use ions_base,          only: nat, nsp, ityp
112      use gvecw,              only: ngw
113      use gvect,              only: gstart
114      USE uspp,               ONLY: nkb
115      USE uspp_param,         ONLY: upf
116      use electrons_base,     only: nspin, n => nbsp, nx => nbspx, ispin, f
117      USE ldaU_cp,            ONLY: Hubbard_U, Hubbard_l, ldmx
118      USE ldaU_cp,            ONLY: nwfcU, ns, e_hubbard
119      USE step_penalty,       ONLY: penalty_e, penalty_f
120      USE mp_pools,           ONLY: intra_pool_comm, me_pool, nproc_pool
121      USE mp_bands,           only: nbgrp
122      USE cp_interfaces,      only: nlsm1, nlsm2_bgrp
123!
124      implicit none
125      complex(DP), intent(in) :: c(ngw,nx), eigr(ngw,nat), betae(ngw,nkb)
126      complex(DP), intent(out) :: hpsi(ngw,nx)
127      real(DP), INTENT(OUT) :: forceh(3,nat)
128!
129      complex(DP), allocatable:: wfcU(:,:), swfc(:,:), spsi(:,:)
130      real(DP), allocatable   :: becwfc(:,:), bp(:,:), dbp(:,:,:), wdb(:,:,:)
131      real(DP), allocatable   :: dns(:,:,:,:), proj(:,:), tempsi(:,:)
132      integer is, ia, nb, isp, l, m, m1, m2, k, i, ldim, ig
133      integer iv, jv, inl, jnl,alpha_a,alpha_s,ipol
134      integer, allocatable ::  offset (:)
135      INTEGER :: nb_s, nb_e, mykey
136!
137      if( nbgrp > 1 ) call errore(' new_ns ', &
138         ' parallelization over bands not yet implemented ', 1 )
139      call start_clock('new_ns')
140!
141      allocate(offset(nat))
142      offset(:) = -1
143      ! offset = -1 means "not a Hubbard wfc"
144      nwfcU = 0
145      do ia = 1, nat
146         is = ityp(ia)
147         do i = 1, upf(is)%nwfc
148            l = upf(is)%lchi(i)
149            if (l == Hubbard_l(is)) then
150               offset (ia) = nwfcU
151               nwfcU = nwfcU + 2 * l + 1
152            end if
153         end do
154      end do
155      !
156      allocate(wfcU(ngw,nwfcU))
157      allocate(becwfc(nkb,nwfcU))
158      allocate(swfc(ngw,nwfcU))
159      allocate(proj(nwfcU,n))
160      !
161      ! calculate proj = <wfcU|S|c>
162      !
163      CALL projwfc_hub( c, nx, eigr, betae, n, nwfcU, &
164     &                  offset, Hubbard_l, wfcU, becwfc, swfc, proj )
165      !
166      ns(:,:,:,:) = 0.d0
167      do ia = 1,nat
168         is = ityp(ia)
169         if (Hubbard_U(is).ne.0.d0) then
170            k = offset(ia)
171            do m1 = 1, 2*Hubbard_l(is) + 1
172               do m2 = m1, 2*Hubbard_l(is) + 1
173                  do i = 1,n
174                   ns(m1,m2,ispin(i),ia) = ns(m1,m2,ispin(i),ia) + &
175     &                            f(i) * proj(k+m2,i) * proj(k+m1,i)
176                  end do
177               end do
178               do m2 = m1+1, 2*Hubbard_l(is) + 1
179                  ns(m2,m1,:,ia) = ns(m1,m2,:,ia)
180               end do
181            end do
182         end if
183      end do
184      if (nspin.eq.1) ns = 0.5d0 * ns
185! Contributions to total energy
186      e_hubbard = 0.d0
187      do ia = 1,nat
188         is = ityp(ia)
189         if (Hubbard_U(is).ne.0.d0) then
190             do isp = 1,nspin
191                do m1 = 1, 2*Hubbard_l(is) + 1
192                  e_hubbard = e_hubbard + 0.5d0 * Hubbard_U(is) *    &
193     &                        ns(m1,m1,isp,ia)
194                  do m2 = 1, 2*Hubbard_l(is) + 1
195                     e_hubbard = e_hubbard - 0.5d0 * Hubbard_U(is) * &
196     &                           ns(m1,m2,isp,ia) * ns(m2,m1,isp,ia)
197                  end do
198               end do
199            end do
200         end if
201      end do
202      if (nspin.eq.1) e_hubbard = 2.d0*e_hubbard
203!
204!      Calculate the potential and forces on wavefunctions due to U
205!
206      hpsi(:,:)=(0.d0,0.d0)
207      ALLOCATE ( tempsi(ldmx,n) )
208      tempsi(:,:)=(0.d0,0.d0)
209      do ia = 1,nat
210         is = ityp(ia)
211         if (Hubbard_U(is).ne.0.d0) then
212            ldim = 2*Hubbard_l(is) + 1
213            do i=1, n
214               do m1 = 1, ldim
215                  tempsi(m1,i) = proj (offset(ia)+m1,i)
216                  do m2 = 1, ldim
217                     tempsi(m1,i) = tempsi(m1,i) - &
218                                    2.0_dp*ns(m1,m2,ispin(i),ia) * &
219                                            proj (offset(ia)+m2,i)
220                  enddo
221                  tempsi(m1,i) = tempsi(m1,i) * Hubbard_U(is)/2.d0*f(i)
222               enddo
223            enddo
224            !
225            CALL dgemm ( 'N','N', 2*ngw, n, ldim, 1.0_dp, &
226                          swfc(1,offset(ia)+1), 2*ngw, tempsi, &
227                          ldmx, 1.0_dp, hpsi, 2*ngw )
228         endif
229      enddo
230      DEALLOCATE ( tempsi )
231!
232!      Calculate the potential and energy due to constraint
233!
234      CALL  penalty_e ( offset, swfc, proj, e_hubbard, hpsi )
235!
236! Calculate the contribution to forces on ions due to U and constraint
237!
238      forceh=0.d0
239
240      if ( tfor .or. tprnfor ) then
241        call start_clock('new_ns:forc')
242        allocate (bp(nkb,n), dbp(nkb,nx,3), wdb(nkb,nwfcU,3))
243        allocate(dns(ldmx,ldmx,nspin,nat))
244        allocate (spsi(ngw,n))
245!
246        call nlsm1 ( n, 1, nsp, eigr, c, bp )
247        call s_wfc ( n, bp, betae, c, spsi )
248        call nlsm2_bgrp( ngw, nkb, eigr, c, dbp, nx, n )
249        call nlsm2_bgrp( ngw, nkb, eigr, wfcU, wdb, nwfcU, nwfcU )
250        !
251        ! poor-man parallelization over bands
252        ! - if nproc_pool=1   : nb_s=1, nb_e=n, mykey=0
253        ! - if nproc_pool<=nbnd:each processor calculates band nb_s to nb_e; mykey=0
254        ! - if nproc_pool>nbnd :each processor takes care of band nb_s=nb_e;
255        !   mykey labels how many times each band appears (mykey=0 first time etc.)
256        !
257        CALL block_distribute( n, me_pool, nproc_pool, nb_s, nb_e, mykey )
258        !
259        do alpha_a = 1, nat
260            alpha_s = ityp(alpha_a)
261            do ipol = 1,3
262               call dndtau(alpha_a,alpha_s,becwfc,spsi,bp,dbp,wdb,          &
263                           offset,wfcU,eigr,proj,ipol,nb_s,nb_e,mykey,&
264                           dns)
265               do ia=1, nat
266                  is = ityp(ia)
267                  if (Hubbard_U(is).ne.0.d0) then
268                     do isp = 1,nspin
269                        do m2 = 1,2*Hubbard_l(is) + 1
270                           forceh(ipol,alpha_a) = forceh(ipol,alpha_a) -   &
271     &                     Hubbard_U(is) * 0.5d0 * dns(m2,m2,isp,ia)
272                           do m1 = 1,2*Hubbard_l(is) + 1
273                              forceh(ipol,alpha_a) = forceh(ipol,alpha_a) + &
274     &                        Hubbard_U(is)*ns(m2,m1,isp,ia)*       &
275     &                        dns(m1,m2,isp,ia)
276                           end do
277                        end do
278                     end do
279                  end if
280! Occupation constraint added here to forceh(ipol,alpha)
281                  CALL penalty_f ( is, ia, dns, forceh(ipol,alpha_a) )
282               end do
283            end do
284        end do
285        !
286        ! I am not sure why the following instruction (present in PW)
287        ! seems to yield a wrong factor here ... PG
288        !if (nspin.eq.1) then
289        !   forceh = 2.d0 * forceh
290        !end if
291        !
292        deallocate ( spsi, dns, bp, dbp, wdb)
293        call stop_clock('new_ns:forc')
294      end if
295      !
296      deallocate ( wfcU, becwfc, proj, offset, swfc)
297      !
298      call stop_clock('new_ns')
299      !
300      return
301      end subroutine new_ns
302!
303!-----------------------------------------------------------------------
304      subroutine write_ns
305!-----------------------------------------------------------------------
306!
307! This routine computes the occupation numbers on atomic orbitals.
308! It also write the occupation number in the output file.
309!
310      USE kinds,            only: DP
311      USE constants,        ONLY: autoev
312      use electrons_base,   only: nspin
313      use electrons_base,   only: n => nbsp
314      use ions_base,        only: nat, nsp, ityp
315      use gvecw,            only: ngw
316      USE ldaU_cp,          ONLY: Hubbard_U, Hubbard_l, ldmx
317      USE ldaU_cp,          ONLY: nwfcU, ns, e_hubbard
318      USE step_penalty,     ONLY: write_pen
319
320      implicit none
321
322      include 'laxlib.fh'
323
324  integer :: is, isp, ia, m1, m2, err, k
325  real(DP), allocatable   :: ftemp1(:), ftemp2(:), f1 (:), vet (:,:)
326
327  real(DP) :: lambda (ldmx), nsum, nsuma
328
329  CALL write_pen (nsp, nspin)
330
331  write (6,'(6(a,i2,a,f8.4,6x))') &
332        ('U(',is,') =', Hubbard_U(is) * autoev, is=1,nsp)
333      nsum = 0.d0
334      allocate( ftemp1(ldmx), ftemp2(ldmx), f1(ldmx*ldmx), vet(ldmx,ldmx) )
335      write(6,*) 'nsp',nsp
336      do ia = 1, nat
337         is=ityp(ia)
338         nsuma = 0.d0
339         if (Hubbard_U(is).ne.0.d0) then
340            do isp = 1, nspin
341                do m1 = 1, 2 * Hubbard_l(is) + 1
342                   nsuma = nsuma + ns (m1,m1,isp,ia)
343                end do
344            end do
345            if (nspin.eq.1) nsuma = 2.d0 * nsuma
346            write(6,'(a,1x,i2,2x,a,f11.7)') 'atom', ia,              &
347     &                                      ' Tr[ns(na)]= ',nsuma
348            nsum = nsum + nsuma
349!
350            do isp = 1, nspin
351
352               k = 0
353               do m1 = 1, 2 * Hubbard_l(is) + 1
354                  do m2 = m1, 2 * Hubbard_l(is) + 1
355                     k = k + 1
356                     f1 ( k ) = ns (m2,m1,isp,ia)
357                  enddo
358               enddo
359
360               CALL dspev_drv( 'V', 'L', 2 * Hubbard_l(is) + 1, &
361                               f1, lambda, vet, ldmx  )
362
363               write(6,'(a,1x,i2,2x,a,1x,i2)') 'atom', ia, 'spin', isp
364               write(6,'(a,7f10.7)') 'eigenvalues: ',(lambda(m1),m1=1,&
365     &                                2 * Hubbard_l(is) + 1)
366               write(6,*) 'eigenvectors'
367               do m2 = 1, 2*Hubbard_l(is)+1
368                  write(6,'(i2,2x,7(f10.7,1x))') m2,(real(vet(m1,m2)),&
369     &                            m1=1,2 * Hubbard_l(is) + 1)
370               end do
371               write(6,*) 'occupations'
372               do m1 = 1, 2*Hubbard_l(is)+1
373                  write (6,'(7(f6.3,1x))') (ns(m1,m2,isp,ia),m2=1,    &
374     &                     2*Hubbard_l(is)+1)
375               end do
376            end do
377         end if
378      end do
379      deallocate ( ftemp1, ftemp2,f1, vet )
380      return
381      end subroutine write_ns
382!
383!-------------------------------------------------------------------------
384      subroutine dndtau(alpha_a,alpha_s,becwfc,spsi,bp,dbp,wdb,         &
385                        offset,wfcU,eigr,proj,ipol,nb_s,nb_e,mykey,dns)
386!-----------------------------------------------------------------------
387!
388! This routine computes the derivative of the ns with respect to the ionic
389! displacement tau(alpha,ipol) used to obtain the Hubbard contribution to the
390! atomic forces.
391!
392      use ions_base, only: nat, nsp, ityp
393      use gvecw, only: ngw
394      use electrons_base, only: nspin, n => nbsp, nx => nbspx, ispin, f
395      USE uspp,           ONLY: nkb
396      USE ldaU_cp,        ONLY: Hubbard_U, Hubbard_l, ldmx
397      USE ldaU_cp,        ONLY: nwfcU, ns
398      USE kinds,          ONLY: DP
399      USE mp,             ONLY: mp_sum
400      USE mp_pools,       ONLY: intra_pool_comm
401!
402      implicit none
403! input
404      integer,      intent(in) :: offset(nat)
405      integer,      intent(in) :: alpha_a,alpha_s,ipol
406      INTEGER,      INTENT(in) :: nb_s, nb_e, mykey
407      COMPLEX(dp),  INTENT(in) :: wfcU(ngw,nwfcU), eigr(ngw,nat)
408      REAL(dp),     INTENT(IN) :: becwfc(nkb,nwfcU), bp(nkb,n), &
409                                  dbp(nkb,nx,3), wdb(nkb,nwfcU,3)
410      real(DP),     intent(in) :: proj(nwfcU,n)
411      complex (DP), intent(in) :: spsi(ngw,n)
412! output
413      real (DP),   intent(out) :: dns(ldmx,ldmx,nspin,nat)
414!     dns   derivative of ns(:,:,:,:) w.r.t. tau
415!
416      integer ibnd,is,i,ia, m1,m2, l, ldim
417      real (DP),   allocatable :: dproj(:,:)
418!     dproj(nwfcU,n)   derivative of proj(:,:) w.r.t. tau
419!
420      CALL start_clock('dndtau')
421      !
422      allocate (dproj(nwfcU,nb_s:nb_e) )
423      call dprojdtau(wfcU,becwfc,spsi,bp,dbp,wdb,eigr,alpha_a,     &
424                     alpha_s,ipol,offset(alpha_a),nb_s,nb_e,mykey, &
425                     dproj)
426      !
427      ! compute the derivative of occupation numbers (the quantities dn(m1,m2))
428      ! of the atomic orbitals. They are real quantities as well as n(m1,m2)
429      !
430      dns(:,:,:,:) = 0.d0
431      !
432      ! band parallelization. If each band appears more than once
433      ! compute its contribution only once (i.e. when mykey=0)
434      !
435      IF ( mykey /= 0 ) GO TO 10
436      do ia = 1,nat
437         is = ityp(ia)
438         if (Hubbard_U(is).ne.0.d0) then
439            ldim = 2*Hubbard_l(is) + 1
440            do m1 = 1, ldim
441               do m2 = m1, ldim
442                  do ibnd = nb_s,nb_e
443                     dns(m1,m2,ispin(ibnd),ia) =                    &
444     &               dns(m1,m2,ispin(ibnd),ia) +                    &
445     &                f(ibnd)*REAL(  proj(offset(ia)+m1,ibnd) *   &
446     &                             (dproj(offset(ia)+m2,ibnd))+   &
447     &                              dproj(offset(ia)+m1,ibnd) *   &
448     &                              (proj(offset(ia)+m2,ibnd)) )
449                  end do
450                  dns(m2,m1,:,ia) = dns(m1,m2,:,ia)
451               end do
452            end do
453         end if
454      end do
455!
456 10   deallocate (dproj)
457      CALL mp_sum(dns, intra_pool_comm)
458      CALL stop_clock('dndtau')
459      return
460      end subroutine dndtau
461!
462!-----------------------------------------------------------------------
463      subroutine dprojdtau(wfcU,becwfc,spsi,bp,dbp,wdb,eigr,alpha_a,    &
464                           alpha_s,ipol,offset,nb_s,nb_e,mykey,dproj)
465!-----------------------------------------------------------------------
466!
467! This routine computes the first derivative of the projection
468! <\fi^{at}_{I,m1}|S|\psi_{k,v,s}> with respect to the atomic displacement
469! u(alpha,ipol) (we remember that ns_{m1,m2,s,I} = \sum_{k,v}
470! f_{kv} <\fi^{at}_{I,m1}|S|\psi_{k,v,s}><\psi_{k,v,s}|S|\fi^{at}_{I,m2}>)
471!
472      use ions_base, only: nat
473      use gvecw, only: ngw
474      use gvect, only: g, gstart
475      use electrons_base, only: n => nbsp, nx => nbspx
476      USE uspp,           ONLY: nkb, qq_nt, indv_ijkb0
477      USE ldaU_cp,        ONLY: Hubbard_U, Hubbard_l
478      USE ldaU_cp,        ONLY: nwfcU
479      use cell_base,      ONLY: tpiba
480      USE uspp_param,     only: nh
481      use mp_global,      only: intra_bgrp_comm
482      use mp,             only: mp_sum
483      USE kinds,          ONLY: DP
484!
485       implicit none
486       integer, INTENT(in) :: alpha_a, alpha_s,ipol, offset
487! input: the displaced atom
488! input: the component of displacement
489! input: the offset of the wfcs of the atom "alpha_a,alpha_s"
490       INTEGER, INTENT(in) :: nb_s, nb_e, mykey
491       complex (DP), intent(in) :: spsi(ngw,n),                     &
492     &                   eigr(ngw,nat)
493! input: S|evc>, structure factors
494       real(DP), intent(in) ::becwfc(nkb,nwfcU), &
495     &            bp(nkb,n), dbp(nkb,nx,3), wdb(nkb,nwfcU,3)
496       COMPLEX(dp), INTENT(IN) :: wfcU(ngw,nwfcU)
497       real(DP), intent(out) :: dproj(nwfcU,nb_s:nb_e)
498! output: the derivative of the projection
499!
500      integer i,ig,m1,ibnd,iwf,ia,is,iv,jv,ldim,alpha,l,m,k,inl
501!
502      real(dp), allocatable :: dproj0(:,:)
503      real(dp) :: gvec
504      complex (DP), allocatable :: dwfc(:,:)
505      real (DP), allocatable :: betapsi(:,:), dbetapsi(:,:), &
506     &                          wfcbeta(:,:),wfcdbeta(:,:), auxwfc(:,:)
507!      dwfc(ngw,ldmx),      ! the derivative of the atomic Hubbard wfc
508!      betapsi(nh,n),       ! <beta|evc>
509!      dbetapsi(nh,n),      ! <dbeta|evc>
510!      wfcbeta(nwfcU,nh),   ! <wfc|beta>
511!      wfcdbeta(nwfcU,nh),  ! <wfc|dbeta>
512
513      ldim = 2 * Hubbard_l(alpha_s) + 1
514      dproj(:,:)=0.d0
515!
516! At first the derivative of the atomic wfc is computed
517!
518      if (Hubbard_U(alpha_s).ne.0.d0) then
519         !
520         allocate ( dwfc(ngw,ldim), dproj0(ldim,n) )
521         !
522         do ig=1,ngw
523            gvec = g(ipol,ig)*tpiba
524            do m1=1,ldim
525               dwfc(ig,m1) = CMPLX (gvec*AIMAG(wfcU(ig,offset+m1)),      &
526     &                             -gvec* DBLE(wfcU(ig,offset+m1)), kind=dp )
527            end do
528         end do
529         !
530         ! no need to calculate the G=0 term: it is zero
531         !
532         CALL dgemm( 'C', 'N', ldim, n, 2*ngw, 2.0_DP, dwfc, 2*ngw, spsi, &
533                    2*ngw, 0.0_DP, dproj0, ldim )
534         call mp_sum( dproj0, intra_bgrp_comm )
535         !
536         ! copy to dproj results for the bands treated by this processor
537         !
538         dproj(offset+1:offset+ldim,:) = dproj0(:,nb_s:nb_e)
539         deallocate (dproj0, dwfc)
540         !
541      end if
542      !
543      IF( nh(alpha_s) > 0 ) THEN
544         !
545         allocate (  wfcbeta(nwfcU,nh(alpha_s)) )
546         allocate ( wfcdbeta(nwfcU,nh(alpha_s)) )
547         allocate (   auxwfc(nwfcU,nh(alpha_s)) )
548         !
549         do iv=1,nh(alpha_s)
550            inl=indv_ijkb0(alpha_a) + iv
551            do m=1,nwfcU
552               auxwfc(m,iv) = becwfc(inl,m)
553            end do
554         end do
555         ! following dgemm performs (note that qq is symmetric)
556         ! wfcbeta(m,iv) = sum_jv qq(iv,jv,alpha_s)*auxwfc(m,jv)
557         CALL dgemm( 'N', 'N', nwfcU, nh(alpha_s), nh(alpha_s), 1.0_DP, &
558                  auxwfc, nwfcU, qq_nt(1,1,alpha_s), nh(alpha_s), &
559                  0.0_DP, wfcbeta, nwfcU )
560         do iv=1,nh(alpha_s)
561            inl=indv_ijkb0(alpha_a) + iv
562            do m=1,nwfcU
563               auxwfc(m,iv) = wdb(inl,m,ipol)
564            end do
565         end do
566         ! as above with wfcbeta(m,iv) => wfcdbeta
567         CALL dgemm( 'N', 'N', nwfcU, nh(alpha_s), nh(alpha_s), 1.0_DP, &
568                  auxwfc, nwfcU, qq_nt(1,1,alpha_s), nh(alpha_s), &
569                  0.0_DP, wfcdbeta, nwfcU )
570         deallocate(auxwfc)
571         !
572         IF ( mykey == 0 ) THEN
573            allocate (  betapsi(nh(alpha_s),nb_s:nb_e) )
574            allocate ( dbetapsi(nh(alpha_s),nb_s:nb_e) )
575            do iv=1,nh(alpha_s)
576               inl=indv_ijkb0(alpha_a) + iv
577               do i=nb_s,nb_e
578                  betapsi (iv,i)=bp(inl,i)
579                  dbetapsi(iv,i)=dbp(inl,i,ipol)
580               end do
581            end do
582            !
583            ! dproj(m,i) = \sum_iv wfcdbeta(m,iv)*betapsi (iv,i) +
584            !                      wfcbeta (m,iv)*dbetapsi(iv,i)
585            !
586            CALL dgemm( 'N', 'N', nwfcU, nb_e-nb_s+1, nh(alpha_s), 1.0_DP, &
587                  wfcdbeta, nwfcU, betapsi(1,nb_s), nh(alpha_s), &
588                  1.0_DP, dproj(1,nb_s), nwfcU )
589            CALL dgemm( 'N', 'N', nwfcU, nb_e-nb_s+1, nh(alpha_s), 1.0_DP, &
590                  wfcbeta, nwfcU, dbetapsi(1,nb_s), nh(alpha_s), &
591                  1.0_DP, dproj(1,nb_s), nwfcU )
592            !
593            deallocate (dbetapsi)
594            deallocate (betapsi)
595            !
596         end if
597         ! end band parallelization - only dproj(1,nb_s:nb_e) are calculated
598         !
599         deallocate (wfcbeta)
600         deallocate (wfcdbeta)
601
602      END IF
603
604      return
605      end subroutine dprojdtau
606!
607!-----------------------------------------------------------------------
608      SUBROUTINE projwfc_hub( c, nx, eigr, betae, n, nwfcU,  &
609     &                        offset, Hubbard_l, wfcU, becwfc, swfc, proj )
610!-----------------------------------------------------------------------
611      !
612      ! Projection on atomic wavefunctions
613      ! Atomic wavefunctions are not orthogonalized
614      !
615      USE kinds,              ONLY: DP
616      USE io_global,          ONLY: stdout
617      USE mp_global,          ONLY: intra_bgrp_comm
618      USE mp,                 ONLY: mp_sum
619      USE gvecw,              ONLY: ngw
620      USE gvect,              ONLY: gstart
621      USE ions_base,          ONLY: nsp, nat
622      USE uspp,               ONLY: nkb
623      USE cp_interfaces,      only: nlsm1
624!
625      IMPLICIT NONE
626      INTEGER,     INTENT(IN) :: nx, n, nwfcU, offset(nat), &
627                                 Hubbard_l(nsp)
628      COMPLEX(DP), INTENT(IN) :: c( ngw, nx ), eigr(ngw,nat), betae(ngw,nkb)
629!
630      COMPLEX(DP), INTENT(OUT):: wfcU(ngw, nwfcU),    &
631     &                           swfc(ngw, nwfcU)
632      real(DP), intent(out):: becwfc(nkb,nwfcU), proj(nwfcU,n)
633      !
634      IF ( nwfcU .EQ. 0 ) RETURN
635      !
636      CALL start_clock('projwfc_hub')
637      !
638      ! calculate wfcU = atomic states with associated Hubbard U
639      !
640      CALL atomic_wfc_hub( offset, Hubbard_l, eigr, nwfcU, wfcU )
641      !
642      ! calculate bec = <beta|wfc>
643      !
644      CALL nlsm1( nwfcU, 1, nsp, eigr, wfcU, becwfc )
645      !
646      ! calculate swfc = S|wfc>
647      !
648      CALL s_wfc( nwfcU, becwfc, betae, wfcU, swfc )
649      !
650      ! calculate proj = <wfcU|S|c>
651      !
652      CALL dgemm( 'C', 'N', nwfcU, n, 2*ngw, 2.0_DP, swfc, 2*ngw, c, &
653                    2*ngw, 0.0_DP, proj, nwfcU )
654      IF ( gstart == 2 ) &
655         CALL dger( nwfcU, n,  -1.0_DP, swfc, 2*ngw, c, 2*ngw, proj, nwfcU )
656      CALL mp_sum( proj, intra_bgrp_comm )
657!
658      CALL stop_clock('projwfc_hub')
659!
660      RETURN
661      END SUBROUTINE projwfc_hub
662!
663!-----------------------------------------------------------------------
664      SUBROUTINE atomic_wfc_hub( offset, Hubbard_l, eigr, nwfcU, wfcU )
665!-----------------------------------------------------------------------
666!
667! Compute atomic wavefunctions (not orthogonalized) in G-space
668!
669      USE kinds,              ONLY: DP
670      USE gvecw,              ONLY: ngw
671      USE gvect,              ONLY: gstart, gg, g
672      USE ions_base,          ONLY: nsp, nat, ityp
673      USE cell_base,          ONLY: tpiba, omega
674      USE atom,               ONLY: rgrid
675      USE uspp_param,         ONLY: upf
676      USE constants,          ONLY: fpi
677!
678      IMPLICIT NONE
679      INTEGER,     INTENT(in) :: nwfcU, offset(nat), &
680                                 Hubbard_l(nsp)
681      COMPLEX(DP), INTENT(in) :: eigr( ngw, nat )
682      COMPLEX(DP), INTENT(out):: wfcU( ngw, nwfcU )
683!
684      INTEGER :: natwfc, ndm, is, ia, ir, nb, l, m, lm, i, lmax_wfc
685      REAL(DP), ALLOCATABLE ::  ylm(:,:), q(:), jl(:), vchi(:), chiq(:)
686
687      IF( .NOT. ALLOCATED( rgrid ) ) &
688         CALL errore( ' atomic_wfc_hub ', ' rgrid not allocated ', 1 )
689!
690! calculate max angular momentum required in wavefunctions
691!
692      lmax_wfc=-1
693      DO is = 1,nsp
694         lmax_wfc = MAX ( lmax_wfc, MAXVAL (upf(is)%lchi(1:upf(is)%nwfc) ) )
695      ENDDO
696      !
697      ALLOCATE(ylm(ngw,(lmax_wfc+1)**2))
698      !
699      CALL ylmr2 ((lmax_wfc+1)**2, ngw, g, gg, ylm)
700      ndm = MAXVAL(rgrid(1:nsp)%mesh)
701      !
702      ALLOCATE(jl(ndm), vchi(ndm))
703      ALLOCATE(q(ngw), chiq(ngw))
704!
705      DO i=1,ngw
706         q(i) = SQRT(gg(i))*tpiba
707      END DO
708      !
709      DO is=1,nsp
710         !
711         !   radial fourier transform of the chi functions. NOTA BENE:
712         !   chi is r times the radial part of the atomic wavefunction
713         !
714         natwfc=0
715         DO nb = 1,upf(is)%nwfc
716            l = upf(is)%lchi(nb)
717            IF ( l /= Hubbard_l(is) ) GO TO 10
718            DO i=1,ngw
719               CALL sph_bes (rgrid(is)%mesh, rgrid(is)%r, q(i), l, jl)
720               DO ir=1,rgrid(is)%mesh
721                  vchi(ir) = upf(is)%chi(ir,nb)*rgrid(is)%r(ir)*jl(ir)
722               ENDDO
723               CALL simpson_cp90(rgrid(is)%mesh,vchi,rgrid(is)%rab,chiq(i))
724            ENDDO
725            !
726            !   multiply by angular part and structure factor
727            !   NOTA BENE: the factor i^l MUST be present!!!
728            !
729            DO m = 1,2*l+1
730               lm = l**2 + m
731               natwfc = natwfc + 1
732               DO ia = 1, nat
733                  IF( ityp(ia) == is ) THEN
734                     wfcU(:,natwfc+offset(ia)) = (0.d0,1.d0)**l * eigr(:,ia) * ylm(:,lm)*chiq(:)
735                  END IF
736               ENDDO
737            ENDDO
738  10        CONTINUE
739         ENDDO
740      ENDDO
741!
742      IF (natwfc+offset(nat) .NE. nwfcU )  CALL errore('atomic_wfc','unexpected error',natwfc)
743!
744      do i = 1,nwfcU
745        call dscal(2*ngw,fpi/sqrt(omega),wfcU(1,i),1)
746      end do
747      DEALLOCATE(q, chiq, vchi, jl, ylm)
748!
749      RETURN
750      END SUBROUTINE atomic_wfc_hub
751