1!
2! Copyright (C) 2007 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8MODULE paw_init
9  !
10  USE kinds, ONLY : DP
11  !
12  IMPLICIT NONE
13
14  PUBLIC :: PAW_atomic_becsum
15  PUBLIC :: PAW_init_onecenter
16  !PUBLIC :: PAW_increase_lm ! <-- unused
17#if defined(__MPI)
18  PUBLIC :: PAW_post_init
19#endif
20  !
21  PUBLIC :: allocate_paw_internals, deallocate_paw_internals
22  !
23  LOGICAL, PARAMETER :: TIMING = .FALSE.
24  !
25  !
26 CONTAINS
27  !
28  !----------------------------------------------------------------------------
29  SUBROUTINE allocate_paw_internals
30    !--------------------------------------------------------------------------
31    !! Allocate PAW internal variables require for SCF calculation.
32    !
33    USE lsda_mod,       ONLY : nspin
34    USE ions_base,      ONLY : nat
35    USE uspp_param,     ONLY : nhm
36    !
37    USE paw_variables
38    !
39    IMPLICIT NONE
40    !
41    ALLOCATE( ddd_paw(nhm*(nhm+1)/2,nat,nspin) )
42    !
43  END SUBROUTINE allocate_paw_internals
44  !
45  !
46  !------------------------------------------------------------------------------
47  SUBROUTINE deallocate_paw_internals
48    !----------------------------------------------------------------------------
49    !! Called by \(\textrm{clean_pw}\).
50    !
51    USE uspp_param,    ONLY : upf
52    USE ions_base,     ONLY : nat, ntyp => nsp
53    USE paw_variables
54    !
55    IMPLICIT NONE
56    !
57    INTEGER :: nt, na
58    !
59    IF (ALLOCATED(ddd_paw)) DEALLOCATE (ddd_paw)
60    !
61    IF (ALLOCATED(rad)) THEN
62       !
63       DO nt = 1,ntyp
64          IF (ASSOCIATED(rad(nt)%ww))       DEALLOCATE( rad(nt)%ww  )
65          IF (ASSOCIATED(rad(nt)%ylm))      DEALLOCATE( rad(nt)%ylm )
66          IF (ASSOCIATED(rad(nt)%wwylm))    DEALLOCATE( rad(nt)%wwylm )
67          IF (ASSOCIATED(rad(nt)%dylmt))    DEALLOCATE( rad(nt)%dylmt )
68          IF (ASSOCIATED(rad(nt)%dylmp))    DEALLOCATE( rad(nt)%dylmp )
69          IF (ASSOCIATED(rad(nt)%cotg_th))  DEALLOCATE( rad(nt)%cotg_th )
70          IF (ASSOCIATED(rad(nt)%cos_phi))  DEALLOCATE( rad(nt)%cos_phi )
71          IF (ASSOCIATED(rad(nt)%sin_phi))  DEALLOCATE( rad(nt)%sin_phi )
72          IF (ASSOCIATED(rad(nt)%cos_th))   DEALLOCATE( rad(nt)%cos_th )
73          IF (ASSOCIATED(rad(nt)%sin_th))   DEALLOCATE( rad(nt)%sin_th )
74       ENDDO
75       !
76       DEALLOCATE( rad )
77       !
78    ENDIF
79    !
80    IF ( ALLOCATED(vs_rad) )   DEALLOCATE( vs_rad )
81    !
82    paw_is_init = .FALSE.
83    !
84    RETURN
85    !
86  END SUBROUTINE deallocate_paw_internals
87  !
88  !
89#if defined(__MPI)
90  !
91  !---------------------------------------------------------------------------------
92  SUBROUTINE PAW_post_init()
93    !--------------------------------------------------------------------------------
94    !! Deallocate variables that are used only at init and then no more necessary.
95    !! This is only useful in parallel, as each node only does a limited number of atoms.
96    !
97    ! this routine does nothing at the moment...
98    !
99    USE ions_base,          ONLY : nat, ntyp=>nsp, ityp
100    USE uspp_param,         ONLY : upf
101    USE mp_images,          ONLY : me_image, nproc_image, intra_image_comm
102    USE mp,                 ONLY : mp_sum
103    USE io_global,          ONLY : stdout, ionode
104    USE control_flags,      ONLY : iverbosity
105    USE funct,              ONLY : dft_is_hybrid
106    !
107    INTEGER :: nt, np, ia, ia_s, ia_e, mykey, nnodes
108    INTEGER :: info(0:nproc_image-1,ntyp)
109    !
110    ! FIXME: the PAW EXX code is not parallelized (but it is very fast)
111    IF ( dft_is_hybrid() ) RETURN
112    !
113    IF (ionode) &
114    WRITE(stdout,"(5x,a)") &
115        'Checking if some PAW data can be deallocated... '
116    info(:,:) = 0
117    !
118    CALL block_distribute( nat, me_image, nproc_image, ia_s, ia_e, mykey )
119    !
120    types : &
121    DO nt = 1, ntyp
122        DO ia = ia_s, ia_e
123            IF (ityp(ia) == nt.OR..NOT.upf(nt)%tpawp ) CYCLE types
124        ENDDO
125        !
126        ! If I can't find any atom within first_nat and last_nat
127        ! which is of type nt, then I can deallocate:
128        IF ( ALLOCATED(upf(nt)%paw%ae_rho_atc) ) DEALLOCATE( upf(nt)%paw%ae_rho_atc )
129        IF ( ALLOCATED(upf(nt)%paw%pfunc)      ) DEALLOCATE( upf(nt)%paw%pfunc      )
130        IF ( ALLOCATED(upf(nt)%paw%ptfunc)     ) DEALLOCATE( upf(nt)%paw%ptfunc     )
131        IF ( ALLOCATED(upf(nt)%paw%pfunc_rel)  ) DEALLOCATE( upf(nt)%paw%pfunc_rel  )
132        IF ( ALLOCATED(upf(nt)%paw%ae_vloc)    ) DEALLOCATE( upf(nt)%paw%ae_vloc    )
133        info(me_image,nt) = 1
134        !
135    ENDDO types
136    !
137    CALL mp_sum( info, intra_image_comm )
138    !
139    IF (ionode .AND. iverbosity>0 ) THEN
140    !
141#if defined (__DEBUG)
142        DO np = 0,nproc_image-1
143          DO nt = 1,ntyp
144             IF ( info(np,nt) > 0 ) &
145             WRITE(stdout,"(7x,a,i4,a,10i3)") "node ",np,&
146                         ", deallocated PAW data for type:", nt
147          ENDDO
148        ENDDO
149#else
150        DO nt = 1,ntyp
151          nnodes = SUM( info(:,nt) )
152          IF ( nnodes>0 ) WRITE(stdout,'(7x,"PAW data deallocated on ",&
153                  &                     i4," nodes for type:",i3)')   &
154                  &         nnodes,nt
155        ENDDO
156#endif
157    ENDIF
158    !
159    END SUBROUTINE PAW_post_init
160#endif
161  !
162  !-----------------------------------------------------------------------------
163  SUBROUTINE PAW_atomic_becsum()
164    !--------------------------------------------------------------------------
165    !! Initialize becsum with atomic occupations (for PAW atoms only).
166    !! NOTE: requires exact correspondence chi <--> beta in the atom,
167    !! that is that all wavefunctions considered for PAW generation are
168    !! counted in chi (otherwise the array "oc" does not correspond to beta).
169    !
170    USE kinds,                ONLY : DP
171    USE uspp,                 ONLY : nhtoj, nhtol, indv, becsum
172    USE scf,                  ONLY : rho
173    USE uspp_param,           ONLY : upf, nh, nhm
174    USE ions_base,            ONLY : nat, ityp
175    USE lsda_mod,             ONLY : nspin, starting_magnetization
176    USE paw_variables,        ONLY : okpaw
177    USE paw_symmetry,         ONLY : PAW_symmetrize
178    USE random_numbers,       ONLY : randy
179    USE basis,                ONLY : starting_wfc
180    USE noncollin_module,     ONLY : nspin_mag, angle1, angle2
181    !
182    IMPLICIT NONE
183    !
184    !REAL(DP), INTENT(INOUT) :: becsum(nhm*(nhm+1)/2,nat,nspin)
185    INTEGER :: ispin, na, nt, ijh, ih, jh, nb, mb
186    REAL(DP) :: noise = 0._DP
187    !
188    IF (.NOT. okpaw) RETURN
189    IF (.NOT. ALLOCATED(becsum))   CALL errore( 'PAW_init_becsum', &
190                   'Something bad has happened: becsum is not allocated yet', 1 )
191    !
192    ! Add a bit of random noise if not starting from atomic or saved wfcs:
193    IF ( starting_wfc=='atomic+random') noise = 0.05_DP
194    IF ( starting_wfc=='random')        noise = 0.10_DP
195    !
196    becsum = 0.0_DP
197    na_loop: DO na = 1, nat
198       nt = ityp(na)
199       is_paw: IF (upf(nt)%tpawp) THEN
200          !
201          ijh = 1
202          ih_loop: DO ih = 1, nh(nt)
203             nb = indv(ih,nt)
204             !
205             IF (nspin == 1) THEN
206                !
207                becsum(ijh,na,1) = upf(nt)%paw%oc(nb) / DBLE(2*nhtol(ih,nt)+1)
208                !
209             ELSEIF (nspin == 2) THEN
210                !
211                becsum(ijh,na,1) = 0.5_dp*(1._DP+starting_magnetization(nt))* &
212                                   upf(nt)%paw%oc(nb) / DBLE(2*nhtol(ih,nt)+1)
213                becsum(ijh,na,2) = 0.5_dp*(1._DP-starting_magnetization(nt))* &
214                                   upf(nt)%paw%oc(nb) / DBLE(2*nhtol(ih,nt)+1)
215                !
216             ELSEIF (nspin == 4) THEN
217                !
218                becsum(ijh,na,1) = upf(nt)%paw%oc(nb)/DBLE(2*nhtol(ih,nt)+1)
219                IF (nspin_mag == 4) THEN
220                   becsum(ijh,na,2) = becsum(ijh,na,1) *              &
221                                      starting_magnetization(nt)*     &
222                                      SIN(angle1(nt))*COS(angle2(nt))
223                   becsum(ijh,na,3) = becsum(ijh,na,1) *              &
224                                      starting_magnetization(nt)*     &
225                                      SIN(angle1(nt))*SIN(angle2(nt))
226                   becsum(ijh,na,4) = becsum(ijh,na,1) *              &
227                                      starting_magnetization(nt)*     &
228                                      COS(angle1(nt))
229                ENDIF
230                !
231             ENDIF
232             !
233             ijh = ijh + 1
234             !
235             jh_loop: &
236              DO jh = ( ih + 1 ), nh(nt)
237                !mb = indv(jh,nt)
238                DO ispin = 1, nspin_mag
239                   IF (noise > 0._DP) &
240                      becsum(ijh,na,ispin) = becsum(ijh,na,ispin) + noise *2._DP*(.5_DP-randy())
241                ENDDO
242                !
243                ijh = ijh + 1
244                !
245             ENDDO jh_loop
246          ENDDO ih_loop
247       ENDIF is_paw
248    ENDDO na_loop
249    !
250    ! ... copy becsum in scf structure and symmetrize it
251    rho%bec(:,:,:) = becsum(:,:,:)
252    !
253    CALL PAW_symmetrize( rho%bec )
254    !
255  END SUBROUTINE PAW_atomic_becsum
256  !
257  !
258  !-------------------------------------------------------------------
259  SUBROUTINE PAW_init_onecenter()
260    !-----------------------------------------------------------------
261    !! This allocates space to store onecenter potential and
262    !! calls PAW_rad_init to initialize onecenter integration.
263    !
264    USE ions_base,          ONLY : nat, ityp, ntyp => nsp
265    USE paw_variables,      ONLY : xlm, lm_fact, lm_fact_x,  &
266                                   rad, paw_is_init, vs_rad, &
267                                   total_core_energy, only_paw
268    USE atom,               ONLY : g => rgrid
269    USE radial_grids,       ONLY : do_mesh
270    USE uspp_param,         ONLY : upf
271    USE lsda_mod,           ONLY : nspin
272    USE spin_orb,           ONLY : domag
273    USE noncollin_module,   ONLY : noncolin
274    USE funct,              ONLY : dft_is_gradient
275    USE mp_images,          ONLY : me_image, nproc_image
276    USE mp,                 ONLY : mp_sum
277    !
278    ! ... local variables
279    !
280    INTEGER :: nt, lmax_safe, lmax_add, ia, ia_s, ia_e, na, mykey, max_mesh, &
281               max_nx
282    !
283    CHARACTER(LEN=12) :: env='            '
284    !
285    IF ( paw_is_init ) THEN
286        CALL errore( 'PAW_init_onecenter', 'Already initialized!', 1 )
287        RETURN
288    ENDIF
289    !
290    ! Init only for the atoms that it will actually use later.
291    ! Parallel: divide among processors for the same image
292    CALL block_distribute( nat, me_image, nproc_image, ia_s, ia_e, mykey )
293    !
294    ! Sum all core energies to get...
295    total_core_energy = 0._dp
296    only_paw = .TRUE.
297    max_nx = 0
298    max_mesh = 0
299    !
300    DO na = 1, nat
301        only_paw = only_paw .AND. upf(ityp(na))%tpawp
302        !
303        IF ( upf(ityp(na))%tpawp ) &
304        total_core_energy = total_core_energy &
305                           +upf(ityp(na))%paw%core_energy
306    ENDDO
307    !
308    ! initialize for integration on angular momentum and gradient, integrating
309    ! up to 2*lmaxq (twice the maximum angular momentum of rho) is enough for
310    ! H energy and for XC energy. If I have gradient correction I have to go a bit higher
311    ALLOCATE( rad(ntyp) )
312    !
313    DO nt = 1, ntyp
314        NULLIFY( rad(nt)%ww    )
315        NULLIFY( rad(nt)%ylm   )
316        NULLIFY( rad(nt)%wwylm )
317        NULLIFY( rad(nt)%dylmt )
318        NULLIFY( rad(nt)%dylmp )
319        NULLIFY( rad(nt)%cotg_th )
320        NULLIFY( rad(nt)%cos_phi )
321        NULLIFY( rad(nt)%sin_phi )
322        NULLIFY( rad(nt)%cos_th  )
323        NULLIFY( rad(nt)%sin_th  )
324    ENDDO
325    !
326    types : &
327    DO nt = 1,ntyp
328      IF(.NOT. upf(nt)%tpawp) CYCLE types
329      ! only allocate radial grid integrator for atomic species
330      ! that are actually present on this parallel node:
331      DO ia = ia_s, ia_e
332         IF (ityp(ia) == nt ) THEN
333            IF (upf(nt)%lmax_rho == 0) THEN
334               ! no need for more than one direction, when it is spherical!
335               lmax_safe = 0
336               lmax_add  = 0
337            ELSE
338                !
339                IF ( dft_is_gradient() ) THEN
340                   ! Integrate up to a higher maximum lm if using gradient
341                   ! correction check expression for d(y_lm)/d\theta for details
342                   lmax_safe = lm_fact_x*upf(nt)%lmax_rho
343                   lmax_add  = xlm
344                ELSE
345                   ! no gradient correction:
346                   lmax_safe = lm_fact*upf(nt)%lmax_rho
347                   lmax_add  = 0
348                ENDIF
349            ENDIF
350            !
351            CALL PAW_rad_init( lmax_safe, lmax_add, rad(nt) )
352            max_mesh = MAX( max_mesh, g(nt)%mesh )
353            max_nx = MAX( max_nx, rad(nt)%nx )
354            !
355            CYCLE types
356         ENDIF
357      ENDDO
358    ENDDO types
359    !
360    IF (noncolin .AND. domag)  ALLOCATE( vs_rad(max_mesh,max_nx,nat) )
361    !
362    paw_is_init = .TRUE.
363    !
364  END SUBROUTINE PAW_init_onecenter
365  !
366#if defined(__COMPILE_THIS_UNUSED_FUNCTION)
367  !
368  !-------------------------------------------------------------------------------------------
369  SUBROUTINE PAW_increase_lm( incr )
370    !-----------------------------------------------------------------------------------------
371    !! Increase maximum angularm momentum component for integration from l to l+incr.
372    !
373    USE ions_base,          ONLY : nat, ityp, ntyp => nsp
374    USE paw_variables,      ONLY : rad, paw_is_init
375    USE mp_images,          ONLY : me_image, nproc_image, intra_image_comm
376    USE io_global,          ONLY : stdout, ionode
377    !
378    INTEGER, INTENT(IN) :: incr
379    !! required increase in lm precision
380    !
381    ! ... local variables
382    !
383    INTEGER :: nt, lmax_safe
384    INTEGER :: ia, ia_s, ia_e, mykey
385    !
386    IF( .NOT. paw_is_init .OR. .NOT. ALLOCATED(rad) ) THEN
387       CALL infomsg( 'PAW_increase_lm', &
388               'WARNING: trying to increase max paw angular momentum, but it is not set!' )
389       RETURN
390    ENDIF
391    !
392    ! Parallel: divide among processors for the same image
393    CALL block_distribute( nat, me_image, nproc_image, ia_s, ia_e, mykey )
394    !
395    IF (ionode) &
396        WRITE( stdout, '(5x,a)') &
397            "WARNING: increasing angular resolution of radial grid for PAW."
398    !
399    types : &
400    DO nt = 1,ntyp
401        IF (ionode) THEN
402        WRITE( stdout, '(7x,a,i3,a,i3,a,i3,a,i3)') &
403               "type: ", nt,                      &
404               ", prev. max{l}:",rad(nt)%lmax,    &
405               ", cur. max{l}:",rad(nt)%lmax+incr,&
406               ", directions:",((rad(nt)%lmax+1+incr)*(rad(nt)%lmax+2+incr))/2
407        ENDIF
408        ! only allocate radial grid integrator for atomic species
409        ! that are actually present on this parallel node:
410        DO ia = ia_s, ia_e
411          IF (ityp(ia) == nt ) THEN
412            IF (ASSOCIATED(rad(nt)%ww)  )      DEALLOCATE( rad(nt)%ww  )
413            IF (ASSOCIATED(rad(nt)%ylm) )      DEALLOCATE( rad(nt)%ylm )
414            IF (ASSOCIATED(rad(nt)%wwylm) )    DEALLOCATE( rad(nt)%wwylm )
415            IF (ASSOCIATED(rad(nt)%dylmt) )    DEALLOCATE( rad(nt)%dylmt )
416            IF (ASSOCIATED(rad(nt)%dylmp) )    DEALLOCATE( rad(nt)%dylmp )
417            IF (ASSOCIATED(rad(nt)%cos_phi) )  DEALLOCATE( rad(nt)%cos_phi )
418            IF (ASSOCIATED(rad(nt)%sin_phi) )  DEALLOCATE( rad(nt)%sin_phi )
419            IF (ASSOCIATED(rad(nt)%cos_th)  )  DEALLOCATE( rad(nt)%cos_th  )
420            IF (ASSOCIATED(rad(nt)%sin_th)  )  DEALLOCATE( rad(nt)%sin_th  )
421            IF (ASSOCIATED(rad(nt)%cotg_th) )  DEALLOCATE( rad(nt)%cotg_th )
422            !
423            CALL PAW_rad_init( rad(nt)%lmax+incr, rad(nt) )
424            !
425            CYCLE types
426          ENDIF
427        ENDDO
428    ENDDO types
429    !
430    !paw_is_init = .TRUE.
431    !
432  END SUBROUTINE PAW_increase_lm
433#endif
434  !
435  !----------------------------------------------------------------------------------
436  SUBROUTINE PAW_rad_init( l, ls, rad )
437    !--------------------------------------------------------------------------------
438    !! Initialize several quantities related to radial integration: spherical harmonics and their
439    !! gradients along a few (depending on lmaxq) directions, weights for spherical integration.
440    !
441    ! IMPORTANT: routine PW/summary.f90 has the initialization parameters hardcoded in it
442    !            remember to update it if you change this!
443    !
444    USE constants,              ONLY : pi, fpi, eps8
445    USE funct,                  ONLY : dft_is_gradient
446    USE paw_variables,          ONLY : paw_radial_integrator
447    !
448    INTEGER, INTENT(IN) :: l
449    !! max angular momentum component that will be integrated
450    !! exactly (to numerical precision).
451    INTEGER, INTENT(IN) :: ls
452    !! additional max l that will be used when computing gradient
453    !! and divergence in speherical coords
454    TYPE(paw_radial_integrator), INTENT(OUT) :: rad
455    !! containt weights and more info to integrate
456    !! on radial grid up to lmax = l
457    !
458    ! ... local variables
459    !
460    REAL(DP), ALLOCATABLE :: x(:)    ! nx versors in smart directions
461    REAL(DP), ALLOCATABLE :: w(:)    ! temporary integration weights
462    REAL(DP), ALLOCATABLE :: r(:,:)  ! integration directions
463    REAL(DP), ALLOCATABLE :: r2(:)   ! square modulus of r
464    REAL(DP), ALLOCATABLE :: ath(:), aph(:)
465                                ! angles in sph coords for r
466    INTEGER :: i, ii, n, nphi   ! counters
467    INTEGER :: lm, m            ! indexes for ang.mom
468    REAL(DP) :: phi, dphi, rho  ! spherical coordinates
469    REAL(DP) :: z               ! cartesian coordinates
470    ! for gradient corrections:
471    INTEGER :: ipol
472    REAL(DP), ALLOCATABLE :: aux(:,:)  ! workspace
473    REAL(DP) :: vth(3), vph(3)         !versors for theta and phi
474    !
475    IF (TIMING) CALL start_clock( 'PAW_rad_init' )
476    !
477    ! maximum value of l correctly integrated
478    rad%lmax = l+ls
479    rad%ladd = ls
480    ! volume element for angle phi
481    nphi = rad%lmax + 1 + MOD(rad%lmax,2)
482    dphi = 2._DP*pi/nphi !(rad%lmax+1)
483    ! number of samples for theta angle
484    n = (rad%lmax+2)/2
485    !
486    ALLOCATE( x(n), w(n) )
487    ! compute weights for theta integration
488    CALL gauss_weights( x, w, n )
489    !
490    ! number of integration directions
491    rad%nx = n*nphi !(rad%lmax+1)
492    !write(*,*) "paw --> directions",rad%nx," lmax:",rad%lmax
493    !
494    ALLOCATE( r(3,rad%nx), r2(rad%nx), rad%ww(rad%nx), ath(rad%nx), aph(rad%nx) )
495    !
496    ! compute real weights multiplying theta and phi weights
497    ii = 0
498    !
499    DO i = 1, n
500        z = x(i)
501        rho = SQRT(1._DP-z**2)
502        DO m = 1, nphi !rad%lmax
503            ii= ii+1
504            phi = dphi*DBLE(m-1)
505            r(1,ii) = rho*COS(phi)
506            r(2,ii) = rho*SIN(phi)
507            r(3,ii) = z
508            rad%ww(ii) = w(i)*2._dp*pi/nphi !(rad%lmax+1)
509            r2(ii) = r(1,ii)**2+r(2,ii)**2+r(3,ii)**2
510            ! these will be used later:
511            ath(ii) = ACOS(z/SQRT(r2(ii)))
512            aph(ii) = phi
513        ENDDO
514    ENDDO
515    ! cleanup
516    DEALLOCATE( x, w )
517    !
518    ! initialize spherical harmonics that will be used
519    ! to convert rho_lm to radial grid
520    rad%lm_max = (rad%lmax+1)**2
521    !
522    ALLOCATE( rad%ylm(rad%nx,rad%lm_max) )
523    CALL ylmr2( rad%lm_max, rad%nx, r, r2, rad%ylm )
524    ! As I will mostly use the product ww*ylm I can
525    ! precompute it here:
526    ALLOCATE( rad%wwylm(rad%nx,rad%lm_max) )
527    !
528    DO i = 1, rad%nx
529        DO lm = 1, rad%lm_max
530            rad%wwylm(i,lm) = rad%ww(i) * rad%ylm(i,lm)
531        ENDDO
532    ENDDO
533    !
534    ALLOCATE( rad%cos_phi(rad%nx) )
535    ALLOCATE( rad%sin_phi(rad%nx) )
536    ALLOCATE( rad%cos_th(rad%nx) )
537    ALLOCATE( rad%sin_th(rad%nx) )
538    !
539    DO i = 1, rad%nx
540       rad%cos_phi(i) = COS(aph(i))
541       rad%sin_phi(i) = SIN(aph(i))
542       rad%cos_th(i) = COS(ath(i))
543       rad%sin_th(i) = SIN(ath(i))
544    ENDDO
545    !
546    ! if gradient corrections will be used than we need
547    ! to initialize the gradient of ylm, as we are working in spherical
548    ! coordinates the formula involves \hat{theta} and \hat{phi}
549    gradient: IF (dft_is_gradient()) THEN
550        ALLOCATE( rad%dylmt(rad%nx,rad%lm_max), &
551                  rad%dylmp(rad%nx,rad%lm_max), &
552                  aux(rad%nx,rad%lm_max) )
553        ALLOCATE( rad%cotg_th(rad%nx) )
554        !
555        rad%dylmt(:,:) = 0._DP
556        rad%dylmp(:,:) = 0._DP
557        !
558        ! Compute derivative along x, y and z => gradient, then compute the
559        ! scalar products with \hat{theta} and \hat{phi} and store them in
560        ! dylmt and dylmp respectively.
561        !
562        DO ipol = 1, 3 !x,y,z
563            !
564            CALL dylmr2( rad%lm_max, rad%nx, r,r2, aux, ipol )
565            !
566            DO lm = 1, rad%lm_max
567              DO i = 1, rad%nx
568                vph = (/-SIN(aph(i)), COS(aph(i)), 0._DP/)
569                ! this is the explicit form, but the cross product trick (below) is much faster:
570                ! vth = (/COS(aph(i))*COS(ath(i)), SIN(aph(i))*COS(ath(i)), -SIN(ath(i))/)
571                vth = (/vph(2)*r(3,i)-vph(3)*r(2,i),&
572                        vph(3)*r(1,i)-vph(1)*r(3,i),&
573                        vph(1)*r(2,i)-vph(2)*r(1,i)/)
574                rad%dylmt(i,lm) = rad%dylmt(i,lm) + aux(i,lm)*vth(ipol)
575                ! CHECK: the 1/SIN(th) factor should be correct, but deals wrong result, why?
576                rad%dylmp(i,lm) = rad%dylmp(i,lm) + aux(i,lm)*vph(ipol) !/SIN(ath(i))
577              ENDDO
578            ENDDO
579            !
580        ENDDO
581        !
582        DO i = 1, rad%nx
583           rad%cotg_th(i) = COS(ath(i))/SIN(ath(i))
584        ENDDO
585        !
586        DEALLOCATE( aux )
587        !
588    ENDIF gradient
589    ! cleanup
590    DEALLOCATE( r, r2, ath, aph )
591    !
592    IF (TIMING) CALL stop_clock( 'PAW_rad_init' )
593    !
594 CONTAINS
595    !
596    !---------------------------------------------------
597    SUBROUTINE gauss_weights( x, w, n )
598      !--------------------------------------------------
599      !! Computes weights for gaussian integrals,
600      !! from numerical recipes.
601      !
602      USE constants, ONLY : pi, eps => eps12
603      !
604      IMPLICIT NONE
605      !
606      INTEGER :: n, i, j, m
607      REAL(8) :: x(n), w(n), z, z1, p1, p2, p3, pp
608      !
609      m = (n+1)/2
610      !
611      DO i = 1, m
612         z1 = 2._DP
613         z = COS(pi*(i-0.25_DP)/(n+0.5_DP))
614         DO WHILE( ABS(z-z1) > eps )
615            p1 = 1._DP
616            p2 = 0._DP
617            DO j = 1, n
618              p3 = p2
619              p2 = p1
620              p1 = ((2._DP*j-1._DP)*z*p2-(j-1._DP)*p3)/j
621            ENDDO
622            pp = n*(z*p1-p2)/(z*z-1._DP)
623            z1 = z
624            z  = z1-p1/pp
625         ENDDO
626         x(i) = -z
627         x(n+1-i) = z
628         w(i) = 2._DP/((1._DP-z*z)*pp*pp)
629         w(n+1-i) = w(i)
630      ENDDO
631      !
632    END SUBROUTINE gauss_weights
633    !
634  END SUBROUTINE PAW_rad_init
635  !
636  !
637END MODULE paw_init
638