1!
2! Copyright (C) 2004-2015 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8!----------------------------------------------------------------------------
9MODULE realus
10  !----------------------------------------------------------------------------
11  !! Module Real Ultra-Soft.
12  !
13  !! Originally written by Antonio Suriano and Stefano de Gironcoli;
14  !! modified by Carlo Sbraccia;
15  !! modified by O. Baris Malcioglu (2008);
16  !! modified by P. Umari and G. Stenuit (2009);
17  !! cleanup, GWW-specific stuff moved out by P. Giannozzi (2015);
18  !! computation of dQ/dtau_i needed for forces added by P. Giannozzi (2015);
19  !! some comments about the way some routines act added by S. de Gironcoli (2015);
20  !! extended to generic k by S. de Gironcoli (2016);
21  !! addusstress_r added by S. de Gironcoli (2016);
22  !! task group reorganization by S. de Gironcoli (2016);
23  !! additional parallelization work (OpenMP and MPI) by S. de Gironcoli (2020).
24  !
25  USE kinds,     ONLY : DP
26  USE io_global, ONLY : stdout
27  !
28  IMPLICIT NONE
29  !
30  REAL(DP), ALLOCATABLE :: boxrad(:)
31  !! radius of Q boxes, does not depend on the grid
32  !
33  ! Beta function in real space
34  INTEGER              :: boxtot             ! total dimension of the boxes
35  INTEGER, ALLOCATABLE :: box_beta(:)        ! (combined) list of points in the atomic boxes
36  INTEGER, ALLOCATABLE :: maxbox_beta(:)     ! number of points in a given atom section
37  INTEGER, ALLOCATABLE :: box0(:), box_s(:), box_e(:) ! offset, start, and end index of a given atom section
38  REAL(DP), ALLOCATABLE :: xyz_beta(:,:)     ! coordinates of the points in box_beta
39  REAL(DP), ALLOCATABLE :: betasave(:,:)     ! beta funtions on the (combined) list of points
40  REAL(DP), ALLOCATABLE :: boxrad_beta(:)    ! (ntypx) radius of the boxes
41  COMPLEX(DP), ALLOCATABLE :: box_psic(:)    ! a box-friendly version of psic
42  !! beta function
43  !!
44  COMPLEX(DP), ALLOCATABLE :: xkphase(:)   ! (combined) phases for all atomic boxes
45  !! kpoint-related phase factor around each atom
46  INTEGER :: current_phase_kpoint=-1
47  !! the kpoint index for which the xkphase is currently set.
48  !! Negative initial value  means not set
49  INTEGER :: intra_bbox_comm
50  !! MPI communicator used to break boxes of the beta functions
51  !
52  ! ... General
53  !
54  LOGICAL :: real_space = .FALSE.
55  !! if true perform calculations in real space
56  INTEGER :: initialisation_level
57  !! init_realspace_vars sets this to 3; qpointlist adds 5; betapointlist adds 7
58  !! so the value should be 15 if the real space routine is initialised properly
59  !
60  COMPLEX(DP), ALLOCATABLE :: tg_psic(:)
61  COMPLEX(DP), ALLOCATABLE :: psic_temp(:)
62  !! Copy of psic
63  COMPLEX(DP), ALLOCATABLE :: tg_psic_temp(:)
64  !! Copy of tg_psic
65  COMPLEX(DP), ALLOCATABLE :: tg_vrs(:)
66  !! task groups linear V memory
67  COMPLEX(DP), ALLOCATABLE :: psic_box_temp(:),tg_psic_box_temp(:)
68  !
69  TYPE realsp_augmentation
70  !! Contains the augmentation functions and related quantities, in real space for
71  !! a single atom.
72     INTEGER :: maxbox = 0
73     !!  number of R points in the augmentation sphere of this atom
74     INTEGER,ALLOCATABLE  :: box(:)
75     !! (maxbox) Index of point R in the global order of the R-space grid
76     REAL(DP),ALLOCATABLE :: dist(:)
77     !! (maxbox) distances |R-tau_i| (box is centered on atom tau_i)
78     REAL(DP),ALLOCATABLE :: xyz(:,:)
79     !! (3,maxbox) coordinates of R-tau_i
80     REAL(DP),ALLOCATABLE :: qr(:,:)
81     !! (maxbox,number_of_q_funcs) the Q functions sampled over R points
82  END TYPE realsp_augmentation
83  !
84  TYPE(realsp_augmentation),POINTER :: tabp(:) => null()
85  !! Augmentation functions on the RHO (HARD) grid for all atoms
86  !
87  !TYPE(realsp_augmentation),POINTER :: tabs(:) => null()
88  ! !Augmentation functions on the SMOOTH grid for all atoms
89  !
90  TYPE(realsp_augmentation),POINTER :: tabxx(:) => null()
91  !! Augmentation functions on the EXX grid for all atoms
92  !
93  PRIVATE
94  ! variables for real-space Q, followed by routines
95  PUBLIC :: tabp, tabxx, boxrad, realsp_augmentation
96  PUBLIC :: generate_qpointlist, qpointlist, addusdens_r, newq_r, &
97       addusforce_r, addusstress_r, real_space_dq, deallocate_realsp
98  ! variables for real-space beta, followed by routines
99  PUBLIC :: real_space, initialisation_level, &
100       tg_psic, betasave, maxbox_beta, box_beta
101  PUBLIC :: betapointlist, init_realspace_vars, v_loc_psir, v_loc_psir_inplace
102  PUBLIC :: box0, box_s, box_e, box_psic
103  PUBLIC :: invfft_orbital_gamma, fwfft_orbital_gamma, s_psir_gamma, &
104            calbec_rs_gamma, add_vuspsir_gamma, invfft_orbital_k,    &
105            fwfft_orbital_k, s_psir_k, calbec_rs_k, add_vuspsir_k
106  !
107  CONTAINS
108
109    !------------------------------------------------------------------------
110    SUBROUTINE generate_qpointlist
111      !------------------------------------------------------------------------
112      USE fft_base,     ONLY : dfftp  !, dffts
113      USE funct,        ONLY : dft_is_hybrid
114      !USE gvecs,        ONLY : doublegrid
115      !USE exx,  ONLY : exx_fft
116      IMPLICIT NONE
117      !
118      ! 1. initialize hard grid
119      WRITE(stdout, '(/,5x,a)') "Initializing real-space augmentation for DENSE grid"
120      CALL qpointlist(dfftp, tabp)
121      !
122      ! NOTE: nothing is using the real space smooth grid at the moment, as EXX
123      !       now uses its own custom grid. In case this is re-introduced, please
124      !       also modify exx_fft_create to recycle this grid when ecutfock = ecutwfc
125!       ! 2. initialize smooth grid (only for EXX at this moment)
126!       IF ( dft_is_hybrid() ) THEN
127!         IF(doublegrid)THEN
128!           WRITE(stdout, '(5x,a)') "Initializing real-space augmentation for SMOOTH grid"
129!           CALL qpointlist(dffts, tabs)
130!         ELSE
131!           ! smooth and rho grid are the same if not double grid
132!           WRITE(stdout, '(7x,a)') " SMOOTH grid -> DENSE grid"
133!           tabs => tabp
134!         ENDIF
135!       ENDIF
136      !
137      RETURN
138      !------------------------------------------------------------------------
139    END SUBROUTINE generate_qpointlist
140    !------------------------------------------------------------------------
141    !
142    !----------------------------------------------------------------------------
143    SUBROUTINE init_realspace_vars()
144     !---------------------------------------------------------------------------
145     !! This subroutine should be called to allocate/reset real space related
146     !! variables.
147     !
148     USE fft_base,             ONLY : dffts
149
150     IMPLICIT NONE
151
152     !print *, "<<<<<init_realspace_vars>>>>>>>"
153
154     !real space, allocation for task group fft work arrays
155
156     IF( dffts%has_task_groups ) THEN
157        !
158        IF (allocated( tg_psic ) ) DEALLOCATE( tg_psic )
159        !
160        ALLOCATE( tg_psic( dffts%nnr_tg ) )
161        ALLOCATE( tg_vrs ( dffts%nnr_tg ) )
162        !
163     ENDIF
164     !
165     initialisation_level = initialisation_level + 7
166
167    END SUBROUTINE init_realspace_vars
168    !
169    !------------------------------------------------------------------------
170    SUBROUTINE deallocate_realsp()
171      !------------------------------------------------------------------------
172      !! Deallocate real space related variables.
173      !
174!      USE gvecs,      ONLY : doublegrid
175      !
176      IMPLICIT NONE
177      !
178      IF ( allocated( boxrad ) )  DEALLOCATE( boxrad )
179      !
180      CALL deallocate_realsp_aug ( tabp )
181!       IF ( doublegrid ) THEN
182!          CALL deallocate_realsp_aug ( tabs )
183!       ELSE
184!          NULLIFY(tabs)
185!       END IF
186      !
187    END SUBROUTINE deallocate_realsp
188    !
189    !------------------------------------------------------------------------
190    SUBROUTINE deallocate_realsp_aug( tab )
191      !------------------------------------------------------------------------
192      !! Deallocate \(\text{realsp_augmentation}\) type variable.
193      !
194      TYPE(realsp_augmentation), POINTER :: tab(:)
195      INTEGER :: ia
196      !
197      IF ( associated( tab ) ) THEN
198         DO ia = 1, SIZE(tab)
199            IF(allocated(tab(ia)%qr ))  DEALLOCATE(tab(ia)%qr)
200            IF(allocated(tab(ia)%box))  DEALLOCATE(tab(ia)%box)
201            IF(allocated(tab(ia)%dist)) DEALLOCATE(tab(ia)%dist)
202            IF(allocated(tab(ia)%xyz))  DEALLOCATE(tab(ia)%xyz)
203            tab(ia)%maxbox = 0
204         ENDDO
205         DEALLOCATE(tab)
206      ENDIF
207      !
208    END SUBROUTINE deallocate_realsp_aug
209    !
210    !------------------------------------------------------------------------
211    SUBROUTINE qpointlist( dfft, tabp )
212      !------------------------------------------------------------------------
213      !! This subroutine computes and stores into \(\text{tabp}\) the values of \(Q(r)\)
214      !! on atom-centered boxes in the real-space grid of descriptor "dfft".
215      !! Must be called at startup and every time atoms (tau_i) move.
216      !! A set of spherical boxes are computed for each atom. The Q functions
217      !! \(Q(r-\tau_i)\) are then interpolated on the real-space grid points.
218      !! On output:
219      !
220      !! * \(\text{boxrad}\) contains the radii of the boxes for each atom type;
221      !! * \(\text{tabp(nat)}\) contains pointers to structure tabp, each contaning:
222      !!    * \(\text{maxbox}\) = number of real-space grid points in the atom-centered box;
223      !!    * \(\text{qr(maxbox)}\) = interpolated values of Q(r) on the points of the box;
224      !!    * \(\text{box(maxbox)}\) = index of grid points in the real-space grid "dfft".
225      !
226      ! ... Most of time is spent here; the calling routines are faster.
227      !
228      USE constants,  ONLY : pi, fpi, eps16, eps6
229      USE ions_base,  ONLY : nat, nsp, ityp, tau
230      USE cell_base,  ONLY : at, bg, alat
231      USE uspp,       ONLY : okvan, qq_at, qq_nt, nhtol
232      USE uspp_param, ONLY : upf, nh
233      USE atom,       ONLY : rgrid
234      USE fft_types,  ONLY : fft_type_descriptor
235      USE mp_bands,   ONLY : intra_bgrp_comm
236      USE mp,         ONLY : mp_sum
237      !
238      IMPLICIT NONE
239      !
240      TYPE(fft_type_descriptor),INTENT(in)    :: dfft
241      TYPE(realsp_augmentation),POINTER,INTENT(inout) :: tabp(:)
242      !
243      INTEGER               :: ia, mbia
244      INTEGER               :: indm, ih, jh, ijh, il, jl
245      INTEGER               :: roughestimate, nt
246      INTEGER,  ALLOCATABLE :: buffpoints(:)
247      INTEGER               :: ir, i, j, k, ijv
248      INTEGER               :: imin, imax, ii, jmin, jmax, jj, kmin, kmax, kk
249      REAL(DP)              :: distsq, posi(3)
250      REAL(DP), ALLOCATABLE :: boxdist(:), xyz(:,:), diff(:)
251      REAL(DP)              :: mbr, mbx, mby, mbz, dmbx, dmby, dmbz, aux
252      REAL(DP)              :: inv_nr1, inv_nr2, inv_nr3, boxradsq_ia, boxrad_ia
253      !
254      initialisation_level = 3
255      IF ( .not. okvan ) RETURN
256      !
257      CALL start_clock( 'realus' )
258      !
259      ! ... tabp is deallocated here to free the memory for the buffers
260      !
261      CALL deallocate_realsp_aug ( tabp )
262      !
263      ALLOCATE(tabp(nat))
264      !
265      IF ( .not. allocated( boxrad ) ) THEN
266         !
267         ! ... here we calculate the radius of each spherical box ( one
268         ! ... for each non-local projector )
269         !
270         ALLOCATE( boxrad( nsp ) )
271         !
272         boxrad(:) = 0.D0
273         !
274         DO nt = 1, nsp
275            IF ( .not. upf(nt)%tvanp ) CYCLE
276            DO ijv = 1, upf(nt)%nbeta*(upf(nt)%nbeta+1)/2
277               DO indm = upf(nt)%mesh,1,-1
278                  !
279                  aux = maxval(abs( upf(nt)%qfuncl(indm,ijv,:) ))
280                  IF ( aux > eps16 ) THEN
281                     boxrad(nt) = max( rgrid(nt)%r(indm), boxrad(nt) )
282                     exit
283                  ENDIF
284                  !
285               ENDDO
286            ENDDO
287         ENDDO
288         !
289         boxrad(:) = boxrad(:) / alat
290         !
291      ENDIF
292#if defined(__DEBUG)
293      write (stdout,*) 'BOXRAD = ', boxrad(:) * alat
294#endif
295      !
296      ! ... a rough estimate for the number of grid-points per box
297      ! ... is provided here
298      !
299      mbr = maxval( boxrad(:) )
300      !
301      mbx = mbr*sqrt( bg(1,1)**2 + bg(1,2)**2 + bg(1,3)**2 )
302      mby = mbr*sqrt( bg(2,1)**2 + bg(2,2)**2 + bg(2,3)**2 )
303      mbz = mbr*sqrt( bg(3,1)**2 + bg(3,2)**2 + bg(3,3)**2 )
304      !
305      dmbx = 2*anint( mbx*dfft%nr1x ) + 2
306      dmby = 2*anint( mby*dfft%nr2x ) + 2
307      dmbz = 2*anint( mbz*dfft%nr3x ) + 2
308      !
309      roughestimate = anint( dble( dmbx*dmby*dmbz ) * pi / 6.D0 )
310      !
311      ALLOCATE( buffpoints( roughestimate ) )
312      ALLOCATE( boxdist (   roughestimate ) )
313      ALLOCATE( xyz( 3, roughestimate ) )
314      !
315      inv_nr1 = 1.D0 / dble( dfft%nr1 )
316      inv_nr2 = 1.D0 / dble( dfft%nr2 )
317      inv_nr3 = 1.D0 / dble( dfft%nr3 )
318      !
319      ! the qq_at matrices are recalculated  here. initialize them
320      qq_at(:,:,:) =0.d0
321
322      DO ia = 1, nat
323         !
324         CALL start_clock( 'realus:boxes' )
325         !
326         nt = ityp(ia)
327         IF ( .not. upf(nt)%tvanp ) CYCLE
328         !
329         ! ... here we find the points in the box surrounding atom "ia"
330         !
331         buffpoints(:) = 0
332         boxdist(:) = 0.D0
333         !
334         boxrad_ia   = boxrad(nt)
335         boxradsq_ia = boxrad_ia**2
336         !
337         mbia = 0
338         !
339         ! ... compute the needed ranges for i,j,k  indices around the atom position in crystal coordinates
340         !
341         posi(:) = tau(:,ia) ; CALL cryst_to_cart( 1, posi, bg, -1 )
342         imin = NINT((posi(1) - boxrad_ia * sqrt(bg(1,1)*bg(1,1)+bg(2,1)*bg(2,1)+bg(3,1)*bg(3,1)))*dfft%nr1)
343         imax = NINT((posi(1) + boxrad_ia * sqrt(bg(1,1)*bg(1,1)+bg(2,1)*bg(2,1)+bg(3,1)*bg(3,1)))*dfft%nr1)
344         jmin = NINT((posi(2) - boxrad_ia * sqrt(bg(1,2)*bg(1,2)+bg(2,2)*bg(2,2)+bg(3,2)*bg(3,2)))*dfft%nr2)
345         jmax = NINT((posi(2) + boxrad_ia * sqrt(bg(1,2)*bg(1,2)+bg(2,2)*bg(2,2)+bg(3,2)*bg(3,2)))*dfft%nr2)
346         kmin = NINT((posi(3) - boxrad_ia * sqrt(bg(1,3)*bg(1,3)+bg(2,3)*bg(2,3)+bg(3,3)*bg(3,3)))*dfft%nr3)
347         kmax = NINT((posi(3) + boxrad_ia * sqrt(bg(1,3)*bg(1,3)+bg(2,3)*bg(2,3)+bg(3,3)*bg(3,3)))*dfft%nr3)
348#if defined(__DEBUG)
349         write (*,*) tau(:,ia)
350         write (*,*) posi
351         write (*,*) imin,imax,jmin,jmax,kmin,kmax
352#endif
353         DO k = kmin, kmax
354            kk = modulo(k,dfft%nr3) - dfft%my_i0r3p
355            if (kk .LT. 0 .OR. kk .ge. dfft%my_nr3p ) cycle
356            DO j = jmin, jmax
357               jj = modulo(j,dfft%nr2) - dfft%my_i0r2p
358               if (jj .LT. 0 .OR. jj .ge. dfft%my_nr2p ) cycle
359               DO i = imin, imax
360                  ii = modulo(i,dfft%nr1)
361                  !
362                  posi(:) = i * inv_nr1*at(:,1) + j * inv_nr2*at(:,2) + k * inv_nr3*at(:,3) - tau(:,ia)
363                  !
364                  distsq = posi(1)**2 + posi(2)**2 + posi(3)**2
365                  IF ( distsq < boxradsq_ia ) THEN
366                     ! compute fft index ir from ii,jj,kk
367                     ir = 1 + ii + jj * dfft%nr1x + kk * dfft%nr1x * dfft%my_nr2p
368                     !
369                     mbia = mbia + 1
370                     IF( mbia > roughestimate ) CALL errore('qpointlist', 'rough-estimate is too rough', 3)
371                     !
372                     buffpoints(mbia)    = ir
373                     boxdist(mbia)       = sqrt( distsq )*alat
374                     xyz(:,mbia)         = posi(:)*alat
375                     !
376                  ENDIF
377               END DO
378            END DO
379         END DO
380#if defined(__DEBUG)
381         WRITE (stdout,*) 'QPOINTLIST  ATOM ', ia, ' MBIA =', mbia
382#endif
383         !
384         IF ( mbia == 0 ) CYCLE
385         !
386         ! ... store points in the appropriate place
387         !
388         tabp(ia)%maxbox = mbia
389         ALLOCATE( tabp(ia)%box(mbia) )
390         tabp(ia)%box(:) = buffpoints(1:mbia)
391         ALLOCATE( tabp(ia)%dist(mbia) )
392         tabp(ia)%dist(:) = boxdist(1:mbia)
393         ALLOCATE( tabp(ia)%xyz(3,mbia) )
394         tabp(ia)%xyz(:,:) = xyz(:,1:mbia)
395         CALL stop_clock( 'realus:boxes' )
396         !
397         ! ... compute the Q functions for this box
398         !
399         CALL real_space_q( nt, ia, mbia, tabp )
400         !
401      ENDDO
402      ! collect the result of the qq_at matrix across processors
403      CALL mp_sum( qq_at, intra_bgrp_comm )
404      ! and test that they don't differ too much from the result computed on the atomic grid
405      ALLOCATE (diff(nsp))
406      diff(:)=0.0_dp
407      do ia=1,nat
408         nt = ityp(ia)
409         ijh = 0
410         do ih = 1, nh(nt)
411            il = nhtol (ih, nt)
412            do jh = ih, nh(nt)
413               jl = nhtol (jh, nt)
414               ijh=ijh+1
415               if (abs(qq_at(ih,jh,ia)-qq_nt(ih,jh,nt)) .gt. eps6*10 ) then
416                  diff(nt) = MAX(diff(nt), abs(qq_at(ih,jh,ia)-qq_nt(ih,jh,nt)))
417               end if
418            end do
419         end do
420      end do
421      IF ( ANY( diff(:) > 0.0_dp ) ) THEN
422         CALL infomsg('real_space_q', &
423                      'integrated real space q too different from target q')
424         WRITE(stdout, &
425         '(5X,"Largest difference: ",f10.6," for atom type #",i2)') &
426            MAXVAL(diff), MAXLOC(diff(:))
427      END IF
428      DEALLOCATE (diff)
429      !
430      DEALLOCATE( xyz )
431      DEALLOCATE( boxdist )
432      DEALLOCATE( buffpoints )
433      CALL stop_clock( 'realus' )
434      !
435    END SUBROUTINE qpointlist
436    !
437    !------------------------------------------------------------------------
438    SUBROUTINE real_space_q( nt, ia, mbia, tab )
439      !------------------------------------------------------------------------
440      !! Compute \(Q_{lm}(r-\tau_{ia})\) centered around  atom \(\text{ia}\) of
441      !! type \(\text{nt}\).
442      !! First we compute the radial \(Q(r)\) (\(\text{qtot}\)) for each \(l\),
443      !! then we interpolate it in our mesh (contained in \(\text{tabp}\)), finally
444      !! we multiply by the spherical harmonics and store it into \(\text{tab}\).
445      !
446      USE constants,  ONLY : eps16, eps6
447      USE uspp,       ONLY : indv, nhtolm, ap, qq_at
448      USE uspp_param, ONLY : upf, lmaxq, nh
449      USE atom,       ONLY : rgrid
450      USE splinelib,  ONLY : spline, splint
451      USE cell_base,  ONLY : omega
452      USE fft_base,   ONLY : dfftp
453      !
454      IMPLICIT NONE
455      !
456      INTEGER, INTENT(IN) :: ia, nt, mbia
457      TYPE(realsp_augmentation), INTENT(INOUT), POINTER :: tab(:)
458      !
459      INTEGER  :: l, nb, mb, ijv, lllnbnt, lllmbnt, ir, nfuncs, lm, i0, ijh, ih, jh
460      REAL(dp) :: first, second, qtot_int
461      REAL(dp), ALLOCATABLE :: qtot(:), dqtot(:), xsp(:), wsp(:), rl2(:), spher(:,:)
462
463      CALL start_clock( 'realus:tabp' )
464
465      if (mbia.ne.tab(ia)%maxbox) then
466         write (stdout,*) 'real space q: ia,nt,mbia,tab(ia)%maxbox ', ia, nt, mbia, tab(ia)%maxbox
467         call errore('real_space_q','inconsistent tab(ia)%maxbox dimension',ia)
468      end if
469      !
470      nfuncs = nh(nt)*(nh(nt)+1)/2
471      ALLOCATE(tab(ia)%qr(mbia, nfuncs))
472      !
473      ! ... compute the spherical harmonics
474      !
475      ALLOCATE( spher(mbia, lmaxq**2) )
476      spher (:,:) = 0.0_dp
477      !
478      ALLOCATE( rl2( mbia ) )
479      DO ir = 1, mbia
480         rl2(ir) = tab(ia)%xyz(1,ir)**2 + &
481                   tab(ia)%xyz(2,ir)**2 + &
482                   tab(ia)%xyz(3,ir)**2
483      ENDDO
484      CALL ylmr2 ( lmaxq**2, mbia, tab(ia)%xyz, rl2, spher )
485      DEALLOCATE( rl2 )
486
487      tab(ia)%qr(:,:)=0.0_dp
488      ALLOCATE( qtot(upf(nt)%kkbeta), dqtot(upf(nt)%kkbeta) )
489      !
490      ! ... variables used for spline interpolation
491      !
492      ALLOCATE( xsp( upf(nt)%kkbeta ), wsp( upf(nt)%kkbeta ) )
493      !
494      ! ... the radii in x
495      !
496      xsp(:) = rgrid(nt)%r(1:upf(nt)%kkbeta)
497      !
498      ! ... prevent FP errors if r(1) = 0  (may happen for some grids)
499      !
500      IF ( rgrid(nt)%r(1) < eps16 ) THEN
501         i0 = 2
502      ELSE
503         i0 = 1
504      END IF
505      !
506      DO l = 0, upf(nt)%nqlc - 1
507         !
508         ! ... first we build for each nb,mb,l the total Q(|r|) function
509         ! ... note that l is the true (combined) angular momentum
510         ! ... and that the arrays have dimensions 1..l+1
511         !
512         DO nb = 1, upf(nt)%nbeta
513            DO mb = nb, upf(nt)%nbeta
514               ijv = mb * (mb-1) /2 + nb
515               !
516               lllnbnt = upf(nt)%lll(nb)
517               lllmbnt = upf(nt)%lll(mb)
518               !
519               IF ( .not. ( l >= abs( lllnbnt - lllmbnt ) .and. &
520                            l <= lllnbnt + lllmbnt        .and. &
521                            mod( l + lllnbnt + lllmbnt, 2 ) == 0 ) ) CYCLE
522               !
523               IF( upf(nt)%tvanp ) THEN
524                  qtot(i0:upf(nt)%kkbeta) = &
525                       upf(nt)%qfuncl(i0:upf(nt)%kkbeta,ijv,l) &
526                       / rgrid(nt)%r(i0:upf(nt)%kkbeta)**2
527                  IF ( i0 == 2 ) qtot(1) = qtot(2)
528               ENDIF
529               !
530               ! ... compute the first derivative
531               !
532               ! ... analytical derivative up to r = rinner, numerical beyond
533               !
534               CALL radial_gradient(qtot, dqtot, rgrid(nt)%r, upf(nt)%kkbeta, 1)
535               !
536               ! ... we need the first and second derivatives in the first point
537               !
538               first = dqtot(1)
539               !
540               ! ... if we don't have the analitical coefficients, try the same
541               ! ... numerically (note that setting first=0.0 and second=0.0
542               ! ... makes almost no difference) - wsp is used as work space
543               !
544               CALL radial_gradient(dqtot, wsp, rgrid(nt)%r, upf(nt)%kkbeta, 1)
545               second = wsp(1) ! second derivative in first point
546               !
547               ! ... call spline for interpolation of Q(r)
548               !
549               CALL spline( xsp, qtot, first, second, wsp )
550               !
551               DO ir = 1, mbia
552                  !
553                  ! ... spline interpolation
554                  !
555                  qtot_int = splint( xsp, qtot, wsp, tab(ia)%dist(ir) )
556                  !
557                  ijh = 0
558                  DO ih = 1, nh(nt)
559                     DO jh = ih, nh(nt)
560                        !
561                        ijh = ijh + 1
562                        !
563                        IF ( .not.( nb == indv(ih,nt) .and. &
564                                    mb == indv(jh,nt) ) ) CYCLE
565                        !
566                        DO lm = l**2+1, (l+1)**2
567                           tab(ia)%qr(ir,ijh) = tab(ia)%qr(ir,ijh) + &
568                                        qtot_int*spher(ir,lm)*&
569                                        ap(lm,nhtolm(ih,nt),nhtolm(jh,nt))
570                        ENDDO
571                     ENDDO
572                  ENDDO
573               ENDDO
574               !
575            ENDDO
576         ENDDO
577      ENDDO
578      ! compute qq_at interating qr in the sphere
579      ijh = 0
580      DO ih = 1, nh(nt)
581         DO jh = ih, nh(nt)
582            ijh = ijh + 1
583            qq_at(ih,jh,ia) = sum (tab(ia)%qr(1:mbia,ijh) )
584            qq_at(jh,ih,ia) = qq_at(ih,jh,ia)
585         END DO
586      END DO
587      qq_at(:,:,ia) = qq_at(:,:,ia) * omega/(dfftp%nr1*dfftp%nr2*dfftp%nr3)
588
589      !
590      DEALLOCATE( qtot, dqtot )
591      DEALLOCATE( wsp, xsp )
592      DEALLOCATE( spher )
593      !
594      CALL stop_clock( 'realus:tabp' )
595      !
596    END SUBROUTINE real_space_q
597    !
598    !------------------------------------------------------------------------
599    SUBROUTINE real_space_dq( nt, ia, mbia, nfuncs, dqr )
600      !------------------------------------------------------------------------
601      !! Compute \(dQ(r-\tau_i)/d\tau_{ia}\) using the formula:
602      !! \begin{equation}\notag
603      !!   \frac{dQ(r-\tau_i)}{d\tau_{ia}} = - \frac{dq}{dr}(|r-\tau_i|)\frac{
604      !!                                       (r_a-\tau_{ia})}{|r-\tau_i|}
605      !!                                       Y_{lm}(r-\tau_i)
606      !!                                     - q(|r-\tau_i|)\cdot
607      !!                                       \frac{dY_{lm}(r-\tau_i)}{d\tau_{ia}}
608      !! \end{equation}
609      !! This routine follows the same logic of \(\texttt{real_space_q}\).
610      !
611      USE constants,  ONLY : eps8, eps16
612      USE uspp,       ONLY : indv, nhtolm, ap
613      USE uspp_param, ONLY : upf, lmaxq, nh
614      USE atom,       ONLY : rgrid
615      USE splinelib,  ONLY : spline, splint
616      !
617      IMPLICIT NONE
618      !
619      INTEGER, INTENT(IN) :: ia, nt, mbia, nfuncs
620      REAL(dp),INTENT(OUT):: dqr(mbia,nfuncs,3)
621      !
622      INTEGER  :: l, nb, mb, ijv, lllnbnt, lllmbnt, ir, lm, ijh, ih, jh, ipol
623      REAL(dp) :: first, second, qtot_int, dqtot_int, dqxyz(3)
624      REAL(dp), ALLOCATABLE :: qtot(:), dqtot(:), xsp(:), wsp(:,:), &
625           rl2(:), spher(:,:), dspher(:,:,:)
626      !
627      CALL start_clock( 'realus:tabp' )
628      if (mbia.ne.tabp(ia)%maxbox) then
629         write (stdout,*) 'real space dq: ia,nt,mbia,tabp(ia)%maxbox ', ia, nt, mbia, tabp(ia)%maxbox
630         call errore('real_space_dq','inconsistent tab(ia)%maxbox dimension',ia)
631      end if
632      !
633      ! ... compute the spherical harmonics
634      !
635      ALLOCATE( spher(mbia, lmaxq**2), dspher(mbia, lmaxq**2, 3) )
636      spher (:,:) = 0.0_dp
637      dspher(:,:,:) = 0.0_dp
638      !
639      ALLOCATE( rl2( mbia ) )
640      DO ir = 1, mbia
641         rl2(ir) = tabp(ia)%xyz(1,ir)**2 + &
642                   tabp(ia)%xyz(2,ir)**2 + &
643                   tabp(ia)%xyz(3,ir)**2
644      ENDDO
645      CALL ylmr2 ( lmaxq**2, mbia, tabp(ia)%xyz, rl2, spher )
646      do ipol = 1, 3
647         CALL dylmr2( lmaxq**2, mbia, tabp(ia)%xyz, rl2, dspher(1,1,ipol),ipol )
648      END DO
649      DEALLOCATE( rl2 )
650
651      dqr(:,:,:)=0.0_dp
652      ALLOCATE( qtot(upf(nt)%kkbeta), dqtot(upf(nt)%kkbeta) )
653      !
654      ! ... variables used for spline interpolation
655      !
656      ALLOCATE( xsp( upf(nt)%kkbeta ), wsp( upf(nt)%kkbeta, 2 ) )
657      !
658      ! ... the radii in x
659      !
660      xsp(:) = rgrid(nt)%r(1:upf(nt)%kkbeta)
661      !
662      DO l = 0, upf(nt)%nqlc - 1
663         !
664         ! ... first we build for each nb,mb,l the total Q(|r|) function
665         ! ... note that l is the true (combined) angular momentum
666         ! ... and that the arrays have dimensions 1..l+1
667         !
668         DO nb = 1, upf(nt)%nbeta
669            DO mb = nb, upf(nt)%nbeta
670               ijv = mb * (mb-1) /2 + nb
671               !
672               lllnbnt = upf(nt)%lll(nb)
673               lllmbnt = upf(nt)%lll(mb)
674               !
675               IF ( .not. ( l >= abs( lllnbnt - lllmbnt ) .and. &
676                            l <= lllnbnt + lllmbnt        .and. &
677                            mod( l + lllnbnt + lllmbnt, 2 ) == 0 ) ) CYCLE
678               !
679               IF( upf(nt)%tvanp ) THEN
680                  qtot(1:upf(nt)%kkbeta) = &
681                       upf(nt)%qfuncl(1:upf(nt)%kkbeta,ijv,l) &
682                       / rgrid(nt)%r(1:upf(nt)%kkbeta)**2
683                  if (rgrid(nt)%r(1)< eps16) then
684!                      if (l==0) then
685                         qtot(1) = qtot(2)
686                         qtot(1) = qtot(3)
687#if defined(__DEBUG)
688                         write (stdout,*) qtot(2),qtot(3)
689#endif
690!                      else
691!                         qtot(1) = 0.d0
692!                      end if
693                  end if
694               ENDIF
695               !
696               ! ... compute the first derivative
697               !
698               CALL radial_gradient(qtot, dqtot, rgrid(nt)%r, upf(nt)%kkbeta, 1)
699               !
700               ! ... we need the first and second derivatives in the first point
701               !
702               first = dqtot(1)
703               !
704               ! ... if we don't have the analitical coefficients, try the same
705               ! ... numerically (note that setting first=0.0 and second=0.0
706               ! ... makes almost no difference) - wsp is used as work space
707               !
708               CALL radial_gradient(dqtot, wsp, rgrid(nt)%r, upf(nt)%kkbeta, 1)
709               second = wsp(1,1) ! second derivative in first point
710               !
711               ! ... call spline for interpolation of Q(r)
712               !
713               CALL spline( xsp, qtot, first, second, wsp(:,1) )
714               !
715               ! ... same for dQ/dr (third derivative at first point is set to zero)
716               !
717               CALL spline( xsp, dqtot, second, 0.0_dp, wsp(:,2) )
718               !
719               DO ir = 1, mbia
720                  !
721                  ! ... spline interpolation
722                  !
723                  qtot_int = splint( xsp, qtot, wsp(:,1), tabp(ia)%dist(ir) )
724                  dqtot_int= splint( xsp,dqtot, wsp(:,2), tabp(ia)%dist(ir) )
725                  !
726                  ! ... prevent floating-point error if dist = 0
727                  !
728                  IF ( tabp(ia)%dist(ir) > eps8 ) THEN
729                     dqxyz(:) = dqtot_int*tabp(ia)%xyz(:,ir) / &
730                                     tabp(ia)%dist(ir)
731                  ELSE
732                     dqxyz(:) = 0.0_dp
733                  ENDIF
734                  !
735                  ijh = 0
736                  DO ih = 1, nh(nt)
737                     DO jh = ih, nh(nt)
738                        !
739                        ijh = ijh + 1
740                        !
741                        IF ( .not.( nb == indv(ih,nt) .and. &
742                                    mb == indv(jh,nt) ) ) CYCLE
743                        !
744                        DO lm = l**2+1, (l+1)**2
745                           DO ipol = 1,3
746                              dqr(ir,ijh,ipol) = &
747                                   dqr(ir,ijh,ipol) - &
748                                   ( qtot_int*dspher(ir,lm,ipol) +  &
749                                     dqxyz(ipol)*spher(ir,lm) ) * &
750                                     ap(lm,nhtolm(ih,nt),nhtolm(jh,nt))
751                           END DO
752                        ENDDO
753                     ENDDO
754                  ENDDO
755               ENDDO
756               !
757            ENDDO
758         ENDDO
759      ENDDO
760      !
761      DEALLOCATE( qtot, dqtot )
762      DEALLOCATE( wsp, xsp )
763      DEALLOCATE( dspher, spher )
764      !
765      CALL stop_clock( 'realus:tabp' )
766      !
767    END SUBROUTINE real_space_dq
768    !
769    !------------------------------------------------------------------------
770    SUBROUTINE betapointlist()
771      !------------------------------------------------------------------------
772      !! This subroutine is the driver routine of the box system in this
773      !! implementation of US in real space.
774      !
775      !! * all the variables common in the module are computed and stored for
776      !!   reusing;
777      !! * this routine has to be called every time the atoms are moved and of
778      !!   course at the beginning;
779      !! * a set of spherical boxes are computed for each atom;
780      !! * in \(\text{boxrad_beta}\) there are the radii of the boxes;
781      !! * in \(\text{maxbox_beta}\) the upper limit of leading index, namely the
782      !!   number of points of the fine mesh contained in each box;
783      !! * in \(\text{xyz_beta}\) there are the coordinates of the points with
784      !!   origin in the centre of atom;
785      !! * in the last part ('tabp') the q value is interpolated in these boxes.
786      !
787      ! ... Most of time is spent here; the calling routines are faster.
788      !
789      ! The source inspired by qsave
790      !
791      USE constants,  ONLY : pi
792      USE control_flags, ONLY : gamma_only
793      USE ions_base,  ONLY : nat, nsp, ityp, tau
794      USE cell_base,  ONLY : at, bg, alat
795      USE uspp,       ONLY : okvan, indv,nhtolm
796      USE uspp_param, ONLY : upf, lmaxq, nh, nhm
797      USE atom,       ONLY : rgrid
798      USE fft_base,   ONLY : dffts
799      USE splinelib,  ONLY : spline, splint
800      !
801      IMPLICIT NONE
802      !
803      LOGICAL, PARAMETER    :: tprint = .false.  ! whether to print info on betapointlist
804      !
805      INTEGER               :: ia, it, mbia
806      INTEGER               :: indm, inbrx, idimension, ih
807      INTEGER               :: roughestimate, lamx2, nt
808      INTEGER,  ALLOCATABLE :: tmp_box_beta(:,:)
809      REAL(DP), ALLOCATABLE :: tmp_boxdist_beta(:,:)
810      REAL(DP), ALLOCATABLE :: tmp_xyz_beta(:,:,:)
811      REAL(DP), ALLOCATABLE :: spher_beta(:,:), boxdist_beta(:)
812      REAL(DP)              :: distsq, qtot_int, first, second
813      INTEGER               :: ir, i, j, k, lm, nb, box_ir
814      INTEGER               :: imin, imax, ii, jmin, jmax, jj, kmin, kmax, kk
815      REAL(DP)              :: posi(3)
816      REAL(DP), ALLOCATABLE :: rl(:,:), rl2(:)
817      REAL(DP), ALLOCATABLE :: tempspher(:,:), qtot(:,:,:), &
818                               xsp(:), ysp(:), wsp(:), d1y(:), d2y(:)
819      REAL(DP)              :: mbr, mbx, mby, mbz, dmbx, dmby, dmbz
820      REAL(DP)              :: inv_nr1s, inv_nr2s, inv_nr3s, tau_ia(3), boxradsq_ia, boxrad_ia
821      !
822      initialisation_level = initialisation_level + 5
823      IF ( .not. okvan ) CALL errore &
824                        ('betapointlist','real space routines for USPP only',1)
825      !
826      !print *, "<<<betapointlist>>>"
827      !
828      CALL start_clock( 'betapointlist' )
829      !
830      ! ... betasave is deallocated here to free the memory for the buffers
831      !
832      IF ( allocated( betasave ) ) DEALLOCATE( betasave )
833      !
834      IF ( .not. allocated( boxrad_beta ) ) THEN
835         !
836         ! ... here we calculate the radius of each spherical box ( one
837         ! ... for each non-local projector )
838         !
839         ALLOCATE( boxrad_beta( nsp ) )
840         boxrad_beta(:) = 0.D0
841         !
842         DO it = 1, nsp
843            DO inbrx = 1, upf(it)%nbeta
844               DO indm = upf(it)%kkbeta, 1, -1
845                  IF ( abs( upf(it)%beta(indm,inbrx) ) > 0.d0 ) THEN
846                     boxrad_beta(it) = max( rgrid(it)%r(indm), boxrad_beta(it) )
847                     CYCLE
848                  ENDIF
849               ENDDO
850            ENDDO
851         ENDDO
852         !
853         boxrad_beta(:) = boxrad_beta(:) / alat
854         !
855      ENDIF
856      !
857      ! ... a rough estimate for the number of grid-points per box
858      ! ... is provided here
859      !
860      mbr = maxval( boxrad_beta(:) )
861      !
862      mbx = mbr*sqrt( bg(1,1)**2 + bg(1,2)**2 + bg(1,3)**2 )
863      mby = mbr*sqrt( bg(2,1)**2 + bg(2,2)**2 + bg(2,3)**2 )
864      mbz = mbr*sqrt( bg(3,1)**2 + bg(3,2)**2 + bg(3,3)**2 )
865      !
866      dmbx = 2*anint( mbx*dffts%nr1x ) + 2
867      dmby = 2*anint( mby*dffts%nr2x ) + 2
868      dmbz = 2*anint( mbz*dffts%nr3x ) + 2
869      !
870      roughestimate = anint( dble( dmbx*dmby*dmbz ) * pi / 6.D0 )
871      !
872      CALL start_clock( 'realus:boxes' )
873
874      !
875      ALLOCATE( tmp_box_beta( roughestimate, nat ) )
876      ALLOCATE( tmp_boxdist_beta( roughestimate, nat ) )
877      !
878      IF ( allocated( xyz_beta ) ) DEALLOCATE( xyz_beta )
879      ALLOCATE( tmp_xyz_beta( 3, roughestimate, nat ) )
880      !
881      tmp_box_beta(:,:) = 0
882      tmp_boxdist_beta(:,:) = 0.D0
883      !
884      IF ( .not.allocated( maxbox_beta ) ) ALLOCATE( maxbox_beta( nat ) )
885      !
886      maxbox_beta(:) = 0
887      !
888      ! The beta functions are treated on smooth grid
889      !
890      inv_nr1s = 1.D0 / dble( dffts%nr1 )
891      inv_nr2s = 1.D0 / dble( dffts%nr2 )
892      inv_nr3s = 1.D0 / dble( dffts%nr3 )
893      !
894      DO ia = 1, nat
895         !
896         IF ( .not. upf(ityp(ia))%tvanp ) CYCLE
897         !
898         boxrad_ia = boxrad_beta(ityp(ia))
899         boxradsq_ia = boxrad_ia**2
900         !
901         tau_ia(1) = tau(1,ia)
902         tau_ia(2) = tau(2,ia)
903         tau_ia(3) = tau(3,ia)
904         !
905         ! ... compute the needed ranges for i,j,k  indices around the atom position in crystal coordinates
906         !
907         posi(:) = tau_ia(:) ; CALL cryst_to_cart( 1, posi, bg, -1 )
908         imin = NINT((posi(1) - boxrad_ia * sqrt(bg(1,1)*bg(1,1)+bg(2,1)*bg(2,1)+bg(3,1)*bg(3,1)))*dffts%nr1)
909         imax = NINT((posi(1) + boxrad_ia * sqrt(bg(1,1)*bg(1,1)+bg(2,1)*bg(2,1)+bg(3,1)*bg(3,1)))*dffts%nr1)
910         jmin = NINT((posi(2) - boxrad_ia * sqrt(bg(1,2)*bg(1,2)+bg(2,2)*bg(2,2)+bg(3,2)*bg(3,2)))*dffts%nr2)
911         jmax = NINT((posi(2) + boxrad_ia * sqrt(bg(1,2)*bg(1,2)+bg(2,2)*bg(2,2)+bg(3,2)*bg(3,2)))*dffts%nr2)
912         kmin = NINT((posi(3) - boxrad_ia * sqrt(bg(1,3)*bg(1,3)+bg(2,3)*bg(2,3)+bg(3,3)*bg(3,3)))*dffts%nr3)
913         kmax = NINT((posi(3) + boxrad_ia * sqrt(bg(1,3)*bg(1,3)+bg(2,3)*bg(2,3)+bg(3,3)*bg(3,3)))*dffts%nr3)
914         if (tprint) then
915            write (*,*) tau_ia
916            write (*,*) posi
917            write (*,*) imin,imax,jmin,jmax,kmin,kmax
918         end if
919         DO k = kmin, kmax
920            kk = modulo(k,dffts%nr3) - dffts%my_i0r3p
921            if (kk .LT. 0 .OR. kk .ge. dffts%my_nr3p ) cycle
922            DO j = jmin, jmax
923               jj = modulo(j,dffts%nr2) - dffts%my_i0r2p
924               if (jj .LT. 0 .OR. jj .ge. dffts%my_nr2p ) cycle
925               DO i = imin, imax
926                  ii = modulo(i,dffts%nr1)
927                  !
928                  posi(:) = i * inv_nr1s*at(:,1) + j * inv_nr2s*at(:,2) + k * inv_nr3s*at(:,3) - tau_ia(:)
929                  !
930                  distsq = posi(1)**2 + posi(2)**2 + posi(3)**2
931                  IF ( distsq < boxradsq_ia ) THEN
932                     ! compute fft index ir from ii,jj,kk
933                     ir = 1 + ii + jj * dffts%nr1x + kk * dffts%nr1x * dffts%my_nr2p
934                     !
935                     mbia = maxbox_beta(ia) + 1
936                     !
937                     maxbox_beta(ia)     = mbia
938                     tmp_box_beta(mbia,ia) = ir
939                     tmp_boxdist_beta(mbia,ia)   = sqrt( distsq )*alat
940                     tmp_xyz_beta(:,mbia,ia) = posi(:)*alat
941                     !
942                  ENDIF
943               END DO
944            END DO
945         END DO
946         if (tprint) WRITE (*,*) 'BETAPOINTLIST: ATOM ',ia, ' MAXBOX_BETA =', maxbox_beta(ia)
947      ENDDO
948      !
949      CALL beta_box_breaking (nat, maxbox_beta)
950      !
951      boxtot = sum(maxbox_beta(1:nat))
952      WRITE( stdout,* )  'BOXTOT', boxtot
953
954      IF (.not. ALLOCATED( box0  ) ) ALLOCATE ( box0  ( nat ) )
955      IF (.not. ALLOCATED( box_s ) ) ALLOCATE ( box_s ( nat ) )
956      IF (.not. ALLOCATED( box_e ) ) ALLOCATE ( box_e ( nat ) )
957      box0(1) = 0 ; box_s(1) = 1 ; box_e(1) = maxbox_beta(1)
958      do ia =2,nat
959         box0 (ia) = box_e(ia-1) ; box_s(ia) = box0(ia) + 1 ;  box_e(ia) = box0(ia) + maxbox_beta(ia)
960      end do
961      !
962      ! ... now store them in a more convenient place
963      !
964      IF ( allocated( box_psic ) )     DEALLOCATE( box_psic )
965      IF ( allocated( xyz_beta ) )     DEALLOCATE( xyz_beta )
966      IF ( allocated( box_beta ) )     DEALLOCATE( box_beta )
967      IF ( allocated( xkphase ) )      DEALLOCATE( xkphase )
968      !
969      ALLOCATE( box_psic    ( boxtot ) )
970      ALLOCATE( box_beta    ( boxtot ) )
971      ALLOCATE( xyz_beta ( 3, boxtot ) )
972      ALLOCATE( boxdist_beta( boxtot ) )
973      ALLOCATE( xkphase     ( boxtot ) )
974      !
975      do ia =1,nat
976         xyz_beta ( :, box_s(ia):box_e(ia) ) =  tmp_xyz_beta(:,1:maxbox_beta(ia),ia)
977         box_beta (    box_s(ia):box_e(ia) ) =  tmp_box_beta(  1:maxbox_beta(ia),ia)
978         boxdist_beta( box_s(ia):box_e(ia) ) =  tmp_boxdist_beta(       1:maxbox_beta(ia),ia)
979      end do
980      !
981      call set_xkphase(1)
982      !
983      DEALLOCATE( tmp_xyz_beta )
984      DEALLOCATE( tmp_box_beta )
985      DEALLOCATE( tmp_boxdist_beta )
986      !
987      CALL stop_clock( 'realus:boxes' )
988      CALL start_clock( 'realus:spher' )
989      !
990      ! ... now it computes the spherical harmonics
991      !
992      lamx2 = lmaxq*lmaxq
993      !
994      ALLOCATE( spher_beta( boxtot, lamx2 ) ) ! ( boxtot,lmax2 )
995      !
996      spher_beta(:,:) = 0.D0
997      !
998      DO ia = 1, nat
999         !
1000         IF ( .not. upf(ityp(ia))%tvanp ) CYCLE
1001         !
1002         idimension = maxbox_beta(ia)
1003         ALLOCATE( rl( 3, idimension ), rl2( idimension ) )
1004         !
1005         DO ir = 1, idimension
1006            rl(:,ir) = xyz_beta(:,box0(ia)+ir)
1007            rl2(ir) = rl(1,ir)**2 + rl(2,ir)**2 + rl(3,ir)**2
1008         ENDDO
1009         !
1010         ALLOCATE( tempspher( idimension, lamx2 ) )
1011         CALL ylmr2( lamx2, idimension, rl, rl2, tempspher )
1012         spher_beta(box_s(ia):box_e(ia),:) = tempspher(:,:)
1013         DEALLOCATE( rl, rl2, tempspher )
1014         !
1015      ENDDO
1016      !
1017      if (gamma_only) DEALLOCATE( xyz_beta )
1018      !
1019      CALL stop_clock( 'realus:spher' )
1020      CALL start_clock( 'realus:tabp' )
1021      !
1022      ! ... let's do the main work
1023      !
1024      ALLOCATE( betasave( boxtot, nhm )  )
1025      !
1026      betasave = 0.D0
1027      ! Box is set, Y_lm is known in the box, now the calculation can commence
1028      ! Reminder: In real space
1029      ! |Beta_lm(r)>=f_l(r).Y_lm(r)
1030      ! In q space (calculated in init_us_1 and then init_us_2 )
1031      ! |Beta_lm(q)>= (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q)
1032      ! Where
1033      ! f_l(q)=\int_0 ^\infty dr r^2 f_l (r) j_l(q.r)
1034      !
1035      ! We know f_l(r) and Y_lm(r) for certain points,
1036      ! basically we interpolate the known values to new mesh using splint
1037      !
1038      DO ia = 1, nat
1039         !
1040         mbia = maxbox_beta(ia)
1041         IF ( mbia == 0 ) CYCLE
1042         !
1043         nt = ityp(ia)
1044         IF ( .not. upf(nt)%tvanp ) CYCLE
1045         !
1046         ALLOCATE( qtot( upf(nt)%kkbeta, upf(nt)%nbeta, upf(nt)%nbeta ) )
1047         !
1048         ! ... variables used for spline interpolation
1049         !
1050         ALLOCATE( xsp( upf(nt)%kkbeta ), ysp( upf(nt)%kkbeta ), wsp( upf(nt)%kkbeta ) )
1051         !
1052         ! ... the radii in x
1053         !
1054         xsp(:) = rgrid(nt)%r(1:upf(nt)%kkbeta)
1055         !
1056         DO ih = 1, nh (nt)
1057            !
1058            lm = nhtolm(ih, nt)
1059            nb = indv(ih, nt)
1060            !
1061            !OBM rgrid(nt)%r(1) == 0, attempting correction
1062            ! In the UPF file format, beta field is r*|beta>
1063            IF (rgrid(nt)%r(1)==0) THEN
1064             ysp(2:) = upf(nt)%beta(2:upf(nt)%kkbeta,nb) / rgrid(nt)%r(2:upf(nt)%kkbeta)
1065             ysp(1)=0.d0
1066            ELSE
1067             ysp(:) = upf(nt)%beta(1:upf(nt)%kkbeta,nb) / rgrid(nt)%r(1:upf(nt)%kkbeta)
1068            ENDIF
1069
1070            ALLOCATE( d1y(upf(nt)%kkbeta), d2y(upf(nt)%kkbeta) )
1071            CALL radial_gradient(ysp(1:upf(nt)%kkbeta), d1y, &
1072                                 rgrid(nt)%r, upf(nt)%kkbeta, 1)
1073            CALL radial_gradient(d1y, d2y, rgrid(nt)%r, upf(nt)%kkbeta, 1)
1074
1075            first = d1y(1) ! first derivative in first point
1076            second =d2y(1) ! second derivative in first point
1077            DEALLOCATE( d1y, d2y )
1078
1079            CALL spline( xsp, ysp, first, second, wsp )
1080
1081            DO box_ir = box_s(ia), box_e(ia)
1082               !
1083               ! ... spline interpolation
1084               !
1085               qtot_int = splint( xsp, ysp, wsp, boxdist_beta(box_ir) ) !the value of f_l(r) in point ir in atom ia
1086               !
1087               betasave(box_ir,ih) = qtot_int*spher_beta(box_ir,lm) !spher_beta is the Y_lm in point ir for atom ia
1088               !
1089            ENDDO
1090         ENDDO
1091         !
1092         DEALLOCATE( qtot )
1093         DEALLOCATE( xsp )
1094         DEALLOCATE( ysp )
1095         DEALLOCATE( wsp )
1096         !
1097      ENDDO
1098      !
1099      DEALLOCATE( boxdist_beta )
1100      DEALLOCATE( spher_beta )
1101      !
1102      CALL stop_clock( 'realus:tabp' )
1103      CALL stop_clock( 'betapointlist' )
1104      !
1105    END SUBROUTINE betapointlist
1106    !
1107    SUBROUTINE beta_box_breaking (nat, maxbox_beta)
1108      !! This routine determines whether it is needed to re-shuffle the
1109      !! beta point grids in real space.
1110      USE mp_bands,   ONLY : me_bgrp, intra_bgrp_comm, nproc_bgrp
1111      USE mp,         ONLY : mp_sum, mp_comm_split
1112      !
1113      IMPLICIT NONE
1114      !
1115      INTEGER, INTENT(IN)  :: nat
1116      INTEGER, INTENT(IN)  :: maxbox_beta(nat)
1117      REAL(DP)             :: boxtot_avg, boxtot_unbalance, opt_boxtot_unbalance
1118      INTEGER              :: in, nn, ip, np, ncheck, dd, boxtot_max, opt_nn, my_color
1119      INTEGER, ALLOCATABLE :: ind(:), color(:), data(:), boxtot_beta(:)
1120      !
1121      REAL(DP), PARAMETER  :: tollerable_unbalance = 1.25D0
1122      INTEGER, PARAMETER   :: maximum_nn = 6
1123      !
1124!      WRITE (*,*) ' me_bgrp ', me_bgrp ; FLUSH(6)
1125!      WRITE (*,*)  ' BETAPOINT LIST', maxbox_beta(:) ; FLUSH(6)
1126      ALLOCATE ( boxtot_beta ( nproc_bgrp ) )
1127      boxtot_beta (:) = 0 ; boxtot_beta ( me_bgrp+1 ) = SUM(maxbox_beta(1:nat))
1128      CALL mp_sum(  boxtot_beta, intra_bgrp_comm )
1129
1130      boxtot_avg = SUM ( boxtot_beta ( 1:nproc_bgrp ) ) / dble ( nproc_bgrp )
1131      boxtot_max = MAXVAL( boxtot_beta ( 1:nproc_bgrp ) )
1132      boxtot_unbalance =  boxtot_max / boxtot_avg
1133      WRITE ( stdout, * ) 'BETAPOINTLIST SUMMARY: ', boxtot_max , boxtot_avg, boxtot_unbalance ; FLUSH(6)
1134      WRITE ( stdout, * ) ' BETATOT LIST', boxtot_beta(:) ; FLUSH(6)
1135
1136      nn = 1; opt_nn = 1 ; opt_boxtot_unbalance = boxtot_unbalance
1137
1138      ALLOCATE ( ind(nproc_bgrp), color(nproc_bgrp),  data(nproc_bgrp) )
1139      data(:) = - boxtot_beta(:) ; ind(1) = 0
1140      call ihpsort ( nproc_bgrp, data, ind )
1141
1142      DO WHILE ( boxtot_unbalance > tollerable_unbalance .and. nn < maximum_nn )
1143
1144         nn = nn + 1
1145
1146         np = nproc_bgrp / nn ; IF ( nproc_bgrp .ne. np * nn  ) CYCLE
1147
1148!         WRITE ( stdout, * ) 'CHECKING nn =', nn, np
1149!         WRITE ( stdout, * ) ind(:) ; FLUSH(6)
1150!         WRITE ( stdout, * ) data(:) ; FLUSH(6)
1151         color(ind(1:np)) = ind(1:np)
1152         DO in = nn, 2, -1
1153            DO ip = 1, np
1154               data(ip) = data(ip) + data (in*np+1-ip)
1155               color(ind(in*np+1-ip)) = ind(ip)
1156            END DO
1157            call ihpsort ( np, data, ind )
1158!            WRITE ( stdout, * ) ind(:) ; FLUSH(6)
1159!            WRITE ( stdout, * ) data(:) ; FLUSH(6)
1160!            WRITE ( stdout, * ) color(:) ; FLUSH(6)
1161         END DO
1162         boxtot_unbalance =  -data(1)/(boxtot_avg*nn)
1163         WRITE ( stdout, * ) nn, ' is a factor ',-data(1), -data(1)/boxtot_avg, boxtot_unbalance ; FLUSH(6)
1164         IF  ( boxtot_unbalance < opt_boxtot_unbalance ) THEN
1165            opt_boxtot_unbalance = boxtot_unbalance ; opt_nn = nn ; my_color = color(me_bgrp+1)
1166         END IF
1167         DO ip = 1, np
1168            data(ip) = -boxtot_beta(ind(ip))
1169         END DO
1170         DO ip = 1, np
1171            WRITE ( stdout, '(A,i4,A,$)' ) ' color ', color(ind(ip)), ':'  ; FLUSH(6)
1172            dd = 0 ; ncheck = 0
1173            DO in = 1, nproc_bgrp
1174               IF ( color(in) == color(ind(ip)) ) THEN
1175                  WRITE (stdout, '(i4,$)' ) in ; FLUSH(6)
1176                  dd = dd + boxtot_beta(in)
1177                  ncheck = ncheck + 1
1178               END IF
1179            END DO
1180            WRITE ( stdout, * ) ' ', dd ; FLUSH(6)
1181            if (ncheck .ne. nn) WRITE(*,*) '!!!! something wrong ', ncheck, nn ; FLUSH(6)
1182         END DO
1183
1184      END DO
1185      DEALLOCATE ( data, color, ind, boxtot_beta )
1186
1187      IF (nn > 1 ) THEN
1188         CALL mp_comm_split( intra_bgrp_comm, my_color, me_bgrp, intra_bbox_comm )
1189      ELSE
1190         intra_bbox_comm = 0
1191      END IF
1192
1193      RETURN
1194      !
1195    END SUBROUTINE beta_box_breaking
1196    !
1197    !------------------------------------------------------------------------
1198    SUBROUTINE newq_r( vr,deeq, skip_vltot )
1199      !-----------------------------------------------------------------------
1200      !! This routine computes the integral of the perturbed potential with
1201      !! the \(Q\) function in real space.
1202      !
1203      USE cell_base,        ONLY : omega
1204      USE fft_base,         ONLY : dfftp
1205      USE lsda_mod,         ONLY : nspin
1206      USE ions_base,        ONLY : nat, ityp
1207      USE uspp_param,       ONLY : upf, nh, nhm
1208      USE uspp,             ONLY : ijtoh
1209      USE control_flags,    ONLY : tqr
1210      USE noncollin_module, ONLY : nspin_mag
1211      USE scf,              ONLY : vltot
1212      USE mp_bands,         ONLY : intra_bgrp_comm
1213      USE mp,               ONLY : mp_sum
1214      !
1215      IMPLICIT NONE
1216      !
1217      REAL(DP), INTENT(IN) :: vr(dfftp%nnr,nspin)
1218      !! The input potential
1219      REAL(DP), INTENT(OUT) :: deeq(nhm,nhm,nat,nspin)
1220      !! Contribution to the integral
1221      LOGICAL, INTENT(in) :: skip_vltot
1222      !! If .FALSE. \(\text{vltot}\) is added to \(\text{vr}\) when necessary
1223      !
1224      ! ... local variables
1225      !
1226      REAL(DP), ALLOCATABLE :: aux(:)
1227      !
1228      INTEGER :: ia, ih, jh, is, ir, nt
1229      INTEGER :: mbia
1230      !
1231      IF (tqr .and. .not. associated(tabp)) THEN
1232         CALL generate_qpointlist()
1233      ENDIF
1234
1235      deeq(:,:,:,:) = 0.D0
1236      !
1237      ALLOCATE( aux( dfftp%nnr ) )
1238      !
1239      DO is = 1, nspin_mag
1240         !
1241         IF ( (nspin_mag == 4 .and. is /= 1) .or. skip_vltot ) THEN
1242            aux(:) = vr(:,is)
1243         ELSE
1244            aux(:) = vltot(:) + vr(:,is)
1245         ENDIF
1246         !
1247         DO ia = 1, nat
1248            !
1249            mbia = tabp(ia)%maxbox
1250            IF ( mbia == 0 ) CYCLE
1251            !
1252            nt = ityp(ia)
1253            IF ( .not. upf(nt)%tvanp ) CYCLE
1254            !
1255            DO ih = 1, nh(nt)
1256               DO jh = ih, nh(nt)
1257                  DO ir = 1, mbia
1258                     deeq(ih,jh,ia,is)= deeq(ih,jh,ia,is) + &
1259                        tabp(ia)%qr(ir,ijtoh(ih,jh,nt))*aux(tabp(ia)%box(ir))
1260                  ENDDO
1261                  deeq(jh,ih,ia,is) = deeq(ih,jh,ia,is)
1262               ENDDO
1263            ENDDO
1264         ENDDO
1265      ENDDO
1266      !
1267      deeq(:,:,:,:) = deeq(:,:,:,:)*omega/(dfftp%nr1*dfftp%nr2*dfftp%nr3)
1268      DEALLOCATE( aux )
1269      CALL mp_sum(  deeq(:,:,:,1:nspin_mag) , intra_bgrp_comm )
1270      !
1271    END SUBROUTINE newq_r
1272    !
1273    !------------------------------------------------------------------------
1274    SUBROUTINE addusdens_r( rho )
1275      !------------------------------------------------------------------------
1276      !! This routine adds to the charge density the part which is due to
1277      !! the US augmentation, in real space.
1278      !
1279      USE ions_base,        ONLY : nat, ityp
1280      USE uspp,             ONLY : okvan, becsum
1281      USE uspp_param,       ONLY : upf, nh
1282      USE noncollin_module, ONLY : nspin_mag
1283      USE fft_interfaces,   ONLY : fwfft
1284      USE fft_base,         ONLY : dfftp
1285      USE wavefunctions,    ONLY : psic
1286#if defined (__DEBUG)
1287      USE noncollin_module, ONLY : nspin_lsda
1288      USE constants,        ONLY : eps6
1289      USE klist,            ONLY : nelec
1290      USE cell_base,        ONLY : omega
1291      USE mp_bands,         ONLY : intra_bgrp_comm
1292      USE mp,               ONLY : mp_sum
1293      USE gvect,            ONLY : gstart
1294#endif
1295      !
1296      IMPLICIT NONE
1297      ! The charge density to be augmented (in G-space)
1298      COMPLEX(kind=dp), INTENT(inout) :: rho(dfftp%ngm,nspin_mag)
1299      !
1300      INTEGER  :: ia, nt, ir, irb, ih, jh, ijh, is, mbia
1301      REAL(kind=dp), ALLOCATABLE :: rhor(:,:)
1302#if defined (__DEBUG)
1303      CHARACTER(len=80) :: msg
1304      REAL(kind=dp) :: charge
1305#endif
1306      !
1307      !
1308      IF ( .not. okvan ) RETURN
1309      !
1310      CALL start_clock( 'addusdens' )
1311      !
1312      ALLOCATE ( rhor(dfftp%nnr,nspin_mag) )
1313      rhor(:,:) = 0.0_dp
1314      DO is = 1, nspin_mag
1315         !
1316         DO ia = 1, nat
1317            !
1318            mbia = tabp(ia)%maxbox
1319            IF ( mbia == 0 ) CYCLE
1320            !
1321            nt = ityp(ia)
1322            IF ( .not. upf(nt)%tvanp ) CYCLE
1323            !
1324            ijh = 0
1325            DO ih = 1, nh(nt)
1326               DO jh = ih, nh(nt)
1327                  ijh = ijh + 1
1328                  DO ir = 1, mbia
1329                     irb = tabp(ia)%box(ir)
1330                     rhor(irb,is) = rhor(irb,is) + tabp(ia)%qr(ir,ijh)*becsum(ijh,ia,is)
1331                  ENDDO
1332               ENDDO
1333            ENDDO
1334         ENDDO
1335         !
1336      ENDDO
1337      !
1338      !
1339      DO is = 1, nspin_mag
1340         psic(:) = rhor(:,is)
1341         CALL fwfft ('Rho', psic, dfftp)
1342         rho(:,is) = rho(:,is) + psic(dfftp%nl(:))
1343      END DO
1344      !
1345      DEALLOCATE ( rhor )
1346#if defined (__DEBUG)
1347      !
1348      ! ... check the total charge (must not be summed on k-points)
1349      !
1350      IF ( gstart == 2) THEN
1351         charge = SUM(rho(1,1:nspin_lsda) )*omega
1352      ELSE
1353         charge = 0.0_dp
1354      ENDIF
1355      CALL mp_sum(  charge , intra_bgrp_comm )
1356      write (stdout,*) 'charge before rescaling ', charge
1357      IF ( abs(charge - nelec) > MAX(eps6,eps6*ABS(nelec)) ) THEN
1358         !
1359         ! ... the error on the charge is too large, stop and complain
1360         !
1361         WRITE (msg,'("expected ",f10.6,", found ",f10.6)') nelec, charge
1362         CALL errore( 'addusdens_r', 'WRONG CHARGE '//trim(msg)//&
1363              ': ions may be overlapping or increase ecutrho', 1 )
1364      ENDIF
1365      !
1366#endif
1367      CALL stop_clock( 'addusdens' )
1368      !
1369      RETURN
1370      !
1371    END SUBROUTINE addusdens_r
1372    !
1373    !----------------------------------------------------------------------
1374    SUBROUTINE addusforce_r( forcenl )
1375      !----------------------------------------------------------------------
1376      !! This routine computes the contribution to atomic forces due
1377      !! to the dependence of the \(Q\) function on the atomic position.
1378      !! \begin{equation}\notag
1379      !! \begin{split}
1380      !!    F_{j,at} = &-\int \sum_{lm} \frac{dQ_{lm}(r)}{d\tau_{at}}
1381      !!                 \text{becsum}(lm,at)\ V(r)\ dr \\
1382      !!               &+\int \sum_{lm} \frac{dQ_{lm}(r)}{d\tau_{at}}
1383      !!                 \text{ebecsum}(lm,at)\ dr
1384      !! \end{split}
1385      !! \end{equation}
1386      !! where:
1387      !! \begin{equation}\notag
1388      !! \begin{split}
1389      !!    &\text{becsum}(lm,at) = \sum_i \langle\psi_i|\beta_l\rangle w_i
1390      !!                                   \langle\beta_m|\psi_i\rangle, \\
1391      !!    &\text{ebecsum}(lm,at) = \sum_i\langle\psi_i|\beta_l\rangle w_i
1392      !!                                   \langle\beta_m|\psi_i\rangle et_i
1393      !! \end{split}
1394      !! \end{equation}
1395      !! On output the contribution is added to \(\text{forcenl}\).
1396      !
1397      USE kinds,      ONLY : DP
1398      USE cell_base,  ONLY : omega
1399      USE ions_base,  ONLY : nat, ityp
1400      USE fft_base,   ONLY : dfftp
1401      USE noncollin_module,   ONLY : nspin_mag
1402      USE scf,        ONLY : v, vltot
1403      USE uspp,       ONLY : becsum, ebecsum, okvan
1404      USE uspp_param, ONLY : upf,  nh
1405      USE mp_bands,   ONLY : intra_bgrp_comm
1406      USE mp,         ONLY : mp_sum
1407      !
1408      IMPLICIT NONE
1409      !
1410      REAL(DP), INTENT(INOUT) :: forcenl (3, nat)
1411      !
1412      INTEGER :: na, nt, ijh, nfuncs, mbia, ir, is, irb
1413      REAL(dp), ALLOCATABLE:: dqr(:,:,:), forceq(:,:)
1414      REAL(dp) :: dqrforce(3), dqb(3), dqeb(3), v_eff
1415      !
1416      IF (.not.okvan) RETURN
1417      !
1418      ALLOCATE ( forceq(3,nat) )
1419      forceq(:,:) = 0.0_dp
1420      !
1421      DO na = 1, nat
1422         nt = ityp(na)
1423         IF ( .NOT. upf(nt)%tvanp ) CYCLE
1424         !
1425         dqrforce(:) = 0.0_dp
1426         !
1427         mbia = tabp(na)%maxbox
1428         IF ( mbia == 0 ) CYCLE
1429!         write (stdout,*) ' inside addusforce na, mbia ', na,mbia
1430         nfuncs = nh(nt)*(nh(nt)+1)/2
1431         ALLOCATE ( dqr(mbia,nfuncs,3) )
1432         CALL real_space_dq( nt, na, mbia, nfuncs, dqr )
1433         !
1434         DO ir = 1, mbia
1435            irb = tabp(na)%box(ir)
1436
1437            DO is = 1, nspin_mag
1438               dqb = 0.d0; dqeb = 0.d0
1439               do ijh =1, nfuncs
1440                  dqb (:) = dqb (:) +  dqr(ir,ijh,:) *  becsum(ijh,na,is)
1441                  dqeb(:) = dqeb(:) +  dqr(ir,ijh,:) * ebecsum(ijh,na,is)
1442               end do
1443               v_eff = vltot(irb) + v%of_r(irb,is) ;  IF (nspin_mag==4.and.is/=1) v_eff = v%of_r(irb,is)
1444               dqrforce(:) = dqrforce(:) + dqb(:) * v_eff - dqeb(:)
1445            ENDDO
1446
1447         ENDDO
1448         DEALLOCATE ( dqr )
1449         !
1450         forceq(:,na) = - dqrforce(:) * omega / (dfftp%nr1*dfftp%nr2*dfftp%nr3)
1451      ENDDO
1452      CALL mp_sum ( forceq, intra_bgrp_comm )
1453      !
1454      forcenl(:,:) = forcenl(:,:) + forceq(:,:)
1455      !
1456      DEALLOCATE (forceq )
1457      !
1458      RETURN
1459    END SUBROUTINE addusforce_r
1460    !
1461    !----------------------------------------------------------------------
1462    SUBROUTINE addusstress_r( sigmanl )
1463      !---------------------------------------------------------------------
1464      !! This routine computes the contribution to atomic stress due
1465      !! to the dependence of the Q function on the atomic position.
1466      !! \begin{equation}\notag
1467      !! \begin{split}
1468      !!    S_{i,j} = &+\int \sum_{lm} \left[r_i \frac{dQ_{lm}(r)}{d\tau_{at_j}} +
1469      !!                 Q_{lm}(r)\right] \text{becsum}(lm,at)\ V(ir)\ dr \\
1470      !!              &-\int \sum_{lm} \left[r_i \frac{dQ_{lm}(r)}{d\tau_{at_j}} +
1471      !!                 Q_{lm}(r)\right] \text{ebecsum}(lm,at)\ dr
1472      !! \end{split}
1473      !! \end{equation}
1474      !! where:
1475      !! \begin{equation}\notag
1476      !! \begin{split}
1477      !!    &\text{becsum}(lm,at) = \sum_i \langle\psi_i|\beta_l\rangle w_i
1478      !!                                   \langle\beta_m|\psi_i\rangle, \\
1479      !!    &\text{ebecsum}(lm,at) = \sum_i\langle\psi_i|\beta_l\rangle w_i
1480      !!                                   \langle\beta_m|\psi_i\rangle et_i
1481      !! \end{split}
1482      !! \end{equation}
1483      !! On output the contribution is added to \(\text{stressnl}\).
1484      !! NB: a factor -1.0/omega is later applied to \(\text{stressnl}\) as a whole.
1485      !
1486      ! FOR REASONS I DON'T UNDERSTAND THE SECOND TERM APPEARS TO NEED THE + SIGN TOO
1487      !
1488      USE kinds,      ONLY : DP
1489      USE cell_base,  ONLY : omega
1490      USE ions_base,  ONLY : nat, ityp
1491      USE fft_base,   ONLY : dfftp
1492      USE noncollin_module,  ONLY : nspin_mag
1493      USE scf,        ONLY : v, vltot
1494      USE uspp,       ONLY : becsum, ebecsum, okvan
1495      USE uspp_param, ONLY : upf,  nh
1496!      USE mp_bands,   ONLY : intra_bgrp_comm
1497!      USE mp,         ONLY : mp_sum
1498      !
1499      IMPLICIT NONE
1500      !
1501      REAL(DP), INTENT(INOUT) :: sigmanl (3,3)
1502      !
1503      INTEGER :: ipol, na, nt, nfuncs, mbia, ir, is, irb , ijh
1504      REAL(dp), ALLOCATABLE:: dqr(:,:,:)
1505      REAL(dp) :: sus(3,3), sus_at(3,3), qb, qeb, dqb(3), dqeb(3), v_eff
1506      !
1507      IF (.not.okvan) RETURN
1508      !
1509      sus(:,:) = 0.0_dp
1510      !
1511      DO na = 1, nat
1512         nt = ityp(na)
1513         IF ( .NOT. upf(nt)%tvanp ) CYCLE
1514         !
1515         mbia = tabp(na)%maxbox
1516!         write (stdout,*) ' inside addusstress na, mbia ', na,mbia
1517         nfuncs = nh(nt)*(nh(nt)+1)/2
1518         ALLOCATE ( dqr(mbia,nfuncs,3) )
1519         CALL real_space_dq( nt, na, mbia, nfuncs, dqr )
1520         !
1521         sus_at(:,:) = 0.0_dp
1522         DO ir = 1, mbia
1523            irb = tabp(na)%box(ir)
1524            DO is = 1, nspin_mag
1525               qb = 0.d0; qeb =0.d0; dqb = 0.d0; dqeb = 0.d0
1526               do ijh =1, nfuncs
1527                  qb      = qb      +  tabp(na)%qr(ir,ijh) *  becsum(ijh,na,is)
1528                  qeb     = qeb     +  tabp(na)%qr(ir,ijh) * ebecsum(ijh,na,is)
1529                  dqb (:) = dqb (:) +  dqr(ir,ijh,:) *  becsum(ijh,na,is)
1530                  dqeb(:) = dqeb(:) +  dqr(ir,ijh,:) * ebecsum(ijh,na,is)
1531               end do
1532               v_eff = vltot(irb) + v%of_r(irb,is) ;  IF (nspin_mag==4.and.is/=1) v_eff = v%of_r(irb,is)
1533               do ipol=1, 3
1534                  sus_at(:, ipol) = sus_at(:,ipol) - tabp(na)%xyz(:,ir) * ( dqb(ipol) * v_eff + dqeb(ipol) )
1535                  sus_at(ipol,ipol) = sus_at(ipol,ipol) +                 ( qb * v_eff + qeb )
1536               end do
1537            END DO
1538         ENDDO
1539         DEALLOCATE ( dqr )
1540
1541         sus (:,:) = sus(:,:) + sus_at(:,:)
1542
1543      ENDDO
1544      !
1545!      CALL mp_sum ( sus, intra_bgrp_comm )
1546      sus(:,:) = sus(:,:) * omega / (dfftp%nr1*dfftp%nr2*dfftp%nr3)
1547      !
1548      sigmanl(:,:) = sigmanl(:,:) + sus(:,:)
1549      !
1550      RETURN
1551    END SUBROUTINE addusstress_r
1552    !
1553    !------------------------------------------------------------------------
1554    SUBROUTINE set_xkphase( ik )
1555    !--------------------------------------------------------------------------
1556    !! In the calculation of \(\text{becp}\) or when performing \(\texttt{add_vuspsir}\)
1557    !! the wavefunction \(\text{psi_k}\) and not its periodic part (which is what
1558    !! we get from the FFT) should be used.
1559    !! A k-dependent phase \(\text{exp}[-xk(\text{current_k}\cdot(r-\tau_\text{ia}))]\) must
1560    !! be added.
1561    !
1562    USE kinds,      ONLY : DP
1563    USE klist,      ONLY : xk
1564    USE cell_base,  ONLY : tpiba
1565
1566    IMPLICIT NONE
1567
1568    INTEGER, INTENT (IN) :: ik
1569
1570    INTEGER :: box_ir
1571    REAL(DP) :: arg
1572
1573    if (.not.allocated ( xkphase ) ) call errore ('set_xkphase',' array not allocated yes',1)
1574    if (ik .eq. current_phase_kpoint ) return
1575    !
1576    !$omp parallel do
1577    do box_ir =1, boxtot
1578       arg = ( xk(1,ik) * xyz_beta(1,box_ir) + &
1579               xk(2,ik) * xyz_beta(2,box_ir) + &
1580               xk(3,ik) * xyz_beta(3,box_ir) ) * tpiba
1581       xkphase( box_ir ) = CMPLX(COS(arg),-SIN(arg),KIND=dp)
1582    end do
1583    !$omp end parallel do
1584    !
1585    current_phase_kpoint = ik
1586    !
1587    return
1588    END SUBROUTINE set_xkphase
1589
1590    !--------------------------------------------------------------------------
1591    SUBROUTINE calbec_rs_gamma( ibnd, last, becp_r )
1592      !--------------------------------------------------------------------------
1593      !! Calculates \(\text{becp_r}\) in real space.
1594      !
1595      !! * Requires \(\text{betasave}\), the beta functions at real space calculated
1596      !!   by \(\texttt{betapointlist()}\);
1597      !! * \(\text{ibnd}\) is an index that runs over the number of bands, which
1598      !!   is given by m, so you have to call this subroutine inside a cycle with
1599      !!   index \(\text{ibnd}\);
1600      !! * In this cycle you have to perform a Fourier transform of the orbital
1601      !!   corresponding to \(\text{ibnd}\), namely you have to transform the orbital
1602      !!   to real space and store it in the global variable \(\text{psic}\);
1603      !! * Remember that in the gamma_only case you perform two FFT at the same time,
1604      !!   so you have that the real part corresponds to \(\text{ibnd}\), and the
1605      !!   imaginary part to \(\text{ibnd}+1\).
1606      !
1607      !! WARNING: For the sake of speed, there are no checks performed in this
1608      !!          routine, check beforehand!
1609      !
1610      !! Subroutine written by Dario Rocca, Stefano de Gironcoli, modified by O.
1611      !! Baris Malcioglu.
1612      !! Some speedup and restrucuring by SdG April 2020.
1613      !!
1614      !
1615    USE kinds,                 ONLY : DP
1616    USE cell_base,             ONLY : omega
1617    USE wavefunctions,         ONLY : psic
1618    USE ions_base,             ONLY : nat, nsp, ityp
1619    USE uspp_param,            ONLY : nh
1620    USE fft_base,              ONLY : dffts
1621    USE mp_bands,              ONLY : intra_bgrp_comm
1622    USE mp,                    ONLY : mp_sum
1623    USE uspp,                  ONLY : indv_ijkb0
1624    !
1625    IMPLICIT NONE
1626    !
1627    INTEGER, INTENT(in) :: ibnd, last
1628    INTEGER :: ikb, nt, ia, ih, mbia
1629    REAL(DP) :: fac
1630    REAL(DP), ALLOCATABLE, DIMENSION(:) :: wr, wi
1631    REAL(DP) :: bcr, bci
1632    REAL(DP), DIMENSION(:,:), INTENT(out) :: becp_r
1633    integer :: ir, box_ir, maxbox, ijkb0, nh_nt
1634    !
1635    REAL(DP), EXTERNAL :: ddot
1636    !
1637    !
1638    CALL start_clock( 'calbec_rs' )
1639    !
1640    IF( dffts%has_task_groups ) CALL errore( 'calbec_rs_gamma', 'task_groups not implemented', 1 )
1641
1642    fac = sqrt(omega) / (dffts%nr1*dffts%nr2*dffts%nr3)
1643    !
1644    maxbox = MAXVAL(maxbox_beta(1:nat))
1645    !
1646    becp_r(:,ibnd)=0.d0
1647    IF ( ibnd+1 <= last ) becp_r(:,ibnd+1)=0.d0
1648    ! Clearly for an odd number of bands for ibnd=nbnd=last you don't have
1649    ! anymore bands, and so the imaginary part equal zero
1650    !
1651!
1652! copy psic into a box-friendly array (and scatter it across intra_bbox_comm if needed)
1653!
1654    !$omp parallel do
1655    DO box_ir =1, boxtot
1656       box_psic ( box_ir ) = psic( box_beta( box_ir ) )
1657    END DO
1658    !$omp end parallel do
1659
1660    ALLOCATE( wr(maxbox), wi(maxbox) )
1661    ! working arrays to order the points in the clever way
1662    DO nt = 1, nsp
1663       !
1664       nh_nt = nh(nt)
1665       !
1666       DO ia = 1, nat
1667          !
1668          IF ( ityp(ia) == nt ) THEN
1669             !
1670             mbia = maxbox_beta(ia) ; IF ( mbia == 0 ) CYCLE
1671             !
1672             ijkb0 = indv_ijkb0(ia)
1673             !$omp parallel default(shared) private(ih,ikb,ir,bcr,bci)
1674             !$omp do
1675             DO ir =1, mbia
1676                wr(ir) = dble ( box_psic( box0(ia)+ir) )
1677             END DO
1678             !$omp end do
1679             !$omp do
1680             DO ih = 1, nh_nt
1681                !
1682                ikb = ijkb0 + ih
1683                bcr = ddot( mbia, betasave(box_s(ia):box_e(ia),ih), 1, wr(:) , 1 )
1684                becp_r(ikb,ibnd)   = fac * bcr
1685                !
1686             ENDDO
1687             !$omp end do nowait
1688             IF ( ibnd+1 <= last ) THEN
1689                !$omp do
1690                DO ir =1, mbia
1691                   wi(ir) = aimag( psic( box_beta(box0(ia)+ir) ) )
1692                END DO
1693                !$omp end do
1694                !$omp do
1695                DO ih = 1, nh_nt
1696                   !
1697                   ikb = ijkb0 + ih
1698                   bci = ddot( mbia, betasave(box_s(ia):box_e(ia),ih), 1, wi(:) , 1 )
1699                   becp_r(ikb,ibnd+1) = fac * bci
1700                   !
1701                ENDDO
1702                !$omp end do
1703             END IF
1704             !$omp end parallel
1705             !
1706          ENDIF
1707          !
1708       ENDDO
1709       !
1710    ENDDO
1711    DEALLOCATE( wr, wi )
1712    !
1713    CALL mp_sum( becp_r( :, ibnd ), intra_bgrp_comm )
1714    IF ( ibnd+1 <= last ) CALL mp_sum( becp_r( :, ibnd+1 ), intra_bgrp_comm )
1715    CALL stop_clock( 'calbec_rs' )
1716    !
1717    RETURN
1718
1719  END SUBROUTINE calbec_rs_gamma
1720    !
1721    !-------------------------------------------------------------------------------
1722    SUBROUTINE calbec_rs_k( ibnd, last )
1723    !------------------------------------------------------------------------------
1724    !! The k-point generalised version of \(\texttt{calbec_rs_gamma}\). Basically
1725    !! same as above, but \(\text{becp}\) is used instead of \(\text{becp_r}\),
1726    !! skipping the gamma point reduction derived from above by OBM 051108.
1727    !
1728    !! k-point phase factor fixed by SdG 030816.
1729    !! Some speedup and restrucuring by SdG April 2020.
1730    !
1731    USE kinds,                 ONLY : DP
1732    USE wvfct,                 ONLY : current_k
1733    USE cell_base,             ONLY : omega
1734    USE wavefunctions,         ONLY : psic
1735    USE ions_base,             ONLY : nat, nsp, ityp
1736    USE uspp_param,            ONLY : nh
1737    USE becmod,                ONLY : bec_type, becp
1738    USE fft_base,              ONLY : dffts
1739    USE mp_bands,              ONLY : intra_bgrp_comm
1740    USE mp,                    ONLY : mp_sum
1741    USE uspp,                  ONLY : indv_ijkb0
1742    !
1743    IMPLICIT NONE
1744    !
1745    INTEGER, INTENT(in) :: ibnd, last
1746    INTEGER :: ikb, nt, ia, ih, mbia
1747    REAL(DP) :: fac
1748    REAL(DP), ALLOCATABLE, DIMENSION(:) :: wr, wi
1749    REAL(DP) :: bcr, bci
1750    integer :: ir, box_ir, maxbox, ijkb0, nh_nt
1751    !
1752    REAL(DP), EXTERNAL :: ddot
1753    !
1754    CALL start_clock( 'calbec_rs' )
1755    !
1756    IF( dffts%has_task_groups ) CALL errore( 'calbec_rs_k', 'task_groups not implemented', 1 )
1757
1758    call set_xkphase(current_k)
1759
1760    fac = sqrt(omega) / (dffts%nr1*dffts%nr2*dffts%nr3)
1761    !
1762    maxbox = MAXVAL(maxbox_beta(1:nat))
1763    !
1764    becp%k(:,ibnd)=0.d0
1765!
1766! copy psic into a box-friendly array (and scatter it across intra_bbox_comm if needed)
1767!
1768    !$omp parallel do
1769    DO box_ir =1, boxtot
1770       box_psic ( box_ir ) = psic( box_beta( box_ir ) )
1771    END DO
1772    !$omp end parallel do
1773
1774    ALLOCATE( wr(maxbox), wi(maxbox) )
1775    ! working arrays to order the points in the clever way
1776    DO nt = 1, nsp
1777       !
1778       nh_nt = nh(nt)
1779       !
1780       DO ia = 1, nat
1781          !
1782          IF ( ityp(ia) == nt ) THEN
1783             !
1784             mbia = maxbox_beta(ia) ; IF ( mbia == 0 ) CYCLE
1785             !
1786             ijkb0 = indv_ijkb0(ia)
1787             !
1788             !$omp parallel default(shared) private(ih,ikb,ir,bcr,bci)
1789             !$omp do
1790             DO ir =1, mbia
1791                wr(ir) = dble ( box_psic( box0(ia)+ir ) * CONJG(xkphase(box0(ia)+ir)))
1792                wi(ir) = aimag( box_psic( box0(ia)+ir ) * CONJG(xkphase(box0(ia)+ir)))
1793             END DO
1794             !$omp end do
1795             !$omp do
1796             DO ih = 1, nh_nt
1797                ikb = ijkb0 + ih
1798                bcr = ddot( mbia, betasave(box_s(ia):box_e(ia),ih), 1, wr(:) , 1 )
1799                bci = ddot( mbia, betasave(box_s(ia):box_e(ia),ih), 1, wi(:) , 1 )
1800                becp%k(ikb,ibnd)   = fac * cmplx( bcr, bci, kind=DP)
1801             ENDDO
1802             !$omp end do
1803             !$omp end parallel
1804             !
1805          ENDIF
1806          !
1807       ENDDO
1808       !
1809    ENDDO
1810    DEALLOCATE( wr, wi )
1811    !
1812    CALL mp_sum( becp%k( :, ibnd ), intra_bgrp_comm )
1813    CALL stop_clock( 'calbec_rs' )
1814    !
1815    RETURN
1816
1817  END SUBROUTINE calbec_rs_k
1818    !
1819    !--------------------------------------------------------------------------
1820    SUBROUTINE s_psir_gamma( ibnd, last )
1821    !--------------------------------------------------------------------------
1822    !! This routine applies the \(S\) matrix to wfc ibnd (and wfc ibnd+1 if
1823    !! \(\le\text{last}\) ) stored in real space in psic, and puts the results
1824    !! again in psic for backtransforming.
1825    !! Requires \(\text{becp%r}\) (\(\texttt{calbecr}\) in REAL SPACE) and
1826    !! \(\text{betasave}\) (from \(\texttt{betapointlist}\) in \(\texttt{realus}\)).
1827    !
1828    !! WARNING: for the sake of speed, no checks performed in this subroutine.
1829    !
1830    !! Subroutine written by Dario Rocca, modified by O. Baris Malcioglu,
1831    !! and S. de Gironcoli(2020).
1832    !
1833      USE kinds,                  ONLY : DP
1834      USE cell_base,              ONLY : omega
1835      USE ions_base,              ONLY : nat, nsp, ityp
1836      USE uspp_param,             ONLY : nh, nhm
1837      USE uspp,                   ONLY : qq_at, indv_ijkb0
1838      USE becmod,                 ONLY : bec_type, becp
1839      USE fft_base,               ONLY : dffts
1840      !
1841      IMPLICIT NONE
1842      !
1843      INTEGER, INTENT(in) :: ibnd, last
1844      !
1845      INTEGER :: ih, nt, ia, mbia, ijkb0, box_ir
1846      REAL(DP) :: fac
1847      REAL(DP), ALLOCATABLE, DIMENSION(:) :: w1, w2
1848      !
1849      CALL start_clock( 's_psir' )
1850
1851      IF( dffts%has_task_groups ) CALL errore( 's_psir_gamma', 'task_groups not implemented', 1 )
1852
1853      ALLOCATE( w1(nhm), w2(nhm) )
1854      IF ( ibnd+1 > last) w2 = 0.D0
1855      !
1856      fac = sqrt(omega)
1857      !
1858      DO nt = 1, nsp
1859         !
1860         DO ia = 1, nat
1861            !
1862            IF ( ityp(ia) == nt ) THEN
1863               !
1864               mbia = maxbox_beta(ia) ; IF ( mbia == 0 ) CYCLE
1865               !print *, "mbia=",mbia
1866               !
1867               ijkb0 = indv_ijkb0(ia)
1868               !
1869               !$omp parallel
1870               !$omp do
1871               DO ih = 1, nh(nt)
1872                  w1(ih) = fac * SUM(qq_at(ih,1:nh(nt),ia) * becp%r(ijkb0+1:ijkb0+nh(nt), ibnd))
1873                  IF ( ibnd+1 <= last ) &
1874                     w2(ih) = fac * SUM(qq_at(ih,1:nh(nt),ia) * becp%r(ijkb0+1:ijkb0+nh(nt), ibnd+1))
1875               ENDDO
1876               !$omp end do
1877               !$omp do
1878               DO box_ir = box_s(ia), box_e(ia)
1879                  box_psic( box_ir ) = SUM(betasave(box_ir,1:nh(nt))*cmplx(w1(1:nh(nt)),w2(1:nh(nt)),kind=DP))
1880               ENDDO
1881               !$omp end do
1882               !$omp end parallel
1883               !
1884            ENDIF
1885            !
1886         ENDDO
1887         !
1888      ENDDO
1889      DEALLOCATE( w1, w2 )
1890
1891      call add_box_to_psic ( )
1892
1893      CALL stop_clock( 's_psir' )
1894      RETURN
1895      !
1896  END SUBROUTINE s_psir_gamma
1897  !
1898  !---------------------------------------------------------------------------
1899  SUBROUTINE s_psir_k( ibnd, last )
1900      !--------------------------------------------------------------------------
1901      !! Same as \(\texttt{s_psir_gamma}\) but for generalised k-point scheme i.e.:
1902      !! 1) Only one band is considered at a time;
1903      !! 2) \(\text{becp}\) is a complex entity now.
1904      !
1905      !! Derived from \(\texttt{s_psir_gamma}\) by OBM 061108.
1906      !! k-point phase factor fixed by SdG 030816.
1907      !
1908      USE kinds,                  ONLY : DP
1909      USE wvfct,                  ONLY : current_k
1910      USE cell_base,              ONLY : omega
1911      USE ions_base,              ONLY : nat, nsp, ityp
1912      USE uspp_param,             ONLY : nh, nhm
1913      USE uspp,                   ONLY : qq_at, indv_ijkb0
1914      USE becmod,                 ONLY : bec_type, becp
1915      USE fft_base,               ONLY : dffts
1916      !
1917      IMPLICIT NONE
1918      !
1919      INTEGER, INTENT(in) :: ibnd, last
1920      !
1921      INTEGER :: ih, nt, ia, mbia, ijkb0, box_ir
1922      REAL(DP) :: fac
1923      COMPLEX(DP) , ALLOCATABLE :: w1(:)
1924      !
1925      REAL(DP), EXTERNAL :: ddot
1926      !
1927
1928      CALL start_clock( 's_psir' )
1929
1930      IF( dffts%has_task_groups ) CALL errore( 's_psir_k', 'task_groups not implemented', 1 )
1931
1932      call set_xkphase(current_k)
1933      !
1934      fac = sqrt(omega)
1935      !
1936      ALLOCATE( w1(nhm) )
1937      DO nt = 1, nsp
1938         !
1939         DO ia = 1, nat
1940            !
1941            IF ( ityp(ia) == nt ) THEN
1942               !
1943               mbia = maxbox_beta(ia) ; IF ( mbia == 0 ) CYCLE
1944               !print *, "mbia=",mbia
1945               !
1946               ijkb0 = indv_ijkb0(ia)
1947               !
1948               !$omp parallel
1949               !$omp do
1950               DO ih = 1, nh(nt)
1951                  w1(ih) = fac * SUM(qq_at(ih,1:nh(nt),ia) * becp%k(ijkb0+1:ijkb0+nh(nt),ibnd))
1952               ENDDO
1953               !$omp end do
1954               !$omp do
1955               DO box_ir = box_s(ia), box_e(ia)
1956                  box_psic( box_ir ) = SUM(xkphase(box_ir)*betasave(box_ir,1:nh(nt))*w1(1:nh(nt)))
1957               ENDDO
1958               !$omp end do
1959               !$omp end parallel
1960               !
1961               !
1962            ENDIF
1963            !
1964         ENDDO
1965         !
1966      ENDDO
1967      DEALLOCATE( w1 )
1968
1969      call add_box_to_psic ( )
1970
1971      CALL stop_clock( 's_psir' )
1972      RETURN
1973      !
1974  END SUBROUTINE s_psir_k
1975  !
1976  !---------------------------------------------------------------------------
1977  SUBROUTINE add_vuspsir_gamma( ibnd, last )
1978  !--------------------------------------------------------------------------
1979  !! This routine applies the Ultra-Soft Hamiltonian to a vector transformed
1980  !! in real space contained in \(\text{psic}\).
1981  !! The index \(\text{ibnd}\) runs up to band \(\text{last}\).
1982  !! Requires the products of psi with all beta functions in array
1983  !! \(\text{becp%r(nkb,last)}\) (calculated by \(\texttt{calbecr}\) in
1984  !! real space).
1985  !
1986  !! WARNING: for the sake of speed, no checks performed in this subroutine.
1987  !
1988  !! Subroutine written by Dario Rocca, modified by O. Baris Malcioglu.
1989  !
1990  USE kinds,                  ONLY : DP
1991  USE cell_base,              ONLY : omega
1992  USE ions_base,              ONLY : nat, nsp, ityp
1993  USE uspp_param,             ONLY : nh, nhm
1994  USE lsda_mod,               ONLY : current_spin
1995  USE uspp,                   ONLY : deeq, indv_ijkb0
1996  USE becmod,                 ONLY : bec_type, becp
1997  USE fft_base,               ONLY : dffts
1998  !
1999  IMPLICIT NONE
2000  !
2001  INTEGER, INTENT(in) :: ibnd, last
2002  !
2003  INTEGER :: ih, nt, ia, mbia, ijkb0, box_ir
2004  REAL(DP) :: fac
2005  REAL(DP), ALLOCATABLE, DIMENSION(:) :: w1, w2
2006  !
2007  CALL start_clock( 'add_vuspsir' )
2008
2009  IF( dffts%has_task_groups ) CALL errore( 'add_vuspsir_gamma', 'task_groups not implemented', 1 )
2010  !
2011  fac = sqrt(omega)
2012  !
2013  ALLOCATE( w1(nhm), w2(nhm) )
2014  IF ( ibnd+1 > last) w2 = 0.D0
2015  DO nt = 1, nsp
2016     !
2017     DO ia = 1, nat
2018        !
2019        IF ( ityp(ia) == nt ) THEN
2020           !
2021           mbia = maxbox_beta(ia) ; IF ( mbia == 0 ) CYCLE
2022           !
2023           ijkb0 = indv_ijkb0(ia)
2024           !
2025           !$omp parallel
2026           !$omp do
2027           DO ih = 1, nh(nt)
2028              w1(ih) = fac * SUM(deeq(ih,1:nh(nt),ia,current_spin) * becp%r(ijkb0+1:ijkb0+nh(nt),ibnd))
2029              IF ( ibnd+1 <= last ) &
2030                 w2(ih) = fac * SUM(deeq(ih,1:nh(nt),ia,current_spin) * becp%r(ijkb0+1:ijkb0+nh(nt),ibnd+1))
2031           ENDDO
2032           !$omp end do
2033           !$omp do
2034           DO box_ir = box_s(ia), box_e(ia)
2035              box_psic( box_ir ) = SUM(betasave(box_ir,1:nh(nt))*cmplx( w1(1:nh(nt)), w2(1:nh(nt)) ,kind=DP))
2036           ENDDO
2037           !$omp end do
2038           !$omp end parallel
2039           !
2040        ENDIF
2041        !
2042     ENDDO
2043     !
2044  ENDDO
2045  DEALLOCATE( w1, w2 )
2046
2047  call add_box_to_psic ( )
2048
2049  CALL stop_clock( 'add_vuspsir' )
2050  RETURN
2051  !
2052  END SUBROUTINE add_vuspsir_gamma
2053  !
2054  !----------------------------------------------------------------------------
2055  SUBROUTINE add_vuspsir_k( ibnd, last )
2056  !--------------------------------------------------------------------------
2057  !! This routine applies the Ultra-Soft Hamiltonian to a vector transformed
2058  !! in real space contained in \(\text{psic}\).
2059  !! \(\text{ibnd}\) is an index that runs up to band \(\text{last}\).
2060  !! Requires the products of psi with all beta functions in array
2061  !! \(\text{becp(nkb,last)}\) (calculated by \(\texttt{calbecr}\) in real space).
2062  !
2063  !! WARNING: for the sake of speed, no checks performed in this subroutine.
2064  !
2065  !! Subroutine written by Stefano de Gironcoli, modified by O. Baris Malcioglu.
2066  !! k-point phase factor fixed by SdG 030816.
2067  !
2068  USE kinds,                  ONLY : DP
2069  USE wvfct,                  ONLY : current_k
2070  USE cell_base,              ONLY : omega
2071  USE ions_base,              ONLY : nat, nsp, ityp
2072  USE uspp_param,             ONLY : nh, nhm
2073  USE lsda_mod,               ONLY : current_spin
2074  USE uspp,                   ONLY : deeq, indv_ijkb0
2075  USE becmod,                 ONLY : bec_type, becp
2076  USE fft_base,               ONLY : dffts
2077  !
2078  IMPLICIT NONE
2079  !
2080  INTEGER, INTENT(in) :: ibnd, last
2081  !
2082  INTEGER :: ih, nt, ia, mbia, ijkb0, box_ir
2083  REAL(DP) :: fac
2084  !
2085  COMPLEX(DP), ALLOCATABLE :: w1(:)
2086  !
2087  CALL start_clock( 'add_vuspsir' )
2088
2089  IF( dffts%has_task_groups ) CALL errore( 'add_vuspsir_k', 'task_groups not implemented', 1 )
2090  !
2091  call set_xkphase(current_k)
2092  !
2093  fac = sqrt(omega)
2094  !
2095  ALLOCATE( w1(nhm))
2096  DO nt = 1, nsp
2097     !
2098     DO ia = 1, nat
2099        !
2100        IF ( ityp(ia) == nt ) THEN
2101           !
2102           mbia = maxbox_beta(ia) ; IF ( mbia == 0 ) CYCLE
2103
2104           ijkb0 = indv_ijkb0(ia)
2105
2106           !$omp parallel
2107           !$omp do
2108           DO ih = 1, nh(nt)
2109              w1(ih) = fac * SUM( deeq(ih,1:nh(nt),ia,current_spin) * becp%k(ijkb0+1:ijkb0+nh(nt),ibnd))
2110           ENDDO
2111           !$omp end do
2112           !$omp do
2113           DO box_ir = box_s(ia), box_e(ia)
2114              box_psic( box_ir ) = xkphase(box_ir)*SUM(betasave(box_ir,1:nh(nt))*w1(1:nh(nt)))
2115           ENDDO
2116           !$omp end do
2117           !$omp end parallel
2118           !
2119        ENDIF
2120        !
2121     ENDDO
2122     !
2123  ENDDO
2124  DEALLOCATE( w1 )
2125
2126  call add_box_to_psic ( )
2127
2128  CALL stop_clock( 'add_vuspsir' )
2129  RETURN
2130  !
2131  END SUBROUTINE add_vuspsir_k
2132
2133  !--------------------------------------------------------------------------
2134  SUBROUTINE add_box_to_psic ( )
2135    !--------------------------------------------------------------------------
2136    !! the box-friendly array box_psic is (collected from across intra_bbox_comm
2137    !! and) used to update psic.
2138    !
2139    USE wavefunctions, ONLY : psic
2140    USE ions_base,     ONLY : nat
2141    !
2142    IMPLICIT NONE
2143    !
2144    INTEGER :: ia, box_ir
2145
2146    !$omp parallel
2147    DO ia = 1, nat
2148       !$omp do
2149       DO box_ir = box_s(ia), box_e(ia)
2150          psic( box_beta( box_ir ) ) = psic( box_beta( box_ir ) ) + box_psic ( box_ir )
2151       END DO
2152       !$omp end do
2153    END DO
2154    !$omp end parallel
2155
2156    RETURN
2157
2158  END SUBROUTINE add_box_to_psic
2159  !
2160  !
2161  !--------------------------------------------------------------------------
2162  SUBROUTINE invfft_orbital_gamma( orbital, ibnd, last, conserved )
2163    !--------------------------------------------------------------------------
2164    !! This driver subroutine transforms the given orbital using FFT and puts the
2165    !! result in \(\text{psic}\).
2166    !
2167    !! WARNING: in order to be fast, no checks on the supplied data are performed!
2168    !
2169    !! OBM 241008.
2170    !
2171    USE wavefunctions, ONLY : psic
2172    USE klist,         ONLY : ngk, igk_k
2173    USE kinds,         ONLY : DP
2174    USE fft_base,      ONLY : dffts
2175    USE fft_interfaces,ONLY : invfft
2176    USE fft_helper_subroutines,   ONLY : fftx_ntgrp, tg_get_recip_inc
2177    !
2178    IMPLICIT NONE
2179    !
2180    INTEGER, INTENT(in) :: ibnd
2181    !! index of the band currently being transformed
2182    INTEGER, INTENT(in) :: last
2183    !! index of the last band that you want to transform (usually the
2184    !! total number of bands but can be different in band parallelization)
2185    COMPLEX(DP),INTENT(in) :: orbital(:,:)
2186    !! the array of orbitals to be transformed
2187    LOGICAL, OPTIONAL :: conserved
2188    !! if this flag is true, the orbital is stored in temporary memory
2189    !
2190    ! Internal temporary variables
2191    INTEGER :: j, idx, ioff, right_inc, ntgrp
2192
2193    !Task groups
2194
2195    !The new task group version based on vloc_psi
2196    !print *, "->Real space"
2197    CALL start_clock( 'invfft_orbital' )
2198    !
2199    IF( dffts%has_task_groups ) THEN
2200        !
2201
2202        tg_psic = (0.d0, 0.d0)
2203        ioff   = 0
2204        CALL tg_get_recip_inc( dffts, right_inc )
2205        ntgrp = fftx_ntgrp(dffts)
2206        !
2207        DO idx = 1, 2*ntgrp, 2
2208
2209           IF( idx + ibnd - 1 < last ) THEN
2210              DO j = 1, ngk(1)
2211                 tg_psic(dffts%nl (igk_k(j,1))+ioff) =      orbital(j,idx+ibnd-1) +&
2212                      (0.0d0,1.d0) * orbital(j,idx+ibnd)
2213                 tg_psic(dffts%nlm(igk_k(j,1))+ioff) =conjg(orbital(j,idx+ibnd-1) -&
2214                      (0.0d0,1.d0) * orbital(j,idx+ibnd) )
2215              ENDDO
2216           ELSEIF( idx + ibnd - 1 == last ) THEN
2217              DO j = 1, ngk(1)
2218                 tg_psic(dffts%nl (igk_k(j,1))+ioff) =        orbital(j,idx+ibnd-1)
2219                 tg_psic(dffts%nlm(igk_k(j,1))+ioff) = conjg( orbital(j,idx+ibnd-1))
2220              ENDDO
2221           ENDIF
2222
2223           ioff = ioff + right_inc
2224
2225        ENDDO
2226        !
2227        !
2228        CALL invfft ('tgWave', tg_psic, dffts)
2229        !
2230        !
2231        IF (present(conserved)) THEN
2232         IF (conserved .eqv. .true.) THEN
2233          IF (.not. allocated(tg_psic_temp)) ALLOCATE( tg_psic_temp( dffts%nnr_tg ) )
2234          tg_psic_temp=tg_psic
2235         ENDIF
2236        ENDIF
2237
2238    ELSE !Task groups not used
2239        !
2240        !$omp parallel default(shared) private(j)
2241        CALL threaded_barrier_memset(psic, 0.D0, dffts%nnr*2)
2242
2243        IF (ibnd < last) THEN
2244           ! two ffts at the same time
2245           !$omp do
2246           DO j = 1, ngk(1)
2247              psic (dffts%nl (igk_k(j,1))) =       orbital(j, ibnd) + (0.0d0,1.d0)*orbital(j, ibnd+1)
2248              psic (dffts%nlm(igk_k(j,1))) = conjg(orbital(j, ibnd) - (0.0d0,1.d0)*orbital(j, ibnd+1))
2249           ENDDO
2250           !$omp end do
2251        ELSE
2252           !$omp do
2253           DO j = 1, ngk(1)
2254              psic (dffts%nl (igk_k(j,1))) =       orbital(j, ibnd)
2255              psic (dffts%nlm(igk_k(j,1))) = conjg(orbital(j, ibnd))
2256           ENDDO
2257           !$omp end do
2258        ENDIF
2259        !$omp end parallel
2260        !
2261       CALL invfft ('Wave', psic, dffts)
2262        !
2263        IF (present(conserved)) THEN
2264         IF (conserved .eqv. .true.) THEN
2265           IF (.not. allocated(psic_temp) ) ALLOCATE (psic_temp(size(psic)))
2266           CALL zcopy(size(psic),psic,1,psic_temp,1)
2267         ENDIF
2268        ENDIF
2269
2270    ENDIF
2271
2272    CALL stop_clock( 'invfft_orbital' )
2273
2274  END SUBROUTINE invfft_orbital_gamma
2275  !
2276  !
2277  !--------------------------------------------------------------------------
2278  SUBROUTINE fwfft_orbital_gamma( orbital, ibnd, last, conserved, add_to_orbital )
2279    !--------------------------------------------------------------------------
2280    !! This driver subroutine -back- transforms the given contribution using FFT from
2281    !! the already existent data in \(\text{psic}\) and return it in (or optionally
2282    !! add it to) orbital.
2283    !
2284    !! WARNING 1: this subroutine does not reset the orbital, use carefully!
2285    !! WARNING 2: in order to be fast, no checks on the supplied data are performed!
2286    !
2287    !! OBM 241008,
2288    !! SdG 130420.
2289    !
2290    USE wavefunctions, &
2291                       ONLY : psic
2292    USE klist,         ONLY : ngk, igk_k
2293    USE kinds,         ONLY : DP
2294    USE fft_base,      ONLY : dffts
2295    USE fft_interfaces,ONLY : fwfft
2296    USE fft_helper_subroutines,   ONLY : fftx_ntgrp, tg_get_recip_inc
2297    !
2298    IMPLICIT NONE
2299    !
2300    INTEGER, INTENT(in) :: ibnd
2301    !! index of the band currently being transformed
2302    INTEGER, INTENT(IN) :: last
2303    !! index of the last band that you want to transform (usually the
2304    !! total number of bands but can be different in band parallelization)
2305    COMPLEX(DP),INTENT(inout) :: orbital(:,:)
2306    !! the array of orbitals to be returned (or updated)
2307    LOGICAL, OPTIONAL :: conserved
2308    !! if this flag is true, the orbital is stored in temporary memory
2309    LOGICAL, OPTIONAL :: add_to_orbital
2310    !! if this flag is true, the result is added to (rather than stored into) orbital
2311    !
2312    ! Internal temporary variables
2313    COMPLEX(DP) :: fp, fm
2314    INTEGER :: j, idx, ioff, right_inc, ntgrp
2315    LOGICAL :: add_to_orbital_
2316
2317    !Task groups
2318    !print *, "->fourier space"
2319    CALL start_clock( 'fwfft_orbital' )
2320    !
2321    add_to_orbital_=.FALSE. ; IF( present(add_to_orbital)) add_to_orbital_ = add_to_orbital
2322    !
2323    !New task_groups versions
2324    IF( dffts%has_task_groups ) THEN
2325       !
2326        CALL fwfft ('tgWave', tg_psic, dffts )
2327        !
2328        ioff   = 0
2329        CALL tg_get_recip_inc( dffts, right_inc )
2330        ntgrp = fftx_ntgrp(dffts)
2331        !
2332        DO idx = 1, 2*ntgrp, 2
2333           !
2334           IF( idx + ibnd - 1 < last ) THEN
2335              DO j = 1, ngk(1)
2336                 fp= ( tg_psic( dffts%nl(igk_k(j,1)) + ioff ) +  &
2337                      tg_psic( dffts%nlm(igk_k(j,1)) + ioff ) ) * 0.5d0
2338                 fm= ( tg_psic( dffts%nl(igk_k(j,1)) + ioff ) -  &
2339                      tg_psic( dffts%nlm(igk_k(j,1)) + ioff ) ) * 0.5d0
2340                 IF( add_to_orbital_ ) THEN
2341                    orbital (j, ibnd+idx-1) = orbital (j, ibnd+idx-1) + cmplx( dble(fp), aimag(fm),kind=DP)
2342                    orbital (j, ibnd+idx  ) = orbital (j, ibnd+idx  ) + cmplx(aimag(fp),- dble(fm),kind=DP)
2343                 ELSE
2344                    orbital (j, ibnd+idx-1) = cmplx( dble(fp), aimag(fm),kind=DP)
2345                    orbital (j, ibnd+idx  ) = cmplx(aimag(fp),- dble(fm),kind=DP)
2346                 END IF
2347              ENDDO
2348           ELSEIF( idx + ibnd - 1 == last ) THEN
2349              DO j = 1, ngk(1)
2350                 IF( add_to_orbital_ ) THEN
2351                    orbital (j, ibnd+idx-1) = orbital (j, ibnd+idx-1) + tg_psic( dffts%nl(igk_k(j,1)) + ioff )
2352                 ELSE
2353                    orbital (j, ibnd+idx-1) = tg_psic( dffts%nl(igk_k(j,1)) + ioff )
2354                 END IF
2355              ENDDO
2356           ENDIF
2357           !
2358           ioff = ioff + right_inc
2359           !
2360        ENDDO
2361        !
2362        IF (present(conserved)) THEN
2363         IF (conserved .eqv. .true.) THEN
2364          IF (allocated(tg_psic_temp)) DEALLOCATE( tg_psic_temp )
2365         ENDIF
2366        ENDIF
2367
2368    ELSE !Non task_groups version
2369         !larger memory slightly faster
2370        CALL fwfft ('Wave', psic, dffts)
2371
2372        IF (ibnd < last) THEN
2373           ! two ffts at the same time
2374
2375           IF( add_to_orbital_ ) THEN
2376              !$omp parallel do
2377              DO j = 1, ngk(1)
2378                 fp = (psic (dffts%nl(igk_k(j,1))) + psic (dffts%nlm(igk_k(j,1))))*0.5d0
2379                 fm = (psic (dffts%nl(igk_k(j,1))) - psic (dffts%nlm(igk_k(j,1))))*0.5d0
2380                 orbital( j, ibnd)   = orbital( j, ibnd)   + cmplx( dble(fp), aimag(fm),kind=DP)
2381                 orbital( j, ibnd+1) = orbital( j, ibnd+1) + cmplx(aimag(fp),- dble(fm),kind=DP)
2382              ENDDO
2383              !$omp end parallel do
2384           ELSE
2385              !$omp parallel do
2386              DO j = 1, ngk(1)
2387                 fp = (psic (dffts%nl(igk_k(j,1))) + psic (dffts%nlm(igk_k(j,1))))*0.5d0
2388                 fm = (psic (dffts%nl(igk_k(j,1))) - psic (dffts%nlm(igk_k(j,1))))*0.5d0
2389                 orbital( j, ibnd)   = cmplx( dble(fp), aimag(fm),kind=DP)
2390                 orbital( j, ibnd+1) = cmplx(aimag(fp),- dble(fm),kind=DP)
2391              ENDDO
2392              !$omp end parallel do
2393           ENDIF
2394        ELSE
2395           IF( add_to_orbital_ ) THEN
2396              !$omp parallel do
2397              DO j = 1, ngk(1)
2398                 orbital(j, ibnd)   =  orbital(j, ibnd) +  psic (dffts%nl(igk_k(j,1)))
2399              ENDDO
2400              !$omp end parallel do
2401           ELSE
2402              !$omp parallel do
2403              DO j = 1, ngk(1)
2404                 orbital(j, ibnd)   =  psic (dffts%nl(igk_k(j,1)))
2405              ENDDO
2406              !$omp end parallel do
2407           ENDIF
2408        ENDIF
2409        IF (present(conserved)) THEN
2410         IF (conserved .eqv. .true.) THEN
2411           IF (allocated(psic_temp) ) DEALLOCATE(psic_temp)
2412         ENDIF
2413        ENDIF
2414    ENDIF
2415    !
2416    CALL stop_clock( 'fwfft_orbital' )
2417
2418  END SUBROUTINE fwfft_orbital_gamma
2419  !
2420  !--------------------------------------------------------------------------
2421  SUBROUTINE invfft_orbital_k( orbital, ibnd, last, ik, conserved )
2422    !--------------------------------------------------------------------------
2423    !! This subroutine transforms the given orbital using FFT and puts the result
2424    !! in \(\text{psic}\).
2425    !
2426    !! WARNING: in order to be fast, no checks on the supplied data are performed!
2427    !
2428    !! OBM 110908
2429    !
2430    USE kinds,                    ONLY : DP
2431    USE wavefunctions,     ONLY : psic
2432    USE klist,                    ONLY : ngk, igk_k
2433    USE wvfct,                    ONLY : current_k
2434    USE fft_base,                 ONLY : dffts
2435    USE fft_interfaces,           ONLY : invfft
2436    USE fft_helper_subroutines,   ONLY : fftx_ntgrp, tg_get_recip_inc
2437
2438    IMPLICIT NONE
2439
2440    INTEGER, INTENT(in) :: ibnd
2441    !! index of the band currently being transformed
2442    INTEGER, INTENT(in) :: last
2443    !! index of the last band that you want to transform (usually the total
2444    !! number of bands but can be different in band parallelization)
2445    COMPLEX(DP),INTENT(in) :: orbital(:,:)
2446    !! the array of orbitals to be transformed
2447    INTEGER, OPTIONAL :: ik
2448    !! index of the desired k-point
2449    LOGICAL, OPTIONAL :: conserved
2450    !! if this flag is true, the orbital is stored in temporary memory
2451    !
2452    ! Internal variables
2453    INTEGER :: ioff, idx, ik_ , right_inc, ntgrp, ig
2454
2455    CALL start_clock( 'invfft_orbital' )
2456
2457    ! current_k  variable  must contain the index of the desired kpoint
2458    ik_ = current_k ; if (present(ik)) ik_ = ik
2459
2460    IF( dffts%has_task_groups ) THEN
2461       !
2462       tg_psic = ( 0.D0, 0.D0 )
2463       ioff   = 0
2464       CALL tg_get_recip_inc( dffts, right_inc )
2465       ntgrp = fftx_ntgrp(dffts)
2466       !
2467       DO idx = 1, ntgrp
2468          !
2469          IF( idx + ibnd - 1 <= last ) THEN
2470             !DO j = 1, size(orbital,1)
2471             tg_psic( dffts%nl( igk_k(:, ik_) ) + ioff ) = orbital(:,idx+ibnd-1)
2472             !END DO
2473          ENDIF
2474
2475          ioff = ioff + right_inc
2476
2477       ENDDO
2478       !
2479       CALL invfft ('tgWave', tg_psic, dffts)
2480       IF (present(conserved)) THEN
2481          IF (conserved .eqv. .true.) THEN
2482             IF (.not. allocated(tg_psic_temp)) &
2483                  &ALLOCATE( tg_psic_temp( dffts%nnr_tg ) )
2484             tg_psic_temp=tg_psic
2485          ENDIF
2486       ENDIF
2487       !
2488    ELSE  !non task_groups version
2489       !
2490       !$omp parallel default(shared) private(ig)
2491       CALL threaded_barrier_memset(psic, 0.D0, dffts%nnr*2)
2492       !$omp do
2493       do ig = 1, ngk(ik_)
2494          psic(dffts%nl(igk_k(ig, ik_))) = orbital(ig,ibnd)
2495       end do
2496       !$omp end do
2497       !$omp end parallel
2498       !
2499       CALL invfft ('Wave', psic, dffts)
2500       IF (present(conserved)) THEN
2501          IF (conserved .eqv. .true.) THEN
2502             IF (.not. allocated(psic_temp) ) ALLOCATE (psic_temp(size(psic)))
2503             psic_temp=psic
2504          ENDIF
2505       ENDIF
2506       !
2507    ENDIF
2508    CALL stop_clock( 'invfft_orbital' )
2509  END SUBROUTINE invfft_orbital_k
2510  !
2511  !--------------------------------------------------------------------------
2512  SUBROUTINE fwfft_orbital_k( orbital, ibnd, last, ik, conserved, add_to_orbital )
2513    !-------------------------------------------------------------------------
2514    !! This driver subroutine -back- transforms the given contribution using FFT from
2515    !! the already existent data in \(\text{psic}\) and return it in (or optionally
2516    !! add it to) orbital.
2517    !
2518    !! WARNING 1: this subroutine does not reset the orbital, use carefully!
2519    !! WARNING 2: in order to be fast, no checks on the supplied data are performed!
2520    !
2521    !! OBM 241008,
2522    !! SdG 130420.
2523    !
2524    USE wavefunctions,            ONLY : psic
2525    USE klist,                    ONLY : ngk, igk_k
2526    USE wvfct,                    ONLY : current_k
2527    USE kinds,                    ONLY : DP
2528    USE fft_base,                 ONLY : dffts
2529    USE fft_interfaces,           ONLY : fwfft
2530    USE fft_helper_subroutines,   ONLY : fftx_ntgrp, tg_get_recip_inc
2531    !
2532    IMPLICIT NONE
2533    !
2534    INTEGER, INTENT(in) :: ibnd
2535    !! index of the band currently being transformed
2536    INTEGER, INTENT(in) :: last
2537    !! index of the last band that you want to transform (usually the
2538    !! total number of bands but can be different in band parallelization)
2539    COMPLEX(DP),INTENT(inout) :: orbital(:,:)
2540    !! the array of orbitals to be returned (or updated)
2541    INTEGER, OPTIONAL :: ik
2542    !! the index of the desired kpoint
2543    LOGICAL, OPTIONAL :: conserved
2544    !! if this flag is true, the orbital is stored in temporary memory
2545    LOGICAL, OPTIONAL :: add_to_orbital
2546    !! if this flag is true, the result is added to (rather than stored into) orbital
2547    !
2548    ! Internal variables
2549    INTEGER :: ioff, idx, ik_ , right_inc, ntgrp, ig
2550    LOGICAL :: add_to_orbital_
2551    !
2552    CALL start_clock( 'fwfft_orbital' )
2553    !
2554    add_to_orbital_=.FALSE. ; IF( present(add_to_orbital)) add_to_orbital_ = add_to_orbital
2555    !
2556    ! current_k  variable  must contain the index of the desired kpoint
2557    ik_ = current_k ; if (present(ik)) ik_ = ik
2558
2559    IF( dffts%has_task_groups ) THEN
2560       !
2561       CALL fwfft ('tgWave', tg_psic, dffts)
2562       !
2563       ioff   = 0
2564       CALL tg_get_recip_inc( dffts, right_inc )
2565       ntgrp = fftx_ntgrp(dffts)
2566       !
2567       DO idx = 1, ntgrp
2568          !
2569          IF( idx + ibnd - 1 <= last ) THEN
2570             IF( add_to_orbital_ ) THEN
2571                orbital (:, ibnd+idx-1) = orbital (:, ibnd+idx-1) + tg_psic( dffts%nl(igk_k(:,ik_)) + ioff )
2572             ELSE
2573                orbital (:, ibnd+idx-1) = tg_psic( dffts%nl(igk_k(:,ik_)) + ioff )
2574             END IF
2575
2576          ENDIF
2577          !
2578          ioff = ioff + right_inc
2579          !
2580       ENDDO
2581       IF (present(conserved)) THEN
2582          IF (conserved .eqv. .true.) THEN
2583             IF (allocated(tg_psic_temp)) DEALLOCATE( tg_psic_temp )
2584          ENDIF
2585       ENDIF
2586       !
2587    ELSE !non task groups version
2588       !
2589       CALL fwfft ('Wave', psic, dffts)
2590       !
2591       IF( add_to_orbital_ ) THEN
2592          !$omp parallel do default(shared) private(ig)
2593          do ig=1,ngk(ik_)
2594             orbital(ig,ibnd) = orbital(ig,ibnd) + psic(dffts%nl(igk_k(ig,ik_)))
2595          end do
2596          !$omp end parallel do
2597       ELSE
2598          !$omp parallel do default(shared) private(ig)
2599          do ig=1,ngk(ik_)
2600             orbital(ig,ibnd) = psic(dffts%nl(igk_k(ig,ik_)))
2601          end do
2602          !$omp end parallel do
2603       END IF
2604       !
2605       IF (present(conserved)) THEN
2606          IF (conserved .eqv. .true.) THEN
2607             IF (allocated(psic_temp) ) DEALLOCATE(psic_temp)
2608          ENDIF
2609       ENDIF
2610    ENDIF
2611    CALL stop_clock( 'fwfft_orbital' )
2612
2613  END SUBROUTINE fwfft_orbital_k
2614
2615  !--------------------------------------------------------------------------
2616  SUBROUTINE v_loc_psir( ibnd, last )
2617    !--------------------------------------------------------------------------
2618    !! Basically the same thing as \(\texttt{v_loc}\) but without implicit FFT
2619    !! modified for real space implementation.
2620    !
2621    !! OBM 241008
2622    !
2623    USE wavefunctions, &
2624                       ONLY : psic
2625    USE kinds,         ONLY : DP
2626    USE fft_base,      ONLY : dffts
2627    USE scf,           ONLY : vrs
2628    USE lsda_mod,      ONLY : current_spin
2629    !
2630    IMPLICIT NONE
2631    !
2632    INTEGER, INTENT(in) :: ibnd
2633    !! index of the band currently being operated on
2634    INTEGER, INTENT(in) :: last
2635    !! index of the last band that you want to operate on
2636    !
2637    ! Internal temporary variables
2638    INTEGER :: j
2639    !Task groups
2640    REAL(DP), ALLOCATABLE :: tg_v(:)
2641    !
2642    CALL start_clock( 'v_loc_psir' )
2643
2644    IF( dffts%has_task_groups ) THEN
2645        IF (ibnd == 1 ) THEN
2646          CALL tg_gather( dffts, vrs(:,current_spin), tg_v )
2647          !if ibnd==1 this is a new calculation, and tg_v should be distributed.
2648        ENDIF
2649        !
2650        !$omp parallel do
2651        DO j = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p
2652           tg_psic (j) = tg_psic (j) + tg_psic_temp (j) * tg_v(j)
2653        ENDDO
2654        !$omp end parallel do
2655        !
2656        DEALLOCATE( tg_v )
2657     ELSE
2658        !   product with the potential v on the smooth grid
2659        !
2660        !$omp parallel do
2661        DO j = 1, dffts%nnr
2662           psic (j) = psic (j) + psic_temp (j) * vrs(j,current_spin)
2663        ENDDO
2664        !$omp end parallel do
2665     ENDIF
2666  CALL stop_clock( 'v_loc_psir' )
2667  END SUBROUTINE v_loc_psir
2668  !
2669  !--------------------------------------------------------------------------
2670  SUBROUTINE v_loc_psir_inplace( ibnd, last )
2671    !--------------------------------------------------------------------------
2672    !! The same thing as \(\texttt{v_loc_psir}\), but:
2673    !! - on input \(\text{psic}\) contains the wavefunction;
2674    !! - on output \(\text{psic}\) is overwritten to contain \(\texttt{v_loc_psir}\).
2675    !! Therefore must be the first term to be considered when building \(\text{hpsi}\).
2676    !
2677    !! SdG 290716
2678    !
2679    USE wavefunctions, &
2680                       ONLY : psic
2681    USE kinds,         ONLY : DP
2682    USE fft_base,      ONLY : dffts
2683    USE scf,           ONLY : vrs
2684    USE lsda_mod,      ONLY : current_spin
2685    !
2686    IMPLICIT NONE
2687    !
2688    INTEGER, INTENT(in) :: ibnd
2689    !! index of the band currently being operated on
2690    INTEGER, INTENT(in) :: last
2691    !! index of the last band that you want to operate on
2692    !
2693    ! ... local variables
2694    !
2695    INTEGER :: j  !Task groups
2696    REAL(DP), ALLOCATABLE :: tg_v(:)
2697    !
2698    CALL start_clock( 'v_loc_psir' )
2699
2700    IF( dffts%has_task_groups ) THEN
2701        IF (ibnd == 1 ) THEN
2702          CALL tg_gather( dffts, vrs(:,current_spin), tg_v )
2703          !if ibnd==1 this is a new calculation, and tg_v should be distributed.
2704        ENDIF
2705        !
2706        !$omp parallel do
2707        DO j = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p
2708           tg_psic (j) = tg_v(j) * tg_psic(j)
2709        ENDDO
2710        !$omp end parallel do
2711        !
2712        DEALLOCATE( tg_v )
2713    ELSE
2714       !   product with the potential v on the smooth grid
2715       !
2716       !$omp parallel do
2717       DO j = 1, dffts%nnr
2718          psic (j) = vrs(j,current_spin) * psic(j)
2719       ENDDO
2720       !$omp end parallel do
2721    ENDIF
2722  CALL stop_clock( 'v_loc_psir' )
2723  END SUBROUTINE v_loc_psir_inplace
2724    !--------------------------------------------------------------------------
2725  !
2726END MODULE realus
2727