1!! Copyright (C) 2002-2011 M. Marques, A. Castro, X. Andrade, J. Alberdi-Rodriguez, M. Oliveira
2!! Copyright (C) Luigi Genovese, Thierry Deutsch, CEA Grenoble, 2006
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
22module poisson_isf_oct_m
23  use cube_function_oct_m
24  use cube_oct_m
25  use global_oct_m
26  use io_oct_m
27  use messages_oct_m
28  use mesh_oct_m
29  use mpi_oct_m
30  use namespace_oct_m
31  use parser_oct_m
32  use profiling_oct_m
33  use scaling_function_oct_m
34  use sgfft_oct_m
35  use submesh_oct_m
36
37  implicit none
38
39  private
40
41  public ::               &
42    poisson_isf_t,        &
43    isf_cnf_t,    &
44    poisson_isf_init,     &
45    poisson_isf_solve,    &
46    poisson_isf_end
47
48  ! Indices for the cnf array
49  integer, parameter :: SERIAL = 1
50  integer, parameter :: WORLD = 2
51  integer, parameter :: DOMAIN = 3
52  integer, parameter :: N_CNF = 3
53
54  ! Datatype to store kernel values to solve Poisson equation
55  ! on different communicators (configurations).
56  type isf_cnf_t
57    private
58    real(8), pointer  :: kernel(:, :, :)
59    integer           :: nfft1, nfft2, nfft3
60    type(mpi_grp_t)   :: mpi_grp
61    logical           :: all_nodes
62  end type isf_cnf_t
63
64  type poisson_isf_t
65    private
66    integer         :: all_nodes_comm
67    type(isf_cnf_t) :: cnf(1:N_CNF)
68  end type poisson_isf_t
69
70  integer, parameter :: order_scaling_function = 8
71
72
73contains
74
75  ! ---------------------------------------------------------
76  subroutine poisson_isf_init(this, namespace, mesh, cube, all_nodes_comm, init_world)
77    type(poisson_isf_t),       intent(out)   :: this
78    type(namespace_t), target, intent(in)    :: namespace
79    type(mesh_t),              intent(in)    :: mesh
80    type(cube_t),              intent(inout) :: cube
81    integer,                   intent(in)    :: all_nodes_comm
82    logical, optional,         intent(in)    :: init_world
83
84    integer :: n1, n2, n3
85    integer :: i_cnf
86#if defined(HAVE_MPI)
87    integer :: m1, m2, m3, md1, md2, md3
88    integer :: n(3)
89    logical :: init_world_
90    integer :: default_nodes
91    integer :: ierr, world_grp, poisson_grp, ii
92    integer, allocatable :: ranks(:)
93    !data ranks /0, 1/
94    integer :: world_size
95    integer :: nodes
96#endif
97
98    PUSH_SUB(poisson_isf_init)
99
100#ifdef HAVE_MPI
101    init_world_ = .true.
102    if(present(init_world)) init_world_ = init_world
103#endif
104
105    ! we need to nullify the pointer so they can be deallocated safely
106    ! afterwards
107    do i_cnf = 1, N_CNF
108      nullify(this%cnf(i_cnf)%kernel)
109    end do
110
111    if(.not. mesh%parallel_in_domains) then
112      ! The serial version is always needed (as used, e.g., in the casida runmode)
113      call calculate_dimensions(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
114        this%cnf(SERIAL)%nfft1, this%cnf(SERIAL)%nfft2, this%cnf(SERIAL)%nfft3)
115
116      n1 = this%cnf(SERIAL)%nfft1/2 + 1
117      n2 = this%cnf(SERIAL)%nfft2/2 + 1
118      n3 = this%cnf(SERIAL)%nfft3/2 + 1
119
120      SAFE_ALLOCATE(this%cnf(SERIAL)%kernel(1:n1, 1:n2, 1:n3))
121
122      call build_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3),   &
123        this%cnf(SERIAL)%nfft1, this%cnf(SERIAL)%nfft2, this%cnf(SERIAL)%nfft3, &
124        real(mesh%spacing(1), 8), order_scaling_function, this%cnf(SERIAL)%kernel)
125    end if
126#if defined(HAVE_MPI)
127
128    ! Allocate to configurations. The initialisation, especially the kernel,
129    ! depends on the number of nodes used for the calculations. To avoid
130    ! recalculating the kernel on each call of poisson_isf_solve depending on
131    ! the all_nodes argument, both kernels are calculated.
132    this%cnf(DOMAIN)%mpi_grp = mesh%mpi_grp
133
134    ! For the world configuration we build a new communicator
135
136    default_nodes = 0 !All nodes
137
138    !%Variable PoissonSolverNodes
139    !%Type integer
140    !%Section Hamiltonian::Poisson
141    !%Default 0
142    !%Description
143    !% How many nodes to use to solve the Poisson equation. A value of
144    !% 0, the default, implies that all available nodes are used.
145    !%End
146    call parse_variable(namespace, 'PoissonSolverNodes', default_nodes, nodes)
147
148    this%all_nodes_comm = all_nodes_comm
149
150    call MPI_Comm_size(all_nodes_comm, world_size, mpi_err)
151
152    if(nodes <= 0 .or. nodes > world_size) nodes = world_size
153    this%cnf(WORLD)%all_nodes = (nodes == mpi_world%size)
154
155    SAFE_ALLOCATE(ranks(1:nodes))
156
157    do ii = 1, nodes
158      ranks(ii) = ii - 1
159    end do
160
161    !create a new communicator
162    !Extract the original group handle and create new comm.
163    call MPI_Comm_group(all_nodes_comm, world_grp, ierr)
164    call MPI_Group_incl(world_grp, nodes, ranks, poisson_grp, ierr)
165    call MPI_Comm_create(mpi_world%comm, poisson_grp, this%cnf(WORLD)%mpi_grp%comm, ierr)
166
167    SAFE_DEALLOCATE_A(ranks)
168
169    !Fill the new data structure, for all nodes
170    if (this%cnf(WORLD)%mpi_grp%comm /= MPI_COMM_NULL) then
171      call MPI_Comm_rank(this%cnf(WORLD)%mpi_grp%comm, this%cnf(WORLD)%mpi_grp%rank, ierr)
172      call MPI_Comm_size(this%cnf(WORLD)%mpi_grp%comm, this%cnf(WORLD)%mpi_grp%size, ierr)
173    else
174      this%cnf(WORLD)%mpi_grp%rank = -1
175      this%cnf(WORLD)%mpi_grp%size = -1
176    end if
177
178    ! Build the kernel for all configurations. At the moment, this is
179    ! solving the poisson equation with all nodes (i_cnf == WORLD) and
180    ! with the domain nodes only (i_cnf == DOMAIN).
181    do i_cnf = 2, N_CNF
182      if( (i_cnf == WORLD .and. .not. init_world_) &                  ! world is disabled
183        .or. (i_cnf == DOMAIN .and. .not. mesh%parallel_in_domains) & ! not parallel in domains
184        ) then
185        nullify(this%cnf(i_cnf)%kernel)
186        cycle
187      end if
188      if (this%cnf(i_cnf)%mpi_grp%rank /= -1 .or. i_cnf /= WORLD ) then
189        call par_calculate_dimensions(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
190          m1, m2, m3, n1, n2, n3, md1, md2, md3, this%cnf(i_cnf)%nfft1, this%cnf(i_cnf)%nfft2, &
191          this%cnf(i_cnf)%nfft3, this%cnf(i_cnf)%mpi_grp%size)
192
193        ! Shortcuts to avoid to "line too long" errors.
194        n(1) = this%cnf(i_cnf)%nfft1
195        n(2) = this%cnf(i_cnf)%nfft2
196        n(3) = this%cnf(i_cnf)%nfft3
197
198        SAFE_ALLOCATE(this%cnf(i_cnf)%kernel(1:n(1), 1:n(2), 1:n(3)/this%cnf(i_cnf)%mpi_grp%size))
199
200        call par_build_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), n1, n2, n3,     &
201          this%cnf(i_cnf)%nfft1, this%cnf(i_cnf)%nfft2, this%cnf(i_cnf)%nfft3,                      &
202          mesh%spacing(1), order_scaling_function,                                            &
203          this%cnf(i_cnf)%mpi_grp%rank, this%cnf(i_cnf)%mpi_grp%size, this%cnf(i_cnf)%mpi_grp%comm, &
204          this%cnf(i_cnf)%kernel)
205      else
206      	nullify(this%cnf(i_cnf)%kernel)
207        cycle
208      end if
209    end do
210#endif
211
212    POP_SUB(poisson_isf_init)
213  end subroutine poisson_isf_init
214
215  ! ---------------------------------------------------------
216  subroutine poisson_isf_solve(this, mesh, cube, pot, rho, all_nodes, sm)
217    type(poisson_isf_t), intent(in)    :: this
218    type(mesh_t),        intent(in)    :: mesh
219    type(cube_t),        intent(in)    :: cube
220    FLOAT,               intent(out)   :: pot(:)
221    FLOAT,               intent(in)    :: rho(:)
222    logical,             intent(in)    :: all_nodes
223    type(submesh_t),     optional,  intent(in)    :: sm  !< If present pot and rho are assumed to come from it
224
225    integer :: i_cnf, nn(1:3)
226    type(cube_function_t) :: rho_cf
227
228    PUSH_SUB(poisson_isf_solve)
229
230    call cube_function_null(rho_cf)
231    call dcube_function_alloc_RS(cube, rho_cf)
232
233    if(present(sm)) then
234      call dsubmesh_to_cube(sm, rho, cube, rho_cf)
235    else
236      if(mesh%parallel_in_domains) then
237        call dmesh_to_cube(mesh, rho, cube, rho_cf, local=.true.)
238      else
239        call dmesh_to_cube(mesh, rho, cube, rho_cf)
240      end if
241    end if
242
243    ! Choose configuration.
244    i_cnf = SERIAL
245
246#if defined(HAVE_MPI)
247    if(all_nodes) then
248      i_cnf = WORLD
249    else if(mesh%parallel_in_domains) then
250      i_cnf = DOMAIN
251    end if
252#endif
253
254#if !defined(HAVE_MPI)
255    ASSERT(i_cnf == SERIAL)
256#endif
257
258    if(i_cnf == SERIAL) then
259
260      nn(1) = this%cnf(SERIAL)%nfft1
261      nn(2) = this%cnf(SERIAL)%nfft2
262      nn(3) = this%cnf(SERIAL)%nfft3
263      call psolver_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3),    &
264        nn(1), nn(2), nn(3), &
265        real(mesh%spacing(1), 8), this%cnf(SERIAL)%kernel, rho_cf%dRS)
266
267#if defined(HAVE_MPI)
268    else
269      nn(1) = this%cnf(i_cnf)%nfft1
270      nn(2) = this%cnf(i_cnf)%nfft2
271      nn(3) = this%cnf(i_cnf)%nfft3
272      if (this%cnf(i_cnf)%mpi_grp%size /= -1 .or. i_cnf /= WORLD) then
273        call par_psolver_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
274          nn(1), nn(2), nn(3), &
275          real(mesh%spacing(1), 8), this%cnf(i_cnf)%kernel, rho_cf%dRS,                      &
276          this%cnf(i_cnf)%mpi_grp%rank, this%cnf(i_cnf)%mpi_grp%size, this%cnf(i_cnf)%mpi_grp%comm)
277      end if
278      ! we need to be sure that the root of every domain-partition has a copy of the potential
279      ! for the moment we broadcast to all nodes, but this is more than what we really need
280      if(i_cnf == WORLD .and. .not. this%cnf(WORLD)%all_nodes) then
281        call MPI_Bcast(rho_cf%drs(1, 1, 1), cube%rs_n_global(1)*cube%rs_n_global(2)*cube%rs_n_global(3), &
282          MPI_FLOAT, 0, this%all_nodes_comm, mpi_err)
283      end if
284#endif
285    end if
286
287    if(present(sm)) then
288      call dcube_to_submesh(cube, rho_cf, sm, pot)
289    else
290      if(mesh%parallel_in_domains) then
291        call dcube_to_mesh(cube, rho_cf, mesh, pot, local=.true.)
292      else
293        call dcube_to_mesh(cube, rho_cf, mesh, pot)
294      end if
295    end if
296
297    call dcube_function_free_RS(cube, rho_cf)
298
299    POP_SUB(poisson_isf_solve)
300  end subroutine poisson_isf_solve
301
302  ! ---------------------------------------------------------
303  subroutine poisson_isf_end(this)
304    type(poisson_isf_t), intent(inout) :: this
305
306#if defined(HAVE_MPI)
307    integer :: i_cnf
308#endif
309
310    PUSH_SUB(poisson_isf_end)
311
312#if defined(HAVE_MPI)
313    do i_cnf = 1, N_CNF
314      SAFE_DEALLOCATE_P(this%cnf(i_cnf)%kernel)
315    end do
316    if(this%cnf(WORLD)%mpi_grp%comm /= MPI_COMM_NULL) then
317      call MPI_Comm_free(this%cnf(WORLD)%mpi_grp%comm, mpi_err)
318    end if
319#else
320    SAFE_DEALLOCATE_P(this%cnf(SERIAL)%kernel)
321#endif
322
323    POP_SUB(poisson_isf_end)
324  end subroutine poisson_isf_end
325
326  ! --------------------------------------------------------------
327
328  !!****h* BigDFT/psolver_kernel
329  !! NAME
330  !!   psolver_kernel
331  !!
332  !! FUNCTION
333  !!    Solver of Poisson equation applying a kernel
334  !!
335  !! SYNOPSIS
336  !!    Poisson solver applying a kernel and
337  !!    using Fourier transform for the convolution.
338  !!    rhopot : input  -> the density
339  !!             output -> the Hartree potential + pot_ion
340  !!    The potential pot_ion is ADDED in the array rhopot.
341  !!    Calculate also the Hartree potential
342  !!
343  !!    Replaces the charge density contained in rhopot
344  !!    by the Hartree stored as well in rhopot.
345  !!    If xc_on is true, it also adds the XC potential and
346  !!    ionic potential pot_ion
347  !!
348  !!    We double the size of the mesh except in one dimension
349  !!    in order to use the property of the density to be real.
350  !! WARNING
351  !!    For the use of FFT routine
352  !!        inzee=1: first part of Z is data (output) array,
353  !!                 second part work array
354  !!        inzee=2: first part of Z is work array, second part data array
355  !!                 real(F(i1,i2,i3))=Z(1,i1,i2,i3,inzee)
356  !!                 imag(F(i1,i2,i3))=Z(2,i1,i2,i3,inzee)
357  !!        inzee on output is in general different from inzee on input
358  !!
359  !! AUTHOR
360  !!    Thierry Deutsch, Luigi Genovese
361  !! COPYRIGHT
362  !!    Copyright (C) 2005 CEA
363  !! CREATION DATE
364  !!    13/07/2005
365  !!
366  !! MODIFICATION HISTORY
367  !!    12/2005 Kernel stored into memory
368  !!    12/2005 Real Kernel FFT and use less memory
369  !!
370  !! SOURCE
371  !!
372  subroutine psolver_kernel(n01, n02, n03, nfft1, nfft2, nfft3, hgrid, karray, rhopot)
373    integer, intent(in)    :: n01
374    integer, intent(in)    :: n02
375    integer, intent(in)    :: n03
376    integer, intent(in)    :: nfft1
377    integer, intent(in)    :: nfft2
378    integer, intent(in)    :: nfft3
379    real(8), intent(in)    :: hgrid
380    real(8), intent(in)    :: karray(nfft1/2 + 1,nfft2/2 + 1, nfft3/2 + 1)
381    real(8), intent(inout) :: rhopot(n01, n02, n03)
382
383    real(8), allocatable :: zarray(:,:,:)
384    real(8) :: factor
385    integer :: n1, n2, n3, nd1, nd2, nd3, n1h, nd1h
386    integer :: inzee, i_sign
387
388    PUSH_SUB(psolver_kernel)
389
390    !Dimension of the FFT
391    call calculate_dimensions(n01, n02, n03, n1, n2, n3)
392
393    !Half size of nd1
394    n1h=n1/2
395    nd1 = n1 + modulo(n1+1,2)
396    nd2 = n2 + modulo(n2+1,2)
397    nd3 = n3 + modulo(n3+1,2)
398    nd1h=(nd1+1)/2
399
400    SAFE_ALLOCATE(zarray(1:2, 1:nd1h*nd2*nd3, 1:2))
401
402    !Set zarray
403    call zarray_in(n01,n02,n03,nd1h,nd2,nd3,rhopot,zarray)
404
405    !FFT
406    !print *,"Do a 3D HalFFT for the density"
407    i_sign=1
408    inzee=1
409    call fft(n1h,n2,n3,nd1h,nd2,nd3,zarray,i_sign,inzee)
410
411    !print *, "Apply the kernel"
412    call kernel_application(n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,zarray,karray,inzee)
413
414    !Inverse FFT
415    i_sign=-1
416    !print *,"Do a 3D inverse HalFFT"
417    call fft(n1h,n2,n3,nd1h,nd2,nd3,zarray,i_sign,inzee)
418
419    !Recollect the result
420    !We have to multiply by a factor
421    factor = hgrid**3/(n1*n2*n3)
422
423    ! Calling this routine gives only the Hartree potential
424    call zarray_out(n01, n02, n03, nd1h, nd2, nd3, rhopot, zarray(1, 1, inzee), factor)
425
426    SAFE_DEALLOCATE_A(zarray)
427    POP_SUB(psolver_kernel)
428  end subroutine psolver_kernel
429  !!***
430
431  !!****h* BigDFT/kernel_application
432  !! NAME
433  !!   kernel_application
434  !!
435  !! FUNCTION
436  !!    Multiply the FFT of the density by the FFT of the kernel
437  !!
438  !! SYNOPSIS
439  !!    zarray(:,:,:,:,inzee) : IN -> FFT of the density with the x dimension divided by two
440  !!                            (HalFFT), OUT -> FFT of the potential
441  !!    karray                : kernel FFT (real, 1/8 of the total grid)
442  !!    n1h,n2,n3             : dimension of the FFT grid for zarray
443  !!    nd1h,nd2,nd3          : dimensions of zarray
444  !!    nfft1,nfft2,nfft3     : original FFT grid dimensions, to be used for karray dimensions
445  !!
446  !! WARNING
447  !!    We use all the zarray vector, storing the auxiliary part using ouzee=3-inzee
448  !!    All the loop are unrolled such to avoid different conditions
449  !!    the "min" functions are substituted by kink computed with absolute values
450  !! AUTHOR
451  !!    Luigi Genovese
452  !! CREATION DATE
453  !!    March 2006
454  !!
455  !! SOURCE
456  !!
457  subroutine kernel_application(n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,zarray,karray,inzee)
458    integer, intent(in)    :: n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,inzee
459    real(8), intent(in)    :: karray(1:nfft1/2 + 1, 1:nfft2/2 + 1, 1:nfft3/2 + 1)
460    real(8), intent(inout) :: zarray(1:2, 1:nd1h, 1:nd2, 1:nd3, 1:2)
461
462    real(8), dimension(:), allocatable :: cos_array,sin_array
463    real(8) :: a,b,c,d,pi2,g1,cp,sp
464    real(8) :: rfe,ife,rfo,ifo,rk,ik,rk2,ik2,re,ro,ie,io,rhk,ihk
465    integer :: i1,i2,i3,j1,j2,j3, ouzee,n1h,n2h,n3h
466    integer :: si1,si2,si3
467
468    PUSH_SUB(kernel_application)
469
470    n1h = n1/2
471    n2h = n2/2
472    n3h = n3/2
473
474    SAFE_ALLOCATE(cos_array(1:n1h + 1))
475    SAFE_ALLOCATE(sin_array(1:n1h + 1))
476
477    pi2=8.d0*datan(1.d0)
478    pi2=pi2/real(n1,8)
479    do i1 = 1,n1h+1
480      cos_array(i1)=dcos(pi2*(i1-1))
481      sin_array(i1)=-dsin(pi2*(i1-1))
482    end do
483
484    ouzee=3-inzee
485
486    !--------------------------------------------!
487    !--- Starting reconstruction half -> full ---!
488    !--------------------------------------------!
489
490    !-------------Case i3 = 1
491    i3=1
492    j3=1
493    si3=1
494
495    !-------------Case i2 = 1, i3 = 1
496    i2=1
497    j2=1
498    si2=1
499
500    !Case i1 == 1
501    i1=1
502    si1=1
503    a=zarray(1,i1,i2,i3,inzee)
504    b=zarray(2,i1,i2,i3,inzee)
505    c=zarray(1,si1,si2,si3,inzee)
506    d=zarray(2,si1,si2,si3,inzee)
507    rfe=.5d0*(a+c)
508    ife=.5d0*(b-d)
509    rfo=.5d0*(a-c)
510    ifo=.5d0*(b+d)
511    cp=cos_array(i1)
512    sp=sin_array(i1)
513    rk=rfe+cp*ifo-sp*rfo
514    ik=ife-cp*rfo-sp*ifo
515    g1=karray(i1,j2,j3)
516    rk2=rk*g1
517    ik2=ik*g1
518
519    zarray(1,1,i2,i3,ouzee) = rk2
520    zarray(2,1,i2,i3,ouzee) = ik2
521
522    !Case i1=2,n1h
523    do i1=2,n1h
524      si1=n1h+2-i1
525
526      a=zarray(1,i1,i2,i3,inzee)
527      b=zarray(2,i1,i2,i3,inzee)
528      c=zarray(1,si1,si2,si3,inzee)
529      d=zarray(2,si1,si2,si3,inzee)
530      rfe=.5d0*(a+c)
531      ife=.5d0*(b-d)
532      rfo=.5d0*(a-c)
533      ifo=.5d0*(b+d)
534      cp=cos_array(i1)
535      sp=sin_array(i1)
536      rk=rfe+cp*ifo-sp*rfo
537      ik=ife-cp*rfo-sp*ifo
538      g1=karray(i1,j2,j3)
539      rk2=rk*g1
540      ik2=ik*g1
541
542      zarray(1,i1,i2,i3,ouzee) = rk2
543      zarray(2,i1,i2,i3,ouzee) = ik2
544    end do
545
546    !Case i1=n1h+1
547    i1=n1h+1
548    si1=n1h+2-i1
549
550    a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1
551    b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1
552    c=zarray(1,si1,si2,si3,inzee)
553    d=zarray(2,si1,si2,si3,inzee)
554    rfe=.5d0*(a+c)
555    ife=.5d0*(b-d)
556    rfo=.5d0*(a-c)
557    ifo=.5d0*(b+d)
558    cp=cos_array(i1)
559    sp=sin_array(i1)
560    rk=rfe+cp*ifo-sp*rfo
561    ik=ife-cp*rfo-sp*ifo
562    g1=karray(i1,j2,j3)
563    rk2=rk*g1
564    ik2=ik*g1
565
566    zarray(1,i1,i2,i3,ouzee) = rk2
567    zarray(2,i1,i2,i3,ouzee) = ik2
568    !-------------END case i2 = 1 , i3=1
569
570    !case i2 >=2
571    do i2=2,n2
572      j2=n2h+1-abs(n2h+1-i2)
573      si2=n2+2-i2 !if i2 /=1, otherwise si2=1
574
575      !Case i1 == 1
576      i1=1
577      si1=1
578      a=zarray(1,i1,i2,i3,inzee)
579      b=zarray(2,i1,i2,i3,inzee)
580      c=zarray(1,si1,si2,si3,inzee)
581      d=zarray(2,si1,si2,si3,inzee)
582      rfe=.5d0*(a+c)
583      ife=.5d0*(b-d)
584      rfo=.5d0*(a-c)
585      ifo=.5d0*(b+d)
586      cp=cos_array(i1)
587      sp=sin_array(i1)
588      rk=rfe+cp*ifo-sp*rfo
589      ik=ife-cp*rfo-sp*ifo
590      g1=karray(i1,j2,j3)
591      rk2=rk*g1
592      ik2=ik*g1
593
594      zarray(1,1,i2,i3,ouzee) = rk2
595      zarray(2,1,i2,i3,ouzee) = ik2
596
597      !Case i1=2,n1h
598      do i1=2,n1h
599        si1=n1h+2-i1
600
601        a=zarray(1,i1,i2,i3,inzee)
602        b=zarray(2,i1,i2,i3,inzee)
603        c=zarray(1,si1,si2,si3,inzee)
604        d=zarray(2,si1,si2,si3,inzee)
605        rfe=.5d0*(a+c)
606        ife=.5d0*(b-d)
607        rfo=.5d0*(a-c)
608        ifo=.5d0*(b+d)
609        cp=cos_array(i1)
610        sp=sin_array(i1)
611        rk=rfe+cp*ifo-sp*rfo
612        ik=ife-cp*rfo-sp*ifo
613        g1=karray(i1,j2,j3)
614        rk2=rk*g1
615        ik2=ik*g1
616
617        zarray(1,i1,i2,i3,ouzee) = rk2
618        zarray(2,i1,i2,i3,ouzee) = ik2
619      end do
620
621      !Case i1=n1h+1
622      i1=n1h+1
623      si1=n1h+2-i1
624
625      a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1
626      b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1
627      c=zarray(1,si1,si2,si3,inzee)
628      d=zarray(2,si1,si2,si3,inzee)
629      rfe=.5d0*(a+c)
630      ife=.5d0*(b-d)
631      rfo=.5d0*(a-c)
632      ifo=.5d0*(b+d)
633      cp=cos_array(i1)
634      sp=sin_array(i1)
635      rk=rfe+cp*ifo-sp*rfo
636      ik=ife-cp*rfo-sp*ifo
637      g1=karray(i1,j2,j3)
638      rk2=rk*g1
639      ik2=ik*g1
640
641      zarray(1,i1,i2,i3,ouzee) = rk2
642      zarray(2,i1,i2,i3,ouzee) = ik2
643    end do
644    !-------------END Case i3 = 1
645
646    !case i3 >=2
647    do i3=2,n3
648      j3=n3h+1-abs(n3h+1-i3)
649      si3=n3+2-i3 !if i3 /=1, otherwise si3=1
650
651      !-------------Case i2 = 1
652      i2=1
653      j2=1
654      si2=1
655
656      !Case i1 == 1
657      i1=1
658      si1=1
659      a=zarray(1,i1,i2,i3,inzee)
660      b=zarray(2,i1,i2,i3,inzee)
661      c=zarray(1,si1,si2,si3,inzee)
662      d=zarray(2,si1,si2,si3,inzee)
663      rfe=.5d0*(a+c)
664      ife=.5d0*(b-d)
665      rfo=.5d0*(a-c)
666      ifo=.5d0*(b+d)
667      cp=cos_array(i1)
668      sp=sin_array(i1)
669      rk=rfe+cp*ifo-sp*rfo
670      ik=ife-cp*rfo-sp*ifo
671      g1=karray(i1,j2,j3)
672      rk2=rk*g1
673      ik2=ik*g1
674
675      zarray(1,1,i2,i3,ouzee) = rk2
676      zarray(2,1,i2,i3,ouzee) = ik2
677
678      !Case i1=2,n1h
679      do i1=2,n1h
680        si1=n1h+2-i1
681
682        a=zarray(1,i1,i2,i3,inzee)
683        b=zarray(2,i1,i2,i3,inzee)
684        c=zarray(1,si1,si2,si3,inzee)
685        d=zarray(2,si1,si2,si3,inzee)
686        rfe=.5d0*(a+c)
687        ife=.5d0*(b-d)
688        rfo=.5d0*(a-c)
689        ifo=.5d0*(b+d)
690        cp=cos_array(i1)
691        sp=sin_array(i1)
692        rk=rfe+cp*ifo-sp*rfo
693        ik=ife-cp*rfo-sp*ifo
694        g1=karray(i1,j2,j3)
695        rk2=rk*g1
696        ik2=ik*g1
697
698        zarray(1,i1,i2,i3,ouzee) = rk2
699        zarray(2,i1,i2,i3,ouzee) = ik2
700      end do
701
702      !Case i1=n1h+1
703      i1=n1h+1
704      si1=n1h+2-i1
705
706      a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1
707      b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1
708      c=zarray(1,si1,si2,si3,inzee)
709      d=zarray(2,si1,si2,si3,inzee)
710      rfe=.5d0*(a+c)
711      ife=.5d0*(b-d)
712      rfo=.5d0*(a-c)
713      ifo=.5d0*(b+d)
714      cp=cos_array(i1)
715      sp=sin_array(i1)
716      rk=rfe+cp*ifo-sp*rfo
717      ik=ife-cp*rfo-sp*ifo
718      g1=karray(i1,j2,j3)
719      rk2=rk*g1
720      ik2=ik*g1
721
722      zarray(1,i1,i2,i3,ouzee) = rk2
723      zarray(2,i1,i2,i3,ouzee) = ik2
724      !-------------END case i2 = 1
725
726      !case i2 >=2
727      do i2=2,n2
728        j2=n2h+1-abs(n2h+1-i2)
729        si2=n2+2-i2 !if i2 /=1, otherwise si2=1
730
731        !Case i1 == 1
732        i1=1
733        si1=1
734        a=zarray(1,i1,i2,i3,inzee)
735        b=zarray(2,i1,i2,i3,inzee)
736        c=zarray(1,si1,si2,si3,inzee)
737        d=zarray(2,si1,si2,si3,inzee)
738        rfe=.5d0*(a+c)
739        ife=.5d0*(b-d)
740        rfo=.5d0*(a-c)
741        ifo=.5d0*(b+d)
742        cp=cos_array(i1)
743        sp=sin_array(i1)
744        rk=rfe+cp*ifo-sp*rfo
745        ik=ife-cp*rfo-sp*ifo
746        g1=karray(i1,j2,j3)
747        rk2=rk*g1
748        ik2=ik*g1
749
750        zarray(1,1,i2,i3,ouzee) = rk2
751        zarray(2,1,i2,i3,ouzee) = ik2
752
753        !Case i1=2,n1h
754        do i1=2,n1h
755          si1=n1h+2-i1
756
757          a=zarray(1,i1,i2,i3,inzee)
758          b=zarray(2,i1,i2,i3,inzee)
759          c=zarray(1,si1,si2,si3,inzee)
760          d=zarray(2,si1,si2,si3,inzee)
761          rfe=.5d0*(a+c)
762          ife=.5d0*(b-d)
763          rfo=.5d0*(a-c)
764          ifo=.5d0*(b+d)
765          cp=cos_array(i1)
766          sp=sin_array(i1)
767          rk=rfe+cp*ifo-sp*rfo
768          ik=ife-cp*rfo-sp*ifo
769          g1=karray(i1,j2,j3)
770          rk2=rk*g1
771          ik2=ik*g1
772
773          zarray(1,i1,i2,i3,ouzee) = rk2
774          zarray(2,i1,i2,i3,ouzee) = ik2
775        end do
776
777        !Case i1=n1h+1
778        i1=n1h+1
779        si1=n1h+2-i1
780
781        a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1
782        b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1
783        c=zarray(1,si1,si2,si3,inzee)
784        d=zarray(2,si1,si2,si3,inzee)
785        rfe=.5d0*(a+c)
786        ife=.5d0*(b-d)
787        rfo=.5d0*(a-c)
788        ifo=.5d0*(b+d)
789        cp=cos_array(i1)
790        sp=sin_array(i1)
791        rk=rfe+cp*ifo-sp*rfo
792        ik=ife-cp*rfo-sp*ifo
793        g1=karray(i1,j2,j3)
794        rk2=rk*g1
795        ik2=ik*g1
796
797        zarray(1,i1,i2,i3,ouzee) = rk2
798        zarray(2,i1,i2,i3,ouzee) = ik2
799      end do
800
801    end do
802
803
804    !--------------------------------------------!
805    !--- Starting reconstruction full -> half ---!
806    !--------------------------------------------!
807
808    !case i3=1
809    i3=1
810    j3=1
811    !case i2=1
812    i2=1
813    j2=1
814    do i1 = 1,n1h
815      j1=n1h+2-i1
816
817      a=zarray(1,i1,i2,i3,ouzee)
818      b=zarray(2,i1,i2,i3,ouzee)
819      c=zarray(1,j1,j2,j3,ouzee)
820      d=-zarray(2,j1,j2,j3,ouzee)
821      cp=cos_array(i1)
822      sp=sin_array(i1)
823      re=(a+c)
824      ie=(b+d)
825      ro=(a-c)*cp-(b-d)*sp
826      io=(a-c)*sp+(b-d)*cp
827      rhk=re-io
828      ihk=ie+ro
829
830      zarray(1,i1,i2,i3,inzee)=rhk
831      zarray(2,i1,i2,i3,inzee)=ihk
832    end do
833    !case i2 >= 2
834    do i2=2,n2
835      j2=nd2+1-i2
836      do i1 = 1,n1h
837        j1=n1h+2-i1
838
839        a=zarray(1,i1,i2,i3,ouzee)
840        b=zarray(2,i1,i2,i3,ouzee)
841        c=zarray(1,j1,j2,j3,ouzee)
842        d=-zarray(2,j1,j2,j3,ouzee)
843        cp=cos_array(i1)
844        sp=sin_array(i1)
845        re=(a+c)
846        ie=(b+d)
847        ro=(a-c)*cp-(b-d)*sp
848        io=(a-c)*sp+(b-d)*cp
849        rhk=re-io
850        ihk=ie+ro
851
852        zarray(1,i1,i2,i3,inzee)=rhk
853        zarray(2,i1,i2,i3,inzee)=ihk
854      end do
855    end do
856
857
858    !case i3 >=2
859    do i3=2,n3
860      j3=nd3+1-i3
861      !case i2=1
862      i2=1
863      j2=1
864      do i1 = 1,n1h
865        j1=n1h+2-i1
866
867        a=zarray(1,i1,i2,i3,ouzee)
868        b=zarray(2,i1,i2,i3,ouzee)
869        c=zarray(1,j1,j2,j3,ouzee)
870        d=-zarray(2,j1,j2,j3,ouzee)
871        cp=cos_array(i1)
872        sp=sin_array(i1)
873        re=(a+c)
874        ie=(b+d)
875        ro=(a-c)*cp-(b-d)*sp
876        io=(a-c)*sp+(b-d)*cp
877        rhk=re-io
878        ihk=ie+ro
879
880        zarray(1,i1,i2,i3,inzee)=rhk
881        zarray(2,i1,i2,i3,inzee)=ihk
882      end do
883      !case i2 >= 2
884      do i2=2,n2
885        j2=nd2+1-i2
886        do i1 = 1,n1h
887          j1=n1h+2-i1
888
889          a=zarray(1,i1,i2,i3,ouzee)
890          b=zarray(2,i1,i2,i3,ouzee)
891          c=zarray(1,j1,j2,j3,ouzee)
892          d=-zarray(2,j1,j2,j3,ouzee)
893          cp=cos_array(i1)
894          sp=sin_array(i1)
895          re=(a+c)
896          ie=(b+d)
897          ro=(a-c)*cp-(b-d)*sp
898          io=(a-c)*sp+(b-d)*cp
899          rhk=re-io
900          ihk=ie+ro
901
902          zarray(1,i1,i2,i3,inzee)=rhk
903          zarray(2,i1,i2,i3,inzee)=ihk
904        end do
905      end do
906
907    end do
908
909    !De-allocations
910    SAFE_DEALLOCATE_A(cos_array)
911    SAFE_DEALLOCATE_A(sin_array)
912
913    POP_SUB(kernel_application)
914  end subroutine kernel_application
915
916  !!****h* BigDFT/norm_ind
917  !! NAME
918  !!   norm_ind
919  !!
920  !! FUNCTION
921  !!   Index in zarray
922  !!
923  !! SOURCE
924  !!
925  subroutine norm_ind(nd1,nd2,nd3,i1,i2,i3,ind)
926    integer :: nd1,nd2,nd3,i1,i2,i3
927    integer :: ind
928
929    !Local variables
930    integer :: a1,a2,a3
931    if ( i1 == nd1 ) then
932      a1=1
933    else
934      a1=i1
935    end if
936    if ( i2 == nd2 ) then
937      a2=1
938    else
939      a2=i2
940    end if
941    if ( i3 == nd3 ) then
942      a3=1
943    else
944      a3=i3
945    end if
946    ind=a1+nd1*(a2-1)+nd1*nd2*(a3-1)
947  end subroutine norm_ind
948  !!***
949
950
951  !!****h* BigDFT/symm_ind
952  !! NAME
953  !!   symm_ind
954  !!
955  !! FUNCTION
956  !!   Index in zarray for -g vector
957  !!
958  !! SOURCE
959  !!
960  subroutine symm_ind(nd1,nd2,nd3,i1,i2,i3,ind)
961    integer :: nd1,nd2,nd3,i1,i2,i3
962    integer :: ind
963
964    integer ::  a1,a2,a3
965    if (i1 /= 1) then
966      a1=nd1+1-i1
967    else
968      a1=i1
969    end if
970    if (i2 /= 1) then
971      a2=nd2+1-i2
972    else
973      a2=i2
974    end if
975    if (i3 /= 1) then
976      a3=nd3+1-i3
977    else
978      a3=i3
979    end if
980    ind=a1+nd1*(a2-1)+nd1*nd2*(a3-1)
981  end subroutine symm_ind
982  !!***
983
984  !!****h* BigDFT/zarray_in
985  !! NAME
986  !!   zarray_in
987  !!
988  !! FUNCTION
989  !!   Put the density into zarray
990  !!
991  !! SOURCE
992  !!
993  subroutine zarray_in(n01,n02,n03,nd1,nd2,nd3,density,zarray)
994    integer :: n01,n02,n03,nd1,nd2,nd3
995    real(8), dimension(n01,n02,n03) :: density
996    real(8), dimension(2,nd1,nd2,nd3) :: zarray
997
998    integer :: i1,i2,i3,n01h,nd1hm,nd3hm,nd2hm
999
1000    PUSH_SUB(zarray_in)
1001
1002    !Half the size of n01
1003    n01h=n01/2
1004    nd1hm=(nd1-1)/2
1005    nd2hm=(nd2-1)/2
1006    nd3hm=(nd3-1)/2
1007    !Set to zero
1008    do i3 = 1,nd3
1009      do i2 = 1,nd2
1010        do i1 = 1,nd1
1011          zarray(1,i1,i2,i3) = 0.0_8
1012          zarray(2,i1,i2,i3) = 0.0_8
1013        end do
1014      end do
1015    end do
1016    !Set zarray
1017    do i3 = 1,n03
1018      do i2 = 1,n02
1019        do i1 = 1,n01h
1020          zarray(1,i1+nd1hm,i2+nd2hm,i3+nd3hm) = density(2*i1-1,i2,i3)
1021          zarray(2,i1+nd1hm,i2+nd2hm,i3+nd3hm) = density(2*i1,i2,i3)
1022        end do
1023      end do
1024    end do
1025    if(modulo(n01,2) == 1) then
1026      do i3 = 1,n03
1027        do i2 = 1,n02
1028          zarray(1,n01h+1+nd1hm,i2+nd2hm,i3+nd3hm) = density(n01,i2,i3)
1029        end do
1030      end do
1031    end if
1032
1033    POP_SUB(zarray_in)
1034  end subroutine zarray_in
1035  !!***
1036
1037
1038  !!****h* BigDFT/zarray_out
1039  !! NAME
1040  !!   zarray_out
1041  !!
1042  !! FUNCTION
1043  !!   Set the potential (rhopot) from zarray
1044  !!   Calculate the Hartree energy.
1045  !!
1046  !! SOURCE
1047  !!
1048  subroutine zarray_out(n01, n02, n03, nd1, nd2, nd3, rhopot, zarray, factor)
1049    integer, intent(in)    :: n01,n02,n03,nd1,nd2,nd3
1050    real(8), intent(out)   :: rhopot(n01,n02,n03)
1051    real(8), intent(in)    :: zarray(2*nd1,nd2,nd3)  ! Convert zarray(2,nd1,nd2,nd3) -> zarray(2*nd1,nd2,nd3) to use i1=1,n01
1052                                                     ! instead of i1=1,n1h + special case for modulo(n01,2)
1053    real(8), intent(in)    :: factor
1054
1055    integer :: i1,i2,i3
1056
1057    PUSH_SUB(zarray_out)
1058
1059    do i3 = 1,n03
1060      do i2 = 1,n02
1061        do i1 = 1,n01
1062          rhopot(i1, i2, i3) = factor*zarray(i1,i2,i3)
1063        end do
1064      end do
1065    end do
1066
1067    POP_SUB(zarray_out)
1068  end subroutine zarray_out
1069  !!***
1070
1071  !!****h* BigDFT/build_kernel
1072  !! NAME
1073  !!   build_kernel
1074  !!
1075  !! FUNCTION
1076  !!    Build the kernel of a gaussian function
1077  !!    for interpolating scaling functions.
1078  !!
1079  !! SYNOPSIS
1080  !!    Build the kernel (karrayout) of a gaussian function
1081  !!    for interpolating scaling functions
1082  !!    $$ K(j) = \int \int \phi(x) g(x`-x) \delta(x`- j) dx dx` $$
1083  !!
1084  !!    n01,n02,n03     Mesh dimensions of the density
1085  !!    n1k,n2k,n3k     Dimensions of the kernel
1086  !!    hgrid           Mesh step
1087  !!    itype_scf       Order of the scaling function (8,14,16)
1088  !!
1089  !! AUTHORS
1090  !!    T. Deutsch, L. Genovese
1091  !! COPYRIGHT
1092  !!    Copyright (C) 2005 CEA
1093  !! CREATION DATE
1094  !!    13/07/2005
1095  !!
1096  !! MODIFICATION HISTORY
1097  !!    13/09/2005 Use hgrid instead of acell
1098  !!    09/12/2005 Real kernel, stocked only half components
1099  !!    13/12/2005 Add routines to simplify the port into Stefan`s code
1100  !!
1101  !! SOURCE
1102  !!
1103  subroutine build_kernel(n01,n02,n03,nfft1,nfft2,nfft3,hgrid,itype_scf,karrayout)
1104    integer, intent(in) :: n01,n02,n03,nfft1,nfft2,nfft3,itype_scf
1105    real(8), intent(in) :: hgrid
1106    real(8), dimension(nfft1/2+1,nfft2/2+1,nfft3/2+1), intent(out) :: karrayout
1107
1108    !Do not touch !!!!
1109    integer, parameter :: N_GAUSS = 89
1110    !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
1111    integer, parameter :: n_points = 2**6
1112
1113    !Better p_gauss for calculation
1114    !(the support of the exponential should be inside [-n_range/2,n_range/2])
1115    real(8), parameter :: p0_ref = 1.d0
1116    real(8), dimension(N_GAUSS) :: p_gauss,w_gauss
1117
1118    real(8), allocatable :: kernel_scf(:), kern_1_scf(:)
1119    real(8), allocatable :: x_scf(:), y_scf(:)
1120    real(8), allocatable :: karrayhalf(:, :, :)
1121
1122    real(8) :: ur_gauss,dr_gauss,acc_gauss,pgauss,kern,a_range
1123    real(8) :: factor,factor2,dx,absci,p0gauss,p0_cell
1124    real(8) :: a1,a2,a3
1125    integer :: nd1,nd2,nd3,n1k,n2k,n3k,n_scf
1126    integer :: i_gauss,n_range,n_cell
1127    integer :: i,n_iter,i1,i2,i3,i_kern
1128    integer :: i01,i02,i03,inkee,n1h,n2h,n3h,nd1h
1129
1130    PUSH_SUB(build_kernel)
1131
1132    !Number of integration points : 2*itype_scf*n_points
1133    n_scf=2*itype_scf*n_points
1134    !Dimensions of Kernel
1135    n1k=nfft1/2+1 ; n2k=nfft2/2+1 ; n3k=nfft3/2+1
1136    n1h=nfft1/2  ; n2h=nfft2/2 ; n3h=nfft3/2
1137    nd1 = nfft1 + modulo(nfft1+1,2)
1138    nd2 = nfft2 + modulo(nfft2+1,2)
1139    nd3 = nfft3 + modulo(nfft3+1,2)
1140
1141    !Half size for the half FFT
1142    nd1h=(nd1+1)/2
1143
1144    !Allocations
1145    SAFE_ALLOCATE(x_scf(0:n_scf))
1146    SAFE_ALLOCATE(y_scf(0:n_scf))
1147
1148    !Build the scaling function
1149    call scaling_function(itype_scf,n_scf,n_range,x_scf,y_scf)
1150    !Step grid for the integration
1151    dx = real(n_range,8)/real(n_scf,8)
1152    !Extend the range (no more calculations because fill in by 0.0_8)
1153    n_cell = max(n01,n02,n03)
1154    n_range = max(n_cell,n_range)
1155
1156    !Allocations
1157    SAFE_ALLOCATE(kernel_scf(-n_range:n_range))
1158    SAFE_ALLOCATE(kern_1_scf(-n_range:n_range))
1159
1160    !Lengthes of the box (use FFT dimension)
1161    a1 = hgrid * real(n01,8)
1162    a2 = hgrid * real(n02,8)
1163    a3 = hgrid * real(n03,8)
1164
1165    x_scf(:) = hgrid * x_scf(:)
1166    y_scf(:) = 1.d0/hgrid * y_scf(:)
1167    dx = hgrid * dx
1168    !To have a correct integration
1169    p0_cell = p0_ref/(hgrid*hgrid)
1170
1171    !Initialisation of the gaussian (Beylkin)
1172    call gequad(N_GAUSS,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss)
1173    !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3)
1174    !(biggest length in the cube)
1175    !We divide the p_gauss by a_range**2 and a_gauss by a_range
1176    a_range = sqrt(a1*a1+a2*a2+a3*a3)
1177    factor = 1.d0/a_range
1178    !factor2 = factor*factor
1179    factor2 = 1.d0/(a1*a1+a2*a2+a3*a3)
1180    do i_gauss=1,N_GAUSS
1181      p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
1182    end do
1183    do i_gauss=1,N_GAUSS
1184      w_gauss(i_gauss) = factor*w_gauss(i_gauss)
1185    end do
1186
1187    karrayout(:,:,:) = 0.0_8
1188
1189    !Use in this order (better for accuracy).
1190    loop_gauss: do i_gauss=N_GAUSS,1,-1
1191      !Gaussian
1192      pgauss = p_gauss(i_gauss)
1193
1194      !We calculate the number of iterations to go from pgauss to p0_ref
1195      n_iter = nint((log(pgauss) - log(p0_cell))/log(4.d0))
1196      if (n_iter <= 0)then
1197        n_iter = 0
1198        p0gauss = pgauss
1199      else
1200        p0gauss = pgauss/4.d0**n_iter
1201      end if
1202
1203      !Stupid integration
1204      !Do the integration with the exponential centered in i_kern
1205      kernel_scf(:) = 0.0_8
1206      do i_kern=0,n_range
1207        kern = 0.0_8
1208        do i=0,n_scf
1209          absci = x_scf(i) - real(i_kern,8)*hgrid
1210          absci = absci*absci
1211          kern = kern + y_scf(i)*exp(-p0gauss*absci)*dx
1212        end do
1213        kernel_scf(i_kern) = kern
1214        kernel_scf(-i_kern) = kern
1215        if (abs(kern) < 1.d-18) then
1216          !Too small not useful to calculate
1217          exit
1218        end if
1219      end do
1220
1221      !Start the iteration to go from p0gauss to pgauss
1222      call scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf)
1223
1224      !Add to the kernel.
1225      do i3 = 1,n03
1226        i03 = i3-1
1227        do i2 = 1,n02
1228          i02 = i2-1
1229          do i1 = 1,n01
1230            i01 = i1-1
1231            karrayout(i1,i2,i3) = karrayout(i1,i2,i3) + w_gauss(i_gauss)* &
1232              kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
1233          end do
1234        end do
1235      end do
1236
1237    end do loop_gauss
1238
1239    SAFE_DEALLOCATE_A(kernel_scf)
1240    SAFE_DEALLOCATE_A(kern_1_scf)
1241    SAFE_DEALLOCATE_A(x_scf)
1242    SAFE_DEALLOCATE_A(y_scf)
1243
1244    !Set karray
1245    SAFE_ALLOCATE(karrayhalf(1:2, 1:nd1h*nd2*nd3, 1:2))
1246
1247    !Set karray : use mirror symmetries
1248    inkee=1
1249    call karrayhalf_in(n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,&
1250      karrayout,karrayhalf)
1251    call fft(n1h,nfft2,nfft3,nd1h,nd2,nd3,karrayhalf,1,inkee)
1252    !Reconstruct the real kernel
1253    call kernel_recon(n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,&
1254      karrayhalf(1,1,inkee),karrayout)
1255
1256    SAFE_DEALLOCATE_A(karrayhalf)
1257    POP_SUB(build_kernel)
1258  end subroutine Build_Kernel
1259  !!***
1260
1261
1262  !!****h* BigDFT/calculate_dimensions
1263  !! NAME
1264  !!   calculate_dimensions
1265  !!
1266  !! FUNCTION
1267  !!   Give the dimensions of the FFT
1268  !!
1269  !! SOURCE
1270  !!
1271  subroutine calculate_dimensions(n01,n02,n03,nfft1,nfft2,nfft3)
1272    integer, intent(in) :: n01,n02,n03
1273    integer, intent(out) :: nfft1,nfft2,nfft3
1274
1275    integer :: i1,i2,i3,l1
1276
1277    PUSH_SUB(calculate_dimensions)
1278
1279    !Test 2*n01, 2*n02, 2*n03
1280    !write(*,*) 'in dimensions_fft',n01,n02,n03
1281    i1=2*n01
1282    i2=2*n02
1283    i3=2*n03
1284    do
1285      call fourier_dim(i1,nfft1)
1286      call fourier_dim(nfft1/2,l1)
1287      if (modulo(nfft1,2) == 0 .and. modulo(l1,2) == 0 .and. 2*l1 == nfft1) then
1288        exit
1289      end if
1290      i1=i1+1
1291    end do
1292    do
1293      call fourier_dim(i2,nfft2)
1294      if (modulo(nfft2,2) == 0) then
1295        exit
1296      end if
1297      i2=i2+1
1298    end do
1299    do
1300      call fourier_dim(i3,nfft3)
1301      if (modulo(nfft3,2) == 0) then
1302        exit
1303      end if
1304      i3=i3+1
1305    end do
1306    !nd1 = nfft1 + modulo(nfft1+1,2)
1307    !nd2 = nfft2 + modulo(nfft2+1,2)
1308    !nd3 = nfft3 + modulo(nfft3+1,2)
1309    !write(*,*) 'out dimensions_fft',nfft1,nfft2,nfft3
1310
1311    POP_SUB(calculate_dimensions)
1312  end subroutine calculate_dimensions
1313  !!***
1314
1315
1316  !!****h* BigDFT/karrayhalf_in
1317  !! NAME
1318  !!   karrayhalf_in
1319  !!
1320  !! FUNCTION
1321  !!    Put in the array for4446666444 FFT
1322  !!
1323  !! SOURCE
1324  !!
1325  subroutine karrayhalf_in(n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3, kernel,karrayhalf)
1326    integer, intent(in) :: n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3
1327    real(8), dimension(n1k,n2k,n3k), intent(in) :: kernel
1328    real(8), dimension(2,(nd1+1)/2,nd2,nd3), intent(out) :: karrayhalf
1329
1330    real(8), dimension(:), allocatable :: karray
1331    integer :: i1,i2,i3,nd1h,n1h,n2h,n3h
1332
1333    PUSH_SUB(karrayhalf_in)
1334
1335    !Body
1336    n1h=nfft1/2
1337    n2h=nfft2/2
1338    n3h=nfft3/2
1339
1340    SAFE_ALLOCATE(karray(1:nfft1))
1341
1342    nd1h=(nd1+1)/2
1343    karrayhalf(:,:,:,:) = 0.0_8
1344    do i3 = 1,n03
1345      do i2 = 1,n02
1346        karray(:) = 0.0_8
1347        do i1 = 1,n01
1348          karray(i1+n1h) = kernel(i1,i2,i3)
1349        end do
1350        do i1 = 2,n01
1351          karray(n1h-i1+1+nd1-nfft1) = kernel(i1,i2,i3)
1352        end do
1353        do i1 = 1,n1h
1354          karrayhalf(1,i1,i2+n2h,i3+n3h) = karray(2*i1-1)
1355          karrayhalf(2,i1,i2+n2h,i3+n3h) = karray(2*i1)
1356        end do
1357      end do
1358      do i2 = 2,n02
1359        do i1 = 1,nd1h
1360          karrayhalf(:,i1,n2h-i2+1+nd2-nfft2,i3+n3h) = &
1361            karrayhalf(:,i1,i2+n2h,i3+n3h)
1362        end do
1363      end do
1364    end do
1365    do i3 = 2,n03
1366      do i2 = 1,nd2
1367        do i1 = 1,nd1h
1368          karrayhalf(:,i1,i2,n3h-i3+1+nd3-nfft3) = karrayhalf(:,i1,i2,i3+n3h)
1369        end do
1370      end do
1371    end do
1372
1373    SAFE_DEALLOCATE_A(karray)
1374    POP_SUB(karrayhalf_in)
1375  end subroutine karrayhalf_in
1376  !!***
1377
1378
1379  !!****h* BigDFT/kernel_recon
1380  !! NAME
1381  !!   kernel_recon
1382  !!
1383  !! FUNCTION
1384  !!    Reconstruction of the kernel from the FFT array zarray
1385  !!    We keep only the half kernel in each direction (x,y,z).
1386  !!
1387  !! SOURCE
1388  !!
1389  subroutine kernel_recon(n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,zarray,karray)
1390    integer, intent(in) :: n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3
1391    real(8), dimension(2,(nd1+1)/2*nd2*nd3), intent(in) :: zarray
1392    real(8), dimension(n1k,n2k,n3k), intent(out) :: karray
1393
1394    real(8), dimension(:), allocatable :: cos_array,sin_array
1395    integer :: i1,i2,i3,ind1,ind2,nd1h,n1h,n2h,n3h
1396    real(8) :: rfe,ife,rfo,ifo,cp,sp,rk,ik,a,b,c,d,pi2
1397
1398    PUSH_SUB(kernel_recon)
1399
1400    !Body
1401    n1h=nfft1/2
1402    n2h=nfft2/2
1403    n3h=nfft3/2
1404    nd1h=(nd1+1)/2
1405    pi2=8.d0*datan(1.d0)
1406    pi2=pi2/real(nfft1,8)
1407
1408    SAFE_ALLOCATE(cos_array(1:nd1h))
1409    SAFE_ALLOCATE(sin_array(1:nd1h))
1410
1411    do i1 = 1,nd1h
1412      cos_array(i1)= dcos(pi2*(i1-1))
1413      sin_array(i1)=-dsin(pi2*(i1-1))
1414    end do
1415    do i3 = 1,n3h+1
1416      do i2 = 1,n2h+1
1417        do i1 = 1,nd1h
1418          call norm_ind(nd1h,nd2,nd3,i1,i2,i3,ind1)
1419          call symm_ind(nd1h,nd2,nd3,i1,i2,i3,ind2)
1420          a=zarray(1,ind1)
1421          b=zarray(2,ind1)
1422          c=zarray(1,ind2)
1423          d=zarray(2,ind2)
1424          rfe=0.5d0*(a+c)
1425          ife=0.5d0*(b-d)
1426          rfo=0.5d0*(a-c)
1427          ifo=0.5d0*(b+d)
1428          cp=cos_array(i1)
1429          sp=sin_array(i1)
1430          rk=rfe+cp*ifo-sp*rfo
1431          ik=ife-cp*rfo-sp*ifo
1432          !For big dimension 1.d-9 otherwise 1.d-10
1433          !Remove the test
1434          !if(abs(ik) >= 1.d-10) then
1435          !   print *,"non real kernel FFT",i1,i2,i3,ik
1436          !   stop
1437          !end if
1438          !Build the intermediate FFT convolution (full)
1439          !call norm_ind(nd1,nd2,nd3,i1,i2,i3,indA)
1440          karray(i1,i2,i3)=rk
1441        end do
1442      end do
1443    end do
1444
1445    SAFE_DEALLOCATE_A(cos_array)
1446    SAFE_DEALLOCATE_A(sin_array)
1447
1448    POP_SUB(kernel_recon)
1449  end subroutine kernel_recon
1450  !!***
1451
1452  !!****h* BigDFT/par_calculate_dimensions
1453  !! NAME
1454  !!   par_calculate_dimensions
1455  !!
1456  !! FUNCTION
1457  !!    Calculate four sets of dimension needed for the calculation of the
1458  !!    zero-padded convolution
1459  !!
1460  !! SYNOPSIS
1461  !!    n01,n02,n03 original real dimensions (input)
1462  !!
1463  !!    m1,m2,m3 original real dimension with the dimension 2 and 3 exchanged
1464  !!
1465  !!    n1,n2 the first FFT even dimensions greater that 2*m1, 2*m2
1466  !!    n3    the double of the first FFT even dimension greater than m3
1467  !!          (improved for the HalFFT procedure)
1468  !!
1469  !!    md1,md2,md3 half of n1,n2,n3 dimension. They contain the real unpadded space,
1470  !!                properly enlarged to be compatible with the FFT dimensions n_i.
1471  !!                md2 is further enlarged to be a multiple of nproc
1472  !!
1473  !!    nd1,nd2,nd3 fourier dimensions for which the kernel FFT is injective,
1474  !!                formally 1/8 of the fourier grid. Here the dimension nd3 is
1475  !!                enlarged to be a multiple of nproc
1476  !!
1477  !! WARNING
1478  !!    The dimension m2 and m3 correspond to n03 and n02 respectively
1479  !!    this is needed since the convolution routine manage arrays of dimension
1480  !!    (md1,md3,md2/nproc)
1481  !!
1482  !! AUTHOR
1483  !!    Luigi Genovese
1484  !! CREATION DATE
1485  !!    February 2006
1486  !!
1487  !! SOURCE
1488  !!
1489  subroutine par_calculate_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3, md1,md2,md3,nd1,nd2,nd3,nproc)
1490    integer, intent(in) :: n01,n02,n03,nproc
1491    integer, intent(out) :: m1,m2,m3,n1,n2,n3,md1,md2,md3,nd1,nd2,nd3
1492
1493    integer :: l1,l2,l3
1494
1495    PUSH_SUB(par_calculate_dimensions)
1496
1497    !dimensions of the density in the real space, inverted for convenience
1498
1499    m1=n01
1500    m2=n03
1501    m3=n02
1502
1503    ! real space grid dimension (suitable for number of processors)
1504
1505    !     n1=2*m1; n2=2*m2; n3=2*m3
1506
1507    l1=2*m1
1508    l2=2*m2
1509    l3=m3 !beware of the half dimension
1510    do
1511      !this is for the FFT of the kernel
1512      !one can erase it when the kernel is parallelized
1513      call fourier_dim(l1,n1)
1514      !call fourier_dim(n1/2,l1A)
1515      if (modulo(n1,2) == 0&! .and. 2*l1A == n1
1516        ) then
1517        exit
1518      end if
1519      l1=l1+1
1520    end do
1521    do
1522      call fourier_dim(l2,n2)
1523      if (modulo(n2,2) == 0) then
1524        exit
1525      end if
1526      l2=l2+1
1527    end do
1528    do
1529      call fourier_dim(l3,n3)
1530      !call fourier_dim(n3/2,l3A)
1531      if (modulo(n3,2) == 0 &!.and. 2*l3A == n3 .and. modulo(l3A,2) == 0
1532        ) then
1533        exit
1534      end if
1535      l3=l3+1
1536    end do
1537    n3=2*n3
1538
1539    !dimensions that contain the unpadded real space,
1540    ! compatible with the number of processes
1541    md1=n1/2
1542    md2=n2/2
1543    md3=n3/2
1544151 if (nproc*(md2/nproc) < n2/2) then
1545      md2=md2+1
1546      goto 151
1547    end if
1548
1549
1550    !dimensions of the kernel, 1/8 of the total volume,
1551    !compatible with nproc
1552
1553    nd1=n1/2+1;  nd2=n2/2+1
1554    nd3=n3/2+1
1555250 if (modulo(nd3,nproc) /= 0) then
1556      nd3=nd3+1
1557      goto 250
1558    end if
1559
1560    POP_SUB(par_calculate_dimensions)
1561  end subroutine par_calculate_dimensions
1562  !!***
1563
1564
1565  !!****h* BigDFT/par_psolver_kernel
1566  !! NAME
1567  !!   par_psolver_kernel
1568  !!
1569  !! FUNCTION
1570  !!    Solver of Poisson equation applying a kernel, parallel computation
1571  !!
1572  !! SYNOPSIS
1573  !!    Poisson solver applying a kernel and
1574  !!    using Fourier transform for the convolution.
1575  !!    rhopot : input  -> the density
1576  !!             output -> the Hartree potential + pot_ion
1577  !!    All the processes manage the same global rhopot array
1578  !!    The potential pot_ion is ADDED in the array rhopot.
1579  !!    Calculate also the Hartree potential
1580  !!
1581  !!    Replaces the charge density contained in rhopot
1582  !!    by the Hartree stored as well in rhopot.
1583  !!    If xc_on is true, it also adds the XC potential and
1584  !!    ionic potential pot_ion
1585  !!
1586  !!    kernelLOC: the kernel in fourier space, calculated from ParBuil_Kernel routine
1587  !!               it is a local vector (each process have its own part)
1588  !!
1589  !!    comm: MPI communicator to use
1590  !!
1591  !!    We double the size of the mesh except in one dimension
1592  !!    in order to use the property of the density to be real.
1593  !! WARNING
1594  !!
1595  !! AUTHOR
1596  !!    Luigi Genovese
1597  !! CREATION DATE
1598  !!    February 2006
1599  !!
1600  !! SOURCE
1601  !!
1602  subroutine par_psolver_kernel(n01, n02, n03, nd1, nd2, nd3, hgrid, kernelLOC, rhopot, iproc, nproc, comm)
1603    integer, intent(in)  :: n01,n02,n03,iproc,nproc
1604    integer, intent(inout) :: nd1,nd2,nd3
1605    real(8), intent(in) :: hgrid
1606    real(8), intent(in), dimension(nd1,nd2,nd3/nproc) :: kernelLOC
1607    real(8), intent(inout), dimension(n01,n02,n03) :: rhopot
1608    integer, intent(in) :: comm
1609
1610    integer :: m1,m2,m3,n1,n2,n3,md1,md2,md3
1611
1612    PUSH_SUB(par_psolver_kernel)
1613
1614    call par_calculate_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc)
1615    call pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelLOC, hgrid, comm)
1616
1617    POP_SUB(par_psolver_kernel)
1618  end subroutine par_psolver_kernel
1619  !!***
1620
1621
1622  !!****h* BigDFT/pconvxc_off
1623  !! NAME
1624  !!   pconvxc_off
1625  !!
1626  !! FUNCTION
1627  !!    Calculate the parallel convolution with the kernel
1628  !!    without the exchange-correlation part
1629  !!
1630  !! SYNOPSIS
1631  !!    Poisson solver applying a kernel and
1632  !!    using Fourier transform for the convolution.
1633  !!    rhopot : input  -> the density
1634  !!             output -> the Hartree potential + pot_ion
1635  !!    All the processes manage the same global rhopot array
1636  !!    The potential pot_ion is ADDED in the array rhopot.
1637  !!    Calculate also the Hartree potential
1638  !!
1639  !!    Replaces the charge density contained in rhopot
1640  !!    by the Hartree stored as well in rhopot.
1641  !!
1642  !!    kernelLOC: the kernel in fourier space, calculated from ParBuild_Kernel routine
1643  !!               it is a local vector (each process have its own part)
1644  !!
1645  !!    comm: MPI communicator to use
1646  !!
1647  !!    We double the size of the mesh except in one dimension
1648  !!    in order to use the property of the density to be real.
1649  !! WARNING
1650  !!
1651  !! AUTHOR
1652  !!    Luigi Genovese
1653  !! CREATION DATE
1654  !!    February 2006
1655  !!
1656  !! SOURCE
1657  !!
1658  subroutine pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelloc, hgrid, comm)
1659    integer, intent(in) :: m1,m2,m3,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,iproc,nproc
1660    real(8), dimension(nd1,nd2,nd3/nproc), intent(in) :: kernelloc
1661    real(8), dimension(m1,m3,m2), intent(inout) :: rhopot
1662    real(8), intent(in) :: hgrid
1663    integer, intent(in) :: comm
1664
1665#if defined(HAVE_MPI)
1666    !Local variables
1667    integer :: istart,iend,jend,jproc
1668    real(8) :: scal
1669    real(8), dimension(:,:,:), allocatable :: zf, lrhopot(:, :, :)
1670    integer, dimension(:,:), allocatable :: gather_arr
1671    type(profile_t), save :: prof
1672
1673    PUSH_SUB(pconvxc_off)
1674
1675    !factor to be used to keep unitarity
1676    scal=hgrid**3/real(n1*n2*n3,8)
1677
1678    SAFE_ALLOCATE(zf(1:md1, 1:md3, 1:md2/nproc))
1679    SAFE_ALLOCATE(gather_arr(0:nproc-1, 1:2))
1680
1681    !Here we insert the process-related values of the density, starting from the total density
1682    call enterdensity(rhopot(1,1,1), m1, m2, m3, md1, md2, md3, iproc, nproc, zf(1,1,1))
1683
1684    !this routine builds the values for each process of the potential (zf), multiplying by the factor
1685    call convolxc_off(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, kernelloc, zf, scal, comm)
1686
1687    !building the array of the data to be sent from each process
1688    !and the array of the displacement
1689    do jproc = 0, nproc - 1
1690      istart = min(jproc*(md2/nproc),m2-1)
1691      jend = max(min(md2/nproc, m2 - md2/nproc*jproc), 0)
1692      gather_arr(jproc, 1) = m1*m3*jend
1693      gather_arr(jproc, 2) = istart*m1*m3
1694    end do
1695
1696    !assign the distributed density to the rhopot array
1697    istart=min(iproc*(md2/nproc),m2-1)
1698    jend=max(min(md2/nproc,m2-md2/nproc*iproc),0)
1699    iend=istart+jend
1700
1701    if(jend == 0) jend = 1
1702
1703    SAFE_ALLOCATE(lrhopot(1:m1, 1:m3, 1:jend))
1704
1705    lrhopot(1:m1, 1:m3, 1:jend) = zf(1:m1, 1:m3, 1:jend)
1706
1707    call profiling_in(prof, "ISF_GATHER")
1708    call MPI_Allgatherv(lrhopot(1, 1, 1), gather_arr(iproc, 1), MPI_DOUBLE_PRECISION, rhopot(1, 1, 1), gather_arr(0, 1),&
1709      gather_arr(0, 2), MPI_DOUBLE_PRECISION, comm, mpi_err)
1710    call profiling_out(prof)
1711
1712    SAFE_DEALLOCATE_A(zf)
1713    SAFE_DEALLOCATE_A(lrhopot)
1714    SAFE_DEALLOCATE_A(gather_arr)
1715
1716    POP_SUB(pconvxc_off)
1717#endif
1718  end subroutine pconvxc_off
1719!!***
1720
1721
1722  !!****h* BigDFT/enterdensity
1723  !! NAME
1724  !!   enterdensity
1725  !!
1726  !! FUNCTION
1727  !!
1728  !!   Define a real space process-dependent vector zf with the global dimensions that are half of the FFT grid
1729  !!   in order to perform convolution. The dimension md2 is a multiple of nproc
1730  !!   Can be used also to define the local part of pot_ion
1731  !!
1732  !! AUTHOR
1733  !!    L. Genovese
1734  !! CREATION DATE
1735  !!    February 2006
1736  !!
1737  !! SOURCE
1738  !!
1739  subroutine enterdensity(rhopot,m1,m2,m3,md1,md2,md3,iproc,nproc,zf)
1740    integer, intent(in) :: m1,m2,m3,md1,md2,md3,iproc,nproc
1741    real(8), dimension(0:md1-1,0:md3-1,0:md2/nproc-1), intent(out) :: zf
1742    real(8), dimension(0:m1-1,0:m3-1,0:m2-1), intent(in) :: rhopot
1743
1744    integer :: j1,j2,j3,jp2
1745
1746    PUSH_SUB(enterdensity)
1747
1748    !Body
1749    do jp2=0,md2/nproc-1
1750      j2=iproc*(md2/nproc)+jp2
1751      if (j2 <= m2-1) then
1752        do j3=0,m3-1
1753          do j1=0,m1-1
1754            zf(j1,j3,jp2)=rhopot(j1,j3,j2)
1755          end do
1756          do j1=m1,md1-1
1757            zf(j1,j3,jp2)=0.0_8
1758          end do
1759        end do
1760        do j3=m3,md3-1
1761          do j1=0,md1-1
1762            zf(j1,j3,jp2)=0.0_8
1763          end do
1764        end do
1765      else
1766        do j3=0,md3-1
1767          do j1=0,md1-1
1768            zf(j1,j3,jp2)=0.0_8
1769          end do
1770        end do
1771      end if
1772    end do
1773
1774    POP_SUB(enterdensity)
1775  end subroutine enterdensity
1776  !!***
1777
1778
1779  !!****h* BigDFT/par_build_kernel
1780  !! NAME
1781  !!   par_build_kernel
1782  !!
1783  !! FUNCTION
1784  !!    Build the kernel of a gaussian function
1785  !!    for interpolating scaling functions.
1786  !!    Do the parallel HalFFT of the symmetrized function and stores into
1787  !!    memory only 1/8 of the grid divided by the number of processes nproc
1788  !!
1789  !! SYNOPSIS
1790  !!    Build the kernel (karray) of a gaussian function
1791  !!    for interpolating scaling functions
1792  !!    $$ K(j) = \sum_k \omega_k \int \int \phi(x) g_k(y-x) \delta(y-j) dx dy $$
1793  !!
1794  !!    n01,n02,n03        Mesh dimensions of the density
1795  !!    nfft1,nfft2,nfft3  Dimensions of the FFT grid (HalFFT in the third direction)
1796  !!    n1k,n2k,n3k        Dimensions of the kernel FFT
1797  !!    hgrid              Mesh step
1798  !!    itype_scf          Order of the scaling function (8,14,16)
1799  !!    comm               MPI communicator to use
1800  !!
1801  !! AUTHORS
1802  !!    T. Deutsch, L. Genovese
1803  !! CREATION DATE
1804  !!    February 2006
1805  !!
1806  !! SOURCE
1807  !!
1808  subroutine par_build_kernel(n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,hgrid,itype_scf, &
1809    iproc,nproc,comm,karrayoutLOC)
1810    integer, intent(in)    :: n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,itype_scf,iproc,nproc
1811    real(8), intent(in)    :: hgrid
1812    real(8), intent(out)   :: karrayoutLOC(1:n1k, 1:n2k, 1:n3k/nproc)
1813    integer, intent(in)    :: comm
1814
1815    !Do not touch !!!!
1816    integer, parameter :: N_GAUSS = 89
1817    !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
1818    integer, parameter :: N_POINTS = 2**6
1819
1820    !Better p_gauss for calculation
1821    !(the support of the exponential should be inside [-n_range/2,n_range/2])
1822    real(8), parameter :: p0_ref = 1.d0
1823    real(8) :: p_gauss(N_GAUSS), w_gauss(N_GAUSS)
1824
1825    real(8), dimension(:), allocatable :: kernel_scf,kern_1_scf
1826    real(8), dimension(:), allocatable :: x_scf ,y_scf
1827    real(8), dimension(:,:,:,:), allocatable :: karrayfour
1828    real(8), dimension(:,:,:), allocatable :: karray
1829
1830    real(8) :: ur_gauss,dr_gauss,acc_gauss,pgauss,kern,a_range
1831    real(8) :: factor,factor2,dx,absci,p0gauss,p0_cell
1832    real(8) :: a1,a2,a3
1833    integer :: n_scf,nker1,nker2,nker3
1834    integer :: i_gauss,n_range,n_cell,istart,iend,istart1,istart2,iend1,iend2
1835    integer :: i,n_iter,i1,i2,i3,i_kern
1836    integer :: i01,i02,i03,n1h,n2h,n3h
1837
1838    PUSH_SUB(par_build_kernel)
1839
1840    !Number of integration points : 2*itype_scf*n_points
1841    n_scf=2*itype_scf*n_points
1842    !Set karray
1843    !dimension test
1844
1845    !here we must set the dimensions for the fft part, starting from the nfft
1846    !remember that actually nfft2 is associated with n03 and viceversa
1847
1848    !dimensions that define the center of symmetry
1849    n1h=nfft1/2
1850    n2h=nfft2/2
1851    n3h=nfft3/2
1852
1853    !Auxiliary dimensions only for building the FFT part
1854    nker1=nfft1
1855    nker2=nfft2
1856    nker3=nfft3/2+1
1857
1858    !adjusting the last two dimensions to be multiples of nproc
1859    do
1860      if(modulo(nker2,nproc) == 0) exit
1861      nker2=nker2+1
1862    end do
1863    do
1864      if(modulo(nker3,nproc) == 0) exit
1865      nker3=nker3+1
1866    end do
1867
1868    !this will be the array of the kernel in the real space
1869    SAFE_ALLOCATE(karray(1:nker1,1:nfft3,1:nker2/nproc))
1870
1871    !defining proper extremes for the calculation of the
1872    !local part of the kernel
1873
1874    istart=iproc*nker2/nproc+1
1875    iend=min((iproc+1)*nker2/nproc,n2h+n03)
1876
1877    istart1=istart
1878    if(iproc  ==  0) istart1=n2h-n03+2
1879
1880    iend2=iend
1881
1882    iend1=n2h
1883    istart2=n2h+1
1884    if(istart > n2h) then
1885      iend1=istart1-1
1886      istart2=istart
1887    end if
1888    if(iend  <=  n2h) then
1889      istart2=iend2+1
1890      iend1=iend
1891    end if
1892
1893!!!!!START KERNEL CONSTRUCTION
1894    !  if(iproc  ==  0) then
1895    !     write(unit=*,fmt="(1x,a,i0,a)") &
1896    !          "Build the kernel in parallel using a sum of ",N_GAUSS," gaussians"
1897    !     write(unit=*,fmt="(1x,a,i0,a)") &
1898    !          "Use interpolating scaling functions of ",itype_scf," order"
1899    !  end if
1900
1901    SAFE_ALLOCATE(x_scf(0:n_scf))
1902    SAFE_ALLOCATE(y_scf(0:n_scf))
1903
1904    !Build the scaling function
1905    call scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf)
1906    !Step grid for the integration
1907    dx = real(n_range,8)/real(n_scf,8)
1908    !Extend the range (no more calculations because fill in by 0.0_8)
1909    n_cell = max(n01,n02,n03)
1910    n_range = max(n_cell,n_range)
1911
1912    !Allocations
1913    SAFE_ALLOCATE(kernel_scf(-n_range:n_range))
1914    SAFE_ALLOCATE(kern_1_scf(-n_range:n_range))
1915
1916    !Lengthes of the box (use FFT dimension)
1917    a1 = hgrid * real(n01,8)
1918    a2 = hgrid * real(n02,8)
1919    a3 = hgrid * real(n03,8)
1920
1921    x_scf(:) = hgrid * x_scf(:)
1922    y_scf(:) = 1.d0/hgrid * y_scf(:)
1923    dx = hgrid * dx
1924    !To have a correct integration
1925    p0_cell = p0_ref/(hgrid*hgrid)
1926
1927    !Initialization of the gaussian (Beylkin)
1928    call gequad(N_GAUSS,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss)
1929    !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3)
1930    !(biggest length in the cube)
1931    !We divide the p_gauss by a_range**2 and a_gauss by a_range
1932    a_range = sqrt(a1*a1+a2*a2+a3*a3)
1933    factor = 1.d0/a_range
1934    !factor2 = factor*factor
1935    factor2 = 1.d0/(a1*a1+a2*a2+a3*a3)
1936    do i_gauss=1,N_GAUSS
1937      p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
1938    end do
1939    do i_gauss=1,N_GAUSS
1940      w_gauss(i_gauss) = factor*w_gauss(i_gauss)
1941    end do
1942
1943    karray(:,:,:) = 0.0_8
1944    !Use in this order (better for accuracy).
1945    loop_gauss: do i_gauss = N_GAUSS, 1, -1
1946      !Gaussian
1947      pgauss = p_gauss(i_gauss)
1948
1949      !We calculate the number of iterations to go from pgauss to p0_ref
1950      n_iter = nint((log(pgauss) - log(p0_cell))/log(4.d0))
1951      if (n_iter <= 0)then
1952        n_iter = 0
1953        p0gauss = pgauss
1954      else
1955        p0gauss = pgauss/4.d0**n_iter
1956      end if
1957
1958      !Stupid integration
1959      !Do the integration with the exponential centered in i_kern
1960      kernel_scf(:) = 0.0_8
1961      do i_kern=0,n_range
1962        kern = 0.0_8
1963        do i=0,n_scf
1964          absci = x_scf(i) - real(i_kern,8)*hgrid
1965          absci = absci*absci
1966          kern = kern + y_scf(i)*exp(-p0gauss*absci)*dx
1967        end do
1968        kernel_scf(i_kern) = kern
1969        kernel_scf(-i_kern) = kern
1970        if (abs(kern) < 1.d-18) then
1971          !Too small not useful to calculate
1972          exit
1973        end if
1974      end do
1975
1976      !Start the iteration to go from p0gauss to pgauss
1977      call scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf)
1978
1979      !Add to the kernel (only the local part)
1980
1981      do i3=istart1,iend1
1982        i03 =  n2h - i3 + 1
1983        do i2 = 1,n02
1984          i02 = i2-1
1985          do i1 = 1,n01
1986            i01 = i1-1
1987            karray(i1+n1h,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1) + w_gauss(i_gauss)* &
1988              kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
1989          end do
1990        end do
1991      end do
1992      do i3=istart2,iend2
1993        i03 = i3 - n2h -1
1994        do i2 = 1,n02
1995          i02 = i2-1
1996          do i1 = 1,n01
1997            i01 = i1-1
1998            karray(i1+n1h,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1) + w_gauss(i_gauss)* &
1999              kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
2000          end do
2001        end do
2002      end do
2003
2004
2005    end do loop_gauss
2006
2007    !Build the kernel in the real space as an even function, thus having a real FFT
2008
2009    do i3=istart1,iend2
2010      do i2 = 1,n02
2011        do i1=2,n01
2012          karray(n1h+2-i1,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1)
2013        end do
2014      end do
2015      do i2=2,n02
2016        do i1 = 1,nker1
2017          karray(i1,n3h+2-i2,i3-istart+1) = karray(i1,i2+n3h,i3-istart+1)
2018        end do
2019      end do
2020    end do
2021
2022
2023    !De-allocations
2024    SAFE_DEALLOCATE_A(kernel_scf)
2025    SAFE_DEALLOCATE_A(kern_1_scf)
2026    SAFE_DEALLOCATE_A(x_scf)
2027    SAFE_DEALLOCATE_A(y_scf)
2028
2029!!!!END KERNEL CONSTRUCTION
2030
2031    SAFE_ALLOCATE(karrayfour(1:2, 1:nker1, 1:nker2, 1:nker3/nproc))
2032
2033    ! if(iproc  ==  0) print *,"Do a 3D PHalFFT for the kernel"
2034
2035    call kernelfft(nfft1,nfft2,nfft3,nker1,nker2,nker3,nproc,iproc,karray,karrayfour,comm)
2036
2037    !Reconstruct the real kernel FFT
2038    do i3 = 1,n3k/nproc
2039      do i2 = 1,n2k
2040        do i1 = 1,n1k
2041          karrayoutLOC(i1,i2,i3)=karrayfour(1,i1,i2,i3)
2042        end do
2043      end do
2044    end do
2045
2046    !De-allocations
2047    SAFE_DEALLOCATE_A(karray)
2048    SAFE_DEALLOCATE_A(karrayfour)
2049    POP_SUB(par_build_kernel)
2050  end subroutine par_build_kernel
2051  !!***
2052
2053  ! -------------------------------------------------------------------------
2054
2055  subroutine gequad(n_gauss, p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss)
2056    integer, intent(in)    :: n_gauss
2057    real(8), intent(out)   :: p_gauss(:)
2058    real(8), intent(out)   :: w_gauss(:)
2059    real(8), intent(out)   :: ur_gauss
2060    real(8), intent(out)   :: dr_gauss
2061    real(8), intent(out)   :: acc_gauss
2062
2063    integer :: iunit, i, idx
2064
2065    PUSH_SUB(gequad)
2066
2067    ur_gauss = 1.0_8
2068    dr_gauss = 1.0e-08_8
2069    acc_gauss = 1.0e-08_8
2070
2071    iunit = io_open(trim(conf%share)//'/gequad.data', action = 'read', status = 'old', die = .true.)
2072
2073    do i = 1, n_gauss
2074      read(iunit, *) idx, p_gauss(i), w_gauss(i)
2075    end do
2076
2077    call io_close(iunit)
2078
2079    POP_SUB(gequad)
2080  end subroutine gequad
2081
2082end module poisson_isf_oct_m
2083
2084!! Local Variables:
2085!! mode: f90
2086!! coding: utf-8
2087!! End:
2088