1!! Copyright (C) 2002-2011 M. Marques, A. Castro, X. Andrade, J. Alberdi-Rodriguez, M. Oliveira 2!! Copyright (C) Luigi Genovese, Thierry Deutsch, CEA Grenoble, 2006 3!! 4!! This program is free software; you can redistribute it and/or modify 5!! it under the terms of the GNU General Public License as published by 6!! the Free Software Foundation; either version 2, or (at your option) 7!! any later version. 8!! 9!! This program is distributed in the hope that it will be useful, 10!! but WITHOUT ANY WARRANTY; without even the implied warranty of 11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12!! GNU General Public License for more details. 13!! 14!! You should have received a copy of the GNU General Public License 15!! along with this program; if not, write to the Free Software 16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 17!! 02110-1301, USA. 18!! 19 20#include "global.h" 21 22module poisson_isf_oct_m 23 use cube_function_oct_m 24 use cube_oct_m 25 use global_oct_m 26 use io_oct_m 27 use messages_oct_m 28 use mesh_oct_m 29 use mpi_oct_m 30 use namespace_oct_m 31 use parser_oct_m 32 use profiling_oct_m 33 use scaling_function_oct_m 34 use sgfft_oct_m 35 use submesh_oct_m 36 37 implicit none 38 39 private 40 41 public :: & 42 poisson_isf_t, & 43 isf_cnf_t, & 44 poisson_isf_init, & 45 poisson_isf_solve, & 46 poisson_isf_end 47 48 ! Indices for the cnf array 49 integer, parameter :: SERIAL = 1 50 integer, parameter :: WORLD = 2 51 integer, parameter :: DOMAIN = 3 52 integer, parameter :: N_CNF = 3 53 54 ! Datatype to store kernel values to solve Poisson equation 55 ! on different communicators (configurations). 56 type isf_cnf_t 57 private 58 real(8), pointer :: kernel(:, :, :) 59 integer :: nfft1, nfft2, nfft3 60 type(mpi_grp_t) :: mpi_grp 61 logical :: all_nodes 62 end type isf_cnf_t 63 64 type poisson_isf_t 65 private 66 integer :: all_nodes_comm 67 type(isf_cnf_t) :: cnf(1:N_CNF) 68 end type poisson_isf_t 69 70 integer, parameter :: order_scaling_function = 8 71 72 73contains 74 75 ! --------------------------------------------------------- 76 subroutine poisson_isf_init(this, namespace, mesh, cube, all_nodes_comm, init_world) 77 type(poisson_isf_t), intent(out) :: this 78 type(namespace_t), target, intent(in) :: namespace 79 type(mesh_t), intent(in) :: mesh 80 type(cube_t), intent(inout) :: cube 81 integer, intent(in) :: all_nodes_comm 82 logical, optional, intent(in) :: init_world 83 84 integer :: n1, n2, n3 85 integer :: i_cnf 86#if defined(HAVE_MPI) 87 integer :: m1, m2, m3, md1, md2, md3 88 integer :: n(3) 89 logical :: init_world_ 90 integer :: default_nodes 91 integer :: ierr, world_grp, poisson_grp, ii 92 integer, allocatable :: ranks(:) 93 !data ranks /0, 1/ 94 integer :: world_size 95 integer :: nodes 96#endif 97 98 PUSH_SUB(poisson_isf_init) 99 100#ifdef HAVE_MPI 101 init_world_ = .true. 102 if(present(init_world)) init_world_ = init_world 103#endif 104 105 ! we need to nullify the pointer so they can be deallocated safely 106 ! afterwards 107 do i_cnf = 1, N_CNF 108 nullify(this%cnf(i_cnf)%kernel) 109 end do 110 111 if(.not. mesh%parallel_in_domains) then 112 ! The serial version is always needed (as used, e.g., in the casida runmode) 113 call calculate_dimensions(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), & 114 this%cnf(SERIAL)%nfft1, this%cnf(SERIAL)%nfft2, this%cnf(SERIAL)%nfft3) 115 116 n1 = this%cnf(SERIAL)%nfft1/2 + 1 117 n2 = this%cnf(SERIAL)%nfft2/2 + 1 118 n3 = this%cnf(SERIAL)%nfft3/2 + 1 119 120 SAFE_ALLOCATE(this%cnf(SERIAL)%kernel(1:n1, 1:n2, 1:n3)) 121 122 call build_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), & 123 this%cnf(SERIAL)%nfft1, this%cnf(SERIAL)%nfft2, this%cnf(SERIAL)%nfft3, & 124 real(mesh%spacing(1), 8), order_scaling_function, this%cnf(SERIAL)%kernel) 125 end if 126#if defined(HAVE_MPI) 127 128 ! Allocate to configurations. The initialisation, especially the kernel, 129 ! depends on the number of nodes used for the calculations. To avoid 130 ! recalculating the kernel on each call of poisson_isf_solve depending on 131 ! the all_nodes argument, both kernels are calculated. 132 this%cnf(DOMAIN)%mpi_grp = mesh%mpi_grp 133 134 ! For the world configuration we build a new communicator 135 136 default_nodes = 0 !All nodes 137 138 !%Variable PoissonSolverNodes 139 !%Type integer 140 !%Section Hamiltonian::Poisson 141 !%Default 0 142 !%Description 143 !% How many nodes to use to solve the Poisson equation. A value of 144 !% 0, the default, implies that all available nodes are used. 145 !%End 146 call parse_variable(namespace, 'PoissonSolverNodes', default_nodes, nodes) 147 148 this%all_nodes_comm = all_nodes_comm 149 150 call MPI_Comm_size(all_nodes_comm, world_size, mpi_err) 151 152 if(nodes <= 0 .or. nodes > world_size) nodes = world_size 153 this%cnf(WORLD)%all_nodes = (nodes == mpi_world%size) 154 155 SAFE_ALLOCATE(ranks(1:nodes)) 156 157 do ii = 1, nodes 158 ranks(ii) = ii - 1 159 end do 160 161 !create a new communicator 162 !Extract the original group handle and create new comm. 163 call MPI_Comm_group(all_nodes_comm, world_grp, ierr) 164 call MPI_Group_incl(world_grp, nodes, ranks, poisson_grp, ierr) 165 call MPI_Comm_create(mpi_world%comm, poisson_grp, this%cnf(WORLD)%mpi_grp%comm, ierr) 166 167 SAFE_DEALLOCATE_A(ranks) 168 169 !Fill the new data structure, for all nodes 170 if (this%cnf(WORLD)%mpi_grp%comm /= MPI_COMM_NULL) then 171 call MPI_Comm_rank(this%cnf(WORLD)%mpi_grp%comm, this%cnf(WORLD)%mpi_grp%rank, ierr) 172 call MPI_Comm_size(this%cnf(WORLD)%mpi_grp%comm, this%cnf(WORLD)%mpi_grp%size, ierr) 173 else 174 this%cnf(WORLD)%mpi_grp%rank = -1 175 this%cnf(WORLD)%mpi_grp%size = -1 176 end if 177 178 ! Build the kernel for all configurations. At the moment, this is 179 ! solving the poisson equation with all nodes (i_cnf == WORLD) and 180 ! with the domain nodes only (i_cnf == DOMAIN). 181 do i_cnf = 2, N_CNF 182 if( (i_cnf == WORLD .and. .not. init_world_) & ! world is disabled 183 .or. (i_cnf == DOMAIN .and. .not. mesh%parallel_in_domains) & ! not parallel in domains 184 ) then 185 nullify(this%cnf(i_cnf)%kernel) 186 cycle 187 end if 188 if (this%cnf(i_cnf)%mpi_grp%rank /= -1 .or. i_cnf /= WORLD ) then 189 call par_calculate_dimensions(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), & 190 m1, m2, m3, n1, n2, n3, md1, md2, md3, this%cnf(i_cnf)%nfft1, this%cnf(i_cnf)%nfft2, & 191 this%cnf(i_cnf)%nfft3, this%cnf(i_cnf)%mpi_grp%size) 192 193 ! Shortcuts to avoid to "line too long" errors. 194 n(1) = this%cnf(i_cnf)%nfft1 195 n(2) = this%cnf(i_cnf)%nfft2 196 n(3) = this%cnf(i_cnf)%nfft3 197 198 SAFE_ALLOCATE(this%cnf(i_cnf)%kernel(1:n(1), 1:n(2), 1:n(3)/this%cnf(i_cnf)%mpi_grp%size)) 199 200 call par_build_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), n1, n2, n3, & 201 this%cnf(i_cnf)%nfft1, this%cnf(i_cnf)%nfft2, this%cnf(i_cnf)%nfft3, & 202 mesh%spacing(1), order_scaling_function, & 203 this%cnf(i_cnf)%mpi_grp%rank, this%cnf(i_cnf)%mpi_grp%size, this%cnf(i_cnf)%mpi_grp%comm, & 204 this%cnf(i_cnf)%kernel) 205 else 206 nullify(this%cnf(i_cnf)%kernel) 207 cycle 208 end if 209 end do 210#endif 211 212 POP_SUB(poisson_isf_init) 213 end subroutine poisson_isf_init 214 215 ! --------------------------------------------------------- 216 subroutine poisson_isf_solve(this, mesh, cube, pot, rho, all_nodes, sm) 217 type(poisson_isf_t), intent(in) :: this 218 type(mesh_t), intent(in) :: mesh 219 type(cube_t), intent(in) :: cube 220 FLOAT, intent(out) :: pot(:) 221 FLOAT, intent(in) :: rho(:) 222 logical, intent(in) :: all_nodes 223 type(submesh_t), optional, intent(in) :: sm !< If present pot and rho are assumed to come from it 224 225 integer :: i_cnf, nn(1:3) 226 type(cube_function_t) :: rho_cf 227 228 PUSH_SUB(poisson_isf_solve) 229 230 call cube_function_null(rho_cf) 231 call dcube_function_alloc_RS(cube, rho_cf) 232 233 if(present(sm)) then 234 call dsubmesh_to_cube(sm, rho, cube, rho_cf) 235 else 236 if(mesh%parallel_in_domains) then 237 call dmesh_to_cube(mesh, rho, cube, rho_cf, local=.true.) 238 else 239 call dmesh_to_cube(mesh, rho, cube, rho_cf) 240 end if 241 end if 242 243 ! Choose configuration. 244 i_cnf = SERIAL 245 246#if defined(HAVE_MPI) 247 if(all_nodes) then 248 i_cnf = WORLD 249 else if(mesh%parallel_in_domains) then 250 i_cnf = DOMAIN 251 end if 252#endif 253 254#if !defined(HAVE_MPI) 255 ASSERT(i_cnf == SERIAL) 256#endif 257 258 if(i_cnf == SERIAL) then 259 260 nn(1) = this%cnf(SERIAL)%nfft1 261 nn(2) = this%cnf(SERIAL)%nfft2 262 nn(3) = this%cnf(SERIAL)%nfft3 263 call psolver_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), & 264 nn(1), nn(2), nn(3), & 265 real(mesh%spacing(1), 8), this%cnf(SERIAL)%kernel, rho_cf%dRS) 266 267#if defined(HAVE_MPI) 268 else 269 nn(1) = this%cnf(i_cnf)%nfft1 270 nn(2) = this%cnf(i_cnf)%nfft2 271 nn(3) = this%cnf(i_cnf)%nfft3 272 if (this%cnf(i_cnf)%mpi_grp%size /= -1 .or. i_cnf /= WORLD) then 273 call par_psolver_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), & 274 nn(1), nn(2), nn(3), & 275 real(mesh%spacing(1), 8), this%cnf(i_cnf)%kernel, rho_cf%dRS, & 276 this%cnf(i_cnf)%mpi_grp%rank, this%cnf(i_cnf)%mpi_grp%size, this%cnf(i_cnf)%mpi_grp%comm) 277 end if 278 ! we need to be sure that the root of every domain-partition has a copy of the potential 279 ! for the moment we broadcast to all nodes, but this is more than what we really need 280 if(i_cnf == WORLD .and. .not. this%cnf(WORLD)%all_nodes) then 281 call MPI_Bcast(rho_cf%drs(1, 1, 1), cube%rs_n_global(1)*cube%rs_n_global(2)*cube%rs_n_global(3), & 282 MPI_FLOAT, 0, this%all_nodes_comm, mpi_err) 283 end if 284#endif 285 end if 286 287 if(present(sm)) then 288 call dcube_to_submesh(cube, rho_cf, sm, pot) 289 else 290 if(mesh%parallel_in_domains) then 291 call dcube_to_mesh(cube, rho_cf, mesh, pot, local=.true.) 292 else 293 call dcube_to_mesh(cube, rho_cf, mesh, pot) 294 end if 295 end if 296 297 call dcube_function_free_RS(cube, rho_cf) 298 299 POP_SUB(poisson_isf_solve) 300 end subroutine poisson_isf_solve 301 302 ! --------------------------------------------------------- 303 subroutine poisson_isf_end(this) 304 type(poisson_isf_t), intent(inout) :: this 305 306#if defined(HAVE_MPI) 307 integer :: i_cnf 308#endif 309 310 PUSH_SUB(poisson_isf_end) 311 312#if defined(HAVE_MPI) 313 do i_cnf = 1, N_CNF 314 SAFE_DEALLOCATE_P(this%cnf(i_cnf)%kernel) 315 end do 316 if(this%cnf(WORLD)%mpi_grp%comm /= MPI_COMM_NULL) then 317 call MPI_Comm_free(this%cnf(WORLD)%mpi_grp%comm, mpi_err) 318 end if 319#else 320 SAFE_DEALLOCATE_P(this%cnf(SERIAL)%kernel) 321#endif 322 323 POP_SUB(poisson_isf_end) 324 end subroutine poisson_isf_end 325 326 ! -------------------------------------------------------------- 327 328 !!****h* BigDFT/psolver_kernel 329 !! NAME 330 !! psolver_kernel 331 !! 332 !! FUNCTION 333 !! Solver of Poisson equation applying a kernel 334 !! 335 !! SYNOPSIS 336 !! Poisson solver applying a kernel and 337 !! using Fourier transform for the convolution. 338 !! rhopot : input -> the density 339 !! output -> the Hartree potential + pot_ion 340 !! The potential pot_ion is ADDED in the array rhopot. 341 !! Calculate also the Hartree potential 342 !! 343 !! Replaces the charge density contained in rhopot 344 !! by the Hartree stored as well in rhopot. 345 !! If xc_on is true, it also adds the XC potential and 346 !! ionic potential pot_ion 347 !! 348 !! We double the size of the mesh except in one dimension 349 !! in order to use the property of the density to be real. 350 !! WARNING 351 !! For the use of FFT routine 352 !! inzee=1: first part of Z is data (output) array, 353 !! second part work array 354 !! inzee=2: first part of Z is work array, second part data array 355 !! real(F(i1,i2,i3))=Z(1,i1,i2,i3,inzee) 356 !! imag(F(i1,i2,i3))=Z(2,i1,i2,i3,inzee) 357 !! inzee on output is in general different from inzee on input 358 !! 359 !! AUTHOR 360 !! Thierry Deutsch, Luigi Genovese 361 !! COPYRIGHT 362 !! Copyright (C) 2005 CEA 363 !! CREATION DATE 364 !! 13/07/2005 365 !! 366 !! MODIFICATION HISTORY 367 !! 12/2005 Kernel stored into memory 368 !! 12/2005 Real Kernel FFT and use less memory 369 !! 370 !! SOURCE 371 !! 372 subroutine psolver_kernel(n01, n02, n03, nfft1, nfft2, nfft3, hgrid, karray, rhopot) 373 integer, intent(in) :: n01 374 integer, intent(in) :: n02 375 integer, intent(in) :: n03 376 integer, intent(in) :: nfft1 377 integer, intent(in) :: nfft2 378 integer, intent(in) :: nfft3 379 real(8), intent(in) :: hgrid 380 real(8), intent(in) :: karray(nfft1/2 + 1,nfft2/2 + 1, nfft3/2 + 1) 381 real(8), intent(inout) :: rhopot(n01, n02, n03) 382 383 real(8), allocatable :: zarray(:,:,:) 384 real(8) :: factor 385 integer :: n1, n2, n3, nd1, nd2, nd3, n1h, nd1h 386 integer :: inzee, i_sign 387 388 PUSH_SUB(psolver_kernel) 389 390 !Dimension of the FFT 391 call calculate_dimensions(n01, n02, n03, n1, n2, n3) 392 393 !Half size of nd1 394 n1h=n1/2 395 nd1 = n1 + modulo(n1+1,2) 396 nd2 = n2 + modulo(n2+1,2) 397 nd3 = n3 + modulo(n3+1,2) 398 nd1h=(nd1+1)/2 399 400 SAFE_ALLOCATE(zarray(1:2, 1:nd1h*nd2*nd3, 1:2)) 401 402 !Set zarray 403 call zarray_in(n01,n02,n03,nd1h,nd2,nd3,rhopot,zarray) 404 405 !FFT 406 !print *,"Do a 3D HalFFT for the density" 407 i_sign=1 408 inzee=1 409 call fft(n1h,n2,n3,nd1h,nd2,nd3,zarray,i_sign,inzee) 410 411 !print *, "Apply the kernel" 412 call kernel_application(n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,zarray,karray,inzee) 413 414 !Inverse FFT 415 i_sign=-1 416 !print *,"Do a 3D inverse HalFFT" 417 call fft(n1h,n2,n3,nd1h,nd2,nd3,zarray,i_sign,inzee) 418 419 !Recollect the result 420 !We have to multiply by a factor 421 factor = hgrid**3/(n1*n2*n3) 422 423 ! Calling this routine gives only the Hartree potential 424 call zarray_out(n01, n02, n03, nd1h, nd2, nd3, rhopot, zarray(1, 1, inzee), factor) 425 426 SAFE_DEALLOCATE_A(zarray) 427 POP_SUB(psolver_kernel) 428 end subroutine psolver_kernel 429 !!*** 430 431 !!****h* BigDFT/kernel_application 432 !! NAME 433 !! kernel_application 434 !! 435 !! FUNCTION 436 !! Multiply the FFT of the density by the FFT of the kernel 437 !! 438 !! SYNOPSIS 439 !! zarray(:,:,:,:,inzee) : IN -> FFT of the density with the x dimension divided by two 440 !! (HalFFT), OUT -> FFT of the potential 441 !! karray : kernel FFT (real, 1/8 of the total grid) 442 !! n1h,n2,n3 : dimension of the FFT grid for zarray 443 !! nd1h,nd2,nd3 : dimensions of zarray 444 !! nfft1,nfft2,nfft3 : original FFT grid dimensions, to be used for karray dimensions 445 !! 446 !! WARNING 447 !! We use all the zarray vector, storing the auxiliary part using ouzee=3-inzee 448 !! All the loop are unrolled such to avoid different conditions 449 !! the "min" functions are substituted by kink computed with absolute values 450 !! AUTHOR 451 !! Luigi Genovese 452 !! CREATION DATE 453 !! March 2006 454 !! 455 !! SOURCE 456 !! 457 subroutine kernel_application(n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,zarray,karray,inzee) 458 integer, intent(in) :: n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,inzee 459 real(8), intent(in) :: karray(1:nfft1/2 + 1, 1:nfft2/2 + 1, 1:nfft3/2 + 1) 460 real(8), intent(inout) :: zarray(1:2, 1:nd1h, 1:nd2, 1:nd3, 1:2) 461 462 real(8), dimension(:), allocatable :: cos_array,sin_array 463 real(8) :: a,b,c,d,pi2,g1,cp,sp 464 real(8) :: rfe,ife,rfo,ifo,rk,ik,rk2,ik2,re,ro,ie,io,rhk,ihk 465 integer :: i1,i2,i3,j1,j2,j3, ouzee,n1h,n2h,n3h 466 integer :: si1,si2,si3 467 468 PUSH_SUB(kernel_application) 469 470 n1h = n1/2 471 n2h = n2/2 472 n3h = n3/2 473 474 SAFE_ALLOCATE(cos_array(1:n1h + 1)) 475 SAFE_ALLOCATE(sin_array(1:n1h + 1)) 476 477 pi2=8.d0*datan(1.d0) 478 pi2=pi2/real(n1,8) 479 do i1 = 1,n1h+1 480 cos_array(i1)=dcos(pi2*(i1-1)) 481 sin_array(i1)=-dsin(pi2*(i1-1)) 482 end do 483 484 ouzee=3-inzee 485 486 !--------------------------------------------! 487 !--- Starting reconstruction half -> full ---! 488 !--------------------------------------------! 489 490 !-------------Case i3 = 1 491 i3=1 492 j3=1 493 si3=1 494 495 !-------------Case i2 = 1, i3 = 1 496 i2=1 497 j2=1 498 si2=1 499 500 !Case i1 == 1 501 i1=1 502 si1=1 503 a=zarray(1,i1,i2,i3,inzee) 504 b=zarray(2,i1,i2,i3,inzee) 505 c=zarray(1,si1,si2,si3,inzee) 506 d=zarray(2,si1,si2,si3,inzee) 507 rfe=.5d0*(a+c) 508 ife=.5d0*(b-d) 509 rfo=.5d0*(a-c) 510 ifo=.5d0*(b+d) 511 cp=cos_array(i1) 512 sp=sin_array(i1) 513 rk=rfe+cp*ifo-sp*rfo 514 ik=ife-cp*rfo-sp*ifo 515 g1=karray(i1,j2,j3) 516 rk2=rk*g1 517 ik2=ik*g1 518 519 zarray(1,1,i2,i3,ouzee) = rk2 520 zarray(2,1,i2,i3,ouzee) = ik2 521 522 !Case i1=2,n1h 523 do i1=2,n1h 524 si1=n1h+2-i1 525 526 a=zarray(1,i1,i2,i3,inzee) 527 b=zarray(2,i1,i2,i3,inzee) 528 c=zarray(1,si1,si2,si3,inzee) 529 d=zarray(2,si1,si2,si3,inzee) 530 rfe=.5d0*(a+c) 531 ife=.5d0*(b-d) 532 rfo=.5d0*(a-c) 533 ifo=.5d0*(b+d) 534 cp=cos_array(i1) 535 sp=sin_array(i1) 536 rk=rfe+cp*ifo-sp*rfo 537 ik=ife-cp*rfo-sp*ifo 538 g1=karray(i1,j2,j3) 539 rk2=rk*g1 540 ik2=ik*g1 541 542 zarray(1,i1,i2,i3,ouzee) = rk2 543 zarray(2,i1,i2,i3,ouzee) = ik2 544 end do 545 546 !Case i1=n1h+1 547 i1=n1h+1 548 si1=n1h+2-i1 549 550 a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1 551 b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1 552 c=zarray(1,si1,si2,si3,inzee) 553 d=zarray(2,si1,si2,si3,inzee) 554 rfe=.5d0*(a+c) 555 ife=.5d0*(b-d) 556 rfo=.5d0*(a-c) 557 ifo=.5d0*(b+d) 558 cp=cos_array(i1) 559 sp=sin_array(i1) 560 rk=rfe+cp*ifo-sp*rfo 561 ik=ife-cp*rfo-sp*ifo 562 g1=karray(i1,j2,j3) 563 rk2=rk*g1 564 ik2=ik*g1 565 566 zarray(1,i1,i2,i3,ouzee) = rk2 567 zarray(2,i1,i2,i3,ouzee) = ik2 568 !-------------END case i2 = 1 , i3=1 569 570 !case i2 >=2 571 do i2=2,n2 572 j2=n2h+1-abs(n2h+1-i2) 573 si2=n2+2-i2 !if i2 /=1, otherwise si2=1 574 575 !Case i1 == 1 576 i1=1 577 si1=1 578 a=zarray(1,i1,i2,i3,inzee) 579 b=zarray(2,i1,i2,i3,inzee) 580 c=zarray(1,si1,si2,si3,inzee) 581 d=zarray(2,si1,si2,si3,inzee) 582 rfe=.5d0*(a+c) 583 ife=.5d0*(b-d) 584 rfo=.5d0*(a-c) 585 ifo=.5d0*(b+d) 586 cp=cos_array(i1) 587 sp=sin_array(i1) 588 rk=rfe+cp*ifo-sp*rfo 589 ik=ife-cp*rfo-sp*ifo 590 g1=karray(i1,j2,j3) 591 rk2=rk*g1 592 ik2=ik*g1 593 594 zarray(1,1,i2,i3,ouzee) = rk2 595 zarray(2,1,i2,i3,ouzee) = ik2 596 597 !Case i1=2,n1h 598 do i1=2,n1h 599 si1=n1h+2-i1 600 601 a=zarray(1,i1,i2,i3,inzee) 602 b=zarray(2,i1,i2,i3,inzee) 603 c=zarray(1,si1,si2,si3,inzee) 604 d=zarray(2,si1,si2,si3,inzee) 605 rfe=.5d0*(a+c) 606 ife=.5d0*(b-d) 607 rfo=.5d0*(a-c) 608 ifo=.5d0*(b+d) 609 cp=cos_array(i1) 610 sp=sin_array(i1) 611 rk=rfe+cp*ifo-sp*rfo 612 ik=ife-cp*rfo-sp*ifo 613 g1=karray(i1,j2,j3) 614 rk2=rk*g1 615 ik2=ik*g1 616 617 zarray(1,i1,i2,i3,ouzee) = rk2 618 zarray(2,i1,i2,i3,ouzee) = ik2 619 end do 620 621 !Case i1=n1h+1 622 i1=n1h+1 623 si1=n1h+2-i1 624 625 a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1 626 b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1 627 c=zarray(1,si1,si2,si3,inzee) 628 d=zarray(2,si1,si2,si3,inzee) 629 rfe=.5d0*(a+c) 630 ife=.5d0*(b-d) 631 rfo=.5d0*(a-c) 632 ifo=.5d0*(b+d) 633 cp=cos_array(i1) 634 sp=sin_array(i1) 635 rk=rfe+cp*ifo-sp*rfo 636 ik=ife-cp*rfo-sp*ifo 637 g1=karray(i1,j2,j3) 638 rk2=rk*g1 639 ik2=ik*g1 640 641 zarray(1,i1,i2,i3,ouzee) = rk2 642 zarray(2,i1,i2,i3,ouzee) = ik2 643 end do 644 !-------------END Case i3 = 1 645 646 !case i3 >=2 647 do i3=2,n3 648 j3=n3h+1-abs(n3h+1-i3) 649 si3=n3+2-i3 !if i3 /=1, otherwise si3=1 650 651 !-------------Case i2 = 1 652 i2=1 653 j2=1 654 si2=1 655 656 !Case i1 == 1 657 i1=1 658 si1=1 659 a=zarray(1,i1,i2,i3,inzee) 660 b=zarray(2,i1,i2,i3,inzee) 661 c=zarray(1,si1,si2,si3,inzee) 662 d=zarray(2,si1,si2,si3,inzee) 663 rfe=.5d0*(a+c) 664 ife=.5d0*(b-d) 665 rfo=.5d0*(a-c) 666 ifo=.5d0*(b+d) 667 cp=cos_array(i1) 668 sp=sin_array(i1) 669 rk=rfe+cp*ifo-sp*rfo 670 ik=ife-cp*rfo-sp*ifo 671 g1=karray(i1,j2,j3) 672 rk2=rk*g1 673 ik2=ik*g1 674 675 zarray(1,1,i2,i3,ouzee) = rk2 676 zarray(2,1,i2,i3,ouzee) = ik2 677 678 !Case i1=2,n1h 679 do i1=2,n1h 680 si1=n1h+2-i1 681 682 a=zarray(1,i1,i2,i3,inzee) 683 b=zarray(2,i1,i2,i3,inzee) 684 c=zarray(1,si1,si2,si3,inzee) 685 d=zarray(2,si1,si2,si3,inzee) 686 rfe=.5d0*(a+c) 687 ife=.5d0*(b-d) 688 rfo=.5d0*(a-c) 689 ifo=.5d0*(b+d) 690 cp=cos_array(i1) 691 sp=sin_array(i1) 692 rk=rfe+cp*ifo-sp*rfo 693 ik=ife-cp*rfo-sp*ifo 694 g1=karray(i1,j2,j3) 695 rk2=rk*g1 696 ik2=ik*g1 697 698 zarray(1,i1,i2,i3,ouzee) = rk2 699 zarray(2,i1,i2,i3,ouzee) = ik2 700 end do 701 702 !Case i1=n1h+1 703 i1=n1h+1 704 si1=n1h+2-i1 705 706 a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1 707 b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1 708 c=zarray(1,si1,si2,si3,inzee) 709 d=zarray(2,si1,si2,si3,inzee) 710 rfe=.5d0*(a+c) 711 ife=.5d0*(b-d) 712 rfo=.5d0*(a-c) 713 ifo=.5d0*(b+d) 714 cp=cos_array(i1) 715 sp=sin_array(i1) 716 rk=rfe+cp*ifo-sp*rfo 717 ik=ife-cp*rfo-sp*ifo 718 g1=karray(i1,j2,j3) 719 rk2=rk*g1 720 ik2=ik*g1 721 722 zarray(1,i1,i2,i3,ouzee) = rk2 723 zarray(2,i1,i2,i3,ouzee) = ik2 724 !-------------END case i2 = 1 725 726 !case i2 >=2 727 do i2=2,n2 728 j2=n2h+1-abs(n2h+1-i2) 729 si2=n2+2-i2 !if i2 /=1, otherwise si2=1 730 731 !Case i1 == 1 732 i1=1 733 si1=1 734 a=zarray(1,i1,i2,i3,inzee) 735 b=zarray(2,i1,i2,i3,inzee) 736 c=zarray(1,si1,si2,si3,inzee) 737 d=zarray(2,si1,si2,si3,inzee) 738 rfe=.5d0*(a+c) 739 ife=.5d0*(b-d) 740 rfo=.5d0*(a-c) 741 ifo=.5d0*(b+d) 742 cp=cos_array(i1) 743 sp=sin_array(i1) 744 rk=rfe+cp*ifo-sp*rfo 745 ik=ife-cp*rfo-sp*ifo 746 g1=karray(i1,j2,j3) 747 rk2=rk*g1 748 ik2=ik*g1 749 750 zarray(1,1,i2,i3,ouzee) = rk2 751 zarray(2,1,i2,i3,ouzee) = ik2 752 753 !Case i1=2,n1h 754 do i1=2,n1h 755 si1=n1h+2-i1 756 757 a=zarray(1,i1,i2,i3,inzee) 758 b=zarray(2,i1,i2,i3,inzee) 759 c=zarray(1,si1,si2,si3,inzee) 760 d=zarray(2,si1,si2,si3,inzee) 761 rfe=.5d0*(a+c) 762 ife=.5d0*(b-d) 763 rfo=.5d0*(a-c) 764 ifo=.5d0*(b+d) 765 cp=cos_array(i1) 766 sp=sin_array(i1) 767 rk=rfe+cp*ifo-sp*rfo 768 ik=ife-cp*rfo-sp*ifo 769 g1=karray(i1,j2,j3) 770 rk2=rk*g1 771 ik2=ik*g1 772 773 zarray(1,i1,i2,i3,ouzee) = rk2 774 zarray(2,i1,i2,i3,ouzee) = ik2 775 end do 776 777 !Case i1=n1h+1 778 i1=n1h+1 779 si1=n1h+2-i1 780 781 a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1 782 b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1 783 c=zarray(1,si1,si2,si3,inzee) 784 d=zarray(2,si1,si2,si3,inzee) 785 rfe=.5d0*(a+c) 786 ife=.5d0*(b-d) 787 rfo=.5d0*(a-c) 788 ifo=.5d0*(b+d) 789 cp=cos_array(i1) 790 sp=sin_array(i1) 791 rk=rfe+cp*ifo-sp*rfo 792 ik=ife-cp*rfo-sp*ifo 793 g1=karray(i1,j2,j3) 794 rk2=rk*g1 795 ik2=ik*g1 796 797 zarray(1,i1,i2,i3,ouzee) = rk2 798 zarray(2,i1,i2,i3,ouzee) = ik2 799 end do 800 801 end do 802 803 804 !--------------------------------------------! 805 !--- Starting reconstruction full -> half ---! 806 !--------------------------------------------! 807 808 !case i3=1 809 i3=1 810 j3=1 811 !case i2=1 812 i2=1 813 j2=1 814 do i1 = 1,n1h 815 j1=n1h+2-i1 816 817 a=zarray(1,i1,i2,i3,ouzee) 818 b=zarray(2,i1,i2,i3,ouzee) 819 c=zarray(1,j1,j2,j3,ouzee) 820 d=-zarray(2,j1,j2,j3,ouzee) 821 cp=cos_array(i1) 822 sp=sin_array(i1) 823 re=(a+c) 824 ie=(b+d) 825 ro=(a-c)*cp-(b-d)*sp 826 io=(a-c)*sp+(b-d)*cp 827 rhk=re-io 828 ihk=ie+ro 829 830 zarray(1,i1,i2,i3,inzee)=rhk 831 zarray(2,i1,i2,i3,inzee)=ihk 832 end do 833 !case i2 >= 2 834 do i2=2,n2 835 j2=nd2+1-i2 836 do i1 = 1,n1h 837 j1=n1h+2-i1 838 839 a=zarray(1,i1,i2,i3,ouzee) 840 b=zarray(2,i1,i2,i3,ouzee) 841 c=zarray(1,j1,j2,j3,ouzee) 842 d=-zarray(2,j1,j2,j3,ouzee) 843 cp=cos_array(i1) 844 sp=sin_array(i1) 845 re=(a+c) 846 ie=(b+d) 847 ro=(a-c)*cp-(b-d)*sp 848 io=(a-c)*sp+(b-d)*cp 849 rhk=re-io 850 ihk=ie+ro 851 852 zarray(1,i1,i2,i3,inzee)=rhk 853 zarray(2,i1,i2,i3,inzee)=ihk 854 end do 855 end do 856 857 858 !case i3 >=2 859 do i3=2,n3 860 j3=nd3+1-i3 861 !case i2=1 862 i2=1 863 j2=1 864 do i1 = 1,n1h 865 j1=n1h+2-i1 866 867 a=zarray(1,i1,i2,i3,ouzee) 868 b=zarray(2,i1,i2,i3,ouzee) 869 c=zarray(1,j1,j2,j3,ouzee) 870 d=-zarray(2,j1,j2,j3,ouzee) 871 cp=cos_array(i1) 872 sp=sin_array(i1) 873 re=(a+c) 874 ie=(b+d) 875 ro=(a-c)*cp-(b-d)*sp 876 io=(a-c)*sp+(b-d)*cp 877 rhk=re-io 878 ihk=ie+ro 879 880 zarray(1,i1,i2,i3,inzee)=rhk 881 zarray(2,i1,i2,i3,inzee)=ihk 882 end do 883 !case i2 >= 2 884 do i2=2,n2 885 j2=nd2+1-i2 886 do i1 = 1,n1h 887 j1=n1h+2-i1 888 889 a=zarray(1,i1,i2,i3,ouzee) 890 b=zarray(2,i1,i2,i3,ouzee) 891 c=zarray(1,j1,j2,j3,ouzee) 892 d=-zarray(2,j1,j2,j3,ouzee) 893 cp=cos_array(i1) 894 sp=sin_array(i1) 895 re=(a+c) 896 ie=(b+d) 897 ro=(a-c)*cp-(b-d)*sp 898 io=(a-c)*sp+(b-d)*cp 899 rhk=re-io 900 ihk=ie+ro 901 902 zarray(1,i1,i2,i3,inzee)=rhk 903 zarray(2,i1,i2,i3,inzee)=ihk 904 end do 905 end do 906 907 end do 908 909 !De-allocations 910 SAFE_DEALLOCATE_A(cos_array) 911 SAFE_DEALLOCATE_A(sin_array) 912 913 POP_SUB(kernel_application) 914 end subroutine kernel_application 915 916 !!****h* BigDFT/norm_ind 917 !! NAME 918 !! norm_ind 919 !! 920 !! FUNCTION 921 !! Index in zarray 922 !! 923 !! SOURCE 924 !! 925 subroutine norm_ind(nd1,nd2,nd3,i1,i2,i3,ind) 926 integer :: nd1,nd2,nd3,i1,i2,i3 927 integer :: ind 928 929 !Local variables 930 integer :: a1,a2,a3 931 if ( i1 == nd1 ) then 932 a1=1 933 else 934 a1=i1 935 end if 936 if ( i2 == nd2 ) then 937 a2=1 938 else 939 a2=i2 940 end if 941 if ( i3 == nd3 ) then 942 a3=1 943 else 944 a3=i3 945 end if 946 ind=a1+nd1*(a2-1)+nd1*nd2*(a3-1) 947 end subroutine norm_ind 948 !!*** 949 950 951 !!****h* BigDFT/symm_ind 952 !! NAME 953 !! symm_ind 954 !! 955 !! FUNCTION 956 !! Index in zarray for -g vector 957 !! 958 !! SOURCE 959 !! 960 subroutine symm_ind(nd1,nd2,nd3,i1,i2,i3,ind) 961 integer :: nd1,nd2,nd3,i1,i2,i3 962 integer :: ind 963 964 integer :: a1,a2,a3 965 if (i1 /= 1) then 966 a1=nd1+1-i1 967 else 968 a1=i1 969 end if 970 if (i2 /= 1) then 971 a2=nd2+1-i2 972 else 973 a2=i2 974 end if 975 if (i3 /= 1) then 976 a3=nd3+1-i3 977 else 978 a3=i3 979 end if 980 ind=a1+nd1*(a2-1)+nd1*nd2*(a3-1) 981 end subroutine symm_ind 982 !!*** 983 984 !!****h* BigDFT/zarray_in 985 !! NAME 986 !! zarray_in 987 !! 988 !! FUNCTION 989 !! Put the density into zarray 990 !! 991 !! SOURCE 992 !! 993 subroutine zarray_in(n01,n02,n03,nd1,nd2,nd3,density,zarray) 994 integer :: n01,n02,n03,nd1,nd2,nd3 995 real(8), dimension(n01,n02,n03) :: density 996 real(8), dimension(2,nd1,nd2,nd3) :: zarray 997 998 integer :: i1,i2,i3,n01h,nd1hm,nd3hm,nd2hm 999 1000 PUSH_SUB(zarray_in) 1001 1002 !Half the size of n01 1003 n01h=n01/2 1004 nd1hm=(nd1-1)/2 1005 nd2hm=(nd2-1)/2 1006 nd3hm=(nd3-1)/2 1007 !Set to zero 1008 do i3 = 1,nd3 1009 do i2 = 1,nd2 1010 do i1 = 1,nd1 1011 zarray(1,i1,i2,i3) = 0.0_8 1012 zarray(2,i1,i2,i3) = 0.0_8 1013 end do 1014 end do 1015 end do 1016 !Set zarray 1017 do i3 = 1,n03 1018 do i2 = 1,n02 1019 do i1 = 1,n01h 1020 zarray(1,i1+nd1hm,i2+nd2hm,i3+nd3hm) = density(2*i1-1,i2,i3) 1021 zarray(2,i1+nd1hm,i2+nd2hm,i3+nd3hm) = density(2*i1,i2,i3) 1022 end do 1023 end do 1024 end do 1025 if(modulo(n01,2) == 1) then 1026 do i3 = 1,n03 1027 do i2 = 1,n02 1028 zarray(1,n01h+1+nd1hm,i2+nd2hm,i3+nd3hm) = density(n01,i2,i3) 1029 end do 1030 end do 1031 end if 1032 1033 POP_SUB(zarray_in) 1034 end subroutine zarray_in 1035 !!*** 1036 1037 1038 !!****h* BigDFT/zarray_out 1039 !! NAME 1040 !! zarray_out 1041 !! 1042 !! FUNCTION 1043 !! Set the potential (rhopot) from zarray 1044 !! Calculate the Hartree energy. 1045 !! 1046 !! SOURCE 1047 !! 1048 subroutine zarray_out(n01, n02, n03, nd1, nd2, nd3, rhopot, zarray, factor) 1049 integer, intent(in) :: n01,n02,n03,nd1,nd2,nd3 1050 real(8), intent(out) :: rhopot(n01,n02,n03) 1051 real(8), intent(in) :: zarray(2*nd1,nd2,nd3) ! Convert zarray(2,nd1,nd2,nd3) -> zarray(2*nd1,nd2,nd3) to use i1=1,n01 1052 ! instead of i1=1,n1h + special case for modulo(n01,2) 1053 real(8), intent(in) :: factor 1054 1055 integer :: i1,i2,i3 1056 1057 PUSH_SUB(zarray_out) 1058 1059 do i3 = 1,n03 1060 do i2 = 1,n02 1061 do i1 = 1,n01 1062 rhopot(i1, i2, i3) = factor*zarray(i1,i2,i3) 1063 end do 1064 end do 1065 end do 1066 1067 POP_SUB(zarray_out) 1068 end subroutine zarray_out 1069 !!*** 1070 1071 !!****h* BigDFT/build_kernel 1072 !! NAME 1073 !! build_kernel 1074 !! 1075 !! FUNCTION 1076 !! Build the kernel of a gaussian function 1077 !! for interpolating scaling functions. 1078 !! 1079 !! SYNOPSIS 1080 !! Build the kernel (karrayout) of a gaussian function 1081 !! for interpolating scaling functions 1082 !! $$ K(j) = \int \int \phi(x) g(x`-x) \delta(x`- j) dx dx` $$ 1083 !! 1084 !! n01,n02,n03 Mesh dimensions of the density 1085 !! n1k,n2k,n3k Dimensions of the kernel 1086 !! hgrid Mesh step 1087 !! itype_scf Order of the scaling function (8,14,16) 1088 !! 1089 !! AUTHORS 1090 !! T. Deutsch, L. Genovese 1091 !! COPYRIGHT 1092 !! Copyright (C) 2005 CEA 1093 !! CREATION DATE 1094 !! 13/07/2005 1095 !! 1096 !! MODIFICATION HISTORY 1097 !! 13/09/2005 Use hgrid instead of acell 1098 !! 09/12/2005 Real kernel, stocked only half components 1099 !! 13/12/2005 Add routines to simplify the port into Stefan`s code 1100 !! 1101 !! SOURCE 1102 !! 1103 subroutine build_kernel(n01,n02,n03,nfft1,nfft2,nfft3,hgrid,itype_scf,karrayout) 1104 integer, intent(in) :: n01,n02,n03,nfft1,nfft2,nfft3,itype_scf 1105 real(8), intent(in) :: hgrid 1106 real(8), dimension(nfft1/2+1,nfft2/2+1,nfft3/2+1), intent(out) :: karrayout 1107 1108 !Do not touch !!!! 1109 integer, parameter :: N_GAUSS = 89 1110 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points) 1111 integer, parameter :: n_points = 2**6 1112 1113 !Better p_gauss for calculation 1114 !(the support of the exponential should be inside [-n_range/2,n_range/2]) 1115 real(8), parameter :: p0_ref = 1.d0 1116 real(8), dimension(N_GAUSS) :: p_gauss,w_gauss 1117 1118 real(8), allocatable :: kernel_scf(:), kern_1_scf(:) 1119 real(8), allocatable :: x_scf(:), y_scf(:) 1120 real(8), allocatable :: karrayhalf(:, :, :) 1121 1122 real(8) :: ur_gauss,dr_gauss,acc_gauss,pgauss,kern,a_range 1123 real(8) :: factor,factor2,dx,absci,p0gauss,p0_cell 1124 real(8) :: a1,a2,a3 1125 integer :: nd1,nd2,nd3,n1k,n2k,n3k,n_scf 1126 integer :: i_gauss,n_range,n_cell 1127 integer :: i,n_iter,i1,i2,i3,i_kern 1128 integer :: i01,i02,i03,inkee,n1h,n2h,n3h,nd1h 1129 1130 PUSH_SUB(build_kernel) 1131 1132 !Number of integration points : 2*itype_scf*n_points 1133 n_scf=2*itype_scf*n_points 1134 !Dimensions of Kernel 1135 n1k=nfft1/2+1 ; n2k=nfft2/2+1 ; n3k=nfft3/2+1 1136 n1h=nfft1/2 ; n2h=nfft2/2 ; n3h=nfft3/2 1137 nd1 = nfft1 + modulo(nfft1+1,2) 1138 nd2 = nfft2 + modulo(nfft2+1,2) 1139 nd3 = nfft3 + modulo(nfft3+1,2) 1140 1141 !Half size for the half FFT 1142 nd1h=(nd1+1)/2 1143 1144 !Allocations 1145 SAFE_ALLOCATE(x_scf(0:n_scf)) 1146 SAFE_ALLOCATE(y_scf(0:n_scf)) 1147 1148 !Build the scaling function 1149 call scaling_function(itype_scf,n_scf,n_range,x_scf,y_scf) 1150 !Step grid for the integration 1151 dx = real(n_range,8)/real(n_scf,8) 1152 !Extend the range (no more calculations because fill in by 0.0_8) 1153 n_cell = max(n01,n02,n03) 1154 n_range = max(n_cell,n_range) 1155 1156 !Allocations 1157 SAFE_ALLOCATE(kernel_scf(-n_range:n_range)) 1158 SAFE_ALLOCATE(kern_1_scf(-n_range:n_range)) 1159 1160 !Lengthes of the box (use FFT dimension) 1161 a1 = hgrid * real(n01,8) 1162 a2 = hgrid * real(n02,8) 1163 a3 = hgrid * real(n03,8) 1164 1165 x_scf(:) = hgrid * x_scf(:) 1166 y_scf(:) = 1.d0/hgrid * y_scf(:) 1167 dx = hgrid * dx 1168 !To have a correct integration 1169 p0_cell = p0_ref/(hgrid*hgrid) 1170 1171 !Initialisation of the gaussian (Beylkin) 1172 call gequad(N_GAUSS,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss) 1173 !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3) 1174 !(biggest length in the cube) 1175 !We divide the p_gauss by a_range**2 and a_gauss by a_range 1176 a_range = sqrt(a1*a1+a2*a2+a3*a3) 1177 factor = 1.d0/a_range 1178 !factor2 = factor*factor 1179 factor2 = 1.d0/(a1*a1+a2*a2+a3*a3) 1180 do i_gauss=1,N_GAUSS 1181 p_gauss(i_gauss) = factor2*p_gauss(i_gauss) 1182 end do 1183 do i_gauss=1,N_GAUSS 1184 w_gauss(i_gauss) = factor*w_gauss(i_gauss) 1185 end do 1186 1187 karrayout(:,:,:) = 0.0_8 1188 1189 !Use in this order (better for accuracy). 1190 loop_gauss: do i_gauss=N_GAUSS,1,-1 1191 !Gaussian 1192 pgauss = p_gauss(i_gauss) 1193 1194 !We calculate the number of iterations to go from pgauss to p0_ref 1195 n_iter = nint((log(pgauss) - log(p0_cell))/log(4.d0)) 1196 if (n_iter <= 0)then 1197 n_iter = 0 1198 p0gauss = pgauss 1199 else 1200 p0gauss = pgauss/4.d0**n_iter 1201 end if 1202 1203 !Stupid integration 1204 !Do the integration with the exponential centered in i_kern 1205 kernel_scf(:) = 0.0_8 1206 do i_kern=0,n_range 1207 kern = 0.0_8 1208 do i=0,n_scf 1209 absci = x_scf(i) - real(i_kern,8)*hgrid 1210 absci = absci*absci 1211 kern = kern + y_scf(i)*exp(-p0gauss*absci)*dx 1212 end do 1213 kernel_scf(i_kern) = kern 1214 kernel_scf(-i_kern) = kern 1215 if (abs(kern) < 1.d-18) then 1216 !Too small not useful to calculate 1217 exit 1218 end if 1219 end do 1220 1221 !Start the iteration to go from p0gauss to pgauss 1222 call scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf) 1223 1224 !Add to the kernel. 1225 do i3 = 1,n03 1226 i03 = i3-1 1227 do i2 = 1,n02 1228 i02 = i2-1 1229 do i1 = 1,n01 1230 i01 = i1-1 1231 karrayout(i1,i2,i3) = karrayout(i1,i2,i3) + w_gauss(i_gauss)* & 1232 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03) 1233 end do 1234 end do 1235 end do 1236 1237 end do loop_gauss 1238 1239 SAFE_DEALLOCATE_A(kernel_scf) 1240 SAFE_DEALLOCATE_A(kern_1_scf) 1241 SAFE_DEALLOCATE_A(x_scf) 1242 SAFE_DEALLOCATE_A(y_scf) 1243 1244 !Set karray 1245 SAFE_ALLOCATE(karrayhalf(1:2, 1:nd1h*nd2*nd3, 1:2)) 1246 1247 !Set karray : use mirror symmetries 1248 inkee=1 1249 call karrayhalf_in(n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,& 1250 karrayout,karrayhalf) 1251 call fft(n1h,nfft2,nfft3,nd1h,nd2,nd3,karrayhalf,1,inkee) 1252 !Reconstruct the real kernel 1253 call kernel_recon(n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,& 1254 karrayhalf(1,1,inkee),karrayout) 1255 1256 SAFE_DEALLOCATE_A(karrayhalf) 1257 POP_SUB(build_kernel) 1258 end subroutine Build_Kernel 1259 !!*** 1260 1261 1262 !!****h* BigDFT/calculate_dimensions 1263 !! NAME 1264 !! calculate_dimensions 1265 !! 1266 !! FUNCTION 1267 !! Give the dimensions of the FFT 1268 !! 1269 !! SOURCE 1270 !! 1271 subroutine calculate_dimensions(n01,n02,n03,nfft1,nfft2,nfft3) 1272 integer, intent(in) :: n01,n02,n03 1273 integer, intent(out) :: nfft1,nfft2,nfft3 1274 1275 integer :: i1,i2,i3,l1 1276 1277 PUSH_SUB(calculate_dimensions) 1278 1279 !Test 2*n01, 2*n02, 2*n03 1280 !write(*,*) 'in dimensions_fft',n01,n02,n03 1281 i1=2*n01 1282 i2=2*n02 1283 i3=2*n03 1284 do 1285 call fourier_dim(i1,nfft1) 1286 call fourier_dim(nfft1/2,l1) 1287 if (modulo(nfft1,2) == 0 .and. modulo(l1,2) == 0 .and. 2*l1 == nfft1) then 1288 exit 1289 end if 1290 i1=i1+1 1291 end do 1292 do 1293 call fourier_dim(i2,nfft2) 1294 if (modulo(nfft2,2) == 0) then 1295 exit 1296 end if 1297 i2=i2+1 1298 end do 1299 do 1300 call fourier_dim(i3,nfft3) 1301 if (modulo(nfft3,2) == 0) then 1302 exit 1303 end if 1304 i3=i3+1 1305 end do 1306 !nd1 = nfft1 + modulo(nfft1+1,2) 1307 !nd2 = nfft2 + modulo(nfft2+1,2) 1308 !nd3 = nfft3 + modulo(nfft3+1,2) 1309 !write(*,*) 'out dimensions_fft',nfft1,nfft2,nfft3 1310 1311 POP_SUB(calculate_dimensions) 1312 end subroutine calculate_dimensions 1313 !!*** 1314 1315 1316 !!****h* BigDFT/karrayhalf_in 1317 !! NAME 1318 !! karrayhalf_in 1319 !! 1320 !! FUNCTION 1321 !! Put in the array for4446666444 FFT 1322 !! 1323 !! SOURCE 1324 !! 1325 subroutine karrayhalf_in(n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3, kernel,karrayhalf) 1326 integer, intent(in) :: n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3 1327 real(8), dimension(n1k,n2k,n3k), intent(in) :: kernel 1328 real(8), dimension(2,(nd1+1)/2,nd2,nd3), intent(out) :: karrayhalf 1329 1330 real(8), dimension(:), allocatable :: karray 1331 integer :: i1,i2,i3,nd1h,n1h,n2h,n3h 1332 1333 PUSH_SUB(karrayhalf_in) 1334 1335 !Body 1336 n1h=nfft1/2 1337 n2h=nfft2/2 1338 n3h=nfft3/2 1339 1340 SAFE_ALLOCATE(karray(1:nfft1)) 1341 1342 nd1h=(nd1+1)/2 1343 karrayhalf(:,:,:,:) = 0.0_8 1344 do i3 = 1,n03 1345 do i2 = 1,n02 1346 karray(:) = 0.0_8 1347 do i1 = 1,n01 1348 karray(i1+n1h) = kernel(i1,i2,i3) 1349 end do 1350 do i1 = 2,n01 1351 karray(n1h-i1+1+nd1-nfft1) = kernel(i1,i2,i3) 1352 end do 1353 do i1 = 1,n1h 1354 karrayhalf(1,i1,i2+n2h,i3+n3h) = karray(2*i1-1) 1355 karrayhalf(2,i1,i2+n2h,i3+n3h) = karray(2*i1) 1356 end do 1357 end do 1358 do i2 = 2,n02 1359 do i1 = 1,nd1h 1360 karrayhalf(:,i1,n2h-i2+1+nd2-nfft2,i3+n3h) = & 1361 karrayhalf(:,i1,i2+n2h,i3+n3h) 1362 end do 1363 end do 1364 end do 1365 do i3 = 2,n03 1366 do i2 = 1,nd2 1367 do i1 = 1,nd1h 1368 karrayhalf(:,i1,i2,n3h-i3+1+nd3-nfft3) = karrayhalf(:,i1,i2,i3+n3h) 1369 end do 1370 end do 1371 end do 1372 1373 SAFE_DEALLOCATE_A(karray) 1374 POP_SUB(karrayhalf_in) 1375 end subroutine karrayhalf_in 1376 !!*** 1377 1378 1379 !!****h* BigDFT/kernel_recon 1380 !! NAME 1381 !! kernel_recon 1382 !! 1383 !! FUNCTION 1384 !! Reconstruction of the kernel from the FFT array zarray 1385 !! We keep only the half kernel in each direction (x,y,z). 1386 !! 1387 !! SOURCE 1388 !! 1389 subroutine kernel_recon(n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,zarray,karray) 1390 integer, intent(in) :: n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3 1391 real(8), dimension(2,(nd1+1)/2*nd2*nd3), intent(in) :: zarray 1392 real(8), dimension(n1k,n2k,n3k), intent(out) :: karray 1393 1394 real(8), dimension(:), allocatable :: cos_array,sin_array 1395 integer :: i1,i2,i3,ind1,ind2,nd1h,n1h,n2h,n3h 1396 real(8) :: rfe,ife,rfo,ifo,cp,sp,rk,ik,a,b,c,d,pi2 1397 1398 PUSH_SUB(kernel_recon) 1399 1400 !Body 1401 n1h=nfft1/2 1402 n2h=nfft2/2 1403 n3h=nfft3/2 1404 nd1h=(nd1+1)/2 1405 pi2=8.d0*datan(1.d0) 1406 pi2=pi2/real(nfft1,8) 1407 1408 SAFE_ALLOCATE(cos_array(1:nd1h)) 1409 SAFE_ALLOCATE(sin_array(1:nd1h)) 1410 1411 do i1 = 1,nd1h 1412 cos_array(i1)= dcos(pi2*(i1-1)) 1413 sin_array(i1)=-dsin(pi2*(i1-1)) 1414 end do 1415 do i3 = 1,n3h+1 1416 do i2 = 1,n2h+1 1417 do i1 = 1,nd1h 1418 call norm_ind(nd1h,nd2,nd3,i1,i2,i3,ind1) 1419 call symm_ind(nd1h,nd2,nd3,i1,i2,i3,ind2) 1420 a=zarray(1,ind1) 1421 b=zarray(2,ind1) 1422 c=zarray(1,ind2) 1423 d=zarray(2,ind2) 1424 rfe=0.5d0*(a+c) 1425 ife=0.5d0*(b-d) 1426 rfo=0.5d0*(a-c) 1427 ifo=0.5d0*(b+d) 1428 cp=cos_array(i1) 1429 sp=sin_array(i1) 1430 rk=rfe+cp*ifo-sp*rfo 1431 ik=ife-cp*rfo-sp*ifo 1432 !For big dimension 1.d-9 otherwise 1.d-10 1433 !Remove the test 1434 !if(abs(ik) >= 1.d-10) then 1435 ! print *,"non real kernel FFT",i1,i2,i3,ik 1436 ! stop 1437 !end if 1438 !Build the intermediate FFT convolution (full) 1439 !call norm_ind(nd1,nd2,nd3,i1,i2,i3,indA) 1440 karray(i1,i2,i3)=rk 1441 end do 1442 end do 1443 end do 1444 1445 SAFE_DEALLOCATE_A(cos_array) 1446 SAFE_DEALLOCATE_A(sin_array) 1447 1448 POP_SUB(kernel_recon) 1449 end subroutine kernel_recon 1450 !!*** 1451 1452 !!****h* BigDFT/par_calculate_dimensions 1453 !! NAME 1454 !! par_calculate_dimensions 1455 !! 1456 !! FUNCTION 1457 !! Calculate four sets of dimension needed for the calculation of the 1458 !! zero-padded convolution 1459 !! 1460 !! SYNOPSIS 1461 !! n01,n02,n03 original real dimensions (input) 1462 !! 1463 !! m1,m2,m3 original real dimension with the dimension 2 and 3 exchanged 1464 !! 1465 !! n1,n2 the first FFT even dimensions greater that 2*m1, 2*m2 1466 !! n3 the double of the first FFT even dimension greater than m3 1467 !! (improved for the HalFFT procedure) 1468 !! 1469 !! md1,md2,md3 half of n1,n2,n3 dimension. They contain the real unpadded space, 1470 !! properly enlarged to be compatible with the FFT dimensions n_i. 1471 !! md2 is further enlarged to be a multiple of nproc 1472 !! 1473 !! nd1,nd2,nd3 fourier dimensions for which the kernel FFT is injective, 1474 !! formally 1/8 of the fourier grid. Here the dimension nd3 is 1475 !! enlarged to be a multiple of nproc 1476 !! 1477 !! WARNING 1478 !! The dimension m2 and m3 correspond to n03 and n02 respectively 1479 !! this is needed since the convolution routine manage arrays of dimension 1480 !! (md1,md3,md2/nproc) 1481 !! 1482 !! AUTHOR 1483 !! Luigi Genovese 1484 !! CREATION DATE 1485 !! February 2006 1486 !! 1487 !! SOURCE 1488 !! 1489 subroutine par_calculate_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3, md1,md2,md3,nd1,nd2,nd3,nproc) 1490 integer, intent(in) :: n01,n02,n03,nproc 1491 integer, intent(out) :: m1,m2,m3,n1,n2,n3,md1,md2,md3,nd1,nd2,nd3 1492 1493 integer :: l1,l2,l3 1494 1495 PUSH_SUB(par_calculate_dimensions) 1496 1497 !dimensions of the density in the real space, inverted for convenience 1498 1499 m1=n01 1500 m2=n03 1501 m3=n02 1502 1503 ! real space grid dimension (suitable for number of processors) 1504 1505 ! n1=2*m1; n2=2*m2; n3=2*m3 1506 1507 l1=2*m1 1508 l2=2*m2 1509 l3=m3 !beware of the half dimension 1510 do 1511 !this is for the FFT of the kernel 1512 !one can erase it when the kernel is parallelized 1513 call fourier_dim(l1,n1) 1514 !call fourier_dim(n1/2,l1A) 1515 if (modulo(n1,2) == 0&! .and. 2*l1A == n1 1516 ) then 1517 exit 1518 end if 1519 l1=l1+1 1520 end do 1521 do 1522 call fourier_dim(l2,n2) 1523 if (modulo(n2,2) == 0) then 1524 exit 1525 end if 1526 l2=l2+1 1527 end do 1528 do 1529 call fourier_dim(l3,n3) 1530 !call fourier_dim(n3/2,l3A) 1531 if (modulo(n3,2) == 0 &!.and. 2*l3A == n3 .and. modulo(l3A,2) == 0 1532 ) then 1533 exit 1534 end if 1535 l3=l3+1 1536 end do 1537 n3=2*n3 1538 1539 !dimensions that contain the unpadded real space, 1540 ! compatible with the number of processes 1541 md1=n1/2 1542 md2=n2/2 1543 md3=n3/2 1544151 if (nproc*(md2/nproc) < n2/2) then 1545 md2=md2+1 1546 goto 151 1547 end if 1548 1549 1550 !dimensions of the kernel, 1/8 of the total volume, 1551 !compatible with nproc 1552 1553 nd1=n1/2+1; nd2=n2/2+1 1554 nd3=n3/2+1 1555250 if (modulo(nd3,nproc) /= 0) then 1556 nd3=nd3+1 1557 goto 250 1558 end if 1559 1560 POP_SUB(par_calculate_dimensions) 1561 end subroutine par_calculate_dimensions 1562 !!*** 1563 1564 1565 !!****h* BigDFT/par_psolver_kernel 1566 !! NAME 1567 !! par_psolver_kernel 1568 !! 1569 !! FUNCTION 1570 !! Solver of Poisson equation applying a kernel, parallel computation 1571 !! 1572 !! SYNOPSIS 1573 !! Poisson solver applying a kernel and 1574 !! using Fourier transform for the convolution. 1575 !! rhopot : input -> the density 1576 !! output -> the Hartree potential + pot_ion 1577 !! All the processes manage the same global rhopot array 1578 !! The potential pot_ion is ADDED in the array rhopot. 1579 !! Calculate also the Hartree potential 1580 !! 1581 !! Replaces the charge density contained in rhopot 1582 !! by the Hartree stored as well in rhopot. 1583 !! If xc_on is true, it also adds the XC potential and 1584 !! ionic potential pot_ion 1585 !! 1586 !! kernelLOC: the kernel in fourier space, calculated from ParBuil_Kernel routine 1587 !! it is a local vector (each process have its own part) 1588 !! 1589 !! comm: MPI communicator to use 1590 !! 1591 !! We double the size of the mesh except in one dimension 1592 !! in order to use the property of the density to be real. 1593 !! WARNING 1594 !! 1595 !! AUTHOR 1596 !! Luigi Genovese 1597 !! CREATION DATE 1598 !! February 2006 1599 !! 1600 !! SOURCE 1601 !! 1602 subroutine par_psolver_kernel(n01, n02, n03, nd1, nd2, nd3, hgrid, kernelLOC, rhopot, iproc, nproc, comm) 1603 integer, intent(in) :: n01,n02,n03,iproc,nproc 1604 integer, intent(inout) :: nd1,nd2,nd3 1605 real(8), intent(in) :: hgrid 1606 real(8), intent(in), dimension(nd1,nd2,nd3/nproc) :: kernelLOC 1607 real(8), intent(inout), dimension(n01,n02,n03) :: rhopot 1608 integer, intent(in) :: comm 1609 1610 integer :: m1,m2,m3,n1,n2,n3,md1,md2,md3 1611 1612 PUSH_SUB(par_psolver_kernel) 1613 1614 call par_calculate_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc) 1615 call pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelLOC, hgrid, comm) 1616 1617 POP_SUB(par_psolver_kernel) 1618 end subroutine par_psolver_kernel 1619 !!*** 1620 1621 1622 !!****h* BigDFT/pconvxc_off 1623 !! NAME 1624 !! pconvxc_off 1625 !! 1626 !! FUNCTION 1627 !! Calculate the parallel convolution with the kernel 1628 !! without the exchange-correlation part 1629 !! 1630 !! SYNOPSIS 1631 !! Poisson solver applying a kernel and 1632 !! using Fourier transform for the convolution. 1633 !! rhopot : input -> the density 1634 !! output -> the Hartree potential + pot_ion 1635 !! All the processes manage the same global rhopot array 1636 !! The potential pot_ion is ADDED in the array rhopot. 1637 !! Calculate also the Hartree potential 1638 !! 1639 !! Replaces the charge density contained in rhopot 1640 !! by the Hartree stored as well in rhopot. 1641 !! 1642 !! kernelLOC: the kernel in fourier space, calculated from ParBuild_Kernel routine 1643 !! it is a local vector (each process have its own part) 1644 !! 1645 !! comm: MPI communicator to use 1646 !! 1647 !! We double the size of the mesh except in one dimension 1648 !! in order to use the property of the density to be real. 1649 !! WARNING 1650 !! 1651 !! AUTHOR 1652 !! Luigi Genovese 1653 !! CREATION DATE 1654 !! February 2006 1655 !! 1656 !! SOURCE 1657 !! 1658 subroutine pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelloc, hgrid, comm) 1659 integer, intent(in) :: m1,m2,m3,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,iproc,nproc 1660 real(8), dimension(nd1,nd2,nd3/nproc), intent(in) :: kernelloc 1661 real(8), dimension(m1,m3,m2), intent(inout) :: rhopot 1662 real(8), intent(in) :: hgrid 1663 integer, intent(in) :: comm 1664 1665#if defined(HAVE_MPI) 1666 !Local variables 1667 integer :: istart,iend,jend,jproc 1668 real(8) :: scal 1669 real(8), dimension(:,:,:), allocatable :: zf, lrhopot(:, :, :) 1670 integer, dimension(:,:), allocatable :: gather_arr 1671 type(profile_t), save :: prof 1672 1673 PUSH_SUB(pconvxc_off) 1674 1675 !factor to be used to keep unitarity 1676 scal=hgrid**3/real(n1*n2*n3,8) 1677 1678 SAFE_ALLOCATE(zf(1:md1, 1:md3, 1:md2/nproc)) 1679 SAFE_ALLOCATE(gather_arr(0:nproc-1, 1:2)) 1680 1681 !Here we insert the process-related values of the density, starting from the total density 1682 call enterdensity(rhopot(1,1,1), m1, m2, m3, md1, md2, md3, iproc, nproc, zf(1,1,1)) 1683 1684 !this routine builds the values for each process of the potential (zf), multiplying by the factor 1685 call convolxc_off(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, kernelloc, zf, scal, comm) 1686 1687 !building the array of the data to be sent from each process 1688 !and the array of the displacement 1689 do jproc = 0, nproc - 1 1690 istart = min(jproc*(md2/nproc),m2-1) 1691 jend = max(min(md2/nproc, m2 - md2/nproc*jproc), 0) 1692 gather_arr(jproc, 1) = m1*m3*jend 1693 gather_arr(jproc, 2) = istart*m1*m3 1694 end do 1695 1696 !assign the distributed density to the rhopot array 1697 istart=min(iproc*(md2/nproc),m2-1) 1698 jend=max(min(md2/nproc,m2-md2/nproc*iproc),0) 1699 iend=istart+jend 1700 1701 if(jend == 0) jend = 1 1702 1703 SAFE_ALLOCATE(lrhopot(1:m1, 1:m3, 1:jend)) 1704 1705 lrhopot(1:m1, 1:m3, 1:jend) = zf(1:m1, 1:m3, 1:jend) 1706 1707 call profiling_in(prof, "ISF_GATHER") 1708 call MPI_Allgatherv(lrhopot(1, 1, 1), gather_arr(iproc, 1), MPI_DOUBLE_PRECISION, rhopot(1, 1, 1), gather_arr(0, 1),& 1709 gather_arr(0, 2), MPI_DOUBLE_PRECISION, comm, mpi_err) 1710 call profiling_out(prof) 1711 1712 SAFE_DEALLOCATE_A(zf) 1713 SAFE_DEALLOCATE_A(lrhopot) 1714 SAFE_DEALLOCATE_A(gather_arr) 1715 1716 POP_SUB(pconvxc_off) 1717#endif 1718 end subroutine pconvxc_off 1719!!*** 1720 1721 1722 !!****h* BigDFT/enterdensity 1723 !! NAME 1724 !! enterdensity 1725 !! 1726 !! FUNCTION 1727 !! 1728 !! Define a real space process-dependent vector zf with the global dimensions that are half of the FFT grid 1729 !! in order to perform convolution. The dimension md2 is a multiple of nproc 1730 !! Can be used also to define the local part of pot_ion 1731 !! 1732 !! AUTHOR 1733 !! L. Genovese 1734 !! CREATION DATE 1735 !! February 2006 1736 !! 1737 !! SOURCE 1738 !! 1739 subroutine enterdensity(rhopot,m1,m2,m3,md1,md2,md3,iproc,nproc,zf) 1740 integer, intent(in) :: m1,m2,m3,md1,md2,md3,iproc,nproc 1741 real(8), dimension(0:md1-1,0:md3-1,0:md2/nproc-1), intent(out) :: zf 1742 real(8), dimension(0:m1-1,0:m3-1,0:m2-1), intent(in) :: rhopot 1743 1744 integer :: j1,j2,j3,jp2 1745 1746 PUSH_SUB(enterdensity) 1747 1748 !Body 1749 do jp2=0,md2/nproc-1 1750 j2=iproc*(md2/nproc)+jp2 1751 if (j2 <= m2-1) then 1752 do j3=0,m3-1 1753 do j1=0,m1-1 1754 zf(j1,j3,jp2)=rhopot(j1,j3,j2) 1755 end do 1756 do j1=m1,md1-1 1757 zf(j1,j3,jp2)=0.0_8 1758 end do 1759 end do 1760 do j3=m3,md3-1 1761 do j1=0,md1-1 1762 zf(j1,j3,jp2)=0.0_8 1763 end do 1764 end do 1765 else 1766 do j3=0,md3-1 1767 do j1=0,md1-1 1768 zf(j1,j3,jp2)=0.0_8 1769 end do 1770 end do 1771 end if 1772 end do 1773 1774 POP_SUB(enterdensity) 1775 end subroutine enterdensity 1776 !!*** 1777 1778 1779 !!****h* BigDFT/par_build_kernel 1780 !! NAME 1781 !! par_build_kernel 1782 !! 1783 !! FUNCTION 1784 !! Build the kernel of a gaussian function 1785 !! for interpolating scaling functions. 1786 !! Do the parallel HalFFT of the symmetrized function and stores into 1787 !! memory only 1/8 of the grid divided by the number of processes nproc 1788 !! 1789 !! SYNOPSIS 1790 !! Build the kernel (karray) of a gaussian function 1791 !! for interpolating scaling functions 1792 !! $$ K(j) = \sum_k \omega_k \int \int \phi(x) g_k(y-x) \delta(y-j) dx dy $$ 1793 !! 1794 !! n01,n02,n03 Mesh dimensions of the density 1795 !! nfft1,nfft2,nfft3 Dimensions of the FFT grid (HalFFT in the third direction) 1796 !! n1k,n2k,n3k Dimensions of the kernel FFT 1797 !! hgrid Mesh step 1798 !! itype_scf Order of the scaling function (8,14,16) 1799 !! comm MPI communicator to use 1800 !! 1801 !! AUTHORS 1802 !! T. Deutsch, L. Genovese 1803 !! CREATION DATE 1804 !! February 2006 1805 !! 1806 !! SOURCE 1807 !! 1808 subroutine par_build_kernel(n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,hgrid,itype_scf, & 1809 iproc,nproc,comm,karrayoutLOC) 1810 integer, intent(in) :: n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,itype_scf,iproc,nproc 1811 real(8), intent(in) :: hgrid 1812 real(8), intent(out) :: karrayoutLOC(1:n1k, 1:n2k, 1:n3k/nproc) 1813 integer, intent(in) :: comm 1814 1815 !Do not touch !!!! 1816 integer, parameter :: N_GAUSS = 89 1817 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points) 1818 integer, parameter :: N_POINTS = 2**6 1819 1820 !Better p_gauss for calculation 1821 !(the support of the exponential should be inside [-n_range/2,n_range/2]) 1822 real(8), parameter :: p0_ref = 1.d0 1823 real(8) :: p_gauss(N_GAUSS), w_gauss(N_GAUSS) 1824 1825 real(8), dimension(:), allocatable :: kernel_scf,kern_1_scf 1826 real(8), dimension(:), allocatable :: x_scf ,y_scf 1827 real(8), dimension(:,:,:,:), allocatable :: karrayfour 1828 real(8), dimension(:,:,:), allocatable :: karray 1829 1830 real(8) :: ur_gauss,dr_gauss,acc_gauss,pgauss,kern,a_range 1831 real(8) :: factor,factor2,dx,absci,p0gauss,p0_cell 1832 real(8) :: a1,a2,a3 1833 integer :: n_scf,nker1,nker2,nker3 1834 integer :: i_gauss,n_range,n_cell,istart,iend,istart1,istart2,iend1,iend2 1835 integer :: i,n_iter,i1,i2,i3,i_kern 1836 integer :: i01,i02,i03,n1h,n2h,n3h 1837 1838 PUSH_SUB(par_build_kernel) 1839 1840 !Number of integration points : 2*itype_scf*n_points 1841 n_scf=2*itype_scf*n_points 1842 !Set karray 1843 !dimension test 1844 1845 !here we must set the dimensions for the fft part, starting from the nfft 1846 !remember that actually nfft2 is associated with n03 and viceversa 1847 1848 !dimensions that define the center of symmetry 1849 n1h=nfft1/2 1850 n2h=nfft2/2 1851 n3h=nfft3/2 1852 1853 !Auxiliary dimensions only for building the FFT part 1854 nker1=nfft1 1855 nker2=nfft2 1856 nker3=nfft3/2+1 1857 1858 !adjusting the last two dimensions to be multiples of nproc 1859 do 1860 if(modulo(nker2,nproc) == 0) exit 1861 nker2=nker2+1 1862 end do 1863 do 1864 if(modulo(nker3,nproc) == 0) exit 1865 nker3=nker3+1 1866 end do 1867 1868 !this will be the array of the kernel in the real space 1869 SAFE_ALLOCATE(karray(1:nker1,1:nfft3,1:nker2/nproc)) 1870 1871 !defining proper extremes for the calculation of the 1872 !local part of the kernel 1873 1874 istart=iproc*nker2/nproc+1 1875 iend=min((iproc+1)*nker2/nproc,n2h+n03) 1876 1877 istart1=istart 1878 if(iproc == 0) istart1=n2h-n03+2 1879 1880 iend2=iend 1881 1882 iend1=n2h 1883 istart2=n2h+1 1884 if(istart > n2h) then 1885 iend1=istart1-1 1886 istart2=istart 1887 end if 1888 if(iend <= n2h) then 1889 istart2=iend2+1 1890 iend1=iend 1891 end if 1892 1893!!!!!START KERNEL CONSTRUCTION 1894 ! if(iproc == 0) then 1895 ! write(unit=*,fmt="(1x,a,i0,a)") & 1896 ! "Build the kernel in parallel using a sum of ",N_GAUSS," gaussians" 1897 ! write(unit=*,fmt="(1x,a,i0,a)") & 1898 ! "Use interpolating scaling functions of ",itype_scf," order" 1899 ! end if 1900 1901 SAFE_ALLOCATE(x_scf(0:n_scf)) 1902 SAFE_ALLOCATE(y_scf(0:n_scf)) 1903 1904 !Build the scaling function 1905 call scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf) 1906 !Step grid for the integration 1907 dx = real(n_range,8)/real(n_scf,8) 1908 !Extend the range (no more calculations because fill in by 0.0_8) 1909 n_cell = max(n01,n02,n03) 1910 n_range = max(n_cell,n_range) 1911 1912 !Allocations 1913 SAFE_ALLOCATE(kernel_scf(-n_range:n_range)) 1914 SAFE_ALLOCATE(kern_1_scf(-n_range:n_range)) 1915 1916 !Lengthes of the box (use FFT dimension) 1917 a1 = hgrid * real(n01,8) 1918 a2 = hgrid * real(n02,8) 1919 a3 = hgrid * real(n03,8) 1920 1921 x_scf(:) = hgrid * x_scf(:) 1922 y_scf(:) = 1.d0/hgrid * y_scf(:) 1923 dx = hgrid * dx 1924 !To have a correct integration 1925 p0_cell = p0_ref/(hgrid*hgrid) 1926 1927 !Initialization of the gaussian (Beylkin) 1928 call gequad(N_GAUSS,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss) 1929 !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3) 1930 !(biggest length in the cube) 1931 !We divide the p_gauss by a_range**2 and a_gauss by a_range 1932 a_range = sqrt(a1*a1+a2*a2+a3*a3) 1933 factor = 1.d0/a_range 1934 !factor2 = factor*factor 1935 factor2 = 1.d0/(a1*a1+a2*a2+a3*a3) 1936 do i_gauss=1,N_GAUSS 1937 p_gauss(i_gauss) = factor2*p_gauss(i_gauss) 1938 end do 1939 do i_gauss=1,N_GAUSS 1940 w_gauss(i_gauss) = factor*w_gauss(i_gauss) 1941 end do 1942 1943 karray(:,:,:) = 0.0_8 1944 !Use in this order (better for accuracy). 1945 loop_gauss: do i_gauss = N_GAUSS, 1, -1 1946 !Gaussian 1947 pgauss = p_gauss(i_gauss) 1948 1949 !We calculate the number of iterations to go from pgauss to p0_ref 1950 n_iter = nint((log(pgauss) - log(p0_cell))/log(4.d0)) 1951 if (n_iter <= 0)then 1952 n_iter = 0 1953 p0gauss = pgauss 1954 else 1955 p0gauss = pgauss/4.d0**n_iter 1956 end if 1957 1958 !Stupid integration 1959 !Do the integration with the exponential centered in i_kern 1960 kernel_scf(:) = 0.0_8 1961 do i_kern=0,n_range 1962 kern = 0.0_8 1963 do i=0,n_scf 1964 absci = x_scf(i) - real(i_kern,8)*hgrid 1965 absci = absci*absci 1966 kern = kern + y_scf(i)*exp(-p0gauss*absci)*dx 1967 end do 1968 kernel_scf(i_kern) = kern 1969 kernel_scf(-i_kern) = kern 1970 if (abs(kern) < 1.d-18) then 1971 !Too small not useful to calculate 1972 exit 1973 end if 1974 end do 1975 1976 !Start the iteration to go from p0gauss to pgauss 1977 call scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf) 1978 1979 !Add to the kernel (only the local part) 1980 1981 do i3=istart1,iend1 1982 i03 = n2h - i3 + 1 1983 do i2 = 1,n02 1984 i02 = i2-1 1985 do i1 = 1,n01 1986 i01 = i1-1 1987 karray(i1+n1h,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1) + w_gauss(i_gauss)* & 1988 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03) 1989 end do 1990 end do 1991 end do 1992 do i3=istart2,iend2 1993 i03 = i3 - n2h -1 1994 do i2 = 1,n02 1995 i02 = i2-1 1996 do i1 = 1,n01 1997 i01 = i1-1 1998 karray(i1+n1h,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1) + w_gauss(i_gauss)* & 1999 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03) 2000 end do 2001 end do 2002 end do 2003 2004 2005 end do loop_gauss 2006 2007 !Build the kernel in the real space as an even function, thus having a real FFT 2008 2009 do i3=istart1,iend2 2010 do i2 = 1,n02 2011 do i1=2,n01 2012 karray(n1h+2-i1,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1) 2013 end do 2014 end do 2015 do i2=2,n02 2016 do i1 = 1,nker1 2017 karray(i1,n3h+2-i2,i3-istart+1) = karray(i1,i2+n3h,i3-istart+1) 2018 end do 2019 end do 2020 end do 2021 2022 2023 !De-allocations 2024 SAFE_DEALLOCATE_A(kernel_scf) 2025 SAFE_DEALLOCATE_A(kern_1_scf) 2026 SAFE_DEALLOCATE_A(x_scf) 2027 SAFE_DEALLOCATE_A(y_scf) 2028 2029!!!!END KERNEL CONSTRUCTION 2030 2031 SAFE_ALLOCATE(karrayfour(1:2, 1:nker1, 1:nker2, 1:nker3/nproc)) 2032 2033 ! if(iproc == 0) print *,"Do a 3D PHalFFT for the kernel" 2034 2035 call kernelfft(nfft1,nfft2,nfft3,nker1,nker2,nker3,nproc,iproc,karray,karrayfour,comm) 2036 2037 !Reconstruct the real kernel FFT 2038 do i3 = 1,n3k/nproc 2039 do i2 = 1,n2k 2040 do i1 = 1,n1k 2041 karrayoutLOC(i1,i2,i3)=karrayfour(1,i1,i2,i3) 2042 end do 2043 end do 2044 end do 2045 2046 !De-allocations 2047 SAFE_DEALLOCATE_A(karray) 2048 SAFE_DEALLOCATE_A(karrayfour) 2049 POP_SUB(par_build_kernel) 2050 end subroutine par_build_kernel 2051 !!*** 2052 2053 ! ------------------------------------------------------------------------- 2054 2055 subroutine gequad(n_gauss, p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss) 2056 integer, intent(in) :: n_gauss 2057 real(8), intent(out) :: p_gauss(:) 2058 real(8), intent(out) :: w_gauss(:) 2059 real(8), intent(out) :: ur_gauss 2060 real(8), intent(out) :: dr_gauss 2061 real(8), intent(out) :: acc_gauss 2062 2063 integer :: iunit, i, idx 2064 2065 PUSH_SUB(gequad) 2066 2067 ur_gauss = 1.0_8 2068 dr_gauss = 1.0e-08_8 2069 acc_gauss = 1.0e-08_8 2070 2071 iunit = io_open(trim(conf%share)//'/gequad.data', action = 'read', status = 'old', die = .true.) 2072 2073 do i = 1, n_gauss 2074 read(iunit, *) idx, p_gauss(i), w_gauss(i) 2075 end do 2076 2077 call io_close(iunit) 2078 2079 POP_SUB(gequad) 2080 end subroutine gequad 2081 2082end module poisson_isf_oct_m 2083 2084!! Local Variables: 2085!! mode: f90 2086!! coding: utf-8 2087!! End: 2088