1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2019 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5 6! ************************************************************************************************** 7!> \brief Calculate the plane wave density by collocating the primitive Gaussian 8!> functions (pgf). 9!> \par History 10!> - rewrote collocate for increased accuracy and speed 11!> - introduced the PGI hack for increased speed with that compiler 12!> (22.02.02) 13!> - Added Multiple Grid feature 14!> - new way to get over the grid (01.03.02) 15!> - removed timing calls since they were getting expensive 16!> - Updated with the new QS data structures (09.04.02,MK) 17!> - introduction of the real space grid type ( prelim. version JVdV 05.02) 18!> - parallel FFT (JGH 22.05.02) 19!> - multigrid arrays independent from density (JGH 30.08.02) 20!> - old density stored in g space (JGH 30.08.02) 21!> - distributed real space code (JGH 17.07.03) 22!> - refactoring and new loop ordering (JGH 23.11.03) 23!> - OpenMP parallelization (JGH 03.12.03) 24!> - Modified to compute tau (Joost 12.03) 25!> - removed the incremental density rebuild (Joost 01.04) 26!> - introduced realspace multigridding (Joost 02.04) 27!> - introduced map_consistent (Joost 02.04) 28!> - Addition of the subroutine calculate_atomic_charge_density (TdK, 08.05) 29!> - rewrite of the collocate/integrate kernels (Joost VandeVondele, 03.07) 30!> \author Matthias Krack (03.04.2001) 31!> 1) Joost VandeVondele (01.2002) 32!> Thomas D. Kuehne (04.08.2005) 33! ************************************************************************************************** 34MODULE qs_collocate_density 35 USE ao_util, ONLY: exp_radius_very_extended 36 USE atomic_kind_types, ONLY: atomic_kind_type,& 37 get_atomic_kind,& 38 get_atomic_kind_set 39 USE basis_set_types, ONLY: get_gto_basis_set,& 40 gto_basis_set_type 41 USE cell_types, ONLY: cell_type,& 42 pbc 43 USE cp_control_types, ONLY: dft_control_type 44 USE cp_dbcsr_operations, ONLY: dbcsr_allocate_matrix_set,& 45 dbcsr_deallocate_matrix_set 46 USE cp_fm_types, ONLY: cp_fm_get_element,& 47 cp_fm_get_info,& 48 cp_fm_type 49 USE cube_utils, ONLY: compute_cube_center,& 50 cube_info_type,& 51 return_cube,& 52 return_cube_nonortho 53 USE d3_poly, ONLY: poly_cp2k2d3 54 USE dbcsr_api, ONLY: dbcsr_copy,& 55 dbcsr_get_block_p,& 56 dbcsr_p_type,& 57 dbcsr_type 58 USE external_potential_types, ONLY: get_potential,& 59 gth_potential_type 60 USE gauss_colloc, ONLY: collocGauss 61 USE gaussian_gridlevels, ONLY: gaussian_gridlevel,& 62 gridlevel_info_type 63 USE kinds, ONLY: default_string_length,& 64 dp,& 65 int_8 66 USE lgrid_types, ONLY: lgrid_allocate_grid,& 67 lgrid_type 68 USE lri_environment_types, ONLY: lri_kind_type 69 USE mathconstants, ONLY: fac,& 70 pi,& 71 twopi 72 USE memory_utilities, ONLY: reallocate 73 USE orbital_pointers, ONLY: coset,& 74 indco,& 75 ncoset 76 USE particle_types, ONLY: particle_type 77 USE pw_env_types, ONLY: pw_env_get,& 78 pw_env_type 79 USE pw_methods, ONLY: pw_axpy,& 80 pw_integrate_function,& 81 pw_transfer,& 82 pw_zero 83 USE pw_pool_types, ONLY: pw_pool_create_pw,& 84 pw_pool_give_back_pw,& 85 pw_pool_p_type,& 86 pw_pool_type,& 87 pw_pools_create_pws,& 88 pw_pools_give_back_pws 89 USE pw_types, ONLY: COMPLEXDATA1D,& 90 REALDATA3D,& 91 REALSPACE,& 92 RECIPROCALSPACE,& 93 pw_p_type,& 94 pw_type 95 USE qs_environment_types, ONLY: get_qs_env,& 96 qs_environment_type 97 USE qs_kind_types, ONLY: get_qs_kind,& 98 get_qs_kind_set,& 99 qs_kind_type 100 USE qs_ks_types, ONLY: get_ks_env,& 101 qs_ks_env_type 102 USE qs_modify_pab_block, ONLY: & 103 FUNC_AB, FUNC_ADBmDAB, FUNC_ARB, FUNC_ARDBmDARB, FUNC_DABpADB, FUNC_DADB, FUNC_DER, & 104 FUNC_DX, FUNC_DXDX, FUNC_DXDY, FUNC_DY, FUNC_DYDY, FUNC_DYDZ, FUNC_DZ, FUNC_DZDX, & 105 FUNC_DZDZ, prepare_adb_m_dab, prepare_arb, prepare_ardb_m_darb, prepare_dab_p_adb, & 106 prepare_dadb, prepare_diadib, prepare_diiadiib, prepare_dijadijb 107 USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type 108 USE realspace_grid_types, ONLY: realspace_grid_desc_p_type,& 109 realspace_grid_p_type,& 110 realspace_grid_type,& 111 rs2pw,& 112 rs_grid_release,& 113 rs_grid_retain,& 114 rs_grid_zero,& 115 rs_pw_transfer 116 USE rs_pw_interface, ONLY: density_rs2pw,& 117 density_rs2pw_basic 118 USE task_list_methods, ONLY: int2pair,& 119 rs_distribute_matrix 120 USE task_list_types, ONLY: task_list_type 121 122!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads 123 124#include "./base/base_uses.f90" 125 126 IMPLICIT NONE 127 128 PRIVATE 129 130 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_collocate_density' 131! *** Public subroutines *** 132 133 PUBLIC :: calculate_ppl_grid, & 134 calculate_rho_core, & 135 calculate_lri_rho_elec, & 136 calculate_rho_single_gaussian, & 137 calculate_rho_metal, & 138 calculate_rho_resp_single, & 139 calculate_rho_resp_all, & 140 calculate_rho_elec, & 141 calculate_drho_elec, & 142 calculate_wavefunction, & 143 collocate_pgf_product_gspace, & 144 collocate_pgf_product_rspace, & 145 calculate_rho_nlcc 146 147 INTEGER :: debug_count = 0 148 149CONTAINS 150 151! ************************************************************************************************** 152!> \brief computes the density of the non-linear core correction on the grid 153!> \param rho_nlcc ... 154!> \param qs_env ... 155! ************************************************************************************************** 156 SUBROUTINE calculate_rho_nlcc(rho_nlcc, qs_env) 157 158 TYPE(pw_p_type), INTENT(INOUT) :: rho_nlcc 159 TYPE(qs_environment_type), POINTER :: qs_env 160 161 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_nlcc', & 162 routineP = moduleN//':'//routineN 163 164 INTEGER :: atom_a, handle, iatom, iexp_nlcc, ikind, & 165 ithread, j, lmax_global, n, natom, nc, & 166 nexp_nlcc, ni, npme, nthread 167 INTEGER(KIND=int_8) :: subpatch_pattern 168 INTEGER, DIMENSION(:), POINTER :: atom_list, cores, mylmax, nct_nlcc 169 LOGICAL :: nlcc 170 REAL(KIND=dp) :: alpha, eps_rho_rspace 171 REAL(KIND=dp), DIMENSION(3) :: ra 172 REAL(KIND=dp), DIMENSION(:), POINTER :: alpha_nlcc 173 REAL(KIND=dp), DIMENSION(:, :), POINTER :: cval_nlcc, pab 174 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set 175 TYPE(cell_type), POINTER :: cell 176 TYPE(cube_info_type) :: cube_info 177 TYPE(dft_control_type), POINTER :: dft_control 178 TYPE(gth_potential_type), POINTER :: gth_potential 179 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set 180 TYPE(pw_env_type), POINTER :: pw_env 181 TYPE(pw_pool_type), POINTER :: auxbas_pw_pool 182 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set 183 TYPE(realspace_grid_type), POINTER :: rs_rho 184 185 CALL timeset(routineN, handle) 186 187 NULLIFY (cell, dft_control, pab, particle_set, atomic_kind_set, qs_kind_set, & 188 atom_list, pw_env, rs_rho, auxbas_pw_pool, cores, mylmax) 189 190 CALL get_qs_env(qs_env=qs_env, & 191 atomic_kind_set=atomic_kind_set, & 192 qs_kind_set=qs_kind_set, & 193 cell=cell, & 194 dft_control=dft_control, & 195 particle_set=particle_set, & 196 pw_env=pw_env) 197 CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, & 198 auxbas_pw_pool=auxbas_pw_pool) 199 cube_info = pw_env%cube_info(1) 200 ! be careful in parallel nsmax is choosen with multigrid in mind! 201 CALL rs_grid_retain(rs_rho) 202 CALL rs_grid_zero(rs_rho) 203 204 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 205 206 !Find the maximum la_max over all of the atomic_kind_set 207 !We are using FUNC_AB so no need to cater for different ga_gb_functions 208 lmax_global = 0 209 DO ikind = 1, SIZE(atomic_kind_set) 210 CALL get_qs_kind(qs_kind_set(ikind), gth_potential=gth_potential) 211 IF (.NOT. ASSOCIATED(gth_potential)) CYCLE 212 CALL get_potential(potential=gth_potential, nct_nlcc=mylmax, nlcc_present=nlcc) 213 IF (.NOT. nlcc) CYCLE 214 lmax_global = MAX(MAXVAL(mylmax), lmax_global) 215 END DO 216 !lmax_global set according to ni = 2*nc-2 below 217 lmax_global = 2*lmax_global - 2 218 219 DO ikind = 1, SIZE(atomic_kind_set) 220 CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list) 221 CALL get_qs_kind(qs_kind_set(ikind), gth_potential=gth_potential) 222 223 IF (.NOT. ASSOCIATED(gth_potential)) CYCLE 224 CALL get_potential(potential=gth_potential, nlcc_present=nlcc, nexp_nlcc=nexp_nlcc, & 225 alpha_nlcc=alpha_nlcc, nct_nlcc=nct_nlcc, cval_nlcc=cval_nlcc) 226 227 IF (.NOT. nlcc) CYCLE 228 229 DO iexp_nlcc = 1, nexp_nlcc 230 231 alpha = alpha_nlcc(iexp_nlcc) 232 nc = nct_nlcc(iexp_nlcc) 233 234 ni = ncoset(2*nc - 2) 235 ALLOCATE (pab(ni, 1)) 236 pab = 0._dp 237 238 nthread = 1 239 ithread = 0 240 241 CALL reallocate(cores, 1, natom) 242 npme = 0 243 cores = 0 244 245 ! prepare core function 246 DO j = 1, nc 247 SELECT CASE (j) 248 CASE (1) 249 pab(1, 1) = cval_nlcc(1, iexp_nlcc) 250 CASE (2) 251 n = coset(2, 0, 0) 252 pab(n, 1) = cval_nlcc(2, iexp_nlcc)/alpha**2 253 n = coset(0, 2, 0) 254 pab(n, 1) = cval_nlcc(2, iexp_nlcc)/alpha**2 255 n = coset(0, 0, 2) 256 pab(n, 1) = cval_nlcc(2, iexp_nlcc)/alpha**2 257 CASE (3) 258 n = coset(4, 0, 0) 259 pab(n, 1) = cval_nlcc(3, iexp_nlcc)/alpha**4 260 n = coset(0, 4, 0) 261 pab(n, 1) = cval_nlcc(3, iexp_nlcc)/alpha**4 262 n = coset(0, 0, 4) 263 pab(n, 1) = cval_nlcc(3, iexp_nlcc)/alpha**4 264 n = coset(2, 2, 0) 265 pab(n, 1) = 2._dp*cval_nlcc(3, iexp_nlcc)/alpha**4 266 n = coset(2, 0, 2) 267 pab(n, 1) = 2._dp*cval_nlcc(3, iexp_nlcc)/alpha**4 268 n = coset(0, 2, 2) 269 pab(n, 1) = 2._dp*cval_nlcc(3, iexp_nlcc)/alpha**4 270 CASE (4) 271 n = coset(6, 0, 0) 272 pab(n, 1) = cval_nlcc(4, iexp_nlcc)/alpha**6 273 n = coset(0, 6, 0) 274 pab(n, 1) = cval_nlcc(4, iexp_nlcc)/alpha**6 275 n = coset(0, 0, 6) 276 pab(n, 1) = cval_nlcc(4, iexp_nlcc)/alpha**6 277 n = coset(4, 2, 0) 278 pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6 279 n = coset(4, 0, 2) 280 pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6 281 n = coset(2, 4, 0) 282 pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6 283 n = coset(2, 0, 4) 284 pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6 285 n = coset(0, 4, 2) 286 pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6 287 n = coset(0, 2, 4) 288 pab(n, 1) = 3._dp*cval_nlcc(4, iexp_nlcc)/alpha**6 289 n = coset(2, 2, 2) 290 pab(n, 1) = 6._dp*cval_nlcc(4, iexp_nlcc)/alpha**6 291 CASE DEFAULT 292 CPABORT("") 293 END SELECT 294 END DO 295 IF (dft_control%nspins == 2) pab = pab*0.5_dp 296 297 DO iatom = 1, natom 298 atom_a = atom_list(iatom) 299 ra(:) = pbc(particle_set(atom_a)%r, cell) 300 IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN 301 ! replicated realspace grid, split the atoms up between procs 302 IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN 303 npme = npme + 1 304 cores(npme) = iatom 305 ENDIF 306 ELSE 307 npme = npme + 1 308 cores(npme) = iatom 309 ENDIF 310 END DO 311 312 DO j = 1, npme 313 314 iatom = cores(j) 315 atom_a = atom_list(iatom) 316 ra(:) = pbc(particle_set(atom_a)%r, cell) 317 subpatch_pattern = 0 318 ni = 2*nc - 2 319 CALL collocate_pgf_product_rspace(ni, 1/(2*alpha**2), 0, 0, 0.0_dp, 0, ra, & 320 (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, 1.0_dp, pab, 0, 0, rs_rho, & 321 cell, cube_info, eps_rho_rspace, ga_gb_function=FUNC_AB, & 322 ithread=ithread, use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, & 323 lmax_global=lmax_global) 324 325 END DO 326 327 DEALLOCATE (pab) 328 329 END DO 330 331 END DO 332 333 IF (ASSOCIATED(cores)) THEN 334 DEALLOCATE (cores) 335 END IF 336 337 CALL rs_pw_transfer(rs_rho, rho_nlcc%pw, rs2pw) 338 CALL rs_grid_release(rs_rho) 339 340 CALL timestop(handle) 341 342 END SUBROUTINE calculate_rho_nlcc 343 344! ************************************************************************************************** 345!> \brief computes the local pseudopotential (without erf term) on the grid 346!> \param vppl ... 347!> \param qs_env ... 348! ************************************************************************************************** 349 SUBROUTINE calculate_ppl_grid(vppl, qs_env) 350 351 TYPE(pw_p_type), INTENT(INOUT) :: vppl 352 TYPE(qs_environment_type), POINTER :: qs_env 353 354 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_ppl_grid', & 355 routineP = moduleN//':'//routineN 356 357 INTEGER :: atom_a, handle, iatom, ikind, ithread, & 358 j, lmax_global, lppl, n, natom, ni, & 359 npme, nthread 360 INTEGER(KIND=int_8) :: subpatch_pattern 361 INTEGER, DIMENSION(:), POINTER :: atom_list, cores 362 REAL(KIND=dp) :: alpha, eps_rho_rspace 363 REAL(KIND=dp), DIMENSION(3) :: ra 364 REAL(KIND=dp), DIMENSION(:), POINTER :: cexp_ppl 365 REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab 366 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set 367 TYPE(cell_type), POINTER :: cell 368 TYPE(cube_info_type) :: cube_info 369 TYPE(dft_control_type), POINTER :: dft_control 370 TYPE(gth_potential_type), POINTER :: gth_potential 371 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set 372 TYPE(pw_env_type), POINTER :: pw_env 373 TYPE(pw_pool_type), POINTER :: auxbas_pw_pool 374 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set 375 TYPE(realspace_grid_type), POINTER :: rs_rho 376 377 CALL timeset(routineN, handle) 378 379 NULLIFY (cell, dft_control, pab, atomic_kind_set, qs_kind_set, particle_set, & 380 atom_list, pw_env, rs_rho, auxbas_pw_pool, cores) 381 382 CALL get_qs_env(qs_env=qs_env, & 383 atomic_kind_set=atomic_kind_set, & 384 qs_kind_set=qs_kind_set, & 385 cell=cell, & 386 dft_control=dft_control, & 387 particle_set=particle_set, & 388 pw_env=pw_env) 389 CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, & 390 auxbas_pw_pool=auxbas_pw_pool) 391 cube_info = pw_env%cube_info(1) 392 ! be careful in parallel nsmax is choosen with multigrid in mind! 393 CALL rs_grid_retain(rs_rho) 394 CALL rs_grid_zero(rs_rho) 395 396 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 397 398 !Find maximum la_max over all of the atomic kind set where la_max=ni & ni=2*lppl-2 399 !Thus need to find the max of lppl over atomic_kind_set and use to define lmax_global 400 !We are using FUNC_AB so no need to cater for different ga_gb_functions 401 lmax_global = 0 402 DO ikind = 1, SIZE(atomic_kind_set) 403 CALL get_qs_kind(qs_kind_set(ikind), gth_potential=gth_potential) 404 IF (.NOT. ASSOCIATED(gth_potential)) CYCLE 405 CALL get_potential(potential=gth_potential, nexp_ppl=lppl) 406 lmax_global = MAX(lppl, lmax_global) 407 END DO 408 lmax_global = 2*lmax_global - 2 409 410 DO ikind = 1, SIZE(atomic_kind_set) 411 CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list) 412 CALL get_qs_kind(qs_kind_set(ikind), gth_potential=gth_potential) 413 414 IF (.NOT. ASSOCIATED(gth_potential)) CYCLE 415 CALL get_potential(potential=gth_potential, alpha_ppl=alpha, nexp_ppl=lppl, cexp_ppl=cexp_ppl) 416 417 IF (lppl <= 0) CYCLE 418 419 ni = ncoset(2*lppl - 2) 420 ALLOCATE (pab(ni, 1)) 421 pab = 0._dp 422 423 nthread = 1 424 ithread = 0 425 426 CALL reallocate(cores, 1, natom) 427 npme = 0 428 cores = 0 429 430 ! prepare core function 431 DO j = 1, lppl 432 SELECT CASE (j) 433 CASE (1) 434 pab(1, 1) = cexp_ppl(1) 435 CASE (2) 436 n = coset(2, 0, 0) 437 pab(n, 1) = cexp_ppl(2) 438 n = coset(0, 2, 0) 439 pab(n, 1) = cexp_ppl(2) 440 n = coset(0, 0, 2) 441 pab(n, 1) = cexp_ppl(2) 442 CASE (3) 443 n = coset(4, 0, 0) 444 pab(n, 1) = cexp_ppl(3) 445 n = coset(0, 4, 0) 446 pab(n, 1) = cexp_ppl(3) 447 n = coset(0, 0, 4) 448 pab(n, 1) = cexp_ppl(3) 449 n = coset(2, 2, 0) 450 pab(n, 1) = 2._dp*cexp_ppl(3) 451 n = coset(2, 0, 2) 452 pab(n, 1) = 2._dp*cexp_ppl(3) 453 n = coset(0, 2, 2) 454 pab(n, 1) = 2._dp*cexp_ppl(3) 455 CASE (4) 456 n = coset(6, 0, 0) 457 pab(n, 1) = cexp_ppl(4) 458 n = coset(0, 6, 0) 459 pab(n, 1) = cexp_ppl(4) 460 n = coset(0, 0, 6) 461 pab(n, 1) = cexp_ppl(4) 462 n = coset(4, 2, 0) 463 pab(n, 1) = 3._dp*cexp_ppl(4) 464 n = coset(4, 0, 2) 465 pab(n, 1) = 3._dp*cexp_ppl(4) 466 n = coset(2, 4, 0) 467 pab(n, 1) = 3._dp*cexp_ppl(4) 468 n = coset(2, 0, 4) 469 pab(n, 1) = 3._dp*cexp_ppl(4) 470 n = coset(0, 4, 2) 471 pab(n, 1) = 3._dp*cexp_ppl(4) 472 n = coset(0, 2, 4) 473 pab(n, 1) = 3._dp*cexp_ppl(4) 474 n = coset(2, 2, 2) 475 pab(n, 1) = 6._dp*cexp_ppl(4) 476 CASE DEFAULT 477 CPABORT("") 478 END SELECT 479 END DO 480 481 DO iatom = 1, natom 482 atom_a = atom_list(iatom) 483 ra(:) = pbc(particle_set(atom_a)%r, cell) 484 IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN 485 ! replicated realspace grid, split the atoms up between procs 486 IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN 487 npme = npme + 1 488 cores(npme) = iatom 489 ENDIF 490 ELSE 491 npme = npme + 1 492 cores(npme) = iatom 493 ENDIF 494 END DO 495 496 IF (npme .GT. 0) THEN 497 DO j = 1, npme 498 499 iatom = cores(j) 500 atom_a = atom_list(iatom) 501 ra(:) = pbc(particle_set(atom_a)%r, cell) 502 subpatch_pattern = 0 503 ni = 2*lppl - 2 504 CALL collocate_pgf_product_rspace(ni, alpha, 0, 0, 0.0_dp, 0, ra, & 505 (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, 1.0_dp, pab, 0, 0, rs_rho, & 506 cell, cube_info, eps_rho_rspace, ga_gb_function=FUNC_AB, & 507 ithread=ithread, use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, & 508 lmax_global=lmax_global) 509 510 END DO 511 ENDIF 512 513 DEALLOCATE (pab) 514 515 END DO 516 517 IF (ASSOCIATED(cores)) THEN 518 DEALLOCATE (cores) 519 END IF 520 521 CALL rs_pw_transfer(rs_rho, vppl%pw, rs2pw) 522 CALL rs_grid_release(rs_rho) 523 524 CALL timestop(handle) 525 526 END SUBROUTINE calculate_ppl_grid 527 528! ************************************************************************************************** 529!> \brief Collocates the fitted lri density on a grid. 530!> \param lri_rho_g ... 531!> \param lri_rho_r ... 532!> \param qs_env ... 533!> \param lri_coef ... 534!> \param total_rho ... 535!> \param basis_type ... 536!> \param exact_1c_terms ... 537!> \param pmat replicated block diagonal density matrix (optional) 538!> \param atomlist list of atoms to be included (optional) 539!> \par History 540!> 04.2013 541!> \author Dorothea Golze 542! ************************************************************************************************** 543 SUBROUTINE calculate_lri_rho_elec(lri_rho_g, lri_rho_r, qs_env, & 544 lri_coef, total_rho, basis_type, exact_1c_terms, pmat, atomlist) 545 546 TYPE(pw_p_type), INTENT(INOUT) :: lri_rho_g, lri_rho_r 547 TYPE(qs_environment_type), POINTER :: qs_env 548 TYPE(lri_kind_type), DIMENSION(:), POINTER :: lri_coef 549 REAL(KIND=dp), INTENT(OUT) :: total_rho 550 CHARACTER(len=*), INTENT(IN) :: basis_type 551 LOGICAL, INTENT(IN) :: exact_1c_terms 552 TYPE(dbcsr_type), OPTIONAL :: pmat 553 INTEGER, DIMENSION(:), OPTIONAL :: atomlist 554 555 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_lri_rho_elec', & 556 routineP = moduleN//':'//routineN 557 558 INTEGER :: atom_a, dir, group_size, handle, iatom, igrid_level, ikind, ipgf, iset, jpgf, & 559 jset, lmax_global, m1, maxco, maxpgf, maxset, maxsgf, maxsgf_set, my_pos, na1, natom, & 560 nb1, ncoa, ncob, nseta, offset, sgfa, sgfb 561 INTEGER, DIMENSION(3) :: lb, location, tp, ub 562 INTEGER, DIMENSION(:), POINTER :: atom_list, la_max, la_min, mylmax, & 563 npgfa, nsgfa 564 INTEGER, DIMENSION(:, :), POINTER :: first_sgfa 565 LOGICAL :: found, map_consistent 566 LOGICAL, ALLOCATABLE, DIMENSION(:) :: map_it 567 LOGICAL, ALLOCATABLE, DIMENSION(:, :) :: map_it2 568 REAL(KIND=dp) :: eps_rho_rspace, rab2, zetp 569 REAL(KIND=dp), DIMENSION(3) :: ra, rab 570 REAL(KIND=dp), DIMENSION(:), POINTER :: aci 571 REAL(KIND=dp), DIMENSION(:, :), POINTER :: p_block, pab, sphi_a, work, zeta 572 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set 573 TYPE(cell_type), POINTER :: cell 574 TYPE(cube_info_type), DIMENSION(:), POINTER :: cube_info 575 TYPE(dft_control_type), POINTER :: dft_control 576 TYPE(gridlevel_info_type), POINTER :: gridlevel_info 577 TYPE(gto_basis_set_type), POINTER :: lri_basis_set, orb_basis_set 578 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set 579 TYPE(pw_env_type), POINTER :: pw_env 580 TYPE(pw_p_type), DIMENSION(:), POINTER :: mgrid_gspace, mgrid_rspace 581 TYPE(pw_pool_p_type), DIMENSION(:), POINTER :: pw_pools 582 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set 583 TYPE(qs_kind_type), POINTER :: qs_kind 584 TYPE(realspace_grid_p_type), DIMENSION(:), POINTER :: rs_rho 585 TYPE(realspace_grid_type), POINTER :: rs_grid 586 587 NULLIFY (aci, atomic_kind_set, qs_kind_set, atom_list, cell, cube_info, & 588 dft_control, first_sgfa, gridlevel_info, la_max, la_min, lri_basis_set, & 589 mgrid_gspace, mgrid_rspace, npgfa, nsgfa, pab, & 590 particle_set, pw_env, pw_pools, rs_grid, rs_rho, sphi_a, & 591 work, zeta, mylmax) 592 593 CALL timeset(routineN, handle) 594 595 IF (exact_1c_terms) THEN 596 CPASSERT(PRESENT(pmat)) 597 END IF 598 599 CALL get_qs_env(qs_env=qs_env, qs_kind_set=qs_kind_set, & 600 atomic_kind_set=atomic_kind_set, & 601 cell=cell, particle_set=particle_set, & 602 pw_env=pw_env, & 603 dft_control=dft_control) 604 605 cube_info => pw_env%cube_info 606 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 607 map_consistent = dft_control%qs_control%map_consistent 608 gridlevel_info => pw_env%gridlevel_info 609 610 ! *** set up the pw multi-grids *** ! 611 CPASSERT(ASSOCIATED(pw_env)) 612 CALL pw_env_get(pw_env=pw_env, rs_grids=rs_rho, pw_pools=pw_pools) 613 614 CALL pw_pools_create_pws(pw_pools, mgrid_rspace, & 615 use_data=REALDATA3D, & 616 in_space=REALSPACE) 617 618 CALL pw_pools_create_pws(pw_pools, mgrid_gspace, & 619 use_data=COMPLEXDATA1D, & 620 in_space=RECIPROCALSPACE) 621 622 ! *** set up the rs multi-grids *** ! 623 DO igrid_level = 1, gridlevel_info%ngrid_levels 624 CALL rs_grid_retain(rs_rho(igrid_level)%rs_grid) 625 CALL rs_grid_zero(rs_rho(igrid_level)%rs_grid) 626 END DO 627 628 !take maxco from the LRI basis set! 629 CALL get_qs_kind_set(qs_kind_set=qs_kind_set, & 630 maxco=maxco, basis_type=basis_type) 631 632 ALLOCATE (pab(maxco, 1)) 633 offset = 0 634 my_pos = mgrid_rspace(1)%pw%pw_grid%para%my_pos 635 group_size = mgrid_rspace(1)%pw%pw_grid%para%group_size 636 637 !Find lmax_global across atomic_kind_set 638 lmax_global = 0 639 DO ikind = 1, SIZE(atomic_kind_set) 640 CALL get_qs_kind(qs_kind_set(ikind), basis_set=lri_basis_set, basis_type=basis_type) 641 CALL get_gto_basis_set(gto_basis_set=lri_basis_set, lmax=mylmax) 642 lmax_global = MAX(MAXVAL(mylmax), lmax_global) 643 END DO 644 645 DO ikind = 1, SIZE(atomic_kind_set) 646 647 CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list) 648 CALL get_qs_kind(qs_kind_set(ikind), basis_set=lri_basis_set, basis_type=basis_type) 649 650 !Take the lri basis set here! 651 CALL get_gto_basis_set(gto_basis_set=lri_basis_set, lmax=la_max, & 652 lmin=la_min, zet=zeta, nset=nseta, npgf=npgfa, & 653 sphi=sphi_a, first_sgf=first_sgfa, nsgf_set=nsgfa) 654 655 DO iatom = 1, natom 656 atom_a = atom_list(iatom) 657 IF (PRESENT(ATOMLIST)) THEN 658 IF (atomlist(atom_a) == 0) CYCLE 659 END IF 660 ra(:) = pbc(particle_set(atom_a)%r, cell) 661 aci => lri_coef(ikind)%acoef(iatom, :) 662 663 m1 = MAXVAL(npgfa(1:nseta)) 664 ALLOCATE (map_it(m1)) 665 DO iset = 1, nseta 666 ! collocate this set locally? 667 map_it = .FALSE. 668 DO ipgf = 1, npgfa(iset) 669 igrid_level = gaussian_gridlevel(gridlevel_info, zeta(ipgf, iset)) 670 rs_grid => rs_rho(igrid_level)%rs_grid 671 IF (.NOT. ALL(rs_grid%desc%perd == 1)) THEN 672 DO dir = 1, 3 673 ! bounds of local grid (i.e. removing the 'wings'), if periodic 674 tp(dir) = FLOOR(DOT_PRODUCT(cell%h_inv(dir, :), ra)*rs_grid%desc%npts(dir)) 675 tp(dir) = MODULO(tp(dir), rs_grid%desc%npts(dir)) 676 IF (rs_grid%desc%perd(dir) .NE. 1) THEN 677 lb(dir) = rs_grid%lb_local(dir) + rs_grid%desc%border 678 ub(dir) = rs_grid%ub_local(dir) - rs_grid%desc%border 679 ELSE 680 lb(dir) = rs_grid%lb_local(dir) 681 ub(dir) = rs_grid%ub_local(dir) 682 ENDIF 683 ! distributed grid, only map if it is local to the grid 684 location(dir) = tp(dir) + rs_grid%desc%lb(dir) 685 ENDDO 686 IF (lb(1) <= location(1) .AND. location(1) <= ub(1) .AND. & 687 lb(2) <= location(2) .AND. location(2) <= ub(2) .AND. & 688 lb(3) <= location(3) .AND. location(3) <= ub(3)) THEN 689 map_it(ipgf) = .TRUE. 690 ENDIF 691 ELSE 692 ! not distributed, just a round-robin distribution over the full set of CPUs 693 IF (MODULO(offset, group_size) == my_pos) map_it(ipgf) = .TRUE. 694 ENDIF 695 END DO 696 offset = offset + 1 697 698 IF (ANY(map_it(1:npgfa(iset)))) THEN 699 sgfa = first_sgfa(1, iset) 700 ncoa = npgfa(iset)*ncoset(la_max(iset)) 701 m1 = sgfa + nsgfa(iset) - 1 702 ALLOCATE (work(nsgfa(iset), 1)) 703 work(1:nsgfa(iset), 1) = aci(sgfa:m1) 704 pab = 0._dp 705 706 CALL dgemm("N", "N", ncoa, 1, nsgfa(iset), 1.0_dp, lri_basis_set%sphi(1, sgfa), & 707 SIZE(lri_basis_set%sphi, 1), work(1, 1), SIZE(work, 1), 0.0_dp, pab(1, 1), & 708 SIZE(pab, 1)) 709 710 DO ipgf = 1, npgfa(iset) 711 na1 = (ipgf - 1)*ncoset(la_max(iset)) 712 igrid_level = gaussian_gridlevel(gridlevel_info, zeta(ipgf, iset)) 713 rs_grid => rs_rho(igrid_level)%rs_grid 714 IF (map_it(ipgf)) THEN 715 CALL collocate_pgf_product_rspace(la_max=la_max(iset), & 716 zeta=zeta(ipgf, iset), & 717 la_min=la_min(iset), & 718 lb_max=0, zetb=0.0_dp, lb_min=0, & 719 ra=ra, rab=(/0.0_dp, 0.0_dp, 0.0_dp/), & 720 rab2=0.0_dp, scale=1._dp, & 721 pab=pab, o1=na1, o2=0, & 722 rsgrid=rs_grid, cell=cell, & 723 cube_info=cube_info(igrid_level), & 724 eps_rho_rspace=eps_rho_rspace, & 725 ga_gb_function=FUNC_AB, & 726 map_consistent=map_consistent, & 727 lmax_global=lmax_global) 728 ENDIF 729 END DO 730 DEALLOCATE (work) 731 END IF 732 END DO 733 DEALLOCATE (map_it) 734 END DO 735 END DO 736 737 DEALLOCATE (pab) 738 739 ! process the one-center terms 740 IF (exact_1c_terms) THEN 741 ! find maximum numbers 742 maxset = 0 743 maxpgf = 0 744 lmax_global = 0 745 offset = 0 746 DO ikind = 1, SIZE(qs_kind_set) 747 qs_kind => qs_kind_set(ikind) 748 CALL get_qs_kind(qs_kind=qs_kind, & 749 basis_set=orb_basis_set, basis_type="ORB") 750 IF (.NOT. ASSOCIATED(orb_basis_set)) CYCLE 751 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, & 752 npgf=npgfa, nset=nseta, lmax=mylmax) 753 maxset = MAX(nseta, maxset) 754 maxpgf = MAX(MAXVAL(npgfa), maxpgf) 755 lmax_global = MAX(MAXVAL(mylmax), lmax_global) 756 END DO 757 CALL get_qs_kind_set(qs_kind_set=qs_kind_set, & 758 maxco=maxco, & 759 maxsgf=maxsgf, & 760 maxsgf_set=maxsgf_set, & 761 basis_type="ORB") 762 ALLOCATE (pab(maxco, maxco), work(maxco, maxsgf_set)) 763 764 DO ikind = 1, SIZE(atomic_kind_set) 765 CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list) 766 CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type="ORB") 767 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, lmax=la_max, & 768 lmin=la_min, zet=zeta, nset=nseta, npgf=npgfa, & 769 sphi=sphi_a, first_sgf=first_sgfa, nsgf_set=nsgfa) 770 DO iatom = 1, natom 771 atom_a = atom_list(iatom) 772 ra(:) = pbc(particle_set(atom_a)%r, cell) 773 rab(:) = 0.0_dp 774 rab2 = 0.0_dp 775 CALL dbcsr_get_block_p(matrix=pmat, row=atom_a, col=atom_a, BLOCK=p_block, found=found) 776 m1 = MAXVAL(npgfa(1:nseta)) 777 ALLOCATE (map_it2(m1, m1)) 778 DO iset = 1, nseta 779 DO jset = 1, nseta 780 ! processor mappint 781 map_it2 = .FALSE. 782 DO ipgf = 1, npgfa(iset) 783 DO jpgf = 1, npgfa(jset) 784 zetp = zeta(ipgf, iset) + zeta(jpgf, jset) 785 igrid_level = gaussian_gridlevel(gridlevel_info, zetp) 786 rs_grid => rs_rho(igrid_level)%rs_grid 787 IF (.NOT. ALL(rs_grid%desc%perd == 1)) THEN 788 DO dir = 1, 3 789 ! bounds of local grid (i.e. removing the 'wings'), if periodic 790 tp(dir) = FLOOR(DOT_PRODUCT(cell%h_inv(dir, :), ra)*rs_grid%desc%npts(dir)) 791 tp(dir) = MODULO(tp(dir), rs_grid%desc%npts(dir)) 792 IF (rs_grid%desc%perd(dir) .NE. 1) THEN 793 lb(dir) = rs_grid%lb_local(dir) + rs_grid%desc%border 794 ub(dir) = rs_grid%ub_local(dir) - rs_grid%desc%border 795 ELSE 796 lb(dir) = rs_grid%lb_local(dir) 797 ub(dir) = rs_grid%ub_local(dir) 798 ENDIF 799 ! distributed grid, only map if it is local to the grid 800 location(dir) = tp(dir) + rs_grid%desc%lb(dir) 801 ENDDO 802 IF (lb(1) <= location(1) .AND. location(1) <= ub(1) .AND. & 803 lb(2) <= location(2) .AND. location(2) <= ub(2) .AND. & 804 lb(3) <= location(3) .AND. location(3) <= ub(3)) THEN 805 map_it2(ipgf, jpgf) = .TRUE. 806 ENDIF 807 ELSE 808 ! not distributed, just a round-robin distribution over the full set of CPUs 809 IF (MODULO(offset, group_size) == my_pos) map_it2(ipgf, jpgf) = .TRUE. 810 ENDIF 811 END DO 812 END DO 813 offset = offset + 1 814 ! 815 IF (ANY(map_it2(1:npgfa(iset), 1:npgfa(jset)))) THEN 816 ncoa = npgfa(iset)*ncoset(la_max(iset)) 817 sgfa = first_sgfa(1, iset) 818 ncob = npgfa(jset)*ncoset(la_max(jset)) 819 sgfb = first_sgfa(1, jset) 820 ! decontract density block 821 CALL dgemm("N", "N", ncoa, nsgfa(jset), nsgfa(iset), & 822 1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), & 823 p_block(sgfa, sgfb), SIZE(p_block, 1), & 824 0.0_dp, work(1, 1), maxco) 825 CALL dgemm("N", "T", ncoa, ncob, nsgfa(jset), & 826 1.0_dp, work(1, 1), maxco, & 827 sphi_a(1, sgfb), SIZE(sphi_a, 1), & 828 0.0_dp, pab(1, 1), maxco) 829 DO ipgf = 1, npgfa(iset) 830 DO jpgf = 1, npgfa(jset) 831 zetp = zeta(ipgf, iset) + zeta(jpgf, jset) 832 igrid_level = gaussian_gridlevel(gridlevel_info, zetp) 833 rs_grid => rs_rho(igrid_level)%rs_grid 834 835 na1 = (ipgf - 1)*ncoset(la_max(iset)) 836 nb1 = (jpgf - 1)*ncoset(la_max(jset)) 837 838 IF (map_it2(ipgf, jpgf)) THEN 839 CALL collocate_pgf_product_rspace( & 840 la_max(iset), zeta(ipgf, iset), la_min(iset), & 841 la_max(jset), zeta(jpgf, jset), la_min(jset), & 842 ra, rab, rab2, 1.0_dp, pab, na1, nb1, & 843 rs_grid, cell, cube_info(igrid_level), & 844 eps_rho_rspace, ga_gb_function=FUNC_AB, & 845 map_consistent=map_consistent, lmax_global=lmax_global) 846 END IF 847 END DO 848 END DO 849 END IF 850 END DO 851 END DO 852 DEALLOCATE (map_it2) 853 ! 854 END DO 855 END DO 856 DEALLOCATE (pab, work) 857 END IF 858 859 CALL pw_zero(lri_rho_g%pw) 860 CALL pw_zero(lri_rho_r%pw) 861 862 DO igrid_level = 1, gridlevel_info%ngrid_levels 863 CALL pw_zero(mgrid_rspace(igrid_level)%pw) 864 CALL rs_pw_transfer(rs=rs_rho(igrid_level)%rs_grid, & 865 pw=mgrid_rspace(igrid_level)%pw, dir=rs2pw) 866 CALL rs_grid_release(rs_rho(igrid_level)%rs_grid) 867 END DO 868 869 DO igrid_level = 1, gridlevel_info%ngrid_levels 870 CALL pw_zero(mgrid_gspace(igrid_level)%pw) 871 CALL pw_transfer(mgrid_rspace(igrid_level)%pw, & 872 mgrid_gspace(igrid_level)%pw) 873 CALL pw_axpy(mgrid_gspace(igrid_level)%pw, lri_rho_g%pw) 874 END DO 875 CALL pw_transfer(lri_rho_g%pw, lri_rho_r%pw) 876 total_rho = pw_integrate_function(lri_rho_r%pw, isign=-1) 877 878 ! *** give back the multi-grids *** ! 879 CALL pw_pools_give_back_pws(pw_pools, mgrid_gspace) 880 CALL pw_pools_give_back_pws(pw_pools, mgrid_rspace) 881 882 CALL timestop(handle) 883 884 END SUBROUTINE calculate_lri_rho_elec 885 886! ************************************************************************************************** 887!> \brief computes the density of the core charges on the grid 888!> \param rho_core ... 889!> \param total_rho ... 890!> \param qs_env ... 891!> \param only_nopaw ... 892! ************************************************************************************************** 893 SUBROUTINE calculate_rho_core(rho_core, total_rho, qs_env, only_nopaw) 894 895 TYPE(pw_p_type), INTENT(INOUT) :: rho_core 896 REAL(KIND=dp), INTENT(OUT) :: total_rho 897 TYPE(qs_environment_type), POINTER :: qs_env 898 LOGICAL, INTENT(IN), OPTIONAL :: only_nopaw 899 900 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_core', & 901 routineP = moduleN//':'//routineN 902 903 INTEGER :: atom_a, handle, iatom, ikind, ithread, & 904 j, natom, npme, nthread 905 INTEGER(KIND=int_8) :: subpatch_pattern 906 INTEGER, DIMENSION(:), POINTER :: atom_list, cores 907 LOGICAL :: my_only_nopaw, paw_atom 908 REAL(KIND=dp) :: alpha, eps_rho_rspace 909 REAL(KIND=dp), DIMENSION(3) :: ra 910 REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab 911 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set 912 TYPE(cell_type), POINTER :: cell 913 TYPE(cube_info_type) :: cube_info 914 TYPE(dft_control_type), POINTER :: dft_control 915 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set 916 TYPE(pw_env_type), POINTER :: pw_env 917 TYPE(pw_p_type) :: rhoc_r 918 TYPE(pw_pool_type), POINTER :: auxbas_pw_pool 919 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set 920 TYPE(realspace_grid_type), POINTER :: rs_rho 921 922 CALL timeset(routineN, handle) 923 NULLIFY (cell, dft_control, pab, atomic_kind_set, qs_kind_set, particle_set, & 924 atom_list, pw_env, rs_rho, auxbas_pw_pool, cores) 925 ALLOCATE (pab(1, 1)) 926 927 my_only_nopaw = .FALSE. 928 IF (PRESENT(only_nopaw)) my_only_nopaw = only_nopaw 929 930 CALL get_qs_env(qs_env=qs_env, & 931 atomic_kind_set=atomic_kind_set, & 932 qs_kind_set=qs_kind_set, & 933 cell=cell, & 934 dft_control=dft_control, & 935 particle_set=particle_set, & 936 pw_env=pw_env) 937 CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, & 938 auxbas_pw_pool=auxbas_pw_pool) 939 cube_info = pw_env%cube_info(1) 940 ! be careful in parallel nsmax is choosen with multigrid in mind! 941 CALL rs_grid_retain(rs_rho) 942 CALL rs_grid_zero(rs_rho) 943 944 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 945 946 DO ikind = 1, SIZE(atomic_kind_set) 947 CALL get_atomic_kind(atomic_kind_set(ikind), natom=natom, atom_list=atom_list) 948 CALL get_qs_kind(qs_kind_set(ikind), paw_atom=paw_atom, & 949 alpha_core_charge=alpha, ccore_charge=pab(1, 1)) 950 951 IF (my_only_nopaw .AND. paw_atom) CYCLE 952 IF (alpha == 0.0_dp .OR. pab(1, 1) == 0.0_dp) CYCLE 953 954 nthread = 1 955 ithread = 0 956 957 CALL reallocate(cores, 1, natom) 958 npme = 0 959 cores = 0 960 961 DO iatom = 1, natom 962 IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN 963 ! replicated realspace grid, split the atoms up between procs 964 IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN 965 npme = npme + 1 966 cores(npme) = iatom 967 ENDIF 968 ELSE 969 npme = npme + 1 970 cores(npme) = iatom 971 ENDIF 972 END DO 973 974 IF (npme .GT. 0) THEN 975 DO j = 1, npme 976 977 iatom = cores(j) 978 atom_a = atom_list(iatom) 979 ra(:) = pbc(particle_set(atom_a)%r, cell) 980 subpatch_pattern = 0 981 !lmax == 0 so set lmax_global to 0 982 CALL collocate_pgf_product_rspace(0, alpha, 0, 0, 0.0_dp, 0, ra, & 983 (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, -1.0_dp, pab, 0, 0, rs_rho, & 984 cell, cube_info, eps_rho_rspace, ga_gb_function=FUNC_AB, & 985 ithread=ithread, use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, & 986 lmax_global=0) 987 988 END DO 989 ENDIF 990 991 END DO 992 993 IF (ASSOCIATED(cores)) THEN 994 DEALLOCATE (cores) 995 END IF 996 DEALLOCATE (pab) 997 998 CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, & 999 use_data=REALDATA3D, in_space=REALSPACE) 1000 1001 CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw) 1002 CALL rs_grid_release(rs_rho) 1003 1004 total_rho = pw_integrate_function(rhoc_r%pw, isign=-1) 1005 1006 CALL pw_transfer(rhoc_r%pw, rho_core%pw) 1007 1008 CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw) 1009 1010 CALL timestop(handle) 1011 1012 END SUBROUTINE calculate_rho_core 1013 1014! ************************************************************************************************** 1015!> \brief collocate a single Gaussian on the grid 1016!> \param rho_gb charge density generated by a single gaussian 1017!> \param qs_env qs environment 1018!> \param iatom_in atom index 1019!> \par History 1020!> 12.2011 created 1021!> \author Dorothea Golze 1022! ************************************************************************************************** 1023 SUBROUTINE calculate_rho_single_gaussian(rho_gb, qs_env, iatom_in) 1024 1025 TYPE(pw_p_type), INTENT(INOUT) :: rho_gb 1026 TYPE(qs_environment_type), POINTER :: qs_env 1027 INTEGER, INTENT(IN) :: iatom_in 1028 1029 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_single_gaussian', & 1030 routineP = moduleN//':'//routineN 1031 1032 INTEGER :: atom_a, handle, iatom, npme 1033 INTEGER(KIND=int_8) :: subpatch_pattern 1034 REAL(KIND=dp) :: eps_rho_rspace 1035 REAL(KIND=dp), DIMENSION(3) :: ra 1036 REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab 1037 TYPE(cell_type), POINTER :: cell 1038 TYPE(dft_control_type), POINTER :: dft_control 1039 TYPE(pw_env_type), POINTER :: pw_env 1040 TYPE(pw_p_type) :: rhoc_r 1041 TYPE(pw_pool_type), POINTER :: auxbas_pw_pool 1042 TYPE(realspace_grid_type), POINTER :: rs_rho 1043 1044 CALL timeset(routineN, handle) 1045 NULLIFY (cell, dft_control, pab, pw_env, rs_rho, auxbas_pw_pool) 1046 1047 ALLOCATE (pab(1, 1)) 1048 1049 CALL get_qs_env(qs_env=qs_env, & 1050 cell=cell, & 1051 dft_control=dft_control, & 1052 pw_env=pw_env) 1053 CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, & 1054 auxbas_pw_pool=auxbas_pw_pool) 1055 CALL rs_grid_retain(rs_rho) 1056 CALL rs_grid_zero(rs_rho) 1057 1058 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 1059 pab(1, 1) = 1.0_dp 1060 iatom = iatom_in 1061 1062 npme = 0 1063 1064 IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN 1065 IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN 1066 npme = npme + 1 1067 ENDIF 1068 ELSE 1069 npme = npme + 1 1070 ENDIF 1071 1072 IF (npme .GT. 0) THEN 1073 atom_a = qs_env%qmmm_env_qm%image_charge_pot%image_mm_list(iatom) 1074 ra(:) = pbc(qs_env%qmmm_env_qm%image_charge_pot%particles_all(atom_a)%r, cell) 1075 subpatch_pattern = 0 1076 !lmax == 0 so set lmax_global to 0 1077 CALL collocate_pgf_product_rspace(0, qs_env%qmmm_env_qm%image_charge_pot%eta, & 1078 0, 0, 0.0_dp, 0, ra, (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, 1.0_dp, pab, 0, 0, rs_rho, & 1079 cell, pw_env%cube_info(1), eps_rho_rspace, ga_gb_function=FUNC_AB, & 1080 use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, & 1081 lmax_global=0) 1082 ENDIF 1083 1084 DEALLOCATE (pab) 1085 1086 CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, & 1087 use_data=REALDATA3D, in_space=REALSPACE) 1088 1089 CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw) 1090 CALL rs_grid_release(rs_rho) 1091 1092 CALL pw_transfer(rhoc_r%pw, rho_gb%pw) 1093 1094 CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw) 1095 1096 CALL timestop(handle) 1097 1098 END SUBROUTINE calculate_rho_single_gaussian 1099 1100! ************************************************************************************************** 1101!> \brief computes the image charge density on the grid (including coeffcients) 1102!> \param rho_metal image charge density 1103!> \param coeff expansion coefficients of the image charge density, i.e. 1104!> rho_metal=sum_a c_a*g_a 1105!> \param total_rho_metal total induced image charge density 1106!> \param qs_env qs environment 1107!> \par History 1108!> 01.2012 created 1109!> \author Dorothea Golze 1110! ************************************************************************************************** 1111 SUBROUTINE calculate_rho_metal(rho_metal, coeff, total_rho_metal, qs_env) 1112 1113 TYPE(pw_p_type), INTENT(INOUT) :: rho_metal 1114 REAL(KIND=dp), DIMENSION(:), POINTER :: coeff 1115 REAL(KIND=dp), INTENT(OUT), OPTIONAL :: total_rho_metal 1116 TYPE(qs_environment_type), POINTER :: qs_env 1117 1118 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_metal', & 1119 routineP = moduleN//':'//routineN 1120 1121 INTEGER :: atom_a, handle, iatom, j, natom, npme 1122 INTEGER(KIND=int_8) :: subpatch_pattern 1123 INTEGER, DIMENSION(:), POINTER :: cores 1124 REAL(KIND=dp) :: eps_rho_rspace 1125 REAL(KIND=dp), DIMENSION(3) :: ra 1126 REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab 1127 TYPE(cell_type), POINTER :: cell 1128 TYPE(dft_control_type), POINTER :: dft_control 1129 TYPE(pw_env_type), POINTER :: pw_env 1130 TYPE(pw_p_type) :: rhoc_r 1131 TYPE(pw_pool_type), POINTER :: auxbas_pw_pool 1132 TYPE(realspace_grid_type), POINTER :: rs_rho 1133 1134 CALL timeset(routineN, handle) 1135 1136 NULLIFY (cell, dft_control, pab, pw_env, rs_rho, auxbas_pw_pool, cores) 1137 1138 ALLOCATE (pab(1, 1)) 1139 1140 CALL get_qs_env(qs_env=qs_env, & 1141 cell=cell, & 1142 dft_control=dft_control, & 1143 pw_env=pw_env) 1144 CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, & 1145 auxbas_pw_pool=auxbas_pw_pool) 1146 CALL rs_grid_retain(rs_rho) 1147 CALL rs_grid_zero(rs_rho) 1148 1149 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 1150 pab(1, 1) = 1.0_dp 1151 1152 natom = SIZE(qs_env%qmmm_env_qm%image_charge_pot%image_mm_list) 1153 1154 CALL reallocate(cores, 1, natom) 1155 npme = 0 1156 cores = 0 1157 1158 DO iatom = 1, natom 1159 IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN 1160 IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN 1161 npme = npme + 1 1162 cores(npme) = iatom 1163 ENDIF 1164 ELSE 1165 npme = npme + 1 1166 cores(npme) = iatom 1167 ENDIF 1168 ENDDO 1169 1170 IF (npme .GT. 0) THEN 1171 DO j = 1, npme 1172 iatom = cores(j) 1173 atom_a = qs_env%qmmm_env_qm%image_charge_pot%image_mm_list(iatom) 1174 ra(:) = pbc(qs_env%qmmm_env_qm%image_charge_pot%particles_all(atom_a)%r, cell) 1175 subpatch_pattern = 0 1176 !lmax == 0 so set lmax_global to 0 1177 CALL collocate_pgf_product_rspace( & 1178 0, qs_env%qmmm_env_qm%image_charge_pot%eta, & 1179 0, 0, 0.0_dp, 0, ra, (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, coeff(iatom), pab, 0, 0, rs_rho, & 1180 cell, pw_env%cube_info(1), eps_rho_rspace, ga_gb_function=FUNC_AB, & 1181 use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, lmax_global=0) 1182 ENDDO 1183 ENDIF 1184 1185 DEALLOCATE (pab, cores) 1186 1187 CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, & 1188 use_data=REALDATA3D, in_space=REALSPACE) 1189 1190 CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw) 1191 CALL rs_grid_release(rs_rho) 1192 1193 IF (PRESENT(total_rho_metal)) & 1194 !minus sign: account for the fact that rho_metal has opposite sign 1195 total_rho_metal = pw_integrate_function(rhoc_r%pw, isign=-1) 1196 1197 CALL pw_transfer(rhoc_r%pw, rho_metal%pw) 1198 CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw) 1199 1200 CALL timestop(handle) 1201 1202 END SUBROUTINE calculate_rho_metal 1203 1204! ************************************************************************************************** 1205!> \brief collocate a single Gaussian on the grid for periodic RESP fitting 1206!> \param rho_gb charge density generated by a single gaussian 1207!> \param qs_env qs environment 1208!> \param eta width of single Gaussian 1209!> \param iatom_in atom index 1210!> \par History 1211!> 06.2012 created 1212!> \author Dorothea Golze 1213! ************************************************************************************************** 1214 SUBROUTINE calculate_rho_resp_single(rho_gb, qs_env, eta, iatom_in) 1215 1216 TYPE(pw_p_type), INTENT(INOUT) :: rho_gb 1217 TYPE(qs_environment_type), POINTER :: qs_env 1218 REAL(KIND=dp), INTENT(IN) :: eta 1219 INTEGER, INTENT(IN) :: iatom_in 1220 1221 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_resp_single', & 1222 routineP = moduleN//':'//routineN 1223 1224 INTEGER :: handle, iatom, npme 1225 INTEGER(KIND=int_8) :: subpatch_pattern 1226 REAL(KIND=dp) :: eps_rho_rspace 1227 REAL(KIND=dp), DIMENSION(3) :: ra 1228 REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab 1229 TYPE(cell_type), POINTER :: cell 1230 TYPE(dft_control_type), POINTER :: dft_control 1231 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set 1232 TYPE(pw_env_type), POINTER :: pw_env 1233 TYPE(pw_p_type) :: rhoc_r 1234 TYPE(pw_pool_type), POINTER :: auxbas_pw_pool 1235 TYPE(realspace_grid_type), POINTER :: rs_rho 1236 1237 CALL timeset(routineN, handle) 1238 NULLIFY (cell, dft_control, pab, pw_env, rs_rho, auxbas_pw_pool, & 1239 particle_set) 1240 1241 ALLOCATE (pab(1, 1)) 1242 1243 CALL get_qs_env(qs_env=qs_env, & 1244 cell=cell, & 1245 dft_control=dft_control, & 1246 particle_set=particle_set, & 1247 pw_env=pw_env) 1248 CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, & 1249 auxbas_pw_pool=auxbas_pw_pool) 1250 CALL rs_grid_retain(rs_rho) 1251 CALL rs_grid_zero(rs_rho) 1252 1253 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 1254 pab(1, 1) = 1.0_dp 1255 iatom = iatom_in 1256 1257 npme = 0 1258 1259 IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN 1260 IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN 1261 npme = npme + 1 1262 ENDIF 1263 ELSE 1264 npme = npme + 1 1265 ENDIF 1266 1267 IF (npme .GT. 0) THEN 1268 ra(:) = pbc(particle_set(iatom)%r, cell) 1269 subpatch_pattern = 0 1270 ! lmax == 0 so set lmax_global to 0 1271 CALL collocate_pgf_product_rspace(0, eta, 0, 0, 0.0_dp, 0, ra, & 1272 (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, 1.0_dp, pab, 0, 0, rs_rho, & 1273 cell, pw_env%cube_info(1), eps_rho_rspace, ga_gb_function=FUNC_AB, & 1274 use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, & 1275 lmax_global=0) 1276 ENDIF 1277 1278 DEALLOCATE (pab) 1279 1280 CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, & 1281 use_data=REALDATA3D, in_space=REALSPACE) 1282 1283 CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw) 1284 CALL rs_grid_release(rs_rho) 1285 1286 CALL pw_transfer(rhoc_r%pw, rho_gb%pw) 1287 1288 CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw) 1289 1290 CALL timestop(handle) 1291 1292 END SUBROUTINE calculate_rho_resp_single 1293 1294! ************************************************************************************************** 1295!> \brief computes the RESP charge density on a grid based on the RESP charges 1296!> \param rho_resp RESP charge density 1297!> \param coeff RESP charges, take care of normalization factor 1298!> (eta/pi)**1.5 later 1299!> \param natom number of atoms 1300!> \param eta width of single Gaussian 1301!> \param qs_env qs environment 1302!> \par History 1303!> 01.2012 created 1304!> \author Dorothea Golze 1305! ************************************************************************************************** 1306 SUBROUTINE calculate_rho_resp_all(rho_resp, coeff, natom, eta, qs_env) 1307 1308 TYPE(pw_p_type), INTENT(INOUT) :: rho_resp 1309 REAL(KIND=dp), DIMENSION(:), POINTER :: coeff 1310 INTEGER, INTENT(IN) :: natom 1311 REAL(KIND=dp), INTENT(IN) :: eta 1312 TYPE(qs_environment_type), POINTER :: qs_env 1313 1314 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_resp_all', & 1315 routineP = moduleN//':'//routineN 1316 1317 INTEGER :: handle, iatom, j, npme 1318 INTEGER(KIND=int_8) :: subpatch_pattern 1319 INTEGER, DIMENSION(:), POINTER :: cores 1320 REAL(KIND=dp) :: eps_rho_rspace 1321 REAL(KIND=dp), DIMENSION(3) :: ra 1322 REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab 1323 TYPE(cell_type), POINTER :: cell 1324 TYPE(dft_control_type), POINTER :: dft_control 1325 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set 1326 TYPE(pw_env_type), POINTER :: pw_env 1327 TYPE(pw_p_type) :: rhoc_r 1328 TYPE(pw_pool_type), POINTER :: auxbas_pw_pool 1329 TYPE(realspace_grid_type), POINTER :: rs_rho 1330 1331 CALL timeset(routineN, handle) 1332 1333 NULLIFY (cell, cores, dft_control, pab, pw_env, rs_rho, auxbas_pw_pool, & 1334 particle_set) 1335 1336 ALLOCATE (pab(1, 1)) 1337 1338 CALL get_qs_env(qs_env=qs_env, & 1339 cell=cell, & 1340 dft_control=dft_control, & 1341 particle_set=particle_set, & 1342 pw_env=pw_env) 1343 CALL pw_env_get(pw_env, auxbas_rs_grid=rs_rho, & 1344 auxbas_pw_pool=auxbas_pw_pool) 1345 CALL rs_grid_retain(rs_rho) 1346 CALL rs_grid_zero(rs_rho) 1347 1348 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 1349 pab(1, 1) = 1.0_dp 1350 1351 CALL reallocate(cores, 1, natom) 1352 npme = 0 1353 cores = 0 1354 1355 DO iatom = 1, natom 1356 IF (rs_rho%desc%parallel .AND. .NOT. rs_rho%desc%distributed) THEN 1357 IF (MODULO(iatom, rs_rho%desc%group_size) == rs_rho%desc%my_pos) THEN 1358 npme = npme + 1 1359 cores(npme) = iatom 1360 ENDIF 1361 ELSE 1362 npme = npme + 1 1363 cores(npme) = iatom 1364 ENDIF 1365 ENDDO 1366 1367 IF (npme .GT. 0) THEN 1368 DO j = 1, npme 1369 iatom = cores(j) 1370 ra(:) = pbc(particle_set(iatom)%r, cell) 1371 subpatch_pattern = 0 1372 ! la_max==0 so set lmax_global to 0 1373 CALL collocate_pgf_product_rspace( & 1374 0, eta, & 1375 0, 0, 0.0_dp, 0, ra, (/0.0_dp, 0.0_dp, 0.0_dp/), 0.0_dp, coeff(iatom), pab, 0, 0, rs_rho, & 1376 cell, pw_env%cube_info(1), eps_rho_rspace, ga_gb_function=FUNC_AB, & 1377 use_subpatch=.TRUE., subpatch_pattern=subpatch_pattern, lmax_global=0) 1378 ENDDO 1379 ENDIF 1380 1381 DEALLOCATE (pab, cores) 1382 1383 CALL pw_pool_create_pw(auxbas_pw_pool, rhoc_r%pw, & 1384 use_data=REALDATA3D, in_space=REALSPACE) 1385 1386 CALL rs_pw_transfer(rs_rho, rhoc_r%pw, rs2pw) 1387 CALL rs_grid_release(rs_rho) 1388 1389 CALL pw_transfer(rhoc_r%pw, rho_resp%pw) 1390 CALL pw_pool_give_back_pw(auxbas_pw_pool, rhoc_r%pw) 1391 1392 CALL timestop(handle) 1393 1394 END SUBROUTINE calculate_rho_resp_all 1395 1396! ************************************************************************************************** 1397!> \brief computes the density corresponding to a given density matrix on the grid 1398!> \param matrix_p ... 1399!> \param matrix_p_kp ... 1400!> \param rho ... 1401!> \param rho_gspace ... 1402!> \param total_rho ... 1403!> \param ks_env ... 1404!> \param soft_valid ... 1405!> \param compute_tau ... 1406!> \param compute_grad ... 1407!> \param basis_type ... 1408!> \param der_type ... 1409!> \param idir ... 1410!> \param task_list_external ... 1411!> \param pw_env_external ... 1412!> \par History 1413!> IAB (15-Feb-2010): Added OpenMP parallelisation to task loop 1414!> (c) The Numerical Algorithms Group (NAG) Ltd, 2010 on behalf of the HECToR project 1415!> \note 1416!> both rho and rho_gspace contain the new rho 1417!> (in real and g-space respectively) 1418! ************************************************************************************************** 1419 SUBROUTINE calculate_rho_elec(matrix_p, matrix_p_kp, rho, rho_gspace, total_rho, & 1420 ks_env, soft_valid, compute_tau, compute_grad, & 1421 basis_type, der_type, idir, task_list_external, pw_env_external) 1422 1423 TYPE(dbcsr_type), OPTIONAL, POINTER :: matrix_p 1424 TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL, & 1425 POINTER :: matrix_p_kp 1426 TYPE(pw_p_type), INTENT(INOUT) :: rho, rho_gspace 1427 REAL(KIND=dp), INTENT(OUT) :: total_rho 1428 TYPE(qs_ks_env_type), POINTER :: ks_env 1429 LOGICAL, INTENT(IN), OPTIONAL :: soft_valid, compute_tau, compute_grad 1430 CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: basis_type 1431 INTEGER, INTENT(IN), OPTIONAL :: der_type, idir 1432 TYPE(task_list_type), OPTIONAL, POINTER :: task_list_external 1433 TYPE(pw_env_type), OPTIONAL, POINTER :: pw_env_external 1434 1435 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_rho_elec', & 1436 routineP = moduleN//':'//routineN 1437 1438 CHARACTER(LEN=default_string_length) :: my_basis_type 1439 INTEGER :: bcol, brow, ga_gb_function, handle, iatom, iatom_old, igrid_level, & 1440 igrid_level_dummy, ikind, ikind_old, img, img_old, ipair, ipgf, iset, iset_old, itask, & 1441 ithread, jatom, jatom_old, jkind, jkind_old, jpgf, jset, jset_old, lb, lbr, lbw, & 1442 lmax_global, maxco, maxpgf, maxset, maxsgf, maxsgf_set, my_der_type, my_idir, n, na1, & 1443 na2, natoms, nb1, nb2, nblock, ncoa, ncob, nimages, nr, nrlevel, nseta, nsetb, ntasks, & 1444 nthread, nw, nxy, nz, nzsize, sgfa, sgfb, ub 1445 INTEGER(kind=int_8), DIMENSION(:), POINTER :: atom_pair_recv, atom_pair_send 1446 INTEGER(kind=int_8), DIMENSION(:, :), POINTER :: tasks 1447 INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, mylmax, & 1448 npgfa, npgfb, nsgfa, nsgfb 1449 INTEGER, DIMENSION(:, :), POINTER :: first_sgfa, first_sgfb 1450 LOGICAL :: atom_pair_changed, distributed_rs_grids, do_kp, found, map_consistent, & 1451 my_compute_grad, my_compute_tau, my_soft, use_subpatch 1452 REAL(KIND=dp) :: eps_rho_rspace, rab2, scale, zetp 1453 REAL(KIND=dp), DIMENSION(3) :: ra, rab, rab_inv, rb 1454 REAL(KIND=dp), DIMENSION(:, :), POINTER :: dist_ab, p_block, pab, sphi_a, sphi_b, & 1455 work, zeta, zetb 1456 REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: pabt, workt 1457 TYPE(cell_type), POINTER :: cell 1458 TYPE(cube_info_type), DIMENSION(:), POINTER :: cube_info 1459 TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: deltap 1460 TYPE(dft_control_type), POINTER :: dft_control 1461 TYPE(gridlevel_info_type), POINTER :: gridlevel_info 1462 TYPE(gto_basis_set_type), POINTER :: orb_basis_set 1463 TYPE(lgrid_type), POINTER :: lgrid 1464 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set 1465 TYPE(pw_env_type), POINTER :: pw_env 1466 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set 1467 TYPE(qs_kind_type), POINTER :: qs_kind 1468 TYPE(realspace_grid_desc_p_type), DIMENSION(:), & 1469 POINTER :: rs_descs 1470 TYPE(realspace_grid_p_type), DIMENSION(:), POINTER :: rs_rho 1471 TYPE(task_list_type), POINTER :: task_list, task_list_soft 1472 1473 CALL timeset(routineN, handle) 1474 1475 CPASSERT(PRESENT(matrix_p) .OR. PRESENT(matrix_p_kp)) 1476 do_kp = PRESENT(matrix_p_kp) 1477 1478 NULLIFY (qs_kind, cell, dft_control, orb_basis_set, deltap, & 1479 qs_kind_set, particle_set, rs_rho, pw_env, rs_descs, & 1480 dist_ab, la_max, la_min, & 1481 lb_max, lb_min, npgfa, npgfb, nsgfa, nsgfb, p_block, & 1482 sphi_a, sphi_b, zeta, zetb, first_sgfa, first_sgfb, tasks, pabt, & 1483 workt, lgrid, mylmax) 1484 1485 debug_count = debug_count + 1 1486 1487 ! by default, the full density is calculated 1488 my_soft = .FALSE. 1489 IF (PRESENT(soft_valid)) my_soft = soft_valid 1490 1491 ! by default, do not compute the kinetic energy density (tau) 1492 ! if compute_tau, all grids referencing to rho are actually tau 1493 IF (PRESENT(compute_tau)) THEN 1494 my_compute_tau = compute_tau 1495 ELSE 1496 my_compute_tau = .FALSE. 1497 ENDIF 1498 1499 IF (PRESENT(compute_grad)) THEN 1500 my_compute_grad = compute_grad 1501 ELSE 1502 my_compute_grad = .FALSE. 1503 ENDIF 1504 1505 my_der_type = 0 1506 my_idir = 0 1507 IF (PRESENT(der_type)) THEN 1508 my_der_type = der_type 1509 ga_gb_function = FUNC_DER(my_der_type) 1510 ELSE IF (my_compute_tau) THEN 1511 ga_gb_function = FUNC_DADB 1512 ELSE IF (my_compute_grad) THEN 1513 ga_gb_function = FUNC_DABpADB 1514 ELSE 1515 ga_gb_function = FUNC_AB 1516 ENDIF 1517 IF (PRESENT(idir)) my_idir = idir 1518 1519 IF (PRESENT(basis_type)) THEN 1520 my_basis_type = basis_type 1521 ELSE 1522 my_basis_type = "ORB" 1523 END IF 1524 1525 CALL get_ks_env(ks_env, & 1526 qs_kind_set=qs_kind_set, & 1527 cell=cell, & 1528 dft_control=dft_control, & 1529 particle_set=particle_set, & 1530 pw_env=pw_env) 1531 1532 SELECT CASE (my_basis_type) 1533 CASE ("ORB") 1534 CALL get_ks_env(ks_env, & 1535 task_list=task_list, & 1536 task_list_soft=task_list_soft) 1537 CASE ("AUX_FIT") 1538 CALL get_ks_env(ks_env, & 1539 task_list_aux_fit=task_list, & 1540 task_list_soft=task_list_soft) 1541 END SELECT 1542 1543 nimages = dft_control%nimages 1544 CPASSERT(nimages == 1 .OR. do_kp) 1545 1546 IF (PRESENT(pw_env_external)) pw_env => pw_env_external 1547 IF (PRESENT(task_list_external)) task_list => task_list_external 1548 1549 ! *** assign from pw_env 1550 gridlevel_info => pw_env%gridlevel_info 1551 cube_info => pw_env%cube_info 1552 1553 ! *** Allocate work storage *** 1554 nthread = 1 1555!$ nthread = omp_get_max_threads() 1556 CALL get_qs_kind_set(qs_kind_set=qs_kind_set, & 1557 maxco=maxco, & 1558 maxsgf=maxsgf, & 1559 maxsgf_set=maxsgf_set, & 1560 basis_type=my_basis_type) 1561 CALL reallocate(pabt, 1, maxco, 1, maxco, 0, nthread - 1) 1562 CALL reallocate(workt, 1, maxco, 1, maxsgf_set, 0, nthread - 1) 1563 1564 ! find maximum numbers 1565 natoms = SIZE(particle_set) 1566 maxset = 0 1567 maxpgf = 0 1568 lmax_global = 0 1569 1570 DO ikind = 1, SIZE(qs_kind_set) 1571 qs_kind => qs_kind_set(ikind) 1572 CALL get_qs_kind(qs_kind=qs_kind, & 1573 softb=my_soft, & 1574 basis_set=orb_basis_set, basis_type=my_basis_type) 1575 1576 IF (.NOT. ASSOCIATED(orb_basis_set)) CYCLE 1577 1578 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, & 1579 npgf=npgfa, nset=nseta, lmax=mylmax) 1580 1581 maxset = MAX(nseta, maxset) 1582 maxpgf = MAX(MAXVAL(npgfa), maxpgf) 1583 lmax_global = MAX(MAXVAL(mylmax), lmax_global) 1584 END DO 1585 1586 ! Optionally increment lmax_global depending on which ga_gb_function 1587 ! will be used for collocation 1588 SELECT CASE (ga_gb_function) 1589 CASE (FUNC_DADB) 1590 lmax_global = lmax_global + 1 1591 CASE (FUNC_ADBmDAB) 1592 lmax_global = lmax_global + 1 1593 CASE (FUNC_DABpADB) 1594 lmax_global = lmax_global + 1 1595 CASE (FUNC_DX, FUNC_DY, FUNC_DZ) 1596 lmax_global = lmax_global + 1 1597 CASE (FUNC_DXDY, FUNC_DYDZ, FUNC_DZDX) 1598 lmax_global = lmax_global + 2 1599 CASE (FUNC_DXDX, FUNC_DYDY, FUNC_DZDZ) 1600 lmax_global = lmax_global + 2 1601 CASE (FUNC_ARDBmDARB) 1602 lmax_global = lmax_global + 2 1603 CASE (FUNC_ARB) 1604 lmax_global = lmax_global + 1 1605 CASE (FUNC_AB) 1606 lmax_global = lmax_global 1607 CASE DEFAULT 1608 CPABORT("") 1609 END SELECT 1610 1611 ! get the task lists 1612 IF (my_soft) task_list => task_list_soft 1613 CPASSERT(ASSOCIATED(task_list)) 1614 tasks => task_list%tasks 1615 dist_ab => task_list%dist_ab 1616 atom_pair_send => task_list%atom_pair_send 1617 atom_pair_recv => task_list%atom_pair_recv 1618 ntasks = task_list%ntasks 1619 1620 ! *** set up the pw multi-grids 1621 CPASSERT(ASSOCIATED(pw_env)) 1622 CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_rho, lgrid=lgrid) 1623 ! *** set up the rs multi-grids 1624 CPASSERT(ASSOCIATED(rs_rho)) 1625 CPASSERT(ASSOCIATED(rs_descs)) 1626 CPASSERT(ASSOCIATED(lgrid)) 1627 distributed_rs_grids = .FALSE. 1628 1629 DO igrid_level = 1, gridlevel_info%ngrid_levels 1630 CALL rs_grid_retain(rs_rho(igrid_level)%rs_grid) 1631 CALL rs_grid_zero(rs_rho(igrid_level)%rs_grid) 1632 IF (rs_rho(igrid_level)%rs_grid%desc%distributed) THEN 1633 distributed_rs_grids = .TRUE. 1634 ENDIF 1635 END DO 1636 1637 IF (nthread > 1) THEN 1638 IF (.NOT. ASSOCIATED(lgrid%r)) THEN 1639 CALL lgrid_allocate_grid(lgrid, nthread) 1640 ENDIF 1641 END IF 1642 1643 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 1644 map_consistent = dft_control%qs_control%map_consistent 1645 1646 ! *** Initialize working density matrix *** 1647 ! distributed rs grids require a matrix that will be changed 1648 ! whereas this is not the case for replicated grids 1649 NULLIFY (deltap) 1650 CALL dbcsr_allocate_matrix_set(deltap, nimages) 1651 IF (distributed_rs_grids) THEN 1652 DO img = 1, nimages 1653 ALLOCATE (deltap(img)%matrix) 1654 END DO 1655 ! this matrix has no strict sparsity pattern in parallel 1656 ! deltap%sparsity_id=-1 1657 IF (do_kp) THEN 1658 DO img = 1, nimages 1659 CALL dbcsr_copy(deltap(img)%matrix, matrix_p_kp(img)%matrix, & 1660 name="DeltaP") 1661 END DO 1662 ELSE 1663 CALL dbcsr_copy(deltap(1)%matrix, matrix_p, name="DeltaP") 1664 END IF 1665 ELSE 1666 IF (do_kp) THEN 1667 DO img = 1, nimages 1668 deltap(img)%matrix => matrix_p_kp(img)%matrix 1669 END DO 1670 ELSE 1671 deltap(1)%matrix => matrix_p 1672 END IF 1673 ENDIF 1674 1675 ! distribute the matrix 1676 IF (distributed_rs_grids) THEN 1677 CALL rs_distribute_matrix(rs_descs, deltap, atom_pair_send, atom_pair_recv, & 1678 natoms, nimages, scatter=.TRUE.) 1679 ENDIF 1680 1681 ! map all tasks on the grids 1682 1683!$OMP PARALLEL DEFAULT(NONE), & 1684!$OMP SHARED(ntasks,tasks,nimages,natoms,maxset,maxpgf,particle_set,pabt,workt), & 1685!$OMP SHARED(my_basis_type,my_soft,deltap,maxco,dist_ab,ncoset,nthread), & 1686!$OMP SHARED(cell,cube_info,eps_rho_rspace,ga_gb_function, my_idir,map_consistent), & 1687!$OMP SHARED(rs_rho,lgrid,gridlevel_info,task_list,qs_kind_set,lmax_global), & 1688!$OMP PRIVATE(igrid_level,iatom,jatom,iset,jset,ipgf,jpgf,ikind,jkind,pab,work), & 1689!$OMP PRIVATE(img,img_old,iatom_old,jatom_old,iset_old,jset_old,ikind_old,jkind_old), & 1690!$OMP PRIVATE(brow,bcol,qs_kind,orb_basis_set,first_sgfa,la_max,la_min), & 1691!$OMP PRIVATE(npgfa,nseta,nsgfa,sphi_a,zeta,first_sgfb,lb_max,lb_min,npgfb), & 1692!$OMP PRIVATE(nsetb,nsgfb,sphi_b,zetb,p_block,found), & 1693!$OMP PRIVATE(atom_pair_changed,ncoa,sgfa,ncob,sgfb,rab,rab2,ra,rb,zetp), & 1694!$OMP PRIVATE(na1,na2,nb1,nb2,scale,use_subpatch,rab_inv,ithread,lb,ub,n,nw), & 1695!$OMP PRIVATE(itask,nz,nxy,nzsize,nrlevel,nblock,lbw,lbr,nr,igrid_level_dummy) 1696 1697 ithread = 0 1698!$ ithread = omp_get_thread_num() 1699 pab => pabt(:, :, ithread) 1700 work => workt(:, :, ithread) 1701 1702 iatom_old = -1; jatom_old = -1; iset_old = -1; jset_old = -1 1703 ikind_old = -1; jkind_old = -1; img_old = -1 1704 1705 ! Loop over each gridlevel first, then loop and load balance over atom pairs 1706 ! We only need a single lgrid, which we sum back onto the rsgrid after each 1707 ! grid level is completed 1708 loop_gridlevels: DO igrid_level = 1, gridlevel_info%ngrid_levels 1709 1710 ! Only zero the region of the lgrid required for this grid level 1711 IF (nthread > 1) THEN 1712 lgrid%r(1:rs_rho(igrid_level)%rs_grid%ngpts_local, ithread) = 0._dp 1713 END IF 1714!$OMP DO schedule(dynamic, MAX(1,task_list%npairs(igrid_level)/(nthread*50))) 1715 loop_pairs: DO ipair = 1, task_list%npairs(igrid_level) 1716 loop_tasks: DO itask = task_list%taskstart(ipair, igrid_level), task_list%taskstop(ipair, igrid_level) 1717 !decode the atom pair and basis info (igrid_level_dummy equals do loop variable by construction). 1718 CALL int2pair(tasks(3, itask), igrid_level_dummy, img, iatom, jatom, iset, jset, ipgf, jpgf, & 1719 nimages, natoms, maxset, maxpgf) 1720 ikind = particle_set(iatom)%atomic_kind%kind_number 1721 jkind = particle_set(jatom)%atomic_kind%kind_number 1722 1723 IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old .OR. img .NE. img_old) THEN 1724 1725 IF (iatom .NE. iatom_old) ra(:) = pbc(particle_set(iatom)%r, cell) 1726 1727 IF (iatom <= jatom) THEN 1728 brow = iatom 1729 bcol = jatom 1730 ELSE 1731 brow = jatom 1732 bcol = iatom 1733 END IF 1734 1735 IF (ikind .NE. ikind_old) THEN 1736 CALL get_qs_kind(qs_kind_set(ikind), softb=my_soft, & 1737 basis_set=orb_basis_set, basis_type=my_basis_type) 1738 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, & 1739 first_sgf=first_sgfa, & 1740 lmax=la_max, & 1741 lmin=la_min, & 1742 npgf=npgfa, & 1743 nset=nseta, & 1744 nsgf_set=nsgfa, & 1745 sphi=sphi_a, & 1746 zet=zeta) 1747 ENDIF 1748 1749 IF (jkind .NE. jkind_old) THEN 1750 CALL get_qs_kind(qs_kind_set(jkind), softb=my_soft, & 1751 basis_set=orb_basis_set, basis_type=my_basis_type) 1752 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, & 1753 first_sgf=first_sgfb, & 1754 lmax=lb_max, & 1755 lmin=lb_min, & 1756 npgf=npgfb, & 1757 nset=nsetb, & 1758 nsgf_set=nsgfb, & 1759 sphi=sphi_b, & 1760 zet=zetb) 1761 ENDIF 1762 1763 CALL dbcsr_get_block_p(matrix=deltap(img)%matrix, & 1764 row=brow, col=bcol, BLOCK=p_block, found=found) 1765 CPASSERT(found) 1766 1767 iatom_old = iatom 1768 jatom_old = jatom 1769 ikind_old = ikind 1770 jkind_old = jkind 1771 img_old = img 1772 atom_pair_changed = .TRUE. 1773 1774 ELSE 1775 1776 atom_pair_changed = .FALSE. 1777 1778 ENDIF 1779 1780 IF (atom_pair_changed .OR. iset_old .NE. iset .OR. jset_old .NE. jset) THEN 1781 ncoa = npgfa(iset)*ncoset(la_max(iset)) 1782 sgfa = first_sgfa(1, iset) 1783 ncob = npgfb(jset)*ncoset(lb_max(jset)) 1784 sgfb = first_sgfb(1, jset) 1785 1786 IF (iatom <= jatom) THEN 1787 CALL dgemm("N", "N", ncoa, nsgfb(jset), nsgfa(iset), & 1788 1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), & 1789 p_block(sgfa, sgfb), SIZE(p_block, 1), & 1790 0.0_dp, work(1, 1), maxco) 1791 CALL dgemm("N", "T", ncoa, ncob, nsgfb(jset), & 1792 1.0_dp, work(1, 1), maxco, & 1793 sphi_b(1, sgfb), SIZE(sphi_b, 1), & 1794 0.0_dp, pab(1, 1), maxco) 1795 ELSE 1796 CALL dgemm("N", "N", ncob, nsgfa(iset), nsgfb(jset), & 1797 1.0_dp, sphi_b(1, sgfb), SIZE(sphi_b, 1), & 1798 p_block(sgfb, sgfa), SIZE(p_block, 1), & 1799 0.0_dp, work(1, 1), maxco) 1800 CALL dgemm("N", "T", ncob, ncoa, nsgfa(iset), & 1801 1.0_dp, work(1, 1), maxco, & 1802 sphi_a(1, sgfa), SIZE(sphi_a, 1), & 1803 0.0_dp, pab(1, 1), maxco) 1804 END IF 1805 1806 iset_old = iset 1807 jset_old = jset 1808 ENDIF 1809 1810 rab(:) = dist_ab(:, itask) 1811 rab2 = rab(1)*rab(1) + rab(2)*rab(2) + rab(3)*rab(3) 1812 rb(:) = ra(:) + rab(:) 1813 zetp = zeta(ipgf, iset) + zetb(jpgf, jset) 1814 1815 na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1 1816 na2 = ipgf*ncoset(la_max(iset)) 1817 nb1 = (jpgf - 1)*ncoset(lb_max(jset)) + 1 1818 nb2 = jpgf*ncoset(lb_max(jset)) 1819 1820 ! takes the density matrix symmetry in account, i.e. off-diagonal blocks need to be mapped 'twice' 1821 IF (iatom == jatom) THEN 1822 scale = 1.0_dp 1823 ELSE 1824 scale = 2.0_dp 1825 END IF 1826 1827 ! check whether we need to use fawzi's generalised collocation scheme 1828 IF (rs_rho(igrid_level)%rs_grid%desc%distributed) THEN 1829 !tasks(4,:) is 0 for replicated, 1 for distributed 2 for exceptional distributed tasks 1830 IF (tasks(4, itask) .EQ. 2) THEN 1831 use_subpatch = .TRUE. 1832 ELSE 1833 use_subpatch = .FALSE. 1834 ENDIF 1835 ELSE 1836 use_subpatch = .FALSE. 1837 ENDIF 1838 1839 IF (nthread > 1) THEN 1840 IF (iatom <= jatom) THEN 1841 CALL collocate_pgf_product_rspace( & 1842 la_max(iset), zeta(ipgf, iset), la_min(iset), & 1843 lb_max(jset), zetb(jpgf, jset), lb_min(jset), & 1844 ra, rab, rab2, scale, pab, na1 - 1, nb1 - 1, & 1845 rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), & 1846 eps_rho_rspace, & 1847 ga_gb_function=ga_gb_function, & 1848 idir=my_idir, & 1849 lgrid=lgrid, ithread=ithread, & 1850 map_consistent=map_consistent, use_subpatch=use_subpatch, & 1851 subpatch_pattern=tasks(6, itask), lmax_global=lmax_global) 1852 ELSE 1853 rab_inv = -rab 1854 CALL collocate_pgf_product_rspace( & 1855 lb_max(jset), zetb(jpgf, jset), lb_min(jset), & 1856 la_max(iset), zeta(ipgf, iset), la_min(iset), & 1857 rb, rab_inv, rab2, scale, pab, nb1 - 1, na1 - 1, & 1858 rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), & 1859 eps_rho_rspace, & 1860 ga_gb_function=ga_gb_function, & 1861 idir=my_idir, & 1862 lgrid=lgrid, ithread=ithread, & 1863 map_consistent=map_consistent, use_subpatch=use_subpatch, & 1864 subpatch_pattern=tasks(6, itask), lmax_global=lmax_global) 1865 END IF 1866 ELSE 1867 IF (iatom <= jatom) THEN 1868 CALL collocate_pgf_product_rspace( & 1869 la_max(iset), zeta(ipgf, iset), la_min(iset), & 1870 lb_max(jset), zetb(jpgf, jset), lb_min(jset), & 1871 ra, rab, rab2, scale, pab, na1 - 1, nb1 - 1, & 1872 rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), & 1873 eps_rho_rspace, & 1874 ga_gb_function=ga_gb_function, & 1875 idir=my_idir, & 1876 map_consistent=map_consistent, use_subpatch=use_subpatch, & 1877 subpatch_pattern=tasks(6, itask), lmax_global=lmax_global) 1878 ELSE 1879 rab_inv = -rab 1880 CALL collocate_pgf_product_rspace( & 1881 lb_max(jset), zetb(jpgf, jset), lb_min(jset), & 1882 la_max(iset), zeta(ipgf, iset), la_min(iset), & 1883 rb, rab_inv, rab2, scale, pab, nb1 - 1, na1 - 1, & 1884 rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), & 1885 eps_rho_rspace, & 1886 ga_gb_function=ga_gb_function, & 1887 idir=my_idir, & 1888 map_consistent=map_consistent, use_subpatch=use_subpatch, & 1889 subpatch_pattern=tasks(6, itask), lmax_global=lmax_global) 1890 END IF 1891 END IF 1892 END DO loop_tasks 1893 END DO loop_pairs 1894!$OMP END DO 1895 1896 ! Now sum the thread-local grids back into the rs_grid 1897 ! (in parallel, each thread writes to a section of the rs_grid at a time) 1898 IF (nthread > 1) THEN 1899 nz = (1 + rs_rho(igrid_level)%rs_grid%ub_local(3) & 1900 - rs_rho(igrid_level)%rs_grid%lb_local(3)) 1901 nxy = (1 + rs_rho(igrid_level)%rs_grid%ub_local(1) & 1902 - rs_rho(igrid_level)%rs_grid%lb_local(1)) & 1903 *(1 + rs_rho(igrid_level)%rs_grid%ub_local(2) & 1904 - rs_rho(igrid_level)%rs_grid%lb_local(2)) 1905 1906 ! Work out the number of tree levels, and start the reduction 1907 1908 nrlevel = CEILING(LOG10(1.0_dp*nthread)/LOG10(2.0_dp)) 1909 1910 DO nr = 1, nrlevel 1911 nblock = 2**nr 1912 nzsize = MIN(nblock, nthread - (ithread/nblock)*nblock) 1913 itask = MODULO(ithread, nblock) 1914 1915 lb = (nz*itask)/nzsize 1916 ub = (nz*(itask + 1))/nzsize - 1 1917 nw = (ithread/nblock)*nblock 1918 lbw = 1 + nxy*lb 1919 1920 n = nw + nblock/2 1921 IF (n < nthread) THEN 1922 lbr = 1 + nxy*lb 1923 1924 CALL daxpy(nxy*(1 + ub - lb), 1.0_dp, lgrid%r(lbr, n), 1, lgrid%r(lbw, nw), 1) 1925 END IF 1926!$OMP BARRIER 1927 END DO 1928 1929 ! Copy final result from first local grid to rs_grid 1930 lb = (nz*ithread)/nthread 1931 ub = (nz*(ithread + 1))/nthread - 1 1932 nzsize = 1 + (ub - lb) 1933 1934 lb = lb + rs_rho(igrid_level)%rs_grid%lb_local(3) 1935 ub = ub + rs_rho(igrid_level)%rs_grid%lb_local(3) 1936 lbr = 1 + nxy*(nz*ithread/nthread) 1937 1938 CALL daxpy(nxy*nzsize, 1.0_dp, lgrid%r(lbr, 0), 1, & 1939 rs_rho(igrid_level)%rs_grid%r(:, :, lb:ub), 1) 1940!$OMP BARRIER 1941 END IF 1942 END DO loop_gridlevels 1943!$OMP END PARALLEL 1944 1945 ! *** Release work storage *** 1946 1947 IF (distributed_rs_grids) THEN 1948 CALL dbcsr_deallocate_matrix_set(deltap) 1949 ELSE 1950 DO img = 1, nimages 1951 NULLIFY (deltap(img)%matrix) 1952 END DO 1953 DEALLOCATE (deltap) 1954 ENDIF 1955 1956 DEALLOCATE (pabt, workt) 1957 1958 CALL density_rs2pw(pw_env, rs_rho, rho, rho_gspace) 1959 1960 total_rho = pw_integrate_function(rho%pw, isign=-1) 1961 1962 CALL timestop(handle) 1963 1964 END SUBROUTINE calculate_rho_elec 1965 1966! ************************************************************************************************** 1967!> \brief computes the gradient of the density corresponding to a given 1968!> density matrix on the grid 1969!> \param matrix_p ... 1970!> \param matrix_p_kp ... 1971!> \param drho ... 1972!> \param drho_gspace ... 1973!> \param qs_env ... 1974!> \param soft_valid ... 1975!> \param basis_type ... 1976!> \note this is an alternative to calculate the gradient through FFTs 1977! ************************************************************************************************** 1978 SUBROUTINE calculate_drho_elec(matrix_p, matrix_p_kp, drho, drho_gspace, qs_env, & 1979 soft_valid, basis_type) 1980 1981 TYPE(dbcsr_type), OPTIONAL, POINTER :: matrix_p 1982 TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL, & 1983 POINTER :: matrix_p_kp 1984 TYPE(pw_p_type), DIMENSION(:), INTENT(INOUT) :: drho, drho_gspace 1985 TYPE(qs_environment_type), POINTER :: qs_env 1986 LOGICAL, INTENT(IN), OPTIONAL :: soft_valid 1987 CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: basis_type 1988 1989 CHARACTER(len=*), PARAMETER :: routineN = 'calculate_drho_elec', & 1990 routineP = moduleN//':'//routineN 1991 1992 CHARACTER(LEN=default_string_length) :: my_basis_type 1993 INTEGER :: bcol, brow, handle, i, iatom, iatom_old, idir, igrid_level, ikind, ikind_old, & 1994 img, img_old, ipgf, iset, iset_old, itask, ithread, jatom, jatom_old, jkind, jkind_old, & 1995 jpgf, jset, jset_old, lmax_global, maxco, maxpgf, maxset, maxsgf, maxsgf_set, na1, na2, & 1996 natoms, nb1, nb2, ncoa, ncob, nimages, nseta, nsetb, ntasks, nthread, sgfa, sgfb 1997 INTEGER(kind=int_8), DIMENSION(:), POINTER :: atom_pair_recv, atom_pair_send 1998 INTEGER(kind=int_8), DIMENSION(:, :), POINTER :: tasks 1999 INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, mylmax, & 2000 npgfa, npgfb, nsgfa, nsgfb 2001 INTEGER, DIMENSION(:, :), POINTER :: first_sgfa, first_sgfb 2002 LOGICAL :: atom_pair_changed, distributed_rs_grids, & 2003 do_kp, found, map_consistent, my_soft, & 2004 use_subpatch 2005 REAL(KIND=dp) :: eps_rho_rspace, rab2, scale, zetp 2006 REAL(KIND=dp), DIMENSION(3) :: ra, rab, rab_inv, rb 2007 REAL(KIND=dp), DIMENSION(:, :), POINTER :: dist_ab, p_block, pab, sphi_a, sphi_b, & 2008 work, zeta, zetb 2009 REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: pabt, workt 2010 TYPE(cell_type), POINTER :: cell 2011 TYPE(cube_info_type), DIMENSION(:), POINTER :: cube_info 2012 TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: deltap 2013 TYPE(dft_control_type), POINTER :: dft_control 2014 TYPE(gridlevel_info_type), POINTER :: gridlevel_info 2015 TYPE(gto_basis_set_type), POINTER :: orb_basis_set 2016 TYPE(neighbor_list_set_p_type), DIMENSION(:), & 2017 POINTER :: sab_orb 2018 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set 2019 TYPE(pw_env_type), POINTER :: pw_env 2020 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set 2021 TYPE(qs_kind_type), POINTER :: qs_kind 2022 TYPE(realspace_grid_desc_p_type), DIMENSION(:), & 2023 POINTER :: rs_descs 2024 TYPE(realspace_grid_p_type), DIMENSION(:), POINTER :: rs_rho 2025 TYPE(task_list_type), POINTER :: task_list, task_list_soft 2026 2027 CALL timeset(routineN, handle) 2028 2029 CPASSERT(PRESENT(matrix_p) .OR. PRESENT(matrix_p_kp)) 2030 do_kp = PRESENT(matrix_p_kp) 2031 2032 NULLIFY (qs_kind, cell, dft_control, orb_basis_set, deltap, & 2033 qs_kind_set, sab_orb, particle_set, rs_rho, pw_env, rs_descs, & 2034 dist_ab, la_max, la_min, & 2035 lb_max, lb_min, npgfa, npgfb, nsgfa, nsgfb, p_block, & 2036 sphi_a, sphi_b, zeta, zetb, first_sgfa, first_sgfb, tasks, pabt, & 2037 workt, mylmax) 2038 2039 debug_count = debug_count + 1 2040 2041 ! by default, the full density is calculated 2042 my_soft = .FALSE. 2043 IF (PRESENT(soft_valid)) my_soft = soft_valid 2044 2045 IF (PRESENT(basis_type)) THEN 2046 my_basis_type = basis_type 2047 ELSE 2048 my_basis_type = "ORB" 2049 END IF 2050 2051 CALL get_qs_env(qs_env=qs_env, & 2052 qs_kind_set=qs_kind_set, & 2053 cell=cell, & 2054 dft_control=dft_control, & 2055 particle_set=particle_set, & 2056 sab_orb=sab_orb, & 2057 pw_env=pw_env) 2058 2059 SELECT CASE (my_basis_type) 2060 CASE ("ORB") 2061 CALL get_qs_env(qs_env=qs_env, & 2062 task_list=task_list, & 2063 task_list_soft=task_list_soft) 2064 CASE ("AUX_FIT") 2065 CALL get_qs_env(qs_env=qs_env, & 2066 task_list_aux_fit=task_list, & 2067 task_list_soft=task_list_soft) 2068 END SELECT 2069 2070 ! *** assign from pw_env 2071 gridlevel_info => pw_env%gridlevel_info 2072 cube_info => pw_env%cube_info 2073 2074 ! *** Allocate work storage *** 2075 nthread = 1 2076 CALL get_qs_kind_set(qs_kind_set=qs_kind_set, & 2077 maxco=maxco, & 2078 maxsgf=maxsgf, & 2079 maxsgf_set=maxsgf_set, & 2080 basis_type=my_basis_type) 2081 CALL reallocate(pabt, 1, maxco, 1, maxco, 0, nthread - 1) 2082 CALL reallocate(workt, 1, maxco, 1, maxsgf_set, 0, nthread - 1) 2083 2084 ! find maximum numbers 2085 nimages = dft_control%nimages 2086 CPASSERT(nimages == 1 .OR. do_kp) 2087 2088 natoms = SIZE(particle_set) 2089 maxset = 0 2090 maxpgf = 0 2091 lmax_global = 0 2092 2093 DO ikind = 1, SIZE(qs_kind_set) 2094 qs_kind => qs_kind_set(ikind) 2095 CALL get_qs_kind(qs_kind=qs_kind, & 2096 softb=my_soft, & 2097 basis_set=orb_basis_set, basis_type=my_basis_type) 2098 2099 IF (.NOT. ASSOCIATED(orb_basis_set)) CYCLE 2100 2101 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, & 2102 npgf=npgfa, nset=nseta, lmax=mylmax) 2103 2104 maxset = MAX(nseta, maxset) 2105 maxpgf = MAX(MAXVAL(npgfa), maxpgf) 2106 lmax_global = MAX(MAXVAL(mylmax), lmax_global) 2107 END DO 2108 2109 !update lmax_global since ga_gb_function=FUNC_DABpADB 2110 lmax_global = lmax_global + 1 2111 2112 ! get the task lists 2113 IF (my_soft) task_list => task_list_soft 2114 CPASSERT(ASSOCIATED(task_list)) 2115 tasks => task_list%tasks 2116 dist_ab => task_list%dist_ab 2117 atom_pair_send => task_list%atom_pair_send 2118 atom_pair_recv => task_list%atom_pair_recv 2119 ntasks = task_list%ntasks 2120 2121 ! *** set up the rs multi-grids 2122 CPASSERT(ASSOCIATED(pw_env)) 2123 CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_rho) 2124 DO igrid_level = 1, gridlevel_info%ngrid_levels 2125 CALL rs_grid_retain(rs_rho(igrid_level)%rs_grid) 2126 distributed_rs_grids = rs_rho(igrid_level)%rs_grid%desc%distributed 2127 END DO 2128 2129 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 2130 map_consistent = dft_control%qs_control%map_consistent 2131 2132 ! *** Initialize working density matrix *** 2133 ! distributed rs grids require a matrix that will be changed 2134 ! whereas this is not the case for replicated grids 2135 ALLOCATE (deltap(nimages)) 2136 IF (distributed_rs_grids) THEN 2137 DO img = 1, nimages 2138 END DO 2139 ! this matrix has no strict sparsity pattern in parallel 2140 ! deltap%sparsity_id=-1 2141 IF (do_kp) THEN 2142 DO img = 1, nimages 2143 CALL dbcsr_copy(deltap(img)%matrix, matrix_p_kp(img)%matrix, & 2144 name="DeltaP") 2145 END DO 2146 ELSE 2147 CALL dbcsr_copy(deltap(1)%matrix, matrix_p, name="DeltaP") 2148 END IF 2149 ELSE 2150 IF (do_kp) THEN 2151 DO img = 1, nimages 2152 deltap(img)%matrix => matrix_p_kp(img)%matrix 2153 END DO 2154 ELSE 2155 deltap(1)%matrix => matrix_p 2156 END IF 2157 ENDIF 2158 2159 ! distribute the matrix 2160 IF (distributed_rs_grids) THEN 2161 CALL rs_distribute_matrix(rs_descs, deltap, atom_pair_send, atom_pair_recv, & 2162 natoms, nimages, scatter=.TRUE.) 2163 ENDIF 2164 2165 ! map all tasks on the grids 2166 2167 ithread = 0 2168 pab => pabt(:, :, ithread) 2169 work => workt(:, :, ithread) 2170 2171 loop_xyz: DO idir = 1, 3 2172 2173 DO igrid_level = 1, gridlevel_info%ngrid_levels 2174 CALL rs_grid_zero(rs_rho(igrid_level)%rs_grid) 2175 END DO 2176 2177 iatom_old = -1; jatom_old = -1; iset_old = -1; jset_old = -1 2178 ikind_old = -1; jkind_old = -1; img_old = -1 2179 loop_tasks: DO itask = 1, ntasks 2180 2181 !decode the atom pair and basis info 2182 CALL int2pair(tasks(3, itask), igrid_level, img, iatom, jatom, iset, jset, ipgf, jpgf, & 2183 nimages, natoms, maxset, maxpgf) 2184 2185 ikind = particle_set(iatom)%atomic_kind%kind_number 2186 jkind = particle_set(jatom)%atomic_kind%kind_number 2187 2188 IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old .OR. img .NE. img_old) THEN 2189 2190 IF (iatom .NE. iatom_old) ra(:) = pbc(particle_set(iatom)%r, cell) 2191 2192 IF (iatom <= jatom) THEN 2193 brow = iatom 2194 bcol = jatom 2195 ELSE 2196 brow = jatom 2197 bcol = iatom 2198 END IF 2199 2200 IF (ikind .NE. ikind_old) THEN 2201 CALL get_qs_kind(qs_kind_set(ikind), softb=my_soft, & 2202 basis_set=orb_basis_set, basis_type=my_basis_type) 2203 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, & 2204 first_sgf=first_sgfa, & 2205 lmax=la_max, & 2206 lmin=la_min, & 2207 npgf=npgfa, & 2208 nset=nseta, & 2209 nsgf_set=nsgfa, & 2210 sphi=sphi_a, & 2211 zet=zeta) 2212 ENDIF 2213 2214 IF (jkind .NE. jkind_old) THEN 2215 CALL get_qs_kind(qs_kind_set(jkind), softb=my_soft, & 2216 basis_set=orb_basis_set, basis_type=my_basis_type) 2217 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, & 2218 first_sgf=first_sgfb, & 2219 lmax=lb_max, & 2220 lmin=lb_min, & 2221 npgf=npgfb, & 2222 nset=nsetb, & 2223 nsgf_set=nsgfb, & 2224 sphi=sphi_b, & 2225 zet=zetb) 2226 ENDIF 2227 2228 CALL dbcsr_get_block_p(matrix=deltap(img)%matrix, & 2229 row=brow, col=bcol, BLOCK=p_block, found=found) 2230 CPASSERT(found) 2231 2232 iatom_old = iatom 2233 jatom_old = jatom 2234 ikind_old = ikind 2235 jkind_old = jkind 2236 img_old = img 2237 atom_pair_changed = .TRUE. 2238 2239 ELSE 2240 2241 atom_pair_changed = .FALSE. 2242 2243 ENDIF 2244 2245 IF (atom_pair_changed .OR. iset_old .NE. iset .OR. jset_old .NE. jset) THEN 2246 2247 ncoa = npgfa(iset)*ncoset(la_max(iset)) 2248 sgfa = first_sgfa(1, iset) 2249 ncob = npgfb(jset)*ncoset(lb_max(jset)) 2250 sgfb = first_sgfb(1, jset) 2251 2252 IF (iatom <= jatom) THEN 2253 CALL dgemm("N", "N", ncoa, nsgfb(jset), nsgfa(iset), & 2254 1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), & 2255 p_block(sgfa, sgfb), SIZE(p_block, 1), & 2256 0.0_dp, work(1, 1), maxco) 2257 CALL dgemm("N", "T", ncoa, ncob, nsgfb(jset), & 2258 1.0_dp, work(1, 1), maxco, & 2259 sphi_b(1, sgfb), SIZE(sphi_b, 1), & 2260 0.0_dp, pab(1, 1), maxco) 2261 ELSE 2262 CALL dgemm("N", "N", ncob, nsgfa(iset), nsgfb(jset), & 2263 1.0_dp, sphi_b(1, sgfb), SIZE(sphi_b, 1), & 2264 p_block(sgfb, sgfa), SIZE(p_block, 1), & 2265 0.0_dp, work(1, 1), maxco) 2266 CALL dgemm("N", "T", ncob, ncoa, nsgfa(iset), & 2267 1.0_dp, work(1, 1), maxco, & 2268 sphi_a(1, sgfa), SIZE(sphi_a, 1), & 2269 0.0_dp, pab(1, 1), maxco) 2270 END IF 2271 2272 iset_old = iset 2273 jset_old = jset 2274 2275 ENDIF 2276 2277 rab(:) = dist_ab(:, itask) 2278 rab2 = rab(1)*rab(1) + rab(2)*rab(2) + rab(3)*rab(3) 2279 rb(:) = ra(:) + rab(:) 2280 zetp = zeta(ipgf, iset) + zetb(jpgf, jset) 2281 2282 na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1 2283 na2 = ipgf*ncoset(la_max(iset)) 2284 nb1 = (jpgf - 1)*ncoset(lb_max(jset)) + 1 2285 nb2 = jpgf*ncoset(lb_max(jset)) 2286 2287 ! takes the density matrix symmetry in account, i.e. off-diagonal blocks need to be mapped 'twice' 2288 IF (iatom == jatom .AND. img == 1) THEN 2289 scale = 1.0_dp 2290 ELSE 2291 scale = 2.0_dp 2292 END IF 2293 2294 ! check whether we need to use fawzi's generalised collocation scheme 2295 IF (rs_rho(igrid_level)%rs_grid%desc%distributed) THEN 2296 !tasks(4,:) is 0 for replicated, 1 for distributed 2 for exceptional distributed tasks 2297 IF (tasks(4, itask) .EQ. 2) THEN 2298 use_subpatch = .TRUE. 2299 ELSE 2300 use_subpatch = .FALSE. 2301 ENDIF 2302 ELSE 2303 use_subpatch = .FALSE. 2304 ENDIF 2305 IF (iatom <= jatom) THEN 2306 CALL collocate_pgf_product_rspace( & 2307 la_max(iset), zeta(ipgf, iset), la_min(iset), & 2308 lb_max(jset), zetb(jpgf, jset), lb_min(jset), & 2309 ra, rab, rab2, scale, pab, na1 - 1, nb1 - 1, & 2310 rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), & 2311 eps_rho_rspace, & 2312 ga_gb_function=FUNC_DABpADB, & 2313 idir=idir, & 2314 map_consistent=map_consistent, use_subpatch=use_subpatch, subpatch_pattern=tasks(6, itask), & 2315 lmax_global=lmax_global) 2316 ELSE 2317 rab_inv = -rab 2318 CALL collocate_pgf_product_rspace( & 2319 lb_max(jset), zetb(jpgf, jset), lb_min(jset), & 2320 la_max(iset), zeta(ipgf, iset), la_min(iset), & 2321 rb, rab_inv, rab2, scale, pab, nb1 - 1, na1 - 1, & 2322 rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), & 2323 eps_rho_rspace, & 2324 ga_gb_function=FUNC_DABpADB, & 2325 idir=idir, & 2326 map_consistent=map_consistent, use_subpatch=use_subpatch, subpatch_pattern=tasks(6, itask), & 2327 lmax_global=lmax_global) 2328 END IF 2329 2330 END DO loop_tasks 2331 2332 CALL density_rs2pw_basic(pw_env, rs_rho, drho(idir), drho_gspace(idir)) 2333 2334 END DO loop_xyz 2335 2336 ! *** Release work storage *** 2337 IF (ASSOCIATED(rs_rho)) THEN 2338 DO i = 1, SIZE(rs_rho) 2339 CALL rs_grid_release(rs_rho(i)%rs_grid) 2340 END DO 2341 END IF 2342 2343 IF (distributed_rs_grids) THEN 2344 CALL dbcsr_deallocate_matrix_set(deltap) 2345 ELSE 2346 DO img = 1, nimages 2347 NULLIFY (deltap(img)%matrix) 2348 END DO 2349 DEALLOCATE (deltap) 2350 ENDIF 2351 2352 DEALLOCATE (pabt, workt) 2353 2354 CALL timestop(handle) 2355 2356 END SUBROUTINE calculate_drho_elec 2357 2358! ************************************************************************************************** 2359!> \brief maps a given wavefunction on the grid 2360!> \param mo_vectors ... 2361!> \param ivector ... 2362!> \param rho ... 2363!> \param rho_gspace ... 2364!> \param atomic_kind_set ... 2365!> \param qs_kind_set ... 2366!> \param cell ... 2367!> \param dft_control ... 2368!> \param particle_set ... 2369!> \param pw_env ... 2370!> \param basis_type ... 2371!> \param external_vector ... 2372!> \par History 2373!> 08.2002 created [Joost VandeVondele] 2374!> 03.2006 made independent of qs_env [Joost VandeVondele] 2375!> \note 2376!> modified calculate_rho_elec, should write the wavefunction represented by 2377!> it's presumably dominated by the FFT and the rs->pw and back routines 2378!> 2379!> BUGS ??? 2380!> does it take correctly the periodic images of the basis functions into account 2381!> i.e. is only correct if the basis functions are localised enough to be just in 1 cell ? 2382! ************************************************************************************************** 2383 SUBROUTINE calculate_wavefunction(mo_vectors, ivector, rho, rho_gspace, & 2384 atomic_kind_set, qs_kind_set, cell, dft_control, particle_set, & 2385 pw_env, basis_type, external_vector) 2386 2387 TYPE(cp_fm_type), POINTER :: mo_vectors 2388 INTEGER :: ivector 2389 TYPE(pw_p_type), INTENT(INOUT) :: rho, rho_gspace 2390 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set 2391 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set 2392 TYPE(cell_type), POINTER :: cell 2393 TYPE(dft_control_type), POINTER :: dft_control 2394 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set 2395 TYPE(pw_env_type), POINTER :: pw_env 2396 CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: basis_type 2397 REAL(KIND=dp), DIMENSION(:), OPTIONAL :: external_vector 2398 2399 CHARACTER(LEN=*), PARAMETER :: routineN = 'calculate_wavefunction', & 2400 routineP = moduleN//':'//routineN 2401 2402 CHARACTER(LEN=default_string_length) :: my_basis_type 2403 INTEGER :: dir, group, group_size, handle, i, iatom, igrid_level, ikind, ipgf, iset, & 2404 lmax_global, maxco, maxsgf_set, my_pos, na1, na2, nao, natom, ncoa, nseta, offset, sgfa 2405 INTEGER, ALLOCATABLE, DIMENSION(:) :: where_is_the_point 2406 INTEGER, DIMENSION(3) :: lb, location, tp, ub 2407 INTEGER, DIMENSION(:), POINTER :: la_max, la_min, mylmax, npgfa, nsgfa 2408 INTEGER, DIMENSION(:, :), POINTER :: first_sgfa 2409 LOGICAL :: local, map_it_here 2410 REAL(KIND=dp) :: dab, eps_rho_rspace, rab2, scale, zetp 2411 REAL(KIND=dp), DIMENSION(3) :: ra, rab 2412 REAL(KIND=dp), DIMENSION(:), POINTER :: eigenvector 2413 REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab, sphi_a, work, zeta 2414 TYPE(cube_info_type), DIMENSION(:), POINTER :: cube_info 2415 TYPE(gridlevel_info_type), POINTER :: gridlevel_info 2416 TYPE(gto_basis_set_type), POINTER :: orb_basis_set 2417 TYPE(pw_p_type), DIMENSION(:), POINTER :: mgrid_gspace, mgrid_rspace 2418 TYPE(pw_pool_p_type), DIMENSION(:), POINTER :: pw_pools 2419 TYPE(REALSPACE_GRID_P_TYPE), DIMENSION(:), POINTER :: rs_rho 2420 2421 IF (PRESENT(basis_type)) THEN 2422 my_basis_type = basis_type 2423 ELSE 2424 my_basis_type = "ORB" 2425 END IF 2426 2427 CALL timeset(routineN, handle) 2428 2429 NULLIFY (eigenvector, orb_basis_set, & 2430 pab, work, la_max, la_min, & 2431 npgfa, nsgfa, & 2432 sphi_a, zeta, first_sgfa, & 2433 rs_rho, pw_pools, mgrid_rspace, mgrid_gspace, mylmax) 2434 2435 IF (PRESENT(external_vector)) THEN 2436 nao = SIZE(external_vector) 2437 ALLOCATE (eigenvector(nao)) 2438 eigenvector = external_vector 2439 ELSE 2440 CALL cp_fm_get_info(matrix=mo_vectors, nrow_global=nao) 2441 ALLOCATE (eigenvector(nao)) 2442 DO i = 1, nao 2443 CALL cp_fm_get_element(mo_vectors, i, ivector, eigenvector(i), local) 2444 ENDDO 2445 ENDIF 2446 2447 ! *** set up the pw multi-grids 2448 CPASSERT(ASSOCIATED(pw_env)) 2449 CALL pw_env_get(pw_env, rs_grids=rs_rho, pw_pools=pw_pools, & 2450 cube_info=cube_info, gridlevel_info=gridlevel_info) 2451 2452 CALL pw_pools_create_pws(pw_pools, mgrid_gspace, & 2453 use_data=COMPLEXDATA1D, & 2454 in_space=RECIPROCALSPACE) 2455 CALL pw_pools_create_pws(pw_pools, mgrid_rspace, & 2456 use_data=REALDATA3D, & 2457 in_space=REALSPACE) 2458 2459 ! *** set up rs multi-grids 2460 DO igrid_level = 1, gridlevel_info%ngrid_levels 2461 CALL rs_grid_retain(rs_rho(igrid_level)%rs_grid) 2462 CALL rs_grid_zero(rs_rho(igrid_level)%rs_grid) 2463 END DO 2464 2465 eps_rho_rspace = dft_control%qs_control%eps_rho_rspace 2466! *** Allocate work storage *** 2467 CALL get_atomic_kind_set(atomic_kind_set, natom=natom) 2468 CALL get_qs_kind_set(qs_kind_set, & 2469 maxco=maxco, & 2470 maxsgf_set=maxsgf_set, & 2471 basis_type=my_basis_type) 2472 2473 ALLOCATE (pab(maxco, 1)) 2474 ALLOCATE (work(maxco, 1)) 2475 2476 offset = 0 2477 group = mgrid_rspace(1)%pw%pw_grid%para%group 2478 my_pos = mgrid_rspace(1)%pw%pw_grid%para%my_pos 2479 group_size = mgrid_rspace(1)%pw%pw_grid%para%group_size 2480 ALLOCATE (where_is_the_point(0:group_size - 1)) 2481 2482 !loop over natom to find maximum value of la_max 2483 lmax_global = 0 2484 DO iatom = 1, natom 2485 ikind = particle_set(iatom)%atomic_kind%kind_number 2486 CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type=my_basis_type) 2487 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, lmax=mylmax) 2488 lmax_global = MAX(MAXVAL(mylmax), lmax_global) 2489 END DO 2490 2491 DO iatom = 1, natom 2492 ikind = particle_set(iatom)%atomic_kind%kind_number 2493 CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type=my_basis_type) 2494 CALL get_gto_basis_set(gto_basis_set=orb_basis_set, & 2495 first_sgf=first_sgfa, & 2496 lmax=la_max, & 2497 lmin=la_min, & 2498 npgf=npgfa, & 2499 nset=nseta, & 2500 nsgf_set=nsgfa, & 2501 sphi=sphi_a, & 2502 zet=zeta) 2503 ra(:) = pbc(particle_set(iatom)%r, cell) 2504 rab(:) = 0.0_dp 2505 rab2 = 0.0_dp 2506 dab = 0.0_dp 2507 2508 DO iset = 1, nseta 2509 2510 ncoa = npgfa(iset)*ncoset(la_max(iset)) 2511 sgfa = first_sgfa(1, iset) 2512 2513 DO i = 1, nsgfa(iset) 2514 work(i, 1) = eigenvector(offset + i) 2515 ENDDO 2516 2517 CALL dgemm("N", "N", ncoa, 1, nsgfa(iset), & 2518 1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), & 2519 work(1, 1), SIZE(work, 1), & 2520 0.0_dp, pab(1, 1), SIZE(pab, 1)) 2521 2522 DO ipgf = 1, npgfa(iset) 2523 2524 na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1 2525 na2 = ipgf*ncoset(la_max(iset)) 2526 2527 scale = 1.0_dp 2528 zetp = zeta(ipgf, iset) 2529 igrid_level = gaussian_gridlevel(gridlevel_info, zetp) 2530 2531 map_it_here = .FALSE. 2532 2533 IF (.NOT. ALL(rs_rho(igrid_level)%rs_grid%desc%perd == 1)) THEN 2534 DO dir = 1, 3 2535 ! bounds of local grid (i.e. removing the 'wings'), if periodic 2536 tp(dir) = FLOOR(DOT_PRODUCT(cell%h_inv(dir, :), ra)*rs_rho(igrid_level)%rs_grid%desc%npts(dir)) 2537 tp(dir) = MODULO(tp(dir), rs_rho(igrid_level)%rs_grid%desc%npts(dir)) 2538 IF (rs_rho(igrid_level)%rs_grid%desc%perd(dir) .NE. 1) THEN 2539 lb(dir) = rs_rho(igrid_level)%rs_grid%lb_local(dir) + rs_rho(igrid_level)%rs_grid%desc%border 2540 ub(dir) = rs_rho(igrid_level)%rs_grid%ub_local(dir) - rs_rho(igrid_level)%rs_grid%desc%border 2541 ELSE 2542 lb(dir) = rs_rho(igrid_level)%rs_grid%lb_local(dir) 2543 ub(dir) = rs_rho(igrid_level)%rs_grid%ub_local(dir) 2544 ENDIF 2545 ! distributed grid, only map if it is local to the grid 2546 location(dir) = tp(dir) + rs_rho(igrid_level)%rs_grid%desc%lb(dir) 2547 ENDDO 2548 IF (lb(1) <= location(1) .AND. location(1) <= ub(1) .AND. & 2549 lb(2) <= location(2) .AND. location(2) <= ub(2) .AND. & 2550 lb(3) <= location(3) .AND. location(3) <= ub(3)) THEN 2551 map_it_here = .TRUE. 2552 ENDIF 2553 ELSE 2554 ! not distributed, just a round-robin distribution over the full set of CPUs 2555 IF (MODULO(offset, group_size) == my_pos) map_it_here = .TRUE. 2556 ENDIF 2557 2558 IF (map_it_here) CALL collocate_pgf_product_rspace( & 2559 la_max(iset), zeta(ipgf, iset), la_min(iset), & 2560 0, 0.0_dp, 0, & 2561 ra, rab, rab2, scale, pab, na1 - 1, 0, & 2562 rs_rho(igrid_level)%rs_grid, cell, cube_info(igrid_level), & 2563 eps_rho_rspace, map_consistent=.TRUE., ga_gb_function=FUNC_AB, & 2564 lmax_global=lmax_global) 2565 2566 END DO 2567 2568 offset = offset + nsgfa(iset) 2569 2570 END DO 2571 2572 END DO 2573 2574 DO igrid_level = 1, gridlevel_info%ngrid_levels 2575 CALL rs_pw_transfer(rs_rho(igrid_level)%rs_grid, & 2576 mgrid_rspace(igrid_level)%pw, rs2pw) 2577 CALL rs_grid_release(rs_rho(igrid_level)%rs_grid) 2578 ENDDO 2579 2580 CALL pw_zero(rho_gspace%pw) 2581 DO igrid_level = 1, gridlevel_info%ngrid_levels 2582 CALL pw_transfer(mgrid_rspace(igrid_level)%pw, & 2583 mgrid_gspace(igrid_level)%pw) 2584 CALL pw_axpy(mgrid_gspace(igrid_level)%pw, rho_gspace%pw) 2585 END DO 2586 2587 CALL pw_transfer(rho_gspace%pw, rho%pw) 2588 2589 ! Release work storage 2590 DEALLOCATE (eigenvector) 2591 2592 DEALLOCATE (pab) 2593 2594 DEALLOCATE (work) 2595 2596 ! give back the pw multi-grids 2597 CALL pw_pools_give_back_pws(pw_pools, mgrid_gspace) 2598 CALL pw_pools_give_back_pws(pw_pools, mgrid_rspace) 2599 2600 CALL timestop(handle) 2601 2602 END SUBROUTINE calculate_wavefunction 2603 2604! ************************************************************************************************** 2605!> \brief low level collocation of primitive gaussian functions 2606!> \param la_max ... 2607!> \param zeta ... 2608!> \param la_min ... 2609!> \param lb_max ... 2610!> \param zetb ... 2611!> \param lb_min ... 2612!> \param ra ... 2613!> \param rab ... 2614!> \param rab2 ... 2615!> \param scale ... 2616!> \param pab ... 2617!> \param o1 ... 2618!> \param o2 ... 2619!> \param rsgrid ... 2620!> \param cell ... 2621!> \param cube_info ... 2622!> \param eps_rho_rspace ... 2623!> \param ga_gb_function ... 2624!> \param lgrid ... 2625!> \param ithread ... 2626!> \param map_consistent ... 2627!> \param collocate_rho0 ... 2628!> \param rpgf0_s ... 2629!> \param idir ... 2630!> \param ir ... 2631!> \param rsgauge ... 2632!> \param rsbuf ... 2633!> \param use_subpatch ... 2634!> \param subpatch_pattern ... 2635!> \param lmax_global Maximum possible value of lmax used to dimension arrays 2636! ************************************************************************************************** 2637 SUBROUTINE collocate_pgf_product_rspace(la_max, zeta, la_min, & 2638 lb_max, zetb, lb_min, & 2639 ra, rab, rab2, scale, pab, o1, o2, & 2640 rsgrid, cell, cube_info, & 2641 eps_rho_rspace, ga_gb_function, & 2642 lgrid, ithread, & 2643 map_consistent, & 2644 collocate_rho0, & 2645 rpgf0_s, idir, ir, rsgauge, rsbuf, & 2646 use_subpatch, subpatch_pattern, & 2647 lmax_global) 2648 2649 INTEGER, INTENT(IN) :: la_max 2650 REAL(KIND=dp), INTENT(IN) :: zeta 2651 INTEGER, INTENT(IN) :: la_min, lb_max 2652 REAL(KIND=dp), INTENT(IN) :: zetb 2653 INTEGER, INTENT(IN) :: lb_min 2654 REAL(KIND=dp), DIMENSION(3), INTENT(IN) :: ra, rab 2655 REAL(KIND=dp), INTENT(IN) :: rab2, scale 2656 REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab 2657 INTEGER, INTENT(IN) :: o1, o2 2658 TYPE(realspace_grid_type), POINTER :: rsgrid 2659 TYPE(cell_type), POINTER :: cell 2660 TYPE(cube_info_type), INTENT(IN) :: cube_info 2661 REAL(KIND=dp), INTENT(IN) :: eps_rho_rspace 2662 INTEGER, INTENT(IN) :: ga_gb_function 2663 TYPE(lgrid_type), OPTIONAL :: lgrid 2664 INTEGER, INTENT(IN), OPTIONAL :: ithread 2665 LOGICAL, INTENT(IN), OPTIONAL :: map_consistent, collocate_rho0 2666 REAL(dp), INTENT(IN), OPTIONAL :: rpgf0_s 2667 INTEGER, INTENT(IN), OPTIONAL :: idir, ir 2668 TYPE(realspace_grid_type), POINTER, OPTIONAL :: rsgauge, rsbuf 2669 LOGICAL, OPTIONAL :: use_subpatch 2670 INTEGER(KIND=int_8), OPTIONAL, INTENT(IN):: subpatch_pattern 2671 INTEGER, INTENT(IN) :: lmax_global 2672 2673 CHARACTER(len=*), PARAMETER :: routineN = 'collocate_pgf_product_rspace', & 2674 routineP = moduleN//':'//routineN 2675 2676 INTEGER :: cmax, gridbounds(2, 3), i, ico, icoef, ider1, ider2, ig, ithread_l, & 2677 jco, k, l, la_max_local, la_min_local, lb_max_local, lb_min_local, & 2678 length, lxa, lxb, lxy, lxyz, lya, lyb, & 2679 lza, lzb, o1_local, o2_local, offset, start 2680 INTEGER, DIMENSION(3) :: cubecenter, lb_cube, ng, & 2681 ub_cube 2682 INTEGER, DIMENSION(:), POINTER :: sphere_bounds 2683 LOGICAL :: my_collocate_rho0, & 2684 my_map_consistent, subpatch_collocate 2685 REAL(KIND=dp) :: a, b, binomial_k_lxa, binomial_l_lxb, cutoff, f, pg, & 2686 prefactor, radius, rpg, zetp 2687 REAL(KIND=dp), DIMENSION(3) :: dr, rap, rb, rbp, roffset, rp 2688 REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab_local 2689 REAL(KIND=dp), DIMENSION(:, :, :), & 2690 POINTER :: grid 2691 2692 INTEGER :: lxp, lyp, lzp, lp, lxpm, iaxis 2693 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: map 2694 REAL(kind=dp) :: p_ele 2695 REAL(kind=dp), DIMENSION(0:lmax_global*2, 0:lmax_global, 0:lmax_global, 3) :: alpha 2696 REAL(kind=dp), DIMENSION(((lmax_global*2 + 1)*(lmax_global*2 + 2)*(lmax_global*2 + 3))/6) :: coef_xyz 2697 REAL(kind=dp), DIMENSION(((lmax_global*2 + 1)*(lmax_global*2 + 2))/2) :: coef_xyt 2698 REAL(kind=dp), DIMENSION(0:lmax_global*2) :: coef_xtt 2699 2700 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: pol_z 2701 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: pol_y 2702 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: pol_x 2703 REAL(KIND=dp) :: t_exp_1, t_exp_2, t_exp_min_1, t_exp_min_2, t_exp_plus_1, t_exp_plus_2 2704 2705 REAL(kind=dp), POINTER, DIMENSION(:, :, :) :: grid_tmp, gauge 2706 2707 subpatch_collocate = .FALSE. 2708 2709 IF (PRESENT(use_subpatch)) THEN 2710 IF (use_subpatch) THEN 2711 subpatch_collocate = .TRUE. 2712 CPASSERT(PRESENT(subpatch_pattern)) 2713 ENDIF 2714 ENDIF 2715 2716 IF (PRESENT(ithread)) THEN 2717 ithread_l = ithread 2718 ELSE 2719 ithread_l = 0 2720 ENDIF 2721 2722 ! use identical radii for integrate and collocate ? 2723 IF (PRESENT(map_consistent)) THEN 2724 my_map_consistent = map_consistent 2725 ELSE 2726 my_map_consistent = .FALSE. 2727 ENDIF 2728 2729 IF (PRESENT(collocate_rho0) .AND. PRESENT(rpgf0_s)) THEN 2730 my_collocate_rho0 = collocate_rho0 2731 ELSE 2732 my_collocate_rho0 = .FALSE. 2733 END IF 2734 2735 zetp = zeta + zetb 2736 f = zetb/zetp 2737 rap(:) = f*rab(:) 2738 rbp(:) = rap(:) - rab(:) 2739 rp(:) = ra(:) + rap(:) 2740 rb(:) = ra(:) + rab(:) 2741 2742 ! check to avoid overflows 2743 a = MAXVAL(ABS(rsgrid%desc%dh)) 2744 a = 300._dp/(a*a) 2745 ! CPASSERT(zetp < a) 2746 2747 IF (my_map_consistent) THEN 2748 cutoff = 1.0_dp 2749 prefactor = EXP(-zeta*f*rab2) 2750 radius = exp_radius_very_extended(la_min, la_max, lb_min, lb_max, ra=ra, rb=rb, rp=rp, & 2751 zetp=zetp, eps=eps_rho_rspace, & 2752 prefactor=prefactor, cutoff=cutoff) 2753 prefactor = scale*EXP(-zeta*f*rab2) 2754 ELSE IF (my_collocate_rho0) THEN 2755 cutoff = 0.0_dp 2756 prefactor = 1.0_dp 2757 radius = rpgf0_s 2758 ELSE 2759 cutoff = 0.0_dp 2760 prefactor = scale*EXP(-zeta*f*rab2) 2761 radius = exp_radius_very_extended(la_min, la_max, lb_min, lb_max, pab, o1, o2, ra, rb, rp, & 2762 zetp, eps_rho_rspace, prefactor, cutoff) 2763 ENDIF 2764 2765 a = MAXVAL(ABS(rsgrid%desc%dh)) 2766 IF (radius .LT. a/2.0_dp) THEN 2767 RETURN 2768 END IF 2769 2770 ! it's a choice to compute lX_min/max, pab here, 2771 ! this way we get the same radius as we use for the corresponding density 2772 SELECT CASE (ga_gb_function) 2773 CASE (FUNC_DADB) 2774 la_max_local = la_max + 1 2775 la_min_local = MAX(la_min - 1, 0) 2776 lb_max_local = lb_max + 1 2777 lb_min_local = MAX(lb_min - 1, 0) 2778 ! create a new pab_local so that mapping pab_local with pgf_a pgf_b 2779 ! is equivalent to mapping pab with 0.5 * (nabla pgf_a) . (nabla pgf_b) 2780 ! (ddx pgf_a ) (ddx pgf_b) = (lax pgf_{a-1x} - 2*zeta*pgf_{a+1x})*(lbx pgf_{b-1x} - 2*zetb*pgf_{b+1x}) 2781 ! cleaner would possibly be to touch pzyx directly (avoiding the following allocate) 2782 ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local))) 2783 pab_local = 0.0_dp 2784 DO lxa = 0, la_max 2785 DO lxb = 0, lb_max 2786 DO lya = 0, la_max - lxa 2787 DO lyb = 0, lb_max - lxb 2788 DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya 2789 DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb 2790 2791 ! this element of pab results in 12 elements of pab_local 2792 CALL prepare_dadb(pab_local, pab, lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb) 2793 2794 ENDDO 2795 ENDDO 2796 ENDDO 2797 ENDDO 2798 ENDDO 2799 ENDDO 2800 o1_local = 0 2801 o2_local = 0 2802 pab_local = pab_local*0.5_dp 2803 CASE (FUNC_ADBmDAB) 2804 CPASSERT(PRESENT(idir)) 2805 la_max_local = la_max + 1 2806 la_min_local = MAX(la_min - 1, 0) 2807 lb_max_local = lb_max + 1 2808 lb_min_local = MAX(lb_min - 1, 0) 2809 ! create a new pab_local so that mapping pab_local with pgf_a pgf_b 2810 ! is equivalent to mapping pab with 2811 ! pgf_a (nabla_{idir} pgf_b) - (nabla_{idir} pgf_a) pgf_b 2812 ! ( pgf_a ) (ddx pgf_b) - (ddx pgf_a)( pgf_b ) = 2813 ! pgf_a *(lbx pgf_{b-1x} - 2*zetb*pgf_{b+1x}) - 2814 ! (lax pgf_{a-1x} - 2*zeta*pgf_{a+1x}) pgf_b 2815 2816 ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local))) 2817 pab_local = 0.0_dp 2818 DO lxa = 0, la_max 2819 DO lxb = 0, lb_max 2820 DO lya = 0, la_max - lxa 2821 DO lyb = 0, lb_max - lxb 2822 DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya 2823 DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb 2824 ! this element of pab results in 4 elements of pab_local 2825 CALL prepare_adb_m_dab(pab_local, pab, idir, & 2826 lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb) 2827 END DO 2828 END DO 2829 END DO 2830 END DO 2831 END DO 2832 END DO 2833 o1_local = 0 2834 o2_local = 0 2835 CASE (FUNC_DABpADB) 2836 CPASSERT(PRESENT(idir)) 2837 la_max_local = la_max + 1 2838 la_min_local = MAX(la_min - 1, 0) 2839 lb_max_local = lb_max + 1 2840 lb_min_local = MAX(lb_min - 1, 0) 2841 ! create a new pab_local so that mapping pab_local with pgf_a pgf_b 2842 ! is equivalent to mapping pab with 2843 ! pgf_a (nabla_{idir} pgf_b) + (nabla_{idir} pgf_a) pgf_b 2844 ! ( pgf_a ) (ddx pgf_b) + (ddx pgf_a)( pgf_b ) = 2845 ! pgf_a *(lbx pgf_{b-1x} + 2*zetb*pgf_{b+1x}) + 2846 ! (lax pgf_{a-1x} + 2*zeta*pgf_{a+1x}) pgf_b 2847 2848 ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local))) 2849 pab_local = 0.0_dp 2850 DO lxa = 0, la_max 2851 DO lxb = 0, lb_max 2852 DO lya = 0, la_max - lxa 2853 DO lyb = 0, lb_max - lxb 2854 DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya 2855 DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb 2856 ! this element of pab results in 4 elements of pab_local 2857 CALL prepare_dab_p_adb(pab_local, pab, idir, & 2858 lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb) 2859 END DO 2860 END DO 2861 END DO 2862 END DO 2863 END DO 2864 END DO 2865 o1_local = 0 2866 o2_local = 0 2867 CASE (FUNC_DX, FUNC_DY, FUNC_DZ) 2868 ider1 = ga_gb_function - 500 2869 la_max_local = la_max + 1 2870 la_min_local = MAX(la_min - 1, 0) 2871 lb_max_local = lb_max + 1 2872 lb_min_local = MAX(lb_min - 1, 0) 2873 ! create a new pab_local so that mapping pab_local with pgf_a pgf_b 2874 ! is equivalent to mapping pab with 2875 ! d_{ider1} pgf_a d_{ider1} pgf_b 2876 ! dx pgf_a dx pgf_b = 2877 ! (lax pgf_{a-1x})*(lbx pgf_{b-1x}) - 2*zetb*lax*pgf_{a-1x}*pgf_{b+1x} - 2878 ! lbx pgf_{b-1x}*2*zeta*pgf_{a+1x}+ 4*zeta*zetab*pgf_{a+1x}pgf_{b+1x} 2879 2880 ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local))) 2881 pab_local = 0.0_dp 2882 DO lxa = 0, la_max 2883 DO lxb = 0, lb_max 2884 DO lya = 0, la_max - lxa 2885 DO lyb = 0, lb_max - lxb 2886 DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya 2887 DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb 2888 ! this element of pab results in 4 elements of pab_local 2889 CALL prepare_dIadIb(pab_local, pab, ider1, & 2890 lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb) 2891 END DO 2892 END DO 2893 END DO 2894 END DO 2895 END DO 2896 END DO 2897 o1_local = 0 2898 o2_local = 0 2899 CASE (FUNC_DXDY, FUNC_DYDZ, FUNC_DZDX) 2900 ider1 = ga_gb_function - 600 2901 ider2 = ga_gb_function - 600 + 1 2902 IF (ider2 > 3) ider2 = ider1 - 2 2903 la_max_local = la_max + 2 2904 la_min_local = MAX(la_min - 2, 0) 2905 lb_max_local = lb_max + 2 2906 lb_min_local = MAX(lb_min - 2, 0) 2907 ! create a new pab_local so that mapping pab_local with pgf_a pgf_b 2908 ! is equivalent to mapping pab with 2909 ! d_{ider1} pgf_a d_{ider1} pgf_b 2910 ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local))) 2911 pab_local = 0.0_dp 2912 DO lxa = 0, la_max 2913 DO lxb = 0, lb_max 2914 DO lya = 0, la_max - lxa 2915 DO lyb = 0, lb_max - lxb 2916 DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya 2917 DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb 2918 ! this element of pab results in 16 elements of pab_local 2919 CALL prepare_dijadijb(pab_local, pab, ider1, ider2, & 2920 lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb) 2921 END DO 2922 END DO 2923 END DO 2924 END DO 2925 END DO 2926 END DO 2927 o1_local = 0 2928 o2_local = 0 2929 CASE (FUNC_DXDX, FUNC_DYDY, FUNC_DZDZ) 2930 ider1 = ga_gb_function - 603 2931 la_max_local = la_max + 2 2932 la_min_local = MAX(la_min - 2, 0) 2933 lb_max_local = lb_max + 2 2934 lb_min_local = MAX(lb_min - 2, 0) 2935 ! create a new pab_local so that mapping pab_local with pgf_a pgf_b 2936 ! is equivalent to mapping pab with 2937 ! dd_{ider1} pgf_a dd_{ider1} pgf_b 2938 2939 ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local))) 2940 pab_local = 0.0_dp 2941 DO lxa = 0, la_max 2942 DO lxb = 0, lb_max 2943 DO lya = 0, la_max - lxa 2944 DO lyb = 0, lb_max - lxb 2945 DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya 2946 DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb 2947 ! this element of pab results in 9 elements of pab_local 2948 CALL prepare_diiadiib(pab_local, pab, ider1, & 2949 lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb) 2950 END DO 2951 END DO 2952 END DO 2953 END DO 2954 END DO 2955 END DO 2956 o1_local = 0 2957 o2_local = 0 2958 CASE (FUNC_ARDBmDARB) 2959 CPASSERT(PRESENT(idir)) 2960 CPASSERT(PRESENT(ir)) 2961 la_max_local = la_max + 1 2962 la_min_local = MAX(la_min - 1, 0) 2963 lb_max_local = lb_max + 2 2964 lb_min_local = MAX(lb_min - 1, 0) 2965 ! create a new pab_local so that mapping pab_local with pgf_a pgf_b 2966 ! is equivalent to mapping pab with 2967 ! pgf_a (r-Rb)_{ir} (nabla_{idir} pgf_b) - (nabla_{idir} pgf_a) (r-Rb)_{ir} pgf_b 2968 ! ( pgf_a )(r-Rb)_{ir} (ddx pgf_b) - (ddx pgf_a) (r-Rb)_{ir} ( pgf_b ) = 2969 ! pgf_a *(lbx pgf_{b-1x+1ir} - 2*zetb*pgf_{b+1x+1ir}) - 2970 ! (lax pgf_{a-1x} - 2*zeta*pgf_{a+1x}) pgf_{b+1ir} 2971 2972 ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local))) 2973 pab_local = 0.0_dp 2974 DO lxa = 0, la_max 2975 DO lxb = 0, lb_max 2976 DO lya = 0, la_max - lxa 2977 DO lyb = 0, lb_max - lxb 2978 DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya 2979 DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb 2980 2981 ! this element of pab results in 4 elements of pab_local 2982 CALL prepare_ardb_m_darb(pab_local, pab, idir, ir, & 2983 lxa, lya, lza, lxb, lyb, lzb, o1, o2, zeta, zetb) 2984 END DO 2985 END DO 2986 END DO 2987 END DO 2988 END DO 2989 END DO 2990 o1_local = 0 2991 o2_local = 0 2992 CASE (FUNC_ARB) 2993 CPASSERT(PRESENT(ir)) 2994 la_max_local = la_max 2995 la_min_local = la_min 2996 lb_max_local = lb_max + 1 2997 lb_min_local = lb_min 2998 ! create a new pab_local so that mapping pab_local with pgf_a pgf_b 2999 ! is equivalent to mapping pab with 3000 ! pgf_a (r-Rb)_{ir} pgf_b = pgf_a * pgf_{b+1ir} 3001 ALLOCATE (pab_local(ncoset(la_max_local), ncoset(lb_max_local))) 3002 pab_local = 0.0_dp 3003 DO lxa = 0, la_max 3004 DO lxb = 0, lb_max 3005 DO lya = 0, la_max - lxa 3006 DO lyb = 0, lb_max - lxb 3007 DO lza = MAX(la_min - lxa - lya, 0), la_max - lxa - lya 3008 DO lzb = MAX(lb_min - lxb - lyb, 0), lb_max - lxb - lyb 3009 ! this element of pab results in 4 elements of pab_local 3010 CALL prepare_arb(pab_local, pab, ir, lxa, lya, lza, lxb, lyb, lzb, o1, o2) 3011 END DO 3012 END DO 3013 END DO 3014 END DO 3015 END DO 3016 END DO 3017 o1_local = 0 3018 o2_local = 0 3019 CASE (FUNC_AB) 3020 la_max_local = la_max 3021 la_min_local = la_min 3022 lb_max_local = lb_max 3023 lb_min_local = lb_min 3024 pab_local => pab 3025 o1_local = o1 3026 o2_local = o2 3027 CASE DEFAULT 3028 CPASSERT(.FALSE.) 3029 END SELECT 3030 3031 ng(:) = rsgrid%desc%npts(:) 3032 grid => rsgrid%r(:, :, :) 3033 gridbounds(1, 1) = LBOUND(GRID, 1) 3034 gridbounds(2, 1) = UBOUND(GRID, 1) 3035 gridbounds(1, 2) = LBOUND(GRID, 2) 3036 gridbounds(2, 2) = UBOUND(GRID, 2) 3037 gridbounds(1, 3) = LBOUND(GRID, 3) 3038 gridbounds(2, 3) = UBOUND(GRID, 3) 3039 3040 IF (PRESENT(ir) .AND. PRESENT(rsgauge) .AND. PRESENT(rsbuf)) THEN 3041 grid => rsbuf%r(:, :, :) 3042 grid_tmp => rsgrid%r(:, :, :) 3043 gauge => rsgauge%r(:, :, :) 3044 ENDIF 3045 3046! *** initialise the coefficient matrix, we transform the sum 3047! 3048! sum_{lxa,lya,lza,lxb,lyb,lzb} P_{lxa,lya,lza,lxb,lyb,lzb} * 3049! (x-a_x)**lxa (y-a_y)**lya (z-a_z)**lza (x-b_x)**lxb (y-a_y)**lya (z-a_z)**lza 3050! 3051! into 3052! 3053! sum_{lxp,lyp,lzp} P_{lxp,lyp,lzp} (x-p_x)**lxp (y-p_y)**lyp (z-p_z)**lzp 3054! 3055! where p is center of the product gaussian, and lp = la_max + lb_max 3056! (current implementation is l**7) 3057! 3058 lp = la_max_local + lb_max_local 3059! 3060! compute polynomial expansion coefs -> (x-a)**lxa (x-b)**lxb -> sum_{ls} alpha(ls,lxa,lxb,1)*(x-p)**ls 3061! 3062! *** make the alpha matrix *** 3063 alpha(:, :, :, :) = 0.0_dp 3064 DO iaxis = 1, 3 3065 DO lxa = 0, la_max_local 3066 DO lxb = 0, lb_max_local 3067 binomial_k_lxa = 1.0_dp 3068 a = 1.0_dp 3069 DO k = 0, lxa 3070 binomial_l_lxb = 1.0_dp 3071 b = 1.0_dp 3072 DO l = 0, lxb 3073 alpha(lxa - l + lxb - k, lxa, lxb, iaxis) = alpha(lxa - l + lxb - k, lxa, lxb, iaxis) + & 3074 binomial_k_lxa*binomial_l_lxb*a*b 3075 binomial_l_lxb = binomial_l_lxb*REAL(lxb - l, dp)/REAL(l + 1, dp) 3076 b = b*(rp(iaxis) - (ra(iaxis) + rab(iaxis))) 3077 ENDDO 3078 binomial_k_lxa = binomial_k_lxa*REAL(lxa - k, dp)/REAL(k + 1, dp) 3079 a = a*(-ra(iaxis) + rp(iaxis)) 3080 ENDDO 3081 ENDDO 3082 ENDDO 3083 ENDDO 3084 3085! 3086! compute P_{lxp,lyp,lzp} given P_{lxa,lya,lza,lxb,lyb,lzb} and alpha(ls,lxa,lxb,1) 3087! use a three step procedure 3088! we don't store zeros, so counting is done using lxyz,lxy in order to have contiguous memory access in collocate_fast.F 3089! 3090 lxyz = 0 3091 DO lzp = 0, lp 3092 DO lyp = 0, lp - lzp 3093 DO lxp = 0, lp - lzp - lyp 3094 lxyz = lxyz + 1 3095 coef_xyz(lxyz) = 0.0_dp 3096 ENDDO 3097 ENDDO 3098 ENDDO 3099 DO lzb = 0, lb_max_local 3100 DO lza = 0, la_max_local 3101 lxy = 0 3102 DO lyp = 0, lp - lza - lzb 3103 DO lxp = 0, lp - lza - lzb - lyp 3104 lxy = lxy + 1 3105 coef_xyt(lxy) = 0.0_dp 3106 ENDDO 3107 lxy = lxy + lza + lzb 3108 ENDDO 3109 DO lyb = 0, lb_max_local - lzb 3110 DO lya = 0, la_max_local - lza 3111 lxpm = (lb_max_local - lzb - lyb) + (la_max_local - lza - lya) 3112 coef_xtt(0:lxpm) = 0.0_dp 3113 DO lxb = MAX(lb_min_local - lzb - lyb, 0), lb_max_local - lzb - lyb 3114 DO lxa = MAX(la_min_local - lza - lya, 0), la_max_local - lza - lya 3115 ico = coset(lxa, lya, lza) 3116 jco = coset(lxb, lyb, lzb) 3117 p_ele = prefactor*pab_local(o1_local + ico, o2_local + jco) 3118 DO lxp = 0, lxa + lxb 3119 coef_xtt(lxp) = coef_xtt(lxp) + p_ele*alpha(lxp, lxa, lxb, 1) 3120 ENDDO 3121 ENDDO 3122 ENDDO 3123 lxy = 0 3124 DO lyp = 0, lya + lyb 3125 DO lxp = 0, lp - lza - lzb - lya - lyb 3126 lxy = lxy + 1 3127 coef_xyt(lxy) = coef_xyt(lxy) + alpha(lyp, lya, lyb, 2)*coef_xtt(lxp) 3128 ENDDO 3129 lxy = lxy + lza + lzb + lya + lyb - lyp 3130 ENDDO 3131 ENDDO 3132 ENDDO 3133 lxyz = 0 3134 DO lzp = 0, lza + lzb 3135 lxy = 0 3136 DO lyp = 0, lp - lza - lzb 3137 DO lxp = 0, lp - lza - lzb - lyp 3138 lxy = lxy + 1; lxyz = lxyz + 1 3139 coef_xyz(lxyz) = coef_xyz(lxyz) + alpha(lzp, lza, lzb, 3)*coef_xyt(lxy) 3140 ENDDO 3141 lxy = lxy + lza + lzb; lxyz = lxyz + lza + lzb - lzp 3142 ENDDO 3143 DO lyp = lp - lza - lzb + 1, lp - lzp 3144 DO lxp = 0, lp - lyp - lzp 3145 lxyz = lxyz + 1 3146 ENDDO 3147 ENDDO 3148 ENDDO 3149 ENDDO 3150 ENDDO 3151 3152 IF (subpatch_collocate) THEN 3153 CALL collocate_general_subpatch() 3154 ELSE 3155 IF (rsgrid%desc%orthorhombic) THEN 3156 CALL collocate_ortho() 3157 ! CALL collocate_general() 3158 ELSE 3159 CALL collocate_general_wings() 3160 !CALL collocate_general_opt() 3161 END IF 3162 END IF 3163 3164 IF (ga_gb_function /= FUNC_AB) THEN 3165 DEALLOCATE (pab_local) 3166 ENDIF 3167 3168 CONTAINS 3169 3170 ! 3171 ! this treats efficiently the orthogonal case 3172 ! 3173! ************************************************************************************************** 3174!> \brief ... 3175! ************************************************************************************************** 3176 SUBROUTINE collocate_ortho() 3177 3178! *** properties of the grid *** 3179 3180 ! notice we're in the ortho case 3181 dr(1) = rsgrid%desc%dh(1, 1) 3182 dr(2) = rsgrid%desc%dh(2, 2) 3183 dr(3) = rsgrid%desc%dh(3, 3) 3184 3185! *** get the sub grid properties for the given radius *** 3186 CALL return_cube(cube_info, radius, lb_cube, ub_cube, sphere_bounds) 3187 cmax = MAXVAL(ub_cube) 3188 3189! *** position of the gaussian product 3190! 3191! this is the actual definition of the position on the grid 3192! i.e. a point rp(:) gets here grid coordinates 3193! MODULO(rp(:)/dr(:),ng(:))+1 3194! hence (0.0,0.0,0.0) in real space is rsgrid%lb on the rsgrid ((1,1,1) on grid) 3195! 3196 3197 ALLOCATE (map(-cmax:cmax, 3)) 3198 CALL compute_cube_center(cubecenter, rsgrid%desc, zeta, zetb, ra, rab) 3199 roffset(:) = rp(:) - REAL(cubecenter(:), dp)*dr(:) 3200! *** a mapping so that the ig corresponds to the right grid point 3201 DO i = 1, 3 3202 IF (rsgrid%desc%perd(i) == 1) THEN 3203 start = lb_cube(i) 3204 DO 3205 offset = MODULO(cubecenter(i) + start, ng(i)) + 1 - start 3206 length = MIN(ub_cube(i), ng(i) - offset) - start 3207 DO ig = start, start + length 3208 map(ig, i) = ig + offset 3209 END DO 3210 IF (start + length .GE. ub_cube(i)) EXIT 3211 start = start + length + 1 3212 END DO 3213 ELSE 3214 ! this takes partial grid + border regions into account 3215 offset = MODULO(cubecenter(i) + lb_cube(i) + rsgrid%desc%lb(i) - rsgrid%lb_local(i), ng(i)) + 1 - lb_cube(i) 3216 ! check for out of bounds 3217 IF (ub_cube(i) + offset > UBOUND(grid, i) .OR. lb_cube(i) + offset < LBOUND(grid, i)) THEN 3218 CPASSERT(.FALSE.) 3219 ENDIF 3220 DO ig = lb_cube(i), ub_cube(i) 3221 map(ig, i) = ig + offset 3222 END DO 3223 END IF 3224 ENDDO 3225 ALLOCATE (pol_z(1:2, 0:lp, -cmax:0)) 3226 ALLOCATE (pol_y(1:2, 0:lp, -cmax:0)) 3227 ALLOCATE (pol_x(0:lp, -cmax:cmax)) 3228 3229 IF (PRESENT(ir) .AND. PRESENT(rsgauge)) CALL collocate_ortho_set_to_0() 3230 3231#include "prep.f90" 3232 3233 IF (PRESENT(lgrid)) THEN 3234#include "call_collocate_omp.f90" 3235 ELSE 3236#include "call_collocate.f90" 3237 END IF 3238 3239 IF (PRESENT(ir) .AND. PRESENT(rsgauge)) CALL collocate_gauge_ortho() 3240 3241 ! deallocation needed to pass around a pgi bug.. 3242 DEALLOCATE (pol_z) 3243 DEALLOCATE (pol_y) 3244 DEALLOCATE (pol_x) 3245 DEALLOCATE (map) 3246 3247 END SUBROUTINE collocate_ortho 3248 3249! ************************************************************************************************** 3250!> \brief ... 3251! ************************************************************************************************** 3252 SUBROUTINE collocate_gauge_ortho() 3253 INTEGER :: i, igmax, igmin, j, j2, jg, jg2, jgmin, & 3254 k, k2, kg, kg2, kgmin, sci 3255 REAL(KIND=dp) :: point(3, 4), res(4), x, y, y2, z, z2 3256 3257! notice we're in the ortho case 3258 3259 dr(1) = rsgrid%desc%dh(1, 1) 3260 dr(2) = rsgrid%desc%dh(2, 2) 3261 dr(3) = rsgrid%desc%dh(3, 3) 3262 ! 3263 sci = 1 3264 kgmin = sphere_bounds(sci) 3265 sci = sci + 1 3266 DO kg = kgmin, 0 3267 kg2 = 1 - kg 3268 k = map(kg, 3) 3269 k2 = map(kg2, 3) 3270 jgmin = sphere_bounds(sci) 3271 sci = sci + 1 3272 z = (REAL(kg, dp) + REAL(cubecenter(3), dp))*dr(3) 3273 z2 = (REAL(kg2, dp) + REAL(cubecenter(3), dp))*dr(3) 3274 DO jg = jgmin, 0 3275 jg2 = 1 - jg 3276 j = map(jg, 2) 3277 j2 = map(jg2, 2) 3278 igmin = sphere_bounds(sci) 3279 sci = sci + 1 3280 igmax = 1 - igmin 3281 y = (REAL(jg, dp) + REAL(cubecenter(2), dp))*dr(2) 3282 y2 = (REAL(jg2, dp) + REAL(cubecenter(2), dp))*dr(2) 3283 DO ig = igmin, igmax 3284 i = map(ig, 1) 3285 x = (REAL(ig, dp) + REAL(cubecenter(1), dp))*dr(1) 3286 point(1, 1) = x; point(2, 1) = y; point(3, 1) = z 3287 point(1, 2) = x; point(2, 2) = y2; point(3, 2) = z 3288 point(1, 3) = x; point(2, 3) = y; point(3, 3) = z2 3289 point(1, 4) = x; point(2, 4) = y2; point(3, 4) = z2 3290 ! 3291 res(1) = (point(ir, 1) - rb(ir)) - gauge(i, j, k) 3292 res(2) = (point(ir, 2) - rb(ir)) - gauge(i, j2, k) 3293 res(3) = (point(ir, 3) - rb(ir)) - gauge(i, j, k2) 3294 res(4) = (point(ir, 4) - rb(ir)) - gauge(i, j2, k2) 3295 ! 3296 grid_tmp(i, j, k) = grid_tmp(i, j, k) + grid(i, j, k)*res(1) 3297 grid_tmp(i, j2, k) = grid_tmp(i, j2, k) + grid(i, j2, k)*res(2) 3298 grid_tmp(i, j, k2) = grid_tmp(i, j, k2) + grid(i, j, k2)*res(3) 3299 grid_tmp(i, j2, k2) = grid_tmp(i, j2, k2) + grid(i, j2, k2)*res(4) 3300 ENDDO 3301 ENDDO 3302 ENDDO 3303 END SUBROUTINE collocate_gauge_ortho 3304 3305! ************************************************************************************************** 3306!> \brief ... 3307! ************************************************************************************************** 3308 SUBROUTINE collocate_ortho_set_to_0() 3309 INTEGER :: i, igmax, igmin, j, j2, jg, jg2, jgmin, & 3310 k, k2, kg, kg2, kgmin, sci 3311 3312! 3313 3314 dr(1) = rsgrid%desc%dh(1, 1) 3315 dr(2) = rsgrid%desc%dh(2, 2) 3316 dr(3) = rsgrid%desc%dh(3, 3) 3317 ! 3318 sci = 1 3319 kgmin = sphere_bounds(sci) 3320 sci = sci + 1 3321 DO kg = kgmin, 0 3322 kg2 = 1 - kg 3323 k = map(kg, 3) 3324 k2 = map(kg2, 3) 3325 jgmin = sphere_bounds(sci) 3326 sci = sci + 1 3327 DO jg = jgmin, 0 3328 jg2 = 1 - jg 3329 j = map(jg, 2) 3330 j2 = map(jg2, 2) 3331 igmin = sphere_bounds(sci) 3332 sci = sci + 1 3333 igmax = 1 - igmin 3334 DO ig = igmin, igmax 3335 i = map(ig, 1) 3336 grid(i, j, k) = 0.0_dp 3337 grid(i, j2, k) = 0.0_dp 3338 grid(i, j, k2) = 0.0_dp 3339 grid(i, j2, k2) = 0.0_dp 3340 ENDDO 3341 ENDDO 3342 ENDDO 3343 END SUBROUTINE collocate_ortho_set_to_0 3344 3345! 3346! this is a general 'optimized' routine to do the collocation 3347! 3348! ************************************************************************************************** 3349!> \brief ... 3350! ************************************************************************************************** 3351 SUBROUTINE collocate_general_opt() 3352 3353 INTEGER :: i, i_index, il, ilx, ily, ilz, index_max(3), index_min(3), ismax, ismin, j, & 3354 j_index, jl, jlx, jly, jlz, k, k_index, kl, klx, kly, klz, lpx, lpy, lpz, lx, ly, lz, & 3355 offset(3) 3356 INTEGER, ALLOCATABLE, DIMENSION(:) :: grid_map 3357 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: coef_map 3358 REAL(KIND=dp) :: a, b, c, d, di, dip, dj, djp, dk, dkp, & 3359 exp0i, exp1i, exp2i, gp(3), & 3360 hmatgrid(3, 3), pointj(3), pointk(3), & 3361 res, v(3) 3362 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: coef_ijk 3363 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: hmatgridp 3364 3365! 3366! transform P_{lxp,lyp,lzp} into a P_{lip,ljp,lkp} such that 3367! sum_{lxp,lyp,lzp} P_{lxp,lyp,lzp} (x-x_p)**lxp (y-y_p)**lyp (z-z_p)**lzp = 3368! sum_{lip,ljp,lkp} P_{lip,ljp,lkp} (i-i_p)**lip (j-j_p)**ljp (k-k_p)**lkp 3369! 3370 3371 ALLOCATE (coef_ijk(((lp + 1)*(lp + 2)*(lp + 3))/6)) 3372 3373 ! aux mapping array to simplify life 3374 ALLOCATE (coef_map(0:lp, 0:lp, 0:lp)) 3375 coef_map = HUGE(coef_map) 3376 lxyz = 0 3377 DO lzp = 0, lp 3378 DO lyp = 0, lp - lzp 3379 DO lxp = 0, lp - lzp - lyp 3380 lxyz = lxyz + 1 3381 coef_ijk(lxyz) = 0.0_dp 3382 coef_map(lxp, lyp, lzp) = lxyz 3383 ENDDO 3384 ENDDO 3385 ENDDO 3386 3387 ! cell hmat in grid points 3388 hmatgrid = rsgrid%desc%dh 3389 3390 ! center in grid coords 3391 gp = MATMUL(rsgrid%desc%dh_inv, rp) 3392 cubecenter(:) = FLOOR(gp) 3393 3394 ! transform using multinomials 3395 ALLOCATE (hmatgridp(3, 3, 0:lp)) 3396 hmatgridp(:, :, 0) = 1.0_dp 3397 DO k = 1, lp 3398 hmatgridp(:, :, k) = hmatgridp(:, :, k - 1)*hmatgrid(:, :) 3399 ENDDO 3400 3401 lpx = lp 3402 DO klx = 0, lpx 3403 DO jlx = 0, lpx - klx 3404 DO ilx = 0, lpx - klx - jlx 3405 lx = ilx + jlx + klx 3406 lpy = lp - lx 3407 DO kly = 0, lpy 3408 DO jly = 0, lpy - kly 3409 DO ily = 0, lpy - kly - jly 3410 ly = ily + jly + kly 3411 lpz = lp - lx - ly 3412 DO klz = 0, lpz 3413 DO jlz = 0, lpz - klz 3414 DO ilz = 0, lpz - klz - jlz 3415 lz = ilz + jlz + klz 3416 3417 il = ilx + ily + ilz 3418 jl = jlx + jly + jlz 3419 kl = klx + kly + klz 3420 coef_ijk(coef_map(il, jl, kl)) = & 3421 coef_ijk(coef_map(il, jl, kl)) + coef_xyz(coef_map(lx, ly, lz))* & 3422 hmatgridp(1, 1, ilx)*hmatgridp(1, 2, jlx)*hmatgridp(1, 3, klx)* & 3423 hmatgridp(2, 1, ily)*hmatgridp(2, 2, jly)*hmatgridp(2, 3, kly)* & 3424 hmatgridp(3, 1, ilz)*hmatgridp(3, 2, jlz)*hmatgridp(3, 3, klz)* & 3425 fac(lx)*fac(ly)*fac(lz)/ & 3426 (fac(ilx)*fac(ily)*fac(ilz)*fac(jlx)*fac(jly)*fac(jlz)*fac(klx)*fac(kly)*fac(klz)) 3427 ENDDO 3428 ENDDO 3429 ENDDO 3430 ENDDO 3431 ENDDO 3432 ENDDO 3433 ENDDO 3434 ENDDO 3435 ENDDO 3436 3437 CALL return_cube_nonortho(cube_info, radius, index_min, index_max, rp) 3438 3439 offset(:) = MODULO(index_min(:) + rsgrid%desc%lb(:) - rsgrid%lb_local(:), ng(:)) + 1 3440 3441 ALLOCATE (grid_map(index_min(1):index_max(1))) 3442 DO i = index_min(1), index_max(1) 3443 grid_map(i) = MODULO(i, ng(1)) + 1 3444 IF (rsgrid%desc%perd(1) == 1) THEN 3445 grid_map(i) = MODULO(i, ng(1)) + 1 3446 ELSE 3447 grid_map(i) = i - index_min(1) + offset(1) 3448 ENDIF 3449 ENDDO 3450 3451 ! go over the grid, but cycle if the point is not within the radius 3452 DO k = index_min(3), index_max(3) 3453 dk = k - gp(3) 3454 pointk = hmatgrid(:, 3)*dk 3455 3456 IF (rsgrid%desc%perd(3) == 1) THEN 3457 k_index = MODULO(k, ng(3)) + 1 3458 ELSE 3459 k_index = k - index_min(3) + offset(3) 3460 ENDIF 3461 3462 coef_xyt = 0.0_dp 3463 lxyz = 0 3464 dkp = 1.0_dp 3465 DO kl = 0, lp 3466 lxy = 0 3467 DO jl = 0, lp - kl 3468 DO il = 0, lp - kl - jl 3469 lxyz = lxyz + 1; lxy = lxy + 1 3470 coef_xyt(lxy) = coef_xyt(lxy) + coef_ijk(lxyz)*dkp 3471 ENDDO 3472 lxy = lxy + kl 3473 ENDDO 3474 dkp = dkp*dk 3475 ENDDO 3476 3477 DO j = index_min(2), index_max(2) 3478 dj = j - gp(2) 3479 pointj = pointk + hmatgrid(:, 2)*dj 3480 IF (rsgrid%desc%perd(2) == 1) THEN 3481 j_index = MODULO(j, ng(2)) + 1 3482 ELSE 3483 j_index = j - index_min(2) + offset(2) 3484 ENDIF 3485 3486 coef_xtt = 0.0_dp 3487 lxy = 0 3488 djp = 1.0_dp 3489 DO jl = 0, lp 3490 DO il = 0, lp - jl 3491 lxy = lxy + 1 3492 coef_xtt(il) = coef_xtt(il) + coef_xyt(lxy)*djp 3493 ENDDO 3494 djp = djp*dj 3495 ENDDO 3496 3497 ! find bounds for the inner loop 3498 ! based on a quadratic equation in i 3499 ! a*i**2+b*i+c=radius**2 3500 v = pointj - gp(1)*hmatgrid(:, 1) 3501 a = DOT_PRODUCT(hmatgrid(:, 1), hmatgrid(:, 1)) 3502 b = 2*DOT_PRODUCT(v, hmatgrid(:, 1)) 3503 c = DOT_PRODUCT(v, v) 3504 d = b*b - 4*a*(c - radius**2) 3505 3506 IF (d < 0) THEN 3507 CYCLE 3508 ELSE 3509 d = SQRT(d) 3510 ismin = CEILING((-b - d)/(2*a)) 3511 ismax = FLOOR((-b + d)/(2*a)) 3512 ENDIF 3513 ! prepare for computing -zetp*rsq 3514 a = -zetp*a 3515 b = -zetp*b 3516 c = -zetp*c 3517 i = ismin - 1 3518 3519 ! the recursion relation might have to be done 3520 ! from the center of the gaussian (in both directions) 3521 ! instead as the current implementation from an edge 3522 exp2i = EXP((a*i + b)*i + c) 3523 exp1i = EXP(2*a*i + a + b) 3524 exp0i = EXP(2*a) 3525 3526 DO i = ismin, ismax 3527 di = i - gp(1) 3528 3529 ! polynomial terms 3530 res = 0.0_dp 3531 dip = 1.0_dp 3532 DO il = 0, lp 3533 res = res + coef_xtt(il)*dip 3534 dip = dip*di 3535 ENDDO 3536 3537 ! the exponential recursion 3538 exp2i = exp2i*exp1i 3539 exp1i = exp1i*exp0i 3540 res = res*exp2i 3541 3542 i_index = grid_map(i) 3543 IF (PRESENT(lgrid)) THEN 3544 ig = (k_index - 1)*ng(2)*ng(1) + (j_index - 1)*ng(1) + (i_index - 1) + 1 3545 lgrid%r(ig, ithread_l) = lgrid%r(ig, ithread_l) + res 3546 ELSE 3547 grid(i_index, j_index, k_index) = grid(i_index, j_index, k_index) + res 3548 ENDIF 3549 ENDDO 3550 ENDDO 3551 ENDDO 3552 !t2=nanotime_ia32() 3553 !write(*,*) t2-t1 3554 ! deallocation needed to pass around a pgi bug.. 3555 DEALLOCATE (coef_ijk) 3556 DEALLOCATE (coef_map) 3557 DEALLOCATE (hmatgridp) 3558 DEALLOCATE (grid_map) 3559 3560 END SUBROUTINE collocate_general_opt 3561 3562! ************************************************************************************************** 3563!> \brief ... 3564! ************************************************************************************************** 3565 SUBROUTINE collocate_general_subpatch() 3566 INTEGER, DIMENSION(2, 3) :: local_b 3567 INTEGER, DIMENSION(3) :: local_s, periodic 3568 REAL(dp), DIMENSION((lp+1)*(lp+2)*(lp+3)/6) :: poly_d3 3569 3570 periodic = 1 ! cell%perd 3571 CALL poly_cp2k2d3(coef_xyz, lp, poly_d3) 3572 local_b(1, :) = rsgrid%lb_real - rsgrid%desc%lb 3573 local_b(2, :) = rsgrid%ub_real - rsgrid%desc%lb 3574 local_s = rsgrid%lb_real - rsgrid%lb_local 3575 IF (BTEST(subpatch_pattern, 0)) local_b(1, 1) = local_b(1, 1) - rsgrid%desc%border 3576 IF (BTEST(subpatch_pattern, 1)) local_b(2, 1) = local_b(2, 1) + rsgrid%desc%border 3577 IF (BTEST(subpatch_pattern, 2)) local_b(1, 2) = local_b(1, 2) - rsgrid%desc%border 3578 IF (BTEST(subpatch_pattern, 3)) local_b(2, 2) = local_b(2, 2) + rsgrid%desc%border 3579 IF (BTEST(subpatch_pattern, 4)) local_b(1, 3) = local_b(1, 3) - rsgrid%desc%border 3580 IF (BTEST(subpatch_pattern, 5)) local_b(2, 3) = local_b(2, 3) + rsgrid%desc%border 3581 IF (BTEST(subpatch_pattern, 0)) local_s(1) = local_s(1) - rsgrid%desc%border 3582 IF (BTEST(subpatch_pattern, 2)) local_s(2) = local_s(2) - rsgrid%desc%border 3583 IF (BTEST(subpatch_pattern, 4)) local_s(3) = local_s(3) - rsgrid%desc%border 3584 IF (PRESENT(lgrid)) THEN 3585 CALL collocGauss(h=cell%hmat, h_inv=cell%h_inv, & 3586 grid=grid, poly=poly_d3, alphai=zetp, posi=rp, max_r2=radius*radius, & 3587 periodic=periodic, gdim=ng, local_bounds=local_b, local_shift=local_s, & 3588 lgrid=lgrid) 3589 ELSE 3590 CALL collocGauss(h=cell%hmat, h_inv=cell%h_inv, & 3591 grid=grid, poly=poly_d3, alphai=zetp, posi=rp, max_r2=radius*radius, & 3592 periodic=periodic, gdim=ng, local_bounds=local_b, local_shift=local_s) 3593 END IF 3594 ! defaults: local_shift=(/0,0,0/),poly_shift=(/0.0_dp,0.0_dp,0.0_dp/),scale=1.0_dp, 3595 3596 END SUBROUTINE 3597 3598! ************************************************************************************************** 3599!> \brief ... 3600! ************************************************************************************************** 3601 SUBROUTINE collocate_general_wings() 3602 INTEGER, DIMENSION(2, 3) :: local_b 3603 INTEGER, DIMENSION(3) :: periodic 3604 REAL(dp), DIMENSION((lp+1)*(lp+2)*(lp+3)/6) :: poly_d3 3605 REAL(dp), DIMENSION(3) :: local_shift, rShifted 3606 3607 periodic = 1 ! cell%perd 3608 CALL poly_cp2k2d3(coef_xyz, lp, poly_d3) 3609 local_b(1, :) = 0 3610 local_b(2, :) = MIN(rsgrid%desc%npts - 1, rsgrid%ub_local - rsgrid%lb_local) 3611 local_shift = REAL(rsgrid%desc%lb - rsgrid%lb_local, dp)/REAL(rsgrid%desc%npts, dp) 3612 rShifted(1) = rp(1) + cell%hmat(1, 1)*local_shift(1) & 3613 + cell%hmat(1, 2)*local_shift(2) & 3614 + cell%hmat(1, 3)*local_shift(3) 3615 rShifted(2) = rp(2) + cell%hmat(2, 1)*local_shift(1) & 3616 + cell%hmat(2, 2)*local_shift(2) & 3617 + cell%hmat(2, 3)*local_shift(3) 3618 rShifted(3) = rp(3) + cell%hmat(3, 1)*local_shift(1) & 3619 + cell%hmat(3, 2)*local_shift(2) & 3620 + cell%hmat(3, 3)*local_shift(3) 3621 IF (PRESENT(lgrid)) THEN 3622 CALL collocGauss(h=cell%hmat, h_inv=cell%h_inv, & 3623 grid=grid, poly=poly_d3, alphai=zetp, posi=rShifted, max_r2=radius*radius, & 3624 periodic=periodic, gdim=ng, local_bounds=local_b, & 3625 lgrid=lgrid) 3626 ELSE 3627 CALL collocGauss(h=cell%hmat, h_inv=cell%h_inv, & 3628 grid=grid, poly=poly_d3, alphai=zetp, posi=rShifted, max_r2=radius*radius, & 3629 periodic=periodic, gdim=ng, local_bounds=local_b) 3630 END IF 3631 ! defaults: local_shift=(/0,0,0/),poly_shift=(/0.0_dp,0.0_dp,0.0_dp/),scale=1.0_dp, 3632 3633 END SUBROUTINE 3634 3635! 3636! this is a general 'reference' routine to do the collocation 3637! 3638! ************************************************************************************************** 3639!> \brief ... 3640! ************************************************************************************************** 3641 SUBROUTINE collocate_general() 3642 3643 INTEGER :: i, index_max(3), index_min(3), & 3644 ipoint(3), j, k 3645 REAL(KIND=dp) :: inv_ng(3), point(3), primpt 3646 3647! still hard-wired (see MODULO) 3648 3649 CPASSERT(ALL(rsgrid%desc%perd == 1)) 3650 3651 CALL return_cube_nonortho(cube_info, radius, index_min, index_max, rp) 3652 3653 inv_ng = 1.0_dp/ng 3654 3655 ! go over the grid, but cycle if the point is not within the radius 3656 DO k = index_min(3), index_max(3) 3657 DO j = index_min(2), index_max(2) 3658 DO i = index_min(1), index_max(1) 3659 ! point in real space 3660 point = MATMUL(cell%hmat, REAL((/i, j, k/), KIND=dp)*inv_ng) 3661 ! primitive_value of point 3662 primpt = primitive_value(point) 3663 ! skip if outside of the sphere 3664 IF (SUM((point - rp)**2) > radius**2) CYCLE 3665 ! point on the grid (including pbc) 3666 ipoint = MODULO((/i, j, k/), ng) + 1 3667 ! add to grid 3668 IF (PRESENT(lgrid)) THEN 3669 ig = ipoint(3)*ng(2)*ng(1) + ipoint(2)*ng(1) + ipoint(1) + 1 3670 lgrid%r(ig, ithread_l) = lgrid%r(ig, ithread_l) + primpt 3671 ELSE 3672 grid(ipoint(1), ipoint(2), ipoint(3)) = grid(ipoint(1), ipoint(2), ipoint(3)) + primpt 3673 ENDIF 3674 ENDDO 3675 ENDDO 3676 ENDDO 3677 3678 END SUBROUTINE collocate_general 3679 3680! ************************************************************************************************** 3681!> \brief ... 3682!> \param point ... 3683!> \return ... 3684! ************************************************************************************************** 3685 FUNCTION primitive_value(point) RESULT(res) 3686 REAL(KIND=dp) :: point(3), res 3687 3688 REAL(KIND=dp) :: dra(3), drap(3), drb(3), drbp(3), myexp, & 3689 pdrap 3690 3691 res = 0.0_dp 3692 myexp = EXP(-zetp*SUM((point - rp)**2))*prefactor 3693 dra = point - ra 3694 drb = point - rb 3695 drap(1) = 1.0_dp 3696 DO lxa = 0, la_max_local 3697 drbp(1) = 1.0_dp 3698 DO lxb = 0, lb_max_local 3699 drap(2) = 1.0_dp 3700 DO lya = 0, la_max_local - lxa 3701 drbp(2) = 1.0_dp 3702 DO lyb = 0, lb_max_local - lxb 3703 drap(3) = 1.0_dp 3704 DO lza = 1, MAX(la_min_local - lxa - lya, 0) 3705 drap(3) = drap(3)*dra(3) 3706 ENDDO 3707 DO lza = MAX(la_min_local - lxa - lya, 0), la_max_local - lxa - lya 3708 drbp(3) = 1.0_dp 3709 DO lzb = 1, MAX(lb_min_local - lxb - lyb, 0) 3710 drbp(3) = drbp(3)*drb(3) 3711 ENDDO 3712 ico = coset(lxa, lya, lza) 3713 pdrap = PRODUCT(drap) 3714 DO lzb = MAX(lb_min_local - lxb - lyb, 0), lb_max_local - lxb - lyb 3715 jco = coset(lxb, lyb, lzb) 3716 res = res + pab_local(ico + o1_local, jco + o2_local)*myexp*pdrap*PRODUCT(drbp) 3717 drbp(3) = drbp(3)*drb(3) 3718 ENDDO 3719 drap(3) = drap(3)*dra(3) 3720 ENDDO 3721 drbp(2) = drbp(2)*drb(2) 3722 ENDDO 3723 drap(2) = drap(2)*dra(2) 3724 ENDDO 3725 drbp(1) = drbp(1)*drb(1) 3726 ENDDO 3727 drap(1) = drap(1)*dra(1) 3728 ENDDO 3729 3730 END FUNCTION primitive_value 3731 3732 END SUBROUTINE collocate_pgf_product_rspace 3733 3734! ************************************************************************************************** 3735!> \brief low level collocation of primitive gaussian functions in g-space 3736!> \param la_max ... 3737!> \param zeta ... 3738!> \param la_min ... 3739!> \param lb_max ... 3740!> \param zetb ... 3741!> \param lb_min ... 3742!> \param ra ... 3743!> \param rab ... 3744!> \param rab2 ... 3745!> \param scale ... 3746!> \param pab ... 3747!> \param na ... 3748!> \param nb ... 3749!> \param eps_rho_gspace ... 3750!> \param gsq_max ... 3751!> \param pw ... 3752! ************************************************************************************************** 3753 SUBROUTINE collocate_pgf_product_gspace(la_max, zeta, la_min, & 3754 lb_max, zetb, lb_min, & 3755 ra, rab, rab2, scale, pab, na, nb, & 3756 eps_rho_gspace, gsq_max, pw) 3757 3758 ! NOTE: this routine is much slower than collocate_pgf_product_rspace 3759 3760 INTEGER, INTENT(IN) :: la_max 3761 REAL(dp), INTENT(IN) :: zeta 3762 INTEGER, INTENT(IN) :: la_min, lb_max 3763 REAL(dp), INTENT(IN) :: zetb 3764 INTEGER, INTENT(IN) :: lb_min 3765 REAL(dp), DIMENSION(3), INTENT(IN) :: ra, rab 3766 REAL(dp), INTENT(IN) :: rab2, scale 3767 REAL(dp), DIMENSION(:, :), POINTER :: pab 3768 INTEGER, INTENT(IN) :: na, nb 3769 REAL(dp), INTENT(IN) :: eps_rho_gspace, gsq_max 3770 TYPE(pw_type), POINTER :: pw 3771 3772 CHARACTER(LEN=*), PARAMETER :: routineN = 'collocate_pgf_product_gspace', & 3773 routineP = moduleN//':'//routineN 3774 3775 COMPLEX(dp), DIMENSION(3) :: phasefactor 3776 COMPLEX(dp), DIMENSION(:), POINTER :: rag, rbg 3777 COMPLEX(dp), DIMENSION(:, :, :, :), POINTER :: cubeaxis 3778 INTEGER :: ax, ay, az, bx, by, bz, handle, i, ico, & 3779 ig, ig2, jco, jg, kg, la, lb, & 3780 lb_cube_min, lb_grid, ub_cube_max, & 3781 ub_grid 3782 INTEGER, DIMENSION(3) :: lb_cube, ub_cube 3783 REAL(dp) :: f, fa, fb, pij, prefactor, rzetp, & 3784 twozetp, zetp 3785 REAL(dp), DIMENSION(3) :: dg, expfactor, fap, fbp, rap, rbp, rp 3786 REAL(dp), DIMENSION(:), POINTER :: g 3787 3788 CALL timeset(routineN, handle) 3789 3790 dg(:) = twopi/(pw%pw_grid%npts(:)*pw%pw_grid%dr(:)) 3791 3792 zetp = zeta + zetb 3793 rzetp = 1.0_dp/zetp 3794 f = zetb*rzetp 3795 rap(:) = f*rab(:) 3796 rbp(:) = rap(:) - rab(:) 3797 rp(:) = ra(:) + rap(:) 3798 twozetp = 2.0_dp*zetp 3799 fap(:) = twozetp*rap(:) 3800 fbp(:) = twozetp*rbp(:) 3801 3802 prefactor = scale*SQRT((pi*rzetp)**3)*EXP(-zeta*f*rab2) 3803 phasefactor(:) = EXP(CMPLX(0.0_dp, -rp(:)*dg(:), KIND=dp)) 3804 expfactor(:) = EXP(-0.25*rzetp*dg(:)*dg(:)) 3805 3806 lb_cube(:) = pw%pw_grid%bounds(1, :) 3807 ub_cube(:) = pw%pw_grid%bounds(2, :) 3808 3809 lb_cube_min = MINVAL(lb_cube(:)) 3810 ub_cube_max = MAXVAL(ub_cube(:)) 3811 3812 NULLIFY (cubeaxis, g, rag, rbg) 3813 3814 CALL reallocate(cubeaxis, lb_cube_min, ub_cube_max, 1, 3, 0, la_max, 0, lb_max) 3815 CALL reallocate(g, lb_cube_min, ub_cube_max) 3816 CALL reallocate(rag, lb_cube_min, ub_cube_max) 3817 CALL reallocate(rbg, lb_cube_min, ub_cube_max) 3818 3819 lb_grid = LBOUND(pw%cc, 1) 3820 ub_grid = UBOUND(pw%cc, 1) 3821 3822 DO i = 1, 3 3823 3824 DO ig = lb_cube(i), ub_cube(i) 3825 ig2 = ig*ig 3826 cubeaxis(ig, i, 0, 0) = expfactor(i)**ig2*phasefactor(i)**ig 3827 END DO 3828 3829 IF (la_max > 0) THEN 3830 DO ig = lb_cube(i), ub_cube(i) 3831 g(ig) = REAL(ig, dp)*dg(i) 3832 rag(ig) = CMPLX(fap(i), -g(ig), KIND=dp) 3833 cubeaxis(ig, i, 1, 0) = rag(ig)*cubeaxis(ig, i, 0, 0) 3834 END DO 3835 DO la = 2, la_max 3836 fa = REAL(la - 1, dp)*twozetp 3837 DO ig = lb_cube(i), ub_cube(i) 3838 cubeaxis(ig, i, la, 0) = rag(ig)*cubeaxis(ig, i, la - 1, 0) + & 3839 fa*cubeaxis(ig, i, la - 2, 0) 3840 END DO 3841 END DO 3842 IF (lb_max > 0) THEN 3843 fa = twozetp 3844 DO ig = lb_cube(i), ub_cube(i) 3845 rbg(ig) = CMPLX(fbp(i), -g(ig), KIND=dp) 3846 cubeaxis(ig, i, 0, 1) = rbg(ig)*cubeaxis(ig, i, 0, 0) 3847 cubeaxis(ig, i, 1, 1) = rbg(ig)*cubeaxis(ig, i, 1, 0) + & 3848 fa*cubeaxis(ig, i, 0, 0) 3849 END DO 3850 DO lb = 2, lb_max 3851 fb = REAL(lb - 1, dp)*twozetp 3852 DO ig = lb_cube(i), ub_cube(i) 3853 cubeaxis(ig, i, 0, lb) = rbg(ig)*cubeaxis(ig, i, 0, lb - 1) + & 3854 fb*cubeaxis(ig, i, 0, lb - 2) 3855 cubeaxis(ig, i, 1, lb) = rbg(ig)*cubeaxis(ig, i, 1, lb - 1) + & 3856 fb*cubeaxis(ig, i, 1, lb - 2) + & 3857 fa*cubeaxis(ig, i, 0, lb - 1) 3858 END DO 3859 END DO 3860 DO la = 2, la_max 3861 fa = REAL(la, dp)*twozetp 3862 DO ig = lb_cube(i), ub_cube(i) 3863 cubeaxis(ig, i, la, 1) = rbg(ig)*cubeaxis(ig, i, la, 0) + & 3864 fa*cubeaxis(ig, i, la - 1, 0) 3865 END DO 3866 DO lb = 2, lb_max 3867 fb = REAL(lb - 1, dp)*twozetp 3868 DO ig = lb_cube(i), ub_cube(i) 3869 cubeaxis(ig, i, la, lb) = rbg(ig)*cubeaxis(ig, i, la, lb - 1) + & 3870 fb*cubeaxis(ig, i, la, lb - 2) + & 3871 fa*cubeaxis(ig, i, la - 1, lb - 1) 3872 END DO 3873 END DO 3874 END DO 3875 END IF 3876 ELSE 3877 IF (lb_max > 0) THEN 3878 DO ig = lb_cube(i), ub_cube(i) 3879 g(ig) = REAL(ig, dp)*dg(i) 3880 rbg(ig) = CMPLX(fbp(i), -g(ig), KIND=dp) 3881 cubeaxis(ig, i, 0, 1) = rbg(ig)*cubeaxis(ig, i, 0, 0) 3882 END DO 3883 DO lb = 2, lb_max 3884 fb = REAL(lb - 1, dp)*twozetp 3885 DO ig = lb_cube(i), ub_cube(i) 3886 cubeaxis(ig, i, 0, lb) = rbg(ig)*cubeaxis(ig, i, 0, lb - 1) + & 3887 fb*cubeaxis(ig, i, 0, lb - 2) 3888 END DO 3889 END DO 3890 END IF 3891 END IF 3892 3893 END DO 3894 3895 DO la = 0, la_max 3896 DO lb = 0, lb_max 3897 IF (la + lb == 0) CYCLE 3898 fa = (1.0_dp/twozetp)**(la + lb) 3899 DO i = 1, 3 3900 DO ig = lb_cube(i), ub_cube(i) 3901 cubeaxis(ig, i, la, lb) = fa*cubeaxis(ig, i, la, lb) 3902 END DO 3903 END DO 3904 END DO 3905 END DO 3906 3907 ! Add the current primitive Gaussian function product to grid 3908 3909 DO ico = ncoset(la_min - 1) + 1, ncoset(la_max) 3910 3911 ax = indco(1, ico) 3912 ay = indco(2, ico) 3913 az = indco(3, ico) 3914 3915 DO jco = ncoset(lb_min - 1) + 1, ncoset(lb_max) 3916 3917 pij = prefactor*pab(na + ico, nb + jco) 3918 3919 IF (ABS(pij) < eps_rho_gspace) CYCLE 3920 3921 bx = indco(1, jco) 3922 by = indco(2, jco) 3923 bz = indco(3, jco) 3924 3925 DO i = lb_grid, ub_grid 3926 IF (pw%pw_grid%gsq(i) > gsq_max) CYCLE 3927 ig = pw%pw_grid%g_hat(1, i) 3928 jg = pw%pw_grid%g_hat(2, i) 3929 kg = pw%pw_grid%g_hat(3, i) 3930 pw%cc(i) = pw%cc(i) + pij*cubeaxis(ig, 1, ax, bx)* & 3931 cubeaxis(jg, 2, ay, by)* & 3932 cubeaxis(kg, 3, az, bz) 3933 END DO 3934 3935 END DO 3936 3937 END DO 3938 3939 DEALLOCATE (cubeaxis) 3940 DEALLOCATE (g) 3941 DEALLOCATE (rag) 3942 DEALLOCATE (rbg) 3943 3944 CALL timestop(handle) 3945 3946 END SUBROUTINE collocate_pgf_product_gspace 3947 3948END MODULE qs_collocate_density 3949