1!! Copyright (C) 2009 X. Andrade 2!! 3!! This program is free software; you can redistribute it and/or modify 4!! it under the terms of the GNU General Public License as published by 5!! the Free Software Foundation; either version 2, or (at your option) 6!! any later version. 7!! 8!! This program is distributed in the hope that it will be useful, 9!! but WITHOUT ANY WARRANTY; without even the implied warranty of 10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11!! GNU General Public License for more details. 12!! 13!! You should have received a copy of the GNU General Public License 14!! along with this program; if not, write to the Free Software 15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16!! 02110-1301, USA. 17!! 18 19#include "global.h" 20 21module hamiltonian_elec_base_oct_m 22 use accel_oct_m 23 use batch_oct_m 24 use batch_ops_oct_m 25 use boundaries_oct_m 26 use blas_oct_m 27 use comm_oct_m 28 use derivatives_oct_m 29 use epot_oct_m 30 use geometry_oct_m 31 use global_oct_m 32 use hardware_oct_m 33 use hgh_projector_oct_m 34 use kb_projector_oct_m 35 use math_oct_m 36 use mesh_oct_m 37 use messages_oct_m 38 use mpi_oct_m 39 use nl_operator_oct_m 40 use profiling_oct_m 41 use projector_oct_m 42 use projector_matrix_oct_m 43 use ps_oct_m 44 use simul_box_oct_m 45 use states_elec_oct_m 46 use states_elec_dim_oct_m 47 use submesh_oct_m 48 use types_oct_m 49 use wfs_elec_oct_m 50 51 implicit none 52 53 private 54 55 public :: & 56 hamiltonian_elec_base_t, & 57 dhamiltonian_elec_base_local, & 58 zhamiltonian_elec_base_local, & 59 dhamiltonian_elec_base_local_sub, & 60 zhamiltonian_elec_base_local_sub, & 61 dhamiltonian_elec_base_magnetic, & 62 zhamiltonian_elec_base_magnetic, & 63 dhamiltonian_elec_base_rashba, & 64 zhamiltonian_elec_base_rashba, & 65 dhamiltonian_elec_base_nlocal_start, & 66 zhamiltonian_elec_base_nlocal_start, & 67 dhamiltonian_elec_base_nlocal_finish, & 68 zhamiltonian_elec_base_nlocal_finish, & 69 dhamiltonian_elec_base_nlocal_position_commutator, & 70 zhamiltonian_elec_base_nlocal_position_commutator, & 71 hamiltonian_elec_base_has_magnetic, & 72 hamiltonian_elec_base_init, & 73 hamiltonian_elec_base_end, & 74 hamiltonian_elec_base_allocate, & 75 hamiltonian_elec_base_clear, & 76 hamiltonian_elec_base_build_proj, & 77 hamiltonian_elec_base_update, & 78 hamiltonian_elec_base_accel_copy_pot, & 79 dhamiltonian_elec_base_phase, & 80 zhamiltonian_elec_base_phase, & 81 dhamiltonian_elec_base_phase_spiral, & 82 zhamiltonian_elec_base_phase_spiral, & 83 dhamiltonian_elec_base_nlocal_force, & 84 zhamiltonian_elec_base_nlocal_force, & 85 projection_t, & 86 hamiltonian_elec_base_projector_self_overlap, & 87 hamiltonian_elec_base_set_phase_corr, & 88 hamiltonian_elec_base_unset_phase_corr 89 90 !> This object stores and applies an electromagnetic potential that 91 !! can be represented by different types of potentials. 92 93 type hamiltonian_elec_base_t 94 private 95 integer :: nspin 96 FLOAT :: mass !< Needed to compute the magnetic terms, if the mass is not one. 97 FLOAT :: rashba_coupling 98 type(nl_operator_t), pointer, public :: kinetic 99 type(projector_matrix_t), allocatable, public :: projector_matrices(:) 100 FLOAT, allocatable, public :: potential(:, :) 101 FLOAT, allocatable, public :: Impotential(:, :) 102 FLOAT, allocatable, public :: uniform_magnetic_field(:) 103 FLOAT, allocatable, public :: uniform_vector_potential(:) 104 FLOAT, allocatable, public :: vector_potential(:, :) 105 integer, public :: nprojector_matrices 106 logical, public :: apply_projector_matrices 107 logical, public :: has_non_local_potential 108 integer :: full_projection_size 109 integer, public :: max_npoints 110 integer, public :: total_points 111 integer :: max_nprojs 112 logical :: projector_mix 113 CMPLX, allocatable, public :: projector_phases(:, :, :, :) 114 integer, allocatable, public :: projector_to_atom(:) 115 integer :: nregions 116 integer, public :: nphase 117 integer, allocatable :: regions(:) 118 type(accel_mem_t) :: potential_opencl 119 type(accel_mem_t) :: buff_offsets 120 type(accel_mem_t) :: buff_matrices 121 type(accel_mem_t) :: buff_maps 122 type(accel_mem_t) :: buff_scals 123 type(accel_mem_t) :: buff_position 124 type(accel_mem_t) :: buff_pos 125 type(accel_mem_t) :: buff_invmap 126 type(accel_mem_t), public :: buff_projector_phases 127 type(accel_mem_t) :: buff_mix 128 CMPLX, pointer, public :: phase(:, :) 129 CMPLX, allocatable, public :: phase_corr(:,:) 130 CMPLX, allocatable, public :: phase_spiral(:,:) 131 type(accel_mem_t), public :: buff_phase 132 type(accel_mem_t), public :: buff_phase_spiral 133 integer, public :: buff_phase_qn_start 134 logical :: projector_self_overlap !< if .true. some projectors overlap with themselves 135 FLOAT, pointer, public :: spin(:,:,:) 136 end type hamiltonian_elec_base_t 137 138 type projection_t 139 private 140 FLOAT, allocatable :: dprojection(:, :) 141 CMPLX, allocatable :: zprojection(:, :) 142 type(accel_mem_t) :: buff_projection 143 type(accel_mem_t) :: buff_spin_to_phase 144 end type projection_t 145 146 integer, parameter, public :: & 147 TERM_ALL = HUGE(1), & 148 TERM_KINETIC = 1, & 149 TERM_LOCAL_POTENTIAL = 2, & 150 TERM_NON_LOCAL_POTENTIAL = 4, & 151 TERM_OTHERS = 8, & 152 TERM_LOCAL_EXTERNAL = 16, & 153 TERM_MGGA = 32, & 154 TERM_DFT_U = 64, & 155 TERM_RDMFT_OCC = 128 156 157 integer, parameter, public :: & 158 FIELD_POTENTIAL = 1, & 159 FIELD_VECTOR_POTENTIAL = 2, & 160 FIELD_UNIFORM_VECTOR_POTENTIAL = 4, & 161 FIELD_UNIFORM_MAGNETIC_FIELD = 8 162 163 type(profile_t), save :: prof_vnlpsi_start, prof_vnlpsi_finish, prof_magnetic, prof_vlpsi, prof_scatter, & 164 prof_matelement, prof_matelement_gather, prof_matelement_reduce 165 166contains 167 168 ! --------------------------------------------------------- 169 subroutine hamiltonian_elec_base_init(this, nspin, mass, rashba_coupling) 170 type(hamiltonian_elec_base_t), intent(inout) :: this 171 integer, intent(in) :: nspin 172 FLOAT, intent(in) :: mass 173 FLOAT, intent(in) :: rashba_coupling 174 175 PUSH_SUB(hamiltonian_elec_base_init) 176 177 this%nspin = nspin 178 this%mass = mass 179 this%rashba_coupling = rashba_coupling 180 181 this%apply_projector_matrices = .false. 182 this%has_non_local_potential = .false. 183 this%nprojector_matrices = 0 184 185 nullify(this%spin) 186 this%projector_self_overlap = .false. 187 188 POP_SUB(hamiltonian_elec_base_init) 189 end subroutine hamiltonian_elec_base_init 190 191 ! --------------------------------------------------------- 192 subroutine hamiltonian_elec_base_end(this) 193 type(hamiltonian_elec_base_t), intent(inout) :: this 194 195 PUSH_SUB(hamiltonian_elec_base_end) 196 197 if(allocated(this%potential) .and. accel_is_enabled()) then 198 call accel_release_buffer(this%potential_opencl) 199 end if 200 201 SAFE_DEALLOCATE_A(this%potential) 202 SAFE_DEALLOCATE_A(this%Impotential) 203 SAFE_DEALLOCATE_A(this%vector_potential) 204 SAFE_DEALLOCATE_A(this%uniform_vector_potential) 205 SAFE_DEALLOCATE_A(this%uniform_magnetic_field) 206 call hamiltonian_elec_base_destroy_proj(this) 207 208 nullify(this%spin) 209 210 POP_SUB(hamiltonian_elec_base_end) 211 end subroutine hamiltonian_elec_base_end 212 213 ! ---------------------------------------------------------- 214 ! 215 !> This functions sets to zero all fields that are currently 216 !! allocated. 217 ! 218 subroutine hamiltonian_elec_base_clear(this) 219 type(hamiltonian_elec_base_t), intent(inout) :: this 220 221 PUSH_SUB(hamiltonian_elec_clear) 222 223 if(allocated(this%potential)) this%potential = M_ZERO 224 if(allocated(this%Impotential)) this%Impotential = M_ZERO 225 if(allocated(this%uniform_vector_potential)) this%uniform_vector_potential = M_ZERO 226 if(allocated(this%vector_potential)) this%vector_potential = M_ZERO 227 if(allocated(this%uniform_magnetic_field)) this%uniform_magnetic_field = M_ZERO 228 229 POP_SUB(hamiltonian_elec_clear) 230 end subroutine hamiltonian_elec_base_clear 231 232 233 ! --------------------------------------------------------------- 234 !> This function ensures that the corresponding field is allocated. 235 subroutine hamiltonian_elec_base_allocate(this, mesh, field, complex_potential) 236 type(hamiltonian_elec_base_t), intent(inout) :: this 237 type(mesh_t), intent(in) :: mesh 238 integer, intent(in) :: field 239 logical, intent(in) :: complex_potential 240 241 PUSH_SUB(hamiltonian_elec_base_allocate) 242 243 if(bitand(FIELD_POTENTIAL, field) /= 0) then 244 if(.not. allocated(this%potential)) then 245 SAFE_ALLOCATE(this%potential(1:mesh%np, 1:this%nspin)) 246 this%potential = M_ZERO 247 if(complex_potential) then 248 SAFE_ALLOCATE(this%Impotential(1:mesh%np, 1:this%nspin)) 249 this%Impotential = M_ZERO 250 end if 251 if(accel_is_enabled()) then 252 call accel_create_buffer(this%potential_opencl, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, accel_padded_size(mesh%np)*this%nspin) 253 end if 254 end if 255 end if 256 257 if(bitand(FIELD_UNIFORM_VECTOR_POTENTIAL, field) /= 0) then 258 if(.not. allocated(this%uniform_vector_potential)) then 259 SAFE_ALLOCATE(this%uniform_vector_potential(1:mesh%sb%dim)) 260 this%uniform_vector_potential = M_ZERO 261 end if 262 end if 263 264 if(bitand(FIELD_VECTOR_POTENTIAL, field) /= 0) then 265 if(.not. allocated(this%vector_potential)) then 266 SAFE_ALLOCATE(this%vector_potential(1:mesh%sb%dim, 1:mesh%np)) 267 this%vector_potential = M_ZERO 268 end if 269 end if 270 271 if(bitand(FIELD_UNIFORM_MAGNETIC_FIELD, field) /= 0) then 272 if(.not. allocated(this%uniform_magnetic_field)) then 273 SAFE_ALLOCATE(this%uniform_magnetic_field(1:max(mesh%sb%dim, 3))) 274 this%uniform_magnetic_field = M_ZERO 275 end if 276 end if 277 278 POP_SUB(hamiltonian_elec_base_allocate) 279 end subroutine hamiltonian_elec_base_allocate 280 281 ! ---------------------------------------------------------- 282 ! 283 !> If both a uniform and non-uniform vector potentials are allocated, 284 !! this function copies the uniform in the non-uniform one. In the 285 !! future it may perform other internal consistency operations. 286 ! 287 subroutine hamiltonian_elec_base_update(this, mesh) 288 type(hamiltonian_elec_base_t), intent(inout) :: this 289 type(mesh_t), intent(in) :: mesh 290 291 integer :: idir, ip 292 293 PUSH_SUB(hamiltonian_elec_base_update) 294 295 if(allocated(this%uniform_vector_potential) .and. allocated(this%vector_potential)) then 296 ! copy the uniform vector potential onto the non-uniform one 297 do idir = 1, mesh%sb%dim 298 !$omp parallel do schedule(static) 299 do ip = 1, mesh%np 300 this%vector_potential(idir, ip) = & 301 this%vector_potential(idir, ip) + this%uniform_vector_potential(idir) 302 end do 303 end do 304 SAFE_DEALLOCATE_A(this%uniform_vector_potential) 305 end if 306 307 POP_SUB(hamiltonian_elec_base_update) 308 end subroutine hamiltonian_elec_base_update 309 310 311 !-------------------------------------------------------- 312 313 subroutine hamiltonian_elec_base_accel_copy_pot(this, mesh) 314 type(hamiltonian_elec_base_t), intent(inout) :: this 315 type(mesh_t), intent(in) :: mesh 316 317 integer :: offset, ispin 318 319 PUSH_SUB(hamiltonian_elec_base_accel_copy_pot) 320 321 if(allocated(this%potential) .and. accel_is_enabled()) then 322 offset = 0 323 do ispin = 1, this%nspin 324 call accel_write_buffer(this%potential_opencl, mesh%np, this%potential(:, ispin), offset = offset) 325 offset = offset + accel_padded_size(mesh%np) 326 end do 327 end if 328 329 POP_SUB(hamiltonian_elec_base_accel_copy_pot) 330 end subroutine hamiltonian_elec_base_accel_copy_pot 331 332 333 !-------------------------------------------------------- 334 335 subroutine hamiltonian_elec_base_destroy_proj(this) 336 type(hamiltonian_elec_base_t), intent(inout) :: this 337 338 integer :: iproj 339 340 PUSH_SUB(hamiltonian_elec_base_destroy_proj) 341 342 if(allocated(this%projector_matrices)) then 343 344 if(accel_is_enabled()) then 345 call accel_release_buffer(this%buff_offsets) 346 call accel_release_buffer(this%buff_matrices) 347 call accel_release_buffer(this%buff_maps) 348 call accel_release_buffer(this%buff_scals) 349 call accel_release_buffer(this%buff_position) 350 call accel_release_buffer(this%buff_pos) 351 call accel_release_buffer(this%buff_invmap) 352 if(this%projector_mix) call accel_release_buffer(this%buff_mix) 353 if(allocated(this%projector_phases)) call accel_release_buffer(this%buff_projector_phases) 354 end if 355 356 do iproj = 1, this%nprojector_matrices 357 call projector_matrix_deallocate(this%projector_matrices(iproj)) 358 end do 359 SAFE_DEALLOCATE_A(this%regions) 360 SAFE_DEALLOCATE_A(this%projector_matrices) 361 SAFE_DEALLOCATE_A(this%projector_phases) 362 SAFE_DEALLOCATE_A(this%projector_to_atom) 363 end if 364 365 POP_SUB(hamiltonian_elec_base_destroy_proj) 366 end subroutine hamiltonian_elec_base_destroy_proj 367 368 !----------------------------------------------------------------- 369 370 subroutine hamiltonian_elec_base_build_proj(this, mesh, epot) 371 type(hamiltonian_elec_base_t), target, intent(inout) :: this 372 type(mesh_t), intent(in) :: mesh 373 type(epot_t), target, intent(in) :: epot 374 375 integer :: iatom, iproj, ll, lmax, lloc, mm, ic, jc 376 integer :: nmat, imat, ip, iorder 377 integer :: nregion, jatom, katom, iregion 378 integer, allocatable :: order(:), head(:), region_count(:) 379 logical, allocatable :: atom_counted(:) 380 logical :: overlap 381 type(projector_matrix_t), pointer :: pmat 382 type(kb_projector_t), pointer :: kb_p 383 type(hgh_projector_t), pointer :: hgh_p 384 type(profile_t), save :: color_prof 385 386 PUSH_SUB(hamiltonian_elec_base_build_proj) 387 388 call profiling_in(color_prof, "ATOM_COLORING") 389 390 ! this is most likely a very inefficient algorithm, O(natom**2) or 391 ! O(natom**3), probably it should be replaced by something better. 392 393 SAFE_ALLOCATE(order(1:epot%natoms)) 394 SAFE_ALLOCATE(head(1:epot%natoms + 1)) 395 SAFE_ALLOCATE(region_count(1:epot%natoms)) 396 SAFE_ALLOCATE(atom_counted(1:epot%natoms)) 397 398 this%projector_self_overlap = .false. 399 atom_counted = .false. 400 order = -1 401 402 head(1) = 1 403 nregion = 0 404 do 405 nregion = nregion + 1 406 ASSERT(nregion <= epot%natoms) 407 408 region_count(nregion) = 0 409 410 do iatom = 1, epot%natoms 411 if(atom_counted(iatom)) cycle 412 413 overlap = .false. 414 415 if(.not. projector_is(epot%proj(iatom), PROJ_NONE)) then 416 ASSERT(associated(epot%proj(iatom)%sphere%mesh)) 417 do jatom = 1, region_count(nregion) 418 katom = order(head(nregion) + jatom - 1) 419 if(projector_is(epot%proj(katom), PROJ_NONE)) cycle 420 overlap = submesh_overlap(epot%proj(iatom)%sphere, epot%proj(katom)%sphere) 421 if(overlap) exit 422 end do 423 end if 424 425 if(.not. overlap) then 426 INCR(region_count(nregion), 1) 427 order(head(nregion) - 1 + region_count(nregion)) = iatom 428 atom_counted(iatom) = .true. 429 end if 430 431 end do 432 433 head(nregion + 1) = head(nregion) + region_count(nregion) 434 435 if(all(atom_counted)) exit 436 end do 437 438 SAFE_DEALLOCATE_A(atom_counted) 439 SAFE_DEALLOCATE_A(region_count) 440 441 if(debug%info) then 442 call messages_write('The atoms can be separated in ') 443 call messages_write(nregion) 444 call messages_write(' non-overlapping groups.') 445 call messages_info() 446 end if 447 448 do iregion = 1, nregion 449 do iatom = head(iregion), head(iregion + 1) - 1 450 if(.not. projector_is(epot%proj(order(iatom)), PROJ_KB)) cycle 451 do jatom = head(iregion), iatom - 1 452 if(.not. projector_is(epot%proj(order(jatom)), PROJ_KB)) cycle 453 ASSERT(.not. submesh_overlap(epot%proj(order(iatom))%sphere, epot%proj(order(jatom))%sphere)) 454 end do 455 end do 456 end do 457 458 call profiling_out(color_prof) 459 460 ! deallocate previous projectors 461 call hamiltonian_elec_base_destroy_proj(this) 462 463 ! count projectors 464 this%nprojector_matrices = 0 465 this%apply_projector_matrices = .false. 466 this%has_non_local_potential = .false. 467 this%nregions = nregion 468 469 !We determine if we have only local potential or not. 470 do iorder = 1, epot%natoms 471 iatom = order(iorder) 472 473 if(.not. projector_is_null(epot%proj(iatom))) then 474 this%has_non_local_potential = .true. 475 exit 476 end if 477 end do 478 479 do iorder = 1, epot%natoms 480 iatom = order(iorder) 481 482 if(projector_is(epot%proj(iatom), PROJ_KB) .or. projector_is(epot%proj(iatom), PROJ_HGH)) then 483 INCR(this%nprojector_matrices, 1) 484 this%apply_projector_matrices = .true. 485 !The HGH pseudopotentials are now supporting the SOC 486 if(epot%reltype /= NOREL .and. & 487 (.not. projector_is(epot%proj(iatom), PROJ_HGH) .or. accel_is_enabled())) then 488 this%apply_projector_matrices = .false. 489 exit 490 end if 491 else if(.not. projector_is_null(epot%proj(iatom))) then 492 this%apply_projector_matrices = .false. 493 exit 494 end if 495 end do 496 497 if(mesh%use_curvilinear) this%apply_projector_matrices = .false. 498 499 if(.not. this%apply_projector_matrices) then 500 SAFE_DEALLOCATE_A(order) 501 SAFE_DEALLOCATE_A(head) 502 503 POP_SUB(hamiltonian_elec_base_build_proj) 504 return 505 end if 506 507 508 SAFE_ALLOCATE(this%projector_matrices(1:this%nprojector_matrices)) 509 SAFE_ALLOCATE(this%regions(1:this%nprojector_matrices + 1)) 510 SAFE_ALLOCATE(this%projector_to_atom(1:epot%natoms)) 511 512 this%full_projection_size = 0 513 this%regions(this%nregions + 1) = this%nprojector_matrices + 1 514 515 this%projector_mix = .false. 516 517 iproj = 0 518 do iregion = 1, this%nregions 519 this%regions(iregion) = iproj + 1 520 do iorder = head(iregion), head(iregion + 1) - 1 521 522 iatom = order(iorder) 523 524 if(projector_is(epot%proj(iatom), PROJ_NONE)) cycle 525 526 INCR(iproj, 1) 527 528 pmat => this%projector_matrices(iproj) 529 530 this%projector_to_atom(iproj) = iatom 531 532 lmax = epot%proj(iatom)%lmax 533 lloc = epot%proj(iatom)%lloc 534 535 if(projector_is(epot%proj(iatom), PROJ_KB)) then 536 537 ! count the number of projectors for this matrix 538 nmat = 0 539 do ll = 0, lmax 540 if (ll == lloc) cycle 541 do mm = -ll, ll 542 INCR(nmat, epot%proj(iatom)%kb_p(ll, mm)%n_c) 543 end do 544 end do 545 546 call projector_matrix_allocate(pmat, epot%proj(iatom)%sphere%np, nmat, has_mix_matrix = .false.) 547 548 ! generate the matrix 549 pmat%dprojectors = M_ZERO 550 imat = 1 551 do ll = 0, lmax 552 if (ll == lloc) cycle 553 do mm = -ll, ll 554 kb_p => epot%proj(iatom)%kb_p(ll, mm) 555 do ic = 1, kb_p%n_c 556 do ip = 1, pmat%npoints 557 pmat%dprojectors(ip, imat) = kb_p%p(ip, ic) 558 end do 559 pmat%scal(imat) = kb_p%e(ic)*mesh%vol_pp(1) 560 INCR(imat, 1) 561 end do 562 end do 563 end do 564 565 this%projector_self_overlap = this%projector_self_overlap .or. epot%proj(iatom)%sphere%overlap 566 567 else if(projector_is(epot%proj(iatom), PROJ_HGH)) then 568 569 this%projector_mix = .true. 570 571 ! count the number of projectors for this matrix 572 nmat = 0 573 do ll = 0, lmax 574 if (ll == lloc) cycle 575 do mm = -ll, ll 576 nmat = nmat + 3 577 end do 578 end do 579 580 call projector_matrix_allocate(pmat, epot%proj(iatom)%sphere%np, nmat, has_mix_matrix = .true., & 581 is_cmplx = (epot%reltype == SPIN_ORBIT) ) 582 583 ! generate the matrix 584 if(epot%reltype == SPIN_ORBIT) then 585 pmat%zprojectors = M_ZERO 586 pmat%zmix = M_ZERO 587 else 588 pmat%dprojectors = M_ZERO 589 pmat%dmix = M_ZERO 590 end if 591 592 imat = 1 593 do ll = 0, lmax 594 if (ll == lloc) cycle 595 do mm = -ll, ll 596 hgh_p => epot%proj(iatom)%hgh_p(ll, mm) 597 598 ! HGH pseudos mix different components, so we need to 599 ! generate a matrix that mixes the projections 600 if(epot%reltype == SPIN_ORBIT) then 601 do ic = 1, 3 602 do jc = 1, 3 603 pmat%zmix(imat - 1 + ic, imat - 1 + jc, 1) = hgh_p%h(ic, jc) + M_HALF*mm*hgh_p%k(ic, jc) 604 pmat%zmix(imat - 1 + ic, imat - 1 + jc, 2) = hgh_p%h(ic, jc) - M_HALF*mm*hgh_p%k(ic, jc) 605 606 if(mm < ll) then 607 pmat%zmix(imat - 1 + ic, imat + 3 - 1 + jc, 3) = M_HALF*hgh_p%k(ic, jc)*sqrt(TOFLOAT(ll*(ll+1)-mm*(mm+1))) 608 end if 609 610 if(-mm < ll) then 611 pmat%zmix(imat - 1 + ic, imat - 3 - 1 + jc, 4) = M_HALF*hgh_p%k(ic, jc)*sqrt(TOFLOAT(ll*(ll+1)-mm*(mm-1))) 612 end if 613 end do 614 end do 615 else 616 do ic = 1, 3 617 do jc = 1, 3 618 pmat%dmix(imat - 1 + ic, imat - 1 + jc) = hgh_p%h(ic, jc) 619 end do 620 end do 621 end if 622 623 do ic = 1, 3 624 if(epot%reltype == SPIN_ORBIT) then 625 do ip = 1, pmat%npoints 626 pmat%zprojectors(ip, imat) = hgh_p%zp(ip, ic) 627 end do 628 else 629 do ip = 1, pmat%npoints 630 pmat%dprojectors(ip, imat) = hgh_p%dp(ip, ic) 631 end do 632 end if 633 pmat%scal(imat) = mesh%volume_element 634 INCR(imat, 1) 635 end do 636 637 end do 638 end do 639 640 this%projector_self_overlap = this%projector_self_overlap .or. epot%proj(iatom)%sphere%overlap 641 642 else 643 cycle 644 end if 645 646 do ip = 1, pmat%npoints 647 pmat%map(ip) = epot%proj(iatom)%sphere%map(ip) 648 pmat%position(1:3, ip) = epot%proj(iatom)%sphere%x(ip, 1:3) 649 end do 650 651 INCR(this%full_projection_size, pmat%nprojs) 652 653 end do 654 end do 655 656#ifdef HAVE_MPI 657 if(mesh%parallel_in_domains) then 658 call MPI_Allreduce(MPI_IN_PLACE, this%projector_self_overlap, 1, MPI_LOGICAL, MPI_LOR, mesh%mpi_grp%comm, mpi_err) 659 end if 660#endif 661 662 SAFE_DEALLOCATE_A(order) 663 SAFE_DEALLOCATE_A(head) 664 665! do iregion = 1, this%nregions 666! print*, iregion, this%regions(iregion), this%regions(iregion + 1) - 1 667! end do 668 669 this%total_points = 0 670 this%max_npoints = 0 671 this%max_nprojs = 0 672 do imat = 1, this%nprojector_matrices 673 pmat => this%projector_matrices(imat) 674 675 this%max_npoints = max(this%max_npoints, pmat%npoints) 676 this%max_nprojs = max(this%max_nprojs, pmat%nprojs) 677 INCR(this%total_points, pmat%npoints) 678 end do 679 680 if(accel_is_enabled()) call build_opencl() 681 682 POP_SUB(hamiltonian_elec_base_build_proj) 683 684 contains 685 686 subroutine build_opencl() 687 integer :: matrix_size, scal_size 688 integer, allocatable :: cnt(:), invmap(:, :), invmap2(:), pos(:) 689 integer, allocatable :: offsets(:, :) 690 integer, parameter :: OFFSET_SIZE = 6 ! also defined in share/opencl/projectors.cl 691 integer, parameter :: POINTS = 1, PROJS = 2, MATRIX = 3, MAP = 4, SCAL = 5, MIX = 6 ! update OFFSET_SIZE 692 integer :: ip, is, ii, ipos, mix_offset 693 694 PUSH_SUB(hamiltonian_elec_base_build_proj.build_opencl) 695 696 SAFE_ALLOCATE(offsets(1:OFFSET_SIZE, 1:this%nprojector_matrices)) 697 SAFE_ALLOCATE(cnt(1:mesh%np)) 698 699 cnt = 0 700 701 ! Here we construct the offsets for accessing various arrays within the GPU kernels. 702 ! The offset(:,:) array contains a number of sizes and offsets, describing how to address the arrays. 703 ! This allows to transfer all these number to the GPU in one memory transfer. 704 ! 705 ! For each projection matrix (addressed by imap), we have: 706 ! 707 ! offset(POINTS, imap) : number of points of the sphere imap 708 ! offset(PROJS, imap) : number of projectors for imap 709 ! offset(MATRIX, imap) : address offset: cumulative of pmat%npoints * pmat%nprojs 710 ! offset(MAP, imap) : address offset: cumulative of pmat%npoints for each imap 711 ! offset(SCAL, imap) : address_offset: cumulative of pmat%nprojs 712 ! offset(MIX, imap) : address_offset: cumulative of pmat%nprojs**2 713 714 ! first we count 715 matrix_size = 0 716 this%total_points = 0 717 scal_size = 0 718 this%max_npoints = 0 719 this%max_nprojs = 0 720 mix_offset = 0 721 do imat = 1, this%nprojector_matrices 722 pmat => this%projector_matrices(imat) 723 724 this%max_npoints = max(this%max_npoints, pmat%npoints) 725 this%max_nprojs = max(this%max_nprojs, pmat%nprojs) 726 727 offsets(POINTS, imat) = pmat%npoints 728 offsets(PROJS, imat) = pmat%nprojs 729 730 offsets(MATRIX, imat) = matrix_size 731 INCR(matrix_size, pmat%npoints*pmat%nprojs) 732 733 offsets(MAP, imat) = this%total_points 734 INCR(this%total_points, pmat%npoints) 735 736 offsets(SCAL, imat) = scal_size 737 INCR(scal_size, pmat%nprojs) 738 739 if(allocated(pmat%dmix) .or. allocated(pmat%zmix)) then 740 offsets(MIX, imat) = mix_offset 741 INCR(mix_offset, pmat%nprojs**2) 742 else 743 offsets(MIX, imat) = -1 744 end if 745 746 do is = 1, pmat%npoints 747 ip = pmat%map(is) 748 INCR(cnt(ip), 1) 749 end do 750 end do 751 752 SAFE_ALLOCATE(invmap(1:maxval(cnt), 1:mesh%np)) 753 SAFE_ALLOCATE(invmap2(1:maxval(cnt)*mesh%np)) 754 SAFE_ALLOCATE(pos(1:mesh%np + 1)) 755 756 cnt = 0 757 ii = 0 758 do imat = 1, this%nprojector_matrices 759 pmat => this%projector_matrices(imat) 760 do is = 1, pmat%npoints 761 ip = pmat%map(is) 762 INCR(cnt(ip), 1) 763 invmap(cnt(ip), ip) = ii 764 INCR(ii, 1) 765 end do 766 end do 767 768 ipos = 0 769 pos(1) = 0 770 do ip = 1, mesh%np 771 do ii = 1, cnt(ip) 772 INCR(ipos, 1) 773 invmap2(ipos) = invmap(ii, ip) 774 end do 775 pos(ip + 1) = ipos 776 end do 777 778 ! allocate 779 call accel_create_buffer(this%buff_matrices, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, matrix_size) 780 call accel_create_buffer(this%buff_maps, ACCEL_MEM_READ_ONLY, TYPE_INTEGER, this%total_points) 781 call accel_create_buffer(this%buff_position, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, 3*this%total_points) 782 call accel_create_buffer(this%buff_scals, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, scal_size) 783 if(mix_offset > 0) call accel_create_buffer(this%buff_mix, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, mix_offset) 784 785 ! now copy 786 do imat = 1, this%nprojector_matrices 787 pmat => this%projector_matrices(imat) 788 ! in parallel some spheres might not have points 789 if(pmat%npoints > 0) then 790 call accel_write_buffer(this%buff_matrices, pmat%nprojs*pmat%npoints, pmat%dprojectors, offset = offsets(MATRIX, imat)) 791 call accel_write_buffer(this%buff_maps, pmat%npoints, pmat%map, offset = offsets(MAP, imat)) 792 call accel_write_buffer(this%buff_position, 3*pmat%npoints, pmat%position, offset = 3*offsets(MAP, imat)) 793 end if 794 call accel_write_buffer(this%buff_scals, pmat%nprojs, pmat%scal, offset = offsets(SCAL, imat)) 795 if(offsets(MIX, imat) /= -1) call accel_write_buffer(this%buff_mix, pmat%nprojs**2, pmat%dmix, offset = offsets(MIX, imat)) 796 end do 797 798 ! write the offsets 799 call accel_create_buffer(this%buff_offsets, ACCEL_MEM_READ_ONLY, TYPE_INTEGER, OFFSET_SIZE*this%nprojector_matrices) 800 call accel_write_buffer(this%buff_offsets, OFFSET_SIZE*this%nprojector_matrices, offsets) 801 802 ! the inverse map 803 call accel_create_buffer(this%buff_pos, ACCEL_MEM_READ_ONLY, TYPE_INTEGER, mesh%np + 1) 804 call accel_write_buffer(this%buff_pos, mesh%np + 1, pos) 805 806 call accel_create_buffer(this%buff_invmap, ACCEL_MEM_READ_ONLY, TYPE_INTEGER, ipos) 807 call accel_write_buffer(this%buff_invmap, ipos, invmap2) 808 809 SAFE_DEALLOCATE_A(offsets) 810 SAFE_DEALLOCATE_A(cnt) 811 SAFE_DEALLOCATE_A(invmap) 812 SAFE_DEALLOCATE_A(invmap2) 813 SAFE_DEALLOCATE_A(pos) 814 815 POP_SUB(hamiltonian_elec_base_build_proj.build_opencl) 816 end subroutine build_opencl 817 818 end subroutine hamiltonian_elec_base_build_proj 819 820 ! ---------------------------------------------------------------------------------- 821 822 logical pure function hamiltonian_elec_base_has_magnetic(this) result(has_magnetic) 823 type(hamiltonian_elec_base_t), intent(in) :: this 824 825 has_magnetic = allocated(this%vector_potential) & 826 .or. allocated(this%uniform_magnetic_field) 827 828 end function hamiltonian_elec_base_has_magnetic 829 830 ! ---------------------------------------------------------------------------------- 831 832 logical pure function hamiltonian_elec_base_projector_self_overlap(this) result(projector_self_overlap) 833 type(hamiltonian_elec_base_t), intent(in) :: this 834 835 projector_self_overlap = this%projector_self_overlap 836 end function hamiltonian_elec_base_projector_self_overlap 837 838 ! ---------------------------------------------------------------------------------- 839 840 subroutine hamiltonian_elec_base_set_phase_corr(hm_base, mesh, psib) 841 type(hamiltonian_elec_base_t), intent(in) :: hm_base 842 type(mesh_t), intent(in) :: mesh 843 type(wfs_elec_t), intent(inout) :: psib 844 845 logical :: phase_correction 846 847 PUSH_SUB(hamiltonian_elec_base_set_phase_corr) 848 849 ! check if we only want a phase correction for the boundary points 850 phase_correction = .false. 851 if(associated(hm_base%phase)) phase_correction = .true. 852 853 !We apply the phase only to np points, and the phase for the np+1 to np_part points 854 !will be treated as a phase correction in the Hamiltonian 855 if(phase_correction) then 856 call zhamiltonian_elec_base_phase(hm_base, mesh, mesh%np, .false., psib) 857 end if 858 859 POP_SUB(hamiltonian_elec_base_set_phase_corr) 860 861 end subroutine hamiltonian_elec_base_set_phase_corr 862 863 ! ---------------------------------------------------------------------------------- 864 865 subroutine hamiltonian_elec_base_unset_phase_corr(hm_base, mesh, psib) 866 type(hamiltonian_elec_base_t), intent(in) :: hm_base 867 type(mesh_t), intent(in) :: mesh 868 type(wfs_elec_t), intent(inout) :: psib 869 870 logical :: phase_correction 871 872 PUSH_SUB(hamiltonian_elec_base_unset_phase_corr) 873 874 ! check if we only want a phase correction for the boundary points 875 phase_correction = .false. 876 if(associated(hm_base%phase)) phase_correction = .true. 877 878 !We apply the phase only to np points, and the phase for the np+1 to np_part points 879 !will be treated as a phase correction in the Hamiltonian 880 if(phase_correction) then 881 call zhamiltonian_elec_base_phase(hm_base, mesh, mesh%np, .true., psib) 882 end if 883 884 POP_SUB(hamiltonian_elec_base_unset_phase_corr) 885 886 end subroutine hamiltonian_elec_base_unset_phase_corr 887 888 889#include "undef.F90" 890#include "real.F90" 891#include "hamiltonian_elec_base_inc.F90" 892 893#include "undef.F90" 894#include "complex.F90" 895#include "hamiltonian_elec_base_inc.F90" 896 897end module hamiltonian_elec_base_oct_m 898 899!! Local Variables: 900!! mode: f90 901!! coding: utf-8 902!! End: 903