1!! Copyright (C) 2009 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
21module hamiltonian_elec_base_oct_m
22  use accel_oct_m
23  use batch_oct_m
24  use batch_ops_oct_m
25  use boundaries_oct_m
26  use blas_oct_m
27  use comm_oct_m
28  use derivatives_oct_m
29  use epot_oct_m
30  use geometry_oct_m
31  use global_oct_m
32  use hardware_oct_m
33  use hgh_projector_oct_m
34  use kb_projector_oct_m
35  use math_oct_m
36  use mesh_oct_m
37  use messages_oct_m
38  use mpi_oct_m
39  use nl_operator_oct_m
40  use profiling_oct_m
41  use projector_oct_m
42  use projector_matrix_oct_m
43  use ps_oct_m
44  use simul_box_oct_m
45  use states_elec_oct_m
46  use states_elec_dim_oct_m
47  use submesh_oct_m
48  use types_oct_m
49  use wfs_elec_oct_m
50
51  implicit none
52
53  private
54
55  public ::                                         &
56    hamiltonian_elec_base_t,                        &
57    dhamiltonian_elec_base_local,                   &
58    zhamiltonian_elec_base_local,                   &
59    dhamiltonian_elec_base_local_sub,               &
60    zhamiltonian_elec_base_local_sub,               &
61    dhamiltonian_elec_base_magnetic,                &
62    zhamiltonian_elec_base_magnetic,                &
63    dhamiltonian_elec_base_rashba,                  &
64    zhamiltonian_elec_base_rashba,                  &
65    dhamiltonian_elec_base_nlocal_start,            &
66    zhamiltonian_elec_base_nlocal_start,            &
67    dhamiltonian_elec_base_nlocal_finish,           &
68    zhamiltonian_elec_base_nlocal_finish,           &
69    dhamiltonian_elec_base_nlocal_position_commutator, &
70    zhamiltonian_elec_base_nlocal_position_commutator, &
71    hamiltonian_elec_base_has_magnetic,             &
72    hamiltonian_elec_base_init,                     &
73    hamiltonian_elec_base_end,                      &
74    hamiltonian_elec_base_allocate,                 &
75    hamiltonian_elec_base_clear,                    &
76    hamiltonian_elec_base_build_proj,               &
77    hamiltonian_elec_base_update,                   &
78    hamiltonian_elec_base_accel_copy_pot,           &
79    dhamiltonian_elec_base_phase,                   &
80    zhamiltonian_elec_base_phase,                   &
81    dhamiltonian_elec_base_phase_spiral,            &
82    zhamiltonian_elec_base_phase_spiral,            &
83    dhamiltonian_elec_base_nlocal_force,            &
84    zhamiltonian_elec_base_nlocal_force,            &
85    projection_t,                                   &
86    hamiltonian_elec_base_projector_self_overlap,   &
87    hamiltonian_elec_base_set_phase_corr,           &
88    hamiltonian_elec_base_unset_phase_corr
89
90  !> This object stores and applies an electromagnetic potential that
91  !! can be represented by different types of potentials.
92
93  type hamiltonian_elec_base_t
94    private
95    integer                                       :: nspin
96    FLOAT                                         :: mass  !< Needed to compute the magnetic terms, if the mass is not one.
97    FLOAT                                         :: rashba_coupling
98    type(nl_operator_t),      pointer,     public :: kinetic
99    type(projector_matrix_t), allocatable, public :: projector_matrices(:)
100    FLOAT,                    allocatable, public :: potential(:, :)
101    FLOAT,                    allocatable, public :: Impotential(:, :)
102    FLOAT,                    allocatable, public :: uniform_magnetic_field(:)
103    FLOAT,                    allocatable, public :: uniform_vector_potential(:)
104    FLOAT,                    allocatable, public :: vector_potential(:, :)
105    integer,                               public :: nprojector_matrices
106    logical,                               public :: apply_projector_matrices
107    logical,                               public :: has_non_local_potential
108    integer                                       :: full_projection_size
109    integer,                               public :: max_npoints
110    integer,                               public :: total_points
111    integer                                       :: max_nprojs
112    logical                                       :: projector_mix
113    CMPLX,                    allocatable, public :: projector_phases(:, :, :, :)
114    integer,                  allocatable, public :: projector_to_atom(:)
115    integer                                       :: nregions
116    integer,                               public :: nphase
117    integer,                  allocatable         :: regions(:)
118    type(accel_mem_t)                             :: potential_opencl
119    type(accel_mem_t)                             :: buff_offsets
120    type(accel_mem_t)                             :: buff_matrices
121    type(accel_mem_t)                             :: buff_maps
122    type(accel_mem_t)                             :: buff_scals
123    type(accel_mem_t)                             :: buff_position
124    type(accel_mem_t)                             :: buff_pos
125    type(accel_mem_t)                             :: buff_invmap
126    type(accel_mem_t),                     public :: buff_projector_phases
127    type(accel_mem_t)                             :: buff_mix
128    CMPLX,                    pointer,     public :: phase(:, :)
129    CMPLX,                    allocatable, public :: phase_corr(:,:)
130    CMPLX,                    allocatable, public :: phase_spiral(:,:)
131    type(accel_mem_t),                     public :: buff_phase
132    type(accel_mem_t),                     public :: buff_phase_spiral
133    integer,                               public :: buff_phase_qn_start
134    logical                                       :: projector_self_overlap  !< if .true. some projectors overlap with themselves
135    FLOAT,                    pointer,     public :: spin(:,:,:)
136  end type hamiltonian_elec_base_t
137
138  type projection_t
139    private
140    FLOAT, allocatable     :: dprojection(:, :)
141    CMPLX, allocatable     :: zprojection(:, :)
142    type(accel_mem_t)      :: buff_projection
143    type(accel_mem_t)      :: buff_spin_to_phase
144  end type projection_t
145
146  integer, parameter, public ::          &
147    TERM_ALL                 = HUGE(1),  &
148    TERM_KINETIC             =   1,      &
149    TERM_LOCAL_POTENTIAL     =   2,      &
150    TERM_NON_LOCAL_POTENTIAL =   4,      &
151    TERM_OTHERS              =   8,      &
152    TERM_LOCAL_EXTERNAL      =  16,      &
153    TERM_MGGA                =  32,      &
154    TERM_DFT_U               =  64,      &
155    TERM_RDMFT_OCC           = 128
156
157  integer, parameter, public ::            &
158    FIELD_POTENTIAL                = 1,    &
159    FIELD_VECTOR_POTENTIAL         = 2,    &
160    FIELD_UNIFORM_VECTOR_POTENTIAL = 4,    &
161    FIELD_UNIFORM_MAGNETIC_FIELD   = 8
162
163  type(profile_t), save :: prof_vnlpsi_start, prof_vnlpsi_finish, prof_magnetic, prof_vlpsi, prof_scatter, &
164    prof_matelement, prof_matelement_gather, prof_matelement_reduce
165
166contains
167
168  ! ---------------------------------------------------------
169  subroutine hamiltonian_elec_base_init(this, nspin, mass, rashba_coupling)
170    type(hamiltonian_elec_base_t), intent(inout) :: this
171    integer,                  intent(in)    :: nspin
172    FLOAT,                    intent(in)    :: mass
173    FLOAT,                    intent(in)    :: rashba_coupling
174
175    PUSH_SUB(hamiltonian_elec_base_init)
176
177    this%nspin = nspin
178    this%mass  = mass
179    this%rashba_coupling = rashba_coupling
180
181    this%apply_projector_matrices = .false.
182    this%has_non_local_potential = .false.
183    this%nprojector_matrices = 0
184
185    nullify(this%spin)
186    this%projector_self_overlap = .false.
187
188    POP_SUB(hamiltonian_elec_base_init)
189  end subroutine hamiltonian_elec_base_init
190
191  ! ---------------------------------------------------------
192  subroutine hamiltonian_elec_base_end(this)
193    type(hamiltonian_elec_base_t), intent(inout) :: this
194
195    PUSH_SUB(hamiltonian_elec_base_end)
196
197    if(allocated(this%potential) .and. accel_is_enabled()) then
198      call accel_release_buffer(this%potential_opencl)
199    end if
200
201    SAFE_DEALLOCATE_A(this%potential)
202    SAFE_DEALLOCATE_A(this%Impotential)
203    SAFE_DEALLOCATE_A(this%vector_potential)
204    SAFE_DEALLOCATE_A(this%uniform_vector_potential)
205    SAFE_DEALLOCATE_A(this%uniform_magnetic_field)
206    call hamiltonian_elec_base_destroy_proj(this)
207
208    nullify(this%spin)
209
210    POP_SUB(hamiltonian_elec_base_end)
211  end subroutine hamiltonian_elec_base_end
212
213  ! ----------------------------------------------------------
214  !
215  !> This functions sets to zero all fields that are currently
216  !! allocated.
217  !
218  subroutine hamiltonian_elec_base_clear(this)
219    type(hamiltonian_elec_base_t), intent(inout) :: this
220
221    PUSH_SUB(hamiltonian_elec_clear)
222
223    if(allocated(this%potential))                this%potential = M_ZERO
224    if(allocated(this%Impotential))              this%Impotential = M_ZERO
225    if(allocated(this%uniform_vector_potential)) this%uniform_vector_potential = M_ZERO
226    if(allocated(this%vector_potential))         this%vector_potential = M_ZERO
227    if(allocated(this%uniform_magnetic_field))   this%uniform_magnetic_field = M_ZERO
228
229    POP_SUB(hamiltonian_elec_clear)
230  end subroutine hamiltonian_elec_base_clear
231
232
233  ! ---------------------------------------------------------------
234  !> This function ensures that the corresponding field is allocated.
235  subroutine hamiltonian_elec_base_allocate(this, mesh, field, complex_potential)
236    type(hamiltonian_elec_base_t), intent(inout) :: this
237    type(mesh_t),             intent(in)    :: mesh
238    integer,                  intent(in)    :: field
239    logical,                  intent(in)    :: complex_potential
240
241    PUSH_SUB(hamiltonian_elec_base_allocate)
242
243    if(bitand(FIELD_POTENTIAL, field) /= 0) then
244      if(.not. allocated(this%potential)) then
245        SAFE_ALLOCATE(this%potential(1:mesh%np, 1:this%nspin))
246        this%potential = M_ZERO
247        if(complex_potential) then
248          SAFE_ALLOCATE(this%Impotential(1:mesh%np, 1:this%nspin))
249          this%Impotential = M_ZERO
250        end if
251        if(accel_is_enabled()) then
252          call accel_create_buffer(this%potential_opencl, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, accel_padded_size(mesh%np)*this%nspin)
253        end if
254      end if
255    end if
256
257    if(bitand(FIELD_UNIFORM_VECTOR_POTENTIAL, field) /= 0) then
258      if(.not. allocated(this%uniform_vector_potential)) then
259        SAFE_ALLOCATE(this%uniform_vector_potential(1:mesh%sb%dim))
260        this%uniform_vector_potential = M_ZERO
261      end if
262    end if
263
264    if(bitand(FIELD_VECTOR_POTENTIAL, field) /= 0) then
265      if(.not. allocated(this%vector_potential)) then
266        SAFE_ALLOCATE(this%vector_potential(1:mesh%sb%dim, 1:mesh%np))
267        this%vector_potential = M_ZERO
268      end if
269    end if
270
271    if(bitand(FIELD_UNIFORM_MAGNETIC_FIELD, field) /= 0) then
272      if(.not. allocated(this%uniform_magnetic_field)) then
273        SAFE_ALLOCATE(this%uniform_magnetic_field(1:max(mesh%sb%dim, 3)))
274        this%uniform_magnetic_field = M_ZERO
275      end if
276    end if
277
278    POP_SUB(hamiltonian_elec_base_allocate)
279  end subroutine hamiltonian_elec_base_allocate
280
281  ! ----------------------------------------------------------
282  !
283  !> If both a uniform and non-uniform vector potentials are allocated,
284  !! this function copies the uniform in the non-uniform one. In the
285  !! future it may perform other internal consistency operations.
286  !
287  subroutine hamiltonian_elec_base_update(this, mesh)
288    type(hamiltonian_elec_base_t), intent(inout) :: this
289    type(mesh_t),             intent(in)    :: mesh
290
291    integer :: idir, ip
292
293    PUSH_SUB(hamiltonian_elec_base_update)
294
295    if(allocated(this%uniform_vector_potential) .and. allocated(this%vector_potential)) then
296      ! copy the uniform vector potential onto the non-uniform one
297      do idir = 1, mesh%sb%dim
298        !$omp parallel do schedule(static)
299        do ip = 1, mesh%np
300          this%vector_potential(idir, ip) = &
301            this%vector_potential(idir, ip) + this%uniform_vector_potential(idir)
302        end do
303      end do
304      SAFE_DEALLOCATE_A(this%uniform_vector_potential)
305    end if
306
307    POP_SUB(hamiltonian_elec_base_update)
308  end subroutine hamiltonian_elec_base_update
309
310
311  !--------------------------------------------------------
312
313  subroutine hamiltonian_elec_base_accel_copy_pot(this, mesh)
314    type(hamiltonian_elec_base_t), intent(inout) :: this
315    type(mesh_t),             intent(in)    :: mesh
316
317    integer :: offset, ispin
318
319    PUSH_SUB(hamiltonian_elec_base_accel_copy_pot)
320
321    if(allocated(this%potential) .and. accel_is_enabled()) then
322      offset = 0
323      do ispin = 1, this%nspin
324        call accel_write_buffer(this%potential_opencl, mesh%np, this%potential(:, ispin), offset = offset)
325        offset = offset + accel_padded_size(mesh%np)
326      end do
327    end if
328
329    POP_SUB(hamiltonian_elec_base_accel_copy_pot)
330  end subroutine hamiltonian_elec_base_accel_copy_pot
331
332
333  !--------------------------------------------------------
334
335  subroutine hamiltonian_elec_base_destroy_proj(this)
336    type(hamiltonian_elec_base_t), intent(inout) :: this
337
338    integer :: iproj
339
340    PUSH_SUB(hamiltonian_elec_base_destroy_proj)
341
342    if(allocated(this%projector_matrices)) then
343
344      if(accel_is_enabled()) then
345        call accel_release_buffer(this%buff_offsets)
346        call accel_release_buffer(this%buff_matrices)
347        call accel_release_buffer(this%buff_maps)
348        call accel_release_buffer(this%buff_scals)
349        call accel_release_buffer(this%buff_position)
350        call accel_release_buffer(this%buff_pos)
351        call accel_release_buffer(this%buff_invmap)
352        if(this%projector_mix) call accel_release_buffer(this%buff_mix)
353        if(allocated(this%projector_phases)) call accel_release_buffer(this%buff_projector_phases)
354      end if
355
356      do iproj = 1, this%nprojector_matrices
357        call projector_matrix_deallocate(this%projector_matrices(iproj))
358      end do
359      SAFE_DEALLOCATE_A(this%regions)
360      SAFE_DEALLOCATE_A(this%projector_matrices)
361      SAFE_DEALLOCATE_A(this%projector_phases)
362      SAFE_DEALLOCATE_A(this%projector_to_atom)
363    end if
364
365    POP_SUB(hamiltonian_elec_base_destroy_proj)
366  end subroutine hamiltonian_elec_base_destroy_proj
367
368  !-----------------------------------------------------------------
369
370  subroutine hamiltonian_elec_base_build_proj(this, mesh, epot)
371    type(hamiltonian_elec_base_t), target, intent(inout) :: this
372    type(mesh_t),                     intent(in)    :: mesh
373    type(epot_t),             target, intent(in)    :: epot
374
375    integer :: iatom, iproj, ll, lmax, lloc, mm, ic, jc
376    integer :: nmat, imat, ip, iorder
377    integer :: nregion, jatom, katom, iregion
378    integer, allocatable :: order(:), head(:), region_count(:)
379    logical, allocatable :: atom_counted(:)
380    logical :: overlap
381    type(projector_matrix_t), pointer :: pmat
382    type(kb_projector_t),     pointer :: kb_p
383    type(hgh_projector_t),    pointer :: hgh_p
384    type(profile_t), save :: color_prof
385
386    PUSH_SUB(hamiltonian_elec_base_build_proj)
387
388    call profiling_in(color_prof, "ATOM_COLORING")
389
390    ! this is most likely a very inefficient algorithm, O(natom**2) or
391    ! O(natom**3), probably it should be replaced by something better.
392
393    SAFE_ALLOCATE(order(1:epot%natoms))
394    SAFE_ALLOCATE(head(1:epot%natoms + 1))
395    SAFE_ALLOCATE(region_count(1:epot%natoms))
396    SAFE_ALLOCATE(atom_counted(1:epot%natoms))
397
398    this%projector_self_overlap = .false.
399    atom_counted = .false.
400    order = -1
401
402    head(1) = 1
403    nregion = 0
404    do
405      nregion = nregion + 1
406      ASSERT(nregion <= epot%natoms)
407
408      region_count(nregion) = 0
409
410      do iatom = 1, epot%natoms
411        if(atom_counted(iatom)) cycle
412
413        overlap = .false.
414
415        if(.not. projector_is(epot%proj(iatom), PROJ_NONE)) then
416          ASSERT(associated(epot%proj(iatom)%sphere%mesh))
417          do jatom = 1, region_count(nregion)
418            katom = order(head(nregion) + jatom - 1)
419            if(projector_is(epot%proj(katom), PROJ_NONE)) cycle
420            overlap = submesh_overlap(epot%proj(iatom)%sphere, epot%proj(katom)%sphere)
421            if(overlap) exit
422          end do
423        end if
424
425        if(.not. overlap) then
426          INCR(region_count(nregion), 1)
427          order(head(nregion) - 1 + region_count(nregion)) = iatom
428          atom_counted(iatom) = .true.
429        end if
430
431      end do
432
433      head(nregion + 1) = head(nregion) + region_count(nregion)
434
435      if(all(atom_counted)) exit
436    end do
437
438    SAFE_DEALLOCATE_A(atom_counted)
439    SAFE_DEALLOCATE_A(region_count)
440
441    if(debug%info) then
442      call messages_write('The atoms can be separated in ')
443      call messages_write(nregion)
444      call messages_write(' non-overlapping groups.')
445      call messages_info()
446    end if
447
448    do iregion = 1, nregion
449      do iatom = head(iregion), head(iregion + 1) - 1
450        if(.not. projector_is(epot%proj(order(iatom)), PROJ_KB)) cycle
451        do jatom = head(iregion), iatom - 1
452          if(.not. projector_is(epot%proj(order(jatom)), PROJ_KB)) cycle
453          ASSERT(.not. submesh_overlap(epot%proj(order(iatom))%sphere, epot%proj(order(jatom))%sphere))
454        end do
455      end do
456    end do
457
458    call profiling_out(color_prof)
459
460    ! deallocate previous projectors
461    call hamiltonian_elec_base_destroy_proj(this)
462
463    ! count projectors
464    this%nprojector_matrices = 0
465    this%apply_projector_matrices = .false.
466    this%has_non_local_potential = .false.
467    this%nregions = nregion
468
469    !We determine if we have only local potential or not.
470    do iorder = 1, epot%natoms
471      iatom = order(iorder)
472
473      if(.not. projector_is_null(epot%proj(iatom))) then
474        this%has_non_local_potential = .true.
475        exit
476      end if
477    end do
478
479    do iorder = 1, epot%natoms
480      iatom = order(iorder)
481
482      if(projector_is(epot%proj(iatom), PROJ_KB) .or. projector_is(epot%proj(iatom), PROJ_HGH)) then
483        INCR(this%nprojector_matrices, 1)
484        this%apply_projector_matrices = .true.
485        !The HGH pseudopotentials are now supporting the SOC
486        if(epot%reltype /= NOREL .and. &
487             (.not. projector_is(epot%proj(iatom), PROJ_HGH) .or. accel_is_enabled())) then
488          this%apply_projector_matrices = .false.
489          exit
490        end if
491      else if(.not. projector_is_null(epot%proj(iatom))) then
492        this%apply_projector_matrices = .false.
493        exit
494      end if
495    end do
496
497    if(mesh%use_curvilinear)  this%apply_projector_matrices = .false.
498
499    if(.not. this%apply_projector_matrices) then
500      SAFE_DEALLOCATE_A(order)
501      SAFE_DEALLOCATE_A(head)
502
503      POP_SUB(hamiltonian_elec_base_build_proj)
504      return
505    end if
506
507
508    SAFE_ALLOCATE(this%projector_matrices(1:this%nprojector_matrices))
509    SAFE_ALLOCATE(this%regions(1:this%nprojector_matrices + 1))
510    SAFE_ALLOCATE(this%projector_to_atom(1:epot%natoms))
511
512    this%full_projection_size = 0
513    this%regions(this%nregions + 1) = this%nprojector_matrices + 1
514
515    this%projector_mix = .false.
516
517    iproj = 0
518    do iregion = 1, this%nregions
519      this%regions(iregion) = iproj + 1
520      do iorder = head(iregion), head(iregion + 1) - 1
521
522        iatom = order(iorder)
523
524        if(projector_is(epot%proj(iatom), PROJ_NONE)) cycle
525
526        INCR(iproj, 1)
527
528        pmat => this%projector_matrices(iproj)
529
530        this%projector_to_atom(iproj) = iatom
531
532        lmax = epot%proj(iatom)%lmax
533        lloc = epot%proj(iatom)%lloc
534
535        if(projector_is(epot%proj(iatom), PROJ_KB)) then
536
537          ! count the number of projectors for this matrix
538          nmat = 0
539          do ll = 0, lmax
540            if (ll == lloc) cycle
541            do mm = -ll, ll
542              INCR(nmat, epot%proj(iatom)%kb_p(ll, mm)%n_c)
543            end do
544          end do
545
546          call projector_matrix_allocate(pmat, epot%proj(iatom)%sphere%np, nmat, has_mix_matrix = .false.)
547
548          ! generate the matrix
549          pmat%dprojectors = M_ZERO
550          imat = 1
551          do ll = 0, lmax
552            if (ll == lloc) cycle
553            do mm = -ll, ll
554              kb_p =>  epot%proj(iatom)%kb_p(ll, mm)
555              do ic = 1, kb_p%n_c
556                do ip = 1, pmat%npoints
557                  pmat%dprojectors(ip, imat) = kb_p%p(ip, ic)
558                end do
559                pmat%scal(imat) = kb_p%e(ic)*mesh%vol_pp(1)
560                INCR(imat, 1)
561              end do
562            end do
563          end do
564
565          this%projector_self_overlap = this%projector_self_overlap .or. epot%proj(iatom)%sphere%overlap
566
567        else if(projector_is(epot%proj(iatom), PROJ_HGH)) then
568
569          this%projector_mix = .true.
570
571          ! count the number of projectors for this matrix
572          nmat = 0
573          do ll = 0, lmax
574            if (ll == lloc) cycle
575            do mm = -ll, ll
576              nmat = nmat + 3
577            end do
578          end do
579
580          call projector_matrix_allocate(pmat, epot%proj(iatom)%sphere%np, nmat, has_mix_matrix = .true., &
581                                            is_cmplx = (epot%reltype == SPIN_ORBIT) )
582
583          ! generate the matrix
584          if(epot%reltype == SPIN_ORBIT) then
585            pmat%zprojectors = M_ZERO
586            pmat%zmix = M_ZERO
587          else
588            pmat%dprojectors = M_ZERO
589            pmat%dmix = M_ZERO
590          end if
591
592          imat = 1
593          do ll = 0, lmax
594            if (ll == lloc) cycle
595            do mm = -ll, ll
596              hgh_p =>  epot%proj(iatom)%hgh_p(ll, mm)
597
598              ! HGH pseudos mix different components, so we need to
599              ! generate a matrix that mixes the projections
600              if(epot%reltype == SPIN_ORBIT) then
601                do ic = 1, 3
602                  do jc = 1, 3
603                    pmat%zmix(imat - 1 + ic, imat - 1 + jc, 1) = hgh_p%h(ic, jc) + M_HALF*mm*hgh_p%k(ic, jc)
604                    pmat%zmix(imat - 1 + ic, imat - 1 + jc, 2) = hgh_p%h(ic, jc) - M_HALF*mm*hgh_p%k(ic, jc)
605
606                    if(mm < ll) then
607                      pmat%zmix(imat - 1 + ic, imat + 3 - 1 + jc, 3) = M_HALF*hgh_p%k(ic, jc)*sqrt(TOFLOAT(ll*(ll+1)-mm*(mm+1)))
608                    end if
609
610                    if(-mm < ll) then
611                      pmat%zmix(imat - 1 + ic, imat - 3 - 1 + jc, 4) = M_HALF*hgh_p%k(ic, jc)*sqrt(TOFLOAT(ll*(ll+1)-mm*(mm-1)))
612                    end if
613                  end do
614                end do
615              else
616                do ic = 1, 3
617                  do jc = 1, 3
618                    pmat%dmix(imat - 1 + ic, imat - 1 + jc) = hgh_p%h(ic, jc)
619                  end do
620                end do
621              end if
622
623              do ic = 1, 3
624                if(epot%reltype == SPIN_ORBIT) then
625                  do ip = 1, pmat%npoints
626                    pmat%zprojectors(ip, imat) = hgh_p%zp(ip, ic)
627                  end do
628                else
629                  do ip = 1, pmat%npoints
630                    pmat%dprojectors(ip, imat) = hgh_p%dp(ip, ic)
631                  end do
632                end if
633                pmat%scal(imat) = mesh%volume_element
634                INCR(imat, 1)
635              end do
636
637            end do
638          end do
639
640          this%projector_self_overlap = this%projector_self_overlap .or. epot%proj(iatom)%sphere%overlap
641
642        else
643          cycle
644        end if
645
646        do ip = 1, pmat%npoints
647          pmat%map(ip) = epot%proj(iatom)%sphere%map(ip)
648          pmat%position(1:3, ip) = epot%proj(iatom)%sphere%x(ip, 1:3)
649        end do
650
651        INCR(this%full_projection_size, pmat%nprojs)
652
653      end do
654    end do
655
656#ifdef HAVE_MPI
657    if(mesh%parallel_in_domains) then
658      call MPI_Allreduce(MPI_IN_PLACE, this%projector_self_overlap, 1, MPI_LOGICAL, MPI_LOR, mesh%mpi_grp%comm, mpi_err)
659    end if
660#endif
661
662    SAFE_DEALLOCATE_A(order)
663    SAFE_DEALLOCATE_A(head)
664
665!    do iregion = 1, this%nregions
666!      print*, iregion, this%regions(iregion), this%regions(iregion + 1) - 1
667!    end do
668
669    this%total_points = 0
670    this%max_npoints = 0
671    this%max_nprojs = 0
672    do imat = 1, this%nprojector_matrices
673      pmat => this%projector_matrices(imat)
674
675      this%max_npoints = max(this%max_npoints, pmat%npoints)
676      this%max_nprojs = max(this%max_nprojs, pmat%nprojs)
677      INCR(this%total_points, pmat%npoints)
678    end do
679
680    if(accel_is_enabled()) call build_opencl()
681
682    POP_SUB(hamiltonian_elec_base_build_proj)
683
684  contains
685
686    subroutine build_opencl()
687      integer              :: matrix_size, scal_size
688      integer, allocatable :: cnt(:), invmap(:, :), invmap2(:), pos(:)
689      integer, allocatable :: offsets(:, :)
690      integer, parameter   :: OFFSET_SIZE = 6 ! also defined in share/opencl/projectors.cl
691      integer, parameter   :: POINTS = 1, PROJS = 2, MATRIX = 3, MAP = 4, SCAL = 5, MIX = 6 ! update OFFSET_SIZE
692      integer              :: ip, is, ii, ipos, mix_offset
693
694      PUSH_SUB(hamiltonian_elec_base_build_proj.build_opencl)
695
696      SAFE_ALLOCATE(offsets(1:OFFSET_SIZE, 1:this%nprojector_matrices))
697      SAFE_ALLOCATE(cnt(1:mesh%np))
698
699      cnt = 0
700
701      ! Here we construct the offsets for accessing various arrays within the GPU kernels.
702      ! The offset(:,:) array contains a number of sizes and offsets, describing how to address the arrays.
703      ! This allows to transfer all these number to the GPU in one memory transfer.
704      !
705      ! For each projection matrix (addressed by imap), we have:
706      !
707      ! offset(POINTS, imap) : number of points of the sphere imap
708      ! offset(PROJS, imap)  : number of projectors for imap
709      ! offset(MATRIX, imap) : address offset: cumulative of pmat%npoints * pmat%nprojs
710      ! offset(MAP, imap)    : address offset: cumulative of pmat%npoints for each imap
711      ! offset(SCAL, imap)   : address_offset: cumulative of pmat%nprojs
712      ! offset(MIX, imap)    : address_offset: cumulative of pmat%nprojs**2
713
714      ! first we count
715      matrix_size = 0
716      this%total_points = 0
717      scal_size = 0
718      this%max_npoints = 0
719      this%max_nprojs = 0
720      mix_offset = 0
721      do imat = 1, this%nprojector_matrices
722        pmat => this%projector_matrices(imat)
723
724        this%max_npoints = max(this%max_npoints, pmat%npoints)
725        this%max_nprojs = max(this%max_nprojs, pmat%nprojs)
726
727        offsets(POINTS, imat) = pmat%npoints
728        offsets(PROJS, imat) = pmat%nprojs
729
730        offsets(MATRIX, imat) = matrix_size
731        INCR(matrix_size, pmat%npoints*pmat%nprojs)
732
733        offsets(MAP, imat) = this%total_points
734        INCR(this%total_points, pmat%npoints)
735
736        offsets(SCAL, imat) = scal_size
737        INCR(scal_size, pmat%nprojs)
738
739        if(allocated(pmat%dmix) .or. allocated(pmat%zmix)) then
740          offsets(MIX, imat) = mix_offset
741          INCR(mix_offset, pmat%nprojs**2)
742        else
743          offsets(MIX, imat) = -1
744        end if
745
746        do is = 1, pmat%npoints
747          ip = pmat%map(is)
748          INCR(cnt(ip), 1)
749        end do
750      end do
751
752      SAFE_ALLOCATE(invmap(1:maxval(cnt), 1:mesh%np))
753      SAFE_ALLOCATE(invmap2(1:maxval(cnt)*mesh%np))
754      SAFE_ALLOCATE(pos(1:mesh%np + 1))
755
756      cnt = 0
757      ii = 0
758      do imat = 1, this%nprojector_matrices
759        pmat => this%projector_matrices(imat)
760        do is = 1, pmat%npoints
761          ip = pmat%map(is)
762          INCR(cnt(ip), 1)
763          invmap(cnt(ip), ip) = ii
764          INCR(ii, 1)
765        end do
766      end do
767
768      ipos = 0
769      pos(1) = 0
770      do ip = 1, mesh%np
771        do ii = 1, cnt(ip)
772          INCR(ipos, 1)
773          invmap2(ipos) = invmap(ii, ip)
774        end do
775        pos(ip + 1) = ipos
776      end do
777
778      ! allocate
779      call accel_create_buffer(this%buff_matrices, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, matrix_size)
780      call accel_create_buffer(this%buff_maps, ACCEL_MEM_READ_ONLY, TYPE_INTEGER, this%total_points)
781      call accel_create_buffer(this%buff_position, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, 3*this%total_points)
782      call accel_create_buffer(this%buff_scals, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, scal_size)
783      if(mix_offset > 0) call accel_create_buffer(this%buff_mix, ACCEL_MEM_READ_ONLY, TYPE_FLOAT, mix_offset)
784
785      ! now copy
786      do imat = 1, this%nprojector_matrices
787        pmat => this%projector_matrices(imat)
788        ! in parallel some spheres might not have points
789        if(pmat%npoints > 0) then
790          call accel_write_buffer(this%buff_matrices, pmat%nprojs*pmat%npoints, pmat%dprojectors, offset = offsets(MATRIX, imat))
791          call accel_write_buffer(this%buff_maps, pmat%npoints, pmat%map, offset = offsets(MAP, imat))
792          call accel_write_buffer(this%buff_position, 3*pmat%npoints, pmat%position, offset = 3*offsets(MAP, imat))
793        end if
794        call accel_write_buffer(this%buff_scals, pmat%nprojs, pmat%scal, offset = offsets(SCAL, imat))
795        if(offsets(MIX, imat) /= -1) call accel_write_buffer(this%buff_mix, pmat%nprojs**2, pmat%dmix, offset = offsets(MIX, imat))
796      end do
797
798      ! write the offsets
799      call accel_create_buffer(this%buff_offsets, ACCEL_MEM_READ_ONLY, TYPE_INTEGER, OFFSET_SIZE*this%nprojector_matrices)
800      call accel_write_buffer(this%buff_offsets, OFFSET_SIZE*this%nprojector_matrices, offsets)
801
802      ! the inverse map
803      call accel_create_buffer(this%buff_pos, ACCEL_MEM_READ_ONLY, TYPE_INTEGER, mesh%np + 1)
804      call accel_write_buffer(this%buff_pos, mesh%np + 1, pos)
805
806      call accel_create_buffer(this%buff_invmap, ACCEL_MEM_READ_ONLY, TYPE_INTEGER, ipos)
807      call accel_write_buffer(this%buff_invmap, ipos, invmap2)
808
809      SAFE_DEALLOCATE_A(offsets)
810      SAFE_DEALLOCATE_A(cnt)
811      SAFE_DEALLOCATE_A(invmap)
812      SAFE_DEALLOCATE_A(invmap2)
813      SAFE_DEALLOCATE_A(pos)
814
815      POP_SUB(hamiltonian_elec_base_build_proj.build_opencl)
816    end subroutine build_opencl
817
818  end subroutine hamiltonian_elec_base_build_proj
819
820  ! ----------------------------------------------------------------------------------
821
822  logical pure function hamiltonian_elec_base_has_magnetic(this) result(has_magnetic)
823    type(hamiltonian_elec_base_t), intent(in) :: this
824
825    has_magnetic = allocated(this%vector_potential) &
826      .or. allocated(this%uniform_magnetic_field)
827
828  end function hamiltonian_elec_base_has_magnetic
829
830  ! ----------------------------------------------------------------------------------
831
832  logical pure function hamiltonian_elec_base_projector_self_overlap(this) result(projector_self_overlap)
833    type(hamiltonian_elec_base_t), intent(in) :: this
834
835    projector_self_overlap = this%projector_self_overlap
836  end function hamiltonian_elec_base_projector_self_overlap
837
838 ! ----------------------------------------------------------------------------------
839
840  subroutine hamiltonian_elec_base_set_phase_corr(hm_base, mesh, psib)
841    type(hamiltonian_elec_base_t), intent(in) :: hm_base
842    type(mesh_t),                  intent(in) :: mesh
843    type(wfs_elec_t),           intent(inout) :: psib
844
845    logical :: phase_correction
846
847    PUSH_SUB(hamiltonian_elec_base_set_phase_corr)
848
849    ! check if we only want a phase correction for the boundary points
850    phase_correction = .false.
851    if(associated(hm_base%phase)) phase_correction = .true.
852
853    !We apply the phase only to np points, and the phase for the np+1 to np_part points
854    !will be treated as a phase correction in the Hamiltonian
855    if(phase_correction) then
856      call zhamiltonian_elec_base_phase(hm_base, mesh, mesh%np, .false., psib)
857    end if
858
859    POP_SUB(hamiltonian_elec_base_set_phase_corr)
860
861  end subroutine hamiltonian_elec_base_set_phase_corr
862
863 ! ----------------------------------------------------------------------------------
864
865  subroutine hamiltonian_elec_base_unset_phase_corr(hm_base, mesh, psib)
866    type(hamiltonian_elec_base_t), intent(in) :: hm_base
867    type(mesh_t),                  intent(in) :: mesh
868    type(wfs_elec_t),           intent(inout) :: psib
869
870    logical :: phase_correction
871
872    PUSH_SUB(hamiltonian_elec_base_unset_phase_corr)
873
874    ! check if we only want a phase correction for the boundary points
875    phase_correction = .false.
876    if(associated(hm_base%phase)) phase_correction = .true.
877
878    !We apply the phase only to np points, and the phase for the np+1 to np_part points
879    !will be treated as a phase correction in the Hamiltonian
880    if(phase_correction) then
881      call zhamiltonian_elec_base_phase(hm_base, mesh, mesh%np, .true., psib)
882    end if
883
884    POP_SUB(hamiltonian_elec_base_unset_phase_corr)
885
886  end subroutine hamiltonian_elec_base_unset_phase_corr
887
888
889#include "undef.F90"
890#include "real.F90"
891#include "hamiltonian_elec_base_inc.F90"
892
893#include "undef.F90"
894#include "complex.F90"
895#include "hamiltonian_elec_base_inc.F90"
896
897end module hamiltonian_elec_base_oct_m
898
899!! Local Variables:
900!! mode: f90
901!! coding: utf-8
902!! End:
903