1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18#include "global.h"
19
20module states_elec_oct_m
21  use accel_oct_m
22  use blacs_proc_grid_oct_m
23  use boundaries_oct_m
24  use calc_mode_par_oct_m
25  use comm_oct_m
26  use batch_oct_m
27  use batch_ops_oct_m
28  use derivatives_oct_m
29  use distributed_oct_m
30  use geometry_oct_m
31  use global_oct_m
32  use grid_oct_m
33  use kpoints_oct_m
34  use loct_oct_m
35  use loct_pointer_oct_m
36  use math_oct_m
37  use mesh_oct_m
38  use mesh_function_oct_m
39  use messages_oct_m
40  use modelmb_particles_oct_m
41  use mpi_oct_m
42  use multicomm_oct_m
43  use namespace_oct_m
44#ifdef HAVE_OPENMP
45  use omp_lib
46#endif
47  use parser_oct_m
48  use profiling_oct_m
49  use restart_oct_m
50  use simul_box_oct_m
51  use smear_oct_m
52  use states_abst_oct_m
53  use states_elec_group_oct_m
54  use states_elec_dim_oct_m
55  use symmetrizer_oct_m
56  use types_oct_m
57  use unit_oct_m
58  use unit_system_oct_m
59  use varinfo_oct_m
60  use wfs_elec_oct_m
61
62  implicit none
63
64  private
65
66  public ::                           &
67    states_elec_t,                         &
68    states_elec_init,                      &
69    states_elec_look,                      &
70    states_elec_densities_init,            &
71    states_elec_exec_init,                 &
72    states_elec_allocate_wfns,             &
73    states_elec_allocate_current,          &
74    states_elec_deallocate_wfns,           &
75    states_elec_null,                      &
76    states_elec_end,                       &
77    states_elec_copy,                      &
78    states_elec_generate_random,           &
79    states_elec_fermi,                     &
80    states_elec_eigenvalues_sum,           &
81    states_elec_calc_quantities,           &
82    state_is_local,                        &
83    state_kpt_is_local,                    &
84    states_elec_distribute_nodes,          &
85    states_elec_wfns_memory,               &
86    states_elec_get_state,                 &
87    states_elec_set_state,                 &
88    states_elec_get_points,                &
89    states_elec_block_min,                 &
90    states_elec_block_max,                 &
91    states_elec_block_size,                &
92    states_elec_count_pairs,               &
93    occupied_states,                       &
94    states_elec_set_phase
95
96  type, extends(states_abst_t) :: states_elec_t
97    ! Components are public by default
98    type(states_elec_dim_t)  :: d
99    integer                  :: nst_conv              !< Number of states to be converged for unocc calc.
100
101    logical                  :: only_userdef_istates  !< only use user-defined states as initial states in propagation
102
103    type(states_elec_group_t)     :: group
104
105    !> used for the user-defined wavefunctions (they are stored as formula strings)
106    !! (st%d%dim, st%nst, st%d%nik)
107    character(len=1024), allocatable :: user_def_states(:,:,:)
108
109    !> the densities and currents (after all we are doing DFT :)
110    FLOAT, pointer :: rho(:,:)         !< rho(gr%mesh%np_part, st%d%nspin)
111    FLOAT, pointer :: current(:, :, :) !<   current(gr%mesh%np_part, gr%sb%dim, st%d%nspin)
112
113    !> k-point resolved current
114    FLOAT, pointer :: current_kpt(:,:,:) !< current(gr%mesh%np gr%sb%dim, kpt_start:kpt_end)
115
116
117    FLOAT, pointer :: rho_core(:)      !< core charge for nl core corrections
118
119    !> It may be required to "freeze" the deepest orbitals during the evolution; the density
120    !! of these orbitals is kept in frozen_rho. It is different from rho_core.
121    FLOAT, pointer :: frozen_rho(:, :)
122    FLOAT, pointer :: frozen_tau(:, :)
123    FLOAT, pointer :: frozen_gdens(:,:,:)
124    FLOAT, pointer :: frozen_ldens(:,:)
125
126    logical        :: calc_eigenval
127    logical        :: uniform_occ   !< .true. if occupations are equal for all states: no empty states, and no smearing
128
129    FLOAT, pointer :: eigenval(:,:) !< obviously the eigenvalues
130    logical        :: fixed_occ     !< should the occupation numbers be fixed?
131    logical        :: restart_fixed_occ !< should the occupation numbers be fixed by restart?
132    logical        :: restart_reorder_occs !< used for restart with altered occupation numbers
133    FLOAT, pointer :: occ(:,:)      !< the occupation numbers
134    logical, private :: fixed_spins   !< In spinors mode, the spin direction is set
135                                    !< for the initial (random) orbitals.
136    FLOAT, pointer :: spin(:, :, :)
137
138    FLOAT          :: qtot          !< (-) The total charge in the system (used in Fermi)
139    FLOAT          :: val_charge    !< valence charge
140
141    logical        :: fromScratch
142    type(smear_t)  :: smear         ! smearing of the electronic occupations
143
144    type(modelmb_particle_t) :: modelmbparticles
145    integer,pointer:: mmb_nspindown(:,:) !< number of down spins in the selected Young diagram for each type and state
146    integer,pointer:: mmb_iyoung(:,:)    !< index of the selected Young diagram for each type and state
147    FLOAT, pointer :: mmb_proj(:)        !< projection of the state onto the chosen Young diagram
148
149    !> This is stuff needed for the parallelization in states.
150    logical                     :: parallel_in_states !< Am I parallel in states?
151    type(mpi_grp_t)             :: mpi_grp            !< The MPI group related to the parallelization in states.
152    type(mpi_grp_t)             :: dom_st_mpi_grp     !< The MPI group related to the domains-states "plane".
153    type(mpi_grp_t)             :: st_kpt_mpi_grp     !< The MPI group related to the states-kpoints "plane".
154    type(mpi_grp_t)             :: dom_st_kpt_mpi_grp !< The MPI group related to the domains-states-kpoints "cube".
155#ifdef HAVE_SCALAPACK
156    type(blacs_proc_grid_t)     :: dom_st_proc_grid   !< The BLACS process grid for the domains-states plane
157#endif
158    type(distributed_t)         :: dist
159    logical                     :: scalapack_compatible !< Whether the states parallelization uses ScaLAPACK layout
160    integer                     :: lnst               !< Number of states on local node.
161    integer                     :: st_start, st_end   !< Range of states processed by local node.
162    integer, pointer            :: node(:)            !< To which node belongs each state.
163    type(multicomm_all_pairs_t), private :: ap        !< All-pairs schedule.
164
165    logical                     :: symmetrize_density
166
167    integer                     :: randomization      !< Method used to generate random states
168
169    contains
170      procedure :: nullify => states_elec_null
171      procedure :: write_info => states_elec_write_info
172      procedure :: pack => states_elec_pack
173      procedure :: unpack => states_elec_unpack
174      procedure :: set_zero => states_elec_set_zero
175  end type states_elec_t
176
177  !> Method used to generate random states
178  integer, public, parameter :: &
179    PAR_INDEPENDENT = 1,              &
180    PAR_DEPENDENT   = 2
181
182
183  interface states_elec_get_state
184    module procedure dstates_elec_get_state1, zstates_elec_get_state1, dstates_elec_get_state2, zstates_elec_get_state2
185    module procedure dstates_elec_get_state3, zstates_elec_get_state3, dstates_elec_get_state4, zstates_elec_get_state4
186  end interface states_elec_get_state
187
188  interface states_elec_set_state
189    module procedure dstates_elec_set_state1, zstates_elec_set_state1, dstates_elec_set_state2, zstates_elec_set_state2
190    module procedure dstates_elec_set_state3, zstates_elec_set_state3, dstates_elec_set_state4, zstates_elec_set_state4
191  end interface states_elec_set_state
192
193  interface states_elec_get_points
194    module procedure dstates_elec_get_points1, zstates_elec_get_points1, dstates_elec_get_points2, zstates_elec_get_points2
195  end interface states_elec_get_points
196
197contains
198
199  ! ---------------------------------------------------------
200  subroutine states_elec_null(st)
201    class(states_elec_t), intent(inout) :: st
202
203    PUSH_SUB(states_elec_null)
204
205    call states_elec_dim_null(st%d)
206    call states_elec_group_null(st%group)
207    call distributed_nullify(st%dist)
208
209    st%d%orth_method = 0
210    call modelmb_particles_nullify(st%modelmbparticles)
211    nullify(st%mmb_nspindown)
212    nullify(st%mmb_iyoung)
213    nullify(st%mmb_proj)
214
215    st%wfs_type = TYPE_FLOAT ! By default, calculations use real wavefunctions
216
217    nullify(st%rho, st%current)
218    nullify(st%current_kpt)
219    nullify(st%rho_core, st%frozen_rho)
220    nullify(st%frozen_tau, st%frozen_gdens, st%frozen_ldens)
221    nullify(st%eigenval, st%occ, st%spin)
222
223    st%parallel_in_states = .false.
224#ifdef HAVE_SCALAPACK
225    call blacs_proc_grid_nullify(st%dom_st_proc_grid)
226#endif
227    nullify(st%node)
228    nullify(st%ap%schedule)
229
230    st%packed = .false.
231
232    POP_SUB(states_elec_null)
233  end subroutine states_elec_null
234
235
236  ! ---------------------------------------------------------
237  subroutine states_elec_init(st, namespace, gr, geo)
238    type(states_elec_t), target, intent(inout) :: st
239    type(namespace_t), target,   intent(in)    :: namespace
240    type(grid_t),                intent(in)    :: gr
241    type(geometry_t),            intent(in)    :: geo
242
243    FLOAT :: excess_charge
244    integer :: nempty, ntot, default, nthreads
245    integer :: nempty_conv
246    logical :: force
247
248    PUSH_SUB(states_elec_init)
249
250    st%fromScratch = .true. ! this will be reset if restart_read is called
251    call states_elec_null(st)
252
253    !%Variable SpinComponents
254    !%Type integer
255    !%Default unpolarized
256    !%Section States
257    !%Description
258    !% The calculations may be done in three different ways: spin-restricted (TD)DFT (<i>i.e.</i>, doubly
259    !% occupied "closed shells"), spin-unrestricted or "spin-polarized" (TD)DFT (<i>i.e.</i> we have two
260    !% electronic systems, one with spin up and one with spin down), or making use of two-component
261    !% spinors.
262    !%Option unpolarized 1
263    !% Spin-restricted calculations.
264    !%Option polarized 2
265    !%Option spin_polarized 2
266    !% (Synonym <tt>polarized</tt>.) Spin-unrestricted, also known as spin-DFT, SDFT. This mode will double the number of
267    !% wavefunctions necessary for a spin-unpolarized calculation.
268    !%Option non_collinear 3
269    !%Option spinors 3
270    !% (Synonym: <tt>non_collinear</tt>.) The spin-orbitals are two-component spinors. This effectively allows the spin-density to
271    !% be oriented non-collinearly: <i>i.e.</i> the magnetization vector is allowed to take different
272    !% directions at different points. This vector is always in 3D regardless of <tt>Dimensions</tt>.
273    !%End
274    call parse_variable(namespace, 'SpinComponents', UNPOLARIZED, st%d%ispin)
275    if(.not.varinfo_valid_option('SpinComponents', st%d%ispin)) call messages_input_error(namespace, 'SpinComponents')
276    call messages_print_var_option(stdout, 'SpinComponents', st%d%ispin)
277    ! Use of spinors requires complex wavefunctions.
278    if (st%d%ispin == SPINORS) call states_set_complex(st)
279
280    if(st%d%ispin /= UNPOLARIZED .and. gr%sb%kpoints%use_time_reversal) then
281      message(1) = "Time reversal symmetry is only implemented for unpolarized spins."
282      message(2) = "Use KPointsUseTimeReversal = no."
283      call messages_fatal(2, namespace=namespace)
284    end if
285
286
287    !%Variable ExcessCharge
288    !%Type float
289    !%Default 0.0
290    !%Section States
291    !%Description
292    !% The net charge of the system. A negative value means that we are adding
293    !% electrons, while a positive value means we are taking electrons
294    !% from the system.
295    !%End
296    call parse_variable(namespace, 'ExcessCharge', M_ZERO, excess_charge)
297
298    !%Variable CalcEigenvalues
299    !%Type logical
300    !%Default yes
301    !%Section SCF
302    !%Description
303    !% (Experimental) When this variable is set to <tt>no</tt>,
304    !% Octopus will not calculate the eigenvalues or eigenvectors of
305    !% the Hamiltonian. Instead, Octopus will obtain the occupied
306    !% subspace. The advantage that calculation can be made faster by
307    !% avoiding subspace diagonalization and other calculations.
308    !%
309    !% This mode cannot be used with unoccupied states.
310    !%End
311    call parse_variable(namespace, 'CalcEigenvalues', .true., st%calc_eigenval)
312    if(.not. st%calc_eigenval) call messages_experimental('CalcEigenvalues = .false.')
313
314    !%Variable TotalStates
315    !%Type integer
316    !%Default 0
317    !%Section States
318    !%Description
319    !% This variable sets the total number of states that Octopus will
320    !% use. This is normally not necessary since by default Octopus
321    !% sets the number of states to the minimum necessary to hold the
322    !% electrons present in the system. (This default behavior is
323    !% obtained by setting <tt>TotalStates</tt> to 0).
324    !%
325    !% If you want to add some unoccupied states, probably it is more convenient to use the variable
326    !% <tt>ExtraStates</tt>.
327    !%End
328    call parse_variable(namespace, 'TotalStates', 0, ntot)
329    if (ntot < 0) then
330      write(message(1), '(a,i5,a)') "Input: '", ntot, "' is not a valid value for TotalStates."
331      call messages_fatal(1, namespace=namespace)
332    end if
333
334    !%Variable ExtraStates
335    !%Type integer
336    !%Default 0
337    !%Section States
338    !%Description
339    !% The number of states is in principle calculated considering the minimum
340    !% numbers of states necessary to hold the electrons present in the system.
341    !% The number of electrons is
342    !% in turn calculated considering the nature of the species supplied in the
343    !% <tt>Species</tt> block, and the value of the <tt>ExcessCharge</tt> variable.
344    !% However, one may command <tt>Octopus</tt> to use more states, which is necessary if one wants to
345    !% use fractional occupational numbers, either fixed from the beginning through
346    !% the <tt>Occupations</tt> block or by prescribing
347    !% an electronic temperature with <tt>Smearing</tt>, or in order to calculate
348    !% excited states (including with <tt>CalculationMode = unocc</tt>).
349    !%End
350    call parse_variable(namespace, 'ExtraStates', 0, nempty)
351    if (nempty < 0) then
352      write(message(1), '(a,i5,a)') "Input: '", nempty, "' is not a valid value for ExtraStates."
353      message(2) = '(0 <= ExtraStates)'
354      call messages_fatal(2, namespace=namespace)
355    end if
356
357    if(ntot > 0 .and. nempty > 0) then
358      message(1) = 'You cannot set TotalStates and ExtraStates at the same time.'
359      call messages_fatal(1, namespace=namespace)
360    end if
361
362    !%Variable ExtraStatesToConverge
363    !%Type integer
364    !%Default 0
365    !%Section States
366    !%Description
367    !% Only for unocc calculations.
368    !% Specifies the number of extra states that will be considered for reaching the convergence.
369    !% Together with <tt>ExtraStates</tt>, one can have some more states which will not be
370    !% considered for the convergence criteria, thus making the convergence of the
371    !% unocc calculation faster.
372    !% By default, all extra states need to be converged.
373    !%End
374    call parse_variable(namespace, 'ExtraStatesToConverge', nempty, nempty_conv)
375    if (nempty < 0) then
376      write(message(1), '(a,i5,a)') "Input: '", nempty_conv, "' is not a valid value for ExtraStatesToConverge."
377      message(2) = '(0 <= ExtraStatesToConverge)'
378      call messages_fatal(2, namespace=namespace)
379    end if
380
381    if(nempty_conv > nempty) then
382      message(1) = 'You cannot set ExtraStatesToConverge to an higer value than ExtraStates.'
383      call messages_fatal(1, namespace=namespace)
384    end if
385
386    ! For non-periodic systems this should just return the Gamma point
387    call states_elec_choose_kpoints(st%d, gr%sb, namespace)
388
389    call geometry_val_charge(geo, st%val_charge)
390
391    st%qtot = -(st%val_charge + excess_charge)
392
393    if(st%qtot < -M_EPSILON) then
394      write(message(1),'(a,f12.6,a)') 'Total charge = ', st%qtot, ' < 0'
395      message(2) = 'Check Species and ExcessCharge.'
396      call messages_fatal(2, only_root_writes = .true., namespace=namespace)
397    endif
398
399    select case(st%d%ispin)
400    case(UNPOLARIZED)
401      st%d%dim = 1
402      st%nst = int(st%qtot/2)
403      if(st%nst*2 < st%qtot) st%nst = st%nst + 1
404      st%d%nspin = 1
405      st%d%spin_channels = 1
406    case(SPIN_POLARIZED)
407      st%d%dim = 1
408      st%nst = int(st%qtot/2)
409      if(st%nst*2 < st%qtot) st%nst = st%nst + 1
410      st%d%nspin = 2
411      st%d%spin_channels = 2
412    case(SPINORS)
413      st%d%dim = 2
414      st%nst = int(st%qtot)
415      if(st%nst < st%qtot) st%nst = st%nst + 1
416      st%d%nspin = 4
417      st%d%spin_channels = 2
418    end select
419
420    if(ntot > 0) then
421      if(ntot < st%nst) then
422        message(1) = 'TotalStates is smaller than the number of states required by the system.'
423        call messages_fatal(1, namespace=namespace)
424      end if
425
426      st%nst = ntot
427    end if
428
429    st%nst_conv = st%nst + nempty_conv
430    st%nst = st%nst + nempty
431    if(st%nst == 0) then
432      message(1) = "Cannot run with number of states = zero."
433      call messages_fatal(1, namespace=namespace)
434    end if
435
436    !%Variable StatesBlockSize
437    !%Type integer
438    !%Section Execution::Optimization
439    !%Description
440    !% Some routines work over blocks of eigenfunctions, which
441    !% generally improves performance at the expense of increased
442    !% memory consumption. This variable selects the size of the
443    !% blocks to be used. If OpenCl is enabled, the default is 32;
444    !% otherwise it is max(4, 2*nthreads).
445    !%End
446
447    nthreads = 1
448#ifdef HAVE_OPENMP
449    !$omp parallel
450    !$omp master
451    nthreads = omp_get_num_threads()
452    !$omp end master
453    !$omp end parallel
454#endif
455
456    if(accel_is_enabled()) then
457      default = 32
458    else
459      default = max(4, 2*nthreads)
460    end if
461
462    if(default > pad_pow2(st%nst)) default = pad_pow2(st%nst)
463
464    ASSERT(default > 0)
465
466    call parse_variable(namespace, 'StatesBlockSize', default, st%d%block_size)
467    if(st%d%block_size < 1) then
468      call messages_write("The variable 'StatesBlockSize' must be greater than 0.")
469      call messages_fatal(namespace=namespace)
470    end if
471
472    st%d%block_size = min(st%d%block_size, st%nst)
473    conf%target_states_block_size = st%d%block_size
474
475    SAFE_ALLOCATE(st%eigenval(1:st%nst, 1:st%d%nik))
476    st%eigenval = huge(st%eigenval)
477
478    ! Periodic systems require complex wavefunctions
479    ! but not if it is Gamma-point only
480    if (.not. kpoints_gamma_only(gr%sb%kpoints)) then
481      call states_set_complex(st)
482    end if
483
484    !%Variable OnlyUserDefinedInitialStates
485    !%Type logical
486    !%Default no
487    !%Section States
488    !%Description
489    !% If true, then only user-defined states from the block <tt>UserDefinedStates</tt>
490    !% will be used as initial states for a time-propagation. No attempt is made
491    !% to load ground-state orbitals from a previous ground-state run.
492    !%End
493    call parse_variable(namespace, 'OnlyUserDefinedInitialStates', .false., st%only_userdef_istates)
494
495    ! we now allocate some arrays
496    SAFE_ALLOCATE(st%occ     (1:st%nst, 1:st%d%nik))
497    st%occ      = M_ZERO
498    ! allocate space for formula strings that define user-defined states
499    if(parse_is_defined(namespace, 'UserDefinedStates') .or. parse_is_defined(namespace, 'OCTInitialUserdefined') &
500         .or. parse_is_defined(namespace, 'OCTTargetUserdefined')) then
501      SAFE_ALLOCATE(st%user_def_states(1:st%d%dim, 1:st%nst, 1:st%d%nik))
502      ! initially we mark all 'formulas' as undefined
503      st%user_def_states(1:st%d%dim, 1:st%nst, 1:st%d%nik) = 'undefined'
504    end if
505
506    if(st%d%ispin == SPINORS) then
507      SAFE_ALLOCATE(st%spin(1:3, 1:st%nst, 1:st%d%nik))
508    else
509      nullify(st%spin)
510    end if
511
512    !%Variable StatesRandomization
513    !%Type integer
514    !%Default par_independent
515    !%Section States
516    !%Description
517    !% The randomization of states can be done in two ways:
518    !% i) a parallelisation independent way (default), where the random states are identical,
519    !% irrespectively of the number of tasks and
520    !% ii) a parallelisation dependent way, which can prevent linear dependency
521    !%  to occur for large systems.
522    !%Option par_independent 1
523    !% Parallelisation-independent randomization of states.
524    !%Option par_dependent 2
525    !% The randomization depends on the number of taks used in the calculation.
526    !%End
527    call parse_variable(namespace, 'StatesRandomization', PAR_INDEPENDENT, st%randomization)
528
529
530    call states_elec_read_initial_occs(st, namespace, excess_charge, gr%sb%kpoints)
531    call states_elec_read_initial_spins(st, namespace)
532
533    st%st_start = 1
534    st%st_end = st%nst
535    st%lnst = st%nst
536    SAFE_ALLOCATE(st%node(1:st%nst))
537    st%node(1:st%nst) = 0
538
539    call mpi_grp_init(st%mpi_grp, -1)
540    st%parallel_in_states = .false.
541
542    call distributed_nullify(st%d%kpt, st%d%nik)
543
544    call modelmb_particles_init(st%modelmbparticles, namespace, gr)
545    if (st%modelmbparticles%nparticle > 0) then
546      ! FIXME: check why this is not initialized properly in the test, or why it is written out when not initialized
547      SAFE_ALLOCATE(st%mmb_nspindown(1:st%modelmbparticles%ntype_of_particle, 1:st%nst))
548      st%mmb_nspindown(:,:) = -1
549      SAFE_ALLOCATE(st%mmb_iyoung(1:st%modelmbparticles%ntype_of_particle, 1:st%nst))
550      st%mmb_iyoung(:,:) = -1
551      SAFE_ALLOCATE(st%mmb_proj(1:st%nst))
552      st%mmb_proj(:) = M_ZERO
553    end if
554
555    !%Variable SymmetrizeDensity
556    !%Type logical
557    !%Default no
558    !%Section States
559    !%Description
560    !% When enabled the density is symmetrized. Currently, this can
561    !% only be done for periodic systems. (Experimental.)
562    !%End
563    call parse_variable(namespace, 'SymmetrizeDensity', gr%sb%kpoints%use_symmetries, st%symmetrize_density)
564    call messages_print_var_value(stdout, 'SymmetrizeDensity', st%symmetrize_density)
565
566#ifdef HAVE_SCALAPACK
567    call blacs_proc_grid_nullify(st%dom_st_proc_grid)
568#endif
569
570    !%Variable ForceComplex
571    !%Type logical
572    !%Default no
573    !%Section Execution::Debug
574    !%Description
575    !% Normally <tt>Octopus</tt> determines automatically the type necessary
576    !% for the wavefunctions. When set to yes this variable will
577    !% force the use of complex wavefunctions.
578    !%
579    !% Warning: This variable is designed for testing and
580    !% benchmarking and normal users need not use it.
581    !%End
582    call parse_variable(namespace, 'ForceComplex', .false., force)
583
584    if(force) call states_set_complex(st)
585
586    st%packed = .false.
587
588    POP_SUB(states_elec_init)
589  end subroutine states_elec_init
590
591  !> Reads the 'states' file in the restart directory, and finds out
592  !! the nik, dim, and nst contained in it.
593  ! ---------------------------------------------------------
594  subroutine states_elec_look(restart, nik, dim, nst, ierr)
595    type(restart_t), intent(in)  :: restart
596    integer,         intent(out) :: nik
597    integer,         intent(out) :: dim
598    integer,         intent(out) :: nst
599    integer,         intent(out) :: ierr
600
601    character(len=256) :: lines(3)
602    character(len=20)   :: char
603    integer :: iunit
604
605    PUSH_SUB(states_elec_look)
606
607    ierr = 0
608
609    iunit = restart_open(restart, 'states')
610    call restart_read(restart, iunit, lines, 3, ierr)
611    if (ierr == 0) then
612      read(lines(1), *) char, nst
613      read(lines(2), *) char, dim
614      read(lines(3), *) char, nik
615    end if
616    call restart_close(restart, iunit)
617
618    POP_SUB(states_elec_look)
619  end subroutine states_elec_look
620
621  ! ---------------------------------------------------------
622  !> Reads from the input file the initial occupations, if the
623  !! block "Occupations" is present. Otherwise, it makes an initial
624  !! guess for the occupations, maybe using the "Smearing"
625  !! variable.
626  !!
627  !! The resulting occupations are placed on the st\%occ variable. The
628  !! boolean st\%fixed_occ is also set to .true., if the occupations are
629  !! set by the user through the "Occupations" block; false otherwise.
630  subroutine states_elec_read_initial_occs(st, namespace, excess_charge, kpoints)
631    type(states_elec_t),  intent(inout) :: st
632    type(namespace_t),    intent(in)    :: namespace
633    FLOAT,                intent(in)    :: excess_charge
634    type(kpoints_t),      intent(in)    :: kpoints
635
636    integer :: ik, ist, ispin, nspin, ncols, nrows, el_per_state, icol, start_pos, spin_n
637    type(block_t) :: blk
638    FLOAT :: rr, charge
639    logical :: integral_occs, unoccupied_states
640    FLOAT, allocatable :: read_occs(:, :)
641    FLOAT :: charge_in_block
642
643    PUSH_SUB(states_elec_read_initial_occs)
644
645    !%Variable RestartFixedOccupations
646    !%Type logical
647    !%Default no
648    !%Section States
649    !%Description
650    !% Setting this variable will make the restart proceed as
651    !% if the occupations from the previous calculation had been set via the <tt>Occupations</tt> block,
652    !% <i>i.e.</i> fixed. Otherwise, occupations will be determined by smearing.
653    !%End
654    call parse_variable(namespace, 'RestartFixedOccupations', .false., st%restart_fixed_occ)
655    ! we will turn on st%fixed_occ if restart_read is ever called
656
657    !%Variable Occupations
658    !%Type block
659    !%Section States
660    !%Description
661    !% The occupation numbers of the orbitals can be fixed through the use of this
662    !% variable. For example:
663    !%
664    !% <tt>%Occupations
665    !% <br>&nbsp;&nbsp;2 | 2 | 2 | 2 | 2
666    !% <br>%</tt>
667    !%
668    !% would fix the occupations of the five states to 2. There can be
669    !% at most as many columns as states in the calculation. If there are fewer columns
670    !% than states, then the code will assume that the user is indicating the occupations
671    !% of the uppermost states where all lower states have full occupation (i.e. 2 for spin-unpolarized
672    !% calculations, 1 otherwise) and all higher states have zero occupation. The first column
673    !% will be taken to refer to the lowest state such that the occupations would be consistent
674    !% with the correct total charge. For example, if there are 8 electrons and 10 states (from
675    !% <tt>ExtraStates = 6</tt>), then an abbreviated specification
676    !%
677    !% <tt>%Occupations
678    !% <br>&nbsp;&nbsp;1 | 0 | 1
679    !% <br>%</tt>
680    !%
681    !% would be equivalent to a full specification
682    !%
683    !% <tt>%Occupations
684    !% <br>&nbsp;&nbsp;2 | 2 | 2 | 1 | 0 | 1 | 0 | 0 | 0 | 0
685    !% <br>%</tt>
686    !%
687    !% This is an example of use for constrained density-functional theory,
688    !% crudely emulating a HOMO->LUMO+1 optical excitation.
689    !% The number of rows should be equal
690    !% to the number of k-points times the number of spins. For example, for a finite system
691    !% with <tt>SpinComponents == spin_polarized</tt>,
692    !% this block should contain two lines, one for each spin channel.
693    !% All rows must have the same number of columns.
694    !%
695    !% The <tt>Occupations</tt> block is useful for the ground state of highly symmetric
696    !% small systems (like an open-shell atom), to fix the occupation numbers
697    !% of degenerate states in order to help <tt>octopus</tt> to converge. This is to
698    !% be used in conjuction with <tt>ExtraStates</tt>. For example, to calculate the
699    !% carbon atom, one would do:
700    !%
701    !% <tt>ExtraStates = 2
702    !% <br>%Occupations
703    !% <br>&nbsp;&nbsp;2 | 2/3 | 2/3 | 2/3
704    !% <br>%</tt>
705    !%
706    !% If you want the calculation to be spin-polarized (which makes more sense), you could do:
707    !%
708    !% <tt>ExtraStates = 2
709    !% <br>%Occupations
710    !% <br>&nbsp;&nbsp; 2/3 | 2/3 | 2/3
711    !% <br>&nbsp;&nbsp; 0   |   0 |   0
712    !% <br>%</tt>
713    !%
714    !% Note that in this case the first state is absent, the code will calculate four states
715    !% (two because there are four electrons, plus two because <tt>ExtraStates</tt> = 2), and since
716    !% it finds only three columns, it will occupy the first state with one electron for each
717    !% of the spin options.
718    !%
719    !% If the sum of occupations is not equal to the total charge set by <tt>ExcessCharge</tt>,
720    !% an error message is printed.
721    !% If <tt>FromScratch = no</tt> and <tt>RestartFixedOccupations = yes</tt>,
722    !% this block will be ignored.
723    !%End
724
725    integral_occs = .true.
726
727    occ_fix: if(parse_block(namespace, 'Occupations', blk) == 0) then
728      ! read in occupations
729      st%fixed_occ = .true.
730
731      ncols = parse_block_cols(blk, 0)
732      if(ncols > st%nst) then
733        call messages_input_error(namespace, "Occupations", "Too many columns in block Occupations.")
734      end if
735
736      nrows = parse_block_n(blk)
737      if(nrows /= st%d%nik) then
738        call messages_input_error(namespace, "Occupations", "Wrong number of rows in block Occupations.")
739      end if
740
741      do ik = 1, st%d%nik - 1
742        if(parse_block_cols(blk, ik) /= ncols) then
743          call messages_input_error(namespace, "Occupations", &
744            "All rows in block Occupations must have the same number of columns.")
745        end if
746      end do
747
748      ! Now we fill all the "missing" states with the maximum occupation.
749      if(st%d%ispin == UNPOLARIZED) then
750        el_per_state = 2
751      else
752        el_per_state = 1
753      end if
754
755      SAFE_ALLOCATE(read_occs(1:ncols, 1:st%d%nik))
756
757      charge_in_block = M_ZERO
758      do ik = 1, st%d%nik
759        do icol = 1, ncols
760          call parse_block_float(blk, ik - 1, icol - 1, read_occs(icol, ik))
761          charge_in_block = charge_in_block + read_occs(icol, ik) * st%d%kweights(ik)
762        end do
763      end do
764
765      spin_n = 2
766      select case(st%d%ispin)
767        case(UNPOLARIZED)
768          spin_n = 2
769        case(SPIN_POLARIZED)
770          spin_n = 2
771        case(SPINORS)
772          spin_n = 1
773      end select
774
775      start_pos = int((st%qtot - charge_in_block)/spin_n)
776
777      if(start_pos + ncols > st%nst) then
778        message(1) = "To balance charge, the first column in block Occupations is taken to refer to state"
779        write(message(2),'(a,i6,a)') "number ", start_pos, " but there are too many columns for the number of states."
780        write(message(3),'(a,i6,a)') "Solution: set ExtraStates = ", start_pos + ncols - st%nst
781        call messages_fatal(3, namespace=namespace)
782      end if
783
784      do ik = 1, st%d%nik
785        do ist = 1, start_pos
786          st%occ(ist, ik) = el_per_state
787        end do
788      end do
789
790      do ik = 1, st%d%nik
791        do ist = start_pos + 1, start_pos + ncols
792          st%occ(ist, ik) = read_occs(ist - start_pos, ik)
793          integral_occs = integral_occs .and. &
794            abs((st%occ(ist, ik) - el_per_state) * st%occ(ist, ik))  <=  M_EPSILON
795        end do
796      end do
797
798      do ik = 1, st%d%nik
799        do ist = start_pos + ncols + 1, st%nst
800          st%occ(ist, ik) = M_ZERO
801        end do
802      end do
803
804      call parse_block_end(blk)
805
806      SAFE_DEALLOCATE_A(read_occs)
807
808    else
809      st%fixed_occ = .false.
810      integral_occs = .false.
811
812      ! first guess for occupation...paramagnetic configuration
813      rr = M_ONE
814      if(st%d%ispin == UNPOLARIZED) rr = M_TWO
815
816      st%occ  = M_ZERO
817      st%qtot = -(st%val_charge + excess_charge)
818
819      nspin = 1
820      if(st%d%nspin == 2) nspin = 2
821
822      do ik = 1, st%d%nik, nspin
823        charge = M_ZERO
824        do ist = 1, st%nst
825          do ispin = ik, ik + nspin - 1
826            st%occ(ist, ispin) = min(rr, -(st%val_charge + excess_charge) - charge)
827            charge = charge + st%occ(ist, ispin)
828          end do
829        end do
830      end do
831
832    end if occ_fix
833
834    !%Variable RestartReorderOccs
835    !%Type logical
836    !%Default no
837    !%Section States
838    !%Description
839    !% Consider doing a ground-state calculation, and then restarting with new occupations set
840    !% with the <tt>Occupations</tt> block, in an attempt to populate the orbitals of the original
841    !% calculation. However, the eigenvalues may reorder as the density changes, in which case the
842    !% occupations will now be referring to different orbitals. Setting this variable to yes will
843    !% try to solve this issue when the restart data is being read, by reordering the occupations
844    !% according to the order of the expectation values of the restart wavefunctions.
845    !%End
846    if(st%fixed_occ) then
847      call parse_variable(namespace, 'RestartReorderOccs', .false., st%restart_reorder_occs)
848    else
849      st%restart_reorder_occs = .false.
850    end if
851
852    call smear_init(st%smear, namespace, st%d%ispin, st%fixed_occ, integral_occs, kpoints)
853
854    unoccupied_states = (st%d%ispin /= SPINORS .and. st%nst*2 > st%qtot) .or. (st%d%ispin == SPINORS .and. st%nst > st%qtot)
855
856    if(.not. smear_is_semiconducting(st%smear) .and. .not. st%smear%method == SMEAR_FIXED_OCC) then
857      if(.not. unoccupied_states) then
858        call messages_write('Smearing needs unoccupied states (via ExtraStates or TotalStates) to be useful.')
859        call messages_warning(namespace=namespace)
860      end if
861    end if
862
863    ! sanity check
864    charge = M_ZERO
865    do ist = 1, st%nst
866      charge = charge + sum(st%occ(ist, 1:st%d%nik) * st%d%kweights(1:st%d%nik))
867    end do
868    if(abs(charge - st%qtot) > CNST(1e-6)) then
869      message(1) = "Initial occupations do not integrate to total charge."
870      write(message(2), '(6x,f12.6,a,f12.6)') charge, ' != ', st%qtot
871      call messages_fatal(2, only_root_writes = .true., namespace=namespace)
872    end if
873
874    st%uniform_occ = smear_is_semiconducting(st%smear) .and. .not. unoccupied_states
875
876    if(.not. st%calc_eigenval .and. .not. st%uniform_occ) then
877      call messages_write('Calculation of the eigenvalues is required with unoccupied states', new_line = .true.)
878      call messages_write('or smearing.')
879      call messages_fatal(namespace=namespace)
880    end if
881
882    POP_SUB(states_elec_read_initial_occs)
883  end subroutine states_elec_read_initial_occs
884
885
886  ! ---------------------------------------------------------
887  !> Reads, if present, the "InitialSpins" block. This is only
888  !! done in spinors mode; otherwise the routine does nothing. The
889  !! resulting spins are placed onto the st\%spin pointer. The boolean
890  !! st\%fixed_spins is set to true if (and only if) the InitialSpins
891  !! block is present.
892  subroutine states_elec_read_initial_spins(st, namespace)
893    type(states_elec_t), intent(inout) :: st
894    type(namespace_t),   intent(in)    :: namespace
895
896    integer :: i, j
897    type(block_t) :: blk
898
899    PUSH_SUB(states_elec_read_initial_spins)
900
901    st%fixed_spins = .false.
902    if(st%d%ispin /= SPINORS) then
903      POP_SUB(states_elec_read_initial_spins)
904      return
905    end if
906
907    !%Variable InitialSpins
908    !%Type block
909    !%Section States
910    !%Description
911    !% The spin character of the initial random guesses for the spinors can
912    !% be fixed by making use of this block. Note that this will not "fix" the
913    !% the spins during the calculation (this cannot be done in spinors mode, in
914    !% being able to change the spins is why the spinors mode exists in the first
915    !% place).
916    !%
917    !% This block is meaningless and ignored if the run is not in spinors mode
918    !% (<tt>SpinComponents = spinors</tt>).
919    !%
920    !% The structure of the block is very simple: each column contains the desired
921    !% <math>\left< S_x \right>, \left< S_y \right>, \left< S_z \right> </math> for each spinor.
922    !% If the calculation is for a periodic system
923    !% and there is more than one <i>k</i>-point, the spins of all the <i>k</i>-points are
924    !% the same.
925    !%
926    !% For example, if we have two spinors, and we want one in the <math>S_x</math> "down" state,
927    !% and another one in the <math>S_x</math> "up" state:
928    !%
929    !% <tt>%InitialSpins
930    !% <br>&nbsp;&nbsp;&nbsp; 0.5 | 0.0 | 0.0
931    !% <br>&nbsp;&nbsp; -0.5 | 0.0 | 0.0
932    !% <br>%</tt>
933    !%
934    !% WARNING: if the calculation is for a system described by pseudopotentials (as
935    !% opposed to user-defined potentials or model systems), this option is
936    !% meaningless since the random spinors are overwritten by the atomic orbitals.
937    !%
938    !% This constraint must be fulfilled:
939    !% <br><math> \left< S_x \right>^2 + \left< S_y \right>^2 + \left< S_z \right>^2 = \frac{1}{4} </math>
940    !%End
941    spin_fix: if(parse_block(namespace, 'InitialSpins', blk)==0) then
942      do i = 1, st%nst
943        do j = 1, 3
944          call parse_block_float(blk, i-1, j-1, st%spin(j, i, 1))
945        end do
946        if( abs(sum(st%spin(1:3, i, 1)**2) - M_FOURTH) > CNST(1.0e-6)) call messages_input_error(namespace, 'InitialSpins')
947      end do
948      call parse_block_end(blk)
949      st%fixed_spins = .true.
950      do i = 2, st%d%nik
951        st%spin(:, :, i) = st%spin(:, :, 1)
952      end do
953    end if spin_fix
954
955    POP_SUB(states_elec_read_initial_spins)
956  end subroutine states_elec_read_initial_spins
957
958
959  ! ---------------------------------------------------------
960  !> Allocates the KS wavefunctions defined within a states_elec_t structure.
961  subroutine states_elec_allocate_wfns(st, mesh, wfs_type, skip, packed)
962    type(states_elec_t),    intent(inout)   :: st
963    type(mesh_t),           intent(in)      :: mesh
964    type(type_t), optional, intent(in)      :: wfs_type
965    logical,      optional, intent(in)      :: skip(:)
966    logical,      optional, intent(in)      :: packed
967
968    PUSH_SUB(states_elec_allocate_wfns)
969
970    if (present(wfs_type)) then
971      ASSERT(wfs_type == TYPE_FLOAT .or. wfs_type == TYPE_CMPLX)
972      st%wfs_type = wfs_type
973    end if
974
975    call states_elec_init_block(st, mesh, skip = skip, packed=packed)
976    call states_elec_set_zero(st)
977
978    POP_SUB(states_elec_allocate_wfns)
979  end subroutine states_elec_allocate_wfns
980
981  !---------------------------------------------------------------------
982  !> Initializes the data components in st that describe how the states
983  !! are distributed in blocks:
984  !!
985  !! st\%nblocks: this is the number of blocks in which the states are divided. Note that
986  !!   this number is the total number of blocks, regardless of how many are actually stored
987  !!   in each node.
988  !! block_start: in each node, the index of the first block.
989  !! block_end: in each node, the index of the last block.
990  !!   If the states are not parallelized, then block_start is 1 and block_end is st\%nblocks.
991  !! st\%iblock(1:st\%nst, 1:st\%d\%nik): it points, for each state, to the block that contains it.
992  !! st\%block_is_local(): st\%block_is_local(ib) is .true. if block ib is stored in the running node.
993  !! st\%block_range(1:st\%nblocks, 1:2): Block ib contains states fromn st\%block_range(ib, 1) to st\%block_range(ib, 2)
994  !! st\%block_size(1:st\%nblocks): Block ib contains a number st\%block_size(ib) of states.
995  !! st\%block_initialized: it should be .false. on entry, and .true. after exiting this routine.
996  !!
997  !! The set of batches st\%psib(1:st\%nblocks) contains the blocks themselves.
998  subroutine states_elec_init_block(st, mesh, verbose, skip, packed)
999    type(states_elec_t),           intent(inout) :: st
1000    type(mesh_t),                  intent(in)    :: mesh
1001    logical, optional,             intent(in)    :: verbose
1002    logical, optional,             intent(in)    :: skip(:)
1003    logical, optional,             intent(in)    :: packed
1004
1005    integer :: ib, iqn, ist, istmin, istmax
1006    logical :: same_node, verbose_, packed_
1007    integer, allocatable :: bstart(:), bend(:)
1008
1009    PUSH_SUB(states_elec_init_block)
1010
1011    SAFE_ALLOCATE(bstart(1:st%nst))
1012    SAFE_ALLOCATE(bend(1:st%nst))
1013    SAFE_ALLOCATE(st%group%iblock(1:st%nst, 1:st%d%nik))
1014
1015    st%group%iblock = 0
1016
1017    verbose_ = optional_default(verbose, .true.)
1018    packed_ = optional_default(packed, .false.)
1019
1020    !In case we have a list of state to skip, we do not allocate them
1021    istmin = 1
1022    if(present(skip)) then
1023      do ist = 1, st%nst
1024        if(.not.skip(ist)) then
1025          istmin = ist
1026          exit
1027        end if
1028      end do
1029    end if
1030
1031    istmax = st%nst
1032    if(present(skip)) then
1033      do ist = st%nst, istmin, -1
1034        if(.not.skip(ist)) then
1035          istmax = ist
1036          exit
1037        end if
1038      end do
1039    end if
1040
1041    if(present(skip) .and. verbose_) then
1042      call messages_write('Info: Allocating states from ')
1043      call messages_write(istmin, fmt = 'i8')
1044      call messages_write(' to ')
1045      call messages_write(istmax, fmt = 'i8')
1046      call messages_info()
1047    end if
1048
1049    ! count and assign blocks
1050    ib = 0
1051    st%group%nblocks = 0
1052    bstart(1) = istmin
1053    do ist = istmin, istmax
1054      INCR(ib, 1)
1055
1056      st%group%iblock(ist, st%d%kpt%start:st%d%kpt%end) = st%group%nblocks + 1
1057
1058      same_node = .true.
1059      if(st%parallel_in_states .and. ist /= istmax) then
1060        ! We have to avoid that states that are in different nodes end
1061        ! up in the same block
1062        same_node = (st%node(ist + 1) == st%node(ist))
1063      end if
1064
1065      if(ib == st%d%block_size .or. ist == istmax .or. .not. same_node) then
1066        ib = 0
1067        INCR(st%group%nblocks, 1)
1068        bend(st%group%nblocks) = ist
1069        if(ist /= istmax) bstart(st%group%nblocks + 1) = ist + 1
1070      end if
1071    end do
1072
1073    SAFE_ALLOCATE(st%group%psib(1:st%group%nblocks, st%d%kpt%start:st%d%kpt%end))
1074
1075    SAFE_ALLOCATE(st%group%block_is_local(1:st%group%nblocks, st%d%kpt%start:st%d%kpt%end))
1076    st%group%block_is_local = .false.
1077    st%group%block_start  = -1
1078    st%group%block_end    = -2  ! this will make that loops block_start:block_end do not run if not initialized
1079
1080    do ib = 1, st%group%nblocks
1081      if(bstart(ib) >= st%st_start .and. bend(ib) <= st%st_end) then
1082        if(st%group%block_start == -1) st%group%block_start = ib
1083        st%group%block_end = ib
1084        do iqn = st%d%kpt%start, st%d%kpt%end
1085          st%group%block_is_local(ib, iqn) = .true.
1086
1087          if (states_are_real(st)) then
1088            call dwfs_elec_init(st%group%psib(ib, iqn), st%d%dim, bstart(ib), bend(ib), mesh%np_part, iqn, &
1089              special=.true., packed=packed_)
1090          else
1091            call zwfs_elec_init(st%group%psib(ib, iqn), st%d%dim, bstart(ib), bend(ib), mesh%np_part, iqn, &
1092              special=.true., packed=packed_)
1093          end if
1094
1095        end do
1096      end if
1097    end do
1098
1099    SAFE_ALLOCATE(st%group%block_range(1:st%group%nblocks, 1:2))
1100    SAFE_ALLOCATE(st%group%block_size(1:st%group%nblocks))
1101
1102    st%group%block_range(1:st%group%nblocks, 1) = bstart(1:st%group%nblocks)
1103    st%group%block_range(1:st%group%nblocks, 2) = bend(1:st%group%nblocks)
1104    st%group%block_size(1:st%group%nblocks) = bend(1:st%group%nblocks) - bstart(1:st%group%nblocks) + 1
1105
1106    st%group%block_initialized = .true.
1107
1108    SAFE_ALLOCATE(st%group%block_node(1:st%group%nblocks))
1109
1110    ASSERT(associated(st%node))
1111    ASSERT(all(st%node >= 0) .and. all(st%node < st%mpi_grp%size))
1112
1113    do ib = 1, st%group%nblocks
1114      st%group%block_node(ib) = st%node(st%group%block_range(ib, 1))
1115      ASSERT(st%group%block_node(ib) == st%node(st%group%block_range(ib, 2)))
1116    end do
1117
1118    if(verbose_) then
1119      call messages_write('Info: Blocks of states')
1120      call messages_info()
1121      do ib = 1, st%group%nblocks
1122        call messages_write('      Block ')
1123        call messages_write(ib, fmt = 'i8')
1124        call messages_write(' contains ')
1125        call messages_write(st%group%block_size(ib), fmt = 'i8')
1126        call messages_write(' states')
1127        if(st%group%block_size(ib) > 0) then
1128          call messages_write(':')
1129          call messages_write(st%group%block_range(ib, 1), fmt = 'i8')
1130          call messages_write(' - ')
1131          call messages_write(st%group%block_range(ib, 2), fmt = 'i8')
1132        end if
1133        call messages_info()
1134      end do
1135    end if
1136
1137!!$!!!!DEBUG
1138!!$    ! some debug output that I will keep here for the moment
1139!!$    if(mpi_grp_is_root(mpi_world)) then
1140!!$      print*, "NST       ", st%nst
1141!!$      print*, "BLOCKSIZE ", st%d%block_size
1142!!$      print*, "NBLOCKS   ", st%group%nblocks
1143!!$
1144!!$      print*, "==============="
1145!!$      do ist = 1, st%nst
1146!!$        print*, st%node(ist), ist, st%group%iblock(ist, 1)
1147!!$      end do
1148!!$      print*, "==============="
1149!!$
1150!!$      do ib = 1, st%group%nblocks
1151!!$        print*, ib, bstart(ib), bend(ib)
1152!!$      end do
1153!!$
1154!!$    end if
1155!!$!!!!ENDOFDEBUG
1156
1157    SAFE_DEALLOCATE_A(bstart)
1158    SAFE_DEALLOCATE_A(bend)
1159    POP_SUB(states_elec_init_block)
1160  end subroutine states_elec_init_block
1161
1162
1163  ! ---------------------------------------------------------
1164  !> Deallocates the KS wavefunctions defined within a states_elec_t structure.
1165  subroutine states_elec_deallocate_wfns(st)
1166    type(states_elec_t), intent(inout) :: st
1167
1168    PUSH_SUB(states_elec_deallocate_wfns)
1169
1170    call states_elec_group_end(st%group, st%d)
1171
1172    POP_SUB(states_elec_deallocate_wfns)
1173  end subroutine states_elec_deallocate_wfns
1174
1175
1176  ! ---------------------------------------------------------
1177  subroutine states_elec_densities_init(st, gr, geo)
1178    type(states_elec_t), target, intent(inout) :: st
1179    type(grid_t),                intent(in)    :: gr
1180    type(geometry_t),            intent(in)    :: geo
1181
1182    FLOAT :: fsize
1183
1184    PUSH_SUB(states_elec_densities_init)
1185
1186    SAFE_ALLOCATE(st%rho(1:gr%fine%mesh%np_part, 1:st%d%nspin))
1187    st%rho = M_ZERO
1188
1189    if(geo%nlcc) then
1190      SAFE_ALLOCATE(st%rho_core(1:gr%fine%mesh%np))
1191      st%rho_core(:) = M_ZERO
1192    end if
1193
1194    fsize = gr%mesh%np_part*CNST(8.0)*st%d%block_size
1195
1196    call messages_write('Info: states-block size = ')
1197    call messages_write(fsize, fmt = '(f10.1)', align_left = .true., units = unit_megabytes, print_units = .true.)
1198    call messages_info()
1199
1200    POP_SUB(states_elec_densities_init)
1201  end subroutine states_elec_densities_init
1202
1203  !---------------------------------------------------------------------
1204  subroutine states_elec_allocate_current(st, gr)
1205    type(states_elec_t), target, intent(inout) :: st
1206    type(grid_t),                intent(in)    :: gr
1207
1208    PUSH_SUB(states_elec_allocate_current)
1209
1210    if(.not. associated(st%current)) then
1211      SAFE_ALLOCATE(st%current(1:gr%mesh%np_part, 1:gr%mesh%sb%dim, 1:st%d%nspin))
1212      st%current = M_ZERO
1213    end if
1214
1215    if(.not. associated(st%current_kpt)) then
1216      SAFE_ALLOCATE(st%current_kpt(1:gr%mesh%np,1:gr%mesh%sb%dim,st%d%kpt%start:st%d%kpt%end))
1217      st%current_kpt = M_ZERO
1218    end if
1219
1220    POP_SUB(states_elec_allocate_current)
1221  end subroutine states_elec_allocate_current
1222
1223  !---------------------------------------------------------------------
1224  !> This subroutine: (i) Fills in the block size (st\%d\%block_size);
1225  !! (ii) Finds out whether or not to pack the states (st\%d\%pack_states);
1226  !! (iii) Finds out the orthogonalization method (st\%d\%orth_method).
1227  subroutine states_elec_exec_init(st, namespace, mc)
1228    type(states_elec_t),  intent(inout) :: st
1229    type(namespace_t),    intent(in)    :: namespace
1230    type(multicomm_t),    intent(in)    :: mc
1231
1232    integer :: default
1233    logical :: defaultl
1234
1235    PUSH_SUB(states_elec_exec_init)
1236
1237    !%Variable StatesPack
1238    !%Type logical
1239    !%Section Execution::Optimization
1240    !%Description
1241    !% When set to yes, states are stored in packed mode, which improves
1242    !% performance considerably. Not all parts of the code will profit from
1243    !% this, but should nevertheless work regardless of how the states are
1244    !% stored.
1245    !%
1246    !% If OpenCL is used and this variable is set to yes, Octopus
1247    !% will store the wave-functions in device (GPU) memory. If
1248    !% there is not enough memory to store all the wave-functions,
1249    !% execution will stop with an error.
1250    !%
1251    !% See also the related <tt>HamiltonianApplyPacked</tt> variable.
1252    !%
1253    !% The default is yes except when using OpenCL.
1254    !%End
1255
1256    defaultl = .true.
1257    if(accel_is_enabled()) then
1258      defaultl = .false.
1259    end if
1260    call parse_variable(namespace, 'StatesPack', defaultl, st%d%pack_states)
1261
1262    call messages_print_var_value(stdout, 'StatesPack', st%d%pack_states)
1263
1264    call messages_obsolete_variable(namespace, 'StatesMirror')
1265
1266    !%Variable StatesOrthogonalization
1267    !%Type integer
1268    !%Section SCF::Eigensolver
1269    !%Description
1270    !% The full orthogonalization method used by some
1271    !% eigensolvers. The default is <tt>cholesky_serial</tt>, except with state
1272    !% parallelization, the default is <tt>cholesky_parallel</tt>.
1273    !%Option cholesky_serial 1
1274    !% Cholesky decomposition implemented using
1275    !% BLAS/LAPACK. Can be used with domain parallelization but not
1276    !% state parallelization.
1277    !%Option cholesky_parallel 2
1278    !% Cholesky decomposition implemented using
1279    !% ScaLAPACK. Compatible with states parallelization.
1280    !%Option cgs 3
1281    !% Classical Gram-Schmidt (CGS) orthogonalization.
1282    !% Can be used with domain parallelization but not state parallelization.
1283    !% The algorithm is defined in Giraud et al., Computers and Mathematics with Applications 50, 1069 (2005).
1284    !%Option mgs 4
1285    !% Modified Gram-Schmidt (MGS) orthogonalization.
1286    !% Can be used with domain parallelization but not state parallelization.
1287    !% The algorithm is defined in Giraud et al., Computers and Mathematics with Applications 50, 1069 (2005).
1288    !%Option drcgs 5
1289    !% Classical Gram-Schmidt orthogonalization with double-step reorthogonalization.
1290    !% Can be used with domain parallelization but not state parallelization.
1291    !% The algorithm is taken from Giraud et al., Computers and Mathematics with Applications 50, 1069 (2005).
1292    !% According to this reference, this is much more precise than CGS or MGS algorithms. The MGS version seems not to improve much the stability and would require more communications over the domains.
1293    !%End
1294
1295    default = OPTION__STATESORTHOGONALIZATION__CHOLESKY_SERIAL
1296#ifdef HAVE_SCALAPACK
1297    if(multicomm_strategy_is_parallel(mc, P_STRATEGY_STATES)) then
1298      default = OPTION__STATESORTHOGONALIZATION__CHOLESKY_PARALLEL
1299    end if
1300#endif
1301
1302    call parse_variable(namespace, 'StatesOrthogonalization', default, st%d%orth_method)
1303
1304    if(.not.varinfo_valid_option('StatesOrthogonalization', st%d%orth_method)) then
1305      call messages_input_error(namespace, 'StatesOrthogonalization')
1306    end if
1307    call messages_print_var_option(stdout, 'StatesOrthogonalization', st%d%orth_method)
1308
1309    !%Variable StatesCLDeviceMemory
1310    !%Type float
1311    !%Section Execution::Optimization
1312    !%Default -512
1313    !%Description
1314    !% This variable selects the amount of OpenCL device memory that
1315    !% will be used by Octopus to store the states.
1316    !%
1317    !% A positive number smaller than 1 indicates a fraction of the total
1318    !% device memory. A number larger than one indicates an absolute
1319    !% amount of memory in megabytes. A negative number indicates an
1320    !% amount of memory in megabytes that would be subtracted from
1321    !% the total device memory.
1322    !%End
1323    call parse_variable(namespace, 'StatesCLDeviceMemory', CNST(-512.0), st%d%cl_states_mem)
1324
1325    POP_SUB(states_elec_exec_init)
1326  end subroutine states_elec_exec_init
1327
1328
1329  ! ---------------------------------------------------------
1330  subroutine states_elec_copy(stout, stin, exclude_wfns, exclude_eigenval)
1331    type(states_elec_t), target, intent(inout) :: stout
1332    type(states_elec_t),         intent(in)    :: stin
1333    logical, optional,           intent(in)    :: exclude_wfns !< do not copy wavefunctions, densities, node
1334    logical, optional,           intent(in)    :: exclude_eigenval !< do not copy eigenvalues, occ, spin
1335
1336    logical :: exclude_wfns_
1337
1338    PUSH_SUB(states_elec_copy)
1339
1340    exclude_wfns_ = optional_default(exclude_wfns, .false.)
1341
1342    call states_elec_null(stout)
1343
1344    call states_elec_dim_copy(stout%d, stin%d)
1345
1346    call modelmb_particles_copy(stout%modelmbparticles, stin%modelmbparticles)
1347    if (stin%modelmbparticles%nparticle > 0) then
1348      call loct_pointer_copy(stout%mmb_nspindown, stin%mmb_nspindown)
1349      call loct_pointer_copy(stout%mmb_iyoung, stin%mmb_iyoung)
1350      call loct_pointer_copy(stout%mmb_proj, stin%mmb_proj)
1351    end if
1352
1353    stout%wfs_type = stin%wfs_type
1354    stout%nst      = stin%nst
1355
1356    stout%only_userdef_istates = stin%only_userdef_istates
1357
1358    if(.not. exclude_wfns_) call loct_pointer_copy(stout%rho, stin%rho)
1359
1360    stout%calc_eigenval = stin%calc_eigenval
1361    stout%uniform_occ = stin%uniform_occ
1362
1363    if(.not. optional_default(exclude_eigenval, .false.)) then
1364      call loct_pointer_copy(stout%eigenval, stin%eigenval)
1365      call loct_pointer_copy(stout%occ, stin%occ)
1366      call loct_pointer_copy(stout%spin, stin%spin)
1367    end if
1368
1369    ! the call to init_block is done at the end of this subroutine
1370    ! it allocates iblock, psib, block_is_local
1371    stout%group%nblocks = stin%group%nblocks
1372
1373    call loct_allocatable_copy(stout%user_def_states, stin%user_def_states)
1374
1375    call loct_pointer_copy(stout%current, stin%current)
1376    call loct_pointer_copy(stout%current_kpt, stin%current_kpt)
1377
1378    call loct_pointer_copy(stout%rho_core, stin%rho_core)
1379    call loct_pointer_copy(stout%frozen_rho, stin%frozen_rho)
1380    call loct_pointer_copy(stout%frozen_tau, stin%frozen_tau)
1381    call loct_pointer_copy(stout%frozen_gdens, stin%frozen_gdens)
1382    call loct_pointer_copy(stout%frozen_ldens, stin%frozen_ldens)
1383
1384    stout%fixed_occ = stin%fixed_occ
1385    stout%restart_fixed_occ = stin%restart_fixed_occ
1386
1387    stout%fixed_spins = stin%fixed_spins
1388
1389    stout%qtot       = stin%qtot
1390    stout%val_charge = stin%val_charge
1391
1392    call smear_copy(stout%smear, stin%smear)
1393
1394    stout%parallel_in_states = stin%parallel_in_states
1395    call mpi_grp_copy(stout%mpi_grp, stin%mpi_grp)
1396    stout%dom_st_kpt_mpi_grp = stin%dom_st_kpt_mpi_grp
1397    stout%st_kpt_mpi_grp     = stin%st_kpt_mpi_grp
1398    call loct_pointer_copy(stout%node, stin%node)
1399
1400#ifdef HAVE_SCALAPACK
1401    call blacs_proc_grid_copy(stin%dom_st_proc_grid, stout%dom_st_proc_grid)
1402#endif
1403
1404    call distributed_copy(stin%dist, stout%dist)
1405
1406    stout%scalapack_compatible = stin%scalapack_compatible
1407
1408    stout%lnst       = stin%lnst
1409    stout%st_start   = stin%st_start
1410    stout%st_end     = stin%st_end
1411
1412    if(stin%parallel_in_states) call multicomm_all_pairs_copy(stout%ap, stin%ap)
1413
1414    stout%symmetrize_density = stin%symmetrize_density
1415
1416    if(.not. exclude_wfns_) call states_elec_group_copy(stin%d,stin%group, stout%group)
1417
1418    stout%packed = stin%packed
1419
1420    stout%randomization = stin%randomization
1421
1422    POP_SUB(states_elec_copy)
1423  end subroutine states_elec_copy
1424
1425
1426  ! ---------------------------------------------------------
1427  subroutine states_elec_end(st)
1428    type(states_elec_t), intent(inout) :: st
1429
1430    PUSH_SUB(states_elec_end)
1431
1432    call states_elec_dim_end(st%d)
1433
1434    if (st%modelmbparticles%nparticle > 0) then
1435      SAFE_DEALLOCATE_P(st%mmb_nspindown)
1436      SAFE_DEALLOCATE_P(st%mmb_iyoung)
1437      SAFE_DEALLOCATE_P(st%mmb_proj)
1438    end if
1439    call modelmb_particles_end(st%modelmbparticles)
1440
1441    ! this deallocates dpsi, zpsi, psib, iblock, iblock
1442    call states_elec_deallocate_wfns(st)
1443
1444    SAFE_DEALLOCATE_A(st%user_def_states)
1445
1446    SAFE_DEALLOCATE_P(st%rho)
1447    SAFE_DEALLOCATE_P(st%eigenval)
1448
1449    SAFE_DEALLOCATE_P(st%current)
1450    SAFE_DEALLOCATE_P(st%current_kpt)
1451    SAFE_DEALLOCATE_P(st%rho_core)
1452    SAFE_DEALLOCATE_P(st%frozen_rho)
1453    SAFE_DEALLOCATE_P(st%frozen_tau)
1454    SAFE_DEALLOCATE_P(st%frozen_gdens)
1455    SAFE_DEALLOCATE_P(st%frozen_ldens)
1456    SAFE_DEALLOCATE_P(st%occ)
1457    SAFE_DEALLOCATE_P(st%spin)
1458
1459#ifdef HAVE_SCALAPACK
1460    call blacs_proc_grid_end(st%dom_st_proc_grid)
1461#endif
1462
1463    call distributed_end(st%dist)
1464
1465    SAFE_DEALLOCATE_P(st%node)
1466
1467    if(st%parallel_in_states) then
1468      SAFE_DEALLOCATE_P(st%ap%schedule)
1469    end if
1470
1471    POP_SUB(states_elec_end)
1472  end subroutine states_elec_end
1473
1474  ! ---------------------------------------------------------
1475  !> generate a hydrogen s-wavefunction around a random point
1476  subroutine states_elec_generate_random(st, mesh, sb, ist_start_, ist_end_, ikpt_start_, ikpt_end_, normalized)
1477    type(states_elec_t),    intent(inout) :: st
1478    type(mesh_t),           intent(in)    :: mesh
1479    type(simul_box_t),      intent(in)    :: sb
1480    integer, optional,      intent(in)    :: ist_start_
1481    integer, optional,      intent(in)    :: ist_end_
1482    integer, optional,      intent(in)    :: ikpt_start_
1483    integer, optional,      intent(in)    :: ikpt_end_
1484    logical, optional,      intent(in)    :: normalized !< whether generate states should have norm 1, true by default
1485
1486    integer :: ist, ik, id, ist_start, ist_end, jst, ikpt_start, ikpt_end
1487    CMPLX   :: alpha, beta
1488    FLOAT, allocatable :: dpsi(:,  :)
1489    CMPLX, allocatable :: zpsi(:,  :), zpsi2(:)
1490    integer :: ikpoint, ip
1491
1492    logical :: normalized_
1493
1494    normalized_ = optional_default(normalized, .true.)
1495
1496    PUSH_SUB(states_elec_generate_random)
1497
1498    ist_start = optional_default(ist_start_, 1)
1499    ist_end = optional_default(ist_end_, st%nst)
1500    ikpt_start = optional_default(ikpt_start_, 1)
1501    ikpt_end = optional_default(ikpt_end_, st%d%nik)
1502
1503    SAFE_ALLOCATE(dpsi(1:mesh%np, 1:st%d%dim))
1504    if (states_are_complex(st)) then
1505      SAFE_ALLOCATE(zpsi(1:mesh%np, 1:st%d%dim))
1506    end if
1507
1508    select case(st%d%ispin)
1509    case(UNPOLARIZED, SPIN_POLARIZED)
1510
1511      do ik = ikpt_start, ikpt_end
1512        ikpoint = states_elec_dim_get_kpoint_index(st%d, ik)
1513        do ist = ist_start, ist_end
1514          if (states_are_real(st).or.kpoints_point_is_gamma(sb%kpoints, ikpoint)) then
1515            if(st%randomization == PAR_INDEPENDENT) then
1516              call dmf_random(mesh, dpsi(:, 1), &
1517                pre_shift = mesh%vp%xlocal-1, &
1518                post_shift = mesh%vp%np_global - mesh%vp%xlocal - mesh%np + 1, &
1519                normalized = normalized)
1520            else
1521              call dmf_random(mesh, dpsi(:, 1), normalized = normalized)
1522            end if
1523            if(.not. state_kpt_is_local(st, ist, ik)) cycle
1524            if(states_are_complex(st)) then !Gamma point
1525              do ip = 1, mesh%np
1526                zpsi(ip,1) = cmplx(dpsi(ip,1), M_ZERO)
1527              end do
1528              call states_elec_set_state(st, mesh, ist,  ik, zpsi)
1529            else
1530              call states_elec_set_state(st, mesh, ist,  ik, dpsi)
1531            end if
1532          else
1533            if(st%randomization == PAR_INDEPENDENT) then
1534              call zmf_random(mesh, zpsi(:, 1), &
1535                pre_shift = mesh%vp%xlocal-1, &
1536                post_shift = mesh%vp%np_global - mesh%vp%xlocal - mesh%np + 1, &
1537                normalized = normalized)
1538            else
1539              call zmf_random(mesh, zpsi(:, 1), normalized = normalized)
1540            end if
1541            if(.not. state_kpt_is_local(st, ist, ik)) cycle
1542            call states_elec_set_state(st, mesh, ist,  ik, zpsi)
1543          end if
1544        end do
1545      end do
1546
1547    case(SPINORS)
1548
1549      ASSERT(states_are_complex(st))
1550
1551      if(st%fixed_spins) then
1552
1553        do ik = ikpt_start, ikpt_end
1554          ikpoint = states_elec_dim_get_kpoint_index(st%d, ik)
1555          do ist = ist_start, ist_end
1556            if(kpoints_point_is_gamma(sb%kpoints, ikpoint)) then
1557              if(st%randomization == PAR_INDEPENDENT) then
1558                call dmf_random(mesh, dpsi(:, 1), &
1559                  pre_shift = mesh%vp%xlocal-1, &
1560                  post_shift = mesh%vp%np_global - mesh%vp%xlocal - mesh%np + 1, &
1561                  normalized = normalized)
1562              else
1563                call dmf_random(mesh, dpsi(:, 1), normalized = normalized)
1564                if(.not. state_kpt_is_local(st, ist, ik)) cycle
1565              end if
1566              do ip = 1, mesh%np
1567                zpsi(ip,1) = cmplx(dpsi(ip,1), M_ZERO)
1568              end do
1569              call states_elec_set_state(st, mesh, ist,  ik, zpsi)
1570            else
1571              if(st%randomization == PAR_INDEPENDENT) then
1572                call zmf_random(mesh, zpsi(:, 1), &
1573                  pre_shift = mesh%vp%xlocal-1, &
1574                  post_shift = mesh%vp%np_global - mesh%vp%xlocal - mesh%np + 1, &
1575                  normalized = normalized)
1576              else
1577                call zmf_random(mesh, zpsi(:, 1), normalized = normalized)
1578                if(.not. state_kpt_is_local(st, ist, ik)) cycle
1579              end if
1580            end if
1581            if(.not. state_kpt_is_local(st, ist, ik)) cycle
1582            ! In this case, the spinors are made of a spatial part times a vector [alpha beta]^T in
1583            ! spin space (i.e., same spatial part for each spin component). So (alpha, beta)
1584            ! determines the spin values. The values of (alpha, beta) can be be obtained
1585            ! with simple formulae from <Sx>, <Sy>, <Sz>.
1586            !
1587            ! Note that here we orthonormalize the orbital part. This ensures that the spinors
1588            ! are untouched later in the general orthonormalization, and therefore the spin values
1589            ! of each spinor remain the same.
1590            SAFE_ALLOCATE(zpsi2(1:mesh%np))
1591            do jst = ist_start, ist - 1
1592              call states_elec_get_state(st, mesh, 1, jst, ik, zpsi2)
1593              zpsi(1:mesh%np, 1) = zpsi(1:mesh%np, 1) - zmf_dotp(mesh, zpsi(:, 1), zpsi2)*zpsi2(1:mesh%np)
1594            end do
1595            SAFE_DEALLOCATE_A(zpsi2)
1596
1597            call zmf_normalize(mesh, 1, zpsi)
1598            zpsi(1:mesh%np, 2) = zpsi(1:mesh%np, 1)
1599
1600            alpha = TOCMPLX(sqrt(M_HALF + st%spin(3, ist, ik)), M_ZERO)
1601            beta  = TOCMPLX(sqrt(M_ONE - abs(alpha)**2), M_ZERO)
1602            if(abs(alpha) > M_ZERO) then
1603              beta = TOCMPLX(st%spin(1, ist, ik) / abs(alpha), st%spin(2, ist, ik) / abs(alpha))
1604            end if
1605            zpsi(1:mesh%np, 1) = alpha*zpsi(1:mesh%np, 1)
1606            zpsi(1:mesh%np, 2) = beta*zpsi(1:mesh%np, 2)
1607            call states_elec_set_state(st, mesh, ist,  ik, zpsi)
1608          end do
1609        end do
1610      else
1611        do ik = ikpt_start, ikpt_end
1612          do ist = ist_start, ist_end
1613            do id = 1, st%d%dim
1614              if(st%randomization == PAR_INDEPENDENT) then
1615                call zmf_random(mesh, zpsi(:, id), &
1616                  pre_shift = mesh%vp%xlocal-1, &
1617                  post_shift = mesh%vp%np_global - mesh%vp%xlocal - mesh%np + 1, &
1618                  normalized = .false.)
1619              else
1620                call zmf_random(mesh, zpsi(:, id), normalized = .false.)
1621              end if
1622            end do
1623            ! We need to generate the wave functions even if not using them in order to be consistent with the random seed in parallel runs.
1624            if(.not. state_kpt_is_local(st, ist, ik)) cycle
1625            ! Note that mf_random normalizes each spin channel independently to 1.
1626            ! Therefore we need to renormalize the spinor:
1627            if(normalized_) call zmf_normalize(mesh, st%d%dim, zpsi)
1628            call states_elec_set_state(st, mesh, ist,  ik, zpsi)
1629          end do
1630        end do
1631      end if
1632
1633    end select
1634
1635    SAFE_DEALLOCATE_A(dpsi)
1636    SAFE_DEALLOCATE_A(zpsi)
1637
1638    POP_SUB(states_elec_generate_random)
1639  end subroutine states_elec_generate_random
1640
1641  ! ---------------------------------------------------------
1642  subroutine states_elec_fermi(st, namespace, mesh, compute_spin)
1643    type(states_elec_t), intent(inout) :: st
1644    type(namespace_t),   intent(in)    :: namespace
1645    type(mesh_t),        intent(in)    :: mesh
1646    logical, optional,   intent(in)    :: compute_spin
1647
1648    !> Local variables.
1649    integer            :: ist, ik
1650    FLOAT              :: charge
1651    CMPLX, allocatable :: zpsi(:, :)
1652
1653    PUSH_SUB(states_elec_fermi)
1654
1655    call smear_find_fermi_energy(st%smear, namespace, st%eigenval, st%occ, st%qtot, &
1656      st%d%nik, st%nst, st%d%kweights)
1657
1658    call smear_fill_occupations(st%smear, st%eigenval, st%occ, st%d%kweights, &
1659      st%d%nik, st%nst)
1660
1661    ! check if everything is OK
1662    charge = M_ZERO
1663    do ist = 1, st%nst
1664      charge = charge + sum(st%occ(ist, 1:st%d%nik) * st%d%kweights(1:st%d%nik))
1665    end do
1666    if(abs(charge-st%qtot) > CNST(1e-6)) then
1667      message(1) = 'Occupations do not integrate to total charge.'
1668      write(message(2), '(6x,f12.8,a,f12.8)') charge, ' != ', st%qtot
1669      call messages_warning(2, namespace=namespace)
1670      if(charge < M_EPSILON) then
1671        message(1) = "There don't seem to be any electrons at all!"
1672        call messages_fatal(1, namespace=namespace)
1673      end if
1674    end if
1675
1676    if(st%d%ispin == SPINORS .and. optional_default(compute_spin,.true.)) then
1677      ASSERT(states_are_complex(st))
1678
1679      st%spin(:,:,:) = M_ZERO
1680
1681      SAFE_ALLOCATE(zpsi(1:mesh%np, st%d%dim))
1682      do ik = st%d%kpt%start, st%d%kpt%end
1683        do ist = st%st_start, st%st_end
1684          call states_elec_get_state(st, mesh, ist, ik, zpsi)
1685          st%spin(1:3, ist, ik) = state_spin(mesh, zpsi)
1686        end do
1687      end do
1688      SAFE_DEALLOCATE_A(zpsi)
1689
1690#if defined(HAVE_MPI)
1691        if(st%parallel_in_states .or. st%d%kpt%parallel) then
1692          call comm_allreduce(st%st_kpt_mpi_grp%comm, st%spin)
1693        end if
1694#endif
1695
1696    end if
1697
1698    POP_SUB(states_elec_fermi)
1699  end subroutine states_elec_fermi
1700
1701
1702  ! ---------------------------------------------------------
1703  !> function to calculate the eigenvalues sum using occupations as weights
1704  function states_elec_eigenvalues_sum(st, alt_eig) result(tot)
1705    type(states_elec_t),  intent(in) :: st
1706    FLOAT,      optional, intent(in) :: alt_eig(st%st_start:, st%d%kpt%start:) !< (:st%st_end, :st%d%kpt%end)
1707    FLOAT                            :: tot
1708
1709    integer :: ik
1710
1711    PUSH_SUB(states_elec_eigenvalues_sum)
1712
1713    tot = M_ZERO
1714    do ik = st%d%kpt%start, st%d%kpt%end
1715      if(present(alt_eig)) then
1716        tot = tot + st%d%kweights(ik) * sum(st%occ(st%st_start:st%st_end, ik) * &
1717          alt_eig(st%st_start:st%st_end, ik))
1718      else
1719        tot = tot + st%d%kweights(ik) * sum(st%occ(st%st_start:st%st_end, ik) * &
1720          st%eigenval(st%st_start:st%st_end, ik))
1721      end if
1722    end do
1723
1724    if(st%parallel_in_states .or. st%d%kpt%parallel) call comm_allreduce(st%st_kpt_mpi_grp%comm, tot)
1725
1726    POP_SUB(states_elec_eigenvalues_sum)
1727  end function states_elec_eigenvalues_sum
1728
1729
1730  ! ---------------------------------------------------------
1731  subroutine states_elec_distribute_nodes(st, namespace, mc)
1732    type(states_elec_t),    intent(inout) :: st
1733    type(namespace_t),      intent(in)    :: namespace
1734    type(multicomm_t),      intent(in)    :: mc
1735
1736    PUSH_SUB(states_elec_distribute_nodes)
1737
1738    ! Defaults.
1739    st%node(:)            = 0
1740    st%st_start           = 1
1741    st%st_end             = st%nst
1742    st%lnst               = st%nst
1743    st%parallel_in_states = .false.
1744    call mpi_grp_init(st%mpi_grp, mc%group_comm(P_STRATEGY_STATES))
1745    call mpi_grp_init(st%dom_st_kpt_mpi_grp, mc%dom_st_kpt_comm)
1746    call mpi_grp_init(st%dom_st_mpi_grp, mc%dom_st_comm)
1747    call mpi_grp_init(st%st_kpt_mpi_grp, mc%st_kpt_comm)
1748
1749#ifdef HAVE_SCALAPACK
1750    !%Variable ScaLAPACKCompatible
1751    !%Type logical
1752    !%Section Execution::Parallelization
1753    !%Description
1754    !% Whether to use a layout for states parallelization which is compatible with ScaLAPACK.
1755    !% The default is yes for <tt>CalculationMode = gs, unocc, go</tt> without k-point parallelization,
1756    !% and no otherwise. (Setting to other than default is experimental.)
1757    !% The value must be yes if any ScaLAPACK routines are called in the course of the run;
1758    !% it must be set by hand for <tt>td</tt> with <tt>TDDynamics = bo</tt>.
1759    !% This variable has no effect unless you are using states parallelization and have linked ScaLAPACK.
1760    !% Note: currently, use of ScaLAPACK is not compatible with task parallelization (<i>i.e.</i> slaves).
1761    !%End
1762    call parse_variable(namespace, 'ScaLAPACKCompatible', &
1763      calc_mode_par_scalapack_compat() .and. .not. st%d%kpt%parallel, st%scalapack_compatible)
1764    if((calc_mode_par_scalapack_compat() .and. .not. st%d%kpt%parallel) .neqv. st%scalapack_compatible) &
1765      call messages_experimental('Setting ScaLAPACKCompatible to other than default')
1766
1767    if(st%scalapack_compatible) then
1768      if(multicomm_have_slaves(mc)) &
1769        call messages_not_implemented("ScaLAPACK usage with task parallelization (slaves)", namespace=namespace)
1770      call blacs_proc_grid_init(st%dom_st_proc_grid, st%dom_st_mpi_grp)
1771    else
1772      call blacs_proc_grid_nullify(st%dom_st_proc_grid)
1773    end if
1774#else
1775    st%scalapack_compatible = .false.
1776#endif
1777
1778    if(multicomm_strategy_is_parallel(mc, P_STRATEGY_STATES)) then
1779
1780#ifdef HAVE_MPI
1781      call multicomm_create_all_pairs(st%mpi_grp, st%ap)
1782#endif
1783
1784      if(st%nst < st%mpi_grp%size) then
1785        message(1) = "Have more processors than necessary"
1786        write(message(2),'(i4,a,i4,a)') st%mpi_grp%size, " processors and ", st%nst, " states."
1787        call messages_fatal(2, namespace=namespace)
1788      end if
1789
1790      call distributed_init(st%dist, st%nst, st%mpi_grp%comm, "states", scalapack_compat = st%scalapack_compatible)
1791
1792      st%st_start = st%dist%start
1793      st%st_end   = st%dist%end
1794      st%lnst     = st%dist%nlocal
1795      st%node(1:st%nst) = st%dist%node(1:st%nst)
1796      st%parallel_in_states = st%dist%parallel
1797
1798    end if
1799
1800    POP_SUB(states_elec_distribute_nodes)
1801  end subroutine states_elec_distribute_nodes
1802
1803  ! ---------------------------------------------------------
1804  !
1805  !> This function can calculate several quantities that depend on
1806  !! derivatives of the orbitals from the states and the density.
1807  !! The quantities to be calculated depend on the arguments passed.
1808  subroutine states_elec_calc_quantities(der, st, nlcc, &
1809    kinetic_energy_density, paramagnetic_current, density_gradient, density_laplacian, &
1810    gi_kinetic_energy_density, st_end)
1811    type(derivatives_t),     intent(in)    :: der
1812    type(states_elec_t),     intent(in)    :: st
1813    logical,                 intent(in)    :: nlcc
1814    FLOAT, optional, target, intent(out)   :: kinetic_energy_density(:,:)       !< The kinetic energy density.
1815    FLOAT, optional, target, intent(out)   :: paramagnetic_current(:,:,:)       !< The paramagnetic current.
1816    FLOAT, optional,         intent(out)   :: density_gradient(:,:,:)           !< The gradient of the density.
1817    FLOAT, optional,         intent(out)   :: density_laplacian(:,:)            !< The Laplacian of the density.
1818    FLOAT, optional,         intent(out)   :: gi_kinetic_energy_density(:,:)    !< The gauge-invariant kinetic energy density.
1819    integer, optional,       intent(in)    :: st_end                            !< Maximum state used to compute the quantities
1820
1821    FLOAT, pointer :: jp(:, :, :)
1822    FLOAT, pointer :: tau(:, :)
1823    CMPLX, allocatable :: wf_psi(:,:), gwf_psi(:,:,:), wf_psi_conj(:,:), lwf_psi(:,:)
1824    FLOAT, allocatable :: abs_wf_psi(:,:), abs_gwf_psi(:)
1825    CMPLX, allocatable :: psi_gpsi(:,:)
1826    CMPLX   :: c_tmp
1827    integer :: is, ik, ist, i_dim, st_dim, ii, st_end_, idir
1828    FLOAT   :: ww, kpoint(1:MAX_DIM)
1829    logical :: something_to_do
1830    FLOAT, allocatable :: symm(:, :)
1831    type(symmetrizer_t) :: symmetrizer
1832    type(profile_t), save :: prof
1833
1834    call profiling_in(prof, "STATES_CALC_QUANTITIES")
1835
1836    PUSH_SUB(states_elec_calc_quantities)
1837
1838    st_end_ = min(st%st_end, optional_default(st_end, st%st_end))
1839
1840    something_to_do = present(kinetic_energy_density) .or. present(gi_kinetic_energy_density) .or. &
1841      present(paramagnetic_current) .or. present(density_gradient) .or. present(density_laplacian)
1842    ASSERT(something_to_do)
1843
1844    SAFE_ALLOCATE( wf_psi(1:der%mesh%np_part, 1:st%d%dim))
1845    SAFE_ALLOCATE( wf_psi_conj(1:der%mesh%np_part, 1:st%d%dim))
1846    SAFE_ALLOCATE(gwf_psi(1:der%mesh%np, 1:der%mesh%sb%dim, 1:st%d%dim))
1847    SAFE_ALLOCATE(abs_wf_psi(1:der%mesh%np, 1:st%d%dim))
1848    SAFE_ALLOCATE(abs_gwf_psi(1:der%mesh%np))
1849    SAFE_ALLOCATE(psi_gpsi(1:der%mesh%np, 1:st%d%dim))
1850    if(present(density_laplacian)) then
1851      SAFE_ALLOCATE(lwf_psi(1:der%mesh%np, 1:st%d%dim))
1852    end if
1853
1854    nullify(tau)
1855    if(present(kinetic_energy_density)) tau => kinetic_energy_density
1856
1857    nullify(jp)
1858    if(present(paramagnetic_current)) jp => paramagnetic_current
1859
1860    ! for the gauge-invariant kinetic energy density we need the
1861    ! current and the kinetic energy density
1862    if(present(gi_kinetic_energy_density)) then
1863      if(.not. present(paramagnetic_current) .and. states_are_complex(st)) then
1864        SAFE_ALLOCATE(jp(1:der%mesh%np, 1:der%mesh%sb%dim, 1:st%d%nspin))
1865      end if
1866      if(.not. present(kinetic_energy_density)) then
1867        SAFE_ALLOCATE(tau(1:der%mesh%np, 1:st%d%nspin))
1868      end if
1869    end if
1870
1871    if(associated(tau)) tau = M_ZERO
1872    if(associated(jp)) jp = M_ZERO
1873    if(present(density_gradient)) density_gradient(:,:,:) = M_ZERO
1874    if(present(density_laplacian)) density_laplacian(:,:) = M_ZERO
1875    if(present(gi_kinetic_energy_density)) gi_kinetic_energy_density = M_ZERO
1876
1877    do ik = st%d%kpt%start, st%d%kpt%end
1878
1879      kpoint(1:der%mesh%sb%dim) = kpoints_get_point(der%mesh%sb%kpoints, states_elec_dim_get_kpoint_index(st%d, ik))
1880      is = states_elec_dim_get_spin_index(st%d, ik)
1881
1882      do ist = st%st_start, st_end_
1883        ww = st%d%kweights(ik)*st%occ(ist, ik)
1884        if(abs(ww) <= M_EPSILON) cycle
1885
1886        ! all calculations will be done with complex wavefunctions
1887        call states_elec_get_state(st, der%mesh, ist, ik, wf_psi)
1888
1889        do st_dim = 1, st%d%dim
1890          call boundaries_set(der%boundaries, wf_psi(:, st_dim))
1891        end do
1892
1893        ! calculate gradient of the wavefunction
1894        do st_dim = 1, st%d%dim
1895          call zderivatives_grad(der, wf_psi(:,st_dim), gwf_psi(:,:,st_dim), set_bc = .false.)
1896        end do
1897
1898        ! calculate the Laplacian of the wavefunction
1899        if (present(density_laplacian)) then
1900          do st_dim = 1, st%d%dim
1901            call zderivatives_lapl(der, wf_psi(:,st_dim), lwf_psi(:,st_dim), set_bc = .false.)
1902          end do
1903        end if
1904
1905        !We precompute some quantites, to avoid to compute it many times
1906        wf_psi_conj(1:der%mesh%np, 1:st%d%dim) = conjg(wf_psi(1:der%mesh%np,1:st%d%dim))
1907        abs_wf_psi(1:der%mesh%np, 1:st%d%dim) = real(wf_psi_conj(1:der%mesh%np, 1:st%d%dim)*wf_psi(1:der%mesh%np, 1:st%d%dim))
1908
1909        if(present(density_laplacian)) then
1910          density_laplacian(1:der%mesh%np, is) = density_laplacian(1:der%mesh%np, is) + &
1911               ww*M_TWO*real(wf_psi_conj(1:der%mesh%np, 1)*lwf_psi(1:der%mesh%np, 1))
1912          if(st%d%ispin == SPINORS) then
1913            density_laplacian(1:der%mesh%np, 2) = density_laplacian(1:der%mesh%np, 2) + &
1914                 ww*M_TWO*real(wf_psi_conj(1:der%mesh%np, 2)*lwf_psi(1:der%mesh%np, 2))
1915            density_laplacian(1:der%mesh%np, 3) = density_laplacian(1:der%mesh%np, 3) + &
1916                 ww*real (lwf_psi(1:der%mesh%np, 1)*wf_psi_conj(1:der%mesh%np, 2) + &
1917                 wf_psi(1:der%mesh%np, 1)*conjg(lwf_psi(1:der%mesh%np, 2)))
1918            density_laplacian(1:der%mesh%np, 4) = density_laplacian(1:der%mesh%np, 4) + &
1919                 ww*aimag(lwf_psi(1:der%mesh%np, 1)*wf_psi_conj(1:der%mesh%np, 2) + &
1920                 wf_psi(1:der%mesh%np, 1)*conjg(lwf_psi(1:der%mesh%np, 2)))
1921          end if
1922        end if
1923
1924        do i_dim = 1, der%mesh%sb%dim
1925
1926          !We precompute some quantites, to avoid to compute it many times
1927          psi_gpsi(1:der%mesh%np, 1:st%d%dim) = wf_psi_conj(1:der%mesh%np, 1:st%d%dim)*gwf_psi(1:der%mesh%np,i_dim,1:st%d%dim)
1928          abs_gwf_psi(1:der%mesh%np) = real(conjg(gwf_psi(1:der%mesh%np, i_dim, 1))*gwf_psi(1:der%mesh%np, i_dim, 1))
1929
1930          if(present(density_gradient)) &
1931               density_gradient(1:der%mesh%np, i_dim, is) = density_gradient(1:der%mesh%np, i_dim, is) &
1932                      + ww*M_TWO*real(psi_gpsi(1:der%mesh%np, 1))
1933          if(present(density_laplacian)) &
1934               density_laplacian(1:der%mesh%np, is) = density_laplacian(1:der%mesh%np, is)             &
1935                      + ww*M_TWO*abs_gwf_psi(1:der%mesh%np)
1936
1937          if(associated(jp)) then
1938            if (.not.(states_are_real(st))) then
1939              jp(1:der%mesh%np, i_dim, is) = jp(1:der%mesh%np, i_dim, is) + &
1940                    ww*aimag(psi_gpsi(1:der%mesh%np, 1)) &
1941                  - ww*abs_wf_psi(1:der%mesh%np, 1)*kpoint(i_dim)
1942            else
1943              jp(1:der%mesh%np, i_dim, is) = M_ZERO
1944            end if
1945          end if
1946
1947          if (associated(tau)) then
1948            tau (1:der%mesh%np, is)   = tau (1:der%mesh%np, is)        + &
1949                 ww*(abs_gwf_psi(1:der%mesh%np) + abs(kpoint(i_dim))**2*abs_wf_psi(1:der%mesh%np, 1)  &
1950                     - M_TWO*aimag(psi_gpsi(1:der%mesh%np, 1))*kpoint(i_dim))
1951          end if
1952
1953          if(st%d%ispin == SPINORS) then
1954            if(present(density_gradient)) then
1955              density_gradient(1:der%mesh%np, i_dim, 2) = density_gradient(1:der%mesh%np, i_dim, 2) + &
1956                   ww*M_TWO*real(wf_psi_conj(1:der%mesh%np, 2)*gwf_psi(1:der%mesh%np, i_dim, 2))
1957              density_gradient(1:der%mesh%np, i_dim, 3) = density_gradient(1:der%mesh%np, i_dim, 3) + ww* &
1958                   real (gwf_psi(1:der%mesh%np, i_dim, 1)*wf_psi_conj(1:der%mesh%np, 2) + &
1959                   wf_psi(1:der%mesh%np, 1)*conjg(gwf_psi(1:der%mesh%np, i_dim, 2)))
1960              density_gradient(1:der%mesh%np, i_dim, 4) = density_gradient(1:der%mesh%np, i_dim, 4) + ww* &
1961                   aimag(gwf_psi(1:der%mesh%np, i_dim, 1)*wf_psi_conj(1:der%mesh%np, 2) + &
1962                   wf_psi(1:der%mesh%np, 1)*conjg(gwf_psi(1:der%mesh%np, i_dim, 2)))
1963            end if
1964
1965            if(present(density_laplacian)) then
1966              density_laplacian(1:der%mesh%np, 2) = density_laplacian(1:der%mesh%np, 2)         + &
1967                   ww*M_TWO*real(conjg(gwf_psi(1:der%mesh%np, i_dim, 2))*gwf_psi(1:der%mesh%np, i_dim, 2))
1968              density_laplacian(1:der%mesh%np, 3) = density_laplacian(1:der%mesh%np, 3)         + &
1969                   ww*M_TWO*real (gwf_psi(1:der%mesh%np, i_dim, 1)*conjg(gwf_psi(1:der%mesh%np, i_dim, 2)))
1970              density_laplacian(1:der%mesh%np, 4) = density_laplacian(1:der%mesh%np, 4)         + &
1971                   ww*M_TWO*aimag(gwf_psi(1:der%mesh%np, i_dim, 1)*conjg(gwf_psi(1:der%mesh%np, i_dim, 2)))
1972            end if
1973
1974            ! the expression for the paramagnetic current with spinors is
1975            !     j = ( jp(1)             jp(3) + i jp(4) )
1976            !         (-jp(3) + i jp(4)   jp(2)           )
1977            if(associated(jp)) then
1978              jp(1:der%mesh%np, i_dim, 2) = jp(1:der%mesh%np, i_dim, 2) + &
1979                   ww*( aimag(psi_gpsi(1:der%mesh%np, 2)) &
1980                       - ww*abs_wf_psi(1:der%mesh%np, 2)*kpoint(i_dim))
1981              do ii = 1, der%mesh%np
1982                c_tmp = M_ONE/(M_TWO*M_zI)*wf_psi_conj(ii, 2)*gwf_psi(ii, i_dim, 1) - wf_psi(ii, 1)*conjg(gwf_psi(ii, i_dim, 2))
1983                jp(ii, i_dim, 3) = jp(ii, i_dim, 3) + ww* real(c_tmp)
1984                jp(ii, i_dim, 4) = jp(ii, i_dim, 4) + ww*aimag(c_tmp)
1985              end do
1986            end if
1987
1988            ! the expression for the paramagnetic current with spinors is
1989            !     t = ( tau(1)              tau(3) + i tau(4) )
1990            !         ( tau(3) - i tau(4)   tau(2)            )
1991            if(associated(tau)) then
1992              tau(1:der%mesh%np, 2) = tau(1:der%mesh%np, 2) + ww*(abs(gwf_psi(1:der%mesh%np, i_dim, 2))**2 &
1993                     + abs(kpoint(i_dim))**2*abs_wf_psi(1:der%mesh%np, 2)  &
1994                     - M_TWO*aimag(psi_gpsi(1:der%mesh%np, 2))*kpoint(i_dim))
1995
1996              do ii = 1, der%mesh%np
1997                c_tmp = gwf_psi(ii, i_dim, 1)*conjg(gwf_psi(ii, i_dim, 2))
1998                c_tmp = c_tmp + abs(kpoint(i_dim))**2*real(wf_psi_conj(ii, 2)*wf_psi(ii, 1))
1999                c_tmp = c_tmp - M_zI * (wf_psi_conj(ii, 2)*gwf_psi(ii,i_dim,1) &
2000                                      - wf_psi(ii, 1)*conjg(gwf_psi(ii,i_dim,2)))*kpoint(i_dim)
2001                tau(ii, 3) = tau(ii, 3) + ww* real(c_tmp)
2002                tau(ii, 4) = tau(ii, 4) + ww*aimag(c_tmp)
2003              end do
2004            end if
2005
2006            ASSERT(.not. present(gi_kinetic_energy_density))
2007
2008          end if !SPINORS
2009
2010        end do
2011
2012      end do
2013    end do
2014
2015    SAFE_DEALLOCATE_A(wf_psi_conj)
2016    SAFE_DEALLOCATE_A(abs_wf_psi)
2017    SAFE_DEALLOCATE_A(abs_gwf_psi)
2018    SAFE_DEALLOCATE_A(psi_gpsi)
2019
2020    if(.not. present(gi_kinetic_energy_density)) then
2021      if(.not. present(paramagnetic_current)) then
2022        SAFE_DEALLOCATE_P(jp)
2023      end if
2024      if(.not. present(kinetic_energy_density)) then
2025        SAFE_DEALLOCATE_P(tau)
2026      end if
2027    end if
2028
2029    if(st%parallel_in_states .or. st%d%kpt%parallel) call reduce_all(st%st_kpt_mpi_grp)
2030
2031    ! We have to symmetrize everything as they are calculated from the
2032    ! wavefunctions.
2033    ! This must be done before compute the gauge-invariant kinetic energy density
2034    if(st%symmetrize_density) then
2035      SAFE_ALLOCATE(symm(1:der%mesh%np, 1:der%mesh%sb%dim))
2036      call symmetrizer_init(symmetrizer, der%mesh)
2037      do is = 1, st%d%nspin
2038        if(associated(tau)) then
2039          call dsymmetrizer_apply(symmetrizer, der%mesh%np, field = tau(:, is), symmfield = symm(:,1), &
2040            suppress_warning = .true.)
2041          tau(1:der%mesh%np, is) = symm(1:der%mesh%np,1)
2042        end if
2043
2044        if(present(density_laplacian)) then
2045          call dsymmetrizer_apply(symmetrizer, der%mesh%np, field = density_laplacian(:, is), symmfield = symm(:,1), &
2046            suppress_warning = .true.)
2047          density_laplacian(1:der%mesh%np, is) = symm(1:der%mesh%np,1)
2048        end if
2049
2050        if(associated(jp)) then
2051          call dsymmetrizer_apply(symmetrizer, der%mesh%np, field_vector = jp(:, :, is), symmfield_vector = symm, &
2052            suppress_warning = .true.)
2053          jp(1:der%mesh%np, 1:der%mesh%sb%dim, is) = symm(1:der%mesh%np, 1:der%mesh%sb%dim)
2054        end if
2055
2056        if(present(density_gradient)) then
2057          call dsymmetrizer_apply(symmetrizer, der%mesh%np, field_vector = density_gradient(:, :, is), &
2058            symmfield_vector = symm, suppress_warning = .true.)
2059          density_gradient(1:der%mesh%np, 1:der%mesh%sb%dim, is) = symm(1:der%mesh%np, 1:der%mesh%sb%dim)
2060        end if
2061      end do
2062      call symmetrizer_end(symmetrizer)
2063      SAFE_DEALLOCATE_A(symm)
2064    end if
2065
2066
2067    if(associated(st%rho_core) .and. nlcc .and. (present(density_laplacian) .or. present(density_gradient))) then
2068       do ii = 1, der%mesh%np
2069         wf_psi(ii, 1) = st%rho_core(ii)/st%d%spin_channels
2070       end do
2071
2072       call boundaries_set(der%boundaries, wf_psi(:, 1))
2073
2074       if(present(density_gradient)) then
2075         ! calculate gradient of the NLCC
2076         call zderivatives_grad(der, wf_psi(:,1), gwf_psi(:,:,1), set_bc = .false.)
2077         do is = 1, st%d%spin_channels
2078           density_gradient(1:der%mesh%np, 1:der%mesh%sb%dim, is) = density_gradient(1:der%mesh%np, 1:der%mesh%sb%dim, is) + &
2079                                                                    gwf_psi(1:der%mesh%np, 1:der%mesh%sb%dim,1)
2080         end do
2081       end if
2082
2083       ! calculate the Laplacian of the wavefunction
2084       if (present(density_laplacian)) then
2085         call zderivatives_lapl(der, wf_psi(:,1), lwf_psi(:,1), set_bc = .false.)
2086
2087         do is = 1, st%d%spin_channels
2088           density_laplacian(1:der%mesh%np, is) = density_laplacian(1:der%mesh%np, is) + lwf_psi(1:der%mesh%np, 1)
2089         end do
2090      end if
2091    end if
2092
2093    !If we freeze some of the orbitals, we need to had the contributions here
2094    !Only in the case we are not computing it
2095    if(associated(st%frozen_tau) .and. .not. present(st_end)) then
2096      do is = 1, st%d%nspin
2097        do ii = 1, der%mesh%np
2098          tau(ii, is) = tau(ii, is) + st%frozen_tau(ii, is)
2099        end do
2100      end do
2101    end if
2102    if(associated(st%frozen_gdens) .and. .not. present(st_end)) then
2103      do is = 1, st%d%nspin
2104        do idir = 1, der%mesh%sb%dim
2105          do ii = 1, der%mesh%np
2106            density_gradient(ii, idir, is) = density_gradient(ii, idir, is) + st%frozen_gdens(ii, idir, is)
2107          end do
2108        end do
2109      end do
2110    end if
2111    if(associated(st%frozen_tau) .and. .not. present(st_end)) then
2112      do is = 1, st%d%nspin
2113        do ii = 1, der%mesh%np
2114          density_laplacian(ii, is) = density_laplacian(ii, is) + st%frozen_ldens(ii, is)
2115        end do
2116      end do
2117    end if
2118
2119    SAFE_DEALLOCATE_A(wf_psi)
2120    SAFE_DEALLOCATE_A(gwf_psi)
2121    SAFE_DEALLOCATE_A(lwf_psi)
2122
2123
2124    !We compute the gauge-invariant kinetic energy density
2125    if(present(gi_kinetic_energy_density) .and. st%d%ispin /= SPINORS) then
2126      do is = 1, st%d%nspin
2127        ASSERT(associated(tau))
2128        gi_kinetic_energy_density(1:der%mesh%np, is) = tau(1:der%mesh%np, is)
2129        if(states_are_complex(st)) then
2130          ASSERT(associated(jp))
2131          do ii = 1, der%mesh%np
2132            if(st%rho(ii, is) < CNST(1.0e-7)) cycle
2133            gi_kinetic_energy_density(ii, is) = &
2134              gi_kinetic_energy_density(ii, is) - sum(jp(ii,1:der%mesh%sb%dim, is)**2)/st%rho(ii, is)
2135          end do
2136        end if
2137      end do
2138    end if
2139
2140    if(.not. present(kinetic_energy_density)) then
2141      SAFE_DEALLOCATE_P(tau)
2142    end if
2143    if(.not. present(paramagnetic_current)) then
2144      SAFE_DEALLOCATE_P(jp)
2145    end if
2146
2147
2148    POP_SUB(states_elec_calc_quantities)
2149
2150    call profiling_out(prof)
2151
2152  contains
2153
2154    subroutine reduce_all(grp)
2155      type(mpi_grp_t), intent(in)  :: grp
2156
2157      PUSH_SUB(states_elec_calc_quantities.reduce_all)
2158
2159      if(associated(tau)) call comm_allreduce(grp%comm, tau, dim = (/der%mesh%np, st%d%nspin/))
2160
2161      if (present(density_laplacian)) call comm_allreduce(grp%comm, density_laplacian, dim = (/der%mesh%np, st%d%nspin/))
2162
2163      do is = 1, st%d%nspin
2164        if(associated(jp)) call comm_allreduce(grp%comm, jp(:, :, is), dim = (/der%mesh%np, der%mesh%sb%dim/))
2165
2166        if(present(density_gradient)) &
2167          call comm_allreduce(grp%comm, density_gradient(:, :, is), dim = (/der%mesh%np, der%mesh%sb%dim/))
2168      end do
2169
2170      POP_SUB(states_elec_calc_quantities.reduce_all)
2171    end subroutine reduce_all
2172
2173  end subroutine states_elec_calc_quantities
2174
2175
2176  ! ---------------------------------------------------------
2177  function state_spin(mesh, f1) result(spin)
2178    type(mesh_t), intent(in) :: mesh
2179    CMPLX,        intent(in) :: f1(:, :)
2180    FLOAT                    :: spin(1:3)
2181
2182    CMPLX :: z
2183
2184    PUSH_SUB(state_spin)
2185
2186    z = zmf_dotp(mesh, f1(:, 1) , f1(:, 2))
2187
2188    spin(1) = M_TWO*TOFLOAT(z)
2189    spin(2) = M_TWO*aimag(z)
2190    spin(3) = zmf_nrm2(mesh, f1(:, 1))**2 - zmf_nrm2(mesh, f1(:, 2))**2
2191    spin = M_HALF*spin ! spin is half the sigma matrix.
2192
2193    POP_SUB(state_spin)
2194  end function state_spin
2195
2196  ! ---------------------------------------------------------
2197  logical function state_is_local(st, ist)
2198    type(states_elec_t), intent(in) :: st
2199    integer,             intent(in) :: ist
2200
2201    PUSH_SUB(state_is_local)
2202
2203    state_is_local = ist >= st%st_start.and.ist <= st%st_end
2204
2205    POP_SUB(state_is_local)
2206  end function state_is_local
2207
2208  ! ---------------------------------------------------------
2209  logical function state_kpt_is_local(st, ist, ik)
2210    type(states_elec_t), intent(in) :: st
2211    integer,             intent(in) :: ist
2212    integer,             intent(in) :: ik
2213
2214    PUSH_SUB(state_kpt_is_local)
2215
2216    state_kpt_is_local = ist >= st%st_start .and. ist <= st%st_end .and. &
2217      ik >= st%d%kpt%start .and. ik <= st%d%kpt%end
2218
2219    POP_SUB(state_kpt_is_local)
2220  end function state_kpt_is_local
2221
2222
2223  ! ---------------------------------------------------------
2224
2225  FLOAT function states_elec_wfns_memory(st, mesh) result(memory)
2226    type(states_elec_t), intent(in) :: st
2227    type(mesh_t),        intent(in) :: mesh
2228
2229    PUSH_SUB(states_elec_wfns_memory)
2230    memory = M_ZERO
2231
2232    ! orbitals
2233    memory = memory + REAL_PRECISION*TOFLOAT(mesh%np_part_global)*st%d%dim*TOFLOAT(st%nst)*st%d%kpt%nglobal
2234
2235    POP_SUB(states_elec_wfns_memory)
2236  end function states_elec_wfns_memory
2237
2238  ! ---------------------------------------------------------
2239
2240  subroutine states_elec_pack(st, copy)
2241    class(states_elec_t),    intent(inout) :: st
2242    logical,      optional, intent(in)    :: copy
2243
2244    integer :: iqn, ib
2245    integer(8) :: max_mem, mem
2246
2247    PUSH_SUB(states_elec_pack)
2248
2249    ! nothing to do, already packed
2250    if (st%packed) then
2251      POP_SUB(states_elec_pack)
2252      return
2253    end if
2254
2255    st%packed = .true.
2256
2257    if(accel_is_enabled()) then
2258      max_mem = accel_global_memory_size()
2259
2260      if(st%d%cl_states_mem > CNST(1.0)) then
2261        max_mem = int(st%d%cl_states_mem, 8)*(1024_8)**2
2262      else if(st%d%cl_states_mem < CNST(0.0)) then
2263        max_mem = max_mem + int(st%d%cl_states_mem, 8)*(1024_8)**2
2264      else
2265        max_mem = int(st%d%cl_states_mem*TOFLOAT(max_mem), 8)
2266      end if
2267    else
2268      max_mem = HUGE(max_mem)
2269    end if
2270
2271    mem = 0
2272    qnloop: do iqn = st%d%kpt%start, st%d%kpt%end
2273      do ib = st%group%block_start, st%group%block_end
2274
2275        mem = mem + st%group%psib(ib, iqn)%pack_total_size()
2276
2277        if(mem > max_mem) then
2278          call messages_write('Not enough CL device memory to store all states simultaneously.', new_line = .true.)
2279          call messages_write('Only ')
2280          call messages_write(ib - st%group%block_start)
2281          call messages_write(' of ')
2282          call messages_write(st%group%block_end - st%group%block_start + 1)
2283          call messages_write(' blocks will be stored in device memory.', new_line = .true.)
2284          call messages_warning()
2285          exit qnloop
2286        end if
2287
2288        call st%group%psib(ib, iqn)%do_pack(copy)
2289      end do
2290    end do qnloop
2291
2292    POP_SUB(states_elec_pack)
2293  end subroutine states_elec_pack
2294
2295  ! ------------------------------------------------------------
2296
2297  subroutine states_elec_unpack(st, copy)
2298    class(states_elec_t),    intent(inout) :: st
2299    logical,      optional, intent(in)    :: copy
2300
2301    integer :: iqn, ib
2302
2303    PUSH_SUB(states_elec_unpack)
2304
2305    if(st%packed) then
2306      st%packed = .false.
2307
2308      do iqn = st%d%kpt%start, st%d%kpt%end
2309        do ib = st%group%block_start, st%group%block_end
2310          if(st%group%psib(ib, iqn)%is_packed()) call st%group%psib(ib, iqn)%do_unpack(copy)
2311        end do
2312      end do
2313    end if
2314
2315    POP_SUB(states_elec_unpack)
2316  end subroutine states_elec_unpack
2317
2318  ! -----------------------------------------------------------
2319
2320  subroutine states_elec_write_info(st, namespace)
2321    class(states_elec_t),    intent(in) :: st
2322    type(namespace_t),       intent(in)    :: namespace
2323
2324    PUSH_SUB(states_elec_write_info)
2325
2326    call messages_print_stress(stdout, "States", namespace=namespace)
2327
2328    write(message(1), '(a,f12.3)') 'Total electronic charge  = ', st%qtot
2329    write(message(2), '(a,i8)')    'Number of states         = ', st%nst
2330    write(message(3), '(a,i8)')    'States block-size        = ', st%d%block_size
2331    call messages_info(3)
2332
2333    call messages_print_stress(stdout, namespace=namespace)
2334
2335    POP_SUB(states_elec_write_info)
2336  end subroutine states_elec_write_info
2337
2338  ! ------------------------------------------------------------
2339
2340  subroutine states_elec_set_zero(st)
2341    class(states_elec_t),    intent(inout) :: st
2342
2343    integer :: iqn, ib
2344
2345    PUSH_SUB(states_elec_set_zero)
2346
2347    do iqn = st%d%kpt%start, st%d%kpt%end
2348      do ib = st%group%block_start, st%group%block_end
2349        call batch_set_zero(st%group%psib(ib, iqn))
2350      end do
2351    end do
2352
2353    POP_SUB(states_elec_set_zero)
2354  end subroutine states_elec_set_zero
2355
2356  ! ------------------------------------------------------------
2357
2358  integer pure function states_elec_block_min(st, ib) result(range)
2359    type(states_elec_t),    intent(in) :: st
2360    integer,                intent(in) :: ib
2361
2362    range = st%group%block_range(ib, 1)
2363  end function states_elec_block_min
2364
2365  ! ------------------------------------------------------------
2366
2367  integer pure function states_elec_block_max(st, ib) result(range)
2368    type(states_elec_t),    intent(in) :: st
2369    integer,                intent(in) :: ib
2370
2371    range = st%group%block_range(ib, 2)
2372  end function states_elec_block_max
2373
2374  ! ------------------------------------------------------------
2375
2376  integer pure function states_elec_block_size(st, ib) result(size)
2377    type(states_elec_t),    intent(in) :: st
2378    integer,           intent(in) :: ib
2379
2380    size = st%group%block_size(ib)
2381  end function states_elec_block_size
2382
2383  ! ---------------------------------------------------------
2384  !> number of occupied-unoccipied pairs for Casida
2385  subroutine states_elec_count_pairs(st, namespace, n_pairs, n_occ, n_unocc, is_included, is_frac_occ)
2386    type(states_elec_t),  intent(in)  :: st
2387    type(namespace_t),    intent(in)  :: namespace
2388    integer,              intent(out) :: n_pairs
2389    integer,              intent(out) :: n_occ(:)   !< nik
2390    integer,              intent(out) :: n_unocc(:) !< nik
2391    logical, allocatable, intent(out) :: is_included(:,:,:) !< (max(n_occ), max(n_unocc), st%d%nik)
2392    logical,              intent(out) :: is_frac_occ !< are there fractional occupations?
2393
2394    integer :: ik, ist, ast, n_filled, n_partially_filled, n_half_filled
2395    character(len=80) :: nst_string, default, wfn_list
2396    FLOAT :: energy_window
2397
2398    PUSH_SUB(states_elec_count_pairs)
2399
2400    is_frac_occ = .false.
2401    do ik = 1, st%d%nik
2402      call occupied_states(st, namespace, ik, n_filled, n_partially_filled, n_half_filled)
2403      if(n_partially_filled > 0 .or. n_half_filled > 0) is_frac_occ = .true.
2404      n_occ(ik) = n_filled + n_partially_filled + n_half_filled
2405      n_unocc(ik) = st%nst - n_filled
2406      ! when we implement occupations, partially occupied levels need to be counted as both occ and unocc.
2407    end do
2408
2409    !%Variable CasidaKSEnergyWindow
2410    !%Type float
2411    !%Section Linear Response::Casida
2412    !%Description
2413    !% An alternative to <tt>CasidaKohnShamStates</tt> for specifying which occupied-unoccupied
2414    !% transitions will be used: all those whose eigenvalue differences are less than this
2415    !% number will be included. If a value less than 0 is supplied, this criterion will not be used.
2416    !%End
2417
2418    call parse_variable(namespace, 'CasidaKSEnergyWindow', -M_ONE, energy_window, units_inp%energy)
2419
2420    !%Variable CasidaKohnShamStates
2421    !%Type string
2422    !%Section Linear Response::Casida
2423    !%Default all states
2424    !%Description
2425    !% The calculation of the excitation spectrum of a system in the Casida frequency-domain
2426    !% formulation of linear-response time-dependent density functional theory (TDDFT)
2427    !% implies the use of a basis set of occupied/unoccupied Kohn-Sham orbitals. This
2428    !% basis set should, in principle, include all pairs formed by all occupied states,
2429    !% and an infinite number of unoccupied states. In practice, one has to truncate this
2430    !% basis set, selecting a number of occupied and unoccupied states that will form the
2431    !% pairs. These states are specified with this variable. If there are, say, 15 occupied
2432    !% states, and one sets this variable to the value "10-18", this means that occupied
2433    !% states from 10 to 15, and unoccupied states from 16 to 18 will be considered.
2434    !%
2435    !% This variable is a string in list form, <i>i.e.</i> expressions such as "1,2-5,8-15" are
2436    !% valid. You should include a non-zero number of unoccupied states and a non-zero number
2437    !% of occupied states.
2438    !%End
2439
2440    n_pairs = 0
2441    SAFE_ALLOCATE(is_included(maxval(n_occ), minval(n_occ) + 1:st%nst , st%d%nik))
2442    is_included(:,:,:) = .false.
2443
2444    if(energy_window < M_ZERO) then
2445      write(nst_string,'(i6)') st%nst
2446      write(default,'(a,a)') "1-", trim(adjustl(nst_string))
2447      call parse_variable(namespace, 'CasidaKohnShamStates', default, wfn_list)
2448
2449      write(message(1),'(a,a)') "Info: States that form the basis: ", trim(wfn_list)
2450      call messages_info(1)
2451
2452      ! count pairs
2453      n_pairs = 0
2454      do ik = 1, st%d%nik
2455        do ast = n_occ(ik) + 1, st%nst
2456          if(loct_isinstringlist(ast, wfn_list)) then
2457            do ist = 1, n_occ(ik)
2458              if(loct_isinstringlist(ist, wfn_list)) then
2459                n_pairs = n_pairs + 1
2460                is_included(ist, ast, ik) = .true.
2461              end if
2462            end do
2463          end if
2464        end do
2465      end do
2466
2467    else ! using CasidaKSEnergyWindow
2468
2469      write(message(1),'(a,f12.6,a)') "Info: including transitions with energy < ", &
2470        units_from_atomic(units_out%energy, energy_window), trim(units_abbrev(units_out%energy))
2471      call messages_info(1)
2472
2473      ! count pairs
2474      n_pairs = 0
2475      do ik = 1, st%d%nik
2476        do ast = n_occ(ik) + 1, st%nst
2477          do ist = 1, n_occ(ik)
2478            if(st%eigenval(ast, ik) - st%eigenval(ist, ik) < energy_window) then
2479              n_pairs = n_pairs + 1
2480              is_included(ist, ast, ik) = .true.
2481            end if
2482          end do
2483        end do
2484      end do
2485
2486    end if
2487
2488    POP_SUB(states_elec_count_pairs)
2489  end subroutine states_elec_count_pairs
2490
2491  ! ---------------------------------------------------------
2492  !> Returns information about which single-particle orbitals are
2493  !! occupied or not in a _many-particle_ state st:
2494  !!   n_filled are the number of orbitals that are totally filled
2495  !!            (the occupation number is two, if ispin = UNPOLARIZED,
2496  !!            or it is one in the other cases).
2497  !!   n_half_filled is only meaningful if ispin = UNPOLARIZED. It
2498  !!            is the number of orbitals where there is only one
2499  !!            electron in the orbital.
2500  !!   n_partially_filled is the number of orbitals that are neither filled,
2501  !!            half-filled, nor empty.
2502  !! The integer arrays filled, partially_filled and half_filled point
2503  !!   to the indices where the filled, partially filled and half_filled
2504  !!   orbitals are, respectively.
2505  subroutine occupied_states(st, namespace, ik, n_filled, n_partially_filled, n_half_filled, &
2506                             filled, partially_filled, half_filled)
2507    type(states_elec_t),    intent(in)  :: st
2508    type(namespace_t),      intent(in)  :: namespace
2509    integer,                intent(in)  :: ik
2510    integer,                intent(out) :: n_filled, n_partially_filled, n_half_filled
2511    integer,      optional, intent(out) :: filled(:), partially_filled(:), half_filled(:)
2512
2513    integer :: ist
2514    FLOAT, parameter :: M_THRESHOLD = CNST(1.0e-6)
2515
2516    PUSH_SUB(occupied_states)
2517
2518    if(present(filled))           filled(:) = 0
2519    if(present(partially_filled)) partially_filled(:) = 0
2520    if(present(half_filled))      half_filled(:) = 0
2521    n_filled = 0
2522    n_partially_filled = 0
2523    n_half_filled = 0
2524
2525    select case(st%d%ispin)
2526    case(UNPOLARIZED)
2527      do ist = 1, st%nst
2528        if(abs(st%occ(ist, ik) - M_TWO) < M_THRESHOLD) then
2529          n_filled = n_filled + 1
2530          if(present(filled)) filled(n_filled) = ist
2531        elseif(abs(st%occ(ist, ik) - M_ONE) < M_THRESHOLD) then
2532          n_half_filled = n_half_filled + 1
2533          if(present(half_filled)) half_filled(n_half_filled) = ist
2534        elseif(st%occ(ist, ik) > M_THRESHOLD ) then
2535          n_partially_filled = n_partially_filled + 1
2536          if(present(partially_filled)) partially_filled(n_partially_filled) = ist
2537        elseif(abs(st%occ(ist, ik)) > M_THRESHOLD ) then
2538          write(message(1),*) 'Internal error in occupied_states: Illegal occupation value ', st%occ(ist, ik)
2539          call messages_fatal(1, namespace=namespace)
2540         end if
2541      end do
2542    case(SPIN_POLARIZED, SPINORS)
2543      do ist = 1, st%nst
2544        if(abs(st%occ(ist, ik)-M_ONE) < M_THRESHOLD) then
2545          n_filled = n_filled + 1
2546          if(present(filled)) filled(n_filled) = ist
2547        elseif(st%occ(ist, ik) > M_THRESHOLD ) then
2548          n_partially_filled = n_partially_filled + 1
2549          if(present(partially_filled)) partially_filled(n_partially_filled) = ist
2550        elseif(abs(st%occ(ist, ik)) > M_THRESHOLD ) then
2551          write(message(1),*) 'Internal error in occupied_states: Illegal occupation value ', st%occ(ist, ik)
2552          call messages_fatal(1, namespace=namespace)
2553         end if
2554      end do
2555    end select
2556
2557    POP_SUB(occupied_states)
2558  end subroutine occupied_states
2559
2560
2561  ! ------------------------------------------------------------
2562subroutine states_elec_set_phase(st_d, psi, phase, np, conjugate)
2563  type(states_elec_dim_t),intent(in)    :: st_d
2564  CMPLX,               intent(inout)    :: psi(:, :)
2565  CMPLX,                  intent(in)    :: phase(:)
2566  integer,                intent(in)    :: np
2567  logical,                intent(in)    :: conjugate
2568
2569  integer :: idim, ip
2570
2571  PUSH_SUB(states_elec_set_phase)
2572
2573  if(conjugate) then
2574    ! Apply the phase that contains both the k-point and vector-potential terms.
2575    do idim = 1, st_d%dim
2576      !$omp parallel do
2577      do ip = 1, np
2578        psi(ip, idim) = conjg(phase(ip))*psi(ip, idim)
2579      end do
2580      !$omp end parallel do
2581    end do
2582  else
2583    ! Apply the conjugate of the phase that contains both the k-point and vector-potential terms.
2584    do idim = 1, st_d%dim
2585      !$omp parallel do
2586      do ip = 1, np
2587        psi(ip, idim) = phase(ip)*psi(ip, idim)
2588      end do
2589      !$omp end parallel do
2590    end do
2591  end if
2592
2593  POP_SUB(states_elec_set_phase)
2594
2595end subroutine  states_elec_set_phase
2596
2597
2598#include "undef.F90"
2599#include "real.F90"
2600#include "states_elec_inc.F90"
2601
2602#include "undef.F90"
2603#include "complex.F90"
2604#include "states_elec_inc.F90"
2605#include "undef.F90"
2606
2607end module states_elec_oct_m
2608
2609
2610!! Local Variables:
2611!! mode: f90
2612!! coding: utf-8
2613!! End:
2614