1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Optimization routines for all ALMO-based SCF methods
8!> \par History
9!>       2011.05 created [Rustam Z Khaliullin]
10!>       2014.10 as a separate file [Rustam Z Khaliullin]
11!> \author Rustam Z Khaliullin
12! **************************************************************************************************
13MODULE almo_scf_optimizer
14   USE almo_scf_diis_types,             ONLY: almo_scf_diis_extrapolate,&
15                                              almo_scf_diis_init,&
16                                              almo_scf_diis_push,&
17                                              almo_scf_diis_release,&
18                                              almo_scf_diis_type
19   USE almo_scf_lbfgs_types,            ONLY: lbfgs_create,&
20                                              lbfgs_get_direction,&
21                                              lbfgs_history_type,&
22                                              lbfgs_release,&
23                                              lbfgs_seed
24   USE almo_scf_methods,                ONLY: &
25        almo_scf_ks_blk_to_tv_blk, almo_scf_ks_to_ks_blk, almo_scf_ks_to_ks_xx, &
26        almo_scf_ks_xx_to_tv_xx, almo_scf_p_blk_to_t_blk, almo_scf_t_rescaling, &
27        almo_scf_t_to_proj, apply_domain_operators, apply_projector, &
28        construct_domain_preconditioner, construct_domain_r_down, construct_domain_s_inv, &
29        construct_domain_s_sqrt, fill_matrix_with_ones, get_overlap, orthogonalize_mos, &
30        pseudo_invert_diagonal_blk, xalmo_initial_guess
31   USE almo_scf_qs,                     ONLY: almo_dm_to_almo_ks,&
32                                              almo_dm_to_qs_env,&
33                                              almo_scf_update_ks_energy,&
34                                              matrix_qs_to_almo
35   USE almo_scf_types,                  ONLY: almo_scf_env_type,&
36                                              optimizer_options_type
37   USE cell_types,                      ONLY: cell_type
38   USE cp_blacs_env,                    ONLY: cp_blacs_env_type
39   USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
40                                              cp_dbcsr_cholesky_invert,&
41                                              cp_dbcsr_cholesky_restore
42   USE cp_external_control,             ONLY: external_control
43   USE cp_files,                        ONLY: close_file,&
44                                              open_file
45   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
46                                              cp_logger_get_default_unit_nr,&
47                                              cp_logger_type,&
48                                              cp_to_string
49   USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
50                                              cp_print_key_unit_nr
51   USE cp_para_types,                   ONLY: cp_para_env_type
52   USE ct_methods,                      ONLY: analytic_line_search,&
53                                              ct_step_execute,&
54                                              diagonalize_diagonal_blocks
55   USE ct_types,                        ONLY: ct_step_env_clean,&
56                                              ct_step_env_get,&
57                                              ct_step_env_init,&
58                                              ct_step_env_set,&
59                                              ct_step_env_type
60   USE dbcsr_api,                       ONLY: &
61        dbcsr_add, dbcsr_add_on_diag, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
62        dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_dot, dbcsr_filter, dbcsr_finalize, &
63        dbcsr_frobenius_norm, dbcsr_func_dtanh, dbcsr_func_inverse, dbcsr_func_tanh, &
64        dbcsr_function_of_elements, dbcsr_get_block_p, dbcsr_get_diag, dbcsr_get_info, &
65        dbcsr_hadamard_product, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
66        dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
67        dbcsr_nblkcols_total, dbcsr_nblkrows_total, dbcsr_norm, dbcsr_norm_maxabsnorm, &
68        dbcsr_p_type, dbcsr_print_block_sum, dbcsr_release, dbcsr_reserve_block2d, dbcsr_scale, &
69        dbcsr_set, dbcsr_set_diag, dbcsr_triu, dbcsr_type, dbcsr_type_no_symmetry, &
70        dbcsr_work_create
71   USE domain_submatrix_methods,        ONLY: add_submatrices,&
72                                              construct_submatrices,&
73                                              copy_submatrices,&
74                                              init_submatrices,&
75                                              maxnorm_submatrices,&
76                                              release_submatrices
77   USE domain_submatrix_types,          ONLY: domain_map_type,&
78                                              domain_submatrix_type,&
79                                              select_row
80   USE input_constants,                 ONLY: &
81        almo_scf_diag, almo_scf_dm_sign, cg_dai_yuan, cg_fletcher, cg_fletcher_reeves, &
82        cg_hager_zhang, cg_hestenes_stiefel, cg_liu_storey, cg_polak_ribiere, cg_zero, &
83        op_loc_berry, op_loc_pipek, trustr_cauchy, trustr_dogleg, virt_full, &
84        xalmo_case_block_diag, xalmo_case_fully_deloc, xalmo_case_normal, xalmo_prec_domain, &
85        xalmo_prec_full, xalmo_prec_zero
86   USE input_section_types,             ONLY: section_vals_get_subs_vals,&
87                                              section_vals_type
88   USE iterate_matrix,                  ONLY: determinant,&
89                                              invert_Hotelling,&
90                                              matrix_sqrt_Newton_Schulz
91   USE kinds,                           ONLY: dp
92   USE machine,                         ONLY: m_flush,&
93                                              m_walltime
94   USE message_passing,                 ONLY: mp_sum
95   USE particle_methods,                ONLY: get_particle_set
96   USE particle_types,                  ONLY: particle_type
97   USE qs_energy_types,                 ONLY: qs_energy_type
98   USE qs_environment_types,            ONLY: get_qs_env,&
99                                              qs_environment_type
100   USE qs_kind_types,                   ONLY: qs_kind_type
101   USE qs_loc_utils,                    ONLY: compute_berry_operator
102   USE qs_localization_methods,         ONLY: initialize_weights
103#include "./base/base_uses.f90"
104
105   IMPLICIT NONE
106
107   PRIVATE
108
109   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'almo_scf_optimizer'
110
111   PUBLIC :: almo_scf_block_diagonal, &
112             almo_scf_xalmo_eigensolver, &
113             almo_scf_xalmo_trustr, &
114             almo_scf_xalmo_pcg, &
115             almo_scf_construct_nlmos
116
117   LOGICAL, PARAMETER :: debug_mode = .FALSE.
118   LOGICAL, PARAMETER :: safe_mode = .FALSE.
119   LOGICAL, PARAMETER :: almo_mathematica = .FALSE.
120   INTEGER, PARAMETER :: hessian_path_reuse = 1, &
121                         hessian_path_assemble = 2
122
123CONTAINS
124
125! **************************************************************************************************
126!> \brief An SCF procedure that optimizes block-diagonal ALMOs using DIIS
127!> \param qs_env ...
128!> \param almo_scf_env ...
129!> \param optimizer ...
130!> \par History
131!>       2011.06 created [Rustam Z Khaliullin]
132!>       2018.09 smearing support [Ruben Staub]
133!> \author Rustam Z Khaliullin
134! **************************************************************************************************
135   SUBROUTINE almo_scf_block_diagonal(qs_env, almo_scf_env, optimizer)
136      TYPE(qs_environment_type), POINTER                 :: qs_env
137      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
138      TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
139
140      CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_block_diagonal', &
141         routineP = moduleN//':'//routineN
142
143      INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
144      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_nocc_of_domain
145      LOGICAL                                            :: converged, prepare_to_exit, should_stop, &
146                                                            use_diis, use_prev_as_guess
147      REAL(KIND=dp) :: density_rec, energy_diff, energy_new, energy_old, error_norm, &
148         error_norm_ispin, kTS_sum, prev_error_norm, t1, t2, true_mixing_fraction
149      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: local_mu
150      TYPE(almo_scf_diis_type), ALLOCATABLE, &
151         DIMENSION(:)                                    :: almo_diis
152      TYPE(cp_logger_type), POINTER                      :: logger
153      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_mixing_old_blk
154      TYPE(qs_energy_type), POINTER                      :: qs_energy
155
156      CALL timeset(routineN, handle)
157
158      ! get a useful output_unit
159      logger => cp_get_default_logger()
160      IF (logger%para_env%ionode) THEN
161         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
162      ELSE
163         unit_nr = -1
164      ENDIF
165
166      ! use DIIS, it's superior to simple mixing
167      use_diis = .TRUE.
168      use_prev_as_guess = .FALSE.
169
170      nspin = almo_scf_env%nspins
171      ALLOCATE (local_mu(almo_scf_env%ndomains))
172      ALLOCATE (local_nocc_of_domain(almo_scf_env%ndomains))
173
174      ! init mixing matrices
175      ALLOCATE (matrix_mixing_old_blk(nspin))
176      ALLOCATE (almo_diis(nspin))
177      DO ispin = 1, nspin
178         CALL dbcsr_create(matrix_mixing_old_blk(ispin), &
179                           template=almo_scf_env%matrix_ks_blk(ispin))
180         CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
181                                 sample_err=almo_scf_env%matrix_ks_blk(ispin), &
182                                 sample_var=almo_scf_env%matrix_s_blk(1), &
183                                 error_type=1, &
184                                 max_length=optimizer%ndiis)
185      ENDDO
186
187      CALL get_qs_env(qs_env, energy=qs_energy)
188      energy_old = qs_energy%total
189
190      iscf = 0
191      prepare_to_exit = .FALSE.
192      true_mixing_fraction = 0.0_dp
193      error_norm = 1.0E+10_dp ! arbitrary big step
194
195      IF (unit_nr > 0) THEN
196         WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
197            " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
198         WRITE (unit_nr, *)
199         WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
200            "Total Energy", "Change", "Convergence", "Time"
201         WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
202      ENDIF
203
204      ! the real SCF loop
205      t1 = m_walltime()
206      DO
207
208         iscf = iscf + 1
209
210         ! obtain projected KS matrix and the DIIS-error vector
211         CALL almo_scf_ks_to_ks_blk(almo_scf_env)
212
213         ! inform the DIIS handler about the new KS matrix and its error vector
214         IF (use_diis) THEN
215            DO ispin = 1, nspin
216               CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
217                                       var=almo_scf_env%matrix_ks_blk(ispin), &
218                                       err=almo_scf_env%matrix_err_blk(ispin))
219            ENDDO
220         ENDIF
221
222         ! get error_norm: choose the largest of the two spins
223         prev_error_norm = error_norm
224         DO ispin = 1, nspin
225            !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
226            CALL dbcsr_norm(almo_scf_env%matrix_err_blk(ispin), &
227                            dbcsr_norm_maxabsnorm, &
228                            norm_scalar=error_norm_ispin)
229            IF (ispin .EQ. 1) error_norm = error_norm_ispin
230            IF (ispin .GT. 1 .AND. error_norm_ispin .GT. error_norm) &
231               error_norm = error_norm_ispin
232         ENDDO
233
234         IF (error_norm .LT. almo_scf_env%eps_prev_guess) THEN
235            use_prev_as_guess = .TRUE.
236         ELSE
237            use_prev_as_guess = .FALSE.
238         ENDIF
239
240         ! check convergence
241         converged = .TRUE.
242         IF (error_norm .GT. optimizer%eps_error) converged = .FALSE.
243
244         ! check other exit criteria: max SCF steps and timing
245         CALL external_control(should_stop, "SCF", &
246                               start_time=qs_env%start_time, &
247                               target_time=qs_env%target_time)
248         IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
249            prepare_to_exit = .TRUE.
250            IF (iscf == 1) energy_new = energy_old
251         ENDIF
252
253         ! if early stopping is on do at least one iteration
254         IF (optimizer%early_stopping_on .AND. iscf .EQ. 1) &
255            prepare_to_exit = .FALSE.
256
257         IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
258
259            ! perform mixing of KS matrices
260            IF (iscf .NE. 1) THEN
261               IF (use_diis) THEN ! use diis instead of mixing
262                  DO ispin = 1, nspin
263                     CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
264                                                    extr_var=almo_scf_env%matrix_ks_blk(ispin))
265                  ENDDO
266               ELSE ! use mixing
267                  true_mixing_fraction = almo_scf_env%mixing_fraction
268                  DO ispin = 1, nspin
269                     CALL dbcsr_add(almo_scf_env%matrix_ks_blk(ispin), &
270                                    matrix_mixing_old_blk(ispin), &
271                                    true_mixing_fraction, &
272                                    1.0_dp - true_mixing_fraction)
273                  END DO
274               ENDIF
275            ENDIF
276            ! save the new matrix for the future mixing
277            DO ispin = 1, nspin
278               CALL dbcsr_copy(matrix_mixing_old_blk(ispin), &
279                               almo_scf_env%matrix_ks_blk(ispin))
280            ENDDO
281
282            ! obtain ALMOs from the new KS matrix
283            SELECT CASE (almo_scf_env%almo_update_algorithm)
284            CASE (almo_scf_diag)
285
286               CALL almo_scf_ks_blk_to_tv_blk(almo_scf_env)
287
288            CASE (almo_scf_dm_sign)
289
290               ! update the density matrix
291               DO ispin = 1, nspin
292
293                  local_nocc_of_domain(:) = almo_scf_env%nocc_of_domain(:, ispin)
294                  local_mu(:) = almo_scf_env%mu_of_domain(:, ispin)
295                  ! RZK UPDATE! the update algorithm is removed because
296                  ! RZK UPDATE! it requires updating core LS_SCF routines
297                  ! RZK UPDATE! (the code exists in the CVS version)
298                  CPABORT("Density_matrix_sign has not been tested yet")
299                  ! RZK UPDATE!  CALL density_matrix_sign(almo_scf_env%matrix_p_blk(ispin),&
300                  ! RZK UPDATE!          local_mu,&
301                  ! RZK UPDATE!          almo_scf_env%fixed_mu,&
302                  ! RZK UPDATE!          almo_scf_env%matrix_ks_blk(ispin),&
303                  ! RZK UPDATE!          !matrix_mixing_old_blk(ispin),&
304                  ! RZK UPDATE!          almo_scf_env%matrix_s_blk(1), &
305                  ! RZK UPDATE!          almo_scf_env%matrix_s_blk_inv(1), &
306                  ! RZK UPDATE!          local_nocc_of_domain,&
307                  ! RZK UPDATE!          almo_scf_env%eps_filter,&
308                  ! RZK UPDATE!          almo_scf_env%domain_index_of_ao)
309                  ! RZK UPDATE!
310                  almo_scf_env%mu_of_domain(:, ispin) = local_mu(:)
311
312               ENDDO
313
314               ! obtain ALMOs from matrix_p_blk: T_new = P_blk S_blk T_old
315               CALL almo_scf_p_blk_to_t_blk(almo_scf_env, ionic=.FALSE.)
316
317               DO ispin = 1, almo_scf_env%nspins
318
319                  CALL orthogonalize_mos(ket=almo_scf_env%matrix_t_blk(ispin), &
320                                         overlap=almo_scf_env%matrix_sigma_blk(ispin), &
321                                         metric=almo_scf_env%matrix_s_blk(1), &
322                                         retain_locality=.TRUE., &
323                                         only_normalize=.FALSE., &
324                                         nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
325                                         eps_filter=almo_scf_env%eps_filter, &
326                                         order_lanczos=almo_scf_env%order_lanczos, &
327                                         eps_lanczos=almo_scf_env%eps_lanczos, &
328                                         max_iter_lanczos=almo_scf_env%max_iter_lanczos)
329
330               ENDDO
331
332            END SELECT
333
334            ! obtain density matrix from ALMOs
335            DO ispin = 1, almo_scf_env%nspins
336
337               !! Application of an occupation-rescaling trick for smearing, if requested
338               IF (almo_scf_env%smear) THEN
339                  CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
340                                            mo_energies=almo_scf_env%mo_energies(:, ispin), &
341                                            mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
342                                            real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
343                                            spin_kTS=almo_scf_env%kTS(ispin), &
344                                            smear_e_temp=almo_scf_env%smear_e_temp, &
345                                            ndomains=almo_scf_env%ndomains, &
346                                            nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
347               END IF
348
349               CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t_blk(ispin), &
350                                       p=almo_scf_env%matrix_p(ispin), &
351                                       eps_filter=almo_scf_env%eps_filter, &
352                                       orthog_orbs=.FALSE., &
353                                       nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
354                                       s=almo_scf_env%matrix_s(1), &
355                                       sigma=almo_scf_env%matrix_sigma(ispin), &
356                                       sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
357                                       use_guess=use_prev_as_guess, &
358                                       smear=almo_scf_env%smear, &
359                                       algorithm=almo_scf_env%sigma_inv_algorithm, &
360                                       inverse_accelerator=almo_scf_env%order_lanczos, &
361                                       inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
362                                       eps_lanczos=almo_scf_env%eps_lanczos, &
363                                       max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
364                                       para_env=almo_scf_env%para_env, &
365                                       blacs_env=almo_scf_env%blacs_env)
366
367            ENDDO
368
369            IF (almo_scf_env%nspins == 1) THEN
370               CALL dbcsr_scale(almo_scf_env%matrix_p(1), 2.0_dp)
371               !! Rescaling electronic entropy contribution by spin_factor
372               IF (almo_scf_env%smear) THEN
373                  almo_scf_env%kTS(1) = almo_scf_env%kTS(1)*2.0_dp
374               END IF
375            ENDIF
376
377            IF (almo_scf_env%smear) THEN
378               kTS_sum = SUM(almo_scf_env%kTS)
379            ELSE
380               kTS_sum = 0.0_dp
381            ENDIF
382
383            ! compute the new KS matrix and new energy
384            CALL almo_dm_to_almo_ks(qs_env, &
385                                    almo_scf_env%matrix_p, &
386                                    almo_scf_env%matrix_ks, &
387                                    energy_new, &
388                                    almo_scf_env%eps_filter, &
389                                    almo_scf_env%mat_distr_aos, &
390                                    smear=almo_scf_env%smear, &
391                                    kTS_sum=kTS_sum)
392
393         ENDIF ! prepare_to_exit
394
395         energy_diff = energy_new - energy_old
396         energy_old = energy_new
397         almo_scf_env%almo_scf_energy = energy_new
398
399         t2 = m_walltime()
400         ! brief report on the current SCF loop
401         IF (unit_nr > 0) THEN
402            WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') "ALMO SCF DIIS", &
403               iscf, &
404               energy_new, energy_diff, error_norm, t2 - t1
405         ENDIF
406         t1 = m_walltime()
407
408         IF (prepare_to_exit) EXIT
409
410      ENDDO ! end scf cycle
411
412      !! Print number of electrons recovered if smearing was requested
413      IF (almo_scf_env%smear) THEN
414         DO ispin = 1, nspin
415            CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
416            IF (unit_nr > 0) THEN
417               WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
418            END IF
419         END DO
420      END IF
421
422      IF (.NOT. converged .AND. (.NOT. optimizer%early_stopping_on)) THEN
423         IF (unit_nr > 0) THEN
424            CPABORT("SCF for block-diagonal ALMOs not converged!")
425         ENDIF
426      ENDIF
427
428      DO ispin = 1, nspin
429         CALL dbcsr_release(matrix_mixing_old_blk(ispin))
430         CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
431      ENDDO
432      DEALLOCATE (almo_diis)
433      DEALLOCATE (matrix_mixing_old_blk)
434      DEALLOCATE (local_mu)
435      DEALLOCATE (local_nocc_of_domain)
436
437      CALL timestop(handle)
438
439   END SUBROUTINE almo_scf_block_diagonal
440
441! **************************************************************************************************
442!> \brief An eigensolver-based SCF to optimize extended ALMOs (i.e. ALMOs on
443!>        overlapping domains)
444!> \param qs_env ...
445!> \param almo_scf_env ...
446!> \param optimizer ...
447!> \par History
448!>       2013.03 created [Rustam Z Khaliullin]
449!>       2018.09 smearing support [Ruben Staub]
450!> \author Rustam Z Khaliullin
451! **************************************************************************************************
452   SUBROUTINE almo_scf_xalmo_eigensolver(qs_env, almo_scf_env, optimizer)
453      TYPE(qs_environment_type), POINTER                 :: qs_env
454      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
455      TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
456
457      CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_eigensolver', &
458         routineP = moduleN//':'//routineN
459
460      INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
461      LOGICAL                                            :: converged, prepare_to_exit, should_stop
462      REAL(KIND=dp) :: denergy_tot, density_rec, energy_diff, energy_new, energy_old, error_norm, &
463         error_norm_0, kTS_sum, spin_factor, t1, t2
464      REAL(KIND=dp), DIMENSION(2)                        :: denergy_spin
465      TYPE(almo_scf_diis_type), ALLOCATABLE, &
466         DIMENSION(:)                                    :: almo_diis
467      TYPE(cp_logger_type), POINTER                      :: logger
468      TYPE(dbcsr_type)                                   :: matrix_p_almo_scf_converged
469      TYPE(domain_submatrix_type), ALLOCATABLE, &
470         DIMENSION(:, :)                                 :: submatrix_mixing_old_blk
471
472      CALL timeset(routineN, handle)
473
474      ! get a useful output_unit
475      logger => cp_get_default_logger()
476      IF (logger%para_env%ionode) THEN
477         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
478      ELSE
479         unit_nr = -1
480      ENDIF
481
482      nspin = almo_scf_env%nspins
483      IF (nspin == 1) THEN
484         spin_factor = 2.0_dp
485      ELSE
486         spin_factor = 1.0_dp
487      ENDIF
488
489      ! RZK-warning domain_s_sqrt and domain_s_sqrt_inv do not have spin
490      ! components yet (may be used later)
491      ispin = 1
492      CALL construct_domain_s_sqrt( &
493         matrix_s=almo_scf_env%matrix_s(1), &
494         subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
495         subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
496         dpattern=almo_scf_env%quench_t(ispin), &
497         map=almo_scf_env%domain_map(ispin), &
498         node_of_domain=almo_scf_env%cpu_of_domain)
499      ! TRY: construct s_inv
500      !CALL construct_domain_s_inv(&
501      !       matrix_s=almo_scf_env%matrix_s(1),&
502      !       subm_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
503      !       dpattern=almo_scf_env%quench_t(ispin),&
504      !       map=almo_scf_env%domain_map(ispin),&
505      !       node_of_domain=almo_scf_env%cpu_of_domain)
506
507      ! construct the domain template for the occupied orbitals
508      DO ispin = 1, nspin
509         ! RZK-warning we need only the matrix structure, not data
510         ! replace construct_submatrices with lighter procedure with
511         ! no heavy communications
512         CALL construct_submatrices( &
513            matrix=almo_scf_env%quench_t(ispin), &
514            submatrix=almo_scf_env%domain_t(:, ispin), &
515            distr_pattern=almo_scf_env%quench_t(ispin), &
516            domain_map=almo_scf_env%domain_map(ispin), &
517            node_of_domain=almo_scf_env%cpu_of_domain, &
518            job_type=select_row)
519      ENDDO
520
521      ! init mixing matrices
522      ALLOCATE (submatrix_mixing_old_blk(almo_scf_env%ndomains, nspin))
523      CALL init_submatrices(submatrix_mixing_old_blk)
524      ALLOCATE (almo_diis(nspin))
525
526      ! TRY: construct block-projector
527      !ALLOCATE(submatrix_tmp(almo_scf_env%ndomains))
528      !DO ispin=1,nspin
529      !   CALL init_submatrices(submatrix_tmp)
530      !   CALL construct_domain_r_down(&
531      !           matrix_t=almo_scf_env%matrix_t_blk(ispin),&
532      !           matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin),&
533      !           matrix_s=almo_scf_env%matrix_s(1),&
534      !           subm_r_down=submatrix_tmp(:),&
535      !           dpattern=almo_scf_env%quench_t(ispin),&
536      !           map=almo_scf_env%domain_map(ispin),&
537      !           node_of_domain=almo_scf_env%cpu_of_domain,&
538      !           filter_eps=almo_scf_env%eps_filter)
539      !   CALL multiply_submatrices('N','N',1.0_dp,&
540      !           submatrix_tmp(:),&
541      !           almo_scf_env%domain_s_inv(:,1),0.0_dp,&
542      !           almo_scf_env%domain_r_down_up(:,ispin))
543      !   CALL release_submatrices(submatrix_tmp)
544      !ENDDO
545      !DEALLOCATE(submatrix_tmp)
546
547      DO ispin = 1, nspin
548         ! use s_sqrt since they are already properly constructed
549         ! and have the same distributions as domain_err and domain_ks_xx
550         CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
551                                 sample_err=almo_scf_env%domain_s_sqrt(:, ispin), &
552                                 error_type=1, &
553                                 max_length=optimizer%ndiis)
554      ENDDO
555
556      denergy_tot = 0.0_dp
557      energy_old = 0.0_dp
558      iscf = 0
559      prepare_to_exit = .FALSE.
560
561      ! the SCF loop
562      t1 = m_walltime()
563      DO
564
565         iscf = iscf + 1
566
567         ! obtain projected KS matrix and the DIIS-error vector
568         CALL almo_scf_ks_to_ks_xx(almo_scf_env)
569
570         ! inform the DIIS handler about the new KS matrix and its error vector
571         DO ispin = 1, nspin
572            CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
573                                    d_var=almo_scf_env%domain_ks_xx(:, ispin), &
574                                    d_err=almo_scf_env%domain_err(:, ispin))
575         ENDDO
576
577         ! check convergence
578         converged = .TRUE.
579         DO ispin = 1, nspin
580            !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
581            CALL dbcsr_norm(almo_scf_env%matrix_err_xx(ispin), &
582                            dbcsr_norm_maxabsnorm, &
583                            norm_scalar=error_norm)
584            CALL maxnorm_submatrices(almo_scf_env%domain_err(:, ispin), &
585                                     norm=error_norm_0)
586            IF (error_norm .GT. optimizer%eps_error) THEN
587               converged = .FALSE.
588               EXIT ! no need to check the other spin
589            ENDIF
590         ENDDO
591         ! check other exit criteria: max SCF steps and timing
592         CALL external_control(should_stop, "SCF", &
593                               start_time=qs_env%start_time, &
594                               target_time=qs_env%target_time)
595         IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
596            prepare_to_exit = .TRUE.
597         ENDIF
598
599         ! if early stopping is on do at least one iteration
600         IF (optimizer%early_stopping_on .AND. iscf .EQ. 1) &
601            prepare_to_exit = .FALSE.
602
603         IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
604
605            ! perform mixing of KS matrices
606            IF (iscf .NE. 1) THEN
607               IF (.FALSE.) THEN ! use diis instead of mixing
608                  DO ispin = 1, nspin
609                     CALL add_submatrices( &
610                        almo_scf_env%mixing_fraction, &
611                        almo_scf_env%domain_ks_xx(:, ispin), &
612                        1.0_dp - almo_scf_env%mixing_fraction, &
613                        submatrix_mixing_old_blk(:, ispin), &
614                        'N')
615                  END DO
616               ELSE
617                  DO ispin = 1, nspin
618                     CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
619                                                    d_extr_var=almo_scf_env%domain_ks_xx(:, ispin))
620                  ENDDO
621               ENDIF
622            ENDIF
623            ! save the new matrix for the future mixing
624            DO ispin = 1, nspin
625               CALL copy_submatrices( &
626                  almo_scf_env%domain_ks_xx(:, ispin), &
627                  submatrix_mixing_old_blk(:, ispin), &
628                  copy_data=.TRUE.)
629            ENDDO
630
631            ! obtain a new set of ALMOs from the updated KS matrix
632            CALL almo_scf_ks_xx_to_tv_xx(almo_scf_env)
633
634            ! update the density matrix
635            DO ispin = 1, nspin
636
637               ! save the initial density matrix (to get the perturbative energy lowering)
638               IF (iscf .EQ. 1) THEN
639                  CALL dbcsr_create(matrix_p_almo_scf_converged, &
640                                    template=almo_scf_env%matrix_p(ispin))
641                  CALL dbcsr_copy(matrix_p_almo_scf_converged, &
642                                  almo_scf_env%matrix_p(ispin))
643               ENDIF
644
645               !! Application of an occupation-rescaling trick for smearing, if requested
646               IF (almo_scf_env%smear) THEN
647                  CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
648                                            mo_energies=almo_scf_env%mo_energies(:, ispin), &
649                                            mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
650                                            real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
651                                            spin_kTS=almo_scf_env%kTS(ispin), &
652                                            smear_e_temp=almo_scf_env%smear_e_temp, &
653                                            ndomains=almo_scf_env%ndomains, &
654                                            nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
655               END IF
656
657               ! update now
658               CALL almo_scf_t_to_proj( &
659                  t=almo_scf_env%matrix_t(ispin), &
660                  p=almo_scf_env%matrix_p(ispin), &
661                  eps_filter=almo_scf_env%eps_filter, &
662                  orthog_orbs=.FALSE., &
663                  nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
664                  s=almo_scf_env%matrix_s(1), &
665                  sigma=almo_scf_env%matrix_sigma(ispin), &
666                  sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
667                  use_guess=.TRUE., &
668                  smear=almo_scf_env%smear, &
669                  algorithm=almo_scf_env%sigma_inv_algorithm, &
670                  inverse_accelerator=almo_scf_env%order_lanczos, &
671                  inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
672                  eps_lanczos=almo_scf_env%eps_lanczos, &
673                  max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
674                  para_env=almo_scf_env%para_env, &
675                  blacs_env=almo_scf_env%blacs_env)
676               CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), spin_factor)
677               !! Rescaling electronic entropy contribution by spin_factor
678               IF (almo_scf_env%smear) THEN
679                  almo_scf_env%kTS(ispin) = almo_scf_env%kTS(ispin)*spin_factor
680               END IF
681
682               ! obtain perturbative estimate (at no additional cost)
683               ! of the energy lowering relative to the block-diagonal ALMOs
684               IF (iscf .EQ. 1) THEN
685
686                  CALL dbcsr_add(matrix_p_almo_scf_converged, &
687                                 almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
688                  CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
689                                 matrix_p_almo_scf_converged, &
690                                 denergy_spin(ispin))
691
692                  CALL dbcsr_release(matrix_p_almo_scf_converged)
693
694                  !! RS-WARNING: If smearing ALMO is requested, electronic entropy contribution should probably be included here
695
696                  denergy_tot = denergy_tot + denergy_spin(ispin)
697
698                  ! RZK-warning Energy correction can be evaluated using matrix_x
699                  ! as shown in the attempt below and in the PCG procedure.
700                  ! Using matrix_x allows immediate decomposition of the energy
701                  ! lowering into 2-body components for EDA. However, it does not
702                  ! work here because the diagonalization routine does not necessarily
703                  ! produce orbitals with the same sign as the block-diagonal ALMOs
704                  ! Any fixes?!
705
706                  !CALL dbcsr_init(matrix_x)
707                  !CALL dbcsr_create(matrix_x,&
708                  !        template=almo_scf_env%matrix_t(ispin))
709                  !
710                  !CALL dbcsr_init(matrix_tmp_no)
711                  !CALL dbcsr_create(matrix_tmp_no,&
712                  !        template=almo_scf_env%matrix_t(ispin))
713                  !
714                  !CALL dbcsr_copy(matrix_x,&
715                  !        almo_scf_env%matrix_t_blk(ispin))
716                  !CALL dbcsr_add(matrix_x,almo_scf_env%matrix_t(ispin),&
717                  !        -1.0_dp,1.0_dp)
718
719                  !CALL dbcsr_dot(matrix_x, almo_scf_env%matrix_err_xx(ispin),denergy)
720
721                  !denergy=denergy*spin_factor
722
723                  !IF (unit_nr>0) THEN
724                  !   WRITE(unit_nr,*) "_ENERGY-0: ", almo_scf_env%almo_scf_energy
725                  !   WRITE(unit_nr,*) "_ENERGY-D: ", denergy
726                  !   WRITE(unit_nr,*) "_ENERGY-F: ", almo_scf_env%almo_scf_energy+denergy
727                  !ENDIF
728                  !! RZK-warning update will not work since the energy is overwritten almost immediately
729                  !!CALL almo_scf_update_ks_energy(qs_env,&
730                  !!        almo_scf_env%almo_scf_energy+denergy)
731                  !!
732
733                  !! print out the results of the decomposition analysis
734                  !CALL dbcsr_hadamard_product(matrix_x,&
735                  !        almo_scf_env%matrix_err_xx(ispin),&
736                  !        matrix_tmp_no)
737                  !CALL dbcsr_scale(matrix_tmp_no,spin_factor)
738                  !CALL dbcsr_filter(matrix_tmp_no,almo_scf_env%eps_filter)
739                  !
740                  !IF (unit_nr>0) THEN
741                  !   WRITE(unit_nr,*)
742                  !   WRITE(unit_nr,'(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
743                  !ENDIF
744
745                  !mynode=dbcsr_mp_mynode(dbcsr_distribution_mp(&
746                  !   dbcsr_distribution(matrix_tmp_no)))
747                  !WRITE(mynodestr,'(I6.6)') mynode
748                  !mylogfile='EDA.'//TRIM(ADJUSTL(mynodestr))
749                  !OPEN (iunit,file=mylogfile,status='REPLACE')
750                  !CALL dbcsr_print_block_sum(matrix_tmp_no,iunit)
751                  !CLOSE(iunit)
752                  !
753                  !CALL dbcsr_release(matrix_tmp_no)
754                  !CALL dbcsr_release(matrix_x)
755
756               ENDIF ! iscf.eq.1
757
758            ENDDO
759
760            ! print out the energy lowering
761            IF (iscf .EQ. 1) THEN
762               CALL energy_lowering_report( &
763                  unit_nr=unit_nr, &
764                  ref_energy=almo_scf_env%almo_scf_energy, &
765                  energy_lowering=denergy_tot)
766               CALL almo_scf_update_ks_energy(qs_env, &
767                                              energy=almo_scf_env%almo_scf_energy, &
768                                              energy_singles_corr=denergy_tot)
769            ENDIF
770
771            ! compute the new KS matrix and new energy
772            IF (.NOT. almo_scf_env%perturbative_delocalization) THEN
773
774               IF (almo_scf_env%smear) THEN
775                  kTS_sum = SUM(almo_scf_env%kTS)
776               ELSE
777                  kTS_sum = 0.0_dp
778               ENDIF
779
780               CALL almo_dm_to_almo_ks(qs_env, &
781                                       almo_scf_env%matrix_p, &
782                                       almo_scf_env%matrix_ks, &
783                                       energy_new, &
784                                       almo_scf_env%eps_filter, &
785                                       almo_scf_env%mat_distr_aos, &
786                                       smear=almo_scf_env%smear, &
787                                       kTS_sum=kTS_sum)
788            ENDIF
789
790         ENDIF ! prepare_to_exit
791
792         IF (almo_scf_env%perturbative_delocalization) THEN
793
794            ! exit after the first step if we do not need the SCF procedure
795            CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, almo_scf_env%mat_distr_aos)
796            converged = .TRUE.
797            prepare_to_exit = .TRUE.
798
799         ELSE ! not a perturbative treatment
800
801            energy_diff = energy_new - energy_old
802            energy_old = energy_new
803            almo_scf_env%almo_scf_energy = energy_new
804
805            t2 = m_walltime()
806            ! brief report on the current SCF loop
807            IF (unit_nr > 0) THEN
808               WRITE (unit_nr, '(T2,A,I6,F20.9,E11.3,E11.3,E11.3,F8.2)') "ALMO SCF", &
809                  iscf, &
810                  energy_new, energy_diff, error_norm, error_norm_0, t2 - t1
811            ENDIF
812            t1 = m_walltime()
813
814         ENDIF
815
816         IF (prepare_to_exit) EXIT
817
818      ENDDO ! end scf cycle
819
820      !! Print number of electrons recovered if smearing was requested
821      IF (almo_scf_env%smear) THEN
822         DO ispin = 1, nspin
823            CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
824            IF (unit_nr > 0) THEN
825               WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
826            END IF
827         END DO
828      END IF
829
830      IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
831         CPABORT("SCF for ALMOs on overlapping domains not converged! ")
832      ENDIF
833
834      DO ispin = 1, nspin
835         CALL release_submatrices(submatrix_mixing_old_blk(:, ispin))
836         CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
837      ENDDO
838      DEALLOCATE (almo_diis)
839      DEALLOCATE (submatrix_mixing_old_blk)
840
841      CALL timestop(handle)
842
843   END SUBROUTINE almo_scf_xalmo_eigensolver
844
845! **************************************************************************************************
846!> \brief Optimization of ALMOs using PCG-like minimizers
847!> \param qs_env ...
848!> \param almo_scf_env ...
849!> \param optimizer   controls the optimization algorithm
850!> \param quench_t ...
851!> \param matrix_t_in ...
852!> \param matrix_t_out ...
853!> \param assume_t0_q0x - since it is extremely difficult to converge the iterative
854!>                        procedure using T as an optimized variable, assume
855!>                        T = T_0 + (1-R_0)*X and optimize X
856!>                        T_0 is assumed to be the zero-delocalization reference
857!> \param perturbation_only - perturbative (do not update Hamiltonian)
858!> \param special_case   to reduce the overhead special cases are implemented:
859!>                       xalmo_case_normal - no special case (i.e. xALMOs)
860!>                       xalmo_case_block_diag
861!>                       xalmo_case_fully_deloc
862!> \par History
863!>       2011.11 created [Rustam Z Khaliullin]
864!> \author Rustam Z Khaliullin
865! **************************************************************************************************
866   SUBROUTINE almo_scf_xalmo_pcg(qs_env, almo_scf_env, optimizer, quench_t, &
867                                 matrix_t_in, matrix_t_out, assume_t0_q0x, perturbation_only, &
868                                 special_case)
869
870      TYPE(qs_environment_type), POINTER                 :: qs_env
871      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
872      TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
873      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
874         INTENT(INOUT)                                   :: quench_t, matrix_t_in, matrix_t_out
875      LOGICAL, INTENT(IN)                                :: assume_t0_q0x, perturbation_only
876      INTEGER, INTENT(IN), OPTIONAL                      :: special_case
877
878      CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_pcg', &
879         routineP = moduleN//':'//routineN
880
881      CHARACTER(LEN=20)                                  :: iter_type
882      INTEGER :: cg_iteration, dim_op, fixed_line_search_niter, handle, idim0, ielem, ispin, &
883         iteration, line_search_iteration, max_iter, my_special_case, ndomains, nmo, nspins, &
884         outer_iteration, outer_max_iter, para_group, prec_type, reim, unit_nr
885      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
886      LOGICAL :: blissful_neglect, converged, just_started, line_search, normalize_orbitals, &
887         optimize_theta, outer_prepare_to_exit, penalty_occ_local, penalty_occ_vol, &
888         prepare_to_exit, reset_conjugator, skip_grad, use_guess
889      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: reim_diag, weights, z2
890      REAL(kind=dp) :: appr_sec_der, beta, denom, denom2, e0, e1, energy_coeff, energy_diff, &
891         energy_new, energy_old, eps_skip_gradients, fval, g0, g1, grad_norm, grad_norm_frob, &
892         line_search_error, localiz_coeff, localization_obj_function, next_step_size_guess, &
893         penalty_amplitude, penalty_func_new, spin_factor, step_size, t1, t2, tempreal
894      REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
895                                                            penalty_occ_vol_g_prefactor, &
896                                                            penalty_occ_vol_h_prefactor
897      TYPE(cell_type), POINTER                           :: cell
898      TYPE(cp_logger_type), POINTER                      :: logger
899      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
900      TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
901      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_sig_sqrti_ii, m_t_in_local, &
902         m_theta, prec_vv, prev_grad, prev_minus_prec_grad, prev_step, siginvTFTsiginv, ST, step, &
903         STsiginv_0, tempNOcc, tempNOcc_1, tempOccOcc
904      TYPE(domain_submatrix_type), ALLOCATABLE, &
905         DIMENSION(:, :)                                 :: bad_modes_projector_down, domain_r_down
906
907      CALL timeset(routineN, handle)
908
909      my_special_case = xalmo_case_normal
910      IF (PRESENT(special_case)) my_special_case = special_case
911
912      ! get a useful output_unit
913      logger => cp_get_default_logger()
914      IF (logger%para_env%ionode) THEN
915         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
916      ELSE
917         unit_nr = -1
918      ENDIF
919
920      nspins = almo_scf_env%nspins
921
922      ! if unprojected XALMOs are optimized
923      ! then we must use the "blissful_neglect" procedure
924      blissful_neglect = .FALSE.
925      IF (my_special_case .EQ. xalmo_case_normal .AND. .NOT. assume_t0_q0x) THEN
926         blissful_neglect = .TRUE.
927      ENDIF
928
929      IF (unit_nr > 0) THEN
930         WRITE (unit_nr, *)
931         SELECT CASE (my_special_case)
932         CASE (xalmo_case_block_diag)
933            WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
934               " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
935         CASE (xalmo_case_fully_deloc)
936            WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
937               " Optimization of fully delocalized MOs ", REPEAT("-", 20)
938         CASE (xalmo_case_normal)
939            IF (blissful_neglect) THEN
940               WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 25), &
941                  " LCP optimization of XALMOs ", REPEAT("-", 26)
942            ELSE
943               WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
944                  " Optimization of XALMOs ", REPEAT("-", 28)
945            ENDIF
946         END SELECT
947         WRITE (unit_nr, *)
948         WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
949            "Objective Function", "Change", "Convergence", "Time"
950         WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
951      ENDIF
952
953      ! set local parameters using developer's keywords
954      ! RZK-warning: change to normal keywords later
955      optimize_theta = almo_scf_env%logical05
956      eps_skip_gradients = almo_scf_env%real01
957
958      ! penalty amplitude adjusts the strength of volume conservation
959      energy_coeff = 1.0_dp !optimizer%opt_penalty%energy_coeff
960      localiz_coeff = 0.0_dp !optimizer%opt_penalty%occ_loc_coeff
961      penalty_amplitude = 0.0_dp !optimizer%opt_penalty%occ_vol_coeff
962      penalty_occ_vol = .FALSE. !( optimizer%opt_penalty%occ_vol_method &
963      !.NE. penalty_type_none .AND. my_special_case .EQ. xalmo_case_fully_deloc )
964      penalty_occ_local = .FALSE. !( optimizer%opt_penalty%occ_loc_method &
965      !.NE. penalty_type_none .AND. my_special_case .EQ. xalmo_case_fully_deloc )
966      normalize_orbitals = penalty_occ_vol .OR. penalty_occ_local
967      ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
968      ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
969      penalty_occ_vol_g_prefactor(:) = 0.0_dp
970      penalty_occ_vol_h_prefactor(:) = 0.0_dp
971      penalty_func_new = 0.0_dp
972
973      ! preconditioner control
974      prec_type = optimizer%preconditioner
975
976      ! control of the line search
977      fixed_line_search_niter = 0 ! init to zero, change when eps is small enough
978
979      IF (nspins == 1) THEN
980         spin_factor = 2.0_dp
981      ELSE
982         spin_factor = 1.0_dp
983      ENDIF
984
985      ALLOCATE (grad_norm_spin(nspins))
986      ALLOCATE (nocc(nspins))
987
988      ! create a local copy of matrix_t_in because
989      ! matrix_t_in and matrix_t_out can be the same matrix
990      ! we need to make sure data in matrix_t_in is intact
991      ! after we start writing to matrix_t_out
992      ALLOCATE (m_t_in_local(nspins))
993      DO ispin = 1, nspins
994         CALL dbcsr_create(m_t_in_local(ispin), &
995                           template=matrix_t_in(ispin), &
996                           matrix_type=dbcsr_type_no_symmetry)
997         CALL dbcsr_copy(m_t_in_local(ispin), matrix_t_in(ispin))
998      ENDDO
999
1000      ! m_theta contains a set of variational parameters
1001      ! that define one-electron orbitals (simple, projected, etc.)
1002      ALLOCATE (m_theta(nspins))
1003      DO ispin = 1, nspins
1004         CALL dbcsr_create(m_theta(ispin), &
1005                           template=matrix_t_out(ispin), &
1006                           matrix_type=dbcsr_type_no_symmetry)
1007      ENDDO
1008
1009      ! Compute localization matrices
1010      IF (penalty_occ_local) THEN
1011
1012         CALL get_qs_env(qs_env=qs_env, &
1013                         matrix_s=qs_matrix_s, &
1014                         cell=cell)
1015
1016         IF (cell%orthorhombic) THEN
1017            dim_op = 3
1018         ELSE
1019            dim_op = 6
1020         END IF
1021         ALLOCATE (weights(6))
1022         weights = 0.0_dp
1023
1024         CALL initialize_weights(cell, weights)
1025
1026         ALLOCATE (op_sm_set_qs(2, dim_op))
1027         ALLOCATE (op_sm_set_almo(2, dim_op))
1028
1029         DO idim0 = 1, dim_op
1030            DO reim = 1, SIZE(op_sm_set_qs, 1)
1031               NULLIFY (op_sm_set_qs(reim, idim0)%matrix)
1032               ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
1033               CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
1034                             name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
1035               CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
1036               NULLIFY (op_sm_set_almo(reim, idim0)%matrix)
1037               ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
1038               CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, almo_scf_env%matrix_s(1), &
1039                             name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
1040               CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
1041            ENDDO
1042         END DO
1043
1044         CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
1045
1046         !CALL matrix_qs_to_almo(op_sm_set_qs, op_sm_set_almo, &
1047         !                       almo_scf_env%mat_distr_aos, .FALSE.)
1048
1049      ENDIF
1050
1051      ! create initial guess from the initial orbitals
1052      CALL xalmo_initial_guess(m_guess=m_theta, &
1053                               m_t_in=m_t_in_local, &
1054                               m_t0=almo_scf_env%matrix_t_blk, &
1055                               m_quench_t=quench_t, &
1056                               m_overlap=almo_scf_env%matrix_s(1), &
1057                               m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
1058                               nspins=nspins, &
1059                               xalmo_history=almo_scf_env%xalmo_history, &
1060                               assume_t0_q0x=assume_t0_q0x, &
1061                               optimize_theta=optimize_theta, &
1062                               envelope_amplitude=almo_scf_env%envelope_amplitude, &
1063                               eps_filter=almo_scf_env%eps_filter, &
1064                               order_lanczos=almo_scf_env%order_lanczos, &
1065                               eps_lanczos=almo_scf_env%eps_lanczos, &
1066                               max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
1067                               nocc_of_domain=almo_scf_env%nocc_of_domain)
1068
1069      ndomains = almo_scf_env%ndomains
1070      ALLOCATE (domain_r_down(ndomains, nspins))
1071      CALL init_submatrices(domain_r_down)
1072      ALLOCATE (bad_modes_projector_down(ndomains, nspins))
1073      CALL init_submatrices(bad_modes_projector_down)
1074
1075      ALLOCATE (prec_vv(nspins))
1076      ALLOCATE (siginvTFTsiginv(nspins))
1077      ALLOCATE (STsiginv_0(nspins))
1078      ALLOCATE (FTsiginv(nspins))
1079      ALLOCATE (ST(nspins))
1080      ALLOCATE (prev_grad(nspins))
1081      ALLOCATE (grad(nspins))
1082      ALLOCATE (prev_step(nspins))
1083      ALLOCATE (step(nspins))
1084      ALLOCATE (prev_minus_prec_grad(nspins))
1085      ALLOCATE (m_sig_sqrti_ii(nspins))
1086      ALLOCATE (tempNOcc(nspins))
1087      ALLOCATE (tempNOcc_1(nspins))
1088      ALLOCATE (tempOccOcc(nspins))
1089      DO ispin = 1, nspins
1090
1091         ! init temporary storage
1092         CALL dbcsr_create(prec_vv(ispin), &
1093                           template=almo_scf_env%matrix_ks(ispin), &
1094                           matrix_type=dbcsr_type_no_symmetry)
1095         CALL dbcsr_create(siginvTFTsiginv(ispin), &
1096                           template=almo_scf_env%matrix_sigma(ispin), &
1097                           matrix_type=dbcsr_type_no_symmetry)
1098         CALL dbcsr_create(STsiginv_0(ispin), &
1099                           template=matrix_t_out(ispin), &
1100                           matrix_type=dbcsr_type_no_symmetry)
1101         CALL dbcsr_create(FTsiginv(ispin), &
1102                           template=matrix_t_out(ispin), &
1103                           matrix_type=dbcsr_type_no_symmetry)
1104         CALL dbcsr_create(ST(ispin), &
1105                           template=matrix_t_out(ispin), &
1106                           matrix_type=dbcsr_type_no_symmetry)
1107         CALL dbcsr_create(prev_grad(ispin), &
1108                           template=matrix_t_out(ispin), &
1109                           matrix_type=dbcsr_type_no_symmetry)
1110         CALL dbcsr_create(grad(ispin), &
1111                           template=matrix_t_out(ispin), &
1112                           matrix_type=dbcsr_type_no_symmetry)
1113         CALL dbcsr_create(prev_step(ispin), &
1114                           template=matrix_t_out(ispin), &
1115                           matrix_type=dbcsr_type_no_symmetry)
1116         CALL dbcsr_create(step(ispin), &
1117                           template=matrix_t_out(ispin), &
1118                           matrix_type=dbcsr_type_no_symmetry)
1119         CALL dbcsr_create(prev_minus_prec_grad(ispin), &
1120                           template=matrix_t_out(ispin), &
1121                           matrix_type=dbcsr_type_no_symmetry)
1122         CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
1123                           template=almo_scf_env%matrix_sigma_inv(ispin), &
1124                           matrix_type=dbcsr_type_no_symmetry)
1125         CALL dbcsr_create(tempNOcc(ispin), &
1126                           template=matrix_t_out(ispin), &
1127                           matrix_type=dbcsr_type_no_symmetry)
1128         CALL dbcsr_create(tempNOcc_1(ispin), &
1129                           template=matrix_t_out(ispin), &
1130                           matrix_type=dbcsr_type_no_symmetry)
1131         CALL dbcsr_create(tempOccOcc(ispin), &
1132                           template=almo_scf_env%matrix_sigma_inv(ispin), &
1133                           matrix_type=dbcsr_type_no_symmetry)
1134
1135         CALL dbcsr_set(step(ispin), 0.0_dp)
1136         CALL dbcsr_set(prev_step(ispin), 0.0_dp)
1137
1138         CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
1139                             nfullrows_total=nocc(ispin))
1140
1141         ! invert S domains if necessary
1142         ! Note: domains for alpha and beta electrons might be different
1143         ! that is why the inversion of the AO overlap is inside the spin loop
1144         IF (my_special_case .EQ. xalmo_case_normal) THEN
1145            CALL construct_domain_s_inv( &
1146               matrix_s=almo_scf_env%matrix_s(1), &
1147               subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
1148               dpattern=quench_t(ispin), &
1149               map=almo_scf_env%domain_map(ispin), &
1150               node_of_domain=almo_scf_env%cpu_of_domain)
1151
1152            CALL construct_domain_s_sqrt( &
1153               matrix_s=almo_scf_env%matrix_s(1), &
1154               subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
1155               subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
1156               dpattern=almo_scf_env%quench_t(ispin), &
1157               map=almo_scf_env%domain_map(ispin), &
1158               node_of_domain=almo_scf_env%cpu_of_domain)
1159
1160         ENDIF
1161
1162         IF (assume_t0_q0x) THEN
1163
1164            ! save S.T_0.siginv_0
1165            IF (my_special_case .EQ. xalmo_case_fully_deloc) THEN
1166               CALL dbcsr_multiply("N", "N", 1.0_dp, &
1167                                   almo_scf_env%matrix_s(1), &
1168                                   almo_scf_env%matrix_t_blk(ispin), &
1169                                   0.0_dp, ST(ispin), &
1170                                   filter_eps=almo_scf_env%eps_filter)
1171               CALL dbcsr_multiply("N", "N", 1.0_dp, &
1172                                   ST(ispin), &
1173                                   almo_scf_env%matrix_sigma_inv_0deloc(ispin), &
1174                                   0.0_dp, STsiginv_0(ispin), &
1175                                   filter_eps=almo_scf_env%eps_filter)
1176            ENDIF
1177
1178            ! construct domain-projector
1179            IF (my_special_case .EQ. xalmo_case_normal) THEN
1180               CALL construct_domain_r_down( &
1181                  matrix_t=almo_scf_env%matrix_t_blk(ispin), &
1182                  matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
1183                  matrix_s=almo_scf_env%matrix_s(1), &
1184                  subm_r_down=domain_r_down(:, ispin), &
1185                  dpattern=quench_t(ispin), &
1186                  map=almo_scf_env%domain_map(ispin), &
1187                  node_of_domain=almo_scf_env%cpu_of_domain, &
1188                  filter_eps=almo_scf_env%eps_filter)
1189            ENDIF
1190
1191         ENDIF ! assume_t0_q0x
1192
1193         ! localization functional
1194         IF (penalty_occ_local) THEN
1195
1196            ! compute S.R0.B.R0.S
1197            CALL dbcsr_multiply("N", "N", 1.0_dp, &
1198                                almo_scf_env%matrix_s(1), &
1199                                matrix_t_in(ispin), &
1200                                0.0_dp, tempNOcc(ispin), &
1201                                filter_eps=almo_scf_env%eps_filter)
1202            CALL dbcsr_multiply("N", "N", 1.0_dp, &
1203                                tempNOcc(ispin), &
1204                                almo_scf_env%matrix_sigma_inv(ispin), &
1205                                0.0_dp, tempNOCC_1(ispin), &
1206                                filter_eps=almo_scf_env%eps_filter)
1207
1208            DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
1209               DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
1210
1211                  CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix, &
1212                                         almo_scf_env%mat_distr_aos, .FALSE.)
1213
1214                  CALL dbcsr_multiply("N", "N", 1.0_dp, &
1215                                      op_sm_set_almo(reim, idim0)%matrix, &
1216                                      matrix_t_in(ispin), &
1217                                      0.0_dp, tempNOcc(ispin), &
1218                                      filter_eps=almo_scf_env%eps_filter)
1219
1220                  CALL dbcsr_multiply("T", "N", 1.0_dp, &
1221                                      matrix_t_in(ispin), &
1222                                      tempNOcc(ispin), &
1223                                      0.0_dp, tempOccOcc(ispin), &
1224                                      filter_eps=almo_scf_env%eps_filter)
1225
1226                  CALL dbcsr_multiply("N", "N", 1.0_dp, &
1227                                      tempNOCC_1(ispin), &
1228                                      tempOccOcc(ispin), &
1229                                      0.0_dp, tempNOcc(ispin), &
1230                                      filter_eps=almo_scf_env%eps_filter)
1231
1232                  CALL dbcsr_multiply("N", "T", 1.0_dp, &
1233                                      tempNOcc(ispin), &
1234                                      tempNOcc_1(ispin), &
1235                                      0.0_dp, op_sm_set_almo(reim, idim0)%matrix, &
1236                                      filter_eps=almo_scf_env%eps_filter)
1237
1238               ENDDO
1239            ENDDO ! end loop over idim0
1240
1241         ENDIF !penalty_occ_local
1242
1243      ENDDO ! ispin
1244
1245      ! start the outer SCF loop
1246      outer_max_iter = optimizer%max_iter_outer_loop
1247      outer_prepare_to_exit = .FALSE.
1248      outer_iteration = 0
1249      grad_norm = 0.0_dp
1250      grad_norm_frob = 0.0_dp
1251      use_guess = .FALSE.
1252
1253      DO
1254
1255         ! start the inner SCF loop
1256         max_iter = optimizer%max_iter
1257         prepare_to_exit = .FALSE.
1258         line_search = .FALSE.
1259         converged = .FALSE.
1260         iteration = 0
1261         cg_iteration = 0
1262         line_search_iteration = 0
1263         energy_new = 0.0_dp
1264         energy_old = 0.0_dp
1265         energy_diff = 0.0_dp
1266         localization_obj_function = 0.0_dp
1267         line_search_error = 0.0_dp
1268
1269         t1 = m_walltime()
1270
1271         DO
1272
1273            just_started = (iteration .EQ. 0) .AND. (outer_iteration .EQ. 0)
1274
1275            CALL main_var_to_xalmos_and_loss_func( &
1276               almo_scf_env=almo_scf_env, &
1277               qs_env=qs_env, &
1278               m_main_var_in=m_theta, &
1279               m_t_out=matrix_t_out, &
1280               m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
1281               energy_out=energy_new, &
1282               penalty_out=penalty_func_new, &
1283               m_FTsiginv_out=FTsiginv, &
1284               m_siginvTFTsiginv_out=siginvTFTsiginv, &
1285               m_ST_out=ST, &
1286               m_STsiginv0_in=STsiginv_0, &
1287               m_quench_t_in=quench_t, &
1288               domain_r_down_in=domain_r_down, &
1289               assume_t0_q0x=assume_t0_q0x, &
1290               just_started=just_started, &
1291               optimize_theta=optimize_theta, &
1292               normalize_orbitals=normalize_orbitals, &
1293               perturbation_only=perturbation_only, &
1294               do_penalty=penalty_occ_vol, &
1295               special_case=my_special_case)
1296            IF (penalty_occ_vol) THEN
1297               ! this is not pure energy anymore
1298               energy_new = energy_new + penalty_func_new
1299            ENDIF
1300            DO ispin = 1, nspins
1301               IF (penalty_occ_vol) THEN
1302                  penalty_occ_vol_g_prefactor(ispin) = &
1303                     -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
1304                  penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
1305               ENDIF
1306            ENDDO
1307
1308            localization_obj_function = 0.0_dp
1309            ! RZK-warning: This block must be combined with the loss function
1310            IF (penalty_occ_local) THEN
1311               DO ispin = 1, nspins
1312
1313                  ! LzL insert localization penalty
1314                  localization_obj_function = 0.0_dp
1315                  CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), nfullrows_total=nmo)
1316                  ALLOCATE (z2(nmo))
1317                  ALLOCATE (reim_diag(nmo))
1318
1319                  CALL dbcsr_get_info(tempOccOcc(ispin), group=para_group)
1320
1321                  DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
1322
1323                     z2(:) = 0.0_dp
1324
1325                     DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
1326
1327                        !CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix, &
1328                        !                       almo_scf_env%mat_distr_aos, .FALSE.)
1329                        CALL dbcsr_multiply("N", "N", 1.0_dp, &
1330                                            op_sm_set_almo(reim, idim0)%matrix, &
1331                                            matrix_t_out(ispin), &
1332                                            0.0_dp, tempNOcc(ispin), &
1333                                            filter_eps=almo_scf_env%eps_filter)
1334                        !warning - save time by computing only the diagonal elements
1335                        CALL dbcsr_multiply("T", "N", 1.0_dp, &
1336                                            matrix_t_out(ispin), &
1337                                            tempNOcc(ispin), &
1338                                            0.0_dp, tempOccOcc(ispin), &
1339                                            filter_eps=almo_scf_env%eps_filter)
1340
1341                        reim_diag = 0.0_dp
1342                        CALL dbcsr_get_diag(tempOccOcc(ispin), reim_diag)
1343                        CALL mp_sum(reim_diag, para_group)
1344                        z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
1345
1346                     ENDDO
1347
1348                     DO ielem = 1, nmo
1349                        SELECT CASE (2) ! allows for selection of different spread functionals
1350                        CASE (1) ! functional =  -W_I * log( |z_I|^2 )
1351                           fval = -weights(idim0)*LOG(ABS(z2(ielem)))
1352                        CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
1353                           fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
1354                        CASE (3) ! functional =  W_I * ( 1 - |z_I| )
1355                           fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
1356                        END SELECT
1357                        localization_obj_function = localization_obj_function + fval
1358                     ENDDO
1359
1360                  ENDDO ! end loop over idim0
1361
1362                  DEALLOCATE (z2)
1363                  DEALLOCATE (reim_diag)
1364
1365                  energy_new = energy_new + localiz_coeff*localization_obj_function
1366
1367               ENDDO ! ispin
1368            ENDIF ! penalty_occ_local
1369
1370            DO ispin = 1, nspins
1371
1372               IF (just_started .AND. almo_mathematica) THEN
1373                  IF (ispin .GT. 1) CPWARN("Mathematica files will be overwritten")
1374                  CALL print_mathematica_matrix(almo_scf_env%matrix_s(1), "matrixS.dat")
1375                  CALL print_mathematica_matrix(almo_scf_env%matrix_ks(ispin), "matrixF.dat")
1376                  CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixT.dat")
1377                  CALL print_mathematica_matrix(quench_t(ispin), "matrixQ.dat")
1378               ENDIF
1379
1380               ! save the previous gradient to compute beta
1381               ! do it only if the previous grad was computed
1382               ! for .NOT.line_search
1383               IF (line_search_iteration .EQ. 0 .AND. iteration .NE. 0) &
1384                  CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
1385
1386            ENDDO ! ispin
1387
1388            ! compute the energy gradient if necessary
1389            skip_grad = (iteration .GT. 0 .AND. &
1390                         fixed_line_search_niter .NE. 0 .AND. &
1391                         line_search_iteration .NE. fixed_line_search_niter)
1392
1393            IF (.NOT. skip_grad) THEN
1394
1395               DO ispin = 1, nspins
1396
1397                  CALL compute_gradient( &
1398                     m_grad_out=grad(ispin), &
1399                     m_ks=almo_scf_env%matrix_ks(ispin), &
1400                     m_s=almo_scf_env%matrix_s(1), &
1401                     m_t=matrix_t_out(ispin), &
1402                     m_t0=almo_scf_env%matrix_t_blk(ispin), &
1403                     m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
1404                     m_quench_t=quench_t(ispin), &
1405                     m_FTsiginv=FTsiginv(ispin), &
1406                     m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
1407                     m_ST=ST(ispin), &
1408                     m_STsiginv0=STsiginv_0(ispin), &
1409                     m_theta=m_theta(ispin), &
1410                     m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
1411                     domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
1412                     domain_r_down=domain_r_down(:, ispin), &
1413                     cpu_of_domain=almo_scf_env%cpu_of_domain, &
1414                     domain_map=almo_scf_env%domain_map(ispin), &
1415                     assume_t0_q0x=assume_t0_q0x, &
1416                     optimize_theta=optimize_theta, &
1417                     normalize_orbitals=normalize_orbitals, &
1418                     penalty_occ_vol=penalty_occ_vol, &
1419                     penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
1420                     envelope_amplitude=almo_scf_env%envelope_amplitude, &
1421                     eps_filter=almo_scf_env%eps_filter, &
1422                     spin_factor=spin_factor, &
1423                     special_case=my_special_case, &
1424                     penalty_occ_local=penalty_occ_local, &
1425                     op_sm_set=op_sm_set_almo, &
1426                     weights=weights, &
1427                     energy_coeff=energy_coeff, &
1428                     localiz_coeff=localiz_coeff)
1429
1430               ENDDO ! ispin
1431
1432            ENDIF ! skip_grad
1433
1434            ! if unprojected XALMOs are optimized then compute both
1435            ! HessianInv/preconditioner and the "bad-mode" projector
1436
1437            IF (blissful_neglect) THEN
1438               DO ispin = 1, nspins
1439                  !compute the prec only for the first step,
1440                  !but project the gradient every step
1441                  IF (iteration .EQ. 0) THEN
1442                     CALL compute_preconditioner( &
1443                        domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
1444                        bad_modes_projector_down_out=bad_modes_projector_down(:, ispin), &
1445                        m_prec_out=prec_vv(ispin), &
1446                        m_ks=almo_scf_env%matrix_ks(ispin), &
1447                        m_s=almo_scf_env%matrix_s(1), &
1448                        m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
1449                        m_quench_t=quench_t(ispin), &
1450                        m_FTsiginv=FTsiginv(ispin), &
1451                        m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
1452                        m_ST=ST(ispin), &
1453                        para_env=almo_scf_env%para_env, &
1454                        blacs_env=almo_scf_env%blacs_env, &
1455                        nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
1456                        domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
1457                        domain_s_inv_half=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
1458                        domain_s_half=almo_scf_env%domain_s_sqrt(:, ispin), &
1459                        domain_r_down=domain_r_down(:, ispin), &
1460                        cpu_of_domain=almo_scf_env%cpu_of_domain, &
1461                        domain_map=almo_scf_env%domain_map(ispin), &
1462                        assume_t0_q0x=assume_t0_q0x, &
1463                        penalty_occ_vol=penalty_occ_vol, &
1464                        penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
1465                        eps_filter=almo_scf_env%eps_filter, &
1466                        neg_thr=optimizer%neglect_threshold, &
1467                        spin_factor=spin_factor, &
1468                        skip_inversion=.FALSE., &
1469                        special_case=my_special_case)
1470                  ENDIF
1471                  ! remove bad modes from the gradient
1472                  CALL apply_domain_operators( &
1473                     matrix_in=grad(ispin), &
1474                     matrix_out=grad(ispin), &
1475                     operator1=almo_scf_env%domain_s_inv(:, ispin), &
1476                     operator2=bad_modes_projector_down(:, ispin), &
1477                     dpattern=quench_t(ispin), &
1478                     map=almo_scf_env%domain_map(ispin), &
1479                     node_of_domain=almo_scf_env%cpu_of_domain, &
1480                     my_action=1, &
1481                     filter_eps=almo_scf_env%eps_filter)
1482
1483               ENDDO ! ispin
1484
1485            ENDIF ! blissful neglect
1486
1487            ! check convergence and other exit criteria
1488            DO ispin = 1, nspins
1489               CALL dbcsr_norm(grad(ispin), dbcsr_norm_maxabsnorm, &
1490                               norm_scalar=grad_norm_spin(ispin))
1491            ENDDO ! ispin
1492            grad_norm = MAXVAL(grad_norm_spin)
1493
1494            converged = (grad_norm .LE. optimizer%eps_error)
1495            IF (converged .OR. (iteration .GE. max_iter)) THEN
1496               prepare_to_exit = .TRUE.
1497            ENDIF
1498            ! if early stopping is on do at least one iteration
1499            IF (optimizer%early_stopping_on .AND. just_started) &
1500               prepare_to_exit = .FALSE.
1501
1502            IF (grad_norm .LT. almo_scf_env%eps_prev_guess) &
1503               use_guess = .TRUE.
1504
1505            ! it is not time to exit just yet
1506            IF (.NOT. prepare_to_exit) THEN
1507
1508               ! check the gradient along the step direction
1509               ! and decide whether to switch to the line-search mode
1510               ! do not do this in the first iteration
1511               IF (iteration .NE. 0) THEN
1512
1513                  IF (fixed_line_search_niter .EQ. 0) THEN
1514
1515                     ! enforce at least one line search
1516                     ! without even checking the error
1517                     IF (.NOT. line_search) THEN
1518
1519                        line_search = .TRUE.
1520                        line_search_iteration = line_search_iteration + 1
1521
1522                     ELSE
1523
1524                        ! check the line-search error and decide whether to
1525                        ! change the direction
1526                        line_search_error = 0.0_dp
1527                        denom = 0.0_dp
1528                        denom2 = 0.0_dp
1529
1530                        DO ispin = 1, nspins
1531
1532                           CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
1533                           line_search_error = line_search_error + tempreal
1534                           CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
1535                           denom = denom + tempreal
1536                           CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
1537                           denom2 = denom2 + tempreal
1538
1539                        ENDDO ! ispin
1540
1541                        ! cosine of the angle between the step and grad
1542                        ! (must be close to zero at convergence)
1543                        line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
1544
1545                        IF (ABS(line_search_error) .GT. optimizer%lin_search_eps_error) THEN
1546                           line_search = .TRUE.
1547                           line_search_iteration = line_search_iteration + 1
1548                        ELSE
1549                           line_search = .FALSE.
1550                           line_search_iteration = 0
1551                           IF (grad_norm .LT. eps_skip_gradients) THEN
1552                              fixed_line_search_niter = ABS(almo_scf_env%integer04)
1553                           ENDIF
1554                        ENDIF
1555
1556                     ENDIF
1557
1558                  ELSE ! decision for fixed_line_search_niter
1559
1560                     IF (.NOT. line_search) THEN
1561                        line_search = .TRUE.
1562                        line_search_iteration = line_search_iteration + 1
1563                     ELSE
1564                        IF (line_search_iteration .EQ. fixed_line_search_niter) THEN
1565                           line_search = .FALSE.
1566                           line_search_iteration = 0
1567                           line_search_iteration = line_search_iteration + 1
1568                        ENDIF
1569                     ENDIF
1570
1571                  ENDIF ! fixed_line_search_niter fork
1572
1573               ENDIF ! iteration.ne.0
1574
1575               IF (line_search) THEN
1576                  energy_diff = 0.0_dp
1577               ELSE
1578                  energy_diff = energy_new - energy_old
1579                  energy_old = energy_new
1580               ENDIF
1581
1582               ! update the step direction
1583               IF (.NOT. line_search) THEN
1584
1585                  !IF (unit_nr>0) THEN
1586                  !   WRITE(unit_nr,*) "....updating step direction...."
1587                  !ENDIF
1588
1589                  cg_iteration = cg_iteration + 1
1590
1591                  ! save the previous step
1592                  DO ispin = 1, nspins
1593                     CALL dbcsr_copy(prev_step(ispin), step(ispin))
1594                  ENDDO ! ispin
1595
1596                  ! compute the new step (apply preconditioner if available)
1597                  SELECT CASE (prec_type)
1598                  CASE (xalmo_prec_full)
1599
1600                     ! solving approximate Newton eq in the full (linearized) space
1601                     CALL newton_grad_to_step( &
1602                        optimizer=almo_scf_env%opt_xalmo_newton_pcg_solver, &
1603                        m_grad=grad(:), &
1604                        m_delta=step(:), &
1605                        m_s=almo_scf_env%matrix_s(:), &
1606                        m_ks=almo_scf_env%matrix_ks(:), &
1607                        m_siginv=almo_scf_env%matrix_sigma_inv(:), &
1608                        m_quench_t=quench_t(:), &
1609                        m_FTsiginv=FTsiginv(:), &
1610                        m_siginvTFTsiginv=siginvTFTsiginv(:), &
1611                        m_ST=ST(:), &
1612                        m_t=matrix_t_out(:), &
1613                        m_sig_sqrti_ii=m_sig_sqrti_ii(:), &
1614                        domain_s_inv=almo_scf_env%domain_s_inv(:, :), &
1615                        domain_r_down=domain_r_down(:, :), &
1616                        domain_map=almo_scf_env%domain_map(:), &
1617                        cpu_of_domain=almo_scf_env%cpu_of_domain, &
1618                        nocc_of_domain=almo_scf_env%nocc_of_domain(:, :), &
1619                        para_env=almo_scf_env%para_env, &
1620                        blacs_env=almo_scf_env%blacs_env, &
1621                        eps_filter=almo_scf_env%eps_filter, &
1622                        optimize_theta=optimize_theta, &
1623                        penalty_occ_vol=penalty_occ_vol, &
1624                        normalize_orbitals=normalize_orbitals, &
1625                        penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(:), &
1626                        penalty_occ_vol_pf2=penalty_occ_vol_h_prefactor(:), &
1627                        special_case=my_special_case &
1628                        )
1629
1630                  CASE (xalmo_prec_domain)
1631
1632                     ! compute and invert preconditioner?
1633                     IF (.NOT. blissful_neglect .AND. &
1634                         ((just_started .AND. perturbation_only) .OR. &
1635                          (iteration .EQ. 0 .AND. (.NOT. perturbation_only))) &
1636                         ) THEN
1637
1638                        ! computing preconditioner
1639                        DO ispin = 1, nspins
1640                           CALL compute_preconditioner( &
1641                              domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
1642                              m_prec_out=prec_vv(ispin), &
1643                              m_ks=almo_scf_env%matrix_ks(ispin), &
1644                              m_s=almo_scf_env%matrix_s(1), &
1645                              m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
1646                              m_quench_t=quench_t(ispin), &
1647                              m_FTsiginv=FTsiginv(ispin), &
1648                              m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
1649                              m_ST=ST(ispin), &
1650                              para_env=almo_scf_env%para_env, &
1651                              blacs_env=almo_scf_env%blacs_env, &
1652                              nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
1653                              domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
1654                              domain_r_down=domain_r_down(:, ispin), &
1655                              cpu_of_domain=almo_scf_env%cpu_of_domain, &
1656                              domain_map=almo_scf_env%domain_map(ispin), &
1657                              assume_t0_q0x=assume_t0_q0x, &
1658                              penalty_occ_vol=penalty_occ_vol, &
1659                              penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
1660                              eps_filter=almo_scf_env%eps_filter, &
1661                              neg_thr=0.5_dp, &
1662                              spin_factor=spin_factor, &
1663                              skip_inversion=.FALSE., &
1664                              special_case=my_special_case)
1665                        ENDDO ! ispin
1666                     ENDIF ! compute_prec
1667
1668                     !IF (unit_nr>0) THEN
1669                     !   WRITE(unit_nr,*) "....applying precomputed preconditioner...."
1670                     !ENDIF
1671
1672                     IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
1673                         my_special_case .EQ. xalmo_case_fully_deloc) THEN
1674
1675                        DO ispin = 1, nspins
1676
1677                           CALL dbcsr_multiply("N", "N", -1.0_dp, &
1678                                               prec_vv(ispin), &
1679                                               grad(ispin), &
1680                                               0.0_dp, step(ispin), &
1681                                               filter_eps=almo_scf_env%eps_filter)
1682
1683                        ENDDO ! ispin
1684
1685                     ELSE
1686
1687                        !!! RZK-warning Currently for non-theta only
1688                        IF (optimize_theta) THEN
1689                           CPABORT("theta is NYI")
1690                        ENDIF
1691
1692                        DO ispin = 1, nspins
1693
1694                           CALL apply_domain_operators( &
1695                              matrix_in=grad(ispin), &
1696                              matrix_out=step(ispin), &
1697                              operator1=almo_scf_env%domain_preconditioner(:, ispin), &
1698                              dpattern=quench_t(ispin), &
1699                              map=almo_scf_env%domain_map(ispin), &
1700                              node_of_domain=almo_scf_env%cpu_of_domain, &
1701                              my_action=0, &
1702                              filter_eps=almo_scf_env%eps_filter)
1703                           CALL dbcsr_scale(step(ispin), -1.0_dp)
1704
1705                           !CALL dbcsr_copy(m_tmp_no_3,&
1706                           !        quench_t(ispin))
1707                           !CALL dbcsr_function_of_elements(m_tmp_no_3,&
1708                           !        func=dbcsr_func_inverse,&
1709                           !        a0=0.0_dp,&
1710                           !        a1=1.0_dp)
1711                           !CALL dbcsr_copy(m_tmp_no_2,step)
1712                           !CALL dbcsr_hadamard_product(&
1713                           !        m_tmp_no_2,&
1714                           !        m_tmp_no_3,&
1715                           !        step)
1716                           !CALL dbcsr_copy(m_tmp_no_3,quench_t(ispin))
1717
1718                        ENDDO ! ispin
1719
1720                     ENDIF ! special case
1721
1722                  CASE (xalmo_prec_zero)
1723
1724                     ! no preconditioner
1725                     DO ispin = 1, nspins
1726
1727                        CALL dbcsr_copy(step(ispin), grad(ispin))
1728                        CALL dbcsr_scale(step(ispin), -1.0_dp)
1729
1730                     ENDDO ! ispin
1731
1732                  END SELECT ! preconditioner type fork
1733
1734                  ! check whether we need to reset conjugate directions
1735                  IF (iteration .EQ. 0) THEN
1736                     reset_conjugator = .TRUE.
1737                  ENDIF
1738
1739                  ! compute the conjugation coefficient - beta
1740                  IF (.NOT. reset_conjugator) THEN
1741
1742                     CALL compute_cg_beta( &
1743                        beta=beta, &
1744                        reset_conjugator=reset_conjugator, &
1745                        conjugator=optimizer%conjugator, &
1746                        grad=grad(:), &
1747                        prev_grad=prev_grad(:), &
1748                        step=step(:), &
1749                        prev_step=prev_step(:), &
1750                        prev_minus_prec_grad=prev_minus_prec_grad(:) &
1751                        )
1752
1753                  ENDIF
1754
1755                  IF (reset_conjugator) THEN
1756
1757                     beta = 0.0_dp
1758                     IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
1759                        WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
1760                     ENDIF
1761                     reset_conjugator = .FALSE.
1762
1763                  ENDIF
1764
1765                  ! save the preconditioned gradient (useful for beta)
1766                  DO ispin = 1, nspins
1767
1768                     CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
1769
1770                     !IF (unit_nr>0) THEN
1771                     !   WRITE(unit_nr,*) "....final beta....", beta
1772                     !ENDIF
1773
1774                     ! conjugate the step direction
1775                     CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
1776
1777                  ENDDO ! ispin
1778
1779               ENDIF ! update the step direction
1780
1781               ! estimate the step size
1782               IF (.NOT. line_search) THEN
1783                  ! we just changed the direction and
1784                  ! we have only E and grad from the current step
1785                  ! it is not enouhg to compute step_size - just guess it
1786                  e0 = energy_new
1787                  g0 = 0.0_dp
1788                  DO ispin = 1, nspins
1789                     CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
1790                     g0 = g0 + tempreal
1791                  ENDDO ! ispin
1792                  IF (iteration .EQ. 0) THEN
1793                     step_size = optimizer%lin_search_step_size_guess
1794                  ELSE
1795                     IF (next_step_size_guess .LE. 0.0_dp) THEN
1796                        step_size = optimizer%lin_search_step_size_guess
1797                     ELSE
1798                        ! take the last value
1799                        step_size = next_step_size_guess*1.05_dp
1800                     ENDIF
1801                  ENDIF
1802                  !IF (unit_nr > 0) THEN
1803                  !   WRITE (unit_nr, '(A2,3F12.5)') &
1804                  !      "EG", e0, g0, step_size
1805                  !ENDIF
1806                  next_step_size_guess = step_size
1807               ELSE
1808                  IF (fixed_line_search_niter .EQ. 0) THEN
1809                     e1 = energy_new
1810                     g1 = 0.0_dp
1811                     DO ispin = 1, nspins
1812                        CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
1813                        g1 = g1 + tempreal
1814                     ENDDO ! ispin
1815                     ! we have accumulated some points along this direction
1816                     ! use only the most recent g0 (quadratic approximation)
1817                     appr_sec_der = (g1 - g0)/step_size
1818                     !IF (unit_nr > 0) THEN
1819                     !   WRITE (unit_nr, '(A2,7F12.5)') &
1820                     !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
1821                     !ENDIF
1822                     step_size = -g1/appr_sec_der
1823                     e0 = e1
1824                     g0 = g1
1825                  ELSE
1826                     ! use e0, g0 and e1 to compute g1 and make a step
1827                     ! if the next iteration is also line_search
1828                     ! use e1 and the calculated g1 as e0 and g0
1829                     e1 = energy_new
1830                     appr_sec_der = 2.0*((e1 - e0)/step_size - g0)/step_size
1831                     g1 = appr_sec_der*step_size + g0
1832                     !IF (unit_nr > 0) THEN
1833                     !   WRITE (unit_nr, '(A2,7F12.5)') &
1834                     !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
1835                     !ENDIF
1836                     !appr_sec_der=(g1-g0)/step_size
1837                     step_size = -g1/appr_sec_der
1838                     e0 = e1
1839                     g0 = g1
1840                  ENDIF
1841                  next_step_size_guess = next_step_size_guess + step_size
1842               ENDIF
1843
1844               ! update theta
1845               DO ispin = 1, nspins
1846                  CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
1847               ENDDO ! ispin
1848
1849            ENDIF ! not.prepare_to_exit
1850
1851            IF (line_search) THEN
1852               iter_type = "LS"
1853            ELSE
1854               iter_type = "CG"
1855            ENDIF
1856
1857            t2 = m_walltime()
1858            IF (unit_nr > 0) THEN
1859               iter_type = TRIM("ALMO SCF "//iter_type)
1860               WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
1861                  iter_type, iteration, &
1862                  energy_new, energy_diff, grad_norm, &
1863                  t2 - t1
1864               IF (penalty_occ_local .OR. penalty_occ_vol) THEN
1865                  WRITE (unit_nr, '(T2,A25,F23.10)') &
1866                     "Energy component:", (energy_new - penalty_func_new - localization_obj_function)
1867               ENDIF
1868               IF (penalty_occ_local) THEN
1869                  WRITE (unit_nr, '(T2,A25,F23.10)') &
1870                     "Localization component:", localization_obj_function
1871               ENDIF
1872               IF (penalty_occ_vol) THEN
1873                  WRITE (unit_nr, '(T2,A25,F23.10)') &
1874                     "Penalty component:", penalty_func_new
1875               ENDIF
1876            ENDIF
1877
1878            IF (my_special_case .EQ. xalmo_case_block_diag) THEN
1879               IF (penalty_occ_vol) THEN
1880                  almo_scf_env%almo_scf_energy = energy_new - penalty_func_new - localization_obj_function
1881               ELSE
1882                  almo_scf_env%almo_scf_energy = energy_new - localization_obj_function
1883               ENDIF
1884            ENDIF
1885
1886            t1 = m_walltime()
1887
1888            iteration = iteration + 1
1889            IF (prepare_to_exit) EXIT
1890
1891         ENDDO ! inner SCF loop
1892
1893         IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
1894            outer_prepare_to_exit = .TRUE.
1895         ENDIF
1896
1897         outer_iteration = outer_iteration + 1
1898         IF (outer_prepare_to_exit) EXIT
1899
1900      ENDDO ! outer SCF loop
1901
1902      DO ispin = 1, nspins
1903         IF (converged .AND. almo_mathematica) THEN
1904            IF (ispin .GT. 1) CPWARN("Mathematica files will be overwritten")
1905            CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixTf.dat")
1906         ENDIF
1907      ENDDO ! ispin
1908
1909      ! post SCF-loop calculations
1910      IF (converged) THEN
1911
1912         CALL wrap_up_xalmo_scf( &
1913            qs_env=qs_env, &
1914            almo_scf_env=almo_scf_env, &
1915            perturbation_in=perturbation_only, &
1916            m_xalmo_in=matrix_t_out, &
1917            m_quench_in=quench_t, &
1918            energy_inout=energy_new)
1919
1920      ENDIF ! if converged
1921
1922      DO ispin = 1, nspins
1923         CALL dbcsr_release(prec_vv(ispin))
1924         CALL dbcsr_release(STsiginv_0(ispin))
1925         CALL dbcsr_release(ST(ispin))
1926         CALL dbcsr_release(FTsiginv(ispin))
1927         CALL dbcsr_release(siginvTFTsiginv(ispin))
1928         CALL dbcsr_release(prev_grad(ispin))
1929         CALL dbcsr_release(prev_step(ispin))
1930         CALL dbcsr_release(grad(ispin))
1931         CALL dbcsr_release(step(ispin))
1932         CALL dbcsr_release(prev_minus_prec_grad(ispin))
1933         CALL dbcsr_release(m_theta(ispin))
1934         CALL dbcsr_release(m_t_in_local(ispin))
1935         CALL dbcsr_release(m_sig_sqrti_ii(ispin))
1936         CALL release_submatrices(domain_r_down(:, ispin))
1937         CALL release_submatrices(bad_modes_projector_down(:, ispin))
1938         CALL dbcsr_release(tempNOcc(ispin))
1939         CALL dbcsr_release(tempNOcc_1(ispin))
1940         CALL dbcsr_release(tempOccOcc(ispin))
1941      ENDDO ! ispin
1942
1943      DEALLOCATE (tempNOcc)
1944      DEALLOCATE (tempNOcc_1)
1945      DEALLOCATE (tempOccOcc)
1946      DEALLOCATE (prec_vv)
1947      DEALLOCATE (siginvTFTsiginv)
1948      DEALLOCATE (STsiginv_0)
1949      DEALLOCATE (FTsiginv)
1950      DEALLOCATE (ST)
1951      DEALLOCATE (prev_grad)
1952      DEALLOCATE (grad)
1953      DEALLOCATE (prev_step)
1954      DEALLOCATE (step)
1955      DEALLOCATE (prev_minus_prec_grad)
1956      DEALLOCATE (m_sig_sqrti_ii)
1957
1958      DEALLOCATE (domain_r_down)
1959      DEALLOCATE (bad_modes_projector_down)
1960
1961      DEALLOCATE (penalty_occ_vol_g_prefactor)
1962      DEALLOCATE (penalty_occ_vol_h_prefactor)
1963      DEALLOCATE (grad_norm_spin)
1964      DEALLOCATE (nocc)
1965
1966      DEALLOCATE (m_theta, m_t_in_local)
1967      IF (penalty_occ_local) THEN
1968         DO idim0 = 1, dim_op
1969            DO reim = 1, SIZE(op_sm_set_qs, 1)
1970               DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
1971               DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
1972            ENDDO
1973         END DO
1974         DEALLOCATE (op_sm_set_qs)
1975         DEALLOCATE (op_sm_set_almo)
1976         DEALLOCATE (weights)
1977      ENDIF
1978
1979      IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
1980         CPABORT("Optimization not converged! ")
1981      ENDIF
1982
1983      CALL timestop(handle)
1984
1985   END SUBROUTINE almo_scf_xalmo_pcg
1986
1987! **************************************************************************************************
1988!> \brief Optimization of NLMOs using PCG minimizers
1989!> \param qs_env ...
1990!> \param optimizer   controls the optimization algorithm
1991!> \param matrix_s - AO overlap (NAOs x NAOs)
1992!> \param matrix_mo_in - initial MOs (NAOs x NMOs)
1993!> \param matrix_mo_out - final MOs (NAOs x NMOs)
1994!> \param template_matrix_sigma - template (NMOs x NMOs)
1995!> \param overlap_determinant - the determinant of the MOs overlap
1996!> \param mat_distr_aos - info on the distribution of AOs
1997!> \param virtuals ...
1998!> \param eps_filter ...
1999!> \par History
2000!>       2018.10 created [Rustam Z Khaliullin]
2001!> \author Rustam Z Khaliullin
2002! **************************************************************************************************
2003   SUBROUTINE almo_scf_construct_nlmos(qs_env, optimizer, &
2004                                       matrix_s, matrix_mo_in, matrix_mo_out, &
2005                                       template_matrix_sigma, overlap_determinant, &
2006                                       mat_distr_aos, virtuals, eps_filter)
2007      TYPE(qs_environment_type), POINTER                 :: qs_env
2008      TYPE(optimizer_options_type), INTENT(INOUT)        :: optimizer
2009      TYPE(dbcsr_type), INTENT(IN)                       :: matrix_s
2010      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
2011         INTENT(INOUT)                                   :: matrix_mo_in, matrix_mo_out
2012      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
2013         INTENT(IN)                                      :: template_matrix_sigma
2014      REAL(KIND=dp), INTENT(INOUT)                       :: overlap_determinant
2015      INTEGER, INTENT(IN)                                :: mat_distr_aos
2016      LOGICAL, INTENT(IN)                                :: virtuals
2017      REAL(KIND=dp), INTENT(IN)                          :: eps_filter
2018
2019      CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_construct_nlmos', &
2020         routineP = moduleN//':'//routineN
2021
2022      CHARACTER(LEN=30)                                  :: iter_type, print_string
2023      INTEGER :: cg_iteration, dim_op, handle, iatom, idim0, isgf, ispin, iteration, &
2024         line_search_iteration, linear_search_type, max_iter, natom, ncol, nspins, &
2025         outer_iteration, outer_max_iter, para_group, prec_type, reim, unit_nr
2026      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf, last_sgf, nocc, nsgf
2027      LOGICAL                                            :: converged, d_bfgs, just_started, l_bfgs, &
2028                                                            line_search, outer_prepare_to_exit, &
2029                                                            prepare_to_exit, reset_conjugator
2030      REAL(KIND=dp) :: appr_sec_der, beta, bfgs_rho, bfgs_sum, denom, denom2, e0, e1, g0, g0sign, &
2031         g1, g1sign, grad_norm, line_search_error, localization_obj_function, &
2032         localization_obj_function_ispin, next_step_size_guess, obj_function_ispin, objf_diff, &
2033         objf_new, objf_old, penalty_amplitude, penalty_func_ispin, penalty_func_new, spin_factor, &
2034         step_size, t1, t2, tempreal
2035      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: diagonal, grad_norm_spin, &
2036                                                            penalty_vol_prefactor, &
2037                                                            suggested_vol_penalty, weights
2038      TYPE(cell_type), POINTER                           :: cell
2039      TYPE(cp_logger_type), POINTER                      :: logger
2040      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
2041      TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
2042      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: approx_inv_hessian, bfgs_s, bfgs_y, grad, &
2043         m_S0, m_sig_sqrti_ii, m_siginv, m_sigma, m_t_mo_local, m_theta, m_theta_normalized, &
2044         prev_grad, prev_m_theta, prev_minus_prec_grad, prev_step, step, tempNOcc1, tempOccOcc1, &
2045         tempOccOcc2, tempOccOcc3
2046      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :, :)  :: m_B0
2047      TYPE(lbfgs_history_type)                           :: nlmo_lbfgs_history
2048      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
2049      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
2050
2051      CALL timeset(routineN, handle)
2052
2053      ! get a useful output_unit
2054      logger => cp_get_default_logger()
2055      IF (logger%para_env%mepos == logger%para_env%source) THEN
2056         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
2057      ELSE
2058         unit_nr = -1
2059      ENDIF
2060
2061      nspins = SIZE(matrix_mo_in)
2062
2063      IF (unit_nr > 0) THEN
2064         WRITE (unit_nr, *)
2065         IF (.NOT. virtuals) THEN
2066            WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
2067               " Optimization of occupied NLMOs ", REPEAT("-", 23)
2068         ELSE
2069            WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
2070               " Optimization of virtual NLMOs ", REPEAT("-", 24)
2071         ENDIF
2072         WRITE (unit_nr, *)
2073         WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
2074            "Objective Function", "Change", "Convergence", "Time"
2075         WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
2076      ENDIF
2077
2078      NULLIFY (particle_set)
2079
2080      CALL get_qs_env(qs_env=qs_env, &
2081                      matrix_s=qs_matrix_s, &
2082                      cell=cell, &
2083                      particle_set=particle_set, &
2084                      qs_kind_set=qs_kind_set)
2085
2086      natom = SIZE(particle_set, 1)
2087      ALLOCATE (first_sgf(natom))
2088      ALLOCATE (last_sgf(natom))
2089      ALLOCATE (nsgf(natom))
2090      !   construction of
2091      CALL get_particle_set(particle_set, qs_kind_set, &
2092                            first_sgf=first_sgf, last_sgf=last_sgf, nsgf=nsgf)
2093
2094      ! m_theta contains a set of variational parameters
2095      ! that define one-electron orbitals
2096      ALLOCATE (m_theta(nspins))
2097      DO ispin = 1, nspins
2098         CALL dbcsr_create(m_theta(ispin), &
2099                           template=template_matrix_sigma(ispin), &
2100                           matrix_type=dbcsr_type_no_symmetry)
2101         ! create initial guess for the main variable - identity matrix
2102         CALL dbcsr_set(m_theta(ispin), 0.0_dp)
2103         CALL dbcsr_add_on_diag(m_theta(ispin), 1.0_dp)
2104      ENDDO
2105
2106      SELECT CASE (optimizer%opt_penalty%operator_type)
2107      CASE (op_loc_berry)
2108
2109         IF (cell%orthorhombic) THEN
2110            dim_op = 3
2111         ELSE
2112            dim_op = 6
2113         END IF
2114         ALLOCATE (weights(6))
2115         weights = 0.0_dp
2116         CALL initialize_weights(cell, weights)
2117         ALLOCATE (op_sm_set_qs(2, dim_op))
2118         ALLOCATE (op_sm_set_almo(2, dim_op))
2119         ! allocate space for T0^t.B.T0
2120         ALLOCATE (m_B0(2, dim_op, nspins))
2121         DO idim0 = 1, dim_op
2122            DO reim = 1, SIZE(op_sm_set_qs, 1)
2123               NULLIFY (op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix)
2124               ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
2125               ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
2126               CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
2127                             name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
2128               CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
2129               CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, matrix_s, &
2130                             name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
2131               CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
2132               DO ispin = 1, nspins
2133                  CALL dbcsr_create(m_B0(reim, idim0, ispin), &
2134                                    template=m_theta(ispin), &
2135                                    matrix_type=dbcsr_type_no_symmetry)
2136                  CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
2137               ENDDO
2138            ENDDO
2139         ENDDO
2140
2141         CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
2142
2143      CASE (op_loc_pipek)
2144
2145         dim_op = natom
2146         ALLOCATE (weights(dim_op))
2147         weights = 1.0_dp
2148
2149         ALLOCATE (m_B0(1, dim_op, nspins))
2150         !m_B0 first dim is 1 now!
2151         DO idim0 = 1, dim_op
2152            DO reim = 1, 1 !SIZE(op_sm_set_qs, 1)
2153               DO ispin = 1, nspins
2154                  CALL dbcsr_create(m_B0(reim, idim0, ispin), &
2155                                    template=m_theta(ispin), &
2156                                    matrix_type=dbcsr_type_no_symmetry)
2157                  CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
2158               ENDDO
2159            ENDDO
2160         ENDDO
2161
2162      END SELECT
2163
2164      ! penalty amplitude adjusts the strenght of volume conservation
2165      penalty_amplitude = optimizer%opt_penalty%penalty_strength
2166      !penalty_occ_vol = ( optimizer%opt_penalty%occ_vol_method .NE. penalty_type_none )
2167      !penalty_local = ( optimizer%opt_penalty%occ_loc_method .NE. penalty_type_none )
2168
2169      ! preconditioner control
2170      prec_type = optimizer%preconditioner
2171
2172      ! use diagonal BFGS if preconditioner is set
2173      d_bfgs = .FALSE.
2174      l_bfgs = .FALSE.
2175      IF (prec_type .NE. xalmo_prec_zero) l_bfgs = .TRUE.
2176      IF (l_bfgs .AND. (optimizer%conjugator .NE. cg_zero)) THEN
2177         CPABORT("Cannot use conjugators with BFGS")
2178      ENDIF
2179      IF (l_bfgs) THEN
2180         CALL lbfgs_create(nlmo_lbfgs_history, nspins, nstore=10)
2181      ENDIF
2182
2183      IF (nspins == 1) THEN
2184         spin_factor = 2.0_dp
2185      ELSE
2186         spin_factor = 1.0_dp
2187      ENDIF
2188
2189      ALLOCATE (grad_norm_spin(nspins))
2190      ALLOCATE (nocc(nspins))
2191      ALLOCATE (penalty_vol_prefactor(nspins))
2192      ALLOCATE (suggested_vol_penalty(nspins))
2193
2194      ! create a local copy of matrix_mo_in because
2195      ! matrix_mo_in and matrix_mo_out can be the same matrix
2196      ! we need to make sure data in matrix_mo_in is intact
2197      ! after we start writing to matrix_mo_out
2198      ALLOCATE (m_t_mo_local(nspins))
2199      DO ispin = 1, nspins
2200         CALL dbcsr_create(m_t_mo_local(ispin), &
2201                           template=matrix_mo_in(ispin), &
2202                           matrix_type=dbcsr_type_no_symmetry)
2203         CALL dbcsr_copy(m_t_mo_local(ispin), matrix_mo_in(ispin))
2204      ENDDO
2205
2206      ALLOCATE (approx_inv_hessian(nspins))
2207      ALLOCATE (m_theta_normalized(nspins))
2208      ALLOCATE (prev_m_theta(nspins))
2209      ALLOCATE (m_S0(nspins))
2210      ALLOCATE (prev_grad(nspins))
2211      ALLOCATE (grad(nspins))
2212      ALLOCATE (prev_step(nspins))
2213      ALLOCATE (step(nspins))
2214      ALLOCATE (prev_minus_prec_grad(nspins))
2215      ALLOCATE (m_sig_sqrti_ii(nspins))
2216      ALLOCATE (m_sigma(nspins))
2217      ALLOCATE (m_siginv(nspins))
2218      ALLOCATE (tempNOcc1(nspins))
2219      ALLOCATE (tempOccOcc1(nspins))
2220      ALLOCATE (tempOccOcc2(nspins))
2221      ALLOCATE (tempOccOcc3(nspins))
2222      ALLOCATE (bfgs_y(nspins))
2223      ALLOCATE (bfgs_s(nspins))
2224
2225      DO ispin = 1, nspins
2226
2227         ! init temporary storage
2228         CALL dbcsr_create(tempNOcc1(ispin), &
2229                           template=matrix_mo_out(ispin), &
2230                           matrix_type=dbcsr_type_no_symmetry)
2231         CALL dbcsr_create(approx_inv_hessian(ispin), &
2232                           template=m_theta(ispin), &
2233                           matrix_type=dbcsr_type_no_symmetry)
2234         CALL dbcsr_create(m_theta_normalized(ispin), &
2235                           template=m_theta(ispin), &
2236                           matrix_type=dbcsr_type_no_symmetry)
2237         CALL dbcsr_create(prev_m_theta(ispin), &
2238                           template=m_theta(ispin), &
2239                           matrix_type=dbcsr_type_no_symmetry)
2240         CALL dbcsr_create(m_S0(ispin), &
2241                           template=m_theta(ispin), &
2242                           matrix_type=dbcsr_type_no_symmetry)
2243         CALL dbcsr_create(prev_grad(ispin), &
2244                           template=m_theta(ispin), &
2245                           matrix_type=dbcsr_type_no_symmetry)
2246         CALL dbcsr_create(grad(ispin), &
2247                           template=m_theta(ispin), &
2248                           matrix_type=dbcsr_type_no_symmetry)
2249         CALL dbcsr_create(prev_step(ispin), &
2250                           template=m_theta(ispin), &
2251                           matrix_type=dbcsr_type_no_symmetry)
2252         CALL dbcsr_create(step(ispin), &
2253                           template=m_theta(ispin), &
2254                           matrix_type=dbcsr_type_no_symmetry)
2255         CALL dbcsr_create(prev_minus_prec_grad(ispin), &
2256                           template=m_theta(ispin), &
2257                           matrix_type=dbcsr_type_no_symmetry)
2258         CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
2259                           template=m_theta(ispin), &
2260                           matrix_type=dbcsr_type_no_symmetry)
2261         CALL dbcsr_create(m_sigma(ispin), &
2262                           template=m_theta(ispin), &
2263                           matrix_type=dbcsr_type_no_symmetry)
2264         CALL dbcsr_create(m_siginv(ispin), &
2265                           template=m_theta(ispin), &
2266                           matrix_type=dbcsr_type_no_symmetry)
2267         CALL dbcsr_create(tempOccOcc1(ispin), &
2268                           template=m_theta(ispin), &
2269                           matrix_type=dbcsr_type_no_symmetry)
2270         CALL dbcsr_create(tempOccOcc2(ispin), &
2271                           template=m_theta(ispin), &
2272                           matrix_type=dbcsr_type_no_symmetry)
2273         CALL dbcsr_create(tempOccOcc3(ispin), &
2274                           template=m_theta(ispin), &
2275                           matrix_type=dbcsr_type_no_symmetry)
2276         CALL dbcsr_create(bfgs_s(ispin), &
2277                           template=m_theta(ispin), &
2278                           matrix_type=dbcsr_type_no_symmetry)
2279         CALL dbcsr_create(bfgs_y(ispin), &
2280                           template=m_theta(ispin), &
2281                           matrix_type=dbcsr_type_no_symmetry)
2282
2283         CALL dbcsr_set(step(ispin), 0.0_dp)
2284         CALL dbcsr_set(prev_step(ispin), 0.0_dp)
2285
2286         CALL dbcsr_get_info(template_matrix_sigma(ispin), &
2287                             nfullrows_total=nocc(ispin))
2288
2289         penalty_vol_prefactor(ispin) = -penalty_amplitude !KEEP: * spin_factor * nocc(ispin)
2290
2291         ! compute m_S0=T0^t.S.T0
2292         CALL dbcsr_multiply("N", "N", 1.0_dp, &
2293                             matrix_s, &
2294                             m_t_mo_local(ispin), &
2295                             0.0_dp, tempNOcc1(ispin), &
2296                             filter_eps=eps_filter)
2297         CALL dbcsr_multiply("T", "N", 1.0_dp, &
2298                             m_t_mo_local(ispin), &
2299                             tempNOcc1(ispin), &
2300                             0.0_dp, m_S0(ispin), &
2301                             filter_eps=eps_filter)
2302
2303         SELECT CASE (optimizer%opt_penalty%operator_type)
2304
2305         CASE (op_loc_berry)
2306
2307            ! compute m_B0=T0^t.B.T0
2308            DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
2309
2310               DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
2311
2312                  CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix, &
2313                                         mat_distr_aos, .FALSE.)
2314
2315                  CALL dbcsr_multiply("N", "N", 1.0_dp, &
2316                                      op_sm_set_almo(reim, idim0)%matrix, &
2317                                      m_t_mo_local(ispin), &
2318                                      0.0_dp, tempNOcc1(ispin), &
2319                                      filter_eps=eps_filter)
2320
2321                  CALL dbcsr_multiply("T", "N", 1.0_dp, &
2322                                      m_t_mo_local(ispin), &
2323                                      tempNOcc1(ispin), &
2324                                      0.0_dp, m_B0(reim, idim0, ispin), &
2325                                      filter_eps=eps_filter)
2326
2327                  DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
2328                  DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
2329
2330               ENDDO
2331
2332            ENDDO ! end loop over idim0
2333
2334         CASE (op_loc_pipek)
2335
2336            ! compute m_B0=T0^t.B.T0
2337            DO iatom = 1, natom ! this loop is over "miller" ind
2338
2339               isgf = first_sgf(iatom)
2340               ncol = nsgf(iatom)
2341
2342               CALL dbcsr_multiply("N", "N", 1.0_dp, &
2343                                   matrix_s, &
2344                                   m_t_mo_local(ispin), &
2345                                   0.0_dp, tempNOcc1(ispin), &
2346                                   filter_eps=eps_filter)
2347
2348               CALL dbcsr_multiply("T", "N", 0.5_dp, &
2349                                   m_t_mo_local(ispin), &
2350                                   tempNOcc1(ispin), &
2351                                   0.0_dp, m_B0(1, iatom, ispin), &
2352                                   first_k=isgf, last_k=isgf + ncol - 1, &
2353                                   filter_eps=eps_filter)
2354
2355               CALL dbcsr_multiply("N", "N", 1.0_dp, &
2356                                   matrix_s, &
2357                                   m_t_mo_local(ispin), &
2358                                   0.0_dp, tempNOcc1(ispin), &
2359                                   first_k=isgf, last_k=isgf + ncol - 1, &
2360                                   filter_eps=eps_filter)
2361
2362               CALL dbcsr_multiply("T", "N", 0.5_dp, &
2363                                   m_t_mo_local(ispin), &
2364                                   tempNOcc1(ispin), &
2365                                   1.0_dp, m_B0(1, iatom, ispin), &
2366                                   filter_eps=eps_filter)
2367
2368            ENDDO ! end loop over iatom
2369
2370         END SELECT
2371
2372      ENDDO ! ispin
2373
2374      IF (optimizer%opt_penalty%operator_type .EQ. op_loc_berry) THEN
2375         DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
2376            DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
2377               DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
2378               DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
2379            ENDDO
2380         ENDDO
2381         DEALLOCATE (op_sm_set_qs, op_sm_set_almo)
2382      ENDIF
2383
2384      ! start the outer SCF loop
2385      outer_max_iter = optimizer%max_iter_outer_loop
2386      outer_prepare_to_exit = .FALSE.
2387      outer_iteration = 0
2388      grad_norm = 0.0_dp
2389      penalty_func_new = 0.0_dp
2390      linear_search_type = 1 ! safe restart, no quadratic assumption, takes more steps
2391      localization_obj_function = 0.0_dp
2392      penalty_func_new = 0.0_dp
2393
2394      DO
2395
2396         ! start the inner SCF loop
2397         max_iter = optimizer%max_iter
2398         prepare_to_exit = .FALSE.
2399         line_search = .FALSE.
2400         converged = .FALSE.
2401         iteration = 0
2402         cg_iteration = 0
2403         line_search_iteration = 0
2404         obj_function_ispin = 0.0_dp
2405         objf_new = 0.0_dp
2406         objf_old = 0.0_dp
2407         objf_diff = 0.0_dp
2408         line_search_error = 0.0_dp
2409         t1 = m_walltime()
2410         next_step_size_guess = 0.0_dp
2411
2412         DO
2413
2414            just_started = (iteration .EQ. 0) .AND. (outer_iteration .EQ. 0)
2415
2416            DO ispin = 1, nspins
2417
2418               CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), group=para_group)
2419
2420               ! compute diagonal (a^t.sigma0.a)^(-1/2)
2421               CALL dbcsr_multiply("N", "N", 1.0_dp, &
2422                                   m_S0(ispin), m_theta(ispin), 0.0_dp, &
2423                                   tempOccOcc1(ispin), &
2424                                   filter_eps=eps_filter)
2425               CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
2426               CALL dbcsr_add_on_diag(m_sig_sqrti_ii(ispin), 1.0_dp)
2427               CALL dbcsr_multiply("T", "N", 1.0_dp, &
2428                                   m_theta(ispin), tempOccOcc1(ispin), 0.0_dp, &
2429                                   m_sig_sqrti_ii(ispin), &
2430                                   retain_sparsity=.TRUE.)
2431               ALLOCATE (diagonal(nocc(ispin)))
2432               CALL dbcsr_get_diag(m_sig_sqrti_ii(ispin), diagonal)
2433               CALL mp_sum(diagonal, para_group)
2434               ! TODO: works for zero diagonal elements?
2435               diagonal(:) = 1.0_dp/SQRT(diagonal(:))
2436               CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
2437               CALL dbcsr_set_diag(m_sig_sqrti_ii(ispin), diagonal)
2438               DEALLOCATE (diagonal)
2439
2440               CALL dbcsr_multiply("N", "N", 1.0_dp, &
2441                                   m_theta(ispin), &
2442                                   m_sig_sqrti_ii(ispin), &
2443                                   0.0_dp, m_theta_normalized(ispin), &
2444                                   filter_eps=eps_filter)
2445
2446               ! compute new orbitals
2447               CALL dbcsr_multiply("N", "N", 1.0_dp, &
2448                                   m_t_mo_local(ispin), &
2449                                   m_theta_normalized(ispin), &
2450                                   0.0_dp, matrix_mo_out(ispin), &
2451                                   filter_eps=eps_filter)
2452
2453            ENDDO
2454
2455            ! compute objective function
2456            localization_obj_function = 0.0_dp
2457            penalty_func_new = 0.0_dp
2458            DO ispin = 1, nspins
2459
2460               CALL compute_obj_nlmos( &
2461                  !obj_function_ispin=obj_function_ispin, &
2462                  localization_obj_function_ispin=localization_obj_function_ispin, &
2463                  penalty_func_ispin=penalty_func_ispin, &
2464                  overlap_determinant=overlap_determinant, &
2465                  m_sigma=m_sigma(ispin), &
2466                  nocc=nocc(ispin), &
2467                  m_B0=m_B0(:, :, ispin), &
2468                  m_theta_normalized=m_theta_normalized(ispin), &
2469                  template_matrix_mo=matrix_mo_out(ispin), &
2470                  weights=weights, &
2471                  m_S0=m_S0(ispin), &
2472                  just_started=just_started, &
2473                  penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
2474                  penalty_amplitude=penalty_amplitude, &
2475                  eps_filter=eps_filter)
2476
2477               localization_obj_function = localization_obj_function + localization_obj_function_ispin
2478               penalty_func_new = penalty_func_new + penalty_func_ispin
2479
2480            ENDDO ! ispin
2481            objf_new = penalty_func_new + localization_obj_function
2482
2483            DO ispin = 1, nspins
2484               ! save the previous gradient to compute beta
2485               ! do it only if the previous grad was computed
2486               ! for .NOT.line_search
2487               IF (line_search_iteration .EQ. 0 .AND. iteration .NE. 0) THEN
2488                  CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
2489               ENDIF
2490
2491            ENDDO ! ispin
2492
2493            ! compute the gradient
2494            DO ispin = 1, nspins
2495
2496               CALL invert_Hotelling( &
2497                  matrix_inverse=m_siginv(ispin), &
2498                  matrix=m_sigma(ispin), &
2499                  threshold=eps_filter*10.0_dp, &
2500                  filter_eps=eps_filter, &
2501                  silent=.FALSE.)
2502
2503               CALL compute_gradient_nlmos( &
2504                  m_grad_out=grad(ispin), &
2505                  m_B0=m_B0(:, :, ispin), &
2506                  weights=weights, &
2507                  m_S0=m_S0(ispin), &
2508                  m_theta_normalized=m_theta_normalized(ispin), &
2509                  m_siginv=m_siginv(ispin), &
2510                  m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
2511                  penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
2512                  eps_filter=eps_filter, &
2513                  suggested_vol_penalty=suggested_vol_penalty(ispin))
2514
2515            ENDDO ! ispin
2516
2517            ! check convergence and other exit criteria
2518            DO ispin = 1, nspins
2519               CALL dbcsr_norm(grad(ispin), dbcsr_norm_maxabsnorm, &
2520                               norm_scalar=grad_norm_spin(ispin))
2521            ENDDO ! ispin
2522            grad_norm = MAXVAL(grad_norm_spin)
2523
2524            converged = (grad_norm .LE. optimizer%eps_error)
2525            IF (converged .OR. (iteration .GE. max_iter)) THEN
2526               prepare_to_exit = .TRUE.
2527            ENDIF
2528
2529            ! it is not time to exit just yet
2530            IF (.NOT. prepare_to_exit) THEN
2531
2532               ! check the gradient along the step direction
2533               ! and decide whether to switch to the line-search mode
2534               ! do not do this in the first iteration
2535               IF (iteration .NE. 0) THEN
2536
2537                  ! enforce at least one line search
2538                  ! without even checking the error
2539                  IF (.NOT. line_search) THEN
2540
2541                     line_search = .TRUE.
2542                     line_search_iteration = line_search_iteration + 1
2543
2544                  ELSE
2545
2546                     ! check the line-search error and decide whether to
2547                     ! change the direction
2548                     line_search_error = 0.0_dp
2549                     denom = 0.0_dp
2550                     denom2 = 0.0_dp
2551
2552                     DO ispin = 1, nspins
2553
2554                        CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
2555                        line_search_error = line_search_error + tempreal
2556                        CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
2557                        denom = denom + tempreal
2558                        CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
2559                        denom2 = denom2 + tempreal
2560
2561                     ENDDO ! ispin
2562
2563                     ! cosine of the angle between the step and grad
2564                     ! (must be close to zero at convergence)
2565                     line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
2566
2567                     IF (ABS(line_search_error) .GT. optimizer%lin_search_eps_error) THEN
2568                        line_search = .TRUE.
2569                        line_search_iteration = line_search_iteration + 1
2570                     ELSE
2571                        line_search = .FALSE.
2572                        line_search_iteration = 0
2573                     ENDIF
2574
2575                  ENDIF
2576
2577               ENDIF ! iteration.ne.0
2578
2579               IF (line_search) THEN
2580                  objf_diff = 0.0_dp
2581               ELSE
2582                  objf_diff = objf_new - objf_old
2583                  objf_old = objf_new
2584               ENDIF
2585
2586               ! update the step direction
2587               IF (.NOT. line_search) THEN
2588
2589                  cg_iteration = cg_iteration + 1
2590
2591                  ! save the previous step
2592                  DO ispin = 1, nspins
2593                     CALL dbcsr_copy(prev_step(ispin), step(ispin))
2594                  ENDDO ! ispin
2595
2596                  ! compute the new step:
2597                  ! if available use second derivative info - bfgs, hessian, preconditioner
2598                  IF (prec_type .EQ. xalmo_prec_zero) THEN ! no second derivatives
2599
2600                     ! no preconditioner
2601                     DO ispin = 1, nspins
2602
2603                        CALL dbcsr_copy(step(ispin), grad(ispin))
2604                        CALL dbcsr_scale(step(ispin), -1.0_dp)
2605
2606                     ENDDO ! ispin
2607
2608                  ELSE ! use second derivatives
2609
2610                     ! compute and invert hessian/precond?
2611                     IF (iteration .EQ. 0) THEN
2612
2613                        IF (d_bfgs) THEN
2614
2615                           ! create matrix filled with 1.0 here
2616                           CALL fill_matrix_with_ones(approx_inv_hessian(1))
2617                           IF (nspins .GT. 1) THEN
2618                              DO ispin = 2, nspins
2619                                 CALL dbcsr_copy(approx_inv_hessian(ispin), approx_inv_hessian(1))
2620                              ENDDO
2621                           ENDIF
2622
2623                        ELSE IF (l_bfgs) THEN
2624
2625                           CALL lbfgs_seed(nlmo_lbfgs_history, m_theta, grad)
2626                           DO ispin = 1, nspins
2627                              CALL dbcsr_copy(step(ispin), grad(ispin))
2628                              CALL dbcsr_scale(step(ispin), -1.0_dp)
2629                           ENDDO ! ispin
2630
2631                        ELSE
2632
2633                           ! computing preconditioner
2634                           DO ispin = 1, nspins
2635
2636                              ! TODO: write preconditioner code later
2637                              ! For now, create matrix filled with 1.0 here
2638                              CALL fill_matrix_with_ones(approx_inv_hessian(ispin))
2639                              !CALL compute_preconditioner(&
2640                              !       m_prec_out=approx_hessian(ispin),&
2641                              !       m_ks=almo_scf_env%matrix_ks(ispin),&
2642                              !       m_s=matrix_s,&
2643                              !       m_siginv=almo_scf_env%template_matrix_sigma(ispin),&
2644                              !       m_quench_t=quench_t(ispin),&
2645                              !       m_FTsiginv=FTsiginv(ispin),&
2646                              !       m_siginvTFTsiginv=siginvTFTsiginv(ispin),&
2647                              !       m_ST=ST(ispin),&
2648                              !       para_env=almo_scf_env%para_env,&
2649                              !       blacs_env=almo_scf_env%blacs_env,&
2650                              !       nocc_of_domain=almo_scf_env%nocc_of_domain(:,ispin),&
2651                              !       domain_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
2652                              !       domain_r_down=domain_r_down(:,ispin),&
2653                              !       cpu_of_domain=almo_scf_env%cpu_of_domain,&
2654                              !       domain_map=almo_scf_env%domain_map(ispin),&
2655                              !       assume_t0_q0x=assume_t0_q0x,&
2656                              !       penalty_occ_vol=penalty_occ_vol,&
2657                              !       penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin),&
2658                              !       eps_filter=eps_filter,&
2659                              !       neg_thr=0.5_dp,&
2660                              !       spin_factor=spin_factor,&
2661                              !       special_case=my_special_case)
2662                              !CALL invert hessian
2663                           ENDDO ! ispin
2664
2665                        ENDIF
2666
2667                     ELSE ! not iteration zero
2668
2669                        ! update approx inverse hessian
2670                        IF (d_bfgs) THEN ! diagonal BFGS
2671
2672                           DO ispin = 1, nspins
2673
2674                              ! compute s and y
2675                              CALL dbcsr_copy(bfgs_y(ispin), grad(ispin))
2676                              CALL dbcsr_add(bfgs_y(ispin), prev_grad(ispin), 1.0_dp, -1.0_dp)
2677                              CALL dbcsr_copy(bfgs_s(ispin), m_theta(ispin))
2678                              CALL dbcsr_add(bfgs_s(ispin), prev_m_theta(ispin), 1.0_dp, -1.0_dp)
2679
2680                              ! compute rho
2681                              CALL dbcsr_dot(grad(ispin), step(ispin), bfgs_rho)
2682                              bfgs_rho = 1.0_dp/bfgs_rho
2683
2684                              ! compute the sum of the squared elements of bfgs_y
2685                              CALL dbcsr_dot(bfgs_y(ispin), bfgs_y(ispin), bfgs_sum)
2686
2687                              ! first term: start collecting new inv hessian in this temp matrix
2688                              CALL dbcsr_copy(tempOccOcc2(ispin), approx_inv_hessian(ispin))
2689
2690                              ! second term: + rho * s * s
2691                              CALL dbcsr_hadamard_product(bfgs_s(ispin), bfgs_s(ispin), tempOccOcc1(ispin))
2692                              CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc1(ispin), 1.0_dp, bfgs_rho)
2693
2694                              ! third term: + rho^2 * s * s * H * sum_(y * y)
2695                              CALL dbcsr_hadamard_product(tempOccOcc1(ispin), &
2696                                                          approx_inv_hessian(ispin), tempOccOcc3(ispin))
2697                              CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
2698                                             1.0_dp, bfgs_rho*bfgs_rho*bfgs_sum)
2699
2700                              ! fourth term: - 2 * rho * s * y * H
2701                              CALL dbcsr_hadamard_product(bfgs_y(ispin), &
2702                                                          approx_inv_hessian(ispin), tempOccOcc1(ispin))
2703                              CALL dbcsr_hadamard_product(bfgs_s(ispin), tempOccOcc1(ispin), tempOccOcc3(ispin))
2704                              CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
2705                                             1.0_dp, -2.0_dp*bfgs_rho)
2706
2707                              CALL dbcsr_copy(approx_inv_hessian(ispin), tempOccOcc2(ispin))
2708
2709                           ENDDO
2710
2711                        ELSE IF (l_bfgs) THEN
2712
2713                           CALL lbfgs_get_direction(nlmo_lbfgs_history, m_theta, grad, step)
2714
2715                        ENDIF ! which method?
2716
2717                     ENDIF ! compute approximate inverse hessian
2718
2719                     IF (.NOT. l_bfgs) THEN
2720
2721                        DO ispin = 1, nspins
2722
2723                           CALL dbcsr_hadamard_product(approx_inv_hessian(ispin), &
2724                                                       grad(ispin), step(ispin))
2725                           CALL dbcsr_scale(step(ispin), -1.0_dp)
2726
2727                        ENDDO ! ispin
2728
2729                     ENDIF
2730
2731                  ENDIF ! second derivative type fork
2732
2733                  ! check whether we need to reset conjugate directions
2734                  IF (iteration .EQ. 0) THEN
2735                     reset_conjugator = .TRUE.
2736                  ENDIF
2737
2738                  ! compute the conjugation coefficient - beta
2739                  IF (.NOT. reset_conjugator) THEN
2740                     CALL compute_cg_beta( &
2741                        beta=beta, &
2742                        reset_conjugator=reset_conjugator, &
2743                        conjugator=optimizer%conjugator, &
2744                        grad=grad(:), &
2745                        prev_grad=prev_grad(:), &
2746                        step=step(:), &
2747                        prev_step=prev_step(:), &
2748                        prev_minus_prec_grad=prev_minus_prec_grad(:) &
2749                        )
2750
2751                  ENDIF
2752
2753                  IF (reset_conjugator) THEN
2754
2755                     beta = 0.0_dp
2756                     IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
2757                        WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
2758                     ENDIF
2759                     reset_conjugator = .FALSE.
2760
2761                  ENDIF
2762
2763                  ! save the preconditioned gradient (useful for beta)
2764                  DO ispin = 1, nspins
2765
2766                     CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
2767
2768                     ! conjugate the step direction
2769                     CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
2770
2771                  ENDDO ! ispin
2772
2773               ENDIF ! update the step direction
2774
2775               ! estimate the step size
2776               IF (.NOT. line_search) THEN
2777                  ! we just changed the direction and
2778                  ! we have only E and grad from the current step
2779                  ! it is not enough to compute step_size - just guess it
2780                  e0 = objf_new
2781                  g0 = 0.0_dp
2782                  DO ispin = 1, nspins
2783                     CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
2784                     g0 = g0 + tempreal
2785                  ENDDO ! ispin
2786                  g0sign = SIGN(1.0_dp, g0) ! sign of g0
2787                  IF (linear_search_type .EQ. 1) THEN ! this is quadratic LS
2788                     IF (iteration .EQ. 0) THEN
2789                        step_size = optimizer%lin_search_step_size_guess
2790                     ELSE
2791                        IF (next_step_size_guess .LE. 0.0_dp) THEN
2792                           step_size = optimizer%lin_search_step_size_guess
2793                        ELSE
2794                           ! take the last value
2795                           step_size = optimizer%lin_search_step_size_guess
2796                           !step_size = next_step_size_guess*1.05_dp
2797                        ENDIF
2798                     ENDIF
2799                  ELSE IF (linear_search_type .EQ. 2) THEN ! this is cautious LS
2800                     ! this LS type is designed not to trust quadratic appr
2801                     ! so it always restarts from a safe step size
2802                     step_size = optimizer%lin_search_step_size_guess
2803                  ENDIF
2804                  IF (unit_nr > 0) THEN
2805                     WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
2806                     WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", 0.0_dp, g0, step_size
2807                  ENDIF
2808                  next_step_size_guess = step_size
2809               ELSE ! this is not the first line search
2810                  e1 = objf_new
2811                  g1 = 0.0_dp
2812                  DO ispin = 1, nspins
2813                     CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
2814                     g1 = g1 + tempreal
2815                  ENDDO ! ispin
2816                  g1sign = SIGN(1.0_dp, g1) ! sign of g1
2817                  IF (linear_search_type .EQ. 1) THEN
2818                     ! we have accumulated some points along this direction
2819                     ! use only the most recent g0 (quadratic approximation)
2820                     appr_sec_der = (g1 - g0)/step_size
2821                     !IF (unit_nr > 0) THEN
2822                     !   WRITE (unit_nr, '(A2,7F12.5)') &
2823                     !      "DT", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
2824                     !ENDIF
2825                     step_size = -g1/appr_sec_der
2826                  ELSE IF (linear_search_type .EQ. 2) THEN
2827                     ! alternative method for finding step size
2828                     ! do not use quadratic approximation, only gradient signs
2829                     IF (g1sign .NE. g0sign) THEN
2830                        step_size = -step_size/2.0;
2831                     ELSE
2832                        step_size = step_size*1.5;
2833                     ENDIF
2834                  ENDIF
2835                  ! end alternative LS types
2836                  IF (unit_nr > 0) THEN
2837                     WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
2838                     WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", next_step_size_guess, g1, step_size
2839                  ENDIF
2840                  e0 = e1
2841                  g0 = g1
2842                  g0sign = g1sign
2843                  next_step_size_guess = next_step_size_guess + step_size
2844               ENDIF
2845
2846               ! update theta
2847               DO ispin = 1, nspins
2848                  IF (.NOT. line_search) THEN ! we prepared to perform the first line search
2849                     ! "previous" refers to the previous CG step, not the previous LS step
2850                     CALL dbcsr_copy(prev_m_theta(ispin), m_theta(ispin))
2851                  ENDIF
2852                  CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
2853               ENDDO ! ispin
2854
2855            ENDIF ! not.prepare_to_exit
2856
2857            IF (line_search) THEN
2858               iter_type = "LS"
2859            ELSE
2860               iter_type = "CG"
2861            ENDIF
2862
2863            t2 = m_walltime()
2864            IF (unit_nr > 0) THEN
2865               iter_type = TRIM("NLMO OPT "//iter_type)
2866               WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
2867                  iter_type, iteration, &
2868                  objf_new, objf_diff, grad_norm, &
2869                  t2 - t1
2870               WRITE (unit_nr, '(T2,A19,F23.10)') &
2871                  "Localization:", localization_obj_function
2872               WRITE (unit_nr, '(T2,A19,F23.10)') &
2873                  "Orthogonalization:", penalty_func_new
2874            ENDIF
2875            t1 = m_walltime()
2876
2877            iteration = iteration + 1
2878            IF (prepare_to_exit) EXIT
2879
2880         ENDDO ! inner loop
2881
2882         IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
2883            outer_prepare_to_exit = .TRUE.
2884         ENDIF
2885
2886         outer_iteration = outer_iteration + 1
2887         IF (outer_prepare_to_exit) EXIT
2888
2889      ENDDO ! outer loop
2890
2891      ! return the optimal determinant penalty
2892      optimizer%opt_penalty%penalty_strength = 0.0_dp
2893      DO ispin = 1, nspins
2894         optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength + &
2895                                                  (-1.0_dp)*penalty_vol_prefactor(ispin)
2896      ENDDO
2897      optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength/nspins
2898
2899      IF (converged) THEN
2900         iter_type = "Final"
2901      ELSE
2902         iter_type = "Unconverged"
2903      ENDIF
2904
2905      IF (unit_nr > 0) THEN
2906         WRITE (unit_nr, '()')
2907         print_string = TRIM(iter_type)//" localization:"
2908         WRITE (unit_nr, '(T2,A29,F30.10)') &
2909            print_string, localization_obj_function
2910         print_string = TRIM(iter_type)//" determinant:"
2911         WRITE (unit_nr, '(T2,A29,F30.10)') &
2912            print_string, overlap_determinant
2913         print_string = TRIM(iter_type)//" penalty strength:"
2914         WRITE (unit_nr, '(T2,A29,F30.10)') &
2915            print_string, optimizer%opt_penalty%penalty_strength
2916      ENDIF
2917
2918      ! clean up
2919      IF (l_bfgs) THEN
2920         CALL lbfgs_release(nlmo_lbfgs_history)
2921      ENDIF
2922      DO ispin = 1, nspins
2923         DO idim0 = 1, SIZE(m_B0, 2)
2924            DO reim = 1, SIZE(m_B0, 1)
2925               CALL dbcsr_release(m_B0(reim, idim0, ispin))
2926            ENDDO
2927         ENDDO
2928         CALL dbcsr_release(m_theta(ispin))
2929         CALL dbcsr_release(m_t_mo_local(ispin))
2930         CALL dbcsr_release(tempNOcc1(ispin))
2931         CALL dbcsr_release(approx_inv_hessian(ispin))
2932         CALL dbcsr_release(prev_m_theta(ispin))
2933         CALL dbcsr_release(m_theta_normalized(ispin))
2934         CALL dbcsr_release(m_S0(ispin))
2935         CALL dbcsr_release(prev_grad(ispin))
2936         CALL dbcsr_release(grad(ispin))
2937         CALL dbcsr_release(prev_step(ispin))
2938         CALL dbcsr_release(step(ispin))
2939         CALL dbcsr_release(prev_minus_prec_grad(ispin))
2940         CALL dbcsr_release(m_sig_sqrti_ii(ispin))
2941         CALL dbcsr_release(m_sigma(ispin))
2942         CALL dbcsr_release(m_siginv(ispin))
2943         CALL dbcsr_release(tempOccOcc1(ispin))
2944         CALL dbcsr_release(tempOccOcc2(ispin))
2945         CALL dbcsr_release(tempOccOcc3(ispin))
2946         CALL dbcsr_release(bfgs_y(ispin))
2947         CALL dbcsr_release(bfgs_s(ispin))
2948      ENDDO ! ispin
2949
2950      DEALLOCATE (grad_norm_spin)
2951      DEALLOCATE (nocc)
2952      DEALLOCATE (penalty_vol_prefactor)
2953      DEALLOCATE (suggested_vol_penalty)
2954
2955      DEALLOCATE (approx_inv_hessian)
2956      DEALLOCATE (prev_m_theta)
2957      DEALLOCATE (m_theta_normalized)
2958      DEALLOCATE (m_S0)
2959      DEALLOCATE (prev_grad)
2960      DEALLOCATE (grad)
2961      DEALLOCATE (prev_step)
2962      DEALLOCATE (step)
2963      DEALLOCATE (prev_minus_prec_grad)
2964      DEALLOCATE (m_sig_sqrti_ii)
2965      DEALLOCATE (m_sigma)
2966      DEALLOCATE (m_siginv)
2967      DEALLOCATE (tempNOcc1)
2968      DEALLOCATE (tempOccOcc1)
2969      DEALLOCATE (tempOccOcc2)
2970      DEALLOCATE (tempOccOcc3)
2971      DEALLOCATE (bfgs_y)
2972      DEALLOCATE (bfgs_s)
2973
2974      DEALLOCATE (m_theta, m_t_mo_local)
2975      DEALLOCATE (m_B0)
2976      DEALLOCATE (weights)
2977      DEALLOCATE (first_sgf, last_sgf, nsgf)
2978
2979      IF (.NOT. converged) THEN
2980         CPABORT("Optimization not converged! ")
2981      ENDIF
2982
2983      CALL timestop(handle)
2984
2985   END SUBROUTINE almo_scf_construct_nlmos
2986
2987! **************************************************************************************************
2988!> \brief Analysis of the orbitals
2989!> \param detailed_analysis ...
2990!> \param eps_filter ...
2991!> \param m_T_in ...
2992!> \param m_T0_in ...
2993!> \param m_siginv_in ...
2994!> \param m_siginv0_in ...
2995!> \param m_S_in ...
2996!> \param m_KS0_in ...
2997!> \param m_quench_t_in ...
2998!> \param energy_out ...
2999!> \param m_eda_out ...
3000!> \param m_cta_out ...
3001!> \par History
3002!>       2017.07 created [Rustam Z Khaliullin]
3003!> \author Rustam Z Khaliullin
3004! **************************************************************************************************
3005   SUBROUTINE xalmo_analysis(detailed_analysis, eps_filter, m_T_in, m_T0_in, &
3006                             m_siginv_in, m_siginv0_in, m_S_in, m_KS0_in, m_quench_t_in, energy_out, &
3007                             m_eda_out, m_cta_out)
3008
3009      LOGICAL, INTENT(IN)                                :: detailed_analysis
3010      REAL(KIND=dp), INTENT(IN)                          :: eps_filter
3011      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_T_in, m_T0_in, m_siginv_in, &
3012                                                            m_siginv0_in, m_S_in, m_KS0_in, &
3013                                                            m_quench_t_in
3014      REAL(KIND=dp), INTENT(INOUT)                       :: energy_out
3015      TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_eda_out, m_cta_out
3016
3017      CHARACTER(len=*), PARAMETER :: routineN = 'xalmo_analysis', routineP = moduleN//':'//routineN
3018
3019      INTEGER                                            :: handle, ispin, nspins
3020      REAL(KIND=dp)                                      :: energy_ispin, spin_factor
3021      TYPE(dbcsr_type)                                   :: FTsiginv0, Fvo0, m_X, siginvTFTsiginv0, &
3022                                                            ST0
3023
3024      CALL timeset(routineN, handle)
3025
3026      nspins = SIZE(m_T_in)
3027
3028      IF (nspins == 1) THEN
3029         spin_factor = 2.0_dp
3030      ELSE
3031         spin_factor = 1.0_dp
3032      ENDIF
3033
3034      energy_out = 0.0_dp
3035      DO ispin = 1, nspins
3036
3037         ! create temporary matrices
3038         CALL dbcsr_create(Fvo0, &
3039                           template=m_T_in(ispin), &
3040                           matrix_type=dbcsr_type_no_symmetry)
3041         CALL dbcsr_create(FTsiginv0, &
3042                           template=m_T_in(ispin), &
3043                           matrix_type=dbcsr_type_no_symmetry)
3044         CALL dbcsr_create(ST0, &
3045                           template=m_T_in(ispin), &
3046                           matrix_type=dbcsr_type_no_symmetry)
3047         CALL dbcsr_create(m_X, &
3048                           template=m_T_in(ispin), &
3049                           matrix_type=dbcsr_type_no_symmetry)
3050         CALL dbcsr_create(siginvTFTsiginv0, &
3051                           template=m_siginv0_in(ispin), &
3052                           matrix_type=dbcsr_type_no_symmetry)
3053
3054         ! compute F_{virt,occ} for the zero-delocalization state
3055         CALL compute_frequently_used_matrices( &
3056            filter_eps=eps_filter, &
3057            m_T_in=m_T0_in(ispin), &
3058            m_siginv_in=m_siginv0_in(ispin), &
3059            m_S_in=m_S_in(1), &
3060            m_F_in=m_KS0_in(ispin), &
3061            m_FTsiginv_out=FTsiginv0, &
3062            m_siginvTFTsiginv_out=siginvTFTsiginv0, &
3063            m_ST_out=ST0)
3064         CALL dbcsr_copy(Fvo0, m_quench_t_in(ispin))
3065         CALL dbcsr_copy(Fvo0, FTsiginv0, keep_sparsity=.TRUE.)
3066         CALL dbcsr_multiply("N", "N", -1.0_dp, &
3067                             ST0, &
3068                             siginvTFTsiginv0, &
3069                             1.0_dp, Fvo0, &
3070                             retain_sparsity=.TRUE.)
3071
3072         ! get single excitation amplitudes
3073         CALL dbcsr_copy(m_X, m_T0_in(ispin))
3074         CALL dbcsr_add(m_X, m_T_in(ispin), -1.0_dp, 1.0_dp)
3075
3076         CALL dbcsr_dot(m_X, Fvo0, energy_ispin)
3077         energy_out = energy_out + energy_ispin*spin_factor
3078
3079         IF (detailed_analysis) THEN
3080
3081            CALL dbcsr_hadamard_product(m_X, Fvo0, m_eda_out(ispin))
3082            CALL dbcsr_scale(m_eda_out(ispin), spin_factor)
3083            CALL dbcsr_filter(m_eda_out(ispin), eps_filter)
3084
3085            ! first, compute [QR'R]_mu^i = [(S-SRS).X.siginv']_mu^i
3086            ! a. FTsiginv0 = S.T0*siginv0
3087            CALL dbcsr_multiply("N", "N", 1.0_dp, &
3088                                ST0, &
3089                                m_siginv0_in(ispin), &
3090                                0.0_dp, FTsiginv0, &
3091                                filter_eps=eps_filter)
3092            ! c. tmp1(use ST0) = S.X
3093            CALL dbcsr_multiply("N", "N", 1.0_dp, &
3094                                m_S_in(1), &
3095                                m_X, &
3096                                0.0_dp, ST0, &
3097                                filter_eps=eps_filter)
3098            ! d. tmp2 = tr(T0).tmp1 = tr(T0).S.X
3099            CALL dbcsr_multiply("T", "N", 1.0_dp, &
3100                                m_T0_in(ispin), &
3101                                ST0, &
3102                                0.0_dp, siginvTFTsiginv0, &
3103                                filter_eps=eps_filter)
3104            ! e. tmp1 = tmp1 - tmp3.tmp2 = S.X - S.T0.siginv0*tr(T0).S.X
3105            !         = (1-S.R0).S.X
3106            CALL dbcsr_multiply("N", "N", -1.0_dp, &
3107                                FTsiginv0, &
3108                                siginvTFTsiginv0, &
3109                                1.0_dp, ST0, &
3110                                filter_eps=eps_filter)
3111            ! f. tmp2(use FTsiginv0) = tmp1*siginv
3112            CALL dbcsr_multiply("N", "N", 1.0_dp, &
3113                                ST0, &
3114                                m_siginv_in(ispin), &
3115                                0.0_dp, FTsiginv0, &
3116                                filter_eps=eps_filter)
3117            ! second, compute traces of blocks [RR'Q]^x_y * [X]^y_x
3118            CALL dbcsr_hadamard_product(m_X, &
3119                                        FTsiginv0, m_cta_out(ispin))
3120            CALL dbcsr_scale(m_cta_out(ispin), spin_factor)
3121            CALL dbcsr_filter(m_cta_out(ispin), eps_filter)
3122
3123         ENDIF ! do ALMO EDA/CTA
3124
3125         CALL dbcsr_release(Fvo0)
3126         CALL dbcsr_release(FTsiginv0)
3127         CALL dbcsr_release(ST0)
3128         CALL dbcsr_release(m_X)
3129         CALL dbcsr_release(siginvTFTsiginv0)
3130
3131      ENDDO ! ispin
3132
3133      CALL timestop(handle)
3134
3135   END SUBROUTINE xalmo_analysis
3136
3137! **************************************************************************************************
3138!> \brief Compute matrices that are used often in various parts of the
3139!>        optimization procedure
3140!> \param filter_eps ...
3141!> \param m_T_in ...
3142!> \param m_siginv_in ...
3143!> \param m_S_in ...
3144!> \param m_F_in ...
3145!> \param m_FTsiginv_out ...
3146!> \param m_siginvTFTsiginv_out ...
3147!> \param m_ST_out ...
3148!> \par History
3149!>       2016.12 created [Rustam Z Khaliullin]
3150!> \author Rustam Z Khaliullin
3151! **************************************************************************************************
3152   SUBROUTINE compute_frequently_used_matrices(filter_eps, &
3153                                               m_T_in, m_siginv_in, m_S_in, m_F_in, m_FTsiginv_out, &
3154                                               m_siginvTFTsiginv_out, m_ST_out)
3155
3156      REAL(KIND=dp), INTENT(IN)                          :: filter_eps
3157      TYPE(dbcsr_type), INTENT(IN)                       :: m_T_in, m_siginv_in, m_S_in, m_F_in
3158      TYPE(dbcsr_type), INTENT(INOUT)                    :: m_FTsiginv_out, m_siginvTFTsiginv_out, &
3159                                                            m_ST_out
3160
3161      CHARACTER(len=*), PARAMETER :: routineN = 'compute_frequently_used_matrices', &
3162         routineP = moduleN//':'//routineN
3163
3164      INTEGER                                            :: handle
3165      TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
3166
3167      CALL timeset(routineN, handle)
3168
3169      CALL dbcsr_create(m_tmp_no_1, &
3170                        template=m_T_in, &
3171                        matrix_type=dbcsr_type_no_symmetry)
3172      CALL dbcsr_create(m_tmp_oo_1, &
3173                        template=m_siginv_in, &
3174                        matrix_type=dbcsr_type_no_symmetry)
3175
3176      CALL dbcsr_multiply("N", "N", 1.0_dp, &
3177                          m_F_in, &
3178                          m_T_in, &
3179                          0.0_dp, m_tmp_no_1, &
3180                          filter_eps=filter_eps)
3181
3182      CALL dbcsr_multiply("N", "N", 1.0_dp, &
3183                          m_tmp_no_1, &
3184                          m_siginv_in, &
3185                          0.0_dp, m_FTsiginv_out, &
3186                          filter_eps=filter_eps)
3187
3188      CALL dbcsr_multiply("T", "N", 1.0_dp, &
3189                          m_T_in, &
3190                          m_FTsiginv_out, &
3191                          0.0_dp, m_tmp_oo_1, &
3192                          filter_eps=filter_eps)
3193
3194      CALL dbcsr_multiply("N", "N", 1.0_dp, &
3195                          m_siginv_in, &
3196                          m_tmp_oo_1, &
3197                          0.0_dp, m_siginvTFTsiginv_out, &
3198                          filter_eps=filter_eps)
3199
3200      CALL dbcsr_multiply("N", "N", 1.0_dp, &
3201                          m_S_in, &
3202                          m_T_in, &
3203                          0.0_dp, m_ST_out, &
3204                          filter_eps=filter_eps)
3205
3206      CALL dbcsr_release(m_tmp_no_1)
3207      CALL dbcsr_release(m_tmp_oo_1)
3208
3209      CALL timestop(handle)
3210
3211   END SUBROUTINE compute_frequently_used_matrices
3212
3213! **************************************************************************************************
3214!> \brief Split the matrix of virtual orbitals into two:
3215!>        retained orbs and discarded
3216!> \param almo_scf_env ...
3217!> \par History
3218!>       2011.09 created [Rustam Z Khaliullin]
3219!> \author Rustam Z Khaliullin
3220! **************************************************************************************************
3221   SUBROUTINE split_v_blk(almo_scf_env)
3222
3223      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
3224
3225      CHARACTER(len=*), PARAMETER :: routineN = 'split_v_blk', routineP = moduleN//':'//routineN
3226
3227      INTEGER                                            :: discarded_v, handle, iblock_col, &
3228                                                            iblock_col_size, iblock_row, &
3229                                                            iblock_row_size, ispin, retained_v
3230      REAL(kind=dp), DIMENSION(:, :), POINTER            :: data_p, p_new_block
3231      TYPE(dbcsr_iterator_type)                          :: iter
3232
3233      CALL timeset(routineN, handle)
3234
3235      DO ispin = 1, almo_scf_env%nspins
3236
3237         CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin), &
3238                                work_mutable=.TRUE.)
3239         CALL dbcsr_work_create(almo_scf_env%matrix_v_disc_blk(ispin), &
3240                                work_mutable=.TRUE.)
3241
3242         CALL dbcsr_iterator_start(iter, almo_scf_env%matrix_v_full_blk(ispin))
3243
3244         DO WHILE (dbcsr_iterator_blocks_left(iter))
3245
3246            CALL dbcsr_iterator_next_block(iter, iblock_row, iblock_col, data_p, &
3247                                           row_size=iblock_row_size, col_size=iblock_col_size)
3248
3249            IF (iblock_row .NE. iblock_col) THEN
3250               CPABORT("off-diagonal block found")
3251            ENDIF
3252
3253            retained_v = almo_scf_env%nvirt_of_domain(iblock_col, ispin)
3254            discarded_v = almo_scf_env%nvirt_disc_of_domain(iblock_col, ispin)
3255            CPASSERT(retained_v .GT. 0)
3256            CPASSERT(discarded_v .GT. 0)
3257
3258            NULLIFY (p_new_block)
3259            CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v_disc_blk(ispin), &
3260                                       iblock_row, iblock_col, p_new_block)
3261            CPASSERT(ASSOCIATED(p_new_block))
3262            CPASSERT(retained_v + discarded_v .EQ. iblock_col_size)
3263            p_new_block(:, :) = data_p(:, (retained_v + 1):iblock_col_size)
3264
3265            NULLIFY (p_new_block)
3266            CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v_blk(ispin), &
3267                                       iblock_row, iblock_col, p_new_block)
3268            CPASSERT(ASSOCIATED(p_new_block))
3269            p_new_block(:, :) = data_p(:, 1:retained_v)
3270
3271         ENDDO ! iterator
3272         CALL dbcsr_iterator_stop(iter)
3273
3274         CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
3275         CALL dbcsr_finalize(almo_scf_env%matrix_v_disc_blk(ispin))
3276
3277      ENDDO ! ispin
3278
3279      CALL timestop(handle)
3280
3281   END SUBROUTINE split_v_blk
3282
3283! **************************************************************************************************
3284!> \brief various methods for calculating the Harris-Foulkes correction
3285!> \param almo_scf_env ...
3286!> \par History
3287!>       2011.06 created [Rustam Z Khaliullin]
3288!> \author Rustam Z Khaliullin
3289! **************************************************************************************************
3290   SUBROUTINE harris_foulkes_correction(almo_scf_env)
3291
3292      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
3293
3294      CHARACTER(len=*), PARAMETER :: routineN = 'harris_foulkes_correction', &
3295         routineP = moduleN//':'//routineN
3296      INTEGER, PARAMETER                                 :: cayley_transform = 1, dm_ls_step = 2
3297
3298      INTEGER :: algorithm_id, handle, handle1, handle2, handle3, handle4, handle5, handle6, &
3299         handle7, handle8, ispin, iteration, n, nmins, nspin, opt_k_max_iter, &
3300         outer_opt_k_iteration, outer_opt_k_max_iter, unit_nr
3301      INTEGER, DIMENSION(1)                              :: fake, nelectron_spin_real
3302      LOGICAL :: converged, line_search, md_in_k_space, outer_opt_k_prepare_to_exit, &
3303         prepare_to_exit, reset_conjugator, reset_step_size, use_cubic_approximation, &
3304         use_quadratic_approximation
3305      REAL(KIND=dp) :: aa, bb, beta, conjugacy_error, conjugacy_error_threshold, &
3306         delta_obj_function, denom, energy_correction_final, frob_matrix, frob_matrix_base, fun0, &
3307         fun1, gfun0, gfun1, grad_norm, grad_norm_frob, kappa, kin_energy, line_search_error, &
3308         line_search_error_threshold, num_threshold, numer, obj_function, quadratic_approx_error, &
3309         quadratic_approx_error_threshold, safety_multiplier, spin_factor, step_size, &
3310         step_size_quadratic_approx, step_size_quadratic_approx2, t1, t1a, t1cholesky, t2, t2a, &
3311         t2cholesky, tau, time_step, x_opt_eps_adaptive, x_opt_eps_adaptive_factor
3312      REAL(KIND=dp), DIMENSION(1)                        :: local_mu
3313      REAL(KIND=dp), DIMENSION(2)                        :: energy_correction
3314      REAL(KIND=dp), DIMENSION(3)                        :: minima
3315      TYPE(cp_logger_type), POINTER                      :: logger
3316      TYPE(ct_step_env_type)                             :: ct_step_env
3317      TYPE(dbcsr_type) :: grad, k_vd_index_down, k_vr_index_down, matrix_k_central, matrix_tmp1, &
3318         matrix_tmp2, prec, prev_grad, prev_minus_prec_grad, prev_step, sigma_oo_curr, &
3319         sigma_oo_curr_inv, sigma_vv_sqrt, sigma_vv_sqrt_guess, sigma_vv_sqrt_inv, &
3320         sigma_vv_sqrt_inv_guess, step, t_curr, tmp1_n_vr, tmp2_n_o, tmp3_vd_vr, tmp4_o_vr, &
3321         tmp_k_blk, vd_fixed, vd_index_sqrt, vd_index_sqrt_inv, velocity, vr_fixed, vr_index_sqrt, &
3322         vr_index_sqrt_inv
3323      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_p_almo_scf_converged
3324
3325      CALL timeset(routineN, handle)
3326
3327      ! get a useful output_unit
3328      logger => cp_get_default_logger()
3329      IF (logger%para_env%ionode) THEN
3330         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
3331      ELSE
3332         unit_nr = -1
3333      ENDIF
3334
3335      nspin = almo_scf_env%nspins
3336      energy_correction_final = 0.0_dp
3337      IF (nspin .EQ. 1) THEN
3338         spin_factor = 2.0_dp
3339      ELSE
3340         spin_factor = 1.0_dp
3341      ENDIF
3342
3343      IF (almo_scf_env%deloc_use_occ_orbs) THEN
3344         algorithm_id = cayley_transform
3345      ELSE
3346         algorithm_id = dm_ls_step
3347      ENDIF
3348
3349      t1 = m_walltime()
3350
3351      SELECT CASE (algorithm_id)
3352      CASE (cayley_transform)
3353
3354         ! rescale density matrix by spin factor
3355         ! so the orbitals and density are consistent with each other
3356         IF (almo_scf_env%nspins == 1) THEN
3357            CALL dbcsr_scale(almo_scf_env%matrix_p(1), 1.0_dp/spin_factor)
3358         ENDIF
3359
3360         ! transform matrix_t not matrix_t_blk (we might need ALMOs later)
3361         DO ispin = 1, nspin
3362
3363            CALL dbcsr_copy(almo_scf_env%matrix_t(ispin), &
3364                            almo_scf_env%matrix_t_blk(ispin))
3365
3366            ! obtain orthogonalization matrices for ALMOs
3367            ! RZK-warning - remove this sqrt(sigma) and inv(sqrt(sigma))
3368            ! ideally ALMO scf should use sigma and sigma_inv in
3369            ! the tensor_up_down representation
3370
3371            IF (unit_nr > 0) THEN
3372               WRITE (unit_nr, *) "sqrt and inv(sqrt) of MO overlap matrix"
3373            ENDIF
3374            CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt(ispin), &
3375                              template=almo_scf_env%matrix_sigma(ispin), &
3376                              matrix_type=dbcsr_type_no_symmetry)
3377            CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
3378                              template=almo_scf_env%matrix_sigma(ispin), &
3379                              matrix_type=dbcsr_type_no_symmetry)
3380
3381            CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_sigma_sqrt(ispin), &
3382                                           almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
3383                                           almo_scf_env%matrix_sigma(ispin), &
3384                                           threshold=almo_scf_env%eps_filter, &
3385                                           order=almo_scf_env%order_lanczos, &
3386                                           eps_lanczos=almo_scf_env%eps_lanczos, &
3387                                           max_iter_lanczos=almo_scf_env%max_iter_lanczos)
3388
3389            IF (safe_mode) THEN
3390               CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma(ispin), &
3391                                 matrix_type=dbcsr_type_no_symmetry)
3392               CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma(ispin), &
3393                                 matrix_type=dbcsr_type_no_symmetry)
3394
3395               CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
3396                                   almo_scf_env%matrix_sigma(ispin), &
3397                                   0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
3398               CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
3399                                   almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
3400                                   0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
3401
3402               frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
3403               CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
3404               frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
3405               IF (unit_nr > 0) THEN
3406                  WRITE (unit_nr, *) "Error for (inv(sqrt(SIG))*SIG*inv(sqrt(SIG))-I)", frob_matrix/frob_matrix_base
3407               ENDIF
3408
3409               CALL dbcsr_release(matrix_tmp1)
3410               CALL dbcsr_release(matrix_tmp2)
3411            ENDIF
3412         ENDDO
3413
3414         IF (almo_scf_env%almo_update_algorithm .EQ. almo_scf_diag) THEN
3415
3416            DO ispin = 1, nspin
3417
3418               t1a = m_walltime()
3419
3420               line_search_error_threshold = almo_scf_env%real01
3421               conjugacy_error_threshold = almo_scf_env%real02
3422               quadratic_approx_error_threshold = almo_scf_env%real03
3423               x_opt_eps_adaptive_factor = almo_scf_env%real04
3424
3425               !! the outer loop for k optimization
3426               outer_opt_k_max_iter = almo_scf_env%opt_k_outer_max_iter
3427               outer_opt_k_prepare_to_exit = .FALSE.
3428               outer_opt_k_iteration = 0
3429               grad_norm = 0.0_dp
3430               grad_norm_frob = 0.0_dp
3431               CALL dbcsr_set(almo_scf_env%matrix_x(ispin), 0.0_dp)
3432               IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) outer_opt_k_max_iter = 0
3433
3434               DO
3435
3436                  ! obtain proper retained virtuals (1-R)|ALMO_vr>
3437                  CALL apply_projector(psi_in=almo_scf_env%matrix_v_blk(ispin), &
3438                                       psi_out=almo_scf_env%matrix_v(ispin), &
3439                                       psi_projector=almo_scf_env%matrix_t_blk(ispin), &
3440                                       metric=almo_scf_env%matrix_s(1), &
3441                                       project_out=.TRUE., &
3442                                       psi_projector_orthogonal=.FALSE., &
3443                                       proj_in_template=almo_scf_env%matrix_ov(ispin), &
3444                                       eps_filter=almo_scf_env%eps_filter, &
3445                                       sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
3446                  !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
3447
3448                  ! save initial retained virtuals
3449                  CALL dbcsr_create(vr_fixed, &
3450                                    template=almo_scf_env%matrix_v(ispin))
3451                  CALL dbcsr_copy(vr_fixed, almo_scf_env%matrix_v(ispin))
3452
3453                  ! init matrices common for optimized and non-optimized virts
3454                  CALL dbcsr_create(sigma_vv_sqrt, &
3455                                    template=almo_scf_env%matrix_sigma_vv(ispin), &
3456                                    matrix_type=dbcsr_type_no_symmetry)
3457                  CALL dbcsr_create(sigma_vv_sqrt_inv, &
3458                                    template=almo_scf_env%matrix_sigma_vv(ispin), &
3459                                    matrix_type=dbcsr_type_no_symmetry)
3460                  CALL dbcsr_create(sigma_vv_sqrt_inv_guess, &
3461                                    template=almo_scf_env%matrix_sigma_vv(ispin), &
3462                                    matrix_type=dbcsr_type_no_symmetry)
3463                  CALL dbcsr_create(sigma_vv_sqrt_guess, &
3464                                    template=almo_scf_env%matrix_sigma_vv(ispin), &
3465                                    matrix_type=dbcsr_type_no_symmetry)
3466                  CALL dbcsr_set(sigma_vv_sqrt_guess, 0.0_dp)
3467                  CALL dbcsr_add_on_diag(sigma_vv_sqrt_guess, 1.0_dp)
3468                  CALL dbcsr_filter(sigma_vv_sqrt_guess, almo_scf_env%eps_filter)
3469                  CALL dbcsr_set(sigma_vv_sqrt_inv_guess, 0.0_dp)
3470                  CALL dbcsr_add_on_diag(sigma_vv_sqrt_inv_guess, 1.0_dp)
3471                  CALL dbcsr_filter(sigma_vv_sqrt_inv_guess, almo_scf_env%eps_filter)
3472
3473                  ! do things required to optimize virtuals
3474                  IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
3475
3476                     ! project retained virtuals out of discarded block-by-block
3477                     ! (1-Q^VR_ALMO)|ALMO_vd>
3478                     ! this is probably not necessary, do it just to be safe
3479                     !CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin),&
3480                     !        psi_out=almo_scf_env%matrix_v_disc(ispin),&
3481                     !        psi_projector=almo_scf_env%matrix_v_blk(ispin),&
3482                     !        metric=almo_scf_env%matrix_s_blk(1),&
3483                     !        project_out=.TRUE.,&
3484                     !        psi_projector_orthogonal=.FALSE.,&
3485                     !        proj_in_template=almo_scf_env%matrix_k_tr(ispin),&
3486                     !        eps_filter=almo_scf_env%eps_filter,&
3487                     !        sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin))
3488                     !CALL dbcsr_copy(almo_scf_env%matrix_v_disc_blk(ispin),&
3489                     !        almo_scf_env%matrix_v_disc(ispin))
3490
3491                     ! construct discarded virtuals (1-R)|ALMO_vd>
3492                     CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
3493                                          psi_out=almo_scf_env%matrix_v_disc(ispin), &
3494                                          psi_projector=almo_scf_env%matrix_t_blk(ispin), &
3495                                          metric=almo_scf_env%matrix_s(1), &
3496                                          project_out=.TRUE., &
3497                                          psi_projector_orthogonal=.FALSE., &
3498                                          proj_in_template=almo_scf_env%matrix_ov_disc(ispin), &
3499                                          eps_filter=almo_scf_env%eps_filter, &
3500                                          sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
3501                     !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
3502
3503                     ! save initial discarded
3504                     CALL dbcsr_create(vd_fixed, &
3505                                       template=almo_scf_env%matrix_v_disc(ispin))
3506                     CALL dbcsr_copy(vd_fixed, almo_scf_env%matrix_v_disc(ispin))
3507
3508                     !! create the down metric in the retained k-subspace
3509                     CALL dbcsr_create(k_vr_index_down, &
3510                                       template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
3511                                       matrix_type=dbcsr_type_no_symmetry)
3512                     !CALL dbcsr_copy(k_vr_index_down,&
3513                     !        almo_scf_env%matrix_sigma_vv_blk(ispin))
3514
3515                     !CALL get_overlap(bra=almo_scf_env%matrix_v_blk(ispin),&
3516                     !        ket=almo_scf_env%matrix_v_blk(ispin),&
3517                     !        overlap=k_vr_index_down,&
3518                     !        metric=almo_scf_env%matrix_s_blk(1),&
3519                     !        retain_overlap_sparsity=.FALSE.,&
3520                     !        eps_filter=almo_scf_env%eps_filter)
3521
3522                     !! create the up metric in the discarded k-subspace
3523                     CALL dbcsr_create(k_vd_index_down, &
3524                                       template=almo_scf_env%matrix_vv_disc_blk(ispin), &
3525                                       matrix_type=dbcsr_type_no_symmetry)
3526                     !CALL dbcsr_init(k_vd_index_up)
3527                     !CALL dbcsr_create(k_vd_index_up,&
3528                     !        template=almo_scf_env%matrix_vv_disc_blk(ispin),&
3529                     !        matrix_type=dbcsr_type_no_symmetry)
3530                     !CALL dbcsr_copy(k_vd_index_down,&
3531                     !        almo_scf_env%matrix_vv_disc_blk(ispin))
3532
3533                     !CALL get_overlap(bra=almo_scf_env%matrix_v_disc_blk(ispin),&
3534                     !        ket=almo_scf_env%matrix_v_disc_blk(ispin),&
3535                     !        overlap=k_vd_index_down,&
3536                     !        metric=almo_scf_env%matrix_s_blk(1),&
3537                     !        retain_overlap_sparsity=.FALSE.,&
3538                     !        eps_filter=almo_scf_env%eps_filter)
3539
3540                     !IF (unit_nr>0) THEN
3541                     !   WRITE(unit_nr,*) "Inverting blocked overlap matrix of discarded virtuals"
3542                     !ENDIF
3543                     !CALL invert_Hotelling(k_vd_index_up,&
3544                     !        k_vd_index_down,&
3545                     !        almo_scf_env%eps_filter)
3546                     !IF (safe_mode) THEN
3547                     !   CALL dbcsr_init(matrix_tmp1)
3548                     !   CALL dbcsr_create(matrix_tmp1,template=k_vd_index_down,&
3549                     !                        matrix_type=dbcsr_type_no_symmetry)
3550                     !   CALL dbcsr_multiply("N","N",1.0_dp,k_vd_index_up,&
3551                     !                          k_vd_index_down,&
3552                     !                          0.0_dp, matrix_tmp1,&
3553                     !                          filter_eps=almo_scf_env%eps_filter)
3554                     !   frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp1)
3555                     !   CALL dbcsr_add_on_diag(matrix_tmp1,-1.0_dp)
3556                     !   frob_matrix=dbcsr_frobenius_norm(matrix_tmp1)
3557                     !   IF (unit_nr>0) THEN
3558                     !      WRITE(unit_nr,*) "Error for (inv(SIG)*SIG-I)",&
3559                     !            frob_matrix/frob_matrix_base
3560                     !   ENDIF
3561                     !   CALL dbcsr_release(matrix_tmp1)
3562                     !ENDIF
3563
3564                     ! init matrices necessary for optimization of truncated virts
3565                     ! init blocked gradient before setting K to zero
3566                     ! otherwise the block structure might be lost
3567                     CALL dbcsr_create(grad, &
3568                                       template=almo_scf_env%matrix_k_blk(ispin))
3569                     CALL dbcsr_copy(grad, almo_scf_env%matrix_k_blk(ispin))
3570
3571                     ! init MD in the k-space
3572                     md_in_k_space = almo_scf_env%logical01
3573                     IF (md_in_k_space) THEN
3574                        CALL dbcsr_create(velocity, &
3575                                          template=almo_scf_env%matrix_k_blk(ispin))
3576                        CALL dbcsr_copy(velocity, almo_scf_env%matrix_k_blk(ispin))
3577                        CALL dbcsr_set(velocity, 0.0_dp)
3578                        time_step = almo_scf_env%opt_k_trial_step_size
3579                     ENDIF
3580
3581                     CALL dbcsr_create(prev_step, &
3582                                       template=almo_scf_env%matrix_k_blk(ispin))
3583
3584                     CALL dbcsr_create(prev_minus_prec_grad, &
3585                                       template=almo_scf_env%matrix_k_blk(ispin))
3586
3587                     ! initialize diagonal blocks of the preconditioner to 1.0_dp
3588                     CALL dbcsr_create(prec, &
3589                                       template=almo_scf_env%matrix_k_blk(ispin))
3590                     CALL dbcsr_copy(prec, almo_scf_env%matrix_k_blk(ispin))
3591                     CALL dbcsr_set(prec, 1.0_dp)
3592
3593                     ! generate initial K (extrapolate if previous values are available)
3594                     CALL dbcsr_set(almo_scf_env%matrix_k_blk(ispin), 0.0_dp)
3595                     ! matrix_k_central stores current k because matrix_k_blk is updated
3596                     ! during linear search
3597                     CALL dbcsr_create(matrix_k_central, &
3598                                       template=almo_scf_env%matrix_k_blk(ispin))
3599                     CALL dbcsr_copy(matrix_k_central, &
3600                                     almo_scf_env%matrix_k_blk(ispin))
3601                     CALL dbcsr_create(tmp_k_blk, &
3602                                       template=almo_scf_env%matrix_k_blk(ispin))
3603                     CALL dbcsr_create(step, &
3604                                       template=almo_scf_env%matrix_k_blk(ispin))
3605                     CALL dbcsr_set(step, 0.0_dp)
3606                     CALL dbcsr_create(t_curr, &
3607                                       template=almo_scf_env%matrix_t(ispin))
3608                     CALL dbcsr_create(sigma_oo_curr, &
3609                                       template=almo_scf_env%matrix_sigma(ispin), &
3610                                       matrix_type=dbcsr_type_no_symmetry)
3611                     CALL dbcsr_create(sigma_oo_curr_inv, &
3612                                       template=almo_scf_env%matrix_sigma(ispin), &
3613                                       matrix_type=dbcsr_type_no_symmetry)
3614                     CALL dbcsr_create(tmp1_n_vr, &
3615                                       template=almo_scf_env%matrix_v(ispin))
3616                     CALL dbcsr_create(tmp3_vd_vr, &
3617                                       template=almo_scf_env%matrix_k_blk(ispin))
3618                     CALL dbcsr_create(tmp2_n_o, &
3619                                       template=almo_scf_env%matrix_t(ispin))
3620                     CALL dbcsr_create(tmp4_o_vr, &
3621                                       template=almo_scf_env%matrix_ov(ispin))
3622                     CALL dbcsr_create(prev_grad, &
3623                                       template=almo_scf_env%matrix_k_blk(ispin))
3624                     CALL dbcsr_set(prev_grad, 0.0_dp)
3625
3626                     !CALL dbcsr_init(sigma_oo_guess)
3627                     !CALL dbcsr_create(sigma_oo_guess,&
3628                     !        template=almo_scf_env%matrix_sigma(ispin),&
3629                     !        matrix_type=dbcsr_type_no_symmetry)
3630                     !CALL dbcsr_set(sigma_oo_guess,0.0_dp)
3631                     !CALL dbcsr_add_on_diag(sigma_oo_guess,1.0_dp)
3632                     !CALL dbcsr_filter(sigma_oo_guess,almo_scf_env%eps_filter)
3633                     !CALL dbcsr_print(sigma_oo_guess)
3634
3635                  ENDIF ! done constructing discarded virtuals
3636
3637                  ! init variables
3638                  opt_k_max_iter = almo_scf_env%opt_k_max_iter
3639                  iteration = 0
3640                  converged = .FALSE.
3641                  prepare_to_exit = .FALSE.
3642                  beta = 0.0_dp
3643                  line_search = .FALSE.
3644                  obj_function = 0.0_dp
3645                  conjugacy_error = 0.0_dp
3646                  line_search_error = 0.0_dp
3647                  fun0 = 0.0_dp
3648                  fun1 = 0.0_dp
3649                  gfun0 = 0.0_dp
3650                  gfun1 = 0.0_dp
3651                  step_size_quadratic_approx = 0.0_dp
3652                  reset_step_size = .TRUE.
3653                  IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) opt_k_max_iter = 0
3654
3655                  ! start cg iterations to optimize matrix_k_blk
3656                  DO
3657
3658                     CALL timeset('k_opt_vr', handle1)
3659
3660                     IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
3661
3662                        ! construct k-excited virtuals
3663                        CALL dbcsr_multiply("N", "N", 1.0_dp, vd_fixed, &
3664                                            almo_scf_env%matrix_k_blk(ispin), &
3665                                            0.0_dp, almo_scf_env%matrix_v(ispin), &
3666                                            filter_eps=almo_scf_env%eps_filter)
3667                        CALL dbcsr_add(almo_scf_env%matrix_v(ispin), vr_fixed, &
3668                                       +1.0_dp, +1.0_dp)
3669                     ENDIF
3670
3671                     ! decompose the overlap matrix of the current retained orbitals
3672                     !IF (unit_nr>0) THEN
3673                     !   WRITE(unit_nr,*) "decompose the active VV overlap matrix"
3674                     !ENDIF
3675                     CALL get_overlap(bra=almo_scf_env%matrix_v(ispin), &
3676                                      ket=almo_scf_env%matrix_v(ispin), &
3677                                      overlap=almo_scf_env%matrix_sigma_vv(ispin), &
3678                                      metric=almo_scf_env%matrix_s(1), &
3679                                      retain_overlap_sparsity=.FALSE., &
3680                                      eps_filter=almo_scf_env%eps_filter)
3681                     ! use either cholesky or sqrt
3682                     !! RZK-warning: strangely, cholesky does not work with k-optimization
3683                     IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) THEN
3684                        CALL timeset('cholesky', handle2)
3685                        t1cholesky = m_walltime()
3686
3687                        ! re-create sigma_vv_sqrt because desymmetrize is buggy -
3688                        ! it will create multiple copies of blocks
3689                        CALL dbcsr_create(sigma_vv_sqrt, &
3690                                          template=almo_scf_env%matrix_sigma_vv(ispin), &
3691                                          matrix_type=dbcsr_type_no_symmetry)
3692                        CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
3693                                                sigma_vv_sqrt)
3694                        CALL cp_dbcsr_cholesky_decompose(sigma_vv_sqrt, &
3695                                                         para_env=almo_scf_env%para_env, &
3696                                                         blacs_env=almo_scf_env%blacs_env)
3697                        CALL dbcsr_triu(sigma_vv_sqrt)
3698                        CALL dbcsr_filter(sigma_vv_sqrt, almo_scf_env%eps_filter)
3699                        ! apply SOLVE to compute U^(-1) : U*U^(-1)=I
3700                        CALL dbcsr_get_info(sigma_vv_sqrt, nfullrows_total=n)
3701                        CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
3702                                          matrix_type=dbcsr_type_no_symmetry)
3703                        CALL dbcsr_set(matrix_tmp1, 0.0_dp)
3704                        CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
3705                        CALL cp_dbcsr_cholesky_restore(matrix_tmp1, n, sigma_vv_sqrt, &
3706                                                       sigma_vv_sqrt_inv, op="SOLVE", pos="RIGHT", &
3707                                                       para_env=almo_scf_env%para_env, &
3708                                                       blacs_env=almo_scf_env%blacs_env)
3709                        CALL dbcsr_filter(sigma_vv_sqrt_inv, almo_scf_env%eps_filter)
3710                        CALL dbcsr_release(matrix_tmp1)
3711                        IF (safe_mode) THEN
3712                           CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
3713                                             matrix_type=dbcsr_type_no_symmetry)
3714                           CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
3715                                                   matrix_tmp1)
3716                           CALL dbcsr_multiply("T", "N", 1.0_dp, sigma_vv_sqrt, &
3717                                               sigma_vv_sqrt, &
3718                                               -1.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
3719                           frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
3720                           CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
3721                           frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
3722                           IF (unit_nr > 0) THEN
3723                              WRITE (unit_nr, *) "Error for ( U^T * U - Sig )", &
3724                                 frob_matrix/frob_matrix_base
3725                           ENDIF
3726                           CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
3727                                               sigma_vv_sqrt, &
3728                                               0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
3729                           frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
3730                           CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
3731                           frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
3732                           IF (unit_nr > 0) THEN
3733                              WRITE (unit_nr, *) "Error for ( inv(U) * U - I )", &
3734                                 frob_matrix/frob_matrix_base
3735                           ENDIF
3736                           CALL dbcsr_release(matrix_tmp1)
3737                        ENDIF ! safe_mode
3738                        t2cholesky = m_walltime()
3739                        IF (unit_nr > 0) THEN
3740                           WRITE (unit_nr, *) "Cholesky+inverse wall-time: ", t2cholesky - t1cholesky
3741                        ENDIF
3742                        CALL timestop(handle2)
3743                     ELSE
3744                        CALL matrix_sqrt_Newton_Schulz(sigma_vv_sqrt, &
3745                                                       sigma_vv_sqrt_inv, &
3746                                                       almo_scf_env%matrix_sigma_vv(ispin), &
3747                                                       !matrix_sqrt_inv_guess=sigma_vv_sqrt_inv_guess,&
3748                                                       !matrix_sqrt_guess=sigma_vv_sqrt_guess,&
3749                                                       threshold=almo_scf_env%eps_filter, &
3750                                                       order=almo_scf_env%order_lanczos, &
3751                                                       eps_lanczos=almo_scf_env%eps_lanczos, &
3752                                                       max_iter_lanczos=almo_scf_env%max_iter_lanczos)
3753                        CALL dbcsr_copy(sigma_vv_sqrt_inv_guess, sigma_vv_sqrt_inv)
3754                        CALL dbcsr_copy(sigma_vv_sqrt_guess, sigma_vv_sqrt)
3755                        IF (safe_mode) THEN
3756                           CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
3757                                             matrix_type=dbcsr_type_no_symmetry)
3758                           CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma_vv(ispin), &
3759                                             matrix_type=dbcsr_type_no_symmetry)
3760
3761                           CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
3762                                               almo_scf_env%matrix_sigma_vv(ispin), &
3763                                               0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
3764                           CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
3765                                               sigma_vv_sqrt_inv, &
3766                                               0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
3767
3768                           frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
3769                           CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
3770                           frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
3771                           IF (unit_nr > 0) THEN
3772                              WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
3773                                 frob_matrix/frob_matrix_base
3774                           ENDIF
3775
3776                           CALL dbcsr_release(matrix_tmp1)
3777                           CALL dbcsr_release(matrix_tmp2)
3778                        ENDIF
3779                     ENDIF
3780                     CALL timestop(handle1)
3781
3782                     ! compute excitation amplitudes (to the current set of retained virtuals)
3783                     ! set convergence criterion for x-optimization
3784                     IF ((iteration .EQ. 0) .AND. (.NOT. line_search) .AND. &
3785                         (outer_opt_k_iteration .EQ. 0)) THEN
3786                        x_opt_eps_adaptive = &
3787                           almo_scf_env%deloc_cayley_eps_convergence
3788                     ELSE
3789                        x_opt_eps_adaptive = &
3790                           MAX(ABS(almo_scf_env%deloc_cayley_eps_convergence), &
3791                               ABS(x_opt_eps_adaptive_factor*grad_norm))
3792                     ENDIF
3793                     CALL ct_step_env_init(ct_step_env)
3794                     CALL ct_step_env_set(ct_step_env, &
3795                                          para_env=almo_scf_env%para_env, &
3796                                          blacs_env=almo_scf_env%blacs_env, &
3797                                          use_occ_orbs=.TRUE., &
3798                                          use_virt_orbs=.TRUE., &
3799                                          occ_orbs_orthogonal=.FALSE., &
3800                                          virt_orbs_orthogonal=.FALSE., &
3801                                          pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
3802                                          qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
3803                                          tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
3804                                          neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
3805                                          conjugator=almo_scf_env%deloc_cayley_conjugator, &
3806                                          max_iter=almo_scf_env%deloc_cayley_max_iter, &
3807                                          calculate_energy_corr=.TRUE., &
3808                                          update_p=.FALSE., &
3809                                          update_q=.FALSE., &
3810                                          eps_convergence=x_opt_eps_adaptive, &
3811                                          eps_filter=almo_scf_env%eps_filter, &
3812                                          !nspins=1,&
3813                                          q_index_up=sigma_vv_sqrt_inv, &
3814                                          q_index_down=sigma_vv_sqrt, &
3815                                          p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
3816                                          p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
3817                                          matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
3818                                          matrix_t=almo_scf_env%matrix_t(ispin), &
3819                                          matrix_qp_template=almo_scf_env%matrix_vo(ispin), &
3820                                          matrix_pq_template=almo_scf_env%matrix_ov(ispin), &
3821                                          matrix_v=almo_scf_env%matrix_v(ispin), &
3822                                          matrix_x_guess=almo_scf_env%matrix_x(ispin))
3823                     ! perform calculations
3824                     CALL ct_step_execute(ct_step_env)
3825                     ! get the energy correction
3826                     CALL ct_step_env_get(ct_step_env, &
3827                                          energy_correction=energy_correction(ispin), &
3828                                          copy_matrix_x=almo_scf_env%matrix_x(ispin))
3829                     CALL ct_step_env_clean(ct_step_env)
3830                     ! RZK-warning matrix_x is being transformed
3831                     ! back and forth between orth and up_down representations
3832                     energy_correction(1) = energy_correction(1)*spin_factor
3833
3834                     IF (opt_k_max_iter .NE. 0) THEN
3835
3836                        CALL timeset('k_opt_t_curr', handle3)
3837
3838                        ! construct current occupied orbitals T_blk + V_r*X
3839                        CALL dbcsr_multiply("N", "N", 1.0_dp, &
3840                                            almo_scf_env%matrix_v(ispin), &
3841                                            almo_scf_env%matrix_x(ispin), &
3842                                            0.0_dp, t_curr, &
3843                                            filter_eps=almo_scf_env%eps_filter)
3844                        CALL dbcsr_add(t_curr, almo_scf_env%matrix_t_blk(ispin), &
3845                                       +1.0_dp, +1.0_dp)
3846
3847                        ! calculate current occupied overlap
3848                        !IF (unit_nr>0) THEN
3849                        !   WRITE(unit_nr,*) "Inverting current occ overlap matrix"
3850                        !ENDIF
3851                        CALL get_overlap(bra=t_curr, &
3852                                         ket=t_curr, &
3853                                         overlap=sigma_oo_curr, &
3854                                         metric=almo_scf_env%matrix_s(1), &
3855                                         retain_overlap_sparsity=.FALSE., &
3856                                         eps_filter=almo_scf_env%eps_filter)
3857                        IF (iteration .EQ. 0) THEN
3858                           CALL invert_Hotelling(sigma_oo_curr_inv, &
3859                                                 sigma_oo_curr, &
3860                                                 threshold=almo_scf_env%eps_filter, &
3861                                                 use_inv_as_guess=.FALSE.)
3862                        ELSE
3863                           CALL invert_Hotelling(sigma_oo_curr_inv, &
3864                                                 sigma_oo_curr, &
3865                                                 threshold=almo_scf_env%eps_filter, &
3866                                                 use_inv_as_guess=.TRUE.)
3867                           !CALL dbcsr_copy(sigma_oo_guess,sigma_oo_curr_inv)
3868                        ENDIF
3869                        IF (safe_mode) THEN
3870                           CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
3871                                             matrix_type=dbcsr_type_no_symmetry)
3872                           CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr, &
3873                                               sigma_oo_curr_inv, &
3874                                               0.0_dp, matrix_tmp1, &
3875                                               filter_eps=almo_scf_env%eps_filter)
3876                           frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
3877                           CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
3878                           frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
3879                           !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
3880                           !CALL dbcsr_print(matrix_tmp1)
3881                           IF (unit_nr > 0) THEN
3882                              WRITE (unit_nr, *) "Error for (SIG*inv(SIG)-I)", &
3883                                 frob_matrix/frob_matrix_base, frob_matrix_base
3884                           ENDIF
3885                           CALL dbcsr_release(matrix_tmp1)
3886                        ENDIF
3887                        IF (safe_mode) THEN
3888                           CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
3889                                             matrix_type=dbcsr_type_no_symmetry)
3890                           CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr_inv, &
3891                                               sigma_oo_curr, &
3892                                               0.0_dp, matrix_tmp1, &
3893                                               filter_eps=almo_scf_env%eps_filter)
3894                           frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
3895                           CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
3896                           frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
3897                           !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
3898                           !CALL dbcsr_print(matrix_tmp1)
3899                           IF (unit_nr > 0) THEN
3900                              WRITE (unit_nr, *) "Error for (inv(SIG)*SIG-I)", &
3901                                 frob_matrix/frob_matrix_base, frob_matrix_base
3902                           ENDIF
3903                           CALL dbcsr_release(matrix_tmp1)
3904                        ENDIF
3905
3906                        CALL timestop(handle3)
3907                        CALL timeset('k_opt_vd', handle4)
3908
3909                        ! construct current discarded virtuals:
3910                        ! (1-R_curr)(1-Q^VR_curr)|ALMO_vd_basis> =
3911                        ! = (1-Q^VR_curr)|ALMO_vd_basis>
3912                        ! use sigma_vv_sqrt to store the inverse of the overlap
3913                        ! sigma_vv_inv is computed from sqrt/cholesky
3914                        CALL dbcsr_multiply("N", "T", 1.0_dp, &
3915                                            sigma_vv_sqrt_inv, &
3916                                            sigma_vv_sqrt_inv, &
3917                                            0.0_dp, sigma_vv_sqrt, &
3918                                            filter_eps=almo_scf_env%eps_filter)
3919                        CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
3920                                             psi_out=almo_scf_env%matrix_v_disc(ispin), &
3921                                             psi_projector=almo_scf_env%matrix_v(ispin), &
3922                                             metric=almo_scf_env%matrix_s(1), &
3923                                             project_out=.FALSE., &
3924                                             psi_projector_orthogonal=.FALSE., &
3925                                             proj_in_template=almo_scf_env%matrix_k_tr(ispin), &
3926                                             eps_filter=almo_scf_env%eps_filter, &
3927                                             sig_inv_projector=sigma_vv_sqrt)
3928                        !sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin),&
3929                        CALL dbcsr_add(almo_scf_env%matrix_v_disc(ispin), &
3930                                       vd_fixed, -1.0_dp, +1.0_dp)
3931
3932                        CALL timestop(handle4)
3933                        CALL timeset('k_opt_grad', handle5)
3934
3935                        ! evaluate the gradient from the assembled components
3936                        ! grad_xx = c0 [ (Vd_curr^tr)*F*T_curr*sigma_oo_curr_inv*(X^tr)]_xx
3937                        ! save previous gradient to calculate conjugation coef
3938                        IF (line_search) THEN
3939                           CALL dbcsr_copy(prev_grad, grad)
3940                        ENDIF
3941                        CALL dbcsr_multiply("N", "N", 1.0_dp, &
3942                                            almo_scf_env%matrix_ks_0deloc(ispin), &
3943                                            t_curr, &
3944                                            0.0_dp, tmp2_n_o, &
3945                                            filter_eps=almo_scf_env%eps_filter)
3946                        CALL dbcsr_multiply("N", "T", 1.0_dp, &
3947                                            sigma_oo_curr_inv, &
3948                                            almo_scf_env%matrix_x(ispin), &
3949                                            0.0_dp, tmp4_o_vr, &
3950                                            filter_eps=almo_scf_env%eps_filter)
3951                        CALL dbcsr_multiply("N", "N", 1.0_dp, &
3952                                            tmp2_n_o, &
3953                                            tmp4_o_vr, &
3954                                            0.0_dp, tmp1_n_vr, &
3955                                            filter_eps=almo_scf_env%eps_filter)
3956                        CALL dbcsr_multiply("T", "N", 2.0_dp*spin_factor, &
3957                                            almo_scf_env%matrix_v_disc(ispin), &
3958                                            tmp1_n_vr, &
3959                                            0.0_dp, grad, &
3960                                            retain_sparsity=.TRUE.)
3961                        !filter_eps=almo_scf_env%eps_filter,&
3962                        ! keep tmp2_n_o for the next step
3963                        ! keep tmp4_o_vr for the preconditioner
3964
3965                        ! check convergence and other exit criteria
3966                        grad_norm_frob = dbcsr_frobenius_norm(grad)
3967                        CALL dbcsr_norm(grad, dbcsr_norm_maxabsnorm, norm_scalar=grad_norm)
3968                        converged = (grad_norm .LT. almo_scf_env%opt_k_eps_convergence)
3969                        IF (converged .OR. (iteration .GE. opt_k_max_iter)) THEN
3970                           prepare_to_exit = .TRUE.
3971                        ENDIF
3972                        CALL timestop(handle5)
3973
3974                        IF (.NOT. prepare_to_exit) THEN
3975
3976                           CALL timeset('k_opt_energy', handle6)
3977
3978                           ! compute "energy" c0*Tr[sig_inv_oo*t*F*t]
3979                           CALL dbcsr_multiply("T", "N", spin_factor, &
3980                                               t_curr, &
3981                                               tmp2_n_o, &
3982                                               0.0_dp, sigma_oo_curr, &
3983                                               filter_eps=almo_scf_env%eps_filter)
3984                           delta_obj_function = fun0
3985                           CALL dbcsr_dot(sigma_oo_curr_inv, sigma_oo_curr, obj_function)
3986                           delta_obj_function = obj_function - delta_obj_function
3987                           IF (line_search) THEN
3988                              fun1 = obj_function
3989                           ELSE
3990                              fun0 = obj_function
3991                           ENDIF
3992
3993                           CALL timestop(handle6)
3994
3995                           ! update the step direction
3996                           IF (.NOT. line_search) THEN
3997
3998                              CALL timeset('k_opt_step', handle7)
3999
4000                              IF ((.NOT. md_in_k_space) .AND. &
4001                                  (iteration .GE. MAX(0, almo_scf_env%opt_k_prec_iter_start) .AND. &
4002                                   MOD(iteration - almo_scf_env%opt_k_prec_iter_start, &
4003                                       almo_scf_env%opt_k_prec_iter_freq) .EQ. 0)) THEN
4004
4005                                 !IF ((iteration.eq.0).AND.(.NOT.md_in_k_space)) THEN
4006
4007                                 ! compute the preconditioner
4008                                 IF (unit_nr > 0) THEN
4009                                    WRITE (unit_nr, *) "Computing preconditioner"
4010                                 ENDIF
4011                                 !CALL opt_k_create_preconditioner(prec,&
4012                                 !        almo_scf_env%matrix_v_disc(ispin),&
4013                                 !        almo_scf_env%matrix_ks_0deloc(ispin),&
4014                                 !        almo_scf_env%matrix_x(ispin),&
4015                                 !        tmp4_o_vr,&
4016                                 !        almo_scf_env%matrix_s(1),&
4017                                 !        grad,&
4018                                 !        !almo_scf_env%matrix_v_disc_blk(ispin),&
4019                                 !        vd_fixed,&
4020                                 !        t_curr,&
4021                                 !        k_vd_index_up,&
4022                                 !        k_vr_index_down,&
4023                                 !        tmp1_n_vr,&
4024                                 !        spin_factor,&
4025                                 !        almo_scf_env%eps_filter)
4026                                 CALL opt_k_create_preconditioner_blk(almo_scf_env, &
4027                                                                      almo_scf_env%matrix_v_disc(ispin), &
4028                                                                      tmp4_o_vr, &
4029                                                                      t_curr, &
4030                                                                      ispin, &
4031                                                                      spin_factor)
4032
4033                              ENDIF
4034
4035                              ! save the previous step
4036                              CALL dbcsr_copy(prev_step, step)
4037
4038                              ! compute the new step
4039                              CALL opt_k_apply_preconditioner_blk(almo_scf_env, &
4040                                                                  step, grad, ispin)
4041                              !CALL dbcsr_hadamard_product(prec,grad,step)
4042                              CALL dbcsr_scale(step, -1.0_dp)
4043
4044                              ! check whether we need to reset conjugate directions
4045                              reset_conjugator = .FALSE.
4046                              ! first check if manual reset is active
4047                              IF (iteration .LT. MAX(almo_scf_env%opt_k_conj_iter_start, 1) .OR. &
4048                                  MOD(iteration - almo_scf_env%opt_k_conj_iter_start, &
4049                                      almo_scf_env%opt_k_conj_iter_freq) .EQ. 0) THEN
4050
4051                                 reset_conjugator = .TRUE.
4052
4053                              ELSE
4054
4055                                 ! check for the errors in the cg algorithm
4056                                 !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
4057                                 !CALL dbcsr_dot(grad,tmp_k_blk,numer)
4058                                 !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
4059                                 CALL dbcsr_dot(grad, prev_minus_prec_grad, numer)
4060                                 CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
4061                                 conjugacy_error = numer/denom
4062
4063                                 IF (conjugacy_error .GT. MIN(0.5_dp, conjugacy_error_threshold)) THEN
4064                                    reset_conjugator = .TRUE.
4065                                    IF (unit_nr > 0) THEN
4066                                       WRITE (unit_nr, *) "Lack of progress, conjugacy error is ", conjugacy_error
4067                                    ENDIF
4068                                 ENDIF
4069
4070                                 ! check the gradient along the previous direction
4071                                 IF ((iteration .NE. 0) .AND. (.NOT. reset_conjugator)) THEN
4072                                    CALL dbcsr_dot(grad, prev_step, numer)
4073                                    CALL dbcsr_dot(prev_grad, prev_step, denom)
4074                                    line_search_error = numer/denom
4075                                    IF (line_search_error .GT. line_search_error_threshold) THEN
4076                                       reset_conjugator = .TRUE.
4077                                       IF (unit_nr > 0) THEN
4078                                          WRITE (unit_nr, *) "Bad line search, line search error is ", line_search_error
4079                                       ENDIF
4080                                    ENDIF
4081                                 ENDIF
4082
4083                              ENDIF
4084
4085                              ! compute the conjugation coefficient - beta
4086                              IF (.NOT. reset_conjugator) THEN
4087
4088                                 SELECT CASE (almo_scf_env%opt_k_conjugator)
4089                                 CASE (cg_hestenes_stiefel)
4090                                    CALL dbcsr_copy(tmp_k_blk, grad)
4091                                    CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
4092                                    CALL dbcsr_dot(tmp_k_blk, step, numer)
4093                                    CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
4094                                    beta = -1.0_dp*numer/denom
4095                                 CASE (cg_fletcher_reeves)
4096                                    !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
4097                                    !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
4098                                    !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
4099                                    !CALL dbcsr_dot(grad,tmp_k_blk,numer)
4100                                    !beta=numer/denom
4101                                    CALL dbcsr_dot(grad, step, numer)
4102                                    CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
4103                                    beta = numer/denom
4104                                 CASE (cg_polak_ribiere)
4105                                    !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
4106                                    !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
4107                                    !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
4108                                    !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
4109                                    !CALL dbcsr_dot(tmp_k_blk,grad,numer)
4110                                    CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
4111                                    CALL dbcsr_copy(tmp_k_blk, grad)
4112                                    CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
4113                                    CALL dbcsr_dot(tmp_k_blk, step, numer)
4114                                    beta = numer/denom
4115                                 CASE (cg_fletcher)
4116                                    !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
4117                                    !CALL dbcsr_dot(grad,tmp_k_blk,numer)
4118                                    !CALL dbcsr_dot(prev_grad,prev_step,denom)
4119                                    !beta=-1.0_dp*numer/denom
4120                                    CALL dbcsr_dot(grad, step, numer)
4121                                    CALL dbcsr_dot(prev_grad, prev_step, denom)
4122                                    beta = numer/denom
4123                                 CASE (cg_liu_storey)
4124                                    CALL dbcsr_dot(prev_grad, prev_step, denom)
4125                                    !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
4126                                    !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
4127                                    !CALL dbcsr_dot(tmp_k_blk,grad,numer)
4128                                    CALL dbcsr_copy(tmp_k_blk, grad)
4129                                    CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
4130                                    CALL dbcsr_dot(tmp_k_blk, step, numer)
4131                                    beta = numer/denom
4132                                 CASE (cg_dai_yuan)
4133                                    !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
4134                                    !CALL dbcsr_dot(grad,tmp_k_blk,numer)
4135                                    !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
4136                                    !CALL dbcsr_dot(prev_grad,prev_step,denom)
4137                                    !beta=numer/denom
4138                                    CALL dbcsr_dot(grad, step, numer)
4139                                    CALL dbcsr_copy(tmp_k_blk, grad)
4140                                    CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
4141                                    CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
4142                                    beta = -1.0_dp*numer/denom
4143                                 CASE (cg_hager_zhang)
4144                                    !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
4145                                    !CALL dbcsr_dot(prev_grad,prev_step,denom)
4146                                    !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
4147                                    !CALL dbcsr_dot(tmp_k_blk,prev_grad,numer)
4148                                    !kappa=2.0_dp*numer/denom
4149                                    !CALL dbcsr_dot(tmp_k_blk,grad,numer)
4150                                    !tau=numer/denom
4151                                    !CALL dbcsr_dot(prev_step,grad,numer)
4152                                    !beta=tau-kappa*numer/denom
4153                                    CALL dbcsr_copy(tmp_k_blk, grad)
4154                                    CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
4155                                    CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
4156                                    CALL dbcsr_dot(tmp_k_blk, prev_minus_prec_grad, numer)
4157                                    kappa = -2.0_dp*numer/denom
4158                                    CALL dbcsr_dot(tmp_k_blk, step, numer)
4159                                    tau = -1.0_dp*numer/denom
4160                                    CALL dbcsr_dot(prev_step, grad, numer)
4161                                    beta = tau - kappa*numer/denom
4162                                 CASE (cg_zero)
4163                                    beta = 0.0_dp
4164                                 CASE DEFAULT
4165                                    CPABORT("illegal conjugator")
4166                                 END SELECT
4167
4168                                 IF (beta .LT. 0.0_dp) THEN
4169                                    IF (unit_nr > 0) THEN
4170                                       WRITE (unit_nr, *) "Beta is negative, ", beta
4171                                    ENDIF
4172                                    reset_conjugator = .TRUE.
4173                                 ENDIF
4174
4175                              ENDIF
4176
4177                              IF (md_in_k_space) THEN
4178                                 reset_conjugator = .TRUE.
4179                              ENDIF
4180
4181                              IF (reset_conjugator) THEN
4182
4183                                 beta = 0.0_dp
4184                                 !reset_step_size=.TRUE.
4185
4186                                 IF (unit_nr > 0) THEN
4187                                    WRITE (unit_nr, *) "(Re)-setting conjugator to zero"
4188                                 ENDIF
4189
4190                              ENDIF
4191
4192                              ! save the preconditioned gradient
4193                              CALL dbcsr_copy(prev_minus_prec_grad, step)
4194
4195                              ! conjugate the step direction
4196                              CALL dbcsr_add(step, prev_step, 1.0_dp, beta)
4197
4198                              CALL timestop(handle7)
4199
4200                              ! update the step direction
4201                           ELSE ! step update
4202                              conjugacy_error = 0.0_dp
4203                           ENDIF
4204
4205                           ! compute the gradient with respect to the step size in the curr direction
4206                           IF (line_search) THEN
4207                              CALL dbcsr_dot(grad, step, gfun1)
4208                              line_search_error = gfun1/gfun0
4209                           ELSE
4210                              CALL dbcsr_dot(grad, step, gfun0)
4211                           ENDIF
4212
4213                           ! make a step - update k
4214                           IF (line_search) THEN
4215
4216                              ! check if the trial step provides enough numerical accuracy
4217                              safety_multiplier = 1.0E+1_dp ! must be more than one
4218                              num_threshold = MAX(EPSILON(1.0_dp), &
4219                                                  safety_multiplier*(almo_scf_env%eps_filter**2)*almo_scf_env%ndomains)
4220                              IF (ABS(fun1 - fun0 - gfun0*step_size) .LT. num_threshold) THEN
4221                                 IF (unit_nr > 0) THEN
4222                                    WRITE (unit_nr, '(T3,A,1X,E17.7)') &
4223                                       "Numerical accuracy is too low to observe non-linear behavior", &
4224                                       ABS(fun1 - fun0 - gfun0*step_size)
4225                                    WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Error computing ", &
4226                                       ABS(gfun0), &
4227                                       " is smaller than the threshold", num_threshold
4228                                 ENDIF
4229                                 CPABORT("")
4230                              ENDIF
4231                              IF (ABS(gfun0) .LT. num_threshold) THEN
4232                                 IF (unit_nr > 0) THEN
4233                                    WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
4234                                       ABS(gfun0), &
4235                                       " is smaller than the threshold", num_threshold
4236                                 ENDIF
4237                                 CPABORT("")
4238                              ENDIF
4239
4240                              use_quadratic_approximation = .TRUE.
4241                              use_cubic_approximation = .FALSE.
4242
4243                              ! find the minimum assuming quadratic form
4244                              ! use f0, f1, g0
4245                              step_size_quadratic_approx = -(gfun0*step_size*step_size)/(2.0_dp*(fun1 - fun0 - gfun0*step_size))
4246                              ! use f0, f1, g1
4247                             step_size_quadratic_approx2 = -(fun1 - fun0 - step_size*gfun1/2.0_dp)/(gfun1 - (fun1 - fun0)/step_size)
4248
4249                              IF ((step_size_quadratic_approx .LT. 0.0_dp) .AND. &
4250                                  (step_size_quadratic_approx2 .LT. 0.0_dp)) THEN
4251                                 IF (unit_nr > 0) THEN
4252                                    WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') &
4253                                       "Quadratic approximation gives negative steps", &
4254                                       step_size_quadratic_approx, step_size_quadratic_approx2, &
4255                                       "trying cubic..."
4256                                 ENDIF
4257                                 use_cubic_approximation = .TRUE.
4258                                 use_quadratic_approximation = .FALSE.
4259                              ELSE
4260                                 IF (step_size_quadratic_approx .LT. 0.0_dp) THEN
4261                                    step_size_quadratic_approx = step_size_quadratic_approx2
4262                                 ENDIF
4263                                 IF (step_size_quadratic_approx2 .LT. 0.0_dp) THEN
4264                                    step_size_quadratic_approx2 = step_size_quadratic_approx
4265                                 ENDIF
4266                              ENDIF
4267
4268                              ! check accuracy of the quadratic approximation
4269                              IF (use_quadratic_approximation) THEN
4270                                 quadratic_approx_error = ABS(step_size_quadratic_approx - &
4271                                                              step_size_quadratic_approx2)/step_size_quadratic_approx
4272                                 IF (quadratic_approx_error .GT. quadratic_approx_error_threshold) THEN
4273                                    IF (unit_nr > 0) THEN
4274                                       WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') "Quadratic approximation is poor", &
4275                                          step_size_quadratic_approx, step_size_quadratic_approx2, &
4276                                          "Try cubic approximation"
4277                                    ENDIF
4278                                    use_cubic_approximation = .TRUE.
4279                                    use_quadratic_approximation = .FALSE.
4280                                 ENDIF
4281                              ENDIF
4282
4283                              ! check if numerics is fine enough to capture the cubic form
4284                              IF (use_cubic_approximation) THEN
4285
4286                                 ! if quadratic approximation is not accurate enough
4287                                 ! try to find the minimum assuming cubic form
4288                                 ! aa*x**3 + bb*x**2 + cc*x + dd = f(x)
4289                                 bb = (-step_size*gfun1 + 3.0_dp*(fun1 - fun0) - 2.0_dp*step_size*gfun0)/(step_size*step_size)
4290                                 aa = (gfun1 - 2.0_dp*step_size*bb - gfun0)/(3.0_dp*step_size*step_size)
4291
4292                                 IF (ABS(gfun1 - 2.0_dp*step_size*bb - gfun0) .LT. num_threshold) THEN
4293                                    IF (unit_nr > 0) THEN
4294                                       WRITE (unit_nr, '(T3,A,1X,E17.7)') &
4295                                          "Numerical accuracy is too low to observe cubic behavior", &
4296                                          ABS(gfun1 - 2.0_dp*step_size*bb - gfun0)
4297                                    ENDIF
4298                                    use_cubic_approximation = .FALSE.
4299                                    use_quadratic_approximation = .TRUE.
4300                                 ENDIF
4301                                 IF (ABS(gfun1) .LT. num_threshold) THEN
4302                                    IF (unit_nr > 0) THEN
4303                                       WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
4304                                          ABS(gfun1), &
4305                                          " is smaller than the threshold", num_threshold
4306                                    ENDIF
4307                                    use_cubic_approximation = .FALSE.
4308                                    use_quadratic_approximation = .TRUE.
4309                                 ENDIF
4310                              ENDIF
4311
4312                              ! find the step assuming cubic approximation
4313                              IF (use_cubic_approximation) THEN
4314                                 ! to obtain the minimum of the cubic function solve the quadratic equation
4315                                 ! 0.0*x**3 + 3.0*aa*x**2 + 2.0*bb*x + cc = 0
4316                                 CALL analytic_line_search(0.0_dp, 3.0_dp*aa, 2.0_dp*bb, gfun0, minima, nmins)
4317                                 IF (nmins .LT. 1) THEN
4318                                    IF (unit_nr > 0) THEN
4319                                       WRITE (unit_nr, '(T3,A)') &
4320                                          "Cubic approximation gives zero soultions! Use quadratic approximation"
4321                                    ENDIF
4322                                    use_quadratic_approximation = .TRUE.
4323                                    use_cubic_approximation = .TRUE.
4324                                 ELSE
4325                                    step_size = minima(1)
4326                                    IF (nmins .GT. 1) THEN
4327                                       IF (unit_nr > 0) THEN
4328                                          WRITE (unit_nr, '(T3,A)') &
4329                                             "More than one solution found! Use quadratic approximation"
4330                                       ENDIF
4331                                       use_quadratic_approximation = .TRUE.
4332                                       use_cubic_approximation = .TRUE.
4333                                    ENDIF
4334                                 ENDIF
4335                              ENDIF
4336
4337                              IF (use_quadratic_approximation) THEN ! use quadratic approximation
4338                                 IF (unit_nr > 0) THEN
4339                                    WRITE (unit_nr, '(T3,A)') "Use quadratic approximation"
4340                                 ENDIF
4341                                 step_size = (step_size_quadratic_approx + step_size_quadratic_approx2)*0.5_dp
4342                              ENDIF
4343
4344                              ! one more check on the step size
4345                              IF (step_size .LT. 0.0_dp) THEN
4346                                 CPABORT("Negative step proposed")
4347                              ENDIF
4348
4349                              CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
4350                                              matrix_k_central)
4351                              CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
4352                                             step, 1.0_dp, step_size)
4353                              CALL dbcsr_copy(matrix_k_central, &
4354                                              almo_scf_env%matrix_k_blk(ispin))
4355                              line_search = .FALSE.
4356
4357                           ELSE
4358
4359                              IF (md_in_k_space) THEN
4360
4361                                 ! update velocities v(i) = v(i-1) + 0.5*dT*(a(i-1) + a(i))
4362                                 IF (iteration .NE. 0) THEN
4363                                    CALL dbcsr_add(velocity, &
4364                                                   step, 1.0_dp, 0.5_dp*time_step)
4365                                    CALL dbcsr_add(velocity, &
4366                                                   prev_step, 1.0_dp, 0.5_dp*time_step)
4367                                 ENDIF
4368                                 kin_energy = dbcsr_frobenius_norm(velocity)
4369                                 kin_energy = 0.5_dp*kin_energy*kin_energy
4370
4371                                 ! update positions k(i) = k(i-1) + dT*v(i-1) + 0.5*dT*dT*a(i-1)
4372                                 CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
4373                                                velocity, 1.0_dp, time_step)
4374                                 CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
4375                                                step, 1.0_dp, 0.5_dp*time_step*time_step)
4376
4377                              ELSE
4378
4379                                 IF (reset_step_size) THEN
4380                                    step_size = almo_scf_env%opt_k_trial_step_size
4381                                    reset_step_size = .FALSE.
4382                                 ELSE
4383                                    step_size = step_size*almo_scf_env%opt_k_trial_step_size_multiplier
4384                                 ENDIF
4385                                 CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
4386                                                 matrix_k_central)
4387                                 CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
4388                                                step, 1.0_dp, step_size)
4389                                 line_search = .TRUE.
4390                              ENDIF
4391
4392                           ENDIF
4393
4394                        ENDIF ! .NOT.prepare_to_exit
4395
4396                        ! print the status of the optimization
4397                        t2a = m_walltime()
4398                        IF (unit_nr > 0) THEN
4399                           IF (md_in_k_space) THEN
4400                              WRITE (unit_nr, '(T6,A,1X,I5,1X,E12.3,E16.7,F15.9,F15.9,F15.9,E12.3,F15.9,F15.9,F8.3)') &
4401                                 "K iter CG", iteration, time_step, time_step*iteration, &
4402                                 energy_correction(ispin), obj_function, delta_obj_function, grad_norm, &
4403                                 kin_energy, kin_energy + obj_function, beta
4404                           ELSE
4405                              IF (line_search .OR. prepare_to_exit) THEN
4406                                 WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
4407                                    "K iter CG", iteration, step_size, &
4408                                    energy_correction(ispin), delta_obj_function, grad_norm, &
4409                                    gfun0, line_search_error, beta, conjugacy_error, t2a - t1a
4410                                 !(flop1+flop2)/(1.0E6_dp*(t2-t1))
4411                              ELSE
4412                                 WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
4413                                    "K iter LS", iteration, step_size, &
4414                                    energy_correction(ispin), delta_obj_function, grad_norm, &
4415                                    gfun1, line_search_error, beta, conjugacy_error, t2a - t1a
4416                                 !(flop1+flop2)/(1.0E6_dp*(t2-t1))
4417                              ENDIF
4418                           ENDIF
4419                           CALL m_flush(unit_nr)
4420                        ENDIF
4421                        t1a = m_walltime()
4422
4423                     ELSE ! opt_k_max_iter .eq. 0
4424                        prepare_to_exit = .TRUE.
4425                     ENDIF ! opt_k_max_iter .ne. 0
4426
4427                     IF (.NOT. line_search) iteration = iteration + 1
4428
4429                     IF (prepare_to_exit) EXIT
4430
4431                  ENDDO ! end iterations on K
4432
4433                  IF (converged .OR. (outer_opt_k_iteration .GE. outer_opt_k_max_iter)) THEN
4434                     outer_opt_k_prepare_to_exit = .TRUE.
4435                  ENDIF
4436
4437                  IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
4438
4439                     IF (unit_nr > 0) THEN
4440                        WRITE (unit_nr, *) "Updating ALMO virtuals"
4441                     ENDIF
4442
4443                     CALL timeset('k_opt_v0_update', handle8)
4444
4445                     ! update retained ALMO virtuals to restart the cg iterations
4446                     CALL dbcsr_multiply("N", "N", 1.0_dp, &
4447                                         almo_scf_env%matrix_v_disc_blk(ispin), &
4448                                         almo_scf_env%matrix_k_blk(ispin), &
4449                                         0.0_dp, vr_fixed, &
4450                                         filter_eps=almo_scf_env%eps_filter)
4451                     CALL dbcsr_add(vr_fixed, almo_scf_env%matrix_v_blk(ispin), &
4452                                    +1.0_dp, +1.0_dp)
4453
4454                     ! update discarded ALMO virtuals to restart the cg iterations
4455                     CALL dbcsr_multiply("N", "T", 1.0_dp, &
4456                                         almo_scf_env%matrix_v_blk(ispin), &
4457                                         almo_scf_env%matrix_k_blk(ispin), &
4458                                         0.0_dp, vd_fixed, &
4459                                         filter_eps=almo_scf_env%eps_filter)
4460                     CALL dbcsr_add(vd_fixed, almo_scf_env%matrix_v_disc_blk(ispin), &
4461                                    -1.0_dp, +1.0_dp)
4462
4463                     ! orthogonalize new orbitals on fragments
4464                     CALL get_overlap(bra=vr_fixed, &
4465                                      ket=vr_fixed, &
4466                                      overlap=k_vr_index_down, &
4467                                      metric=almo_scf_env%matrix_s_blk(1), &
4468                                      retain_overlap_sparsity=.FALSE., &
4469                                      eps_filter=almo_scf_env%eps_filter)
4470                     CALL dbcsr_create(vr_index_sqrt_inv, template=k_vr_index_down, &
4471                                       matrix_type=dbcsr_type_no_symmetry)
4472                     CALL dbcsr_create(vr_index_sqrt, template=k_vr_index_down, &
4473                                       matrix_type=dbcsr_type_no_symmetry)
4474                     CALL matrix_sqrt_Newton_Schulz(vr_index_sqrt, &
4475                                                    vr_index_sqrt_inv, &
4476                                                    k_vr_index_down, &
4477                                                    threshold=almo_scf_env%eps_filter, &
4478                                                    order=almo_scf_env%order_lanczos, &
4479                                                    eps_lanczos=almo_scf_env%eps_lanczos, &
4480                                                    max_iter_lanczos=almo_scf_env%max_iter_lanczos)
4481                     IF (safe_mode) THEN
4482                        CALL dbcsr_create(matrix_tmp1, template=k_vr_index_down, &
4483                                          matrix_type=dbcsr_type_no_symmetry)
4484                        CALL dbcsr_create(matrix_tmp2, template=k_vr_index_down, &
4485                                          matrix_type=dbcsr_type_no_symmetry)
4486
4487                        CALL dbcsr_multiply("N", "N", 1.0_dp, vr_index_sqrt_inv, &
4488                                            k_vr_index_down, &
4489                                            0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
4490                        CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
4491                                            vr_index_sqrt_inv, &
4492                                            0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
4493
4494                        frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
4495                        CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
4496                        frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
4497                        IF (unit_nr > 0) THEN
4498                           WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
4499                              frob_matrix/frob_matrix_base
4500                        ENDIF
4501
4502                        CALL dbcsr_release(matrix_tmp1)
4503                        CALL dbcsr_release(matrix_tmp2)
4504                     ENDIF
4505                     CALL dbcsr_multiply("N", "N", 1.0_dp, &
4506                                         vr_fixed, &
4507                                         vr_index_sqrt_inv, &
4508                                         0.0_dp, almo_scf_env%matrix_v_blk(ispin), &
4509                                         filter_eps=almo_scf_env%eps_filter)
4510
4511                     CALL get_overlap(bra=vd_fixed, &
4512                                      ket=vd_fixed, &
4513                                      overlap=k_vd_index_down, &
4514                                      metric=almo_scf_env%matrix_s_blk(1), &
4515                                      retain_overlap_sparsity=.FALSE., &
4516                                      eps_filter=almo_scf_env%eps_filter)
4517                     CALL dbcsr_create(vd_index_sqrt_inv, template=k_vd_index_down, &
4518                                       matrix_type=dbcsr_type_no_symmetry)
4519                     CALL dbcsr_create(vd_index_sqrt, template=k_vd_index_down, &
4520                                       matrix_type=dbcsr_type_no_symmetry)
4521                     CALL matrix_sqrt_Newton_Schulz(vd_index_sqrt, &
4522                                                    vd_index_sqrt_inv, &
4523                                                    k_vd_index_down, &
4524                                                    threshold=almo_scf_env%eps_filter, &
4525                                                    order=almo_scf_env%order_lanczos, &
4526                                                    eps_lanczos=almo_scf_env%eps_lanczos, &
4527                                                    max_iter_lanczos=almo_scf_env%max_iter_lanczos)
4528                     IF (safe_mode) THEN
4529                        CALL dbcsr_create(matrix_tmp1, template=k_vd_index_down, &
4530                                          matrix_type=dbcsr_type_no_symmetry)
4531                        CALL dbcsr_create(matrix_tmp2, template=k_vd_index_down, &
4532                                          matrix_type=dbcsr_type_no_symmetry)
4533
4534                        CALL dbcsr_multiply("N", "N", 1.0_dp, vd_index_sqrt_inv, &
4535                                            k_vd_index_down, &
4536                                            0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
4537                        CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
4538                                            vd_index_sqrt_inv, &
4539                                            0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
4540
4541                        frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
4542                        CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
4543                        frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
4544                        IF (unit_nr > 0) THEN
4545                           WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
4546                              frob_matrix/frob_matrix_base
4547                        ENDIF
4548
4549                        CALL dbcsr_release(matrix_tmp1)
4550                        CALL dbcsr_release(matrix_tmp2)
4551                     ENDIF
4552                     CALL dbcsr_multiply("N", "N", 1.0_dp, &
4553                                         vd_fixed, &
4554                                         vd_index_sqrt_inv, &
4555                                         0.0_dp, almo_scf_env%matrix_v_disc_blk(ispin), &
4556                                         filter_eps=almo_scf_env%eps_filter)
4557
4558                     CALL dbcsr_release(vr_index_sqrt_inv)
4559                     CALL dbcsr_release(vr_index_sqrt)
4560                     CALL dbcsr_release(vd_index_sqrt_inv)
4561                     CALL dbcsr_release(vd_index_sqrt)
4562
4563                     CALL timestop(handle8)
4564
4565                  ENDIF ! ne.virt_full
4566
4567                  ! RZK-warning released outside the outer loop
4568                  CALL dbcsr_release(sigma_vv_sqrt)
4569                  CALL dbcsr_release(sigma_vv_sqrt_inv)
4570                  IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
4571                     CALL dbcsr_release(k_vr_index_down)
4572                     CALL dbcsr_release(k_vd_index_down)
4573                     !CALL dbcsr_release(k_vd_index_up)
4574                     CALL dbcsr_release(matrix_k_central)
4575                     CALL dbcsr_release(vr_fixed)
4576                     CALL dbcsr_release(vd_fixed)
4577                     CALL dbcsr_release(grad)
4578                     CALL dbcsr_release(prec)
4579                     CALL dbcsr_release(prev_grad)
4580                     CALL dbcsr_release(tmp3_vd_vr)
4581                     CALL dbcsr_release(tmp1_n_vr)
4582                     CALL dbcsr_release(tmp_k_blk)
4583                     CALL dbcsr_release(t_curr)
4584                     CALL dbcsr_release(sigma_oo_curr)
4585                     CALL dbcsr_release(sigma_oo_curr_inv)
4586                     CALL dbcsr_release(step)
4587                     CALL dbcsr_release(tmp2_n_o)
4588                     CALL dbcsr_release(tmp4_o_vr)
4589                     CALL dbcsr_release(prev_step)
4590                     CALL dbcsr_release(prev_minus_prec_grad)
4591                     IF (md_in_k_space) THEN
4592                        CALL dbcsr_release(velocity)
4593                     ENDIF
4594
4595                  ENDIF
4596
4597                  outer_opt_k_iteration = outer_opt_k_iteration + 1
4598                  IF (outer_opt_k_prepare_to_exit) EXIT
4599
4600               ENDDO ! outer loop for k
4601
4602            ENDDO ! ispin
4603
4604            ! RZK-warning update mo orbitals
4605
4606         ELSE ! virtual orbitals might not be available use projected AOs
4607
4608            ! compute sqrt(S) and inv(sqrt(S))
4609            ! RZK-warning - remove this sqrt(S) and inv(sqrt(S))
4610            ! ideally ALMO scf should use sigma and sigma_inv in
4611            ! the tensor_up_down representation
4612            IF (.NOT. almo_scf_env%s_sqrt_done) THEN
4613
4614               IF (unit_nr > 0) THEN
4615                  WRITE (unit_nr, *) "sqrt and inv(sqrt) of AO overlap matrix"
4616               ENDIF
4617               CALL dbcsr_create(almo_scf_env%matrix_s_sqrt(1), &
4618                                 template=almo_scf_env%matrix_s(1), &
4619                                 matrix_type=dbcsr_type_no_symmetry)
4620               CALL dbcsr_create(almo_scf_env%matrix_s_sqrt_inv(1), &
4621                                 template=almo_scf_env%matrix_s(1), &
4622                                 matrix_type=dbcsr_type_no_symmetry)
4623
4624               CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_s_sqrt(1), &
4625                                              almo_scf_env%matrix_s_sqrt_inv(1), &
4626                                              almo_scf_env%matrix_s(1), &
4627                                              threshold=almo_scf_env%eps_filter, &
4628                                              order=almo_scf_env%order_lanczos, &
4629                                              eps_lanczos=almo_scf_env%eps_lanczos, &
4630                                              max_iter_lanczos=almo_scf_env%max_iter_lanczos)
4631
4632               IF (safe_mode) THEN
4633                  CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
4634                                    matrix_type=dbcsr_type_no_symmetry)
4635                  CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_s(1), &
4636                                    matrix_type=dbcsr_type_no_symmetry)
4637
4638                  CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
4639                                      almo_scf_env%matrix_s(1), &
4640                                      0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
4641                  CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, almo_scf_env%matrix_s_sqrt_inv(1), &
4642                                      0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
4643
4644                  frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
4645                  CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
4646                  frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
4647                  IF (unit_nr > 0) THEN
4648                     WRITE (unit_nr, *) "Error for (inv(sqrt(S))*S*inv(sqrt(S))-I)", frob_matrix/frob_matrix_base
4649                  ENDIF
4650
4651                  CALL dbcsr_release(matrix_tmp1)
4652                  CALL dbcsr_release(matrix_tmp2)
4653               ENDIF
4654
4655               almo_scf_env%s_sqrt_done = .TRUE.
4656
4657            ENDIF
4658
4659            DO ispin = 1, nspin
4660
4661               CALL ct_step_env_init(ct_step_env)
4662               CALL ct_step_env_set(ct_step_env, &
4663                                    para_env=almo_scf_env%para_env, &
4664                                    blacs_env=almo_scf_env%blacs_env, &
4665                                    use_occ_orbs=.TRUE., &
4666                                    use_virt_orbs=almo_scf_env%deloc_cayley_use_virt_orbs, &
4667                                    occ_orbs_orthogonal=.FALSE., &
4668                                    virt_orbs_orthogonal=almo_scf_env%orthogonal_basis, &
4669                                    tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
4670                                    neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
4671                                    calculate_energy_corr=.TRUE., &
4672                                    update_p=.TRUE., &
4673                                    update_q=.FALSE., &
4674                                    pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
4675                                    qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
4676                                    eps_convergence=almo_scf_env%deloc_cayley_eps_convergence, &
4677                                    eps_filter=almo_scf_env%eps_filter, &
4678                                    !nspins=almo_scf_env%nspins,&
4679                                    q_index_up=almo_scf_env%matrix_s_sqrt_inv(1), &
4680                                    q_index_down=almo_scf_env%matrix_s_sqrt(1), &
4681                                    p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
4682                                    p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
4683                                    matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
4684                                    matrix_p=almo_scf_env%matrix_p(ispin), &
4685                                    matrix_qp_template=almo_scf_env%matrix_t(ispin), &
4686                                    matrix_pq_template=almo_scf_env%matrix_t_tr(ispin), &
4687                                    matrix_t=almo_scf_env%matrix_t(ispin), &
4688                                    conjugator=almo_scf_env%deloc_cayley_conjugator, &
4689                                    max_iter=almo_scf_env%deloc_cayley_max_iter)
4690
4691               ! perform calculations
4692               CALL ct_step_execute(ct_step_env)
4693
4694               ! for now we do not need the new set of orbitals
4695               ! just get the energy correction
4696               CALL ct_step_env_get(ct_step_env, &
4697                                    energy_correction=energy_correction(ispin))
4698               !copy_da_energy_matrix=matrix_eda(ispin),&
4699               !copy_da_charge_matrix=matrix_cta(ispin),&
4700
4701               CALL ct_step_env_clean(ct_step_env)
4702
4703            ENDDO
4704
4705            energy_correction(1) = energy_correction(1)*spin_factor
4706
4707         ENDIF
4708
4709         ! print the energy correction and exit
4710         DO ispin = 1, nspin
4711
4712            IF (unit_nr > 0) THEN
4713               WRITE (unit_nr, *)
4714               WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
4715                  energy_correction(ispin)
4716               WRITE (unit_nr, *)
4717            ENDIF
4718            energy_correction_final = energy_correction_final + energy_correction(ispin)
4719
4720            !!! print out the results of decomposition analysis
4721            !!IF (unit_nr>0) THEN
4722            !!   WRITE(unit_nr,*)
4723            !!   WRITE(unit_nr,'(T2,A)') "ENERGY DECOMPOSITION"
4724            !!ENDIF
4725            !!CALL dbcsr_print_block_sum(eda_matrix(ispin))
4726            !!IF (unit_nr>0) THEN
4727            !!   WRITE(unit_nr,*)
4728            !!   WRITE(unit_nr,'(T2,A)') "CHARGE DECOMPOSITION"
4729            !!ENDIF
4730            !!CALL dbcsr_print_block_sum(cta_matrix(ispin))
4731
4732            ! obtain density matrix from updated MOs
4733            ! RZK-later sigma and sigma_inv are lost here
4734            CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t(ispin), &
4735                                    p=almo_scf_env%matrix_p(ispin), &
4736                                    eps_filter=almo_scf_env%eps_filter, &
4737                                    orthog_orbs=.FALSE., &
4738                                    nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
4739                                    s=almo_scf_env%matrix_s(1), &
4740                                    sigma=almo_scf_env%matrix_sigma(ispin), &
4741                                    sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
4742                                    !use_guess=use_guess, &
4743                                    algorithm=almo_scf_env%sigma_inv_algorithm, &
4744                                    inverse_accelerator=almo_scf_env%order_lanczos, &
4745                                    inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
4746                                    eps_lanczos=almo_scf_env%eps_lanczos, &
4747                                    max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
4748                                    para_env=almo_scf_env%para_env, &
4749                                    blacs_env=almo_scf_env%blacs_env)
4750
4751            IF (almo_scf_env%nspins == 1) &
4752               CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
4753                                spin_factor)
4754
4755         ENDDO
4756
4757      CASE (dm_ls_step)
4758
4759         ! compute the inverse of S
4760         IF (.NOT. almo_scf_env%s_inv_done) THEN
4761            IF (unit_nr > 0) THEN
4762               WRITE (unit_nr, *) "Inverting AO overlap matrix"
4763            ENDIF
4764            CALL dbcsr_create(almo_scf_env%matrix_s_inv(1), &
4765                              template=almo_scf_env%matrix_s(1), &
4766                              matrix_type=dbcsr_type_no_symmetry)
4767            IF (.NOT. almo_scf_env%s_sqrt_done) THEN
4768               CALL invert_Hotelling(almo_scf_env%matrix_s_inv(1), &
4769                                     almo_scf_env%matrix_s(1), &
4770                                     threshold=almo_scf_env%eps_filter)
4771            ELSE
4772               CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
4773                                   almo_scf_env%matrix_s_sqrt_inv(1), &
4774                                   0.0_dp, almo_scf_env%matrix_s_inv(1), &
4775                                   filter_eps=almo_scf_env%eps_filter)
4776            ENDIF
4777
4778            IF (safe_mode) THEN
4779               CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
4780                                 matrix_type=dbcsr_type_no_symmetry)
4781               CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_inv(1), &
4782                                   almo_scf_env%matrix_s(1), &
4783                                   0.0_dp, matrix_tmp1, &
4784                                   filter_eps=almo_scf_env%eps_filter)
4785               frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
4786               CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
4787               frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
4788               IF (unit_nr > 0) THEN
4789                  WRITE (unit_nr, *) "Error for (inv(S)*S-I)", &
4790                     frob_matrix/frob_matrix_base
4791               ENDIF
4792               CALL dbcsr_release(matrix_tmp1)
4793            ENDIF
4794
4795            almo_scf_env%s_inv_done = .TRUE.
4796
4797         ENDIF
4798
4799         DO ispin = 1, nspin
4800            ! RZK-warning the preconditioner is very important
4801            !       IF (.FALSE.) THEN
4802            !           CALL apply_matrix_preconditioner(almo_scf_env%matrix_ks(ispin),&
4803            !                   "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
4804            !                   almo_scf_env%matrix_s_blk_sqrt_inv(1))
4805            !       ENDIF
4806            !CALL dbcsr_filter(almo_scf_env%matrix_ks(ispin),&
4807            !         almo_scf_env%eps_filter)
4808         ENDDO
4809
4810         ALLOCATE (matrix_p_almo_scf_converged(nspin))
4811         DO ispin = 1, nspin
4812            CALL dbcsr_create(matrix_p_almo_scf_converged(ispin), &
4813                              template=almo_scf_env%matrix_p(ispin))
4814            CALL dbcsr_copy(matrix_p_almo_scf_converged(ispin), &
4815                            almo_scf_env%matrix_p(ispin))
4816         ENDDO
4817
4818         ! update the density matrix
4819         DO ispin = 1, nspin
4820
4821            nelectron_spin_real(1) = almo_scf_env%nelectrons_spin(ispin)
4822            IF (almo_scf_env%nspins == 1) &
4823               nelectron_spin_real(1) = nelectron_spin_real(1)/2
4824
4825            local_mu(1) = SUM(almo_scf_env%mu_of_domain(:, ispin))/almo_scf_env%ndomains
4826            fake(1) = 123523
4827
4828            ! RZK UPDATE! the update algorithm is removed because
4829            ! RZK UPDATE! it requires updating core LS_SCF routines
4830            ! RZK UPDATE! (the code exists in the CVS version)
4831            CPABORT("CVS only: density_matrix_sign has not been updated in SVN")
4832            ! RZK UPDATE!CALL density_matrix_sign(almo_scf_env%matrix_p(ispin),&
4833            ! RZK UPDATE!                     local_mu,&
4834            ! RZK UPDATE!                     almo_scf_env%fixed_mu,&
4835            ! RZK UPDATE!                     almo_scf_env%matrix_ks_0deloc(ispin),&
4836            ! RZK UPDATE!                     almo_scf_env%matrix_s(1), &
4837            ! RZK UPDATE!                     almo_scf_env%matrix_s_inv(1), &
4838            ! RZK UPDATE!                     nelectron_spin_real,&
4839            ! RZK UPDATE!                     almo_scf_env%eps_filter,&
4840            ! RZK UPDATE!                     fake)
4841            ! RZK UPDATE!
4842            almo_scf_env%mu = local_mu(1)
4843
4844            !IF (almo_scf_env%has_s_preconditioner) THEN
4845            !    CALL apply_matrix_preconditioner(&
4846            !             almo_scf_env%matrix_p_blk(ispin),&
4847            !             "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
4848            !             almo_scf_env%matrix_s_blk_sqrt_inv(1))
4849            !ENDIF
4850            !CALL dbcsr_filter(almo_scf_env%matrix_p(ispin),&
4851            !        almo_scf_env%eps_filter)
4852
4853            IF (almo_scf_env%nspins == 1) &
4854               CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
4855                                spin_factor)
4856
4857            !CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin),&
4858            !  almo_scf_env%matrix_p(ispin),&
4859            !  energy_correction(ispin))
4860            !IF (unit_nr>0) THEN
4861            !   WRITE(unit_nr,*)
4862            !   WRITE(unit_nr,'(T2,A,I6,F20.9)') "EFAKE",ispin,&
4863            !           energy_correction(ispin)
4864            !   WRITE(unit_nr,*)
4865            !ENDIF
4866            CALL dbcsr_add(matrix_p_almo_scf_converged(ispin), &
4867                           almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
4868            CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
4869                           matrix_p_almo_scf_converged(ispin), &
4870                           energy_correction(ispin))
4871
4872            energy_correction_final = energy_correction_final + energy_correction(ispin)
4873
4874            IF (unit_nr > 0) THEN
4875               WRITE (unit_nr, *)
4876               WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
4877                  energy_correction(ispin)
4878               WRITE (unit_nr, *)
4879            ENDIF
4880
4881         ENDDO
4882
4883         DO ispin = 1, nspin
4884            CALL dbcsr_release(matrix_p_almo_scf_converged(ispin))
4885         ENDDO
4886         DEALLOCATE (matrix_p_almo_scf_converged)
4887
4888      END SELECT ! algorithm selection
4889
4890      t2 = m_walltime()
4891
4892      IF (unit_nr > 0) THEN
4893         WRITE (unit_nr, *)
4894         WRITE (unit_nr, '(T2,A,F18.9,F18.9,F18.9,F12.6)') "ETOT", &
4895            almo_scf_env%almo_scf_energy, &
4896            energy_correction_final, &
4897            almo_scf_env%almo_scf_energy + energy_correction_final, &
4898            t2 - t1
4899         WRITE (unit_nr, *)
4900      ENDIF
4901
4902      CALL timestop(handle)
4903
4904   END SUBROUTINE harris_foulkes_correction
4905
4906! **************************************************************************************************
4907!> \brief Computes a diagonal preconditioner for the cg optimization of k matrix
4908!> \param prec ...
4909!> \param vd_prop ...
4910!> \param f ...
4911!> \param x ...
4912!> \param oo_inv_x_tr ...
4913!> \param s ...
4914!> \param grad ...
4915!> \param vd_blk ...
4916!> \param t ...
4917!> \param template_vd_vd_blk ...
4918!> \param template_vr_vr_blk ...
4919!> \param template_n_vr ...
4920!> \param spin_factor ...
4921!> \param eps_filter ...
4922!> \par History
4923!>       2011.09 created [Rustam Z Khaliullin]
4924!> \author Rustam Z Khaliullin
4925! **************************************************************************************************
4926   SUBROUTINE opt_k_create_preconditioner(prec, vd_prop, f, x, oo_inv_x_tr, s, grad, &
4927                                          vd_blk, t, template_vd_vd_blk, template_vr_vr_blk, template_n_vr, &
4928                                          spin_factor, eps_filter)
4929
4930      TYPE(dbcsr_type), INTENT(INOUT)                    :: prec
4931      TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, f, x, oo_inv_x_tr, s, grad, &
4932                                                            vd_blk, t, template_vd_vd_blk, &
4933                                                            template_vr_vr_blk, template_n_vr
4934      REAL(KIND=dp), INTENT(IN)                          :: spin_factor, eps_filter
4935
4936      CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner', &
4937         routineP = moduleN//':'//routineN
4938
4939      INTEGER                                            :: handle, p_nrows, q_nrows
4940      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: p_diagonal, q_diagonal
4941      TYPE(dbcsr_type)                                   :: pp_diag, qq_diag, t1, t2, tmp, &
4942                                                            tmp1_n_vr, tmp2_n_vr, tmp_n_vd, &
4943                                                            tmp_vd_vd_blk, tmp_vr_vr_blk
4944
4945! init diag blocks outside
4946! init diag blocks otside
4947!INTEGER                                  :: iblock_row, iblock_col,&
4948!                                            nblkrows_tot, nblkcols_tot
4949!REAL(KIND=dp), DIMENSION(:, :), POINTER  :: p_new_block
4950!INTEGER                                  :: mynode, hold, row, col
4951
4952      CALL timeset(routineN, handle)
4953
4954      ! initialize a matrix to 1.0
4955      CALL dbcsr_create(tmp, template=prec)
4956      ! in order to use dbcsr_set matrix blocks must exist
4957      CALL dbcsr_copy(tmp, prec)
4958      CALL dbcsr_set(tmp, 1.0_dp)
4959
4960      ! compute qq = (Vd^tr)*F*Vd
4961      CALL dbcsr_create(tmp_n_vd, template=vd_prop)
4962      CALL dbcsr_multiply("N", "N", 1.0_dp, f, vd_prop, &
4963                          0.0_dp, tmp_n_vd, filter_eps=eps_filter)
4964      CALL dbcsr_create(tmp_vd_vd_blk, &
4965                        template=template_vd_vd_blk)
4966      CALL dbcsr_copy(tmp_vd_vd_blk, template_vd_vd_blk)
4967      CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
4968                          0.0_dp, tmp_vd_vd_blk, &
4969                          retain_sparsity=.TRUE., &
4970                          filter_eps=eps_filter)
4971      ! copy diagonal elements of the result into rows of a matrix
4972      CALL dbcsr_get_info(tmp_vd_vd_blk, nfullrows_total=q_nrows)
4973      ALLOCATE (q_diagonal(q_nrows))
4974      CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
4975      CALL dbcsr_create(qq_diag, &
4976                        template=template_vd_vd_blk)
4977      CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
4978      CALL dbcsr_set_diag(qq_diag, q_diagonal)
4979      CALL dbcsr_create(t1, template=prec)
4980      CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
4981                          0.0_dp, t1, filter_eps=eps_filter)
4982
4983      ! compute pp = X*sigma_oo_inv*X^tr
4984      CALL dbcsr_create(tmp_vr_vr_blk, template=template_vr_vr_blk)
4985      CALL dbcsr_copy(tmp_vr_vr_blk, template_vr_vr_blk)
4986      CALL dbcsr_multiply("N", "N", 1.0_dp, x, oo_inv_x_tr, &
4987                          0.0_dp, tmp_vr_vr_blk, &
4988                          retain_sparsity=.TRUE., &
4989                          filter_eps=eps_filter)
4990      ! copy diagonal elements of the result into cols of a matrix
4991      CALL dbcsr_get_info(tmp_vr_vr_blk, nfullrows_total=p_nrows)
4992      ALLOCATE (p_diagonal(p_nrows))
4993      CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
4994      CALL dbcsr_create(pp_diag, template=template_vr_vr_blk)
4995      CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
4996      CALL dbcsr_set_diag(pp_diag, p_diagonal)
4997      CALL dbcsr_set(tmp, 1.0_dp)
4998      CALL dbcsr_create(t2, template=prec)
4999      CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
5000                          0.0_dp, t2, filter_eps=eps_filter)
5001
5002      CALL dbcsr_hadamard_product(t1, t2, prec)
5003
5004      ! compute qq = (Vd^tr)*S*Vd
5005      CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_prop, &
5006                          0.0_dp, tmp_n_vd, filter_eps=eps_filter)
5007      CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
5008                          0.0_dp, tmp_vd_vd_blk, &
5009                          retain_sparsity=.TRUE., &
5010                          filter_eps=eps_filter)
5011      ! copy diagonal elements of the result into rows of a matrix
5012      CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
5013      CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
5014      CALL dbcsr_set_diag(qq_diag, q_diagonal)
5015      CALL dbcsr_set(tmp, 1.0_dp)
5016      CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
5017                          0.0_dp, t1, filter_eps=eps_filter)
5018
5019      ! compute pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
5020      CALL dbcsr_create(tmp1_n_vr, template=template_n_vr)
5021      CALL dbcsr_create(tmp2_n_vr, template=template_n_vr)
5022      CALL dbcsr_multiply("N", "N", 1.0_dp, t, oo_inv_x_tr, &
5023                          0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
5024      CALL dbcsr_multiply("N", "N", 1.0_dp, f, tmp1_n_vr, &
5025                          0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
5026      CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
5027                          0.0_dp, tmp_vr_vr_blk, &
5028                          retain_sparsity=.TRUE., &
5029                          filter_eps=eps_filter)
5030      ! copy diagonal elements of the result into cols of a matrix
5031      CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
5032      CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
5033      CALL dbcsr_set_diag(pp_diag, p_diagonal)
5034      CALL dbcsr_set(tmp, 1.0_dp)
5035      CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
5036                          0.0_dp, t2, filter_eps=eps_filter)
5037
5038      CALL dbcsr_hadamard_product(t1, t2, tmp)
5039      CALL dbcsr_add(prec, tmp, 1.0_dp, -1.0_dp)
5040      CALL dbcsr_scale(prec, 2.0_dp*spin_factor)
5041
5042      ! compute qp = X*sig_oo_inv*(T^tr)*S*Vd
5043      CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_blk, &
5044                          0.0_dp, tmp_n_vd, filter_eps=eps_filter)
5045      CALL dbcsr_multiply("T", "N", 1.0_dp, tmp_n_vd, tmp1_n_vr, &
5046                          0.0_dp, tmp, retain_sparsity=.TRUE., &
5047                          filter_eps=eps_filter)
5048      CALL dbcsr_hadamard_product(grad, tmp, t1)
5049      ! gradient already contains 2.0*spin_factor
5050      CALL dbcsr_scale(t1, -2.0_dp)
5051
5052      CALL dbcsr_add(prec, t1, 1.0_dp, 1.0_dp)
5053
5054      CALL dbcsr_function_of_elements(prec, dbcsr_func_inverse)
5055      CALL dbcsr_filter(prec, eps_filter)
5056
5057      DEALLOCATE (q_diagonal)
5058      DEALLOCATE (p_diagonal)
5059      CALL dbcsr_release(tmp)
5060      CALL dbcsr_release(qq_diag)
5061      CALL dbcsr_release(t1)
5062      CALL dbcsr_release(pp_diag)
5063      CALL dbcsr_release(t2)
5064      CALL dbcsr_release(tmp_n_vd)
5065      CALL dbcsr_release(tmp_vd_vd_blk)
5066      CALL dbcsr_release(tmp_vr_vr_blk)
5067      CALL dbcsr_release(tmp1_n_vr)
5068      CALL dbcsr_release(tmp2_n_vr)
5069
5070      CALL timestop(handle)
5071
5072   END SUBROUTINE opt_k_create_preconditioner
5073
5074! **************************************************************************************************
5075!> \brief Computes a block-diagonal preconditioner for the optimization of
5076!>        k matrix
5077!> \param almo_scf_env ...
5078!> \param vd_prop ...
5079!> \param oo_inv_x_tr ...
5080!> \param t_curr ...
5081!> \param ispin ...
5082!> \param spin_factor ...
5083!> \par History
5084!>       2011.10 created [Rustam Z Khaliullin]
5085!> \author Rustam Z Khaliullin
5086! **************************************************************************************************
5087   SUBROUTINE opt_k_create_preconditioner_blk(almo_scf_env, vd_prop, oo_inv_x_tr, &
5088                                              t_curr, ispin, spin_factor)
5089
5090      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
5091      TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, oo_inv_x_tr, t_curr
5092      INTEGER, INTENT(IN)                                :: ispin
5093      REAL(KIND=dp), INTENT(IN)                          :: spin_factor
5094
5095      CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner_blk', &
5096         routineP = moduleN//':'//routineN
5097
5098      INTEGER                                            :: handle
5099      REAL(KIND=dp)                                      :: eps_filter
5100      TYPE(dbcsr_type)                                   :: opt_k_e_dd, opt_k_e_rr, s_dd_sqrt, &
5101                                                            s_rr_sqrt, t1, tmp, tmp1_n_vr, &
5102                                                            tmp2_n_vr, tmp_n_vd, tmp_vd_vd_blk, &
5103                                                            tmp_vr_vr_blk
5104
5105! matrices that has been computed outside the routine already
5106
5107      CALL timeset(routineN, handle)
5108
5109      eps_filter = almo_scf_env%eps_filter
5110
5111      ! compute S_qq = (Vd^tr)*S*Vd
5112      CALL dbcsr_create(tmp_n_vd, template=almo_scf_env%matrix_v_disc(ispin))
5113      CALL dbcsr_create(tmp_vd_vd_blk, &
5114                        template=almo_scf_env%matrix_vv_disc_blk(ispin), &
5115                        matrix_type=dbcsr_type_no_symmetry)
5116      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5117                          almo_scf_env%matrix_s(1), &
5118                          vd_prop, &
5119                          0.0_dp, tmp_n_vd, filter_eps=eps_filter)
5120      CALL dbcsr_copy(tmp_vd_vd_blk, &
5121                      almo_scf_env%matrix_vv_disc_blk(ispin))
5122      CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
5123                          0.0_dp, tmp_vd_vd_blk, &
5124                          retain_sparsity=.TRUE.)
5125
5126      CALL dbcsr_create(s_dd_sqrt, &
5127                        template=almo_scf_env%matrix_vv_disc_blk(ispin), &
5128                        matrix_type=dbcsr_type_no_symmetry)
5129      CALL matrix_sqrt_Newton_Schulz(s_dd_sqrt, &
5130                                     almo_scf_env%opt_k_t_dd(ispin), &
5131                                     tmp_vd_vd_blk, &
5132                                     threshold=eps_filter, &
5133                                     order=almo_scf_env%order_lanczos, &
5134                                     eps_lanczos=almo_scf_env%eps_lanczos, &
5135                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
5136
5137      ! compute F_qq = (Vd^tr)*F*Vd
5138      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5139                          almo_scf_env%matrix_ks_0deloc(ispin), &
5140                          vd_prop, &
5141                          0.0_dp, tmp_n_vd, filter_eps=eps_filter)
5142      CALL dbcsr_copy(tmp_vd_vd_blk, &
5143                      almo_scf_env%matrix_vv_disc_blk(ispin))
5144      CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
5145                          0.0_dp, tmp_vd_vd_blk, &
5146                          retain_sparsity=.TRUE.)
5147      CALL dbcsr_release(tmp_n_vd)
5148
5149      ! bring to the blocked-orthogonalized basis
5150      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5151                          tmp_vd_vd_blk, &
5152                          almo_scf_env%opt_k_t_dd(ispin), &
5153                          0.0_dp, s_dd_sqrt, filter_eps=eps_filter)
5154      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5155                          almo_scf_env%opt_k_t_dd(ispin), &
5156                          s_dd_sqrt, &
5157                          0.0_dp, tmp_vd_vd_blk, filter_eps=eps_filter)
5158
5159      ! diagonalize the matrix
5160      CALL dbcsr_create(opt_k_e_dd, &
5161                        template=almo_scf_env%matrix_vv_disc_blk(ispin))
5162      CALL dbcsr_release(s_dd_sqrt)
5163      CALL dbcsr_create(s_dd_sqrt, &
5164                        template=almo_scf_env%matrix_vv_disc_blk(ispin), &
5165                        matrix_type=dbcsr_type_no_symmetry)
5166      CALL diagonalize_diagonal_blocks(tmp_vd_vd_blk, &
5167                                       s_dd_sqrt, &
5168                                       opt_k_e_dd)
5169
5170      ! obtain the transformation matrix in the discarded subspace
5171      ! T = S^{-1/2}.U
5172      CALL dbcsr_copy(tmp_vd_vd_blk, &
5173                      almo_scf_env%opt_k_t_dd(ispin))
5174      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5175                          tmp_vd_vd_blk, &
5176                          s_dd_sqrt, &
5177                          0.0_dp, almo_scf_env%opt_k_t_dd(ispin), &
5178                          filter_eps=eps_filter)
5179      CALL dbcsr_release(s_dd_sqrt)
5180      CALL dbcsr_release(tmp_vd_vd_blk)
5181
5182      ! copy diagonal elements of the result into rows of a matrix
5183      CALL dbcsr_create(tmp, &
5184                        template=almo_scf_env%matrix_k_blk_ones(ispin))
5185      CALL dbcsr_copy(tmp, &
5186                      almo_scf_env%matrix_k_blk_ones(ispin))
5187      CALL dbcsr_create(t1, &
5188                        template=almo_scf_env%matrix_k_blk_ones(ispin))
5189      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5190                          opt_k_e_dd, tmp, &
5191                          0.0_dp, t1, filter_eps=eps_filter)
5192      CALL dbcsr_release(opt_k_e_dd)
5193
5194      ! compute S_pp = X*sigma_oo_inv*X^tr
5195      CALL dbcsr_create(tmp_vr_vr_blk, &
5196                        template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
5197                        matrix_type=dbcsr_type_no_symmetry)
5198      CALL dbcsr_copy(tmp_vr_vr_blk, &
5199                      almo_scf_env%matrix_sigma_vv_blk(ispin))
5200      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5201                          almo_scf_env%matrix_x(ispin), &
5202                          oo_inv_x_tr, &
5203                          0.0_dp, tmp_vr_vr_blk, &
5204                          retain_sparsity=.TRUE.)
5205
5206      ! obtain the orthogonalization matrix
5207      CALL dbcsr_create(s_rr_sqrt, &
5208                        template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
5209                        matrix_type=dbcsr_type_no_symmetry)
5210      CALL matrix_sqrt_Newton_Schulz(s_rr_sqrt, &
5211                                     almo_scf_env%opt_k_t_rr(ispin), &
5212                                     tmp_vr_vr_blk, &
5213                                     threshold=eps_filter, &
5214                                     order=almo_scf_env%order_lanczos, &
5215                                     eps_lanczos=almo_scf_env%eps_lanczos, &
5216                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
5217
5218      ! compute F_pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
5219      CALL dbcsr_create(tmp1_n_vr, &
5220                        template=almo_scf_env%matrix_v(ispin))
5221      CALL dbcsr_create(tmp2_n_vr, &
5222                        template=almo_scf_env%matrix_v(ispin))
5223      CALL dbcsr_multiply("N", "N", 1.0_dp, t_curr, oo_inv_x_tr, &
5224                          0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
5225      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5226                          almo_scf_env%matrix_ks_0deloc(ispin), &
5227                          tmp1_n_vr, &
5228                          0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
5229      CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
5230                          0.0_dp, tmp_vr_vr_blk, &
5231                          retain_sparsity=.TRUE.)
5232      CALL dbcsr_release(tmp1_n_vr)
5233      CALL dbcsr_release(tmp2_n_vr)
5234
5235      ! bring to the blocked-orthogonalized basis
5236      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5237                          tmp_vr_vr_blk, &
5238                          almo_scf_env%opt_k_t_rr(ispin), &
5239                          0.0_dp, s_rr_sqrt, filter_eps=eps_filter)
5240      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5241                          almo_scf_env%opt_k_t_rr(ispin), &
5242                          s_rr_sqrt, &
5243                          0.0_dp, tmp_vr_vr_blk, filter_eps=eps_filter)
5244
5245      ! diagonalize the matrix
5246      CALL dbcsr_create(opt_k_e_rr, &
5247                        template=almo_scf_env%matrix_sigma_vv_blk(ispin))
5248      CALL dbcsr_release(s_rr_sqrt)
5249      CALL dbcsr_create(s_rr_sqrt, &
5250                        template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
5251                        matrix_type=dbcsr_type_no_symmetry)
5252      CALL diagonalize_diagonal_blocks(tmp_vr_vr_blk, &
5253                                       s_rr_sqrt, &
5254                                       opt_k_e_rr)
5255
5256      ! obtain the transformation matrix in the retained subspace
5257      ! T = S^{-1/2}.U
5258      CALL dbcsr_copy(tmp_vr_vr_blk, &
5259                      almo_scf_env%opt_k_t_rr(ispin))
5260      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5261                          tmp_vr_vr_blk, &
5262                          s_rr_sqrt, &
5263                          0.0_dp, almo_scf_env%opt_k_t_rr(ispin), &
5264                          filter_eps=eps_filter)
5265      CALL dbcsr_release(s_rr_sqrt)
5266      CALL dbcsr_release(tmp_vr_vr_blk)
5267
5268      ! copy diagonal elements of the result into cols of a matrix
5269      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5270                          tmp, opt_k_e_rr, &
5271                          0.0_dp, almo_scf_env%opt_k_denom(ispin), &
5272                          filter_eps=eps_filter)
5273      CALL dbcsr_release(opt_k_e_rr)
5274      CALL dbcsr_release(tmp)
5275
5276      ! form the denominator matrix
5277      CALL dbcsr_add(almo_scf_env%opt_k_denom(ispin), t1, &
5278                     -1.0_dp, 1.0_dp)
5279      CALL dbcsr_release(t1)
5280      CALL dbcsr_scale(almo_scf_env%opt_k_denom(ispin), &
5281                       2.0_dp*spin_factor)
5282
5283      CALL dbcsr_function_of_elements(almo_scf_env%opt_k_denom(ispin), &
5284                                      dbcsr_func_inverse)
5285      CALL dbcsr_filter(almo_scf_env%opt_k_denom(ispin), &
5286                        eps_filter)
5287
5288      CALL timestop(handle)
5289
5290   END SUBROUTINE opt_k_create_preconditioner_blk
5291
5292! **************************************************************************************************
5293!> \brief Applies a block-diagonal preconditioner for the optimization of
5294!>        k matrix (preconditioner matrices must be calculated and stored
5295!>        beforehand)
5296!> \param almo_scf_env ...
5297!> \param step ...
5298!> \param grad ...
5299!> \param ispin ...
5300!> \par History
5301!>       2011.10 created [Rustam Z Khaliullin]
5302!> \author Rustam Z Khaliullin
5303! **************************************************************************************************
5304   SUBROUTINE opt_k_apply_preconditioner_blk(almo_scf_env, step, grad, ispin)
5305
5306      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
5307      TYPE(dbcsr_type), INTENT(OUT)                      :: step
5308      TYPE(dbcsr_type), INTENT(IN)                       :: grad
5309      INTEGER, INTENT(IN)                                :: ispin
5310
5311      CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_apply_preconditioner_blk', &
5312         routineP = moduleN//':'//routineN
5313
5314      INTEGER                                            :: handle
5315      REAL(KIND=dp)                                      :: eps_filter
5316      TYPE(dbcsr_type)                                   :: tmp_k
5317
5318      CALL timeset(routineN, handle)
5319
5320      eps_filter = almo_scf_env%eps_filter
5321
5322      CALL dbcsr_create(tmp_k, template=almo_scf_env%matrix_k_blk(ispin))
5323
5324      ! transform gradient to the correct "diagonal" basis
5325      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5326                          grad, almo_scf_env%opt_k_t_rr(ispin), &
5327                          0.0_dp, tmp_k, filter_eps=eps_filter)
5328      CALL dbcsr_multiply("T", "N", 1.0_dp, &
5329                          almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
5330                          0.0_dp, step, filter_eps=eps_filter)
5331
5332      ! apply diagonal preconditioner
5333      CALL dbcsr_hadamard_product(step, &
5334                                  almo_scf_env%opt_k_denom(ispin), tmp_k)
5335
5336      ! back-transform the result to the initial basis
5337      CALL dbcsr_multiply("N", "N", 1.0_dp, &
5338                          almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
5339                          0.0_dp, step, filter_eps=eps_filter)
5340      CALL dbcsr_multiply("N", "T", 1.0_dp, &
5341                          step, almo_scf_env%opt_k_t_rr(ispin), &
5342                          0.0_dp, tmp_k, filter_eps=eps_filter)
5343
5344      CALL dbcsr_copy(step, tmp_k)
5345
5346      CALL dbcsr_release(tmp_k)
5347
5348      CALL timestop(handle)
5349
5350   END SUBROUTINE opt_k_apply_preconditioner_blk
5351
5352!! **************************************************************************************************
5353!!> \brief Reduce the number of virtual orbitals by rotating them within
5354!!>        a domain. The rotation is such that minimizes the frobenius norm of
5355!!>        the Fov domain-blocks of the discarded virtuals
5356!!> \par History
5357!!>       2011.08 created [Rustam Z Khaliullin]
5358!!> \author Rustam Z Khaliullin
5359!! **************************************************************************************************
5360!  SUBROUTINE truncate_subspace_v_blk(qs_env,almo_scf_env)
5361!
5362!    TYPE(qs_environment_type), POINTER       :: qs_env
5363!    TYPE(almo_scf_env_type)                  :: almo_scf_env
5364!
5365!    CHARACTER(len=*), PARAMETER :: routineN = 'truncate_subspace_v_blk', &
5366!      routineP = moduleN//':'//routineN
5367!
5368!    INTEGER                                  :: handle, ispin, iblock_row, &
5369!                                                iblock_col, iblock_row_size, &
5370!                                                iblock_col_size, retained_v, &
5371!                                                iteration, line_search_step, &
5372!                                                unit_nr, line_search_step_last
5373!    REAL(KIND=dp)                            :: t1, obj_function, grad_norm,&
5374!                                                c0, b0, a0, obj_function_new,&
5375!                                                t2, alpha, ff1, ff2, step1,&
5376!                                                step2,&
5377!                                                frob_matrix_base,&
5378!                                                frob_matrix
5379!    LOGICAL                                  :: safe_mode, converged, &
5380!                                                prepare_to_exit, failure
5381!    TYPE(cp_logger_type), POINTER            :: logger
5382!    TYPE(dbcsr_type)                      :: Fon, Fov, Fov_filtered, &
5383!                                                temp1_oo, temp2_oo, Fov_original, &
5384!                                                temp0_ov, U_blk_tot, U_blk, &
5385!                                                grad_blk, step_blk, matrix_filter, &
5386!                                                v_full_new,v_full_tmp,&
5387!                                                matrix_sigma_vv_full,&
5388!                                                matrix_sigma_vv_full_sqrt,&
5389!                                                matrix_sigma_vv_full_sqrt_inv,&
5390!                                                matrix_tmp1,&
5391!                                                matrix_tmp2
5392!
5393!    REAL(kind=dp), DIMENSION(:, :), POINTER  :: data_p, p_new_block
5394!    TYPE(dbcsr_iterator_type)                  :: iter
5395!
5396!
5397!REAL(kind=dp), DIMENSION(:), ALLOCATABLE     :: eigenvalues, WORK
5398!REAL(kind=dp), DIMENSION(:,:), ALLOCATABLE   :: data_copy, left_vectors, right_vectors
5399!INTEGER                                      :: LWORK, INFO
5400!TYPE(dbcsr_type)                          :: temp_u_v_full_blk
5401!
5402!    CALL timeset(routineN,handle)
5403!
5404!    safe_mode=.TRUE.
5405!
5406!    ! get a useful output_unit
5407!    logger => cp_get_default_logger()
5408!    IF (logger%para_env%mepos==logger%para_env%source) THEN
5409!       unit_nr=cp_logger_get_default_unit_nr(logger,local=.TRUE.)
5410!    ELSE
5411!       unit_nr=-1
5412!    ENDIF
5413!
5414!    DO ispin=1,almo_scf_env%nspins
5415!
5416!       t1 = m_walltime()
5417!
5418!       !!!!!!!!!!!!!!!!!
5419!       ! 0. Orthogonalize virtuals
5420!       !    Unfortunately, we have to do it in the FULL V subspace :(
5421!
5422!       CALL dbcsr_init(v_full_new)
5423!       CALL dbcsr_create(v_full_new,&
5424!               template=almo_scf_env%matrix_v_full_blk(ispin),&
5425!               matrix_type=dbcsr_type_no_symmetry)
5426!
5427!       ! project the occupied subspace out
5428!       CALL almo_scf_p_out_from_v(almo_scf_env%matrix_v_full_blk(ispin),&
5429!              v_full_new,almo_scf_env%matrix_ov_full(ispin),&
5430!              ispin,almo_scf_env)
5431!
5432!       ! init overlap and its functions
5433!       CALL dbcsr_init(matrix_sigma_vv_full)
5434!       CALL dbcsr_init(matrix_sigma_vv_full_sqrt)
5435!       CALL dbcsr_init(matrix_sigma_vv_full_sqrt_inv)
5436!       CALL dbcsr_create(matrix_sigma_vv_full,&
5437!               template=almo_scf_env%matrix_vv_full_blk(ispin),&
5438!               matrix_type=dbcsr_type_no_symmetry)
5439!       CALL dbcsr_create(matrix_sigma_vv_full_sqrt,&
5440!               template=almo_scf_env%matrix_vv_full_blk(ispin),&
5441!               matrix_type=dbcsr_type_no_symmetry)
5442!       CALL dbcsr_create(matrix_sigma_vv_full_sqrt_inv,&
5443!               template=almo_scf_env%matrix_vv_full_blk(ispin),&
5444!               matrix_type=dbcsr_type_no_symmetry)
5445!
5446!       ! construct VV overlap
5447!       CALL almo_scf_mo_to_sigma(v_full_new,&
5448!               matrix_sigma_vv_full,&
5449!               almo_scf_env%matrix_s(1),&
5450!               almo_scf_env%eps_filter)
5451!
5452!       IF (unit_nr>0) THEN
5453!          WRITE(unit_nr,*) "sqrt and inv(sqrt) of the FULL virtual MO overlap"
5454!       ENDIF
5455!
5456!       ! construct orthogonalization matrices
5457!       CALL matrix_sqrt_Newton_Schulz(matrix_sigma_vv_full_sqrt,&
5458!                                      matrix_sigma_vv_full_sqrt_inv,&
5459!                                      matrix_sigma_vv_full,&
5460!                                      threshold=almo_scf_env%eps_filter,&
5461!                                      order=almo_scf_env%order_lanczos,&
5462!                                      eps_lanczos=almo_scf_env%eps_lanczos,&
5463!                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
5464!       IF (safe_mode) THEN
5465!          CALL dbcsr_init(matrix_tmp1)
5466!          CALL dbcsr_create(matrix_tmp1,template=matrix_sigma_vv_full,&
5467!                               matrix_type=dbcsr_type_no_symmetry)
5468!          CALL dbcsr_init(matrix_tmp2)
5469!          CALL dbcsr_create(matrix_tmp2,template=matrix_sigma_vv_full,&
5470!                               matrix_type=dbcsr_type_no_symmetry)
5471!
5472!          CALL dbcsr_multiply("N","N",1.0_dp,matrix_sigma_vv_full_sqrt_inv,&
5473!                                 matrix_sigma_vv_full,&
5474!                                 0.0_dp,matrix_tmp1,filter_eps=almo_scf_env%eps_filter)
5475!          CALL dbcsr_multiply("N","N",1.0_dp,matrix_tmp1,&
5476!                                 matrix_sigma_vv_full_sqrt_inv,&
5477!                                 0.0_dp,matrix_tmp2,filter_eps=almo_scf_env%eps_filter)
5478!
5479!          frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp2)
5480!          CALL dbcsr_add_on_diag(matrix_tmp2,-1.0_dp)
5481!          frob_matrix=dbcsr_frobenius_norm(matrix_tmp2)
5482!          IF (unit_nr>0) THEN
5483!             WRITE(unit_nr,*) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)",frob_matrix/frob_matrix_base
5484!          ENDIF
5485!
5486!          CALL dbcsr_release(matrix_tmp1)
5487!          CALL dbcsr_release(matrix_tmp2)
5488!       ENDIF
5489!
5490!       ! discard unnecessary overlap functions
5491!       CALL dbcsr_release(matrix_sigma_vv_full)
5492!       CALL dbcsr_release(matrix_sigma_vv_full_sqrt)
5493!
5494!! this can be re-written because we have (1-P)|v>
5495!
5496!       !!!!!!!!!!!!!!!!!!!
5497!       ! 1. Compute F_ov
5498!       CALL dbcsr_init(Fon)
5499!       CALL dbcsr_create(Fon,&
5500!               template=almo_scf_env%matrix_v_full_blk(ispin))
5501!       CALL dbcsr_init(Fov)
5502!       CALL dbcsr_create(Fov,&
5503!               template=almo_scf_env%matrix_ov_full(ispin))
5504!       CALL dbcsr_init(Fov_filtered)
5505!       CALL dbcsr_create(Fov_filtered,&
5506!               template=almo_scf_env%matrix_ov_full(ispin))
5507!       CALL dbcsr_init(temp1_oo)
5508!       CALL dbcsr_create(temp1_oo,&
5509!               template=almo_scf_env%matrix_sigma(ispin),&
5510!               !matrix_type=dbcsr_type_no_symmetry)
5511!       CALL dbcsr_init(temp2_oo)
5512!       CALL dbcsr_create(temp2_oo,&
5513!               template=almo_scf_env%matrix_sigma(ispin),&
5514!               matrix_type=dbcsr_type_no_symmetry)
5515!
5516!       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
5517!               almo_scf_env%matrix_ks_0deloc(ispin),&
5518!               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
5519!
5520!       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
5521!               almo_scf_env%matrix_v_full_blk(ispin),&
5522!               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
5523!
5524!       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
5525!               almo_scf_env%matrix_t_blk(ispin),&
5526!               0.0_dp,temp1_oo,filter_eps=almo_scf_env%eps_filter)
5527!
5528!       CALL dbcsr_multiply("N","N",1.0_dp,temp1_oo,&
5529!               almo_scf_env%matrix_sigma_inv(ispin),&
5530!               0.0_dp,temp2_oo,filter_eps=almo_scf_env%eps_filter)
5531!       CALL dbcsr_release(temp1_oo)
5532!
5533!       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
5534!               almo_scf_env%matrix_s(1),&
5535!               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
5536!
5537!       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
5538!               almo_scf_env%matrix_v_full_blk(ispin),&
5539!               0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
5540!       CALL dbcsr_release(Fon)
5541!
5542!       CALL dbcsr_multiply("N","N",-1.0_dp,temp2_oo,&
5543!               Fov_filtered,&
5544!               1.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
5545!       CALL dbcsr_release(temp2_oo)
5546!
5547!       CALL dbcsr_multiply("N","N",1.0_dp,almo_scf_env%matrix_sigma_inv(ispin),&
5548!               Fov,0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
5549!
5550!       CALL dbcsr_multiply("N","N",1.0_dp,Fov_filtered,&
5551!               matrix_sigma_vv_full_sqrt_inv,&
5552!               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
5553!       !CALL dbcsr_copy(Fov,Fov_filtered)
5554!CALL dbcsr_print(Fov)
5555!
5556!       IF (safe_mode) THEN
5557!          CALL dbcsr_init(Fov_original)
5558!          CALL dbcsr_create(Fov_original,template=Fov)
5559!          CALL dbcsr_copy(Fov_original,Fov)
5560!       ENDIF
5561!
5562!!! remove diagonal blocks
5563!!CALL dbcsr_iterator_start(iter,Fov)
5564!!DO WHILE (dbcsr_iterator_blocks_left(iter))
5565!!
5566!!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
5567!!           row_size=iblock_row_size,col_size=iblock_col_size)
5568!!
5569!!   IF (iblock_row.eq.iblock_col) data_p(:,:)=0.0_dp
5570!!
5571!!ENDDO
5572!!CALL dbcsr_iterator_stop(iter)
5573!!CALL dbcsr_finalize(Fov)
5574!
5575!!! perform svd of blocks
5576!!!!! THIS ROUTINE WORKS ONLY ON ONE CPU AND ONLY FOR 2 MOLECULES !!!
5577!!CALL dbcsr_init(temp_u_v_full_blk)
5578!!CALL dbcsr_create(temp_u_v_full_blk,&
5579!!        template=almo_scf_env%matrix_vv_full_blk(ispin),&
5580!!        matrix_type=dbcsr_type_no_symmetry)
5581!!
5582!!CALL dbcsr_work_create(temp_u_v_full_blk,&
5583!!        work_mutable=.TRUE.)
5584!!CALL dbcsr_iterator_start(iter,Fov)
5585!!DO WHILE (dbcsr_iterator_blocks_left(iter))
5586!!
5587!!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
5588!!           row_size=iblock_row_size,col_size=iblock_col_size)
5589!!
5590!!   IF (iblock_row.ne.iblock_col) THEN
5591!!
5592!!      ! Prepare data
5593!!      allocate(eigenvalues(min(iblock_row_size,iblock_col_size)))
5594!!      allocate(data_copy(iblock_row_size,iblock_col_size))
5595!!      allocate(left_vectors(iblock_row_size,iblock_row_size))
5596!!      allocate(right_vectors(iblock_col_size,iblock_col_size))
5597!!      data_copy(:,:)=data_p(:,:)
5598!!
5599!!      ! Query the optimal workspace for dgesvd
5600!!      LWORK = -1
5601!!      allocate(WORK(MAX(1,LWORK)))
5602!!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
5603!!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
5604!!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
5605!!      LWORK = INT(WORK( 1 ))
5606!!      deallocate(WORK)
5607!!
5608!!      ! Allocate the workspace and perform svd
5609!!      allocate(WORK(MAX(1,LWORK)))
5610!!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
5611!!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
5612!!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
5613!!      deallocate(WORK)
5614!!      IF( INFO.NE.0 ) THEN
5615!!         CPABORT("DGESVD failed")
5616!!      END IF
5617!!
5618!!      ! copy right singular vectors into a unitary matrix
5619!!      NULLIFY (p_new_block)
5620!!      CALL dbcsr_reserve_block2d(temp_u_v_full_blk,iblock_col,iblock_col,p_new_block)
5621!!      CPASSERT(ASSOCIATED(p_new_block))
5622!!      p_new_block(:,:) = right_vectors(:,:)
5623!!
5624!!      deallocate(eigenvalues)
5625!!      deallocate(data_copy)
5626!!      deallocate(left_vectors)
5627!!      deallocate(right_vectors)
5628!!
5629!!   ENDIF
5630!!ENDDO
5631!!CALL dbcsr_iterator_stop(iter)
5632!!CALL dbcsr_finalize(temp_u_v_full_blk)
5633!!!CALL dbcsr_print(temp_u_v_full_blk)
5634!!CALL dbcsr_multiply("N","T",1.0_dp,Fov,temp_u_v_full_blk,&
5635!!        0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
5636!!
5637!!CALL dbcsr_copy(Fov,Fov_filtered)
5638!!CALL dbcsr_print(Fov)
5639!
5640!       !!!!!!!!!!!!!!!!!!!
5641!       ! 2. Initialize variables
5642!
5643!       ! temp space
5644!       CALL dbcsr_init(temp0_ov)
5645!       CALL dbcsr_create(temp0_ov,&
5646!               template=almo_scf_env%matrix_ov_full(ispin))
5647!
5648!       ! current unitary matrix
5649!       CALL dbcsr_init(U_blk)
5650!       CALL dbcsr_create(U_blk,&
5651!               template=almo_scf_env%matrix_vv_full_blk(ispin),&
5652!               matrix_type=dbcsr_type_no_symmetry)
5653!
5654!       ! unitary matrix accumulator
5655!       CALL dbcsr_init(U_blk_tot)
5656!       CALL dbcsr_create(U_blk_tot,&
5657!               template=almo_scf_env%matrix_vv_full_blk(ispin),&
5658!               matrix_type=dbcsr_type_no_symmetry)
5659!       CALL dbcsr_add_on_diag(U_blk_tot,1.0_dp)
5660!
5661!!CALL dbcsr_add_on_diag(U_blk,1.0_dp)
5662!!CALL dbcsr_multiply("N","T",1.0_dp,U_blk,temp_u_v_full_blk,&
5663!!        0.0_dp,U_blk_tot,filter_eps=almo_scf_env%eps_filter)
5664!!
5665!!CALL dbcsr_release(temp_u_v_full_blk)
5666!
5667!       ! init gradient
5668!       CALL dbcsr_init(grad_blk)
5669!       CALL dbcsr_create(grad_blk,&
5670!               template=almo_scf_env%matrix_vv_full_blk(ispin),&
5671!               matrix_type=dbcsr_type_no_symmetry)
5672!
5673!       ! init step matrix
5674!       CALL dbcsr_init(step_blk)
5675!       CALL dbcsr_create(step_blk,&
5676!               template=almo_scf_env%matrix_vv_full_blk(ispin),&
5677!               matrix_type=dbcsr_type_no_symmetry)
5678!
5679!       ! "retain discarded" filter (0.0 - retain, 1.0 - discard)
5680!       CALL dbcsr_init(matrix_filter)
5681!       CALL dbcsr_create(matrix_filter,&
5682!               template=almo_scf_env%matrix_ov_full(ispin))
5683!       ! copy Fov into the filter matrix temporarily
5684!       ! so we know which blocks contain significant elements
5685!       CALL dbcsr_copy(matrix_filter,Fov)
5686!
5687!       ! fill out filter elements block-by-block
5688!       CALL dbcsr_iterator_start(iter,matrix_filter)
5689!       DO WHILE (dbcsr_iterator_blocks_left(iter))
5690!
5691!          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
5692!                  row_size=iblock_row_size,col_size=iblock_col_size)
5693!
5694!          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
5695!
5696!          data_p(:,1:retained_v)=0.0_dp
5697!          data_p(:,(retained_v+1):iblock_col_size)=1.0_dp
5698!
5699!       ENDDO
5700!       CALL dbcsr_iterator_stop(iter)
5701!       CALL dbcsr_finalize(matrix_filter)
5702!
5703!       ! apply the filter
5704!       CALL dbcsr_hadamard_product(Fov,matrix_filter,Fov_filtered)
5705!
5706!       !!!!!!!!!!!!!!!!!!!!!
5707!       ! 3. start iterative minimization of the elements to be discarded
5708!       iteration=0
5709!       converged=.FALSE.
5710!       prepare_to_exit=.FALSE.
5711!       DO
5712!
5713!          iteration=iteration+1
5714!
5715!          !!!!!!!!!!!!!!!!!!!!!!!!!
5716!          ! 4. compute the gradient
5717!          CALL dbcsr_set(grad_blk,0.0_dp)
5718!          ! create the diagonal blocks only
5719!          CALL dbcsr_add_on_diag(grad_blk,1.0_dp)
5720!
5721!          CALL dbcsr_multiply("T","N",2.0_dp,Fov_filtered,Fov,&
5722!                  0.0_dp,grad_blk,retain_sparsity=.TRUE.,&
5723!                  filter_eps=almo_scf_env%eps_filter)
5724!          CALL dbcsr_multiply("T","N",-2.0_dp,Fov,Fov_filtered,&
5725!                  1.0_dp,grad_blk,retain_sparsity=.TRUE.,&
5726!                  filter_eps=almo_scf_env%eps_filter)
5727!
5728!          !!!!!!!!!!!!!!!!!!!!!!!
5729!          ! 5. check convergence
5730!          obj_function = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
5731!          grad_norm = dbcsr_frobenius_norm(grad_blk)
5732!          converged=(grad_norm.lt.almo_scf_env%truncate_v_eps_convergence)
5733!          IF (converged.OR.(iteration.ge.almo_scf_env%truncate_v_max_iter)) THEN
5734!             prepare_to_exit=.TRUE.
5735!          ENDIF
5736!
5737!          IF (.NOT.prepare_to_exit) THEN
5738!
5739!             !!!!!!!!!!!!!!!!!!!!!!!
5740!             ! 6. perform steps in the direction of the gradient
5741!             !    a. first, perform a trial step to "see" the parameters
5742!             !       of the parabola along the gradient:
5743!             !       a0 * x^2 + b0 * x + c0
5744!             !    b. then perform the step to the bottom of the parabola
5745!
5746!             ! get c0
5747!             c0 = obj_function
5748!             ! get b0 <= d_f/d_alpha along grad
5749!             !!!CALL dbcsr_multiply("N","N",4.0_dp,Fov,grad_blk,&
5750!             !!!        0.0_dp,temp0_ov,&
5751!             !!!        filter_eps=almo_scf_env%eps_filter)
5752!             !!!CALL dbcsr_dot(Fov_filtered,temp0_ov,b0)
5753!
5754!             alpha=almo_scf_env%truncate_v_trial_step_size
5755!
5756!             line_search_step_last=3
5757!             DO line_search_step=1,line_search_step_last
5758!                CALL dbcsr_copy(step_blk,grad_blk)
5759!                CALL dbcsr_scale(step_blk,-1.0_dp*alpha)
5760!                CALL generator_to_unitary(step_blk,U_blk,&
5761!                        almo_scf_env%eps_filter)
5762!                CALL dbcsr_multiply("N","N",1.0_dp,Fov,U_blk,0.0_dp,temp0_ov,&
5763!                        filter_eps=almo_scf_env%eps_filter)
5764!                CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
5765!                        Fov_filtered)
5766!
5767!                obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
5768!                IF (line_search_step.eq.1) THEN
5769!                   ff1 = obj_function_new
5770!                   step1 = alpha
5771!                ELSE IF (line_search_step.eq.2) THEN
5772!                   ff2 = obj_function_new
5773!                   step2 = alpha
5774!                ENDIF
5775!
5776!                IF (unit_nr>0.AND.(line_search_step.ne.line_search_step_last)) THEN
5777!                   WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3)') &
5778!                         "JOINT_SVD_lin",&
5779!                         iteration,&
5780!                         alpha,&
5781!                         obj_function,&
5782!                         obj_function_new,&
5783!                         obj_function_new-obj_function
5784!                ENDIF
5785!
5786!                IF (line_search_step.eq.1) THEN
5787!                   alpha=2.0_dp*alpha
5788!                ENDIF
5789!                IF (line_search_step.eq.2) THEN
5790!                   a0 = ((ff1-c0)/step1 - (ff2-c0)/step2) / (step1 - step2)
5791!                   b0 = (ff1-c0)/step1 - a0*step1
5792!                   ! step size in to the bottom of "the parabola"
5793!                   alpha=-b0/(2.0_dp*a0)
5794!                   ! update the default step size
5795!                   almo_scf_env%truncate_v_trial_step_size=alpha
5796!                ENDIF
5797!                !!!IF (line_search_step.eq.1) THEN
5798!                !!!   a0 = (obj_function_new - b0 * alpha - c0) / (alpha*alpha)
5799!                !!!   ! step size in to the bottom of "the parabola"
5800!                !!!   alpha=-b0/(2.0_dp*a0)
5801!                !!!   !IF (alpha.gt.10.0_dp) alpha=10.0_dp
5802!                !!!ENDIF
5803!
5804!             ENDDO
5805!
5806!             ! update Fov and U_blk_tot (use grad_blk as tmp storage)
5807!             CALL dbcsr_copy(Fov,temp0_ov)
5808!             CALL dbcsr_multiply("N","N",1.0_dp,U_blk_tot,U_blk,&
5809!                     0.0_dp,grad_blk,&
5810!                     filter_eps=almo_scf_env%eps_filter)
5811!             CALL dbcsr_copy(U_blk_tot,grad_blk)
5812!
5813!          ENDIF
5814!
5815!          t2 = m_walltime()
5816!
5817!          IF (unit_nr>0) THEN
5818!             WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3,E12.3,F10.3)') &
5819!                   "JOINT_SVD_itr",&
5820!                   iteration,&
5821!                   alpha,&
5822!                   obj_function,&
5823!                   obj_function_new,&
5824!                   obj_function_new-obj_function,&
5825!                   grad_norm,&
5826!                   t2-t1
5827!                   !(flop1+flop2)/(1.0E6_dp*(t2-t1))
5828!             CALL m_flush(unit_nr)
5829!          ENDIF
5830!
5831!          t1 = m_walltime()
5832!
5833!          IF (prepare_to_exit) EXIT
5834!
5835!       ENDDO ! stop iterations
5836!
5837!       IF (safe_mode) THEN
5838!          CALL dbcsr_multiply("N","N",1.0_dp,Fov_original,&
5839!                  U_blk_tot,0.0_dp,temp0_ov,&
5840!                  filter_eps=almo_scf_env%eps_filter)
5841!CALL dbcsr_print(temp0_ov)
5842!          CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
5843!                  Fov_filtered)
5844!          obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
5845!
5846!          IF (unit_nr>0) THEN
5847!             WRITE(unit_nr,'(T6,A,1X,E12.3)') &
5848!                   "SANITY CHECK:",&
5849!                   obj_function_new
5850!             CALL m_flush(unit_nr)
5851!          ENDIF
5852!
5853!          CALL dbcsr_release(Fov_original)
5854!       ENDIF
5855!
5856!       CALL dbcsr_release(temp0_ov)
5857!       CALL dbcsr_release(U_blk)
5858!       CALL dbcsr_release(grad_blk)
5859!       CALL dbcsr_release(step_blk)
5860!       CALL dbcsr_release(matrix_filter)
5861!       CALL dbcsr_release(Fov)
5862!       CALL dbcsr_release(Fov_filtered)
5863!
5864!       ! compute rotated virtual orbitals
5865!       CALL dbcsr_init(v_full_tmp)
5866!       CALL dbcsr_create(v_full_tmp,&
5867!               template=almo_scf_env%matrix_v_full_blk(ispin),&
5868!               matrix_type=dbcsr_type_no_symmetry)
5869!       CALL dbcsr_multiply("N","N",1.0_dp,&
5870!               v_full_new,&
5871!               matrix_sigma_vv_full_sqrt_inv,0.0_dp,v_full_tmp,&
5872!               filter_eps=almo_scf_env%eps_filter)
5873!       CALL dbcsr_multiply("N","N",1.0_dp,&
5874!               v_full_tmp,&
5875!               U_blk_tot,0.0_dp,v_full_new,&
5876!               filter_eps=almo_scf_env%eps_filter)
5877!
5878!       CALL dbcsr_release(matrix_sigma_vv_full_sqrt_inv)
5879!       CALL dbcsr_release(v_full_tmp)
5880!       CALL dbcsr_release(U_blk_tot)
5881!
5882!!!!! orthogonalized virtuals are not blocked
5883!       ! copy new virtuals into the truncated matrix
5884!       !CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin),&
5885!       CALL dbcsr_work_create(almo_scf_env%matrix_v(ispin),&
5886!               work_mutable=.TRUE.)
5887!       CALL dbcsr_iterator_start(iter,v_full_new)
5888!       DO WHILE (dbcsr_iterator_blocks_left(iter))
5889!
5890!          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
5891!                  row_size=iblock_row_size,col_size=iblock_col_size)
5892!
5893!          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
5894!
5895!          NULLIFY (p_new_block)
5896!          !CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v_blk(ispin),&
5897!          CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v(ispin),&
5898!                  iblock_row,iblock_col,p_new_block)
5899!          CPASSERT(ASSOCIATED(p_new_block))
5900!          CPASSERT(retained_v.gt.0)
5901!          p_new_block(:,:) = data_p(:,1:retained_v)
5902!
5903!       ENDDO ! iterator
5904!       CALL dbcsr_iterator_stop(iter)
5905!       !!CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
5906!       CALL dbcsr_finalize(almo_scf_env%matrix_v(ispin))
5907!
5908!       CALL dbcsr_release(v_full_new)
5909!
5910!    ENDDO ! ispin
5911!
5912!    CALL timestop(handle)
5913!
5914!  END SUBROUTINE truncate_subspace_v_blk
5915
5916! *****************************************************************************
5917!> \brief Compute the gradient wrt the main variable (e.g. Theta, X)
5918!> \param m_grad_out ...
5919!> \param m_ks ...
5920!> \param m_s ...
5921!> \param m_t ...
5922!> \param m_t0 ...
5923!> \param m_siginv ...
5924!> \param m_quench_t ...
5925!> \param m_FTsiginv ...
5926!> \param m_siginvTFTsiginv ...
5927!> \param m_ST ...
5928!> \param m_STsiginv0 ...
5929!> \param m_theta ...
5930!> \param domain_s_inv ...
5931!> \param domain_r_down ...
5932!> \param cpu_of_domain ...
5933!> \param domain_map ...
5934!> \param assume_t0_q0x ...
5935!> \param optimize_theta ...
5936!> \param normalize_orbitals ...
5937!> \param penalty_occ_vol ...
5938!> \param penalty_occ_local ...
5939!> \param penalty_occ_vol_prefactor ...
5940!> \param envelope_amplitude ...
5941!> \param eps_filter ...
5942!> \param spin_factor ...
5943!> \param special_case ...
5944!> \param m_sig_sqrti_ii ...
5945!> \param op_sm_set ...
5946!> \param weights ...
5947!> \param energy_coeff ...
5948!> \param localiz_coeff ...
5949!> \par History
5950!>       2015.03 created [Rustam Z Khaliullin]
5951!> \author Rustam Z Khaliullin
5952! **************************************************************************************************
5953   SUBROUTINE compute_gradient(m_grad_out, m_ks, m_s, m_t, m_t0, &
5954                               m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv0, &
5955                               m_theta, domain_s_inv, domain_r_down, &
5956                               cpu_of_domain, domain_map, assume_t0_q0x, optimize_theta, &
5957                               normalize_orbitals, penalty_occ_vol, penalty_occ_local, &
5958                               penalty_occ_vol_prefactor, envelope_amplitude, eps_filter, spin_factor, &
5959                               special_case, m_sig_sqrti_ii, op_sm_set, weights, energy_coeff, &
5960                               localiz_coeff)
5961
5962      TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out
5963      TYPE(dbcsr_type), INTENT(IN)                       :: m_ks, m_s, m_t, m_t0, m_siginv, &
5964                                                            m_quench_t, m_FTsiginv, &
5965                                                            m_siginvTFTsiginv, m_ST, m_STsiginv0, &
5966                                                            m_theta
5967      TYPE(domain_submatrix_type), DIMENSION(:), &
5968         INTENT(IN)                                      :: domain_s_inv, domain_r_down
5969      INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
5970      TYPE(domain_map_type), INTENT(IN)                  :: domain_map
5971      LOGICAL, INTENT(IN)                                :: assume_t0_q0x, optimize_theta, &
5972                                                            normalize_orbitals, penalty_occ_vol
5973      LOGICAL, INTENT(IN), OPTIONAL                      :: penalty_occ_local
5974      REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
5975                                                            envelope_amplitude, eps_filter, &
5976                                                            spin_factor
5977      INTEGER, INTENT(IN)                                :: special_case
5978      TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: m_sig_sqrti_ii
5979      TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL, &
5980         POINTER                                         :: op_sm_set
5981      REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: weights
5982      REAL(KIND=dp), INTENT(IN), OPTIONAL                :: energy_coeff, localiz_coeff
5983
5984      CHARACTER(len=*), PARAMETER :: routineN = 'compute_gradient', &
5985         routineP = moduleN//':'//routineN
5986
5987      INTEGER                                            :: dim0, handle, idim0, nao, reim
5988      LOGICAL                                            :: my_penalty_local
5989      REAL(KIND=dp)                                      :: coeff, energy_g_norm, my_energy_coeff, &
5990                                                            my_localiz_coeff, &
5991                                                            penalty_occ_vol_g_norm
5992      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
5993      TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_no_3, &
5994                                                            m_tmp_oo_1, m_tmp_oo_2, temp1, temp2, &
5995                                                            tempNOcc1, tempOccOcc1
5996
5997      CALL timeset(routineN, handle)
5998
5999      IF (normalize_orbitals .AND. (.NOT. PRESENT(m_sig_sqrti_ii))) THEN
6000         CPABORT("Normalization matrix is required")
6001      ENDIF
6002
6003      my_penalty_local = .FALSE.
6004      my_localiz_coeff = 1.0_dp
6005      my_energy_coeff = 0.0_dp
6006      IF (PRESENT(localiz_coeff)) THEN
6007         my_localiz_coeff = localiz_coeff
6008      ENDIF
6009      IF (PRESENT(energy_coeff)) THEN
6010         my_energy_coeff = energy_coeff
6011      ENDIF
6012      IF (PRESENT(penalty_occ_local)) THEN
6013         my_penalty_local = penalty_occ_local
6014      ENDIF
6015
6016      ! use this otherways unused variables
6017      CALL dbcsr_get_info(matrix=m_ks, nfullrows_total=nao)
6018      CALL dbcsr_get_info(matrix=m_s, nfullrows_total=nao)
6019      CALL dbcsr_get_info(matrix=m_t, nfullrows_total=nao)
6020
6021      CALL dbcsr_create(m_tmp_no_1, &
6022                        template=m_quench_t, &
6023                        matrix_type=dbcsr_type_no_symmetry)
6024      CALL dbcsr_create(m_tmp_no_2, &
6025                        template=m_quench_t, &
6026                        matrix_type=dbcsr_type_no_symmetry)
6027      CALL dbcsr_create(m_tmp_no_3, &
6028                        template=m_quench_t, &
6029                        matrix_type=dbcsr_type_no_symmetry)
6030      CALL dbcsr_create(m_tmp_oo_1, &
6031                        template=m_siginv, &
6032                        matrix_type=dbcsr_type_no_symmetry)
6033      CALL dbcsr_create(m_tmp_oo_2, &
6034                        template=m_siginv, &
6035                        matrix_type=dbcsr_type_no_symmetry)
6036      CALL dbcsr_create(tempNOcc1, &
6037                        template=m_t, &
6038                        matrix_type=dbcsr_type_no_symmetry)
6039      CALL dbcsr_create(tempOccOcc1, &
6040                        template=m_siginv, &
6041                        matrix_type=dbcsr_type_no_symmetry)
6042      CALL dbcsr_create(temp1, &
6043                        template=m_t, &
6044                        matrix_type=dbcsr_type_no_symmetry)
6045      CALL dbcsr_create(temp2, &
6046                        template=m_t, &
6047                        matrix_type=dbcsr_type_no_symmetry)
6048
6049      ! do d_E/d_T first
6050      !IF (.NOT.PRESENT(m_FTsiginv)) THEN
6051      !   CALL dbcsr_multiply("N","N",1.0_dp,&
6052      !           m_ks,&
6053      !           m_t,&
6054      !           0.0_dp,m_tmp_no_1,&
6055      !           filter_eps=eps_filter)
6056      !   CALL dbcsr_multiply("N","N",1.0_dp,&
6057      !           m_tmp_no_1,&
6058      !           m_siginv,&
6059      !           0.0_dp,m_FTsiginv,&
6060      !           filter_eps=eps_filter)
6061      !ENDIF
6062
6063      CALL dbcsr_copy(m_tmp_no_2, m_quench_t)
6064      CALL dbcsr_copy(m_tmp_no_2, m_FTsiginv, keep_sparsity=.TRUE.)
6065
6066      !IF (.NOT.PRESENT(m_siginvTFTsiginv)) THEN
6067      !   CALL dbcsr_multiply("T","N",1.0_dp,&
6068      !           m_t,&
6069      !           m_FTsiginv,&
6070      !           0.0_dp,m_tmp_oo_1,&
6071      !           filter_eps=eps_filter)
6072      !   CALL dbcsr_multiply("N","N",1.0_dp,&
6073      !           m_siginv,&
6074      !           m_tmp_oo_1,&
6075      !           0.0_dp,m_siginvTFTsiginv,&
6076      !           filter_eps=eps_filter)
6077      !ENDIF
6078
6079      !IF (.NOT.PRESENT(m_ST)) THEN
6080      !   CALL dbcsr_multiply("N","N",1.0_dp,&
6081      !           m_s,&
6082      !           m_t,&
6083      !           0.0_dp,m_ST,&
6084      !           filter_eps=eps_filter)
6085      !ENDIF
6086
6087      CALL dbcsr_multiply("N", "N", -1.0_dp, &
6088                          m_ST, &
6089                          m_siginvTFTsiginv, &
6090                          1.0_dp, m_tmp_no_2, &
6091                          retain_sparsity=.TRUE.)
6092      CALL dbcsr_scale(m_tmp_no_2, 2.0_dp*spin_factor)
6093
6094      ! LzL Add gradient for Localization
6095      IF (my_penalty_local) THEN
6096
6097         CALL dbcsr_set(temp2, 0.0_dp) ! accumulate the localization gradient here
6098
6099         DO idim0 = 1, SIZE(op_sm_set, 2) ! this loop is over miller ind
6100
6101            DO reim = 1, SIZE(op_sm_set, 1) ! this loop is over Re/Im
6102
6103               CALL dbcsr_multiply("N", "N", 1.0_dp, &
6104                                   op_sm_set(reim, idim0)%matrix, &
6105                                   m_t, &
6106                                   0.0_dp, tempNOcc1, &
6107                                   filter_eps=eps_filter)
6108
6109               ! warning - save time by computing only the diagonal elements
6110               CALL dbcsr_multiply("T", "N", 1.0_dp, &
6111                                   m_t, &
6112                                   tempNOcc1, &
6113                                   0.0_dp, tempOccOcc1, &
6114                                   filter_eps=eps_filter)
6115
6116               CALL dbcsr_get_info(tempOccOcc1, nfullrows_total=dim0)
6117               ALLOCATE (tg_diagonal(dim0))
6118               CALL dbcsr_get_diag(tempOccOcc1, tg_diagonal)
6119               CALL dbcsr_set(tempOccOcc1, 0.0_dp)
6120               CALL dbcsr_set_diag(tempOccOcc1, tg_diagonal)
6121               DEALLOCATE (tg_diagonal)
6122
6123               CALL dbcsr_multiply("N", "N", 1.0_dp, &
6124                                   tempNOcc1, &
6125                                   tempOccOcc1, &
6126                                   0.0_dp, temp1, &
6127                                   filter_eps=eps_filter)
6128
6129            ENDDO
6130
6131            SELECT CASE (2) ! allows for selection of different spread functionals
6132            CASE (1) ! functional =  -W_I * log( |z_I|^2 )
6133               CPABORT("Localization function is not implemented")
6134               !coeff = -(weights(idim0)/z2(ielem))
6135            CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
6136               coeff = -weights(idim0)
6137            CASE (3) ! functional =  W_I * ( 1 - |z_I| )
6138               CPABORT("Localization function is not implemented")
6139               !coeff = -(weights(idim0)/(2.0_dp*z2(ielem)))
6140            END SELECT
6141            CALL dbcsr_add(temp2, temp1, 1.0_dp, coeff)
6142            !CALL dbcsr_add(grad_loc, temp1, 1.0_dp, 1.0_dp)
6143
6144         ENDDO ! end loop over idim0
6145         CALL dbcsr_add(m_tmp_no_2, temp2, my_energy_coeff, my_localiz_coeff*4.0_dp)
6146      ENDIF
6147
6148      ! add penalty on the occupied volume: det(sigma)
6149      IF (penalty_occ_vol) THEN
6150         !RZK-warning CALL dbcsr_multiply("N","N",&
6151         !RZK-warning         penalty_occ_vol_prefactor,&
6152         !RZK-warning         m_ST,&
6153         !RZK-warning         m_siginv,&
6154         !RZK-warning         1.0_dp,m_tmp_no_2,&
6155         !RZK-warning         retain_sparsity=.TRUE.,&
6156         !RZK-warning         )
6157         CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
6158         CALL dbcsr_multiply("N", "N", &
6159                             penalty_occ_vol_prefactor, &
6160                             m_ST, &
6161                             m_siginv, &
6162                             0.0_dp, m_tmp_no_1, &
6163                             retain_sparsity=.TRUE.)
6164         ! this norm does not contain the normalization factors
6165         CALL dbcsr_norm(m_tmp_no_1, dbcsr_norm_maxabsnorm, &
6166                         norm_scalar=penalty_occ_vol_g_norm)
6167         CALL dbcsr_norm(m_tmp_no_2, dbcsr_norm_maxabsnorm, &
6168                         norm_scalar=energy_g_norm)
6169         !WRITE (*, "(A30,2F20.10)") "Energy/penalty g norms (no norm): ", energy_g_norm, penalty_occ_vol_g_norm
6170         CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, 1.0_dp)
6171      ENDIF
6172
6173      ! take into account the factor from the normalization constraint
6174      IF (normalize_orbitals) THEN
6175
6176         ! G = ( G - ST.[tr(T).G]_ii ) . [sig_sqrti]_ii
6177         ! this expression can be simplified to
6178         ! G = ( G - c0*ST ) . [sig_sqrti]_ii
6179         ! where c0 = penalty_occ_vol_prefactor
6180         ! This is because tr(T).G_Energy = 0 and
6181         !                 tr(T).G_Penalty = c0*I
6182
6183         !! faster way to take the norm into account (tested for vol penalty olny)
6184         !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
6185         !!CALL dbcsr_copy(m_tmp_no_1, m_ST, keep_sparsity=.TRUE.)
6186         !!CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, -penalty_occ_vol_prefactor)
6187         !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
6188         !!CALL dbcsr_multiply("N", "N", 1.0_dp, &
6189         !!                    m_tmp_no_2, &
6190         !!                    m_sig_sqrti_ii, &
6191         !!                    0.0_dp, m_tmp_no_1, &
6192         !!                    retain_sparsity=.TRUE.)
6193
6194         ! slower way of taking the norm into account
6195         CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
6196         CALL dbcsr_multiply("N", "N", 1.0_dp, &
6197                             m_tmp_no_2, &
6198                             m_sig_sqrti_ii, &
6199                             0.0_dp, m_tmp_no_1, &
6200                             retain_sparsity=.TRUE.)
6201
6202         ! get [tr(T).G]_ii
6203         CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii)
6204         CALL dbcsr_multiply("T", "N", 1.0_dp, &
6205                             m_t, &
6206                             m_tmp_no_2, &
6207                             0.0_dp, m_tmp_oo_1, &
6208                             retain_sparsity=.TRUE.)
6209
6210         CALL dbcsr_get_info(m_sig_sqrti_ii, nfullrows_total=dim0)
6211         ALLOCATE (tg_diagonal(dim0))
6212         CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
6213         CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
6214         CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
6215         DEALLOCATE (tg_diagonal)
6216
6217         CALL dbcsr_multiply("N", "N", 1.0_dp, &
6218                             m_sig_sqrti_ii, &
6219                             m_tmp_oo_1, &
6220                             0.0_dp, m_tmp_oo_2, &
6221                             filter_eps=eps_filter)
6222         CALL dbcsr_multiply("N", "N", -1.0_dp, &
6223                             m_ST, &
6224                             m_tmp_oo_2, &
6225                             1.0_dp, m_tmp_no_1, &
6226                             retain_sparsity=.TRUE.)
6227
6228      ELSE
6229
6230         CALL dbcsr_copy(m_tmp_no_1, m_tmp_no_2)
6231
6232      ENDIF ! normalize_orbitals
6233
6234      ! project out the occupied space from the gradient
6235      IF (assume_t0_q0x) THEN
6236         IF (special_case .EQ. xalmo_case_fully_deloc) THEN
6237            CALL dbcsr_copy(m_grad_out, m_tmp_no_1)
6238            CALL dbcsr_multiply("T", "N", 1.0_dp, &
6239                                m_t0, &
6240                                m_grad_out, &
6241                                0.0_dp, m_tmp_oo_1, &
6242                                filter_eps=eps_filter)
6243            CALL dbcsr_multiply("N", "N", -1.0_dp, &
6244                                m_STsiginv0, &
6245                                m_tmp_oo_1, &
6246                                1.0_dp, m_grad_out, &
6247                                filter_eps=eps_filter)
6248         ELSE IF (special_case .EQ. xalmo_case_block_diag) THEN
6249            CPABORT("Cannot project the zero-order space from itself")
6250         ELSE
6251            ! no special case: normal xALMOs
6252            CALL apply_domain_operators( &
6253               matrix_in=m_tmp_no_1, &
6254               matrix_out=m_grad_out, &
6255               operator2=domain_r_down(:), &
6256               operator1=domain_s_inv(:), &
6257               dpattern=m_quench_t, &
6258               map=domain_map, &
6259               node_of_domain=cpu_of_domain, &
6260               my_action=1, &
6261               filter_eps=eps_filter, &
6262               !matrix_trimmer=,&
6263               use_trimmer=.FALSE.)
6264         ENDIF ! my_special_case
6265         CALL dbcsr_copy(m_tmp_no_1, m_grad_out)
6266      ENDIF
6267
6268      !! check whether the gradient lies entirely in R or Q
6269      !CALL dbcsr_multiply("T","N",1.0_dp,&
6270      !        m_t,&
6271      !        m_tmp_no_1,&
6272      !        0.0_dp,m_tmp_oo_1,&
6273      !        filter_eps=eps_filter,&
6274      !        )
6275      !CALL dbcsr_multiply("N","N",1.0_dp,&
6276      !        m_siginv,&
6277      !        m_tmp_oo_1,&
6278      !        0.0_dp,m_tmp_oo_2,&
6279      !        filter_eps=eps_filter,&
6280      !        )
6281      !CALL dbcsr_copy(m_tmp_no_2,m_tmp_no_1)
6282      !CALL dbcsr_multiply("N","N",-1.0_dp,&
6283      !        m_ST,&
6284      !        m_tmp_oo_2,&
6285      !        1.0_dp,m_tmp_no_2,&
6286      !        retain_sparsity=.TRUE.,&
6287      !        )
6288      !CALL dbcsr_norm(m_tmp_no_2, dbcsr_norm_maxabsnorm,&
6289      !        norm_scalar=penalty_occ_vol_g_norm, )
6290      !WRITE(*,"(A50,2F20.10)") "Virtual-space projection of the gradient", penalty_occ_vol_g_norm
6291      !CALL dbcsr_add(m_tmp_no_2,m_tmp_no_1,1.0_dp,-1.0_dp)
6292      !CALL dbcsr_norm(m_tmp_no_2, dbcsr_norm_maxabsnorm,&
6293      !        norm_scalar=penalty_occ_vol_g_norm, )
6294      !WRITE(*,"(A50,2F20.10)") "Occupied-space projection of the gradient", penalty_occ_vol_g_norm
6295      !CALL dbcsr_norm(m_tmp_no_1, dbcsr_norm_maxabsnorm,&
6296      !        norm_scalar=penalty_occ_vol_g_norm, )
6297      !WRITE(*,"(A50,2F20.10)") "Full gradient", penalty_occ_vol_g_norm
6298
6299      ! transform d_E/d_T to d_E/d_theta
6300      IF (optimize_theta) THEN
6301         CALL dbcsr_copy(m_tmp_no_2, m_theta)
6302         CALL dbcsr_function_of_elements(m_tmp_no_2, &
6303                                         !func=dbcsr_func_cos,&
6304                                         func=dbcsr_func_dtanh, &
6305                                         a0=0.0_dp, &
6306                                         a1=1.0_dp/envelope_amplitude)
6307         CALL dbcsr_scale(m_tmp_no_2, &
6308                          envelope_amplitude)
6309         CALL dbcsr_set(m_tmp_no_3, 0.0_dp)
6310         CALL dbcsr_filter(m_tmp_no_3, eps=eps_filter)
6311         CALL dbcsr_hadamard_product(m_tmp_no_1, &
6312                                     m_tmp_no_2, &
6313                                     m_tmp_no_3, &
6314                                     b_assume_value=1.0_dp)
6315         CALL dbcsr_hadamard_product(m_tmp_no_3, &
6316                                     m_quench_t, &
6317                                     m_grad_out)
6318      ELSE ! simply copy
6319         CALL dbcsr_hadamard_product(m_tmp_no_1, &
6320                                     m_quench_t, &
6321                                     m_grad_out)
6322      ENDIF
6323      CALL dbcsr_filter(m_grad_out, eps=eps_filter)
6324
6325      CALL dbcsr_release(m_tmp_no_1)
6326      CALL dbcsr_release(m_tmp_no_2)
6327      CALL dbcsr_release(m_tmp_no_3)
6328      CALL dbcsr_release(m_tmp_oo_1)
6329      CALL dbcsr_release(m_tmp_oo_2)
6330      CALL dbcsr_release(tempNOcc1)
6331      CALL dbcsr_release(tempOccOcc1)
6332      CALL dbcsr_release(temp1)
6333      CALL dbcsr_release(temp2)
6334
6335      CALL timestop(handle)
6336
6337   END SUBROUTINE compute_gradient
6338
6339! *****************************************************************************
6340!> \brief Serial code that prints matrices readable by Mathematica
6341!> \param matrix - matrix to print
6342!> \param filename ...
6343!> \par History
6344!>       2015.05 created [Rustam Z. Khaliullin]
6345!> \author Rustam Z. Khaliullin
6346! **************************************************************************************************
6347   SUBROUTINE print_mathematica_matrix(matrix, filename)
6348
6349      TYPE(dbcsr_type), INTENT(IN)                       :: matrix
6350      CHARACTER(len=*), INTENT(IN)                       :: filename
6351
6352      CHARACTER(len=*), PARAMETER :: routineN = 'print_mathematica_matrix', &
6353         routineP = moduleN//':'//routineN
6354
6355      CHARACTER(LEN=20)                                  :: formatstr, Scols
6356      INTEGER                                            :: col, fiunit, handle, hori_offset, jj, &
6357                                                            nblkcols_tot, nblkrows_tot, Ncols, &
6358                                                            ncores, Nrows, row, unit_nr, &
6359                                                            vert_offset
6360      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, mo_block_sizes
6361      INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
6362      LOGICAL                                            :: found
6363      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: H
6364      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
6365      TYPE(cp_logger_type), POINTER                      :: logger
6366      TYPE(dbcsr_distribution_type)                      :: dist
6367      TYPE(dbcsr_type)                                   :: matrix_asym
6368
6369      CALL timeset(routineN, handle)
6370
6371      ! get a useful output_unit
6372      logger => cp_get_default_logger()
6373      IF (logger%para_env%ionode) THEN
6374         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
6375      ELSE
6376         unit_nr = -1
6377      ENDIF
6378
6379      ! serial code only
6380      CALL dbcsr_get_info(matrix, distribution=dist)
6381      CALL dbcsr_distribution_get(dist, numnodes=ncores)
6382      IF (ncores .GT. 1) THEN
6383         CPABORT("mathematica files: serial code only")
6384      ENDIF
6385
6386      nblkrows_tot = dbcsr_nblkrows_total(matrix)
6387      nblkcols_tot = dbcsr_nblkcols_total(matrix)
6388      CPASSERT(nblkrows_tot == nblkcols_tot)
6389      CALL dbcsr_get_info(matrix, row_blk_size=ao_blk_sizes)
6390      CALL dbcsr_get_info(matrix, col_blk_size=mo_blk_sizes)
6391      ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
6392      mo_block_sizes(:) = mo_blk_sizes(:)
6393      ao_block_sizes(:) = ao_blk_sizes(:)
6394
6395      CALL dbcsr_create(matrix_asym, &
6396                        template=matrix, &
6397                        matrix_type=dbcsr_type_no_symmetry)
6398      CALL dbcsr_desymmetrize(matrix, matrix_asym)
6399
6400      Ncols = SUM(mo_block_sizes)
6401      Nrows = SUM(ao_block_sizes)
6402      ALLOCATE (H(Nrows, Ncols))
6403      H(:, :) = 0.0_dp
6404
6405      hori_offset = 0
6406      DO col = 1, nblkcols_tot
6407
6408         vert_offset = 0
6409         DO row = 1, nblkrows_tot
6410
6411            CALL dbcsr_get_block_p(matrix_asym, row, col, block_p, found)
6412            IF (found) THEN
6413
6414               H(vert_offset + 1:vert_offset + ao_block_sizes(row), &
6415                 hori_offset + 1:hori_offset + mo_block_sizes(col)) &
6416                  = block_p(:, :)
6417
6418            ENDIF
6419
6420            vert_offset = vert_offset + ao_block_sizes(row)
6421
6422         ENDDO
6423
6424         hori_offset = hori_offset + mo_block_sizes(col)
6425
6426      ENDDO ! loop over electron blocks
6427
6428      CALL dbcsr_release(matrix_asym)
6429
6430      IF (unit_nr > 0) THEN
6431         CALL open_file(filename, unit_number=fiunit, file_status='REPLACE')
6432         WRITE (Scols, "(I10)") Ncols
6433         formatstr = "("//TRIM(Scols)//"E27.17)"
6434         DO jj = 1, Nrows
6435            WRITE (fiunit, formatstr) H(jj, :)
6436         ENDDO
6437         CALL close_file(fiunit)
6438      ENDIF
6439
6440      DEALLOCATE (mo_block_sizes)
6441      DEALLOCATE (ao_block_sizes)
6442      DEALLOCATE (H)
6443
6444      CALL timestop(handle)
6445
6446   END SUBROUTINE print_mathematica_matrix
6447
6448! *****************************************************************************
6449!> \brief Compute the objective functional of NLMOs
6450!> \param localization_obj_function_ispin ...
6451!> \param penalty_func_ispin ...
6452!> \param penalty_vol_prefactor ...
6453!> \param overlap_determinant ...
6454!> \param m_sigma ...
6455!> \param nocc ...
6456!> \param m_B0 ...
6457!> \param m_theta_normalized ...
6458!> \param template_matrix_mo ...
6459!> \param weights ...
6460!> \param m_S0 ...
6461!> \param just_started ...
6462!> \param penalty_amplitude ...
6463!> \param eps_filter ...
6464!> \par History
6465!>       2020.01 created [Ziling Luo]
6466!> \author Ziling Luo
6467! **************************************************************************************************
6468   SUBROUTINE compute_obj_nlmos(localization_obj_function_ispin, penalty_func_ispin, &
6469                                penalty_vol_prefactor, overlap_determinant, m_sigma, nocc, m_B0, &
6470                                m_theta_normalized, template_matrix_mo, weights, m_S0, just_started, &
6471                                penalty_amplitude, eps_filter)
6472
6473      REAL(KIND=dp), INTENT(INOUT) :: localization_obj_function_ispin, penalty_func_ispin, &
6474         penalty_vol_prefactor, overlap_determinant
6475      TYPE(dbcsr_type), INTENT(INOUT)                    :: m_sigma
6476      INTEGER, INTENT(IN)                                :: nocc
6477      TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
6478      TYPE(dbcsr_type), INTENT(IN)                       :: m_theta_normalized, template_matrix_mo
6479      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
6480      TYPE(dbcsr_type), INTENT(IN)                       :: m_S0
6481      LOGICAL, INTENT(IN)                                :: just_started
6482      REAL(KIND=dp), INTENT(IN)                          :: penalty_amplitude, eps_filter
6483
6484      CHARACTER(len=*), PARAMETER :: routineN = 'compute_obj_nlmos', &
6485         routineP = moduleN//':'//routineN
6486
6487      INTEGER                                            :: handle, idim0, ielem, para_group, reim
6488      REAL(KIND=dp)                                      :: det1, fval
6489      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: reim_diag, z2
6490      TYPE(dbcsr_type)                                   :: tempNOcc1, tempOccOcc1, tempOccOcc2
6491
6492      CALL timeset(routineN, handle)
6493
6494      CALL dbcsr_create(tempNOcc1, &
6495                        template=template_matrix_mo, &
6496                        matrix_type=dbcsr_type_no_symmetry)
6497      CALL dbcsr_create(tempOccOcc1, &
6498                        template=m_theta_normalized, &
6499                        matrix_type=dbcsr_type_no_symmetry)
6500      CALL dbcsr_create(tempOccOcc2, &
6501                        template=m_theta_normalized, &
6502                        matrix_type=dbcsr_type_no_symmetry)
6503
6504      localization_obj_function_ispin = 0.0_dp
6505      penalty_func_ispin = 0.0_dp
6506      ALLOCATE (z2(nocc))
6507      ALLOCATE (reim_diag(nocc))
6508
6509      CALL dbcsr_get_info(tempOccOcc2, group=para_group)
6510
6511      DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
6512
6513         z2(:) = 0.0_dp
6514
6515         DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
6516
6517            CALL dbcsr_multiply("N", "N", 1.0_dp, &
6518                                m_B0(reim, idim0), &
6519                                m_theta_normalized, &
6520                                0.0_dp, tempOccOcc1, &
6521                                filter_eps=eps_filter)
6522            CALL dbcsr_set(tempOccOcc2, 0.0_dp)
6523            CALL dbcsr_add_on_diag(tempOccOcc2, 1.0_dp)
6524            CALL dbcsr_multiply("T", "N", 1.0_dp, &
6525                                m_theta_normalized, &
6526                                tempOccOcc1, &
6527                                0.0_dp, tempOccOcc2, &
6528                                retain_sparsity=.TRUE.)
6529
6530            reim_diag = 0.0_dp
6531            CALL dbcsr_get_diag(tempOccOcc2, reim_diag)
6532            CALL mp_sum(reim_diag, para_group)
6533            z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
6534
6535         ENDDO
6536
6537         DO ielem = 1, nocc
6538            SELECT CASE (2) ! allows for selection of different spread functionals
6539            CASE (1) ! functional =  -W_I * log( |z_I|^2 )
6540               fval = -weights(idim0)*LOG(ABS(z2(ielem)))
6541            CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
6542               fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
6543            CASE (3) ! functional =  W_I * ( 1 - |z_I| )
6544               fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
6545            END SELECT
6546            localization_obj_function_ispin = localization_obj_function_ispin + fval
6547         ENDDO
6548
6549      ENDDO ! end loop over idim0
6550
6551      DEALLOCATE (z2)
6552      DEALLOCATE (reim_diag)
6553
6554      CALL dbcsr_multiply("N", "N", 1.0_dp, &
6555                          m_S0, &
6556                          m_theta_normalized, &
6557                          0.0_dp, tempOccOcc1, &
6558                          filter_eps=eps_filter)
6559      ! compute current sigma
6560      CALL dbcsr_multiply("T", "N", 1.0_dp, &
6561                          m_theta_normalized, &
6562                          tempOccOcc1, &
6563                          0.0_dp, m_sigma, &
6564                          filter_eps=eps_filter)
6565
6566      CALL determinant(m_sigma, det1, &
6567                       eps_filter)
6568      ! save the current determinant
6569      overlap_determinant = det1
6570
6571      IF (just_started .AND. penalty_amplitude .LT. 0.0_dp) THEN
6572         penalty_vol_prefactor = -(-penalty_amplitude)*localization_obj_function_ispin
6573      ENDIF
6574      penalty_func_ispin = penalty_func_ispin + penalty_vol_prefactor*LOG(det1)
6575
6576      CALL dbcsr_release(tempNOcc1)
6577      CALL dbcsr_release(tempOccOcc1)
6578      CALL dbcsr_release(tempOccOcc2)
6579
6580      CALL timestop(handle)
6581
6582   END SUBROUTINE compute_obj_nlmos
6583
6584! *****************************************************************************
6585!> \brief Compute the gradient wrt the main variable
6586!> \param m_grad_out ...
6587!> \param m_B0 ...
6588!> \param weights ...
6589!> \param m_S0 ...
6590!> \param m_theta_normalized ...
6591!> \param m_siginv ...
6592!> \param m_sig_sqrti_ii ...
6593!> \param penalty_vol_prefactor ...
6594!> \param eps_filter ...
6595!> \param suggested_vol_penalty ...
6596!> \par History
6597!>       2018.10 created [Ziling Luo]
6598!> \author Ziling Luo
6599! **************************************************************************************************
6600   SUBROUTINE compute_gradient_nlmos(m_grad_out, m_B0, weights, &
6601                                     m_S0, m_theta_normalized, m_siginv, m_sig_sqrti_ii, &
6602                                     penalty_vol_prefactor, eps_filter, suggested_vol_penalty)
6603
6604      TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out
6605      TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
6606      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
6607      TYPE(dbcsr_type), INTENT(IN)                       :: m_S0, m_theta_normalized, m_siginv, &
6608                                                            m_sig_sqrti_ii
6609      REAL(KIND=dp), INTENT(IN)                          :: penalty_vol_prefactor, eps_filter
6610      REAL(KIND=dp), INTENT(INOUT)                       :: suggested_vol_penalty
6611
6612      CHARACTER(len=*), PARAMETER :: routineN = 'compute_gradient_nlmos', &
6613         routineP = moduleN//':'//routineN
6614
6615      INTEGER                                            :: dim0, handle, idim0, reim
6616      REAL(KIND=dp)                                      :: norm_loc, norm_vol
6617      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal, z2
6618      TYPE(dbcsr_type)                                   :: m_temp_oo_1, m_temp_oo_2, m_temp_oo_3, &
6619                                                            m_temp_oo_4
6620
6621      CALL timeset(routineN, handle)
6622
6623      CALL dbcsr_create(m_temp_oo_1, &
6624                        template=m_theta_normalized, &
6625                        matrix_type=dbcsr_type_no_symmetry)
6626      CALL dbcsr_create(m_temp_oo_2, &
6627                        template=m_theta_normalized, &
6628                        matrix_type=dbcsr_type_no_symmetry)
6629      CALL dbcsr_create(m_temp_oo_3, &
6630                        template=m_theta_normalized, &
6631                        matrix_type=dbcsr_type_no_symmetry)
6632      CALL dbcsr_create(m_temp_oo_4, &
6633                        template=m_theta_normalized, &
6634                        matrix_type=dbcsr_type_no_symmetry)
6635
6636      CALL dbcsr_get_info(m_siginv, nfullrows_total=dim0)
6637      ALLOCATE (tg_diagonal(dim0))
6638      ALLOCATE (z2(dim0))
6639      CALL dbcsr_set(m_temp_oo_1, 0.0_dp) ! accumulate the gradient wrt a_norm here
6640
6641      ! do d_Omega/d_a_normalized first
6642      DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
6643
6644         z2(:) = 0.0_dp
6645         CALL dbcsr_set(m_temp_oo_2, 0.0_dp) ! accumulate index gradient here
6646         DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
6647
6648            CALL dbcsr_multiply("N", "N", 1.0_dp, &
6649                                m_B0(reim, idim0), &
6650                                m_theta_normalized, &
6651                                0.0_dp, m_temp_oo_3, &
6652                                filter_eps=eps_filter)
6653
6654            ! result contain Re/Im part of Z for the current Miller index
6655            ! warning - save time by computing only the diagonal elements
6656            CALL dbcsr_multiply("T", "N", 1.0_dp, &
6657                                m_theta_normalized, &
6658                                m_temp_oo_3, &
6659                                0.0_dp, m_temp_oo_4, &
6660                                filter_eps=eps_filter)
6661
6662            tg_diagonal(:) = 0.0_dp
6663            CALL dbcsr_get_diag(m_temp_oo_4, tg_diagonal)
6664            CALL dbcsr_set(m_temp_oo_4, 0.0_dp)
6665            CALL dbcsr_set_diag(m_temp_oo_4, tg_diagonal)
6666            !CALL mp_sum(tg_diagonal, para_group)
6667            z2(:) = z2(:) + tg_diagonal(:)*tg_diagonal(:)
6668
6669            CALL dbcsr_multiply("N", "N", 1.0_dp, &
6670                                m_temp_oo_3, &
6671                                m_temp_oo_4, &
6672                                1.0_dp, m_temp_oo_2, &
6673                                filter_eps=eps_filter)
6674
6675         ENDDO
6676
6677         ! TODO: because some elements are zeros on some MPI tasks the
6678         ! gradient evaluation will fail for CASE 1 and 3
6679         SELECT CASE (2) ! allows for selection of different spread functionals
6680         CASE (1) ! functional =  -W_I * log( |z_I|^2 )
6681            z2(:) = -weights(idim0)/z2(:)
6682         CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
6683            z2(:) = -weights(idim0)
6684         CASE (3) ! functional =  W_I * ( 1 - |z_I| )
6685            z2(:) = -weights(idim0)/(2*SQRT(z2(:)))
6686         END SELECT
6687         CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
6688         CALL dbcsr_set_diag(m_temp_oo_3, z2)
6689         ! TODO: print this matrix to make sure its block structure is fine
6690         ! and there are no unecessary elements
6691
6692         CALL dbcsr_multiply("N", "N", 4.0_dp, &
6693                             m_temp_oo_2, &
6694                             m_temp_oo_3, &
6695                             1.0_dp, m_temp_oo_1, &
6696                             filter_eps=eps_filter)
6697
6698      ENDDO ! end loop over idim0
6699      DEALLOCATE (z2)
6700
6701      ! sigma0.a_norm is necessary for the volume penalty and normalization
6702      CALL dbcsr_multiply("N", "N", &
6703                          1.0_dp, &
6704                          m_S0, &
6705                          m_theta_normalized, &
6706                          0.0_dp, m_temp_oo_2, &
6707                          filter_eps=eps_filter)
6708
6709      ! add gradient of the penalty functional log[det(sigma)]
6710      ! G = 2*prefactor*sigma0.a_norm.sigma_inv
6711      CALL dbcsr_multiply("N", "N", &
6712                          1.0_dp, &
6713                          m_temp_oo_2, &
6714                          m_siginv, &
6715                          0.0_dp, m_temp_oo_3, &
6716                          filter_eps=eps_filter)
6717      CALL dbcsr_norm(m_temp_oo_3, &
6718                      dbcsr_norm_maxabsnorm, norm_scalar=norm_vol)
6719      CALL dbcsr_norm(m_temp_oo_1, &
6720                      dbcsr_norm_maxabsnorm, norm_scalar=norm_loc)
6721      suggested_vol_penalty = norm_loc/norm_vol
6722      CALL dbcsr_add(m_temp_oo_1, m_temp_oo_3, &
6723                     1.0_dp, 2.0_dp*penalty_vol_prefactor)
6724
6725      ! take into account the factor from the normalization constraint
6726      ! G = ( G - sigma0.a_norm.[tr(a_norm).G]_ii ) . [sig_sqrti]_ii
6727      ! 1. get G.[sig_sqrti]_ii
6728      CALL dbcsr_multiply("N", "N", 1.0_dp, &
6729                          m_temp_oo_1, &
6730                          m_sig_sqrti_ii, &
6731                          0.0_dp, m_grad_out, &
6732                          filter_eps=eps_filter)
6733
6734      ! 2. get [tr(a_norm).G]_ii
6735      ! it is possible to save time by computing only the diagonal elements
6736      CALL dbcsr_multiply("T", "N", 1.0_dp, &
6737                          m_theta_normalized, &
6738                          m_temp_oo_1, &
6739                          0.0_dp, m_temp_oo_3, &
6740                          filter_eps=eps_filter)
6741      CALL dbcsr_get_diag(m_temp_oo_3, tg_diagonal)
6742      CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
6743      CALL dbcsr_set_diag(m_temp_oo_3, tg_diagonal)
6744
6745      ! 3. [X]_ii . [sig_sqrti]_ii
6746      ! it is possible to save time by computing only the diagonal elements
6747      CALL dbcsr_multiply("N", "N", 1.0_dp, &
6748                          m_sig_sqrti_ii, &
6749                          m_temp_oo_3, &
6750                          0.0_dp, m_temp_oo_1, &
6751                          filter_eps=eps_filter)
6752      ! 4. (sigma0*a_norm) .[X]_ii
6753      CALL dbcsr_multiply("N", "N", -1.0_dp, &
6754                          m_temp_oo_2, &
6755                          m_temp_oo_1, &
6756                          1.0_dp, m_grad_out, &
6757                          filter_eps=eps_filter)
6758
6759      DEALLOCATE (tg_diagonal)
6760      CALL dbcsr_release(m_temp_oo_1)
6761      CALL dbcsr_release(m_temp_oo_2)
6762      CALL dbcsr_release(m_temp_oo_3)
6763      CALL dbcsr_release(m_temp_oo_4)
6764
6765      CALL timestop(handle)
6766
6767   END SUBROUTINE compute_gradient_nlmos
6768
6769! *****************************************************************************
6770!> \brief Compute MO coeffs from the main optimized variable (e.g. Theta, X)
6771!> \param m_var_in ...
6772!> \param m_t_out ...
6773!> \param m_quench_t ...
6774!> \param m_t0 ...
6775!> \param m_oo_template ...
6776!> \param m_STsiginv0 ...
6777!> \param m_s ...
6778!> \param m_sig_sqrti_ii_out ...
6779!> \param domain_r_down ...
6780!> \param domain_s_inv ...
6781!> \param domain_map ...
6782!> \param cpu_of_domain ...
6783!> \param assume_t0_q0x ...
6784!> \param just_started ...
6785!> \param optimize_theta ...
6786!> \param normalize_orbitals ...
6787!> \param envelope_amplitude ...
6788!> \param eps_filter ...
6789!> \param special_case ...
6790!> \param nocc_of_domain ...
6791!> \param order_lanczos ...
6792!> \param eps_lanczos ...
6793!> \param max_iter_lanczos ...
6794!> \par History
6795!>       2015.03 created [Rustam Z Khaliullin]
6796!> \author Rustam Z Khaliullin
6797! **************************************************************************************************
6798   SUBROUTINE compute_xalmos_from_main_var(m_var_in, m_t_out, m_quench_t, &
6799                                           m_t0, m_oo_template, m_STsiginv0, m_s, m_sig_sqrti_ii_out, domain_r_down, &
6800                                           domain_s_inv, domain_map, cpu_of_domain, assume_t0_q0x, just_started, &
6801                                           optimize_theta, normalize_orbitals, envelope_amplitude, eps_filter, &
6802                                           special_case, nocc_of_domain, order_lanczos, eps_lanczos, max_iter_lanczos)
6803
6804      TYPE(dbcsr_type), INTENT(IN)                       :: m_var_in
6805      TYPE(dbcsr_type), INTENT(INOUT)                    :: m_t_out
6806      TYPE(dbcsr_type), INTENT(IN)                       :: m_quench_t, m_t0, m_oo_template, &
6807                                                            m_STsiginv0, m_s
6808      TYPE(dbcsr_type), INTENT(INOUT)                    :: m_sig_sqrti_ii_out
6809      TYPE(domain_submatrix_type), DIMENSION(:), &
6810         INTENT(IN)                                      :: domain_r_down, domain_s_inv
6811      TYPE(domain_map_type), INTENT(IN)                  :: domain_map
6812      INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
6813      LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
6814                                                            optimize_theta, normalize_orbitals
6815      REAL(KIND=dp), INTENT(IN)                          :: envelope_amplitude, eps_filter
6816      INTEGER, INTENT(IN)                                :: special_case
6817      INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
6818      INTEGER, INTENT(IN)                                :: order_lanczos
6819      REAL(KIND=dp), INTENT(IN)                          :: eps_lanczos
6820      INTEGER, INTENT(IN)                                :: max_iter_lanczos
6821
6822      CHARACTER(len=*), PARAMETER :: routineN = 'compute_xalmos_from_main_var', &
6823         routineP = moduleN//':'//routineN
6824
6825      INTEGER                                            :: handle, unit_nr
6826      REAL(KIND=dp)                                      :: t_norm
6827      TYPE(cp_logger_type), POINTER                      :: logger
6828      TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
6829
6830      CALL timeset(routineN, handle)
6831
6832      ! get a useful output_unit
6833      logger => cp_get_default_logger()
6834      IF (logger%para_env%ionode) THEN
6835         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
6836      ELSE
6837         unit_nr = -1
6838      ENDIF
6839
6840      CALL dbcsr_create(m_tmp_no_1, &
6841                        template=m_quench_t, &
6842                        matrix_type=dbcsr_type_no_symmetry)
6843      CALL dbcsr_create(m_tmp_oo_1, &
6844                        template=m_oo_template, &
6845                        matrix_type=dbcsr_type_no_symmetry)
6846
6847      CALL dbcsr_copy(m_tmp_no_1, m_var_in)
6848      IF (optimize_theta) THEN
6849         ! check that all MO coefficients of the guess are less
6850         ! than the maximum allowed amplitude
6851         CALL dbcsr_norm(m_tmp_no_1, &
6852                         dbcsr_norm_maxabsnorm, norm_scalar=t_norm)
6853         IF (unit_nr > 0) THEN
6854            WRITE (unit_nr, *) "Maximum norm of the initial guess: ", t_norm
6855            WRITE (unit_nr, *) "Maximum allowed amplitude: ", &
6856               envelope_amplitude
6857         ENDIF
6858         IF (t_norm .GT. envelope_amplitude .AND. just_started) THEN
6859            CPABORT("Max norm of the initial guess is too large")
6860         ENDIF
6861         ! use artanh to tame MOs
6862         CALL dbcsr_function_of_elements(m_tmp_no_1, &
6863                                         func=dbcsr_func_tanh, &
6864                                         a0=0.0_dp, &
6865                                         a1=1.0_dp/envelope_amplitude)
6866         CALL dbcsr_scale(m_tmp_no_1, &
6867                          envelope_amplitude)
6868      ENDIF
6869      CALL dbcsr_hadamard_product(m_tmp_no_1, m_quench_t, &
6870                                  m_t_out)
6871
6872      ! project out R_0
6873      IF (assume_t0_q0x) THEN
6874         IF (special_case .EQ. xalmo_case_fully_deloc) THEN
6875            CALL dbcsr_multiply("T", "N", 1.0_dp, &
6876                                m_STsiginv0, &
6877                                m_t_out, &
6878                                0.0_dp, m_tmp_oo_1, &
6879                                filter_eps=eps_filter)
6880            CALL dbcsr_multiply("N", "N", -1.0_dp, &
6881                                m_t0, &
6882                                m_tmp_oo_1, &
6883                                1.0_dp, m_t_out, &
6884                                filter_eps=eps_filter)
6885         ELSE IF (special_case .EQ. xalmo_case_block_diag) THEN
6886            CPABORT("cannot use projector with block-daigonal ALMOs")
6887         ELSE
6888            ! no special case
6889            CALL apply_domain_operators( &
6890               matrix_in=m_t_out, &
6891               matrix_out=m_tmp_no_1, &
6892               operator1=domain_r_down, &
6893               operator2=domain_s_inv, &
6894               dpattern=m_quench_t, &
6895               map=domain_map, &
6896               node_of_domain=cpu_of_domain, &
6897               my_action=1, &
6898               filter_eps=eps_filter, &
6899               use_trimmer=.FALSE.)
6900            CALL dbcsr_copy(m_t_out, &
6901                            m_tmp_no_1)
6902         ENDIF ! special case
6903         CALL dbcsr_add(m_t_out, &
6904                        m_t0, 1.0_dp, 1.0_dp)
6905      ENDIF
6906
6907      IF (normalize_orbitals) THEN
6908         CALL orthogonalize_mos( &
6909            ket=m_t_out, &
6910            overlap=m_tmp_oo_1, &
6911            metric=m_s, &
6912            retain_locality=.TRUE., &
6913            only_normalize=.TRUE., &
6914            nocc_of_domain=nocc_of_domain(:), &
6915            eps_filter=eps_filter, &
6916            order_lanczos=order_lanczos, &
6917            eps_lanczos=eps_lanczos, &
6918            max_iter_lanczos=max_iter_lanczos, &
6919            overlap_sqrti=m_sig_sqrti_ii_out)
6920      ENDIF
6921
6922      CALL dbcsr_filter(m_t_out, eps=eps_filter)
6923
6924      CALL dbcsr_release(m_tmp_no_1)
6925      CALL dbcsr_release(m_tmp_oo_1)
6926
6927      CALL timestop(handle)
6928
6929   END SUBROUTINE compute_xalmos_from_main_var
6930
6931! *****************************************************************************
6932!> \brief Compute the preconditioner matrices and invert them if necessary
6933!> \param domain_prec_out ...
6934!> \param m_prec_out ...
6935!> \param m_ks ...
6936!> \param m_s ...
6937!> \param m_siginv ...
6938!> \param m_quench_t ...
6939!> \param m_FTsiginv ...
6940!> \param m_siginvTFTsiginv ...
6941!> \param m_ST ...
6942!> \param m_STsiginv_out ...
6943!> \param m_s_vv_out ...
6944!> \param m_f_vv_out ...
6945!> \param para_env ...
6946!> \param blacs_env ...
6947!> \param nocc_of_domain ...
6948!> \param domain_s_inv ...
6949!> \param domain_s_inv_half ...
6950!> \param domain_s_half ...
6951!> \param domain_r_down ...
6952!> \param cpu_of_domain ...
6953!> \param domain_map ...
6954!> \param assume_t0_q0x ...
6955!> \param penalty_occ_vol ...
6956!> \param penalty_occ_vol_prefactor ...
6957!> \param eps_filter ...
6958!> \param neg_thr ...
6959!> \param spin_factor ...
6960!> \param special_case ...
6961!> \param bad_modes_projector_down_out ...
6962!> \param skip_inversion ...
6963!> \par History
6964!>       2015.03 created [Rustam Z Khaliullin]
6965!> \author Rustam Z Khaliullin
6966! **************************************************************************************************
6967   SUBROUTINE compute_preconditioner(domain_prec_out, m_prec_out, m_ks, m_s, &
6968                                     m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, &
6969                                     m_STsiginv_out, m_s_vv_out, m_f_vv_out, para_env, &
6970                                     blacs_env, nocc_of_domain, domain_s_inv, domain_s_inv_half, domain_s_half, &
6971                                     domain_r_down, cpu_of_domain, &
6972                                     domain_map, assume_t0_q0x, penalty_occ_vol, penalty_occ_vol_prefactor, &
6973                                     eps_filter, neg_thr, spin_factor, special_case, bad_modes_projector_down_out, &
6974                                     skip_inversion)
6975
6976      TYPE(domain_submatrix_type), DIMENSION(:), &
6977         INTENT(INOUT)                                   :: domain_prec_out
6978      TYPE(dbcsr_type), INTENT(INOUT)                    :: m_prec_out, m_ks, m_s
6979      TYPE(dbcsr_type), INTENT(IN)                       :: m_siginv, m_quench_t, m_FTsiginv, &
6980                                                            m_siginvTFTsiginv, m_ST
6981      TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL          :: m_STsiginv_out, m_s_vv_out, m_f_vv_out
6982      TYPE(cp_para_env_type), POINTER                    :: para_env
6983      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
6984      INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
6985      TYPE(domain_submatrix_type), DIMENSION(:), &
6986         INTENT(IN)                                      :: domain_s_inv
6987      TYPE(domain_submatrix_type), DIMENSION(:), &
6988         INTENT(IN), OPTIONAL                            :: domain_s_inv_half, domain_s_half
6989      TYPE(domain_submatrix_type), DIMENSION(:), &
6990         INTENT(IN)                                      :: domain_r_down
6991      INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
6992      TYPE(domain_map_type), INTENT(IN)                  :: domain_map
6993      LOGICAL, INTENT(IN)                                :: assume_t0_q0x, penalty_occ_vol
6994      REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, eps_filter, &
6995                                                            neg_thr, spin_factor
6996      INTEGER, INTENT(IN)                                :: special_case
6997      TYPE(domain_submatrix_type), DIMENSION(:), &
6998         INTENT(INOUT), OPTIONAL                         :: bad_modes_projector_down_out
6999      LOGICAL, INTENT(IN)                                :: skip_inversion
7000
7001      CHARACTER(len=*), PARAMETER :: routineN = 'compute_preconditioner', &
7002         routineP = moduleN//':'//routineN
7003
7004      INTEGER                                            :: handle, precond_domain_projector
7005      TYPE(dbcsr_type)                                   :: m_tmp_nn_1, m_tmp_no_3
7006
7007      CALL timeset(routineN, handle)
7008
7009      CALL dbcsr_create(m_tmp_nn_1, &
7010                        template=m_s, &
7011                        matrix_type=dbcsr_type_no_symmetry)
7012      CALL dbcsr_create(m_tmp_no_3, &
7013                        template=m_quench_t, &
7014                        matrix_type=dbcsr_type_no_symmetry)
7015
7016      ! calculate (1-R)F(1-R) and S-SRS
7017      ! RZK-warning take advantage: some elements will be removed by the quencher
7018      ! RZK-warning S operations can be performed outside the spin loop to save time
7019      ! IT IS REQUIRED THAT PRECONDITIONER DOES NOT BREAK THE LOCALITY!!!!
7020      ! RZK-warning: further optimization is ABSOLUTELY NECESSARY
7021
7022      ! First S-SRS
7023      CALL dbcsr_multiply("N", "N", 1.0_dp, &
7024                          m_ST, &
7025                          m_siginv, &
7026                          0.0_dp, m_tmp_no_3, &
7027                          filter_eps=eps_filter)
7028      CALL dbcsr_desymmetrize(m_s, m_tmp_nn_1)
7029      ! return STsiginv if necessary
7030      IF (PRESENT(m_STsiginv_out)) THEN
7031         CALL dbcsr_copy(m_STsiginv_out, m_tmp_no_3)
7032      ENDIF
7033      IF (special_case .EQ. xalmo_case_fully_deloc) THEN
7034         ! use S instead of S-SRS
7035      ELSE
7036         CALL dbcsr_multiply("N", "T", -1.0_dp, &
7037                             m_ST, &
7038                             m_tmp_no_3, &
7039                             1.0_dp, m_tmp_nn_1, &
7040                             filter_eps=eps_filter)
7041      ENDIF
7042      ! return S_vv = (S or S-SRS) if necessary
7043      IF (PRESENT(m_s_vv_out)) THEN
7044         CALL dbcsr_copy(m_s_vv_out, m_tmp_nn_1)
7045      ENDIF
7046
7047      ! Second (1-R)F(1-R)
7048      ! re-create matrix because desymmetrize is buggy -
7049      ! it will create multiple copies of blocks
7050      CALL dbcsr_desymmetrize(m_ks, m_prec_out)
7051      CALL dbcsr_multiply("N", "T", -1.0_dp, &
7052                          m_FTsiginv, &
7053                          m_ST, &
7054                          1.0_dp, m_prec_out, &
7055                          filter_eps=eps_filter)
7056      CALL dbcsr_multiply("N", "T", -1.0_dp, &
7057                          m_ST, &
7058                          m_FTsiginv, &
7059                          1.0_dp, m_prec_out, &
7060                          filter_eps=eps_filter)
7061      CALL dbcsr_multiply("N", "N", 1.0_dp, &
7062                          m_ST, &
7063                          m_siginvTFTsiginv, &
7064                          0.0_dp, m_tmp_no_3, &
7065                          filter_eps=eps_filter)
7066      CALL dbcsr_multiply("N", "T", 1.0_dp, &
7067                          m_tmp_no_3, &
7068                          m_ST, &
7069                          1.0_dp, m_prec_out, &
7070                          filter_eps=eps_filter)
7071      ! return F_vv = (I-SR)F(I-RS) if necessary
7072      IF (PRESENT(m_f_vv_out)) THEN
7073         CALL dbcsr_copy(m_f_vv_out, m_prec_out)
7074      ENDIF
7075
7076#if 0
7077!penalty_only=.TRUE.
7078      WRITE (unit_nr, *) "prefactor0:", penalty_occ_vol_prefactor
7079      !IF (penalty_occ_vol) THEN
7080      CALL dbcsr_desymmetrize(m_s, &
7081                              m_prec_out)
7082      !CALL dbcsr_scale(m_prec_out,-penalty_occ_vol_prefactor)
7083      !ENDIF
7084#else
7085      ! sum up the F_vv and S_vv terms
7086      CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
7087                     1.0_dp, 1.0_dp)
7088      ! Scale to obtain unit step length
7089      CALL dbcsr_scale(m_prec_out, 2.0_dp*spin_factor)
7090
7091      ! add the contribution from the penalty on the occupied volume
7092      IF (penalty_occ_vol) THEN
7093         CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
7094                        1.0_dp, penalty_occ_vol_prefactor)
7095      ENDIF
7096#endif
7097
7098      CALL dbcsr_copy(m_tmp_nn_1, m_prec_out)
7099
7100      ! invert using various algorithms
7101      IF (special_case .EQ. xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
7102
7103         IF (skip_inversion) THEN
7104            CPABORT("NYI: impose blk structure on m_prec_out first")
7105            CALL dbcsr_copy(m_prec_out, m_tmp_nn_1, keep_sparsity=.TRUE.)
7106         ELSE
7107            CALL pseudo_invert_diagonal_blk( &
7108               matrix_in=m_tmp_nn_1, &
7109               matrix_out=m_prec_out, &
7110               nocc=nocc_of_domain(:) &
7111               )
7112         ENDIF
7113
7114      ELSE IF (special_case .EQ. xalmo_case_fully_deloc) THEN ! the entire system is a block
7115
7116         IF (skip_inversion) THEN
7117            CALL dbcsr_copy(m_prec_out, m_tmp_nn_1)
7118         ELSE
7119
7120            ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
7121            CALL cp_dbcsr_cholesky_decompose(m_prec_out, &
7122                                             para_env=para_env, &
7123                                             blacs_env=blacs_env)
7124            CALL cp_dbcsr_cholesky_invert(m_prec_out, &
7125                                          para_env=para_env, &
7126                                          blacs_env=blacs_env, &
7127                                          upper_to_full=.TRUE.)
7128         ENDIF !skip_inversion
7129
7130         CALL dbcsr_filter(m_prec_out, eps=eps_filter)
7131
7132      ELSE
7133
7134         !!! use a true domain preconditioner with overlapping domains
7135         IF (assume_t0_q0x) THEN
7136            precond_domain_projector = -1
7137         ELSE
7138            precond_domain_projector = 0
7139         ENDIF
7140         !! RZK-warning: use PRESENT to make two nearly-identical calls
7141         !! this is done because intel compiler does not seem to conform
7142         !! to the FORTRAN standard for passing through optional arguments
7143         IF (PRESENT(bad_modes_projector_down_out)) THEN
7144            CALL construct_domain_preconditioner( &
7145               matrix_main=m_tmp_nn_1, &
7146               subm_s_inv=domain_s_inv(:), &
7147               subm_s_inv_half=domain_s_inv_half(:), &
7148               subm_s_half=domain_s_half(:), &
7149               subm_r_down=domain_r_down(:), &
7150               matrix_trimmer=m_quench_t, &
7151               dpattern=m_quench_t, &
7152               map=domain_map, &
7153               node_of_domain=cpu_of_domain, &
7154               preconditioner=domain_prec_out(:), &
7155               use_trimmer=.FALSE., &
7156               bad_modes_projector_down=bad_modes_projector_down_out(:), &
7157               eps_zero_eigenvalues=neg_thr, &
7158               my_action=precond_domain_projector, &
7159               skip_inversion=skip_inversion &
7160               )
7161         ELSE
7162            CALL construct_domain_preconditioner( &
7163               matrix_main=m_tmp_nn_1, &
7164               subm_s_inv=domain_s_inv(:), &
7165               subm_r_down=domain_r_down(:), &
7166               matrix_trimmer=m_quench_t, &
7167               dpattern=m_quench_t, &
7168               map=domain_map, &
7169               node_of_domain=cpu_of_domain, &
7170               preconditioner=domain_prec_out(:), &
7171               use_trimmer=.FALSE., &
7172               !eps_zero_eigenvalues=neg_thr,&
7173               my_action=precond_domain_projector, &
7174               skip_inversion=skip_inversion &
7175               )
7176         ENDIF
7177
7178      ENDIF ! special_case
7179
7180      ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
7181      !!!CALL cp_dbcsr_cholesky_decompose(prec_vv,&
7182      !!!        para_env=almo_scf_env%para_env,&
7183      !!!        blacs_env=almo_scf_env%blacs_env)
7184      !!!CALL cp_dbcsr_cholesky_invert(prec_vv,&
7185      !!!        para_env=almo_scf_env%para_env,&
7186      !!!        blacs_env=almo_scf_env%blacs_env,&
7187      !!!        upper_to_full=.TRUE.)
7188      !!!CALL dbcsr_filter(prec_vv,&
7189      !!!        eps=almo_scf_env%eps_filter)
7190      !!!
7191
7192      ! re-create the matrix because desymmetrize is buggy -
7193      ! it will create multiple copies of blocks
7194      !!!DESYM!CALL dbcsr_create(prec_vv,&
7195      !!!DESYM!        template=almo_scf_env%matrix_s(1),&
7196      !!!DESYM!        matrix_type=dbcsr_type_no_symmetry)
7197      !!!DESYM!CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1),&
7198      !!!DESYM!        prec_vv)
7199      !CALL dbcsr_multiply("N","N",1.0_dp,&
7200      !        almo_scf_env%matrix_s(1),&
7201      !        matrix_t_out(ispin),&
7202      !        0.0_dp,m_tmp_no_1,&
7203      !        filter_eps=almo_scf_env%eps_filter)
7204      !CALL dbcsr_multiply("N","N",1.0_dp,&
7205      !        m_tmp_no_1,&
7206      !        almo_scf_env%matrix_sigma_inv(ispin),&
7207      !        0.0_dp,m_tmp_no_3,&
7208      !        filter_eps=almo_scf_env%eps_filter)
7209      !CALL dbcsr_multiply("N","T",-1.0_dp,&
7210      !        m_tmp_no_3,&
7211      !        m_tmp_no_1,&
7212      !        1.0_dp,prec_vv,&
7213      !        filter_eps=almo_scf_env%eps_filter)
7214      !CALL dbcsr_add_on_diag(prec_vv,&
7215      !        prec_sf_mixing_s)
7216
7217      !CALL dbcsr_create(prec_oo,&
7218      !        template=almo_scf_env%matrix_sigma(ispin),&
7219      !        matrix_type=dbcsr_type_no_symmetry)
7220      !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
7221      !        matrix_type=dbcsr_type_no_symmetry)
7222      !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
7223      !        prec_oo)
7224      !CALL dbcsr_filter(prec_oo,&
7225      !        eps=almo_scf_env%eps_filter)
7226
7227      !! invert using cholesky
7228      !CALL dbcsr_create(prec_oo_inv,&
7229      !        template=prec_oo,&
7230      !        matrix_type=dbcsr_type_no_symmetry)
7231      !CALL dbcsr_desymmetrize(prec_oo,&
7232      !        prec_oo_inv)
7233      !CALL cp_dbcsr_cholesky_decompose(prec_oo_inv,&
7234      !        para_env=almo_scf_env%para_env,&
7235      !        blacs_env=almo_scf_env%blacs_env)
7236      !CALL cp_dbcsr_cholesky_invert(prec_oo_inv,&
7237      !        para_env=almo_scf_env%para_env,&
7238      !        blacs_env=almo_scf_env%blacs_env,&
7239      !        upper_to_full=.TRUE.)
7240
7241      CALL dbcsr_release(m_tmp_nn_1)
7242      CALL dbcsr_release(m_tmp_no_3)
7243
7244      CALL timestop(handle)
7245
7246   END SUBROUTINE compute_preconditioner
7247
7248! *****************************************************************************
7249!> \brief Compute beta for conjugate gradient algorithms
7250!> \param beta ...
7251!> \param numer ...
7252!> \param denom ...
7253!> \param reset_conjugator ...
7254!> \param conjugator ...
7255!> \param grad ...
7256!> \param prev_grad ...
7257!> \param step ...
7258!> \param prev_step ...
7259!> \param prev_minus_prec_grad ...
7260!> \par History
7261!>       2015.04 created [Rustam Z Khaliullin]
7262!> \author Rustam Z Khaliullin
7263! **************************************************************************************************
7264   SUBROUTINE compute_cg_beta(beta, numer, denom, reset_conjugator, conjugator, &
7265                              grad, prev_grad, step, prev_step, prev_minus_prec_grad)
7266
7267      REAL(KIND=dp), INTENT(INOUT)                       :: beta
7268      REAL(KIND=dp), INTENT(INOUT), OPTIONAL             :: numer, denom
7269      LOGICAL, INTENT(INOUT)                             :: reset_conjugator
7270      INTEGER, INTENT(IN)                                :: conjugator
7271      TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: grad, prev_grad, step, prev_step
7272      TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT), &
7273         OPTIONAL                                        :: prev_minus_prec_grad
7274
7275      CHARACTER(len=*), PARAMETER :: routineN = 'compute_cg_beta', &
7276         routineP = moduleN//':'//routineN
7277
7278      INTEGER                                            :: handle, i, nsize, unit_nr
7279      REAL(KIND=dp)                                      :: den, kappa, my_denom, my_numer, &
7280                                                            my_numer2, my_numer3, num, num2, num3, &
7281                                                            tau
7282      TYPE(cp_logger_type), POINTER                      :: logger
7283      TYPE(dbcsr_type)                                   :: m_tmp_no_1
7284
7285      CALL timeset(routineN, handle)
7286
7287      ! get a useful output_unit
7288      logger => cp_get_default_logger()
7289      IF (logger%para_env%ionode) THEN
7290         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
7291      ELSE
7292         unit_nr = -1
7293      ENDIF
7294
7295      IF (.NOT. PRESENT(prev_minus_prec_grad)) THEN
7296         IF (conjugator .EQ. cg_fletcher_reeves .OR. &
7297             conjugator .EQ. cg_polak_ribiere .OR. &
7298             conjugator .EQ. cg_hager_zhang) THEN
7299            CPABORT("conjugator needs more input")
7300         ENDIF
7301      ENDIF
7302
7303      ! return num denom so beta can be calculated spin-by-spin
7304      IF (PRESENT(numer) .OR. PRESENT(denom)) THEN
7305         IF (conjugator .EQ. cg_hestenes_stiefel .OR. &
7306             conjugator .EQ. cg_dai_yuan .OR. &
7307             conjugator .EQ. cg_hager_zhang) THEN
7308            CPABORT("cannot return numer/denom")
7309         ENDIF
7310      ENDIF
7311
7312      nsize = SIZE(grad)
7313
7314      my_numer = 0.0_dp
7315      my_numer2 = 0.0_dp
7316      my_numer3 = 0.0_dp
7317      my_denom = 0.0_dp
7318
7319      DO i = 1, nsize
7320
7321         CALL dbcsr_create(m_tmp_no_1, &
7322                           template=grad(i), &
7323                           matrix_type=dbcsr_type_no_symmetry)
7324
7325         SELECT CASE (conjugator)
7326         CASE (cg_hestenes_stiefel)
7327            CALL dbcsr_copy(m_tmp_no_1, grad(i))
7328            CALL dbcsr_add(m_tmp_no_1, prev_grad(i), &
7329                           1.0_dp, -1.0_dp)
7330            CALL dbcsr_dot(m_tmp_no_1, step(i), num)
7331            CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
7332         CASE (cg_fletcher_reeves)
7333            CALL dbcsr_dot(grad(i), step(i), num)
7334            CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
7335         CASE (cg_polak_ribiere)
7336            CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
7337            CALL dbcsr_copy(m_tmp_no_1, grad(i))
7338            CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
7339            CALL dbcsr_dot(m_tmp_no_1, step(i), num)
7340         CASE (cg_fletcher)
7341            CALL dbcsr_dot(grad(i), step(i), num)
7342            CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
7343         CASE (cg_liu_storey)
7344            CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
7345            CALL dbcsr_copy(m_tmp_no_1, grad(i))
7346            CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
7347            CALL dbcsr_dot(m_tmp_no_1, step(i), num)
7348         CASE (cg_dai_yuan)
7349            CALL dbcsr_dot(grad(i), step(i), num)
7350            CALL dbcsr_copy(m_tmp_no_1, grad(i))
7351            CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
7352            CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
7353         CASE (cg_hager_zhang)
7354            CALL dbcsr_copy(m_tmp_no_1, grad(i))
7355            CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
7356            CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
7357            CALL dbcsr_dot(m_tmp_no_1, prev_minus_prec_grad(i), num)
7358            CALL dbcsr_dot(m_tmp_no_1, step(i), num2)
7359            CALL dbcsr_dot(prev_step(i), grad(i), num3)
7360            my_numer2 = my_numer2 + num2
7361            my_numer3 = my_numer3 + num3
7362         CASE (cg_zero)
7363            num = 0.0_dp
7364            den = 1.0_dp
7365         CASE DEFAULT
7366            CPABORT("illegal conjugator")
7367         END SELECT
7368         my_numer = my_numer + num
7369         my_denom = my_denom + den
7370
7371         CALL dbcsr_release(m_tmp_no_1)
7372
7373      ENDDO ! i - nsize
7374
7375      DO i = 1, nsize
7376
7377         SELECT CASE (conjugator)
7378         CASE (cg_hestenes_stiefel, cg_dai_yuan)
7379            beta = -1.0_dp*my_numer/my_denom
7380         CASE (cg_fletcher_reeves, cg_polak_ribiere, cg_fletcher, cg_liu_storey)
7381            beta = my_numer/my_denom
7382         CASE (cg_hager_zhang)
7383            kappa = -2.0_dp*my_numer/my_denom
7384            tau = -1.0_dp*my_numer2/my_denom
7385            beta = tau - kappa*my_numer3/my_denom
7386         CASE (cg_zero)
7387            beta = 0.0_dp
7388         CASE DEFAULT
7389            CPABORT("illegal conjugator")
7390         END SELECT
7391
7392      ENDDO ! i - nsize
7393
7394      IF (beta .LT. 0.0_dp) THEN
7395         IF (unit_nr > 0) THEN
7396            WRITE (unit_nr, *) " Resetting conjugator because beta is negative: ", beta
7397         ENDIF
7398         reset_conjugator = .TRUE.
7399      ENDIF
7400
7401      IF (PRESENT(numer)) THEN
7402         numer = my_numer
7403      ENDIF
7404      IF (PRESENT(denom)) THEN
7405         denom = my_denom
7406      ENDIF
7407
7408      CALL timestop(handle)
7409
7410   END SUBROUTINE compute_cg_beta
7411
7412! *****************************************************************************
7413!> \brief computes the step matrix from the gradient and Hessian using
7414!>         the Newton-Raphson method
7415!> \param optimizer ...
7416!> \param m_grad ...
7417!> \param m_delta ...
7418!> \param m_s ...
7419!> \param m_ks ...
7420!> \param m_siginv ...
7421!> \param m_quench_t ...
7422!> \param m_FTsiginv ...
7423!> \param m_siginvTFTsiginv ...
7424!> \param m_ST ...
7425!> \param m_t ...
7426!> \param m_sig_sqrti_ii ...
7427!> \param domain_s_inv ...
7428!> \param domain_r_down ...
7429!> \param domain_map ...
7430!> \param cpu_of_domain ...
7431!> \param nocc_of_domain ...
7432!> \param para_env ...
7433!> \param blacs_env ...
7434!> \param eps_filter ...
7435!> \param optimize_theta ...
7436!> \param penalty_occ_vol ...
7437!> \param normalize_orbitals ...
7438!> \param penalty_occ_vol_prefactor ...
7439!> \param penalty_occ_vol_pf2 ...
7440!> \param special_case ...
7441!> \par History
7442!>       2015.04 created [Rustam Z. Khaliullin]
7443!> \author Rustam Z. Khaliullin
7444! **************************************************************************************************
7445   SUBROUTINE newton_grad_to_step(optimizer, m_grad, m_delta, m_s, m_ks, &
7446                                  m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_t, &
7447                                  m_sig_sqrti_ii, domain_s_inv, domain_r_down, domain_map, cpu_of_domain, &
7448                                  nocc_of_domain, para_env, blacs_env, eps_filter, optimize_theta, &
7449                                  penalty_occ_vol, normalize_orbitals, penalty_occ_vol_prefactor, &
7450                                  penalty_occ_vol_pf2, special_case)
7451
7452      TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
7453      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_grad
7454      TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_delta, m_s, m_ks, m_siginv, m_quench_t
7455      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_FTsiginv, m_siginvTFTsiginv, m_ST, &
7456                                                            m_t, m_sig_sqrti_ii
7457      TYPE(domain_submatrix_type), DIMENSION(:, :), &
7458         INTENT(IN)                                      :: domain_s_inv, domain_r_down
7459      TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
7460      INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
7461      INTEGER, DIMENSION(:, :), INTENT(IN)               :: nocc_of_domain
7462      TYPE(cp_para_env_type), POINTER                    :: para_env
7463      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
7464      REAL(KIND=dp), INTENT(IN)                          :: eps_filter
7465      LOGICAL, INTENT(IN)                                :: optimize_theta, penalty_occ_vol, &
7466                                                            normalize_orbitals
7467      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor, &
7468                                                            penalty_occ_vol_pf2
7469      INTEGER, INTENT(IN)                                :: special_case
7470
7471      CHARACTER(len=*), PARAMETER :: routineN = 'newton_grad_to_step', &
7472         routineP = moduleN//':'//routineN
7473
7474      CHARACTER(LEN=20)                                  :: iter_type
7475      INTEGER                                            :: handle, ispin, iteration, max_iter, &
7476                                                            ndomains, nspins, outer_iteration, &
7477                                                            outer_max_iter, unit_nr
7478      LOGICAL :: converged, do_exact_inversion, outer_prepare_to_exit, prepare_to_exit, &
7479         reset_conjugator, use_preconditioner
7480      REAL(KIND=dp)                                      :: alpha, beta, denom, denom_ispin, &
7481                                                            eps_error_target, numer, numer_ispin, &
7482                                                            residue_norm, spin_factor, t1, t2
7483      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: residue_max_norm
7484      TYPE(cp_logger_type), POINTER                      :: logger
7485      TYPE(dbcsr_type)                                   :: m_tmp_oo_1, m_tmp_oo_2
7486      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_f_vo, m_f_vv, m_Hstep, m_prec, &
7487                                                            m_residue, m_residue_prev, m_s_vv, &
7488                                                            m_step, m_STsiginv, m_zet, m_zet_prev
7489      TYPE(domain_submatrix_type), ALLOCATABLE, &
7490         DIMENSION(:, :)                                 :: domain_prec
7491
7492      CALL timeset(routineN, handle)
7493
7494      ! get a useful output_unit
7495      logger => cp_get_default_logger()
7496      IF (logger%para_env%ionode) THEN
7497         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
7498      ELSE
7499         unit_nr = -1
7500      ENDIF
7501
7502      !!! Currently for non-theta only
7503      IF (optimize_theta) THEN
7504         CPABORT("theta is NYI")
7505      ENDIF
7506
7507      ! set optimizer options
7508      use_preconditioner = (optimizer%preconditioner .NE. xalmo_prec_zero)
7509      outer_max_iter = optimizer%max_iter_outer_loop
7510      max_iter = optimizer%max_iter
7511      eps_error_target = optimizer%eps_error
7512
7513      ! set key dimensions
7514      nspins = SIZE(m_ks)
7515      ndomains = SIZE(domain_s_inv, 1)
7516
7517      IF (nspins == 1) THEN
7518         spin_factor = 2.0_dp
7519      ELSE
7520         spin_factor = 1.0_dp
7521      ENDIF
7522
7523      ALLOCATE (domain_prec(ndomains, nspins))
7524      CALL init_submatrices(domain_prec)
7525
7526      ! allocate matrices
7527      ALLOCATE (m_residue(nspins))
7528      ALLOCATE (m_residue_prev(nspins))
7529      ALLOCATE (m_step(nspins))
7530      ALLOCATE (m_zet(nspins))
7531      ALLOCATE (m_zet_prev(nspins))
7532      ALLOCATE (m_Hstep(nspins))
7533      ALLOCATE (m_prec(nspins))
7534      ALLOCATE (m_s_vv(nspins))
7535      ALLOCATE (m_f_vv(nspins))
7536      ALLOCATE (m_f_vo(nspins))
7537      ALLOCATE (m_STsiginv(nspins))
7538
7539      ALLOCATE (residue_max_norm(nspins))
7540
7541      ! initiate objects before iterations
7542      DO ispin = 1, nspins
7543
7544         ! init matrices
7545         CALL dbcsr_create(m_residue(ispin), &
7546                           template=m_quench_t(ispin), &
7547                           matrix_type=dbcsr_type_no_symmetry)
7548         CALL dbcsr_create(m_residue_prev(ispin), &
7549                           template=m_quench_t(ispin), &
7550                           matrix_type=dbcsr_type_no_symmetry)
7551         CALL dbcsr_create(m_step(ispin), &
7552                           template=m_quench_t(ispin), &
7553                           matrix_type=dbcsr_type_no_symmetry)
7554         CALL dbcsr_create(m_zet_prev(ispin), &
7555                           template=m_quench_t(ispin), &
7556                           matrix_type=dbcsr_type_no_symmetry)
7557         CALL dbcsr_create(m_zet(ispin), &
7558                           template=m_quench_t(ispin), &
7559                           matrix_type=dbcsr_type_no_symmetry)
7560         CALL dbcsr_create(m_Hstep(ispin), &
7561                           template=m_quench_t(ispin), &
7562                           matrix_type=dbcsr_type_no_symmetry)
7563         CALL dbcsr_create(m_f_vo(ispin), &
7564                           template=m_quench_t(ispin), &
7565                           matrix_type=dbcsr_type_no_symmetry)
7566         CALL dbcsr_create(m_STsiginv(ispin), &
7567                           template=m_quench_t(ispin), &
7568                           matrix_type=dbcsr_type_no_symmetry)
7569         CALL dbcsr_create(m_f_vv(ispin), &
7570                           template=m_ks(ispin), &
7571                           matrix_type=dbcsr_type_no_symmetry)
7572         CALL dbcsr_create(m_s_vv(ispin), &
7573                           template=m_s(1), &
7574                           matrix_type=dbcsr_type_no_symmetry)
7575         CALL dbcsr_create(m_prec(ispin), &
7576                           template=m_ks(ispin), &
7577                           matrix_type=dbcsr_type_no_symmetry)
7578
7579         ! compute the full "gradient" - it is necessary to
7580         ! evaluate Hessian.X
7581         CALL dbcsr_copy(m_f_vo(ispin), m_FTsiginv(ispin))
7582         CALL dbcsr_multiply("N", "N", -1.0_dp, &
7583                             m_ST(ispin), &
7584                             m_siginvTFTsiginv(ispin), &
7585                             1.0_dp, m_f_vo(ispin), &
7586                             filter_eps=eps_filter)
7587
7588! RZK-warning
7589! compute preconditioner even if we do not use it
7590! this is for debugging because compute_preconditioner includes
7591! computing F_vv and S_vv necessary for
7592!       IF ( use_preconditioner ) THEN
7593
7594! domain_s_inv and domain_r_down are never used with assume_t0_q0x=FALSE
7595         CALL compute_preconditioner( &
7596            domain_prec_out=domain_prec(:, ispin), &
7597            m_prec_out=m_prec(ispin), &
7598            m_ks=m_ks(ispin), &
7599            m_s=m_s(1), &
7600            m_siginv=m_siginv(ispin), &
7601            m_quench_t=m_quench_t(ispin), &
7602            m_FTsiginv=m_FTsiginv(ispin), &
7603            m_siginvTFTsiginv=m_siginvTFTsiginv(ispin), &
7604            m_ST=m_ST(ispin), &
7605            m_STsiginv_out=m_STsiginv(ispin), &
7606            m_s_vv_out=m_s_vv(ispin), &
7607            m_f_vv_out=m_f_vv(ispin), &
7608            para_env=para_env, &
7609            blacs_env=blacs_env, &
7610            nocc_of_domain=nocc_of_domain(:, ispin), &
7611            domain_s_inv=domain_s_inv(:, ispin), &
7612            domain_r_down=domain_r_down(:, ispin), &
7613            cpu_of_domain=cpu_of_domain(:), &
7614            domain_map=domain_map(ispin), &
7615            assume_t0_q0x=.FALSE., &
7616            penalty_occ_vol=penalty_occ_vol, &
7617            penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
7618            eps_filter=eps_filter, &
7619            neg_thr=0.5_dp, &
7620            spin_factor=spin_factor, &
7621            special_case=special_case, &
7622            skip_inversion=.FALSE. &
7623            )
7624
7625!       ENDIF ! use_preconditioner
7626
7627         ! initial guess
7628         CALL dbcsr_copy(m_delta(ispin), m_quench_t(ispin))
7629         ! in order to use dbcsr_set matrix blocks must exist
7630         CALL dbcsr_set(m_delta(ispin), 0.0_dp)
7631         CALL dbcsr_copy(m_residue(ispin), m_grad(ispin))
7632         CALL dbcsr_scale(m_residue(ispin), -1.0_dp)
7633
7634         do_exact_inversion = .FALSE.
7635         IF (do_exact_inversion) THEN
7636
7637            ! copy grad to m_step temporarily
7638            ! use m_step as input to the inversion routine
7639            CALL dbcsr_copy(m_step(ispin), m_grad(ispin))
7640
7641            ! expensive "exact" inversion of the "nearly-exact" Hessian
7642            ! hopefully returns Z=-H^(-1).G
7643            CALL hessian_diag_apply( &
7644               matrix_grad=m_step(ispin), &
7645               matrix_step=m_zet(ispin), &
7646               matrix_S_ao=m_s_vv(ispin), &
7647               matrix_F_ao=m_f_vv(ispin), &
7648               !matrix_S_ao=m_s(ispin),&
7649               !matrix_F_ao=m_ks(ispin),&
7650               matrix_S_mo=m_siginv(ispin), &
7651               matrix_F_mo=m_siginvTFTsiginv(ispin), &
7652               matrix_S_vo=m_STsiginv(ispin), &
7653               matrix_F_vo=m_f_vo(ispin), &
7654               quench_t=m_quench_t(ispin), &
7655               spin_factor=spin_factor, &
7656               eps_zero=eps_filter*10.0_dp, &
7657               penalty_occ_vol=penalty_occ_vol, &
7658               penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
7659               penalty_occ_vol_pf2=penalty_occ_vol_pf2(ispin), &
7660               m_s=m_s(1), &
7661               para_env=para_env, &
7662               blacs_env=blacs_env &
7663               )
7664            ! correct solution by the spin factor
7665            !CALL dbcsr_scale(m_zet(ispin),1.0_dp/(2.0_dp*spin_factor))
7666
7667         ELSE ! use PCG to solve H.D=-G
7668
7669            IF (use_preconditioner) THEN
7670
7671               IF (special_case .EQ. xalmo_case_block_diag .OR. &
7672                   special_case .EQ. xalmo_case_fully_deloc) THEN
7673
7674                  CALL dbcsr_multiply("N", "N", 1.0_dp, &
7675                                      m_prec(ispin), &
7676                                      m_residue(ispin), &
7677                                      0.0_dp, m_zet(ispin), &
7678                                      filter_eps=eps_filter)
7679
7680               ELSE
7681
7682                  CALL apply_domain_operators( &
7683                     matrix_in=m_residue(ispin), &
7684                     matrix_out=m_zet(ispin), &
7685                     operator1=domain_prec(:, ispin), &
7686                     dpattern=m_quench_t(ispin), &
7687                     map=domain_map(ispin), &
7688                     node_of_domain=cpu_of_domain(:), &
7689                     my_action=0, &
7690                     filter_eps=eps_filter &
7691                     !matrix_trimmer=,&
7692                     !use_trimmer=.FALSE.,&
7693                     )
7694
7695               ENDIF ! special_case
7696
7697            ELSE ! do not use preconditioner
7698
7699               CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
7700
7701            ENDIF ! use_preconditioner
7702
7703         ENDIF ! do_exact_inversion
7704
7705         CALL dbcsr_copy(m_step(ispin), m_zet(ispin))
7706
7707      ENDDO !ispin
7708
7709      ! start the outer SCF loop
7710      outer_prepare_to_exit = .FALSE.
7711      outer_iteration = 0
7712      residue_norm = 0.0_dp
7713
7714      DO
7715
7716         ! start the inner SCF loop
7717         prepare_to_exit = .FALSE.
7718         converged = .FALSE.
7719         iteration = 0
7720         t1 = m_walltime()
7721
7722         DO
7723
7724            ! apply hessian to the step matrix
7725            CALL apply_hessian( &
7726               m_x_in=m_step, &
7727               m_x_out=m_Hstep, &
7728               m_ks=m_ks, &
7729               m_s=m_s, &
7730               m_siginv=m_siginv, &
7731               m_quench_t=m_quench_t, &
7732               m_FTsiginv=m_FTsiginv, &
7733               m_siginvTFTsiginv=m_siginvTFTsiginv, &
7734               m_ST=m_ST, &
7735               m_STsiginv=m_STsiginv, &
7736               m_s_vv=m_s_vv, &
7737               m_ks_vv=m_f_vv, &
7738               !m_s_vv=m_s,&
7739               !m_ks_vv=m_ks,&
7740               m_g_full=m_f_vo, &
7741               m_t=m_t, &
7742               m_sig_sqrti_ii=m_sig_sqrti_ii, &
7743               penalty_occ_vol=penalty_occ_vol, &
7744               normalize_orbitals=normalize_orbitals, &
7745               penalty_occ_vol_prefactor=penalty_occ_vol_prefactor, &
7746               eps_filter=eps_filter, &
7747               path_num=hessian_path_reuse &
7748               )
7749
7750            ! alpha is computed outside the spin loop
7751            numer = 0.0_dp
7752            denom = 0.0_dp
7753            DO ispin = 1, nspins
7754
7755               CALL dbcsr_dot(m_residue(ispin), m_zet(ispin), numer_ispin)
7756               CALL dbcsr_dot(m_step(ispin), m_Hstep(ispin), denom_ispin)
7757
7758               numer = numer + numer_ispin
7759               denom = denom + denom_ispin
7760
7761            ENDDO !ispin
7762
7763            alpha = numer/denom
7764
7765            DO ispin = 1, nspins
7766
7767               ! update the variable
7768               CALL dbcsr_add(m_delta(ispin), m_step(ispin), 1.0_dp, alpha)
7769               CALL dbcsr_copy(m_residue_prev(ispin), m_residue(ispin))
7770               CALL dbcsr_add(m_residue(ispin), m_Hstep(ispin), &
7771                              1.0_dp, -1.0_dp*alpha)
7772               CALL dbcsr_norm(m_residue(ispin), dbcsr_norm_maxabsnorm, &
7773                               norm_scalar=residue_max_norm(ispin))
7774
7775            ENDDO ! ispin
7776
7777            ! check convergence and other exit criteria
7778            residue_norm = MAXVAL(residue_max_norm)
7779            converged = (residue_norm .LT. eps_error_target)
7780            IF (converged .OR. (iteration .GE. max_iter)) THEN
7781               prepare_to_exit = .TRUE.
7782            ENDIF
7783
7784            IF (.NOT. prepare_to_exit) THEN
7785
7786               DO ispin = 1, nspins
7787
7788                  ! save current z before the update
7789                  CALL dbcsr_copy(m_zet_prev(ispin), m_zet(ispin))
7790
7791                  ! compute the new step (apply preconditioner if available)
7792                  IF (use_preconditioner) THEN
7793
7794                     !IF (unit_nr>0) THEN
7795                     !   WRITE(unit_nr,*) "....applying preconditioner...."
7796                     !ENDIF
7797
7798                     IF (special_case .EQ. xalmo_case_block_diag .OR. &
7799                         special_case .EQ. xalmo_case_fully_deloc) THEN
7800
7801                        CALL dbcsr_multiply("N", "N", 1.0_dp, &
7802                                            m_prec(ispin), &
7803                                            m_residue(ispin), &
7804                                            0.0_dp, m_zet(ispin), &
7805                                            filter_eps=eps_filter)
7806
7807                     ELSE
7808
7809                        CALL apply_domain_operators( &
7810                           matrix_in=m_residue(ispin), &
7811                           matrix_out=m_zet(ispin), &
7812                           operator1=domain_prec(:, ispin), &
7813                           dpattern=m_quench_t(ispin), &
7814                           map=domain_map(ispin), &
7815                           node_of_domain=cpu_of_domain(:), &
7816                           my_action=0, &
7817                           filter_eps=eps_filter &
7818                           !matrix_trimmer=,&
7819                           !use_trimmer=.FALSE.,&
7820                           )
7821
7822                     ENDIF ! special case
7823
7824                  ELSE
7825
7826                     CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
7827
7828                  ENDIF
7829
7830               ENDDO !ispin
7831
7832               ! compute the conjugation coefficient - beta
7833               CALL compute_cg_beta( &
7834                  beta=beta, &
7835                  reset_conjugator=reset_conjugator, &
7836                  conjugator=cg_fletcher, &
7837                  grad=m_residue, &
7838                  prev_grad=m_residue_prev, &
7839                  step=m_zet, &
7840                  prev_step=m_zet_prev)
7841
7842               DO ispin = 1, nspins
7843
7844                  ! conjugate the step direction
7845                  CALL dbcsr_add(m_step(ispin), m_zet(ispin), beta, 1.0_dp)
7846
7847               ENDDO !ispin
7848
7849            ENDIF ! not.prepare_to_exit
7850
7851            t2 = m_walltime()
7852            IF (unit_nr > 0) THEN
7853               !iter_type=TRIM("ALMO SCF "//iter_type)
7854               iter_type = TRIM("NR STEP")
7855               WRITE (unit_nr, '(T6,A9,I6,F14.5,F14.5,F15.10,F9.2)') &
7856                  iter_type, iteration, &
7857                  alpha, beta, residue_norm, &
7858                  t2 - t1
7859            ENDIF
7860            t1 = m_walltime()
7861
7862            iteration = iteration + 1
7863            IF (prepare_to_exit) EXIT
7864
7865         ENDDO ! inner loop
7866
7867         IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
7868            outer_prepare_to_exit = .TRUE.
7869         ENDIF
7870
7871         outer_iteration = outer_iteration + 1
7872         IF (outer_prepare_to_exit) EXIT
7873
7874      ENDDO ! outer loop
7875
7876! is not necessary if penalty_occ_vol_pf2=0.0
7877#if 0
7878
7879      IF (penalty_occ_vol) THEN
7880
7881         DO ispin = 1, nspins
7882
7883            CALL dbcsr_copy(m_zet(ispin), m_grad(ispin))
7884            CALL dbcsr_dot(m_delta(ispin), m_zet(ispin), alpha)
7885            WRITE (unit_nr, *) "trace(grad.delta): ", alpha
7886            alpha = -1.0_dp/(penalty_occ_vol_pf2(ispin)*alpha - 1.0_dp)
7887            WRITE (unit_nr, *) "correction alpha: ", alpha
7888            CALL dbcsr_scale(m_delta(ispin), alpha)
7889
7890         ENDDO
7891
7892      ENDIF
7893
7894#endif
7895
7896      DO ispin = 1, nspins
7897
7898         ! check whether the step lies entirely in R or Q
7899         CALL dbcsr_create(m_tmp_oo_1, &
7900                           template=m_siginv(ispin), &
7901                           matrix_type=dbcsr_type_no_symmetry)
7902         CALL dbcsr_create(m_tmp_oo_2, &
7903                           template=m_siginv(ispin), &
7904                           matrix_type=dbcsr_type_no_symmetry)
7905         CALL dbcsr_multiply("T", "N", 1.0_dp, &
7906                             m_ST(ispin), &
7907                             m_delta(ispin), &
7908                             0.0_dp, m_tmp_oo_1, &
7909                             filter_eps=eps_filter)
7910         CALL dbcsr_multiply("N", "N", 1.0_dp, &
7911                             m_siginv(ispin), &
7912                             m_tmp_oo_1, &
7913                             0.0_dp, m_tmp_oo_2, &
7914                             filter_eps=eps_filter)
7915         CALL dbcsr_copy(m_zet(ispin), m_quench_t(ispin))
7916         CALL dbcsr_multiply("N", "N", 1.0_dp, &
7917                             m_t(ispin), &
7918                             m_tmp_oo_2, &
7919                             0.0_dp, m_zet(ispin), &
7920                             retain_sparsity=.TRUE.)
7921         CALL dbcsr_norm(m_zet(ispin), dbcsr_norm_maxabsnorm, &
7922                         norm_scalar=alpha)
7923         WRITE (unit_nr, "(A50,2F20.10)") "Occupied-space projection of the step", alpha
7924         CALL dbcsr_add(m_zet(ispin), m_delta(ispin), -1.0_dp, 1.0_dp)
7925         CALL dbcsr_norm(m_zet(ispin), dbcsr_norm_maxabsnorm, &
7926                         norm_scalar=alpha)
7927         WRITE (unit_nr, "(A50,2F20.10)") "Virtual-space projection of the step", alpha
7928         CALL dbcsr_norm(m_delta(ispin), dbcsr_norm_maxabsnorm, &
7929                         norm_scalar=alpha)
7930         WRITE (unit_nr, "(A50,2F20.10)") "Full step", alpha
7931         CALL dbcsr_release(m_tmp_oo_1)
7932         CALL dbcsr_release(m_tmp_oo_2)
7933
7934      ENDDO
7935
7936      ! clean up
7937      DO ispin = 1, nspins
7938         CALL release_submatrices(domain_prec(:, ispin))
7939         CALL dbcsr_release(m_residue(ispin))
7940         CALL dbcsr_release(m_residue_prev(ispin))
7941         CALL dbcsr_release(m_step(ispin))
7942         CALL dbcsr_release(m_zet(ispin))
7943         CALL dbcsr_release(m_zet_prev(ispin))
7944         CALL dbcsr_release(m_Hstep(ispin))
7945         CALL dbcsr_release(m_f_vo(ispin))
7946         CALL dbcsr_release(m_f_vv(ispin))
7947         CALL dbcsr_release(m_s_vv(ispin))
7948         CALL dbcsr_release(m_prec(ispin))
7949         CALL dbcsr_release(m_STsiginv(ispin))
7950      ENDDO !ispin
7951      DEALLOCATE (domain_prec)
7952      DEALLOCATE (m_residue)
7953      DEALLOCATE (m_residue_prev)
7954      DEALLOCATE (m_step)
7955      DEALLOCATE (m_zet)
7956      DEALLOCATE (m_zet_prev)
7957      DEALLOCATE (m_prec)
7958      DEALLOCATE (m_Hstep)
7959      DEALLOCATE (m_s_vv)
7960      DEALLOCATE (m_f_vv)
7961      DEALLOCATE (m_f_vo)
7962      DEALLOCATE (m_STsiginv)
7963      DEALLOCATE (residue_max_norm)
7964
7965      IF (.NOT. converged) THEN
7966         CPABORT("Optimization not converged!")
7967      ENDIF
7968
7969      ! check that the step satisfies H.step=-grad
7970
7971      CALL timestop(handle)
7972
7973   END SUBROUTINE newton_grad_to_step
7974
7975! *****************************************************************************
7976!> \brief Computes Hessian.X
7977!> \param m_x_in ...
7978!> \param m_x_out ...
7979!> \param m_ks ...
7980!> \param m_s ...
7981!> \param m_siginv ...
7982!> \param m_quench_t ...
7983!> \param m_FTsiginv ...
7984!> \param m_siginvTFTsiginv ...
7985!> \param m_ST ...
7986!> \param m_STsiginv ...
7987!> \param m_s_vv ...
7988!> \param m_ks_vv ...
7989!> \param m_g_full ...
7990!> \param m_t ...
7991!> \param m_sig_sqrti_ii ...
7992!> \param penalty_occ_vol ...
7993!> \param normalize_orbitals ...
7994!> \param penalty_occ_vol_prefactor ...
7995!> \param eps_filter ...
7996!> \param path_num ...
7997!> \par History
7998!>       2015.04 created [Rustam Z Khaliullin]
7999!> \author Rustam Z Khaliullin
8000! **************************************************************************************************
8001   SUBROUTINE apply_hessian(m_x_in, m_x_out, m_ks, m_s, m_siginv, &
8002                            m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv, m_s_vv, &
8003                            m_ks_vv, m_g_full, m_t, m_sig_sqrti_ii, penalty_occ_vol, &
8004                            normalize_orbitals, penalty_occ_vol_prefactor, eps_filter, path_num)
8005
8006      TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_x_in, m_x_out, m_ks, m_s
8007      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_siginv, m_quench_t, m_FTsiginv, &
8008                                                            m_siginvTFTsiginv, m_ST, m_STsiginv
8009      TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_s_vv, m_ks_vv, m_g_full
8010      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_t, m_sig_sqrti_ii
8011      LOGICAL, INTENT(IN)                                :: penalty_occ_vol, normalize_orbitals
8012      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor
8013      REAL(KIND=dp), INTENT(IN)                          :: eps_filter
8014      INTEGER, INTENT(IN)                                :: path_num
8015
8016      CHARACTER(len=*), PARAMETER :: routineN = 'apply_hessian', routineP = moduleN//':'//routineN
8017
8018      INTEGER                                            :: dim0, handle, ispin, nspins
8019      REAL(KIND=dp)                                      :: penalty_prefactor_local, spin_factor
8020      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
8021      TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_oo_1, &
8022                                                            m_tmp_x_in
8023
8024      CALL timeset(routineN, handle)
8025
8026      !JHU: test and use for unused debug variables
8027      IF (penalty_occ_vol) penalty_prefactor_local = 1._dp
8028      CPASSERT(SIZE(m_STsiginv) >= 0)
8029      CPASSERT(SIZE(m_siginvTFTsiginv) >= 0)
8030      CPASSERT(SIZE(m_s) >= 0)
8031      CPASSERT(SIZE(m_g_full) >= 0)
8032      CPASSERT(SIZE(m_FTsiginv) >= 0)
8033
8034      nspins = SIZE(m_ks)
8035
8036      IF (nspins .EQ. 1) THEN
8037         spin_factor = 2.0_dp
8038      ELSE
8039         spin_factor = 1.0_dp
8040      ENDIF
8041
8042      DO ispin = 1, nspins
8043
8044         penalty_prefactor_local = penalty_occ_vol_prefactor(ispin)/(2.0_dp*spin_factor)
8045
8046         CALL dbcsr_create(m_tmp_oo_1, &
8047                           template=m_siginv(ispin), &
8048                           matrix_type=dbcsr_type_no_symmetry)
8049         CALL dbcsr_create(m_tmp_no_1, &
8050                           template=m_quench_t(ispin), &
8051                           matrix_type=dbcsr_type_no_symmetry)
8052         CALL dbcsr_create(m_tmp_no_2, &
8053                           template=m_quench_t(ispin), &
8054                           matrix_type=dbcsr_type_no_symmetry)
8055         CALL dbcsr_create(m_tmp_x_in, &
8056                           template=m_quench_t(ispin), &
8057                           matrix_type=dbcsr_type_no_symmetry)
8058
8059         ! transform the input X to take into account the normalization constraint
8060         IF (normalize_orbitals) THEN
8061
8062            ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
8063
8064            ! get [tr(T).HD]_ii
8065            CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
8066            CALL dbcsr_multiply("T", "N", 1.0_dp, &
8067                                m_x_in(ispin), &
8068                                m_ST(ispin), &
8069                                0.0_dp, m_tmp_oo_1, &
8070                                retain_sparsity=.TRUE.)
8071            CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
8072            ALLOCATE (tg_diagonal(dim0))
8073            CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
8074            CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
8075            CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
8076            DEALLOCATE (tg_diagonal)
8077
8078            CALL dbcsr_copy(m_tmp_no_1, m_x_in(ispin))
8079            CALL dbcsr_multiply("N", "N", -1.0_dp, &
8080                                m_t(ispin), &
8081                                m_tmp_oo_1, &
8082                                1.0_dp, m_tmp_no_1, &
8083                                filter_eps=eps_filter)
8084            CALL dbcsr_multiply("N", "N", 1.0_dp, &
8085                                m_tmp_no_1, &
8086                                m_sig_sqrti_ii(ispin), &
8087                                0.0_dp, m_tmp_x_in, &
8088                                filter_eps=eps_filter)
8089
8090         ELSE
8091
8092            CALL dbcsr_copy(m_tmp_x_in, m_x_in(ispin))
8093
8094         ENDIF ! normalize_orbitals
8095
8096         IF (path_num .EQ. hessian_path_reuse) THEN
8097
8098            ! apply pre-computed F_vv and S_vv to X
8099
8100#if 0
8101! RZK-warning: negative sign at penalty_prefactor_local is that
8102! magical fix for the negative definite problem
8103! (since penalty_prefactor_local<0 the coeff before S_vv must
8104! be multiplied by -1 to take the step in the right direction)
8105!CALL dbcsr_multiply("N","N",-4.0_dp*penalty_prefactor_local,&
8106!        m_s_vv(ispin),&
8107!        m_tmp_x_in,&
8108!        0.0_dp,m_tmp_no_1,&
8109!        filter_eps=eps_filter)
8110!CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
8111!CALL dbcsr_multiply("N","N",1.0_dp,&
8112!        m_tmp_no_1,&
8113!        m_siginv(ispin),&
8114!        0.0_dp,m_x_out(ispin),&
8115!        retain_sparsity=.TRUE.)
8116
8117            CALL dbcsr_multiply("N", "N", 1.0_dp, &
8118                                m_s(1), &
8119                                m_tmp_x_in, &
8120                                0.0_dp, m_tmp_no_1, &
8121                                filter_eps=eps_filter)
8122            CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
8123            CALL dbcsr_multiply("N", "N", 1.0_dp, &
8124                                m_tmp_no_1, &
8125                                m_siginv(ispin), &
8126                                0.0_dp, m_x_out(ispin), &
8127                                retain_sparsity=.TRUE.)
8128
8129!CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
8130!CALL dbcsr_multiply("N","N",1.0_dp,&
8131!        m_s(1),&
8132!        m_tmp_x_in,&
8133!        0.0_dp,m_x_out(ispin),&
8134!        retain_sparsity=.TRUE.)
8135
8136#else
8137
8138            ! debugging: only vv matrices, oo matrices are kronecker
8139            CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
8140            CALL dbcsr_multiply("N", "N", 1.0_dp, &
8141                                m_ks_vv(ispin), &
8142                                m_tmp_x_in, &
8143                                0.0_dp, m_x_out(ispin), &
8144                                retain_sparsity=.TRUE.)
8145
8146            CALL dbcsr_copy(m_tmp_no_2, m_quench_t(ispin))
8147            CALL dbcsr_multiply("N", "N", 1.0_dp, &
8148                                m_s_vv(ispin), &
8149                                m_tmp_x_in, &
8150                                0.0_dp, m_tmp_no_2, &
8151                                retain_sparsity=.TRUE.)
8152            CALL dbcsr_add(m_x_out(ispin), m_tmp_no_2, &
8153                           1.0_dp, -4.0_dp*penalty_prefactor_local + 1.0_dp)
8154#endif
8155
8156!          ! F_vv.X.S_oo
8157!          CALL dbcsr_multiply("N","N",1.0_dp,&
8158!                  m_ks_vv(ispin),&
8159!                  m_tmp_x_in,&
8160!                  0.0_dp,m_tmp_no_1,&
8161!                  filter_eps=eps_filter,&
8162!                  )
8163!          CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
8164!          CALL dbcsr_multiply("N","N",1.0_dp,&
8165!                  m_tmp_no_1,&
8166!                  m_siginv(ispin),&
8167!                  0.0_dp,m_x_out(ispin),&
8168!                  retain_sparsity=.TRUE.,&
8169!                  )
8170!
8171!          ! S_vv.X.F_oo
8172!          CALL dbcsr_multiply("N","N",1.0_dp,&
8173!                  m_s_vv(ispin),&
8174!                  m_tmp_x_in,&
8175!                  0.0_dp,m_tmp_no_1,&
8176!                  filter_eps=eps_filter,&
8177!                  )
8178!          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
8179!          CALL dbcsr_multiply("N","N",1.0_dp,&
8180!                  m_tmp_no_1,&
8181!                  m_siginvTFTsiginv(ispin),&
8182!                  0.0_dp,m_tmp_no_2,&
8183!                  retain_sparsity=.TRUE.,&
8184!                  )
8185!          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
8186!               1.0_dp,-1.0_dp)
8187!! we have to add occ voll penalty here (the Svv termi (i.e. both Svv.D.Soo)
8188!!  and STsiginv terms)
8189!
8190!         ! S_vo.X^t.F_vo
8191!          CALL dbcsr_multiply("T","N",1.0_dp,&
8192!                  m_tmp_x_in,&
8193!                  m_g_full(ispin),&
8194!                  0.0_dp,m_tmp_oo_1,&
8195!                  filter_eps=eps_filter,&
8196!                  )
8197!          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
8198!          CALL dbcsr_multiply("N","N",1.0_dp,&
8199!                  m_STsiginv(ispin),&
8200!                  m_tmp_oo_1,&
8201!                  0.0_dp,m_tmp_no_2,&
8202!                  retain_sparsity=.TRUE.,&
8203!                  )
8204!          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
8205!                  1.0_dp,-1.0_dp)
8206!
8207!          ! S_vo.X^t.F_vo
8208!          CALL dbcsr_multiply("T","N",1.0_dp,&
8209!                  m_tmp_x_in,&
8210!                  m_STsiginv(ispin),&
8211!                  0.0_dp,m_tmp_oo_1,&
8212!                  filter_eps=eps_filter,&
8213!                  )
8214!          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
8215!          CALL dbcsr_multiply("N","N",1.0_dp,&
8216!                  m_g_full(ispin),&
8217!                  m_tmp_oo_1,&
8218!                  0.0_dp,m_tmp_no_2,&
8219!                  retain_sparsity=.TRUE.,&
8220!                  )
8221!          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
8222!                  1.0_dp,-1.0_dp)
8223
8224         ELSE IF (path_num .EQ. hessian_path_assemble) THEN
8225
8226            ! compute F_vv.X and S_vv.X directly
8227            ! this path will be advantageous if the number
8228            ! of PCG iterations is small
8229            CPABORT("path is NYI")
8230
8231         ELSE
8232            CPABORT("illegal path")
8233         ENDIF ! path
8234
8235         ! transform the output to take into account the normalization constraint
8236         IF (normalize_orbitals) THEN
8237
8238            ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
8239
8240            ! get [tr(T).HD]_ii
8241            CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
8242            CALL dbcsr_multiply("T", "N", 1.0_dp, &
8243                                m_t(ispin), &
8244                                m_x_out(ispin), &
8245                                0.0_dp, m_tmp_oo_1, &
8246                                retain_sparsity=.TRUE.)
8247            CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
8248            ALLOCATE (tg_diagonal(dim0))
8249            CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
8250            CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
8251            CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
8252            DEALLOCATE (tg_diagonal)
8253
8254            CALL dbcsr_multiply("N", "N", -1.0_dp, &
8255                                m_ST(ispin), &
8256                                m_tmp_oo_1, &
8257                                1.0_dp, m_x_out(ispin), &
8258                                retain_sparsity=.TRUE.)
8259            CALL dbcsr_copy(m_tmp_no_1, m_x_out(ispin))
8260            CALL dbcsr_multiply("N", "N", 1.0_dp, &
8261                                m_tmp_no_1, &
8262                                m_sig_sqrti_ii(ispin), &
8263                                0.0_dp, m_x_out(ispin), &
8264                                retain_sparsity=.TRUE.)
8265
8266         ENDIF ! normalize_orbitals
8267
8268         CALL dbcsr_scale(m_x_out(ispin), &
8269                          2.0_dp*spin_factor)
8270
8271         CALL dbcsr_release(m_tmp_oo_1)
8272         CALL dbcsr_release(m_tmp_no_1)
8273         CALL dbcsr_release(m_tmp_no_2)
8274         CALL dbcsr_release(m_tmp_x_in)
8275
8276      ENDDO !ispin
8277
8278      ! there is one more part of the hessian that comes
8279      ! from T-dependence of the KS matrix
8280      ! it is neglected here
8281
8282      CALL timestop(handle)
8283
8284   END SUBROUTINE apply_hessian
8285
8286! *****************************************************************************
8287!> \brief Serial code that constructs an approximate Hessian
8288!> \param matrix_grad ...
8289!> \param matrix_step ...
8290!> \param matrix_S_ao ...
8291!> \param matrix_F_ao ...
8292!> \param matrix_S_mo ...
8293!> \param matrix_F_mo ...
8294!> \param matrix_S_vo ...
8295!> \param matrix_F_vo ...
8296!> \param quench_t ...
8297!> \param penalty_occ_vol ...
8298!> \param penalty_occ_vol_prefactor ...
8299!> \param penalty_occ_vol_pf2 ...
8300!> \param spin_factor ...
8301!> \param eps_zero ...
8302!> \param m_s ...
8303!> \param para_env ...
8304!> \param blacs_env ...
8305!> \par History
8306!>       2012.02 created [Rustam Z. Khaliullin]
8307!> \author Rustam Z. Khaliullin
8308! **************************************************************************************************
8309   SUBROUTINE hessian_diag_apply(matrix_grad, matrix_step, matrix_S_ao, &
8310                                 matrix_F_ao, matrix_S_mo, matrix_F_mo, matrix_S_vo, matrix_F_vo, quench_t, &
8311                                 penalty_occ_vol, penalty_occ_vol_prefactor, penalty_occ_vol_pf2, &
8312                                 spin_factor, eps_zero, m_s, para_env, blacs_env)
8313
8314      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_grad, matrix_step, matrix_S_ao, &
8315                                                            matrix_F_ao, matrix_S_mo
8316      TYPE(dbcsr_type), INTENT(IN)                       :: matrix_F_mo
8317      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_S_vo, matrix_F_vo, quench_t
8318      LOGICAL, INTENT(IN)                                :: penalty_occ_vol
8319      REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
8320                                                            penalty_occ_vol_pf2, spin_factor, &
8321                                                            eps_zero
8322      TYPE(dbcsr_type), INTENT(IN)                       :: m_s
8323      TYPE(cp_para_env_type), POINTER                    :: para_env
8324      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
8325
8326      CHARACTER(len=*), PARAMETER :: routineN = 'hessian_diag_apply', &
8327         routineP = moduleN//':'//routineN
8328
8329      INTEGER :: ao_hori_offset, ao_vert_offset, block_col, block_row, col, H_size, handle, ii, &
8330         INFO, jj, lev1_hori_offset, lev1_vert_offset, lev2_hori_offset, lev2_vert_offset, LWORK, &
8331         nblkcols_tot, nblkrows_tot, ncores, orb_i, orb_j, row, unit_nr, zero_neg_eiv
8332      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, ao_domain_sizes, &
8333                                                            mo_block_sizes
8334      INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
8335      LOGICAL                                            :: found, found_col, found_row
8336      REAL(KIND=dp)                                      :: penalty_prefactor_local, test_error
8337      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues, Grad_vec, Step_vec, tmp, &
8338                                                            tmpr, work
8339      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: F_ao_block, F_mo_block, H, Hinv, &
8340                                                            S_ao_block, S_mo_block, test, test2
8341      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p, p_new_block
8342      TYPE(cp_logger_type), POINTER                      :: logger
8343      TYPE(dbcsr_distribution_type)                      :: main_dist
8344      TYPE(dbcsr_type)                                   :: matrix_F_ao_sym, matrix_F_mo_sym, &
8345                                                            matrix_S_ao_sym, matrix_S_mo_sym
8346
8347      CALL timeset(routineN, handle)
8348
8349      ! get a useful output_unit
8350      logger => cp_get_default_logger()
8351      IF (logger%para_env%ionode) THEN
8352         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
8353      ELSE
8354         unit_nr = -1
8355      ENDIF
8356
8357      !JHU use and test for unused debug variables
8358      CPASSERT(ASSOCIATED(blacs_env))
8359      CPASSERT(ASSOCIATED(para_env))
8360      CALL dbcsr_get_info(m_s, row_blk_size=ao_blk_sizes)
8361      CALL dbcsr_get_info(matrix_S_vo, row_blk_size=ao_blk_sizes)
8362      CALL dbcsr_get_info(matrix_F_vo, row_blk_size=ao_blk_sizes)
8363
8364      ! serial code only
8365      CALL dbcsr_get_info(matrix=matrix_S_ao, distribution=main_dist)
8366      CALL dbcsr_distribution_get(main_dist, numnodes=ncores)
8367      IF (ncores .GT. 1) THEN
8368         CPABORT("serial code only")
8369      ENDIF
8370
8371      nblkrows_tot = dbcsr_nblkrows_total(quench_t)
8372      nblkcols_tot = dbcsr_nblkcols_total(quench_t)
8373      CPASSERT(nblkrows_tot == nblkcols_tot)
8374      CALL dbcsr_get_info(quench_t, row_blk_size=ao_blk_sizes)
8375      CALL dbcsr_get_info(quench_t, col_blk_size=mo_blk_sizes)
8376      ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
8377      ALLOCATE (ao_domain_sizes(nblkcols_tot))
8378      mo_block_sizes(:) = mo_blk_sizes(:)
8379      ao_block_sizes(:) = ao_blk_sizes(:)
8380      ao_domain_sizes(:) = 0
8381
8382      CALL dbcsr_create(matrix_S_ao_sym, &
8383                        template=matrix_S_ao, &
8384                        matrix_type=dbcsr_type_no_symmetry)
8385      CALL dbcsr_desymmetrize(matrix_S_ao, matrix_S_ao_sym)
8386      CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
8387
8388      CALL dbcsr_create(matrix_F_ao_sym, &
8389                        template=matrix_F_ao, &
8390                        matrix_type=dbcsr_type_no_symmetry)
8391      CALL dbcsr_desymmetrize(matrix_F_ao, matrix_F_ao_sym)
8392      CALL dbcsr_scale(matrix_F_ao_sym, 2.0_dp*spin_factor)
8393
8394      CALL dbcsr_create(matrix_S_mo_sym, &
8395                        template=matrix_S_mo, &
8396                        matrix_type=dbcsr_type_no_symmetry)
8397      CALL dbcsr_desymmetrize(matrix_S_mo, matrix_S_mo_sym)
8398
8399      CALL dbcsr_create(matrix_F_mo_sym, &
8400                        template=matrix_F_mo, &
8401                        matrix_type=dbcsr_type_no_symmetry)
8402      CALL dbcsr_desymmetrize(matrix_F_mo, matrix_F_mo_sym)
8403
8404      IF (penalty_occ_vol) THEN
8405         penalty_prefactor_local = penalty_occ_vol_prefactor/(2.0_dp*spin_factor)
8406      ELSE
8407         penalty_prefactor_local = 0.0_dp
8408      ENDIF
8409
8410      WRITE (unit_nr, *) "penalty_prefactor_local: ", penalty_prefactor_local
8411      WRITE (unit_nr, *) "penalty_prefactor_2: ", penalty_occ_vol_pf2
8412
8413      !CALL dbcsr_print(matrix_grad)
8414      !CALL dbcsr_print(matrix_F_ao_sym)
8415      !CALL dbcsr_print(matrix_S_ao_sym)
8416      !CALL dbcsr_print(matrix_F_mo_sym)
8417      !CALL dbcsr_print(matrix_S_mo_sym)
8418
8419      ! loop over domains to find the size of the Hessian
8420      H_size = 0
8421      DO col = 1, nblkcols_tot
8422
8423         ! find sizes of AO submatrices
8424         DO row = 1, nblkrows_tot
8425
8426            CALL dbcsr_get_block_p(quench_t, &
8427                                   row, col, block_p, found)
8428            IF (found) THEN
8429               ao_domain_sizes(col) = ao_domain_sizes(col) + ao_blk_sizes(row)
8430            ENDIF
8431
8432         ENDDO
8433
8434         H_size = H_size + ao_domain_sizes(col)*mo_block_sizes(col)
8435
8436      ENDDO
8437
8438      ALLOCATE (H(H_size, H_size))
8439      H(:, :) = 0.0_dp
8440
8441      ! fill the Hessian matrix
8442      lev1_vert_offset = 0
8443      ! loop over all pairs of fragments
8444      DO row = 1, nblkcols_tot
8445
8446         lev1_hori_offset = 0
8447         DO col = 1, nblkcols_tot
8448
8449            ! prepare blocks for the current row-column fragment pair
8450            ALLOCATE (F_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
8451            ALLOCATE (S_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
8452            ALLOCATE (F_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
8453            ALLOCATE (S_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
8454
8455            F_ao_block(:, :) = 0.0_dp
8456            S_ao_block(:, :) = 0.0_dp
8457            F_mo_block(:, :) = 0.0_dp
8458            S_mo_block(:, :) = 0.0_dp
8459
8460            ! fill AO submatrices
8461            ! loop over all blocks of the AO dbcsr matrix
8462            ao_vert_offset = 0
8463            DO block_row = 1, nblkcols_tot
8464
8465               CALL dbcsr_get_block_p(quench_t, &
8466                                      block_row, row, block_p, found_row)
8467               IF (found_row) THEN
8468
8469                  ao_hori_offset = 0
8470                  DO block_col = 1, nblkcols_tot
8471
8472                     CALL dbcsr_get_block_p(quench_t, &
8473                                            block_col, col, block_p, found_col)
8474                     IF (found_col) THEN
8475
8476                        CALL dbcsr_get_block_p(matrix_F_ao_sym, &
8477                                               block_row, block_col, block_p, found)
8478                        IF (found) THEN
8479                           ! copy the block into the submatrix
8480                           F_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
8481                                      ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
8482                              = block_p(:, :)
8483                        ENDIF
8484
8485                        CALL dbcsr_get_block_p(matrix_S_ao_sym, &
8486                                               block_row, block_col, block_p, found)
8487                        IF (found) THEN
8488                           ! copy the block into the submatrix
8489                           S_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
8490                                      ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
8491                              = block_p(:, :)
8492                        ENDIF
8493
8494                        ao_hori_offset = ao_hori_offset + ao_block_sizes(block_col)
8495
8496                     ENDIF
8497
8498                  ENDDO
8499
8500                  ao_vert_offset = ao_vert_offset + ao_block_sizes(block_row)
8501
8502               ENDIF
8503
8504            ENDDO
8505
8506            ! fill MO submatrices
8507            CALL dbcsr_get_block_p(matrix_F_mo_sym, row, col, block_p, found)
8508            IF (found) THEN
8509               ! copy the block into the submatrix
8510               F_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
8511            ENDIF
8512            CALL dbcsr_get_block_p(matrix_S_mo_sym, row, col, block_p, found)
8513            IF (found) THEN
8514               ! copy the block into the submatrix
8515               S_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
8516            ENDIF
8517
8518            !WRITE(*,*) "F_AO_BLOCK", row, col, ao_domain_sizes(row), ao_domain_sizes(col)
8519            !DO ii=1,ao_domain_sizes(row)
8520            !  WRITE(*,'(100F13.9)') F_ao_block(ii,:)
8521            !ENDDO
8522            !WRITE(*,*) "S_AO_BLOCK", row, col
8523            !DO ii=1,ao_domain_sizes(row)
8524            !  WRITE(*,'(100F13.9)') S_ao_block(ii,:)
8525            !ENDDO
8526            !WRITE(*,*) "F_MO_BLOCK", row, col
8527            !DO ii=1,mo_block_sizes(row)
8528            !  WRITE(*,'(100F13.9)') F_mo_block(ii,:)
8529            !ENDDO
8530            !WRITE(*,*) "S_MO_BLOCK", row, col, mo_block_sizes(row), mo_block_sizes(col)
8531            !DO ii=1,mo_block_sizes(row)
8532            !  WRITE(*,'(100F13.9)') S_mo_block(ii,:)
8533            !ENDDO
8534
8535            ! construct tensor products for the current row-column fragment pair
8536            lev2_vert_offset = 0
8537            DO orb_j = 1, mo_block_sizes(row)
8538
8539               lev2_hori_offset = 0
8540               DO orb_i = 1, mo_block_sizes(col)
8541                  IF (orb_i .EQ. orb_j .AND. row .EQ. col) THEN
8542                     H(lev1_vert_offset + lev2_vert_offset + 1:lev1_vert_offset + lev2_vert_offset + ao_domain_sizes(row), &
8543                       lev1_hori_offset + lev2_hori_offset + 1:lev1_hori_offset + lev2_hori_offset + ao_domain_sizes(col)) &
8544                        != -penalty_prefactor_local*S_ao_block(:,:)
8545                        = F_ao_block(:, :) + S_ao_block(:, :)
8546!=S_ao_block(:,:)
8547!RZK-warning               =F_ao_block(:,:)+( 1.0_dp + penalty_prefactor_local )*S_ao_block(:,:)
8548!               =S_mo_block(orb_j,orb_i)*F_ao_block(:,:)&
8549!               -F_mo_block(orb_j,orb_i)*S_ao_block(:,:)&
8550!               +penalty_prefactor_local*S_mo_block(orb_j,orb_i)*S_ao_block(:,:)
8551                  ENDIF
8552                  !WRITE(*,*) row, col, orb_j, orb_i, lev1_vert_offset+lev2_vert_offset+1, ao_domain_sizes(row),&
8553                  !   lev1_hori_offset+lev2_hori_offset+1, ao_domain_sizes(col), S_mo_block(orb_j,orb_i)
8554
8555                  lev2_hori_offset = lev2_hori_offset + ao_domain_sizes(col)
8556
8557               ENDDO
8558
8559               lev2_vert_offset = lev2_vert_offset + ao_domain_sizes(row)
8560
8561            ENDDO
8562
8563            lev1_hori_offset = lev1_hori_offset + ao_domain_sizes(col)*mo_block_sizes(col)
8564
8565            DEALLOCATE (F_ao_block)
8566            DEALLOCATE (S_ao_block)
8567            DEALLOCATE (F_mo_block)
8568            DEALLOCATE (S_mo_block)
8569
8570         ENDDO ! col fragment
8571
8572         lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(row)*mo_block_sizes(row)
8573
8574      ENDDO ! row fragment
8575
8576      CALL dbcsr_release(matrix_S_ao_sym)
8577      CALL dbcsr_release(matrix_F_ao_sym)
8578      CALL dbcsr_release(matrix_S_mo_sym)
8579      CALL dbcsr_release(matrix_F_mo_sym)
8580
8581!!    ! Two more terms of the Hessian: S_vo.D.F_vo and F_vo.D.S_vo
8582!!    ! It seems that these terms break positive definite property of the Hessian
8583!!    ALLOCATE(H1(H_size,H_size))
8584!!    ALLOCATE(H2(H_size,H_size))
8585!!    H1=0.0_dp
8586!!    H2=0.0_dp
8587!!    DO row = 1, nblkcols_tot
8588!!
8589!!       lev1_hori_offset=0
8590!!       DO col = 1, nblkcols_tot
8591!!
8592!!          CALL dbcsr_get_block_p(matrix_F_vo,&
8593!!                  row, col, block_p, found)
8594!!          CALL dbcsr_get_block_p(matrix_S_vo,&
8595!!                  row, col, block_p2, found2)
8596!!
8597!!          lev1_vert_offset=0
8598!!          DO block_col = 1, nblkcols_tot
8599!!
8600!!             CALL dbcsr_get_block_p(quench_t,&
8601!!                     row, block_col, p_new_block, found_row)
8602!!
8603!!             IF (found_row) THEN
8604!!
8605!!                ! determine offset in this short loop
8606!!                lev2_vert_offset=0
8607!!                DO block_row=1,row-1
8608!!                   CALL dbcsr_get_block_p(quench_t,&
8609!!                           block_row, block_col, p_new_block, found_col)
8610!!                   IF (found_col) lev2_vert_offset=lev2_vert_offset+ao_block_sizes(block_row)
8611!!                ENDDO
8612!!                !!!!!!!! short loop
8613!!
8614!!                ! over all electrons of the block
8615!!                DO orb_i=1, mo_block_sizes(col)
8616!!
8617!!                   ! into all possible locations
8618!!                   DO orb_j=1, mo_block_sizes(block_col)
8619!!
8620!!                      ! column is copied several times
8621!!                      DO copy=1, ao_domain_sizes(col)
8622!!
8623!!                         IF (found) THEN
8624!!
8625!!                            !WRITE(*,*) row, col, block_col, orb_i, orb_j, copy,&
8626!!                            ! lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1,&
8627!!                            ! lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy
8628!!
8629!!                            H1( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
8630!!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
8631!!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
8632!!                              =block_p(:,orb_i)
8633!!
8634!!                         ENDIF ! found block in the data matrix
8635!!
8636!!                         IF (found2) THEN
8637!!
8638!!                            H2( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
8639!!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
8640!!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
8641!!                              =block_p2(:,orb_i)
8642!!
8643!!                         ENDIF ! found block in the data matrix
8644!!
8645!!                      ENDDO
8646!!
8647!!                   ENDDO
8648!!
8649!!                ENDDO
8650!!
8651!!                !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
8652!!
8653!!             ENDIF ! found block in the quench matrix
8654!!
8655!!             lev1_vert_offset=lev1_vert_offset+&
8656!!                ao_domain_sizes(block_col)*mo_block_sizes(block_col)
8657!!
8658!!          ENDDO
8659!!
8660!!          lev1_hori_offset=lev1_hori_offset+&
8661!!             ao_domain_sizes(col)*mo_block_sizes(col)
8662!!
8663!!       ENDDO
8664!!
8665!!       !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
8666!!
8667!!    ENDDO
8668!!    H1(:,:)=H1(:,:)*2.0_dp*spin_factor
8669!!    !!!WRITE(*,*) "F_vo"
8670!!    !!!DO ii=1,H_size
8671!!    !!! WRITE(*,'(100F13.9)') H1(ii,:)
8672!!    !!!ENDDO
8673!!    !!!WRITE(*,*) "S_vo"
8674!!    !!!DO ii=1,H_size
8675!!    !!! WRITE(*,'(100F13.9)') H2(ii,:)
8676!!    !!!ENDDO
8677!!    !!!!! add terms to the hessian
8678!!    DO ii=1,H_size
8679!!       DO jj=1,H_size
8680!!! add penalty_occ_vol term
8681!!          H(ii,jj)=H(ii,jj)-H1(ii,jj)*H2(jj,ii)-H1(jj,ii)*H2(ii,jj)
8682!!       ENDDO
8683!!    ENDDO
8684!!    DEALLOCATE(H1)
8685!!    DEALLOCATE(H2)
8686
8687!!    ! S_vo.S_vo diagonal component due to determiant constraint
8688!!    ! use grad vector temporarily
8689!!    IF (penalty_occ_vol) THEN
8690!!       ALLOCATE(Grad_vec(H_size))
8691!!       Grad_vec(:)=0.0_dp
8692!!       lev1_vert_offset=0
8693!!       ! loop over all electron blocks
8694!!       DO col = 1, nblkcols_tot
8695!!
8696!!          ! loop over AO-rows of the dbcsr matrix
8697!!          lev2_vert_offset=0
8698!!          DO row = 1, nblkrows_tot
8699!!
8700!!             CALL dbcsr_get_block_p(quench_t,&
8701!!                     row, col, block_p, found_row)
8702!!             IF (found_row) THEN
8703!!
8704!!                CALL dbcsr_get_block_p(matrix_S_vo,&
8705!!                        row, col, block_p, found)
8706!!                IF (found) THEN
8707!!                   ! copy the data into the vector, column by column
8708!!                   DO orb_i=1, mo_block_sizes(col)
8709!!                      Grad_vec(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
8710!!                               lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
8711!!                               =block_p(:,orb_i)
8712!!                   ENDDO
8713!!
8714!!                ENDIF
8715!!
8716!!                lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
8717!!
8718!!             ENDIF
8719!!
8720!!          ENDDO
8721!!
8722!!          lev1_vert_offset=lev1_vert_offset+ao_domain_sizes(col)*mo_block_sizes(col)
8723!!
8724!!       ENDDO ! loop over electron blocks
8725!!       ! update H now
8726!!       DO ii=1,H_size
8727!!          DO jj=1,H_size
8728!!             H(ii,jj)=H(ii,jj)+penalty_occ_vol_prefactor*&
8729!!                      penalty_occ_vol_pf2*Grad_vec(ii)*Grad_vec(jj)
8730!!          ENDDO
8731!!       ENDDO
8732!!       DEALLOCATE(Grad_vec)
8733!!    ENDIF ! penalty_occ_vol
8734
8735!S-1.G ! invert S using cholesky
8736!S-1.G CALL dbcsr_create(m_prec_out,&
8737!S-1.G         template=m_s,&
8738!S-1.G         matrix_type=dbcsr_type_no_symmetry)
8739!S-1.G CALL dbcsr_copy(m_prec_out,m_s)
8740!S-1.G CALL dbcsr_cholesky_decompose(m_prec_out,&
8741!S-1.G         para_env=para_env,&
8742!S-1.G         blacs_env=blacs_env)
8743!S-1.G CALL dbcsr_cholesky_invert(m_prec_out,&
8744!S-1.G         para_env=para_env,&
8745!S-1.G         blacs_env=blacs_env,&
8746!S-1.G         upper_to_full=.TRUE.)
8747!S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
8748!S-1.G         m_prec_out,&
8749!S-1.G         matrix_grad,&
8750!S-1.G         0.0_dp,matrix_step,&
8751!S-1.G         filter_eps=1.0E-10_dp)
8752!S-1.G !CALL dbcsr_release(m_prec_out)
8753!S-1.G ALLOCATE(test3(H_size))
8754
8755      ! convert gradient from the dbcsr matrix to the vector form
8756      ALLOCATE (Grad_vec(H_size))
8757      Grad_vec(:) = 0.0_dp
8758      lev1_vert_offset = 0
8759      ! loop over all electron blocks
8760      DO col = 1, nblkcols_tot
8761
8762         ! loop over AO-rows of the dbcsr matrix
8763         lev2_vert_offset = 0
8764         DO row = 1, nblkrows_tot
8765
8766            CALL dbcsr_get_block_p(quench_t, &
8767                                   row, col, block_p, found_row)
8768            IF (found_row) THEN
8769
8770               CALL dbcsr_get_block_p(matrix_grad, &
8771                                      row, col, block_p, found)
8772               IF (found) THEN
8773                  ! copy the data into the vector, column by column
8774                  DO orb_i = 1, mo_block_sizes(col)
8775                     Grad_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
8776                              lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row)) &
8777                        = block_p(:, orb_i)
8778!WRITE(*,*) "GRAD: ", row, col, orb_i, lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1, ao_block_sizes(row)
8779                  ENDDO
8780
8781               ENDIF
8782
8783!S-1.G CALL dbcsr_get_block_p(matrix_step,&
8784!S-1.G         row, col, block_p, found)
8785!S-1.G IF (found) THEN
8786!S-1.G    ! copy the data into the vector, column by column
8787!S-1.G    DO orb_i=1, mo_block_sizes(col)
8788!S-1.G       test3(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
8789!S-1.G                lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
8790!S-1.G                =block_p(:,orb_i)
8791!S-1.G    ENDDO
8792!S-1.G ENDIF
8793
8794               lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
8795
8796            ENDIF
8797
8798         ENDDO
8799
8800         lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
8801
8802      ENDDO ! loop over electron blocks
8803
8804      !WRITE(*,*) "HESSIAN"
8805      !DO ii=1,H_size
8806      ! WRITE(*,*) ii
8807      ! WRITE(*,'(20F14.10)') H(ii,:)
8808      !ENDDO
8809
8810      ! invert the Hessian
8811      INFO = 0
8812      ALLOCATE (Hinv(H_size, H_size))
8813      Hinv(:, :) = H(:, :)
8814
8815      ! before inverting diagonalize
8816      ALLOCATE (eigenvalues(H_size))
8817      ! Query the optimal workspace for dsyev
8818      LWORK = -1
8819      ALLOCATE (WORK(MAX(1, LWORK)))
8820      CALL DSYEV('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
8821      LWORK = INT(WORK(1))
8822      DEALLOCATE (WORK)
8823      ! Allocate the workspace and solve the eigenproblem
8824      ALLOCATE (WORK(MAX(1, LWORK)))
8825      CALL DSYEV('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
8826      IF (INFO .NE. 0) THEN
8827         WRITE (unit_nr, *) 'DSYEV ERROR MESSAGE: ', INFO
8828         CPABORT("DSYEV failed")
8829      END IF
8830      DEALLOCATE (WORK)
8831
8832      ! compute grad vector in the basis of Hessian eigenvectors
8833      ALLOCATE (Step_vec(H_size))
8834      ! Step_vec contains Grad_vec here
8835      Step_vec(:) = MATMUL(TRANSPOSE(Hinv), Grad_vec)
8836
8837      ! compute U.tr(U)-1 = error
8838      !ALLOCATE(test(H_size,H_size))
8839      !test(:,:)=MATMUL(TRANSPOSE(Hinv),Hinv)
8840      !DO ii=1,H_size
8841      !   test(ii,ii)=test(ii,ii)-1.0_dp
8842      !ENDDO
8843      !test_error=0.0_dp
8844      !DO ii=1,H_size
8845      !   DO jj=1,H_size
8846      !      test_error=test_error+test(jj,ii)*test(jj,ii)
8847      !   ENDDO
8848      !ENDDO
8849      !WRITE(*,*) "U.tr(U)-1 error: ", SQRT(test_error)
8850      !DEALLOCATE(test)
8851
8852      ! invert eigenvalues and use eigenvectors to compute the Hessian inverse
8853      ! project out zero-eigenvalue directions
8854      ALLOCATE (test(H_size, H_size))
8855      zero_neg_eiv = 0
8856      DO jj = 1, H_size
8857         WRITE (unit_nr, "(I10,F20.10,F20.10)") jj, eigenvalues(jj), Step_vec(jj)
8858         IF (eigenvalues(jj) .GT. eps_zero) THEN
8859            test(jj, :) = Hinv(:, jj)/eigenvalues(jj)
8860         ELSE
8861            test(jj, :) = Hinv(:, jj)*0.0_dp
8862            zero_neg_eiv = zero_neg_eiv + 1
8863         ENDIF
8864      ENDDO
8865      WRITE (unit_nr, *) 'ZERO OR NEGATIVE EIGENVALUES: ', zero_neg_eiv
8866      DEALLOCATE (Step_vec)
8867
8868      ALLOCATE (test2(H_size, H_size))
8869      test2(:, :) = MATMUL(Hinv, test)
8870      Hinv(:, :) = test2(:, :)
8871      DEALLOCATE (test, test2)
8872
8873      !! shift to kill singularity
8874      !shift=0.0_dp
8875      !IF (eigenvalues(1).lt.0.0_dp) THEN
8876      !   CPABORT("Negative eigenvalue(s)")
8877      !   shift=abs(eigenvalues(1))
8878      !   WRITE(*,*) "Lowest eigenvalue: ", eigenvalues(1)
8879      !ENDIF
8880      !DO ii=1, H_size
8881      !   IF (eigenvalues(ii).gt.eps_zero) THEN
8882      !      shift=shift+min(1.0_dp,eigenvalues(ii))*1.0E-4_dp
8883      !      EXIT
8884      !   ENDIF
8885      !ENDDO
8886      !WRITE(*,*) "Hessian shift: ", shift
8887      !DO ii=1, H_size
8888      !   H(ii,ii)=H(ii,ii)+shift
8889      !ENDDO
8890      !! end shift
8891
8892      DEALLOCATE (eigenvalues)
8893
8894!!!!    Hinv=H
8895!!!!    INFO=0
8896!!!!    CALL DPOTRF('L', H_size, Hinv, H_size, INFO )
8897!!!!    IF( INFO.NE.0 ) THEN
8898!!!!       WRITE(*,*) 'DPOTRF ERROR MESSAGE: ', INFO
8899!!!!       CPABORT("DPOTRF failed")
8900!!!!    END IF
8901!!!!    CALL DPOTRI('L', H_size, Hinv, H_size, INFO )
8902!!!!    IF( INFO.NE.0 ) THEN
8903!!!!       WRITE(*,*) 'DPOTRI ERROR MESSAGE: ', INFO
8904!!!!       CPABORT("DPOTRI failed")
8905!!!!    END IF
8906!!!!    ! complete the matrix
8907!!!!    DO ii=1,H_size
8908!!!!       DO jj=ii+1,H_size
8909!!!!          Hinv(ii,jj)=Hinv(jj,ii)
8910!!!!       ENDDO
8911!!!!    ENDDO
8912
8913      ! compute the inversion error
8914      ALLOCATE (test(H_size, H_size))
8915      test(:, :) = MATMUL(Hinv, H)
8916      DO ii = 1, H_size
8917         test(ii, ii) = test(ii, ii) - 1.0_dp
8918      ENDDO
8919      test_error = 0.0_dp
8920      DO ii = 1, H_size
8921         DO jj = 1, H_size
8922            test_error = test_error + test(jj, ii)*test(jj, ii)
8923         ENDDO
8924      ENDDO
8925      WRITE (unit_nr, *) "Hessian inversion error: ", SQRT(test_error)
8926      DEALLOCATE (test)
8927
8928      ! prepare the output vector
8929      ALLOCATE (Step_vec(H_size))
8930      ALLOCATE (tmp(H_size))
8931      tmp(:) = MATMUL(Hinv, Grad_vec)
8932      !tmp(:)=MATMUL(Hinv,test3)
8933      Step_vec(:) = -1.0_dp*tmp(:)
8934
8935      ALLOCATE (tmpr(H_size))
8936      tmpr(:) = MATMUL(H, Step_vec)
8937      tmp(:) = tmpr(:) + Grad_vec(:)
8938      DEALLOCATE (tmpr)
8939      WRITE (unit_nr, *) "NEWTOV step error: ", MAXVAL(ABS(tmp))
8940
8941      DEALLOCATE (tmp)
8942
8943      DEALLOCATE (H)
8944      DEALLOCATE (Hinv)
8945      DEALLOCATE (Grad_vec)
8946
8947!S-1.G DEALLOCATE(test3)
8948
8949      ! copy the step from the vector into the dbcsr matrix
8950
8951      ! re-create the step matrix to remove all blocks
8952      CALL dbcsr_create(matrix_step, &
8953                        template=matrix_grad, &
8954                        matrix_type=dbcsr_type_no_symmetry)
8955      CALL dbcsr_work_create(matrix_step, work_mutable=.TRUE.)
8956
8957      lev1_vert_offset = 0
8958      ! loop over all electron blocks
8959      DO col = 1, nblkcols_tot
8960
8961         ! loop over AO-rows of the dbcsr matrix
8962         lev2_vert_offset = 0
8963         DO row = 1, nblkrows_tot
8964
8965            CALL dbcsr_get_block_p(quench_t, &
8966                                   row, col, block_p, found_row)
8967            IF (found_row) THEN
8968
8969               NULLIFY (p_new_block)
8970               CALL dbcsr_reserve_block2d(matrix_step, row, col, p_new_block)
8971               CPASSERT(ASSOCIATED(p_new_block))
8972               ! copy the data column by column
8973               DO orb_i = 1, mo_block_sizes(col)
8974                  p_new_block(:, orb_i) = &
8975                     Step_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
8976                              lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row))
8977               ENDDO
8978
8979               lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
8980
8981            ENDIF
8982
8983         ENDDO
8984
8985         lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
8986
8987      ENDDO ! loop over electron blocks
8988
8989      DEALLOCATE (Step_vec)
8990
8991      CALL dbcsr_finalize(matrix_step)
8992
8993!S-1.G CALL dbcsr_create(m_tmp_no_1,&
8994!S-1.G         template=matrix_step,&
8995!S-1.G         matrix_type=dbcsr_type_no_symmetry)
8996!S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
8997!S-1.G         m_prec_out,&
8998!S-1.G         matrix_step,&
8999!S-1.G         0.0_dp,m_tmp_no_1,&
9000!S-1.G         filter_eps=1.0E-10_dp,&
9001!S-1.G         )
9002!S-1.G CALL dbcsr_copy(matrix_step,m_tmp_no_1)
9003!S-1.G CALL dbcsr_release(m_tmp_no_1)
9004!S-1.G CALL dbcsr_release(m_prec_out)
9005
9006      DEALLOCATE (mo_block_sizes, ao_block_sizes)
9007      DEALLOCATE (ao_domain_sizes)
9008
9009      CALL dbcsr_create(matrix_S_ao_sym, &
9010                        template=quench_t, &
9011                        matrix_type=dbcsr_type_no_symmetry)
9012      CALL dbcsr_copy(matrix_S_ao_sym, quench_t)
9013      CALL dbcsr_multiply("N", "N", 1.0_dp, &
9014                          matrix_F_ao, &
9015                          matrix_step, &
9016                          0.0_dp, matrix_S_ao_sym, &
9017                          retain_sparsity=.TRUE.)
9018      CALL dbcsr_create(matrix_F_ao_sym, &
9019                        template=quench_t, &
9020                        matrix_type=dbcsr_type_no_symmetry)
9021      CALL dbcsr_copy(matrix_F_ao_sym, quench_t)
9022      CALL dbcsr_multiply("N", "N", 1.0_dp, &
9023                          matrix_S_ao, &
9024                          matrix_step, &
9025                          0.0_dp, matrix_F_ao_sym, &
9026                          retain_sparsity=.TRUE.)
9027      CALL dbcsr_add(matrix_S_ao_sym, matrix_F_ao_sym, &
9028                     1.0_dp, 1.0_dp)
9029      CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
9030      CALL dbcsr_add(matrix_S_ao_sym, matrix_grad, &
9031                     1.0_dp, 1.0_dp)
9032      CALL dbcsr_norm(matrix_S_ao_sym, dbcsr_norm_maxabsnorm, &
9033                      norm_scalar=test_error)
9034      WRITE (unit_nr, *) "NEWTOL step error: ", test_error
9035      CALL dbcsr_release(matrix_S_ao_sym)
9036      CALL dbcsr_release(matrix_F_ao_sym)
9037
9038      CALL timestop(handle)
9039
9040   END SUBROUTINE hessian_diag_apply
9041
9042! **************************************************************************************************
9043!> \brief Optimization of ALMOs using trust region minimizers
9044!> \param qs_env ...
9045!> \param almo_scf_env ...
9046!> \param optimizer   controls the optimization algorithm
9047!> \param quench_t ...
9048!> \param matrix_t_in ...
9049!> \param matrix_t_out ...
9050!> \param perturbation_only - perturbative (do not update Hamiltonian)
9051!> \param special_case   to reduce the overhead special cases are implemented:
9052!>                       xalmo_case_normal - no special case (i.e. xALMOs)
9053!>                       xalmo_case_block_diag
9054!>                       xalmo_case_fully_deloc
9055!> \par History
9056!>       2020.01 created [Rustam Z Khaliullin]
9057!> \author Rustam Z Khaliullin
9058! **************************************************************************************************
9059   SUBROUTINE almo_scf_xalmo_trustr(qs_env, almo_scf_env, optimizer, quench_t, &
9060                                    matrix_t_in, matrix_t_out, perturbation_only, &
9061                                    special_case)
9062
9063      TYPE(qs_environment_type), POINTER                 :: qs_env
9064      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
9065      TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
9066      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: quench_t, matrix_t_in, matrix_t_out
9067      LOGICAL, INTENT(IN)                                :: perturbation_only
9068      INTEGER, INTENT(IN), OPTIONAL                      :: special_case
9069
9070      CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_trustr', &
9071         routineP = moduleN//':'//routineN
9072
9073      INTEGER :: handle, ispin, iteration, iteration_type_to_report, my_special_case, ndomains, &
9074         nspins, outer_iteration, prec_type, unit_nr
9075      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
9076      LOGICAL :: assume_t0_q0x, border_reached, inner_loop_success, normalize_orbitals, &
9077         optimize_theta, penalty_occ_vol, reset_conjugator, same_position, scf_converged
9078      REAL(kind=dp) :: beta, energy_start, energy_trial, eta, expected_reduction, &
9079         fake_step_size_to_report, grad_norm_ratio, grad_norm_ref, loss_change_to_report, &
9080         loss_start, loss_trial, model_grad_norm, penalty_amplitude, penalty_start, penalty_trial, &
9081         radius_current, radius_max, real_temp, rho, spin_factor, step_norm, step_size, t1, &
9082         t1outer, t2, t2outer, y_scalar
9083      REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
9084                                                            penalty_occ_vol_g_prefactor, &
9085                                                            penalty_occ_vol_h_prefactor
9086      TYPE(cp_logger_type), POINTER                      :: logger
9087      TYPE(dbcsr_type)                                   :: m_s_inv
9088      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_model_Bd, m_model_d, &
9089         m_model_hessian, m_model_hessian_inv, m_model_r, m_model_r_prev, m_model_rt, &
9090         m_model_rt_prev, m_sig_sqrti_ii, m_theta, m_theta_trial, prev_step, siginvTFTsiginv, ST, &
9091         step, STsiginv_0
9092      TYPE(domain_submatrix_type), ALLOCATABLE, &
9093         DIMENSION(:, :)                                 :: domain_model_hessian_inv, domain_r_down
9094
9095      ! RZK-warning: number of temporary storage matrices can be reduced
9096      CALL timeset(routineN, handle)
9097
9098      t1outer = m_walltime()
9099
9100      my_special_case = xalmo_case_normal
9101      IF (PRESENT(special_case)) my_special_case = special_case
9102
9103      ! get a useful output_unit
9104      logger => cp_get_default_logger()
9105      IF (logger%para_env%ionode) THEN
9106         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
9107      ELSE
9108         unit_nr = -1
9109      ENDIF
9110
9111      ! Trust radius code is written to obviate the need in projected orbitals
9112      assume_t0_q0x = .FALSE.
9113      ! Smoothing of the orbitals have not been implemented
9114      optimize_theta = .FALSE.
9115
9116      nspins = almo_scf_env%nspins
9117      IF (nspins == 1) THEN
9118         spin_factor = 2.0_dp
9119      ELSE
9120         spin_factor = 1.0_dp
9121      ENDIF
9122
9123      IF (unit_nr > 0) THEN
9124         WRITE (unit_nr, *)
9125         SELECT CASE (my_special_case)
9126         CASE (xalmo_case_block_diag)
9127            WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
9128               " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
9129         CASE (xalmo_case_fully_deloc)
9130            WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
9131               " Optimization of fully delocalized MOs ", REPEAT("-", 20)
9132         CASE (xalmo_case_normal)
9133            WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
9134               " Optimization of XALMOs ", REPEAT("-", 28)
9135         END SELECT
9136         WRITE (unit_nr, *)
9137         CALL trust_r_report(unit_nr, &
9138                             iter_type=0, & ! print header, all values are ignored
9139                             iteration=0, &
9140                             radius=0.0_dp, &
9141                             loss=0.0_dp, &
9142                             delta_loss=0.0_dp, &
9143                             grad_norm=0.0_dp, &
9144                             predicted_reduction=0.0_dp, &
9145                             rho=0.0_dp, &
9146                             new=.TRUE., &
9147                             time=0.0_dp)
9148         WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
9149      ENDIF
9150
9151      ! penalty amplitude adjusts the strength of volume conservation
9152      penalty_occ_vol = .FALSE.
9153      !(almo_scf_env%penalty%occ_vol_method .NE. almo_occ_vol_penalty_none .AND. &
9154      !                   my_special_case .EQ. xalmo_case_fully_deloc)
9155      normalize_orbitals = penalty_occ_vol
9156      penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
9157      ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
9158      ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
9159      penalty_occ_vol_g_prefactor(:) = 0.0_dp
9160      penalty_occ_vol_h_prefactor(:) = 0.0_dp
9161
9162      ! here preconditioner is the Hessian of model function
9163      prec_type = optimizer%preconditioner
9164
9165      ALLOCATE (grad_norm_spin(nspins))
9166      ALLOCATE (nocc(nspins))
9167
9168      ! m_theta contains a set of variational parameters
9169      ! that define one-electron orbitals (simple, projected, etc.)
9170      ALLOCATE (m_theta(nspins))
9171      DO ispin = 1, nspins
9172         CALL dbcsr_create(m_theta(ispin), &
9173                           template=matrix_t_out(ispin), &
9174                           matrix_type=dbcsr_type_no_symmetry)
9175      ENDDO
9176
9177      ! create initial guess from the initial orbitals
9178      CALL xalmo_initial_guess(m_guess=m_theta, &
9179                               m_t_in=matrix_t_in, &
9180                               m_t0=almo_scf_env%matrix_t_blk, &
9181                               m_quench_t=quench_t, &
9182                               m_overlap=almo_scf_env%matrix_s(1), &
9183                               m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
9184                               nspins=nspins, &
9185                               xalmo_history=almo_scf_env%xalmo_history, &
9186                               assume_t0_q0x=assume_t0_q0x, &
9187                               optimize_theta=optimize_theta, &
9188                               envelope_amplitude=almo_scf_env%envelope_amplitude, &
9189                               eps_filter=almo_scf_env%eps_filter, &
9190                               order_lanczos=almo_scf_env%order_lanczos, &
9191                               eps_lanczos=almo_scf_env%eps_lanczos, &
9192                               max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
9193                               nocc_of_domain=almo_scf_env%nocc_of_domain)
9194
9195      ndomains = almo_scf_env%ndomains
9196      ALLOCATE (domain_r_down(ndomains, nspins))
9197      CALL init_submatrices(domain_r_down)
9198      ALLOCATE (domain_model_hessian_inv(ndomains, nspins))
9199      CALL init_submatrices(domain_model_hessian_inv)
9200
9201      ALLOCATE (m_model_hessian(nspins))
9202      ALLOCATE (m_model_hessian_inv(nspins))
9203      ALLOCATE (siginvTFTsiginv(nspins))
9204      ALLOCATE (STsiginv_0(nspins))
9205      ALLOCATE (FTsiginv(nspins))
9206      ALLOCATE (ST(nspins))
9207      ALLOCATE (grad(nspins))
9208      ALLOCATE (prev_step(nspins))
9209      ALLOCATE (step(nspins))
9210      ALLOCATE (m_sig_sqrti_ii(nspins))
9211      ALLOCATE (m_model_r(nspins))
9212      ALLOCATE (m_model_rt(nspins))
9213      ALLOCATE (m_model_d(nspins))
9214      ALLOCATE (m_model_Bd(nspins))
9215      ALLOCATE (m_model_r_prev(nspins))
9216      ALLOCATE (m_model_rt_prev(nspins))
9217      ALLOCATE (m_theta_trial(nspins))
9218
9219      DO ispin = 1, nspins
9220
9221         ! init temporary storage
9222         CALL dbcsr_create(m_model_hessian_inv(ispin), &
9223                           template=almo_scf_env%matrix_ks(ispin), &
9224                           matrix_type=dbcsr_type_no_symmetry)
9225         CALL dbcsr_create(m_model_hessian(ispin), &
9226                           template=almo_scf_env%matrix_ks(ispin), &
9227                           matrix_type=dbcsr_type_no_symmetry)
9228         CALL dbcsr_create(siginvTFTsiginv(ispin), &
9229                           template=almo_scf_env%matrix_sigma(ispin), &
9230                           matrix_type=dbcsr_type_no_symmetry)
9231         CALL dbcsr_create(STsiginv_0(ispin), &
9232                           template=matrix_t_out(ispin), &
9233                           matrix_type=dbcsr_type_no_symmetry)
9234         CALL dbcsr_create(FTsiginv(ispin), &
9235                           template=matrix_t_out(ispin), &
9236                           matrix_type=dbcsr_type_no_symmetry)
9237         CALL dbcsr_create(ST(ispin), &
9238                           template=matrix_t_out(ispin), &
9239                           matrix_type=dbcsr_type_no_symmetry)
9240         CALL dbcsr_create(grad(ispin), &
9241                           template=matrix_t_out(ispin), &
9242                           matrix_type=dbcsr_type_no_symmetry)
9243         CALL dbcsr_create(prev_step(ispin), &
9244                           template=matrix_t_out(ispin), &
9245                           matrix_type=dbcsr_type_no_symmetry)
9246         CALL dbcsr_create(step(ispin), &
9247                           template=matrix_t_out(ispin), &
9248                           matrix_type=dbcsr_type_no_symmetry)
9249         CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
9250                           template=almo_scf_env%matrix_sigma_inv(ispin), &
9251                           matrix_type=dbcsr_type_no_symmetry)
9252         CALL dbcsr_create(m_model_r(ispin), &
9253                           template=matrix_t_out(ispin), &
9254                           matrix_type=dbcsr_type_no_symmetry)
9255         CALL dbcsr_create(m_model_rt(ispin), &
9256                           template=matrix_t_out(ispin), &
9257                           matrix_type=dbcsr_type_no_symmetry)
9258         CALL dbcsr_create(m_model_d(ispin), &
9259                           template=matrix_t_out(ispin), &
9260                           matrix_type=dbcsr_type_no_symmetry)
9261         CALL dbcsr_create(m_model_Bd(ispin), &
9262                           template=matrix_t_out(ispin), &
9263                           matrix_type=dbcsr_type_no_symmetry)
9264         CALL dbcsr_create(m_model_r_prev(ispin), &
9265                           template=matrix_t_out(ispin), &
9266                           matrix_type=dbcsr_type_no_symmetry)
9267         CALL dbcsr_create(m_model_rt_prev(ispin), &
9268                           template=matrix_t_out(ispin), &
9269                           matrix_type=dbcsr_type_no_symmetry)
9270         CALL dbcsr_create(m_theta_trial(ispin), &
9271                           template=matrix_t_out(ispin), &
9272                           matrix_type=dbcsr_type_no_symmetry)
9273
9274         CALL dbcsr_set(step(ispin), 0.0_dp)
9275         CALL dbcsr_set(prev_step(ispin), 0.0_dp)
9276
9277         CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
9278                             nfullrows_total=nocc(ispin))
9279
9280         ! invert S domains if necessary
9281         ! Note: domains for alpha and beta electrons might be different
9282         ! that is why the inversion of the AO overlap is inside the spin loop
9283         IF (my_special_case .EQ. xalmo_case_normal) THEN
9284
9285            CALL construct_domain_s_inv( &
9286               matrix_s=almo_scf_env%matrix_s(1), &
9287               subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
9288               dpattern=quench_t(ispin), &
9289               map=almo_scf_env%domain_map(ispin), &
9290               node_of_domain=almo_scf_env%cpu_of_domain)
9291
9292         ELSE IF (my_special_case .EQ. xalmo_case_block_diag) THEN
9293
9294            CALL dbcsr_create(m_s_inv, &
9295                              template=almo_scf_env%matrix_s(1), &
9296                              matrix_type=dbcsr_type_no_symmetry)
9297            CALL invert_Hotelling(m_s_inv, &
9298                                  almo_scf_env%matrix_s_blk(1), &
9299                                  threshold=almo_scf_env%eps_filter, &
9300                                  filter_eps=almo_scf_env%eps_filter)
9301
9302         ELSE IF (my_special_case .EQ. xalmo_case_fully_deloc) THEN
9303
9304            ! invert S using cholesky
9305            CALL dbcsr_create(m_s_inv, &
9306                              template=almo_scf_env%matrix_s(1), &
9307                              matrix_type=dbcsr_type_no_symmetry)
9308            CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1), m_s_inv)
9309            CALL cp_dbcsr_cholesky_decompose(m_s_inv, &
9310                                             para_env=almo_scf_env%para_env, &
9311                                             blacs_env=almo_scf_env%blacs_env)
9312            CALL cp_dbcsr_cholesky_invert(m_s_inv, &
9313                                          para_env=almo_scf_env%para_env, &
9314                                          blacs_env=almo_scf_env%blacs_env, &
9315                                          upper_to_full=.TRUE.)
9316            CALL dbcsr_filter(m_s_inv, eps=almo_scf_env%eps_filter)
9317
9318         ENDIF
9319
9320      ENDDO ! ispin
9321
9322      radius_max = optimizer%max_trust_radius
9323      radius_current = MIN(optimizer%initial_trust_radius, radius_max)
9324      ! eta must be between 0 and 0.25
9325      eta = MIN(MAX(optimizer%rho_do_not_update, 0.0_dp), 0.25_dp)
9326      energy_start = 0.0_dp
9327      energy_trial = 0.0_dp
9328      penalty_start = 0.0_dp
9329      penalty_trial = 0.0_dp
9330      loss_start = 0.0_dp ! sum of the energy and penalty
9331      loss_trial = 0.0_dp
9332
9333      same_position = .FALSE.
9334
9335      ! compute the energy
9336      CALL main_var_to_xalmos_and_loss_func( &
9337         almo_scf_env=almo_scf_env, &
9338         qs_env=qs_env, &
9339         m_main_var_in=m_theta, &
9340         m_t_out=matrix_t_out, &
9341         m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
9342         energy_out=energy_start, &
9343         penalty_out=penalty_start, &
9344         m_FTsiginv_out=FTsiginv, &
9345         m_siginvTFTsiginv_out=siginvTFTsiginv, &
9346         m_ST_out=ST, &
9347         m_STsiginv0_in=STsiginv_0, &
9348         m_quench_t_in=quench_t, &
9349         domain_r_down_in=domain_r_down, &
9350         assume_t0_q0x=assume_t0_q0x, &
9351         just_started=.TRUE., &
9352         optimize_theta=optimize_theta, &
9353         normalize_orbitals=normalize_orbitals, &
9354         perturbation_only=perturbation_only, &
9355         do_penalty=penalty_occ_vol, &
9356         special_case=my_special_case)
9357      loss_start = energy_start + penalty_start
9358      IF (my_special_case .EQ. xalmo_case_block_diag) THEN
9359         almo_scf_env%almo_scf_energy = energy_start
9360      ENDIF
9361      DO ispin = 1, nspins
9362         IF (penalty_occ_vol) THEN
9363            penalty_occ_vol_g_prefactor(ispin) = &
9364               -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
9365            penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
9366         ENDIF
9367      ENDDO ! ispin
9368
9369      ! start the outer step-size-adjustment loop
9370      scf_converged = .FALSE.
9371      adjust_r_loop: DO outer_iteration = 1, optimizer%max_iter_outer_loop
9372
9373         ! start the inner fixed-radius loop
9374         border_reached = .FALSE.
9375
9376         DO ispin = 1, nspins
9377            CALL dbcsr_set(step(ispin), 0.0_dp)
9378            CALL dbcsr_filter(step(ispin), almo_scf_env%eps_filter)
9379         ENDDO
9380
9381         IF (.NOT. same_position) THEN
9382
9383            DO ispin = 1, nspins
9384
9385               IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model gradient"
9386               CALL compute_gradient( &
9387                  m_grad_out=grad(ispin), &
9388                  m_ks=almo_scf_env%matrix_ks(ispin), &
9389                  m_s=almo_scf_env%matrix_s(1), &
9390                  m_t=matrix_t_out(ispin), &
9391                  m_t0=almo_scf_env%matrix_t_blk(ispin), &
9392                  m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
9393                  m_quench_t=quench_t(ispin), &
9394                  m_FTsiginv=FTsiginv(ispin), &
9395                  m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
9396                  m_ST=ST(ispin), &
9397                  m_STsiginv0=STsiginv_0(ispin), &
9398                  m_theta=m_theta(ispin), &
9399                  m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
9400                  domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
9401                  domain_r_down=domain_r_down(:, ispin), &
9402                  cpu_of_domain=almo_scf_env%cpu_of_domain, &
9403                  domain_map=almo_scf_env%domain_map(ispin), &
9404                  assume_t0_q0x=assume_t0_q0x, &
9405                  optimize_theta=optimize_theta, &
9406                  normalize_orbitals=normalize_orbitals, &
9407                  penalty_occ_vol=penalty_occ_vol, &
9408                  penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
9409                  envelope_amplitude=almo_scf_env%envelope_amplitude, &
9410                  eps_filter=almo_scf_env%eps_filter, &
9411                  spin_factor=spin_factor, &
9412                  special_case=my_special_case)
9413
9414            ENDDO ! ispin
9415
9416         ENDIF ! skip_grad
9417
9418         ! check convergence and other exit criteria
9419         DO ispin = 1, nspins
9420            CALL dbcsr_norm(grad(ispin), dbcsr_norm_maxabsnorm, &
9421                            norm_scalar=grad_norm_spin(ispin))
9422            !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
9423            !                 dbcsr_frobenius_norm(quench_t(ispin))
9424         ENDDO ! ispin
9425         grad_norm_ref = MAXVAL(grad_norm_spin)
9426
9427         t2outer = m_walltime()
9428         CALL trust_r_report(unit_nr, &
9429                             iter_type=1, & ! only some data is important
9430                             iteration=outer_iteration, &
9431                             loss=loss_start, &
9432                             delta_loss=0.0_dp, &
9433                             grad_norm=grad_norm_ref, &
9434                             predicted_reduction=0.0_dp, &
9435                             rho=0.0_dp, &
9436                             radius=radius_current, &
9437                             new=.NOT. same_position, &
9438                             time=t2outer - t1outer)
9439         t1outer = m_walltime()
9440
9441         IF (grad_norm_ref .LE. optimizer%eps_error) THEN
9442            scf_converged = .TRUE.
9443            border_reached = .FALSE.
9444            expected_reduction = 0.0_dp
9445            IF (.NOT. (optimizer%early_stopping_on .AND. outer_iteration .EQ. 1)) &
9446               EXIT adjust_r_loop
9447         ELSE
9448            scf_converged = .FALSE.
9449         ENDIF
9450
9451         DO ispin = 1, nspins
9452
9453            CALL dbcsr_copy(m_model_r(ispin), grad(ispin))
9454            CALL dbcsr_scale(m_model_r(ispin), -1.0_dp)
9455
9456            IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
9457                my_special_case .EQ. xalmo_case_fully_deloc) THEN
9458
9459               IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv.r"
9460               CALL dbcsr_multiply("N", "N", 1.0_dp, &
9461                                   m_s_inv, &
9462                                   m_model_r(ispin), &
9463                                   0.0_dp, m_model_rt(ispin), &
9464                                   filter_eps=almo_scf_env%eps_filter)
9465
9466            ELSE IF (my_special_case .EQ. xalmo_case_normal) THEN
9467
9468               IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv_xx.r"
9469               CALL apply_domain_operators( &
9470                  matrix_in=m_model_r(ispin), &
9471                  matrix_out=m_model_rt(ispin), &
9472                  operator1=almo_scf_env%domain_s_inv(:, ispin), &
9473                  dpattern=quench_t(ispin), &
9474                  map=almo_scf_env%domain_map(ispin), &
9475                  node_of_domain=almo_scf_env%cpu_of_domain, &
9476                  my_action=0, &
9477                  filter_eps=almo_scf_env%eps_filter)
9478
9479            ELSE
9480               CPABORT("Unknown XALMO special case")
9481            ENDIF
9482
9483            CALL dbcsr_copy(m_model_d(ispin), m_model_rt(ispin))
9484
9485         ENDDO ! ispin
9486
9487         ! compute model Hessian
9488         IF (.NOT. same_position) THEN
9489
9490            SELECT CASE (prec_type)
9491            CASE (xalmo_prec_domain)
9492
9493               IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model Hessian"
9494               DO ispin = 1, nspins
9495                  CALL compute_preconditioner( &
9496                     domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
9497                     m_prec_out=m_model_hessian(ispin), &
9498                     m_ks=almo_scf_env%matrix_ks(ispin), &
9499                     m_s=almo_scf_env%matrix_s(1), &
9500                     m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
9501                     m_quench_t=quench_t(ispin), &
9502                     m_FTsiginv=FTsiginv(ispin), &
9503                     m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
9504                     m_ST=ST(ispin), &
9505                     para_env=almo_scf_env%para_env, &
9506                     blacs_env=almo_scf_env%blacs_env, &
9507                     nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
9508                     domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
9509                     domain_r_down=domain_r_down(:, ispin), &
9510                     cpu_of_domain=almo_scf_env%cpu_of_domain, &
9511                     domain_map=almo_scf_env%domain_map(ispin), &
9512                     assume_t0_q0x=.FALSE., &
9513                     penalty_occ_vol=penalty_occ_vol, &
9514                     penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
9515                     eps_filter=almo_scf_env%eps_filter, &
9516                     neg_thr=0.5_dp, &
9517                     spin_factor=spin_factor, &
9518                     skip_inversion=.TRUE., &
9519                     special_case=my_special_case)
9520               ENDDO ! ispin
9521
9522            CASE DEFAULT
9523
9524               CPABORT("Unknown preconditioner")
9525
9526            END SELECT ! preconditioner type fork
9527
9528         ENDIF  ! not same position
9529
9530         ! print the header (argument values are ignored)
9531         CALL fixed_r_report(unit_nr, &
9532                             iter_type=0, &
9533                             iteration=0, &
9534                             step_size=0.0_dp, &
9535                             border_reached=.FALSE., &
9536                             curvature=0.0_dp, &
9537                             grad_norm_ratio=0.0_dp, &
9538                             time=0.0_dp)
9539
9540         IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Start inner loop"
9541
9542         t1 = m_walltime()
9543         inner_loop_success = .FALSE.
9544         ! trustr_steihaug, trustr_cauchy, trustr_dogleg
9545         fixed_r_loop: DO iteration = 1, optimizer%max_iter
9546
9547            ! Step 2. Get curvature. If negative, step to the border
9548            y_scalar = 0.0_dp
9549            DO ispin = 1, nspins
9550
9551               ! Get B.d
9552               IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
9553                   my_special_case .EQ. xalmo_case_fully_deloc) THEN
9554
9555                  CALL dbcsr_multiply("N", "N", 1.0_dp, &
9556                                      m_model_hessian(ispin), &
9557                                      m_model_d(ispin), &
9558                                      0.0_dp, m_model_Bd(ispin), &
9559                                      filter_eps=almo_scf_env%eps_filter)
9560
9561               ELSE
9562
9563                  CALL apply_domain_operators( &
9564                     matrix_in=m_model_d(ispin), &
9565                     matrix_out=m_model_Bd(ispin), &
9566                     operator1=almo_scf_env%domain_preconditioner(:, ispin), &
9567                     dpattern=quench_t(ispin), &
9568                     map=almo_scf_env%domain_map(ispin), &
9569                     node_of_domain=almo_scf_env%cpu_of_domain, &
9570                     my_action=0, &
9571                     filter_eps=almo_scf_env%eps_filter)
9572
9573               ENDIF ! special case
9574
9575               ! Get y=d^T.B.d
9576               CALL dbcsr_dot(m_model_d(ispin), m_model_Bd(ispin), real_temp)
9577               y_scalar = y_scalar + real_temp
9578
9579            ENDDO ! ispin
9580            IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Curvature: ", y_scalar
9581
9582            ! step to the border
9583            IF (y_scalar .LT. 0.0_dp) THEN
9584
9585               CALL step_size_to_border( &
9586                  step_size_out=step_size, &
9587                  metric_in=almo_scf_env%matrix_s, &
9588                  position_in=step, &
9589                  direction_in=m_model_d, &
9590                  trust_radius_in=radius_current, &
9591                  quench_t_in=quench_t, &
9592                  eps_filter_in=almo_scf_env%eps_filter &
9593                  )
9594
9595               DO ispin = 1, nspins
9596                  CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
9597               ENDDO
9598
9599               border_reached = .TRUE.
9600               inner_loop_success = .TRUE.
9601
9602               CALL predicted_reduction( &
9603                  reduction_out=expected_reduction, &
9604                  grad_in=grad, &
9605                  step_in=step, &
9606                  hess_in=m_model_hessian, &
9607                  hess_submatrix_in=almo_scf_env%domain_preconditioner, &
9608                  quench_t_in=quench_t, &
9609                  special_case=my_special_case, &
9610                  eps_filter=almo_scf_env%eps_filter, &
9611                  domain_map=almo_scf_env%domain_map, &
9612                  cpu_of_domain=almo_scf_env%cpu_of_domain &
9613                  )
9614
9615               t2 = m_walltime()
9616               CALL fixed_r_report(unit_nr, &
9617                                   iter_type=2, &
9618                                   iteration=iteration, &
9619                                   step_size=step_size, &
9620                                   border_reached=border_reached, &
9621                                   curvature=y_scalar, &
9622                                   grad_norm_ratio=expected_reduction, &
9623                                   time=t2 - t1)
9624
9625               EXIT fixed_r_loop ! the inner loop
9626
9627            ENDIF ! y is negative
9628
9629            ! Step 3. Compute the step size along the direction
9630            step_size = 0.0_dp
9631            DO ispin = 1, nspins
9632               CALL dbcsr_dot(m_model_r(ispin), m_model_rt(ispin), real_temp)
9633               step_size = step_size + real_temp
9634            ENDDO ! ispin
9635            step_size = step_size/y_scalar
9636            IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Proposed step size: ", step_size
9637
9638            ! Update the step matrix
9639            DO ispin = 1, nspins
9640               CALL dbcsr_copy(prev_step(ispin), step(ispin))
9641               CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
9642            ENDDO
9643
9644            ! Compute step norm
9645            CALL contravariant_matrix_norm( &
9646               norm_out=step_norm, &
9647               matrix_in=step, &
9648               metric_in=almo_scf_env%matrix_s, &
9649               quench_t_in=quench_t, &
9650               eps_filter_in=almo_scf_env%eps_filter &
9651               )
9652            IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step norm: ", step_norm
9653
9654            ! Do not step beyond the trust radius
9655            IF (step_norm .GT. radius_current) THEN
9656
9657               IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Norm is too large"
9658               CALL step_size_to_border( &
9659                  step_size_out=step_size, &
9660                  metric_in=almo_scf_env%matrix_s, &
9661                  position_in=prev_step, &
9662                  direction_in=m_model_d, &
9663                  trust_radius_in=radius_current, &
9664                  quench_t_in=quench_t, &
9665                  eps_filter_in=almo_scf_env%eps_filter &
9666                  )
9667               IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
9668
9669               DO ispin = 1, nspins
9670                  CALL dbcsr_copy(step(ispin), prev_step(ispin))
9671                  CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
9672               ENDDO
9673
9674               IF (debug_mode) THEN
9675                  ! Compute step norm
9676                  IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
9677                  CALL contravariant_matrix_norm( &
9678                     norm_out=step_norm, &
9679                     matrix_in=step, &
9680                     metric_in=almo_scf_env%matrix_s, &
9681                     quench_t_in=quench_t, &
9682                     eps_filter_in=almo_scf_env%eps_filter &
9683                     )
9684                  IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
9685                  IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
9686               ENDIF
9687
9688               border_reached = .TRUE.
9689               inner_loop_success = .TRUE.
9690
9691               CALL predicted_reduction( &
9692                  reduction_out=expected_reduction, &
9693                  grad_in=grad, &
9694                  step_in=step, &
9695                  hess_in=m_model_hessian, &
9696                  hess_submatrix_in=almo_scf_env%domain_preconditioner, &
9697                  quench_t_in=quench_t, &
9698                  special_case=my_special_case, &
9699                  eps_filter=almo_scf_env%eps_filter, &
9700                  domain_map=almo_scf_env%domain_map, &
9701                  cpu_of_domain=almo_scf_env%cpu_of_domain &
9702                  )
9703
9704               t2 = m_walltime()
9705               CALL fixed_r_report(unit_nr, &
9706                                   iter_type=3, &
9707                                   iteration=iteration, &
9708                                   step_size=step_size, &
9709                                   border_reached=border_reached, &
9710                                   curvature=y_scalar, &
9711                                   grad_norm_ratio=expected_reduction, &
9712                                   time=t2 - t1)
9713
9714               EXIT fixed_r_loop ! the inner loop
9715
9716            ENDIF
9717
9718            IF (optimizer%trustr_algorithm .EQ. trustr_cauchy) THEN
9719               ! trustr_steihaug, trustr_cauchy, trustr_dogleg
9720
9721               border_reached = .FALSE.
9722               inner_loop_success = .TRUE.
9723
9724               CALL predicted_reduction( &
9725                  reduction_out=expected_reduction, &
9726                  grad_in=grad, &
9727                  step_in=step, &
9728                  hess_in=m_model_hessian, &
9729                  hess_submatrix_in=almo_scf_env%domain_preconditioner, &
9730                  quench_t_in=quench_t, &
9731                  special_case=my_special_case, &
9732                  eps_filter=almo_scf_env%eps_filter, &
9733                  domain_map=almo_scf_env%domain_map, &
9734                  cpu_of_domain=almo_scf_env%cpu_of_domain &
9735                  )
9736
9737               t2 = m_walltime()
9738               CALL fixed_r_report(unit_nr, &
9739                                   iter_type=5, & ! Cauchy point
9740                                   iteration=iteration, &
9741                                   step_size=step_size, &
9742                                   border_reached=border_reached, &
9743                                   curvature=y_scalar, &
9744                                   grad_norm_ratio=expected_reduction, &
9745                                   time=t2 - t1)
9746
9747               EXIT fixed_r_loop ! the inner loop
9748
9749            ELSE IF (optimizer%trustr_algorithm .EQ. trustr_dogleg) THEN
9750
9751               ! invert or pseudo-invert B
9752               SELECT CASE (prec_type)
9753               CASE (xalmo_prec_domain)
9754
9755                  IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Pseudo-invert model Hessian"
9756                  IF (special_case .EQ. xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
9757
9758                     DO ispin = 1, nspins
9759                        CALL pseudo_invert_diagonal_blk( &
9760                           matrix_in=m_model_hessian(ispin), &
9761                           matrix_out=m_model_hessian_inv(ispin), &
9762                           nocc=almo_scf_env%nocc_of_domain(:, ispin) &
9763                           )
9764                     ENDDO
9765
9766                  ELSE IF (special_case .EQ. xalmo_case_fully_deloc) THEN ! the entire system is a block
9767
9768                     ! invert using cholesky decomposition
9769                     DO ispin = 1, nspins
9770                        CALL dbcsr_copy(m_model_hessian_inv(ispin), &
9771                                        m_model_hessian(ispin))
9772                        CALL cp_dbcsr_cholesky_decompose(m_model_hessian_inv(ispin), &
9773                                                         para_env=almo_scf_env%para_env, &
9774                                                         blacs_env=almo_scf_env%blacs_env)
9775                        CALL cp_dbcsr_cholesky_invert(m_model_hessian_inv(ispin), &
9776                                                      para_env=almo_scf_env%para_env, &
9777                                                      blacs_env=almo_scf_env%blacs_env, &
9778                                                      upper_to_full=.TRUE.)
9779                        CALL dbcsr_filter(m_model_hessian_inv(ispin), &
9780                                          eps=almo_scf_env%eps_filter)
9781                     ENDDO
9782
9783                  ELSE
9784
9785                     DO ispin = 1, nspins
9786                        CALL construct_domain_preconditioner( &
9787                           matrix_main=m_model_hessian(ispin), &
9788                           subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
9789                           subm_r_down=domain_r_down(:, ispin), &
9790                           matrix_trimmer=quench_t(ispin), &
9791                           dpattern=quench_t(ispin), &
9792                           map=almo_scf_env%domain_map(ispin), &
9793                           node_of_domain=almo_scf_env%cpu_of_domain, &
9794                           preconditioner=domain_model_hessian_inv(:, ispin), &
9795                           use_trimmer=.FALSE., &
9796                           my_action=0, & ! do not do domain (1-r0) projection
9797                           skip_inversion=.FALSE. &
9798                           )
9799                     ENDDO
9800
9801                  ENDIF ! special_case
9802
9803                  ! slower but more reliable way to get inverted hessian
9804                  !DO ispin = 1, nspins
9805                  !   CALL compute_preconditioner( &
9806                  !      domain_prec_out=domain_model_hessian_inv(:, ispin), &
9807                  !      m_prec_out=m_model_hessian_inv(ispin), & ! RZK-warning: this one is not inverted if DOMAINs
9808                  !      m_ks=almo_scf_env%matrix_ks(ispin), &
9809                  !      m_s=almo_scf_env%matrix_s(1), &
9810                  !      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
9811                  !      m_quench_t=quench_t(ispin), &
9812                  !      m_FTsiginv=FTsiginv(ispin), &
9813                  !      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
9814                  !      m_ST=ST(ispin), &
9815                  !      para_env=almo_scf_env%para_env, &
9816                  !      blacs_env=almo_scf_env%blacs_env, &
9817                  !      nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
9818                  !      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
9819                  !      domain_r_down=domain_r_down(:, ispin), &
9820                  !      cpu_of_domain=almo_scf_env%cpu_of_domain, &
9821                  !      domain_map=almo_scf_env%domain_map(ispin), &
9822                  !      assume_t0_q0x=.FALSE., &
9823                  !      penalty_occ_vol=penalty_occ_vol, &
9824                  !      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
9825                  !      eps_filter=almo_scf_env%eps_filter, &
9826                  !      neg_thr=1.0E10_dp, &
9827                  !      spin_factor=spin_factor, &
9828                  !      skip_inversion=.FALSE., &
9829                  !      special_case=my_special_case)
9830                  !ENDDO ! ispin
9831
9832               CASE DEFAULT
9833
9834                  CPABORT("Unknown preconditioner")
9835
9836               END SELECT ! preconditioner type fork
9837
9838               ! get pB = Binv.m_model_r = -Binv.grad
9839               DO ispin = 1, nspins
9840
9841                  ! Get B.d
9842                  IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
9843                      my_special_case .EQ. xalmo_case_fully_deloc) THEN
9844
9845                     CALL dbcsr_multiply("N", "N", 1.0_dp, &
9846                                         m_model_hessian_inv(ispin), &
9847                                         m_model_r(ispin), &
9848                                         0.0_dp, m_model_Bd(ispin), &
9849                                         filter_eps=almo_scf_env%eps_filter)
9850
9851                  ELSE
9852
9853                     CALL apply_domain_operators( &
9854                        matrix_in=m_model_r(ispin), &
9855                        matrix_out=m_model_Bd(ispin), &
9856                        operator1=domain_model_hessian_inv(:, ispin), &
9857                        dpattern=quench_t(ispin), &
9858                        map=almo_scf_env%domain_map(ispin), &
9859                        node_of_domain=almo_scf_env%cpu_of_domain, &
9860                        my_action=0, &
9861                        filter_eps=almo_scf_env%eps_filter)
9862
9863                  ENDIF ! special case
9864
9865               ENDDO ! ispin
9866
9867               ! Compute norm of pB
9868               CALL contravariant_matrix_norm( &
9869                  norm_out=step_norm, &
9870                  matrix_in=m_model_Bd, &
9871                  metric_in=almo_scf_env%matrix_s, &
9872                  quench_t_in=quench_t, &
9873                  eps_filter_in=almo_scf_env%eps_filter &
9874                  )
9875               IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm: ", step_norm
9876
9877               ! Do not step beyond the trust radius
9878               IF (step_norm .LE. radius_current) THEN
9879
9880                  IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Full dogleg"
9881
9882                  border_reached = .FALSE.
9883
9884                  DO ispin = 1, nspins
9885                     CALL dbcsr_copy(step(ispin), m_model_Bd(ispin))
9886                  ENDDO
9887
9888                  fake_step_size_to_report = 2.0_dp
9889                  iteration_type_to_report = 6
9890
9891               ELSE ! take a shorter dogleg step
9892
9893                  IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm is too large"
9894
9895                  border_reached = .TRUE.
9896
9897                  ! compute the dogleg vector = pB - pU
9898                  ! this destroys -Binv.grad content
9899                  DO ispin = 1, nspins
9900                     CALL dbcsr_add(m_model_Bd(ispin), step(ispin), 1.0_dp, -1.0_dp)
9901                  ENDDO
9902
9903                  CALL step_size_to_border( &
9904                     step_size_out=step_size, &
9905                     metric_in=almo_scf_env%matrix_s, &
9906                     position_in=step, &
9907                     direction_in=m_model_Bd, &
9908                     trust_radius_in=radius_current, &
9909                     quench_t_in=quench_t, &
9910                     eps_filter_in=almo_scf_env%eps_filter &
9911                     )
9912                  IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
9913                  IF (step_size .GE. 1.0_dp .OR. step_size .LE. 0.0_dp) THEN
9914                     CPABORT("Wrong dog-leg step, should not be here")
9915                  ENDIF
9916
9917                  DO ispin = 1, nspins
9918                     CALL dbcsr_add(step(ispin), m_model_Bd(ispin), 1.0_dp, step_size)
9919                  ENDDO
9920
9921                  fake_step_size_to_report = 1.0_dp + step_size
9922                  iteration_type_to_report = 7
9923
9924               ENDIF ! full or partial dogleg?
9925
9926               IF (debug_mode) THEN
9927                  ! Compute step norm
9928                  IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
9929                  CALL contravariant_matrix_norm( &
9930                     norm_out=step_norm, &
9931                     matrix_in=step, &
9932                     metric_in=almo_scf_env%matrix_s, &
9933                     quench_t_in=quench_t, &
9934                     eps_filter_in=almo_scf_env%eps_filter &
9935                     )
9936                  IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
9937                  IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
9938               ENDIF
9939
9940               CALL predicted_reduction( &
9941                  reduction_out=expected_reduction, &
9942                  grad_in=grad, &
9943                  step_in=step, &
9944                  hess_in=m_model_hessian, &
9945                  hess_submatrix_in=almo_scf_env%domain_preconditioner, &
9946                  quench_t_in=quench_t, &
9947                  special_case=my_special_case, &
9948                  eps_filter=almo_scf_env%eps_filter, &
9949                  domain_map=almo_scf_env%domain_map, &
9950                  cpu_of_domain=almo_scf_env%cpu_of_domain &
9951                  )
9952
9953               inner_loop_success = .TRUE.
9954
9955               t2 = m_walltime()
9956               CALL fixed_r_report(unit_nr, &
9957                                   iter_type=iteration_type_to_report, &
9958                                   iteration=iteration, &
9959                                   step_size=fake_step_size_to_report, &
9960                                   border_reached=border_reached, &
9961                                   curvature=y_scalar, &
9962                                   grad_norm_ratio=expected_reduction, &
9963                                   time=t2 - t1)
9964
9965               EXIT fixed_r_loop ! the inner loop
9966
9967            ENDIF ! Non-iterative subproblem methods exit here
9968
9969            ! Step 4: update model gradient
9970            DO ispin = 1, nspins
9971               ! save previous data
9972               CALL dbcsr_copy(m_model_r_prev(ispin), m_model_r(ispin))
9973               CALL dbcsr_add(m_model_r(ispin), m_model_Bd(ispin), &
9974                              1.0_dp, -step_size)
9975            ENDDO ! ispin
9976
9977            ! Model grad norm
9978            DO ispin = 1, nspins
9979               CALL dbcsr_norm(m_model_r(ispin), dbcsr_norm_maxabsnorm, &
9980                               norm_scalar=grad_norm_spin(ispin))
9981               !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
9982               !                 dbcsr_frobenius_norm(quench_t(ispin))
9983            ENDDO ! ispin
9984            model_grad_norm = MAXVAL(grad_norm_spin)
9985
9986            ! Check norm reduction
9987            grad_norm_ratio = model_grad_norm/grad_norm_ref
9988            IF (grad_norm_ratio .LT. optimizer%model_grad_norm_ratio) THEN
9989
9990               border_reached = .FALSE.
9991               inner_loop_success = .TRUE.
9992
9993               CALL predicted_reduction( &
9994                  reduction_out=expected_reduction, &
9995                  grad_in=grad, &
9996                  step_in=step, &
9997                  hess_in=m_model_hessian, &
9998                  hess_submatrix_in=almo_scf_env%domain_preconditioner, &
9999                  quench_t_in=quench_t, &
10000                  special_case=my_special_case, &
10001                  eps_filter=almo_scf_env%eps_filter, &
10002                  domain_map=almo_scf_env%domain_map, &
10003                  cpu_of_domain=almo_scf_env%cpu_of_domain &
10004                  )
10005
10006               t2 = m_walltime()
10007               CALL fixed_r_report(unit_nr, &
10008                                   iter_type=4, &
10009                                   iteration=iteration, &
10010                                   step_size=step_size, &
10011                                   border_reached=border_reached, &
10012                                   curvature=y_scalar, &
10013                                   grad_norm_ratio=expected_reduction, &
10014                                   time=t2 - t1)
10015
10016               EXIT fixed_r_loop ! the inner loop
10017
10018            ENDIF
10019
10020            ! Step 5: update model direction
10021            DO ispin = 1, nspins
10022               ! save previous data
10023               CALL dbcsr_copy(m_model_rt_prev(ispin), m_model_rt(ispin))
10024            ENDDO ! ispin
10025
10026            DO ispin = 1, nspins
10027
10028               IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
10029                   my_special_case .EQ. xalmo_case_fully_deloc) THEN
10030
10031                  CALL dbcsr_multiply("N", "N", 1.0_dp, &
10032                                      m_s_inv, &
10033                                      m_model_r(ispin), &
10034                                      0.0_dp, m_model_rt(ispin), &
10035                                      filter_eps=almo_scf_env%eps_filter)
10036
10037               ELSE IF (my_special_case .EQ. xalmo_case_normal) THEN
10038
10039                  CALL apply_domain_operators( &
10040                     matrix_in=m_model_r(ispin), &
10041                     matrix_out=m_model_rt(ispin), &
10042                     operator1=almo_scf_env%domain_s_inv(:, ispin), &
10043                     dpattern=quench_t(ispin), &
10044                     map=almo_scf_env%domain_map(ispin), &
10045                     node_of_domain=almo_scf_env%cpu_of_domain, &
10046                     my_action=0, &
10047                     filter_eps=almo_scf_env%eps_filter)
10048
10049               ENDIF
10050
10051            ENDDO ! ispin
10052
10053            CALL compute_cg_beta( &
10054               beta=beta, &
10055               reset_conjugator=reset_conjugator, &
10056               conjugator=optimizer%conjugator, &
10057               grad=m_model_r(:), &
10058               prev_grad=m_model_r_prev(:), &
10059               step=m_model_rt(:), &
10060               prev_step=m_model_rt_prev(:) &
10061               )
10062
10063            DO ispin = 1, nspins
10064               ! update direction
10065               CALL dbcsr_add(m_model_d(ispin), m_model_rt(ispin), beta, 1.0_dp)
10066            ENDDO ! ispin
10067
10068            t2 = m_walltime()
10069            CALL fixed_r_report(unit_nr, &
10070                                iter_type=1, &
10071                                iteration=iteration, &
10072                                step_size=step_size, &
10073                                border_reached=border_reached, &
10074                                curvature=y_scalar, &
10075                                grad_norm_ratio=grad_norm_ratio, &
10076                                time=t2 - t1)
10077            t1 = m_walltime()
10078
10079         ENDDO fixed_r_loop
10080         !!!! done with the inner loop
10081         ! the inner loop must return: step, predicted reduction,
10082         ! whether it reached the border and completed successfully
10083
10084         IF (.NOT. inner_loop_success) THEN
10085            CPABORT("Inner loop did not produce solution")
10086         ENDIF
10087
10088         DO ispin = 1, nspins
10089
10090            CALL dbcsr_copy(m_theta_trial(ispin), m_theta(ispin))
10091            CALL dbcsr_add(m_theta_trial(ispin), step(ispin), 1.0_dp, 1.0_dp)
10092
10093         ENDDO ! ispin
10094
10095         ! compute the energy
10096         !IF (.NOT. same_position) THEN
10097         CALL main_var_to_xalmos_and_loss_func( &
10098            almo_scf_env=almo_scf_env, &
10099            qs_env=qs_env, &
10100            m_main_var_in=m_theta_trial, &
10101            m_t_out=matrix_t_out, &
10102            m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
10103            energy_out=energy_trial, &
10104            penalty_out=penalty_trial, &
10105            m_FTsiginv_out=FTsiginv, &
10106            m_siginvTFTsiginv_out=siginvTFTsiginv, &
10107            m_ST_out=ST, &
10108            m_STsiginv0_in=STsiginv_0, &
10109            m_quench_t_in=quench_t, &
10110            domain_r_down_in=domain_r_down, &
10111            assume_t0_q0x=assume_t0_q0x, &
10112            just_started=.FALSE., &
10113            optimize_theta=optimize_theta, &
10114            normalize_orbitals=normalize_orbitals, &
10115            perturbation_only=perturbation_only, &
10116            do_penalty=penalty_occ_vol, &
10117            special_case=my_special_case)
10118         loss_trial = energy_trial + penalty_trial
10119         !ENDIF ! not same_position
10120
10121         rho = (loss_trial - loss_start)/expected_reduction
10122         loss_change_to_report = loss_trial - loss_start
10123
10124         IF (rho < 0.25_dp) THEN
10125            radius_current = 0.25_dp*radius_current
10126         ELSE
10127            IF (rho > 0.75_dp .AND. border_reached) THEN
10128               radius_current = MIN(2.0_dp*radius_current, radius_max)
10129            ENDIF
10130         ENDIF ! radius adjustment
10131
10132         IF (rho > eta) THEN
10133            DO ispin = 1, nspins
10134               CALL dbcsr_copy(m_theta(ispin), m_theta_trial(ispin))
10135            ENDDO ! ispin
10136            loss_start = loss_trial
10137            energy_start = energy_trial
10138            penalty_start = penalty_trial
10139            same_position = .FALSE.
10140            IF (my_special_case .EQ. xalmo_case_block_diag) THEN
10141               almo_scf_env%almo_scf_energy = energy_trial
10142            ENDIF
10143         ELSE
10144            same_position = .TRUE.
10145            IF (my_special_case .EQ. xalmo_case_block_diag) THEN
10146               almo_scf_env%almo_scf_energy = energy_start
10147            ENDIF
10148         ENDIF ! finalize step
10149
10150         t2outer = m_walltime()
10151         CALL trust_r_report(unit_nr, &
10152                             iter_type=2, &
10153                             iteration=outer_iteration, &
10154                             loss=loss_trial, &
10155                             delta_loss=loss_change_to_report, &
10156                             grad_norm=0.0_dp, &
10157                             predicted_reduction=expected_reduction, &
10158                             rho=rho, &
10159                             radius=radius_current, &
10160                             new=.NOT. same_position, &
10161                             time=t2outer - t1outer)
10162         t1outer = m_walltime()
10163
10164      ENDDO adjust_r_loop
10165
10166      ! post SCF-loop calculations
10167      IF (scf_converged) THEN
10168
10169         CALL wrap_up_xalmo_scf( &
10170            qs_env=qs_env, &
10171            almo_scf_env=almo_scf_env, &
10172            perturbation_in=perturbation_only, &
10173            m_xalmo_in=matrix_t_out, &
10174            m_quench_in=quench_t, &
10175            energy_inout=energy_start)
10176
10177      ENDIF ! if converged
10178
10179      DO ispin = 1, nspins
10180         CALL dbcsr_release(m_model_hessian_inv(ispin))
10181         CALL dbcsr_release(m_model_hessian(ispin))
10182         CALL dbcsr_release(STsiginv_0(ispin))
10183         CALL dbcsr_release(ST(ispin))
10184         CALL dbcsr_release(FTsiginv(ispin))
10185         CALL dbcsr_release(siginvTFTsiginv(ispin))
10186         CALL dbcsr_release(prev_step(ispin))
10187         CALL dbcsr_release(grad(ispin))
10188         CALL dbcsr_release(step(ispin))
10189         CALL dbcsr_release(m_theta(ispin))
10190         CALL dbcsr_release(m_sig_sqrti_ii(ispin))
10191         CALL dbcsr_release(m_model_r(ispin))
10192         CALL dbcsr_release(m_model_rt(ispin))
10193         CALL dbcsr_release(m_model_d(ispin))
10194         CALL dbcsr_release(m_model_Bd(ispin))
10195         CALL dbcsr_release(m_model_r_prev(ispin))
10196         CALL dbcsr_release(m_model_rt_prev(ispin))
10197         CALL dbcsr_release(m_theta_trial(ispin))
10198         CALL release_submatrices(domain_r_down(:, ispin))
10199         CALL release_submatrices(domain_model_hessian_inv(:, ispin))
10200      ENDDO ! ispin
10201
10202      DEALLOCATE (m_model_hessian)
10203      DEALLOCATE (m_model_hessian_inv)
10204      DEALLOCATE (siginvTFTsiginv)
10205      DEALLOCATE (STsiginv_0)
10206      DEALLOCATE (FTsiginv)
10207      DEALLOCATE (ST)
10208      DEALLOCATE (grad)
10209      DEALLOCATE (prev_step)
10210      DEALLOCATE (step)
10211      DEALLOCATE (m_sig_sqrti_ii)
10212      DEALLOCATE (m_model_r)
10213      DEALLOCATE (m_model_rt)
10214      DEALLOCATE (m_model_d)
10215      DEALLOCATE (m_model_Bd)
10216      DEALLOCATE (m_model_r_prev)
10217      DEALLOCATE (m_model_rt_prev)
10218      DEALLOCATE (m_theta_trial)
10219
10220      DEALLOCATE (domain_r_down)
10221      DEALLOCATE (domain_model_hessian_inv)
10222
10223      DEALLOCATE (penalty_occ_vol_g_prefactor)
10224      DEALLOCATE (penalty_occ_vol_h_prefactor)
10225      DEALLOCATE (grad_norm_spin)
10226      DEALLOCATE (nocc)
10227
10228      DEALLOCATE (m_theta)
10229
10230      IF (.NOT. scf_converged .AND. .NOT. optimizer%early_stopping_on) THEN
10231         CPABORT("Optimization not converged! ")
10232      ENDIF
10233
10234      CALL timestop(handle)
10235
10236   END SUBROUTINE almo_scf_xalmo_trustr
10237
10238! **************************************************************************************************
10239!> \brief Computes molecular orbitals and the objective (loss) function from the main variables
10240!>        Most important input and output variables are given as arguments explicitly.
10241!>        Some variables inside almo_scf_env (KS, DM) and qs_env are also updated but are not
10242!>        listed as arguments for brevity
10243!> \param almo_scf_env ...
10244!> \param qs_env ...
10245!> \param m_main_var_in ...
10246!> \param m_t_out ...
10247!> \param energy_out ...
10248!> \param penalty_out ...
10249!> \param m_sig_sqrti_ii_out ...
10250!> \param m_FTsiginv_out ...
10251!> \param m_siginvTFTsiginv_out ...
10252!> \param m_ST_out ...
10253!> \param m_STsiginv0_in ...
10254!> \param m_quench_t_in ...
10255!> \param domain_r_down_in ...
10256!> \param assume_t0_q0x ...
10257!> \param just_started ...
10258!> \param optimize_theta ...
10259!> \param normalize_orbitals ...
10260!> \param perturbation_only ...
10261!> \param do_penalty ...
10262!> \param special_case ...
10263!> \par History
10264!>       2019.12 created [Rustam Z Khaliullin]
10265!> \author Rustam Z Khaliullin
10266! **************************************************************************************************
10267   SUBROUTINE main_var_to_xalmos_and_loss_func(almo_scf_env, qs_env, m_main_var_in, &
10268                                               m_t_out, energy_out, penalty_out, m_sig_sqrti_ii_out, m_FTsiginv_out, &
10269                                               m_siginvTFTsiginv_out, m_ST_out, m_STsiginv0_in, m_quench_t_in, domain_r_down_in, &
10270                                               assume_t0_q0x, just_started, optimize_theta, normalize_orbitals, perturbation_only, &
10271                                               do_penalty, special_case)
10272
10273      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
10274      TYPE(qs_environment_type), POINTER                 :: qs_env
10275      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_main_var_in
10276      TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_t_out
10277      REAL(KIND=dp), INTENT(OUT)                         :: energy_out, penalty_out
10278      TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_sig_sqrti_ii_out, m_FTsiginv_out, &
10279                                                            m_siginvTFTsiginv_out, m_ST_out
10280      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_STsiginv0_in, m_quench_t_in
10281      TYPE(domain_submatrix_type), DIMENSION(:, :), &
10282         INTENT(IN)                                      :: domain_r_down_in
10283      LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
10284                                                            optimize_theta, normalize_orbitals, &
10285                                                            perturbation_only, do_penalty
10286      INTEGER, INTENT(IN)                                :: special_case
10287
10288      CHARACTER(len=*), PARAMETER :: routineN = 'main_var_to_xalmos_and_loss_func', &
10289         routineP = moduleN//':'//routineN
10290
10291      INTEGER                                            :: handle, ispin, nspins
10292      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
10293      REAL(KIND=dp)                                      :: det1, energy_ispin, penalty_amplitude, &
10294                                                            spin_factor
10295
10296      CALL timeset(routineN, handle)
10297
10298      energy_out = 0.0_dp
10299      penalty_out = 0.0_dp
10300
10301      nspins = SIZE(m_main_var_in)
10302      IF (nspins == 1) THEN
10303         spin_factor = 2.0_dp
10304      ELSE
10305         spin_factor = 1.0_dp
10306      ENDIF
10307
10308      penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
10309
10310      ALLOCATE (nocc(nspins))
10311      DO ispin = 1, nspins
10312         CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
10313                             nfullrows_total=nocc(ispin))
10314      ENDDO
10315
10316      DO ispin = 1, nspins
10317
10318         ! compute MO coefficients from the main variable
10319         CALL compute_xalmos_from_main_var( &
10320            m_var_in=m_main_var_in(ispin), &
10321            m_t_out=m_t_out(ispin), &
10322            m_quench_t=m_quench_t_in(ispin), &
10323            m_t0=almo_scf_env%matrix_t_blk(ispin), &
10324            m_oo_template=almo_scf_env%matrix_sigma_inv(ispin), &
10325            m_STsiginv0=m_STsiginv0_in(ispin), &
10326            m_s=almo_scf_env%matrix_s(1), &
10327            m_sig_sqrti_ii_out=m_sig_sqrti_ii_out(ispin), &
10328            domain_r_down=domain_r_down_in(:, ispin), &
10329            domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
10330            domain_map=almo_scf_env%domain_map(ispin), &
10331            cpu_of_domain=almo_scf_env%cpu_of_domain, &
10332            assume_t0_q0x=assume_t0_q0x, &
10333            just_started=just_started, &
10334            optimize_theta=optimize_theta, &
10335            normalize_orbitals=normalize_orbitals, &
10336            envelope_amplitude=almo_scf_env%envelope_amplitude, &
10337            eps_filter=almo_scf_env%eps_filter, &
10338            special_case=special_case, &
10339            nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
10340            order_lanczos=almo_scf_env%order_lanczos, &
10341            eps_lanczos=almo_scf_env%eps_lanczos, &
10342            max_iter_lanczos=almo_scf_env%max_iter_lanczos)
10343
10344         ! compute the global projectors (for the density matrix)
10345         CALL almo_scf_t_to_proj( &
10346            t=m_t_out(ispin), &
10347            p=almo_scf_env%matrix_p(ispin), &
10348            eps_filter=almo_scf_env%eps_filter, &
10349            orthog_orbs=.FALSE., &
10350            nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
10351            s=almo_scf_env%matrix_s(1), &
10352            sigma=almo_scf_env%matrix_sigma(ispin), &
10353            sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
10354            use_guess=.FALSE., &
10355            algorithm=almo_scf_env%sigma_inv_algorithm, &
10356            inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
10357            inverse_accelerator=almo_scf_env%order_lanczos, &
10358            eps_lanczos=almo_scf_env%eps_lanczos, &
10359            max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
10360            para_env=almo_scf_env%para_env, &
10361            blacs_env=almo_scf_env%blacs_env)
10362
10363         ! compute dm from the projector(s)
10364         CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
10365                          spin_factor)
10366
10367      ENDDO ! ispin
10368
10369      ! update the KS matrix and energy if necessary
10370      IF (perturbation_only) THEN
10371         ! note: do not combine the two IF statements
10372         IF (just_started) THEN
10373            DO ispin = 1, nspins
10374               CALL dbcsr_copy(almo_scf_env%matrix_ks(ispin), &
10375                               almo_scf_env%matrix_ks_0deloc(ispin))
10376            ENDDO
10377         ENDIF
10378      ELSE
10379         ! the KS matrix is updated outside the spin loop
10380         CALL almo_dm_to_almo_ks(qs_env, &
10381                                 almo_scf_env%matrix_p, &
10382                                 almo_scf_env%matrix_ks, &
10383                                 energy_out, &
10384                                 almo_scf_env%eps_filter, &
10385                                 almo_scf_env%mat_distr_aos)
10386      ENDIF
10387
10388      penalty_out = 0.0_dp
10389      DO ispin = 1, nspins
10390
10391         CALL compute_frequently_used_matrices( &
10392            filter_eps=almo_scf_env%eps_filter, &
10393            m_T_in=m_t_out(ispin), &
10394            m_siginv_in=almo_scf_env%matrix_sigma_inv(ispin), &
10395            m_S_in=almo_scf_env%matrix_s(1), &
10396            m_F_in=almo_scf_env%matrix_ks(ispin), &
10397            m_FTsiginv_out=m_FTsiginv_out(ispin), &
10398            m_siginvTFTsiginv_out=m_siginvTFTsiginv_out(ispin), &
10399            m_ST_out=m_ST_out(ispin))
10400
10401         IF (perturbation_only) THEN
10402            ! calculate objective function Tr(F_0 R)
10403            IF (ispin .EQ. 1) energy_out = 0.0_dp
10404            CALL dbcsr_dot(m_t_out(ispin), m_FTsiginv_out(ispin), energy_ispin)
10405            energy_out = energy_out + energy_ispin*spin_factor
10406         ENDIF
10407
10408         IF (do_penalty) THEN
10409
10410            CALL determinant(almo_scf_env%matrix_sigma(ispin), det1, &
10411                             almo_scf_env%eps_filter)
10412            penalty_out = penalty_out - &
10413                          penalty_amplitude*spin_factor*nocc(ispin)*LOG(det1)
10414
10415         ENDIF
10416
10417      ENDDO ! ispin
10418
10419      DEALLOCATE (nocc)
10420
10421      CALL timestop(handle)
10422
10423   END SUBROUTINE main_var_to_xalmos_and_loss_func
10424
10425! **************************************************************************************************
10426!> \brief Computes the step size required to reach the trust-radius border,
10427!>        measured from the origin,
10428!>        given the current position (position) in the direction (direction)
10429!> \param step_size_out ...
10430!> \param metric_in ...
10431!> \param position_in ...
10432!> \param direction_in ...
10433!> \param trust_radius_in ...
10434!> \param quench_t_in ...
10435!> \param eps_filter_in ...
10436!> \par History
10437!>       2019.12 created [Rustam Z Khaliullin]
10438!> \author Rustam Z Khaliullin
10439! **************************************************************************************************
10440   SUBROUTINE step_size_to_border(step_size_out, metric_in, position_in, &
10441                                  direction_in, trust_radius_in, quench_t_in, eps_filter_in)
10442
10443      REAL(KIND=dp), INTENT(INOUT)                       :: step_size_out
10444      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: metric_in, position_in, direction_in
10445      REAL(KIND=dp), INTENT(IN)                          :: trust_radius_in
10446      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
10447      REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
10448
10449      INTEGER                                            :: isol, ispin, nsolutions, &
10450                                                            nsolutions_found, nspins
10451      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
10452      REAL(KIND=dp)                                      :: discrim_sign, discriminant, solution, &
10453                                                            spin_factor, temp_real
10454      REAL(KIND=dp), DIMENSION(3)                        :: coef
10455      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
10456
10457      step_size_out = 0.0_dp
10458
10459      nspins = SIZE(position_in)
10460      IF (nspins == 1) THEN
10461         spin_factor = 2.0_dp
10462      ELSE
10463         spin_factor = 1.0_dp
10464      ENDIF
10465
10466      ALLOCATE (nocc(nspins))
10467      ALLOCATE (m_temp_no(nspins))
10468
10469      coef(:) = 0.0_dp
10470      DO ispin = 1, nspins
10471
10472         CALL dbcsr_create(m_temp_no(ispin), &
10473                           template=direction_in(ispin))
10474
10475         CALL dbcsr_get_info(direction_in(ispin), &
10476                             nfullcols_total=nocc(ispin))
10477
10478         CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
10479         CALL dbcsr_multiply("N", "N", 1.0_dp, &
10480                             metric_in(1), &
10481                             position_in(ispin), &
10482                             0.0_dp, m_temp_no(ispin), &
10483                             retain_sparsity=.TRUE.)
10484         CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
10485         CALL dbcsr_dot(position_in(ispin), m_temp_no(ispin), temp_real)
10486         coef(3) = coef(3) + temp_real/nocc(ispin)
10487         CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
10488         coef(2) = coef(2) + 2.0_dp*temp_real/nocc(ispin)
10489         CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
10490         CALL dbcsr_multiply("N", "N", 1.0_dp, &
10491                             metric_in(1), &
10492                             direction_in(ispin), &
10493                             0.0_dp, m_temp_no(ispin), &
10494                             retain_sparsity=.TRUE.)
10495         CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
10496         CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
10497         coef(1) = coef(1) + temp_real/nocc(ispin)
10498
10499         CALL dbcsr_release(m_temp_no(ispin))
10500
10501      ENDDO !ispin
10502
10503      DEALLOCATE (nocc)
10504      DEALLOCATE (m_temp_no)
10505
10506      coef(:) = coef(:)*spin_factor
10507      coef(3) = coef(3) - trust_radius_in*trust_radius_in
10508
10509      ! solve the quadratic equation
10510      discriminant = coef(2)*coef(2) - 4.0_dp*coef(1)*coef(3)
10511      IF (discriminant .GT. TINY(discriminant)) THEN
10512         nsolutions = 2
10513      ELSE IF (discriminant .LT. 0.0_dp) THEN
10514         nsolutions = 0
10515         CPABORT("Step to border: no solutions")
10516      ELSE
10517         nsolutions = 1
10518      ENDIF
10519
10520      discrim_sign = 1.0_dp
10521      nsolutions_found = 0
10522      DO isol = 1, nsolutions
10523         solution = -coef(2) + discrim_sign*SQRT(discriminant)/(2.0_dp*coef(1))
10524         IF (solution .GT. 0.0_dp) THEN
10525            nsolutions_found = nsolutions_found + 1
10526            step_size_out = solution
10527         ENDIF
10528         discrim_sign = -discrim_sign
10529      ENDDO
10530
10531      IF (nsolutions_found == 0) THEN
10532         CPABORT("Step to border: no positive solutions")
10533      ELSE IF (nsolutions_found == 2) THEN
10534         CPABORT("Two positive border steps possible!")
10535      ENDIF
10536
10537   END SUBROUTINE step_size_to_border
10538
10539! **************************************************************************************************
10540!> \brief Computes a norm of a contravariant NBasis x Occ matrix using proper metric
10541!> \param norm_out ...
10542!> \param matrix_in ...
10543!> \param metric_in ...
10544!> \param quench_t_in ...
10545!> \param eps_filter_in ...
10546!> \par History
10547!>       2019.12 created [Rustam Z Khaliullin]
10548!> \author Rustam Z Khaliullin
10549! **************************************************************************************************
10550   SUBROUTINE contravariant_matrix_norm(norm_out, matrix_in, metric_in, &
10551                                        quench_t_in, eps_filter_in)
10552
10553      REAL(KIND=dp), INTENT(OUT)                         :: norm_out
10554      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: matrix_in, metric_in, quench_t_in
10555      REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
10556
10557      INTEGER                                            :: ispin, nspins
10558      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
10559      REAL(KIND=dp)                                      :: my_norm, spin_factor, temp_real
10560      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
10561
10562      ! Frist thing: assign the output value to avoid norms being undefined
10563      norm_out = 0.0_dp
10564
10565      nspins = SIZE(matrix_in)
10566      IF (nspins == 1) THEN
10567         spin_factor = 2.0_dp
10568      ELSE
10569         spin_factor = 1.0_dp
10570      ENDIF
10571
10572      ALLOCATE (nocc(nspins))
10573      ALLOCATE (m_temp_no(nspins))
10574
10575      my_norm = 0.0_dp
10576      DO ispin = 1, nspins
10577
10578         CALL dbcsr_create(m_temp_no(ispin), template=matrix_in(ispin))
10579
10580         CALL dbcsr_get_info(matrix_in(ispin), &
10581                             nfullcols_total=nocc(ispin))
10582
10583         CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
10584         CALL dbcsr_multiply("N", "N", 1.0_dp, &
10585                             metric_in(1), &
10586                             matrix_in(ispin), &
10587                             0.0_dp, m_temp_no(ispin), &
10588                             retain_sparsity=.TRUE.)
10589         CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
10590         CALL dbcsr_dot(matrix_in(ispin), m_temp_no(ispin), temp_real)
10591
10592         my_norm = my_norm + temp_real/nocc(ispin)
10593
10594         CALL dbcsr_release(m_temp_no(ispin))
10595
10596      ENDDO !ispin
10597
10598      DEALLOCATE (nocc)
10599      DEALLOCATE (m_temp_no)
10600
10601      my_norm = my_norm*spin_factor
10602      norm_out = SQRT(my_norm)
10603
10604   END SUBROUTINE contravariant_matrix_norm
10605
10606! **************************************************************************************************
10607!> \brief Loss reduction for a given step is estimated using
10608!>        gradient and hessian
10609!> \param reduction_out ...
10610!> \param grad_in ...
10611!> \param step_in ...
10612!> \param hess_in ...
10613!> \param hess_submatrix_in ...
10614!> \param quench_t_in ...
10615!> \param special_case ...
10616!> \param eps_filter ...
10617!> \param domain_map ...
10618!> \param cpu_of_domain ...
10619!> \par History
10620!>       2019.12 created [Rustam Z Khaliullin]
10621!> \author Rustam Z Khaliullin
10622! **************************************************************************************************
10623   SUBROUTINE predicted_reduction(reduction_out, grad_in, step_in, hess_in, &
10624                                  hess_submatrix_in, quench_t_in, special_case, eps_filter, domain_map, &
10625                                  cpu_of_domain)
10626
10627      !RZK-noncritical: can be formulated without submatrices
10628      REAL(KIND=dp), INTENT(INOUT)                       :: reduction_out
10629      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: grad_in, step_in, hess_in
10630      TYPE(domain_submatrix_type), DIMENSION(:, :), &
10631         INTENT(IN)                                      :: hess_submatrix_in
10632      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
10633      INTEGER, INTENT(IN)                                :: special_case
10634      REAL(KIND=dp), INTENT(IN)                          :: eps_filter
10635      TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
10636      INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
10637
10638      INTEGER                                            :: ispin, nspins
10639      REAL(KIND=dp)                                      :: my_reduction, spin_factor, temp_real
10640      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
10641
10642      reduction_out = 0.0_dp
10643
10644      nspins = SIZE(grad_in)
10645      IF (nspins == 1) THEN
10646         spin_factor = 2.0_dp
10647      ELSE
10648         spin_factor = 1.0_dp
10649      ENDIF
10650
10651      ALLOCATE (m_temp_no(nspins))
10652
10653      my_reduction = 0.0_dp
10654      DO ispin = 1, nspins
10655
10656         CALL dbcsr_create(m_temp_no(ispin), template=grad_in(ispin))
10657
10658         CALL dbcsr_dot(step_in(ispin), grad_in(ispin), temp_real)
10659         my_reduction = my_reduction + temp_real
10660
10661         ! Get Hess.step
10662         IF (special_case .EQ. xalmo_case_block_diag .OR. &
10663             special_case .EQ. xalmo_case_fully_deloc) THEN
10664
10665            CALL dbcsr_multiply("N", "N", 1.0_dp, &
10666                                hess_in(ispin), &
10667                                step_in(ispin), &
10668                                0.0_dp, m_temp_no(ispin), &
10669                                filter_eps=eps_filter)
10670
10671         ELSE
10672
10673            CALL apply_domain_operators( &
10674               matrix_in=step_in(ispin), &
10675               matrix_out=m_temp_no(ispin), &
10676               operator1=hess_submatrix_in(:, ispin), &
10677               dpattern=quench_t_in(ispin), &
10678               map=domain_map(ispin), &
10679               node_of_domain=cpu_of_domain, &
10680               my_action=0, &
10681               filter_eps=eps_filter)
10682
10683         ENDIF ! special case
10684
10685         ! Get y=step^T.Hess.step
10686         CALL dbcsr_dot(step_in(ispin), m_temp_no(ispin), temp_real)
10687         my_reduction = my_reduction + 0.5_dp*temp_real
10688
10689         CALL dbcsr_release(m_temp_no(ispin))
10690
10691      ENDDO ! ispin
10692
10693      !RZK-critical: do we need to multiply by the spin factor?
10694      !my_reduction = spin_factor*my_reduction
10695
10696      reduction_out = my_reduction
10697
10698      DEALLOCATE (m_temp_no)
10699
10700   END SUBROUTINE predicted_reduction
10701
10702! **************************************************************************************************
10703!> \brief Prints key quantities from the fixed-radius minimizer
10704!> \param unit_nr ...
10705!> \param iter_type ...
10706!> \param iteration ...
10707!> \param step_size ...
10708!> \param border_reached ...
10709!> \param curvature ...
10710!> \param grad_norm_ratio ...
10711!> \param predicted_reduction ...
10712!> \param time ...
10713!> \par History
10714!>       2019.12 created [Rustam Z Khaliullin]
10715!> \author Rustam Z Khaliullin
10716! **************************************************************************************************
10717   SUBROUTINE fixed_r_report(unit_nr, iter_type, iteration, step_size, &
10718                             border_reached, curvature, grad_norm_ratio, predicted_reduction, time)
10719
10720      INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
10721      REAL(KIND=dp), INTENT(IN)                          :: step_size
10722      LOGICAL, INTENT(IN)                                :: border_reached
10723      REAL(KIND=dp), INTENT(IN)                          :: curvature
10724      REAL(KIND=dp), INTENT(IN), OPTIONAL                :: grad_norm_ratio, predicted_reduction
10725      REAL(KIND=dp), INTENT(IN)                          :: time
10726
10727      CHARACTER(LEN=20)                                  :: iter_type_str
10728      REAL(KIND=dp)                                      :: loss_or_grad_change
10729
10730      loss_or_grad_change = 0.0_dp
10731      IF (PRESENT(grad_norm_ratio)) THEN
10732         loss_or_grad_change = grad_norm_ratio
10733      ELSE IF (PRESENT(predicted_reduction)) THEN
10734         loss_or_grad_change = predicted_reduction
10735      ELSE
10736         CPABORT("one argument is missing")
10737      ENDIF
10738
10739      SELECT CASE (iter_type)
10740      CASE (0)
10741         iter_type_str = TRIM("Ignored")
10742      CASE (1)
10743         iter_type_str = TRIM("PCG")
10744      CASE (2)
10745         iter_type_str = TRIM("Neg. curvatr.")
10746      CASE (3)
10747         iter_type_str = TRIM("Step too long")
10748      CASE (4)
10749         iter_type_str = TRIM("Grad. reduced")
10750      CASE (5)
10751         iter_type_str = TRIM("Cauchy point")
10752      CASE (6)
10753         iter_type_str = TRIM("Full dogleg")
10754      CASE (7)
10755         iter_type_str = TRIM("Part. dogleg")
10756      CASE DEFAULT
10757         CPABORT("unknown report type")
10758      END SELECT
10759
10760      IF (unit_nr > 0) THEN
10761
10762         SELECT CASE (iter_type)
10763         CASE (0)
10764
10765            WRITE (unit_nr, *)
10766            WRITE (unit_nr, '(T4,A15,A6,A10,A10,A7,A20,A8)') &
10767               "Action", &
10768               "Iter", &
10769               "Curv", &
10770               "Step", &
10771               "Edge?", &
10772               "Grad/o.f. reduc", &
10773               "Time"
10774
10775         CASE DEFAULT
10776
10777            WRITE (unit_nr, '(T4,A15,I6,F10.5,F10.5,L7,F20.10,F8.2)') &
10778               iter_type_str, &
10779               iteration, &
10780               curvature, step_size, border_reached, &
10781               loss_or_grad_change, &
10782               time
10783
10784         END SELECT
10785
10786         ! epilogue
10787         SELECT CASE (iter_type)
10788         CASE (2, 3, 4, 5, 6, 7)
10789
10790            WRITE (unit_nr, *)
10791
10792         END SELECT
10793
10794      ENDIF
10795
10796   END SUBROUTINE fixed_r_report
10797
10798! **************************************************************************************************
10799!> \brief Prints key quantities from the loop that tunes trust radius
10800!> \param unit_nr ...
10801!> \param iter_type ...
10802!> \param iteration ...
10803!> \param radius ...
10804!> \param loss ...
10805!> \param delta_loss ...
10806!> \param grad_norm ...
10807!> \param predicted_reduction ...
10808!> \param rho ...
10809!> \param new ...
10810!> \param time ...
10811!> \par History
10812!>       2019.12 created [Rustam Z Khaliullin]
10813!> \author Rustam Z Khaliullin
10814! **************************************************************************************************
10815   SUBROUTINE trust_r_report(unit_nr, iter_type, iteration, radius, &
10816                             loss, delta_loss, grad_norm, predicted_reduction, rho, new, time)
10817
10818      INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
10819      REAL(KIND=dp), INTENT(IN)                          :: radius, loss, delta_loss, grad_norm, &
10820                                                            predicted_reduction, rho
10821      LOGICAL, INTENT(IN)                                :: new
10822      REAL(KIND=dp), INTENT(IN)                          :: time
10823
10824      CHARACTER(LEN=20)                                  :: iter_status, iter_type_str
10825
10826      SELECT CASE (iter_type)
10827      CASE (0) ! header
10828         iter_type_str = TRIM("Iter")
10829         iter_status = TRIM("Stat")
10830      CASE (1) ! first iteration, not all data is available yet
10831         iter_type_str = TRIM("TR INI")
10832         IF (new) THEN
10833            iter_status = "  New" ! new point
10834         ELSE
10835            iter_status = " Redo" ! restarted
10836         ENDIF
10837      CASE (2) ! typical
10838         iter_type_str = TRIM("TR FIN")
10839         IF (new) THEN
10840            iter_status = "  Acc" ! accepted
10841         ELSE
10842            iter_status = "  Rej" ! rejected
10843         ENDIF
10844      CASE DEFAULT
10845         CPABORT("unknown report type")
10846      END SELECT
10847
10848      IF (unit_nr > 0) THEN
10849
10850         SELECT CASE (iter_type)
10851         CASE (0)
10852
10853            WRITE (unit_nr, '(T2,A6,A5,A6,A22,A10,T67,A7,A6)') &
10854               "Method", &
10855               "Stat", &
10856               "Iter", &
10857               "Objective Function", &
10858               "Conver", &!"Model Change", "Rho", &
10859               "Radius", &
10860               "Time"
10861            WRITE (unit_nr, '(T41,A10,A10,A6)') &
10862               !"Method", &
10863               !"Iter", &
10864               !"Objective Function", &
10865               "Change", "Expct.", "Rho"
10866            !"Radius", &
10867            !"Time"
10868
10869         CASE (1)
10870
10871            WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,T67,ES7.0,F6.1)') &
10872               iter_type_str, &
10873               iter_status, &
10874               iteration, &
10875               loss, &
10876               grad_norm, & ! distinct
10877               radius, &
10878               time
10879
10880         CASE (2)
10881
10882            WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,ES10.2,F6.1,ES7.0,F6.1)') &
10883               iter_type_str, &
10884               iter_status, &
10885               iteration, &
10886               loss, &
10887               delta_loss, predicted_reduction, rho, & ! distinct
10888               radius, &
10889               time
10890
10891         END SELECT
10892      ENDIF
10893
10894   END SUBROUTINE trust_r_report
10895
10896! **************************************************************************************************
10897!> \brief ...
10898!> \param unit_nr ...
10899!> \param ref_energy ...
10900!> \param energy_lowering ...
10901! **************************************************************************************************
10902   SUBROUTINE energy_lowering_report(unit_nr, ref_energy, energy_lowering)
10903
10904      INTEGER, INTENT(IN)                                :: unit_nr
10905      REAL(KIND=dp), INTENT(IN)                          :: ref_energy, energy_lowering
10906
10907      ! print out the energy lowering
10908      IF (unit_nr > 0) THEN
10909         WRITE (unit_nr, *)
10910         WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY OF BLOCK-DIAGONAL ALMOs:", &
10911            ref_energy
10912         WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY LOWERING:", &
10913            energy_lowering
10914         WRITE (unit_nr, '(T2,A35,F25.10)') "CORRECTED ENERGY:", &
10915            ref_energy + energy_lowering
10916         WRITE (unit_nr, *)
10917      ENDIF
10918
10919   END SUBROUTINE energy_lowering_report
10920
10921   ! post SCF-loop calculations
10922! **************************************************************************************************
10923!> \brief ...
10924!> \param qs_env ...
10925!> \param almo_scf_env ...
10926!> \param perturbation_in ...
10927!> \param m_xalmo_in ...
10928!> \param m_quench_in ...
10929!> \param energy_inout ...
10930! **************************************************************************************************
10931   SUBROUTINE wrap_up_xalmo_scf(qs_env, almo_scf_env, perturbation_in, &
10932                                m_xalmo_in, m_quench_in, energy_inout)
10933
10934      TYPE(qs_environment_type), POINTER                 :: qs_env
10935      TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
10936      LOGICAL, INTENT(IN)                                :: perturbation_in
10937      TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_xalmo_in, m_quench_in
10938      REAL(KIND=dp), INTENT(INOUT)                       :: energy_inout
10939
10940      CHARACTER(len=*), PARAMETER :: routineN = 'wrap_up_xalmo_scf', &
10941         routineP = moduleN//':'//routineN
10942
10943      INTEGER                                            :: eda_unit, handle, ispin, nspins, unit_nr
10944      TYPE(cp_logger_type), POINTER                      :: logger
10945      TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no1, m_temp_no2
10946      TYPE(section_vals_type), POINTER                   :: almo_print_section, input
10947
10948      CALL timeset(routineN, handle)
10949
10950      ! get a useful output_unit
10951      logger => cp_get_default_logger()
10952      IF (logger%para_env%ionode) THEN
10953         unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
10954      ELSE
10955         unit_nr = -1
10956      ENDIF
10957
10958      nspins = almo_scf_env%nspins
10959
10960      ! RZK-warning: must obtain MO coefficients from final theta
10961
10962      IF (perturbation_in) THEN
10963
10964         ALLOCATE (m_temp_no1(nspins))
10965         ALLOCATE (m_temp_no2(nspins))
10966
10967         DO ispin = 1, nspins
10968            CALL dbcsr_create(m_temp_no1(ispin), template=m_xalmo_in(ispin))
10969            CALL dbcsr_create(m_temp_no2(ispin), template=m_xalmo_in(ispin))
10970         ENDDO
10971
10972         ! return perturbed density to qs_env
10973         CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, &
10974                                almo_scf_env%mat_distr_aos)
10975
10976         ! compute energy correction and perform
10977         ! detailed decomposition analysis (if requested)
10978         ! reuse step and grad matrices to store decomposition results
10979         CALL xalmo_analysis( &
10980            detailed_analysis=almo_scf_env%almo_analysis%do_analysis, &
10981            eps_filter=almo_scf_env%eps_filter, &
10982            m_T_in=m_xalmo_in, &
10983            m_T0_in=almo_scf_env%matrix_t_blk, &
10984            m_siginv_in=almo_scf_env%matrix_sigma_inv, &
10985            m_siginv0_in=almo_scf_env%matrix_sigma_inv_0deloc, &
10986            m_S_in=almo_scf_env%matrix_s, &
10987            m_KS0_in=almo_scf_env%matrix_ks_0deloc, &
10988            m_quench_t_in=m_quench_in, &
10989            energy_out=energy_inout, & ! get energy loewring
10990            m_eda_out=m_temp_no1, &
10991            m_cta_out=m_temp_no2 &
10992            )
10993
10994         IF (almo_scf_env%almo_analysis%do_analysis) THEN
10995
10996            DO ispin = 1, nspins
10997
10998               ! energy decomposition analysis (EDA)
10999               IF (unit_nr > 0) THEN
11000                  WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
11001               ENDIF
11002
11003               ! open the output file, print and close
11004               CALL get_qs_env(qs_env, input=input)
11005               almo_print_section => section_vals_get_subs_vals(input, "DFT%ALMO_SCF%ANALYSIS%PRINT")
11006               eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
11007                                               "ALMO_EDA_CT", extension=".dat", local=.TRUE.)
11008               CALL dbcsr_print_block_sum(m_temp_no1(ispin), eda_unit)
11009               CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
11010                                                 "ALMO_EDA_CT", local=.TRUE.)
11011
11012               ! charge transfer analysis (CTA)
11013               IF (unit_nr > 0) THEN
11014                  WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF CHARGE TRANSFER TERMS"
11015               ENDIF
11016
11017               eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
11018                                               "ALMO_CTA", extension=".dat", local=.TRUE.)
11019               CALL dbcsr_print_block_sum(m_temp_no2(ispin), eda_unit)
11020               CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
11021                                                 "ALMO_CTA", local=.TRUE.)
11022
11023            ENDDO ! ispin
11024
11025         ENDIF ! do ALMO EDA/CTA
11026
11027         CALL energy_lowering_report( &
11028            unit_nr=unit_nr, &
11029            ref_energy=almo_scf_env%almo_scf_energy, &
11030            energy_lowering=energy_inout)
11031         CALL almo_scf_update_ks_energy(qs_env, &
11032                                        energy=almo_scf_env%almo_scf_energy, &
11033                                        energy_singles_corr=energy_inout)
11034
11035         DO ispin = 1, nspins
11036            CALL dbcsr_release(m_temp_no1(ispin))
11037            CALL dbcsr_release(m_temp_no2(ispin))
11038         ENDDO
11039
11040         DEALLOCATE (m_temp_no1)
11041         DEALLOCATE (m_temp_no2)
11042
11043      ELSE ! non-perturbative
11044
11045         CALL almo_scf_update_ks_energy(qs_env, &
11046                                        energy=energy_inout)
11047
11048      ENDIF ! if perturbation only
11049
11050      CALL timestop(handle)
11051
11052   END SUBROUTINE wrap_up_xalmo_scf
11053
11054END MODULE almo_scf_optimizer
11055
11056