1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief module that contains the algorithms to perform an itrative
8!>         diagonalization by the block-Lanczos approach
9!> \par History
10!>      05.2009 created [MI]
11!> \author fawzi
12! **************************************************************************************************
13MODULE qs_scf_lanczos
14
15   USE cp_fm_basic_linalg,              ONLY: cp_fm_column_scale,&
16                                              cp_fm_qr_factorization,&
17                                              cp_fm_scale_and_add,&
18                                              cp_fm_transpose,&
19                                              cp_fm_triangular_multiply
20   USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose
21   USE cp_fm_diag,                      ONLY: choose_eigv_solver
22   USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
23                                              cp_fm_struct_release,&
24                                              cp_fm_struct_type
25   USE cp_fm_types,                     ONLY: &
26        cp_fm_create, cp_fm_get_submatrix, cp_fm_p_type, cp_fm_release, cp_fm_set_all, &
27        cp_fm_set_submatrix, cp_fm_to_fm, cp_fm_type, cp_fm_vectorsnorm
28   USE cp_gemm_interface,               ONLY: cp_gemm
29   USE cp_log_handling,                 ONLY: cp_to_string
30   USE kinds,                           ONLY: dp
31   USE message_passing,                 ONLY: mp_sum
32   USE qs_mo_types,                     ONLY: get_mo_set,&
33                                              mo_set_p_type
34   USE qs_scf_types,                    ONLY: krylov_space_type
35   USE scf_control_types,               ONLY: scf_control_type
36#include "./base/base_uses.f90"
37
38   IMPLICIT NONE
39   PRIVATE
40   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_scf_lanczos'
41
42   PUBLIC :: krylov_space_allocate, lanczos_refinement, lanczos_refinement_2v
43
44CONTAINS
45
46! **************************************************************************************************
47
48! **************************************************************************************************
49!> \brief  allocates matrices and vectros used in the construction of
50!>        the krylov space and for the lanczos refinement
51!> \param krylov_space ...
52!> \param scf_control ...
53!> \param mos ...
54!> \param
55!> \par History
56!>      05.2009 created [MI]
57! **************************************************************************************************
58
59   SUBROUTINE krylov_space_allocate(krylov_space, scf_control, mos)
60
61      TYPE(krylov_space_type), POINTER                   :: krylov_space
62      TYPE(scf_control_type), POINTER                    :: scf_control
63      TYPE(mo_set_p_type), DIMENSION(:), POINTER         :: mos
64
65      CHARACTER(LEN=*), PARAMETER :: routineN = 'krylov_space_allocate', &
66         routineP = moduleN//':'//routineN
67
68      INTEGER                                            :: handle, ik, ispin, max_nmo, nao, nblock, &
69                                                            ndim, nk, nmo, nspin
70      TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
71      TYPE(cp_fm_type), POINTER                          :: mo_coeff
72
73      CALL timeset(routineN, handle)
74
75      CPASSERT(ASSOCIATED(krylov_space))
76
77      IF (.NOT. ASSOCIATED(krylov_space%mo_conv)) THEN
78         NULLIFY (fm_struct_tmp, mo_coeff)
79
80         krylov_space%nkrylov = scf_control%diagonalization%nkrylov
81         krylov_space%nblock = scf_control%diagonalization%nblock_krylov
82
83         nk = krylov_space%nkrylov
84         nblock = krylov_space%nblock
85         nspin = SIZE(mos, 1)
86
87         ALLOCATE (krylov_space%mo_conv(nspin))
88         ALLOCATE (krylov_space%mo_refine(nspin))
89         ALLOCATE (krylov_space%chc_mat(nspin))
90         ALLOCATE (krylov_space%c_vec(nspin))
91         max_nmo = 0
92         DO ispin = 1, nspin
93            CALL get_mo_set(mos(ispin)%mo_set, mo_coeff=mo_coeff, nao=nao, nmo=nmo)
94            CALL cp_fm_create(krylov_space%mo_conv(ispin)%matrix, mo_coeff%matrix_struct)
95            CALL cp_fm_create(krylov_space%mo_refine(ispin)%matrix, mo_coeff%matrix_struct)
96            NULLIFY (fm_struct_tmp)
97            CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmo, ncol_global=nmo, &
98                                     para_env=mo_coeff%matrix_struct%para_env, &
99                                     context=mo_coeff%matrix_struct%context)
100            CALL cp_fm_create(krylov_space%chc_mat(ispin)%matrix, fm_struct_tmp, "chc")
101            CALL cp_fm_create(krylov_space%c_vec(ispin)%matrix, fm_struct_tmp, "vec")
102            CALL cp_fm_struct_release(fm_struct_tmp)
103            max_nmo = MAX(max_nmo, nmo)
104         END DO
105
106         !the use of max_nmo might not be ok, in this case allocate nspin matrices
107         ALLOCATE (krylov_space%c_eval(max_nmo))
108
109         ALLOCATE (krylov_space%v_mat(nk))
110
111         NULLIFY (fm_struct_tmp)
112         CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nblock, &
113                                  para_env=mo_coeff%matrix_struct%para_env, &
114                                  context=mo_coeff%matrix_struct%context)
115         DO ik = 1, nk
116            CALL cp_fm_create(krylov_space%v_mat(ik)%matrix, matrix_struct=fm_struct_tmp, &
117                              name="v_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
118         END DO
119         CALL cp_fm_create(krylov_space%tmp_mat, matrix_struct=fm_struct_tmp, &
120                           name="tmp_mat")
121         CALL cp_fm_struct_release(fm_struct_tmp)
122
123         ! NOTE: the following matrices are small and could be defined
124!           as standard array rather than istributed fm
125         NULLIFY (fm_struct_tmp)
126         CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nblock, ncol_global=nblock, &
127                                  para_env=mo_coeff%matrix_struct%para_env, &
128                                  context=mo_coeff%matrix_struct%context)
129         CALL cp_fm_create(krylov_space%block1_mat, matrix_struct=fm_struct_tmp, &
130                           name="a_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
131         CALL cp_fm_create(krylov_space%block2_mat, matrix_struct=fm_struct_tmp, &
132                           name="b_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
133         CALL cp_fm_create(krylov_space%block3_mat, matrix_struct=fm_struct_tmp, &
134                           name="b2_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
135         CALL cp_fm_struct_release(fm_struct_tmp)
136
137         ndim = nblock*nk
138         NULLIFY (fm_struct_tmp)
139         CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
140                                  para_env=mo_coeff%matrix_struct%para_env, &
141                                  context=mo_coeff%matrix_struct%context)
142         CALL cp_fm_create(krylov_space%block4_mat, matrix_struct=fm_struct_tmp, &
143                           name="t_mat")
144         CALL cp_fm_create(krylov_space%block5_mat, matrix_struct=fm_struct_tmp, &
145                           name="t_vec")
146         CALL cp_fm_struct_release(fm_struct_tmp)
147         ALLOCATE (krylov_space%t_eval(ndim))
148
149      ELSE
150         !Nothing should be done
151      END IF
152
153      CALL timestop(handle)
154
155   END SUBROUTINE krylov_space_allocate
156
157! **************************************************************************************************
158!> \brief lanczos refinement by blocks of not-converged MOs
159!> \param krylov_space ...
160!> \param ks ...
161!> \param c0 ...
162!> \param c1 ...
163!> \param eval ...
164!> \param nao ...
165!> \param eps_iter ...
166!> \param ispin ...
167!> \param check_moconv_only ...
168!> \param
169!> \par History
170!>      05.2009 created [MI]
171! **************************************************************************************************
172
173   SUBROUTINE lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, &
174                                 eps_iter, ispin, check_moconv_only)
175
176      TYPE(krylov_space_type), POINTER                   :: krylov_space
177      TYPE(cp_fm_type), POINTER                          :: ks, c0, c1
178      REAL(dp), DIMENSION(:), POINTER                    :: eval
179      INTEGER, INTENT(IN)                                :: nao
180      REAL(dp), INTENT(IN)                               :: eps_iter
181      INTEGER, INTENT(IN)                                :: ispin
182      LOGICAL, INTENT(IN), OPTIONAL                      :: check_moconv_only
183
184      CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement', &
185         routineP = moduleN//':'//routineN
186      REAL(KIND=dp), PARAMETER                           :: rmone = -1.0_dp, rone = 1.0_dp, &
187                                                            rzero = 0.0_dp
188
189      INTEGER :: hand1, hand2, hand3, hand4, hand5, handle, ib, ik, imo, imo_low, imo_up, it, jt, &
190         nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, num_blocks
191      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: itaken
192      LOGICAL                                            :: my_check_moconv_only
193      REAL(dp)                                           :: max_norm, min_norm, vmax
194      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: q_mat, tblock, tvblock
195      REAL(dp), DIMENSION(:), POINTER                    :: c_res, t_eval
196      TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: v_mat
197      TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
198      TYPE(cp_fm_type), POINTER                          :: a_mat, b2_mat, b_mat, c2_tmp, c3_tmp, &
199                                                            c_tmp, chc, evec, hc, t_mat, t_vec
200
201      CALL timeset(routineN, handle)
202
203      NULLIFY (fm_struct_tmp)
204      NULLIFY (chc, evec)
205      NULLIFY (c_res, t_eval)
206      NULLIFY (hc, t_mat, t_vec, c2_tmp)
207      NULLIFY (a_mat, b_mat, b2_mat, v_mat)
208
209      nmo = SIZE(eval, 1)
210      my_check_moconv_only = .FALSE.
211      IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
212
213      chc => krylov_space%chc_mat(ispin)%matrix
214      evec => krylov_space%c_vec(ispin)%matrix
215      c_res => krylov_space%c_eval
216      t_eval => krylov_space%t_eval
217
218      NULLIFY (fm_struct_tmp, c_tmp)
219      CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
220                               para_env=c0%matrix_struct%para_env, &
221                               context=c0%matrix_struct%context)
222      CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
223                        name="c_tmp")
224      CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
225                        name="hc")
226      CALL cp_fm_struct_release(fm_struct_tmp)
227
228      !Compute (C^t)HC
229      CALL cp_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
230      CALL cp_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
231
232      !Diagonalize  (C^t)HC
233      CALL timeset(routineN//"diag_chc", hand1)
234      CALL choose_eigv_solver(chc, evec, eval)
235      CALL timestop(hand1)
236
237      !Rotate the C vectors
238      CALL cp_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
239
240      !Check for converged states
241      CALL cp_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
242      CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
243      CALL cp_fm_column_scale(c1, eval)
244      CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
245      CALL cp_fm_vectorsnorm(c_tmp, c_res)
246
247      nmo_converged = 0
248      nmo_nc = 0
249      max_norm = 0.0_dp
250      min_norm = 1.e10_dp
251      CALL cp_fm_set_all(c1, rzero)
252      DO imo = 1, nmo
253         max_norm = MAX(max_norm, c_res(imo))
254         min_norm = MIN(min_norm, c_res(imo))
255      END DO
256      DO imo = 1, nmo
257         IF (c_res(imo) <= eps_iter) THEN
258            nmo_converged = nmo_converged + 1
259         ELSE
260            nmo_nc = nmo - nmo_converged
261            EXIT
262         END IF
263      END DO
264
265      nblock = krylov_space%nblock
266      num_blocks = nmo_nc/nblock
267
268      krylov_space%nmo_nc = nmo_nc
269      krylov_space%nmo_conv = nmo_converged
270      krylov_space%max_res_norm = max_norm
271      krylov_space%min_res_norm = min_norm
272
273      IF (my_check_moconv_only) THEN
274         CALL cp_fm_release(c_tmp)
275         CALL cp_fm_release(hc)
276         CALL timestop(handle)
277         RETURN
278      ELSE IF (krylov_space%nmo_nc > 0) THEN
279
280         CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
281
282         nblock = krylov_space%nblock
283         IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN
284            num_blocks = nmo_nc/nblock + 1
285         ELSE
286            num_blocks = nmo_nc/nblock
287         END IF
288
289         DO ib = 1, num_blocks
290
291            imo_low = (ib - 1)*nblock + 1
292            imo_up = MIN(ib*nblock, nmo_nc)
293            nmob = imo_up - imo_low + 1
294            ndim = krylov_space%nkrylov*nmob
295
296            NULLIFY (fm_struct_tmp)
297            CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
298                                     para_env=c0%matrix_struct%para_env, &
299                                     context=c0%matrix_struct%context)
300            CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
301                              name="c2_tmp")
302            CALL cp_fm_struct_release(fm_struct_tmp)
303            NULLIFY (fm_struct_tmp)
304            CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=ndim, &
305                                     para_env=c0%matrix_struct%para_env, &
306                                     context=c0%matrix_struct%context)
307            CALL cp_fm_create(c3_tmp, matrix_struct=fm_struct_tmp, &
308                              name="c3_tmp")
309            CALL cp_fm_struct_release(fm_struct_tmp)
310
311            ! Create local matrix of right size
312            IF (nmob /= nblock) THEN
313               NULLIFY (a_mat, b_mat, b2_mat, t_mat, t_vec, v_mat)
314               NULLIFY (fm_struct_tmp)
315               CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
316                                        para_env=chc%matrix_struct%para_env, &
317                                        context=chc%matrix_struct%context)
318               CALL cp_fm_create(a_mat, matrix_struct=fm_struct_tmp, &
319                                 name="a_mat")
320               CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
321                                 name="b_mat")
322               CALL cp_fm_create(b2_mat, matrix_struct=fm_struct_tmp, &
323                                 name="b2_mat")
324               CALL cp_fm_struct_release(fm_struct_tmp)
325               NULLIFY (fm_struct_tmp)
326               CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
327                                        para_env=chc%matrix_struct%para_env, &
328                                        context=chc%matrix_struct%context)
329               CALL cp_fm_create(t_mat, matrix_struct=fm_struct_tmp, &
330                                 name="t_mat")
331               CALL cp_fm_create(t_vec, matrix_struct=fm_struct_tmp, &
332                                 name="t_vec")
333               CALL cp_fm_struct_release(fm_struct_tmp)
334               NULLIFY (fm_struct_tmp)
335               CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
336                                        para_env=c0%matrix_struct%para_env, &
337                                        context=c0%matrix_struct%context)
338               ALLOCATE (v_mat(krylov_space%nkrylov))
339               DO ik = 1, krylov_space%nkrylov
340                  CALL cp_fm_create(v_mat(ik)%matrix, matrix_struct=fm_struct_tmp, &
341                                    name="v_mat")
342               END DO
343               CALL cp_fm_struct_release(fm_struct_tmp)
344            ELSE
345               a_mat => krylov_space%block1_mat
346               b_mat => krylov_space%block2_mat
347               b2_mat => krylov_space%block3_mat
348               t_mat => krylov_space%block4_mat
349               t_vec => krylov_space%block5_mat
350               v_mat => krylov_space%v_mat
351            END IF
352
353            ALLOCATE (tblock(nmob, nmob))
354            ALLOCATE (tvblock(nmob, ndim))
355
356            CALL timeset(routineN//"_kry_loop", hand2)
357            CALL cp_fm_set_all(b_mat, rzero)
358            CALL cp_fm_set_all(t_mat, rzero)
359            CALL cp_fm_to_fm(c1, v_mat(1)%matrix, nmob, imo_low, 1)
360
361            !Compute A =(V^t)HV
362            CALL cp_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1)%matrix, rzero, hc)
363            CALL cp_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(1)%matrix, hc, &
364                         rzero, a_mat)
365
366            !Compute the residual matrix R for next
367            !factorisation
368            CALL cp_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(1)%matrix, a_mat, &
369                         rzero, c_tmp)
370            CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
371
372            ! Build the block tridiagonal matrix
373            CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
374            CALL cp_fm_set_submatrix(t_mat, tblock, 1, 1, nmob, nmob)
375
376            DO ik = 2, krylov_space%nkrylov
377
378               ! Call lapack for QR factorization
379               CALL cp_fm_set_all(b_mat, rzero)
380               CALL cp_fm_to_fm(c_tmp, v_mat(ik)%matrix, nmob, 1, 1)
381               CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
382
383               CALL cp_fm_triangular_multiply(b_mat, v_mat(ik)%matrix, side="R", invert_tr=.TRUE., &
384                                              n_rows=nao, n_cols=nmob)
385
386               !Compute A =(V^t)HV
387               CALL cp_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik)%matrix, rzero, hc)
388               CALL cp_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(ik)%matrix, hc, rzero, a_mat)
389
390               !Compute the !residual matrix R !for next !factorisation
391               CALL cp_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(ik)%matrix, a_mat, &
392                            rzero, c_tmp)
393               CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
394               CALL cp_fm_to_fm(v_mat(ik - 1)%matrix, hc, nmob, 1, 1)
395               CALL cp_fm_triangular_multiply(b_mat, hc, side='R', transpose_tr=.TRUE., &
396                                              n_rows=nao, n_cols=nmob, alpha=rmone)
397               CALL cp_fm_scale_and_add(rone, c_tmp, rone, hc)
398
399               ! Build the block tridiagonal matrix
400               it = (ik - 2)*nmob + 1
401               jt = (ik - 1)*nmob + 1
402
403               CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
404               CALL cp_fm_set_submatrix(t_mat, tblock, jt, jt, nmob, nmob)
405               CALL cp_fm_transpose(b_mat, a_mat)
406               CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
407               CALL cp_fm_set_submatrix(t_mat, tblock, it, jt, nmob, nmob)
408
409            END DO ! ik
410            CALL timestop(hand2)
411
412            DEALLOCATE (tblock)
413
414            CALL timeset(routineN//"_diag_tri", hand3)
415
416            CALL choose_eigv_solver(t_mat, t_vec, t_eval)
417            ! Diagonalize the block-tridiagonal matrix
418            CALL timestop(hand3)
419
420            CALL timeset(routineN//"_build_cnew", hand4)
421!        !Compute the refined vectors
422            CALL cp_fm_set_all(c2_tmp, rzero)
423            DO ik = 1, krylov_space%nkrylov
424               jt = (ik - 1)*nmob
425               CALL cp_gemm('N', 'N', nao, ndim, nmob, rone, v_mat(ik)%matrix, t_vec, rone, c2_tmp, &
426                            b_first_row=(jt + 1))
427            END DO
428            DEALLOCATE (tvblock)
429
430            CALL cp_fm_set_all(c3_tmp, rzero)
431            CALL cp_gemm('T', 'N', nmob, ndim, nao, rone, v_mat(1)%matrix, c2_tmp, rzero, c3_tmp)
432
433            !Try to avoid linear dependencies
434            ALLOCATE (q_mat(nmob, ndim))
435            !get max
436            CALL cp_fm_get_submatrix(c3_tmp, q_mat, 1, 1, nmob, ndim)
437
438            ALLOCATE (itaken(ndim))
439            itaken = 0
440            DO it = 1, nmob
441               vmax = 0.0_dp
442               !select index ik
443               DO jt = 1, ndim
444                  IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN
445                     vmax = ABS(q_mat(it, jt))
446                     ik = jt
447                  END IF
448               END DO
449               itaken(ik) = 1
450
451               CALL cp_fm_to_fm(c2_tmp, v_mat(1)%matrix, 1, ik, it)
452            END DO
453            DEALLOCATE (itaken)
454            DEALLOCATE (q_mat)
455
456            !Copy in the converged set to enlarge the converged subspace
457            CALL cp_fm_to_fm(v_mat(1)%matrix, c0, nmob, 1, (nmo_converged + imo_low))
458            CALL timestop(hand4)
459
460            IF (nmob < nblock) THEN
461               CALL cp_fm_release(a_mat)
462               CALL cp_fm_release(b_mat)
463               CALL cp_fm_release(b2_mat)
464               CALL cp_fm_release(t_mat)
465               CALL cp_fm_release(t_vec)
466               DO ik = 1, krylov_space%nkrylov
467                  CALL cp_fm_release(v_mat(ik)%matrix)
468               END DO
469               DEALLOCATE (v_mat)
470
471            END IF
472            CALL cp_fm_release(c2_tmp)
473            CALL cp_fm_release(c3_tmp)
474         END DO ! ib
475
476         CALL timeset(routineN//"_ortho", hand5)
477         CALL cp_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
478
479         CALL cp_fm_cholesky_decompose(chc, nmo)
480         CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.)
481         CALL timestop(hand5)
482
483         CALL cp_fm_release(c_tmp)
484         CALL cp_fm_release(hc)
485      ELSE
486         CALL cp_fm_release(c_tmp)
487         CALL cp_fm_release(hc)
488         CALL timestop(handle)
489         RETURN
490      END IF
491
492      CALL timestop(handle)
493
494   END SUBROUTINE lanczos_refinement
495
496!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
497
498! **************************************************************************************************
499!> \brief ...
500!> \param krylov_space ...
501!> \param ks ...
502!> \param c0 ...
503!> \param c1 ...
504!> \param eval ...
505!> \param nao ...
506!> \param eps_iter ...
507!> \param ispin ...
508!> \param check_moconv_only ...
509! **************************************************************************************************
510   SUBROUTINE lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, &
511                                    eps_iter, ispin, check_moconv_only)
512
513      TYPE(krylov_space_type), POINTER                   :: krylov_space
514      TYPE(cp_fm_type), POINTER                          :: ks, c0, c1
515      REAL(dp), DIMENSION(:), POINTER                    :: eval
516      INTEGER, INTENT(IN)                                :: nao
517      REAL(dp), INTENT(IN)                               :: eps_iter
518      INTEGER, INTENT(IN)                                :: ispin
519      LOGICAL, INTENT(IN), OPTIONAL                      :: check_moconv_only
520
521      CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement_2v', &
522         routineP = moduleN//':'//routineN
523      REAL(KIND=dp), PARAMETER                           :: rmone = -1.0_dp, rone = 1.0_dp, &
524                                                            rzero = 0.0_dp
525
526      INTEGER :: hand1, hand2, hand3, hand4, hand5, hand6, handle, i, ia, ib, ik, imo, imo_low, &
527         imo_up, info, it, j, jt, liwork, lwork, nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, &
528         num_blocks
529      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: itaken
530      INTEGER, DIMENSION(:), POINTER                     :: iwork
531      LOGICAL                                            :: my_check_moconv_only
532      REAL(dp)                                           :: max_norm, min_norm, vmax
533      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: a_block, b_block, q_mat, t_mat
534      REAL(dp), DIMENSION(:), POINTER                    :: c_res, t_eval
535      REAL(KIND=dp), DIMENSION(:), POINTER               :: work
536      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a_loc, b_loc
537      TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: v_mat
538      TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
539      TYPE(cp_fm_type), POINTER                          :: b_mat, c2_tmp, c_tmp, chc, evec, hc, &
540                                                            v_tmp
541
542      CALL timeset(routineN, handle)
543
544      NULLIFY (fm_struct_tmp)
545      NULLIFY (chc, evec)
546      NULLIFY (c_res, t_eval)
547      NULLIFY (hc, c_tmp, c2_tmp)
548      NULLIFY (v_tmp)
549      NULLIFY (b_mat, v_mat)
550      NULLIFY (b_loc, a_loc)
551
552      nmo = SIZE(eval, 1)
553      my_check_moconv_only = .FALSE.
554      IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
555
556      chc => krylov_space%chc_mat(ispin)%matrix
557      evec => krylov_space%c_vec(ispin)%matrix
558      c_res => krylov_space%c_eval
559      t_eval => krylov_space%t_eval
560
561      NULLIFY (fm_struct_tmp, c_tmp)
562      CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
563                               para_env=c0%matrix_struct%para_env, &
564                               context=c0%matrix_struct%context)
565      CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
566                        name="c_tmp")
567      CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
568                        name="hc")
569      CALL cp_fm_struct_release(fm_struct_tmp)
570
571      !Compute (C^t)HC
572      CALL cp_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
573      CALL cp_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
574
575      !Diagonalize  (C^t)HC
576      CALL timeset(routineN//"diag_chc", hand1)
577      CALL choose_eigv_solver(chc, evec, eval)
578
579      CALL timestop(hand1)
580
581      CALL timeset(routineN//"check_conv", hand6)
582      !Rotate the C vectors
583      CALL cp_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
584
585      !Check for converged states
586      CALL cp_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
587      CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
588      CALL cp_fm_column_scale(c1, eval)
589      CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
590      CALL cp_fm_vectorsnorm(c_tmp, c_res)
591
592      nmo_converged = 0
593      nmo_nc = 0
594      max_norm = 0.0_dp
595      min_norm = 1.e10_dp
596      CALL cp_fm_set_all(c1, rzero)
597      DO imo = 1, nmo
598         max_norm = MAX(max_norm, c_res(imo))
599         min_norm = MIN(min_norm, c_res(imo))
600      END DO
601      DO imo = 1, nmo
602         IF (c_res(imo) <= eps_iter) THEN
603            nmo_converged = nmo_converged + 1
604         ELSE
605            nmo_nc = nmo - nmo_converged
606            EXIT
607         END IF
608      END DO
609      CALL timestop(hand6)
610
611      CALL cp_fm_release(c_tmp)
612      CALL cp_fm_release(hc)
613
614      krylov_space%nmo_nc = nmo_nc
615      krylov_space%nmo_conv = nmo_converged
616      krylov_space%max_res_norm = max_norm
617      krylov_space%min_res_norm = min_norm
618
619      IF (my_check_moconv_only) THEN
620         ! Do nothing
621      ELSE IF (krylov_space%nmo_nc > 0) THEN
622
623         CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
624
625         nblock = krylov_space%nblock
626         IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN
627            num_blocks = nmo_nc/nblock + 1
628         ELSE
629            num_blocks = nmo_nc/nblock
630         END IF
631
632         DO ib = 1, num_blocks
633
634            imo_low = (ib - 1)*nblock + 1
635            imo_up = MIN(ib*nblock, nmo_nc)
636            nmob = imo_up - imo_low + 1
637            ndim = krylov_space%nkrylov*nmob
638
639            ! Allocation
640            CALL timeset(routineN//"alloc", hand6)
641            ALLOCATE (a_block(nmob, nmob))
642            ALLOCATE (b_block(nmob, nmob))
643            ALLOCATE (t_mat(ndim, ndim))
644
645            NULLIFY (fm_struct_tmp)
646            ! by forcing ncol_block=nmo, the needed part of the matrix is distributed on a subset of processes
647            ! this is due to the use of two-dimensional grids of processes
648            ! nrow_global is distributed over num_pe(1)
649            ! a local_data array is anyway allocated for the processes non included
650            ! this should have a minimum size
651            ! with ncol_local=1.
652            CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
653                                     ncol_block=nmob, &
654                                     para_env=c0%matrix_struct%para_env, &
655                                     context=c0%matrix_struct%context, &
656                                     force_block=.TRUE.)
657            CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
658                              name="c_tmp")
659            CALL cp_fm_set_all(c_tmp, rzero)
660            CALL cp_fm_create(v_tmp, matrix_struct=fm_struct_tmp, &
661                              name="v_tmp")
662            CALL cp_fm_set_all(v_tmp, rzero)
663            CALL cp_fm_struct_release(fm_struct_tmp)
664            NULLIFY (fm_struct_tmp)
665            ALLOCATE (v_mat(krylov_space%nkrylov))
666            CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
667                                     ncol_block=nmob, &
668                                     para_env=c0%matrix_struct%para_env, &
669                                     context=c0%matrix_struct%context, &
670                                     force_block=.TRUE.)
671            DO ik = 1, krylov_space%nkrylov
672               CALL cp_fm_create(v_mat(ik)%matrix, matrix_struct=fm_struct_tmp, &
673                                 name="v_mat")
674            END DO
675            CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
676                              name="hc")
677            CALL cp_fm_struct_release(fm_struct_tmp)
678            NULLIFY (fm_struct_tmp)
679            CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
680                                     ncol_block=ndim, &
681                                     para_env=c0%matrix_struct%para_env, &
682                                     context=c0%matrix_struct%context, &
683                                     force_block=.TRUE.)
684            CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
685                              name="c2_tmp")
686            CALL cp_fm_struct_release(fm_struct_tmp)
687
688            NULLIFY (fm_struct_tmp)
689            CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
690                                     para_env=c0%matrix_struct%para_env, &
691                                     context=c0%matrix_struct%context)
692            CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
693                              name="b_mat")
694            CALL cp_fm_struct_release(fm_struct_tmp)
695            CALL timestop(hand6)
696            !End allocation
697
698            CALL cp_fm_set_all(b_mat, rzero)
699            CALL cp_fm_to_fm(c1, v_mat(1)%matrix, nmob, imo_low, 1)
700
701            ! Here starts the construction of krylov space
702            CALL timeset(routineN//"_kry_loop", hand2)
703            !Compute A =(V^t)HV
704            CALL cp_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1)%matrix, rzero, hc)
705
706            a_block = 0.0_dp
707            a_loc => v_mat(1)%matrix%local_data
708            b_loc => hc%local_data
709
710            IF (SIZE(hc%local_data, 2) == nmob) THEN
711               ! this is a work around to avoid problems due to the two dimensional grid of processes
712               CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
713                          SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
714            END IF
715            CALL mp_sum(a_block, hc%matrix_struct%para_env%group)
716
717            !Compute the residual matrix R for next
718            !factorisation
719            c_tmp%local_data = 0.0_dp
720            IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
721               a_loc => v_mat(1)%matrix%local_data
722               b_loc => c_tmp%local_data
723               CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
724                          SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
725                          b_loc(1, 1), SIZE(c_tmp%local_data, 1))
726            END IF
727            CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
728
729            ! Build the block tridiagonal matrix
730            t_mat = 0.0_dp
731            DO i = 1, nmob
732               t_mat(1:nmob, i) = a_block(1:nmob, i)
733            END DO
734
735            DO ik = 2, krylov_space%nkrylov
736               ! Call lapack for QR factorization
737               CALL cp_fm_set_all(b_mat, rzero)
738               CALL cp_fm_to_fm(c_tmp, v_mat(ik)%matrix, nmob, 1, 1)
739               CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
740
741               CALL cp_fm_triangular_multiply(b_mat, v_mat(ik)%matrix, side="R", invert_tr=.TRUE., &
742                                              n_rows=nao, n_cols=nmob)
743
744               CALL cp_fm_get_submatrix(b_mat, b_block, 1, 1, nmob, nmob)
745
746               !Compute A =(V^t)HV
747               CALL cp_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik)%matrix, rzero, hc)
748
749               a_block = 0.0_dp
750               IF (SIZE(hc%local_data, 2) == nmob) THEN
751                  a_loc => v_mat(ik)%matrix%local_data
752                  b_loc => hc%local_data
753                  CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
754                             SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
755               END IF
756               CALL mp_sum(a_block, hc%matrix_struct%para_env%group)
757
758               !Compute the residual matrix R for next
759               !factorisation
760               c_tmp%local_data = 0.0_dp
761               IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
762                  a_loc => v_mat(ik)%matrix%local_data
763                  b_loc => c_tmp%local_data
764                  CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
765                             SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
766                             b_loc(1, 1), SIZE(c_tmp%local_data, 1))
767               END IF
768               CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
769
770               IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
771                  a_loc => v_mat(ik - 1)%matrix%local_data
772                  DO j = 1, nmob
773                     DO i = 1, j
774                        DO ia = 1, SIZE(c_tmp%local_data, 1)
775                           b_loc(ia, i) = b_loc(ia, i) - a_loc(ia, j)*b_block(i, j)
776                        END DO
777                     END DO
778                  END DO
779               END IF
780
781               ! Build the block tridiagonal matrix
782               it = (ik - 2)*nmob
783               jt = (ik - 1)*nmob
784               DO j = 1, nmob
785                  t_mat(jt + 1:jt + nmob, jt + j) = a_block(1:nmob, j)
786                  DO i = 1, nmob
787                     t_mat(it + i, jt + j) = b_block(j, i)
788                     t_mat(jt + j, it + i) = b_block(j, i)
789                  END DO
790               END DO
791            END DO ! ik
792            CALL timestop(hand2)
793
794            CALL timeset(routineN//"_diag_tri", hand3)
795            lwork = 1 + 6*ndim + 2*ndim**2 + 5000
796            liwork = 5*ndim + 3
797            ALLOCATE (work(lwork))
798            ALLOCATE (iwork(liwork))
799
800            ! Diagonalize the block-tridiagonal matrix
801            CALL dsyevd('V', 'U', ndim, t_mat(1, 1), ndim, t_eval(1), &
802                        work(1), lwork, iwork(1), liwork, info)
803            DEALLOCATE (work)
804            DEALLOCATE (iwork)
805            CALL timestop(hand3)
806
807            CALL timeset(routineN//"_build_cnew", hand4)
808!        !Compute the refined vectors
809
810            c2_tmp%local_data = 0.0_dp
811            ALLOCATE (q_mat(nmob, ndim))
812            q_mat = 0.0_dp
813            b_loc => c2_tmp%local_data
814            DO ik = 1, krylov_space%nkrylov
815               CALL cp_fm_to_fm(v_mat(ik)%matrix, v_tmp, nmob, 1, 1)
816               IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
817!            a_loc => v_mat(ik)%matrix%local_data
818                  a_loc => v_tmp%local_data
819                  it = (ik - 1)*nmob
820                  CALL dgemm('N', 'N', SIZE(c2_tmp%local_data, 1), ndim, nmob, 1.0_dp, a_loc(1, 1), &
821                             SIZE(c2_tmp%local_data, 1), t_mat(it + 1, 1), ndim, 1.0_dp, &
822                             b_loc(1, 1), SIZE(c2_tmp%local_data, 1))
823               END IF
824            END DO !ik
825
826            !Try to avoid linear dependencies
827            CALL cp_fm_to_fm(v_mat(1)%matrix, v_tmp, nmob, 1, 1)
828            IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
829!          a_loc => v_mat(1)%matrix%local_data
830               a_loc => v_tmp%local_data
831               b_loc => c2_tmp%local_data
832               CALL dgemm('T', 'N', nmob, ndim, SIZE(v_tmp%local_data, 1), 1.0_dp, a_loc(1, 1), &
833                          SIZE(v_tmp%local_data, 1), b_loc(1, 1), SIZE(v_tmp%local_data, 1), &
834                          0.0_dp, q_mat(1, 1), nmob)
835            END IF
836            CALL mp_sum(q_mat, hc%matrix_struct%para_env%group)
837
838            ALLOCATE (itaken(ndim))
839            itaken = 0
840            DO it = 1, nmob
841               vmax = 0.0_dp
842               !select index ik
843               DO jt = 1, ndim
844                  IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN
845                     vmax = ABS(q_mat(it, jt))
846                     ik = jt
847                  END IF
848               END DO
849               itaken(ik) = 1
850
851               CALL cp_fm_to_fm(c2_tmp, v_mat(1)%matrix, 1, ik, it)
852            END DO
853            DEALLOCATE (itaken)
854            DEALLOCATE (q_mat)
855
856            !Copy in the converged set to enlarge the converged subspace
857            CALL cp_fm_to_fm(v_mat(1)%matrix, c0, nmob, 1, (nmo_converged + imo_low))
858            CALL timestop(hand4)
859
860            CALL cp_fm_release(c2_tmp)
861            CALL cp_fm_release(c_tmp)
862            CALL cp_fm_release(hc)
863            CALL cp_fm_release(v_tmp)
864            CALL cp_fm_release(b_mat)
865
866            DEALLOCATE (t_mat)
867            DEALLOCATE (a_block)
868            DEALLOCATE (b_block)
869            DO ik = 1, krylov_space%nkrylov
870               CALL cp_fm_release(v_mat(ik)%matrix)
871            END DO
872            DEALLOCATE (v_mat)
873
874         END DO ! ib
875
876         CALL timeset(routineN//"_ortho", hand5)
877         CALL cp_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
878
879         CALL cp_fm_cholesky_decompose(chc, nmo)
880         CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.)
881         CALL timestop(hand5)
882      ELSE
883         ! Do nothing
884      END IF
885
886      CALL timestop(handle)
887   END SUBROUTINE lanczos_refinement_2v
888
889END MODULE qs_scf_lanczos
890