1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Localization methods such as 2x2 Jacobi rotations
8!>                                   Steepest Decents
9!>                                   Conjugate Gradient
10!> \par History
11!>      Initial parallellization of jacobi (JVDV 07.2003)
12!>      direct minimization using exponential parametrization (JVDV 09.2003)
13!>      crazy rotations go fast (JVDV 10.2003)
14!> \author CJM (04.2003)
15! **************************************************************************************************
16MODULE qs_localization_methods
17   USE cell_types,                      ONLY: cell_type
18   USE cp_blacs_env,                    ONLY: cp_blacs_env_type
19   USE cp_cfm_basic_linalg,             ONLY: cp_cfm_column_scale,&
20                                              cp_cfm_gemm,&
21                                              cp_cfm_schur_product
22   USE cp_cfm_diag,                     ONLY: cp_cfm_heevd
23   USE cp_cfm_types,                    ONLY: cp_cfm_create,&
24                                              cp_cfm_get_element,&
25                                              cp_cfm_get_info,&
26                                              cp_cfm_p_type,&
27                                              cp_cfm_release,&
28                                              cp_cfm_set_all,&
29                                              cp_cfm_to_cfm,&
30                                              cp_cfm_type
31   USE cp_dbcsr_operations,             ONLY: cp_dbcsr_sm_fm_multiply
32   USE cp_external_control,             ONLY: external_control
33   USE cp_fm_basic_linalg,              ONLY: cp_fm_frobenius_norm,&
34                                              cp_fm_pdgeqpf,&
35                                              cp_fm_pdorgqr,&
36                                              cp_fm_scale,&
37                                              cp_fm_scale_and_add,&
38                                              cp_fm_trace,&
39                                              cp_fm_transpose
40   USE cp_fm_diag,                      ONLY: cp_fm_syevd
41   USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
42                                              cp_fm_struct_get,&
43                                              cp_fm_struct_release,&
44                                              cp_fm_struct_type
45   USE cp_fm_types,                     ONLY: &
46        cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_maxabsrownorm, &
47        cp_fm_maxabsval, cp_fm_p_type, cp_fm_release, cp_fm_set_all, cp_fm_set_submatrix, &
48        cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type
49   USE cp_gemm_interface,               ONLY: cp_gemm
50   USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit,&
51                                              cp_logger_get_default_unit_nr
52   USE cp_para_types,                   ONLY: cp_para_env_type
53   USE dbcsr_api,                       ONLY: dbcsr_p_type
54   USE kinds,                           ONLY: dp
55   USE machine,                         ONLY: m_flush,&
56                                              m_walltime
57   USE mathconstants,                   ONLY: pi,&
58                                              twopi
59   USE matrix_exp,                      ONLY: exp_pade_real,&
60                                              get_nsquare_norder
61   USE message_passing,                 ONLY: mp_allgather,&
62                                              mp_bcast,&
63                                              mp_max,&
64                                              mp_sendrecv,&
65                                              mp_sum,&
66                                              mp_sync
67#include "./base/base_uses.f90"
68
69   IMPLICIT NONE
70   PUBLIC :: initialize_weights, crazy_rotations, &
71             direct_mini, rotate_orbitals, approx_l1_norm_sd, jacobi_rotations, scdm_qrfact, zij_matrix
72
73   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
74
75   PRIVATE
76
77   TYPE set_c_1d_type
78      COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array
79   END TYPE
80   TYPE set_c_2d_type
81      COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array
82   END TYPE
83
84CONTAINS
85! **************************************************************************************************
86!> \brief ...
87!> \param C ...
88!> \param iterations ...
89!> \param eps ...
90!> \param converged ...
91!> \param sweeps ...
92! **************************************************************************************************
93   SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
94      TYPE(cp_fm_type), POINTER                          :: C
95      INTEGER, INTENT(IN)                                :: iterations
96      REAL(KIND=dp), INTENT(IN)                          :: eps
97      LOGICAL, INTENT(INOUT)                             :: converged
98      INTEGER, INTENT(INOUT)                             :: sweeps
99
100      CHARACTER(len=*), PARAMETER :: routineN = 'approx_l1_norm_sd', &
101         routineP = moduleN//':'//routineN
102      INTEGER, PARAMETER                                 :: taylor_order = 100
103      REAL(KIND=dp), PARAMETER                           :: alpha = 0.1_dp, f2_eps = 0.01_dp
104
105      INTEGER                                            :: handle, i, istep, k, n, ncol_local, &
106                                                            nrow_local, output_unit, p
107      REAL(KIND=dp)                                      :: expfactor, f2, f2old, gnorm, tnorm
108      TYPE(cp_blacs_env_type), POINTER                   :: context
109      TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_k_k
110      TYPE(cp_fm_type), POINTER                          :: CTmp, G, Gp1, Gp2, U
111      TYPE(cp_para_env_type), POINTER                    :: para_env
112
113      CALL timeset(routineN, handle)
114
115      NULLIFY (CTmp, U, G, Gp1, Gp2, context, para_env, fm_struct_k_k)
116
117      output_unit = cp_logger_get_default_io_unit()
118
119      CALL cp_fm_struct_get(C%matrix_struct, nrow_global=n, ncol_global=k, &
120                            nrow_local=nrow_local, ncol_local=ncol_local, &
121                            para_env=para_env, context=context)
122      CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
123                               nrow_global=k, ncol_global=k)
124      CALL cp_fm_create(CTmp, C%matrix_struct)
125      CALL cp_fm_create(U, fm_struct_k_k)
126      CALL cp_fm_create(G, fm_struct_k_k)
127      CALL cp_fm_create(Gp1, fm_struct_k_k)
128      CALL cp_fm_create(Gp2, fm_struct_k_k)
129      !
130      ! printing
131      IF (output_unit > 0) THEN
132         WRITE (output_unit, '(1X)')
133         WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
134         WRITE (output_unit, '(A,I5)') '      Nbr iterations =', iterations
135         WRITE (output_unit, '(A,E10.2)') '     eps convergence =', eps
136         WRITE (output_unit, '(A,I5)') '    Max Taylor order =', taylor_order
137         WRITE (output_unit, '(A,E10.2)') '              f2 eps =', f2_eps
138         WRITE (output_unit, '(A,E10.2)') '               alpha =', alpha
139         WRITE (output_unit, '(A)') '     iteration    approx_l1_norm    g_norm   rel_err'
140      ENDIF
141      !
142      f2old = 0.0_dp
143      converged = .FALSE.
144      !
145      ! Start the steepest descent
146      DO istep = 1, iterations
147         !
148         !-------------------------------------------------------------------
149         ! compute f_2
150         ! f_2(x)=(x^2+eps)^1/2
151         f2 = 0.0_dp
152         DO p = 1, ncol_local ! p
153            DO i = 1, nrow_local ! i
154               f2 = f2 + SQRT(C%local_data(i, p)**2 + f2_eps)
155            ENDDO
156         ENDDO
157         CALL mp_sum(f2, C%matrix_struct%para_env%group)
158         !write(*,*) 'qs_localize: f_2=',f2
159         !-------------------------------------------------------------------
160         ! compute the derivative of f_2
161         ! f_2(x)=(x^2+eps)^1/2
162         DO p = 1, ncol_local ! p
163            DO i = 1, nrow_local ! i
164               CTmp%local_data(i, p) = C%local_data(i, p)/SQRT(C%local_data(i, p)**2 + f2_eps)
165            ENDDO
166         ENDDO
167         CALL cp_gemm('T', 'N', k, k, n, 1.0_dp, CTmp, C, 0.0_dp, G)
168         ! antisymmetrize
169         CALL cp_fm_transpose(G, U)
170         CALL cp_fm_scale_and_add(-0.5_dp, G, 0.5_dp, U)
171         !
172         !-------------------------------------------------------------------
173         !
174         CALL cp_fm_frobenius_norm(G, gnorm)
175         !write(*,*) 'qs_localize: norm(G)=',gnorm
176         !
177         ! rescale for steepest descent
178         CALL cp_fm_scale(-alpha, G)
179         !
180         ! compute unitary transform
181         ! zeroth order
182         CALL cp_fm_set_all(U, 0.0_dp, 1.0_dp)
183         ! first order
184         expfactor = 1.0_dp
185         CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, G)
186         CALL cp_fm_frobenius_norm(G, tnorm)
187         !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
188         IF (tnorm .GT. 1.0E-10_dp) THEN
189            ! other orders
190            CALL cp_fm_to_fm(G, Gp1)
191            DO i = 2, taylor_order
192               ! new power of G
193               CALL cp_gemm('N', 'N', k, k, k, 1.0_dp, G, Gp1, 0.0_dp, Gp2)
194               CALL cp_fm_to_fm(Gp2, Gp1)
195               ! add to the taylor expansion so far
196               expfactor = expfactor/REAL(i, KIND=dp)
197               CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, Gp1)
198               CALL cp_fm_frobenius_norm(Gp1, tnorm)
199               !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
200               IF (tnorm*expfactor .LT. 1.0E-10_dp) EXIT
201            ENDDO
202         ENDIF
203         !
204         ! incrementaly rotate the MOs
205         CALL cp_gemm('N', 'N', n, k, k, 1.0_dp, C, U, 0.0_dp, CTmp)
206         CALL cp_fm_to_fm(CTmp, C)
207         !
208         ! printing
209         IF (output_unit .GT. 0) THEN
210            WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, ABS((f2 - f2old)/f2)
211         ENDIF
212         !
213         ! Are we done?
214         sweeps = istep
215         !IF(gnorm.LE.grad_thresh.AND.ABS((f2-f2old)/f2).LE.f2_thresh.AND.istep.GT.1) THEN
216         IF (ABS((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1) THEN
217            converged = .TRUE.
218            EXIT
219         ENDIF
220         f2old = f2
221      ENDDO
222      !
223      ! here we should do one refine step to enforce C'*S*C=1 for any case
224      !
225      ! Print the final result
226      IF (output_unit .GT. 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
227      !
228      ! sparsity
229      !DO i=1,size(thresh,1)
230      !   gnorm = 0.0_dp
231      !   DO o=1,ncol_local
232      !      DO p=1,nrow_local
233      !         IF(ABS(C%local_data(p,o)).GT.thresh(i)) THEN
234      !            gnorm = gnorm + 1.0_dp
235      !         ENDIF
236      !      ENDDO
237      !   ENDDO
238      !   CALL mp_sum(gnorm,C%matrix_struct%para_env%group)
239      !   IF(output_unit.GT.0) THEN
240      !      WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
241      !   ENDIF
242      !ENDDO
243      !
244      ! deallocate
245      CALL cp_fm_struct_release(fm_struct_k_k)
246      CALL cp_fm_release(CTmp)
247      CALL cp_fm_release(U)
248      CALL cp_fm_release(G)
249      CALL cp_fm_release(Gp1)
250      CALL cp_fm_release(Gp2)
251
252      CALL timestop(handle)
253
254   END SUBROUTINE approx_l1_norm_sd
255! **************************************************************************************************
256!> \brief ...
257!> \param cell ...
258!> \param weights ...
259! **************************************************************************************************
260   SUBROUTINE initialize_weights(cell, weights)
261      TYPE(cell_type), INTENT(IN)                        :: cell
262      REAL(KIND=dp), DIMENSION(:)                        :: weights
263
264      REAL(KIND=dp), DIMENSION(3, 3)                     :: metric
265
266      metric = 0.0_dp
267      CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat, 3, cell%hmat, 3, 0.0_dp, metric, 3)
268
269      weights(1) = METRIC(1, 1) - METRIC(1, 2) - METRIC(1, 3)
270      weights(2) = METRIC(2, 2) - METRIC(1, 2) - METRIC(2, 3)
271      weights(3) = METRIC(3, 3) - METRIC(1, 3) - METRIC(2, 3)
272      weights(4) = METRIC(1, 2)
273      weights(5) = METRIC(1, 3)
274      weights(6) = METRIC(2, 3)
275
276   END SUBROUTINE initialize_weights
277
278! **************************************************************************************************
279!> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
280!>        can deal with serial para_envs.
281!> \param weights ...
282!> \param zij ...
283!> \param vectors ...
284!> \param para_env ...
285!> \param max_iter ...
286!> \param eps_localization ...
287!> \param sweeps ...
288!> \param out_each ...
289!> \param target_time ...
290!> \param start_time ...
291!> \par History
292!> \author Joost VandeVondele (02.2010)
293! **************************************************************************************************
294   SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, &
295                               sweeps, out_each, target_time, start_time)
296
297      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
298      TYPE(cp_fm_p_type), INTENT(INOUT)                  :: ZIJ(:, :)
299      TYPE(cp_fm_type), POINTER                          :: vectors
300      TYPE(cp_para_env_type), POINTER                    :: para_env
301      INTEGER, INTENT(IN)                                :: max_iter
302      REAL(KIND=dp), INTENT(IN)                          :: eps_localization
303      INTEGER                                            :: sweeps
304      INTEGER, INTENT(IN)                                :: out_each
305      REAL(dp)                                           :: target_time, start_time
306
307      IF (para_env%num_pe == 1) THEN
308         CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, out_each)
309      ELSE
310         CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
311                              sweeps, out_each, target_time, start_time)
312      ENDIF
313
314   END SUBROUTINE jacobi_rotations
315
316! **************************************************************************************************
317!> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
318!>        while the routine below works in parallel, it is too slow to be useful
319!> \param weights ...
320!> \param zij ...
321!> \param vectors ...
322!> \param max_iter ...
323!> \param eps_localization ...
324!> \param sweeps ...
325!> \param out_each ...
326! **************************************************************************************************
327   SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, out_each)
328      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
329      TYPE(cp_fm_p_type), INTENT(INOUT)                  :: ZIJ(:, :)
330      TYPE(cp_fm_type), POINTER                          :: vectors
331      INTEGER, INTENT(IN)                                :: max_iter
332      REAL(KIND=dp), INTENT(IN)                          :: eps_localization
333      INTEGER                                            :: sweeps
334      INTEGER, INTENT(IN)                                :: out_each
335
336      CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial', &
337         routineP = moduleN//':'//routineN
338
339      COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
340      INTEGER                                            :: dim2, handle, idim, istate, jstate, &
341                                                            nstate, unit_nr
342      REAL(KIND=dp)                                      :: ct, st, t1, t2, theta, tolerance
343      TYPE(cp_cfm_p_type), DIMENSION(:), POINTER         :: c_zij
344      TYPE(cp_cfm_type), POINTER                         :: c_rmat
345      TYPE(cp_fm_type), POINTER                          :: rmat
346
347      CALL timeset(routineN, handle)
348
349      dim2 = SIZE(zij, 2)
350      NULLIFY (rmat, c_rmat, c_zij)
351      ALLOCATE (c_zij(dim2))
352      NULLIFY (mii, mij, mjj)
353      ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
354
355      CALL cp_fm_create(rmat, zij(1, 1)%matrix%matrix_struct)
356      CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
357
358      CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix%matrix_struct)
359      CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
360      DO idim = 1, dim2
361         NULLIFY (c_zij(idim)%matrix)
362         CALL cp_cfm_create(c_zij(idim)%matrix, zij(1, 1)%matrix%matrix_struct)
363         c_zij(idim)%matrix%local_data = CMPLX(zij(1, idim)%matrix%local_data, &
364                                               zij(2, idim)%matrix%local_data, dp)
365      ENDDO
366
367      CALL cp_fm_get_info(rmat, nrow_global=nstate)
368      tolerance = 1.0e10_dp
369      sweeps = 0
370      unit_nr = -1
371      IF (rmat%matrix_struct%para_env%mepos .EQ. rmat%matrix_struct%para_env%source) THEN
372         unit_nr = cp_logger_get_default_unit_nr()
373         WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
374      END IF
375      ! do jacobi sweeps until converged
376      DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
377         sweeps = sweeps + 1
378         t1 = m_walltime()
379         DO istate = 1, nstate
380            DO jstate = istate + 1, nstate
381               DO idim = 1, dim2
382                  CALL cp_cfm_get_element(c_zij(idim)%matrix, istate, istate, mii(idim))
383                  CALL cp_cfm_get_element(c_zij(idim)%matrix, istate, jstate, mij(idim))
384                  CALL cp_cfm_get_element(c_zij(idim)%matrix, jstate, jstate, mjj(idim))
385               END DO
386               CALL get_angle(mii, mjj, mij, weights, theta)
387               st = SIN(theta)
388               ct = COS(theta)
389               CALL rotate_zij(istate, jstate, st, ct, c_zij)
390               CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
391            END DO
392         END DO
393         CALL check_tolerance(c_zij, weights, tolerance)
394         t2 = m_walltime()
395         IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
396            WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
397               "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
398            CALL m_flush(unit_nr)
399         ENDIF
400      END DO
401
402      DO idim = 1, dim2
403         zij(1, idim)%matrix%local_data = REAL(c_zij(idim)%matrix%local_data, dp)
404         zij(2, idim)%matrix%local_data = AIMAG(c_zij(idim)%matrix%local_data)
405         CALL cp_cfm_release(c_zij(idim)%matrix)
406      ENDDO
407      DEALLOCATE (c_zij)
408      DEALLOCATE (mii, mij, mjj)
409      rmat%local_data = REAL(c_rmat%local_data, dp)
410      CALL cp_cfm_release(c_rmat)
411      CALL rotate_orbitals(rmat, vectors)
412      CALL cp_fm_release(rmat)
413
414      CALL timestop(handle)
415
416   END SUBROUTINE jacobi_rotations_serial
417! **************************************************************************************************
418!> \brief ...
419!> \param istate ...
420!> \param jstate ...
421!> \param st ...
422!> \param ct ...
423!> \param zij ...
424! **************************************************************************************************
425   SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
426      IMPLICIT NONE
427      INTEGER, INTENT(IN) :: istate, jstate
428      TYPE(cp_cfm_p_type) :: zij(:)
429      REAL(KIND=dp), INTENT(IN) :: st, ct
430      ! Locals
431      INTEGER :: idim, nstate
432      COMPLEX(KIND=dp) :: st_cmplx
433#if defined(__SCALAPACK)
434      INTEGER, DIMENSION(9) :: desc
435#else
436      INTEGER :: stride
437#endif
438
439      st_cmplx = CMPLX(st, 0.0_dp, dp)
440      CALL cp_cfm_get_info(zij(1)%matrix, nrow_global=nstate)
441      DO idim = 1, SIZE(zij, 1)
442#if defined(__SCALAPACK)
443         desc(:) = zij(idim)%matrix%matrix_struct%descriptor(:)
444         CALL pzrot(nstate, zij(idim)%matrix%local_data(1, 1), 1, istate, desc, 1, &
445                    zij(idim)%matrix%local_data(1, 1), 1, jstate, desc, 1, ct, st_cmplx)
446         CALL pzrot(nstate, zij(idim)%matrix%local_data(1, 1), istate, 1, desc, nstate, &
447                    zij(idim)%matrix%local_data(1, 1), jstate, 1, desc, nstate, ct, st_cmplx)
448#else
449         CALL zrot(nstate, zij(idim)%matrix%local_data(1, istate), 1, &
450                   zij(idim)%matrix%local_data(1, jstate), 1, ct, st_cmplx)
451         stride = SIZE(zij(idim)%matrix%local_data, 1)
452         CALL zrot(nstate, zij(idim)%matrix%local_data(istate, 1), stride, &
453                   zij(idim)%matrix%local_data(jstate, 1), stride, ct, st_cmplx)
454#endif
455      END DO
456   END SUBROUTINE rotate_zij
457! **************************************************************************************************
458!> \brief ...
459!> \param istate ...
460!> \param jstate ...
461!> \param st ...
462!> \param ct ...
463!> \param rmat ...
464! **************************************************************************************************
465   SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
466      IMPLICIT NONE
467      INTEGER, INTENT(IN) :: istate, jstate
468      TYPE(cp_cfm_type), POINTER :: rmat
469      REAL(KIND=dp), INTENT(IN) :: ct, st
470! Locals
471      INTEGER :: nstate
472      COMPLEX(KIND=dp) :: st_cmplx
473#if defined(__SCALAPACK)
474      INTEGER, DIMENSION(9) :: desc
475#endif
476
477      st_cmplx = CMPLX(st, 0.0_dp, dp)
478      CALL cp_cfm_get_info(rmat, nrow_global=nstate)
479#if defined(__SCALAPACK)
480      desc(:) = rmat%matrix_struct%descriptor(:)
481      CALL pzrot(nstate, rmat%local_data(1, 1), 1, istate, desc, 1, &
482                 rmat%local_data(1, 1), 1, jstate, desc, 1, ct, st_cmplx)
483#else
484      CALL zrot(nstate, rmat%local_data(1, istate), 1, rmat%local_data(1, jstate), 1, ct, st_cmplx)
485#endif
486   END SUBROUTINE rotate_rmat
487! **************************************************************************************************
488!> \brief ...
489!> \param mii ...
490!> \param mjj ...
491!> \param mij ...
492!> \param weights ...
493!> \param theta ...
494! **************************************************************************************************
495   SUBROUTINE get_angle(mii, mjj, mij, weights, theta)
496      COMPLEX(KIND=dp), POINTER                          :: mii(:), mjj(:), mij(:)
497      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
498      REAL(KIND=dp), INTENT(OUT)                         :: theta
499
500      COMPLEX(KIND=dp)                                   :: z11, z12, z22
501      INTEGER                                            :: dim_m, idim
502      REAL(KIND=dp)                                      :: a12, b12, d2, ratio
503
504      a12 = 0.0_dp
505      b12 = 0.0_dp
506      dim_m = SIZE(mii)
507      DO idim = 1, dim_m
508         z11 = mii(idim)
509         z22 = mjj(idim)
510         z12 = mij(idim)
511         a12 = a12 + weights(idim)*REAL(CONJG(z12)*(z11 - z22), KIND=dp)
512         b12 = b12 + weights(idim)*REAL((z12*CONJG(z12) - &
513                                         0.25_dp*(z11 - z22)*(CONJG(z11) - CONJG(z22))), KIND=dp)
514      END DO
515      IF (ABS(b12) > 1.e-10_dp) THEN
516         ratio = -a12/b12
517         theta = 0.25_dp*ATAN(ratio)
518      ELSEIF (ABS(b12) < 1.e-10_dp) THEN
519         b12 = 0.0_dp
520         theta = 0.0_dp
521      ELSE
522         theta = 0.25_dp*pi
523      ENDIF
524! Check second derivative info
525      d2 = a12*SIN(4._dp*theta) - b12*COS(4._dp*theta)
526      IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
527         IF (theta > 0.0_dp) THEN ! make theta as small as possible
528            theta = theta - 0.25_dp*pi
529         ELSE
530            theta = theta + 0.25_dp*pi
531         ENDIF
532      ENDIF
533   END SUBROUTINE get_angle
534! **************************************************************************************************
535!> \brief ...
536!> \param zij ...
537!> \param weights ...
538!> \param tolerance ...
539! **************************************************************************************************
540   SUBROUTINE check_tolerance(zij, weights, tolerance)
541      TYPE(cp_cfm_p_type)                                :: zij(:)
542      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
543      REAL(KIND=dp), INTENT(OUT)                         :: tolerance
544
545      CHARACTER(len=*), PARAMETER :: routineN = 'check_tolerance', &
546         routineP = moduleN//':'//routineN
547
548      INTEGER                                            :: handle
549      TYPE(cp_fm_type), POINTER                          :: force
550
551      CALL timeset(routineN, handle)
552
553! compute gradient at t=0
554
555      NULLIFY (force)
556      CALL cp_fm_create(force, zij(1)%matrix%matrix_struct)
557      CALL cp_fm_set_all(force, 0._dp)
558      CALL grad_at_0(zij, weights, force)
559      CALL cp_fm_maxabsval(force, tolerance)
560      CALL cp_fm_release(force)
561
562      CALL timestop(handle)
563
564   END SUBROUTINE check_tolerance
565! **************************************************************************************************
566!> \brief ...
567!> \param rmat ...
568!> \param vectors ...
569! **************************************************************************************************
570   SUBROUTINE rotate_orbitals(rmat, vectors)
571      TYPE(cp_fm_type), POINTER                          :: rmat, vectors
572
573      INTEGER                                            :: k, n
574      TYPE(cp_fm_type), POINTER                          :: wf
575
576      NULLIFY (wf)
577      CALL cp_fm_create(wf, vectors%matrix_struct)
578      CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
579      CALL cp_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
580      CALL cp_fm_to_fm(wf, vectors)
581      CALL cp_fm_release(wf)
582   END SUBROUTINE rotate_orbitals
583! **************************************************************************************************
584!> \brief ...
585!> \param diag ...
586!> \param weights ...
587!> \param matrix ...
588!> \param ndim ...
589! **************************************************************************************************
590   SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
591      COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
592      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
593      TYPE(cp_fm_type), POINTER                          :: matrix
594      INTEGER, INTENT(IN)                                :: ndim
595
596      COMPLEX(KIND=dp)                                   :: zii, zjj
597      INTEGER                                            :: idim, istate, jstate, ncol_local, &
598                                                            nrow_global, nrow_local
599      INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
600      REAL(KIND=dp)                                      :: gradsq_ij
601
602      CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
603                          ncol_local=ncol_local, nrow_global=nrow_global, &
604                          row_indices=row_indices, col_indices=col_indices)
605
606      DO istate = 1, nrow_local
607         DO jstate = 1, ncol_local
608! get real and imaginary parts
609            gradsq_ij = 0.0_dp
610            DO idim = 1, ndim
611               zii = diag(row_indices(istate), idim)
612               zjj = diag(col_indices(jstate), idim)
613               gradsq_ij = gradsq_ij + weights(idim)* &
614                           4.0_dp*REAL((CONJG(zii)*zii + CONJG(zjj)*zjj), KIND=dp)
615            END DO
616            matrix%local_data(istate, jstate) = gradsq_ij
617         END DO
618      END DO
619   END SUBROUTINE gradsq_at_0
620! **************************************************************************************************
621!> \brief ...
622!> \param matrix_p ...
623!> \param weights ...
624!> \param matrix ...
625! **************************************************************************************************
626   SUBROUTINE grad_at_0(matrix_p, weights, matrix)
627      TYPE(cp_cfm_p_type)                                :: matrix_p(:)
628      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
629      TYPE(cp_fm_type), POINTER                          :: matrix
630
631      COMPLEX(KIND=dp)                                   :: zii, zij, zjj
632      COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
633      INTEGER                                            :: dim_m, idim, istate, jstate, ncol_local, &
634                                                            nrow_global, nrow_local
635      INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
636      REAL(KIND=dp)                                      :: grad_ij
637
638      NULLIFY (diag)
639      CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
640                          ncol_local=ncol_local, nrow_global=nrow_global, &
641                          row_indices=row_indices, col_indices=col_indices)
642      dim_m = SIZE(matrix_p, 1)
643      ALLOCATE (diag(nrow_global, dim_m))
644
645      DO idim = 1, dim_m
646         DO istate = 1, nrow_global
647            CALL cp_cfm_get_element(matrix_p(idim)%matrix, istate, istate, diag(istate, idim))
648         ENDDO
649      ENDDO
650
651      DO istate = 1, nrow_local
652         DO jstate = 1, ncol_local
653! get real and imaginary parts
654            grad_ij = 0.0_dp
655            DO idim = 1, dim_m
656               zii = diag(row_indices(istate), idim)
657               zjj = diag(col_indices(jstate), idim)
658               zij = matrix_p(idim)%matrix%local_data(istate, jstate)
659               grad_ij = grad_ij + weights(idim)* &
660                         REAL(4.0_dp*CONJG(zij)*(zjj - zii), dp)
661            END DO
662            matrix%local_data(istate, jstate) = grad_ij
663         END DO
664      END DO
665      DEALLOCATE (diag)
666   END SUBROUTINE grad_at_0
667
668! return energy and maximum gradient in the current point
669! **************************************************************************************************
670!> \brief ...
671!> \param weights ...
672!> \param zij ...
673!> \param tolerance ...
674!> \param value ...
675! **************************************************************************************************
676   SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
677      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
678      TYPE(cp_fm_p_type), INTENT(INOUT)                  :: ZIJ(:, :)
679      REAL(KIND=dp)                                      :: tolerance, value
680
681      COMPLEX(KIND=dp)                                   :: kii, kij, kjj
682      COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
683      INTEGER                                            :: idim, istate, jstate, ncol_local, &
684                                                            nrow_global, nrow_local
685      INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
686      REAL(KIND=dp)                                      :: grad_ij, ra, rb
687
688      NULLIFY (diag)
689      CALL cp_fm_get_info(zij(1, 1)%matrix, nrow_local=nrow_local, &
690                          ncol_local=ncol_local, nrow_global=nrow_global, &
691                          row_indices=row_indices, col_indices=col_indices)
692      ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
693      value = 0.0_dp
694      DO idim = 1, SIZE(zij, 2)
695         DO istate = 1, nrow_global
696            CALL cp_fm_get_element(zij(1, idim)%matrix, istate, istate, ra)
697            CALL cp_fm_get_element(zij(2, idim)%matrix, istate, istate, rb)
698            diag(istate, idim) = CMPLX(ra, rb, dp)
699            value = value + weights(idim) - weights(idim)*ABS(diag(istate, idim))**2
700         ENDDO
701      ENDDO
702      tolerance = 0.0_dp
703      DO istate = 1, nrow_local
704         DO jstate = 1, ncol_local
705            grad_ij = 0.0_dp
706            DO idim = 1, SIZE(zij, 2)
707               kii = diag(row_indices(istate), idim)
708               kjj = diag(col_indices(jstate), idim)
709               ra = zij(1, idim)%matrix%local_data(istate, jstate)
710               rb = zij(2, idim)%matrix%local_data(istate, jstate)
711               kij = CMPLX(ra, rb, dp)
712               grad_ij = grad_ij + weights(idim)* &
713                         REAL(4.0_dp*CONJG(kij)*(kjj - kii), dp)
714            END DO
715            tolerance = MAX(ABS(grad_ij), tolerance)
716         ENDDO
717      ENDDO
718      CALL mp_max(tolerance, zij(1, 1)%matrix%matrix_struct%para_env%group)
719
720      DEALLOCATE (diag)
721
722   END SUBROUTINE check_tolerance_new
723
724! **************************************************************************************************
725!> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
726!>        and rotates them all at the same time (hoping for the best of course)
727!> \param weights ...
728!> \param zij ...
729!> \param vectors ...
730!> \param max_iter ...
731!> \param max_crazy_angle ...
732!> \param crazy_scale ...
733!> \param crazy_use_diag ...
734!> \param eps_localization ...
735!> \param iterations ...
736!> \param converged ...
737! **************************************************************************************************
738   SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
739                              eps_localization, iterations, converged)
740      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
741      TYPE(cp_fm_p_type), INTENT(INOUT)                  :: ZIJ(:, :)
742      TYPE(cp_fm_type), POINTER                          :: vectors
743      INTEGER, INTENT(IN)                                :: max_iter
744      REAL(KIND=dp), INTENT(IN)                          :: max_crazy_angle
745      REAL(KIND=dp)                                      :: crazy_scale
746      LOGICAL                                            :: crazy_use_diag
747      REAL(KIND=dp), INTENT(IN)                          :: eps_localization
748      INTEGER                                            :: iterations
749      LOGICAL, INTENT(out), OPTIONAL                     :: converged
750
751      CHARACTER(len=*), PARAMETER :: routineN = 'crazy_rotations', &
752         routineP = moduleN//':'//routineN
753      COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
754                                                            czero = (0.0_dp, 0.0_dp)
755
756      COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: evals_exp
757      COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag_z
758      COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
759      INTEGER                                            :: dim2, handle, i, icol, idim, irow, &
760                                                            method, ncol_global, ncol_local, &
761                                                            norder, nrow_global, nrow_local, &
762                                                            nsquare, unit_nr
763      INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
764      LOGICAL                                            :: do_emd
765      REAL(KIND=dp)                                      :: eps_exp, limit_crazy_angle, maxeval, &
766                                                            norm, ra, rb, theta, tolerance, value
767      REAL(KIND=dp), DIMENSION(:), POINTER               :: evals
768      TYPE(cp_cfm_type), POINTER                         :: cmat_A, cmat_R, cmat_t1
769      TYPE(cp_fm_type), POINTER                          :: mat_R, mat_t, mat_theta, mat_U
770
771      CALL timeset(routineN, handle)
772      NULLIFY (row_indices, col_indices)
773      NULLIFY (mat_U, mat_t, mat_R)
774      NULLIFY (cmat_A, cmat_R, cmat_t1)
775      CALL cp_fm_get_info(zij(1, 1)%matrix, nrow_global=nrow_global, &
776                          ncol_global=ncol_global, &
777                          row_indices=row_indices, col_indices=col_indices, &
778                          nrow_local=nrow_local, ncol_local=ncol_local)
779
780      limit_crazy_angle = max_crazy_angle
781
782      NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
783      dim2 = SIZE(zij, 2)
784      ALLOCATE (diag_z(nrow_global, dim2))
785      ALLOCATE (evals(nrow_global))
786      ALLOCATE (evals_exp(nrow_global))
787
788      CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix%matrix_struct)
789      CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix%matrix_struct)
790      CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix%matrix_struct)
791
792      CALL cp_fm_create(mat_U, zij(1, 1)%matrix%matrix_struct)
793      CALL cp_fm_create(mat_t, zij(1, 1)%matrix%matrix_struct)
794      CALL cp_fm_create(mat_R, zij(1, 1)%matrix%matrix_struct)
795
796      NULLIFY (mat_theta)
797      CALL cp_fm_create(mat_theta, zij(1, 1)%matrix%matrix_struct)
798
799      CALL cp_fm_set_all(mat_R, 0.0_dp, 1.0_dp)
800      CALL cp_fm_set_all(mat_t, 0.0_dp)
801      ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
802      DO idim = 1, dim2
803         CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim)%matrix)
804         CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim)%matrix)
805      ENDDO
806      CALL cp_fm_syevd(mat_t, mat_U, evals)
807      DO idim = 1, dim2
808         ! rotate z's
809         CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim)%matrix, mat_U, 0.0_dp, mat_t)
810         CALL cp_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim)%matrix)
811         CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim)%matrix, mat_U, 0.0_dp, mat_t)
812         CALL cp_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim)%matrix)
813      ENDDO
814      ! collect rotation matrix
815      CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
816      CALL cp_fm_to_fm(mat_t, mat_R)
817
818      unit_nr = -1
819      IF (cmat_A%matrix_struct%para_env%mepos .EQ. cmat_A%matrix_struct%para_env%source) THEN
820         unit_nr = cp_logger_get_default_unit_nr()
821         WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
822            "CRAZY| ", "Iter", "value    ", "gradient", "Max. eval", "limit"
823      ENDIF
824
825      iterations = 0
826      tolerance = 1.0_dp
827
828      DO
829         iterations = iterations + 1
830         DO idim = 1, dim2
831            DO i = 1, nrow_global
832               CALL cp_fm_get_element(zij(1, idim)%matrix, i, i, ra)
833               CALL cp_fm_get_element(zij(2, idim)%matrix, i, i, rb)
834               diag_z(i, idim) = CMPLX(ra, rb, dp)
835            ENDDO
836         ENDDO
837         DO irow = 1, nrow_local
838            DO icol = 1, ncol_local
839               DO idim = 1, dim2
840                  ra = zij(1, idim)%matrix%local_data(irow, icol)
841                  rb = zij(2, idim)%matrix%local_data(irow, icol)
842                  mij(idim) = CMPLX(ra, rb, dp)
843                  mii(idim) = diag_z(row_indices(irow), idim)
844                  mjj(idim) = diag_z(col_indices(icol), idim)
845               ENDDO
846               IF (row_indices(irow) .NE. col_indices(icol)) THEN
847                  CALL get_angle(mii, mjj, mij, weights, theta)
848                  theta = crazy_scale*theta
849                  IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
850                  IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
851                  IF (crazy_use_diag) THEN
852                     cmat_A%local_data(irow, icol) = -CMPLX(0.0_dp, theta, dp)
853                  ELSE
854                     mat_theta%local_data(irow, icol) = -theta
855                  ENDIF
856               ELSE
857                  IF (crazy_use_diag) THEN
858                     cmat_A%local_data(irow, icol) = czero
859                  ELSE
860                     mat_theta%local_data(irow, icol) = 0.0_dp
861                  ENDIF
862               ENDIF
863            ENDDO
864         ENDDO
865
866         ! construct rotation matrix U based on A using diagonalization
867         ! alternatively, exp based on repeated squaring could be faster
868         IF (crazy_use_diag) THEN
869            CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
870            maxeval = MAXVAL(ABS(evals))
871            evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
872            CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
873            CALL cp_cfm_column_scale(cmat_t1, evals_exp)
874            CALL cp_cfm_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
875                             cmat_t1, cmat_R, czero, cmat_A)
876            mat_U%local_data = REAL(cmat_A%local_data, KIND=dp) ! U is a real matrix
877         ELSE
878            do_emd = .FALSE.
879            method = 2
880            eps_exp = 1.0_dp*EPSILON(eps_exp)
881            CALL cp_fm_maxabsrownorm(mat_theta, norm)
882            maxeval = norm ! an upper bound
883            CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
884            CALL exp_pade_real(mat_U, mat_theta, nsquare, norder)
885         ENDIF
886
887         DO idim = 1, dim2
888            ! rotate z's
889            CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim)%matrix, mat_U, 0.0_dp, mat_t)
890            CALL cp_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim)%matrix)
891            CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim)%matrix, mat_U, 0.0_dp, mat_t)
892            CALL cp_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim)%matrix)
893         ENDDO
894         ! collect rotation matrix
895         CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
896         CALL cp_fm_to_fm(mat_t, mat_R)
897
898         CALL check_tolerance_new(weights, zij, tolerance, value)
899
900         IF (unit_nr > 0) THEN
901            WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
902               "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
903            CALL m_flush(unit_nr)
904         ENDIF
905         IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter) EXIT
906      ENDDO
907
908      IF (PRESENT(converged)) converged = (tolerance .LT. eps_localization)
909
910      CALL cp_cfm_release(cmat_A)
911      CALL cp_cfm_release(cmat_R)
912      CALL cp_cfm_release(cmat_T1)
913
914      CALL cp_fm_release(mat_U)
915      CALL cp_fm_release(mat_T)
916      CALL cp_fm_release(mat_theta)
917
918      CALL rotate_orbitals(mat_R, vectors)
919
920      CALL cp_fm_release(mat_R)
921      DEALLOCATE (evals_exp, evals, diag_z)
922      DEALLOCATE (mii, mij, mjj)
923
924      CALL timestop(handle)
925
926   END SUBROUTINE crazy_rotations
927
928! **************************************************************************************************
929!> \brief use the exponential parametrization as described in to perform a direct mini
930!>        Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
931!> none of the input is modified for the time being, just finds the rotations
932!> that minimizes, and throws it away afterwards :-)
933!> apart from being expensive and not cleaned, this works fine
934!> useful to try different spread functionals
935!> \param weights ...
936!> \param zij ...
937!> \param vectors ...
938!> \param max_iter ...
939!> \param eps_localization ...
940!> \param iterations ...
941! **************************************************************************************************
942   SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
943      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
944      TYPE(cp_fm_p_type), INTENT(INOUT)                  :: ZIJ(:, :)
945      TYPE(cp_fm_type), POINTER                          :: vectors
946      INTEGER, INTENT(IN)                                :: max_iter
947      REAL(KIND=dp), INTENT(IN)                          :: eps_localization
948      INTEGER                                            :: iterations
949
950      CHARACTER(len=*), PARAMETER :: routineN = 'direct_mini', routineP = moduleN//':'//routineN
951      COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
952                                                            czero = (0.0_dp, 0.0_dp)
953      REAL(KIND=dp), PARAMETER                           :: gold_sec = 0.3819_dp
954
955      COMPLEX(KIND=dp)                                   :: lk, ll, tmp
956      COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: evals_exp
957      COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag_z
958      INTEGER                                            :: handle, i, icol, idim, irow, &
959                                                            line_search_count, line_searches, lsl, &
960                                                            lsm, lsr, n, ncol_local, ndim, &
961                                                            nrow_local, output_unit
962      INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
963      LOGICAL                                            :: new_direction
964      REAL(KIND=dp)                                      :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
965                                                            fb, fc, nom, normg, normg_cross, &
966                                                            normg_old, npos, omega, tol, val, x0, &
967                                                            x1, xa, xb, xc
968      REAL(KIND=dp), DIMENSION(150)                      :: energy, grad, pos
969      REAL(KIND=dp), DIMENSION(:), POINTER               :: evals, fval, fvald
970      TYPE(cp_cfm_p_type), DIMENSION(:), POINTER         :: c_zij
971      TYPE(cp_cfm_type), POINTER                         :: cmat_A, cmat_B, cmat_M, cmat_R, cmat_t1, &
972                                                            cmat_t2, cmat_U
973      TYPE(cp_fm_type), POINTER                          :: matrix_A, matrix_G, matrix_G_old, &
974                                                            matrix_G_search, matrix_H, matrix_R, &
975                                                            matrix_T
976
977      NULLIFY (evals, evals_exp, diag_z, fval, fvald, c_zij)
978      NULLIFY (matrix_A, matrix_G, matrix_T, matrix_G_search, matrix_G_old)
979      NULLIFY (cmat_A, cmat_U, cmat_R, cmat_t1, cmat_t2, cmat_B, cmat_M)
980
981      CALL timeset(routineN, handle)
982      output_unit = cp_logger_get_default_io_unit()
983
984      n = zij(1, 1)%matrix%matrix_struct%nrow_global
985      ndim = (SIZE(zij, 2))
986
987      IF (output_unit > 0) THEN
988         WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
989         WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
990      END IF
991
992      ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
993      ALLOCATE (c_zij(ndim))
994
995      ! create the three complex matrices Z
996      DO idim = 1, ndim
997         NULLIFY (c_zij(idim)%matrix)
998         CALL cp_cfm_create(c_zij(idim)%matrix, zij(1, 1)%matrix%matrix_struct)
999         c_zij(idim)%matrix%local_data = CMPLX(zij(1, idim)%matrix%local_data, &
1000                                               zij(2, idim)%matrix%local_data, dp)
1001      ENDDO
1002
1003      CALL cp_fm_create(matrix_A, zij(1, 1)%matrix%matrix_struct)
1004      CALL cp_fm_create(matrix_G, zij(1, 1)%matrix%matrix_struct)
1005      CALL cp_fm_create(matrix_T, zij(1, 1)%matrix%matrix_struct)
1006      CALL cp_fm_create(matrix_H, zij(1, 1)%matrix%matrix_struct)
1007      CALL cp_fm_create(matrix_G_search, zij(1, 1)%matrix%matrix_struct)
1008      CALL cp_fm_create(matrix_G_old, zij(1, 1)%matrix%matrix_struct)
1009      CALL cp_fm_create(matrix_R, zij(1, 1)%matrix%matrix_struct)
1010      CALL cp_fm_set_all(matrix_R, 0.0_dp, 1.0_dp)
1011
1012      CALL cp_fm_set_all(matrix_A, 0.0_dp)
1013!    CALL cp_fm_init_random ( matrix_A )
1014
1015      CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix%matrix_struct)
1016      CALL cp_cfm_create(cmat_U, zij(1, 1)%matrix%matrix_struct)
1017      CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix%matrix_struct)
1018      CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix%matrix_struct)
1019      CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix%matrix_struct)
1020      CALL cp_cfm_create(cmat_B, zij(1, 1)%matrix%matrix_struct)
1021      CALL cp_cfm_create(cmat_M, zij(1, 1)%matrix%matrix_struct)
1022
1023      CALL cp_cfm_get_info(cmat_B, nrow_local=nrow_local, ncol_local=ncol_local, &
1024                           row_indices=row_indices, col_indices=col_indices)
1025
1026      CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1027      CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1028      normg_old = 1.0E30_dp
1029      ds_min = 1.0_dp
1030      new_direction = .TRUE.
1031      Iterations = 0
1032      line_searches = 0
1033      line_search_count = 0
1034      DO
1035         iterations = iterations + 1
1036         ! compute U,R,evals given A
1037         cmat_A%local_data = CMPLX(0.0_dp, matrix_A%local_data, dp) ! cmat_A is hermitian, evals are reals
1038         CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
1039         evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1040         CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
1041         CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1042         CALL cp_cfm_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_U)
1043         cmat_U%local_data = REAL(cmat_U%local_data, KIND=dp) ! enforce numerics, U is a real matrix
1044
1045         IF (new_direction .AND. MOD(line_searches, 20) .EQ. 5) THEN ! reset with A .eq. 0
1046            DO idim = 1, ndim
1047               CALL cp_cfm_gemm('N', 'N', n, n, n, cone, c_zij(idim)%matrix, cmat_U, czero, cmat_t1)
1048               CALL cp_cfm_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, c_zij(idim)%matrix)
1049            ENDDO
1050            ! collect rotation matrix
1051            matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
1052            CALL cp_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
1053            CALL cp_fm_to_fm(matrix_T, matrix_R)
1054
1055            CALL cp_cfm_set_all(cmat_U, czero, cone)
1056            CALL cp_cfm_set_all(cmat_R, czero, cone)
1057            CALL cp_cfm_set_all(cmat_A, czero)
1058            CALL cp_fm_set_all(matrix_A, 0.0_dp)
1059            evals(:) = 0.0_dp
1060            evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1061            CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1062            CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1063            normg_old = 1.0E30_dp
1064         ENDIF
1065
1066         ! compute Omega and M
1067         CALL cp_cfm_set_all(cmat_M, czero)
1068         omega = 0.0_dp
1069         DO idim = 1, ndim
1070            CALL cp_cfm_gemm('N', 'N', n, n, n, cone, c_zij(idim)%matrix, cmat_U, czero, cmat_t1) ! t1=ZU
1071            CALL cp_cfm_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1072            DO i = 1, n
1073               CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1074               SELECT CASE (2) ! allows for selection of different spread functionals
1075               CASE (1)
1076                  fval(i) = -weights(idim)*LOG(ABS(diag_z(i, idim))**2)
1077                  fvald(i) = -weights(idim)/(ABS(diag_z(i, idim))**2)
1078               CASE (2) ! corresponds to the jacobi setup
1079                  fval(i) = weights(idim) - weights(idim)*ABS(diag_z(i, idim))**2
1080                  fvald(i) = -weights(idim)
1081               END SELECT
1082               omega = omega + fval(i)
1083            ENDDO
1084            DO icol = 1, ncol_local
1085               DO irow = 1, nrow_local
1086                  tmp = cmat_t1%local_data(irow, icol)*CONJG(diag_z(col_indices(icol), idim))
1087                  cmat_M%local_data(irow, icol) = cmat_M%local_data(irow, icol) &
1088                                                  + 4.0_dp*fvald(col_indices(icol))*REAL(tmp, KIND=dp)
1089               ENDDO
1090            ENDDO
1091         ENDDO
1092
1093         ! compute Hessian diagonal approximation for the preconditioner
1094         IF (.TRUE.) THEN
1095            CALL gradsq_at_0(diag_z, weights, matrix_H, ndim)
1096         ELSE
1097            CALL cp_fm_set_all(matrix_H, 1.0_dp)
1098         ENDIF
1099
1100         ! compute B
1101         DO icol = 1, ncol_local
1102            DO irow = 1, nrow_local
1103               ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1104               lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1105               IF (ABS(ll - lk) .LT. 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1106                  tmp = 1.0_dp
1107                  cmat_B%local_data(irow, icol) = 0.0_dp
1108                  DO i = 1, 16
1109                     cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol) + tmp
1110                     tmp = tmp*(ll - lk)/(i + 1)
1111                  ENDDO
1112                  cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol)*EXP(lk)
1113               ELSE
1114                  cmat_B%local_data(irow, icol) = (EXP(lk) - EXP(ll))/(lk - ll)
1115               ENDIF
1116            ENDDO
1117         ENDDO
1118         ! compute gradient matrix_G
1119
1120         CALL cp_cfm_gemm('C', 'N', n, n, n, cone, cmat_M, cmat_R, czero, cmat_t1) ! t1=(M^T)(R^T)
1121         CALL cp_cfm_gemm('C', 'N', n, n, n, cone, cmat_R, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1122         CALL cp_cfm_schur_product(cmat_t2, cmat_B, cmat_t1)
1123         CALL cp_cfm_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_t2)
1124         CALL cp_cfm_gemm('N', 'N', n, n, n, cone, cmat_R, cmat_t2, czero, cmat_t1)
1125         matrix_G%local_data = REAL(cmat_t1%local_data, KIND=dp)
1126         CALL cp_fm_transpose(matrix_G, matrix_T)
1127         CALL cp_fm_scale_and_add(-1.0_dp, matrix_G, 1.0_dp, matrix_T)
1128         CALL cp_fm_maxabsval(matrix_G, tol)
1129
1130         ! from here on, minimizing technology
1131         IF (new_direction) THEN
1132            ! energy converged up to machine precision ?
1133            line_searches = line_searches + 1
1134            ! DO i=1,line_search_count
1135            !   write(15,*) pos(i),energy(i)
1136            ! ENDDO
1137            ! write(15,*) ""
1138            ! CALL m_flush(15)
1139            !write(16,*) evals(:)
1140            !write(17,*) matrix_A%local_data(:,:)
1141            !write(18,*) matrix_G%local_data(:,:)
1142            IF (output_unit > 0) THEN
1143               WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, Iterations, Omega, tol, ds_min
1144               CALL m_flush(output_unit)
1145            ENDIF
1146            IF (tol < eps_localization .OR. iterations > max_iter) EXIT
1147
1148            IF (.TRUE.) THEN ! do conjugate gradient CG
1149               CALL cp_fm_trace(matrix_G, matrix_G_old, normg_cross)
1150               normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
1151               ! apply the preconditioner
1152               DO icol = 1, ncol_local
1153                  DO irow = 1, nrow_local
1154                     matrix_G_old%local_data(irow, icol) = matrix_G%local_data(irow, icol)/matrix_H%local_data(irow, icol)
1155                  ENDDO
1156               ENDDO
1157               CALL cp_fm_trace(matrix_G, matrix_G_old, normg)
1158               normg = normg*0.5_dp
1159               beta_pr = (normg - normg_cross)/normg_old
1160               normg_old = normg
1161               beta_pr = MAX(beta_pr, 0.0_dp)
1162               CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
1163               CALL cp_fm_trace(matrix_G_search, matrix_G_old, normg_cross)
1164               IF (normg_cross .GE. 0) THEN ! back to SD
1165                  IF (matrix_A%matrix_struct%para_env%mepos .EQ. &
1166                      matrix_A%matrix_struct%para_env%source) THEN
1167                     WRITE (cp_logger_get_default_unit_nr(), *) "!"
1168                  ENDIF
1169                  beta_pr = 0.0_dp
1170                  CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
1171               ENDIF
1172            ELSE ! SD
1173               CALL cp_fm_scale_and_add(0.0_dp, matrix_G_search, -1.0_dp, matrix_G)
1174            ENDIF
1175            ! ds_min=1.0E-4_dp
1176            line_search_count = 0
1177         END IF
1178         line_search_count = line_search_count + 1
1179         energy(line_search_count) = Omega
1180
1181         ! line search section
1182         SELECT CASE (3)
1183         CASE (1) ! two point line search
1184            SELECT CASE (line_search_count)
1185            CASE (1)
1186               pos(1) = 0.0_dp
1187               pos(2) = ds_min
1188               CALL cp_fm_trace(matrix_G, matrix_G_search, grad(1))
1189               grad(1) = grad(1)/2.0_dp
1190               new_direction = .FALSE.
1191            CASE (2)
1192               new_direction = .TRUE.
1193               x0 = pos(1) ! 0.0_dp
1194               c = energy(1)
1195               b = grad(1)
1196               x1 = pos(2)
1197               a = (energy(2) - b*x1 - c)/(x1**2)
1198               IF (a .LE. 0.0_dp) a = 1.0E-15_dp
1199               npos = -b/(2.0_dp*a)
1200               val = a*npos**2 + b*npos + c
1201               IF (val .LT. energy(1) .AND. val .LE. energy(2)) THEN
1202                  ! we go to a minimum, but ...
1203                  ! we take a guard against too large steps
1204                  pos(3) = MIN(npos, MAXVAL(pos(1:2))*4.0_dp)
1205               ELSE ! just take an extended step
1206                  pos(3) = MAXVAL(pos(1:2))*2.0_dp
1207               ENDIF
1208            END SELECT
1209         CASE (2) ! 3 point line search
1210            SELECT CASE (line_search_count)
1211            CASE (1)
1212               new_direction = .FALSE.
1213               pos(1) = 0.0_dp
1214               pos(2) = ds_min*0.8_dp
1215            CASE (2)
1216               new_direction = .FALSE.
1217               IF (energy(2) .GT. energy(1)) THEN
1218                  pos(3) = ds_min*0.7_dp
1219               ELSE
1220                  pos(3) = ds_min*1.4_dp
1221               ENDIF
1222            CASE (3)
1223               new_direction = .TRUE.
1224               xa = pos(1)
1225               xb = pos(2)
1226               xc = pos(3)
1227               fa = energy(1)
1228               fb = energy(2)
1229               fc = energy(3)
1230               nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
1231               denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
1232               IF (ABS(denom) .LE. 1.0E-18_dp*MAX(ABS(fb - fc), ABS(fb - fa))) THEN
1233                  npos = xb
1234               ELSE
1235                  npos = xb - 0.5_dp*nom/denom ! position of the stationary point
1236               ENDIF
1237               val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
1238                     (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
1239                     (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
1240               IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc) THEN ! OK, we go to a minimum
1241                  ! we take a guard against too large steps
1242                  pos(4) = MAX(MAXVAL(pos(1:3))*0.01_dp, &
1243                               MIN(npos, MAXVAL(pos(1:3))*4.0_dp))
1244               ELSE ! just take an extended step
1245                  pos(4) = MAXVAL(pos(1:3))*2.0_dp
1246               ENDIF
1247            END SELECT
1248         CASE (3) ! golden section hunt
1249            new_direction = .FALSE.
1250            IF (line_search_count .EQ. 1) THEN
1251               lsl = 1
1252               lsr = 0
1253               lsm = 1
1254               pos(1) = 0.0_dp
1255               pos(2) = ds_min/gold_sec
1256            ELSE
1257               IF (line_search_count .EQ. 150) CPABORT("Too many")
1258               IF (lsr .EQ. 0) THEN
1259                  IF (energy(line_search_count - 1) .LT. energy(line_search_count)) THEN
1260                     lsr = line_search_count
1261                     pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1262                  ELSE
1263                     lsl = lsm
1264                     lsm = line_search_count
1265                     pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1266                  ENDIF
1267               ELSE
1268                  IF (pos(line_search_count) .LT. pos(lsm)) THEN
1269                     IF (energy(line_search_count) .LT. energy(lsm)) THEN
1270                        lsr = lsm
1271                        lsm = line_search_count
1272                     ELSE
1273                        lsl = line_search_count
1274                     ENDIF
1275                  ELSE
1276                     IF (energy(line_search_count) .LT. energy(lsm)) THEN
1277                        lsl = lsm
1278                        lsm = line_search_count
1279                     ELSE
1280                        lsr = line_search_count
1281                     ENDIF
1282                  ENDIF
1283                  IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
1284                     pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1285                  ELSE
1286                     pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1287                  ENDIF
1288                  IF ((pos(lsr) - pos(lsl)) .LT. 1.0E-3_dp*pos(lsr)) THEN
1289                     new_direction = .TRUE.
1290                  ENDIF
1291               ENDIF ! lsr .eq. 0
1292            ENDIF ! first step
1293         END SELECT
1294         ! now go to the suggested point
1295         ds_min = pos(line_search_count + 1)
1296         ds = pos(line_search_count + 1) - pos(line_search_count)
1297         CALL cp_fm_scale_and_add(1.0_dp, matrix_A, ds, matrix_G_search)
1298      ENDDO
1299
1300      ! collect rotation matrix
1301      matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
1302      CALL cp_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
1303      CALL cp_fm_to_fm(matrix_T, matrix_R)
1304      CALL rotate_orbitals(matrix_R, vectors)
1305      CALL cp_fm_release(matrix_R)
1306
1307      CALL cp_fm_release(matrix_A)
1308      CALL cp_fm_release(matrix_G)
1309      CALL cp_fm_release(matrix_H)
1310      CALL cp_fm_release(matrix_T)
1311      CALL cp_fm_release(matrix_G_search)
1312      CALL cp_fm_release(matrix_G_old)
1313      CALL cp_cfm_release(cmat_A)
1314      CALL cp_cfm_release(cmat_U)
1315      CALL cp_cfm_release(cmat_R)
1316      CALL cp_cfm_release(cmat_t1)
1317      CALL cp_cfm_release(cmat_t2)
1318      CALL cp_cfm_release(cmat_B)
1319      CALL cp_cfm_release(cmat_M)
1320
1321      DEALLOCATE (evals, evals_exp, fval, fvald)
1322
1323      DO idim = 1, SIZE(c_zij)
1324         zij(1, idim)%matrix%local_data = REAL(c_zij(idim)%matrix%local_data, dp)
1325         zij(2, idim)%matrix%local_data = AIMAG(c_zij(idim)%matrix%local_data)
1326         CALL cp_cfm_release(c_zij(idim)%matrix)
1327      ENDDO
1328      DEALLOCATE (c_zij)
1329      DEALLOCATE (diag_z)
1330
1331      CALL timestop(handle)
1332
1333   END SUBROUTINE
1334
1335! **************************************************************************************************
1336!> \brief Parallel algorithm for jacobi rotations
1337!> \param weights ...
1338!> \param zij ...
1339!> \param vectors ...
1340!> \param para_env ...
1341!> \param max_iter ...
1342!> \param eps_localization ...
1343!> \param sweeps ...
1344!> \param out_each ...
1345!> \param target_time ...
1346!> \param start_time ...
1347!> \par History
1348!>      use allgather for improved performance
1349!> \author MI (11.2009)
1350! **************************************************************************************************
1351   SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
1352                              sweeps, out_each, target_time, start_time)
1353
1354      REAL(KIND=dp), INTENT(IN)                          :: weights(:)
1355      TYPE(cp_fm_p_type), INTENT(INOUT)                  :: ZIJ(:, :)
1356      TYPE(cp_fm_type), POINTER                          :: vectors
1357      TYPE(cp_para_env_type), POINTER                    :: para_env
1358      INTEGER, INTENT(IN)                                :: max_iter
1359      REAL(KIND=dp), INTENT(IN)                          :: eps_localization
1360      INTEGER                                            :: sweeps
1361      INTEGER, INTENT(IN)                                :: out_each
1362      REAL(dp)                                           :: target_time, start_time
1363
1364      CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para', &
1365         routineP = moduleN//':'//routineN
1366
1367      COMPLEX(KIND=dp)                                   :: zi, zj
1368      COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)  :: c_array_me, c_array_partner
1369      COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
1370      INTEGER :: dim2, handle, i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, &
1371         ip, ip_has_i, ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, &
1372         iu1, iu2, iup1, iup2, j, jj, jstate, k, kk, n1, n2, nblock, nblock_max, npair, nperm, &
1373         ns_me, ns_partner, ns_recv_from, ns_recv_partner, nstate, output_unit
1374      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rcount, rdispl
1375      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: list_pair, ns_bound
1376      LOGICAL                                            :: should_stop
1377      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: gmat, rmat_loc, rmat_recv, rmat_send, &
1378                                                            rotmat, z_ij_loc_im, z_ij_loc_re
1379      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: rmat_recv_all
1380      REAL(KIND=dp)                                      :: ct, func, gmax, grad, ri, rj, st, t1, &
1381                                                            t2, theta, tolerance, xlow, xstate, &
1382                                                            xup, zc, zr
1383      TYPE(cp_fm_type), POINTER                          :: rmat
1384      TYPE(set_c_1d_type), DIMENSION(:), POINTER         :: zdiag_all, zdiag_me
1385      TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc, xyz_mix, xyz_mix_ns
1386
1387      CALL timeset(routineN, handle)
1388
1389      output_unit = cp_logger_get_default_io_unit()
1390
1391      NULLIFY (rmat, cz_ij_loc, zdiag_all, zdiag_me)
1392      NULLIFY (xyz_mix, xyz_mix_ns)
1393      NULLIFY (mii, mij, mjj)
1394
1395      dim2 = SIZE(zij, 2)
1396      ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1397
1398      CALL cp_fm_create(rmat, zij(1, 1)%matrix%matrix_struct)
1399      CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
1400
1401      CALL cp_fm_get_info(rmat, nrow_global=nstate)
1402
1403      ALLOCATE (rcount(para_env%num_pe), STAT=istat)
1404      ALLOCATE (rdispl(para_env%num_pe), STAT=istat)
1405
1406      tolerance = 1.0e10_dp
1407      sweeps = 0
1408
1409      ! number of processor pairs and number of permutations
1410      npair = (para_env%num_pe + 1)/2
1411      nperm = para_env%num_pe - MOD(para_env%num_pe + 1, 2)
1412      ALLOCATE (list_pair(2, npair))
1413
1414      ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
1415      xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
1416      nblock_max = 0
1417      ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
1418      Xlow = 0.0D0
1419      Xup = 0.0D0
1420      DO ip = 1, para_env%num_pe
1421         xup = xlow + xstate
1422         ns_bound(ip - 1, 1) = NINT(xlow) + 1
1423         ns_bound(ip - 1, 2) = NINT(xup)
1424         IF (NINT(xup) .GT. nstate) THEN
1425            ns_bound(ip - 1, 2) = nstate
1426         ENDIF
1427         IF (NINT(xlow) .GT. nstate) THEN
1428            ns_bound(ip - 1, 1) = nstate + 1
1429         ENDIF
1430         xlow = xup
1431      ENDDO
1432      DO ip = 0, para_env%num_pe - 1
1433         nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
1434         nblock_max = MAX(nblock_max, nblock)
1435      ENDDO
1436
1437      ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
1438      ALLOCATE (z_ij_loc_re(nstate, nblock_max))
1439      ALLOCATE (z_ij_loc_im(nstate, nblock_max))
1440      ALLOCATE (cz_ij_loc(dim2))
1441      DO idim = 1, dim2
1442         DO ip = 0, para_env%num_pe - 1
1443            nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
1444            CALL cp_fm_get_submatrix(zij(1, idim)%matrix, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
1445            CALL cp_fm_get_submatrix(zij(2, idim)%matrix, z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
1446            IF (para_env%mepos == ip) THEN
1447               ns_me = nblock
1448               ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
1449               DO i = 1, ns_me
1450                  DO j = 1, nstate
1451                     cz_ij_loc(idim)%c_array(j, i) = CMPLX(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
1452                  END DO
1453               END DO
1454            END IF
1455         END DO ! ip
1456      END DO
1457      DEALLOCATE (z_ij_loc_re)
1458      DEALLOCATE (z_ij_loc_im)
1459
1460      ! initialize rotation matrix
1461      ALLOCATE (rotmat(nstate, 2*nblock_max))
1462      rotmat = 0.0_dp
1463      DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
1464         ii = i - ns_bound(para_env%mepos, 1) + 1
1465         rotmat(i, ii) = 1.0_dp
1466      END DO
1467
1468      ALLOCATE (xyz_mix(dim2))
1469      ALLOCATE (xyz_mix_ns(dim2))
1470      ALLOCATE (zdiag_me(dim2))
1471      ALLOCATE (zdiag_all(dim2))
1472
1473      ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
1474      IF (ns_me /= 0) THEN
1475         ALLOCATE (c_array_me(nstate, ns_me, dim2))
1476         DO idim = 1, dim2
1477            ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
1478         END DO
1479         ALLOCATE (gmat(nstate, ns_me))
1480      END IF
1481
1482      DO idim = 1, dim2
1483         ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
1484         zdiag_me(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp)
1485         ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
1486         zdiag_all(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp)
1487      END DO
1488      ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
1489      ALLOCATE (rmat_send(nblock_max*2, nblock_max))
1490
1491      ! buffer for message passing
1492      ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
1493
1494      IF (output_unit > 0) THEN
1495         WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
1496         WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
1497      END IF
1498
1499      DO sweeps = 1, max_iter + 1
1500         IF (sweeps == max_iter + 1) THEN
1501            IF (output_unit > 0) THEN
1502               WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
1503               WRITE (output_unit, *) '               Present  Max. gradient = ', tolerance
1504            END IF
1505            EXIT
1506         ENDIF
1507         t1 = m_walltime()
1508
1509         DO iperm = 1, nperm
1510
1511            ! fix partners for this permutation, and get the number of states
1512            CALL eberlein(iperm, para_env, list_pair)
1513            ip_partner = -1
1514            ns_partner = 0
1515            DO ipair = 1, npair
1516               IF (list_pair(1, ipair) == para_env%mepos) THEN
1517                  ip_partner = list_pair(2, ipair)
1518                  EXIT
1519               ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
1520                  ip_partner = list_pair(1, ipair)
1521                  EXIT
1522               END IF
1523            END DO
1524            IF (ip_partner >= 0) THEN
1525               ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
1526            ELSE
1527               ns_partner = 0
1528            END IF
1529
1530            ! if there is a non-zero block connecting two partners, jacobi-sweep it.
1531            IF (ns_partner*ns_me /= 0) THEN
1532
1533               ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
1534               rmat_loc = 0.0_dp
1535               DO i = 1, ns_me + ns_partner
1536                  rmat_loc(i, i) = 1.0_dp
1537               END DO
1538
1539               ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
1540
1541               DO idim = 1, dim2
1542                  ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
1543                  DO i = 1, ns_me
1544                     c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
1545                  END DO
1546               END DO
1547
1548               CALL mp_sendrecv(msgin=c_array_me, dest=ip_partner, &
1549                                msgout=c_array_partner, source=ip_partner, comm=para_env%group)
1550
1551               n1 = ns_me
1552               n2 = ns_partner
1553               ilow1 = ns_bound(para_env%mepos, 1)
1554               iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
1555               ilow2 = ns_bound(ip_partner, 1)
1556               iup2 = ns_bound(ip_partner, 1) + n2 - 1
1557               IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
1558                  il1 = 1
1559                  iu1 = n1
1560                  iu1 = n1
1561                  il2 = 1 + n1
1562                  iu2 = n1 + n2
1563               ELSE
1564                  il1 = 1 + n2
1565                  iu1 = n1 + n2
1566                  iu1 = n1 + n2
1567                  il2 = 1
1568                  iu2 = n2
1569               END IF
1570
1571               DO idim = 1, dim2
1572                  DO i = 1, n1
1573                     xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
1574                     xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
1575                  END DO
1576                  DO i = 1, n2
1577                     xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
1578                     xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
1579                  END DO
1580               END DO
1581
1582               DO istate = 1, n1 + n2
1583                  DO jstate = istate + 1, n1 + n2
1584                     DO idim = 1, dim2
1585                        mii(idim) = xyz_mix(idim)%c_array(istate, istate)
1586                        mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
1587                        mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
1588                     END DO
1589                     CALL get_angle(mii, mjj, mij, weights, theta)
1590                     st = SIN(theta)
1591                     ct = COS(theta)
1592                     DO idim = 1, dim2
1593                        DO i = 1, n1 + n2
1594                           zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
1595                           zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
1596                           xyz_mix(idim)%c_array(i, istate) = zi
1597                           xyz_mix(idim)%c_array(i, jstate) = zj
1598                        END DO
1599                        DO i = 1, n1 + n2
1600                           zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
1601                           zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
1602                           xyz_mix(idim)%c_array(istate, i) = zi
1603                           xyz_mix(idim)%c_array(jstate, i) = zj
1604                        END DO
1605                     END DO
1606
1607                     DO i = 1, n1 + n2
1608                        ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
1609                        rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
1610                        rmat_loc(i, istate) = ri
1611                        rmat_loc(i, jstate) = rj
1612                     END DO
1613                  END DO
1614               END DO
1615
1616               k = nblock_max + 1
1617               CALL mp_sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
1618                                rotmat(1:nstate, k:k + n2 - 1), ip_partner, para_env%group)
1619
1620               IF (ilow1 < ilow2) THEN
1621              CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
1622                  CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(1, 1), n1 + n2, 1.0_dp, gmat, nstate)
1623               ELSE
1624              CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1, n2 + 1), n1 + n2, 0.0_dp, gmat, nstate)
1625                  CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, &
1626                             rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
1627               END IF
1628
1629               CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
1630
1631               DO idim = 1, dim2
1632                  DO i = 1, n1
1633                     xyz_mix_ns(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp)
1634                  END DO
1635
1636                  DO istate = 1, n1
1637                     DO jstate = 1, nstate
1638                        DO i = 1, n2
1639                           xyz_mix_ns(idim)%c_array(jstate, istate) = &
1640                              xyz_mix_ns(idim)%c_array(jstate, istate) + &
1641                              c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
1642                        END DO
1643                     END DO
1644                  END DO
1645                  DO istate = 1, n1
1646                     DO jstate = 1, nstate
1647                        DO i = 1, n1
1648                           xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
1649                                                                 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
1650                        END DO
1651                     END DO
1652                  END DO
1653               END DO ! idim
1654
1655               DEALLOCATE (c_array_partner)
1656
1657            ELSE ! save my data
1658               DO idim = 1, dim2
1659                  DO i = 1, ns_me
1660                     xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
1661                  END DO
1662               END DO
1663            END IF
1664
1665            DO idim = 1, dim2
1666               DO i = 1, ns_me
1667                  cz_ij_loc(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp)
1668               END DO
1669            END DO
1670
1671            IF (ns_partner*ns_me /= 0) THEN
1672               ! transpose rotation matrix rmat_loc
1673               DO i = 1, ns_me + ns_partner
1674                  DO j = i + 1, ns_me + ns_partner
1675                     ri = rmat_loc(i, j)
1676                     rmat_loc(i, j) = rmat_loc(j, i)
1677                     rmat_loc(j, i) = ri
1678                  END DO
1679               END DO
1680
1681               ! prepare for distribution
1682               DO i = 1, n1
1683                  rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
1684               END DO
1685               ik = nblock_max
1686               DO i = 1, n2
1687                  rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
1688               END DO
1689            ELSE
1690               rmat_send = 0.0_dp
1691            END IF
1692
1693            ! collect data from all tasks (this takes some significant time)
1694            CALL mp_allgather(rmat_send, rmat_recv_all, para_env%group)
1695
1696            ! update blocks everywhere
1697            DO ip = 0, para_env%num_pe - 1
1698
1699               ip_recv_from = MOD(para_env%mepos - IP + para_env%num_pe, para_env%num_pe)
1700               rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
1701
1702               ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
1703
1704               IF (ns_me /= 0) THEN
1705                  IF (ns_recv_from /= 0) THEN
1706                     !look for the partner of ip_recv_from
1707                     ip_recv_partner = -1
1708                     ns_recv_partner = 0
1709                     DO ipair = 1, npair
1710                        IF (list_pair(1, ipair) == ip_recv_from) THEN
1711                           ip_recv_partner = list_pair(2, ipair)
1712                           EXIT
1713                        ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
1714                           ip_recv_partner = list_pair(1, ipair)
1715                           EXIT
1716                        END IF
1717                     END DO
1718
1719                     IF (ip_recv_partner >= 0) THEN
1720                        ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
1721                     END IF
1722                     IF (ns_recv_partner > 0) THEN
1723                        il1 = ns_bound(para_env%mepos, 1)
1724                        il_recv = ns_bound(ip_recv_from, 1)
1725                        il_recv_partner = ns_bound(ip_recv_partner, 1)
1726                        ik = nblock_max
1727
1728                        DO idim = 1, dim2
1729                           DO i = 1, ns_recv_from
1730                              ii = il_recv + i - 1
1731                              DO j = 1, ns_me
1732                                 jj = j
1733                                 DO k = 1, ns_recv_from
1734                                    kk = il_recv + k - 1
1735                                    cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
1736                                                                      rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
1737                                 END DO
1738                              END DO
1739                           END DO
1740                           DO i = 1, ns_recv_from
1741                              ii = il_recv + i - 1
1742                              DO j = 1, ns_me
1743                                 jj = j
1744                                 DO k = 1, ns_recv_partner
1745                                    kk = il_recv_partner + k - 1
1746                                    cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
1747                                                                      rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
1748                                 END DO
1749                              END DO
1750                           END DO
1751                        END DO ! idim
1752                     ELSE
1753                        il1 = ns_bound(para_env%mepos, 1)
1754                        il_recv = ns_bound(ip_recv_from, 1)
1755                        DO idim = 1, dim2
1756                           DO j = 1, ns_me
1757                              jj = j
1758                              DO i = 1, ns_recv_from
1759                                 ii = il_recv + i - 1
1760                                 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
1761                              END DO
1762                           END DO
1763                        END DO ! idim
1764                     END IF
1765                  END IF
1766               END IF ! ns_me
1767            END DO ! ip
1768
1769            IF (ns_partner*ns_me /= 0) THEN
1770               DEALLOCATE (rmat_loc)
1771               DO idim = 1, dim2
1772                  DEALLOCATE (xyz_mix(idim)%c_array)
1773               END DO
1774            END IF
1775
1776         END DO ! iperm
1777
1778         ! calculate the max gradient
1779         DO idim = 1, dim2
1780            DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
1781               ii = i - ns_bound(para_env%mepos, 1) + 1
1782               zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
1783               zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
1784            END DO
1785            rcount(:) = SIZE(zdiag_me(idim)%c_array)
1786            rdispl(1) = 0
1787            DO ip = 2, para_env%num_pe
1788               rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1789            ENDDO
1790            ! collect all the diagonal elements in a replicated 1d array
1791            CALL mp_allgather(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl, para_env%group)
1792         END DO
1793
1794         gmax = 0.0_dp
1795         DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
1796            k = j - ns_bound(para_env%mepos, 1) + 1
1797            DO i = 1, j - 1
1798               ! find the location of the diagonal element (i,i)
1799               DO ip = 0, para_env%num_pe - 1
1800                  IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
1801                     ip_has_i = ip
1802                     EXIT
1803                  END IF
1804               END DO
1805               ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
1806               ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
1807               jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
1808               grad = 0.0_dp
1809               DO idim = 1, dim2
1810                  zi = zdiag_all(idim)%c_array(ii)
1811                  zj = zdiag_all(idim)%c_array(jj)
1812                  grad = grad + weights(idim)*REAL(4.0_dp*CONJG(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
1813               END DO
1814               gmax = MAX(gmax, ABS(grad))
1815            END DO
1816         END DO
1817
1818         CALL mp_max(gmax, para_env%group)
1819         tolerance = gmax
1820
1821         func = 0.0_dp
1822         DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
1823            k = i - ns_bound(para_env%mepos, 1) + 1
1824            DO idim = 1, dim2
1825               zr = REAL(cz_ij_loc(idim)%c_array(i, k), dp)
1826               zc = AIMAG(cz_ij_loc(idim)%c_array(i, k))
1827               func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
1828            END DO
1829         END DO
1830         CALL mp_sum(func, para_env%group)
1831         t2 = m_walltime()
1832
1833         IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
1834            WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
1835            CALL m_flush(output_unit)
1836         END IF
1837         IF (tolerance < eps_localization) EXIT
1838         CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
1839         IF (should_stop) EXIT
1840
1841      END DO ! sweeps
1842
1843      ! buffer for message passing
1844      DEALLOCATE (rmat_recv_all)
1845
1846      DEALLOCATE (rmat_recv)
1847      DEALLOCATE (rmat_send)
1848      IF (ns_me > 0) THEN
1849         DEALLOCATE (c_array_me)
1850      ENDIF
1851      DO idim = 1, dim2
1852         DEALLOCATE (zdiag_me(idim)%c_array)
1853         DEALLOCATE (zdiag_all(idim)%c_array)
1854      END DO
1855      DEALLOCATE (zdiag_me)
1856      DEALLOCATE (zdiag_all)
1857      DEALLOCATE (xyz_mix)
1858      DO idim = 1, dim2
1859         IF (ns_me /= 0) THEN
1860            DEALLOCATE (xyz_mix_ns(idim)%c_array)
1861         ENDIF
1862      END DO
1863      DEALLOCATE (xyz_mix_ns)
1864      IF (ns_me /= 0) THEN
1865         DEALLOCATE (gmat)
1866      ENDIF
1867      DEALLOCATE (mii)
1868      DEALLOCATE (mij)
1869      DEALLOCATE (mjj)
1870
1871      ilow1 = ns_bound(para_env%mepos, 1)
1872      ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
1873      ALLOCATE (z_ij_loc_re(nstate, nblock_max))
1874      ALLOCATE (z_ij_loc_im(nstate, nblock_max))
1875      DO idim = 1, dim2
1876         DO ip = 0, para_env%num_pe - 1
1877            z_ij_loc_re = 0.0_dp
1878            z_ij_loc_im = 0.0_dp
1879            nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
1880            IF (ip == para_env%mepos) THEN
1881               ns_me = nblock
1882               DO i = 1, ns_me
1883                  ii = ilow1 + i - 1
1884                  DO j = 1, nstate
1885                     z_ij_loc_re(j, i) = REAL(cz_ij_loc(idim)%c_array(j, i), dp)
1886                     z_ij_loc_im(j, i) = AIMAG(cz_ij_loc(idim)%c_array(j, i))
1887                  END DO
1888               END DO
1889            END IF
1890            CALL mp_bcast(z_ij_loc_re, ip, para_env%group)
1891            CALL mp_bcast(z_ij_loc_im, ip, para_env%group)
1892            CALL cp_fm_set_submatrix(zij(1, idim)%matrix, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
1893            CALL cp_fm_set_submatrix(zij(2, idim)%matrix, z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
1894         END DO ! ip
1895      END DO
1896
1897      DO ip = 0, para_env%num_pe - 1
1898         z_ij_loc_re = 0.0_dp
1899         nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
1900         IF (ip == para_env%mepos) THEN
1901            ns_me = nblock
1902            DO i = 1, ns_me
1903               ii = ilow1 + i - 1
1904               DO j = 1, nstate
1905                  z_ij_loc_re(j, i) = rotmat(j, i)
1906               END DO
1907            END DO
1908         END IF
1909         CALL mp_bcast(z_ij_loc_re, ip, para_env%group)
1910         CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
1911      END DO
1912
1913      DEALLOCATE (z_ij_loc_re)
1914      DEALLOCATE (z_ij_loc_im)
1915      DO idim = 1, dim2
1916         DEALLOCATE (cz_ij_loc(idim)%c_array)
1917      END DO
1918      DEALLOCATE (cz_ij_loc)
1919
1920      CALL mp_sync(para_env%group)
1921      CALL rotate_orbitals(rmat, vectors)
1922      CALL cp_fm_release(rmat)
1923
1924      DEALLOCATE (rotmat)
1925      DEALLOCATE (ns_bound, list_pair)
1926
1927      CALL timestop(handle)
1928
1929   END SUBROUTINE jacobi_rot_para
1930
1931! **************************************************************************************************
1932!> \brief ...
1933!> \param iperm ...
1934!> \param para_env ...
1935!> \param list_pair ...
1936! **************************************************************************************************
1937   SUBROUTINE eberlein(iperm, para_env, list_pair)
1938      INTEGER, INTENT(IN)                                :: iperm
1939      TYPE(cp_para_env_type), POINTER                    :: para_env
1940      INTEGER, DIMENSION(:, :)                           :: list_pair
1941
1942      CHARACTER(len=*), PARAMETER :: routineN = 'eberlein', routineP = moduleN//':'//routineN
1943
1944      INTEGER                                            :: i, ii, jj, npair
1945
1946      npair = (para_env%num_pe + 1)/2
1947      IF (iperm == 1) THEN
1948!..set up initial ordering
1949         DO I = 0, para_env%num_pe - 1
1950            II = ((i + 1) + 1)/2
1951            JJ = MOD((i + 1) + 1, 2) + 1
1952            list_pair(JJ, II) = i
1953         ENDDO
1954         IF (MOD(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
1955      ELSEIF (MOD(iperm, 2) == 0) THEN
1956!..a type shift
1957         jj = list_pair(1, npair)
1958         DO I = npair, 3, -1
1959            list_pair(1, I) = list_pair(1, I - 1)
1960         ENDDO
1961         list_pair(1, 2) = list_pair(2, 1)
1962         list_pair(2, 1) = jj
1963      ELSE
1964!..b type shift
1965         jj = list_pair(2, 1)
1966         DO I = 1, npair - 1
1967            list_pair(2, I) = list_pair(2, I + 1)
1968         ENDDO
1969         list_pair(2, npair) = jj
1970      ENDIF
1971
1972   END SUBROUTINE eberlein
1973
1974! **************************************************************************************************
1975!> \brief ...
1976!> \param vectors ...
1977!> \param op_sm_set ...
1978!> \param zij_fm_set ...
1979! **************************************************************************************************
1980   SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
1981
1982      TYPE(cp_fm_type), POINTER                          :: vectors
1983      TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set
1984      TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER       :: zij_fm_set
1985
1986      CHARACTER(len=*), PARAMETER :: routineN = 'zij_matrix', routineP = moduleN//':'//routineN
1987
1988      INTEGER                                            :: handle, i, j, nao, nmoloc
1989      TYPE(cp_fm_type), POINTER                          :: opvec
1990
1991      CALL timeset(routineN, handle)
1992
1993      NULLIFY (opvec)
1994      ! get rows and cols of the input
1995      CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
1996      ! replicate the input kind of matrix
1997      CALL cp_fm_create(opvec, vectors%matrix_struct)
1998
1999      ! Compute zij here
2000      DO i = 1, SIZE(zij_fm_set, 2)
2001         DO j = 1, SIZE(zij_fm_set, 1)
2002            CALL cp_fm_set_all(zij_fm_set(j, i)%matrix, 0.0_dp)
2003            CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
2004            CALL cp_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
2005                         zij_fm_set(j, i)%matrix)
2006         ENDDO
2007      ENDDO
2008
2009      CALL cp_fm_release(opvec)
2010      CALL timestop(handle)
2011
2012   END SUBROUTINE zij_matrix
2013
2014! **************************************************************************************************
2015!> \brief ...
2016!> \param vectors ...
2017! **************************************************************************************************
2018   SUBROUTINE scdm_qrfact(vectors)
2019
2020      TYPE(cp_fm_type), POINTER                          :: vectors
2021
2022      CHARACTER(len=*), PARAMETER :: routineN = 'scdm_qrfact', routineP = moduleN//':'//routineN
2023
2024      INTEGER                                            :: handle, ncolT, nrowT
2025      REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
2026      TYPE(cp_fm_struct_type), POINTER                   :: cstruct
2027      TYPE(cp_fm_type), POINTER                          :: CTp, Qf, tmp
2028
2029      CALL timeset(routineN, handle)
2030
2031      ! Create Transpose of Coefficient Matrix vectors
2032      nrowT = vectors%matrix_struct%ncol_global
2033      ncolT = vectors%matrix_struct%nrow_global
2034
2035      CALL cp_fm_struct_create(cstruct, vectors%matrix_struct%para_env, &
2036                               vectors%matrix_struct%context, nrowT, ncolT, vectors%matrix_struct%ncol_block, &
2037                               vectors%matrix_struct%nrow_block, first_p_pos=vectors%matrix_struct%first_p_pos)
2038      CALL cp_fm_create(CTp, cstruct)
2039      CALL cp_fm_struct_release(cstruct)
2040
2041      ALLOCATE (tau(nrowT))
2042
2043      CALL cp_fm_transpose(vectors, CTp)
2044
2045      ! Call SCALAPACK routine to get QR decomposition of CTs
2046      CALL cp_fm_pdgeqpf(CTp, tau, nrowT, ncolT, 1, 1)
2047
2048      ! Construction of Q from the scalapack output
2049      CALL cp_fm_struct_create(cstruct, para_env=CTp%matrix_struct%para_env, &
2050                               context=CTp%matrix_struct%context, nrow_global=CTp%matrix_struct%nrow_global, &
2051                               ncol_global=CTp%matrix_struct%nrow_global)
2052      CALL cp_fm_create(Qf, cstruct)
2053      CALL cp_fm_struct_release(cstruct)
2054      CALL cp_fm_to_fm_submat(CTp, Qf, nrowT, nrowT, 1, 1, 1, 1)
2055
2056      ! Call SCALAPACK routine to get Q
2057      CALL cp_fm_pdorgqr(Qf, tau, nrowT, 1, 1)
2058
2059      ! Transform original coefficient matrix vectors
2060      CALL cp_fm_create(tmp, vectors%matrix_struct)
2061      CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
2062      CALL cp_fm_to_fm(vectors, tmp)
2063      CALL cp_gemm('N', 'N', ncolT, nrowT, nrowT, 1.0_dp, tmp, Qf, 0.0_dp, vectors)
2064
2065      ! Cleanup
2066      CALL cp_fm_release(CTp)
2067      CALL cp_fm_release(tmp)
2068      CALL cp_fm_release(Qf)
2069      DEALLOCATE (tau)
2070
2071      CALL timestop(handle)
2072
2073   END SUBROUTINE scdm_qrfact
2074
2075END MODULE qs_localization_methods
2076