1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Performance tests for basic tasks like matrix multiplies, copy, fft.
8!> \par History
9!>      30-Nov-2000 (JGH) added input
10!>      02-Jan-2001 (JGH) Parallel FFT
11!>      28-Feb-2002 (JGH) Clebsch-Gordon Coefficients
12!>      06-Jun-2003 (JGH) Real space grid test
13!>      Eigensolver test (29.08.05,MK)
14!> \author JGH  6-NOV-2000
15! **************************************************************************************************
16MODULE library_tests
17
18   USE ai_coulomb_test,                 ONLY: eri_test
19   USE cell_types,                      ONLY: cell_create,&
20                                              cell_release,&
21                                              cell_type,&
22                                              init_cell
23   USE cg_test,                         ONLY: clebsch_gordon_test
24   USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&
25                                              cp_blacs_env_release,&
26                                              cp_blacs_env_type
27   USE cp_eri_mme_interface,            ONLY: cp_eri_mme_perf_acc_test
28   USE cp_files,                        ONLY: close_file,&
29                                              open_file
30   USE cp_fm_basic_linalg,              ONLY: cp_fm_gemm
31   USE cp_fm_diag,                      ONLY: cp_fm_syevd,&
32                                              cp_fm_syevx
33   USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
34                                              cp_fm_struct_get,&
35                                              cp_fm_struct_release,&
36                                              cp_fm_struct_type
37   USE cp_fm_types,                     ONLY: cp_fm_create,&
38                                              cp_fm_release,&
39                                              cp_fm_set_all,&
40                                              cp_fm_set_submatrix,&
41                                              cp_fm_to_fm,&
42                                              cp_fm_type
43   USE cp_gemm_interface,               ONLY: cp_gemm
44   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
45                                              cp_logger_type
46   USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
47                                              cp_print_key_unit_nr
48   USE cp_para_types,                   ONLY: cp_para_env_type
49   USE cp_realspace_grid_init,          ONLY: init_input_type
50   USE dbcsr_api,                       ONLY: dbcsr_reset_randmat_seed,&
51                                              dbcsr_run_tests
52   USE fft_tools,                       ONLY: BWFFT,&
53                                              FFT_RADIX_CLOSEST,&
54                                              FWFFT,&
55                                              fft3d,&
56                                              fft_radix_operations,&
57                                              finalize_fft,&
58                                              init_fft
59   USE global_types,                    ONLY: global_environment_type
60   USE input_constants,                 ONLY: do_diag_syevd,&
61                                              do_diag_syevx,&
62                                              do_mat_random,&
63                                              do_mat_read,&
64                                              do_pwgrid_ns_fullspace,&
65                                              do_pwgrid_ns_halfspace,&
66                                              do_pwgrid_spherical
67   USE input_section_types,             ONLY: section_vals_get,&
68                                              section_vals_get_subs_vals,&
69                                              section_vals_type,&
70                                              section_vals_val_get
71   USE kinds,                           ONLY: dp
72   USE machine,                         ONLY: m_flush,&
73                                              m_walltime
74   USE message_passing,                 ONLY: mp_bcast,&
75                                              mp_max,&
76                                              mp_sum,&
77                                              mp_sync,&
78                                              mpi_perf_test
79   USE minimax_exp,                     ONLY: validate_exp_minimax
80   USE mp2_weights,                     ONLY: test_least_square_ft
81   USE parallel_rng_types,              ONLY: GAUSSIAN,&
82                                              UNIFORM,&
83                                              check_rng,&
84                                              create_rng_stream,&
85                                              delete_rng_stream,&
86                                              next_random_number,&
87                                              rng_stream_type,&
88                                              write_rng_stream
89   USE pw_grid_types,                   ONLY: FULLSPACE,&
90                                              HALFSPACE,&
91                                              pw_grid_type
92   USE pw_grids,                        ONLY: pw_grid_create,&
93                                              pw_grid_release,&
94                                              pw_grid_setup
95   USE pw_methods,                      ONLY: pw_transfer,&
96                                              pw_zero
97   USE pw_types,                        ONLY: COMPLEXDATA1D,&
98                                              COMPLEXDATA3D,&
99                                              REALDATA3D,&
100                                              REALSPACE,&
101                                              RECIPROCALSPACE,&
102                                              pw_create,&
103                                              pw_p_type,&
104                                              pw_release
105   USE realspace_grid_types,            ONLY: &
106        pw2rs, realspace_grid_desc_type, realspace_grid_input_type, realspace_grid_type, rs2pw, &
107        rs_grid_create, rs_grid_create_descriptor, rs_grid_print, rs_grid_release, &
108        rs_grid_release_descriptor, rs_grid_zero, rs_pw_transfer
109   USE shg_integrals_test,              ONLY: shg_integrals_perf_acc_test
110#include "./base/base_uses.f90"
111
112   IMPLICIT NONE
113
114   PRIVATE
115   PUBLIC :: lib_test
116
117   INTEGER                  :: runtest(100)
118   REAL(KIND=dp)           :: max_memory
119   REAL(KIND=dp), PARAMETER :: threshold = 1.0E-8_dp
120   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests'
121
122CONTAINS
123
124! **************************************************************************************************
125!> \brief Master routine for tests
126!> \param root_section ...
127!> \param para_env ...
128!> \param globenv ...
129!> \par History
130!>      none
131!> \author JGH  6-NOV-2000
132! **************************************************************************************************
133   SUBROUTINE lib_test(root_section, para_env, globenv)
134
135      TYPE(section_vals_type), POINTER                   :: root_section
136      TYPE(cp_para_env_type), POINTER                    :: para_env
137      TYPE(global_environment_type), POINTER             :: globenv
138
139      CHARACTER(LEN=*), PARAMETER :: routineN = 'lib_test', routineP = moduleN//':'//routineN
140
141      INTEGER                                            :: handle, iw
142      LOGICAL                                            :: explicit
143      TYPE(cp_logger_type), POINTER                      :: logger
144      TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
145         eigensolver_section, eri_mme_test_section, pw_transfer_section, rs_pw_transfer_section, &
146         shg_integrals_test_section
147
148      CALL timeset(routineN, handle)
149
150      logger => cp_get_default_logger()
151      iw = cp_print_key_unit_nr(logger, root_section, "TEST%PROGRAM_RUN_INFO", extension=".log")
152
153      IF (iw > 0) THEN
154         WRITE (iw, '(T2,79("*"))')
155         WRITE (iw, '(A,T31,A,T80,A)') ' *', ' PERFORMANCE TESTS ', '*'
156         WRITE (iw, '(T2,79("*"))')
157      END IF
158      !
159      CALL test_input(root_section, para_env)
160      !
161      IF (runtest(1) /= 0) CALL copy_test(para_env, iw)
162      !
163      IF (runtest(2) /= 0) CALL matmul_test(para_env, test_matmul=.TRUE., test_dgemm=.FALSE., iw=iw)
164      IF (runtest(5) /= 0) CALL matmul_test(para_env, test_matmul=.FALSE., test_dgemm=.TRUE., iw=iw)
165      !
166      IF (runtest(3) /= 0) CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
167                                         globenv%fftw_wisdom_file_name)
168      !
169      IF (runtest(4) /= 0) CALL eri_test(iw)
170      !
171      IF (runtest(6) /= 0) CALL clebsch_gordon_test()
172      !
173      ! runtest 7 has been deleted and can be recycled
174      !
175      IF (runtest(8) /= 0) CALL mpi_perf_test(para_env%group, runtest(8), iw)
176      !
177      IF (runtest(9) /= 0) CALL rng_test(para_env, iw)
178      !
179      IF (runtest(10) /= 0) CALL validate_exp_minimax(runtest(10), iw)
180      !
181      IF (runtest(11) /= 0) CALL test_least_square_ft(runtest(11), iw)
182      !
183
184      rs_pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%RS_PW_TRANSFER")
185      CALL section_vals_get(rs_pw_transfer_section, explicit=explicit)
186      IF (explicit) THEN
187         CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
188      ENDIF
189
190      pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%PW_TRANSFER")
191      CALL section_vals_get(pw_transfer_section, explicit=explicit)
192      IF (explicit) THEN
193         CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
194      ENDIF
195
196      cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_FM_GEMM")
197      CALL section_vals_get(cp_fm_gemm_test_section, explicit=explicit)
198      IF (explicit) THEN
199         CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
200      ENDIF
201
202      eigensolver_section => section_vals_get_subs_vals(root_section, "TEST%EIGENSOLVER")
203      CALL section_vals_get(eigensolver_section, explicit=explicit)
204      IF (explicit) THEN
205         CALL eigensolver_test(para_env, iw, eigensolver_section)
206      ENDIF
207
208      eri_mme_test_section => section_vals_get_subs_vals(root_section, "TEST%ERI_MME_TEST")
209      CALL section_vals_get(eri_mme_test_section, explicit=explicit)
210      IF (explicit) THEN
211         CALL cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
212      ENDIF
213
214      shg_integrals_test_section => section_vals_get_subs_vals(root_section, "TEST%SHG_INTEGRALS_TEST")
215      CALL section_vals_get(shg_integrals_test_section, explicit=explicit)
216      IF (explicit) THEN
217         CALL shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
218      ENDIF
219
220      ! DBCSR tests
221      ! matrix_multiplication
222      cp_dbcsr_test_section => section_vals_get_subs_vals(root_section, &
223                                                          "TEST%CP_DBCSR")
224      CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit)
225      IF (explicit) THEN
226         CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
227      ENDIF
228
229      CALL cp_print_key_finished_output(iw, logger, root_section, "TEST%PROGRAM_RUN_INFO")
230
231      CALL timestop(handle)
232
233   END SUBROUTINE lib_test
234
235! **************************************************************************************************
236!> \brief Reads input section &TEST ... &END
237!> \param root_section ...
238!> \param para_env ...
239!> \author JGH 30-NOV-2000
240!> \note
241!> I---------------------------------------------------------------------------I
242!> I SECTION: &TEST ... &END                                                   I
243!> I                                                                           I
244!> I    MEMORY   max_memory                                                    I
245!> I    COPY     n                                                             I
246!> I    MATMUL   n                                                             I
247!> I    FFT      n                                                             I
248!> I    ERI      n                                                             I
249!> I    PW_FFT   n                                                             I
250!> I    Clebsch-Gordon n                                                       I
251!> I    RS_GRIDS n                                                             I
252!> I    MPI      n                                                             I
253!> I    RNG      n             -> Parallel random number generator             I
254!> I---------------------------------------------------------------------------I
255! **************************************************************************************************
256   SUBROUTINE test_input(root_section, para_env)
257      TYPE(section_vals_type), POINTER                   :: root_section
258      TYPE(cp_para_env_type), POINTER                    :: para_env
259
260      CHARACTER(LEN=*), PARAMETER :: routineN = 'test_input', routineP = moduleN//':'//routineN
261
262      TYPE(section_vals_type), POINTER                   :: test_section
263
264!
265!..defaults
266! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm)
267
268      runtest = 0
269      test_section => section_vals_get_subs_vals(root_section, "TEST")
270      CALL section_vals_val_get(test_section, "MEMORY", r_val=max_memory)
271      CALL section_vals_val_get(test_section, 'COPY', i_val=runtest(1))
272      CALL section_vals_val_get(test_section, 'MATMUL', i_val=runtest(2))
273      CALL section_vals_val_get(test_section, 'DGEMM', i_val=runtest(5))
274      CALL section_vals_val_get(test_section, 'FFT', i_val=runtest(3))
275      CALL section_vals_val_get(test_section, 'ERI', i_val=runtest(4))
276      CALL section_vals_val_get(test_section, 'CLEBSCH_GORDON', i_val=runtest(6))
277      CALL section_vals_val_get(test_section, 'MPI', i_val=runtest(8))
278      CALL section_vals_val_get(test_section, 'RNG', i_val=runtest(9))
279      CALL section_vals_val_get(test_section, 'MINIMAX', i_val=runtest(10))
280      CALL section_vals_val_get(test_section, 'LEAST_SQ_FT', i_val=runtest(11))
281
282      CALL mp_sync(para_env%group)
283   END SUBROUTINE test_input
284
285! **************************************************************************************************
286!> \brief Tests the performance to copy two vectors.
287!> \param para_env ...
288!> \param iw ...
289!> \par History
290!>      none
291!> \author JGH  6-NOV-2000
292!> \note
293!>      The results of these tests allow to determine the size of the cache
294!>      of the CPU. This can be used to optimize the performance of the
295!>      FFTSG library.
296! **************************************************************************************************
297   SUBROUTINE copy_test(para_env, iw)
298      TYPE(cp_para_env_type), POINTER                    :: para_env
299      INTEGER                                            :: iw
300
301      INTEGER                                            :: i, ierr, j, len, ntim, siz
302      CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_test', routineP = moduleN//':'//routineN
303
304      REAL(KIND=dp)                                      :: perf, t, tend, tstart
305      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: ca, cb
306
307! test for copy --> Cache size
308
309      siz = ABS(runtest(1))
310      IF (para_env%ionode) WRITE (iw, '(//,A,/)') " Test of copy ( F95 ) "
311      DO i = 6, 24
312         len = 2**i
313         IF (8.0_dp*REAL(len, KIND=dp) > max_memory*0.5_dp) EXIT
314         ALLOCATE (ca(len), STAT=ierr)
315         IF (ierr /= 0) EXIT
316         ALLOCATE (cb(len), STAT=ierr)
317         IF (ierr /= 0) EXIT
318
319         CALL RANDOM_NUMBER(ca)
320         ntim = NINT(1.e7_dp/REAL(len, KIND=dp))
321         ntim = MAX(ntim, 1)
322         ntim = MIN(ntim, siz*10000)
323
324         tstart = m_walltime()
325         DO j = 1, ntim
326            cb(:) = ca(:)
327            ca(1) = REAL(j, KIND=dp)
328         END DO
329         tend = m_walltime()
330         t = tend - tstart + threshold
331         IF (t > 0.0_dp) THEN
332            perf = REAL(ntim, KIND=dp)*REAL(len, KIND=dp)*1.e-6_dp/t
333         ELSE
334            perf = 0.0_dp
335         END IF
336
337         IF (para_env%ionode) THEN
338            WRITE (iw, '(A,i2,i10,A,T59,F14.4,A)') " Copy test:   Size = 2^", i, &
339               len/1024, " Kwords", perf, " Mcopy/s"
340         END IF
341
342         DEALLOCATE (ca)
343         DEALLOCATE (cb)
344      END DO
345      CALL mp_sync(para_env%group)
346   END SUBROUTINE copy_test
347
348! **************************************************************************************************
349!> \brief Tests the performance of different kinds of matrix matrix multiply
350!>      kernels for the BLAS and F95 intrinsic matmul.
351!> \param para_env ...
352!> \param test_matmul ...
353!> \param test_dgemm ...
354!> \param iw ...
355!> \par History
356!>      none
357!> \author JGH  6-NOV-2000
358! **************************************************************************************************
359   SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
360      TYPE(cp_para_env_type), POINTER                    :: para_env
361      LOGICAL                                            :: test_matmul, test_dgemm
362      INTEGER                                            :: iw
363
364      INTEGER                                            :: i, ierr, j, len, ntim, siz
365      CHARACTER(LEN=*), PARAMETER :: routineN = 'matmul_test', routineP = moduleN//':'//routineN
366
367      REAL(KIND=dp)                                      :: perf, t, tend, tstart, xdum
368      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: ma, mb, mc
369
370! test for matrix multpies
371
372      IF (test_matmul) THEN
373         siz = ABS(runtest(2))
374         IF (para_env%ionode) WRITE (iw, '(//,A,/)') " Test of matmul ( F95 ) "
375         DO i = 5, siz, 2
376            len = 2**i + 1
377            IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
378            ALLOCATE (ma(len, len), STAT=ierr)
379            IF (ierr /= 0) EXIT
380            ALLOCATE (mb(len, len), STAT=ierr)
381            IF (ierr /= 0) EXIT
382            ALLOCATE (mc(len, len), STAT=ierr)
383            IF (ierr /= 0) EXIT
384            mc = 0.0_dp
385
386            CALL RANDOM_NUMBER(xdum)
387            ma = xdum
388            CALL RANDOM_NUMBER(xdum)
389            mb = xdum
390            ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
391            ntim = MAX(ntim, 1)
392            ntim = MIN(ntim, siz*200)
393            tstart = m_walltime()
394            DO j = 1, ntim
395               mc(:, :) = MATMUL(ma, mb)
396               ma(1, 1) = REAL(j, KIND=dp)
397            END DO
398            tend = m_walltime()
399            t = tend - tstart + threshold
400            perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
401            IF (para_env%ionode) THEN
402               WRITE (iw, '(A,i6,T59,F14.4,A)') &
403                  " Matrix multiply test: c = a * b         Size = ", len, perf, " Mflop/s"
404            END IF
405            tstart = m_walltime()
406            DO j = 1, ntim
407               mc(:, :) = mc + MATMUL(ma, mb)
408               ma(1, 1) = REAL(j, KIND=dp)
409            END DO
410            tend = m_walltime()
411            t = tend - tstart + threshold
412            IF (t > 0.0_dp) THEN
413               perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
414            ELSE
415               perf = 0.0_dp
416            END IF
417
418            IF (para_env%ionode) THEN
419               WRITE (iw, '(A,i6,T59,F14.4,A)') &
420                  " Matrix multiply test: a = a * b         Size = ", len, perf, " Mflop/s"
421            END IF
422
423            tstart = m_walltime()
424            DO j = 1, ntim
425               mc(:, :) = mc + MATMUL(ma, TRANSPOSE(mb))
426               ma(1, 1) = REAL(j, KIND=dp)
427            END DO
428            tend = m_walltime()
429            t = tend - tstart + threshold
430            IF (t > 0.0_dp) THEN
431               perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
432            ELSE
433               perf = 0.0_dp
434            END IF
435
436            IF (para_env%ionode) THEN
437               WRITE (iw, '(A,i6,T59,F14.4,A)') &
438                  " Matrix multiply test: c = a * b(T)      Size = ", len, perf, " Mflop/s"
439            END IF
440
441            tstart = m_walltime()
442            DO j = 1, ntim
443               mc(:, :) = mc + MATMUL(TRANSPOSE(ma), mb)
444               ma(1, 1) = REAL(j, KIND=dp)
445            END DO
446            tend = m_walltime()
447            t = tend - tstart + threshold
448            IF (t > 0.0_dp) THEN
449               perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
450            ELSE
451               perf = 0.0_dp
452            END IF
453
454            IF (para_env%ionode) THEN
455               WRITE (iw, '(A,i6,T59,F14.4,A)') &
456                  " Matrix multiply test: c = a(T) * b      Size = ", len, perf, " Mflop/s"
457            END IF
458
459            DEALLOCATE (ma)
460            DEALLOCATE (mb)
461            DEALLOCATE (mc)
462         END DO
463      ENDIF
464
465      ! test for matrix multpies
466      IF (test_dgemm) THEN
467         siz = ABS(runtest(5))
468         IF (para_env%ionode) WRITE (iw, '(//,A,/)') " Test of matmul ( BLAS ) "
469         DO i = 5, siz, 2
470            len = 2**i + 1
471            IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
472            ALLOCATE (ma(len, len), STAT=ierr)
473            IF (ierr /= 0) EXIT
474            ALLOCATE (mb(len, len), STAT=ierr)
475            IF (ierr /= 0) EXIT
476            ALLOCATE (mc(len, len), STAT=ierr)
477            IF (ierr /= 0) EXIT
478            mc = 0.0_dp
479
480            CALL RANDOM_NUMBER(xdum)
481            ma = xdum
482            CALL RANDOM_NUMBER(xdum)
483            mb = xdum
484            ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
485            ntim = MAX(ntim, 1)
486            ntim = MIN(ntim, 1000)
487
488            tstart = m_walltime()
489            DO j = 1, ntim
490               CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
491            END DO
492            tend = m_walltime()
493            t = tend - tstart + threshold
494            IF (t > 0.0_dp) THEN
495               perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
496            ELSE
497               perf = 0.0_dp
498            END IF
499
500            IF (para_env%ionode) THEN
501               WRITE (iw, '(A,i6,T59,F14.4,A)') &
502                  " Matrix multiply test: c = a * b         Size = ", len, perf, " Mflop/s"
503            END IF
504
505            tstart = m_walltime()
506            DO j = 1, ntim
507               CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
508            END DO
509            tend = m_walltime()
510            t = tend - tstart + threshold
511            IF (t > 0.0_dp) THEN
512               perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
513            ELSE
514               perf = 0.0_dp
515            END IF
516
517            IF (para_env%ionode) THEN
518               WRITE (iw, '(A,i6,T59,F14.4,A)') &
519                  " Matrix multiply test: a = a * b         Size = ", len, perf, " Mflop/s"
520            END IF
521
522            tstart = m_walltime()
523            DO j = 1, ntim
524               CALL dgemm("N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
525            END DO
526            tend = m_walltime()
527            t = tend - tstart + threshold
528            IF (t > 0.0_dp) THEN
529               perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
530            ELSE
531               perf = 0.0_dp
532            END IF
533
534            IF (para_env%ionode) THEN
535               WRITE (iw, '(A,i6,T59,F14.4,A)') &
536                  " Matrix multiply test: c = a * b(T)      Size = ", len, perf, " Mflop/s"
537            END IF
538
539            tstart = m_walltime()
540            DO j = 1, ntim
541               CALL dgemm("T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
542            END DO
543            tend = m_walltime()
544            t = tend - tstart + threshold
545            IF (t > 0.0_dp) THEN
546               perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
547            ELSE
548               perf = 0.0_dp
549            END IF
550
551            IF (para_env%ionode) THEN
552               WRITE (iw, '(A,i6,T59,F14.4,A)') &
553                  " Matrix multiply test: c = a(T) * b      Size = ", len, perf, " Mflop/s"
554            END IF
555
556            DEALLOCATE (ma)
557            DEALLOCATE (mb)
558            DEALLOCATE (mc)
559         END DO
560      ENDIF
561
562      CALL mp_sync(para_env%group)
563
564   END SUBROUTINE matmul_test
565
566! **************************************************************************************************
567!> \brief Tests the performance of all available FFT libraries for 3D FFTs
568!> \param para_env ...
569!> \param iw ...
570!> \param fftw_plan_type ...
571!> \param wisdom_file where FFTW3 should look to save/load wisdom
572!> \par History
573!>      none
574!> \author JGH  6-NOV-2000
575! **************************************************************************************************
576   SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
577
578      TYPE(cp_para_env_type), POINTER                    :: para_env
579      INTEGER                                            :: iw, fftw_plan_type
580      CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
581
582      INTEGER                                            :: iall, ierr, it, j, len, n(3), ntim, &
583                                                            radix_in, radix_out, siz, stat
584      INTEGER, PARAMETER                                 :: ndate(3) = (/12, 48, 96/)
585      CHARACTER(LEN=*), PARAMETER :: routineN = 'fft_test', routineP = moduleN//':'//routineN
586
587      COMPLEX(KIND=dp), DIMENSION(4, 4, 4)               :: zz
588      COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)  :: ca, cb, cc
589      CHARACTER(LEN=7)                                   :: method
590      REAL(KIND=dp)                                      :: flops, perf, scale, t, tdiff, tend, &
591                                                            tstart
592      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: ra
593
594! test for 3d FFT
595
596      IF (para_env%ionode) WRITE (iw, '(//,A,/)') " Test of 3D-FFT "
597      siz = ABS(runtest(3))
598
599      DO iall = 1, 100
600         SELECT CASE (iall)
601         CASE DEFAULT
602            EXIT
603         CASE (1)
604            CALL init_fft("FFTSG", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
605                          pool_limit=10, plan_style=fftw_plan_type)
606            method = "FFTSG  "
607         CASE (2)
608            CYCLE
609         CASE (3)
610            CALL init_fft("FFTW3", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
611                          pool_limit=10, plan_style=fftw_plan_type)
612            method = "FFTW3  "
613         END SELECT
614         n = 4
615         zz = 0.0_dp
616         CALL fft3d(1, n, zz, status=stat)
617         IF (stat == 0) THEN
618            DO it = 1, 3
619               radix_in = ndate(it)
620               CALL fft_radix_operations(radix_in, radix_out, FFT_RADIX_CLOSEST)
621               len = radix_out
622               n = len
623               IF (16.0_dp*REAL(len*len*len, KIND=dp) > max_memory*0.5_dp) EXIT
624               ALLOCATE (ra(len, len, len), STAT=ierr)
625               ALLOCATE (ca(len, len, len), STAT=ierr)
626               CALL RANDOM_NUMBER(ra)
627               ca(:, :, :) = ra
628               CALL RANDOM_NUMBER(ra)
629               ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
630               flops = REAL(len**3, KIND=dp)*15.0_dp*LOG(REAL(len, KIND=dp))
631               ntim = NINT(siz*1.e7_dp/flops)
632               ntim = MAX(ntim, 1)
633               ntim = MIN(ntim, 200)
634               scale = 1.0_dp/REAL(len**3, KIND=dp)
635               tstart = m_walltime()
636               DO j = 1, ntim
637                  CALL fft3d(FWFFT, n, ca)
638                  CALL fft3d(BWFFT, n, ca, SCALE=scale)
639               END DO
640               tend = m_walltime()
641               t = tend - tstart + threshold
642               IF (t > 0.0_dp) THEN
643                  perf = REAL(ntim, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
644               ELSE
645                  perf = 0.0_dp
646               END IF
647
648               IF (para_env%ionode) THEN
649                  WRITE (iw, '(T2,A,A,i6,T59,F14.4,A)') &
650                     ADJUSTR(method), " test (in-place)    Size = ", len, perf, " Mflop/s"
651               END IF
652               DEALLOCATE (ca)
653               DEALLOCATE (ra)
654            END DO
655            IF (para_env%ionode) WRITE (iw, *)
656            ! test if input data is preserved
657            len = 24
658            n = len
659            ALLOCATE (ra(len, len, len))
660            ALLOCATE (ca(len, len, len))
661            ALLOCATE (cb(len, len, len))
662            ALLOCATE (cc(len, len, len))
663            CALL RANDOM_NUMBER(ra)
664            ca(:, :, :) = ra
665            CALL RANDOM_NUMBER(ra)
666            ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
667            cc(:, :, :) = ca
668            CALL fft3d(FWFFT, n, ca, cb)
669            tdiff = MAXVAL(ABS(ca - cc))
670            IF (tdiff > 1.0E-12_dp) THEN
671               IF (para_env%ionode) &
672                  WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         FWFFT ", &
673                  "             Input array is changed in out-of-place FFT !"
674            ELSE
675               IF (para_env%ionode) &
676                  WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         FWFFT ", &
677                  "         Input array is not changed in out-of-place FFT !"
678            END IF
679            ca(:, :, :) = cc
680            CALL fft3d(BWFFT, n, ca, cb)
681            tdiff = MAXVAL(ABS(ca - cc))
682            IF (tdiff > 1.0E-12_dp) THEN
683               IF (para_env%ionode) &
684                  WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         BWFFT ", &
685                  "             Input array is changed in out-of-place FFT !"
686            ELSE
687               IF (para_env%ionode) &
688                  WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         BWFFT ", &
689                  "         Input array is not changed in out-of-place FFT !"
690            END IF
691            IF (para_env%ionode) WRITE (iw, *)
692
693            DEALLOCATE (ra)
694            DEALLOCATE (ca)
695            DEALLOCATE (cb)
696            DEALLOCATE (cc)
697         END IF
698         CALL finalize_fft(para_env, wisdom_file=wisdom_file)
699      END DO
700
701   END SUBROUTINE fft_test
702
703! **************************************************************************************************
704!> \brief   test rs_pw_transfer performance
705!> \param para_env ...
706!> \param iw ...
707!> \param globenv ...
708!> \param rs_pw_transfer_section ...
709!> \author  Joost VandeVondele
710!>      9.2008 Randomise rs grid [Iain Bethune]
711!>      (c) The Numerical Algorithms Group (NAG) Ltd, 2008 on behalf of the HECToR project
712! **************************************************************************************************
713   SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
714
715      TYPE(cp_para_env_type), POINTER                    :: para_env
716      INTEGER                                            :: iw
717      TYPE(global_environment_type), POINTER             :: globenv
718      TYPE(section_vals_type), POINTER                   :: rs_pw_transfer_section
719
720      CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_pw_transfer_test', &
721         routineP = moduleN//':'//routineN
722
723      INTEGER                                            :: dir, halo_size, handle, i_loop, n_loop, &
724                                                            ns_max
725      INTEGER, DIMENSION(3)                              :: no, np
726      INTEGER, DIMENSION(:), POINTER                     :: i_vals
727      LOGICAL                                            :: do_rs2pw
728      REAL(KIND=dp)                                      :: tend, tstart
729      TYPE(cell_type), POINTER                           :: box
730      TYPE(pw_grid_type), POINTER                        :: grid
731      TYPE(pw_p_type)                                    :: ca
732      TYPE(realspace_grid_desc_type), POINTER            :: rs_desc
733      TYPE(realspace_grid_input_type)                    :: input_settings
734      TYPE(realspace_grid_type), POINTER                 :: rs_grid
735      TYPE(section_vals_type), POINTER                   :: rs_grid_section
736
737      CALL timeset(routineN, handle)
738
739      !..set fft lib
740      CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
741                    pool_limit=globenv%fft_pool_scratch_limit, &
742                    wisdom_file=globenv%fftw_wisdom_file_name, &
743                    plan_style=globenv%fftw_plan_type)
744
745      ! .. set cell (should otherwise be irrelevant)
746      NULLIFY (box)
747      CALL cell_create(box)
748      box%hmat = RESHAPE((/20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
749                           0.0_dp, 0.0_dp, 20.0_dp/), (/3, 3/))
750      CALL init_cell(box)
751
752      ! .. grid type and pw_grid
753      NULLIFY (grid)
754      CALL section_vals_val_get(rs_pw_transfer_section, "GRID", i_vals=i_vals)
755      np = i_vals
756      CALL pw_grid_create(grid, para_env%group)
757      CALL pw_grid_setup(box%hmat, grid, grid_span=FULLSPACE, npts=np, fft_usage=.TRUE., iounit=iw)
758      no = grid%npts
759
760      NULLIFY (ca%pw)
761      CALL pw_create(ca%pw, grid, REALDATA3D)
762      ca%pw%in_space = REALSPACE
763      CALL pw_zero(ca%pw)
764
765      ! .. rs input setting type
766      CALL section_vals_val_get(rs_pw_transfer_section, "HALO_SIZE", i_val=halo_size)
767      rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section, "RS_GRID")
768      ns_max = 2*halo_size + 1
769      CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, (/-1, -1, -1/))
770
771      ! .. rs type
772      NULLIFY (rs_grid)
773      NULLIFY (rs_desc)
774      CALL rs_grid_create_descriptor(rs_desc, pw_grid=grid, input_settings=input_settings)
775      CALL rs_grid_create(rs_grid, rs_desc)
776      CALL rs_grid_print(rs_grid, iw)
777      CALL rs_grid_zero(rs_grid)
778
779      ! Put random values on the grid, so summation check will pick up errors
780      CALL RANDOM_NUMBER(rs_grid%r)
781
782      CALL section_vals_val_get(rs_pw_transfer_section, "N_loop", i_val=N_loop)
783      CALL section_vals_val_get(rs_pw_transfer_section, "RS2PW", l_val=do_rs2pw)
784      IF (do_rs2pw) THEN
785         dir = rs2pw
786      ELSE
787         dir = pw2rs
788      ENDIF
789
790      ! go for the real loops, sync to get max timings
791      IF (para_env%ionode) THEN
792         WRITE (iw, '(T2,A)') ""
793         WRITE (iw, '(T2,A)') "Timing rs_pw_transfer routine"
794         WRITE (iw, '(T2,A)') ""
795         WRITE (iw, '(T2,A)') "iteration      time[s]"
796      ENDIF
797      DO i_loop = 1, N_loop
798         CALL mp_sync(para_env%group)
799         tstart = m_walltime()
800         CALL rs_pw_transfer(rs_grid, ca%pw, dir)
801         CALL mp_sync(para_env%group)
802         tend = m_walltime()
803         IF (para_env%ionode) THEN
804            WRITE (iw, '(T2,I9,1X,F12.6)') i_loop, tend - tstart
805         ENDIF
806      ENDDO
807
808      !cleanup
809      CALL rs_grid_release(rs_grid)
810      CALL rs_grid_release_descriptor(rs_desc)
811      CALL pw_release(ca%pw)
812      CALL pw_grid_release(grid)
813      CALL cell_release(box)
814      CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
815
816      CALL timestop(handle)
817
818   END SUBROUTINE rs_pw_transfer_test
819
820! **************************************************************************************************
821!> \brief Tests the performance of PW calls to FFT routines
822!> \param para_env ...
823!> \param iw ...
824!> \param globenv ...
825!> \param pw_transfer_section ...
826!> \par History
827!>      JGH  6-Feb-2001 : Test and performance code
828!>      Made input sensitive [Joost VandeVondele]
829!> \author JGH  1-JAN-2001
830! **************************************************************************************************
831   SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
832
833      TYPE(cp_para_env_type), POINTER                    :: para_env
834      INTEGER                                            :: iw
835      TYPE(global_environment_type), POINTER             :: globenv
836      TYPE(section_vals_type), POINTER                   :: pw_transfer_section
837
838      CHARACTER(LEN=*), PARAMETER :: routineN = 'pw_fft_test', routineP = moduleN//':'//routineN
839      REAL(KIND=dp), PARAMETER                           :: toler = 1.e-11_dp
840
841      INTEGER                                            :: blocked_id, grid_span, i_layout, i_rep, &
842                                                            ig, ip, itmp, n_loop, n_rep, nn, p, q
843      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: layouts
844      INTEGER, DIMENSION(2)                              :: distribution_layout
845      INTEGER, DIMENSION(3)                              :: no, np
846      INTEGER, DIMENSION(:), POINTER                     :: i_vals
847      LOGICAL                                            :: debug, is_fullspace, odd, &
848                                                            pw_grid_layout_all, spherical
849      REAL(KIND=dp)                                      :: em, et, flops, gsq, perf, t, t_max, &
850                                                            t_min, tend, tstart
851      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: t_end, t_start
852      TYPE(cell_type), POINTER                           :: box
853      TYPE(pw_grid_type), POINTER                        :: grid
854      TYPE(pw_p_type)                                    :: ca, cb, cc
855
856!..set fft lib
857
858      CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
859                    pool_limit=globenv%fft_pool_scratch_limit, &
860                    wisdom_file=globenv%fftw_wisdom_file_name, &
861                    plan_style=globenv%fftw_plan_type)
862
863      !..the unit cell (should not really matter, the number of grid points do)
864      NULLIFY (box, grid)
865      CALL cell_create(box)
866      box%hmat = RESHAPE((/10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
867                           0.0_dp, 0.0_dp, 7.0_dp/), (/3, 3/))
868      CALL init_cell(box)
869
870      CALL section_vals_get(pw_transfer_section, n_repetition=n_rep)
871      DO i_rep = 1, n_rep
872
873         ! how often should we do the transfer
874         CALL section_vals_val_get(pw_transfer_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
875         ALLOCATE (t_start(N_loop))
876         ALLOCATE (t_end(N_loop))
877
878         ! setup of the grids
879         CALL section_vals_val_get(pw_transfer_section, "GRID", i_rep_section=i_rep, i_vals=i_vals)
880         np = i_vals
881
882         CALL section_vals_val_get(pw_transfer_section, "PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
883         CALL section_vals_val_get(pw_transfer_section, "DEBUG", i_rep_section=i_rep, l_val=debug)
884
885         CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT_ALL", i_rep_section=i_rep, &
886                                   l_val=pw_grid_layout_all)
887
888         ! prepare to loop over all or a specific layout
889         IF (pw_grid_layout_all) THEN
890            ! count layouts that fit
891            itmp = 0
892            ! start from 2, (/1,para_env%num_pe/) is not supported
893            DO p = 2, para_env%num_pe
894               q = para_env%num_pe/p
895               IF (p*q == para_env%num_pe) THEN
896                  itmp = itmp + 1
897               ENDIF
898            ENDDO
899            ! build list
900            ALLOCATE (layouts(2, itmp))
901            itmp = 0
902            DO p = 2, para_env%num_pe
903               q = para_env%num_pe/p
904               IF (p*q == para_env%num_pe) THEN
905                  itmp = itmp + 1
906                  layouts(:, itmp) = (/p, q/)
907               ENDIF
908            ENDDO
909         ELSE
910            CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
911            ALLOCATE (layouts(2, 1))
912            layouts(:, 1) = i_vals
913         ENDIF
914
915         DO i_layout = 1, SIZE(layouts, 2)
916
917            distribution_layout = layouts(:, i_layout)
918
919            CALL pw_grid_create(grid, para_env%group)
920
921            CALL section_vals_val_get(pw_transfer_section, "PW_GRID", i_rep_section=i_rep, i_val=itmp)
922
923            ! from cp_control_utils
924            SELECT CASE (itmp)
925            CASE (do_pwgrid_spherical)
926               spherical = .TRUE.
927               is_fullspace = .FALSE.
928            CASE (do_pwgrid_ns_fullspace)
929               spherical = .FALSE.
930               is_fullspace = .TRUE.
931            CASE (do_pwgrid_ns_halfspace)
932               spherical = .FALSE.
933               is_fullspace = .FALSE.
934            END SELECT
935
936            ! from pw_env_methods
937            IF (spherical) THEN
938               grid_span = HALFSPACE
939               spherical = .TRUE.
940               odd = .TRUE.
941            ELSE IF (is_fullspace) THEN
942               grid_span = FULLSPACE
943               spherical = .FALSE.
944               odd = .FALSE.
945            ELSE
946               grid_span = HALFSPACE
947               spherical = .FALSE.
948               odd = .TRUE.
949            END IF
950
951            ! actual setup
952            CALL pw_grid_setup(box%hmat, grid, grid_span=grid_span, odd=odd, spherical=spherical, &
953                               blocked=blocked_id, npts=np, fft_usage=.TRUE., &
954                               rs_dims=distribution_layout, iounit=iw)
955
956            IF (iw > 0) CALL m_flush(iw)
957
958            ! note that the number of grid points might be different from what the user requested (fft-able needed)
959            no = grid%npts
960
961            NULLIFY (ca%pw)
962            NULLIFY (cb%pw)
963            NULLIFY (cc%pw)
964
965            CALL pw_create(ca%pw, grid, COMPLEXDATA1D)
966            CALL pw_create(cb%pw, grid, COMPLEXDATA3D)
967            CALL pw_create(cc%pw, grid, COMPLEXDATA1D)
968
969            ! initialize data
970            CALL pw_zero(ca%pw)
971            CALL pw_zero(cb%pw)
972            CALL pw_zero(cc%pw)
973            ca%pw%in_space = RECIPROCALSPACE
974            nn = SIZE(ca%pw%cc)
975            DO ig = 1, nn
976               gsq = grid%gsq(ig)
977               ca%pw%cc(ig) = EXP(-gsq)
978            END DO
979
980            flops = PRODUCT(no)*30.0_dp*LOG(REAL(MAXVAL(no), KIND=dp))
981            tstart = m_walltime()
982            DO ip = 1, n_loop
983               CALL mp_sync(para_env%group)
984               t_start(ip) = m_walltime()
985               CALL pw_transfer(ca%pw, cb%pw, debug)
986               CALL pw_transfer(cb%pw, cc%pw, debug)
987               CALL mp_sync(para_env%group)
988               t_end(ip) = m_walltime()
989            END DO
990            tend = m_walltime()
991            t = tend - tstart + threshold
992            IF (t > 0.0_dp) THEN
993               perf = REAL(n_loop, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
994            ELSE
995               perf = 0.0_dp
996            END IF
997
998            em = MAXVAL(ABS(ca%pw%cc(:) - cc%pw%cc(:)))
999            CALL mp_max(em, para_env%group)
1000            et = SUM(ABS(ca%pw%cc(:) - cc%pw%cc(:)))
1001            CALL mp_sum(et, para_env%group)
1002            t_min = MINVAL(t_end - t_start)
1003            t_max = MAXVAL(t_end - t_start)
1004
1005            IF (para_env%ionode) THEN
1006               WRITE (iw, *)
1007               WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Maximal Error ", em
1008               WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Total Error ", et
1009               WRITE (iw, '(A,T67,F14.0)') &
1010                  " Parallel FFT Tests: Performance [Mflops] ", perf
1011               WRITE (iw, '(A,T67,F14.6)') " Best time : ", t_min
1012               WRITE (iw, '(A,T67,F14.6)') " Worst time: ", t_max
1013               IF (iw > 0) CALL m_flush(iw)
1014            END IF
1015
1016            ! need debugging ???
1017            IF (em > toler .OR. et > toler) THEN
1018               CPWARN("The FFT results are not accurate ... starting debug pw_transfer")
1019               CALL pw_transfer(ca%pw, cb%pw, .TRUE.)
1020               CALL pw_transfer(cb%pw, cc%pw, .TRUE.)
1021            ENDIF
1022
1023            ! done with these grids
1024            CALL pw_release(ca%pw)
1025            CALL pw_release(cb%pw)
1026            CALL pw_release(cc%pw)
1027            CALL pw_grid_release(grid)
1028
1029         END DO
1030
1031         ! local arrays
1032         DEALLOCATE (layouts)
1033         DEALLOCATE (t_start)
1034         DEALLOCATE (t_end)
1035
1036      ENDDO
1037
1038      ! cleanup
1039      CALL cell_release(box)
1040      CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
1041
1042   END SUBROUTINE pw_fft_test
1043
1044! **************************************************************************************************
1045!> \brief Test the parallel (pseudo)random number generator (RNG).
1046!> \param para_env ...
1047!> \param output_unit ...
1048!> \par History
1049!>      JGH  6-Feb-2001 : Test and performance code
1050!> \author JGH  1-JAN-2001
1051! **************************************************************************************************
1052   SUBROUTINE rng_test(para_env, output_unit)
1053      TYPE(cp_para_env_type), POINTER                    :: para_env
1054      INTEGER                                            :: output_unit
1055
1056      CHARACTER(LEN=*), PARAMETER :: routineN = 'rng_test', routineP = moduleN//':'//routineN
1057
1058      INTEGER                                            :: i, n
1059      LOGICAL                                            :: ionode
1060      REAL(KIND=dp)                                      :: t, tend, tmax, tmin, tstart, tsum, tsum2
1061      TYPE(rng_stream_type), POINTER                     :: rng_stream
1062
1063      ionode = para_env%ionode
1064      n = runtest(9)
1065      NULLIFY (rng_stream)
1066
1067      ! Check correctness
1068
1069      CALL check_rng(output_unit, ionode)
1070
1071      ! Check performance
1072
1073      IF (ionode) THEN
1074         WRITE (UNIT=output_unit, FMT="(/,/,T2,A,I10,A)") &
1075            "Check distributions using", n, " random numbers:"
1076      END IF
1077
1078      ! Test uniform distribution [0,1]
1079
1080      CALL create_rng_stream(rng_stream=rng_stream, &
1081                             name="Test uniform distribution [0,1]", &
1082                             distribution_type=UNIFORM, &
1083                             extended_precision=.TRUE.)
1084
1085      IF (ionode) THEN
1086         CALL write_rng_stream(rng_stream, output_unit, write_all=.TRUE.)
1087      END IF
1088
1089      tmax = -HUGE(0.0_dp)
1090      tmin = +HUGE(0.0_dp)
1091      tsum = 0.0_dp
1092      tsum2 = 0.0_dp
1093
1094      tstart = m_walltime()
1095      DO i = 1, n
1096         t = next_random_number(rng_stream)
1097         tsum = tsum + t
1098         tsum2 = tsum2 + t*t
1099         IF (t > tmax) tmax = t
1100         IF (t < tmin) tmin = t
1101      END DO
1102      tend = m_walltime()
1103
1104      IF (ionode) THEN
1105         WRITE (UNIT=output_unit, FMT="(/,(T4,A,F12.6))") &
1106            "Minimum: ", tmin, &
1107            "Maximum: ", tmax, &
1108            "Average: ", tsum/REAL(n, KIND=dp), &
1109            "Variance:", tsum2/REAL(n, KIND=dp), &
1110            "Time [s]:", tend - tstart
1111      END IF
1112
1113      CALL delete_rng_stream(rng_stream)
1114
1115      ! Test normal Gaussian distribution
1116
1117      CALL create_rng_stream(rng_stream=rng_stream, &
1118                             name="Test normal Gaussian distribution", &
1119                             distribution_type=GAUSSIAN, &
1120                             extended_precision=.TRUE.)
1121
1122      tmax = -HUGE(0.0_dp)
1123      tmin = +HUGE(0.0_dp)
1124      tsum = 0.0_dp
1125      tsum2 = 0.0_dp
1126
1127      tstart = m_walltime()
1128      DO i = 1, n
1129         t = next_random_number(rng_stream)
1130         tsum = tsum + t
1131         tsum2 = tsum2 + t*t
1132         IF (t > tmax) tmax = t
1133         IF (t < tmin) tmin = t
1134      END DO
1135      tend = m_walltime()
1136
1137      IF (ionode) THEN
1138         CALL write_rng_stream(rng_stream, output_unit)
1139         WRITE (UNIT=output_unit, FMT="(/,(T4,A,F12.6))") &
1140            "Minimum: ", tmin, &
1141            "Maximum: ", tmax, &
1142            "Average: ", tsum/REAL(n, KIND=dp), &
1143            "Variance:", tsum2/REAL(n, KIND=dp), &
1144            "Time [s]:", tend - tstart
1145      END IF
1146
1147      CALL delete_rng_stream(rng_stream)
1148
1149   END SUBROUTINE rng_test
1150
1151! **************************************************************************************************
1152!> \brief Tests the eigensolver library routines
1153!> \param para_env ...
1154!> \param iw ...
1155!> \param eigensolver_section ...
1156!> \par History
1157!>      JGH  6-Feb-2001 : Test and performance code
1158!> \author JGH  1-JAN-2001
1159! **************************************************************************************************
1160   SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
1161
1162      TYPE(cp_para_env_type), POINTER                    :: para_env
1163      INTEGER                                            :: iw
1164      TYPE(section_vals_type), POINTER                   :: eigensolver_section
1165
1166      CHARACTER(LEN=*), PARAMETER :: routineN = 'eigensolver_test', &
1167         routineP = moduleN//':'//routineN
1168
1169      INTEGER                                            :: diag_method, group, i, i_loop, i_rep, &
1170                                                            init_method, j, n, n_loop, n_rep, &
1171                                                            neig, source, unit_number
1172      REAL(KIND=dp)                                      :: t1, t2
1173      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues
1174      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: buffer
1175      TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
1176      TYPE(cp_fm_struct_type), POINTER                   :: fmstruct
1177      TYPE(cp_fm_type), POINTER                          :: eigenvectors, matrix, work
1178      TYPE(rng_stream_type), POINTER                     :: rng_stream
1179
1180      group = para_env%group
1181      source = para_env%source
1182
1183      IF (iw > 0) THEN
1184         WRITE (UNIT=iw, FMT="(/,/,T2,A,/)") "EIGENSOLVER TEST"
1185      END IF
1186
1187      ! create blacs env corresponding to para_env
1188      NULLIFY (blacs_env)
1189      CALL cp_blacs_env_create(blacs_env=blacs_env, &
1190                               para_env=para_env)
1191
1192      ! loop over all tests
1193      CALL section_vals_get(eigensolver_section, n_repetition=n_rep)
1194      DO i_rep = 1, n_rep
1195
1196         ! parse section
1197         CALL section_vals_val_get(eigensolver_section, "N", i_rep_section=i_rep, i_val=n)
1198         CALL section_vals_val_get(eigensolver_section, "EIGENVALUES", i_rep_section=i_rep, i_val=neig)
1199         CALL section_vals_val_get(eigensolver_section, "DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
1200         CALL section_vals_val_get(eigensolver_section, "INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
1201         CALL section_vals_val_get(eigensolver_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
1202
1203         ! proper number of eigs
1204         IF (neig < 0) neig = n
1205         neig = MIN(neig, n)
1206
1207         ! report
1208         IF (iw > 0) THEN
1209            WRITE (iw, *) "Matrix size", n
1210            WRITE (iw, *) "Number of eigenvalues", neig
1211            WRITE (iw, *) "Timing loops", n_loop
1212            SELECT CASE (diag_method)
1213            CASE (do_diag_syevd)
1214               WRITE (iw, *) "Diag using syevd"
1215            CASE (do_diag_syevx)
1216               WRITE (iw, *) "Diag using syevx"
1217            CASE DEFAULT
1218               ! stop
1219            END SELECT
1220
1221            SELECT CASE (init_method)
1222            CASE (do_mat_random)
1223               WRITE (iw, *) "using random matrix"
1224            CASE (do_mat_read)
1225               WRITE (iw, *) "reading from file"
1226            CASE DEFAULT
1227               ! stop
1228            END SELECT
1229         ENDIF
1230
1231         ! create matrix struct type
1232         NULLIFY (fmstruct)
1233         CALL cp_fm_struct_create(fmstruct=fmstruct, &
1234                                  para_env=para_env, &
1235                                  context=blacs_env, &
1236                                  nrow_global=n, &
1237                                  ncol_global=n)
1238
1239         ! create all needed matrices, and buffers for the eigenvalues
1240         NULLIFY (matrix)
1241         CALL cp_fm_create(matrix=matrix, &
1242                           matrix_struct=fmstruct, &
1243                           name="MATRIX")
1244         CALL cp_fm_set_all(matrix, 0.0_dp)
1245
1246         NULLIFY (eigenvectors)
1247         CALL cp_fm_create(matrix=eigenvectors, &
1248                           matrix_struct=fmstruct, &
1249                           name="EIGENVECTORS")
1250         CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1251
1252         NULLIFY (work)
1253         CALL cp_fm_create(matrix=work, &
1254                           matrix_struct=fmstruct, &
1255                           name="WORK")
1256         CALL cp_fm_set_all(matrix, 0.0_dp)
1257
1258         ALLOCATE (eigenvalues(n))
1259         eigenvalues = 0.0_dp
1260         ALLOCATE (buffer(1, n))
1261
1262         ! generate initial matrix, either by reading a file, or using random numbers
1263         IF (para_env%ionode) THEN
1264            SELECT CASE (init_method)
1265            CASE (do_mat_random)
1266               NULLIFY (rng_stream)
1267               CALL create_rng_stream(rng_stream=rng_stream, &
1268                                      name="rng_stream", &
1269                                      distribution_type=UNIFORM, &
1270                                      extended_precision=.TRUE.)
1271            CASE (do_mat_read)
1272               CALL open_file(file_name="MATRIX", &
1273                              file_action="READ", &
1274                              file_form="FORMATTED", &
1275                              file_status="OLD", &
1276                              unit_number=unit_number)
1277            END SELECT
1278         END IF
1279
1280         DO i = 1, n
1281            IF (para_env%ionode) THEN
1282               SELECT CASE (init_method)
1283               CASE (do_mat_random)
1284                  DO j = i, n
1285                     buffer(1, j) = next_random_number(rng_stream) - 0.5_dp
1286                  END DO
1287                  !MK activate/modify for a diagonal dominant symmetric matrix:
1288                  !MK buffer(1,i) = 10.0_dp*buffer(1,i)
1289               CASE (do_mat_read)
1290                  READ (UNIT=unit_number, FMT=*) buffer(1, 1:n)
1291               END SELECT
1292            END IF
1293            CALL mp_bcast(buffer, source, group)
1294            SELECT CASE (init_method)
1295            CASE (do_mat_random)
1296               CALL cp_fm_set_submatrix(fm=matrix, &
1297                                        new_values=buffer, &
1298                                        start_row=i, &
1299                                        start_col=i, &
1300                                        n_rows=1, &
1301                                        n_cols=n - i + 1, &
1302                                        alpha=1.0_dp, &
1303                                        beta=0.0_dp, &
1304                                        transpose=.FALSE.)
1305               CALL cp_fm_set_submatrix(fm=matrix, &
1306                                        new_values=buffer, &
1307                                        start_row=i, &
1308                                        start_col=i, &
1309                                        n_rows=n - i + 1, &
1310                                        n_cols=1, &
1311                                        alpha=1.0_dp, &
1312                                        beta=0.0_dp, &
1313                                        transpose=.TRUE.)
1314            CASE (do_mat_read)
1315               CALL cp_fm_set_submatrix(fm=matrix, &
1316                                        new_values=buffer, &
1317                                        start_row=i, &
1318                                        start_col=1, &
1319                                        n_rows=1, &
1320                                        n_cols=n, &
1321                                        alpha=1.0_dp, &
1322                                        beta=0.0_dp, &
1323                                        transpose=.FALSE.)
1324            END SELECT
1325         END DO
1326
1327         DEALLOCATE (buffer)
1328
1329         IF (para_env%ionode) THEN
1330            SELECT CASE (init_method)
1331            CASE (do_mat_random)
1332               CALL delete_rng_stream(rng_stream=rng_stream)
1333            CASE (do_mat_read)
1334               CALL close_file(unit_number=unit_number)
1335            END SELECT
1336         END IF
1337
1338         DO i_loop = 1, n_loop
1339            eigenvalues = 0.0_dp
1340            CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1341            CALL cp_fm_to_fm(source=matrix, &
1342                             destination=work)
1343
1344            ! DONE, now testing
1345            t1 = m_walltime()
1346            SELECT CASE (diag_method)
1347            CASE (do_diag_syevd)
1348               CALL cp_fm_syevd(matrix=work, &
1349                                eigenvectors=eigenvectors, &
1350                                eigenvalues=eigenvalues)
1351            CASE (do_diag_syevx)
1352               CALL cp_fm_syevx(matrix=work, &
1353                                eigenvectors=eigenvectors, &
1354                                eigenvalues=eigenvalues, &
1355                                neig=neig, &
1356                                work_syevx=1.0_dp)
1357            END SELECT
1358            t2 = m_walltime()
1359            IF (iw > 0) WRITE (iw, *) "Timing for loop ", i_loop, " : ", t2 - t1
1360         ENDDO
1361
1362         IF (iw > 0) THEN
1363            WRITE (iw, *) "Eigenvalues: "
1364            WRITE (UNIT=iw, FMT="(T3,5F14.6)") eigenvalues(1:neig)
1365            WRITE (UNIT=iw, FMT="(T3,A4,F16.6)") "Sum:", SUM(eigenvalues(1:neig))
1366            WRITE (iw, *) ""
1367         END IF
1368
1369         ! Clean up
1370         DEALLOCATE (eigenvalues)
1371         CALL cp_fm_release(matrix=work)
1372         CALL cp_fm_release(matrix=eigenvectors)
1373         CALL cp_fm_release(matrix=matrix)
1374         CALL cp_fm_struct_release(fmstruct=fmstruct)
1375
1376      ENDDO
1377
1378      CALL cp_blacs_env_release(blacs_env=blacs_env)
1379
1380   END SUBROUTINE eigensolver_test
1381
1382! **************************************************************************************************
1383!> \brief Tests the parallel matrix multiply
1384!> \param para_env ...
1385!> \param iw ...
1386!> \param cp_fm_gemm_test_section ...
1387! **************************************************************************************************
1388   SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
1389
1390      TYPE(cp_para_env_type), POINTER          :: para_env
1391      INTEGER                                  :: iw
1392      TYPE(section_vals_type), POINTER         :: cp_fm_gemm_test_section
1393
1394      CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_gemm_test', &
1395                                     routineP = moduleN//':'//routineN
1396
1397      CHARACTER(LEN=1)                         :: transa, transb
1398      INTEGER :: i_loop, i_rep, k, m, n, N_loop, n_rep, ncol_block, &
1399                 ncol_block_actual, ncol_global, nrow_block, nrow_block_actual, &
1400                 nrow_global
1401      INTEGER, DIMENSION(:), POINTER           :: grid_2d
1402      LOGICAL                                  :: force_blocksize, row_major, &
1403                                                  transa_p, transb_p
1404      REAL(KIND=dp)                            :: t1, t2, t3, t4
1405      TYPE(cp_blacs_env_type), POINTER         :: blacs_env
1406      TYPE(cp_fm_struct_type), POINTER         :: fmstruct_a, fmstruct_b, &
1407                                                  fmstruct_c
1408      TYPE(cp_fm_type), POINTER                :: matrix_a, matrix_b, matrix_c
1409#if defined(__SCALAPACK)
1410      INTEGER                                  :: pilaenv
1411#endif
1412
1413      CALL section_vals_get(cp_fm_gemm_test_section, n_repetition=n_rep)
1414      DO i_rep = 1, n_rep
1415
1416         ! how often should we do the multiply
1417         CALL section_vals_val_get(cp_fm_gemm_test_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1418
1419         ! matrices def.
1420         CALL section_vals_val_get(cp_fm_gemm_test_section, "K", i_rep_section=i_rep, i_val=k)
1421         CALL section_vals_val_get(cp_fm_gemm_test_section, "N", i_rep_section=i_rep, i_val=n)
1422         CALL section_vals_val_get(cp_fm_gemm_test_section, "M", i_rep_section=i_rep, i_val=m)
1423         CALL section_vals_val_get(cp_fm_gemm_test_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1424         CALL section_vals_val_get(cp_fm_gemm_test_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1425         CALL section_vals_val_get(cp_fm_gemm_test_section, "nrow_block", i_rep_section=i_rep, i_val=nrow_block)
1426         CALL section_vals_val_get(cp_fm_gemm_test_section, "ncol_block", i_rep_section=i_rep, i_val=ncol_block)
1427         CALL section_vals_val_get(cp_fm_gemm_test_section, "ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
1428         CALL section_vals_val_get(cp_fm_gemm_test_section, "GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
1429         CALL section_vals_val_get(cp_fm_gemm_test_section, "FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
1430         transa = "N"
1431         transb = "N"
1432         IF (transa_p) transa = "T"
1433         IF (transb_p) transb = "T"
1434
1435         IF (iw > 0) THEN
1436            WRITE (iw, '(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
1437            WRITE (iw, '(T2,A)', ADVANCE="NO") "C = "
1438            IF (transa_p) THEN
1439               WRITE (iw, '(A)', ADVANCE="NO") "TRANSPOSE(A) x"
1440            ELSE
1441               WRITE (iw, '(A)', ADVANCE="NO") "A x "
1442            ENDIF
1443            IF (transb_p) THEN
1444               WRITE (iw, '(A)') "TRANSPOSE(B) "
1445            ELSE
1446               WRITE (iw, '(A)') "B "
1447            ENDIF
1448            WRITE (iw, '(T2,A,T50,I5,A,I5)') 'requested block size', nrow_block, ' by ', ncol_block
1449            WRITE (iw, '(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ', n_loop
1450            WRITE (iw, '(T2,A,T50,L5)') 'Row Major', row_major
1451            WRITE (iw, '(T2,A,T50,2I7)') 'GRID_2D ', grid_2d
1452            WRITE (iw, '(T2,A,T50,L5)') 'Force blocksize ', force_blocksize
1453            ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant)
1454#if defined(__SCALAPACK)
1455            WRITE (iw, '(T2,A,T50,I5)') 'PILAENV blocksize', pilaenv(0, 'D')
1456#endif
1457         ENDIF
1458
1459         NULLIFY (blacs_env)
1460         CALL cp_blacs_env_create(blacs_env=blacs_env, &
1461                                  para_env=para_env, &
1462                                  row_major=row_major, &
1463                                  grid_2d=grid_2d)
1464
1465         NULLIFY (fmstruct_a)
1466         IF (transa_p) THEN
1467            nrow_global = m; ncol_global = k
1468         ELSE
1469            nrow_global = k; ncol_global = m
1470         ENDIF
1471         CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env, &
1472                                  nrow_global=nrow_global, ncol_global=ncol_global, &
1473                                  nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1474         CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1475         IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ', nrow_global, " by ", ncol_global, &
1476            ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1477
1478         IF (transb_p) THEN
1479            nrow_global = n; ncol_global = m
1480         ELSE
1481            nrow_global = m; ncol_global = n
1482         ENDIF
1483         NULLIFY (fmstruct_b)
1484         CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env, &
1485                                  nrow_global=nrow_global, ncol_global=ncol_global, &
1486                                  nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1487         CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1488         IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix B ', nrow_global, " by ", ncol_global, &
1489            ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1490
1491         NULLIFY (fmstruct_c)
1492         nrow_global = k
1493         ncol_global = n
1494         CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env, &
1495                                  nrow_global=nrow_global, ncol_global=ncol_global, &
1496                                  nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1497         CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1498         IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix C ', nrow_global, " by ", ncol_global, &
1499            ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1500
1501         NULLIFY (matrix_a)
1502         CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A")
1503         NULLIFY (matrix_b)
1504         CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B")
1505         NULLIFY (matrix_c)
1506         CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C")
1507
1508         CALL RANDOM_NUMBER(matrix_a%local_data)
1509         CALL RANDOM_NUMBER(matrix_b%local_data)
1510         CALL RANDOM_NUMBER(matrix_c%local_data)
1511
1512         IF (iw > 0) CALL m_flush(iw)
1513
1514         t1 = m_walltime()
1515         DO i_loop = 1, N_loop
1516            t3 = m_walltime()
1517            CALL cp_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
1518            t4 = m_walltime()
1519            IF (iw > 0) THEN
1520               WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm timing: ", (t4 - t3)
1521               CALL m_flush(iw)
1522            ENDIF
1523         ENDDO
1524         t2 = m_walltime()
1525
1526         IF (iw > 0) THEN
1527            WRITE (iw, '(T2,A,T50,F12.6)') "average cp_fm_gemm timing: ", (t2 - t1)/N_loop
1528            IF (t2 > t1) THEN
1529               WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", &
1530                  2*REAL(m, kind=dp)*REAL(n, kind=dp)*REAL(k, kind=dp)*N_loop/MAX(0.001_dp, t2 - t1)/1.0E9_dp/para_env%num_pe
1531            ENDIF
1532         ENDIF
1533
1534         CALL cp_fm_release(matrix=matrix_a)
1535         CALL cp_fm_release(matrix=matrix_b)
1536         CALL cp_fm_release(matrix=matrix_c)
1537         CALL cp_fm_struct_release(fmstruct=fmstruct_a)
1538         CALL cp_fm_struct_release(fmstruct=fmstruct_b)
1539         CALL cp_fm_struct_release(fmstruct=fmstruct_c)
1540         CALL cp_blacs_env_release(blacs_env=blacs_env)
1541
1542      ENDDO
1543
1544   END SUBROUTINE cp_fm_gemm_test
1545
1546! **************************************************************************************************
1547!> \brief Tests the DBCSR interface.
1548!> \param para_env ...
1549!> \param iw ...
1550!> \param input_section ...
1551! **************************************************************************************************
1552   SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
1553
1554      TYPE(cp_para_env_type), POINTER                    :: para_env
1555      INTEGER                                            :: iw
1556      TYPE(section_vals_type), POINTER                   :: input_section
1557
1558      CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_tests', routineP = moduleN//':'//routineN
1559
1560      CHARACTER, DIMENSION(3)                            :: types
1561      INTEGER                                            :: data_type, i_rep, k, m, n, N_loop, &
1562                                                            n_rep, test_type
1563      INTEGER, DIMENSION(:), POINTER                     :: bs_k, bs_m, bs_n, nproc
1564      LOGICAL                                            :: always_checksum, retain_sparsity, &
1565                                                            transa_p, transb_p
1566      REAL(KIND=dp)                                      :: alpha, beta, filter_eps, s_a, s_b, s_c
1567
1568!   ---------------------------------------------------------------------------
1569
1570      NULLIFY (bs_m, bs_n, bs_k)
1571      CALL section_vals_get(input_section, n_repetition=n_rep)
1572      CALL dbcsr_reset_randmat_seed()
1573      DO i_rep = 1, n_rep
1574         ! how often should we do the multiply
1575         CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1576
1577         ! matrices def.
1578         CALL section_vals_val_get(input_section, "TEST_TYPE", i_rep_section=i_rep, i_val=test_type)
1579         CALL section_vals_val_get(input_section, "DATA_TYPE", i_rep_section=i_rep, i_val=data_type)
1580         CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
1581         CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
1582         CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
1583         CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1584         CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1585         CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, &
1586                                   i_vals=bs_m)
1587         CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, &
1588                                   i_vals=bs_n)
1589         CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, &
1590                                   i_vals=bs_k)
1591         CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1592         CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
1593         CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
1594         CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
1595         CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
1596         CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
1597         CALL section_vals_val_get(input_section, "nproc", i_rep_section=i_rep, &
1598                                   i_vals=nproc)
1599         CALL section_vals_val_get(input_section, "atype", i_rep_section=i_rep, &
1600                                   c_val=types(1))
1601         CALL section_vals_val_get(input_section, "btype", i_rep_section=i_rep, &
1602                                   c_val=types(2))
1603         CALL section_vals_val_get(input_section, "ctype", i_rep_section=i_rep, &
1604                                   c_val=types(3))
1605         CALL section_vals_val_get(input_section, "filter_eps", &
1606                                   i_rep_section=i_rep, r_val=filter_eps)
1607         CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1608
1609         CALL dbcsr_run_tests(para_env%group, iw, nproc, &
1610                              (/m, n, k/), &
1611                              (/transa_p, transb_p/), &
1612                              bs_m, bs_n, bs_k, &
1613                              (/s_a, s_b, s_c/), &
1614                              alpha, beta, &
1615                              data_type=data_type, &
1616                              test_type=test_type, &
1617                              n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
1618                              always_checksum=always_checksum)
1619      END DO
1620   END SUBROUTINE cp_dbcsr_tests
1621
1622END MODULE library_tests
1623