1!    This file is part of ELPA.
2!
3!    The ELPA library was originally created by the ELPA consortium,
4!    consisting of the following organizations:
5!
6!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
7!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
8!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
9!      Informatik,
10!    - Technische Universität München, Lehrstuhl für Informatik mit
11!      Schwerpunkt Wissenschaftliches Rechnen ,
12!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
15!      and
16!    - IBM Deutschland GmbH
17!
18!
19!    More information can be found here:
20!    http://elpa.mpcdf.mpg.de/
21!
22!    ELPA is free software: you can redistribute it and/or modify
23!    it under the terms of the version 3 of the license of the
24!    GNU Lesser General Public License as published by the Free
25!    Software Foundation.
26!
27!    ELPA is distributed in the hope that it will be useful,
28!    but WITHOUT ANY WARRANTY; without even the implied warranty of
29!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
30!    GNU Lesser General Public License for more details.
31!
32!    You should have received a copy of the GNU Lesser General Public License
33!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
34!
35!    ELPA reflects a substantial effort on the part of the original
36!    ELPA consortium, and we ask you to respect the spirit of the
37!    license that we chose: i.e., please contribute any changes you
38!    may have back to the original ELPA library distribution, and keep
39!    any derivatives of ELPA under the same license that we chose for
40!    the original distribution, the GNU Lesser General Public License.
41!
42!
43#include "config-f90.h"
44
45! Define one of TEST_REAL or TEST_COMPLEX
46! Define one of TEST_SINGLE or TEST_DOUBLE
47! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE
48! Define TEST_GPU \in [0, 1]
49! Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel]
50
51#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX))
52error: define exactly one of TEST_REAL or TEST_COMPLEX
53#endif
54
55#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE))
56error: define exactly one of TEST_SINGLE or TEST_DOUBLE
57#endif
58
59#ifdef TEST_SINGLE
60#  define EV_TYPE real(kind=C_FLOAT)
61#  ifdef TEST_REAL
62#    define MATRIX_TYPE real(kind=C_FLOAT)
63#  else
64#    define MATRIX_TYPE complex(kind=C_FLOAT_COMPLEX)
65#  endif
66#else
67#  define EV_TYPE real(kind=C_DOUBLE)
68#  ifdef TEST_REAL
69#    define MATRIX_TYPE real(kind=C_DOUBLE)
70#  else
71#    define MATRIX_TYPE complex(kind=C_DOUBLE_COMPLEX)
72#  endif
73#endif
74
75
76#ifdef TEST_REAL
77#  define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_REAL
78#else
79#  define AUTOTUNE_DOMAIN ELPA_AUTOTUNE_DOMAIN_COMPLEX
80#endif
81
82#include "assert.h"
83
program test
   ! Test driver for the ELPA autotuning machinery:
   !   1. build a BLACS-distributed analytic test matrix,
   !   2. verify that solver settings stored by one ELPA object (e1) can be
   !      loaded into a second object (e2),
   !   3. run the eigenvector solver inside the autotune loop, printing,
   !      saving and re-loading the autotune state every iteration,
   !   4. run once more with the best parameter set found.
   ! The process exit status is non-zero if any correctness check fails.
   use elpa

   use test_util
   use test_setup_mpi
   use test_prepare_matrix
   use test_read_input_parameters
   use test_blacs_infrastructure
   use test_check_correctness
   use test_analytic
   use iso_fortran_env

#ifdef HAVE_REDIRECT
   use test_redirect
#endif
   implicit none

   ! matrix dimensions
   integer                     :: na, nev, nblk

   ! mpi
   integer                     :: myid, nprocs
   integer                     :: na_cols, na_rows  ! local matrix size
   integer                     :: np_cols, np_rows  ! number of MPI processes per column/row
   integer                     :: my_prow, my_pcol  ! local MPI task position (my_prow, my_pcol) in the grid (0..np_rows -1, 0..np_cols -1)
   integer                     :: mpierr, ierr

   ! blacs
   character(len=1)            :: layout
   integer                     :: my_blacs_ctxt, sc_desc(9), info

   ! The matrix (a) and a pristine copy (as) used to restore it after each
   ! solver call, which destroys the input
   MATRIX_TYPE, allocatable    :: a(:,:), as(:,:)
   ! eigenvectors
   MATRIX_TYPE, allocatable    :: z(:,:)
   ! eigenvalues
   EV_TYPE, allocatable        :: ev(:)

   integer                     :: error, status

   type(output_t)              :: write_to_file
   class(elpa_t), pointer      :: e1, e2, e_ptr
   class(elpa_autotune_t), pointer :: tune_state

   integer                     :: iter
   character(len=5)            :: iter_string
   integer                     :: timings, debug, gpu

   call read_input_parameters(na, nev, nblk, write_to_file)
   call setup_mpi(myid, nprocs)
#ifdef HAVE_REDIRECT
#ifdef WITH_MPI
   call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
   call redirect_stdout(myid)
#endif
#endif

   if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then
     print *, "ELPA API version not supported"
     stop 1
   endif

   ! choose the most square-like process grid that exactly divides nprocs
   layout = 'C'
   do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
      if(mod(nprocs,np_cols) == 0 ) exit
   enddo
   np_rows = nprocs/np_cols
   assert(nprocs == np_rows * np_cols)

   if (myid == 0) then
     print '((a,i0))', 'Matrix size: ', na
     print '((a,i0))', 'Num eigenvectors: ', nev
     print '((a,i0))', 'Blocksize: ', nblk
#ifdef WITH_MPI
     print '((a,i0))', 'Num MPI proc: ', nprocs
     print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
     print '(a)',      'Process layout: ' // layout
#endif
     print *,''
   endif

   call set_up_blacsgrid(mpi_comm_world, np_rows, np_cols, layout, &
                         my_blacs_ctxt, my_prow, my_pcol)

   call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, &
                                na_rows, na_cols, sc_desc, my_blacs_ctxt, info)

   allocate(a (na_rows,na_cols))
   allocate(as(na_rows,na_cols))
   allocate(z (na_rows,na_cols))
   allocate(ev(na))

   a(:,:) = 0.0
   z(:,:) = 0.0
   ev(:) = 0.0

   call prepare_matrix_analytic(na, a, nblk, myid, np_rows, np_cols, my_prow, my_pcol, print_times=.false.)
   as(:,:) = a(:,:)

   ! check the allocation status, consistent with the e2 allocation below
   e1 => elpa_allocate(error)
   assert_elpa_ok(error)

   call set_basic_params(e1, na, nev, na_rows, na_cols, my_prow, my_pcol)

   call e1%set("timings",1, error)
   assert_elpa_ok(error)

   call e1%set("debug",1, error)
   assert_elpa_ok(error)

   call e1%set("gpu", 0, error)
   assert_elpa_ok(error)
   !call e1%set("max_stored_rows", 15, error)

   assert_elpa_ok(e1%setup())

   call e1%store_settings("initial_parameters.txt", error)
   assert_elpa_ok(error)

#ifdef WITH_MPI
     ! barrier after store settings, file created from one MPI rank only, but loaded everywhere
     call MPI_BARRIER(MPI_COMM_WORLD, ierr)
#endif

   ! try to load parameters into another object
   e2 => elpa_allocate(error)
   assert_elpa_ok(error)

   call set_basic_params(e2, na, nev, na_rows, na_cols, my_prow, my_pcol)
   call e2%load_settings("initial_parameters.txt", error)
   assert_elpa_ok(error)

   assert_elpa_ok(e2%setup())

   ! test whether the user settings of e1 were correctly loaded into e2
   call e2%get("timings", timings, error)
   assert_elpa_ok(error)
   call e2%get("debug", debug, error)
   assert_elpa_ok(error)
   call e2%get("gpu", gpu, error)
   assert_elpa_ok(error)

   if ((timings .ne. 1) .or. (debug .ne. 1) .or. (gpu .ne. 0)) then
     print *, "Parameters not stored or loaded correctly. Aborting...", timings, debug, gpu
     stop 1
   endif

   if(myid == 0) print *, "parameters of e1"
   call e1%print_settings(error)
   assert_elpa_ok(error)

   if(myid == 0) print *, ""
   if(myid == 0) print *, "parameters of e2"
   call e2%print_settings(error)
   assert_elpa_ok(error)

   ! all further work is done through e_ptr, aliasing e2 (which carries
   ! the settings loaded from e1)
   e_ptr => e2

   tune_state => e_ptr%autotune_setup(ELPA_AUTOTUNE_FAST, AUTOTUNE_DOMAIN, error)
   assert_elpa_ok(error)

   status = 0
   iter = 0
   do while (e_ptr%autotune_step(tune_state, error))
     assert_elpa_ok(error)

     iter = iter + 1
     write(iter_string,'(I5.5)') iter
     call e_ptr%print_settings(error)
     assert_elpa_ok(error)

     call e_ptr%store_settings("saved_parameters_"//trim(iter_string)//".txt", error)
     assert_elpa_ok(error)

     call e_ptr%timer_start("eigenvectors: iteration "//trim(iter_string))
     call e_ptr%eigenvectors(a, ev, z, error)
     assert_elpa_ok(error)
     call e_ptr%timer_stop("eigenvectors: iteration "//trim(iter_string))

     if (myid .eq. 0) then
       print *, ""
       call e_ptr%print_times("eigenvectors: iteration "//trim(iter_string))
     endif
     ! latch any failed verification; a later, successful iteration must
     ! not mask it in the final exit status
     if (check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
                                    .true., .true., print_times=.false.) /= 0) status = 1
     ! restore the input matrix, the solver overwrites it
     a(:,:) = as(:,:)
     call e_ptr%autotune_print_state(tune_state, error)
     assert_elpa_ok(error)

     call e_ptr%autotune_save_state(tune_state, "saved_state_"//trim(iter_string)//".txt", error)
     assert_elpa_ok(error)
#ifdef WITH_MPI
     ! barrier after save state, file created from one MPI rank only, but loaded everywhere
     call MPI_BARRIER(MPI_COMM_WORLD, ierr)
#endif
     call e_ptr%autotune_load_state(tune_state, "saved_state_"//trim(iter_string)//".txt", error)
     assert_elpa_ok(error)

   end do

   ! set and print the autotuned-settings
   call e_ptr%autotune_set_best(tune_state, error)
   assert_elpa_ok(error)

   if (myid .eq. 0) then
     print *, "The best combination found by the autotuning:"
     flush(output_unit)
     call e_ptr%autotune_print_best(tune_state, error)
     assert_elpa_ok(error)
   endif
   ! de-allocate autotune object
   call elpa_autotune_deallocate(tune_state, error)
   assert_elpa_ok(error)

   if (myid .eq. 0) then
     print *, "Running once more time with the best found setting..."
   endif
   call e_ptr%timer_start("eigenvectors: best setting")
   call e_ptr%eigenvectors(a, ev, z, error)
   assert_elpa_ok(error)

   call e_ptr%timer_stop("eigenvectors: best setting")
   if (myid .eq. 0) then
     print *, ""
     call e_ptr%print_times("eigenvectors: best setting")
   endif
   if (check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
                                  .true., .true., print_times=.false.) /= 0) status = 1

   ! deallocate BOTH elpa objects: e_ptr aliases e2 only, so e1 needs its
   ! own deallocation (it was previously leaked)
   call elpa_deallocate(e1)
   call elpa_deallocate(e_ptr)

   deallocate(a)
   deallocate(as)
   deallocate(z)
   deallocate(ev)

   call elpa_uninit()

#ifdef WITH_MPI
   call blacs_gridexit(my_blacs_ctxt)
   call mpi_finalize(mpierr)
#endif

   call exit(status)

contains
   !> Set the parameters every ELPA object needs before setup(): global and
   !> local matrix sizes, number of eigenvectors and (with MPI) the parent
   !> communicator and this task's position in the process grid.
   !> Note: nblk and error are accessed by host association from the main
   !> program.
   subroutine set_basic_params(elpa, na, nev, na_rows, na_cols, my_prow, my_pcol)
     implicit none
     class(elpa_t), pointer      :: elpa
     integer, intent(in)         :: na, nev, na_rows, na_cols, my_prow, my_pcol

     call elpa%set("na", na, error)
     assert_elpa_ok(error)
     call elpa%set("nev", nev, error)
     assert_elpa_ok(error)
     call elpa%set("local_nrows", na_rows, error)
     assert_elpa_ok(error)
     call elpa%set("local_ncols", na_cols, error)
     assert_elpa_ok(error)
     call elpa%set("nblk", nblk, error)
     assert_elpa_ok(error)

#ifdef WITH_MPI
     call elpa%set("mpi_comm_parent", MPI_COMM_WORLD, error)
     assert_elpa_ok(error)
     call elpa%set("process_row", my_prow, error)
     assert_elpa_ok(error)
     call elpa%set("process_col", my_pcol, error)
     assert_elpa_ok(error)
#endif
   end subroutine

end program
362