1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Timing routines for accounting
8!> \par History
9!>      02.2004 made a stacked version (of stacks...) [Joost VandeVondele]
10!>      11.2004 storable timer_envs (for f77 interface) [fawzi]
11!>      10.2005 binary search to speed up lookup in timeset [fawzi]
12!>      12.2012 Complete rewrite based on dictionaries. [ole]
13!>      01.2014 Collect statistics from all MPI ranks. [ole]
14!> \author JGH
15! **************************************************************************************************
16MODULE timings_report
17   USE callgraph,                       ONLY: callgraph_item_type,&
18                                              callgraph_items
19   USE cp_files,                        ONLY: close_file,&
20                                              open_file
21   USE cp_para_types,                   ONLY: cp_para_env_type
22   USE kinds,                           ONLY: default_string_length,&
23                                              dp,&
24                                              int_8
25   USE list,                            ONLY: list_destroy,&
26                                              list_get,&
27                                              list_init,&
28                                              list_isready,&
29                                              list_pop,&
30                                              list_push,&
31                                              list_size
32   USE list_routinereport,              ONLY: list_routinereport_type
33   USE message_passing,                 ONLY: mp_bcast,&
34                                              mp_max,&
35                                              mp_maxloc,&
36                                              mp_sum
37   USE routine_map,                     ONLY: routine_map_get,&
38                                              routine_map_haskey
39   USE timings,                         ONLY: get_timer_env
40   USE timings_base_type,               ONLY: call_stat_type,&
41                                              routine_report_type,&
42                                              routine_stat_type
43   USE timings_types,                   ONLY: timer_env_type
44   USE util,                            ONLY: sort
45#include "../base/base_uses.f90"
46
47   IMPLICIT NONE
48   PRIVATE
49
50   INTEGER, PUBLIC, PARAMETER :: cost_type_time = 17, cost_type_energy = 18
51
52   PUBLIC :: timings_report_print, timings_report_callgraph
53
54CONTAINS
55
56! **************************************************************************************************
57!> \brief Print accumulated information on timers
58!> \param iw ...
59!> \param r_timings ...
60!> \param sort_by_self_time ...
61!> \param cost_type ...
62!> \param report_maxloc ...
63!> \param para_env is needed to collect statistics from other nodes.
64!> \par History
65!>      none
66!> \author JGH
67! **************************************************************************************************
68   SUBROUTINE timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
69      INTEGER, INTENT(IN)                                :: iw
70      REAL(KIND=dp), INTENT(IN)                          :: r_timings
71      LOGICAL, INTENT(IN)                                :: sort_by_self_time
72      INTEGER, INTENT(IN)                                :: cost_type
73      LOGICAL, INTENT(IN)                                :: report_maxloc
74      TYPE(cp_para_env_type), INTENT(IN)                 :: para_env
75
76      TYPE(list_routinereport_type)                      :: reports
77      TYPE(routine_report_type), POINTER                 :: r_report
78
79      CALL list_init(reports)
80      CALL collect_reports_from_ranks(reports, cost_type, para_env)
81
82      IF (list_size(reports) > 0 .AND. iw > 0) &
83         CALL print_reports(reports, iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
84
85      ! deallocate reports
86      DO WHILE (list_size(reports) > 0)
87         r_report => list_pop(reports)
88         DEALLOCATE (r_report)
89      END DO
90      CALL list_destroy(reports)
91
92   END SUBROUTINE timings_report_print
93
94! **************************************************************************************************
95!> \brief Collects the timing or energy reports from all MPI ranks.
96!> \param reports ...
97!> \param cost_type ...
98!> \param para_env ...
99!> \author Ole Schuett
100! **************************************************************************************************
101   SUBROUTINE collect_reports_from_ranks(reports, cost_type, para_env)
102      TYPE(list_routinereport_type), INTENT(INOUT)       :: reports
103      INTEGER, INTENT(IN)                                :: cost_type
104      TYPE(cp_para_env_type), INTENT(IN)                 :: para_env
105
106      CHARACTER(LEN=default_string_length)               :: routineN
107      INTEGER                                            :: local_routine_id, sending_rank
108      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: collected
109      REAL(KIND=dp)                                      :: foobar
110      REAL(KIND=dp), DIMENSION(2)                        :: dbuf
111      TYPE(routine_report_type), POINTER                 :: r_report
112      TYPE(routine_stat_type), POINTER                   :: r_stat
113      TYPE(timer_env_type), POINTER                      :: timer_env
114
115      NULLIFY (r_stat, r_report, timer_env)
116      IF (.NOT. list_isready(reports)) &
117         CPABORT("BUG")
118
119      timer_env => get_timer_env()
120
121      ! make sure all functions have been called so that list_size(timer_env%routine_stats)
122      ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
123      ! this hack makes sure they are called before
124      routineN = ""
125      CALL mp_bcast(routineN, 0, para_env%group)
126      sending_rank = 0
127      CALL mp_max(sending_rank, para_env%group)
128      CALL mp_sum(sending_rank, para_env%group)
129      foobar = 0.0_dp
130      CALL mp_max(foobar, para_env%group)
131      dbuf = 0.0_dp
132      CALL mp_maxloc(dbuf, para_env%group)
133      CALL mp_sum(foobar, para_env%group)
134      ! end hack
135
136      ! Array collected is used as a bit field.
137      ! It's of type integer in order to use the convenient MINLOC routine.
138      ALLOCATE (collected(list_size(timer_env%routine_stats)))
139      collected(:) = 0
140
141      DO
142         ! does any rank have uncollected stats?
143         sending_rank = -1
144         IF (.NOT. ALL(collected == 1)) sending_rank = para_env%mepos
145         CALL mp_max(sending_rank, para_env%group)
146         IF (sending_rank < 0) EXIT ! every rank got all routines collected
147         IF (sending_rank == para_env%mepos) THEN
148            local_routine_id = MINLOC(collected, dim=1)
149            r_stat => list_get(timer_env%routine_stats, local_routine_id)
150            routineN = r_stat%routineN
151         ENDIF
152         CALL mp_bcast(routineN, sending_rank, para_env%group)
153
154         ! Create new report for routineN
155         ALLOCATE (r_report)
156         CALL list_push(reports, r_report)
157         r_report%routineN = routineN
158
159         ! If routineN was called on local node, add local stats
160         IF (routine_map_haskey(timer_env%routine_names, routineN)) THEN
161            local_routine_id = routine_map_get(timer_env%routine_names, routineN)
162            collected(local_routine_id) = 1
163            r_stat => list_get(timer_env%routine_stats, local_routine_id)
164            r_report%max_total_calls = r_stat%total_calls
165            r_report%sum_total_calls = r_stat%total_calls
166            r_report%sum_stackdepth = r_stat%stackdepth_accu
167            SELECT CASE (cost_type)
168            CASE (cost_type_energy)
169               r_report%max_icost = r_stat%incl_energy_accu
170               r_report%sum_icost = r_stat%incl_energy_accu
171               r_report%max_ecost = r_stat%excl_energy_accu
172               r_report%sum_ecost = r_stat%excl_energy_accu
173            CASE (cost_type_time)
174               r_report%max_icost = r_stat%incl_walltime_accu
175               r_report%sum_icost = r_stat%incl_walltime_accu
176               r_report%max_ecost = r_stat%excl_walltime_accu
177               r_report%sum_ecost = r_stat%excl_walltime_accu
178            CASE DEFAULT
179               CPABORT("BUG")
180            END SELECT
181         END IF
182
183         ! collect stats of routineN via MPI
184         CALL mp_max(r_report%max_total_calls, para_env%group)
185         CALL mp_sum(r_report%sum_total_calls, para_env%group)
186         CALL mp_sum(r_report%sum_stackdepth, para_env%group)
187
188         ! get value and rank of the maximum inclusive cost
189         dbuf = (/r_report%max_icost, REAL(para_env%mepos, KIND=dp)/)
190         CALL mp_maxloc(dbuf, para_env%group)
191         r_report%max_icost = dbuf(1)
192         r_report%max_irank = INT(dbuf(2))
193
194         CALL mp_sum(r_report%sum_icost, para_env%group)
195
196         ! get value and rank of the maximum exclusive cost
197         dbuf = (/r_report%max_ecost, REAL(para_env%mepos, KIND=dp)/)
198         CALL mp_maxloc(dbuf, para_env%group)
199         r_report%max_ecost = dbuf(1)
200         r_report%max_erank = INT(dbuf(2))
201
202         CALL mp_sum(r_report%sum_ecost, para_env%group)
203      ENDDO
204
205   END SUBROUTINE collect_reports_from_ranks
206
207! **************************************************************************************************
208!> \brief Print the collected reports
209!> \param reports ...
210!> \param iw ...
211!> \param threshold ...
212!> \param sort_by_exclusiv_cost ...
213!> \param cost_type ...
214!> \param report_maxloc ...
215!> \param para_env ...
216!> \par History
217!>      01.2014 Refactored (Ole Schuett)
218!> \author JGH
219! **************************************************************************************************
220   SUBROUTINE print_reports(reports, iw, threshold, sort_by_exclusiv_cost, cost_type, report_maxloc, para_env)
221      TYPE(list_routinereport_type), INTENT(IN)          :: reports
222      INTEGER, INTENT(IN)                                :: iw
223      REAL(KIND=dp), INTENT(IN)                          :: threshold
224      LOGICAL, INTENT(IN)                                :: sort_by_exclusiv_cost
225      INTEGER, INTENT(IN)                                :: cost_type
226      LOGICAL, INTENT(IN)                                :: report_maxloc
227      TYPE(cp_para_env_type), INTENT(IN)                 :: para_env
228
229      CHARACTER(LEN=4)                                   :: label
230      CHARACTER(LEN=default_string_length)               :: fmt, title
231      INTEGER                                            :: decimals, i, j, num_routines
232      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indices
233      REAL(KIND=dp)                                      :: asd, maxcost, mincost
234      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: max_costs
235      TYPE(routine_report_type), POINTER                 :: r_report_i, r_report_j
236
237      NULLIFY (r_report_i, r_report_j)
238      IF (.NOT. list_isready(reports)) &
239         CPABORT("BUG")
240
241      ! are we printing timing or energy ?
242      SELECT CASE (cost_type)
243      CASE (cost_type_energy)
244         title = "E N E R G Y"
245         label = "ENER"
246      CASE (cost_type_time)
247         title = "T I M I N G"
248         label = "TIME"
249      CASE DEFAULT
250         CPABORT("BUG")
251      END SELECT
252
253      ! write banner
254      WRITE (UNIT=iw, FMT="(/,T2,A)") REPEAT("-", 79)
255      WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
256      WRITE (UNIT=iw, FMT="(T2,A,T35,A,T80,A)") "-", TRIM(title), "-"
257      WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
258      WRITE (UNIT=iw, FMT="(T2,A)") REPEAT("-", 79)
259      IF (report_maxloc) THEN
260         WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18,A8)") &
261            "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label, "MAXRANK"
262      ELSE
263         WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18)") &
264            "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label
265      ENDIF
266
267      WRITE (UNIT=iw, FMT="(T33,A)") &
268         "MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM"
269
270      ! sort statistics
271      num_routines = list_size(reports)
272      ALLOCATE (max_costs(num_routines))
273      DO i = 1, num_routines
274         r_report_i => list_get(reports, i)
275         IF (sort_by_exclusiv_cost) THEN
276            max_costs(i) = r_report_i%max_ecost
277         ELSE
278            max_costs(i) = r_report_i%max_icost
279         END IF
280      ENDDO
281      ALLOCATE (indices(num_routines))
282      CALL sort(max_costs, num_routines, indices)
283
284      maxcost = MAXVAL(max_costs)
285      mincost = maxcost*threshold
286
287      ! adjust fmt dynamically based on the max walltime.
288      ! few clocks have more than 3 digits resolution, so stop there
289      decimals = 3
290      IF (maxcost >= 10000) decimals = 2
291      IF (maxcost >= 100000) decimals = 1
292      IF (maxcost >= 1000000) decimals = 0
293      IF (report_maxloc) THEN
294         WRITE (UNIT=fmt, FMT="(A,I0,A)") &
295            "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "),I8)"
296      ELSE
297         WRITE (UNIT=fmt, FMT="(A,I0,A)") &
298            "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "))"
299      ENDIF
300
301      !write output
302      DO i = num_routines, 1, -1
303         IF (max_costs(i) >= mincost) THEN
304            j = indices(i)
305            r_report_j => list_get(reports, j)
306            ! average stack depth
307            asd = REAL(r_report_j%sum_stackdepth, KIND=dp)/ &
308                  REAL(MAX(1_int_8, r_report_j%sum_total_calls), KIND=dp)
309            IF (report_maxloc) THEN
310               WRITE (UNIT=iw, FMT=fmt) &
311                  ADJUSTL(r_report_j%routineN(1:31)), &
312                  r_report_j%max_total_calls, &
313                  asd, &
314                  r_report_j%sum_ecost/para_env%num_pe, &
315                  r_report_j%max_ecost, &
316                  r_report_j%sum_icost/para_env%num_pe, &
317                  r_report_j%max_icost, &
318                  r_report_j%max_erank
319            ELSE
320               WRITE (UNIT=iw, FMT=fmt) &
321                  ADJUSTL(r_report_j%routineN(1:31)), &
322                  r_report_j%max_total_calls, &
323                  asd, &
324                  r_report_j%sum_ecost/para_env%num_pe, &
325                  r_report_j%max_ecost, &
326                  r_report_j%sum_icost/para_env%num_pe, &
327                  r_report_j%max_icost
328            ENDIF
329         END IF
330      END DO
331      WRITE (UNIT=iw, FMT="(T2,A,/)") REPEAT("-", 79)
332
333   END SUBROUTINE print_reports
334
335! **************************************************************************************************
336!> \brief Write accumulated callgraph information as cachegrind-file.
337!> http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
338!> \param filename ...
339!> \par History
340!>     12.2012  initial version[ole]
341!> \author Ole Schuett
342! **************************************************************************************************
343   SUBROUTINE timings_report_callgraph(filename)
344      CHARACTER(len=*), INTENT(in)                       :: filename
345
346      INTEGER, PARAMETER                                 :: E = 1000, T = 100000
347
348      INTEGER                                            :: i, unit
349      TYPE(call_stat_type), POINTER                      :: c_stat
350      TYPE(callgraph_item_type), DIMENSION(:), POINTER   :: ct_items
351      TYPE(routine_stat_type), POINTER                   :: r_stat
352      TYPE(timer_env_type), POINTER                      :: timer_env
353
354      CALL open_file(file_name=filename, file_status="REPLACE", file_action="WRITE", &
355                     file_form="FORMATTED", unit_number=unit)
356      timer_env => get_timer_env()
357
358      ! use outermost routine as total runtime
359      r_stat => list_get(timer_env%routine_stats, 1)
360      WRITE (UNIT=unit, FMT="(A)") "events: Walltime Energy"
361      WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "summary: ", &
362         INT(T*r_stat%incl_walltime_accu, KIND=int_8), &
363         INT(E*r_stat%incl_energy_accu, KIND=int_8)
364
365      DO i = 1, list_size(timer_env%routine_stats)
366         r_stat => list_get(timer_env%routine_stats, i)
367         WRITE (UNIT=unit, FMT="(A,I0,A,A)") "fn=(", r_stat%routine_id, ") ", r_stat%routineN
368         WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
369            INT(T*r_stat%excl_walltime_accu, KIND=int_8), &
370            INT(E*r_stat%excl_energy_accu, KIND=int_8)
371      END DO
372
373      ct_items => callgraph_items(timer_env%callgraph)
374      DO i = 1, SIZE(ct_items)
375         c_stat => ct_items(i)%value
376         WRITE (UNIT=unit, FMT="(A,I0,A)") "fn=(", ct_items(i)%key(1), ")"
377         WRITE (UNIT=unit, FMT="(A,I0,A)") "cfn=(", ct_items(i)%key(2), ")"
378         WRITE (UNIT=unit, FMT="(A,I0,A)") "calls=", c_stat%total_calls, " 1"
379         WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
380            INT(T*c_stat%incl_walltime_accu, KIND=int_8), &
381            INT(E*c_stat%incl_energy_accu, KIND=int_8)
382      END DO
383      DEALLOCATE (ct_items)
384
385      CALL close_file(unit_number=unit, file_status="KEEP")
386
387   END SUBROUTINE timings_report_callgraph
388END MODULE timings_report
389
390