1!==============================================================================
2!
3! Routines:
4!
5! (1) date_time()       Originally by ?         Last Modified: 5/12/2008 (JRD)
6!
7!     Gets current date and time.
8!
9! (2) timget()          Originally by gsm       Last Modified: 4/29/2010 (gsm)
10!
11!     Gets current cpu and wall time.
12!     Note: it`s almost a private subroutine, if not for io_utils.f90
13!
14! (3) timacc(n,option,tsec,nslices)   Originally by ?
15!                                               Last Modified: 6/17/2009 (PWD)
16!     DEPRECATED
17!
18!     Timing subroutine.  Calls machine-dependent subroutine timget
19!     which returns elapsed cpu and wall clock times in seconds
20!     Also return the number of times the counter has been called
21!
22!     Depending on value of "option" routine will:
23!       (0) zero all accumulators
24!       (1) start with new incremental time slice for accumulator n
25!           also increase by one the counter for this accumulator
26!       (2) stop time slice; add time to accumlator n
27!       (3) report accumulated time for accumulator n
28!           and number of time that the routine has been called
29!       (4) report time slice for accumulator n (not full time accumulated)
30!
31!       If, on first entry, subroutine is not being initialized, it
32!       will automatically initialize as well as rezero accumulator n.
33!       However, initialization SHOULD be done explicitly by the user
34!       so that it can be done near the top of his/her main routine.
35!
36!       Input:
37!         n=index of accumulator (distinguish what is being timed); not used if
38!         option=0 option=see comment above
39!       Output:
40!         on option=3:
41!         tottim(2,n)=accumulated time for accumulator n; otherwise
42!         tottim is a dummy variable.
43!         nslices is optional variable that give number of slices collected
44!
45! (4) logit()    Originally By (SIB)     Last Modified 6/12/2008 (JRD)
46!
47!    Write out a debugging message with an inputed string and write time.
48!
49! (5) logitint() Originally By (SIB)     Last Modified 6/12/2008 (JRD)
50!
51!    Same as logit but with an integer constant.
52!
53! (6) Timing class. See at the type definition for its (verbose) description.
54!
55!==============================================================================
56!
57! Todo: move logit and logitint to another place?
58
59#include "f_defs.h"
60
61module timing_m
62
63  use intrinsics_m
64  use message_m
65  use nrtype_m
66  use peinfo_m
67  use push_pop_m
68
69  implicit none
70
71  private
72
73  public :: date_time, timget, timacc, logit, logitint
74
75  !> MTIM determines the maximum number of "timing slots" available
76  integer, parameter, private :: MTIM=100
77  real(DP), private, save :: acctim(2,MTIM), tzero(2,MTIM)
78  integer, private, save :: ncount(MTIM)
79  !
80  !----------------------------------------------------------------------------
81  !
82  ! Timing class
83  !
84  ! Reference object to handle the timing of subroutines and of the program.
85  ! The object is implemented as following.
86  ! type(timing_class) is a baseclass that should not be used directly, as it
87  ! only contains the definition of class methods.
88  ! Below, the base class is extended for each code
89  !
90  ! First, the type timing_class is an abstract implementation of the
91  ! timing class. It contains the common methods that are used to
92  ! time the duration.
93  ! The subclasses are the types that should be used in the code, that is:
94  ! - timing_epsilon_class: timing of subroutines in ./Epsilon
95  ! - timing_sigma_class: timing of subroutines in ./Sigma
96  ! - timing_bse_class: timing of subroutines in ./BSE
97  ! - timing_common_class: timing of subroutines in ./Common
98  ! - timing_extra_class: the rest (PlotXct, ...)
99  !
100  ! The module defines 5 objects, that should be loaded when needed:
101  ! - timing_epsilon, timing_sigma, timing_bse, timing_common, timing_extra
102  !
103  ! The schematic usage is as follow (using epsilon as example)
104  !
105  ! program epsilon
106  !   use timing_m, only: timing => timing_epsilon
107
108  !   call timing%init()
109  !
110  !   call timing%start(timing%sub1)
111  !   ...
112  !   call timing%stop(timing%sub1)
113  !
114  !   call timing%print()
115  ! end program epsilon
116  !
117  ! where:
118  ! - we renamed timing_epsilon as timing, so that it looks as in the code we
119  !   always use the same object
120  ! - we initialized the timing object (zeroing reference timings)
121  ! - timing%sub1 is an attribute of the timing class
122  ! - we kept track of the elapsed time between start() and stop().
123  !   One can make several calls that refer to the same attribute (e.g. %sub1).
124  !   The total time associated to %sub1 will be the sum of all the timings.
125  ! - we printed the timing information to screen.
126  !
127  ! Note: print() should generally be at the end of the program
128  !
129  ! Note: if you call %start twice without calling %stop before, for example:
130  !   call timing%start(timing%sub1)
131  !   ...
132  !   call timing%start(timing%sub1)
133  !   ...
134  !   call timing%stop(timing%sub1)
135  !   Then, the timing will keep track of the time from the 2nd start to stop.
136  !
137  ! Note: if you call stop() without preceding it by a start(), timings will be
138  !   almost random numbers. Note also that safe-proofing this case requires
139  !   introducing a few `if` instructions that would slow the code.
140  !   At the moment, the choice is to not correct this.
141  !
142  ! Modifications / New tags:
143  !
144  ! It`s recommended to not add new subclasses, rather, one should try to
145  ! merge everything, if possible.
146  ! This is an example to add a new tag (%sub2) to timing_sigma
147  ! (or timing_epsilon, ...). The procedure is:
148  !
149  ! 1) Locate the definition of timing_sigma_class, e.g. the line with
150  !    type, extends(timing_class) :: timing_sigma_class
151  !
152  !    Add a new variable 'sub2' and assign an integer value to it between
153  !    1 and 100. MAKE SURE that the integer value hasn`t been taken already,
154  !    or that the integer namespace hasn`t been taken already.
155  !    As an example, suppose we can writeL
156  !    integer :: sub2 = 9
157  !
158  ! 2) Locate the subroutine init_sigma_labels (or init_epsilon_labels, ...).
159  !    Add a label to sub2:
160  !    instance%labels(34) = 'Subroutine 2'
161  !    This string will be used only for a nice formatting of the printing info
162  !
163  ! Devel note: one could have implemented a sort of dictionary in fortran.
164  ! Then, one could have simply used a syntax "call timing%start('sub2')"
165  ! However, it may lead to poor performance, since parts of the code call
166  ! the timing information several thousand times
167  !
168  ! Devel note: due to a limitation of Fortran, it`s more convenient to
169  ! initialize the chartacter array of labels at runtime, rather than
170  ! writing them in the class description
171  !
172  type :: timing_class
173     !
174     ! Picky note: wall time is probably incorrect when there is a change in
175     ! the OS time: for example, when daylight saving time changes or if the
176     ! computer is moved across time zones.
177     !
178     integer :: num_times = MTIM ! max number of timing attributes
179     character(len=100) :: labels(MTIM) ! array of timing labels, for printing
180     !
181     ! arrays used to make timing measurements.
182     real(DP), dimension(MTIM) :: wall_times, cpu_times, &
183          tmp_cpu_times, tmp_wall_times
184     integer :: call_numbers(MTIM) ! the number of start() calls for each
185     ! attribute
186
187   contains
188     ! Initialize arrays and timings
189     procedure :: init => timing_class_init
190     ! Print to screen a summary of timings
191     procedure :: print => timing_class_print
192     ! Start the chronometer for a tag
193     procedure :: start => timing_class_start
194     ! Stop the chronometer for a tag
195     procedure :: stop => timing_class_stop
196     ! Initialize the object
197     procedure :: init_labels => bare_init_labels
198  end type timing_class
199  !
200  type, extends(timing_class) :: timing_epsilon_class
201     ! class definition for timing of subrourines in ./Epsilon
202     integer :: input = 2
203     integer :: input_q = 3
204     integer :: fullbz = 4
205     integer :: gvec = 5
206     integer :: subgrp = 6
207     integer :: irrbz = 8
208     integer :: genwf = 9
209     integer :: mtxel = 10
210     integer :: rqstar = 11
211     integer :: gmap = 12
212     integer :: epsinv_total = 13
213     integer :: chi_sum_comm = 14
214     integer :: chi_sum_total = 15
215     integer :: genwf_val = 16
216     integer :: genwf_cond = 17
217     integer :: epsinv_vcoul = 18
218     integer :: job_setup = 19
219     integer :: q_loop_setup = 20
220     integer :: init_cutoff = 21
221     integer :: init_scalapack = 22
222     integer :: init_arrays = 23
223     integer :: converge_tests = 24
224     integer :: mtxel_denom = 25
225     integer :: mtxel_fft = 26
226     integer :: genwf_ekin = 28
227     integer :: genwf_sort = 29
228     integer :: chi_sum_gemm = 30
229     integer :: chi_sum_prep = 31
230     integer :: mtxel_exp_denom = 32
231     integer :: mtxel_exp_fft = 33
232     integer :: chi_sum_sub_vcoul = 34
233     integer :: chi_sum_sub_diag = 35
234     integer :: chi_sum_sub_omega_0 = 36
235     integer :: chi_sum_sub_eigvet_comm = 37
236     integer :: chi_sum_sub_transf = 38
237     integer :: chi_sum_sub_omega_neq_0 = 39
238     integer :: opt_fft = 40
239     integer :: opt_fft_init = 41
240     integer :: opt_fft_comm_fft = 42
241     integer :: opt_fft_fft = 43
242     integer :: chi_sum_array_alloc = 44
243     integer :: epsinv_i_o = 45
244     integer :: epsinv_invert = 46
245     integer :: chi_sum_bar = 49
246     integer :: chi_sum_flt = 50
247     integer :: chi_sum_row = 51
248     integer :: chi_sum_column = 52
249     integer :: chi_sum_ht_nb = 53
250     integer :: subspace_pgemm = 60
251     integer :: epsinv_omega_0 = 61
252     integer :: epsinv_omega_neq_0 = 62
253     ! Epsilon doesn`t use the put/mltply of Common.
254     integer :: fft_put = 92
255     integer :: fft_mltply = 95
256     integer :: total = 100
257   contains
258     procedure :: init_labels => epsilon_init_labels
259  end type timing_epsilon_class
260  !
261  !----------------------------------------------------------------------------
262  !
263  type, extends(timing_class) :: timing_sigma_class
264     ! class definition for timing of subrourines in ./Sigma
265     integer :: input = 2
266     integer :: epscopy = 3
267     integer :: fullbz = 4
268     integer :: vxc = 5
269     integer :: subgrp = 6
270     integer :: irrbz = 7
271     integer :: gmap = 8
272     integer :: genwf = 9
273     integer :: mtxel = 10
274     integer :: mtxel_cor_tot = 11
275     integer :: vcoul = 13
276     integer :: epsread = 14
277     integer :: input_outer = 15
278     integer :: mtxel_ch = 6
279     integer :: mtxel_comm =17
280     integer :: bare_x = 18
281     integer :: wf_comm = 19
282     integer :: wf_ch_comm = 20
283     integer :: input_read = 21
284     integer :: input_write = 22
285     integer :: sub_transf_tot = 31
286     integer :: sub_transf_com = 32
287     integer :: sub_transf_gemm = 33
288     integer :: m_cor_init = 41
289     integer :: m_cor_epsinit = 42
290     integer :: m_cor_comm = 43
291     integer :: m_cor_pp_prep = 44
292     integer :: m_cor_sx_ch = 45
293     integer :: m_cor_ra_sx = 46
294     integer :: m_cor_ra_ch = 47
295     integer :: m_cor_ra_ch2 = 48
296     integer :: m_cor_ra_sum = 49
297     integer :: m_cor_cd_res = 50
298     integer :: m_cor_cd_int = 51
299     integer :: m_cor_cd_sum = 52
300     integer :: m_cor_cd_gemm = 53
301     integer :: m_cor_remain = 55
302     integer :: m_cor_sub_wings = 56
303     integer :: read_neps = 59
304     integer :: epscopy_io = 61
305     integer :: epscopy_comm = 62
306     integer :: epscopy_sub = 63
307     integer :: epscopy_pgemm = 64
308     integer :: epscopy_redstr = 65
309     integer :: sub_io_vec = 66
310     integer :: sub_prep_vec = 67
311     integer :: sub_comm_vec = 68
312     integer :: sub_io_eps = 69
313     integer :: sub_prep_eps = 70
314     integer :: sub_comm_eps = 71
315     integer :: epscopy_vcoul = 72
316     integer :: total = 100
317   contains
318     procedure :: init_labels => sigma_init_labels
319  end type timing_sigma_class
320  !
321  !----------------------------------------------------------------------------
322  !
323  type, extends(timing_class) :: timing_bse_class
324     ! class definition for timing of subrourines in ./BSE
325     integer :: input = 2
326     integer :: input_q = 3
327     integer :: intwfn = 4
328     integer :: intkernel = 5
329     integer :: epsdiag = 7
330     integer :: eps_comm = 8
331     integer :: absorp0 = 9
332     integer :: vmtxel = 10
333     integer :: trans_mtxel = 11
334     integer :: absorp = 12
335     integer :: write_eig = 13
336     integer :: iw_input_co = 41
337     integer :: iw_interp = 42
338     integer :: iw_genwf = 43
339     integer :: iw_genwf_co = 44
340     integer :: iw_mtxel_t = 45
341     integer :: iw_write = 46
342     integer :: iw_reduce = 47
343     integer :: ik_setup = 51
344     integer :: ik_c_check = 52
345     integer :: ik_input = 53
346     integer :: ik_inteps = 54
347     integer :: ik_vcoul = 55
348     integer :: ik_cache = 56
349     integer :: ik_interp = 57
350     integer :: ik_sum = 58
351     integer :: diagonalize = 61
352     integer :: lanczos = 62
353     integer :: iterate = 63
354     integer :: peig_inter = 64
355     integer :: total = 100
356   contains
357     procedure :: init_labels => bse_init_labels
358  end type timing_bse_class
359  !
360  !----------------------------------------------------------------------------
361  !
362  type, extends(timing_class) :: timing_common_class
363     ! class definition for timing of subrourines in ./Common
364     integer :: eps_i_o_comm = 47
365     integer :: eps_i_o_io = 48
366     integer :: epscopy_comm = 62
367     integer :: input_i_o = 81
368     integer :: input_comm = 82
369     integer :: fft_zero = 91
370     integer :: fft_put = 92
371     integer :: fft_plan = 93
372     integer :: fft_exec = 94
373     integer :: fft_mltply = 95
374     integer :: fft_conjg = 96
375     integer :: fft_get = 97
376   contains
377     procedure :: init_labels => common_init_labels
378  end type timing_common_class
379  !
380  !----------------------------------------------------------------------------
381  !
382  type, extends(timing_class) :: timing_extra_class
383    ! class definition for timing of subrourines in various parts of BGW
384    ! that do not fall in folders ./Sigma, ./BSE, ./Common or ./Epsilon
385    integer :: total = 100
386    integer :: input = 2
387    integer :: input_q = 3
388    integer :: vmtxel = 4
389    integer :: readasvck = 5
390    integer :: os_comm = 6
391    integer :: os_sums = 7
392    integer :: genwf = 8
393    integer :: genwf_q = 9
394    integer :: summing = 9
395    integer :: gather = 9
396   contains
397     procedure :: init_labels => extra_init_labels
398  end type timing_extra_class
399  !
400  !----------------------------------------------------------------------------
401  !
402  ! After the definition of the classes, these are the object instances used in
403  ! the code.
404  ! These should be used throughotu the BGW code by importing them as
405  ! "use timing_m, only: timing => epsilon_timing"
406  !
407  type(timing_epsilon_class), save, public :: epsilon_timing
408  type(timing_sigma_class), save, public :: sigma_timing
409  type(timing_bse_class), save, public :: bse_timing
410  type(timing_common_class), save, public :: common_timing
411  type(timing_extra_class), save, public :: extra_timing
412  !
413contains
414  !
415  subroutine bare_init_labels(instance)
416    ! abstract implementation
417    implicit none
418    class(timing_class), intent(inout) :: instance
419    call die("Need a specific implementation of init_labels")
420    return
421  end subroutine bare_init_labels
422  !
423  !----------------------------------------------------------------------------
424  !
425  subroutine epsilon_init_labels(instance)
426    ! Labels for attributes of timing_epsilon
427    implicit none
428    class(timing_epsilon_class), intent(inout) :: instance
429    PUSH_SUB(epsilon_init_labels)
430
431    instance%labels(2) = 'INPUT'
432    instance%labels(3) = 'INPUT_Q'
433    instance%labels(4) = 'FULLBZ'
434    instance%labels(5) = 'GVEC'
435    instance%labels(6) = 'SUBGRP'
436    instance%labels(8) = 'IRRBZ'
437    instance%labels(9) = 'GENWF'
438    instance%labels(10) = 'MTXEL'
439    instance%labels(11) = 'RQSTAR'
440    instance%labels(12) = 'GMAP'
441    instance%labels(13) = 'EPSINV (TOTAL)'
442    instance%labels(14) = 'CHI SUM (COMM)'
443    instance%labels(15) = 'CHI SUM (TOTAL)'
444    instance%labels(16) = 'GENWF (VAL)'
445    instance%labels(17) = 'GENWF (COND)'
446    instance%labels(18) = 'EPSINV (VCOUL)'
447    instance%labels(19) = 'JOB SETUP'
448    instance%labels(20) = 'Q LOOP SETUP'
449    instance%labels(21) = 'INIT CUTOFF'
450    instance%labels(22) = 'INIT SCALAPACK'
451    instance%labels(23) = 'INIT ARRAYS'
452    instance%labels(24) = 'CONVERGE TESTS'
453    instance%labels(25) = 'MTXEL (DENOM)'
454    instance%labels(26) = 'MTXEL (FFT)'
455    instance%labels(28) = 'GENWF (Ekin)'
456    instance%labels(29) = 'GENWF (Sort)'
457    instance%labels(30) = 'CHI SUM (' + TOSTRING(X(GEMM)) + ')'
458    instance%labels(31) = 'CHI SUM (PREP)'
459    instance%labels(32) = 'MTXEL EXP(DENOM)'
460    instance%labels(33) = 'MTXEL EXP (FFT)'
461    instance%labels(34) = 'CHI SUM SUB (VCOUL)'
462    instance%labels(35) = 'CHI SUM SUB DIAG'
463    instance%labels(36) = 'CHI SUM SUB OMEGA=0'
464    instance%labels(37) = 'CHI SUM SUB EIGVET COMM'
465    instance%labels(38) = 'CHI SUM SUB TRANSF'
466    instance%labels(39) = 'CHI SUM SUB OMEGA neq 0'
467    instance%labels(40) = 'OPT FFT'
468    instance%labels(41) = 'OPT FFT (INIT)'
469    instance%labels(42) = 'OPT FFT (COMM_FFT)'
470    instance%labels(43) = 'OPT FFT (FFT)'
471    instance%labels(44) = 'CHI SUM (ARRAY ALLOC)'
472    instance%labels(45) = 'EPSINV (I/O)'
473    instance%labels(46) = 'EPSINV (INVERT)'
474    instance%labels(49) = 'CHI SUM (BAR)'
475    instance%labels(50) = 'CHI SUM (FLT)'
476    instance%labels(51) = 'CHI SUM (ROW)'
477    instance%labels(52) = 'CHI SUM (COLUMN)'
478    instance%labels(53) = 'CHI SUM (HT/NB)'
479    instance%labels(60) = 'SUBSPACE (P' + TOSTRING(X(GEMM)) + ')'
480    instance%labels(61) = 'EPSINV OMEGA=0'
481    instance%labels(62) = 'EPSINV OMEGA neq 0'
482    instance%labels(92) = 'FFT PUT'
483    instance%labels(95) = 'FFT MLTPLY'
484    instance%labels(100) = 'TOTAL'
485    !
486    POP_SUB(epsilon_init_labels)
487    return
488  end subroutine epsilon_init_labels
489  !
490  !----------------------------------------------------------------------------
491  !
492  subroutine sigma_init_labels(instance)
493    ! Labels for attributes of timing_sigma
494    implicit none
495    class(timing_sigma_class), intent(inout) :: instance
496    PUSH_SUB(sigma_init_labels)
497    !
498    instance%labels(2) = 'INPUT'
499    instance%labels(3) = 'EPSCOPY'
500    instance%labels(4) = 'FULLBZ'
501    instance%labels(5) = 'VXC'
502    instance%labels(6) = 'SUBGRP'
503    instance%labels(7) = 'IRRBZ'
504    instance%labels(8) = 'GMAP'
505    instance%labels(9) = 'GENWF'
506    instance%labels(10) = 'MTXEL'
507    instance%labels(11) = 'MTXEL_COR TOT'
508    instance%labels(13) = 'VCOUL'
509    instance%labels(14) = 'EPSREAD'
510    instance%labels(15) = 'INPUT_OUTER'
511    instance%labels(16) = 'MTXEL_CH'
512    instance%labels(17) = 'MTXEL COMM'
513    instance%labels(18) = 'BARE X'
514    instance%labels(19) = 'WF COMM'
515    instance%labels(20) = 'WF_CH COMM'
516    instance%labels(21) = 'INPUT (READ)'
517    instance%labels(22) = 'INPUT (WRITE)'
518    instance%labels(31) = 'SUB-TRANSF TOT'
519    instance%labels(32) = 'SUB-TRANSF COM'
520    instance%labels(33) = 'SUB-TRANSF GEMM'
521    instance%labels(41) = 'M.COR INIT'
522    instance%labels(42) = 'M.COR EPSINIT'
523    instance%labels(43) = 'M.COR COMM'
524    instance%labels(44) = 'M.COR PP PREP'
525    instance%labels(45) = 'M.COR SX+CH'
526    instance%labels(46) = 'M.COR RA SX'
527    instance%labels(47) = 'M.COR RA CH'
528    instance%labels(48) = 'M.COR RA CH2'
529    instance%labels(49) = 'M.COR RA SUM'
530    instance%labels(50) = 'M.COR CD RES'
531    instance%labels(51) = 'M.COR CD INT'
532    instance%labels(52) = 'M.COR CD SUM'
533    instance%labels(53) = 'M.COR CD GEMM'
534    instance%labels(55) = 'M.COR REMAIN'
535    instance%labels(56) = 'M.COR SUB WINGS'
536    instance%labels(59) = 'READ NEPS'
537    instance%labels(61) = 'EPSCOPY IO'
538    ! Epscopy comm is a duplicate of common_timing:
539    ! the common timing has the HDF5, this one has the binary
540    instance%labels(62) = 'EPSCOPY COMM'
541    instance%labels(63) = 'EPSCOPY SUB'
542    instance%labels(64) = 'EPSCOPY PGEMM'
543    instance%labels(65) = 'EPSCOPY REDSTR'
544    instance%labels(66) = 'SUB IO Vec'
545    instance%labels(67) = 'SUB Prep Vec'
546    instance%labels(68) = 'SUB COMM Vec'
547    instance%labels(69) = 'SUB IO Eps'
548    instance%labels(70) = 'SUB Prep Eps'
549    instance%labels(71) = 'SUB COMM Eps'
550    instance%labels(72) = 'EPSCOPY VCOUL'
551    instance%labels(100) = 'TOTAL'
552    !
553    POP_SUB(sigma_init_labels)
554    return
555  end subroutine sigma_init_labels
556  !
557  !----------------------------------------------------------------------------
558  !
559  subroutine bse_init_labels(instance)
560    ! Labels for attributes of timing_bse
561    implicit none
562    class(timing_bse_class), intent(inout) :: instance
563    POP_SUB(bse_init_labels)
564    !
565    instance%labels(2)='Input'
566    instance%labels(3)='Input q'
567    instance%labels(4)='Intwfn'
568    instance%labels(5)='Intkernel'
569    instance%labels(7)='Epsdiag'
570    instance%labels(8)='Eps Comm'
571    instance%labels(9)='Absorp0'
572    instance%labels(10)='Vmtxel'
573    instance%labels(11)='Trans Mtxel'
574    instance%labels(12)='Absorp'
575    instance%labels(13)='Write Eig'
576    instance%labels(41)='Iw Input_co'
577    instance%labels(42)='Iw Interp'
578    instance%labels(43)='Iw Genwf'
579    instance%labels(44)='Iw Gwnwf_Co'
580    instance%labels(45)='Iw Mtxel_t'
581    instance%labels(46)='Iw Write'
582    instance%labels(47)='Iw Reduce'
583    instance%labels(51)='Ik Setup'
584    instance%labels(52)='Ik C-Check'
585    instance%labels(53)='Ik Input'
586    instance%labels(54)='Ik Inteps'
587    instance%labels(55)='Ik Vcoul'
588    instance%labels(56)='Ik Cache'
589    instance%labels(57)='Ik Interp'
590    instance%labels(58)='Ik Sum'
591    instance%labels(61)='Diagonalize'
592    instance%labels(62)='Lanczos'
593    instance%labels(63)='Iterate'
594    instance%labels(64)='Peig_Inter'
595    instance%labels(100) = 'TOTAL'
596    !
597    POP_SUB(bse_init_labels)
598    return
599  end subroutine bse_init_labels
600  !
601  !----------------------------------------------------------------------------
602  !
603  subroutine common_init_labels(instance)
604    ! Labels for attributes of timing_common
605    implicit none
606    class(timing_common_class), intent(inout) :: instance
607    PUSH_SUB(common_init_labels)
608    !
609    instance%labels(47) = 'Eps (I/O) Comm'
610    instance%labels(48) = 'Eps (I/O) IO'
611    instance%labels(62) = 'Epscopy Comm'
612    instance%labels(81) = 'Input I/O'
613    instance%labels(82) = 'Input Comm'
614    instance%labels(91) = 'Fft Zero'
615    instance%labels(92) = 'Fft Put'
616    instance%labels(93) = 'Fft Plan'
617    instance%labels(94) = 'Fft Exec'
618    instance%labels(95) = 'Fft Mltply'
619    instance%labels(96) = 'Fft Conjg'
620    instance%labels(97) = 'Fft Get'
621    !
622    POP_SUB(common_init_labels)
623    return
624  end subroutine common_init_labels
625  !
626  !----------------------------------------------------------------------------
627  !
628  subroutine extra_init_labels(instance)
629    ! Labels for attributes of timing_extra
630    implicit none
631    class(timing_extra_class), intent(inout) :: instance
632    PUSH_SUB(extra_init_labels)
633    !
634    instance%labels(2) = 'Input'
635    instance%labels(3) = 'Input_q'
636    instance%labels(4) = 'Vmtxel'
637    instance%labels(5) = 'Readasvck'
638    instance%labels(6) = 'OS - Comm'
639    instance%labels(7) = 'OS - Sums'
640    instance%labels(8) = 'Genwf'
641    instance%labels(9) = 'Genwf_q'
642    instance%labels(10) = 'Summing'
643    instance%labels(11) = 'Gather'
644    instance%labels(100) = 'TOTAL'
645    !
646    POP_SUB(extra_init_labels)
647    return
648  end subroutine extra_init_labels
649  !
650  !----------------------------------------------------------------------------
651  !
652  subroutine timing_class_init(instance)
653    ! Initialize the timing methods
654    ! Essentially, sets labels and sets times to zero
655    implicit none
656    class(timing_class), intent(inout) :: instance
657    integer :: cm
658    PUSH_SUB(timing_class_init)
659    !
660    instance%cpu_times = 0.0d0
661    instance%wall_times = 0.0d0
662    instance%tmp_wall_times = 0.0d0
663    instance%tmp_cpu_times = 0.0d0
664    instance%call_numbers = 0
665    instance%labels = ""
666    call instance%init_labels()
667    !
668    POP_SUB(timing_class_init)
669    return
670  end subroutine timing_class_init
671  !
672  !----------------------------------------------------------------------------
673  !
674  subroutine timing_class_print(instance, c_timing, root_only)
675    ! Print to screen all the timing information.
676    ! specifically, we will print the max, min (over the timing info of the
677    ! MPI processes) and the root time associated to that tag
678    ! Args:
679    ! c_timing, optional: one could pass another timing object, e.g. the one
680    !     for timing the calls in ./Common. The printing info will be merged.
681    ! root_only, optional: in the codes Epsilon and Sigma, only the root node
682    !     gets information on execution time. If root_only=T, we print only the
683    !     timing info of root, without max and min timings.
684    !     default = .false.
685    implicit none
686    class(timing_class), intent(inout) :: instance
687    type(timing_common_class), optional, intent(inout) :: c_timing
688    logical, intent(in), optional :: root_only
689    !
690    integer :: i, error, call_numbers(2*instance%num_times), N, N2
691    integer, allocatable :: buffer_i(:)
692    logical :: do_comms
693    real(DP), allocatable :: buffer_r(:)
694    real(DP) :: min_cpu_times(2*instance%num_times), &
695         max_cpu_times(2*instance%num_times),             &
696         root_cpu_times(2*instance%num_times),            &
697         min_wall_times(2*instance%num_times),            &
698         max_wall_times(2*instance%num_times),            &
699         root_wall_times(2*instance%num_times)
700    character(len=100) :: labels(2*instance%num_times)
701    !
702    PUSH_SUB(timing_class_print)
703    !
704    labels = ""
705    N2 = 2*instance%num_times
706    N = instance%num_times
707    !
708    ! store times and labels in a temporary array.
709    !
710    ! The simplest thing to do to merge with c_timing, given also that these
711    ! arrays are small, is to create a bigger array that accomodates everything
712    !
713    min_cpu_times(N+1:) = instance%cpu_times
714    max_cpu_times(N+1:) = instance%cpu_times
715    root_cpu_times(N+1:) = instance%cpu_times
716    min_wall_times(N+1:) = instance%wall_times
717    max_wall_times(N+1:) = instance%wall_times
718    root_wall_times(N+1:) = instance%wall_times
719    call_numbers(N+1:) = instance%call_numbers
720    labels(N+1:) = instance%labels
721    !
722    if ( present(c_timing) ) then
723       min_cpu_times(:N) = c_timing%cpu_times
724       max_cpu_times(:N) = c_timing%cpu_times
725       root_cpu_times(:N) = c_timing%cpu_times
726       min_wall_times(:N) = c_timing%wall_times
727       max_wall_times(:N) = c_timing%wall_times
728       root_wall_times(:N) = c_timing%wall_times
729       call_numbers(:N) = c_timing%call_numbers
730       labels(:N) = c_timing%labels
731    end if
732    !
733    do_comms = .true.
734    if ( present(root_only) ) then
735       if ( root_only ) do_comms = .false.
736    end if
737    !
738    if ( do_comms ) then
739       !
740       ! if MPI isn`t used, there`s no need to get info across MPI processes
741#ifdef MPI
742       SAFE_ALLOCATE(buffer_r,(N2))
743       SAFE_ALLOCATE(buffer_i,(N2))
744       call MPI_allreduce(max_cpu_times, buffer_r, N2, MPI_double_precision,  &
745            MPI_max, mpi_comm_world, error)
746       max_cpu_times = buffer_r
747       call MPI_allreduce(min_cpu_times, buffer_r, N2, MPI_double_precision,  &
748            MPI_min, mpi_comm_world, error)
749       min_cpu_times = buffer_r
750       call MPI_allreduce(max_wall_times, buffer_r, N2, MPI_double_precision, &
751            MPI_max, mpi_comm_world, error)
752       max_wall_times = buffer_r
753       call MPI_allreduce(min_wall_times, buffer_r, N2, MPI_double_precision, &
754            MPI_min, mpi_comm_world, error)
755       min_wall_times = buffer_r
756       call MPI_allreduce(call_numbers, buffer_i, N2, MPI_integer, MPI_max,   &
757            mpi_comm_world, error)
758       call_numbers = buffer_i
759       SAFE_DEALLOCATE(buffer_r)
760       SAFE_DEALLOCATE(buffer_i)
761#endif
762       !
763    end if
764
765    ! print to screen
766    if ( peinf%inode == 0 ) then
767       print*, ""
768       print*, "Timing information"
769       print*, ""
770       write(6,"(23x,a13,3x,a13,3x,a8)") 'CPU (s)','WALL (s)','#'
771       do i = 1,N2
772          ! if the label is empty, or it`s never been called, we don`t print it
773          if ( len_trim(labels(i)) == 0 ) cycle
774          if ( call_numbers(i) == 0 ) cycle
775          print*, "- " // trim(labels(i)) // ":"
776          if ( do_comms ) then
777             write(6,"(a23,f13.3,3x,f13.3,3x,i8)") "(min.)", min_cpu_times(i),&
778                  min_wall_times(i), call_numbers(i)
779          end if
780          write(6,"(a23,f13.3,3x,f13.3,3x,i8)") "(root)", root_cpu_times(i),  &
781               root_wall_times(i), call_numbers(i)
782          if ( do_comms ) then
783             write(6,"(a23,f13.3,3x,f13.3,3x,i8)") "(max.)", max_cpu_times(i),&
784                  max_wall_times(i), call_numbers(i)
785          end if
786       end do
787       print*, ""
788       print*, "Job Done"
789       print*, ""
790    end if
791    !
792    POP_SUB(timing_class_print)
793    return
794  end subroutine timing_class_print
795  !
796  !----------------------------------------------------------------------------
797  !
798  subroutine timing_class_start(instance, tag)
799    ! start measuring time for tag
800    implicit none
801    class(timing_class), intent(inout) :: instance
802    integer, intent(in) :: tag
803    !
804    integer :: i, j, values(8), wt0
805    real(DP) :: cpu, wall
806    !
807    ! increase the counter by 1
808    instance%call_numbers(tag) = instance%call_numbers(tag) + 1
809    ! save the initial time
810    call timget(cpu,wall)
811    instance%tmp_cpu_times(tag) = cpu
812    instance%tmp_wall_times(tag) = wall
813    !
814    return
815  end subroutine timing_class_start
816  !
817  !----------------------------------------------------------------------------
818  !
819  subroutine timing_class_stop(instance, tag)
820    ! Stop measuring time for this tag
821    !
822    implicit none
823    class(timing_class), intent(inout) :: instance
824    integer, intent(in) :: tag
825    real(DP) :: cpu, wall, t0_c, t0_w
826    !
827    call timget(cpu, wall)
828    t0_c = instance%tmp_cpu_times(tag)
829    t0_w = instance%tmp_wall_times(tag)
830    instance%cpu_times(tag)  = instance%cpu_times(tag)  + (cpu  - t0_c)
831    instance%wall_times(tag) = instance%wall_times(tag) + (wall - t0_w)
832    !
833    return
834  end subroutine timing_class_stop
835  !
836  !----------------------------------------------------------------------------
837  !
838  subroutine date_time(bdate,btime)
839    ! returns:
840    ! - bdate: string with date
841    ! - btime: string with time
842    character(len=11), intent(out) :: bdate
843    character(len=14), intent(out) :: btime
844    !
845    integer :: lmonth
846    integer :: idate(8)
847    character(len=10) :: atime
848    character(len=8) :: adate
849    character(len=5) :: azone
850    character(len=4) :: year
851    character(len=3) :: month(12)
852    character(len=2) :: hour, min, sec, day
853    !
854    DATA month/'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep', &
855         'Oct','Nov','Dec'/
856    !
857    PUSH_SUB(date_time)
858
859    call date_and_time(adate,atime,azone,idate)
860    read(adate,"(a4,i2,a2)") year, lmonth, day
861    write(bdate,"(a2,a1,a3,a1,a4)") day, '-', month(lmonth), '-', year
862    read(atime,'(a2,a2,a2,a4)') hour, min, sec
863    write(btime,"(a2,a1,a2,a1,a2,1x,a5)") hour, ':', min, ':', sec, azone
864    !
865    POP_SUB(date_time)
866    return
867  end subroutine date_time
868
869  !============================================================================
870
871  subroutine timget(cpu, wall)
872    real(DP), intent(out) :: cpu, wall
873    integer :: values(8)
874    ! no push_sub, called too frequently
875    !
876    TIMGET(cpu)
877    call date_and_time(VALUES=values)
878    wall=((values(3)*24.0d0+values(5))*60.0d0 &
879         +values(6))*60.0d0+values(7)+values(8)*1.0d-3
880    !
881    return
882  end subroutine timget
883
884  !============================================================================
885
886  subroutine timacc(n, option, tottim, nslices)
887    ! DEPRECATED
888    ! old subroutine for measuring execution time
889    integer, intent(in) :: n !< not used for option = 0
890    integer, intent(in) :: option !< 0, 1, 2, 3, 4
891    real(DP), intent(out), optional :: tottim(2) !present if option=3 or 4
892    integer, intent(out), optional :: nslices !< optionally used when option=3
893    !
894    real(DP) :: cpu,wall
895    character(len=100) :: tmpstr
896    !
897    ! no push_sub, called too frequently
898
899    ! Check that n lies in sensible bounds
900
901    if (n .lt. 0 .or. n .gt. MTIM) then
902       write(tmpstr,'(a,i6,a,i8)')'timacc: dim MTIM = ',MTIM,' but input n =',n
903       call die(tmpstr)
904    end if
905
906    if (option==0) then
907
908       ! Zero out all accumulators of time and init timers
909
910       acctim(:,:)=0.0d0
911       tzero(:,:)=0.0d0
912       ncount(:)=0
913
914    else if (option==1) then
915
916       ! Initialize timepw for n
917
918       call timget(cpu,wall)
919       tzero(1,n)=cpu
920       tzero(2,n)=wall
921
922    else if (option==2) then
923
924       ! Accumulate time for n
925
926       call timget(cpu,wall)
927       acctim(1,n)=acctim(1,n)+cpu -tzero(1,n)
928       acctim(2,n)=acctim(2,n)+wall-tzero(2,n)
929       ncount(n)=ncount(n)+1
930
931    else if (option==3) then
932
933       ! Return accumulated time for n
934
935       if(.not. present(tottim)) call die("timacc requires tottim for option 3.")
936
937       tottim(1)=acctim(1,n)
938       tottim(2)=acctim(2,n)
939       if(present(nslices)) then
940          nslices=ncount(n)
941       end if
942
943    else if (option==4) then
944
945       ! Return elapsed time for n (do not accumulate)
946
947       if(.not. present(tottim)) call die("timacc requires tottim for option 4.")
948
949       call timget(cpu,wall)
950       tottim(1)=cpu-tzero(1,n)
951       tottim(2)=wall-tzero(2,n)
952
953    else
954
955       write(tmpstr,'(a,i10,a)') 'timacc: input option = ', option, 'not valid.'
956       call die(tmpstr)
957
958    end if
959
960    return
961  end subroutine timacc
962
963  !============================================================================
964
965  subroutine logit(str, should_print, iunit)
966    character (len=*), intent(in) :: str
967    logical, intent(in), optional :: should_print
968    integer, intent(in), optional :: iunit
969
970    character(len=15) :: mydate,mytime,tmpstr
971    logical :: should_print_
972    integer :: iunit_
973
974    if ( .not. peinf%verb_log ) return
975
976    iunit_ = 6
977    if (present(iunit)) iunit_ = iunit
978    should_print_ = peinf%inode==0
979    if (present(should_print)) should_print_ = should_print
980
981    if (should_print_) then
982       call date_and_time(mydate,mytime)
983       tmpstr = mytime(1:2)//':'//mytime(3:4)//':'//mytime(5:6)//'.'//mytime(8:10)
984       mytime = tmpstr
985       write(iunit_,'(4a)') '*** LOG: ', TRUNC(str),'  time = ', TRUNC(mytime)
986    endif
987
988  end subroutine logit
989  !
990  !============================================================================
991  !
992  subroutine logitint(str,i)
993    character(len=*), intent(in) :: str
994    integer, intent(in) :: i
995    character(len=100) :: tmpstr
996    !
997    if (.not.peinf%verb_log) return
998    write(tmpstr,'(a,i5)') str(1:len_trim(str)),i
999    call logit(tmpstr)
1000    !
1001    return
1002  end subroutine logitint
1003
1004  !============================================================================
1005
1006end module timing_m
1007