1*
2* $Id$
3*
4
5* nwpw_timing.F
6* Author - Eric Bylaska
7*
8*   These routines are to be used to time the nwpw module
9*
10*
11*   1 - total FFT
12*   2 - total dot products
13*   3 - lagrange multipliers
14*   4 - exchange correlation
15*   5 - local pseudopotentials
16*   6 - non-local pseudopotentials
17*   7 - hartree potentials
18*   8 - structure factors
19*   9 - masking and packing
20*   10 - geodesic time
21*   11 - gen psi_r and dn
22*   12 - allocating memory from stack
23*   13 - miscellaneous steepest descent update
24*   15 - ffm_dgemm
25*   16 - fmf_dgemm
26*   17 - m_diagonalize
27*   18 - mmm_dgemm
28
29*
30*   20 - phase factors
31*   21 - ewald /ion-ion
32
33*   22 - tredq
34*   23 - getdiags
35*   24 - tqliq
36*   25 - eigsrt
37
38*   30 - queue fft
39*   31 - queue fft serial
40*   32 - queue fft parallel
41*   33 - HFX
42
43*   34 - paw gaussian integrals
44*   35 - paw atomic coulomb
45*   36 - paw atomic xc
46*   37 - paw gen dEmult/dQlm
47*   38 - paw gen dElocal/dQlm
48*   39 - paw cmp operations
49
50*   40 - qmmm LJ
51*   41 - qmmm residual Q
52
53*   42 - MATHIAS InnerLoop
54*   43 - MATHIAS Phaze
55*   44 - MATHIAS Pipelined FFTs
56*   45 - MATHIAS Lagrange
57*   46 - MATHIAS Exch Corr
58*   47 - MATHIAS Hpsi
59
60
61*   50 - io time
62
63*   52 - HFX localization
64*   53 - HFX DM columns
65*   54 - HFX DM Cholesky
66*   55 - re-gridding
67
68
*     ***********************************
*     *        nwpw_timing_init         *
*     ***********************************
*
*     Resets the nwpw timers.  The current time is stored in t0 and
*     the accumulators times and thr_times are filled with zeros.
*
      subroutine nwpw_timing_init()
      implicit none

#include "nwpw_timing_common.fh"

c     **** local constants ****
c     n_clear     - number of elements of times that are zeroed
c     n_clear_thr - number of elements of thr_times that are zeroed
      integer n_clear,n_clear_thr
      parameter (n_clear=60, n_clear_thr=272*60)
      real*8 tm_zero
      parameter (tm_zero=0.0d0)

c     **** remember when timing began ****
      call current_second(t0)

c     **** a source stride of 0 makes dcopy replicate the single ****
c     **** value tm_zero into every destination element          ****
      call dcopy(n_clear,    tm_zero,0,times,    1)
      call dcopy(n_clear_thr,tm_zero,0,thr_times,1)

      return
      end
78
*     ***********************************
*     *        nwpw_timing_start        *
*     ***********************************
*
*     Stores the current time in nwpw_tim1(counter).
*
*     Entry - counter: index of the timer being started
*
      subroutine nwpw_timing_start(counter)
      implicit none
      integer counter

#include "nwpw_timing_common.fh"

c     **** only the master thread writes the start stamp ****
!$OMP MASTER
      call current_second(nwpw_tim1(counter))
!$OMP END MASTER

      return
      end
89
*     ***********************************
*     *         nwpw_timing_end         *
*     ***********************************
*
*     Stores the current time in nwpw_tim2(counter) and adds the
*     interval nwpw_tim2(counter)-nwpw_tim1(counter) to
*     times(counter).
*
*     Entry - counter: index of the timer being stopped
*
      subroutine nwpw_timing_end(counter)
      implicit none
      integer counter

#include "nwpw_timing_common.fh"

c     **** local variables ****
      real*8 elapsed

c     **** only the master thread updates the accumulator ****
!$OMP MASTER
      call current_second(nwpw_tim2(counter))
      elapsed = nwpw_tim2(counter) - nwpw_tim1(counter)
      times(counter) = times(counter) + elapsed
!$OMP END MASTER

      return
      end
105
*     ***********************************
*     *      nwpw_timing_start_thr      *
*     ***********************************
*
*     Per-thread version of nwpw_timing_start: stores the current
*     time in thr_nwpw_tim1(counter,tid+1), where tid is the OpenMP
*     thread number (0 when built without USE_OPENMP).
*
*     Entry - counter: index of the timer being started
*
      subroutine nwpw_timing_start_thr(counter)
c     **** omp_lib is only needed (and only required to exist) ****
c     **** when the OpenMP calls below are compiled in         ****
#ifdef USE_OPENMP
      USE omp_lib
#endif
      implicit none
      integer counter
      integer tid
#include "nwpw_timing_common.fh"

#ifdef USE_OPENMP
      tid = omp_get_thread_num()
#else
      tid = 0
#endif

c     NOTE(review): tid+1 is not checked against the second extent
c     of thr_nwpw_tim1 - confirm it in nwpw_timing_common.fh.
      call current_second(thr_nwpw_tim1(counter,tid+1))
      return
      end
120
*     ***********************************
*     *       nwpw_timing_end_thr       *
*     ***********************************
*
*     Per-thread version of nwpw_timing_end: stores the current time
*     in thr_nwpw_tim2(counter,tid+1) and adds the interval since
*     thr_nwpw_tim1(counter,tid+1) to thr_times(counter,tid+1).
*     tid is the OpenMP thread number (0 without USE_OPENMP).
*
*     Entry - counter: index of the timer being stopped
*
      subroutine nwpw_timing_end_thr(counter)
c     **** omp_lib is only needed (and only required to exist) ****
c     **** when the OpenMP calls below are compiled in         ****
#ifdef USE_OPENMP
      USE omp_lib
#endif
      implicit none
      integer counter
#include "nwpw_timing_common.fh"
      integer tid

#ifdef USE_OPENMP
      tid = omp_get_thread_num()
#else
      tid = 0
#endif

c     NOTE(review): tid+1 is not checked against the second extent
c     of the thr_* arrays - confirm it in nwpw_timing_common.fh.
      call current_second(thr_nwpw_tim2(counter,tid+1))

      thr_times(counter,tid+1) = thr_times(counter,tid+1)
     >     + ( thr_nwpw_tim2(counter,tid+1)
     >       - thr_nwpw_tim1(counter,tid+1) )
      return
      end
139
140
*     ***********************************
*     *           nwpw_timing           *
*     ***********************************
*
*     Returns the value accumulated in times(counter).
*
*     Entry - counter: index of the timer to read
*
      real*8 function nwpw_timing(counter)
      implicit none
      integer counter

#include "nwpw_timing_common.fh"

c     **** plain read of the accumulator; nothing is modified ****
      nwpw_timing = times(counter)

      return
      end
149
*     ***********************************
*     *        nwpw_timing_print        *
*     ***********************************
*
*     Writes one row of the timing table to luout: the label, the
*     time, the time per step and the percentage of ttime.  Nothing
*     is written unless time is greater than 1.0d-9.
*
*     Entry - msg:     row label
*             time:    accumulated time to report
*             counter: number of steps used for the per-step column
*             ttime:   total time used for the percent column
*
*     A zero counter or a zero ttime prints 0 in the corresponding
*     column instead of dividing by zero.
*
      subroutine nwpw_timing_print(msg,time,counter,ttime)
      implicit none
      character*(*) msg
      real*8 time,ttime
      integer counter
#include "stdio.fh"

c     **** local variables ****
      real*8 per_step,percent

      if (time.gt.1.0d-9) then

c        **** guard only the exact-zero divisors ****
         per_step = 0.0d0
         if (counter.ne.0) per_step = time/dble(counter)

         percent = 0.0d0
         if (ttime.ne.0.0d0) percent = 100*time/ttime

         write(luout,1708) msg,time,per_step,percent
      end if
 1708 FORMAT(A,E14.6,E14.6,F12.1,' %')
      return
      end
162
*     ***********************************
*     *      nwpw_timing_print_thr      *
*     ***********************************
*
*     Writes one row of the timing table for the per-thread timer
*     id.  The reported time is the average of thr_times(id,tid)
*     over the threads whose value is greater than 1.0d-9; nothing
*     is written if no thread qualifies.
*
*     Entry - msg:     row label
*             id:      index of the per-thread timer
*             counter: number of steps used for the per-step column
*             ttime:   total time used for the percent column
*
*     Output goes to luout, like nwpw_timing_print.  A zero counter
*     or a zero ttime prints 0 in the corresponding column instead
*     of dividing by zero.
*
      subroutine nwpw_timing_print_thr(msg,id,counter,ttime)
c     **** omp_lib is only needed (and only required to exist) ****
c     **** when the OpenMP call below is compiled in           ****
#ifdef USE_OPENMP
      USE omp_lib
#endif
      implicit none
#include "stdio.fh"
#include "nwpw_timing_common.fh"
      character*(*) msg
      real*8 time,ttime
      integer counter,tid,nthr,used_threads,id

c     **** local variables ****
      real*8 per_step,percent

#ifdef USE_OPENMP
      nthr = omp_get_max_threads()
#else
      nthr = 1
#endif

c     NOTE(review): nthr is not checked against the second extent
c     of thr_times - confirm it in nwpw_timing_common.fh.

c     **** sum the timer over the threads that actually used it ****
      used_threads = 0
      time = 0.0d0
      do tid=1,nthr
        if (thr_times(id,tid).gt.1.0d-9) then
          time = time + thr_times(id,tid)
          used_threads = used_threads + 1
        end if
      end do

      if (used_threads.gt.0) then
        time = time / used_threads
        if (time.gt.1.0d-9) then

c         **** guard only the exact-zero divisors ****
          per_step = 0.0d0
          if (counter.ne.0) per_step = time/dble(counter)

          percent = 0.0d0
          if (ttime.ne.0.0d0) percent = 100*time/ttime

          write(luout,1708) msg,time,per_step,percent
        end if
      end if
 1708 FORMAT(A,E14.6,E14.6,F12.1,' %')
      return
      end
194
*     ***********************************
*     *    nwpw_timing_print_thr_max    *
*     ***********************************
*
*     Writes one row of the timing table for the per-thread timer
*     id.  The reported time is the largest thr_times(id,tid) over
*     all threads; nothing is written unless that maximum is
*     greater than 1.0d-9.
*
*     Entry - msg:     row label
*             id:      index of the per-thread timer
*             counter: number of steps used for the per-step column
*             ttime:   total time used for the percent column
*
*     Output goes to luout, like nwpw_timing_print.  A zero counter
*     or a zero ttime prints 0 in the corresponding column instead
*     of dividing by zero.
*
      subroutine nwpw_timing_print_thr_max(msg,id,counter,ttime)
c     **** omp_lib is only needed (and only required to exist) ****
c     **** when the OpenMP call below is compiled in           ****
#ifdef USE_OPENMP
      USE omp_lib
#endif
      implicit none
#include "stdio.fh"
#include "nwpw_timing_common.fh"
      character*(*) msg
      real*8 time,ttime
      integer counter,tid,nthr,id

c     **** local variables ****
      real*8 per_step,percent

#ifdef USE_OPENMP
      nthr = omp_get_max_threads()
#else
      nthr = 1
#endif

c     NOTE(review): nthr is not checked against the second extent
c     of thr_times - confirm it in nwpw_timing_common.fh.

c     **** find the slowest thread for this timer ****
      time = 0.0d0
      do tid=1,nthr
        if (thr_times(id,tid).gt.1.0d-9) then
          time = max(time,thr_times(id,tid))
        end if
      end do

      if (time.gt.1.0d-9) then

c       **** guard only the exact-zero divisors ****
        per_step = 0.0d0
        if (counter.ne.0) per_step = time/dble(counter)

        percent = 0.0d0
        if (ttime.ne.0.0d0) percent = 100*time/ttime

        write(luout,1708) msg,time,per_step,percent
      end if
 1708 FORMAT(A,E14.6,E14.6,F12.1,' %')
      return
      end
221
222
223
224
*     ***********************************
*     *     nwpw_timing_print_final     *
*     ***********************************
*
*     Stores the current time in tf, forms the total time tf-t0 and,
*     when oprint is true, writes the timing table to luout: a
*     header line followed by one nwpw_timing_print row per timer.
*     Rows whose time is not greater than 1.0d-9 are suppressed by
*     nwpw_timing_print.
*
*     Entry - oprint:  .true. if this caller should print the table
*             counter: number of steps, passed to nwpw_timing_print
*                      for the per-step column
*
      subroutine nwpw_timing_print_final(oprint,counter)
      implicit none
      logical  oprint
      integer counter

#include "stdio.fh"

      real*8 ttime
c     **** external functions ****
      real*8   nwpw_timing
      external nwpw_timing

#include "nwpw_timing_common.fh"

c     **** tf is taken whether or not anything is printed ****
      call current_second(tf)
      ttime = tf-t0

      if (oprint) then
         write(luout,1809) 'Time spent doing               ',
     >                     'total','step', 'percent'

         call nwpw_timing_print('  total time                 : ',
     >                          ttime,counter,ttime)
         call nwpw_timing_print('  i/o time                   : ',
     >                          nwpw_timing(50),counter,ttime)
         call nwpw_timing_print('  FFTs                       : ',
     >                          nwpw_timing(1),counter,ttime)
         call nwpw_timing_print('  dot products               : ',
     >                          nwpw_timing(2),counter,ttime)
         call nwpw_timing_print('  geodesic                   : ',
     >                          nwpw_timing(10),counter,ttime)
         call nwpw_timing_print('  two-electron Gaussian      : ',
     >                          nwpw_timing(13),counter,ttime)
         call nwpw_timing_print('  ffm_dgemm                  : ',
     >                          nwpw_timing(15),counter,ttime)
         call nwpw_timing_print('  fmf_dgemm                  : ',
     >                          nwpw_timing(16),counter,ttime)
         call nwpw_timing_print('  mmm_dgemm                  : ',
     >                          nwpw_timing(18),counter,ttime)
         call nwpw_timing_print('  m_diagonalize              : ',
     >                          nwpw_timing(17),counter,ttime)

c        **** breakdown of m_diagonalize ****
         call nwpw_timing_print('    - m_tredq                : ',
     >                          nwpw_timing(22),counter,ttime)
         call nwpw_timing_print('       - m_tredq_houseq      : ',
     >                          nwpw_timing(26),counter,ttime)
         call nwpw_timing_print('       - m_tredq_houseq_dgemm: ',
     >                          nwpw_timing(28),counter,ttime)
         call nwpw_timing_print('       - m_tredq_dgemm1      : ',
     >                          nwpw_timing(27),counter,ttime)
         call nwpw_timing_print('    - m_getdiags             : ',
     >                          nwpw_timing(23),counter,ttime)
         call nwpw_timing_print('    - m_tqliq                : ',
     >                          nwpw_timing(24),counter,ttime)
         call nwpw_timing_print('    - m_eigsrt               : ',
     >                          nwpw_timing(25),counter,ttime)

         call nwpw_timing_print('  exchange correlation       : ',
     >                          nwpw_timing(4),counter,ttime)
         call nwpw_timing_print('  local pseudopotentials     : ',
     >                          nwpw_timing(5),counter,ttime)
         call nwpw_timing_print('  non-local pseudopotentials : ',
     >                          nwpw_timing(6),counter,ttime)
         call nwpw_timing_print('  hartree potentials         : ',
     >                          nwpw_timing(7),counter,ttime)
         call nwpw_timing_print('  ion-ion interaction        : ',
     >                          nwpw_timing(21),counter,ttime)
         call nwpw_timing_print('  structure factors          : ',
     >                          nwpw_timing(8),counter,ttime)
         call nwpw_timing_print('  phase factors              : ',
     >                          nwpw_timing(20),counter,ttime)
         call nwpw_timing_print('  masking and packing        : ',
     >                          nwpw_timing(9),counter,ttime)
         call nwpw_timing_print('  queue fft                  : ',
     >                          nwpw_timing(30),counter,ttime)
         call nwpw_timing_print('  queue fft (serial)         : ',
     >                          nwpw_timing(31),counter,ttime)
         call nwpw_timing_print('  queue fft (message passing): ',
     >                          nwpw_timing(32),counter,ttime)
         call nwpw_timing_print('  HFX potential              : ',
     >                          nwpw_timing(33),counter,ttime)

         call nwpw_timing_print('  paw gaussian integrals     : ',
     >                          nwpw_timing(34),counter,ttime)
         call nwpw_timing_print('  paw atomic coulomb         : ',
     >                          nwpw_timing(35),counter,ttime)
         call nwpw_timing_print('  paw atomic xc              : ',
     >                          nwpw_timing(36),counter,ttime)
         call nwpw_timing_print('  paw gen dEmult/dQlm        : ',
     >                          nwpw_timing(37),counter,ttime)
         call nwpw_timing_print('  paw gen dElocal/dQlm       : ',
     >                          nwpw_timing(38),counter,ttime)
c        **** timer 39 (not 38) holds the paw cmp operations ****
         call nwpw_timing_print('  paw cmp operations         : ',
     >                          nwpw_timing(39),counter,ttime)

         call nwpw_timing_print('  qmmm LJ                    : ',
     >                          nwpw_timing(40),counter,ttime)
         call nwpw_timing_print('  qmmm residual Q            : ',
     >                          nwpw_timing(41),counter,ttime)

         call nwpw_timing_print('  MATHIAS InnerLoop          : ',
     >                          nwpw_timing(42),counter,ttime)
         call nwpw_timing_print('  MATHIAS Phaze              : ',
     >                          nwpw_timing(43),counter,ttime)
         call nwpw_timing_print('  MATHIAS Pipelined FFTs     : ',
     >                          nwpw_timing(44),counter,ttime)
         call nwpw_timing_print('  MATHIAS Lagrange           : ',
     >                          nwpw_timing(45),counter,ttime)
         call nwpw_timing_print('  MATHIAS Exch Corr          : ',
     >                          nwpw_timing(46),counter,ttime)
         call nwpw_timing_print('  MATHIAS Hpsi               : ',
     >                          nwpw_timing(47),counter,ttime)

         call nwpw_timing_print('  nwpw_ugauss                : ',
     >                          nwpw_timing(48),counter,ttime)
         call nwpw_timing_print('  nwpw_wgauss                : ',
     >                          nwpw_timing(49),counter,ttime)
c        NOTE(review): this row reads timer 55, the same timer as
c        the 'HFX localized re-gridding' row below - confirm which
c        index nwpw_dwgauss is really timed under.
         call nwpw_timing_print('  nwpw_dwgauss               : ',
     >                          nwpw_timing(55),counter,ttime)
         call nwpw_timing_print('  nwpw_gaunt                 : ',
     >                          nwpw_timing(51),counter,ttime)

         call nwpw_timing_print('  HFX localization           : ',
     >                          nwpw_timing(52),counter,ttime)
         call nwpw_timing_print('  HFX Finding DM columns     : ',
     >                          nwpw_timing(53),counter,ttime)
         call nwpw_timing_print('  HFX DM Cholesky            : ',
     >                          nwpw_timing(54),counter,ttime)
         call nwpw_timing_print('  HFX localized re-gridding  : ',
     >                          nwpw_timing(55),counter,ttime)

         call nwpw_timing_print('  non-local psp FFM          : ',
     >                          nwpw_timing(56),counter,ttime)
         call nwpw_timing_print('  non-local psp FMF          : ',
     >                          nwpw_timing(57),counter,ttime)
         call nwpw_timing_print('  non-local psp FFM A        : ',
     >                          nwpw_timing(58),counter,ttime)
         call nwpw_timing_print('  non-local psp FFM B        : ',
     >                          nwpw_timing(59),counter,ttime)

      end if

      return
 1809 FORMAT(//A,3A14)
      end
420
421