*
* $Id$
*

* nwpw_timing.F
* Author - Eric Bylaska
*
*   These routines are used to time the nwpw module.
*
*   Each timer is addressed by an integer id.  The ids documented
*   by the original author are:
*
*   1 - total FFT
*   2 - total dot products
*   3 - lagrange multipliers
*   4 - exchange correlation
*   5 - local pseudopotentials
*   6 - non-local pseudopotentials
*   7 - hartree potentials
*   8 - structure factors
*   9 - masking and packing
*   10 - geodesic time
*   11 - gen psi_r and dn
*   12 - allocating memory from stack
*   13 - miscellaneous steepest descent update
*   15 - ffm_dgemm
*   16 - fmf_dgemm
*   17 - m_diagonalize
*   18 - mmm_dgemm
*
*   20 - phase factors
*   21 - ewald /ion-ion
*
*   22 - tredq
*   23 - getdiags
*   24 - tqliq
*   25 - eigsrt
*
*   30 - queue fft
*   31 - queue fft serial
*   32 - queue fft parallel
*   33 - HFX
*
*   34 - paw gaussian integrals
*   35 - paw atomic coulomb
*   36 - paw atomic xc
*   37 - paw gen dEmult/dQlm
*   38 - paw gen dElocal/dQlm
*   39 - paw cmp operations
*
*   40 - qmmm LJ
*   41 - qmmm residual Q
*
*   42 - MATHIAS InnerLoop
*   43 - MATHIAS Phaze
*   44 - MATHIAS Pipelined FFTs
*   45 - MATHIAS Lagrange
*   46 - MATHIAS Exch Corr
*   47 - MATHIAS Hpsi
*
*   50 - io time
*
*   52 - HFX localization
*   53 - HFX DM columns
*   54 - HFX DM Cholesky
*   55 - re-gridding
*
*   Additional ids that are only visible through the labels used
*   in nwpw_timing_print_final:
*
*   26 - m_tredq_houseq
*   27 - m_tredq_dgemm1
*   28 - m_tredq_houseq_dgemm
*   48 - nwpw_ugauss
*   49 - nwpw_wgauss
*   51 - nwpw_gaunt
*   56 - non-local psp FFM
*   57 - non-local psp FMF
*   58 - non-local psp FFM A
*   59 - non-local psp FFM B
*   60..65 - timing probes 0..5
*
*   NOTE(review): nwpw_timing_print_final reports timer 13 as
*   'two-electron Gaussian', timer 55 additionally as
*   'nwpw_dwgauss' and timer 64 additionally as
*   'paw cgaussian integrals'.  Confirm which ids the callers
*   really use for those sections.
*


*     ***********************************
*     *                                 *
*     *        nwpw_timing_init         *
*     *                                 *
*     ***********************************
*
*     Records the reference start time in t0 and zeroes the
*     accumulated serial (times) and per-thread (thr_times)
*     timers.
*
      subroutine nwpw_timing_init()
      implicit none

#include "nwpw_timing_common.fh"

      call current_second(t0)

*     **** a source stride of 0 broadcasts the scalar 0.0d0 ****
      call dcopy(nwpw_tim_max,0.0d0,0,times,1)

*     **** 272 is presumably the thread dimension of thr_times ****
*     **** declared in nwpw_timing_common.fh - TODO confirm    ****
      call dcopy(272*nwpw_tim_max,0.0d0,0,thr_times,1)
      return
      end


*     ***********************************
*     *                                 *
*     *        nwpw_timing_start        *
*     *                                 *
*     ***********************************
*
*     Stores the current time as the start mark of timer
*     "counter".  Only the OpenMP master thread writes the mark.
*
*     Entry - counter: timer id
*
      subroutine nwpw_timing_start(counter)
      implicit none
      integer counter

#include "nwpw_timing_common.fh"

!$OMP MASTER
      call current_second(nwpw_tim1(counter))
!$OMP END MASTER
      return
      end


*     ***********************************
*     *                                 *
*     *         nwpw_timing_end         *
*     *                                 *
*     ***********************************
*
*     Stores the current time as the stop mark of timer "counter"
*     and adds the interval since the matching start mark to
*     times(counter).  Only the OpenMP master thread updates the
*     timer.
*
*     Entry - counter: timer id
*
      subroutine nwpw_timing_end(counter)
      implicit none
      integer counter

#include "nwpw_timing_common.fh"

!$OMP MASTER
      call current_second(nwpw_tim2(counter))

      times(counter) = times(counter)
     >   + (nwpw_tim2(counter)-nwpw_tim1(counter))
!$OMP END MASTER
      return
      end


*     ***********************************
*     *                                 *
*     *      nwpw_timing_start_thr      *
*     *                                 *
*     ***********************************
*
*     Per-thread variant of nwpw_timing_start: every calling
*     thread stores its own start mark in column tid+1 of
*     thr_nwpw_tim1.  Without USE_OPENMP the thread id is 0.
*
*     Entry - counter: timer id
*
      subroutine nwpw_timing_start_thr(counter)
#ifdef USE_OPENMP
      USE omp_lib
#endif
      implicit none
      integer counter

#include "nwpw_timing_common.fh"

*     **** local variables ****
      integer tid

#ifdef USE_OPENMP
      tid = omp_get_thread_num()
#else
      tid = 0
#endif

*     **** thread ids are 0-based, array columns are 1-based ****
      call current_second(thr_nwpw_tim1(counter,tid+1))
      return
      end


*     ***********************************
*     *                                 *
*     *       nwpw_timing_end_thr       *
*     *                                 *
*     ***********************************
*
*     Per-thread variant of nwpw_timing_end: stores the calling
*     thread's stop mark and adds the elapsed interval to
*     thr_times(counter,tid+1).  Without USE_OPENMP the thread id
*     is 0.
*
*     Entry - counter: timer id
*
      subroutine nwpw_timing_end_thr(counter)
#ifdef USE_OPENMP
      USE omp_lib
#endif
      implicit none
      integer counter

#include "nwpw_timing_common.fh"

*     **** local variables ****
      integer tid

#ifdef USE_OPENMP
      tid = omp_get_thread_num()
#else
      tid = 0
#endif

      call current_second(thr_nwpw_tim2(counter,tid+1))

      thr_times(counter,tid+1) = thr_times(counter,tid+1)
     >   + (thr_nwpw_tim2(counter,tid+1)-thr_nwpw_tim1(counter,tid+1))
      return
      end


*     ***********************************
*     *                                 *
*     *           nwpw_timing           *
*     *                                 *
*     ***********************************
*
*     Returns the time accumulated so far in timer "counter".
*
*     Entry - counter: timer id
*
      real*8 function nwpw_timing(counter)
      implicit none
      integer counter

#include "nwpw_timing_common.fh"

      nwpw_timing = times(counter)
      return
      end


*     ***********************************
*     *                                 *
*     *        nwpw_timing_print        *
*     *                                 *
*     ***********************************
*
*     Writes one line of the timing table to luout: the label,
*     the total time, the time per step (time/counter) and the
*     percentage of ttime.  Nothing is written when time does not
*     exceed 1.0d-9.
*
*     Entry - msg:     label printed in front of the numbers
*             time:    accumulated time to report
*             counter: number of steps used for the per-step value
*             ttime:   total time used for the percentage
*
      subroutine nwpw_timing_print(msg,time,counter,ttime)
      implicit none
      character*(*) msg
      real*8 time,ttime
      integer counter

#include "stdio.fh"

      if (time>1.0d-9) then
         write(luout,1708) msg,time,time/dble(counter),
     >                     100*time/ttime
      end if
 1708 FORMAT(A,E14.6,E14.6,F12.1,' %')
      return
      end


*     ***********************************
*     *                                 *
*     *      nwpw_timing_print_thr      *
*     *                                 *
*     ***********************************
*
*     Writes one line of the timing table for the per-thread
*     timer "id".  The reported time is the average of
*     thr_times(id,tid) over those threads whose accumulated time
*     exceeds 1.0d-9; nothing is written if no thread qualifies
*     or the average does not exceed 1.0d-9.
*
*     Entry - msg:     label printed in front of the numbers
*             id:      timer id
*             counter: number of steps used for the per-step value
*             ttime:   total time used for the percentage
*
      subroutine nwpw_timing_print_thr(msg,id,counter,ttime)
#ifdef USE_OPENMP
      USE omp_lib
#endif
      implicit none

#include "nwpw_timing_common.fh"
#include "stdio.fh"

      character*(*) msg
      real*8 time,ttime
      integer counter,tid,nthr,used_threads,id

#ifdef USE_OPENMP
      nthr = omp_get_max_threads()
#else
      nthr = 1
#endif

*     **** sum the timers of the threads that did any work ****
      used_threads = 0
      time = 0.0d0
      do tid=1,nthr
         if (thr_times(id,tid)>1.0d-9) then
            time = time + thr_times(id,tid)
            used_threads = used_threads+1
         end if
      end do

      if (used_threads>0) then
         time = time / used_threads
         if (time>1.0d-9) then
            write(luout,1708) msg,time,time/dble(counter),
     >                        100*time/ttime
         end if
      end if
 1708 FORMAT(A,E14.6,E14.6,F12.1,' %')
      return
      end


*     ***********************************
*     *                                 *
*     *    nwpw_timing_print_thr_max    *
*     *                                 *
*     ***********************************
*
*     Same as nwpw_timing_print_thr, but reports the largest
*     per-thread time instead of the average.
*
*     Entry - msg:     label printed in front of the numbers
*             id:      timer id
*             counter: number of steps used for the per-step value
*             ttime:   total time used for the percentage
*
      subroutine nwpw_timing_print_thr_max(msg,id,counter,ttime)
#ifdef USE_OPENMP
      USE omp_lib
#endif
      implicit none

#include "nwpw_timing_common.fh"
#include "stdio.fh"

      character*(*) msg
      real*8 time,ttime
      integer counter,tid,nthr,id

#ifdef USE_OPENMP
      nthr = omp_get_max_threads()
#else
      nthr = 1
#endif

      time = 0.0d0
      do tid=1,nthr
         if (thr_times(id,tid)>1.0d-9) then
            time = max(time,thr_times(id,tid))
         end if
      end do

      if (time>1.0d-9) then
         write(luout,1708) msg,time,time/dble(counter),
     >                     100*time/ttime
      end if
 1708 FORMAT(A,E14.6,E14.6,F12.1,' %')
      return
      end


*     ***********************************
*     *                                 *
*     *     nwpw_timing_print_final     *
*     *                                 *
*     ***********************************
*
*     Stores the current time in tf, takes ttime = tf-t0 as the
*     total time and, if oprint is true, writes the timing table
*     to luout.  Rows whose timer does not exceed 1.0d-9 are
*     suppressed by nwpw_timing_print.
*
*     Entry - oprint:  .true. if the table is to be written
*             counter: number of steps used for the per-step column
*
      subroutine nwpw_timing_print_final(oprint,counter)
      implicit none
      logical oprint
      integer counter

#include "stdio.fh"

*     **** local variables ****
      real*8 ttime

*     **** external functions ****
      real*8   nwpw_timing
      external nwpw_timing

#include "nwpw_timing_common.fh"

      call current_second(tf)
      ttime = tf-t0

      if (oprint) then
         write(luout,1809) 'Time spent doing ',
     >                     'total','step', 'percent'

         call nwpw_timing_print(
     >     ' total time : ',
     >     ttime,counter,ttime)
         call nwpw_timing_print(
     >     ' i/o time : ',
     >     nwpw_timing(50),counter,ttime)
         call nwpw_timing_print(
     >     ' FFTs : ',
     >     nwpw_timing(1),counter,ttime)
         call nwpw_timing_print(
     >     ' dot products : ',
     >     nwpw_timing(2),counter,ttime)
         call nwpw_timing_print(
     >     ' geodesic : ',
     >     nwpw_timing(10),counter,ttime)
*        NOTE(review): the file header lists timer 13 as
*        "miscellaneous steepest descent update" - confirm label
         call nwpw_timing_print(
     >     ' two-electron Gaussian : ',
     >     nwpw_timing(13),counter,ttime)
         call nwpw_timing_print(
     >     ' ffm_dgemm : ',
     >     nwpw_timing(15),counter,ttime)
         call nwpw_timing_print(
     >     ' fmf_dgemm : ',
     >     nwpw_timing(16),counter,ttime)
         call nwpw_timing_print(
     >     ' mmm_dgemm : ',
     >     nwpw_timing(18),counter,ttime)
         call nwpw_timing_print(
     >     ' m_diagonalize : ',
     >     nwpw_timing(17),counter,ttime)

*        **** breakdown of m_diagonalize ****
         call nwpw_timing_print(
     >     ' - m_tredq : ',
     >     nwpw_timing(22),counter,ttime)
         call nwpw_timing_print(
     >     ' - m_tredq_houseq : ',
     >     nwpw_timing(26),counter,ttime)
         call nwpw_timing_print(
     >     ' - m_tredq_houseq_dgemm: ',
     >     nwpw_timing(28),counter,ttime)
         call nwpw_timing_print(
     >     ' - m_tredq_dgemm1 : ',
     >     nwpw_timing(27),counter,ttime)

         call nwpw_timing_print(
     >     ' - m_getdiags : ',
     >     nwpw_timing(23),counter,ttime)
         call nwpw_timing_print(
     >     ' - m_tqliq : ',
     >     nwpw_timing(24),counter,ttime)
         call nwpw_timing_print(
     >     ' - m_eigsrt : ',
     >     nwpw_timing(25),counter,ttime)

         call nwpw_timing_print(
     >     ' exchange correlation : ',
     >     nwpw_timing(4),counter,ttime)
         call nwpw_timing_print(
     >     ' local pseudopotentials : ',
     >     nwpw_timing(5),counter,ttime)
         call nwpw_timing_print(
     >     ' non-local pseudopotentials : ',
     >     nwpw_timing(6),counter,ttime)
         call nwpw_timing_print(
     >     ' hartree potentials : ',
     >     nwpw_timing(7),counter,ttime)
         call nwpw_timing_print(
     >     ' ion-ion interaction : ',
     >     nwpw_timing(21),counter,ttime)
         call nwpw_timing_print(
     >     ' structure factors : ',
     >     nwpw_timing(8),counter,ttime)
         call nwpw_timing_print(
     >     ' phase factors : ',
     >     nwpw_timing(20),counter,ttime)
         call nwpw_timing_print(
     >     ' masking and packing : ',
     >     nwpw_timing(9),counter,ttime)
         call nwpw_timing_print(
     >     ' queue fft : ',
     >     nwpw_timing(30),counter,ttime)
         call nwpw_timing_print(
     >     ' queue fft (serial) : ',
     >     nwpw_timing(31),counter,ttime)
         call nwpw_timing_print(
     >     ' queue fft (message passing): ',
     >     nwpw_timing(32),counter,ttime)
         call nwpw_timing_print(
     >     ' HFX potential : ',
     >     nwpw_timing(33),counter,ttime)

         call nwpw_timing_print(
     >     ' paw gaussian integrals : ',
     >     nwpw_timing(34),counter,ttime)
*        NOTE(review): timer 64 is also printed as
*        'Timing probe 4' below - confirm the intended id
         call nwpw_timing_print(
     >     ' paw cgaussian integrals : ',
     >     nwpw_timing(64),counter,ttime)

         call nwpw_timing_print(
     >     ' paw atomic coulomb : ',
     >     nwpw_timing(35),counter,ttime)
         call nwpw_timing_print(
     >     ' paw atomic xc : ',
     >     nwpw_timing(36),counter,ttime)
         call nwpw_timing_print(
     >     ' paw gen dEmult/dQlm : ',
     >     nwpw_timing(37),counter,ttime)
         call nwpw_timing_print(
     >     ' paw gen dElocal/dQlm : ',
     >     nwpw_timing(38),counter,ttime)
*        **** timer 39 per the file header; this row used to ****
*        **** repeat timer 38 (paw gen dElocal/dQlm)          ****
         call nwpw_timing_print(
     >     ' paw cmp operations : ',
     >     nwpw_timing(39),counter,ttime)
         call nwpw_timing_print(
     >     ' qmmm LJ : ',
     >     nwpw_timing(40),counter,ttime)
         call nwpw_timing_print(
     >     ' qmmm residual Q : ',
     >     nwpw_timing(41),counter,ttime)
         call nwpw_timing_print(
     >     ' MATHIAS InnerLoop : ',
     >     nwpw_timing(42),counter,ttime)
         call nwpw_timing_print(
     >     ' MATHIAS Phaze : ',
     >     nwpw_timing(43),counter,ttime)
         call nwpw_timing_print(
     >     ' MATHIAS Pipelined FFTs : ',
     >     nwpw_timing(44),counter,ttime)
         call nwpw_timing_print(
     >     ' MATHIAS Lagrange : ',
     >     nwpw_timing(45),counter,ttime)
         call nwpw_timing_print(
     >     ' MATHIAS Exch Corr : ',
     >     nwpw_timing(46),counter,ttime)
         call nwpw_timing_print(
     >     ' MATHIAS Hpsi : ',
     >     nwpw_timing(47),counter,ttime)
         call nwpw_timing_print(
     >     ' nwpw_ugauss : ',
     >     nwpw_timing(48),counter,ttime)
         call nwpw_timing_print(
     >     ' nwpw_wgauss : ',
     >     nwpw_timing(49),counter,ttime)
*        NOTE(review): timer 55 is also printed as
*        'HFX localized re-gridding' below - confirm the id
         call nwpw_timing_print(
     >     ' nwpw_dwgauss : ',
     >     nwpw_timing(55),counter,ttime)
         call nwpw_timing_print(
     >     ' nwpw_gaunt : ',
     >     nwpw_timing(51),counter,ttime)
         call nwpw_timing_print(
     >     ' HFX localization : ',
     >     nwpw_timing(52),counter,ttime)
         call nwpw_timing_print(
     >     ' HFX Finding DM columns : ',
     >     nwpw_timing(53),counter,ttime)
         call nwpw_timing_print(
     >     ' HFX DM Cholesky : ',
     >     nwpw_timing(54),counter,ttime)
         call nwpw_timing_print(
     >     ' HFX localized re-gridding : ',
     >     nwpw_timing(55),counter,ttime)
         call nwpw_timing_print(
     >     ' non-local psp FFM : ',
     >     nwpw_timing(56),counter,ttime)
         call nwpw_timing_print(
     >     ' non-local psp FMF : ',
     >     nwpw_timing(57),counter,ttime)
         call nwpw_timing_print(
     >     ' non-local psp FFM A : ',
     >     nwpw_timing(58),counter,ttime)
         call nwpw_timing_print(
     >     ' non-local psp FFM B : ',
     >     nwpw_timing(59),counter,ttime)

*        **** general purpose probes ****
         call nwpw_timing_print(
     >     ' Timing probe 0 : ',
     >     nwpw_timing(60),counter,ttime)
         call nwpw_timing_print(
     >     ' Timing probe 1 : ',
     >     nwpw_timing(61),counter,ttime)
         call nwpw_timing_print(
     >     ' Timing probe 2 : ',
     >     nwpw_timing(62),counter,ttime)
         call nwpw_timing_print(
     >     ' Timing probe 3 : ',
     >     nwpw_timing(63),counter,ttime)
         call nwpw_timing_print(
     >     ' Timing probe 4 : ',
     >     nwpw_timing(64),counter,ttime)
         call nwpw_timing_print(
     >     ' Timing probe 5 : ',
     >     nwpw_timing(65),counter,ttime)

      end if

      return
 1809 FORMAT(//A,3A14)
      end
