C
C  This file is part of MUMPS 5.1.2, released
C  on Mon Oct  2 07:37:01 UTC 2017
C
C
C  Copyright 1991-2017 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
C  University of Bordeaux.
C
C  This version of MUMPS is provided to you free of charge. It is
C  released under the CeCILL-C license:
C  http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html
C
C     ------------------------------------------------------------------
C     Module SMUMPS_LR_STATS
C
C     Module-level accumulators for Block Low-Rank (BLR) statistics:
C     flop counts, memory savings, timings and block sizes, together
C     with the routines that update, combine and print them.
C
C     Naming convention visible in this module:
C       FLOP_xxx      counters reset by INIT_STATS_FRONT and added to
C                     the matching ACC_FLOP_xxx counter by
C                     STATS_COMPUTE_FLOP_FRONT_TYPE1;
C       ACC_xxx       counters reset by INIT_STATS_GLOBAL.
C     In the UPDATE_FLOP_STATS_* routines, NIV.EQ.1 selects the
C     FLOP_xxx counters and any other value selects the ACC_FLOP_xxx
C     counters directly.
C     ------------------------------------------------------------------
      MODULE SMUMPS_LR_STATS
      USE SMUMPS_LR_TYPE
      IMPLICIT NONE
C     Memory statistics (numbers of entries, held as reals)
      DOUBLE PRECISION :: ACC_MRY_CB_GAIN,
     &                    ACC_MRY_CB_FR,
     &                    FRONT_L11_BLR_SAVINGS,
     &                    FRONT_U11_BLR_SAVINGS,
     &                    FRONT_L21_BLR_SAVINGS,
     &                    FRONT_U12_BLR_SAVINGS,
     &                    ACC_FR_MRY,
     &                    GLOBAL_BLR_SAVINGS,
     &                    GLOBAL_MRY_LPRO_COMPR,
     &                    GLOBAL_MRY_LTOT_COMPR
      INTEGER :: CNT_NODES
C     Counters reset by INIT_STATS_FRONT
      DOUBLE PRECISION :: FLOP_FR_UPDT,
     &                    FLOP_LR_UPDT,
     &                    FLOP_LR_UPDT_OUT,
     &                    FLOP_RMB,
     &                    FLOP_FR_TRSM,
     &                    FLOP_LR_TRSM,
     &                    FLOP_PANEL,
     &                    FLOP_TRSM,
     &                    FLOP_DEC_ACC,
     &                    FLOP_REC_ACC,
     &                    FLOP_DEMOTE,
     &                    FLOP_CB_DEMOTE,
     &                    FLOP_CB_PROMOTE,
     &                    LR_FLOP_GAIN
C     Counters reset by INIT_STATS_GLOBAL
      DOUBLE PRECISION :: ACC_LR_FLOP_GAIN
      DOUBLE PRECISION :: ACC_FLOP_FR_FACTO,
     &                    ACC_FLOP_LR_FACTO,
     &                    ACC_FLOP_FR_TRSM,
     &                    ACC_FLOP_LR_TRSM,
     &                    ACC_FLOP_FR_UPDT,
     &                    ACC_FLOP_LR_UPDT,
     &                    ACC_FLOP_LR_UPDT_OUT,
     &                    ACC_FLOP_RMB,
     &                    ACC_FLOP_DEMOTE,
     &                    ACC_FLOP_CB_DEMOTE,
     &                    ACC_FLOP_CB_PROMOTE,
     &                    ACC_FLOP_TRSM,
     &                    ACC_FLOP_DEC_ACC,
     &                    ACC_FLOP_REC_ACC,
     &                    ACC_FLOP_PANEL,
     &                    ACC_FLOP_FRFRONTS,
     &                    ACC_FLOP_FR_SOLVE,
     &                    ACC_FLOP_LR_SOLVE
      DOUBLE PRECISION :: FACTOR_PROCESSED_FRACTION
      INTEGER(KIND=8)  :: FACTOR_SIZE
      DOUBLE PRECISION :: TOTAL_FLOP
C     Timers
      DOUBLE PRECISION :: BLR_TIME_LRGROUPING
      DOUBLE PRECISION :: BLR_TIME_SEPGROUPING
      DOUBLE PRECISION :: BLR_TIME_GETHALO
      DOUBLE PRECISION :: BLR_TIME_KWAY
      DOUBLE PRECISION :: BLR_TIME_GNEW
      DOUBLE PRECISION :: ACC_UPDT_TIME
      DOUBLE PRECISION :: ACC_RMB_TIME
      DOUBLE PRECISION :: ACC_UPDT_TIME_OUT
      DOUBLE PRECISION :: ACC_PROMOTING_TIME
      DOUBLE PRECISION :: ACC_DEMOTING_TIME
      DOUBLE PRECISION :: ACC_CB_DEMOTING_TIME
      DOUBLE PRECISION :: ACC_LR_MODULE_TIME
      DOUBLE PRECISION :: ACC_TRSM_TIME
      DOUBLE PRECISION :: ACC_FRPANELS_TIME
      DOUBLE PRECISION :: ACC_FAC_I_TIME
      DOUBLE PRECISION :: ACC_FAC_MQ_TIME
      DOUBLE PRECISION :: ACC_FAC_SQ_TIME
      DOUBLE PRECISION :: ACC_FRFRONTS_TIME
      DOUBLE PRECISION :: AVG_ACC_FLOP_LR_FACTO
      DOUBLE PRECISION :: MIN_ACC_FLOP_LR_FACTO
      DOUBLE PRECISION :: MAX_ACC_FLOP_LR_FACTO
C     Block-size statistics maintained by COLLECT_BLOCKSIZES
      INTEGER :: TOTAL_NBLOCKS_ASS, TOTAL_NBLOCKS_CB
      INTEGER :: MIN_BLOCKSIZE_ASS, MAX_BLOCKSIZE_ASS
      INTEGER :: MIN_BLOCKSIZE_CB, MAX_BLOCKSIZE_CB
      DOUBLE PRECISION :: AVG_BLOCKSIZE_ASS, AVG_BLOCKSIZE_CB
C     Set to id%STEP by INIT_STATS_GLOBAL
      INTEGER, POINTER :: STEP_STATS(:)
      CONTAINS
C
C     ------------------------------------------------------------------
C     COLLECT_BLOCKSIZES
C     Folds the block sizes described by CUT into the module-level
C     block-size statistics (count, average, min, max).
C       CUT        block boundaries; block I has size CUT(I+1)-CUT(I)
C       NPARTSASS  blocks 1..NPARTSASS feed the *_ASS statistics
C       NPARTSCB   the next NPARTSCB blocks feed the *_CB statistics
C     CUT must hold at least NPARTSASS+NPARTSCB+1 entries.
C     ------------------------------------------------------------------
      SUBROUTINE COLLECT_BLOCKSIZES(CUT,NPARTSASS,NPARTSCB)
      INTEGER, INTENT(IN) :: NPARTSASS, NPARTSCB
      INTEGER, POINTER, DIMENSION(:) :: CUT
      INTEGER :: LOC_MIN_ASS, LOC_MIN_CB, LOC_MAX_ASS, LOC_MAX_CB,
     &           LOC_TOT_ASS, LOC_TOT_CB
      DOUBLE PRECISION :: LOC_AVG_ASS, LOC_AVG_CB
      INTEGER :: I, BSZ, NEW_TOT
      LOC_TOT_ASS = 0
      LOC_TOT_CB  = 0
      LOC_AVG_ASS = 0.D0
      LOC_AVG_CB  = 0.D0
C     Seed the minima with huge(1), as INIT_STATS_GLOBAL does, so that
C     a call contributing no block leaves the global minimum untouched
C     and blocks larger than any fixed constant are handled correctly.
      LOC_MIN_ASS = huge(1)
      LOC_MIN_CB  = huge(1)
      LOC_MAX_ASS = 0
      LOC_MAX_CB  = 0
      DO I = 1,NPARTSASS
        BSZ = CUT(I+1) - CUT(I)
C       running average over the blocks seen so far in this call
        LOC_AVG_ASS = ( LOC_TOT_ASS * LOC_AVG_ASS + dble(BSZ) )
     &                / (LOC_TOT_ASS + 1)
        LOC_TOT_ASS = LOC_TOT_ASS + 1
        LOC_MIN_ASS = min(LOC_MIN_ASS, BSZ)
        LOC_MAX_ASS = max(LOC_MAX_ASS, BSZ)
      END DO
      DO I = NPARTSASS+1,NPARTSASS+NPARTSCB
        BSZ = CUT(I+1) - CUT(I)
        LOC_AVG_CB = ( LOC_TOT_CB * LOC_AVG_CB + dble(BSZ) )
     &               / (LOC_TOT_CB + 1)
        LOC_TOT_CB = LOC_TOT_CB + 1
        LOC_MIN_CB = min(LOC_MIN_CB, BSZ)
        LOC_MAX_CB = max(LOC_MAX_CB, BSZ)
      END DO
C     Merge into the global weighted averages.  The guard avoids 0/0
C     (NaN) when no block has been recorded yet, which would otherwise
C     contaminate every later update of the average.
      NEW_TOT = TOTAL_NBLOCKS_ASS + LOC_TOT_ASS
      IF (NEW_TOT .GT. 0) THEN
        AVG_BLOCKSIZE_ASS = (TOTAL_NBLOCKS_ASS*AVG_BLOCKSIZE_ASS
     &                     + LOC_TOT_ASS*LOC_AVG_ASS) / NEW_TOT
      ENDIF
      NEW_TOT = TOTAL_NBLOCKS_CB + LOC_TOT_CB
      IF (NEW_TOT .GT. 0) THEN
        AVG_BLOCKSIZE_CB = (TOTAL_NBLOCKS_CB*AVG_BLOCKSIZE_CB
     &                    + LOC_TOT_CB*LOC_AVG_CB) / NEW_TOT
      ENDIF
      TOTAL_NBLOCKS_ASS = TOTAL_NBLOCKS_ASS + LOC_TOT_ASS
      TOTAL_NBLOCKS_CB  = TOTAL_NBLOCKS_CB + LOC_TOT_CB
      MIN_BLOCKSIZE_ASS = min(MIN_BLOCKSIZE_ASS,LOC_MIN_ASS)
      MIN_BLOCKSIZE_CB  = min(MIN_BLOCKSIZE_CB,LOC_MIN_CB)
      MAX_BLOCKSIZE_ASS = max(MAX_BLOCKSIZE_ASS,LOC_MAX_ASS)
      MAX_BLOCKSIZE_CB  = max(MAX_BLOCKSIZE_CB,LOC_MAX_CB)
      END SUBROUTINE COLLECT_BLOCKSIZES
C
C     ------------------------------------------------------------------
C     UPDATE_ALL_TIMES
C     Adds each LOC_xxx_TIME argument to its accumulated timer.
C     LOC_FACTO_TIME is added to ACC_UPDT_TIME.  INODE is not used.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_ALL_TIMES(INODE, LOC_FACTO_TIME,
     &    LOC_PROMOTING_TIME, LOC_DEMOTING_TIME, LOC_CB_DEMOTING_TIME,
     &    LOC_FRPANELS_TIME, LOC_FRFRONTS_TIME,
     &    LOC_TRSM_TIME, LOC_LR_MODULE_TIME,
     &    LOC_FAC_I_TIME, LOC_FAC_MQ_TIME, LOC_FAC_SQ_TIME)
      INTEGER, INTENT(IN) :: INODE
      DOUBLE PRECISION, INTENT(IN) :: LOC_FACTO_TIME,
     &    LOC_PROMOTING_TIME, LOC_DEMOTING_TIME,
     &    LOC_CB_DEMOTING_TIME, LOC_FRPANELS_TIME,
     &    LOC_FRFRONTS_TIME, LOC_TRSM_TIME, LOC_LR_MODULE_TIME,
     &    LOC_FAC_I_TIME, LOC_FAC_MQ_TIME, LOC_FAC_SQ_TIME
      ACC_UPDT_TIME = ACC_UPDT_TIME + LOC_FACTO_TIME
      ACC_PROMOTING_TIME = ACC_PROMOTING_TIME + LOC_PROMOTING_TIME
      ACC_DEMOTING_TIME = ACC_DEMOTING_TIME + LOC_DEMOTING_TIME
      ACC_CB_DEMOTING_TIME = ACC_CB_DEMOTING_TIME +
     &                       LOC_CB_DEMOTING_TIME
      ACC_FRPANELS_TIME = ACC_FRPANELS_TIME + LOC_FRPANELS_TIME
      ACC_FAC_I_TIME = ACC_FAC_I_TIME + LOC_FAC_I_TIME
      ACC_FAC_MQ_TIME = ACC_FAC_MQ_TIME + LOC_FAC_MQ_TIME
      ACC_FAC_SQ_TIME = ACC_FAC_SQ_TIME + LOC_FAC_SQ_TIME
      ACC_FRFRONTS_TIME = ACC_FRFRONTS_TIME + LOC_FRFRONTS_TIME
      ACC_TRSM_TIME = ACC_TRSM_TIME + LOC_TRSM_TIME
      ACC_LR_MODULE_TIME = ACC_LR_MODULE_TIME + LOC_LR_MODULE_TIME
      END SUBROUTINE UPDATE_ALL_TIMES
C
C     Adds LOC_CB_DEMOTING_TIME to ACC_CB_DEMOTING_TIME.
C     INODE is not used.
      SUBROUTINE UPDATE_CB_DEMOTING_TIME(INODE, LOC_CB_DEMOTING_TIME)
      INTEGER, INTENT(IN) :: INODE
      DOUBLE PRECISION, INTENT(IN) :: LOC_CB_DEMOTING_TIME
      ACC_CB_DEMOTING_TIME = ACC_CB_DEMOTING_TIME +
     &                       LOC_CB_DEMOTING_TIME
      END SUBROUTINE UPDATE_CB_DEMOTING_TIME
C
C     Adds LOC_UPDT_TIME to ACC_UPDT_TIME.  INODE is not used.
      SUBROUTINE UPDATE_UPDT_TIME(INODE, LOC_UPDT_TIME)
      INTEGER, INTENT(IN) :: INODE
      DOUBLE PRECISION, INTENT(IN) :: LOC_UPDT_TIME
      ACC_UPDT_TIME = ACC_UPDT_TIME + LOC_UPDT_TIME
      END SUBROUTINE UPDATE_UPDT_TIME
C
C     Adds LOC_UPDT_TIME_OUT to ACC_UPDT_TIME_OUT.
      SUBROUTINE UPDATE_UPDT_TIME_OUT(LOC_UPDT_TIME_OUT)
      DOUBLE PRECISION, INTENT(IN) :: LOC_UPDT_TIME_OUT
      ACC_UPDT_TIME_OUT = ACC_UPDT_TIME_OUT + LOC_UPDT_TIME_OUT
      END SUBROUTINE UPDATE_UPDT_TIME_OUT
C
C     Adds LOC_RMB_TIME to ACC_RMB_TIME.
      SUBROUTINE UPDATE_RMB_TIME(LOC_RMB_TIME)
      DOUBLE PRECISION, INTENT(IN) :: LOC_RMB_TIME
      ACC_RMB_TIME = ACC_RMB_TIME + LOC_RMB_TIME
      END SUBROUTINE UPDATE_RMB_TIME
C
C     Adds LOC_PROMOTING_TIME to ACC_PROMOTING_TIME.
C     INODE is not used.
      SUBROUTINE UPDATE_PROMOTING_TIME(INODE, LOC_PROMOTING_TIME)
      INTEGER, INTENT(IN) :: INODE
      DOUBLE PRECISION, INTENT(IN) :: LOC_PROMOTING_TIME
      ACC_PROMOTING_TIME = ACC_PROMOTING_TIME +
     &                     LOC_PROMOTING_TIME
      END SUBROUTINE UPDATE_PROMOTING_TIME
C
C     ------------------------------------------------------------------
C     UPDATE_FLOP_STATS_CB_PROMOTE
C     Adds COST to FLOP_CB_PROMOTE (NIV.EQ.1) or to ACC_FLOP_CB_PROMOTE
C     (any other NIV).  Each update is in its own named OpenMP
C     critical section.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOP_STATS_CB_PROMOTE(COST, NIV)
      DOUBLE PRECISION :: COST
      INTEGER :: NIV
      IF (NIV.EQ.1) THEN
!$OMP CRITICAL(cb_flop_cost_pro_cri)
        FLOP_CB_PROMOTE = FLOP_CB_PROMOTE + COST
!$OMP END CRITICAL(cb_flop_cost_pro_cri)
      ELSE
!$OMP CRITICAL(acc_cb_flop_cost_pro_cri)
        ACC_FLOP_CB_PROMOTE = ACC_FLOP_CB_PROMOTE + COST
!$OMP END CRITICAL(acc_cb_flop_cost_pro_cri)
      ENDIF
      END SUBROUTINE UPDATE_FLOP_STATS_CB_PROMOTE
C
C     ------------------------------------------------------------------
C     UPDATE_FLOP_STATS_CB_DEMOTE
C     Adds COST to FLOP_CB_DEMOTE (NIV.EQ.1) or to ACC_FLOP_CB_DEMOTE
C     (any other NIV).  Each update is in its own named OpenMP
C     critical section.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOP_STATS_CB_DEMOTE(COST, NIV)
      DOUBLE PRECISION :: COST
      INTEGER :: NIV
      IF (NIV.EQ.1) THEN
!$OMP CRITICAL(cb_flop_cost_dem_cri)
        FLOP_CB_DEMOTE = FLOP_CB_DEMOTE + COST
!$OMP END CRITICAL(cb_flop_cost_dem_cri)
      ELSE
!$OMP CRITICAL(acc_cb_flop_cost_dem_cri)
        ACC_FLOP_CB_DEMOTE = ACC_FLOP_CB_DEMOTE + COST
!$OMP END CRITICAL(acc_cb_flop_cost_dem_cri)
      ENDIF
      END SUBROUTINE UPDATE_FLOP_STATS_CB_DEMOTE
C
C     ------------------------------------------------------------------
C     UPDATE_FLOP_STATS_DEMOTE
C     Computes a cost from the dimensions M, N and rank K of LR_B and
C     adds it to the DEMOTE counter selected by NIV:
C       HR_COST     = 4K^3/3 + 4KMN - 2(M+N)K^2  (integer arithmetic,
C                     so the K^3/3 term is truncated)
C       BUILDQ_COST = 4K^2 M - K^3 when LR_B%ISLR, else 0
C     When REC_ACC is present and true the same amount is also added
C     to the REC_ACC counter.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOP_STATS_DEMOTE(LR_B, NIV, REC_ACC)
      TYPE(LRB_TYPE),INTENT(IN) :: LR_B
      INTEGER(8) :: M,N,K
      INTEGER :: NIV
      DOUBLE PRECISION :: HR_COST,BUILDQ_COST
      LOGICAL, OPTIONAL :: REC_ACC
      M = int(LR_B%M,8)
      N = int(LR_B%N,8)
      K = int(LR_B%K,8)
      HR_COST = dble(4_8*K*K*K/3_8 + 4_8*K*M*N - 2_8*(M+N)*K*K)
      IF (LR_B%ISLR) THEN
        BUILDQ_COST = dble(4_8*K*K*M - K*K*K)
      ELSE
        BUILDQ_COST = 0.0d0
      END IF
      IF (NIV .EQ. 1) THEN
!$OMP CRITICAL(lr_flop_gain_cri)
        FLOP_DEMOTE = FLOP_DEMOTE + HR_COST + BUILDQ_COST
        IF (present(REC_ACC)) THEN
          IF (REC_ACC) THEN
            FLOP_REC_ACC = FLOP_REC_ACC + HR_COST+BUILDQ_COST
          ENDIF
        ENDIF
!$OMP END CRITICAL(lr_flop_gain_cri)
      ELSE
!$OMP CRITICAL(lr_flop_gain_cri)
        ACC_FLOP_DEMOTE = ACC_FLOP_DEMOTE + (HR_COST + BUILDQ_COST)
        IF (present(REC_ACC)) THEN
          IF (REC_ACC) THEN
            ACC_FLOP_REC_ACC = ACC_FLOP_REC_ACC
     &                         + HR_COST + BUILDQ_COST
          ENDIF
        ENDIF
!$OMP END CRITICAL(lr_flop_gain_cri)
      ENDIF
      END SUBROUTINE UPDATE_FLOP_STATS_DEMOTE
C
C     ------------------------------------------------------------------
C     UPDATE_FLOP_STATS_REC_ACC
C     Computes TOT_COST = BUILDQ_COST + HR_COST + GS_COST + UPDT_COST
C     from M, N of LR_B, K = LR_B%K - K1, and the integers K1, K2, and
C     adds it to both the DEMOTE and the REC_ACC counters selected by
C     NIV.  BUILDQ_COST and UPDT_COST are zero unless BUILDQ1 is true.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOP_STATS_REC_ACC(LR_B, NIV, K1, K2, BUILDQ1)
      TYPE(LRB_TYPE),INTENT(IN) :: LR_B
      INTEGER,INTENT(IN) :: NIV, K1, K2
      LOGICAL,INTENT(IN) :: BUILDQ1
      INTEGER(8) :: M,N,K
      DOUBLE PRECISION :: HR_COST, BUILDQ_COST, GS_COST, UPDT_COST,
     &                    TOT_COST
      M = int(LR_B%M,8)
      N = int(LR_B%N,8)
      K = int(LR_B%K - K1,8)
      GS_COST = dble((4_8*(K1)+1_8)*M*K2)
      HR_COST = dble(4_8*K*K*K/3_8 + 4_8*K*M*K2 - 2_8*(M+K2)*K*K)
      IF (BUILDQ1) THEN
        BUILDQ_COST = dble(4_8*K*K*M - K*K*K)
        UPDT_COST = dble(2_8*K*K2*N)
      ELSE
        BUILDQ_COST = 0.0d0
        UPDT_COST = 0.0d0
      ENDIF
      TOT_COST = BUILDQ_COST + HR_COST + GS_COST + UPDT_COST
      IF (NIV .EQ. 1) THEN
!$OMP CRITICAL(lr_flop_gain_cri)
        FLOP_DEMOTE = FLOP_DEMOTE + TOT_COST
        FLOP_REC_ACC = FLOP_REC_ACC + TOT_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
      ELSE
!$OMP CRITICAL(lr_flop_gain_cri)
        ACC_FLOP_DEMOTE = ACC_FLOP_DEMOTE + TOT_COST
        ACC_FLOP_REC_ACC = ACC_FLOP_REC_ACC + TOT_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
      ENDIF
      END SUBROUTINE UPDATE_FLOP_STATS_REC_ACC
C
C     ------------------------------------------------------------------
C     UPDATE_FLOP_STATS_PANEL
C     Adds a panel cost and a TRSM cost, computed from NFRONT and NPIV,
C     to the PANEL / TRSM counters selected by NIV.  The formulas
C     differ between SYM.EQ.0 and any other SYM.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOP_STATS_PANEL(NFRONT, NPIV, NIV, SYM)
      INTEGER :: NFRONT, NPIV, NIV, SYM
      DOUBLE PRECISION :: COST_PANEL, COST_TRSM
      IF (SYM.EQ.0) THEN
        COST_TRSM = dble(2 * NPIV-1) * dble(NPIV)
     &              * dble(NFRONT-NPIV)
        COST_PANEL = dble(NPIV) * dble(NPIV - 1)
     &               * dble(4 * NPIV + 1)/dble(6)
      ELSE
        COST_TRSM = dble(NPIV) * dble(NPIV) * dble(NFRONT-NPIV)
        COST_PANEL = dble(NPIV) * dble(NPIV - 1)
     &               * dble(2 * NPIV + 1)/dble(6)
      ENDIF
      IF (NIV .EQ. 1) THEN
!$OMP CRITICAL(lr_flop_gain_cri)
        FLOP_PANEL = FLOP_PANEL + COST_PANEL
        FLOP_TRSM = FLOP_TRSM + COST_TRSM
!$OMP END CRITICAL(lr_flop_gain_cri)
      ELSE
!$OMP CRITICAL(lr_flop_gain_cri)
        ACC_FLOP_PANEL = ACC_FLOP_PANEL + COST_PANEL
        ACC_FLOP_TRSM = ACC_FLOP_TRSM + COST_TRSM
!$OMP END CRITICAL(lr_flop_gain_cri)
      ENDIF
      END SUBROUTINE UPDATE_FLOP_STATS_PANEL
C
C     ------------------------------------------------------------------
C     UPDATE_FLOP_STATS_TRSM
C     Computes a full-rank cost and a low-rank cost for block LRB and
C     adds them, and their difference (the gain), to the TRSM / gain
C     counters selected by NIV.  The formulas depend on LorU.EQ.0
C     versus other values and, in the latter case, on K470.EQ.1.
C     When LRB is not low-rank (LRB%ISLR false) both costs are equal.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOP_STATS_TRSM(LRB, NIV, LorU, K470)
      TYPE(LRB_TYPE),INTENT(IN) :: LRB
      INTEGER,INTENT(IN) :: NIV, LorU, K470
      DOUBLE PRECISION :: LR_FLOP_COST, FR_FLOP_COST
      IF (LorU.EQ.0) THEN
        FR_FLOP_COST = dble(LRB%M)*dble(LRB%N)*dble(LRB%N)
        IF (LRB%ISLR) THEN
          LR_FLOP_COST = dble(LRB%K)*dble(LRB%N)*dble(LRB%N)
        ELSE
          LR_FLOP_COST = FR_FLOP_COST
        ENDIF
      ELSE
        IF (K470.EQ.1) THEN
          FR_FLOP_COST = dble(LRB%M-1)*dble(LRB%N)*dble(LRB%N)
        ELSE
          FR_FLOP_COST = dble(LRB%M-1)*dble(LRB%M)*dble(LRB%N)
        ENDIF
        IF (LRB%ISLR) THEN
          IF (K470.EQ.1) THEN
            LR_FLOP_COST = dble(LRB%N-1)*dble(LRB%N)*dble(LRB%K)
          ELSE
            LR_FLOP_COST = dble(LRB%M-1)*dble(LRB%M)*dble(LRB%K)
          ENDIF
        ELSE
          LR_FLOP_COST = FR_FLOP_COST
        ENDIF
      ENDIF
      IF (NIV .EQ. 1) THEN
!$OMP CRITICAL(lr_flop_gain_cri)
        FLOP_FR_TRSM = FLOP_FR_TRSM + FR_FLOP_COST
        FLOP_LR_TRSM = FLOP_LR_TRSM + LR_FLOP_COST
        LR_FLOP_GAIN = LR_FLOP_GAIN + FR_FLOP_COST
     &                 - LR_FLOP_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
      ELSE
!$OMP CRITICAL(lr_flop_gain_cri)
        ACC_FLOP_FR_TRSM = ACC_FLOP_FR_TRSM + FR_FLOP_COST
        ACC_FLOP_LR_TRSM = ACC_FLOP_LR_TRSM + LR_FLOP_COST
        ACC_LR_FLOP_GAIN = ACC_LR_FLOP_GAIN + FR_FLOP_COST
     &                     - LR_FLOP_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
      END IF
      END SUBROUTINE UPDATE_FLOP_STATS_TRSM
C
C     ------------------------------------------------------------------
C     UPDATE_FLOP_STATS_LRB_PRODUCT
C     Computes full-rank and low-rank flop costs for the product of
C     blocks LRB1 and LRB2 and adds them to the counters selected by
C     NIV.
C       TRANSB1, TRANSB2  'N' or 'T'; any combination other than NN,
C                         TN or NT is treated as TT
C       LRFORM            each block's LRFORM (0 or 1) selects the
C                         formula family; a pair that is not 00, 10
C                         or 01 is treated as 11
C       COMPRESS_MID_PRODUCT, RANK_IN, BUILDQ
C                         only influence the 11 family; when absent
C                         they default to 0, 0 and .FALSE.
C       IS_DIAG           when present and true both costs are halved
C       K480              when present and .GE.3 the "out" part of the
C                         low-rank cost is removed from LR_FLOP_COST
C       REC_ACC_IN        honoured only when K480 is present and
C                         .GE.4; then the cost goes to the REC_ACC
C                         and DEMOTE counters instead of the UPDT ones
C     NOTE(review): NIV is declared OPTIONAL but is read
C     unconditionally below, so callers must always supply it.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOP_STATS_LRB_PRODUCT(LRB1, LRB2, TRANSB1,
     &     TRANSB2, NIV, COMPRESS_MID_PRODUCT, RANK_IN, BUILDQ,
     &     IS_DIAG, K480, REC_ACC_IN)
!$    USE OMP_LIB
      TYPE(LRB_TYPE),INTENT(IN) :: LRB1,LRB2
      CHARACTER(len=1), INTENT(IN) :: TRANSB1, TRANSB2
      LOGICAL, INTENT(IN), OPTIONAL :: BUILDQ, IS_DIAG, REC_ACC_IN
      INTEGER, INTENT(IN), OPTIONAL :: NIV, RANK_IN,
     &                                 COMPRESS_MID_PRODUCT, K480
      LOGICAL :: REC_ACC
      DOUBLE PRECISION :: LR_FLOP_COST, LR_FLOP_COST_OUT, FR_FLOP_COST
      DOUBLE PRECISION :: HR_COST, BUILDQ_COST
      DOUBLE PRECISION :: M1,N1,K1,M2,N2,K2,RANK
      CHARACTER(len=2) :: PROD, TRANS
C     Local copies of optional arguments (see defaults above)
      INTEGER :: MIDCOMP
      LOGICAL :: DO_BUILDQ
      IF(present(K480).AND.present(REC_ACC_IN)) THEN
        IF (K480.GE.4) THEN
          REC_ACC = REC_ACC_IN
        ELSE
          REC_ACC = .FALSE.
        ENDIF
      ELSE
        REC_ACC = .FALSE.
      ENDIF
C     An absent OPTIONAL dummy must not be referenced: copy the ones
C     used by the cost formulas into locals guarded by PRESENT().
      MIDCOMP = 0
      IF (present(COMPRESS_MID_PRODUCT)) THEN
        MIDCOMP = COMPRESS_MID_PRODUCT
      ENDIF
      DO_BUILDQ = .FALSE.
      IF (present(BUILDQ)) THEN
        DO_BUILDQ = BUILDQ
      ENDIF
      RANK = 0.0D0
      IF (present(RANK_IN)) THEN
        RANK = dble(RANK_IN)
      ENDIF
      M1 = dble(LRB1%M)
      N1 = dble(LRB1%N)
      K1 = dble(LRB1%K)
      M2 = dble(LRB2%M)
      N2 = dble(LRB2%N)
      K2 = dble(LRB2%K)
      IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==0)) THEN
        PROD = '00'
      ELSE IF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==0)) THEN
        PROD = '10'
      ELSE IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==1)) THEN
        PROD = '01'
      ELSE
        PROD = '11'
      END IF
      IF ((TRANSB1=='N').AND.(TRANSB2=='N')) THEN
        TRANS = 'NN'
      ELSE IF ((TRANSB1=='T').AND.(TRANSB2=='N')) THEN
        TRANS = 'TN'
      ELSE IF ((TRANSB1=='N').AND.(TRANSB2=='T')) THEN
        TRANS = 'NT'
      ELSE
        TRANS = 'TT'
      END IF
      LR_FLOP_COST_OUT = 0.0D0
      HR_COST = 0.0D0
      BUILDQ_COST = 0.0D0
      SELECT CASE (PROD)
      CASE('00')
        SELECT CASE (TRANS)
        CASE('NN')
          FR_FLOP_COST = 2.0D0*M1*N2*N1
          LR_FLOP_COST = 2.0D0*M1*N2*N1
        CASE('TN')
          FR_FLOP_COST = 2.0D0*N1*N2*M1
          LR_FLOP_COST = 2.0D0*M1*N2*N1
        CASE('NT')
          FR_FLOP_COST = 2.0D0*M1*M2*N1
          LR_FLOP_COST = 2.0D0*M1*M2*N1
        CASE('TT')
          FR_FLOP_COST = 2.0D0*N1*M2*M1
          LR_FLOP_COST = 2.0D0*N1*M2*M1
        END SELECT
      CASE('10')
        SELECT CASE (TRANS)
        CASE('NN')
          FR_FLOP_COST = 2.0D0*M1*N2*N1
          LR_FLOP_COST = 2.0D0*K1*N2*N1 + 2.0D0*M1*N2*K1
          LR_FLOP_COST_OUT = 2.0D0*M1*N2*K1
        CASE('TN')
          FR_FLOP_COST = 2.0D0*N1*N2*M1
          LR_FLOP_COST = 2.0D0*K1*N2*M1 + 2.0D0*N1*N2*K1
          LR_FLOP_COST_OUT = 2.0D0*N1*N2*K1
        CASE('NT')
          FR_FLOP_COST = 2.0D0*M1*M2*N1
          LR_FLOP_COST = 2.0D0*K1*M2*N1 + 2.0D0*M1*M2*K1
          LR_FLOP_COST_OUT = 2.0D0*M1*M2*K1
        CASE('TT')
          FR_FLOP_COST = 2.0D0*N1*M2*M1
          LR_FLOP_COST = 2.0D0*K1*M2*M1 + 2.0D0*N1*M2*K1
          LR_FLOP_COST_OUT = 2.0D0*N1*M2*K1
        END SELECT
      CASE('01')
        SELECT CASE (TRANS)
        CASE('NN')
          FR_FLOP_COST = 2.0D0*M1*N2*N1
          LR_FLOP_COST = 2.0D0*M1*K2*N1 + 2.0D0*M1*N2*K2
          LR_FLOP_COST_OUT = 2.0D0*M1*N2*K2
        CASE('TN')
          FR_FLOP_COST = 2.0D0*N1*N2*M1
          LR_FLOP_COST = 2.0D0*N1*K2*M1 + 2.0D0*N1*N2*K2
          LR_FLOP_COST_OUT = 2.0D0*N1*N2*K2
        CASE('NT')
          FR_FLOP_COST = 2.0D0*M1*M2*N1
          LR_FLOP_COST = 2.0D0*M1*K2*N1 + 2.0D0*M1*M2*K2
          LR_FLOP_COST_OUT = 2.0D0*M1*M2*K2
        CASE('TT')
          FR_FLOP_COST = 2.0D0*N1*M2*M1
          LR_FLOP_COST = 2.0D0*N1*K2*M1 + 2.0D0*N1*M2*K2
          LR_FLOP_COST_OUT = 2.0D0*N1*M2*K2
        END SELECT
      CASE('11')
C       Extra cost terms that depend on RANK, charged only when the
C       middle-product compression flag is .GE. 1
        IF (MIDCOMP.GE.1) THEN
          HR_COST = 4.0D0*RANK*RANK*RANK/3.0D0 +
     &              4.0D0*RANK*K1*K2 -
     &              2.0D0*(K1+K2)*RANK*RANK
          IF (DO_BUILDQ) THEN
            BUILDQ_COST = 4.0D0*RANK*RANK*K1 - RANK*RANK*RANK
          ENDIF
        ENDIF
        SELECT CASE (TRANS)
        CASE('NN')
          FR_FLOP_COST = 2.0D0*M1*N2*N1
          IF ((MIDCOMP.GE.1).AND.DO_BUILDQ) THEN
            LR_FLOP_COST = 2.0D0*K1*K2*N1 +
     &          2.0D0*K1*M1*RANK + 2.0D0*K2*N2*RANK +
     &          2.0D0*M1*N2*RANK
            LR_FLOP_COST_OUT = 2.0D0*M1*N2*RANK
          ELSE
            IF (K1 .GE. K2) THEN
              LR_FLOP_COST = 2.0D0*K1*K2*N1 +
     &            2.0D0*K1*M1*K2 + 2.0D0*M1*N2*K2
              LR_FLOP_COST_OUT = 2.0D0*M1*N2*K2
            ELSE
              LR_FLOP_COST = 2.0D0*K1*K2*N1 +
     &            2.0D0*K1*N2*K2 + 2.0D0*M1*N2*K1
              LR_FLOP_COST_OUT = 2.0D0*M1*N2*K1
            ENDIF
          ENDIF
        CASE('TN')
          FR_FLOP_COST = 2.0D0*N1*N2*M1
          IF ((MIDCOMP.GE.1).AND.DO_BUILDQ) THEN
            LR_FLOP_COST = 2.0D0*K1*K2*M1 +
     &          2.0D0*K1*N1*RANK + 2.0D0*K2*N2*RANK +
     &          2.0D0*N1*N2*RANK
            LR_FLOP_COST_OUT = 2.0D0*N1*N2*RANK
          ELSE
            IF (K1 .GE. K2) THEN
              LR_FLOP_COST = 2.0D0*K1*K2*M1 +
     &            2.0D0*K1*N1*K2 + 2.0D0*N1*N2*K2
              LR_FLOP_COST_OUT = 2.0D0*N1*N2*K2
            ELSE
              LR_FLOP_COST = 2.0D0*K1*K2*M1 +
     &            2.0D0*K1*N2*K2 + 2.0D0*N1*N2*K1
              LR_FLOP_COST_OUT = 2.0D0*N1*N2*K1
            ENDIF
          ENDIF
        CASE('NT')
          FR_FLOP_COST = 2.0D0*M1*M2*N1
          IF ((MIDCOMP.GE.1).AND.DO_BUILDQ) THEN
            LR_FLOP_COST = 2.0D0*K1*K2*N1 +
     &          2.0D0*K1*M1*RANK + 2.0D0*K2*M2*RANK +
     &          2.0D0*M1*M2*RANK
            LR_FLOP_COST_OUT = 2.0D0*M1*M2*RANK
          ELSE
            IF (K1 .GE. K2) THEN
              LR_FLOP_COST = 2.0D0*K1*K2*N1 +
     &            2.0D0*K1*M1*K2 + 2.0D0*M1*M2*K2
              LR_FLOP_COST_OUT = 2.0D0*M1*M2*K2
            ELSE
              LR_FLOP_COST = 2.0D0*K1*K2*N1 +
     &            2.0D0*K1*M2*K2 + 2.0D0*M1*M2*K1
              LR_FLOP_COST_OUT = 2.0D0*M1*M2*K1
            ENDIF
          ENDIF
        CASE('TT')
          FR_FLOP_COST = 2.0D0*N1*M2*M1
          IF ((MIDCOMP.GE.1).AND.DO_BUILDQ) THEN
            LR_FLOP_COST = 2.0D0*K1*K2*M1 +
     &          2.0D0*K1*N1*RANK + 2.0D0*K2*M2*RANK +
     &          2.0D0*N1*M2*RANK
            LR_FLOP_COST_OUT = 2.0D0*N1*M2*RANK
          ELSE
            IF (K1 .GE. K2) THEN
              LR_FLOP_COST = 2.0D0*K1*K2*M1 +
     &            2.0D0*K1*N1*K2 + 2.0D0*N1*M2*K2
              LR_FLOP_COST_OUT = 2.0D0*N1*M2*K2
            ELSE
              LR_FLOP_COST = 2.0D0*K1*K2*M1 +
     &            2.0D0*K1*M2*K2 + 2.0D0*N1*M2*K1
              LR_FLOP_COST_OUT = 2.0D0*N1*M2*K1
            ENDIF
          ENDIF
        END SELECT
      END SELECT
      IF (present(IS_DIAG)) THEN
        IF (IS_DIAG) THEN
          FR_FLOP_COST = FR_FLOP_COST/2.0D0
          LR_FLOP_COST = LR_FLOP_COST/2.0D0
        ENDIF
      ENDIF
      IF (present(K480)) THEN
        IF (K480.GE.3) THEN
C         the "out" part is removed from the low-rank cost
          LR_FLOP_COST = LR_FLOP_COST - LR_FLOP_COST_OUT
          LR_FLOP_COST_OUT = 0.0D0
          IF (REC_ACC) THEN
            IF (NIV .EQ. 1) THEN
!$OMP CRITICAL(lr_flop_gain_cri)
              FLOP_REC_ACC = FLOP_REC_ACC + LR_FLOP_COST
     &                       + HR_COST + BUILDQ_COST
              FLOP_DEMOTE = FLOP_DEMOTE + LR_FLOP_COST
     &                      + HR_COST + BUILDQ_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
            ELSE
!$OMP CRITICAL(lr_flop_gain_cri)
              ACC_FLOP_REC_ACC = ACC_FLOP_REC_ACC + LR_FLOP_COST
     &                           + HR_COST + BUILDQ_COST
              ACC_FLOP_DEMOTE = ACC_FLOP_DEMOTE + LR_FLOP_COST
     &                          + HR_COST + BUILDQ_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
            ENDIF
          ENDIF
        ENDIF
      ENDIF
      IF (.NOT.REC_ACC) THEN
        IF (NIV .EQ. 1) THEN
!$OMP CRITICAL(lr_flop_gain_cri)
          LR_FLOP_GAIN = LR_FLOP_GAIN + FR_FLOP_COST - LR_FLOP_COST
          FLOP_FR_UPDT = FLOP_FR_UPDT + FR_FLOP_COST
          FLOP_LR_UPDT = FLOP_LR_UPDT + LR_FLOP_COST
          FLOP_LR_UPDT_OUT = FLOP_LR_UPDT_OUT + LR_FLOP_COST_OUT
          FLOP_DEMOTE = FLOP_DEMOTE + HR_COST + BUILDQ_COST
          FLOP_RMB = FLOP_RMB + HR_COST + BUILDQ_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
        ELSE
!$OMP CRITICAL(lr_flop_gain_cri)
          ACC_LR_FLOP_GAIN = ACC_LR_FLOP_GAIN +
     &                       FR_FLOP_COST - LR_FLOP_COST
          ACC_FLOP_FR_UPDT = ACC_FLOP_FR_UPDT + FR_FLOP_COST
          ACC_FLOP_LR_UPDT = ACC_FLOP_LR_UPDT + LR_FLOP_COST
          ACC_FLOP_LR_UPDT_OUT = ACC_FLOP_LR_UPDT_OUT +
     &                           LR_FLOP_COST_OUT
          ACC_FLOP_DEMOTE = ACC_FLOP_DEMOTE + HR_COST + BUILDQ_COST
          ACC_FLOP_RMB = ACC_FLOP_RMB + HR_COST + BUILDQ_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
        ENDIF
      ENDIF
      END SUBROUTINE UPDATE_FLOP_STATS_LRB_PRODUCT
C
C     ------------------------------------------------------------------
C     UPDATE_FLOP_STATS_DEC_ACC
C     Computes FLOP_COST = 2*M*N*K for block LRB, subtracts it from
C     the flop gain and adds it to the LR_UPDT, LR_UPDT_OUT and
C     DEC_ACC counters selected by NIV.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOP_STATS_DEC_ACC(LRB, NIV)
      TYPE(LRB_TYPE),INTENT(IN) :: LRB
      INTEGER,INTENT(IN) :: NIV
      DOUBLE PRECISION :: FLOP_COST
      FLOP_COST = 2.0D0*dble(LRB%M)*dble(LRB%N)*dble(LRB%K)
      IF (NIV .EQ. 1) THEN
!$OMP CRITICAL(lr_flop_gain_cri)
        LR_FLOP_GAIN = LR_FLOP_GAIN - FLOP_COST
        FLOP_LR_UPDT = FLOP_LR_UPDT + FLOP_COST
        FLOP_LR_UPDT_OUT = FLOP_LR_UPDT_OUT + FLOP_COST
        FLOP_DEC_ACC = FLOP_DEC_ACC + FLOP_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
      ELSE
!$OMP CRITICAL(lr_flop_gain_cri)
        ACC_LR_FLOP_GAIN = ACC_LR_FLOP_GAIN - FLOP_COST
        ACC_FLOP_LR_UPDT = ACC_FLOP_LR_UPDT + FLOP_COST
        ACC_FLOP_LR_UPDT_OUT = ACC_FLOP_LR_UPDT_OUT +
     &                         FLOP_COST
        ACC_FLOP_DEC_ACC = ACC_FLOP_DEC_ACC + FLOP_COST
!$OMP END CRITICAL(lr_flop_gain_cri)
      ENDIF
      END SUBROUTINE UPDATE_FLOP_STATS_DEC_ACC
C
C     ------------------------------------------------------------------
C     UPDATE_FLOPS_STATS_ROOT
C     Obtains a cost from MUMPS_GET_FLOPS_COST (called with level 3),
C     divides it by NPROW*NPCOL using 64-bit integer division (the
C     result is truncated) and adds the quotient to ACC_FLOP_FRFRONTS.
C     MYID is not used.  NPROW*NPCOL must be non-zero.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOPS_STATS_ROOT(KEEP50, NFRONT, NPIV,
     &           NPROW, NPCOL, MYID)
      INTEGER, intent(in) :: KEEP50, NFRONT, NPIV,
     &                       NPROW, NPCOL, MYID
      DOUBLE PRECISION :: COST, COST_PER_PROC
      INTEGER, PARAMETER :: LEVEL3 = 3
      CALL MUMPS_GET_FLOPS_COST(NFRONT, NPIV, NFRONT, KEEP50, LEVEL3,
     &                          COST)
      COST_PER_PROC = dble(int( COST,8) / int(NPROW * NPCOL,8))
      ACC_FLOP_FRFRONTS = ACC_FLOP_FRFRONTS + COST_PER_PROC
      RETURN
      END SUBROUTINE UPDATE_FLOPS_STATS_ROOT
C
C     ------------------------------------------------------------------
C     INIT_STATS_FRONT
C     Resets the FRONT_xxx_BLR_SAVINGS, LR_FLOP_GAIN and FLOP_xxx
C     counters to zero.  The arguments are not used.
C     ------------------------------------------------------------------
      SUBROUTINE INIT_STATS_FRONT(NFRONT,INODE,NASS,NCB)
      INTEGER,INTENT(IN) :: NFRONT,INODE,NASS,NCB
      FRONT_L11_BLR_SAVINGS = 0.D0
      FRONT_U11_BLR_SAVINGS = 0.D0
      FRONT_L21_BLR_SAVINGS = 0.D0
      FRONT_U12_BLR_SAVINGS = 0.D0
      LR_FLOP_GAIN          = 0.D0
      FLOP_CB_DEMOTE        = 0.D0
      FLOP_CB_PROMOTE       = 0.D0
      FLOP_FR_UPDT          = 0.D0
      FLOP_LR_UPDT          = 0.D0
      FLOP_LR_UPDT_OUT      = 0.D0
      FLOP_RMB              = 0.D0
      FLOP_FR_TRSM          = 0.D0
      FLOP_LR_TRSM          = 0.D0
      FLOP_DEMOTE           = 0.D0
      FLOP_DEC_ACC          = 0.D0
      FLOP_REC_ACC          = 0.D0
      FLOP_PANEL            = 0.D0
      FLOP_TRSM             = 0.D0
      END SUBROUTINE INIT_STATS_FRONT
C
C     ------------------------------------------------------------------
C     INIT_STATS_GLOBAL
C     Resets the accumulated (ACC_xxx) counters, the block-size
C     statistics and the timers, sets CNT_NODES to zero and points
C     STEP_STATS at id%STEP.
C     ------------------------------------------------------------------
      SUBROUTINE INIT_STATS_GLOBAL(id)
      use SMUMPS_STRUC_DEF
      TYPE (SMUMPS_STRUC), TARGET :: id
      ACC_MRY_CB_GAIN       = 0.D0
      ACC_MRY_CB_FR         = 0.D0
      ACC_FLOP_CB_DEMOTE    = 0.D0
      ACC_FLOP_CB_PROMOTE   = 0.D0
      ACC_FLOP_FR_FACTO     = 0.D0
      ACC_FLOP_LR_FACTO     = 0.D0
      ACC_FLOP_FR_UPDT      = 0.D0
      ACC_FLOP_LR_UPDT      = 0.D0
      ACC_FLOP_LR_UPDT_OUT  = 0.D0
      ACC_FLOP_RMB          = 0.D0
      ACC_FLOP_FR_TRSM      = 0.D0
      ACC_FLOP_LR_TRSM      = 0.D0
      ACC_FLOP_DEMOTE       = 0.D0
      ACC_FLOP_TRSM         = 0.D0
      ACC_FLOP_DEC_ACC      = 0.D0
      ACC_FLOP_REC_ACC      = 0.D0
      ACC_FLOP_PANEL        = 0.D0
      ACC_FLOP_FRFRONTS     = 0.D0
      ACC_FLOP_FR_SOLVE     = 0.D0
      ACC_FLOP_LR_SOLVE     = 0.D0
      ACC_LR_FLOP_GAIN      = 0.D0
      TOTAL_NBLOCKS_ASS     = 0
      TOTAL_NBLOCKS_CB      = 0
      AVG_BLOCKSIZE_ASS     = 0.D0
      AVG_BLOCKSIZE_CB      = 0.D0
      MIN_BLOCKSIZE_ASS     = huge(1)
      MAX_BLOCKSIZE_ASS     = 0
      MIN_BLOCKSIZE_CB      = huge(1)
      MAX_BLOCKSIZE_CB      = 0
      ACC_FR_MRY            = 0.D0
      GLOBAL_BLR_SAVINGS    = 0.D0
      ACC_UPDT_TIME         = 0.D0
      ACC_UPDT_TIME_OUT     = 0.D0
      ACC_RMB_TIME          = 0.D0
      ACC_PROMOTING_TIME    = 0.D0
      ACC_DEMOTING_TIME     = 0.D0
      ACC_CB_DEMOTING_TIME  = 0.D0
      ACC_FRPANELS_TIME     = 0.0D0
      ACC_FAC_I_TIME        = 0.0D0
      ACC_FAC_MQ_TIME       = 0.0D0
      ACC_FAC_SQ_TIME       = 0.0D0
      ACC_FRFRONTS_TIME     = 0.0D0
      ACC_TRSM_TIME         = 0.D0
      ACC_LR_MODULE_TIME    = 0.D0
      CNT_NODES             = 0
      STEP_STATS => id%STEP
      END SUBROUTINE INIT_STATS_GLOBAL
C
C     ------------------------------------------------------------------
C     STATS_COMPUTE_MRY_FRONT_TYPE1
C     Adds the current FRONT_xxx_BLR_SAVINGS to GLOBAL_BLR_SAVINGS and
C     a full-rank entry count, computed from NASS-NELIM and NCB+NELIM,
C     to ACC_FR_MRY.  For SYM.GT.0 only the L11 and L21 savings are
C     used.  INODE is not used.
C     ------------------------------------------------------------------
      SUBROUTINE STATS_COMPUTE_MRY_FRONT_TYPE1(NASS, NCB,
     &           SYM, INODE, NELIM)
      INTEGER,INTENT(IN) :: NASS, NCB, SYM, INODE, NELIM
      DOUBLE PRECISION :: FRONT_BLR_SAVINGS, FRONT_FR_MRY
      IF (SYM .GT. 0) THEN
        FRONT_BLR_SAVINGS = FRONT_L11_BLR_SAVINGS
     &                    + FRONT_L21_BLR_SAVINGS
        FRONT_FR_MRY = dble(NASS-NELIM) *
     &                 (dble(NASS-NELIM)+1.D0)/2.D0
     &               + dble(NASS-NELIM) * dble(NCB+NELIM)
      ELSE
        FRONT_BLR_SAVINGS = FRONT_L11_BLR_SAVINGS
     &                    + FRONT_L21_BLR_SAVINGS
     &                    + FRONT_U11_BLR_SAVINGS
     &                    + FRONT_U12_BLR_SAVINGS
        FRONT_FR_MRY = dble(NASS-NELIM) * dble(NASS-NELIM)
     &               + 2.0D0 * dble(NASS-NELIM) * dble(NCB+NELIM)
      END IF
      ACC_FR_MRY = ACC_FR_MRY + FRONT_FR_MRY
      GLOBAL_BLR_SAVINGS = GLOBAL_BLR_SAVINGS + FRONT_BLR_SAVINGS
      END SUBROUTINE STATS_COMPUTE_MRY_FRONT_TYPE1
C
C     ------------------------------------------------------------------
C     STATS_COMPUTE_MRY_FRONT_TYPE2
C     Adds to ACC_FR_MRY a full-rank entry count computed from
C     NASS-NELIM and NFRONT-NASS+NELIM.  INODE is not used.
C     ------------------------------------------------------------------
      SUBROUTINE STATS_COMPUTE_MRY_FRONT_TYPE2(NASS, NFRONT,
     &           SYM, INODE, NELIM)
      INTEGER,INTENT(IN) :: NASS, NFRONT, SYM, INODE, NELIM
      IF (SYM .GT. 0) THEN
        ACC_FR_MRY = ACC_FR_MRY +
     &       dble(NASS-NELIM) *
     &       (dble(NASS-NELIM)+1.D0)/2.D0
     &     + dble(NASS-NELIM) * dble(NFRONT-NASS+NELIM)
      ELSE
        ACC_FR_MRY = ACC_FR_MRY +
     &       dble(NASS-NELIM) * dble(NASS-NELIM)
     &     + 2.0D0 * dble(NASS-NELIM) * dble(NFRONT-NASS+NELIM)
      ENDIF
      END SUBROUTINE STATS_COMPUTE_MRY_FRONT_TYPE2
C
C     ------------------------------------------------------------------
C     STATS_COMPUTE_MRY_FRONT_CB
C     Adds an entry count computed from NCB and NROW to ACC_MRY_CB_FR
C     and adds FRONT_CB_BLR_SAVINGS to ACC_MRY_CB_GAIN.
C     NIV and INODE are not used.
C     ------------------------------------------------------------------
      SUBROUTINE STATS_COMPUTE_MRY_FRONT_CB(NCB, NROW,
     &           SYM, NIV, INODE,
     &           FRONT_CB_BLR_SAVINGS)
      INTEGER,INTENT(IN) :: NROW, NCB, SYM, NIV, INODE,
     &                      FRONT_CB_BLR_SAVINGS
      DOUBLE PRECISION :: MRY_CB_FR
      IF (SYM==0) THEN
        MRY_CB_FR = dble(NCB)*dble(NROW)
      ELSE
        MRY_CB_FR = dble(NCB-NROW)*dble(NROW) +
     &              dble(NROW)*dble(NROW+1)/2.D0
      ENDIF
      ACC_MRY_CB_FR = ACC_MRY_CB_FR + MRY_CB_FR
      ACC_MRY_CB_GAIN = ACC_MRY_CB_GAIN + FRONT_CB_BLR_SAVINGS
      END SUBROUTINE STATS_COMPUTE_MRY_FRONT_CB
C
C     ------------------------------------------------------------------
C     STATS_STORE_BLR_PANEL_MRY
C     Walks the blocks of BLR_PANEL and updates the solve flop
C     counters (first NB_INASM blocks only) and the storage savings.
C     For each low-rank block (ISLR true) the saving is
C          M*N - K*(M+N)
C     and it is added to
C       NIV.EQ.1, DIR.EQ.'H' : FRONT_U11 (first NB_INASM blocks) or
C                              FRONT_U12 (remaining NB_INCB blocks)
C       NIV.EQ.1, other DIR  : FRONT_L11 or FRONT_L21 likewise
C       other NIV            : GLOBAL_BLR_SAVINGS
C     ------------------------------------------------------------------
      SUBROUTINE STATS_STORE_BLR_PANEL_MRY(BLR_PANEL, NB_INASM,
     &           NB_INCB, DIR, NIV)
      INTEGER,INTENT(IN) :: NB_INASM, NB_INCB, NIV
      TYPE(LRB_TYPE), INTENT(IN) :: BLR_PANEL(NB_INASM+NB_INCB)
      CHARACTER(len=1) :: DIR
      INTEGER :: I
      DOUBLE PRECISION :: BLK_GAIN
      IF ((NB_INASM.GT.0) .AND. (DIR.EQ.'V')) THEN
        ACC_FLOP_FR_SOLVE = ACC_FLOP_FR_SOLVE +
     &       dble(BLR_PANEL(1)%N)*dble(BLR_PANEL(1)%N)
        ACC_FLOP_LR_SOLVE = ACC_FLOP_LR_SOLVE +
     &       dble(BLR_PANEL(1)%N)*dble(BLR_PANEL(1)%N)
      ENDIF
      DO I = 1 , NB_INASM
        ACC_FLOP_FR_SOLVE = ACC_FLOP_FR_SOLVE +
     &       dble(2)*dble(BLR_PANEL(I)%M)*dble(BLR_PANEL(I)%N)
        IF (BLR_PANEL(I)%ISLR) THEN
          ACC_FLOP_LR_SOLVE = ACC_FLOP_LR_SOLVE +
     &       dble(4)*(dble(BLR_PANEL(I)%M)+dble(BLR_PANEL(I)%N))*
     &       dble(BLR_PANEL(I)%K)
C         Saving computed once so that every branch uses the same
C         formula M*N - K*(M+N).
          BLK_GAIN = dble(BLR_PANEL(I)%M) * dble(BLR_PANEL(I)%N) -
     &               dble(BLR_PANEL(I)%K) *
     &               dble(BLR_PANEL(I)%M + BLR_PANEL(I)%N)
          IF (NIV .EQ. 1) THEN
            IF (DIR .EQ. 'H') THEN
              FRONT_U11_BLR_SAVINGS = FRONT_U11_BLR_SAVINGS
     &                              + BLK_GAIN
            ELSE
              FRONT_L11_BLR_SAVINGS = FRONT_L11_BLR_SAVINGS
     &                              + BLK_GAIN
            ENDIF
          ELSE
            GLOBAL_BLR_SAVINGS = GLOBAL_BLR_SAVINGS + BLK_GAIN
          ENDIF
        ELSE
          ACC_FLOP_LR_SOLVE = ACC_FLOP_LR_SOLVE +
     &       dble(2)*dble(BLR_PANEL(I)%M)*dble(BLR_PANEL(I)%N)
        ENDIF
      END DO
      DO I = NB_INASM + 1 , NB_INASM + NB_INCB
        IF (BLR_PANEL(I)%ISLR) THEN
          BLK_GAIN = dble(BLR_PANEL(I)%M) * dble(BLR_PANEL(I)%N) -
     &               dble(BLR_PANEL(I)%K) *
     &               dble(BLR_PANEL(I)%M + BLR_PANEL(I)%N)
          IF (NIV .EQ. 1) THEN
            IF (DIR .EQ. 'H') THEN
              FRONT_U12_BLR_SAVINGS = FRONT_U12_BLR_SAVINGS
     &                              + BLK_GAIN
            ELSE
              FRONT_L21_BLR_SAVINGS = FRONT_L21_BLR_SAVINGS
     &                              + BLK_GAIN
            ENDIF
          ELSE
            GLOBAL_BLR_SAVINGS = GLOBAL_BLR_SAVINGS + BLK_GAIN
          ENDIF
        END IF
      END DO
      END SUBROUTINE STATS_STORE_BLR_PANEL_MRY
C
C     ------------------------------------------------------------------
C     STATS_COMPUTE_FLOP_FRONT_TYPE1
C     Adds the cost returned by MUMPS_GET_FLOPS_COST (called with
C     level 1) to ACC_FLOP_FR_FACTO, then adds every FLOP_xxx counter
C     and LR_FLOP_GAIN to the matching ACC_ counter.
C     INODE is not used.
C     ------------------------------------------------------------------
      SUBROUTINE STATS_COMPUTE_FLOP_FRONT_TYPE1( NFRONT, NASS, NPIV,
     &           KEEP50, INODE)
      INTEGER,INTENT(IN) :: NFRONT, KEEP50, NASS, NPIV, INODE
      DOUBLE PRECISION :: FLOP_FR_FACTO
      CALL MUMPS_GET_FLOPS_COST(NFRONT, NPIV, NASS,
     &                          KEEP50, 1, FLOP_FR_FACTO)
      ACC_FLOP_FR_FACTO   = ACC_FLOP_FR_FACTO   + FLOP_FR_FACTO
      ACC_LR_FLOP_GAIN    = ACC_LR_FLOP_GAIN    + LR_FLOP_GAIN
      ACC_FLOP_FR_UPDT    = ACC_FLOP_FR_UPDT    + FLOP_FR_UPDT
      ACC_FLOP_LR_UPDT    = ACC_FLOP_LR_UPDT    + FLOP_LR_UPDT
      ACC_FLOP_LR_UPDT_OUT= ACC_FLOP_LR_UPDT_OUT+ FLOP_LR_UPDT_OUT
      ACC_FLOP_RMB        = ACC_FLOP_RMB        + FLOP_RMB
      ACC_FLOP_FR_TRSM    = ACC_FLOP_FR_TRSM    + FLOP_FR_TRSM
      ACC_FLOP_LR_TRSM    = ACC_FLOP_LR_TRSM    + FLOP_LR_TRSM
      ACC_FLOP_DEMOTE     = ACC_FLOP_DEMOTE     + FLOP_DEMOTE
      ACC_FLOP_CB_DEMOTE  = ACC_FLOP_CB_DEMOTE  + FLOP_CB_DEMOTE
      ACC_FLOP_CB_PROMOTE = ACC_FLOP_CB_PROMOTE + FLOP_CB_PROMOTE
      ACC_FLOP_DEC_ACC    = ACC_FLOP_DEC_ACC    + FLOP_DEC_ACC
      ACC_FLOP_REC_ACC    = ACC_FLOP_REC_ACC    + FLOP_REC_ACC
      ACC_FLOP_TRSM       = ACC_FLOP_TRSM       + FLOP_TRSM
      ACC_FLOP_PANEL      = ACC_FLOP_PANEL      + FLOP_PANEL
      END SUBROUTINE STATS_COMPUTE_FLOP_FRONT_TYPE1
C
C     ------------------------------------------------------------------
C     STATS_COMPUTE_FLOP_FRONT_TYPE2
C     Adds to ACC_FLOP_FR_FACTO the cost returned by
C     MUMPS_GET_FLOPS_COST, called with NASS-NELIM as second argument
C     and level 2.  INODE is not used.
C     ------------------------------------------------------------------
      SUBROUTINE STATS_COMPUTE_FLOP_FRONT_TYPE2( NFRONT, NASS,
     &           KEEP50, INODE, NELIM)
      INTEGER,INTENT(IN) :: NFRONT, KEEP50, NASS, INODE, NELIM
      DOUBLE PRECISION :: FLOP_FR_FACTO
      CALL MUMPS_GET_FLOPS_COST(NFRONT, NASS-NELIM, NASS,
     &                          KEEP50, 2, FLOP_FR_FACTO)
      ACC_FLOP_FR_FACTO = ACC_FLOP_FR_FACTO + FLOP_FR_FACTO
      END SUBROUTINE STATS_COMPUTE_FLOP_FRONT_TYPE2
C
C     ------------------------------------------------------------------
C     STATS_COMPUTE_FLOP_SLAVE_TYPE2
C     Adds to ACC_FLOP_FR_FACTO a cost computed in closed form from
C     NROW1, NCOL1 and NASS1; the formula differs between KEEP50.EQ.0
C     and any other KEEP50.  INODE is not used.
C     ------------------------------------------------------------------
      SUBROUTINE STATS_COMPUTE_FLOP_SLAVE_TYPE2( NROW1, NCOL1,
     &           NASS1, KEEP50, INODE)
      INTEGER,INTENT(IN) :: NROW1, NCOL1, KEEP50, NASS1, INODE
      DOUBLE PRECISION :: NROW2, NCOL2, NASS2
      DOUBLE PRECISION :: FLOP_FR_FACTO
      NROW2 = dble(NROW1)
      NCOL2 = dble(NCOL1)
      NASS2 = dble(NASS1)
      IF (KEEP50.EQ.0) THEN
        FLOP_FR_FACTO = NROW2*NASS2*NASS2
     &                + 2.0D0*NROW2*NASS2*(NCOL2-NASS2)
      ELSE
        FLOP_FR_FACTO =
     &         NROW2*NASS2*NASS2
     &       + NROW2*NASS2*NROW2
     &       + 2.0D0*NROW2*NASS2*(NCOL2-NASS2-NROW2)
      ENDIF
      ACC_FLOP_FR_FACTO = ACC_FLOP_FR_FACTO + FLOP_FR_FACTO
      END SUBROUTINE STATS_COMPUTE_FLOP_SLAVE_TYPE2
C
C     ------------------------------------------------------------------
C     UPDATE_FLOP_STATS_FRFRONTS
C     Adds the cost returned by MUMPS_GET_FLOPS_COST to
C     ACC_FLOP_FRFRONTS, and adds a solve cost
C          NASS*NASS + (NFRONT-NASS)*NASS   (doubled when SYM.EQ.0)
C     to both ACC_FLOP_FR_SOLVE and ACC_FLOP_LR_SOLVE.
C     ------------------------------------------------------------------
      SUBROUTINE UPDATE_FLOP_STATS_FRFRONTS(NFRONT, NPIV, NASS, SYM,
     &           NIV)
      INTEGER, INTENT(IN) :: NFRONT, NPIV, NASS, SYM, NIV
      DOUBLE PRECISION :: FLOP_FRFRONTS, FLOP_SOLVE
      CALL MUMPS_GET_FLOPS_COST(NFRONT, NPIV, NASS,
     &                          SYM, NIV, FLOP_FRFRONTS)
      ACC_FLOP_FRFRONTS = ACC_FLOP_FRFRONTS + FLOP_FRFRONTS
      FLOP_SOLVE = dble(NASS)*dble(NASS) +
     &             dble(NFRONT-NASS)*dble(NASS)
      IF (SYM.EQ.0) FLOP_SOLVE = 2.0D0*FLOP_SOLVE
      ACC_FLOP_FR_SOLVE = ACC_FLOP_FR_SOLVE + FLOP_SOLVE
      ACC_FLOP_LR_SOLVE = ACC_FLOP_LR_SOLVE + FLOP_SOLVE
      END SUBROUTINE UPDATE_FLOP_STATS_FRFRONTS
C
C     ------------------------------------------------------------------
C     UPD_FLOP_FRFRONT_SLAVE
C     Same closed-form cost as STATS_COMPUTE_FLOP_SLAVE_TYPE2, but
C     added to ACC_FLOP_FRFRONTS.  INODE is not used.
C     ------------------------------------------------------------------
      SUBROUTINE UPD_FLOP_FRFRONT_SLAVE(NROW1, NCOL1, NASS1,
     &           KEEP50, INODE)
      INTEGER,INTENT(IN) :: NROW1, NCOL1, KEEP50, NASS1, INODE
      DOUBLE PRECISION :: NROW2, NCOL2, NASS2
      DOUBLE PRECISION :: FLOP_FRFRONTS
      NROW2 = dble(NROW1)
      NCOL2 = dble(NCOL1)
      NASS2 = dble(NASS1)
      IF (KEEP50.EQ.0) THEN
        FLOP_FRFRONTS = NROW2*NASS2*NASS2
     &                + 2.0D0*NROW2*NASS2*(NCOL2-NASS2)
      ELSE
        FLOP_FRFRONTS =
     &         NROW2*NASS2*NASS2
     &       + NROW2*NASS2*NROW2
     &       + 2.0D0*NROW2*NASS2*(NCOL2-NASS2-NROW2)
      ENDIF
      ACC_FLOP_FRFRONTS = ACC_FLOP_FRFRONTS + FLOP_FRFRONTS
      END SUBROUTINE UPD_FLOP_FRFRONT_SLAVE
C
C     ------------------------------------------------------------------
C     COMPUTE_GLOBAL_GAINS
C     Derives the percentage statistics from the accumulators:
C       GLOBAL_MRY_LPRO_COMPR      100*GLOBAL_BLR_SAVINGS/ACC_FR_MRY
C       FACTOR_PROCESSED_FRACTION  100*ACC_FR_MRY/NB_ENTRIES_FACTOR
C       GLOBAL_MRY_LTOT_COMPR      100*GLOBAL_BLR_SAVINGS/
C                                      NB_ENTRIES_FACTOR
C     each set to 100 when its denominator is zero.  Also stores
C     FLOP_NUMBER in TOTAL_FLOP and sets
C       ACC_FLOP_LR_FACTO = ACC_FLOP_FR_FACTO - ACC_LR_FLOP_GAIN
C                           + ACC_FLOP_DEMOTE.
C     A zero ACC_MRY_CB_FR is replaced by 100.  A warning is written
C     on unit MPG when NB_ENTRIES_FACTOR is negative and printing is
C     enabled (PROKG true and MPG.GT.0).  NIV is not used.
C     ------------------------------------------------------------------
      SUBROUTINE COMPUTE_GLOBAL_GAINS(NB_ENTRIES_FACTOR,
     &           FLOP_NUMBER, NIV, PROKG, MPG)
      INTEGER(KIND=8), INTENT(IN) :: NB_ENTRIES_FACTOR
      INTEGER, INTENT(IN) :: NIV, MPG
      LOGICAL, INTENT(IN) :: PROKG
      REAL , INTENT(IN) :: FLOP_NUMBER
      IF (NB_ENTRIES_FACTOR < 0) THEN
        IF (PROKG.AND.MPG.GT.0) THEN
          WRITE(MPG,*) "NEGATIVE NUMBER OF ENTRIES IN FACTOR"
          WRITE(MPG,*) "===> OVERFLOW ?"
        END IF
      END IF
      IF (ACC_FR_MRY .EQ. 0) THEN
        GLOBAL_MRY_LPRO_COMPR = 100.0D0
      ELSE
        GLOBAL_MRY_LPRO_COMPR = 100.0D0 *
     &                          GLOBAL_BLR_SAVINGS/ACC_FR_MRY
      ENDIF
      IF (ACC_MRY_CB_FR .EQ. 0) THEN
        ACC_MRY_CB_FR = 100.0D0
      END IF
      IF (NB_ENTRIES_FACTOR.EQ.0) THEN
        FACTOR_PROCESSED_FRACTION = 100.0D0
        GLOBAL_MRY_LTOT_COMPR = 100.0D0
      ELSE
        FACTOR_PROCESSED_FRACTION = 100.0D0 *
     &       ACC_FR_MRY/dble(NB_ENTRIES_FACTOR)
        GLOBAL_MRY_LTOT_COMPR =
     &       100.0D0*GLOBAL_BLR_SAVINGS/dble(NB_ENTRIES_FACTOR)
      ENDIF
      TOTAL_FLOP = FLOP_NUMBER
      ACC_FLOP_LR_FACTO = ACC_FLOP_FR_FACTO - ACC_LR_FLOP_GAIN
     &                    + ACC_FLOP_DEMOTE
      RETURN
      END SUBROUTINE COMPUTE_GLOBAL_GAINS
C
C     ------------------------------------------------------------------
C     SAVEandWRITE_GAINS
C     Stores the operation-count statistics in DKEEP(55), DKEEP(56),
C     DKEEP(60) and DKEEP(61) and, when printing is enabled (PROKG
C     true and MPG.GE.0), writes the BLR statistics report on unit
C     MPG.  TOTAL_FLOP is first raised to at least EPSILON(1.0D0) so
C     that the percentages never divide by zero.  A K489 value other
C     than 0 or 1 is reported as an internal error and aborts, but
C     only when printing is enabled.  Of the integer arguments only
C     K489, K472, BCKSZ and MPG are read.
C     ------------------------------------------------------------------
      SUBROUTINE SAVEandWRITE_GAINS(LOCAL, K489, DKEEP, N,
     &     DEPTH, BCKSZ, NASSMIN, NFRONTMIN, SYM, K486,
     &     K472, K475, K478, K480, K481, K483, K484, K485, K467,
     &     NBTREENODES, NPROCS, MPG, PROKG)
      INTEGER, INTENT(IN) :: LOCAL,K489,N,DEPTH,BCKSZ,NASSMIN,
     &     NFRONTMIN, K486, NBTREENODES, MPG, K467,
     &     K472, K475, K478, K480, K481, K483, K484, K485, SYM, NPROCS
      LOGICAL, INTENT(IN) :: PROKG
      REAL :: DKEEP(230)
      LOGICAL PROK
      PROK = (PROKG.AND.(MPG.GE.0))
      IF (PROK) THEN
        WRITE(MPG,'(/A,A)')
     & '-------------- Beginning of BLR statistics -------------------',
     & '--------------'
        WRITE(MPG,'(A)')
     &  ' Settings for Block Low-Rank (BLR) are :'
        WRITE(MPG,'(A)') ' BLR algorithm characteristics :'
        WRITE(MPG,'(A,A)') ' Variant used: FSCU ',
     &                     '(Factor-Solve-Compress-Update)'
        SELECT CASE (K489)
        CASE (0)
        CASE (1)
          WRITE(MPG,'(A)')
     &    ' Experimental CB compression (for stats only)'
        CASE DEFAULT
          WRITE(*,*)' Internal error K489=',K489
          CALL MUMPS_ABORT()
        END SELECT
        IF (K472.EQ.0) THEN
          WRITE(MPG,'(A,A,I4)') ' Target BLR block size (fixed)',
     &    ' =',
     &    BCKSZ
        ELSE
          WRITE(MPG,'(A,A,I4,A,I4)')
     &    ' Target BLR block size (variable)',
     &    ' =',
     &    128, ' -', BCKSZ
        ENDIF
        WRITE(MPG,'(A,A,ES8.1)') ' RRQR precision (epsilon) ',
     &  ' =',
     &  DKEEP(8)
        WRITE(MPG,'(A)')
     &  ' Statistics after BLR factorization :'
        WRITE(MPG,'(A,I8)')
     &  ' Number of BLR fronts =',
     &  CNT_NODES
      ENDIF
      IF (PROK) WRITE(MPG,'(A)')
     &  ' Statistics on operation counts (OPC):'
      TOTAL_FLOP = MAX(TOTAL_FLOP,EPSILON(1.0D0))
      DKEEP(55)=real(TOTAL_FLOP)
      DKEEP(60)=real(100)
      DKEEP(56)=real(ACC_FLOP_LR_FACTO+ACC_FLOP_FRFRONTS)
      DKEEP(61)=real(100*(ACC_FLOP_LR_FACTO+
     &               ACC_FLOP_FRFRONTS) /TOTAL_FLOP)
      IF (PROK) THEN
        WRITE(MPG,'(A,ES10.3,A,F5.1,A)')
     &  ' Total theoretical full-rank OPC (i.e. FR OPC) ='
     &  ,TOTAL_FLOP,' (',100*TOTAL_FLOP/TOTAL_FLOP,'%)'
        WRITE(MPG,'(A,ES10.3,A,F5.1,A)')
     &  ' Total effective OPC (% FR OPC) ='
     &  ,ACC_FLOP_LR_FACTO+ACC_FLOP_FRFRONTS,' ('
     &  ,100*(ACC_FLOP_LR_FACTO+ACC_FLOP_FRFRONTS)/TOTAL_FLOP
     &  ,'%)'
      ENDIF
      IF (PROK) WRITE(MPG,'(A,A)')
     & '-------------- End of BLR statistics -------------------------',
     & '--------------'
      RETURN
      END SUBROUTINE SAVEandWRITE_GAINS
      END MODULE SMUMPS_LR_STATS