1C 2C This file is part of MUMPS 5.1.2, released 3C on Mon Oct 2 07:37:01 UTC 2017 4C 5C 6C Copyright 1991-2017 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria, 7C University of Bordeaux. 8C 9C This version of MUMPS is provided to you free of charge. It is 10C released under the CeCILL-C license: 11C http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html 12C 13 MODULE DMUMPS_LR_CORE 14 USE MUMPS_LR_COMMON 15 USE DMUMPS_LR_TYPE 16 USE DMUMPS_LR_STATS 17!$ USE OMP_LIB 18 IMPLICIT NONE 19 CONTAINS 20 SUBROUTINE INIT_LRB(LRB_OUT,K,KSVD,M,N,ISLR) 21 TYPE(LRB_TYPE), INTENT(OUT) :: LRB_OUT 22 INTEGER,INTENT(IN) :: K,KSVD,M,N 23 LOGICAL,INTENT(IN) :: ISLR 24C This routine simply initializes a LR block but does NOT allocate it 25 LRB_OUT%M = M 26 LRB_OUT%N = N 27 LRB_OUT%K = K 28 LRB_OUT%KSVD = KSVD 29 LRB_OUT%ISLR = ISLR 30 NULLIFY(LRB_OUT%Q) 31 NULLIFY(LRB_OUT%R) 32 IF (ISLR) THEN 33 LRB_OUT%LRFORM = 1 34 ELSE 35 LRB_OUT%LRFORM = 0 36 ENDIF 37 END SUBROUTINE INIT_LRB 38 SUBROUTINE IS_FRONT_BLR_CANDIDATE(INODE, NFRONT, NASS, K486, K489, 39 & K490, K491, K492, N, LRGROUPS, LRSTATUS) 40 INTEGER,INTENT(IN) :: INODE, NFRONT, NASS, K486, K489, K490, 41 & K491, K492 42 INTEGER,INTENT(IN) :: N, LRGROUPS(N) 43 INTEGER,INTENT(OUT):: LRSTATUS 44C 45C Local variables 46 LOGICAL :: COMPRESS_PANEL, COMPRESS_CB 47 COMPRESS_PANEL = .FALSE. 48 IF ((K486.GT.0).and.( 49 & ((K492.LT.0).and.INODE.EQ.abs(K492)) 50 & .or. 51 & ( (K492.GT.0).and.(K491.LE.NFRONT) 52 & .and.(K490.LE.NASS)))) THEN 53 COMPRESS_PANEL = .TRUE. 54C Compression for NASS =1 is useless 55 IF (NASS.EQ.1) COMPRESS_PANEL =.FALSE. 56 IF (LRGROUPS (INODE) .LT. 0) COMPRESS_PANEL = .FALSE. 57 ENDIF 58 COMPRESS_CB = .FALSE. 59 IF ((K492.GT.0).AND.(K489.EQ.1).AND.(NFRONT-NASS.GT.K491)) THEN 60 COMPRESS_CB = .TRUE. 61 ENDIF 62 IF (COMPRESS_PANEL.OR.COMPRESS_CB) THEN 63 IF (COMPRESS_CB.AND.(.NOT.COMPRESS_PANEL)) THEN 64 LRSTATUS = 1 65 ELSE IF (COMPRESS_PANEL.AND.(.NOT.COMPRESS_CB)) THEN 66 LRSTATUS = 2 67 ELSE 68 LRSTATUS = 3 69 ENDIF 70 ELSE 71 LRSTATUS = 0 72 ENDIF 73 END SUBROUTINE IS_FRONT_BLR_CANDIDATE 74 SUBROUTINE ALLOC_LRB(LRB_OUT,K,KSVD,M,N,ISLR,IFLAG,IERROR,KEEP8) 75 TYPE(LRB_TYPE), INTENT(OUT) :: LRB_OUT 76 INTEGER,INTENT(IN) :: K,KSVD,M,N 77 INTEGER,INTENT(OUT) :: IFLAG, IERROR 78 LOGICAL,INTENT(IN) :: ISLR 79 INTEGER(8) :: KEEP8(150) 80 INTEGER :: MEM, allocok 81 DOUBLE PRECISION :: ZERO 82 PARAMETER (ZERO = 0.0D0) 83 IF (ISLR) THEN 84 IF (K.EQ.0) THEN 85 nullify(LRB_OUT%Q) 86 nullify(LRB_OUT%R) 87 ELSE 88 allocate(LRB_OUT%Q(M,K),LRB_OUT%R(K,N),stat=allocok) 89 IF (allocok > 0) THEN 90 IFLAG = -13 91 IERROR = K*(M+N) 92 write(*,*) 'Allocation problem in BLR routine ALLOC_LRB:', 93 & ' not enough memory? memory requested = ' , IERROR 94 RETURN 95 ENDIF 96 ENDIF 97 ELSE 98 allocate(LRB_OUT%Q(M,N),stat=allocok) 99 IF (allocok > 0) THEN 100 IFLAG = -13 101 IERROR = M*N 102 write(*,*) 'Allocation problem in BLR routine ALLOC_LRB:', 103 & ' not enough memory? memory requested = ' , IERROR 104 RETURN 105 ENDIF 106 nullify(LRB_OUT%R) 107 ENDIF 108 LRB_OUT%M = M 109 LRB_OUT%N = N 110 LRB_OUT%K = K 111 LRB_OUT%KSVD = KSVD 112 LRB_OUT%ISLR = ISLR 113 IF (ISLR) THEN 114 LRB_OUT%LRFORM = 1 115 ELSE 116 LRB_OUT%LRFORM = 0 117 ENDIF 118 IF (ISLR) THEN 119 MEM = M*K + N*K 120 ELSE 121 MEM = M*N 122 ENDIF 123 KEEP8(70) = KEEP8(70) - int(MEM,8) 124 KEEP8(68) = min(KEEP8(70), KEEP8(68)) 125 KEEP8(71) = KEEP8(71) - int(MEM,8) 126 KEEP8(69) = min(KEEP8(71), KEEP8(69)) 127 END SUBROUTINE ALLOC_LRB 128 SUBROUTINE REGROUPING2(CUT, NPARTSASS, NASS, 129 & NPARTSCB, NCB, IBCKSZ, ONLYCB, K472) 130 INTEGER, INTENT(IN) :: IBCKSZ, NASS, NCB 131 INTEGER, INTENT(INOUT) :: NPARTSCB, NPARTSASS 132 INTEGER, POINTER, DIMENSION(:) :: CUT 133 INTEGER, POINTER, DIMENSION(:) :: NEW_CUT 134 INTEGER :: I, INEW, MINSIZE, NEW_NPARTSASS 135 LOGICAL :: ONLYCB, TRACE 136 INTEGER, INTENT(IN) :: K472 137 INTEGER :: IBCKSZ2 138 ALLOCATE(NEW_CUT(max(NPARTSASS,1)+NPARTSCB+1)) 139 CALL COMPUTE_BLR_VCS(K472, IBCKSZ2, IBCKSZ, NASS) 140 MINSIZE = int(IBCKSZ2 / 2) 141 NEW_NPARTSASS = max(NPARTSASS,1) 142 IF (.NOT. ONLYCB) THEN 143 NEW_CUT(1) = 1 144 INEW = 2 145 I = 2 146 DO WHILE (I .LE. NPARTSASS + 1) 147 NEW_CUT(INEW) = CUT(I) 148 TRACE = .FALSE. 149 IF (NEW_CUT(INEW) - NEW_CUT(INEW-1) .GT. MINSIZE) THEN 150 INEW = INEW + 1 151 TRACE = .TRUE. 152 ENDIF 153 I = I + 1 154 END DO 155 IF (TRACE) THEN 156 INEW = INEW - 1 157 ELSE 158 IF (INEW .NE. 2) THEN 159 NEW_CUT(INEW-1) = NEW_CUT(INEW) 160 INEW = INEW - 1 161 ENDIF 162 ENDIF 163 NEW_NPARTSASS = INEW - 1 164 ENDIF 165 IF (ONLYCB) THEN 166 DO I=1,max(NPARTSASS,1)+1 167 NEW_CUT(I) = CUT(I) 168 ENDDO 169 ENDIF 170 IF (NCB .EQ. 0) GO TO 50 171 INEW = NEW_NPARTSASS+2 172 I = max(NPARTSASS,1) + 2 173 DO WHILE (I .LE. max(NPARTSASS,1) + NPARTSCB + 1) 174 NEW_CUT(INEW) = CUT(I) 175 TRACE = .FALSE. 176 IF (NEW_CUT(INEW) - NEW_CUT(INEW-1) .GT. MINSIZE) THEN 177 INEW = INEW + 1 178 TRACE = .TRUE. 179 ENDIF 180 I = I + 1 181 END DO 182 IF (TRACE) THEN 183 INEW = INEW - 1 184 ELSE 185 IF (INEW .NE. NEW_NPARTSASS+2) THEN 186 NEW_CUT(INEW-1) = NEW_CUT(INEW) 187 INEW = INEW - 1 188 ENDIF 189 ENDIF 190 NPARTSCB = INEW - 1 - NEW_NPARTSASS 191 50 CONTINUE 192 NPARTSASS = NEW_NPARTSASS 193 DEALLOCATE(CUT) 194 ALLOCATE(CUT(NPARTSASS+NPARTSCB+1)) 195 DO I=1,NPARTSASS+NPARTSCB+1 196 CUT(I) = NEW_CUT(I) 197 ENDDO 198 DEALLOCATE(NEW_CUT) 199 END SUBROUTINE REGROUPING2 200 SUBROUTINE DMUMPS_LRGEMM_SCALING(LRB, SCALED, A, LA, POSELTD, 201 & LD_DIAG, IW2, POSELTT, NFRONT, BLOCK, MAXI_CLUSTER) 202C This routine does the scaling (for the symmetric case) before 203C computing the LR product (done in DMUMPS_LRGEMM3) 204 TYPE(LRB_TYPE),INTENT(IN) :: LRB 205 INTEGER(8), intent(in) :: LA 206 DOUBLE PRECISION, intent(inout) :: A(LA) 207 DOUBLE PRECISION, intent(inout), DIMENSION(:,:) :: SCALED 208 INTEGER,INTENT(IN) :: LD_DIAG, NFRONT, IW2(*) 209 INTEGER(8), INTENT(IN) :: POSELTD, POSELTT 210 INTEGER, INTENT(IN) :: MAXI_CLUSTER 211 DOUBLE PRECISION, intent(inout) :: BLOCK(MAXI_CLUSTER) 212 INTEGER :: J, NROWS 213 DOUBLE PRECISION :: PIV1, PIV2, OFFDIAG 214 IF (LRB%LRFORM.EQ.1) THEN 215 NROWS = LRB%K 216 ELSE ! Full Rank Block 217 NROWS = LRB%M 218 ENDIF 219 J = 1 220 DO WHILE (J <= LRB%N) 221 IF (IW2(J) > 0) THEN 222 SCALED(1:NROWS,J) = A(POSELTD+LD_DIAG*(J-1)+J-1) 223 & * SCALED(1:NROWS,J) 224 J = J+1 225 ELSE !2x2 pivot 226 PIV1 = A(POSELTD+LD_DIAG*(J-1)+J-1) 227 PIV2 = A(POSELTD+LD_DIAG*J+J) 228 OFFDIAG = A(POSELTD+LD_DIAG*(J-1)+J) 229 BLOCK(1:NROWS) = SCALED(1:NROWS,J) 230 SCALED(1:NROWS,J) = PIV1 * SCALED(1:NROWS,J) 231 & + OFFDIAG * SCALED(1:NROWS,J+1) 232 SCALED(1:NROWS,J+1) = OFFDIAG * BLOCK(1:NROWS) 233 & + PIV2 * SCALED(1:NROWS,J+1) 234 J=J+2 235 ENDIF 236 END DO 237 END SUBROUTINE DMUMPS_LRGEMM_SCALING 238 SUBROUTINE DMUMPS_LRGEMM3(TRANSB1, TRANSB2, ALPHA, 239 & LRB1, LRB2, BETA, A, LA, POSELTT, NFRONT, SYM, NIV, 240 & IFLAG, IERROR, 241 & COMPRESS_MID_PRODUCT, TOLEPS, KPERCENT, RANK, BUILDQ, 242 & POSELTD, LD_DIAG, IW2, BLOCK, MAXI_CLUSTER) 243 TYPE(LRB_TYPE),INTENT(IN) :: LRB1,LRB2 244 INTEGER(8), intent(in) :: LA 245 DOUBLE PRECISION, intent(inout) :: A(LA) 246 INTEGER,INTENT(IN) :: NFRONT, SYM, NIV 247 INTEGER,INTENT(OUT) :: IFLAG, IERROR 248 INTEGER(8), INTENT(IN) :: POSELTT 249 INTEGER(8), INTENT(IN), OPTIONAL :: POSELTD 250 INTEGER,INTENT(IN), OPTIONAL :: LD_DIAG, IW2(*) 251 INTEGER, INTENT(IN), OPTIONAL :: MAXI_CLUSTER 252 CHARACTER(len=1),INTENT(IN) :: TRANSB1, TRANSB2 253 INTEGER,intent(in) :: COMPRESS_MID_PRODUCT, KPERCENT 254 DOUBLE PRECISION, intent(in) :: TOLEPS 255 DOUBLE PRECISION :: ALPHA,BETA 256 DOUBLE PRECISION, intent(inout), OPTIONAL :: BLOCK(:) 257 DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:,:) :: XY_YZ 258 DOUBLE PRECISION, ALLOCATABLE, TARGET, DIMENSION(:,:) :: XQ, R_Y 259 DOUBLE PRECISION, POINTER, DIMENSION(:,:) :: X, Y, Y1, Y2, Z 260 CHARACTER(len=1) :: SIDE, TRANSX, TRANSY, TRANSZ 261 INTEGER :: M_X, K_XY, K_YZ, N_Z, LDX, LDY, LDY1, LDY2, LDZ, K_Y 262 INTEGER :: I, J, RANK, MAXRANK, INFO, LWORK 263 LOGICAL :: BUILDQ 264 DOUBLE PRECISION, ALLOCATABLE :: RWORK_RRQR(:) 265 DOUBLE PRECISION, ALLOCATABLE :: WORK_RRQR(:), TAU_RRQR(:), 266 & Y_RRQR(:,:) 267 INTEGER, ALLOCATABLE :: JPVT_RRQR(:) 268 INTEGER :: T1, T2, CR 269 INTEGER :: allocok, MREQ 270 DOUBLE PRECISION :: LOC_UPDT_TIME_OUT 271 DOUBLE PRECISION, EXTERNAL ::dnrm2 272 DOUBLE PRECISION :: ONE, MONE, ZERO 273 PARAMETER (ONE = 1.0D0, MONE=-1.0D0) 274 PARAMETER (ZERO=0.0D0) 275 IF (LRB2%M.EQ.0) THEN 276 write(*,*) "Internal error in DMUMPS_LRGEMM3, LRB2%M=0" 277 CALL MUMPS_ABORT() 278 ENDIF 279 IF ((SYM.NE.0).AND.((TRANSB1.NE.'N').OR.(TRANSB2.NE.'T'))) THEN 280 WRITE(*,*) "SYM > 0 and (", TRANSB1, ",", TRANSB2, 281 & ") parameters found. Symmetric LRGEMM is only ", 282 & "compatible with (N,T) parameters" 283 CALL MUMPS_ABORT() 284 ENDIF 285 RANK = 0 286 BUILDQ = .FALSE. 287 IF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==1)) THEN 288 IF ((LRB1%K.EQ.0).OR.(LRB2%K.EQ.0)) GOTO 700 289 allocate(Y(LRB1%K,LRB2%K),stat=allocok) 290 IF (allocok > 0) THEN 291 MREQ = LRB1%K*LRB2%K 292 GOTO 860 293 ENDIF 294 IF (TRANSB1 == 'N') THEN 295 X => LRB1%Q 296 LDX = LRB1%M 297 M_X = LRB1%M 298 K_Y = LRB1%N 299 IF (SYM .EQ. 0) THEN 300 Y1 => LRB1%R 301 ELSE 302 allocate(Y1(LRB1%K,LRB1%N),stat=allocok) 303 IF (allocok > 0) THEN 304 MREQ = LRB1%K*LRB1%N 305 GOTO 860 306 ENDIF 307 DO J=1,LRB1%N 308 DO I=1,LRB1%K 309 Y1(I,J) = LRB1%R(I,J) 310 ENDDO 311 ENDDO 312 CALL DMUMPS_LRGEMM_SCALING(LRB1, Y1, A, LA, POSELTD, 313 & LD_DIAG, IW2, POSELTT, NFRONT, BLOCK, 314 & MAXI_CLUSTER) 315 ENDIF 316 LDY1 = LRB1%K 317 ELSE !TRANSB1 == 'T' 318 M_X = LRB1%N 319 X => LRB1%R 320 LDX = LRB1%K 321 K_Y = LRB1%M 322 Y1 => LRB1%Q 323 LDY1 = LRB1%M 324 ENDIF 325 IF (TRANSB2 == 'N') THEN 326 Z => LRB2%R 327 LDZ = LRB2%K 328 N_Z = LRB2%N 329 Y2 => LRB2%Q 330 LDY2 = LRB2%M 331 ELSE !TRANSB2 == 'T' 332 N_Z = LRB2%M 333 Z => LRB2%Q 334 LDZ = LRB2%M 335 Y2 => LRB2%R 336 LDY2 = LRB2%K 337 ENDIF 338 TRANSZ = TRANSB2 339 CALL dgemm(TRANSB1 , TRANSB2 , LRB1%K , LRB2%K, K_Y, ONE, 340 & Y1(1,1), LDY1, Y2(1,1), LDY2, ZERO, Y(1,1), LRB1%K ) 341 BUILDQ = .FALSE. 342 IF (COMPRESS_MID_PRODUCT.GE.1) THEN 343 LWORK = MAX(LRB2%K**2, M_X**2) 344 allocate(Y_RRQR(LRB1%K,LRB2%K), 345 & WORK_RRQR(LWORK), RWORK_RRQR(2*LRB2%K), 346 & TAU_RRQR(MIN(LRB1%K,LRB2%K)), 347 & JPVT_RRQR(LRB2%K),stat=allocok) 348 IF (allocok > 0) THEN 349 MREQ = LRB1%K*LRB2%K + LWORK + 2*LRB2%K + 350 & MIN(LRB1%K,LRB2%K) + LRB2%K 351 GOTO 860 352 ENDIF 353 DO J=1,LRB2%K 354 DO I=1,LRB1%K 355 Y_RRQR(I,J) = Y(I,J) 356 ENDDO 357 ENDDO 358 MAXRANK = MIN(LRB1%K, LRB2%K)-1 359 MAXRANK = max (1, int((MAXRANK*KPERCENT/100))) 360 JPVT_RRQR = 0 361 CALL DMUMPS_TRUNCATED_RRQR(LRB1%K, LRB2%K, Y_RRQR(1,1), 362 & LRB1%K, JPVT_RRQR(1), TAU_RRQR(1), WORK_RRQR(1), 363 & LRB2%K, RWORK_RRQR(1), TOLEPS, RANK, MAXRANK, INFO) 364 IF ((RANK.GT.MAXRANK).OR.(RANK.EQ.0)) THEN 365 deallocate(Y_RRQR, WORK_RRQR, RWORK_RRQR, TAU_RRQR, 366 & JPVT_RRQR) 367 BUILDQ = .FALSE. 368 ELSE 369 BUILDQ = .TRUE. 370 ENDIF 371 IF (BUILDQ) THEN ! Successfully compressed middle block 372 allocate(XQ(M_X,RANK), R_Y(RANK,LRB2%K),stat=allocok) 373 IF (allocok > 0) THEN 374 MREQ = M_X*RANK + RANK*LRB2%K 375 GOTO 860 376 ENDIF 377 DO J=1, LRB2%K 378 R_Y(1:MIN(RANK,J),JPVT_RRQR(J)) = 379 & Y_RRQR(1:MIN(RANK,J),J) 380 IF(J.LT.RANK) R_Y(MIN(RANK,J)+1: 381 & RANK,JPVT_RRQR(J))= ZERO 382 END DO 383 CALL dorgqr 384 & (LRB1%K, RANK, RANK, Y_RRQR(1,1), 385 & LRB1%K, TAU_RRQR(1), 386 & WORK_RRQR(1), LWORK, INFO ) 387 CALL dgemm(TRANSB1, 'N', M_X, RANK, LRB1%K, ONE, 388 & X(1,1), LDX, Y_RRQR(1,1), LRB1%K, ZERO, 389 & XQ(1,1), M_X) 390 deallocate(Y_RRQR, WORK_RRQR, RWORK_RRQR, TAU_RRQR, 391 & JPVT_RRQR) 392 nullify(X) 393 X => XQ 394 LDX = M_X 395 K_XY = RANK 396 TRANSX = 'N' 397 deallocate(Y) 398 nullify(Y) 399 Y => R_Y 400 LDY = RANK 401 K_YZ = LRB2%K 402 TRANSY = 'N' 403 SIDE = 'R' 404 ENDIF 405 ENDIF 406 IF (.NOT.BUILDQ) THEN 407 LDY = LRB1%K 408 K_XY = LRB1%K 409 K_YZ = LRB2%K 410 TRANSX = TRANSB1 411 TRANSY = 'N' 412 IF (LRB1%K .GE. LRB2%K) THEN 413 SIDE = 'L' 414 ELSE ! LRB1%K < LRB2%K 415 SIDE = 'R' 416 ENDIF 417 ENDIF 418 ENDIF 419 IF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==0)) THEN 420 IF (LRB1%K.EQ.0) GOTO 700 421 SIDE = 'R' 422 K_XY = LRB1%K 423 TRANSX = TRANSB1 424 TRANSY = TRANSB1 425 Z => LRB2%Q 426 LDZ = LRB2%M 427 TRANSZ = TRANSB2 428 IF (TRANSB1 == 'N') THEN 429 X => LRB1%Q 430 LDX = LRB1%M 431 M_X = LRB1%M 432 LDY = LRB1%K 433 IF (SYM .EQ. 0) THEN 434 Y => LRB1%R 435 ELSE 436 allocate(Y(LRB1%K,LRB1%N),stat=allocok) 437 IF (allocok > 0) THEN 438 MREQ = LRB1%K*LRB1%N 439 GOTO 860 440 ENDIF 441 DO J=1,LRB1%N 442 DO I=1,LRB1%K 443 Y(I,J) = LRB1%R(I,J) 444 ENDDO 445 ENDDO 446 CALL DMUMPS_LRGEMM_SCALING(LRB1, Y, A, LA, POSELTD, 447 & LD_DIAG, IW2, POSELTT, NFRONT, BLOCK, 448 & MAXI_CLUSTER) 449 ENDIF 450 ELSE ! TRANSB1 == 'T' 451 X => LRB1%R 452 LDX = LRB1%K 453 M_X = LRB1%N 454 Y => LRB1%Q 455 LDY = LRB1%M 456 ENDIF 457 IF (TRANSB2 == 'N') THEN 458 K_YZ = LRB2%M 459 N_Z = LRB2%N 460 ELSE ! TRANSB2 == 'T' 461 K_YZ = LRB2%N 462 N_Z = LRB2%M 463 ENDIF 464 ENDIF 465 IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==1)) THEN 466 IF (LRB2%K.EQ.0) GOTO 700 467 SIDE = 'L' 468 K_YZ = LRB2%K 469 X => LRB1%Q 470 LDX = LRB1%M 471 TRANSX = TRANSB1 472 TRANSY = TRANSB2 473 TRANSZ = TRANSB2 474 IF (TRANSB1 == 'N') THEN 475 M_X = LRB1%M 476 K_XY = LRB1%N 477 ELSE ! TRANSB1 == 'T' 478 M_X = LRB1%N 479 K_XY = LRB1%M 480 ENDIF 481 IF (TRANSB2 == 'N') THEN 482 Y => LRB2%Q 483 LDY = LRB2%M 484 Z => LRB2%R 485 LDZ = LRB2%K 486 N_Z = LRB2%N 487 ELSE ! TRANSB2 == 'T' 488 IF (SYM .EQ. 0) THEN 489 Y => LRB2%R 490 ELSE ! Symmetric case: column scaling of R2 is done 491 allocate(Y(LRB2%K,LRB2%N),stat=allocok) 492 IF (allocok > 0) THEN 493 MREQ = LRB2%K*LRB2%N 494 GOTO 860 495 ENDIF 496 DO J=1,LRB2%N 497 DO I=1,LRB2%K 498 Y(I,J) = LRB2%R(I,J) 499 ENDDO 500 ENDDO 501 CALL DMUMPS_LRGEMM_SCALING(LRB2, Y, A, LA, POSELTD, 502 & LD_DIAG, IW2, POSELTT, NFRONT, BLOCK, 503 & MAXI_CLUSTER) 504 ENDIF 505 LDY = LRB2%K 506 Z => LRB2%Q 507 LDZ = LRB2%M 508 N_Z = LRB2%M 509 ENDIF 510 ENDIF 511 IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==0)) THEN 512 IF (SYM .EQ. 0) THEN 513 X => LRB1%Q 514 ELSE 515 allocate(X(LRB1%M,LRB1%N),stat=allocok) 516 IF (allocok > 0) THEN 517 MREQ = LRB1%M*LRB1%N 518 GOTO 860 519 ENDIF 520 DO J=1,LRB1%N 521 DO I=1,LRB1%M 522 X(I,J) = LRB1%Q(I,J) 523 ENDDO 524 ENDDO 525 CALL DMUMPS_LRGEMM_SCALING(LRB1, X, A, LA, POSELTD, 526 & LD_DIAG, IW2, POSELTT, NFRONT, BLOCK, 527 & MAXI_CLUSTER) 528 ENDIF 529 SIDE = 'N' 530 LDX = LRB1%M 531 TRANSX = TRANSB1 532 Z => LRB2%Q 533 LDZ = LRB2%M 534 TRANSZ = TRANSB2 535 IF (TRANSB1 == 'N') THEN 536 M_X = LRB1%M 537 K_XY = LRB1%N 538 ELSE ! TRANSB1 == 'T' 539 M_X = LRB1%N 540 K_XY = LRB1%M 541 ENDIF 542 IF (TRANSB2 == 'N') THEN 543 N_Z = LRB2%N 544 ELSE ! TRANSB2 == 'T' 545 N_Z = LRB2%M 546 ENDIF 547 ENDIF 548 IF (SIDE == 'L') THEN ! LEFT: XY_YZ = X*Y; A = XY_YZ*Z 549 allocate(XY_YZ(M_X,K_YZ),stat=allocok) 550 IF (allocok > 0) THEN 551 MREQ = M_X*K_YZ 552 GOTO 860 553 ENDIF 554 CALL dgemm(TRANSX , TRANSY , M_X , K_YZ, K_XY, ONE, 555 & X(1,1), LDX, Y(1,1), LDY, ZERO, XY_YZ(1,1), M_X) 556 CALL SYSTEM_CLOCK(T1) 557 CALL dgemm('N', TRANSZ, M_X, N_Z, K_YZ, ALPHA, 558 & XY_YZ(1,1), M_X, Z(1,1), LDZ, BETA, A(POSELTT), 559 & NFRONT) 560 CALL SYSTEM_CLOCK(T2,CR) 561 LOC_UPDT_TIME_OUT = dble(T2-T1)/dble(CR) 562 CALL UPDATE_UPDT_TIME_OUT(LOC_UPDT_TIME_OUT) 563 deallocate(XY_YZ) 564 ELSEIF (SIDE == 'R') THEN ! RIGHT: XY_YZ = Y*Z; A = X*XY_YZ 565 allocate(XY_YZ(K_XY,N_Z),stat=allocok) 566 IF (allocok > 0) THEN 567 MREQ = K_XY*N_Z 568 GOTO 860 569 ENDIF 570 CALL dgemm(TRANSY , TRANSZ , K_XY , N_Z, K_YZ, ONE, 571 & Y(1,1), LDY, Z(1,1), LDZ, ZERO, XY_YZ(1,1), K_XY) 572 CALL SYSTEM_CLOCK(T1) 573 CALL dgemm(TRANSX, 'N', M_X, N_Z, K_XY, ALPHA, 574 & X(1,1), LDX, XY_YZ(1,1), K_XY, BETA, A(POSELTT), 575 & NFRONT) 576 CALL SYSTEM_CLOCK(T2,CR) 577 LOC_UPDT_TIME_OUT = dble(T2-T1)/dble(CR) 578 CALL UPDATE_UPDT_TIME_OUT(LOC_UPDT_TIME_OUT) 579 deallocate(XY_YZ) 580 ELSE ! SIDE == 'N' : NONE; A = X*Z 581 CALL dgemm(TRANSX, TRANSZ, M_X, N_Z, K_XY, ALPHA, 582 & X(1,1), LDX, Z(1,1), LDZ, BETA, A(POSELTT), 583 & NFRONT) 584 ENDIF 585 GOTO 870 586 860 CONTINUE 587C Alloc NOT ok!! 588 write(*,*) 'Allocation problem in BLR routine DMUMPS_LRGEMM3: ', 589 & 'not enough memory? memory requested = ' , MREQ 590 IFLAG = - 13 591 IERROR = MREQ 592 RETURN 593 870 CONTINUE 594C Alloc ok!! 595 IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==0)) THEN 596 IF (SYM .NE. 0) deallocate(X) 597 ELSEIF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==1)) THEN 598 IF (SYM .NE. 0) deallocate(Y) 599 ELSEIF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==0)) THEN 600 IF (SYM .NE. 0) deallocate(Y) 601 ELSE ! 1 AND 1 602 IF ((TRANSB1=='N').AND.(SYM .NE. 0)) deallocate(Y1) 603 IF ((COMPRESS_MID_PRODUCT.GE.1).AND.BUILDQ) THEN 604 deallocate(XQ) 605 deallocate(R_Y) 606 ELSE 607 deallocate(Y) 608 ENDIF 609 ENDIF 610 700 CONTINUE 611 END SUBROUTINE DMUMPS_LRGEMM3 612 SUBROUTINE MAX_CLUSTER(CUT,CUT_SIZE,MAXI_CLUSTER) 613 INTEGER, INTENT(IN) :: CUT_SIZE 614 INTEGER, intent(out) :: MAXI_CLUSTER 615 INTEGER, POINTER, DIMENSION(:) :: CUT 616 INTEGER :: I 617 MAXI_CLUSTER = 0 618 DO I = 1, CUT_SIZE 619 IF (CUT(I+1) - CUT(I) .GE. MAXI_CLUSTER) THEN 620 MAXI_CLUSTER = CUT(I+1) - CUT(I) 621 END IF 622 END DO 623 END SUBROUTINE MAX_CLUSTER 624 END MODULE DMUMPS_LR_CORE 625 SUBROUTINE DMUMPS_TRUNCATED_RRQR( M, N, A, LDA, JPVT, TAU, WORK, 626 & LDW, RWORK, TOLEPS, RANK, MAXRANK, INFO) 627C This routine computes a Rank-Revealing QR factorization of a dense 628C matrix A. The factorization is truncated when the absolute value of 629C a diagonal coefficient of the R factor becomes smaller than a 630C prescribed threshold TOLEPS. The resulting partial Q and R factors 631C provide a rank-k approximation of the input matrix A with accuracy 632C TOLEPS. 633C 634C This routine is obtained by merging the LAPACK 635C (http://www.netlib.org/lapack/) CGEQP3 and CLAQPS routines and by 636C applying a minor modification to the outer factorization loop in 637C order to stop computations as soon as possible when the required 638C accuracy is reached. 639C 640C The authors of the LAPACK library are: 641C - Univ. of Tennessee 642C - Univ. of California Berkeley 643C - Univ. of Colorado Denver 644C - NAG Ltd. 645 IMPLICIT NONE 646 INTEGER :: INFO, LDA, LDW, M, N, RANK, MAXRANK 647 DOUBLE PRECISION :: TOLEPS 648 INTEGER :: JPVT(*) 649 DOUBLE PRECISION :: RWORK(*) 650 DOUBLE PRECISION :: A(LDA,*), TAU(*) 651 DOUBLE PRECISION :: WORK(LDW,*) 652 INTEGER, PARAMETER :: INB=1, INBMIN=2 653 INTEGER :: J, JB, MINMN, NB 654 INTEGER :: OFFSET, ITEMP 655 INTEGER :: LSTICC, PVT, K, RK 656 DOUBLE PRECISION :: TEMP, TEMP2, TOL3Z 657 DOUBLE PRECISION :: AKK 658 DOUBLE PRECISION, PARAMETER :: RZERO=0.0D+0, RONE=1.0D+0 659 DOUBLE PRECISION :: ZERO 660 DOUBLE PRECISION :: ONE 661 PARAMETER ( ONE = 1.0D+0 ) 662 PARAMETER ( ZERO = 0.0D+0 ) 663 DOUBLE PRECISION :: dlamch 664 INTEGER :: ilaenv, idamax 665 EXTERNAL :: idamax, dlamch 666 EXTERNAL dgeqrf, dormqr, xerbla 667 EXTERNAL ilaenv 668 EXTERNAL dgemm, dgemv, dlarfg, dswap 669 DOUBLE PRECISION, EXTERNAL :: dnrm2 670 INFO = 0 671 IF( M.LT.0 ) THEN 672 INFO = -1 673 ELSE IF( N.LT.0 ) THEN 674 INFO = -2 675 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN 676 INFO = -4 677 END IF 678 IF( INFO.EQ.0 ) THEN 679 IF( LDW.LT.N ) THEN 680 INFO = -8 681 END IF 682 END IF 683 IF( INFO.NE.0 ) THEN 684 CALL XERBLA( 'CGEQP3', -INFO ) 685 RETURN 686 END IF 687 MINMN = MIN(M,N) 688 IF( MINMN.EQ.0 ) THEN 689 RETURN 690 END IF 691 NB = ILAENV( INB, 'CGEQRF', ' ', M, N, -1, -1 ) 692C 693C Avoid pointers (and TARGET attribute on RWORK/WORK) 694C because of implicit interface. An implicit interface 695C is needed to avoid intermediate array copies 696C VN1 => RWORK(1:N) 697C VN2 => RWORK(N+1:2*N) 698C AUXV => WORK(1:LDW,1:1) 699C F => WORK(1:LDW,2:NB+1) 700C LDF = LDW 701* Initialize partial column norms. The first N elements of work 702* store the exact column norms. 703 DO J = 1, N 704C VN1( J ) = dnrm2( M, A( 1, J ), 1 ) 705 RWORK( J ) = dnrm2( M, A( 1, J ), 1 ) 706C VN2( J ) = VN1( J ) 707 RWORK( N + J ) = RWORK( J ) 708 JPVT(J) = J 709 END DO 710 OFFSET = 0 711 TOL3Z = SQRT(dlamch('Epsilon')) 712 DO 713 JB = MIN(NB,MINMN-OFFSET) 714 LSTICC = 0 715 K = 0 716 DO 717 IF(K.EQ.JB) EXIT 718 K = K+1 719 RK = OFFSET+K 720C PVT = ( RK-1 ) + IDAMAX( N-RK+1, VN1( RK ), 1 ) 721 PVT = ( RK-1 ) + IDAMAX( N-RK+1, RWORK( RK ), 1 ) 722C IF(VN1(PVT).LT.TOLEPS) THEN 723 IF(RWORK(PVT).LT.TOLEPS) THEN 724 RANK = RK-1 725 RETURN 726 END IF 727 IF (RK.GT.MAXRANK) THEN 728 RANK = RK 729 INFO = RK 730 RETURN 731 END IF 732 IF( PVT.NE.RK ) THEN 733 CALL dswap( M, A( 1, PVT ), 1, A( 1, RK ), 1 ) 734c CALL dswap( K-1, F( PVT-OFFSET, 1 ), LDF, 735c & F( K, 1 ), LDF ) 736 CALL dswap( K-1, WORK( PVT-OFFSET, 2 ), LDW, 737 & WORK( K, 2 ), LDW ) 738 ITEMP = JPVT(PVT) 739 JPVT(PVT) = JPVT(RK) 740 JPVT(RK) = ITEMP 741C VN1(PVT) = VN1(RK) 742C VN2(PVT) = VN2(RK) 743 RWORK(PVT) = RWORK(RK) 744 RWORK(N+PVT) = RWORK(N+RK) 745 END IF 746* Apply previous Householder reflectors to column K: 747* A(RK:M,RK) := A(RK:M,RK) - A(RK:M,OFFSET+1:RK-1)*F(K,1:K-1)**H. 748 IF( K.GT.1 ) THEN 749 CALL dgemv( 'No transpose', M-RK+1, K-1, -ONE, 750C & A(RK,OFFSET+1), LDA, F(K,1), LDF, 751 & A(RK,OFFSET+1), LDA, WORK(K,2), LDW, 752 & ONE, A(RK,RK), 1 ) 753 END IF 754* Generate elementary reflector H(k). 755 IF( RK.LT.M ) THEN 756 CALL dlarfg( M-RK+1, A(RK,RK), A(RK+1,RK), 1, TAU(RK) ) 757 ELSE 758 CALL dlarfg( 1, A(RK,RK), A(RK,RK), 1, TAU(RK) ) 759 END IF 760 AKK = A(RK,RK) 761 A(RK,RK) = ONE 762* Compute Kth column of F: 763* F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**H*A(RK:M,K). 764 IF( RK.LT.N ) THEN 765 CALL dgemv( 'Transpose', M-RK+1, N-RK, TAU(RK), 766 & A(RK,RK+1), LDA, A(RK,RK), 1, ZERO, 767C & F( K+1, K ), 1 ) 768 & WORK( K+1, K+1 ), 1 ) 769 END IF 770* Padding F(1:K,K) with zeros. 771 DO J = 1, K 772C F( J, K ) = ZERO 773 WORK( J, K+1 ) = ZERO 774 END DO 775* Incremental updating of F: 776* F(1:N,K) := F(1:N-OFFSET,K) - tau(RK)*F(1:N,1:K-1)*A(RK:M,OFFSET+1:RK-1)**H*A(RK:M,RK). 777 IF( K.GT.1 ) THEN 778 CALL dgemv( 'Transpose', M-RK+1, K-1, -TAU(RK), 779 & A(RK,OFFSET+1), LDA, A(RK,RK), 1, ZERO, 780 & WORK(1,1), 1 ) 781C & AUXV(1,1), 1 ) 782 CALL dgemv( 'No transpose', N-OFFSET, K-1, ONE, 783 & WORK(1,2), LDW, WORK(1,1), 1, ONE, WORK(1,K+1), 1 ) 784C & F(1,1), LDF, AUXV(1,1), 1, ONE, F(1,K), 1 ) 785 END IF 786* Update the current row of A: 787* A(RK,RK+1:N) := A(RK,RK+1:N) - A(RK,OFFSET+1:RK)*F(K+1:N,1:K)**H. 788 IF( RK.LT.N ) THEN 789C CALL dgemv( 'No Transpose', N-RK, K, -ONE, F( K+1, 1 ), 790 CALL dgemv( 'No Transpose', N-RK, K, -ONE, WORK( K+1,2 ), 791 & LDW, 792 & A( RK, OFFSET+1 ), LDA, ONE, A( RK, RK+1 ), LDA ) 793 END IF 794* Update partial column norms. 795* 796 IF( RK.LT.MINMN ) THEN 797 DO J = RK + 1, N 798C IF( VN1( J ).NE.RZERO ) THEN 799 IF( RWORK( J ).NE.RZERO ) THEN 800* 801* NOTE: The following 4 lines follow from the analysis in 802* Lapack Working Note 176. 803* 804C TEMP = ABS( A( RK, J ) ) / VN1( J ) 805 TEMP = ABS( A( RK, J ) ) / RWORK( J ) 806 TEMP = MAX( RZERO, ( RONE+TEMP )*( RONE-TEMP ) ) 807C TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 808 TEMP2 = TEMP*( RWORK( J ) / RWORK( N+J ) )**2 809 IF( TEMP2 .LE. TOL3Z ) THEN 810C VN2( J ) = dble( LSTICC ) 811 RWORK( N+J ) = dble( LSTICC ) 812 LSTICC = J 813 ELSE 814C VN1( J ) = VN1( J )*SQRT( TEMP ) 815 RWORK( J ) = RWORK( J )*SQRT( TEMP ) 816 END IF 817 END IF 818 END DO 819 END IF 820 A( RK, RK ) = AKK 821 IF (LSTICC.NE.0) EXIT 822 END DO 823* Apply the block reflector to the rest of the matrix: 824* A(RK+1:M,RK+1:N) := A(RK+1:M,RK+1:N) - 825* A(RK+1:M,OFFSET+1:RK)*F(K+1:N-OFFSET,1:K)**H. 826 IF( RK.LT.MIN(N,M) ) THEN 827 CALL dgemm( 'No transpose', 'Transpose', M-RK, 828 & N-RK, K, -ONE, A(RK+1,OFFSET+1), LDA, 829C & F(K+1,1), LDF, ONE, A(RK+1,RK+1), LDA ) 830 & WORK(K+1,2), LDW, ONE, A(RK+1,RK+1), LDA ) 831 END IF 832* Recomputation of difficult columns. 833 DO WHILE( LSTICC.GT.0 ) 834C ITEMP = NINT( VN2( LSTICC ) ) 835 ITEMP = NINT( RWORK( N + LSTICC ) ) 836C VN1( LSTICC ) = dnrm2( M-RK, A( RK+1, LSTICC ), 1 ) 837 RWORK( LSTICC ) = dnrm2( M-RK, A( RK+1, LSTICC ), 1 ) 838* 839* NOTE: The computation of RWORK( LSTICC ) relies on the fact that 840* SNRM2 does not fail on vectors with norm below the value of 841* SQRT(DLAMCH('S')) 842* 843C VN2( LSTICC ) = VN1( LSTICC ) 844 RWORK( N + LSTICC ) = RWORK( LSTICC ) 845 LSTICC = ITEMP 846 END DO 847 IF(RK.GE.MINMN) EXIT 848 OFFSET = RK 849 END DO 850 RANK = RK 851 END SUBROUTINE DMUMPS_TRUNCATED_RRQR 852