1C 2C This file is part of MUMPS 5.1.2, released 3C on Mon Oct 2 07:37:01 UTC 2017 4C 5C 6C Copyright 1991-2017 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria, 7C University of Bordeaux. 8C 9C This version of MUMPS is provided to you free of charge. It is 10C released under the CeCILL-C license: 11C http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html 12C 13 SUBROUTINE DMUMPS_FAC_DRIVER( id) 14 USE DMUMPS_BUF 15 USE DMUMPS_LOAD 16 USE DMUMPS_OOC 17 USE DMUMPS_STRUC_DEF 18 USE DMUMPS_LR_CORE 19 USE DMUMPS_LR_STATS 20 USE DMUMPS_LR_DATA_M, only: DMUMPS_BLR_INIT_MODULE, 21 & DMUMPS_BLR_END_MODULE 22 USE MUMPS_FRONT_DATA_MGT_M 23#if ! defined(NO_FDM_DESCBAND) 24 USE MUMPS_FAC_DESCBAND_DATA_M 25#endif 26#if ! defined(NO_FDM_MAPROW) 27 USE MUMPS_FAC_MAPROW_DATA_M 28#endif 29 IMPLICIT NONE 30C 31C Purpose 32C ======= 33C 34C Performs scaling, sorting in arrowhead, then 35C distributes the matrix, and perform 36C factorization. 37C 38C 39 INTERFACE 40C Explicit interface needed because 41C of "id" derived datatype argument 42 SUBROUTINE DMUMPS_ANORMINF(id, ANORMINF, LSCAL) 43 USE DMUMPS_STRUC_DEF 44 TYPE (DMUMPS_STRUC), TARGET :: id 45 DOUBLE PRECISION, INTENT(OUT) :: ANORMINF 46 LOGICAL :: LSCAL 47 END SUBROUTINE DMUMPS_ANORMINF 48C 49 END INTERFACE 50C 51C Parameters 52C ========== 53C 54 TYPE(DMUMPS_STRUC), TARGET :: id 55C 56C MPI 57C === 58C 59 INCLUDE 'mpif.h' 60 INCLUDE 'mumps_tags.h' 61 INTEGER :: STATUS(MPI_STATUS_SIZE) 62 INTEGER :: IERR 63 INTEGER, PARAMETER :: MASTER = 0 64C 65C Local variables 66C =============== 67C 68 INCLUDE 'mumps_headers.h' 69 INTEGER(8) :: NSEND8, NSEND_TOT8 70 INTEGER(8) :: NLOCAL8, NLOCAL_TOT8 71 INTEGER :: LDPTRAR, NELT_arg, NBRECORDS 72 INTEGER :: ITMP 73 INTEGER(8) ::KEEP826_SAVE 74 INTEGER(8) K67 75 INTEGER(8) K68,K69 76 INTEGER(8) ITMP8 77 INTEGER MUMPS_PROCNODE 78 EXTERNAL MUMPS_PROCNODE 79 INTEGER MP, LP, MPG, allocok 80 LOGICAL PROK, PROKG, LSCAL, LPOK, COMPUTE_ANORMINF 81 INTEGER DMUMPS_LBUF, DMUMPS_LBUFR_BYTES, DMUMPS_LBUF_INT 82 INTEGER(8) DMUMPS_LBUFR_BYTES8, DMUMPS_LBUF8 83 INTEGER PTRIST, PTRWB, MAXELT_SIZE, 84 & ITLOC, IPOOL, NSTEPS, K28, LPOOL, LIW 85 INTEGER IRANK, ID_ROOT 86 INTEGER KKKK 87 INTEGER(8) :: NZ_locMAX8 88 INTEGER(8) MEMORY_MD_ARG 89 INTEGER(8) MAXS_BASE8, MAXS_BASE_RELAXED8 90 DOUBLE PRECISION CNTL4 91 INTEGER MIN_PERLU, MAXIS_ESTIM 92 INTEGER MAXIS 93 INTEGER(8) :: MAXS 94 DOUBLE PRECISION TIME, TIMEET 95 DOUBLE PRECISION ZERO, ONE, MONE 96 PARAMETER( ZERO = 0.0D0, ONE = 1.0D0, MONE = -1.0D0) 97 DOUBLE PRECISION CZERO 98 PARAMETER( CZERO = 0.0D0 ) 99 INTEGER PERLU, TOTAL_MBYTES, K231, K232, K233 100 INTEGER COLOUR, COMM_FOR_SCALING ! For Simultaneous scaling 101 INTEGER LIWK, LWK_REAL 102 INTEGER(8) :: LWK 103C SLAVE: used to determine if proc has the role of a slave 104 LOGICAL I_AM_SLAVE, PERLU_ON, WK_USER_PROVIDED 105C WK_USER_PROVIDED is set to true when workspace WK_USER is provided by user 106 DOUBLE PRECISION :: ANORMINF, SEUIL, SEUIL_LDLT_NIV2 107 DOUBLE PRECISION :: CNTL1, CNTL3, CNTL5, CNTL6, EPS 108 INTEGER N, LPN_LIST,POSBUF 109 INTEGER, DIMENSION (:), ALLOCATABLE :: ITMP2 110 INTEGER I,K 111 INTEGER FRONTWISE 112C temporary variable for collecting stats from all processors 113 DOUBLE PRECISION :: TMP_GLOBAL_BLR_SAVINGS 114 DOUBLE PRECISION :: TMP_ACC_FR_MRY 115 DOUBLE PRECISION :: TMP_ACC_LR_FLOP_GAIN 116 DOUBLE PRECISION :: TMP_ACC_FLOP_TRSM 117 DOUBLE PRECISION :: TMP_ACC_FLOP_PANEL 118 DOUBLE PRECISION :: TMP_ACC_FLOP_FRFRONTS 119 DOUBLE PRECISION :: TMP_ACC_FLOP_LR_TRSM 120 DOUBLE PRECISION :: TMP_ACC_FLOP_FR_TRSM 121 DOUBLE PRECISION :: TMP_ACC_FLOP_LR_UPDT 122 DOUBLE PRECISION :: TMP_ACC_FLOP_LR_UPDT_OUT 123 DOUBLE PRECISION :: TMP_ACC_FLOP_RMB 124 DOUBLE PRECISION :: TMP_ACC_FLOP_DEC_ACC 125 DOUBLE PRECISION :: TMP_ACC_FLOP_REC_ACC 126 DOUBLE PRECISION :: TMP_ACC_FLOP_FR_UPDT 127 DOUBLE PRECISION :: TMP_ACC_FLOP_DEMOTE 128 DOUBLE PRECISION :: TMP_ACC_FLOP_CB_DEMOTE 129 DOUBLE PRECISION :: TMP_ACC_FLOP_CB_PROMOTE 130 DOUBLE PRECISION :: TMP_ACC_FLOP_FR_FACTO 131 INTEGER :: TMP_CNT_NODES 132 DOUBLE PRECISION :: TMP_ACC_UPDT_TIME 133 DOUBLE PRECISION :: TMP_ACC_DEMOTING_TIME 134 DOUBLE PRECISION :: TMP_ACC_CB_DEMOTING_TIME 135 DOUBLE PRECISION :: TMP_ACC_PROMOTING_TIME 136 DOUBLE PRECISION :: TMP_ACC_FRPANELS_TIME 137 DOUBLE PRECISION :: TMP_ACC_FAC_I_TIME 138 DOUBLE PRECISION :: TMP_ACC_FAC_MQ_TIME 139 DOUBLE PRECISION :: TMP_ACC_FAC_SQ_TIME 140 DOUBLE PRECISION :: TMP_ACC_TRSM_TIME 141 DOUBLE PRECISION :: TMP_ACC_FRFRONTS_TIME 142 DOUBLE PRECISION :: TMP_ACC_LR_MODULE_TIME 143C 144C Workspace. 145C 146 INTEGER, DIMENSION(:), ALLOCATABLE :: IWK 147 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WK 148 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WK_REAL 149 INTEGER(8), DIMENSION(:), ALLOCATABLE :: IWK8 150 INTEGER, DIMENSION(:), ALLOCATABLE :: BURP 151 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCP 152 INTEGER, DIMENSION(:), ALLOCATABLE :: BURS 153 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCS 154 INTEGER BUREGISTRE(12) 155 INTEGER BUINTSZ, BURESZ, BUJOB 156 INTEGER BUMAXMN, M, SCMYID, SCNPROCS 157 DOUBLE PRECISION SCONEERR, SCINFERR 158C 159C Parameters arising from the structure 160C ===================================== 161C 162 INTEGER, POINTER :: JOB 163* Control parameters: see description in DMUMPSID 164 DOUBLE PRECISION,DIMENSION(:),POINTER::RINFO, RINFOG 165 DOUBLE PRECISION,DIMENSION(:),POINTER:: CNTL 166 INTEGER,DIMENSION(:),POINTER:: INFOG, KEEP 167 INTEGER, DIMENSION(:), POINTER :: MYIRN_loc, MYJCN_loc 168 DOUBLE PRECISION, DIMENSION(:), POINTER :: MYA_loc 169 INTEGER, TARGET :: DUMMYIRN_loc(1), DUMMYJCN_loc(1) 170 DOUBLE PRECISION, TARGET :: DUMMYA_loc(1) 171 INTEGER,DIMENSION(:),POINTER::ICNTL 172 EXTERNAL MUMPS_GET_POOL_LENGTH 173 INTEGER MUMPS_GET_POOL_LENGTH 174 INTEGER(8) TOTAL_BYTES 175 INTEGER(8) :: I8TMP 176C 177C External references 178C =================== 179 INTEGER numroc 180 EXTERNAL numroc 181C Fwd in facto: 182 DOUBLE PRECISION, DIMENSION(:), POINTER :: RHS_MUMPS 183 LOGICAL :: RHS_MUMPS_ALLOCATED 184 INTEGER :: NB_ACTIVE_FRONTS_ESTIM 185C 186C 187 JOB=>id%JOB 188 RINFO=>id%RINFO 189 RINFOG=>id%RINFOG 190 CNTL=>id%CNTL 191 INFOG=>id%INFOG 192 KEEP=>id%KEEP 193 ICNTL=>id%ICNTL 194 IF (id%KEEP8(29) .NE. 0) THEN 195 MYIRN_loc=>id%IRN_loc 196 MYJCN_loc=>id%JCN_loc 197 MYA_loc=>id%A_loc 198 ELSE 199 MYIRN_loc=>DUMMYIRN_loc 200 MYJCN_loc=>DUMMYJCN_loc 201 MYA_loc=>DUMMYA_loc 202 ENDIF 203 N = id%N 204 EPS = epsilon ( ZERO ) 205C TIMINGS: reset to 0 206 id%DKEEP(92)=0.0D0 207 id%DKEEP(93)=0.0D0 208 id%DKEEP(94)=0.0D0 209 id%DKEEP(97)=0.0D0 210 id%DKEEP(98)=0.0D0 211 id%DKEEP(56)=0.0D0 212C Count of MPI messages: reset to 0 213 id%KEEP(266)=0 214 id%KEEP(267)=0 215C 216C BEGIN CASE OF ALLOCATED DATA FROM PREVIOUS CALLS 217 IF (associated(id%RHSCOMP)) THEN 218 DEALLOCATE(id%RHSCOMP) 219 NULLIFY(id%RHSCOMP) 220 id%KEEP8(25)=0_8 221 ENDIF 222 IF (associated(id%POSINRHSCOMP_ROW)) THEN 223 DEALLOCATE(id%POSINRHSCOMP_ROW) 224 NULLIFY(id%POSINRHSCOMP_ROW) 225 ENDIF 226 IF (id%POSINRHSCOMP_COL_ALLOC) THEN 227 DEALLOCATE(id%POSINRHSCOMP_COL) 228 NULLIFY(id%POSINRHSCOMP_COL) 229 id%POSINRHSCOMP_COL_ALLOC = .FALSE. 230 ENDIF 231C END CASE OF ALLOCATED DATA FROM PREVIOUS CALLS 232C 233C Related to forward in facto functionality (referred to as "Fwd in facto") 234 NULLIFY(RHS_MUMPS) 235 RHS_MUMPS_ALLOCATED = .FALSE. 236C ----------------------------------------------------------------------- 237C Set WK_USER_PROVIDED to true when workspace WK_USER is provided by user 238C We can accept WK_USER to be provided on only one proc and 239C different values of WK_USER per processor 240C 241 IF (id%KEEP8(24).GT.0_8) THEN 242C We nullify S so that later when we test 243C if (associated(S) we can free space and reallocate it). 244 NULLIFY(id%S) 245 ENDIF 246C 247C -- KEEP8(24) can now then be reset safely 248 WK_USER_PROVIDED = (id%LWK_USER.NE.0) 249 IF (WK_USER_PROVIDED) THEN 250 IF (id%LWK_USER.GT.0) THEN 251 id%KEEP8(24) = int(id%LWK_USER,8) 252 ELSE 253 iD%KEEP8(24) = -int(id%LWK_USER,8)* 1000000_8 254 ENDIF 255 ELSE 256 id%KEEP8(24) = 0_8 257 ENDIF 258C 259C KEEP8(26) might be modified 260C (element entry format) 261C but need be restore for 262C future factorisation 263C with different scaling option 264C 265 KEEP826_SAVE = id%KEEP8(26) 266C In case of loop on factorization with 267C different scaling options, initialize 268C DKEEP(4:5) to 0. 269 id%DKEEP(4)=-1.0D0 270 id%DKEEP(5)=-1.0D0 271C Mapping information used during solve. In case of several facto+solve 272C it has to be recomputed. In case of several solves with the same 273C facto, it is not recomputed. 274 IF (associated(id%IPTR_WORKING)) THEN 275 DEALLOCATE(id%IPTR_WORKING) 276 NULLIFY(id%IPTR_WORKING) 277 END IF 278 IF (associated(id%WORKING)) THEN 279 DEALLOCATE(id%WORKING) 280 NULLIFY(id%WORKING) 281 END IF 282C 283C Units for printing 284C MP: diagnostics 285C LP: errors 286C 287 LP = ICNTL( 1 ) 288 MP = ICNTL( 2 ) 289 MPG = ICNTL( 3 ) 290 LPOK = ((LP.GT.0).AND.(id%ICNTL(4).GE.1)) 291 PROK = ((MP.GT.0).AND.(id%ICNTL(4).GE.2)) 292 PROKG = ( MPG .GT. 0 .and. id%MYID .eq. MASTER ) 293 PROKG = (PROKG.AND.(id%ICNTL(4).GE.2)) 294 IF ( PROK ) WRITE( MP, 130 ) 295 IF ( PROKG ) WRITE( MPG, 130 ) 296 IF ( PROKG .and. KEEP(53).GT.0 ) THEN 297 WRITE(MPG,'(/A,I3)') ' Null space option :', KEEP(19) 298 IF ( KEEP(21) .ne. N ) THEN 299 WRITE( MPG, '(A,I10)') ' Max deficiency : ', KEEP(21) 300 END IF 301 IF ( KEEP(22) .ne. 0 ) THEN 302 WRITE( MPG, '(A,I10)') ' Min deficiency : ', KEEP(22) 303 END IF 304 END IF 305C ------------------------------------- 306C Depending on the type of parallelism, 307C the master can now (soon) potentially 308C have the role of a slave 309C ------------------------------------- 310 I_AM_SLAVE = ( id%MYID .ne. MASTER .OR. 311 & ( id%MYID .eq. MASTER .AND. 312 & KEEP(46) .eq. 1 ) ) 313C 314C Prepare work for out-of-core 315C 316 IF (id%MYID .EQ. MASTER .AND. KEEP(201) .NE. -1) THEN 317C Note that if KEEP(201)=-1, then we have decided 318C at analysis phase that factors will not be stored 319C (neither in memory nor on disk). In that case, 320C ICNTL(22) is ignored. 321C -- ICNTL(22) must be set before facto phase 322C (=1 OOC on; =0 OOC off) 323C and cannot be changed for subsequent solve phases. 324 KEEP(201)=id%ICNTL(22) 325 IF (KEEP(201) .NE. 0) THEN !Later: .GT. to allow ICNTL(22)=-1 326# if defined(OLD_OOC_NOPANEL) 327 KEEP(201)=2 328# else 329 KEEP(201)=1 330# endif 331 ENDIF 332 ENDIF 333 IF (id%MYID .EQ. MASTER) THEN 334 IF (id%KEEP(480).NE.0) THEN 335 id%KEEP(480) = 0 336 IF (PROK) 337 & write(MP,'(A)') 338 & ' MUMPS is not compiled with -DBLR_LUA ', 339 & ' => Resetting KEEP(480) to 0' 340 ENDIF 341 IF (id%KEEP(475).NE.0) THEN 342 id%KEEP(475) = 0 343 IF (PROK) 344 & write(MP,'(A)') 345 & ' MUMPS is not compiled with -DLRTRSM ', 346 & ' => Resetting KEEP(475) to 0' 347 ENDIF 348 ENDIF 349C ---------------------- 350C Broadcast KEEP options 351C defined for facto: 352C ---------------------- 353 CALL MPI_BCAST( KEEP(12), 1, MPI_INTEGER, 354 & MASTER, id%COMM, IERR ) 355 CALL MPI_BCAST( KEEP(19), 1, MPI_INTEGER, 356 & MASTER, id%COMM, IERR ) 357 CALL MPI_BCAST( KEEP(21), 1, MPI_INTEGER, 358 & MASTER, id%COMM, IERR ) 359 CALL MPI_BCAST( KEEP(201), 1, MPI_INTEGER, 360 & MASTER, id%COMM, IERR ) 361 IF (id%MYID.EQ.MASTER) THEN 362 IF ((KEEP(486).NE.0).AND.(KEEP(494).EQ.0)) THEN 363 IF (PROKG) THEN 364 WRITE(MPG,'(A)') 365 & " Internal ERROR with BLR setting " 366 WRITE(MPG,'(A)') " BLR was not activated during ", 367 & " analysis and is requested during factorization. " 368 id%INFO(1)=-900 369 ENDIF 370 ENDIF 371 ENDIF 372 CALL MPI_BCAST( KEEP(470), 23, MPI_INTEGER, 373 & MASTER, id%COMM, IERR ) 374 IF (id%MYID.EQ.MASTER) THEN 375 IF (KEEP(217).GT.2.OR.KEEP(217).LT.0) THEN 376 KEEP(217)=0 377 ENDIF 378 KEEP(214)=KEEP(217) 379 IF (KEEP(214).EQ.0) THEN 380 IF (KEEP(201).NE.0) THEN ! OOC or no factors 381 KEEP(214)=1 382 ELSE 383 KEEP(214)=2 384 ENDIF 385 ENDIF 386 ENDIF 387 CALL MPI_BCAST( KEEP(214), 1, MPI_INTEGER, 388 & MASTER, id%COMM, IERR ) 389 IF (KEEP(201).NE.0) THEN 390C -- Low Level I/O strategy 391 CALL MPI_BCAST( KEEP(99), 1, MPI_INTEGER, 392 & MASTER, id%COMM, IERR ) 393 CALL MPI_BCAST( KEEP(205), 1, MPI_INTEGER, 394 & MASTER, id%COMM, IERR ) 395 CALL MPI_BCAST( KEEP(211), 1, MPI_INTEGER, 396 & MASTER, id%COMM, IERR ) 397 ENDIF 398C 399C 400C KEEP(50) case 401C ============== 402C 403C KEEP(50) = 0 : matrix is unsymmetric 404C KEEP(50) /= 0 : matrix is symmetric 405C KEEP(50) = 1 : Ask L L^T on the root. Matrix is PSD. 406C KEEP(50) = 2 : Ask for L U on the root 407C KEEP(50) = 3 ... L D L^T ?? 408C 409C --------------------------------------- 410C For symmetric (non general) matrices 411C set (directly) CNTL(1) = 0.0 412C --------------------------------------- 413 IF ( KEEP(50) .eq. 1 ) THEN 414 IF (id%CNTL(1) .ne. ZERO ) THEN 415 IF ( PROKG ) THEN 416 WRITE(MPG,'(A)') 417 &' ** Warning : SPD solver called, resetting CNTL(1) to 0.0D0' 418 END IF 419 END IF 420 id%CNTL(1) = ZERO 421 END IF 422C Fwd in facto: explicitly forbid 423C sparse RHS and A-1 computation 424 IF (id%KEEP(252).EQ.1 .AND. id%MYID.EQ.MASTER) THEN 425 IF (id%ICNTL(20).EQ.1) THEN ! out-of-range => 0 426C NB: in doc ICNTL(20) only accessed during solve 427C In practice, will have failed earlier if RHS not allocated. 428C Still it looks safer to keep this test. 429 id%INFO(1)=-43 430 id%INFO(2)=20 431 IF (PROKG) WRITE(MPG,'(A)') 432 & ' ERROR: Sparse RHS is incompatible with forward', 433 & ' performed during factorization (ICNTL(32)=1)' 434 ELSE IF (id%ICNTL(30).NE.0) THEN ! out-of-range => 1 435 id%INFO(1)=-43 436 id%INFO(2)=30 437 IF (PROKG) WRITE(MPG,'(A)') 438 & ' ERROR: A-1 functionality incompatible with forward', 439 & ' performed during factorization (ICNTL(32)=1)' 440 ELSE IF (id%ICNTL(9) .NE. 1) THEN 441 id%INFO(1)=-43 442 id%INFO(2)=9 443 IF (PROKG) WRITE(MPG,'(A)') 444 & ' ERROR: Transpose system (ICNTL(9).NE.0) not ', 445 & ' compatible with forward performed during', 446 & ' factorization (ICNTL(32)=1)' 447 ENDIF 448 ENDIF 449 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 450 & id%COMM, id%MYID ) 451C 452 IF (id%INFO(1).LT.0) GOTO 530 453 IF ( PROKG ) THEN 454 WRITE( MPG, 172 ) id%NSLAVES, id%ICNTL(22), 455 & id%KEEP8(111), KEEP(126), KEEP(127), KEEP(28), 456 & id%KEEP8(4)/1000000_8, id%CNTL(1) 457 IF (KEEP(252).GT.0) 458 & WRITE(MPG,173) KEEP(253) 459 ENDIF 460 IF (KEEP(201).LE.0) THEN 461C In-core version or no factors 462 KEEP(IXSZ)=XSIZE_IC 463 ELSE IF (KEEP(201).EQ.2) THEN 464C OOC version, no panels 465 KEEP(IXSZ)=XSIZE_OOC_NOPANEL 466 ELSE IF (KEEP(201).EQ.1) THEN 467C Panel versions: 468 IF (KEEP(50).EQ.0) THEN 469 KEEP(IXSZ)=XSIZE_OOC_UNSYM 470 ELSE 471 KEEP(IXSZ)=XSIZE_OOC_SYM 472 ENDIF 473 ENDIF 474 IF ( KEEP(486) .NE. 0 ) THEN !LR is activated 475C Stats initialization for LR 476 CALL INIT_STATS_GLOBAL(id) 477 END IF 478C 479* ********************************** 480* Begin intializations regarding the 481* computation of the determinant 482* ********************************** 483 IF (id%MYID.EQ.MASTER) KEEP(258)=ICNTL(33) 484 CALL MPI_BCAST(KEEP(258), 1, MPI_INTEGER, 485 & MASTER, id%COMM, IERR) 486 IF (KEEP(258) .NE. 0) THEN 487 KEEP(259) = 0 ! Initial exponent of the local determinant 488 KEEP(260) = 1 ! Number of permutations 489 id%DKEEP(6) = 1.0D0 ! real part of the local determinant 490 ENDIF 491* ******************************** 492* End intializations regarding the 493* computation of the determinant 494* ******************************** 495C 496* ********************** 497* Begin of Scaling phase 498* ********************** 499C 500C SCALING MANAGEMENT 501C * Options 1, 3, 4 centralized only 502C 503C * Options 7, 8 : also works for distributed matrix 504C 505C At this point, we have the scaling arrays allocated 506C on the master. They have been allocated on the master 507C inside the main MUMPS driver. 508C 509 CALL MPI_BCAST(KEEP(52), 1, MPI_INTEGER, 510 & MASTER, id%COMM, IERR) 511 LSCAL = ((KEEP(52) .GT. 0) .AND. (KEEP(52) .LE. 8)) 512 IF (LSCAL) THEN 513C 514 IF ( id%MYID.EQ.MASTER ) THEN 515 CALL MUMPS_SECDEB(TIMEET) 516 ENDIF 517C ----------------------- 518C Retrieve parameters for 519C simultaneous scaling 520C ----------------------- 521 IF (KEEP(52) .EQ. 7) THEN 522C -- Cheap setting of SIMSCALING (it is the default in 4.8.4) 523 K231= KEEP(231) 524 K232= KEEP(232) 525 K233= KEEP(233) 526 ELSEIF (KEEP(52) .EQ. 8) THEN 527C -- More expensive setting of SIMSCALING (it was the default in 4.8.1,2,3) 528 K231= KEEP(239) 529 K232= KEEP(240) 530 K233= KEEP(241) 531 ENDIF 532 CALL MPI_BCAST(id%DKEEP(3),1,MPI_DOUBLE_PRECISION,MASTER, 533 & id%COMM,IERR) 534C 535 IF ( ((KEEP(52).EQ.7).OR.(KEEP(52).EQ.8)) .AND. 536 & KEEP(54).NE.0 ) THEN 537C ------------------------------ 538C Scaling for distributed matrix 539C We need to allocate scaling 540C arrays on all processors, not 541C only the master. 542C ------------------------------ 543 IF ( id%MYID .NE. MASTER ) THEN 544 IF ( associated(id%COLSCA)) 545 & DEALLOCATE( id%COLSCA ) 546 IF ( associated(id%ROWSCA)) 547 & DEALLOCATE( id%ROWSCA ) 548 ALLOCATE( id%COLSCA(N), stat=IERR) 549 IF (IERR .GT.0) THEN 550 id%INFO(1)=-13 551 id%INFO(2)=N 552 ENDIF 553 ALLOCATE( id%ROWSCA(N), stat=IERR) 554 IF (IERR .GT.0) THEN 555 id%INFO(1)=-13 556 id%INFO(2)=N 557 ENDIF 558 ENDIF 559 M = N 560 BUMAXMN=M 561 IF(N > BUMAXMN) BUMAXMN = N 562 LIWK = 4*BUMAXMN 563 ALLOCATE (IWK(LIWK),BURP(M),BUCP(N), 564 & BURS(2* (id%NPROCS)),BUCS(2* (id%NPROCS)), 565 & stat=allocok) 566 IF (allocok > 0) THEN 567 id%INFO(1)=-13 568 id%INFO(2)=LIWK+M+N+4* (id%NPROCS) 569 ENDIF 570C --- Propagate enventual error 571 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 572 & id%COMM, id%MYID ) 573 IF (id%INFO(1).LT.0) GOTO 530 574C -- estimation of memory and construction of partvecs 575 BUJOB = 1 576C -- LWK not used 577 LWK_REAL = 1 578 ALLOCATE(WK_REAL(LWK_REAL)) 579 CALL DMUMPS_SIMSCALEABS( 580 & MYIRN_loc(1), MYJCN_loc(1), MYA_loc(1), 581 & id%KEEP8(29), 582 & M, N, id%NPROCS, id%MYID, id%COMM, 583 & BURP, BUCP, 584 & BURS, BUCS, BUREGISTRE, 585 & IWK, LIWK, 586 & BUINTSZ, BURESZ, BUJOB, 587 & id%ROWSCA(1), id%COLSCA(1), WK_REAL, LWK_REAL, 588 & id%KEEP(50), 589 & K231, K232, K233, 590 & id%DKEEP(3), 591 & SCONEERR, SCINFERR) 592 IF(LIWK < BUINTSZ) THEN 593 DEALLOCATE(IWK) 594 LIWK = BUINTSZ 595 ALLOCATE(IWK(LIWK), stat=allocok) 596 IF (allocok > 0) THEN 597 id%INFO(1)=-13 598 id%INFO(2)=LIWK 599 ENDIF 600 ENDIF 601 LWK_REAL = BURESZ 602 DEALLOCATE(WK_REAL) 603 ALLOCATE (WK_REAL(LWK_REAL), stat=allocok) 604 IF (allocok > 0) THEN 605 id%INFO(1)=-13 606 id%INFO(2)=LWK_REAL 607 ENDIF 608C --- Propagate enventual error 609 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 610 & id%COMM, id%MYID ) 611 IF (id%INFO(1).LT.0) GOTO 530 612C -- estimation of memory and construction of partvecs 613 BUJOB = 2 614 CALL DMUMPS_SIMSCALEABS( 615 & MYIRN_loc(1), MYJCN_loc(1), MYA_loc(1), 616 & id%KEEP8(29), 617 & M, N, id%NPROCS, id%MYID, id%COMM, 618 & BURP, BUCP, 619 & BURS, BUCS, BUREGISTRE, 620 & IWK, LIWK, 621 & BUINTSZ, BURESZ, BUJOB, 622 & id%ROWSCA(1), id%COLSCA(1), WK_REAL, LWK_REAL, 623 & id%KEEP(50), 624 & K231, K232, K233, 625 & id%DKEEP(3), 626 & SCONEERR, SCINFERR) 627 id%DKEEP(4) = SCONEERR 628 id%DKEEP(5) = SCINFERR 629CXXXX 630 DEALLOCATE(IWK, WK_REAL,BURP,BUCP,BURS, BUCS) 631 ELSE IF ( KEEP(54) .EQ. 0 ) THEN 632C ------------------ 633C Centralized matrix 634C ------------------ 635 IF ((KEEP(52).EQ.7).OR.(KEEP(52).EQ.8)) THEN 636C ------------------------------- 637C Create a communicator of size 1 638C ------------------------------- 639 IF (id%MYID.EQ.MASTER) THEN 640 COLOUR = 0 641 ELSE 642 COLOUR = MPI_UNDEFINED 643 ENDIF 644 CALL MPI_COMM_SPLIT( id%COMM, COLOUR, 0, 645 & COMM_FOR_SCALING, IERR ) 646 IF (id%MYID.EQ.MASTER) THEN 647 M = N 648 BUMAXMN=N 649CXXXX 650 IF(N > BUMAXMN) BUMAXMN = N 651 LIWK = 1 652 ALLOCATE (IWK(LIWK),BURP(1),BUCP(1), 653 & BURS(1),BUCS(1), 654 & stat=allocok) 655 LWK_REAL = M + N 656 ALLOCATE (WK_REAL(LWK_REAL), stat=allocok) 657 IF (allocok > 0) THEN 658 id%INFO(1)=-13 659 id%INFO(2)=1 660 ENDIF 661 IF (id%INFO(1) .LT. 0) GOTO 400 662 CALL MPI_COMM_RANK(COMM_FOR_SCALING, SCMYID, IERR) 663 CALL MPI_COMM_SIZE(COMM_FOR_SCALING, SCNPROCS, IERR) 664 BUJOB = 1 665 CALL DMUMPS_SIMSCALEABS( 666 & id%IRN(1), id%JCN(1), id%A(1), 667 & id%KEEP8(28), 668 & M, N, SCNPROCS, SCMYID, COMM_FOR_SCALING, 669 & BURP, BUCP, 670 & BURS, BUCS, BUREGISTRE, 671 & IWK, LIWK, 672 & BUINTSZ, BURESZ, BUJOB, 673 & id%ROWSCA(1), id%COLSCA(1), WK_REAL, LWK_REAL, 674 & id%KEEP(50), 675 & K231, K232, K233, 676 & id%DKEEP(3), 677 & SCONEERR, SCINFERR) 678 IF(LWK_REAL < BURESZ) THEN 679 ! internal error since LWK_REAL=BURESZ=M+N 680 id%INFO(1) = -136 681 GOTO 400 682 ENDIF 683 BUJOB = 2 684 CALL DMUMPS_SIMSCALEABS(id%IRN(1), 685 & id%JCN(1), id%A(1), 686 & id%KEEP8(28), 687 & M, N, SCNPROCS, SCMYID, COMM_FOR_SCALING, 688 & BURP, BUCP, 689 & BURS, BUCS, BUREGISTRE, 690 & IWK, LIWK, 691 & BUINTSZ, BURESZ, BUJOB, 692 & id%ROWSCA(1), id%COLSCA(1), WK_REAL, LWK_REAL, 693 & id%KEEP(50), 694 & K231, K232, K233, 695 & id%DKEEP(3), 696 & SCONEERR, SCINFERR) 697 id%DKEEP(4) = SCONEERR 698 id%DKEEP(5) = SCINFERR 699CXXXX 700 DEALLOCATE(WK_REAL) 701 DEALLOCATE (IWK,BURP,BUCP, 702 & BURS,BUCS) 703 ENDIF 704C Centralized matrix: make DKEEP(4:5) available to all processors 705 CALL MPI_BCAST( id%DKEEP(4),2,MPI_DOUBLE_PRECISION, 706 & MASTER, id%COMM, IERR ) 707 400 CONTINUE 708 IF (id%MYID.EQ.MASTER) THEN 709C Communicator should only be 710C freed on the master process 711 CALL MPI_COMM_FREE(COMM_FOR_SCALING, IERR) 712 ENDIF 713 CALL MUMPS_PROPINFO(ICNTL(1), id%INFO(1), 714 & id%COMM, id%MYID) 715 IF (id%INFO(1).LT.0) GOTO 530 716 ELSE IF (id%MYID.EQ.MASTER) THEN 717C ---------------------------------- 718C Centralized scaling, options 1 to 6 719C ---------------------------------- 720 IF (KEEP(52).GT.0 .AND. KEEP(52).LE.6) THEN 721C --------------------- 722C Allocate temporary 723C workspace for scaling 724C --------------------- 725 IF ( KEEP(52) .eq. 5 .or. 726 & KEEP(52) .eq. 6 ) THEN 727C We have an explicit copy of the original 728C matrix in complex format which should probably 729C be avoided (but do we want to keep all 730C those old scaling options ?) 731 LWK = id%KEEP8(28) 732 ELSE 733 LWK = 1_8 734 END IF 735 LWK_REAL = 5 * N 736 ALLOCATE( WK_REAL( LWK_REAL ), stat = IERR ) 737 IF ( IERR .GT. 0 ) THEN 738 id%INFO(1) = -13 739 id%INFO(2) = LWK_REAL 740 GOTO 137 741 END IF 742 ALLOCATE( WK( LWK ), stat = IERR ) 743 IF ( IERR .GT. 0 ) THEN 744 id%INFO(1) = -13 745 CALL MUMPS_SET_IERROR(LWK, id%INFO(2)) 746 GOTO 137 747 END IF 748 CALL DMUMPS_FAC_A(N, id%KEEP8(28), KEEP(52), id%A(1), 749 & id%IRN(1), id%JCN(1), 750 & id%COLSCA(1), id%ROWSCA(1), 751 & WK, LWK, WK_REAL, LWK_REAL, ICNTL(1), id%INFO(1) ) 752 DEALLOCATE( WK_REAL ) 753 DEALLOCATE( WK ) 754 ENDIF 755 ENDIF 756 ENDIF ! Scaling distributed matrices or centralized 757 IF (id%MYID.EQ.MASTER) THEN 758 CALL MUMPS_SECFIN(TIMEET) 759 id%DKEEP(92)=TIMEET 760C Print inf-norm after last KEEP(233) iterations of 761C scaling option KEEP(52)=7 or 8 (SimScale) 762C 763 IF (PROKG.AND.(KEEP(52).EQ.7.OR.KEEP(52).EQ.8) 764 & .AND. (K233+K231+K232).GT.0) THEN 765 IF (K232.GT.0) WRITE(MPG, 166) id%DKEEP(4) 766 ENDIF 767 ENDIF 768 ENDIF ! LSCAL 769C 770C scaling might also be provided by the user 771 LSCAL = (LSCAL .OR. (KEEP(52) .EQ. -1) .OR. KEEP(52) .EQ. -2) 772 IF (LSCAL .AND. KEEP(258).NE.0 .AND. id%MYID .EQ. MASTER) THEN 773 DO I = 1, id%N 774 CALL DMUMPS_UPDATEDETER_SCALING(id%ROWSCA(I), 775 & id%DKEEP(6), ! determinant 776 & KEEP(259)) ! exponent of the determinant 777 ENDDO 778 IF (KEEP(50) .EQ. 0) THEN ! unsymmetric 779 DO I = 1, id%N 780 CALL DMUMPS_UPDATEDETER_SCALING(id%COLSCA(I), 781 & id%DKEEP(6), ! determinant 782 & KEEP(259)) ! exponent of the determinant 783 ENDDO 784 ELSE 785C ----------------------------------------- 786C In this case COLSCA = ROWSCA 787C Since determinant was initialized to 1, 788C compute square of the current determinant 789C rather than going through COLSCA. 790C ----------------------------------------- 791 CALL DMUMPS_DETER_SQUARE(id%DKEEP(6), KEEP(259)) 792 ENDIF 793C Now we should have taken the 794C inverse of the scaling vectors 795 CALL DMUMPS_DETER_SCALING_INVERSE(id%DKEEP(6), KEEP(259)) 796 ENDIF 797C 798C ******************** 799C End of Scaling phase 800C At this point: either (matrix is distributed and KEEP(52)=7 or 8) 801C in which case scaling arrays are allocated on all processors, 802C or scaling arrays are only on the host processor. 803C In case of distributed matrix input, we will free the scaling 804C arrays on procs with MYID .NE. 0 after the all-to-all distribution 805C of the original matrix. 806C ******************** 807C 808 137 CONTINUE 809C Fwd in facto: in case of repeated factorizations 810C with different Schur options we prefer to free 811C systematically this array now than waiting for 812C the root node. We rely on the fact that it is 813C allocated or not during the solve phase so if 814C it was allocated in a 1st call to facto and not 815C in a second, we don't want the solve to think 816C it was allocated in the second call. 817 IF (associated(id%root%RHS_CNTR_MASTER_ROOT)) THEN 818 DEALLOCATE (id%root%RHS_CNTR_MASTER_ROOT) 819 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT) 820 ENDIF 821C Fwd in facto: check that id%NRHS has not changed 822 IF ( id%MYID.EQ.MASTER.AND. KEEP(252).EQ.1 .AND. 823 & id%NRHS .NE. id%KEEP(253) ) THEN 824C Error: NRHS should not have 825C changed since the analysis 826 id%INFO(1)=-42 827 id%INFO(2)=id%KEEP(253) 828 ENDIF 829C Fwd in facto: allocate and broadcast RHS_MUMPS 830C to make it available on all processors. 831 IF (id%KEEP(252) .EQ. 1) THEN 832 IF ( id%MYID.NE.MASTER ) THEN 833 id%KEEP(254) = N ! Leading dimension 834 id%KEEP(255) = N*id%KEEP(253) ! Tot size 835 ALLOCATE(RHS_MUMPS(id%KEEP(255)),stat=IERR) 836 IF (IERR > 0) THEN 837 id%INFO(1)=-13 838 id%INFO(2)=id%KEEP(255) 839 IF (LPOK) 840 & WRITE(LP,*) 'ERREUR while allocating RHS on a slave' 841 NULLIFY(RHS_MUMPS) 842 ENDIF 843 RHS_MUMPS_ALLOCATED = .TRUE. 844 ELSE 845C Case of non working master 846 id%KEEP(254)=id%LRHS ! Leading dimension 847 id%KEEP(255)=id%LRHS*(id%KEEP(253)-1)+id%N ! Tot size 848 RHS_MUMPS=>id%RHS 849 RHS_MUMPS_ALLOCATED = .FALSE. 850 IF (LSCAL) THEN 851C Scale before broadcast: apply row 852C scaling (remark that we assume no 853C transpose). 854 DO K=1, id%KEEP(253) 855 DO I=1, N 856 RHS_MUMPS( id%KEEP(254) * (K-1) + I ) 857 & = RHS_MUMPS( id%KEEP(254) * (K-1) + I ) 858 & * id%ROWSCA(I) 859 ENDDO 860 ENDDO 861 ENDIF 862 ENDIF 863 ELSE 864 id%KEEP(255)=1 865 ALLOCATE(RHS_MUMPS(1)) 866 RHS_MUMPS_ALLOCATED = .TRUE. 867 ENDIF 868 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 869 & id%COMM, id%MYID ) 870 IF ( id%INFO(1).lt.0 ) GOTO 530 871 IF (KEEP(252) .EQ. 1) THEN 872C 873C Broadcast the columns of the right-hand side 874C one by one. Leading dimension is keep(254)=N 875C on procs with MYID > 0 but may be larger on 876C the master processor. 877 DO I= 1, id%KEEP(253) 878 CALL MPI_BCAST(RHS_MUMPS((I-1)*id%KEEP(254)+1), N, 879 & MPI_DOUBLE_PRECISION, MASTER,id%COMM,IERR) 880 END DO 881 ENDIF 882C Keep a copy of ICNTL(24) and make it 883C available on all working processors. 884 KEEP(110)=id%ICNTL(24) 885 CALL MPI_BCAST(KEEP(110), 1, MPI_INTEGER, 886 & MASTER, id%COMM, IERR) 887C KEEP(110) defaults to 0 for out of range values 888 IF (KEEP(110).NE.1) KEEP(110)=0 889 IF (KEEP(219).NE.0) THEN 890 CALL DMUMPS_BUF_MAX_ARRAY_MINSIZE(max(KEEP(108),1),IERR) 891 IF (IERR .NE. 0) THEN 892C ------------------------ 893C Error allocating DMUMPS_BUF 894C ------------------------ 895 id%INFO(1) = -13 896 id%INFO(2) = max(KEEP(108),1) 897 END IF 898 ENDIF 899* 900* 901C ----------------------------------------------- 902C Depending on the option used for 903C -detecting null pivots (ICNTL(24)/KEEP(110)) 904C CNTL(3) is used to set DKEEP(1) 905C ( A row is considered as null if ||row|| < DKEEP(1) ) 906C CNTL(5) is then used to define if a large 907C value is set on the diagonal or if a 1 is set 908C and other values in the row are reset to zeros. 909C -Rank revealing on the Schur (ICNTL(16)/KEEP(19)) 910C CNTL(6) is used to set SEUIL and SEUIL_LDLT_NIV2 911C SEUIL* corresponds to the minimum required 912C absolute value of pivot. 913C SEUIL_LDLT_NIV2 is used only in the 914C case of SYM=2 within a niv2 node for which 915C we have only a partial view of the fully summed rows. 916C Note that SEUIL* might be reset later in this routine 917C but only when static pivoting is on 918C which will be excluded if null pivots or 919C rank-revealing (RR) is on 920C ----------------------------------------------- 921 IF (id%MYID .EQ. MASTER) CNTL3 = id%CNTL(3) 922 CALL MPI_BCAST(CNTL3, 1, MPI_DOUBLE_PRECISION, 923 & MASTER, id%COMM, IERR) 924 IF (id%MYID .EQ. MASTER) CNTL5 = id%CNTL(5) 925 CALL MPI_BCAST(CNTL5, 1, MPI_DOUBLE_PRECISION, 926 & MASTER, id%COMM, IERR) 927 IF (id%MYID .EQ. MASTER) CNTL6 = id%CNTL(6) 928 CALL MPI_BCAST(CNTL6, 1, MPI_DOUBLE_PRECISION, 929 & MASTER, id%COMM, IERR) 930 IF (id%MYID .EQ. MASTER) CNTL1 = id%CNTL(1) 931 CALL MPI_BCAST(CNTL1, 1, MPI_DOUBLE_PRECISION, 932 & MASTER, id%COMM, IERR) 933 IF (id%MYID .EQ. MASTER) id%DKEEP(8) = id%CNTL(7) 934 CALL MPI_BCAST(id%DKEEP(8), 1, MPI_DOUBLE_PRECISION, 935 & MASTER, id%COMM, IERR) 936 IF (KEEP(486).EQ.0) id%DKEEP(8) = ZERO 937 COMPUTE_ANORMINF = .FALSE. 938 IF ( (KEEP(486) .NE. 0).AND. (id%DKEEP(8).LT.ZERO)) THEN 939 COMPUTE_ANORMINF = .TRUE. 940 ENDIF 941 IF (KEEP(19).NE.0) THEN 942 COMPUTE_ANORMINF = .TRUE. 943 ENDIF 944C ------------------------------------------------------- 945C We compute ANORMINF, when needed, based on 946C the infinite norm of Rowsca *A*Colsca 947C and make it available on all working processes. 948 IF (COMPUTE_ANORMINF) THEN 949 CALL DMUMPS_ANORMINF( id , ANORMINF, LSCAL ) 950 ELSE 951 ANORMINF = ZERO 952 ENDIF 953C 954C Set BLR threshold 955 IF (id%DKEEP(8).LT.ZERO) THEN 956 id%DKEEP(8) = abs(id%DKEEP(8))*ANORMINF 957 ENDIF 958 IF (KEEP(19).EQ.0) THEN 959C -- RR is off 960 SEUIL = ZERO 961 id%DKEEP(9) = ZERO 962 ELSE 963C -- RR is on 964C July 2012 965C CNTL(3) is the threshold used in the following 966C to compute the SEUIL used for postponing pivots to root 967C SEUIL*CNTL(6) is then the treshold for null pivot detection 968C (with 0< CNTL(6) <= 1) 969 IF (CNTL3 .LT. ZERO) THEN 970 SEUIL = abs(CNTL(3)) 971 ELSE IF (CNTL3 .GT. ZERO) THEN 972 SEUIL = CNTL3*ANORMINF 973 ELSE ! (CNTL(3) .EQ. ZERO) THEN 974 SEUIL = N*EPS*ANORMINF ! standard articles 975 ENDIF 976 IF (PROKG) WRITE(MPG,*) 977 & ' ABSOLUTE PIVOT THRESHOLD for rank revealing =',SEUIL 978 ENDIF 979C After QR with pivoting of root or SVD, diagonal entries 980C need be analysed to determine null space vectors. 981C Two strategies are provided : 982 id%DKEEP(9) = SEUIL 983 IF (id%DKEEP(10).LT.MONE) THEN 984 id%DKEEP(10)=MONE 985 ELSEIF((id%DKEEP(10).LE.ONE).AND.(id%DKEEP(10).GE.ZERO)) THEN 986 id%DKEEP(10)=1000.0D0 987 ENDIF 988 SEUIL_LDLT_NIV2 = SEUIL 989C ------------------------------- 990C -- Null pivot row detection 991C ------------------------------- 992 IF (KEEP(110).EQ.0) THEN 993C Initialize DKEEP(1) to a negative value 994C in order to avoid detection of null pivots 995C (test max(AMAX,RMAX,abs(PIVOT)).LE.PIVNUL 996C in DMUMPS_FAC_I, where PIVNUL=DKEEP(1)) 997 id%DKEEP(1) = -1.0D0 998 id%DKEEP(2) = ZERO 999 ELSE 1000 IF (ANORMINF.EQ.ZERO) 1001 & CALL DMUMPS_ANORMINF( id , ANORMINF, LSCAL ) 1002 IF (KEEP(19).NE.0) THEN 1003C RR postponing considers that pivot rows of norm smaller that SEUIL 1004C should be postponed. 1005C Pivot rows smaller than DKEEP(1) are directly added to null space 1006C and thus considered as null pivot rows. Thus we define id%DKEEP(1) 1007C relatively to SEUIL (which is based on CNTL(3)) 1008 IF (CNTL(6).GT.0.AND.CNTL(6).LT.1) THEN 1009C we want DKEEP(1) < SEUIL 1010 id%DKEEP(1) = SEUIL*CNTL(6) 1011 ELSE 1012 id%DKEEP(1) = SEUIL* 0.01D0 1013 ENDIF 1014 ELSE 1015C We keep strategy currently used in MUMPS 4.10.0 1016 IF (CNTL3 .LT. ZERO) THEN 1017 id%DKEEP(1) = abs(CNTL(3)) 1018 ELSE IF (CNTL3 .GT. ZERO) THEN 1019 id%DKEEP(1) = CNTL3*ANORMINF 1020 ELSE ! (CNTL(3) .EQ. ZERO) THEN 1021 id%DKEEP(1) = 1.0D-5*EPS*ANORMINF 1022 ENDIF 1023 ENDIF 1024 IF (PROKG) WRITE(MPG,*) 1025 & ' ZERO PIVOT DETECTION ON, THRESHOLD =',id%DKEEP(1) 1026 IF (CNTL5.GT.ZERO) THEN 1027 id%DKEEP(2) = CNTL5 * ANORMINF 1028 IF (PROKG) WRITE(MPG,*) 1029 & ' FIXATION FOR NULL PIVOTS =',id%DKEEP(2) 1030 ELSE 1031 IF (PROKG) WRITE(MPG,*) 'INFINITE FIXATION ' 1032 IF (id%KEEP(50).EQ.0) THEN 1033C Unsym 1034 ! the user let us choose a fixation. set in NEGATIVE 1035 ! to detect during facto when to set row to zero ! 1036 id%DKEEP(2) = -max(1.0D10*ANORMINF, 1037 & sqrt(huge(ANORMINF))/1.0D8) 1038 ELSE 1039C Sym 1040 id%DKEEP(2) = ZERO 1041 ENDIF 1042 ENDIF 1043 ENDIF 1044C Find id of root node if RR is on 1045 IF (KEEP(53).NE.0) THEN 1046 ID_ROOT =MUMPS_PROCNODE(id%PROCNODE_STEPS(id%STEP(KEEP(20))), 1047 & id%NSLAVES) 1048 IF ( KEEP( 46 ) .NE. 1 ) THEN 1049 ID_ROOT = ID_ROOT + 1 1050 END IF 1051 ENDIF 1052C Second pass: set parameters for null pivot detection 1053C Allocate PIVNUL_LIST in case of null pivot detection 1054C and in case of rank revealing 1055 LPN_LIST = 1 1056 IF ( associated( id%PIVNUL_LIST) ) DEALLOCATE(id%PIVNUL_LIST) 1057 IF(KEEP(110) .EQ. 1) THEN 1058 LPN_LIST = N 1059 ENDIF 1060 IF (KEEP(19).NE.0 .AND. 1061 & (ID_ROOT.EQ.id%MYID .OR. id%MYID.EQ.MASTER)) THEN 1062 LPN_LIST = N 1063 ENDIF 1064 ALLOCATE( id%PIVNUL_LIST(LPN_LIST),stat = IERR ) 1065 IF ( IERR .GT. 0 ) THEN 1066 id%INFO(1)=-13 1067 id%INFO(2)=LPN_LIST 1068 END IF 1069 id%PIVNUL_LIST(1:LPN_LIST) = 0 1070 KEEP(109) = 0 1071C end set parameter for null pivot detection 1072 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1073 & id%COMM, id%MYID ) 1074 IF ( id%INFO(1).lt.0 ) GOTO 530 1075C -------------------------------------------------------------- 1076C STATIC PIVOTING 1077C -- Static pivoting only when RR and Null pivot detection OFF 1078C -------------------------------------------------------------- 1079 IF ((KEEP(19).EQ.0).AND.(KEEP(110).EQ.0)) THEN 1080C -- Set KEEP(97) and compute static pivoting threshold. 1081 IF (id%MYID .EQ. MASTER) CNTL4 = id%CNTL(4) 1082 CALL MPI_BCAST( CNTL4, 1, MPI_DOUBLE_PRECISION, 1083 & MASTER, id%COMM, IERR ) 1084C 1085 IF ( CNTL4 .GE. ZERO ) THEN 1086 KEEP(97) = 1 1087 IF ( CNTL4 .EQ. ZERO ) THEN 1088C -- set seuil to sqrt(eps)*||A|| 1089 IF(ANORMINF .EQ. ZERO) THEN 1090 CALL DMUMPS_ANORMINF( id , ANORMINF, LSCAL ) 1091C WRITE(*,*) id%MYID,': ANORMINF',ANORMINF 1092 ENDIF 1093 SEUIL = sqrt(EPS) * ANORMINF 1094 ELSE 1095C WRITE(*,*) 'id%CNTL(4)',id%CNTL(4) 1096 SEUIL = CNTL4 1097 ENDIF 1098 SEUIL_LDLT_NIV2 = SEUIL 1099C 1100 ELSE 1101 SEUIL = ZERO 1102 ENDIF 1103 ENDIF 1104C set number of tiny pivots / 2x2 pivots in types 1 / 1105C 2x2 pivots in types 2, to zero. This is because the 1106C user can call the factorization step several times. 1107 KEEP(98) = 0 1108 KEEP(103) = 0 1109 KEEP(105) = 0 1110 MAXS = 1_8 1111C 1112C The memory allowed is given by ICNTL(23) in Mbytes 1113C 0 means that nothing is provided. 1114C Save memory available, ICNTL(23) in KEEP8(4) 1115C 1116 IF ( id%MYID.EQ.MASTER ) THEN 1117 ITMP = ICNTL(23) 1118 END IF 1119 CALL MPI_BCAST( ITMP, 1, MPI_INTEGER, 1120 & MASTER, id%COMM, IERR ) 1121C 1122C Ignore ICNTL(23) when WK_USER is provided 1123c by resetting ITMP to zero on each proc where WK_USER is provided 1124 IF (WK_USER_PROVIDED) ITMP = 0 1125 ITMP8 = int(ITMP, 8) 1126 id%KEEP8(4) = ITMP8 * 1000000_8 ! convert to nb of bytes 1127* 1128* Start allocations 1129* ***************** 1130* 1131C 1132C The slaves can now perform the factorization 1133C 1134C 1135C Allocate S on all nodes 1136C or point to user provided data WK_USER when LWK_USER>0 1137C ======================= 1138C 1139C 1140 PERLU = KEEP(12) 1141 IF (KEEP(201) .EQ. 0) THEN 1142C In-core 1143 MAXS_BASE8=id%KEEP8(12) 1144 ELSE 1145C OOC or no factors stored 1146 MAXS_BASE8=id%KEEP8(14) 1147 ENDIF 1148 IF (WK_USER_PROVIDED) THEN 1149C -- Set MAXS to size of WK_USER_ 1150 MAXS = id%KEEP8(24) 1151 ELSE 1152 IF ( MAXS_BASE8 .GT. 0_8 ) THEN 1153 MAXS_BASE_RELAXED8 = 1154 & MAXS_BASE8 + int(PERLU,8) * ( MAXS_BASE8 / 100_8 + 1_8) 1155C If PERLU < 0, we may obtain a 1156C null or negative value of MAXS. 1157 IF (MAXS_BASE_RELAXED8 > huge(MAXS)) THEN 1158C id%INFO(1)=-37 1159C id%INFO(2)=int(MAXS_BASE_RELAXED8/1000000_8) 1160 WRITE(*,*) "Internal error: I8 overflow" 1161 CALL MUMPS_ABORT() 1162 ENDIF 1163 MAXS_BASE_RELAXED8 = max(MAXS_BASE_RELAXED8, 1_8) 1164 MAXS = MAXS_BASE_RELAXED8 1165C Note that in OOC this value of MAXS will be 1166C overwritten if KEEP(96) .NE. 0 or if 1167C ICNTL(23) (that is, KEEP8(4)) is provided. 1168 ELSE 1169 MAXS = 1_8 1170 MAXS_BASE_RELAXED8 = 1_8 1171 END IF 1172 ENDIF 1173 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1174 & id%COMM, id%MYID ) 1175 IF (id%INFO(1) .LT. 0) THEN 1176 GOTO 530 1177 ENDIF 1178C 1179C If KEEP(96) is provided, 1180C use it without asking questions 1181C 1182 IF ((.NOT.WK_USER_PROVIDED).AND.(I_AM_SLAVE)) THEN 1183C 1184C 1185 IF (KEEP(96).GT.0) THEN 1186C -- useful mostly for internal testing: 1187C -- we can force in this way a given value 1188C -- of MAXS and forget about other input values 1189C -- such as ICNTL(23) (KEEP8(4)/1D6) 1190C -- that could change MAXS value. 1191 MAXS=int(KEEP(96),8) 1192 ELSE 1193 IF (id%KEEP8(4) .NE. 0_8) THEN 1194C ------------------------- 1195C WE TRY TO USE MEM_ALLOWED (KEEP8(4)/1D6) 1196C ------------------------- 1197C First compute what we have: TOTAL_MBYTES(PERLU) 1198C and TOTAL_BYTES(PERLU) 1199C 1200 PERLU_ON = .TRUE. 1201 CALL DMUMPS_MAX_MEM( id%KEEP(1), id%KEEP8(1), 1202 & id%MYID, id%N, id%NELT, id%NA(1), id%LNA, 1203 & id%KEEP8(28), id%KEEP8(30), 1204 & id%NSLAVES, TOTAL_MBYTES, .FALSE., KEEP(201), 1205 & PERLU_ON, TOTAL_BYTES) 1206C 1207C Assuming that TOTAL_BYTES is due to MAXS rather than 1208C to the temporary buffers used for the distribution of 1209C the matrix on the slaves (arrowheads or element distrib), 1210C then we have: 1211C 1212C KEEP8(4)-TOTAL_BYTES is the extra free space 1213C 1214C A simple algorithm to redistribute the extra space: 1215C All extra freedom (it could be negative !) is added to MAXS: 1216 MAXS_BASE_RELAXED8=MAXS_BASE_RELAXED8 + 1217 & (id%KEEP8(4)-TOTAL_BYTES)/int(KEEP(35),8) 1218 IF (MAXS_BASE_RELAXED8 > int(huge(MAXS),8)) THEN 1219 WRITE(*,*) "Internal error: I8 overflow" 1220 CALL MUMPS_ABORT() 1221 ELSE IF (MAXS_BASE_RELAXED8 .LE. 0_8) THEN 1222C We need more space in order to at least enough 1223 id%INFO(1)=-9 1224 IF ( -MAXS_BASE_RELAXED8 .GT. 1225 & int(huge(id%INFO(1)),8) ) THEN 1226 WRITE(*,*) "I8: OVERFLOW" 1227 CALL MUMPS_ABORT() 1228 ENDIF 1229 id%INFO(2)=-int(MAXS_BASE_RELAXED8) 1230 ELSE 1231 MAXS=MAXS_BASE_RELAXED8 1232 ENDIF 1233 ENDIF 1234 ENDIF 1235 ENDIF ! I_AM_SLAVE 1236 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1237 & id%COMM, id%MYID ) 1238 IF (id%INFO(1) .LT. 0) THEN 1239 GOTO 530 1240 ENDIF 1241 CALL DMUMPS_AVGMAX_STAT8(PROKG, MPG, MAXS, id%NSLAVES, 1242 & id%COMM, "effective relaxed size of S =") 1243C Next PROPINFO is there for possible negative 1244C values of MAXS resulting from small MEM_ALLOWED 1245 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1246 & id%COMM, id%MYID ) 1247 IF (id%INFO(1) .LT. 0) THEN 1248C We jump after the call to LOAD_END and OOC_END since we didn't 1249C called yet OOC_INIT and LOAD_INIT 1250 GOTO 530 1251 ENDIF 1252 IF ( I_AM_SLAVE ) THEN 1253C ------------------ 1254C Dynamic scheduling 1255C ------------------ 1256 CALL DMUMPS_LOAD_SET_INICOST( dble(id%COST_SUBTREES), 1257 & KEEP(64), KEEP(66), KEEP(375), MAXS ) 1258 K28=KEEP(28) 1259 MEMORY_MD_ARG = min(int(PERLU,8) * ( MAXS_BASE8 / 100_8 + 1_8 ), 1260C Restrict freedom from dynamic scheduler when 1261C MEM_ALLOWED=ICNTL(23) is small (case where KEEP8(4)-TOTAL_BYTES 1262C is negative after call to DMUMPS_MAX_MEM) 1263 & max(0_8, MAXS-MAXS_BASE8)) 1264 CALL DMUMPS_LOAD_INIT( id, MEMORY_MD_ARG, MAXS ) 1265C 1266C Out-Of-Core (OOC) issues. Case where we ran one factorization OOC 1267C and the second one is in-core: we try to free OOC 1268C related data from previous factorization. 1269C 1270 CALL DMUMPS_CLEAN_OOC_DATA(id, IERR) 1271 IF (IERR < 0) THEN 1272 id%INFO(1) = -90 1273 id%INFO(2) = 0 1274 GOTO 112 1275 ENDIF 1276 IF (KEEP(201) .GT. 0) THEN 1277C ------------------- 1278C OOC initializations 1279C ------------------- 1280 IF (KEEP(201).EQ.1 !PANEL Version 1281 & .AND.KEEP(50).EQ.0 ! Unsymmetric 1282 & .AND.KEEP(251).NE.2 ! Store L to disk 1283 & ) THEN 1284 id%OOC_NB_FILE_TYPE=2 ! declared in MUMPS_OOC_COMMON 1285 ELSE 1286 id%OOC_NB_FILE_TYPE=1 ! declared in MUMPS_OOC_COMMON 1287 ENDIF 1288C ------------------------------ 1289C Dimension IO buffer, KEEP(100) 1290C ------------------------------ 1291 IF (KEEP(205) .GT. 0) THEN 1292 KEEP(100) = KEEP(205) 1293 ELSE 1294 IF (KEEP(201).EQ.1) THEN ! PANEL version 1295 I8TMP = int(id%OOC_NB_FILE_TYPE,8) * 1296 & 2_8 * int(KEEP(226),8) 1297 ELSE 1298 I8TMP = 2_8 * id%KEEP8(119) 1299 ENDIF 1300 I8TMP = I8TMP + int(max(KEEP(12),0),8) * 1301 & (I8TMP/100_8+1_8) 1302C we want to avoid too large IO buffers. 1303C 12M corresponds to 100Mbytes given to buffers. 1304 I8TMP = min(I8TMP, 12000000_8) 1305 KEEP(100)=int(I8TMP) 1306 ENDIF 1307 IF (KEEP(201).EQ.1) THEN 1308C Panel version. Force the use of a buffer. 1309 IF ( KEEP(99) < 3 ) THEN 1310 KEEP(99) = KEEP(99) + 3 1311 ENDIF 1312 ENDIF 1313C -------------------------- 1314C Reset KEEP(100) to 0 if no 1315C buffer is used for OOC. 1316C -------------------------- 1317 IF (KEEP(99) .LT.3) KEEP(100)=0 1318 IF((dble(KEEP(100))*dble(KEEP(35))/dble(2)).GT. 1319 & (dble(1999999999)))THEN 1320 IF (PROKG) THEN 1321 WRITE(MPG,*)id%MYID,': Warning: DIM_BUF_IO might be 1322 & too big for Filesystem' 1323 ENDIF 1324 ENDIF 1325 ALLOCATE (id%OOC_INODE_SEQUENCE(KEEP(28), 1326 & id%OOC_NB_FILE_TYPE), 1327 & stat=IERR) 1328 IF ( IERR .GT. 0 ) THEN 1329 id%INFO(1) = -13 1330 id%INFO(2) = id%OOC_NB_FILE_TYPE*KEEP(28) 1331 NULLIFY(id%OOC_INODE_SEQUENCE) 1332 GOTO 112 1333 ENDIF 1334 ALLOCATE (id%OOC_TOTAL_NB_NODES(id%OOC_NB_FILE_TYPE), 1335 & stat=IERR) 1336 IF ( IERR .GT. 0 ) THEN 1337 id%INFO(1) = -13 1338 id%INFO(2) = id%OOC_NB_FILE_TYPE 1339 NULLIFY(id%OOC_TOTAL_NB_NODES) 1340 GOTO 112 1341 ENDIF 1342 ALLOCATE (id%OOC_SIZE_OF_BLOCK(KEEP(28), 1343 & id%OOC_NB_FILE_TYPE), 1344 & stat=IERR) 1345 IF ( IERR .GT. 0 ) THEN 1346 id%INFO(1) = -13 1347 id%INFO(2) = id%OOC_NB_FILE_TYPE*KEEP(28) 1348 NULLIFY(id%OOC_SIZE_OF_BLOCK) 1349 GOTO 112 1350 ENDIF 1351 ALLOCATE (id%OOC_VADDR(KEEP(28),id%OOC_NB_FILE_TYPE), 1352 & stat=IERR) 1353 IF ( IERR .GT. 0 ) THEN 1354 id%INFO(1) = -13 1355 id%INFO(2) = id%OOC_NB_FILE_TYPE*KEEP(28) 1356 NULLIFY(id%OOC_VADDR) 1357 GOTO 112 1358 ENDIF 1359 ENDIF 1360 ENDIF 1361 112 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1362 & id%COMM, id%MYID ) 1363 IF (id%INFO(1) < 0) THEN 1364C LOAD_END must be done but not OOC_END_FACTO 1365 GOTO 513 1366 ENDIF 1367 IF (I_AM_SLAVE) THEN 1368 IF (KEEP(201) .GT. 0) THEN 1369 IF ((KEEP(201).EQ.1).OR.(KEEP(201).EQ.2)) THEN 1370 CALL DMUMPS_OOC_INIT_FACTO(id,MAXS) 1371 ELSE 1372 WRITE(*,*) "Internal error in DMUMPS_FAC_DRIVER" 1373 CALL MUMPS_ABORT() 1374 ENDIF 1375 IF(id%INFO(1).LT.0)THEN 1376 GOTO 111 1377 ENDIF 1378 ENDIF 1379#if ! defined(OLD_LOAD_MECHANISM) 1380C First increment corresponds to the number of 1381C floating-point operations for subtrees allocated 1382C to the local processor. 1383 CALL DMUMPS_LOAD_UPDATE(0,.FALSE.,dble(id%COST_SUBTREES), 1384 & id%KEEP(1),id%KEEP8(1)) 1385#endif 1386 IF (id%INFO(1).LT.0) GOTO 111 1387 END IF 1388C ----------------------- 1389C Manage main workarray S 1390C ----------------------- 1391 IF ( associated (id%S) ) THEN 1392 DEALLOCATE(id%S) 1393 NULLIFY(id%S) 1394 id%KEEP8(23)=0_8 ! reset space allocated to zero 1395 ENDIF 1396#if defined (LARGEMATRICES) 1397 IF ( id%MYID .ne. MASTER ) THEN 1398#endif 1399 IF (.NOT.WK_USER_PROVIDED) THEN 1400 ALLOCATE (id%S(MAXS),stat=IERR) 1401 id%KEEP8(23) = MAXS 1402 IF ( IERR .GT. 0 ) THEN 1403 id%INFO(1) = -13 1404 CALL MUMPS_SET_IERROR(MAXS, id%INFO(2)) 1405C On some platforms (IBM for example), an 1406C allocation failure returns a non-null pointer. 1407C Therefore we nullify S 1408 NULLIFY(id%S) 1409 id%KEEP8(23)=0_8 1410 ENDIF 1411 ELSE 1412 id%S => id%WK_USER(1:id%KEEP8(24)) 1413 ENDIF 1414#if defined (LARGEMATRICES) 1415 END IF 1416#endif 1417C 1418 111 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1419 & id%COMM, id%MYID ) 1420 IF ( id%INFO(1).LT.0 ) GOTO 514 1421C -------------------------- 1422C Initialization of modules 1423C related to data management 1424C -------------------------- 1425 NB_ACTIVE_FRONTS_ESTIM = 3 1426 IF (I_AM_SLAVE) THEN 1427 CALL MUMPS_FDM_INIT('A',NB_ACTIVE_FRONTS_ESTIM, id%INFO) 1428 CALL MUMPS_FDM_INIT('F',NB_ACTIVE_FRONTS_ESTIM, id%INFO ) 1429 IF (id%INFO(1) .LT. 0 ) GOTO 114 1430#if ! defined(NO_FDM_DESCBAND) 1431C Storage of DESCBAND information 1432 CALL MUMPS_FDBD_INIT( NB_ACTIVE_FRONTS_ESTIM, id%INFO ) 1433#endif 1434#if ! defined(NO_FDM_MAPROW) 1435C Storage of MAPROW and ROOT2SON information 1436 CALL MUMPS_FMRD_INIT( NB_ACTIVE_FRONTS_ESTIM, id%INFO ) 1437#endif 1438 CALL DMUMPS_BLR_INIT_MODULE( NB_ACTIVE_FRONTS_ESTIM, id%INFO ) 1439 114 CONTINUE 1440 ENDIF 1441 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1442 & id%COMM, id%MYID ) 1443C GOTO 500: one of the above module initializations failed 1444 IF ( id%INFO(1).LT.0 ) GOTO 500 1445C 1446C 1447C Allocate space for matrix in arrowhead 1448C ====================================== 1449C 1450C CASE 1 : Matrix is assembled 1451C CASE 2 : Matrix is elemental 1452C 1453 IF ( KEEP(55) .eq. 0 ) THEN 1454C ------------------------------------ 1455C Space has been allocated already for 1456C the integer part during analysis 1457C Only slaves need the arrowheads. 1458C ------------------------------------ 1459 IF (associated( id%DBLARR)) THEN 1460 DEALLOCATE(id%DBLARR) 1461 NULLIFY(id%DBLARR) 1462 ENDIF 1463 IF ( I_AM_SLAVE .and. id%KEEP8(26) .ne. 0_8 ) THEN 1464 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = IERR ) 1465 ELSE 1466 ALLOCATE( id%DBLARR( 1 ), stat =IERR ) 1467 END IF 1468 IF ( IERR .NE. 0 ) THEN 1469 IF (LPOK) THEN 1470 WRITE(LP,*) id%MYID, 1471 & ': Allocation error for DBLARR(',id%KEEP8(26),')' 1472 ENDIF 1473 id%INFO(1)=-13 1474 CALL MUMPS_SET_IERROR(id%KEEP8(26), id%INFO(2)) 1475 NULLIFY(id%DBLARR) 1476 GOTO 100 1477 END IF 1478 ELSE 1479C ---------------------------------------- 1480C Allocate variable lists. Systematically. 1481C ---------------------------------------- 1482 IF ( associated( id%INTARR ) ) THEN 1483 DEALLOCATE( id%INTARR ) 1484 NULLIFY( id%INTARR ) 1485 END IF 1486 IF ( I_AM_SLAVE .and. id%KEEP8(27) .ne. 0_8 ) THEN 1487 ALLOCATE( id%INTARR( id%KEEP8(27) ), stat = allocok ) 1488 IF ( allocok .GT. 0 ) THEN 1489 id%INFO(1) = -13 1490 CALL MUMPS_SET_IERROR(id%KEEP8(27), id%INFO(2)) 1491 NULLIFY(id%INTARR) 1492 GOTO 100 1493 END IF 1494 ELSE 1495 ALLOCATE( id%INTARR(1),stat=allocok ) 1496 IF ( allocok .GT. 0 ) THEN 1497 id%INFO(1) = -13 1498 id%INFO(2) = 1 1499 NULLIFY(id%INTARR) 1500 GOTO 100 1501 END IF 1502 END IF 1503C ----------------------------- 1504C Allocate real values. 1505C On master, if hybrid host and 1506C no scaling, avoid the copy. 1507C ----------------------------- 1508 IF (associated( id%DBLARR)) THEN 1509 DEALLOCATE(id%DBLARR) 1510 NULLIFY(id%DBLARR) 1511 ENDIF 1512 IF ( I_AM_SLAVE ) THEN 1513 IF ( id%MYID_NODES .eq. MASTER 1514 & .AND. KEEP(46) .eq. 1 1515 & .AND. KEEP(52) .eq. 0 ) THEN 1516C -------------------------- 1517C Simple pointer association 1518C -------------------------- 1519 id%DBLARR => id%A_ELT 1520 ELSE 1521C ---------- 1522C Allocation 1523C ---------- 1524 IF ( id%KEEP8(26) .ne. 0_8 ) THEN 1525 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = allocok ) 1526 IF ( allocok .GT. 0 ) THEN 1527 id%INFO(1) = -13 1528 CALL MUMPS_SET_IERROR(id%KEEP8(26), id%INFO(2)) 1529 NULLIFY(id%DBLARR) 1530 GOTO 100 1531 END IF 1532 ELSE 1533 ALLOCATE( id%DBLARR(1), stat = allocok ) 1534 IF ( allocok .GT. 0 ) THEN 1535 id%INFO(1) = -13 1536 id%INFO(2) = 1 1537 NULLIFY(id%DBLARR) 1538 GOTO 100 1539 END IF 1540 END IF 1541 END IF 1542 ELSE 1543 ALLOCATE( id%DBLARR(1), stat = allocok ) 1544 IF ( allocok .GT. 0 ) THEN 1545 id%INFO(1) = -13 1546 id%INFO(2) = 1 1547 NULLIFY(id%DBLARR) 1548 GOTO 100 1549 END IF 1550 END IF 1551 END IF 1552C ----------------- 1553C Also prepare some 1554C data for the root 1555C ----------------- 1556 IF ( KEEP(38).NE.0 .AND. I_AM_SLAVE ) THEN 1557 CALL DMUMPS_INIT_ROOT_FAC( id%N, 1558 & id%root, id%FILS(1), KEEP(38), id%KEEP(1), id%INFO(1) ) 1559 END IF 1560C 1561C 1562 100 CONTINUE 1563C ---------------- 1564C Check for errors 1565C ---------------- 1566 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 1567 & id%COMM, id%MYID ) 1568 IF ( id%INFO(1).LT.0 ) GOTO 500 1569C 1570C ----------------------------------- 1571C 1572C DISTRIBUTION OF THE ORIGINAL MATRIX 1573C 1574C ----------------------------------- 1575C 1576C TIMINGS: computed (and printed) on the host 1577C Next line: global time for distrib(arrowheads,elts) 1578C on the host. Synchronization has been performed. 1579 IF (id%MYID.EQ.MASTER) CALL MUMPS_SECDEB(TIME) 1580C 1581 IF ( KEEP( 55 ) .eq. 0 ) THEN 1582C ---------------------------- 1583C Original matrix is assembled 1584C Arrowhead format to be used. 1585C ---------------------------- 1586C KEEP8(26) and KEEP8(27) hold the number of entries for real/integer 1587C for the matrix in arrowhead format. They have been set by the 1588C analysis phase (DMUMPS_ANA_F and DMUMPS_ANA_G) 1589C 1590C ------------------------------------------------------------------ 1591C Blocking is used for sending arrowhead records (I,J,VAL) 1592C buffer(1) is used to store number of bytes already packed 1593C buffer(2) number of records already packed 1594C KEEP(39) : Number of records (blocking factor) 1595C ------------------------------------------------------------------ 1596C 1597C ---------------------------------------- 1598C In case of parallel root compute minimum 1599C size of workspace to receive arrowheads 1600C of root node. Will be used to check that 1601C MAXS is large enough 1602C ---------------------------------------- 1603 IF (KEEP(38).NE.0 .AND. I_AM_SLAVE) THEN 1604 LWK = int(numroc( id%root%ROOT_SIZE, id%root%MBLOCK, 1605 & id%root%MYROW, 0, id%root%NPROW ),8) 1606 LWK = max( 1_8, LWK ) 1607 LWK = LWK* 1608 & int(numroc( id%root%ROOT_SIZE, id%root%NBLOCK, 1609 & id%root%MYCOL, 0, id%root%NPCOL ),8) 1610 LWK = max( 1_8, LWK ) 1611 ELSE 1612 LWK = 1_8 1613 ENDIF 1614C MAXS must be at least 1, and in case of 1615C parallel root, large enough to receive 1616C arrowheads of root. 1617 IF (MAXS .LT. int(LWK,8)) THEN 1618 id%INFO(1) = -9 1619 CALL MUMPS_SET_IERROR(LWK, id%INFO(2)) 1620 ENDIF 1621 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 1622 & id%COMM, id%MYID ) 1623 IF ( id%INFO(1).LT.0 ) GOTO 500 1624 IF ( KEEP(54) .eq. 0 ) THEN 1625C ================================================ 1626C FIRST CASE : MATRIX IS NOT INITIALLY DISTRIBUTED 1627C ================================================ 1628C A small integer workspace is needed to 1629C send the arrowheads. 1630 IF ( id%MYID .eq. MASTER ) THEN 1631 ALLOCATE(IWK(id%N), stat=allocok) 1632 IF ( allocok .NE. 0 ) THEN 1633 id%INFO(1)=-13 1634 id%INFO(2)=id%N 1635 END IF 1636#if defined(LARGEMATRICES) 1637 IF ( associated (id%S) ) THEN 1638 DEALLOCATE(id%S) 1639 NULLIFY(id%S) 1640 id%KEEP8(23)=0_8 1641 ENDIF 1642 ALLOCATE (WK(LWK),stat=IERR) 1643 IF ( IERR .GT. 0 ) THEN 1644 id%INFO(1) = -13 1645 CALL MUMPS_SET_IERROR(LWK, id%INFO(2)) 1646 write(6,*) ' PB1 ALLOC LARGEMAT' 1647 ENDIF 1648#endif 1649 ENDIF 1650 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 1651 & id%COMM, id%MYID ) 1652 IF ( id%INFO(1).LT.0 ) GOTO 500 1653 IF ( id%MYID .eq. MASTER ) THEN 1654C 1655C -------------------------------- 1656C MASTER sends arowheads using the 1657C global communicator with ranks 1658C also in global communicator 1659C IWK is used as temporary 1660C workspace of size N. 1661C -------------------------------- 1662 IF ( .not. associated( id%INTARR ) ) THEN 1663 ALLOCATE( id%INTARR( 1 ) ) 1664 ENDIF 1665 NBRECORDS = KEEP(39) 1666 IF (id%KEEP8(28) .LT. int(NBRECORDS,8)) THEN 1667 NBRECORDS = int(id%KEEP8(28)) 1668 ENDIF 1669#if defined(LARGEMATRICES) 1670 CALL DMUMPS_FACTO_SEND_ARROWHEADS(id%N, id%KEEP8(28), id%A(1), 1671 & id%IRN(1), id%JCN(1), id%SYM_PERM(1), 1672 & LSCAL, id%COLSCA(1), id%ROWSCA(1), 1673 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1), 1674 & NBRECORDS, 1675 & LP, id%COMM, id%root, KEEP,id%KEEP8, 1676 & id%FILS(1), IWK(1), ! workspace of size N 1677 & 1678 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26), 1679 & id%PTRAR(1), id%PTRAR(id%N+1), 1680 & id%FRERE_STEPS(1), id%STEP(1), WK(1), LWK, 1681 & id%ISTEP_TO_INIV2, id%I_AM_CAND, 1682 & id%CANDIDATES) 1683C 1684C write(6,*) '!!! A,IRN,JCN are freed during factorization ' 1685 DEALLOCATE (id%A) 1686 NULLIFY(id%A) 1687 DEALLOCATE (id%IRN) 1688 NULLIFY (id%IRN) 1689 DEALLOCATE (id%JCN) 1690 NULLIFY (id%JCN) 1691 IF (.NOT.WK_USER_PROVIDED) THEN 1692 ALLOCATE (id%S(MAXS),stat=IERR) 1693 id%KEEP8(23) = MAXS 1694 IF ( IERR .GT. 0 ) THEN 1695 id%INFO(1) = -13 1696 id%INFO(2) = MAXS 1697 NULLIFY(id%S) 1698 id%KEEP8(23)=0_8 1699 write(6,*) ' PB2 ALLOC LARGEMAT',MAXS 1700 CALL MUMPS_ABORT() 1701 ENDIF 1702 ELSE 1703 id%S => id%WK_USER(1:id%KEEP8(24)) 1704 ENDIF 1705 id%S(MAXS-LWK+1_8:MAXS) = WK(1_8:LWK) 1706 DEALLOCATE (WK) 1707#else 1708 CALL DMUMPS_FACTO_SEND_ARROWHEADS(id%N, id%KEEP8(28), id%A(1), 1709 & id%IRN(1), id%JCN(1), id%SYM_PERM(1), 1710 & LSCAL, id%COLSCA(1), id%ROWSCA(1), 1711 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1), 1712 & NBRECORDS, 1713 & LP, id%COMM, id%root, KEEP(1),id%KEEP8(1), 1714 & id%FILS(1), IWK(1), 1715 & 1716 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26), 1717 & id%PTRAR(1), id%PTRAR(id%N+1), 1718 & id%FRERE_STEPS(1), id%STEP(1), id%S(1), MAXS, 1719 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1), 1720 & id%CANDIDATES(1,1) ) 1721#endif 1722 DEALLOCATE(IWK) 1723 ELSE 1724 NBRECORDS = KEEP(39) 1725 IF (id%KEEP8(28) .LT. int(NBRECORDS,8)) THEN 1726 NBRECORDS = int(id%KEEP8(28)) 1727 ENDIF 1728 CALL DMUMPS_FACTO_RECV_ARROWHD2( id%N, 1729 & id%DBLARR(1), id%KEEP8(26), 1730 & id%INTARR(1), id%KEEP8(27), 1731 & id%PTRAR( 1 ), 1732 & id%PTRAR(id%N+1), 1733 & KEEP( 1 ), id%KEEP8(1), id%MYID, id%COMM, 1734 & NBRECORDS, 1735 & 1736 & id%S(1), MAXS, 1737 & id%root, 1738 & id%PROCNODE_STEPS(1), id%NSLAVES, 1739 & id%SYM_PERM(1), id%FRERE_STEPS(1), id%STEP(1), 1740 & id%INFO(1), id%INFO(2) ) 1741 ENDIF 1742 ELSE 1743C 1744C ============================================= 1745C SECOND CASE : MATRIX IS INITIALLY DISTRIBUTED 1746C ============================================= 1747C Timing on master. 1748 IF (id%MYID.EQ.MASTER) THEN 1749 CALL MUMPS_SECDEB(TIME) 1750 END IF 1751 IF ( I_AM_SLAVE ) THEN 1752C --------------------------------------------------- 1753C In order to have possibly IRN_loc/JCN_loc/A_loc 1754C of size 0, avoid to pass them inside REDISTRIBUTION 1755C and pass id instead 1756C NZ_locMAX8 gives as a maximum buffer size (send/recv) used 1757C an upper bound to limit buffers on small matrices 1758C --------------------------------------------------- 1759 CALL MPI_ALLREDUCE(id%KEEP8(29), NZ_locMAX8, 1, MPI_INTEGER8, 1760 & MPI_MAX, id%COMM_NODES, IERR) 1761 NBRECORDS = KEEP(39) 1762 IF (NZ_locMAX8 .LT. int(NBRECORDS,8)) THEN 1763 NBRECORDS = int(NZ_locMAX8) 1764 ENDIF 1765 CALL DMUMPS_REDISTRIBUTION( id%N, 1766 & id%KEEP8(29), 1767 & id, 1768 & id%DBLARR(1), id%KEEP8(26), id%INTARR(1), 1769 & id%KEEP8(27), id%PTRAR(1), id%PTRAR(id%N+1), 1770 & KEEP(1), id%KEEP8(1), id%MYID_NODES, 1771 & id%COMM_NODES, NBRECORDS, 1772 & id%S(1), MAXS, id%root, id%PROCNODE_STEPS(1), 1773 & id%NSLAVES, id%SYM_PERM(1), id%STEP(1), 1774 & id%ICNTL(1), id%INFO(1), NSEND8, NLOCAL8, 1775 & id%ISTEP_TO_INIV2(1), 1776 & id%CANDIDATES(1,1) ) 1777 IF ( ( KEEP(52).EQ.7 ).OR. (KEEP(52).EQ.8) ) THEN 1778C ------------------------------------------------- 1779C In that case, scaling arrays have been allocated 1780C on all processors. They were useful for matrix 1781C distribution. But we now really only need them 1782C on the host. In case of distributed solution, we 1783C will have to broadcast either ROWSCA or COLSCA 1784C (depending on MTYPE) but this is done later. 1785C 1786C In other words, on exit from the factorization, 1787C we want to have scaling arrays available only 1788C on the host. 1789C ------------------------------------------------- 1790 IF ( id%MYID > 0 ) THEN 1791 IF (associated(id%ROWSCA)) THEN 1792 DEALLOCATE(id%ROWSCA) 1793 NULLIFY(id%ROWSCA) 1794 ENDIF 1795 IF (associated(id%COLSCA)) THEN 1796 DEALLOCATE(id%COLSCA) 1797 NULLIFY(id%COLSCA) 1798 ENDIF 1799 ENDIF 1800 ENDIF 1801#if defined(LARGEMATRICES) 1802C deallocate id%IRN_loc, id%JCN(loc) to free extra space 1803C Note that in this case IRN_loc cannot be used 1804C anymore during the solve phase for IR and Error analysis. 1805 IF (associated(id%IRN_loc)) THEN 1806 DEALLOCATE(id%IRN_loc) 1807 NULLIFY(id%IRN_loc) 1808 ENDIF 1809 IF (associated(id%JCN_loc)) THEN 1810 DEALLOCATE(id%JCN_loc) 1811 NULLIFY(id%JCN_loc) 1812 ENDIF 1813 IF (associated(id%A_loc)) THEN 1814 DEALLOCATE(id%A_loc) 1815 NULLIFY(id%A_loc) 1816 ENDIF 1817 write(6,*) ' Warning :', 1818 & ' id%A_loc, IRN_loc, JCN_loc deallocated !!! ' 1819#endif 1820 IF (PROK) THEN 1821 WRITE(MP,120) NLOCAL8, NSEND8 1822 END IF 1823 END IF 1824 IF ( KEEP(46) .eq. 0 .AND. id%MYID.eq.MASTER ) THEN 1825C ------------------------------ 1826C The host is not working -> had 1827C no data from initial matrix 1828C ------------------------------ 1829 NSEND8 = 0_8 1830 NLOCAL8 = 0_8 1831 END IF 1832C -------------------------- 1833C Put into some info/infog ? 1834C -------------------------- 1835 CALL MPI_REDUCE( NSEND8, NSEND_TOT8, 1, MPI_INTEGER8, 1836 & MPI_SUM, MASTER, id%COMM, IERR ) 1837 CALL MPI_REDUCE( NLOCAL8, NLOCAL_TOT8, 1, MPI_INTEGER8, 1838 & MPI_SUM, MASTER, id%COMM, IERR ) 1839 IF ( PROKG ) THEN 1840 WRITE(MPG,125) NLOCAL_TOT8, NSEND_TOT8 1841 END IF 1842C 1843C ------------------------- 1844C Check for possible errors 1845C ------------------------- 1846 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1847 & id%COMM, id%MYID ) 1848 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500 1849C 1850 ENDIF 1851 ELSE 1852C ------------------- 1853C Matrix is elemental, 1854C provided on the 1855C master only 1856C ------------------- 1857 IF ( id%MYID.eq.MASTER) 1858 & CALL DMUMPS_MAXELT_SIZE( id%ELTPTR(1), 1859 & id%NELT, 1860 & MAXELT_SIZE ) 1861C 1862C Perform the distribution of the elements. 1863C A this point, 1864C PTRAIW/PTRARW have been computed. 1865C INTARR/DBLARR have been allocated 1866C ELTPROC gives the mapping of elements 1867C 1868 CALL DMUMPS_ELT_DISTRIB( id%N, id%NELT, id%KEEP8(30), 1869 & id%COMM, id%MYID, 1870 & id%NSLAVES, id%PTRAR(1), 1871 & id%PTRAR(id%NELT+2), 1872 & id%INTARR(1), id%DBLARR(1), id%KEEP8(27), id%KEEP8(26), 1873 & id%KEEP(1), id%KEEP8(1), MAXELT_SIZE, 1874 & id%FRTPTR(1), id%FRTELT(1), 1875 & id%S(1), MAXS, id%FILS(1), 1876 & id, id%root ) 1877C ---------------- 1878C Broadcast errors 1879C ---------------- 1880 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1881 & id%COMM, id%MYID ) 1882 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500 1883 END IF ! Element entry 1884C ------------------------ 1885C Time the redistribution: 1886C ------------------------ 1887 IF ( id%MYID.EQ.MASTER) THEN 1888 CALL MUMPS_SECFIN(TIME) 1889 id%DKEEP(93) = TIME 1890 IF (PROKG) WRITE(MPG,160) TIME 1891 END IF 1892C 1893C TIMINGS: 1894C Next line: elapsed time for factorization 1895 IF (id%MYID.EQ.MASTER) CALL MUMPS_SECDEB(TIME) 1896C 1897C Allocate buffers on the slaves 1898C ============================== 1899C 1900 IF ( I_AM_SLAVE ) THEN 1901 CALL DMUMPS_BUF_INI_MYID(id%MYID_NODES) 1902C 1903C Some buffers are required to pack/unpack data and for 1904C receiving MPI messages. 1905C For packing/unpacking : the buffer must be large 1906C enough to send several messages while receives might not 1907C be posted yet. 1908C It is assumed that the size of an integer is held in KEEP(34) 1909C while the size of a complex is held in KEEP(35). 1910C BUFR and LBUFR are declared integers, since byte is not 1911C a standard datatype. 1912C We now use KEEP(43) and KEEP(44) as estimated at analysis 1913C to allocate appropriate buffer sizes. 1914C 1915C Reception buffer 1916C ---------------- 1917 DMUMPS_LBUFR_BYTES8 = int(KEEP( 44 ),8) * int(KEEP( 35 ), 8) 1918C ------------------- 1919C Ensure a reasonable 1920C buffer size 1921C ------------------- 1922 DMUMPS_LBUFR_BYTES8 = max( DMUMPS_LBUFR_BYTES8, 1923 & 100000_8 ) 1924C 1925C If there is pivoting, size of the message might still increase. 1926C We use a relaxation (so called PERLU) to increase the estimate. 1927C 1928C Note: PERLU is a global estimate for pivoting. 1929C It may happen that one large contribution block size is increased by more than that. 1930C This is why we use an extra factor 2 relaxation coefficient for the relaxation of 1931C the reception buffer in the case where pivoting is allowed. 1932C A more dynamic strategy could be applied: if message to 1933C be received is larger than expected, reallocate a larger 1934C buffer. (But this won't work with IRECV.) 1935C Finally, one may want (as we are currently doing it for moste messages) 1936C to cut large messages into a series of smaller ones. 1937C 1938 PERLU = KEEP( 12 ) 1939C For hybrid scheduling (strategy 5), Abdou 1940C wants a minimal amount of freedom even for 1941C small/negative PERLU values. 1942C 1943 IF (KEEP(48).EQ.5) THEN 1944 MIN_PERLU=2 1945 ELSE 1946 MIN_PERLU=0 1947 ENDIF 1948C 1949 DMUMPS_LBUFR_BYTES8 = DMUMPS_LBUFR_BYTES8 1950 & + int( 2.0D0 * dble(max(PERLU,MIN_PERLU))* 1951 & dble(DMUMPS_LBUFR_BYTES8)/100D0, 8) 1952 DMUMPS_LBUFR_BYTES8 = min(DMUMPS_LBUFR_BYTES8, 1953 & int(huge (KEEP(43))-100,8)) 1954 DMUMPS_LBUFR_BYTES = int( DMUMPS_LBUFR_BYTES8 ) 1955 IF (KEEP(48)==5) THEN 1956C Since the buffer is allocated, use 1957C it as the constraint for memory/granularity 1958C in hybrid scheduler 1959C 1960 id%KEEP8(21) = id%KEEP8(22) + 1961 & int( dble(max(PERLU,MIN_PERLU))* 1962 & dble(id%KEEP8(22))/100D0,8) 1963 ENDIF 1964C 1965C Now estimate the size for the buffer for asynchronous 1966C sends of contribution blocks (so called CB). We want to be able to send at 1967C least KEEP(213)/100 (two in general) messages at the 1968C same time. 1969C 1970C Send buffer 1971C ----------- 1972 DMUMPS_LBUF8 = int( dble(KEEP(213)) / 100.0D0 * 1973 & dble(KEEP(43)) * dble(KEEP(35)), 8 ) 1974 DMUMPS_LBUF8 = max( DMUMPS_LBUF8, 100000_8 ) 1975 DMUMPS_LBUF8 = DMUMPS_LBUF8 1976 & + int( 2.0D0 * dble(max(PERLU,MIN_PERLU))* 1977 & dble(DMUMPS_LBUF8)/100D0, 8) 1978C Make DMUMPS_LBUF8 small enough to be stored in a standard integer 1979 DMUMPS_LBUF8 = min(DMUMPS_LBUF8, int(huge (KEEP(43))-100,8)) 1980C 1981C No reason to have send buffer smaller than receive buffer. 1982C This should never occur with the formulas above but just 1983C in case: 1984 DMUMPS_LBUF8 = max(DMUMPS_LBUF8, DMUMPS_LBUFR_BYTES8+3*KEEP(34)) 1985 DMUMPS_LBUF = int(DMUMPS_LBUF8) 1986 IF(id%KEEP(48).EQ.4)THEN 1987 DMUMPS_LBUFR_BYTES=DMUMPS_LBUFR_BYTES*5 1988 DMUMPS_LBUF=DMUMPS_LBUF*5 1989 ENDIF 1990C 1991C Estimate size of buffer for small messages 1992C Each node can send ( NSLAVES - 1 ) messages to (NSLAVES-1) nodes 1993C 1994C KEEP(56) is the number of nodes of level II. 1995C Messages will be sent for the symmetric case 1996C for synchronisation issues. 1997C 1998C We take an upperbound 1999C 2000 DMUMPS_LBUF_INT = ( KEEP(56) + id%NSLAVES * id%NSLAVES ) * 5 2001 & * KEEP(34) 2002 IF ( KEEP( 38 ) .NE. 0 ) THEN 2003C 2004C 2005 KKKK = MUMPS_PROCNODE( id%PROCNODE_STEPS(id%STEP(KEEP(38))), 2006 & id%NSLAVES ) 2007 IF ( KKKK .EQ. id%MYID_NODES ) THEN 2008 DMUMPS_LBUF_INT = DMUMPS_LBUF_INT + 4 * KEEP(34) * 2009 & ( id%NSLAVES + id%NE_STEPS(id%STEP(KEEP(38))) 2010 & + min(KEEP(56), id%NE_STEPS(id%STEP(KEEP(38)))) * id%NSLAVES 2011 & ) 2012 END IF 2013 END IF 2014 IF ( PROK ) THEN 2015 WRITE( MP, 9999 ) DMUMPS_LBUFR_BYTES, 2016 & DMUMPS_LBUF, DMUMPS_LBUF_INT 2017 END IF 2018 9999 FORMAT( /,' Allocated buffers',/,' ------------------',/, 2019 & ' Size of reception buffer in bytes ...... = ', I10, 2020 & /, 2021 & ' Size of async. emission buffer (bytes).. = ', I10,/, 2022 & ' Small emission buffer (bytes) .......... = ', I10) 2023C --------------------------- 2024C Allocate the 2 send buffers 2025C --------------------------- 2026 CALL DMUMPS_BUF_ALLOC_SMALL_BUF( DMUMPS_LBUF_INT, IERR ) 2027 IF ( IERR .NE. 0 ) THEN 2028 id%INFO(1)= -13 2029C convert to size in integer id%INFO(2)= DMUMPS_LBUF_INT 2030 id%INFO(2)= (DMUMPS_LBUF_INT+KEEP(34)-1)/KEEP(34) 2031 IF (LPOK) THEN 2032 WRITE(LP,*) id%MYID, 2033 & ':Allocation error in DMUMPS_BUF_ALLOC_SMALL_BUF' 2034 & ,id%INFO(2) 2035 ENDIF 2036 GO TO 110 2037 END IF 2038 CALL DMUMPS_BUF_ALLOC_CB( DMUMPS_LBUF, IERR ) 2039 IF ( IERR .NE. 0 ) THEN 2040 id%INFO(1)= -13 2041C convert to size in integer id%INFO(2)= DMUMPS_LBUF 2042 id%INFO(2)= (DMUMPS_LBUF+KEEP(34)-1)/KEEP(34) 2043 IF (LPOK) THEN 2044 WRITE(LP,*) id%MYID, 2045 & ': Allocation error in DMUMPS_BUF_ALLOC_CB' 2046 & ,id%INFO(2) 2047 ENDIF 2048 GO TO 110 2049 END IF 2050C ----------------------------- 2051C Allocate reception buffer and 2052C keep it in the structure 2053C ----------------------------- 2054 id%LBUFR_BYTES = DMUMPS_LBUFR_BYTES 2055 id%LBUFR = (DMUMPS_LBUFR_BYTES+KEEP(34)-1)/KEEP(34) 2056 IF (associated(id%BUFR)) DEALLOCATE(id%BUFR) 2057 ALLOCATE( id%BUFR( id%LBUFR ),stat=IERR ) 2058 IF ( IERR .NE. 0 ) THEN 2059 id%INFO(1)=-13 2060 id%INFO(2)=id%LBUFR 2061 NULLIFY(id%BUFR) 2062 IF (LPOK) THEN 2063 WRITE(LP,*) id%MYID, 2064 & ': Allocation error for id%BFUR(', id%LBUFR,')', IERR 2065 ENDIF 2066 GO TO 110 2067 END IF 2068C 2069C The buffers are declared INTEGER, because BYTE is not a 2070C standard data type. The sizes are in bytes, so we allocate 2071C a number of INTEGERs. The allocated size in integer is the 2072C size in bytes divided by KEEP(34) 2073C ------------------------------- 2074C Allocate IS. IS will contain 2075C factors and contribution blocks 2076C ------------------------------- 2077C Relax workspace at facto now 2078C PERLU might have been modified reload initial value 2079 PERLU = KEEP( 12 ) 2080 IF (KEEP(201).GT.0) THEN 2081C OOC panel or non panel (note that 2082C KEEP(15)=KEEP(225) if non panel) 2083 MAXIS_ESTIM = KEEP(225) 2084 ELSE 2085C In-core or reals for factors not stored 2086 MAXIS_ESTIM = KEEP(15) 2087 ENDIF 2088 MAXIS = max( 1, 2089 & MAXIS_ESTIM + 2 * max(PERLU,10) * 2090 & ( MAXIS_ESTIM / 100 + 1 ) 2091 & ) 2092 IF (associated(id%IS)) DEALLOCATE( id%IS ) 2093 ALLOCATE( id%IS( MAXIS ), stat = IERR ) 2094 IF ( IERR .NE. 0 ) THEN 2095 id%INFO(1)=-13 2096 id%INFO(2)=MAXIS 2097 NULLIFY(id%IS) 2098 IF (LPOK) THEN 2099 WRITE(*,*) id%MYID,': Allocation error for id%IS(',MAXIS,')' 2100 ENDIF 2101 GO TO 110 2102 END IF 2103 LIW = MAXIS 2104C ----------------------- 2105C Allocate PTLUST_S. PTLUST_S 2106C is used by solve later 2107C ----------------------- 2108 IF (associated( id%PTLUST_S )) DEALLOCATE(id%PTLUST_S) 2109 ALLOCATE( id%PTLUST_S( id%KEEP(28) ), stat = IERR ) 2110 IF ( IERR .NE. 0 ) THEN 2111 id%INFO(1)=-13 2112 id%INFO(2)=id%KEEP(28) 2113 IF (LPOK) THEN 2114 WRITE(LP,*) id%MYID, 2115 & ': Allocation error for id%PTLUST_S(', id%KEEP(28),')' 2116 ENDIF 2117 NULLIFY(id%PTLUST_S) 2118 GOTO 100 2119 END IF 2120 IF (associated( id%PTRFAC )) DEALLOCATE(id%PTRFAC) 2121 ALLOCATE( id%PTRFAC( id%KEEP(28) ), stat = IERR ) 2122 IF ( IERR .NE. 0 ) THEN 2123 id%INFO(1)=-13 2124 id%INFO(2)=id%KEEP(28) 2125 NULLIFY(id%PTRFAC) 2126 IF (LPOK) THEN 2127 WRITE(LP,*) id%MYID, 2128 & ': Allocation error for id%PTRFAC(', id%KEEP(28),')' 2129 ENDIF 2130 GOTO 100 2131 END IF 2132C ----------------------------- 2133C Reserve temporary workspace : 2134C IPOOL, PTRWB, ITLOC, PTRIST 2135C PTRWB will be subdivided again 2136C in routine DMUMPS_FAC_B 2137C ----------------------------- 2138 PTRIST = 1 2139 PTRWB = PTRIST + id%KEEP(28) 2140 ITLOC = PTRWB + 3 * id%KEEP(28) 2141C Fwd in facto: ITLOC of size id%N + id%KEEP(253) 2142 IPOOL = ITLOC + id%N + id%KEEP(253) 2143C 2144C -------------------------------- 2145C NA(1) is an upperbound for LPOOL 2146C -------------------------------- 2147C Structure of the pool: 2148C ____________________________________________________ 2149C | Subtrees | | Top nodes | 1 2 3 | 2150C ---------------------------------------------------- 2151 LPOOL = MUMPS_GET_POOL_LENGTH(id%NA(1), id%KEEP(1),id%KEEP8(1)) 2152 ALLOCATE( IWK( IPOOL + LPOOL - 1 ), stat = IERR ) 2153 IF ( IERR .NE. 0 ) THEN 2154 id%INFO(1)=-13 2155 id%INFO(2)=IPOOL + LPOOL - 1 2156 IF (LPOK) THEN 2157 WRITE(LP,*) id%MYID, 2158 & ': Allocation error for IWK(',IPOOL+LPOOL-1,')' 2159 ENDIF 2160 GOTO 110 2161 END IF 2162 ALLOCATE(IWK8( 2 * id%KEEP(28)), stat = IERR) 2163 IF ( IERR .NE. 0 ) THEN 2164 id%INFO(1)=-13 2165 id%INFO(2)=2 * id%KEEP(28) 2166 IF (LPOK) THEN 2167 WRITE(LP,*) id%MYID, 2168 & ': Allocation error for IWKB(', 2*id%KEEP(28),')' 2169 ENDIF 2170 GOTO 110 2171 END IF 2172C 2173C Return to SPMD 2174C 2175 ENDIF 2176C 2177 110 CONTINUE 2178C ---------------- 2179C Broadcast errors 2180C ---------------- 2181 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 2182 & id%COMM, id%MYID ) 2183 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500 2184C 2185 IF ( I_AM_SLAVE ) THEN 2186C 2187C Store size of receive buffers in module 2188 CALL DMUMPS_BUF_DIST_IRECV_SIZE( id%LBUFR_BYTES ) 2189 IF (PROK) THEN 2190 WRITE( MP, 170 ) MAXS, MAXIS, id%KEEP8(12), KEEP(15), 2191 & id%KEEP8(26), id%KEEP8(27), id%KEEP8(11), KEEP(26), KEEP(27) 2192 ENDIF 2193 END IF 2194C 2195C SPMD 2196C 2197 PERLU_ON = .TRUE. 2198 CALL DMUMPS_MAX_MEM( id%KEEP(1), id%KEEP8(1), 2199 & id%MYID, id%N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28), 2200 & id%KEEP8(30), 2201 & id%NSLAVES, TOTAL_MBYTES, .FALSE., id%KEEP(201), 2202 & PERLU_ON, TOTAL_BYTES) 2203 id%INFO(16) = TOTAL_MBYTES 2204 IF ( PROK ) THEN 2205 WRITE(MP,'(A,I10) ') 2206 & ' ** Space in MBYTES used during factorization :', 2207 & id%INFO(16) 2208 END IF 2209C 2210C ---------------------------------------------------- 2211C Centralize memory statistics on the host 2212C id%INFOG(18) = size of mem in bytes for facto, 2213C for the processor using largest memory 2214C id%INFOG(19) = size of mem in bytes for facto, 2215C sum over all processors 2216C ---------------------------------------------------- 2217C 2218 CALL MUMPS_MEM_CENTRALIZE( id%MYID, id%COMM, 2219 & id%INFO(16), id%INFOG(18), IRANK ) 2220 IF ( PROKG ) THEN 2221 WRITE( MPG,'(A,I10) ') 2222 & ' ** Memory relaxation parameter ( ICNTL(14) ) :', 2223 & KEEP(12) 2224 WRITE( MPG,'(A,I10) ') 2225 & ' ** Rank of processor needing largest memory in facto :', 2226 & IRANK 2227 WRITE( MPG,'(A,I10) ') 2228 & ' ** Space in MBYTES used by this processor for facto :', 2229 & id%INFOG(18) 2230 IF ( KEEP(46) .eq. 0 ) THEN 2231 WRITE( MPG,'(A,I10) ') 2232 & ' ** Avg. Space in MBYTES per working proc during facto :', 2233 & ( id%INFOG(19)-id%INFO(16) ) / id%NSLAVES 2234 ELSE 2235 WRITE( MPG,'(A,I10) ') 2236 & ' ** Avg. Space in MBYTES per working proc during facto :', 2237 & id%INFOG(19) / id%NSLAVES 2238 END IF 2239 END IF 2240C -------------------------------------------- 2241C Before calling the main driver, DMUMPS_FAC_B, 2242C some statistics should be initialized to 0, 2243C even on the host node because they will be 2244C used in REDUCE operations afterwards. 2245C -------------------------------------------- 2246C Size of factors written. It will be set to POSFAC in 2247C IC, otherwise we accumulate written factors in it. 2248 id%KEEP8(31)= 0_8 2249C Number of entries in factors 2250 id%KEEP8(10) = 0_8 2251C KEEP8(8) will hold the volume of extra copies due to 2252C in-place stacking in fac_mem_stack.F 2253 id%KEEP8(8)=0_8 2254 id%INFO(9:14)=0 2255 RINFO(2:3)=ZERO 2256 IF ( I_AM_SLAVE ) THEN 2257C ------------------------------------ 2258C Call effective factorization routine 2259C ------------------------------------ 2260 IF ( KEEP(55) .eq. 0 ) THEN 2261 LDPTRAR = id%N 2262 ELSE 2263 LDPTRAR = id%NELT + 1 2264 END IF 2265 IF ( id%KEEP(55) .NE. 0 ) THEN 2266 NELT_arg = id%NELT 2267 ELSE 2268C ------------------------------ 2269C Use size 1 to avoid complaints 2270C when using check bound options 2271C ------------------------------ 2272 NELT_arg = 1 2273 END IF 2274 CALL DMUMPS_FAC_B( id%N, NSTEPS,id%S(1),MAXS,id%IS(1),LIW, 2275 & id%SYM_PERM(1),id%NA(1),id%LNA,id%NE_STEPS(1), 2276 & id%ND_STEPS(1),id%FILS(1),id%STEP(1),id%FRERE_STEPS(1), 2277 & id%DAD_STEPS(1),id%CANDIDATES(1,1),id%ISTEP_TO_INIV2(1), 2278 & id%TAB_POS_IN_PERE(1,1), 2279 & id%PTRAR(1), 2280 & LDPTRAR,IWK(PTRIST), 2281 & id%PTLUST_S(1), id%PTRFAC(1), IWK(PTRWB), IWK8, IWK(ITLOC), 2282 & RHS_MUMPS(1), IWK(IPOOL), LPOOL, CNTL1, ICNTL(1), id%INFO(1), 2283 & RINFO(1),KEEP(1),id%KEEP8(1), id%PROCNODE_STEPS(1), 2284 & id%NSLAVES, 2285 & id%COMM_NODES, id%MYID, id%MYID_NODES, id%BUFR(1),id%LBUFR, 2286 & id%LBUFR_BYTES, id%INTARR(1),id%DBLARR(1), id%root, NELT_arg, 2287 & id%FRTPTR(1), id%FRTELT(1),id%COMM_LOAD, id%ASS_IRECV,SEUIL, 2288 & SEUIL_LDLT_NIV2, id%MEM_DIST(0), id%DKEEP(1), 2289 & id%PIVNUL_LIST(1),LPN_LIST 2290 & ,id%LRGROUPS(1) 2291 & ) 2292 IF ( PROK .and. KEEP(38) .ne. 0 ) THEN 2293 WRITE( MP, 175 ) KEEP(49) 2294 END IF 2295C 2296C ------------------------------ 2297C Deallocate temporary workspace 2298C ------------------------------ 2299 DEALLOCATE( IWK ) 2300 DEALLOCATE( IWK8 ) 2301 ENDIF 2302C --------------------------------- 2303C Free some workspace corresponding 2304C to the original matrix in 2305C arrowhead or elemental format. 2306C ----- 2307C Note : INTARR was not allocated 2308C during factorization in the case 2309C of an assembled matrix. 2310C --------------------------------- 2311 IF ( KEEP(55) .eq. 0 ) THEN 2312C 2313C ---------------- 2314C Assembled matrix 2315C ---------------- 2316 IF (associated( id%DBLARR)) THEN 2317 DEALLOCATE(id%DBLARR) 2318 NULLIFY(id%DBLARR) 2319 ENDIF 2320C 2321 ELSE 2322C 2323C ---------------- 2324C Elemental matrix 2325C ---------------- 2326 DEALLOCATE( id%INTARR) 2327 NULLIFY( id%INTARR ) 2328C ------------------------------------ 2329C For the master from an hybrid host 2330C execution without scaling, then real 2331C values have not been copied ! 2332C ------------------------------------- 2333 IF ( id%MYID_NODES .eq. MASTER 2334 & .AND. KEEP(46) .eq. 1 2335 & .AND. KEEP(52) .eq. 0 ) THEN 2336 NULLIFY( id%DBLARR ) 2337 ELSE 2338C next line should be enough but ... 2339C DEALLOCATE( id%DBLARR ) 2340 IF (associated( id%DBLARR)) THEN 2341 DEALLOCATE(id%DBLARR) 2342 NULLIFY(id%DBLARR) 2343 ENDIF 2344 END IF 2345 END IF 2346C Memroy statistics 2347C ----------------------------------- 2348C If QR (Keep(19)) is not zero, and if 2349C the host does not have the information 2350C (ie is not slave), send information 2351C computed on the slaves during facto 2352C to the host. 2353C ----------------------------------- 2354 IF ( KEEP(19) .NE. 0 ) THEN 2355 IF ( KEEP(46) .NE. 1 ) THEN 2356C Host was not working during facto_root 2357C Send him the information 2358 IF ( id%MYID .eq. MASTER ) THEN 2359 CALL MPI_RECV( KEEP(17), 1, MPI_INTEGER, 1, DEFIC_TAG, 2360 & id%COMM, STATUS, IERR ) 2361 ELSE IF ( id%MYID .EQ. 1 ) THEN 2362 CALL MPI_SEND( KEEP(17), 1, MPI_INTEGER, 0, DEFIC_TAG, 2363 & id%COMM, IERR ) 2364 END IF 2365 END IF 2366 END IF 2367C --------------------------- 2368C Deallocate send buffers 2369C They will be reallocated 2370C in the solve. 2371C ------------------------ 2372 IF (associated(id%BUFR)) THEN 2373 DEALLOCATE(id%BUFR) 2374 NULLIFY(id%BUFR) 2375 END IF 2376 CALL DMUMPS_BUF_DEALL_CB( IERR ) 2377 CALL DMUMPS_BUF_DEALL_SMALL_BUF( IERR ) 2378C//PIV 2379 IF (KEEP(219).NE.0) THEN 2380 CALL DMUMPS_BUF_DEALL_MAX_ARRAY() 2381 ENDIF 2382C 2383C Check for errors, 2384C every slave is aware of an error. 2385C If master is included in computations, the call below should 2386C not be necessary. 2387 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 2388 & id%COMM, id%MYID ) 2389C 2390 CALL DMUMPS_EXTRACT_SCHUR_REDRHS(id) 2391 IF (KEEP(201) .GT. 0) THEN 2392 IF ((KEEP(201).EQ.1) .OR. (KEEP(201).EQ.2)) THEN 2393 IF ( I_AM_SLAVE ) THEN 2394 CALL DMUMPS_OOC_CLEAN_PENDING(IERR) 2395 IF(IERR.LT.0)THEN 2396 id%INFO(1)=IERR 2397 id%INFO(2)=0 2398 ENDIF 2399 ENDIF 2400 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 2401 & id%COMM, id%MYID ) 2402C We want to collect statistics even in case of 2403C error to understand if it is due to numerical 2404C issues 2405CC IF ( id%INFO(1) < 0 ) GOTO 500 2406 END IF 2407 END IF 2408 IF (id%MYID.EQ.MASTER) THEN 2409 CALL MUMPS_SECFIN(TIME) 2410 id%DKEEP(94)=TIME 2411 IF ( PROKG ) THEN 2412 IF (id%INFO(1) .GE.0) THEN 2413 WRITE(MPG,180) TIME 2414 ELSE 2415 WRITE(MPG,185) TIME 2416 ENDIF 2417 ENDIF 2418 ENDIF 2419CC Made available to users on release 4.4 (April 2005) 2420 PERLU_ON = .TRUE. 2421 CALL DMUMPS_MAX_MEM( id%KEEP(1),id%KEEP8(1), 2422 & id%MYID, N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28), 2423 & id%KEEP8(30), 2424 & id%NSLAVES, TOTAL_MBYTES, .TRUE., id%KEEP(201), 2425 & PERLU_ON, TOTAL_BYTES) 2426 id%KEEP8(7) = TOTAL_BYTES 2427C -- INFO(22) holds the effective space (in Mbytes) used by MUMPS 2428C -- (it includes part of WK_USER used if provided by user) 2429 id%INFO(22) = TOTAL_MBYTES 2430 IF (PROK ) THEN 2431 WRITE(MP,'(A,I10) ') 2432 & ' ** Effective minimum Space in MBYTES for facto :', 2433 & TOTAL_MBYTES 2434 ENDIF 2435C 2436 IF (I_AM_SLAVE) THEN 2437 K67 = id%KEEP8(67) 2438 K68 = id%KEEP8(68) 2439 K69 = id%KEEP8(69) 2440 ELSE 2441 K67 = 0_8 2442 K68 = 0_8 2443 K69 = 0_8 2444 ENDIF 2445C -- Save the number of entries effectively used 2446C in main working array S 2447 CALL MUMPS_SETI8TOI4(K67,id%INFO(21)) 2448C 2449 CALL DMUMPS_AVGMAX_STAT8(PROKG, MPG, K67, id%NSLAVES, 2450 & id%COMM, "effective space used in S (KEEP8(67)) =") 2451C 2452C ---------------------------------------------------- 2453C Centralize memory statistics on the host 2454C 2455C INFOG(21) = size of mem (Mbytes) for facto, 2456C for the processor using largest memory 2457C INFOG(22) = size of mem (Mbytes) for facto, 2458C sum over all processors 2459C ---------------------------------------------------- 2460C 2461 CALL MUMPS_MEM_CENTRALIZE( id%MYID, id%COMM, 2462 & TOTAL_MBYTES, id%INFOG(21), IRANK ) 2463 IF ( PROKG ) THEN 2464 WRITE( MPG,'(A,I10) ') 2465 & ' ** EFF Min: Rank of processor needing largest memory :', 2466 & IRANK 2467 WRITE( MPG,'(A,I10) ') 2468 & ' ** EFF Min: Space in MBYTES used by this processor :', 2469 & id%INFOG(21) 2470 IF ( KEEP(46) .eq. 0 ) THEN 2471 WRITE( MPG,'(A,I10) ') 2472 & ' ** EFF Min: Avg. Space in MBYTES per working proc :', 2473 & ( id%INFOG(22)-TOTAL_MBYTES ) / id%NSLAVES 2474 ELSE 2475 WRITE( MPG,'(A,I10) ') 2476 & ' ** EFF Min: Avg. Space in MBYTES per working proc :', 2477 & id%INFOG(22) / id%NSLAVES 2478 END IF 2479 END IF 2480* save statistics in KEEP array. 2481 KEEP(33) = id%INFO(11) ! this should be the other way round 2482C 2483C Sum RINFO(2) : total number of flops for assemblies 2484C Sum RINFO(3) : total number of flops for eliminations 2485C 2486C Should work even if the master does some work 2487C 2488 CALL MPI_REDUCE( RINFO(2), RINFOG(2), 2, 2489 & MPI_DOUBLE_PRECISION, 2490 & MPI_SUM, MASTER, id%COMM, IERR) 2491C Reduce needed to dimension small working array 2492C on all procs during DMUMPS_GATHER_SOLUTION 2493 KEEP(247) = 0 2494 CALL MPI_REDUCE( KEEP(246), KEEP(247), 1, MPI_INTEGER, 2495 & MPI_MAX, MASTER, id%COMM, IERR) 2496C 2497C Reduce compression times: get max compression times 2498 CALL MPI_REDUCE( id%DKEEP(97), id%DKEEP(98), 1, 2499 & MPI_DOUBLE_PRECISION, 2500 & MPI_MAX, MASTER, id%COMM, IERR) 2501C 2502 CALL MPI_REDUCE( RINFO(2), RINFOG(2), 2, 2503 & MPI_DOUBLE_PRECISION, 2504 & MPI_SUM, MASTER, id%COMM, IERR) 2505 CALL MUMPS_REDUCEI8( id%KEEP8(31),id%KEEP8(6), MPI_SUM, 2506 & MASTER, id%COMM ) 2507 CALL MUMPS_SETI8TOI4(id%KEEP8(6), INFOG(9)) 2508 CALL MPI_REDUCE( id%INFO(10), INFOG(10), 1, MPI_INTEGER, 2509 & MPI_SUM, MASTER, id%COMM, IERR) 2510C Use MPI_MAX for this one to get largest front size 2511 CALL MPI_ALLREDUCE( id%INFO(11), INFOG(11), 1, MPI_INTEGER, 2512 & MPI_MAX, id%COMM, IERR) 2513C make maximum effective frontal size available on all procs 2514C for solve phase 2515C (Note that INFO(11) includes root size on root master) 2516 KEEP(133) = INFOG(11) 2517 CALL MPI_REDUCE( id%INFO(12), INFOG(12), 3, MPI_INTEGER, 2518 & MPI_SUM, MASTER, id%COMM, IERR) 2519 CALL MPI_REDUCE( KEEP(103), INFOG(25), 1, MPI_INTEGER, 2520 & MPI_SUM, MASTER, id%COMM, IERR) 2521 KEEP(229) = INFOG(25) 2522 CALL MPI_REDUCE( KEEP(105), INFOG(25), 1, MPI_INTEGER, 2523 & MPI_SUM, MASTER, id%COMM, IERR) 2524 KEEP(230) = INFOG(25) 2525C 2526 id%INFO(25) = KEEP(98) 2527 CALL MPI_ALLREDUCE( id%INFO(25), INFOG(25), 1, MPI_INTEGER, 2528 & MPI_SUM, id%COMM, IERR) 2529C Extra copies due to in-place stacking 2530 CALL MUMPS_REDUCEI8( id%KEEP8(8), id%KEEP8(108), MPI_SUM, 2531 & MASTER, id%COMM ) 2532C Entries in factors 2533 CALL MUMPS_SETI8TOI4(id%KEEP8(10), id%INFO(27)) 2534 CALL MUMPS_REDUCEI8( id%KEEP8(10),id%KEEP8(110), MPI_SUM, 2535 & MASTER, id%COMM ) 2536 CALL MUMPS_SETI8TOI4(id%KEEP8(110), INFOG(29)) 2537C ============================== 2538C LOW-RANK 2539C ============================== 2540 IF ( KEEP(486) .GT. 0 ) THEN !LR is activated 2541 CALL MPI_REDUCE( GLOBAL_BLR_SAVINGS, TMP_GLOBAL_BLR_SAVINGS 2542 & , 1, MPI_DOUBLE_PRECISION, 2543 & MPI_SUM, MASTER, id%COMM, IERR) 2544 CALL MPI_REDUCE( ACC_FR_MRY, TMP_ACC_FR_MRY 2545 & , 1, MPI_DOUBLE_PRECISION, 2546 & MPI_SUM, MASTER, id%COMM, IERR) 2547 CALL MPI_REDUCE( ACC_LR_FLOP_GAIN, TMP_ACC_LR_FLOP_GAIN 2548 & , 1, MPI_DOUBLE_PRECISION, 2549 & MPI_SUM, MASTER, id%COMM, IERR) 2550 CALL MPI_REDUCE( ACC_FLOP_FR_TRSM, TMP_ACC_FLOP_FR_TRSM 2551 & , 1, MPI_DOUBLE_PRECISION, 2552 & MPI_SUM, MASTER, id%COMM, IERR) 2553 CALL MPI_REDUCE( ACC_FLOP_LR_TRSM, TMP_ACC_FLOP_LR_TRSM 2554 & , 1, MPI_DOUBLE_PRECISION, 2555 & MPI_SUM, MASTER, id%COMM, IERR) 2556 CALL MPI_REDUCE( ACC_FLOP_FR_UPDT, TMP_ACC_FLOP_FR_UPDT 2557 & , 1, MPI_DOUBLE_PRECISION, 2558 & MPI_SUM, MASTER, id%COMM, IERR) 2559 CALL MPI_REDUCE( ACC_FLOP_LR_UPDT, TMP_ACC_FLOP_LR_UPDT 2560 & , 1, MPI_DOUBLE_PRECISION, 2561 & MPI_SUM, MASTER, id%COMM, IERR) 2562 CALL MPI_REDUCE( ACC_FLOP_RMB, TMP_ACC_FLOP_RMB 2563 & , 1, MPI_DOUBLE_PRECISION, 2564 & MPI_SUM, MASTER, id%COMM, IERR) 2565 CALL MPI_REDUCE( ACC_FLOP_LR_UPDT_OUT, 2566 & TMP_ACC_FLOP_LR_UPDT_OUT 2567 & , 1, MPI_DOUBLE_PRECISION, 2568 & MPI_SUM, MASTER, id%COMM, IERR) 2569 CALL MPI_REDUCE( ACC_FLOP_DEC_ACC, TMP_ACC_FLOP_DEC_ACC 2570 & , 1, MPI_DOUBLE_PRECISION, 2571 & MPI_SUM, MASTER, id%COMM, IERR) 2572 CALL MPI_REDUCE( ACC_FLOP_REC_ACC, TMP_ACC_FLOP_REC_ACC 2573 & , 1, MPI_DOUBLE_PRECISION, 2574 & MPI_SUM, MASTER, id%COMM, IERR) 2575 CALL MPI_REDUCE( ACC_FLOP_TRSM, TMP_ACC_FLOP_TRSM 2576 & , 1, MPI_DOUBLE_PRECISION, 2577 & MPI_SUM, MASTER, id%COMM, IERR) 2578 CALL MPI_REDUCE( ACC_FLOP_PANEL, TMP_ACC_FLOP_PANEL 2579 & , 1, MPI_DOUBLE_PRECISION, 2580 & MPI_SUM, MASTER, id%COMM, IERR) 2581 CALL MPI_REDUCE( ACC_FLOP_FRFRONTS, TMP_ACC_FLOP_FRFRONTS 2582 & , 1, MPI_DOUBLE_PRECISION, 2583 & MPI_SUM, MASTER, id%COMM, IERR) 2584 CALL MPI_REDUCE( ACC_FLOP_DEMOTE, TMP_ACC_FLOP_DEMOTE 2585 & , 1, MPI_DOUBLE_PRECISION, 2586 & MPI_SUM, MASTER, id%COMM, IERR) 2587 CALL MPI_REDUCE( ACC_FLOP_CB_DEMOTE, TMP_ACC_FLOP_CB_DEMOTE 2588 & , 1, MPI_DOUBLE_PRECISION, 2589 & MPI_SUM, MASTER, id%COMM, IERR) 2590 CALL MPI_REDUCE( ACC_FLOP_CB_PROMOTE,TMP_ACC_FLOP_CB_PROMOTE 2591 & , 1, MPI_DOUBLE_PRECISION, 2592 & MPI_SUM, MASTER, id%COMM, IERR) 2593 CALL MPI_REDUCE( ACC_FLOP_FR_FACTO,TMP_ACC_FLOP_FR_FACTO 2594 & , 1, MPI_DOUBLE_PRECISION, 2595 & MPI_SUM, MASTER, id%COMM, IERR) 2596 CALL MPI_REDUCE( CNT_NODES,TMP_CNT_NODES 2597 & , 1, MPI_INTEGER, 2598 & MPI_SUM, MASTER, id%COMM, IERR) 2599 IF (id%NPROCS.GT.1) THEN 2600 ACC_FLOP_LR_FACTO = ACC_FLOP_FR_FACTO - ACC_LR_FLOP_GAIN 2601 & + ACC_FLOP_DEMOTE + ACC_FLOP_FRFRONTS 2602 CALL MPI_REDUCE( ACC_FLOP_LR_FACTO,AVG_ACC_FLOP_LR_FACTO 2603 & , 1, MPI_DOUBLE_PRECISION, 2604 & MPI_SUM, MASTER, id%COMM, IERR) 2605 IF (id%MYID.EQ.MASTER) THEN 2606 AVG_ACC_FLOP_LR_FACTO = AVG_ACC_FLOP_LR_FACTO/id%NPROCS 2607 ENDIF 2608 CALL MPI_REDUCE( ACC_FLOP_LR_FACTO,MIN_ACC_FLOP_LR_FACTO 2609 & , 1, MPI_DOUBLE_PRECISION, 2610 & MPI_MIN, MASTER, id%COMM, IERR) 2611 CALL MPI_REDUCE( ACC_FLOP_LR_FACTO,MAX_ACC_FLOP_LR_FACTO 2612 & , 1, MPI_DOUBLE_PRECISION, 2613 & MPI_MAX, MASTER, id%COMM, IERR) 2614 ENDIF ! NPROCS > 1 2615 CALL MPI_REDUCE( ACC_UPDT_TIME,TMP_ACC_UPDT_TIME 2616 & , 1, MPI_DOUBLE_PRECISION, 2617 & MPI_SUM, MASTER, id%COMM, IERR) 2618 CALL MPI_REDUCE( ACC_DEMOTING_TIME,TMP_ACC_DEMOTING_TIME 2619 & , 1, MPI_DOUBLE_PRECISION, 2620 & MPI_SUM, MASTER, id%COMM, IERR) 2621 CALL MPI_REDUCE( ACC_CB_DEMOTING_TIME, 2622 & TMP_ACC_CB_DEMOTING_TIME, 1, 2623 & MPI_DOUBLE_PRECISION, MPI_SUM, MASTER, 2624 & id%COMM, IERR) 2625 CALL MPI_REDUCE( ACC_PROMOTING_TIME,TMP_ACC_PROMOTING_TIME 2626 & , 1, MPI_DOUBLE_PRECISION, 2627 & MPI_SUM, MASTER, id%COMM, IERR) 2628 CALL MPI_REDUCE( ACC_FRPANELS_TIME,TMP_ACC_FRPANELS_TIME 2629 & , 1, MPI_DOUBLE_PRECISION, 2630 & MPI_SUM, MASTER, id%COMM, IERR) 2631 CALL MPI_REDUCE( ACC_FAC_I_TIME,TMP_ACC_FAC_I_TIME 2632 & , 1, MPI_DOUBLE_PRECISION, 2633 & MPI_SUM, MASTER, id%COMM, IERR) 2634 CALL MPI_REDUCE( ACC_FAC_MQ_TIME,TMP_ACC_FAC_MQ_TIME 2635 & , 1, MPI_DOUBLE_PRECISION, 2636 & MPI_SUM, MASTER, id%COMM, IERR) 2637 CALL MPI_REDUCE( ACC_FAC_SQ_TIME,TMP_ACC_FAC_SQ_TIME 2638 & , 1, MPI_DOUBLE_PRECISION, 2639 & MPI_SUM, MASTER, id%COMM, IERR) 2640 CALL MPI_REDUCE( ACC_TRSM_TIME,TMP_ACC_TRSM_TIME 2641 & , 1, MPI_DOUBLE_PRECISION, 2642 & MPI_SUM, MASTER, id%COMM, IERR) 2643 CALL MPI_REDUCE( ACC_FRFRONTS_TIME,TMP_ACC_FRFRONTS_TIME 2644 & , 1, MPI_DOUBLE_PRECISION, 2645 & MPI_SUM, MASTER, id%COMM, IERR) 2646 CALL MPI_REDUCE( ACC_LR_MODULE_TIME,TMP_ACC_LR_MODULE_TIME 2647 & , 1, MPI_DOUBLE_PRECISION, 2648 & MPI_SUM, MASTER, id%COMM, IERR) 2649 IF (id%MYID.EQ.MASTER) THEN 2650 IF (id%NPROCS.GT.1) THEN 2651C rename the stat variable so that COMPUTE_GLOBAL_GAINS can work for any 2652C number of procs 2653 GLOBAL_BLR_SAVINGS = TMP_GLOBAL_BLR_SAVINGS 2654 ACC_FR_MRY = TMP_ACC_FR_MRY 2655 ACC_LR_FLOP_GAIN = TMP_ACC_LR_FLOP_GAIN 2656 ACC_FLOP_TRSM = TMP_ACC_FLOP_TRSM 2657 ACC_FLOP_PANEL = TMP_ACC_FLOP_PANEL 2658 ACC_FLOP_LR_TRSM = TMP_ACC_FLOP_LR_TRSM 2659 ACC_FLOP_FR_TRSM = TMP_ACC_FLOP_FR_TRSM 2660 ACC_FLOP_LR_UPDT = TMP_ACC_FLOP_LR_UPDT 2661 ACC_FLOP_LR_UPDT_OUT = TMP_ACC_FLOP_LR_UPDT_OUT 2662 ACC_FLOP_RMB = TMP_ACC_FLOP_RMB 2663 ACC_FLOP_DEC_ACC = TMP_ACC_FLOP_DEC_ACC 2664 ACC_FLOP_REC_ACC = TMP_ACC_FLOP_REC_ACC 2665 ACC_FLOP_FR_UPDT = TMP_ACC_FLOP_FR_UPDT 2666 ACC_FLOP_DEMOTE = TMP_ACC_FLOP_DEMOTE 2667 ACC_FLOP_CB_DEMOTE = TMP_ACC_FLOP_CB_DEMOTE 2668 ACC_FLOP_CB_PROMOTE = TMP_ACC_FLOP_CB_PROMOTE 2669 ACC_FLOP_FRFRONTS = TMP_ACC_FLOP_FRFRONTS 2670 CNT_NODES = TMP_CNT_NODES 2671 ACC_FLOP_FR_FACTO = TMP_ACC_FLOP_FR_FACTO 2672C ACC_FLOP_LR_FACTO = ACC_FLOP_FR_FACTO - ACC_LR_FLOP_GAIN 2673C & + ACC_FLOP_DEMOTE 2674 ACC_UPDT_TIME = TMP_ACC_UPDT_TIME /id%NPROCS 2675 ACC_DEMOTING_TIME = TMP_ACC_DEMOTING_TIME /id%NPROCS 2676 ACC_CB_DEMOTING_TIME = TMP_ACC_CB_DEMOTING_TIME/id%NPROCS 2677 ACC_PROMOTING_TIME = TMP_ACC_PROMOTING_TIME /id%NPROCS 2678 ACC_FRPANELS_TIME = TMP_ACC_FRPANELS_TIME /id%NPROCS 2679 ACC_FAC_I_TIME = TMP_ACC_FAC_I_TIME /id%NPROCS 2680 ACC_FAC_MQ_TIME = TMP_ACC_FAC_MQ_TIME /id%NPROCS 2681 ACC_FAC_SQ_TIME = TMP_ACC_FAC_SQ_TIME /id%NPROCS 2682 ACC_TRSM_TIME = TMP_ACC_TRSM_TIME /id%NPROCS 2683 ACC_FRFRONTS_TIME = TMP_ACC_FRFRONTS_TIME /id%NPROCS 2684 ACC_LR_MODULE_TIME = TMP_ACC_LR_MODULE_TIME /id%NPROCS 2685 ENDIF 2686 CALL COMPUTE_GLOBAL_GAINS(id%KEEP8(110),RINFOG(3),id%NPROCS, 2687 & PROKG, MPG) 2688 FRONTWISE = 0 2689 IF (id%KEEP(486).EQ.1) THEN 2690C BLR was activated 2691C WRITE gains also compute stats stored in DKEEP array 2692 IF (LPOK) THEN 2693 IF (CNTL(7) < 0.0D0) THEN 2694C Warning : using negative values is an experimental and 2695C non recommended setting. 2696 WRITE(LP,'(/A/,A/,A/,A,A)') 2697 & ' WARNING in BLR input setting', 2698 & ' CNTL(7) < 0 is experimental: ', 2699 & ' RRQR precision = |CNTL(7| x ||A_pre||, ', 2700 & ' where A_pre is the preprocessed matrix as defined', 2701 & ' in the Users guide ' 2702 ENDIF 2703 ENDIF 2704 CALL SAVEandWRITE_GAINS(FRONTWISE, 2705 & KEEP(489), id%DKEEP, N, 2706 & KEEP(487), KEEP(488), KEEP(490), 2707 & KEEP(491), KEEP(50), KEEP(486), KEEP(472), 2708 & KEEP(475), KEEP(478), KEEP(480), KEEP(481), 2709 & KEEP(483), KEEP(484), KEEP(485), KEEP(467), 2710 & KEEP(28), id%NPROCS, MPG, PROKG) 2711C flops when BLR activated 2712 RINFOG(14) = id%DKEEP(56) 2713 ELSE 2714 RINFOG(14) = 0.0D00 2715 ENDIF 2716 ENDIF 2717 ENDIF 2718C ============================== 2719C NULL PIVOTS AND RANK-REVEALING 2720C ============================== 2721 IF(KEEP(110) .EQ. 1) THEN 2722C -- make available to users the local number of null pivots detected 2723C -- with ICNTL(24) = 1. 2724 id%INFO(18) = KEEP(109) 2725 CALL MPI_ALLREDUCE( KEEP(109), KEEP(112), 1, MPI_INTEGER, 2726 & MPI_SUM, id%COMM, IERR) 2727 ELSE 2728 id%INFO(18) = 0 2729 KEEP(109) = 0 2730 KEEP(112) = 0 2731 ENDIF 2732C INFOG(28) deficiency resulting from ICNTL(24) and ICNTL(16). 2733C Note that KEEP(17) already has the same value on all procs 2734 INFOG(28)=KEEP(112)+KEEP(17) 2735C ======================================== 2736C We now provide to the host the part of 2737C PIVNUL_LIST resulting from the processing 2738C of the root node and we update id%INFO(18) 2739C on the processor holding the root to 2740C include null pivots relative to the root 2741C ======================================== 2742 IF (KEEP(17) .NE. 0) THEN 2743 IF (id%MYID .EQ. ID_ROOT) THEN 2744C Include in id%INFO(18) null pivots resulting 2745C from deficiency on the root. In this way, 2746C the sum of all id%INFO(18) is equal to INFOG(28). 2747 id%INFO(18)=id%INFO(18)+KEEP(17) 2748 ENDIF 2749 IF (ID_ROOT .EQ. MASTER) THEN 2750 IF (id%MYID.EQ.MASTER) THEN 2751C -------------------------------------------------- 2752C Null pivots of root have been stored in 2753C PIVNUL_LIST(KEEP(109)+1:KEEP(109)+KEEP(17). 2754C Shift them at the end of the list because: 2755C * this is what we need to build the null space 2756C * we would otherwise overwrite them on the host 2757C when gathering null pivots from other processors 2758C -------------------------------------------------- 2759 DO I=1, KEEP(17) 2760 id%PIVNUL_LIST(KEEP(112)+I)=id%PIVNUL_LIST(KEEP(109)+I) 2761 ENDDO 2762 ENDIF 2763 ELSE 2764C --------------------------------- 2765C Null pivots of root must be sent 2766C from the processor responsible of 2767C the root to the host (or MASTER). 2768C --------------------------------- 2769 IF (id%MYID .EQ. ID_ROOT) THEN 2770 CALL MPI_SEND(id%PIVNUL_LIST(KEEP(109)+1), KEEP(17), 2771 & MPI_INTEGER, MASTER, ZERO_PIV, 2772 & id%COMM, IERR) 2773 ELSE IF (id%MYID .EQ. MASTER) THEN 2774 CALL MPI_RECV(id%PIVNUL_LIST(KEEP(112)+1), KEEP(17), 2775 & MPI_INTEGER, ID_ROOT, ZERO_PIV, 2776 & id%COMM, STATUS, IERR ) 2777 ENDIF 2778 ENDIF 2779 ENDIF 2780C =========================== 2781C gather zero pivots indices 2782C on the host node 2783C =========================== 2784C In case of non working host, the following code also 2785C works considering that KEEP(109) is equal to 0 on 2786C the non-working host 2787 IF(KEEP(110) .EQ. 1) THEN 2788 ALLOCATE(ITMP2(id%NPROCS),stat = IERR ) ! deallocated in 490 2789 IF ( IERR .GT. 0 ) THEN 2790 id%INFO(1)=-13 2791 id%INFO(2)=id%NPROCS 2792 END IF 2793 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 2794 & id%COMM, id%MYID ) 2795 IF (id%INFO(1).LT.0) GOTO 490 2796 CALL MPI_GATHER ( KEEP(109),1, MPI_INTEGER, 2797 & ITMP2(1), 1, MPI_INTEGER, 2798 & MASTER, id%COMM, IERR) 2799 IF(id%MYID .EQ. MASTER) THEN 2800 POSBUF = ITMP2(1)+1 2801C First null pivot of master is in 2802C position 1 of global list 2803 KEEP(220)=1 2804 DO I = 1,id%NPROCS-1 2805 CALL MPI_RECV(id%PIVNUL_LIST(POSBUF), ITMP2(I+1), 2806 & MPI_INTEGER,I, 2807 & ZERO_PIV, id%COMM, STATUS, IERR) 2808C Send position POSBUF of first null pivot of proc I 2809C in global list. Will allow to quickly identify during 2810C the solve step if one is concerned by a global position 2811C K, 0 <= K <= INFOG(28). 2812 CALL MPI_SEND(POSBUF, 1, MPI_INTEGER, I, ZERO_PIV, 2813 & id%COMM, IERR) 2814 POSBUF = POSBUF + ITMP2(I+1) 2815 ENDDO 2816 ELSE 2817 CALL MPI_SEND( id%PIVNUL_LIST(1), KEEP(109), MPI_INTEGER, 2818 & MASTER,ZERO_PIV, id%COMM, IERR) 2819 CALL MPI_RECV( KEEP(220), 1, MPI_INTEGER, MASTER, ZERO_PIV, 2820 & id%COMM, STATUS, IERR ) 2821 ENDIF 2822 ENDIF 2823C ===================================== 2824C Statistics concerning the determinant 2825C ===================================== 2826C 2827C 1/ on the host better take into account null pivots if scaling: 2828C 2829C Since null pivots are excluded from the computation 2830C of the determinant, we also exclude the corresponding 2831C scaling entries. Since those entries have already been 2832C taken into account before the factorization, we multiply 2833C the determinant on the host by the scaling values corresponding 2834C to pivots in PIVNUL_LIST. 2835 IF (id%MYID.EQ.MASTER .AND. LSCAL. AND. KEEP(258).NE.0) THEN 2836 DO I = 1, id%INFOG(28) 2837 CALL DMUMPS_UPDATEDETER(id%ROWSCA(id%PIVNUL_LIST(I)), 2838 & id%DKEEP(6), KEEP(259)) 2839 CALL DMUMPS_UPDATEDETER(id%COLSCA(id%PIVNUL_LIST(I)), 2840 & id%DKEEP(6), KEEP(259)) 2841 ENDDO 2842 ENDIF 2843C 2844C 2/ Swap signs depending on pivoting on each proc 2845C 2846 IF (KEEP(258).NE.0) THEN 2847C Return the determinant in INFOG(34) and RINFOG(12/13) 2848C In case of real arithmetic, initialize 2849C RINFOG(13) to 0 (no imaginary part and 2850C not touched by DMUMPS_DETER_REDUCTION) 2851 RINFOG(13)=0.0D0 2852 IF (KEEP(260).EQ.-1) THEN ! Local to each processor 2853 id%DKEEP(6)=-id%DKEEP(6) 2854 ENDIF 2855C 2856C 3/ Perform a reduction 2857C 2858 CALL DMUMPS_DETER_REDUCTION( 2859 & id%COMM, id%DKEEP(6), KEEP(259), 2860 & RINFOG(12), INFOG(34), id%NPROCS) 2861C 2862C 4/ Swap sign if needed 2863C 2864 IF (id%KEEP(50).EQ.0 .AND. id%MYID.EQ. MASTER) THEN 2865C Modify sign of determinant according 2866C to unsymmetric permutation (max-trans 2867C of max-weighted matching) 2868 IF (id%KEEP(23).NE.0) THEN 2869 CALL DMUMPS_DETER_SIGN_PERM( 2870 & RINFOG(12), id%N, 2871C id%STEP: used as workspace of size N still 2872C allocated on master; restored on exit 2873 & id%STEP(1), 2874 & id%UNS_PERM(1) ) 2875C Remark that RINFOG(12/13) are modified only 2876C on the host but will be broadcast on exit 2877C from MUMPS (see DMUMPS_DRIVER) 2878 ENDIF 2879 ENDIF 2880 ENDIF 2881 490 IF (allocated(ITMP2)) DEALLOCATE(ITMP2) 2882 IF ( PROKG ) THEN 2883C --------------------------------------- 2884C PRINT STATISTICS (on master) 2885C ----------------------------- 2886 WRITE(MPG,99984) RINFOG(2),RINFOG(3),id%KEEP8(6),INFOG(10), 2887 & INFOG(11), id%KEEP8(110) 2888 IF (id%KEEP(50) == 1 .OR. id%KEEP(50) == 2) THEN 2889 ! negative pivots 2890 WRITE(MPG, 99987) INFOG(12) 2891 END IF 2892 IF (id%KEEP(50) == 0) THEN 2893 ! off diag pivots 2894 WRITE(MPG, 99985) INFOG(12) 2895 END IF 2896 IF (id%KEEP(50) .NE. 1) THEN 2897 ! delayed pivots 2898 WRITE(MPG, 99982) INFOG(13) 2899 END IF 2900 IF (KEEP(97) .NE. 0) THEN 2901 ! tiny pivots 2902 WRITE(MPG, 99986) INFOG(25) 2903 ENDIF 2904 IF (id%KEEP(50) == 2) THEN 2905 !number of 2x2 pivots in type 1 nodes 2906 WRITE(MPG, 99988) KEEP(229) 2907 !number of 2x2 pivots in type 2 nodes 2908 WRITE(MPG, 99989) KEEP(230) 2909 ENDIF 2910 !number of zero pivots 2911 IF (KEEP(110) .NE.0) THEN 2912 WRITE(MPG, 99991) KEEP(112) 2913 ENDIF 2914 !Deficiency on root 2915 IF ( KEEP(17) .ne. 0 ) 2916 & WRITE(MPG, 99983) KEEP(17) 2917 !Total deficiency 2918 IF (KEEP(110).NE.0.OR.KEEP(17).NE.0) 2919 & WRITE(MPG, 99992) KEEP(17)+KEEP(112) 2920 ! Memory compress 2921 WRITE(MPG, 99981) INFOG(14) 2922 ! Extra copies due to ip stack in unsym case 2923 ! in core case (or OLD_OOC_PANEL) 2924 IF (id%KEEP8(108) .GT. 0_8) THEN 2925 WRITE(MPG, 99980) id%KEEP8(108) 2926 ENDIF 2927 IF ((KEEP(60).NE.0) .AND. INFOG(25).GT.0) THEN 2928 ! Schur on and tiny pivots set in last level 2929 ! before the SChur 2930 WRITE(MPG, '(A)') 2931 & " ** Warning Static pivoting was necessary" 2932 WRITE(MPG, '(A)') 2933 & " ** to factor interior variables with Schur ON" 2934 ENDIF 2935 IF (KEEP(258).NE.0) THEN 2936 WRITE(MPG,99978) RINFOG(12) 2937 WRITE(MPG,99977) INFOG(34) 2938 ENDIF 2939 END IF 2940* ========================================== 2941* 2942* End of Factorization Phase 2943* 2944* ========================================== 2945C 2946C Goto 500 is done when 2947C LOAD_INIT 2948C OOC_INIT_FACTO 2949C MUMPS_FDM_INIT 2950#if ! defined(NO_FDM_DESCBAND) 2951C MUMPS_FDBD_INIT 2952#endif 2953#if ! defined(NO_FDM_MAPROW) 2954C MUMPS_FMRD_INIT 2955#endif 2956C are all called. 2957C 2958 500 CONTINUE 2959#if ! defined(NO_FDM_DESCBAND) 2960 IF (I_AM_SLAVE) THEN 2961 CALL MUMPS_FDBD_END(id%INFO(1)) ! INFO(1): input only 2962 ENDIF 2963#endif 2964#if ! defined(NO_FDM_MAPROW) 2965 IF (I_AM_SLAVE) THEN 2966 CALL MUMPS_FMRD_END(id%INFO(1)) ! INFO(1): input only 2967 ENDIF 2968#endif 2969 IF (I_AM_SLAVE) THEN 2970 CALL DMUMPS_BLR_END_MODULE(id%INFO(1), id%KEEP8, .TRUE.) 2971C INFO(1): input only 2972 ENDIF 2973 IF (I_AM_SLAVE) THEN 2974 CALL MUMPS_FDM_END('A') 2975 CALL MUMPS_FDM_END('F') 2976 ENDIF 2977C 2978C Goto 514 is done when an 2979C error occurred in MUMPS_FDM_INIT 2980C or (after FDM_INIT but before 2981C OOC_INIT) 2982C 2983 514 CONTINUE 2984 IF ( I_AM_SLAVE ) THEN 2985 IF ((KEEP(201).EQ.1).OR.(KEEP(201).EQ.2)) THEN 2986 CALL DMUMPS_OOC_END_FACTO(id,IERR) 2987 IF (id%ASSOCIATED_OOC_FILES) THEN 2988 id%ASSOCIATED_OOC_FILES = .FALSE. 2989 ENDIF 2990 IF (IERR.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = IERR 2991 ENDIF 2992 IF (WK_USER_PROVIDED) THEN 2993C at the end of a phase S is always freed when WK_USER provided 2994 NULLIFY(id%S) 2995 ELSE IF (KEEP(201).NE.0) THEN 2996C ---------------------------------------- 2997C In OOC or if KEEP(201).EQ.-1 we always 2998C free S at end of factorization. As id%S 2999C may be unassociated in case of error 3000C during or before the allocation of id%S, 3001C we only free S when it was associated. 3002C ---------------------------------------- 3003 IF (associated(id%S)) DEALLOCATE(id%S) 3004 NULLIFY(id%S) ! in all cases 3005 id%KEEP8(23)=0_8 3006 ENDIF 3007 ELSE ! host not working 3008 IF (WK_USER_PROVIDED) THEN 3009C at the end of a phase S is always freed when WK_USER provided 3010 NULLIFY(id%S) 3011 ELSE 3012 IF (associated(id%S)) DEALLOCATE(id%S) 3013 NULLIFY(id%S) ! in all cases 3014 id%KEEP8(23)=0_8 3015 END IF 3016 END IF 3017C 3018C Goto 513 is done in case of error where LOAD_INIT was 3019C called but not OOC_INIT_FACTO. 3020 513 CONTINUE 3021 IF ( I_AM_SLAVE ) THEN 3022 CALL DMUMPS_LOAD_END( id%INFO(1), id%NSLAVES, IERR ) 3023 IF (IERR.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = IERR 3024 ENDIF 3025 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 3026 & id%COMM, id%MYID ) 3027C 3028C Goto 530 is done when an error occurs before 3029C the calls to LOAD_INIT and OOC_INIT_FACTO 3030 530 CONTINUE 3031C Fwd in facto: free RHS_MUMPS in case 3032C it was allocated. 3033 IF (RHS_MUMPS_ALLOCATED) DEALLOCATE(RHS_MUMPS) 3034 NULLIFY(RHS_MUMPS) 3035C 3036 id%KEEP8(26) = KEEP826_SAVE 3037 RETURN 3038 120 FORMAT(/' LOCAL REDISTRIB: DATA LOCAL/SENT =',I16,I16) 3039 125 FORMAT(/' REDISTRIB: TOTAL DATA LOCAL/SENT =',I16,I16) 3040 130 FORMAT(/' ****** FACTORIZATION STEP ********'/) 3041 160 FORMAT(' ELAPSED TIME FOR MATRIX DISTRIBUTION =',F12.4) 3042 166 FORMAT(' Convergence error after scaling for ONE-NORM', 3043 & ' (option 7/8) =',D9.2) 3044 170 FORMAT(/' STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/ 3045 & ' Size of internal working array S =',I16/ 3046 & ' Size of internal working array IS =',I16/ 3047 & ' MINIMUM (ICNTL(14)=0) size of S =',I16/ 3048 & ' MINIMUM (ICNTL(14)=0) size of IS =',I16/ 3049 & ' REAL SPACE FOR ORIGINAL MATRIX =',I16/ 3050 & ' INTEGER SPACE FOR ORIGINAL MATRIX =',I16/ 3051 & ' REAL SPACE FOR FACTORS =',I16/ 3052 & ' INTEGER SPACE FOR FACTORS =',I16/ 3053 & ' MAXIMUM FRONTAL SIZE (ESTIMATED) =',I16) 3054 172 FORMAT(/' GLOBAL STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/ 3055 & ' NUMBER OF WORKING PROCESSES =',I16/ 3056 & ' OUT-OF-CORE OPTION (ICNTL(22)) =',I16/ 3057 & ' REAL SPACE FOR FACTORS =',I16/ 3058 & ' INTEGER SPACE FOR FACTORS =',I16/ 3059 & ' MAXIMUM FRONTAL SIZE (ESTIMATED) =',I16/ 3060 & ' NUMBER OF NODES IN THE TREE =',I16/ 3061 & ' MEMORY ALLOWED (MB -- 0: N/A ) =',I16/ 3062 & ' RELATIVE THRESHOLD FOR PIVOTING, CNTL(1) =',D16.4) 3063 173 FORMAT( ' PERFORM FORWARD DURING FACTO, NRHS =',I16) 3064 175 FORMAT(/' NUMBER OF ENTRIES FOR // ROOT =',I16) 3065 180 FORMAT(/' ELAPSED TIME FOR FACTORIZATION =',F12.4) 3066 185 FORMAT(/' ELAPSED TIME FOR (FAILED) FACTORIZATION =',F12.4) 306799977 FORMAT( ' INFOG(34) DETERMINANT (base 2 exponent) =',I16) 306899978 FORMAT( ' RINFOG(12) DETERMINANT (real part) =',F12.4) 306999980 FORMAT( ' Extra copies due to In-Place stacking =',I16) 307099981 FORMAT( ' INFOG(14) NUMBER OF MEMORY COMPRESS =',I16) 307199982 FORMAT( ' INFOG(13) NUMBER OF DELAYED PIVOTS =',I16) 307299983 FORMAT( ' NB OF NULL PIVOTS DETECTED BY ICNTL(16) =',I16) 307399991 FORMAT( ' NB OF NULL PIVOTS DETECTED BY ICNTL(24) =',I16) 307499992 FORMAT( ' INFOG(28) ESTIMATED DEFICIENCY =',I16) 307599984 FORMAT(/' GLOBAL STATISTICS '/ 3076 & ' RINFOG(2) OPERATIONS IN NODE ASSEMBLY =',1PD10.3/ 3077 & ' ------(3) OPERATIONS IN NODE ELIMINATION=',1PD10.3/ 3078 & ' INFOG (9) REAL SPACE FOR FACTORS =',I16/ 3079 & ' INFOG(10) INTEGER SPACE FOR FACTORS =',I16/ 3080 & ' INFOG(11) MAXIMUM FRONT SIZE =',I16/ 3081 & ' INFOG(29) NUMBER OF ENTRIES IN FACTORS =',I16) 308299985 FORMAT( ' INFOG(12) NUMBER OF OFF DIAGONAL PIVOTS =',I16) 308399986 FORMAT( ' INFOG(25) NUMBER OF TINY PIVOTS(STATIC) =',I16) 308499987 FORMAT( ' INFOG(12) NUMBER OF NEGATIVE PIVOTS =',I16) 308599988 FORMAT( ' NUMBER OF 2x2 PIVOTS in type 1 nodes =',I16) 308699989 FORMAT( ' NUMBER OF 2x2 PIVOTS in type 2 nodes =',I16) 3087 END SUBROUTINE DMUMPS_FAC_DRIVER 3088 SUBROUTINE DMUMPS_AVGMAX_STAT8(PROKG, MPG, VAL, NSLAVES, 3089 & COMM, MSG) 3090 IMPLICIT NONE 3091 INCLUDE 'mpif.h' 3092 LOGICAL PROKG 3093 INTEGER MPG 3094 INTEGER(8) VAL 3095 INTEGER NSLAVES 3096 INTEGER COMM 3097 CHARACTER*42 MSG 3098C Local 3099 INTEGER(8) MAX_VAL 3100 INTEGER IERR, MASTER 3101 DOUBLE PRECISION LOC_VAL, AVG_VAL 3102 PARAMETER(MASTER=0) 3103C 3104 CALL MUMPS_REDUCEI8( VAL, MAX_VAL, MPI_MAX, MASTER, COMM) 3105 LOC_VAL = dble(VAL)/dble(NSLAVES) 3106 CALL MPI_REDUCE( LOC_VAL, AVG_VAL, 1, MPI_DOUBLE_PRECISION, 3107 & MPI_SUM, MASTER, COMM, IERR ) 3108 IF (PROKG) THEN 3109 WRITE(MPG,100) " Maximum ", MSG, MAX_VAL 3110 WRITE(MPG,100) " Average ", MSG, int(AVG_VAL,8) 3111 ENDIF 3112 RETURN 3113 100 FORMAT(A9,A42,I16) 3114 END SUBROUTINE DMUMPS_AVGMAX_STAT8 3115C 3116 SUBROUTINE DMUMPS_EXTRACT_SCHUR_REDRHS(id) 3117 USE DMUMPS_STRUC_DEF 3118 IMPLICIT NONE 3119C 3120C Purpose 3121C ======= 3122C 3123C Extract the Schur and possibly also the reduced right-hand side 3124C (if Fwd in facto) from the processor working on Schur and copy 3125C it into the user datastructures id%SCHUR and id%REDRHS on the host. 3126C This routine assumes that the integer list of the Schur has not 3127C been permuted and still corresponds to LISTVAR_SCHUR. 3128C 3129C If the Schur is centralized, the master of the Schur holds the 3130C Schur and possibly also the reduced right-hand side. 3131C If the Schur is distribued (already built in user's datastructure), 3132C then the master of the Schur may hold the reduced right-hand side, 3133C in which case it is available in root%RHS_CNTR_MASTER_ROOT. 3134C 3135 TYPE(DMUMPS_STRUC) :: id 3136C 3137C Local variables 3138C =============== 3139C 3140 INCLUDE 'mpif.h' 3141 INCLUDE 'mumps_tags.h' 3142 INCLUDE 'mumps_headers.h' 3143 INTEGER :: STATUS(MPI_STATUS_SIZE) 3144 INTEGER :: IERR 3145 INTEGER, PARAMETER :: MASTER = 0 3146 INTEGER :: ID_SCHUR, SIZE_SCHUR, LD_SCHUR, IB, BL4 3147 INTEGER :: ROW_LENGTH, I 3148 INTEGER(8) :: SURFSCHUR8, BL8, SHIFT8 3149 INTEGER(8) :: ISCHUR_SRC, ISCHUR_DEST, ISCHUR_SYM, ISCHUR_UNS 3150C 3151C External functions 3152C ================== 3153C 3154 INTEGER MUMPS_PROCNODE 3155 EXTERNAL MUMPS_PROCNODE 3156C Quick return in case factorization did not terminate correctly 3157 IF (id%INFO(1) .LT. 0) RETURN 3158C Quick return if Schur option off 3159 IF (id%KEEP(60) .EQ. 0) RETURN 3160C Get Schur id 3161 ID_SCHUR =MUMPS_PROCNODE( 3162 & id%PROCNODE_STEPS(id%STEP(max(id%KEEP(20),id%KEEP(38)))), 3163 & id%NSLAVES) 3164 IF ( id%KEEP( 46 ) .NE. 1 ) THEN 3165 ID_SCHUR = ID_SCHUR + 1 3166 END IF 3167C Get size of Schur 3168 IF (id%MYID.EQ.ID_SCHUR) THEN 3169 IF (id%KEEP(60).EQ.1) THEN 3170C Sequential Schur 3171 LD_SCHUR = 3172 & id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))+2+id%KEEP(IXSZ)) 3173 SIZE_SCHUR = LD_SCHUR - id%KEEP(253) 3174 ELSE 3175C Parallel Schur 3176 LD_SCHUR = -999999 ! not used 3177 SIZE_SCHUR = id%root%TOT_ROOT_SIZE 3178 ENDIF 3179 ELSE IF (id%MYID .EQ. MASTER) THEN 3180 SIZE_SCHUR = id%KEEP(116) 3181 LD_SCHUR = -44444 ! Not used 3182 ELSE 3183C Proc is not concerned with Schur, return 3184 RETURN 3185 ENDIF 3186 SURFSCHUR8 = int(SIZE_SCHUR,8)*int(SIZE_SCHUR,8) 3187C ================================= 3188C Case of parallel Schur: if REDRHS 3189C was requested, obtain it directly 3190C from id%root%RHS_CNTR_MASTER_ROOT 3191C ================================= 3192 IF (id%KEEP(60) .GT. 1) THEN 3193 IF (id%KEEP(221).EQ.1 .AND. id%KEEP(252).GT.0) THEN 3194 DO I = 1, id%KEEP(253) 3195 IF (ID_SCHUR.EQ.MASTER) THEN ! Necessarily = id%MYID 3196 CALL dcopy(SIZE_SCHUR, 3197 & id%root%RHS_CNTR_MASTER_ROOT((I-1)*SIZE_SCHUR+1), 1, 3198 & id%REDRHS((I-1)*id%LREDRHS+1), 1) 3199 ELSE 3200 IF (id%MYID.EQ.ID_SCHUR) THEN 3201C Send 3202 CALL MPI_SEND( 3203 & id%root%RHS_CNTR_MASTER_ROOT((I-1)*SIZE_SCHUR+1), 3204 & SIZE_SCHUR, 3205 & MPI_DOUBLE_PRECISION, 3206 & MASTER, TAG_SCHUR, 3207 & id%COMM, IERR ) 3208 ELSE ! MYID.EQ.MASTER 3209C Receive 3210 CALL MPI_RECV( id%REDRHS((I-1)*id%LREDRHS+1), 3211 & SIZE_SCHUR, 3212 & MPI_DOUBLE_PRECISION, ID_SCHUR, TAG_SCHUR, 3213 & id%COMM, STATUS, IERR ) 3214 ENDIF 3215 ENDIF 3216 ENDDO 3217C ------------------------------ 3218C In case of parallel Schur, we 3219C free root%RHS_CNTR_MASTER_ROOT 3220C ------------------------------ 3221 IF (id%MYID.EQ.ID_SCHUR) THEN 3222 DEALLOCATE(id%root%RHS_CNTR_MASTER_ROOT) 3223 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT) 3224 ENDIF 3225 ENDIF 3226C return because this is all we need to do 3227C in case of parallel Schur complement 3228 RETURN 3229 ENDIF 3230C ============================ 3231C Centralized Schur complement 3232C ============================ 3233C PTRAST has been freed at the moment of calling this 3234C routine. Schur is available through 3235C PTRFAC(IW( PTLUST_S( STEP(KEEP(20)) ) + 4 +KEEP(IXSZ) )) 3236 IF (id%KEEP(252).EQ.0) THEN 3237C CASE 1 (ORIGINAL CODE): 3238C Schur is contiguous on ID_SCHUR 3239 IF ( ID_SCHUR .EQ. MASTER ) THEN ! Necessarily equals id%MYID 3240C --------------------- 3241C Copy Schur complement 3242C --------------------- 3243 CALL DMUMPS_COPYI8SIZE( SURFSCHUR8, 3244 & id%S(id%PTRFAC(id%STEP(id%KEEP(20)))), 3245 & id%SCHUR(1) ) 3246 ELSE 3247C ----------------------------------------- 3248C The processor responsible of the Schur 3249C complement sends it to the host processor 3250C ----------------------------------------- 3251 BL8=int(huge(BL4)/id%KEEP(35)/10,8) 3252 DO IB=1, int((SURFSCHUR8+BL8-1_8) / BL8) 3253 SHIFT8 = int(IB-1,8) * BL8 ! Where to send 3254 BL4 = int(min(BL8,SURFSCHUR8-SHIFT8)) ! Size of block 3255 IF ( id%MYID .eq. ID_SCHUR ) THEN 3256C Send Schur complement 3257 CALL MPI_SEND( id%S( SHIFT8 + 3258 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20))) 3259 & +4+id%KEEP(IXSZ)))), 3260 & BL4, 3261 & MPI_DOUBLE_PRECISION, 3262 & MASTER, TAG_SCHUR, 3263 & id%COMM, IERR ) 3264 ELSE IF ( id%MYID .eq. MASTER ) THEN 3265C Receive Schur complement 3266 CALL MPI_RECV( id%SCHUR(1_8 + SHIFT8), 3267 & BL4, 3268 & MPI_DOUBLE_PRECISION, ID_SCHUR, TAG_SCHUR, 3269 & id%COMM, STATUS, IERR ) 3270 END IF 3271 ENDDO 3272 END IF 3273 ELSE 3274C CASE 2 (Fwd in facto): Schur is not contiguous on ID_SCHUR, 3275C process it row by row. 3276C 3277C 2.1: We first centralize Schur complement into id%SCHUR 3278 ISCHUR_SRC = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20))) 3279 & +4+id%KEEP(IXSZ))) 3280 ISCHUR_DEST= 1_8 3281 DO I=1, SIZE_SCHUR 3282 ROW_LENGTH = SIZE_SCHUR 3283 IF (ID_SCHUR.EQ.MASTER) THEN ! Necessarily = id%MYID 3284 CALL dcopy(ROW_LENGTH, id%S(ISCHUR_SRC), 1, 3285 & id%SCHUR(ISCHUR_DEST),1) 3286 ELSE 3287 IF (id%MYID.EQ.ID_SCHUR) THEN 3288C Send 3289 CALL MPI_SEND( id%S(ISCHUR_SRC), ROW_LENGTH, 3290 & MPI_DOUBLE_PRECISION, 3291 & MASTER, TAG_SCHUR, 3292 & id%COMM, IERR ) 3293 ELSE 3294C Recv 3295 CALL MPI_RECV( id%SCHUR(ISCHUR_DEST), 3296 & ROW_LENGTH, 3297 & MPI_DOUBLE_PRECISION, ID_SCHUR, TAG_SCHUR, 3298 & id%COMM, STATUS, IERR ) 3299 ENDIF 3300 ENDIF 3301 ISCHUR_SRC = ISCHUR_SRC+int(LD_SCHUR,8) 3302 ISCHUR_DEST= ISCHUR_DEST+int(SIZE_SCHUR,8) 3303 ENDDO 3304C 2.2: Get REDRHS on host 3305C 2.2.1: Symmetric => REDRHS is available in last KEEP(253) 3306C rows of Schur structure on ID_SCHUR 3307C 2.2.2: Unsymmetric => REDRHS corresponds to last KEEP(253) 3308C columns. However it must be transposed. 3309 IF (id%KEEP(221).EQ.1) THEN ! Implies Fwd in facto 3310 ISCHUR_SYM = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20))) 3311 & +4+id%KEEP(IXSZ))) + int(SIZE_SCHUR,8) * 3312 & int(LD_SCHUR,8) 3313 ISCHUR_UNS = 3314 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20))) 3315 & +4+id%KEEP(IXSZ))) + int(SIZE_SCHUR,8) 3316 ISCHUR_DEST = 1_8 3317 DO I = 1, id%KEEP(253) 3318 IF (ID_SCHUR .EQ. MASTER) THEN ! necessarily = id%MYID 3319 IF (id%KEEP(50) .EQ. 0) THEN 3320 CALL dcopy(SIZE_SCHUR, id%S(ISCHUR_UNS), LD_SCHUR, 3321 & id%REDRHS(ISCHUR_DEST), 1) 3322 ELSE 3323 CALL dcopy(SIZE_SCHUR, id%S(ISCHUR_SYM), 1, 3324 & id%REDRHS(ISCHUR_DEST), 1) 3325 ENDIF 3326 ELSE 3327 IF (id%MYID .NE. MASTER) THEN 3328 IF (id%KEEP(50) .EQ. 0) THEN 3329C Use id%S(ISCHUR_SYM) as temporary contig. workspace 3330C of size SIZE_SCHUR. 3331 CALL dcopy(SIZE_SCHUR, id%S(ISCHUR_UNS), LD_SCHUR, 3332 & id%S(ISCHUR_SYM), 1) 3333 ENDIF 3334 CALL MPI_SEND(id%S(ISCHUR_SYM), SIZE_SCHUR, 3335 & MPI_DOUBLE_PRECISION, MASTER, TAG_SCHUR, 3336 & id%COMM, IERR ) 3337 ELSE 3338 CALL MPI_RECV(id%REDRHS(ISCHUR_DEST), 3339 & SIZE_SCHUR, MPI_DOUBLE_PRECISION, ID_SCHUR, TAG_SCHUR, 3340 & id%COMM, STATUS, IERR ) 3341 ENDIF 3342 ENDIF 3343 IF (id%KEEP(50).EQ.0) THEN 3344 ISCHUR_UNS = ISCHUR_UNS + int(LD_SCHUR,8) 3345 ELSE 3346 ISCHUR_SYM = ISCHUR_SYM + int(LD_SCHUR,8) 3347 ENDIF 3348 ISCHUR_DEST = ISCHUR_DEST + int(id%LREDRHS,8) 3349 ENDDO 3350 ENDIF 3351 ENDIF 3352 RETURN 3353 END SUBROUTINE DMUMPS_EXTRACT_SCHUR_REDRHS 3354