1C 2C This file is part of MUMPS 5.1.2, released 3C on Mon Oct 2 07:37:01 UTC 2017 4C 5C 6C Copyright 1991-2017 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria, 7C University of Bordeaux. 8C 9C This version of MUMPS is provided to you free of charge. It is 10C released under the CeCILL-C license: 11C http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html 12C 13 SUBROUTINE ZMUMPS_FAC_DRIVER( id) 14 USE ZMUMPS_BUF 15 USE ZMUMPS_LOAD 16 USE ZMUMPS_OOC 17 USE ZMUMPS_STRUC_DEF 18 USE ZMUMPS_LR_CORE 19 USE ZMUMPS_LR_STATS 20 USE ZMUMPS_LR_DATA_M, only: ZMUMPS_BLR_INIT_MODULE, 21 & ZMUMPS_BLR_END_MODULE 22 USE MUMPS_FRONT_DATA_MGT_M 23#if ! defined(NO_FDM_DESCBAND) 24 USE MUMPS_FAC_DESCBAND_DATA_M 25#endif 26#if ! defined(NO_FDM_MAPROW) 27 USE MUMPS_FAC_MAPROW_DATA_M 28#endif 29 IMPLICIT NONE 30C 31C Purpose 32C ======= 33C 34C Performs scaling, sorting in arrowhead, then 35C distributes the matrix, and perform 36C factorization. 37C 38C 39 INTERFACE 40C Explicit interface needed because 41C of "id" derived datatype argument 42 SUBROUTINE ZMUMPS_ANORMINF(id, ANORMINF, LSCAL) 43 USE ZMUMPS_STRUC_DEF 44 TYPE (ZMUMPS_STRUC), TARGET :: id 45 DOUBLE PRECISION, INTENT(OUT) :: ANORMINF 46 LOGICAL :: LSCAL 47 END SUBROUTINE ZMUMPS_ANORMINF 48C 49 END INTERFACE 50C 51C Parameters 52C ========== 53C 54 TYPE(ZMUMPS_STRUC), TARGET :: id 55C 56C MPI 57C === 58C 59 INCLUDE 'mpif.h' 60 INCLUDE 'mumps_tags.h' 61 INTEGER :: STATUS(MPI_STATUS_SIZE) 62 INTEGER :: IERR 63 INTEGER, PARAMETER :: MASTER = 0 64C 65C Local variables 66C =============== 67C 68 INCLUDE 'mumps_headers.h' 69 INTEGER(8) :: NSEND8, NSEND_TOT8 70 INTEGER(8) :: NLOCAL8, NLOCAL_TOT8 71 INTEGER :: LDPTRAR, NELT_arg, NBRECORDS 72 INTEGER :: ITMP 73 INTEGER(8) ::KEEP826_SAVE 74 INTEGER(8) K67 75 INTEGER(8) K68,K69 76 INTEGER(8) ITMP8 77 INTEGER MUMPS_PROCNODE 78 EXTERNAL MUMPS_PROCNODE 79 INTEGER MP, LP, MPG, allocok 80 LOGICAL PROK, PROKG, LSCAL, LPOK, COMPUTE_ANORMINF 81 INTEGER ZMUMPS_LBUF, ZMUMPS_LBUFR_BYTES, ZMUMPS_LBUF_INT 82 INTEGER(8) ZMUMPS_LBUFR_BYTES8, ZMUMPS_LBUF8 83 INTEGER PTRIST, PTRWB, MAXELT_SIZE, 84 & ITLOC, IPOOL, NSTEPS, K28, LPOOL, LIW 85 INTEGER IRANK, ID_ROOT 86 INTEGER KKKK 87 INTEGER(8) :: NZ_locMAX8 88 INTEGER(8) MEMORY_MD_ARG 89 INTEGER(8) MAXS_BASE8, MAXS_BASE_RELAXED8 90 DOUBLE PRECISION CNTL4 91 INTEGER MIN_PERLU, MAXIS_ESTIM 92 INTEGER MAXIS 93 INTEGER(8) :: MAXS 94 DOUBLE PRECISION TIME, TIMEET 95 DOUBLE PRECISION ZERO, ONE, MONE 96 PARAMETER( ZERO = 0.0D0, ONE = 1.0D0, MONE = -1.0D0) 97 COMPLEX(kind=8) CZERO 98 PARAMETER( CZERO = (0.0D0, 0.0D0) ) 99 INTEGER PERLU, TOTAL_MBYTES, K231, K232, K233 100 INTEGER COLOUR, COMM_FOR_SCALING ! For Simultaneous scaling 101 INTEGER LIWK, LWK_REAL 102 INTEGER(8) :: LWK 103C SLAVE: used to determine if proc has the role of a slave 104 LOGICAL I_AM_SLAVE, PERLU_ON, WK_USER_PROVIDED 105C WK_USER_PROVIDED is set to true when workspace WK_USER is provided by user 106 DOUBLE PRECISION :: ANORMINF, SEUIL, SEUIL_LDLT_NIV2 107 DOUBLE PRECISION :: CNTL1, CNTL3, CNTL5, CNTL6, EPS 108 INTEGER N, LPN_LIST,POSBUF 109 INTEGER, DIMENSION (:), ALLOCATABLE :: ITMP2 110 INTEGER I,K 111 INTEGER FRONTWISE 112C temporary variable for collecting stats from all processors 113 DOUBLE PRECISION :: TMP_GLOBAL_BLR_SAVINGS 114 DOUBLE PRECISION :: TMP_ACC_FR_MRY 115 DOUBLE PRECISION :: TMP_ACC_LR_FLOP_GAIN 116 DOUBLE PRECISION :: TMP_ACC_FLOP_TRSM 117 DOUBLE PRECISION :: TMP_ACC_FLOP_PANEL 118 DOUBLE PRECISION :: TMP_ACC_FLOP_FRFRONTS 119 DOUBLE PRECISION :: TMP_ACC_FLOP_LR_TRSM 120 DOUBLE PRECISION :: TMP_ACC_FLOP_FR_TRSM 121 DOUBLE PRECISION :: TMP_ACC_FLOP_LR_UPDT 122 DOUBLE PRECISION :: TMP_ACC_FLOP_LR_UPDT_OUT 123 DOUBLE PRECISION :: TMP_ACC_FLOP_RMB 124 DOUBLE PRECISION :: TMP_ACC_FLOP_DEC_ACC 125 DOUBLE PRECISION :: TMP_ACC_FLOP_REC_ACC 126 DOUBLE PRECISION :: TMP_ACC_FLOP_FR_UPDT 127 DOUBLE PRECISION :: TMP_ACC_FLOP_DEMOTE 128 DOUBLE PRECISION :: TMP_ACC_FLOP_CB_DEMOTE 129 DOUBLE PRECISION :: TMP_ACC_FLOP_CB_PROMOTE 130 DOUBLE PRECISION :: TMP_ACC_FLOP_FR_FACTO 131 INTEGER :: TMP_CNT_NODES 132 DOUBLE PRECISION :: TMP_ACC_UPDT_TIME 133 DOUBLE PRECISION :: TMP_ACC_DEMOTING_TIME 134 DOUBLE PRECISION :: TMP_ACC_CB_DEMOTING_TIME 135 DOUBLE PRECISION :: TMP_ACC_PROMOTING_TIME 136 DOUBLE PRECISION :: TMP_ACC_FRPANELS_TIME 137 DOUBLE PRECISION :: TMP_ACC_FAC_I_TIME 138 DOUBLE PRECISION :: TMP_ACC_FAC_MQ_TIME 139 DOUBLE PRECISION :: TMP_ACC_FAC_SQ_TIME 140 DOUBLE PRECISION :: TMP_ACC_TRSM_TIME 141 DOUBLE PRECISION :: TMP_ACC_FRFRONTS_TIME 142 DOUBLE PRECISION :: TMP_ACC_LR_MODULE_TIME 143C 144C Workspace. 145C 146 INTEGER, DIMENSION(:), ALLOCATABLE :: IWK 147 COMPLEX(kind=8), DIMENSION(:), ALLOCATABLE :: WK 148 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WK_REAL 149 INTEGER(8), DIMENSION(:), ALLOCATABLE :: IWK8 150 INTEGER, DIMENSION(:), ALLOCATABLE :: BURP 151 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCP 152 INTEGER, DIMENSION(:), ALLOCATABLE :: BURS 153 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCS 154 INTEGER BUREGISTRE(12) 155 INTEGER BUINTSZ, BURESZ, BUJOB 156 INTEGER BUMAXMN, M, SCMYID, SCNPROCS 157 DOUBLE PRECISION SCONEERR, SCINFERR 158C 159C Parameters arising from the structure 160C ===================================== 161C 162 INTEGER, POINTER :: JOB 163* Control parameters: see description in ZMUMPSID 164 DOUBLE PRECISION,DIMENSION(:),POINTER::RINFO, RINFOG 165 DOUBLE PRECISION,DIMENSION(:),POINTER:: CNTL 166 INTEGER,DIMENSION(:),POINTER:: INFOG, KEEP 167 INTEGER, DIMENSION(:), POINTER :: MYIRN_loc, MYJCN_loc 168 COMPLEX(kind=8), DIMENSION(:), POINTER :: MYA_loc 169 INTEGER, TARGET :: DUMMYIRN_loc(1), DUMMYJCN_loc(1) 170 COMPLEX(kind=8), TARGET :: DUMMYA_loc(1) 171 INTEGER,DIMENSION(:),POINTER::ICNTL 172 EXTERNAL MUMPS_GET_POOL_LENGTH 173 INTEGER MUMPS_GET_POOL_LENGTH 174 INTEGER(8) TOTAL_BYTES 175 INTEGER(8) :: I8TMP 176C 177C External references 178C =================== 179 INTEGER numroc 180 EXTERNAL numroc 181C Fwd in facto: 182 COMPLEX(kind=8), DIMENSION(:), POINTER :: RHS_MUMPS 183 LOGICAL :: RHS_MUMPS_ALLOCATED 184 INTEGER :: NB_ACTIVE_FRONTS_ESTIM 185C 186C 187 JOB=>id%JOB 188 RINFO=>id%RINFO 189 RINFOG=>id%RINFOG 190 CNTL=>id%CNTL 191 INFOG=>id%INFOG 192 KEEP=>id%KEEP 193 ICNTL=>id%ICNTL 194 IF (id%KEEP8(29) .NE. 0) THEN 195 MYIRN_loc=>id%IRN_loc 196 MYJCN_loc=>id%JCN_loc 197 MYA_loc=>id%A_loc 198 ELSE 199 MYIRN_loc=>DUMMYIRN_loc 200 MYJCN_loc=>DUMMYJCN_loc 201 MYA_loc=>DUMMYA_loc 202 ENDIF 203 N = id%N 204 EPS = epsilon ( ZERO ) 205C TIMINGS: reset to 0 206 id%DKEEP(92)=0.0D0 207 id%DKEEP(93)=0.0D0 208 id%DKEEP(94)=0.0D0 209 id%DKEEP(97)=0.0D0 210 id%DKEEP(98)=0.0D0 211 id%DKEEP(56)=0.0D0 212C Count of MPI messages: reset to 0 213 id%KEEP(266)=0 214 id%KEEP(267)=0 215C 216C BEGIN CASE OF ALLOCATED DATA FROM PREVIOUS CALLS 217 IF (associated(id%RHSCOMP)) THEN 218 DEALLOCATE(id%RHSCOMP) 219 NULLIFY(id%RHSCOMP) 220 id%KEEP8(25)=0_8 221 ENDIF 222 IF (associated(id%POSINRHSCOMP_ROW)) THEN 223 DEALLOCATE(id%POSINRHSCOMP_ROW) 224 NULLIFY(id%POSINRHSCOMP_ROW) 225 ENDIF 226 IF (id%POSINRHSCOMP_COL_ALLOC) THEN 227 DEALLOCATE(id%POSINRHSCOMP_COL) 228 NULLIFY(id%POSINRHSCOMP_COL) 229 id%POSINRHSCOMP_COL_ALLOC = .FALSE. 230 ENDIF 231C END CASE OF ALLOCATED DATA FROM PREVIOUS CALLS 232C 233C Related to forward in facto functionality (referred to as "Fwd in facto") 234 NULLIFY(RHS_MUMPS) 235 RHS_MUMPS_ALLOCATED = .FALSE. 236C ----------------------------------------------------------------------- 237C Set WK_USER_PROVIDED to true when workspace WK_USER is provided by user 238C We can accept WK_USER to be provided on only one proc and 239C different values of WK_USER per processor 240C 241 IF (id%KEEP8(24).GT.0_8) THEN 242C We nullify S so that later when we test 243C if (associated(S) we can free space and reallocate it). 244 NULLIFY(id%S) 245 ENDIF 246C 247C -- KEEP8(24) can now then be reset safely 248 WK_USER_PROVIDED = (id%LWK_USER.NE.0) 249 IF (WK_USER_PROVIDED) THEN 250 IF (id%LWK_USER.GT.0) THEN 251 id%KEEP8(24) = int(id%LWK_USER,8) 252 ELSE 253 iD%KEEP8(24) = -int(id%LWK_USER,8)* 1000000_8 254 ENDIF 255 ELSE 256 id%KEEP8(24) = 0_8 257 ENDIF 258C 259C KEEP8(26) might be modified 260C (element entry format) 261C but need be restore for 262C future factorisation 263C with different scaling option 264C 265 KEEP826_SAVE = id%KEEP8(26) 266C In case of loop on factorization with 267C different scaling options, initialize 268C DKEEP(4:5) to 0. 269 id%DKEEP(4)=-1.0D0 270 id%DKEEP(5)=-1.0D0 271C Mapping information used during solve. In case of several facto+solve 272C it has to be recomputed. In case of several solves with the same 273C facto, it is not recomputed. 274 IF (associated(id%IPTR_WORKING)) THEN 275 DEALLOCATE(id%IPTR_WORKING) 276 NULLIFY(id%IPTR_WORKING) 277 END IF 278 IF (associated(id%WORKING)) THEN 279 DEALLOCATE(id%WORKING) 280 NULLIFY(id%WORKING) 281 END IF 282C 283C Units for printing 284C MP: diagnostics 285C LP: errors 286C 287 LP = ICNTL( 1 ) 288 MP = ICNTL( 2 ) 289 MPG = ICNTL( 3 ) 290 LPOK = ((LP.GT.0).AND.(id%ICNTL(4).GE.1)) 291 PROK = ((MP.GT.0).AND.(id%ICNTL(4).GE.2)) 292 PROKG = ( MPG .GT. 0 .and. id%MYID .eq. MASTER ) 293 PROKG = (PROKG.AND.(id%ICNTL(4).GE.2)) 294 IF ( PROK ) WRITE( MP, 130 ) 295 IF ( PROKG ) WRITE( MPG, 130 ) 296 IF ( PROKG .and. KEEP(53).GT.0 ) THEN 297 WRITE(MPG,'(/A,I3)') ' Null space option :', KEEP(19) 298 IF ( KEEP(21) .ne. N ) THEN 299 WRITE( MPG, '(A,I10)') ' Max deficiency : ', KEEP(21) 300 END IF 301 IF ( KEEP(22) .ne. 0 ) THEN 302 WRITE( MPG, '(A,I10)') ' Min deficiency : ', KEEP(22) 303 END IF 304 END IF 305C ------------------------------------- 306C Depending on the type of parallelism, 307C the master can now (soon) potentially 308C have the role of a slave 309C ------------------------------------- 310 I_AM_SLAVE = ( id%MYID .ne. MASTER .OR. 311 & ( id%MYID .eq. MASTER .AND. 312 & KEEP(46) .eq. 1 ) ) 313C 314C Prepare work for out-of-core 315C 316 IF (id%MYID .EQ. MASTER .AND. KEEP(201) .NE. -1) THEN 317C Note that if KEEP(201)=-1, then we have decided 318C at analysis phase that factors will not be stored 319C (neither in memory nor on disk). In that case, 320C ICNTL(22) is ignored. 321C -- ICNTL(22) must be set before facto phase 322C (=1 OOC on; =0 OOC off) 323C and cannot be changed for subsequent solve phases. 324 KEEP(201)=id%ICNTL(22) 325 IF (KEEP(201) .NE. 0) THEN !Later: .GT. to allow ICNTL(22)=-1 326# if defined(OLD_OOC_NOPANEL) 327 KEEP(201)=2 328# else 329 KEEP(201)=1 330# endif 331 ENDIF 332 ENDIF 333 IF (id%MYID .EQ. MASTER) THEN 334 IF (id%KEEP(480).NE.0) THEN 335 id%KEEP(480) = 0 336 IF (PROK) 337 & write(MP,'(A)') 338 & ' MUMPS is not compiled with -DBLR_LUA ', 339 & ' => Resetting KEEP(480) to 0' 340 ENDIF 341 IF (id%KEEP(475).NE.0) THEN 342 id%KEEP(475) = 0 343 IF (PROK) 344 & write(MP,'(A)') 345 & ' MUMPS is not compiled with -DLRTRSM ', 346 & ' => Resetting KEEP(475) to 0' 347 ENDIF 348 ENDIF 349C ---------------------- 350C Broadcast KEEP options 351C defined for facto: 352C ---------------------- 353 CALL MPI_BCAST( KEEP(12), 1, MPI_INTEGER, 354 & MASTER, id%COMM, IERR ) 355 CALL MPI_BCAST( KEEP(19), 1, MPI_INTEGER, 356 & MASTER, id%COMM, IERR ) 357 CALL MPI_BCAST( KEEP(21), 1, MPI_INTEGER, 358 & MASTER, id%COMM, IERR ) 359 CALL MPI_BCAST( KEEP(201), 1, MPI_INTEGER, 360 & MASTER, id%COMM, IERR ) 361 IF (id%MYID.EQ.MASTER) THEN 362 IF ((KEEP(486).NE.0).AND.(KEEP(494).EQ.0)) THEN 363 IF (PROKG) THEN 364 WRITE(MPG,'(A)') 365 & " Internal ERROR with BLR setting " 366 WRITE(MPG,'(A)') " BLR was not activated during ", 367 & " analysis and is requested during factorization. " 368 id%INFO(1)=-900 369 ENDIF 370 ENDIF 371 ENDIF 372 CALL MPI_BCAST( KEEP(470), 23, MPI_INTEGER, 373 & MASTER, id%COMM, IERR ) 374 IF (id%MYID.EQ.MASTER) THEN 375 IF (KEEP(217).GT.2.OR.KEEP(217).LT.0) THEN 376 KEEP(217)=0 377 ENDIF 378 KEEP(214)=KEEP(217) 379 IF (KEEP(214).EQ.0) THEN 380 IF (KEEP(201).NE.0) THEN ! OOC or no factors 381 KEEP(214)=1 382 ELSE 383 KEEP(214)=2 384 ENDIF 385 ENDIF 386 ENDIF 387 CALL MPI_BCAST( KEEP(214), 1, MPI_INTEGER, 388 & MASTER, id%COMM, IERR ) 389 IF (KEEP(201).NE.0) THEN 390C -- Low Level I/O strategy 391 CALL MPI_BCAST( KEEP(99), 1, MPI_INTEGER, 392 & MASTER, id%COMM, IERR ) 393 CALL MPI_BCAST( KEEP(205), 1, MPI_INTEGER, 394 & MASTER, id%COMM, IERR ) 395 CALL MPI_BCAST( KEEP(211), 1, MPI_INTEGER, 396 & MASTER, id%COMM, IERR ) 397 ENDIF 398C 399C 400C KEEP(50) case 401C ============== 402C 403C KEEP(50) = 0 : matrix is unsymmetric 404C KEEP(50) /= 0 : matrix is symmetric 405C KEEP(50) = 1 : Ask L L^T on the root. Matrix is PSD. 406C KEEP(50) = 2 : Ask for L U on the root 407C KEEP(50) = 3 ... L D L^T ?? 408C 409C --------------------------------------- 410C For symmetric (non general) matrices 411C set (directly) CNTL(1) = 0.0 412C --------------------------------------- 413 IF ( KEEP(50) .eq. 1 ) THEN 414 IF (id%CNTL(1) .ne. ZERO ) THEN 415 IF ( PROKG ) THEN 416 WRITE(MPG,'(A)') 417 &' ** Warning : SPD solver called, resetting CNTL(1) to 0.0D0' 418 END IF 419 END IF 420 id%CNTL(1) = ZERO 421 END IF 422C Fwd in facto: explicitly forbid 423C sparse RHS and A-1 computation 424 IF (id%KEEP(252).EQ.1 .AND. id%MYID.EQ.MASTER) THEN 425 IF (id%ICNTL(20).EQ.1) THEN ! out-of-range => 0 426C NB: in doc ICNTL(20) only accessed during solve 427C In practice, will have failed earlier if RHS not allocated. 428C Still it looks safer to keep this test. 429 id%INFO(1)=-43 430 id%INFO(2)=20 431 IF (PROKG) WRITE(MPG,'(A)') 432 & ' ERROR: Sparse RHS is incompatible with forward', 433 & ' performed during factorization (ICNTL(32)=1)' 434 ELSE IF (id%ICNTL(30).NE.0) THEN ! out-of-range => 1 435 id%INFO(1)=-43 436 id%INFO(2)=30 437 IF (PROKG) WRITE(MPG,'(A)') 438 & ' ERROR: A-1 functionality incompatible with forward', 439 & ' performed during factorization (ICNTL(32)=1)' 440 ELSE IF (id%ICNTL(9) .NE. 1) THEN 441 id%INFO(1)=-43 442 id%INFO(2)=9 443 IF (PROKG) WRITE(MPG,'(A)') 444 & ' ERROR: Transpose system (ICNTL(9).NE.0) not ', 445 & ' compatible with forward performed during', 446 & ' factorization (ICNTL(32)=1)' 447 ENDIF 448 ENDIF 449 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 450 & id%COMM, id%MYID ) 451C 452 IF (id%INFO(1).LT.0) GOTO 530 453 IF ( PROKG ) THEN 454 WRITE( MPG, 172 ) id%NSLAVES, id%ICNTL(22), 455 & id%KEEP8(111), KEEP(126), KEEP(127), KEEP(28), 456 & id%KEEP8(4)/1000000_8, id%CNTL(1) 457 IF (KEEP(252).GT.0) 458 & WRITE(MPG,173) KEEP(253) 459 ENDIF 460 IF (KEEP(201).LE.0) THEN 461C In-core version or no factors 462 KEEP(IXSZ)=XSIZE_IC 463 ELSE IF (KEEP(201).EQ.2) THEN 464C OOC version, no panels 465 KEEP(IXSZ)=XSIZE_OOC_NOPANEL 466 ELSE IF (KEEP(201).EQ.1) THEN 467C Panel versions: 468 IF (KEEP(50).EQ.0) THEN 469 KEEP(IXSZ)=XSIZE_OOC_UNSYM 470 ELSE 471 KEEP(IXSZ)=XSIZE_OOC_SYM 472 ENDIF 473 ENDIF 474 IF ( KEEP(486) .NE. 0 ) THEN !LR is activated 475C Stats initialization for LR 476 CALL INIT_STATS_GLOBAL(id) 477 END IF 478C 479* ********************************** 480* Begin intializations regarding the 481* computation of the determinant 482* ********************************** 483 IF (id%MYID.EQ.MASTER) KEEP(258)=ICNTL(33) 484 CALL MPI_BCAST(KEEP(258), 1, MPI_INTEGER, 485 & MASTER, id%COMM, IERR) 486 IF (KEEP(258) .NE. 0) THEN 487 KEEP(259) = 0 ! Initial exponent of the local determinant 488 KEEP(260) = 1 ! Number of permutations 489 id%DKEEP(6) = 1.0D0 ! real part of the local determinant 490 id%DKEEP(7) = 0.0D0 ! imaginary part of the local determinant 491 ENDIF 492* ******************************** 493* End intializations regarding the 494* computation of the determinant 495* ******************************** 496C 497* ********************** 498* Begin of Scaling phase 499* ********************** 500C 501C SCALING MANAGEMENT 502C * Options 1, 3, 4 centralized only 503C 504C * Options 7, 8 : also works for distributed matrix 505C 506C At this point, we have the scaling arrays allocated 507C on the master. They have been allocated on the master 508C inside the main MUMPS driver. 509C 510 CALL MPI_BCAST(KEEP(52), 1, MPI_INTEGER, 511 & MASTER, id%COMM, IERR) 512 LSCAL = ((KEEP(52) .GT. 0) .AND. (KEEP(52) .LE. 8)) 513 IF (LSCAL) THEN 514C 515 IF ( id%MYID.EQ.MASTER ) THEN 516 CALL MUMPS_SECDEB(TIMEET) 517 ENDIF 518C ----------------------- 519C Retrieve parameters for 520C simultaneous scaling 521C ----------------------- 522 IF (KEEP(52) .EQ. 7) THEN 523C -- Cheap setting of SIMSCALING (it is the default in 4.8.4) 524 K231= KEEP(231) 525 K232= KEEP(232) 526 K233= KEEP(233) 527 ELSEIF (KEEP(52) .EQ. 8) THEN 528C -- More expensive setting of SIMSCALING (it was the default in 4.8.1,2,3) 529 K231= KEEP(239) 530 K232= KEEP(240) 531 K233= KEEP(241) 532 ENDIF 533 CALL MPI_BCAST(id%DKEEP(3),1,MPI_DOUBLE_PRECISION,MASTER, 534 & id%COMM,IERR) 535C 536 IF ( ((KEEP(52).EQ.7).OR.(KEEP(52).EQ.8)) .AND. 537 & KEEP(54).NE.0 ) THEN 538C ------------------------------ 539C Scaling for distributed matrix 540C We need to allocate scaling 541C arrays on all processors, not 542C only the master. 543C ------------------------------ 544 IF ( id%MYID .NE. MASTER ) THEN 545 IF ( associated(id%COLSCA)) 546 & DEALLOCATE( id%COLSCA ) 547 IF ( associated(id%ROWSCA)) 548 & DEALLOCATE( id%ROWSCA ) 549 ALLOCATE( id%COLSCA(N), stat=IERR) 550 IF (IERR .GT.0) THEN 551 id%INFO(1)=-13 552 id%INFO(2)=N 553 ENDIF 554 ALLOCATE( id%ROWSCA(N), stat=IERR) 555 IF (IERR .GT.0) THEN 556 id%INFO(1)=-13 557 id%INFO(2)=N 558 ENDIF 559 ENDIF 560 M = N 561 BUMAXMN=M 562 IF(N > BUMAXMN) BUMAXMN = N 563 LIWK = 4*BUMAXMN 564 ALLOCATE (IWK(LIWK),BURP(M),BUCP(N), 565 & BURS(2* (id%NPROCS)),BUCS(2* (id%NPROCS)), 566 & stat=allocok) 567 IF (allocok > 0) THEN 568 id%INFO(1)=-13 569 id%INFO(2)=LIWK+M+N+4* (id%NPROCS) 570 ENDIF 571C --- Propagate enventual error 572 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 573 & id%COMM, id%MYID ) 574 IF (id%INFO(1).LT.0) GOTO 530 575C -- estimation of memory and construction of partvecs 576 BUJOB = 1 577C -- LWK not used 578 LWK_REAL = 1 579 ALLOCATE(WK_REAL(LWK_REAL)) 580 CALL ZMUMPS_SIMSCALEABS( 581 & MYIRN_loc(1), MYJCN_loc(1), MYA_loc(1), 582 & id%KEEP8(29), 583 & M, N, id%NPROCS, id%MYID, id%COMM, 584 & BURP, BUCP, 585 & BURS, BUCS, BUREGISTRE, 586 & IWK, LIWK, 587 & BUINTSZ, BURESZ, BUJOB, 588 & id%ROWSCA(1), id%COLSCA(1), WK_REAL, LWK_REAL, 589 & id%KEEP(50), 590 & K231, K232, K233, 591 & id%DKEEP(3), 592 & SCONEERR, SCINFERR) 593 IF(LIWK < BUINTSZ) THEN 594 DEALLOCATE(IWK) 595 LIWK = BUINTSZ 596 ALLOCATE(IWK(LIWK), stat=allocok) 597 IF (allocok > 0) THEN 598 id%INFO(1)=-13 599 id%INFO(2)=LIWK 600 ENDIF 601 ENDIF 602 LWK_REAL = BURESZ 603 DEALLOCATE(WK_REAL) 604 ALLOCATE (WK_REAL(LWK_REAL), stat=allocok) 605 IF (allocok > 0) THEN 606 id%INFO(1)=-13 607 id%INFO(2)=LWK_REAL 608 ENDIF 609C --- Propagate enventual error 610 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 611 & id%COMM, id%MYID ) 612 IF (id%INFO(1).LT.0) GOTO 530 613C -- estimation of memory and construction of partvecs 614 BUJOB = 2 615 CALL ZMUMPS_SIMSCALEABS( 616 & MYIRN_loc(1), MYJCN_loc(1), MYA_loc(1), 617 & id%KEEP8(29), 618 & M, N, id%NPROCS, id%MYID, id%COMM, 619 & BURP, BUCP, 620 & BURS, BUCS, BUREGISTRE, 621 & IWK, LIWK, 622 & BUINTSZ, BURESZ, BUJOB, 623 & id%ROWSCA(1), id%COLSCA(1), WK_REAL, LWK_REAL, 624 & id%KEEP(50), 625 & K231, K232, K233, 626 & id%DKEEP(3), 627 & SCONEERR, SCINFERR) 628 id%DKEEP(4) = SCONEERR 629 id%DKEEP(5) = SCINFERR 630CXXXX 631 DEALLOCATE(IWK, WK_REAL,BURP,BUCP,BURS, BUCS) 632 ELSE IF ( KEEP(54) .EQ. 0 ) THEN 633C ------------------ 634C Centralized matrix 635C ------------------ 636 IF ((KEEP(52).EQ.7).OR.(KEEP(52).EQ.8)) THEN 637C ------------------------------- 638C Create a communicator of size 1 639C ------------------------------- 640 IF (id%MYID.EQ.MASTER) THEN 641 COLOUR = 0 642 ELSE 643 COLOUR = MPI_UNDEFINED 644 ENDIF 645 CALL MPI_COMM_SPLIT( id%COMM, COLOUR, 0, 646 & COMM_FOR_SCALING, IERR ) 647 IF (id%MYID.EQ.MASTER) THEN 648 M = N 649 BUMAXMN=N 650CXXXX 651 IF(N > BUMAXMN) BUMAXMN = N 652 LIWK = 1 653 ALLOCATE (IWK(LIWK),BURP(1),BUCP(1), 654 & BURS(1),BUCS(1), 655 & stat=allocok) 656 LWK_REAL = M + N 657 ALLOCATE (WK_REAL(LWK_REAL), stat=allocok) 658 IF (allocok > 0) THEN 659 id%INFO(1)=-13 660 id%INFO(2)=1 661 ENDIF 662 IF (id%INFO(1) .LT. 0) GOTO 400 663 CALL MPI_COMM_RANK(COMM_FOR_SCALING, SCMYID, IERR) 664 CALL MPI_COMM_SIZE(COMM_FOR_SCALING, SCNPROCS, IERR) 665 BUJOB = 1 666 CALL ZMUMPS_SIMSCALEABS( 667 & id%IRN(1), id%JCN(1), id%A(1), 668 & id%KEEP8(28), 669 & M, N, SCNPROCS, SCMYID, COMM_FOR_SCALING, 670 & BURP, BUCP, 671 & BURS, BUCS, BUREGISTRE, 672 & IWK, LIWK, 673 & BUINTSZ, BURESZ, BUJOB, 674 & id%ROWSCA(1), id%COLSCA(1), WK_REAL, LWK_REAL, 675 & id%KEEP(50), 676 & K231, K232, K233, 677 & id%DKEEP(3), 678 & SCONEERR, SCINFERR) 679 IF(LWK_REAL < BURESZ) THEN 680 ! internal error since LWK_REAL=BURESZ=M+N 681 id%INFO(1) = -136 682 GOTO 400 683 ENDIF 684 BUJOB = 2 685 CALL ZMUMPS_SIMSCALEABS(id%IRN(1), 686 & id%JCN(1), id%A(1), 687 & id%KEEP8(28), 688 & M, N, SCNPROCS, SCMYID, COMM_FOR_SCALING, 689 & BURP, BUCP, 690 & BURS, BUCS, BUREGISTRE, 691 & IWK, LIWK, 692 & BUINTSZ, BURESZ, BUJOB, 693 & id%ROWSCA(1), id%COLSCA(1), WK_REAL, LWK_REAL, 694 & id%KEEP(50), 695 & K231, K232, K233, 696 & id%DKEEP(3), 697 & SCONEERR, SCINFERR) 698 id%DKEEP(4) = SCONEERR 699 id%DKEEP(5) = SCINFERR 700CXXXX 701 DEALLOCATE(WK_REAL) 702 DEALLOCATE (IWK,BURP,BUCP, 703 & BURS,BUCS) 704 ENDIF 705C Centralized matrix: make DKEEP(4:5) available to all processors 706 CALL MPI_BCAST( id%DKEEP(4),2,MPI_DOUBLE_PRECISION, 707 & MASTER, id%COMM, IERR ) 708 400 CONTINUE 709 IF (id%MYID.EQ.MASTER) THEN 710C Communicator should only be 711C freed on the master process 712 CALL MPI_COMM_FREE(COMM_FOR_SCALING, IERR) 713 ENDIF 714 CALL MUMPS_PROPINFO(ICNTL(1), id%INFO(1), 715 & id%COMM, id%MYID) 716 IF (id%INFO(1).LT.0) GOTO 530 717 ELSE IF (id%MYID.EQ.MASTER) THEN 718C ---------------------------------- 719C Centralized scaling, options 1 to 6 720C ---------------------------------- 721 IF (KEEP(52).GT.0 .AND. KEEP(52).LE.6) THEN 722C --------------------- 723C Allocate temporary 724C workspace for scaling 725C --------------------- 726 IF ( KEEP(52) .eq. 5 .or. 727 & KEEP(52) .eq. 6 ) THEN 728C We have an explicit copy of the original 729C matrix in complex format which should probably 730C be avoided (but do we want to keep all 731C those old scaling options ?) 732 LWK = id%KEEP8(28) 733 ELSE 734 LWK = 1_8 735 END IF 736 LWK_REAL = 5 * N 737 ALLOCATE( WK_REAL( LWK_REAL ), stat = IERR ) 738 IF ( IERR .GT. 0 ) THEN 739 id%INFO(1) = -13 740 id%INFO(2) = LWK_REAL 741 GOTO 137 742 END IF 743 ALLOCATE( WK( LWK ), stat = IERR ) 744 IF ( IERR .GT. 0 ) THEN 745 id%INFO(1) = -13 746 CALL MUMPS_SET_IERROR(LWK, id%INFO(2)) 747 GOTO 137 748 END IF 749 CALL ZMUMPS_FAC_A(N, id%KEEP8(28), KEEP(52), id%A(1), 750 & id%IRN(1), id%JCN(1), 751 & id%COLSCA(1), id%ROWSCA(1), 752 & WK, LWK, WK_REAL, LWK_REAL, ICNTL(1), id%INFO(1) ) 753 DEALLOCATE( WK_REAL ) 754 DEALLOCATE( WK ) 755 ENDIF 756 ENDIF 757 ENDIF ! Scaling distributed matrices or centralized 758 IF (id%MYID.EQ.MASTER) THEN 759 CALL MUMPS_SECFIN(TIMEET) 760 id%DKEEP(92)=TIMEET 761C Print inf-norm after last KEEP(233) iterations of 762C scaling option KEEP(52)=7 or 8 (SimScale) 763C 764 IF (PROKG.AND.(KEEP(52).EQ.7.OR.KEEP(52).EQ.8) 765 & .AND. (K233+K231+K232).GT.0) THEN 766 IF (K232.GT.0) WRITE(MPG, 166) id%DKEEP(4) 767 ENDIF 768 ENDIF 769 ENDIF ! LSCAL 770C 771C scaling might also be provided by the user 772 LSCAL = (LSCAL .OR. (KEEP(52) .EQ. -1) .OR. KEEP(52) .EQ. -2) 773 IF (LSCAL .AND. KEEP(258).NE.0 .AND. id%MYID .EQ. MASTER) THEN 774 DO I = 1, id%N 775 CALL ZMUMPS_UPDATEDETER_SCALING(id%ROWSCA(I), 776 & id%DKEEP(6), ! determinant 777 & KEEP(259)) ! exponent of the determinant 778 ENDDO 779 IF (KEEP(50) .EQ. 0) THEN ! unsymmetric 780 DO I = 1, id%N 781 CALL ZMUMPS_UPDATEDETER_SCALING(id%COLSCA(I), 782 & id%DKEEP(6), ! determinant 783 & KEEP(259)) ! exponent of the determinant 784 ENDDO 785 ELSE 786C ----------------------------------------- 787C In this case COLSCA = ROWSCA 788C Since determinant was initialized to 1, 789C compute square of the current determinant 790C rather than going through COLSCA. 791C ----------------------------------------- 792 CALL ZMUMPS_DETER_SQUARE(id%DKEEP(6), KEEP(259)) 793 ENDIF 794C Now we should have taken the 795C inverse of the scaling vectors 796 CALL ZMUMPS_DETER_SCALING_INVERSE(id%DKEEP(6), KEEP(259)) 797 ENDIF 798C 799C ******************** 800C End of Scaling phase 801C At this point: either (matrix is distributed and KEEP(52)=7 or 8) 802C in which case scaling arrays are allocated on all processors, 803C or scaling arrays are only on the host processor. 804C In case of distributed matrix input, we will free the scaling 805C arrays on procs with MYID .NE. 0 after the all-to-all distribution 806C of the original matrix. 807C ******************** 808C 809 137 CONTINUE 810C Fwd in facto: in case of repeated factorizations 811C with different Schur options we prefer to free 812C systematically this array now than waiting for 813C the root node. We rely on the fact that it is 814C allocated or not during the solve phase so if 815C it was allocated in a 1st call to facto and not 816C in a second, we don't want the solve to think 817C it was allocated in the second call. 818 IF (associated(id%root%RHS_CNTR_MASTER_ROOT)) THEN 819 DEALLOCATE (id%root%RHS_CNTR_MASTER_ROOT) 820 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT) 821 ENDIF 822C Fwd in facto: check that id%NRHS has not changed 823 IF ( id%MYID.EQ.MASTER.AND. KEEP(252).EQ.1 .AND. 824 & id%NRHS .NE. id%KEEP(253) ) THEN 825C Error: NRHS should not have 826C changed since the analysis 827 id%INFO(1)=-42 828 id%INFO(2)=id%KEEP(253) 829 ENDIF 830C Fwd in facto: allocate and broadcast RHS_MUMPS 831C to make it available on all processors. 832 IF (id%KEEP(252) .EQ. 1) THEN 833 IF ( id%MYID.NE.MASTER ) THEN 834 id%KEEP(254) = N ! Leading dimension 835 id%KEEP(255) = N*id%KEEP(253) ! Tot size 836 ALLOCATE(RHS_MUMPS(id%KEEP(255)),stat=IERR) 837 IF (IERR > 0) THEN 838 id%INFO(1)=-13 839 id%INFO(2)=id%KEEP(255) 840 IF (LPOK) 841 & WRITE(LP,*) 'ERREUR while allocating RHS on a slave' 842 NULLIFY(RHS_MUMPS) 843 ENDIF 844 RHS_MUMPS_ALLOCATED = .TRUE. 845 ELSE 846C Case of non working master 847 id%KEEP(254)=id%LRHS ! Leading dimension 848 id%KEEP(255)=id%LRHS*(id%KEEP(253)-1)+id%N ! Tot size 849 RHS_MUMPS=>id%RHS 850 RHS_MUMPS_ALLOCATED = .FALSE. 851 IF (LSCAL) THEN 852C Scale before broadcast: apply row 853C scaling (remark that we assume no 854C transpose). 855 DO K=1, id%KEEP(253) 856 DO I=1, N 857 RHS_MUMPS( id%KEEP(254) * (K-1) + I ) 858 & = RHS_MUMPS( id%KEEP(254) * (K-1) + I ) 859 & * id%ROWSCA(I) 860 ENDDO 861 ENDDO 862 ENDIF 863 ENDIF 864 ELSE 865 id%KEEP(255)=1 866 ALLOCATE(RHS_MUMPS(1)) 867 RHS_MUMPS_ALLOCATED = .TRUE. 868 ENDIF 869 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 870 & id%COMM, id%MYID ) 871 IF ( id%INFO(1).lt.0 ) GOTO 530 872 IF (KEEP(252) .EQ. 1) THEN 873C 874C Broadcast the columns of the right-hand side 875C one by one. Leading dimension is keep(254)=N 876C on procs with MYID > 0 but may be larger on 877C the master processor. 878 DO I= 1, id%KEEP(253) 879 CALL MPI_BCAST(RHS_MUMPS((I-1)*id%KEEP(254)+1), N, 880 & MPI_DOUBLE_COMPLEX, MASTER,id%COMM,IERR) 881 END DO 882 ENDIF 883C Keep a copy of ICNTL(24) and make it 884C available on all working processors. 885 KEEP(110)=id%ICNTL(24) 886 CALL MPI_BCAST(KEEP(110), 1, MPI_INTEGER, 887 & MASTER, id%COMM, IERR) 888C KEEP(110) defaults to 0 for out of range values 889 IF (KEEP(110).NE.1) KEEP(110)=0 890 IF (KEEP(219).NE.0) THEN 891 CALL ZMUMPS_BUF_MAX_ARRAY_MINSIZE(max(KEEP(108),1),IERR) 892 IF (IERR .NE. 0) THEN 893C ------------------------ 894C Error allocating ZMUMPS_BUF 895C ------------------------ 896 id%INFO(1) = -13 897 id%INFO(2) = max(KEEP(108),1) 898 END IF 899 ENDIF 900* 901* 902C ----------------------------------------------- 903C Depending on the option used for 904C -detecting null pivots (ICNTL(24)/KEEP(110)) 905C CNTL(3) is used to set DKEEP(1) 906C ( A row is considered as null if ||row|| < DKEEP(1) ) 907C CNTL(5) is then used to define if a large 908C value is set on the diagonal or if a 1 is set 909C and other values in the row are reset to zeros. 910C -Rank revealing on the Schur (ICNTL(16)/KEEP(19)) 911C CNTL(6) is used to set SEUIL and SEUIL_LDLT_NIV2 912C SEUIL* corresponds to the minimum required 913C absolute value of pivot. 914C SEUIL_LDLT_NIV2 is used only in the 915C case of SYM=2 within a niv2 node for which 916C we have only a partial view of the fully summed rows. 917C Note that SEUIL* might be reset later in this routine 918C but only when static pivoting is on 919C which will be excluded if null pivots or 920C rank-revealing (RR) is on 921C ----------------------------------------------- 922 IF (id%MYID .EQ. MASTER) CNTL3 = id%CNTL(3) 923 CALL MPI_BCAST(CNTL3, 1, MPI_DOUBLE_PRECISION, 924 & MASTER, id%COMM, IERR) 925 IF (id%MYID .EQ. MASTER) CNTL5 = id%CNTL(5) 926 CALL MPI_BCAST(CNTL5, 1, MPI_DOUBLE_PRECISION, 927 & MASTER, id%COMM, IERR) 928 IF (id%MYID .EQ. MASTER) CNTL6 = id%CNTL(6) 929 CALL MPI_BCAST(CNTL6, 1, MPI_DOUBLE_PRECISION, 930 & MASTER, id%COMM, IERR) 931 IF (id%MYID .EQ. MASTER) CNTL1 = id%CNTL(1) 932 CALL MPI_BCAST(CNTL1, 1, MPI_DOUBLE_PRECISION, 933 & MASTER, id%COMM, IERR) 934 IF (id%MYID .EQ. MASTER) id%DKEEP(8) = id%CNTL(7) 935 CALL MPI_BCAST(id%DKEEP(8), 1, MPI_DOUBLE_PRECISION, 936 & MASTER, id%COMM, IERR) 937 IF (KEEP(486).EQ.0) id%DKEEP(8) = ZERO 938 COMPUTE_ANORMINF = .FALSE. 939 IF ( (KEEP(486) .NE. 0).AND. (id%DKEEP(8).LT.ZERO)) THEN 940 COMPUTE_ANORMINF = .TRUE. 941 ENDIF 942 IF (KEEP(19).NE.0) THEN 943 COMPUTE_ANORMINF = .TRUE. 944 ENDIF 945C ------------------------------------------------------- 946C We compute ANORMINF, when needed, based on 947C the infinite norm of Rowsca *A*Colsca 948C and make it available on all working processes. 949 IF (COMPUTE_ANORMINF) THEN 950 CALL ZMUMPS_ANORMINF( id , ANORMINF, LSCAL ) 951 ELSE 952 ANORMINF = ZERO 953 ENDIF 954C 955C Set BLR threshold 956 IF (id%DKEEP(8).LT.ZERO) THEN 957 id%DKEEP(8) = abs(id%DKEEP(8))*ANORMINF 958 ENDIF 959 IF (KEEP(19).EQ.0) THEN 960C -- RR is off 961 SEUIL = ZERO 962 id%DKEEP(9) = ZERO 963 ELSE 964C -- RR is on 965C July 2012 966C CNTL(3) is the threshold used in the following 967C to compute the SEUIL used for postponing pivots to root 968C SEUIL*CNTL(6) is then the treshold for null pivot detection 969C (with 0< CNTL(6) <= 1) 970 IF (CNTL3 .LT. ZERO) THEN 971 SEUIL = abs(CNTL(3)) 972 ELSE IF (CNTL3 .GT. ZERO) THEN 973 SEUIL = CNTL3*ANORMINF 974 ELSE ! (CNTL(3) .EQ. ZERO) THEN 975 SEUIL = N*EPS*ANORMINF ! standard articles 976 ENDIF 977 IF (PROKG) WRITE(MPG,*) 978 & ' ABSOLUTE PIVOT THRESHOLD for rank revealing =',SEUIL 979 ENDIF 980C After QR with pivoting of root or SVD, diagonal entries 981C need be analysed to determine null space vectors. 982C Two strategies are provided : 983 id%DKEEP(9) = SEUIL 984 IF (id%DKEEP(10).LT.MONE) THEN 985 id%DKEEP(10)=MONE 986 ELSEIF((id%DKEEP(10).LE.ONE).AND.(id%DKEEP(10).GE.ZERO)) THEN 987 id%DKEEP(10)=1000.0D0 988 ENDIF 989 SEUIL_LDLT_NIV2 = SEUIL 990C ------------------------------- 991C -- Null pivot row detection 992C ------------------------------- 993 IF (KEEP(110).EQ.0) THEN 994C Initialize DKEEP(1) to a negative value 995C in order to avoid detection of null pivots 996C (test max(AMAX,RMAX,abs(PIVOT)).LE.PIVNUL 997C in ZMUMPS_FAC_I, where PIVNUL=DKEEP(1)) 998 id%DKEEP(1) = -1.0D0 999 id%DKEEP(2) = ZERO 1000 ELSE 1001 IF (ANORMINF.EQ.ZERO) 1002 & CALL ZMUMPS_ANORMINF( id , ANORMINF, LSCAL ) 1003 IF (KEEP(19).NE.0) THEN 1004C RR postponing considers that pivot rows of norm smaller that SEUIL 1005C should be postponed. 1006C Pivot rows smaller than DKEEP(1) are directly added to null space 1007C and thus considered as null pivot rows. Thus we define id%DKEEP(1) 1008C relatively to SEUIL (which is based on CNTL(3)) 1009 IF (CNTL(6).GT.0.AND.CNTL(6).LT.1) THEN 1010C we want DKEEP(1) < SEUIL 1011 id%DKEEP(1) = SEUIL*CNTL(6) 1012 ELSE 1013 id%DKEEP(1) = SEUIL* 0.01D0 1014 ENDIF 1015 ELSE 1016C We keep strategy currently used in MUMPS 4.10.0 1017 IF (CNTL3 .LT. ZERO) THEN 1018 id%DKEEP(1) = abs(CNTL(3)) 1019 ELSE IF (CNTL3 .GT. ZERO) THEN 1020 id%DKEEP(1) = CNTL3*ANORMINF 1021 ELSE ! (CNTL(3) .EQ. ZERO) THEN 1022 id%DKEEP(1) = 1.0D-5*EPS*ANORMINF 1023 ENDIF 1024 ENDIF 1025 IF (PROKG) WRITE(MPG,*) 1026 & ' ZERO PIVOT DETECTION ON, THRESHOLD =',id%DKEEP(1) 1027 IF (CNTL5.GT.ZERO) THEN 1028 id%DKEEP(2) = CNTL5 * ANORMINF 1029 IF (PROKG) WRITE(MPG,*) 1030 & ' FIXATION FOR NULL PIVOTS =',id%DKEEP(2) 1031 ELSE 1032 IF (PROKG) WRITE(MPG,*) 'INFINITE FIXATION ' 1033 IF (id%KEEP(50).EQ.0) THEN 1034C Unsym 1035 ! the user let us choose a fixation. set in NEGATIVE 1036 ! to detect during facto when to set row to zero ! 1037 id%DKEEP(2) = -max(1.0D10*ANORMINF, 1038 & sqrt(huge(ANORMINF))/1.0D8) 1039 ELSE 1040C Sym 1041 id%DKEEP(2) = ZERO 1042 ENDIF 1043 ENDIF 1044 ENDIF 1045C Find id of root node if RR is on 1046 IF (KEEP(53).NE.0) THEN 1047 ID_ROOT =MUMPS_PROCNODE(id%PROCNODE_STEPS(id%STEP(KEEP(20))), 1048 & id%NSLAVES) 1049 IF ( KEEP( 46 ) .NE. 1 ) THEN 1050 ID_ROOT = ID_ROOT + 1 1051 END IF 1052 ENDIF 1053C Second pass: set parameters for null pivot detection 1054C Allocate PIVNUL_LIST in case of null pivot detection 1055C and in case of rank revealing 1056 LPN_LIST = 1 1057 IF ( associated( id%PIVNUL_LIST) ) DEALLOCATE(id%PIVNUL_LIST) 1058 IF(KEEP(110) .EQ. 1) THEN 1059 LPN_LIST = N 1060 ENDIF 1061 IF (KEEP(19).NE.0 .AND. 1062 & (ID_ROOT.EQ.id%MYID .OR. id%MYID.EQ.MASTER)) THEN 1063 LPN_LIST = N 1064 ENDIF 1065 ALLOCATE( id%PIVNUL_LIST(LPN_LIST),stat = IERR ) 1066 IF ( IERR .GT. 0 ) THEN 1067 id%INFO(1)=-13 1068 id%INFO(2)=LPN_LIST 1069 END IF 1070 id%PIVNUL_LIST(1:LPN_LIST) = 0 1071 KEEP(109) = 0 1072C end set parameter for null pivot detection 1073 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1074 & id%COMM, id%MYID ) 1075 IF ( id%INFO(1).lt.0 ) GOTO 530 1076C -------------------------------------------------------------- 1077C STATIC PIVOTING 1078C -- Static pivoting only when RR and Null pivot detection OFF 1079C -------------------------------------------------------------- 1080 IF ((KEEP(19).EQ.0).AND.(KEEP(110).EQ.0)) THEN 1081C -- Set KEEP(97) and compute static pivoting threshold. 1082 IF (id%MYID .EQ. MASTER) CNTL4 = id%CNTL(4) 1083 CALL MPI_BCAST( CNTL4, 1, MPI_DOUBLE_PRECISION, 1084 & MASTER, id%COMM, IERR ) 1085C 1086 IF ( CNTL4 .GE. ZERO ) THEN 1087 KEEP(97) = 1 1088 IF ( CNTL4 .EQ. ZERO ) THEN 1089C -- set seuil to sqrt(eps)*||A|| 1090 IF(ANORMINF .EQ. ZERO) THEN 1091 CALL ZMUMPS_ANORMINF( id , ANORMINF, LSCAL ) 1092C WRITE(*,*) id%MYID,': ANORMINF',ANORMINF 1093 ENDIF 1094 SEUIL = sqrt(EPS) * ANORMINF 1095 ELSE 1096C WRITE(*,*) 'id%CNTL(4)',id%CNTL(4) 1097 SEUIL = CNTL4 1098 ENDIF 1099 SEUIL_LDLT_NIV2 = SEUIL 1100C 1101 ELSE 1102 SEUIL = ZERO 1103 ENDIF 1104 ENDIF 1105C set number of tiny pivots / 2x2 pivots in types 1 / 1106C 2x2 pivots in types 2, to zero. This is because the 1107C user can call the factorization step several times. 1108 KEEP(98) = 0 1109 KEEP(103) = 0 1110 KEEP(105) = 0 1111 MAXS = 1_8 1112C 1113C The memory allowed is given by ICNTL(23) in Mbytes 1114C 0 means that nothing is provided. 1115C Save memory available, ICNTL(23) in KEEP8(4) 1116C 1117 IF ( id%MYID.EQ.MASTER ) THEN 1118 ITMP = ICNTL(23) 1119 END IF 1120 CALL MPI_BCAST( ITMP, 1, MPI_INTEGER, 1121 & MASTER, id%COMM, IERR ) 1122C 1123C Ignore ICNTL(23) when WK_USER is provided 1124c by resetting ITMP to zero on each proc where WK_USER is provided 1125 IF (WK_USER_PROVIDED) ITMP = 0 1126 ITMP8 = int(ITMP, 8) 1127 id%KEEP8(4) = ITMP8 * 1000000_8 ! convert to nb of bytes 1128* 1129* Start allocations 1130* ***************** 1131* 1132C 1133C The slaves can now perform the factorization 1134C 1135C 1136C Allocate S on all nodes 1137C or point to user provided data WK_USER when LWK_USER>0 1138C ======================= 1139C 1140C 1141 PERLU = KEEP(12) 1142 IF (KEEP(201) .EQ. 0) THEN 1143C In-core 1144 MAXS_BASE8=id%KEEP8(12) 1145 ELSE 1146C OOC or no factors stored 1147 MAXS_BASE8=id%KEEP8(14) 1148 ENDIF 1149 IF (WK_USER_PROVIDED) THEN 1150C -- Set MAXS to size of WK_USER_ 1151 MAXS = id%KEEP8(24) 1152 ELSE 1153 IF ( MAXS_BASE8 .GT. 0_8 ) THEN 1154 MAXS_BASE_RELAXED8 = 1155 & MAXS_BASE8 + int(PERLU,8) * ( MAXS_BASE8 / 100_8 + 1_8) 1156C If PERLU < 0, we may obtain a 1157C null or negative value of MAXS. 1158 IF (MAXS_BASE_RELAXED8 > huge(MAXS)) THEN 1159C id%INFO(1)=-37 1160C id%INFO(2)=int(MAXS_BASE_RELAXED8/1000000_8) 1161 WRITE(*,*) "Internal error: I8 overflow" 1162 CALL MUMPS_ABORT() 1163 ENDIF 1164 MAXS_BASE_RELAXED8 = max(MAXS_BASE_RELAXED8, 1_8) 1165 MAXS = MAXS_BASE_RELAXED8 1166C Note that in OOC this value of MAXS will be 1167C overwritten if KEEP(96) .NE. 0 or if 1168C ICNTL(23) (that is, KEEP8(4)) is provided. 1169 ELSE 1170 MAXS = 1_8 1171 MAXS_BASE_RELAXED8 = 1_8 1172 END IF 1173 ENDIF 1174 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1175 & id%COMM, id%MYID ) 1176 IF (id%INFO(1) .LT. 0) THEN 1177 GOTO 530 1178 ENDIF 1179C 1180C If KEEP(96) is provided, 1181C use it without asking questions 1182C 1183 IF ((.NOT.WK_USER_PROVIDED).AND.(I_AM_SLAVE)) THEN 1184C 1185C 1186 IF (KEEP(96).GT.0) THEN 1187C -- useful mostly for internal testing: 1188C -- we can force in this way a given value 1189C -- of MAXS and forget about other input values 1190C -- such as ICNTL(23) (KEEP8(4)/1D6) 1191C -- that could change MAXS value. 1192 MAXS=int(KEEP(96),8) 1193 ELSE 1194 IF (id%KEEP8(4) .NE. 0_8) THEN 1195C ------------------------- 1196C WE TRY TO USE MEM_ALLOWED (KEEP8(4)/1D6) 1197C ------------------------- 1198C First compute what we have: TOTAL_MBYTES(PERLU) 1199C and TOTAL_BYTES(PERLU) 1200C 1201 PERLU_ON = .TRUE. 1202 CALL ZMUMPS_MAX_MEM( id%KEEP(1), id%KEEP8(1), 1203 & id%MYID, id%N, id%NELT, id%NA(1), id%LNA, 1204 & id%KEEP8(28), id%KEEP8(30), 1205 & id%NSLAVES, TOTAL_MBYTES, .FALSE., KEEP(201), 1206 & PERLU_ON, TOTAL_BYTES) 1207C 1208C Assuming that TOTAL_BYTES is due to MAXS rather than 1209C to the temporary buffers used for the distribution of 1210C the matrix on the slaves (arrowheads or element distrib), 1211C then we have: 1212C 1213C KEEP8(4)-TOTAL_BYTES is the extra free space 1214C 1215C A simple algorithm to redistribute the extra space: 1216C All extra freedom (it could be negative !) is added to MAXS: 1217 MAXS_BASE_RELAXED8=MAXS_BASE_RELAXED8 + 1218 & (id%KEEP8(4)-TOTAL_BYTES)/int(KEEP(35),8) 1219 IF (MAXS_BASE_RELAXED8 > int(huge(MAXS),8)) THEN 1220 WRITE(*,*) "Internal error: I8 overflow" 1221 CALL MUMPS_ABORT() 1222 ELSE IF (MAXS_BASE_RELAXED8 .LE. 0_8) THEN 1223C We need more space in order to at least enough 1224 id%INFO(1)=-9 1225 IF ( -MAXS_BASE_RELAXED8 .GT. 1226 & int(huge(id%INFO(1)),8) ) THEN 1227 WRITE(*,*) "I8: OVERFLOW" 1228 CALL MUMPS_ABORT() 1229 ENDIF 1230 id%INFO(2)=-int(MAXS_BASE_RELAXED8) 1231 ELSE 1232 MAXS=MAXS_BASE_RELAXED8 1233 ENDIF 1234 ENDIF 1235 ENDIF 1236 ENDIF ! I_AM_SLAVE 1237 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1238 & id%COMM, id%MYID ) 1239 IF (id%INFO(1) .LT. 0) THEN 1240 GOTO 530 1241 ENDIF 1242 CALL ZMUMPS_AVGMAX_STAT8(PROKG, MPG, MAXS, id%NSLAVES, 1243 & id%COMM, "effective relaxed size of S =") 1244C Next PROPINFO is there for possible negative 1245C values of MAXS resulting from small MEM_ALLOWED 1246 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1247 & id%COMM, id%MYID ) 1248 IF (id%INFO(1) .LT. 0) THEN 1249C We jump after the call to LOAD_END and OOC_END since we didn't 1250C called yet OOC_INIT and LOAD_INIT 1251 GOTO 530 1252 ENDIF 1253 IF ( I_AM_SLAVE ) THEN 1254C ------------------ 1255C Dynamic scheduling 1256C ------------------ 1257 CALL ZMUMPS_LOAD_SET_INICOST( dble(id%COST_SUBTREES), 1258 & KEEP(64), KEEP(66), KEEP(375), MAXS ) 1259 K28=KEEP(28) 1260 MEMORY_MD_ARG = min(int(PERLU,8) * ( MAXS_BASE8 / 100_8 + 1_8 ), 1261C Restrict freedom from dynamic scheduler when 1262C MEM_ALLOWED=ICNTL(23) is small (case where KEEP8(4)-TOTAL_BYTES 1263C is negative after call to ZMUMPS_MAX_MEM) 1264 & max(0_8, MAXS-MAXS_BASE8)) 1265 CALL ZMUMPS_LOAD_INIT( id, MEMORY_MD_ARG, MAXS ) 1266C 1267C Out-Of-Core (OOC) issues. Case where we ran one factorization OOC 1268C and the second one is in-core: we try to free OOC 1269C related data from previous factorization. 1270C 1271 CALL ZMUMPS_CLEAN_OOC_DATA(id, IERR) 1272 IF (IERR < 0) THEN 1273 id%INFO(1) = -90 1274 id%INFO(2) = 0 1275 GOTO 112 1276 ENDIF 1277 IF (KEEP(201) .GT. 0) THEN 1278C ------------------- 1279C OOC initializations 1280C ------------------- 1281 IF (KEEP(201).EQ.1 !PANEL Version 1282 & .AND.KEEP(50).EQ.0 ! Unsymmetric 1283 & .AND.KEEP(251).NE.2 ! Store L to disk 1284 & ) THEN 1285 id%OOC_NB_FILE_TYPE=2 ! declared in MUMPS_OOC_COMMON 1286 ELSE 1287 id%OOC_NB_FILE_TYPE=1 ! declared in MUMPS_OOC_COMMON 1288 ENDIF 1289C ------------------------------ 1290C Dimension IO buffer, KEEP(100) 1291C ------------------------------ 1292 IF (KEEP(205) .GT. 0) THEN 1293 KEEP(100) = KEEP(205) 1294 ELSE 1295 IF (KEEP(201).EQ.1) THEN ! PANEL version 1296 I8TMP = int(id%OOC_NB_FILE_TYPE,8) * 1297 & 2_8 * int(KEEP(226),8) 1298 ELSE 1299 I8TMP = 2_8 * id%KEEP8(119) 1300 ENDIF 1301 I8TMP = I8TMP + int(max(KEEP(12),0),8) * 1302 & (I8TMP/100_8+1_8) 1303C we want to avoid too large IO buffers. 1304C 12M corresponds to 100Mbytes given to buffers. 1305 I8TMP = min(I8TMP, 12000000_8) 1306 KEEP(100)=int(I8TMP) 1307 ENDIF 1308 IF (KEEP(201).EQ.1) THEN 1309C Panel version. Force the use of a buffer. 1310 IF ( KEEP(99) < 3 ) THEN 1311 KEEP(99) = KEEP(99) + 3 1312 ENDIF 1313 ENDIF 1314C -------------------------- 1315C Reset KEEP(100) to 0 if no 1316C buffer is used for OOC. 1317C -------------------------- 1318 IF (KEEP(99) .LT.3) KEEP(100)=0 1319 IF((dble(KEEP(100))*dble(KEEP(35))/dble(2)).GT. 1320 & (dble(1999999999)))THEN 1321 IF (PROKG) THEN 1322 WRITE(MPG,*)id%MYID,': Warning: DIM_BUF_IO might be 1323 & too big for Filesystem' 1324 ENDIF 1325 ENDIF 1326 ALLOCATE (id%OOC_INODE_SEQUENCE(KEEP(28), 1327 & id%OOC_NB_FILE_TYPE), 1328 & stat=IERR) 1329 IF ( IERR .GT. 0 ) THEN 1330 id%INFO(1) = -13 1331 id%INFO(2) = id%OOC_NB_FILE_TYPE*KEEP(28) 1332 NULLIFY(id%OOC_INODE_SEQUENCE) 1333 GOTO 112 1334 ENDIF 1335 ALLOCATE (id%OOC_TOTAL_NB_NODES(id%OOC_NB_FILE_TYPE), 1336 & stat=IERR) 1337 IF ( IERR .GT. 0 ) THEN 1338 id%INFO(1) = -13 1339 id%INFO(2) = id%OOC_NB_FILE_TYPE 1340 NULLIFY(id%OOC_TOTAL_NB_NODES) 1341 GOTO 112 1342 ENDIF 1343 ALLOCATE (id%OOC_SIZE_OF_BLOCK(KEEP(28), 1344 & id%OOC_NB_FILE_TYPE), 1345 & stat=IERR) 1346 IF ( IERR .GT. 0 ) THEN 1347 id%INFO(1) = -13 1348 id%INFO(2) = id%OOC_NB_FILE_TYPE*KEEP(28) 1349 NULLIFY(id%OOC_SIZE_OF_BLOCK) 1350 GOTO 112 1351 ENDIF 1352 ALLOCATE (id%OOC_VADDR(KEEP(28),id%OOC_NB_FILE_TYPE), 1353 & stat=IERR) 1354 IF ( IERR .GT. 0 ) THEN 1355 id%INFO(1) = -13 1356 id%INFO(2) = id%OOC_NB_FILE_TYPE*KEEP(28) 1357 NULLIFY(id%OOC_VADDR) 1358 GOTO 112 1359 ENDIF 1360 ENDIF 1361 ENDIF 1362 112 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1363 & id%COMM, id%MYID ) 1364 IF (id%INFO(1) < 0) THEN 1365C LOAD_END must be done but not OOC_END_FACTO 1366 GOTO 513 1367 ENDIF 1368 IF (I_AM_SLAVE) THEN 1369 IF (KEEP(201) .GT. 0) THEN 1370 IF ((KEEP(201).EQ.1).OR.(KEEP(201).EQ.2)) THEN 1371 CALL ZMUMPS_OOC_INIT_FACTO(id,MAXS) 1372 ELSE 1373 WRITE(*,*) "Internal error in ZMUMPS_FAC_DRIVER" 1374 CALL MUMPS_ABORT() 1375 ENDIF 1376 IF(id%INFO(1).LT.0)THEN 1377 GOTO 111 1378 ENDIF 1379 ENDIF 1380#if ! defined(OLD_LOAD_MECHANISM) 1381C First increment corresponds to the number of 1382C floating-point operations for subtrees allocated 1383C to the local processor. 1384 CALL ZMUMPS_LOAD_UPDATE(0,.FALSE.,dble(id%COST_SUBTREES), 1385 & id%KEEP(1),id%KEEP8(1)) 1386#endif 1387 IF (id%INFO(1).LT.0) GOTO 111 1388 END IF 1389C ----------------------- 1390C Manage main workarray S 1391C ----------------------- 1392 IF ( associated (id%S) ) THEN 1393 DEALLOCATE(id%S) 1394 NULLIFY(id%S) 1395 id%KEEP8(23)=0_8 ! reset space allocated to zero 1396 ENDIF 1397#if defined (LARGEMATRICES) 1398 IF ( id%MYID .ne. MASTER ) THEN 1399#endif 1400 IF (.NOT.WK_USER_PROVIDED) THEN 1401 ALLOCATE (id%S(MAXS),stat=IERR) 1402 id%KEEP8(23) = MAXS 1403 IF ( IERR .GT. 0 ) THEN 1404 id%INFO(1) = -13 1405 CALL MUMPS_SET_IERROR(MAXS, id%INFO(2)) 1406C On some platforms (IBM for example), an 1407C allocation failure returns a non-null pointer. 1408C Therefore we nullify S 1409 NULLIFY(id%S) 1410 id%KEEP8(23)=0_8 1411 ENDIF 1412 ELSE 1413 id%S => id%WK_USER(1:id%KEEP8(24)) 1414 ENDIF 1415#if defined (LARGEMATRICES) 1416 END IF 1417#endif 1418C 1419 111 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1420 & id%COMM, id%MYID ) 1421 IF ( id%INFO(1).LT.0 ) GOTO 514 1422C -------------------------- 1423C Initialization of modules 1424C related to data management 1425C -------------------------- 1426 NB_ACTIVE_FRONTS_ESTIM = 3 1427 IF (I_AM_SLAVE) THEN 1428 CALL MUMPS_FDM_INIT('A',NB_ACTIVE_FRONTS_ESTIM, id%INFO) 1429 CALL MUMPS_FDM_INIT('F',NB_ACTIVE_FRONTS_ESTIM, id%INFO ) 1430 IF (id%INFO(1) .LT. 0 ) GOTO 114 1431#if ! defined(NO_FDM_DESCBAND) 1432C Storage of DESCBAND information 1433 CALL MUMPS_FDBD_INIT( NB_ACTIVE_FRONTS_ESTIM, id%INFO ) 1434#endif 1435#if ! defined(NO_FDM_MAPROW) 1436C Storage of MAPROW and ROOT2SON information 1437 CALL MUMPS_FMRD_INIT( NB_ACTIVE_FRONTS_ESTIM, id%INFO ) 1438#endif 1439 CALL ZMUMPS_BLR_INIT_MODULE( NB_ACTIVE_FRONTS_ESTIM, id%INFO ) 1440 114 CONTINUE 1441 ENDIF 1442 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1443 & id%COMM, id%MYID ) 1444C GOTO 500: one of the above module initializations failed 1445 IF ( id%INFO(1).LT.0 ) GOTO 500 1446C 1447C 1448C Allocate space for matrix in arrowhead 1449C ====================================== 1450C 1451C CASE 1 : Matrix is assembled 1452C CASE 2 : Matrix is elemental 1453C 1454 IF ( KEEP(55) .eq. 0 ) THEN 1455C ------------------------------------ 1456C Space has been allocated already for 1457C the integer part during analysis 1458C Only slaves need the arrowheads. 1459C ------------------------------------ 1460 IF (associated( id%DBLARR)) THEN 1461 DEALLOCATE(id%DBLARR) 1462 NULLIFY(id%DBLARR) 1463 ENDIF 1464 IF ( I_AM_SLAVE .and. id%KEEP8(26) .ne. 0_8 ) THEN 1465 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = IERR ) 1466 ELSE 1467 ALLOCATE( id%DBLARR( 1 ), stat =IERR ) 1468 END IF 1469 IF ( IERR .NE. 0 ) THEN 1470 IF (LPOK) THEN 1471 WRITE(LP,*) id%MYID, 1472 & ': Allocation error for DBLARR(',id%KEEP8(26),')' 1473 ENDIF 1474 id%INFO(1)=-13 1475 CALL MUMPS_SET_IERROR(id%KEEP8(26), id%INFO(2)) 1476 NULLIFY(id%DBLARR) 1477 GOTO 100 1478 END IF 1479 ELSE 1480C ---------------------------------------- 1481C Allocate variable lists. Systematically. 1482C ---------------------------------------- 1483 IF ( associated( id%INTARR ) ) THEN 1484 DEALLOCATE( id%INTARR ) 1485 NULLIFY( id%INTARR ) 1486 END IF 1487 IF ( I_AM_SLAVE .and. id%KEEP8(27) .ne. 0_8 ) THEN 1488 ALLOCATE( id%INTARR( id%KEEP8(27) ), stat = allocok ) 1489 IF ( allocok .GT. 0 ) THEN 1490 id%INFO(1) = -13 1491 CALL MUMPS_SET_IERROR(id%KEEP8(27), id%INFO(2)) 1492 NULLIFY(id%INTARR) 1493 GOTO 100 1494 END IF 1495 ELSE 1496 ALLOCATE( id%INTARR(1),stat=allocok ) 1497 IF ( allocok .GT. 0 ) THEN 1498 id%INFO(1) = -13 1499 id%INFO(2) = 1 1500 NULLIFY(id%INTARR) 1501 GOTO 100 1502 END IF 1503 END IF 1504C ----------------------------- 1505C Allocate real values. 1506C On master, if hybrid host and 1507C no scaling, avoid the copy. 1508C ----------------------------- 1509 IF (associated( id%DBLARR)) THEN 1510 DEALLOCATE(id%DBLARR) 1511 NULLIFY(id%DBLARR) 1512 ENDIF 1513 IF ( I_AM_SLAVE ) THEN 1514 IF ( id%MYID_NODES .eq. MASTER 1515 & .AND. KEEP(46) .eq. 1 1516 & .AND. KEEP(52) .eq. 0 ) THEN 1517C -------------------------- 1518C Simple pointer association 1519C -------------------------- 1520 id%DBLARR => id%A_ELT 1521 ELSE 1522C ---------- 1523C Allocation 1524C ---------- 1525 IF ( id%KEEP8(26) .ne. 0_8 ) THEN 1526 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = allocok ) 1527 IF ( allocok .GT. 0 ) THEN 1528 id%INFO(1) = -13 1529 CALL MUMPS_SET_IERROR(id%KEEP8(26), id%INFO(2)) 1530 NULLIFY(id%DBLARR) 1531 GOTO 100 1532 END IF 1533 ELSE 1534 ALLOCATE( id%DBLARR(1), stat = allocok ) 1535 IF ( allocok .GT. 0 ) THEN 1536 id%INFO(1) = -13 1537 id%INFO(2) = 1 1538 NULLIFY(id%DBLARR) 1539 GOTO 100 1540 END IF 1541 END IF 1542 END IF 1543 ELSE 1544 ALLOCATE( id%DBLARR(1), stat = allocok ) 1545 IF ( allocok .GT. 0 ) THEN 1546 id%INFO(1) = -13 1547 id%INFO(2) = 1 1548 NULLIFY(id%DBLARR) 1549 GOTO 100 1550 END IF 1551 END IF 1552 END IF 1553C ----------------- 1554C Also prepare some 1555C data for the root 1556C ----------------- 1557 IF ( KEEP(38).NE.0 .AND. I_AM_SLAVE ) THEN 1558 CALL ZMUMPS_INIT_ROOT_FAC( id%N, 1559 & id%root, id%FILS(1), KEEP(38), id%KEEP(1), id%INFO(1) ) 1560 END IF 1561C 1562C 1563 100 CONTINUE 1564C ---------------- 1565C Check for errors 1566C ---------------- 1567 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 1568 & id%COMM, id%MYID ) 1569 IF ( id%INFO(1).LT.0 ) GOTO 500 1570C 1571C ----------------------------------- 1572C 1573C DISTRIBUTION OF THE ORIGINAL MATRIX 1574C 1575C ----------------------------------- 1576C 1577C TIMINGS: computed (and printed) on the host 1578C Next line: global time for distrib(arrowheads,elts) 1579C on the host. Synchronization has been performed. 1580 IF (id%MYID.EQ.MASTER) CALL MUMPS_SECDEB(TIME) 1581C 1582 IF ( KEEP( 55 ) .eq. 0 ) THEN 1583C ---------------------------- 1584C Original matrix is assembled 1585C Arrowhead format to be used. 1586C ---------------------------- 1587C KEEP8(26) and KEEP8(27) hold the number of entries for real/integer 1588C for the matrix in arrowhead format. They have been set by the 1589C analysis phase (ZMUMPS_ANA_F and ZMUMPS_ANA_G) 1590C 1591C ------------------------------------------------------------------ 1592C Blocking is used for sending arrowhead records (I,J,VAL) 1593C buffer(1) is used to store number of bytes already packed 1594C buffer(2) number of records already packed 1595C KEEP(39) : Number of records (blocking factor) 1596C ------------------------------------------------------------------ 1597C 1598C ---------------------------------------- 1599C In case of parallel root compute minimum 1600C size of workspace to receive arrowheads 1601C of root node. Will be used to check that 1602C MAXS is large enough 1603C ---------------------------------------- 1604 IF (KEEP(38).NE.0 .AND. I_AM_SLAVE) THEN 1605 LWK = int(numroc( id%root%ROOT_SIZE, id%root%MBLOCK, 1606 & id%root%MYROW, 0, id%root%NPROW ),8) 1607 LWK = max( 1_8, LWK ) 1608 LWK = LWK* 1609 & int(numroc( id%root%ROOT_SIZE, id%root%NBLOCK, 1610 & id%root%MYCOL, 0, id%root%NPCOL ),8) 1611 LWK = max( 1_8, LWK ) 1612 ELSE 1613 LWK = 1_8 1614 ENDIF 1615C MAXS must be at least 1, and in case of 1616C parallel root, large enough to receive 1617C arrowheads of root. 1618 IF (MAXS .LT. int(LWK,8)) THEN 1619 id%INFO(1) = -9 1620 CALL MUMPS_SET_IERROR(LWK, id%INFO(2)) 1621 ENDIF 1622 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 1623 & id%COMM, id%MYID ) 1624 IF ( id%INFO(1).LT.0 ) GOTO 500 1625 IF ( KEEP(54) .eq. 0 ) THEN 1626C ================================================ 1627C FIRST CASE : MATRIX IS NOT INITIALLY DISTRIBUTED 1628C ================================================ 1629C A small integer workspace is needed to 1630C send the arrowheads. 1631 IF ( id%MYID .eq. MASTER ) THEN 1632 ALLOCATE(IWK(id%N), stat=allocok) 1633 IF ( allocok .NE. 0 ) THEN 1634 id%INFO(1)=-13 1635 id%INFO(2)=id%N 1636 END IF 1637#if defined(LARGEMATRICES) 1638 IF ( associated (id%S) ) THEN 1639 DEALLOCATE(id%S) 1640 NULLIFY(id%S) 1641 id%KEEP8(23)=0_8 1642 ENDIF 1643 ALLOCATE (WK(LWK),stat=IERR) 1644 IF ( IERR .GT. 0 ) THEN 1645 id%INFO(1) = -13 1646 CALL MUMPS_SET_IERROR(LWK, id%INFO(2)) 1647 write(6,*) ' PB1 ALLOC LARGEMAT' 1648 ENDIF 1649#endif 1650 ENDIF 1651 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 1652 & id%COMM, id%MYID ) 1653 IF ( id%INFO(1).LT.0 ) GOTO 500 1654 IF ( id%MYID .eq. MASTER ) THEN 1655C 1656C -------------------------------- 1657C MASTER sends arowheads using the 1658C global communicator with ranks 1659C also in global communicator 1660C IWK is used as temporary 1661C workspace of size N. 1662C -------------------------------- 1663 IF ( .not. associated( id%INTARR ) ) THEN 1664 ALLOCATE( id%INTARR( 1 ) ) 1665 ENDIF 1666 NBRECORDS = KEEP(39) 1667 IF (id%KEEP8(28) .LT. int(NBRECORDS,8)) THEN 1668 NBRECORDS = int(id%KEEP8(28)) 1669 ENDIF 1670#if defined(LARGEMATRICES) 1671 CALL ZMUMPS_FACTO_SEND_ARROWHEADS(id%N, id%KEEP8(28), id%A(1), 1672 & id%IRN(1), id%JCN(1), id%SYM_PERM(1), 1673 & LSCAL, id%COLSCA(1), id%ROWSCA(1), 1674 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1), 1675 & NBRECORDS, 1676 & LP, id%COMM, id%root, KEEP,id%KEEP8, 1677 & id%FILS(1), IWK(1), ! workspace of size N 1678 & 1679 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26), 1680 & id%PTRAR(1), id%PTRAR(id%N+1), 1681 & id%FRERE_STEPS(1), id%STEP(1), WK(1), LWK, 1682 & id%ISTEP_TO_INIV2, id%I_AM_CAND, 1683 & id%CANDIDATES) 1684C 1685C write(6,*) '!!! A,IRN,JCN are freed during factorization ' 1686 DEALLOCATE (id%A) 1687 NULLIFY(id%A) 1688 DEALLOCATE (id%IRN) 1689 NULLIFY (id%IRN) 1690 DEALLOCATE (id%JCN) 1691 NULLIFY (id%JCN) 1692 IF (.NOT.WK_USER_PROVIDED) THEN 1693 ALLOCATE (id%S(MAXS),stat=IERR) 1694 id%KEEP8(23) = MAXS 1695 IF ( IERR .GT. 0 ) THEN 1696 id%INFO(1) = -13 1697 id%INFO(2) = MAXS 1698 NULLIFY(id%S) 1699 id%KEEP8(23)=0_8 1700 write(6,*) ' PB2 ALLOC LARGEMAT',MAXS 1701 CALL MUMPS_ABORT() 1702 ENDIF 1703 ELSE 1704 id%S => id%WK_USER(1:id%KEEP8(24)) 1705 ENDIF 1706 id%S(MAXS-LWK+1_8:MAXS) = WK(1_8:LWK) 1707 DEALLOCATE (WK) 1708#else 1709 CALL ZMUMPS_FACTO_SEND_ARROWHEADS(id%N, id%KEEP8(28), id%A(1), 1710 & id%IRN(1), id%JCN(1), id%SYM_PERM(1), 1711 & LSCAL, id%COLSCA(1), id%ROWSCA(1), 1712 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1), 1713 & NBRECORDS, 1714 & LP, id%COMM, id%root, KEEP(1),id%KEEP8(1), 1715 & id%FILS(1), IWK(1), 1716 & 1717 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26), 1718 & id%PTRAR(1), id%PTRAR(id%N+1), 1719 & id%FRERE_STEPS(1), id%STEP(1), id%S(1), MAXS, 1720 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1), 1721 & id%CANDIDATES(1,1) ) 1722#endif 1723 DEALLOCATE(IWK) 1724 ELSE 1725 NBRECORDS = KEEP(39) 1726 IF (id%KEEP8(28) .LT. int(NBRECORDS,8)) THEN 1727 NBRECORDS = int(id%KEEP8(28)) 1728 ENDIF 1729 CALL ZMUMPS_FACTO_RECV_ARROWHD2( id%N, 1730 & id%DBLARR(1), id%KEEP8(26), 1731 & id%INTARR(1), id%KEEP8(27), 1732 & id%PTRAR( 1 ), 1733 & id%PTRAR(id%N+1), 1734 & KEEP( 1 ), id%KEEP8(1), id%MYID, id%COMM, 1735 & NBRECORDS, 1736 & 1737 & id%S(1), MAXS, 1738 & id%root, 1739 & id%PROCNODE_STEPS(1), id%NSLAVES, 1740 & id%SYM_PERM(1), id%FRERE_STEPS(1), id%STEP(1), 1741 & id%INFO(1), id%INFO(2) ) 1742 ENDIF 1743 ELSE 1744C 1745C ============================================= 1746C SECOND CASE : MATRIX IS INITIALLY DISTRIBUTED 1747C ============================================= 1748C Timing on master. 1749 IF (id%MYID.EQ.MASTER) THEN 1750 CALL MUMPS_SECDEB(TIME) 1751 END IF 1752 IF ( I_AM_SLAVE ) THEN 1753C --------------------------------------------------- 1754C In order to have possibly IRN_loc/JCN_loc/A_loc 1755C of size 0, avoid to pass them inside REDISTRIBUTION 1756C and pass id instead 1757C NZ_locMAX8 gives as a maximum buffer size (send/recv) used 1758C an upper bound to limit buffers on small matrices 1759C --------------------------------------------------- 1760 CALL MPI_ALLREDUCE(id%KEEP8(29), NZ_locMAX8, 1, MPI_INTEGER8, 1761 & MPI_MAX, id%COMM_NODES, IERR) 1762 NBRECORDS = KEEP(39) 1763 IF (NZ_locMAX8 .LT. int(NBRECORDS,8)) THEN 1764 NBRECORDS = int(NZ_locMAX8) 1765 ENDIF 1766 CALL ZMUMPS_REDISTRIBUTION( id%N, 1767 & id%KEEP8(29), 1768 & id, 1769 & id%DBLARR(1), id%KEEP8(26), id%INTARR(1), 1770 & id%KEEP8(27), id%PTRAR(1), id%PTRAR(id%N+1), 1771 & KEEP(1), id%KEEP8(1), id%MYID_NODES, 1772 & id%COMM_NODES, NBRECORDS, 1773 & id%S(1), MAXS, id%root, id%PROCNODE_STEPS(1), 1774 & id%NSLAVES, id%SYM_PERM(1), id%STEP(1), 1775 & id%ICNTL(1), id%INFO(1), NSEND8, NLOCAL8, 1776 & id%ISTEP_TO_INIV2(1), 1777 & id%CANDIDATES(1,1) ) 1778 IF ( ( KEEP(52).EQ.7 ).OR. (KEEP(52).EQ.8) ) THEN 1779C ------------------------------------------------- 1780C In that case, scaling arrays have been allocated 1781C on all processors. They were useful for matrix 1782C distribution. But we now really only need them 1783C on the host. In case of distributed solution, we 1784C will have to broadcast either ROWSCA or COLSCA 1785C (depending on MTYPE) but this is done later. 1786C 1787C In other words, on exit from the factorization, 1788C we want to have scaling arrays available only 1789C on the host. 1790C ------------------------------------------------- 1791 IF ( id%MYID > 0 ) THEN 1792 IF (associated(id%ROWSCA)) THEN 1793 DEALLOCATE(id%ROWSCA) 1794 NULLIFY(id%ROWSCA) 1795 ENDIF 1796 IF (associated(id%COLSCA)) THEN 1797 DEALLOCATE(id%COLSCA) 1798 NULLIFY(id%COLSCA) 1799 ENDIF 1800 ENDIF 1801 ENDIF 1802#if defined(LARGEMATRICES) 1803C deallocate id%IRN_loc, id%JCN(loc) to free extra space 1804C Note that in this case IRN_loc cannot be used 1805C anymore during the solve phase for IR and Error analysis. 1806 IF (associated(id%IRN_loc)) THEN 1807 DEALLOCATE(id%IRN_loc) 1808 NULLIFY(id%IRN_loc) 1809 ENDIF 1810 IF (associated(id%JCN_loc)) THEN 1811 DEALLOCATE(id%JCN_loc) 1812 NULLIFY(id%JCN_loc) 1813 ENDIF 1814 IF (associated(id%A_loc)) THEN 1815 DEALLOCATE(id%A_loc) 1816 NULLIFY(id%A_loc) 1817 ENDIF 1818 write(6,*) ' Warning :', 1819 & ' id%A_loc, IRN_loc, JCN_loc deallocated !!! ' 1820#endif 1821 IF (PROK) THEN 1822 WRITE(MP,120) NLOCAL8, NSEND8 1823 END IF 1824 END IF 1825 IF ( KEEP(46) .eq. 0 .AND. id%MYID.eq.MASTER ) THEN 1826C ------------------------------ 1827C The host is not working -> had 1828C no data from initial matrix 1829C ------------------------------ 1830 NSEND8 = 0_8 1831 NLOCAL8 = 0_8 1832 END IF 1833C -------------------------- 1834C Put into some info/infog ? 1835C -------------------------- 1836 CALL MPI_REDUCE( NSEND8, NSEND_TOT8, 1, MPI_INTEGER8, 1837 & MPI_SUM, MASTER, id%COMM, IERR ) 1838 CALL MPI_REDUCE( NLOCAL8, NLOCAL_TOT8, 1, MPI_INTEGER8, 1839 & MPI_SUM, MASTER, id%COMM, IERR ) 1840 IF ( PROKG ) THEN 1841 WRITE(MPG,125) NLOCAL_TOT8, NSEND_TOT8 1842 END IF 1843C 1844C ------------------------- 1845C Check for possible errors 1846C ------------------------- 1847 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1848 & id%COMM, id%MYID ) 1849 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500 1850C 1851 ENDIF 1852 ELSE 1853C ------------------- 1854C Matrix is elemental, 1855C provided on the 1856C master only 1857C ------------------- 1858 IF ( id%MYID.eq.MASTER) 1859 & CALL ZMUMPS_MAXELT_SIZE( id%ELTPTR(1), 1860 & id%NELT, 1861 & MAXELT_SIZE ) 1862C 1863C Perform the distribution of the elements. 1864C A this point, 1865C PTRAIW/PTRARW have been computed. 1866C INTARR/DBLARR have been allocated 1867C ELTPROC gives the mapping of elements 1868C 1869 CALL ZMUMPS_ELT_DISTRIB( id%N, id%NELT, id%KEEP8(30), 1870 & id%COMM, id%MYID, 1871 & id%NSLAVES, id%PTRAR(1), 1872 & id%PTRAR(id%NELT+2), 1873 & id%INTARR(1), id%DBLARR(1), id%KEEP8(27), id%KEEP8(26), 1874 & id%KEEP(1), id%KEEP8(1), MAXELT_SIZE, 1875 & id%FRTPTR(1), id%FRTELT(1), 1876 & id%S(1), MAXS, id%FILS(1), 1877 & id, id%root ) 1878C ---------------- 1879C Broadcast errors 1880C ---------------- 1881 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 1882 & id%COMM, id%MYID ) 1883 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500 1884 END IF ! Element entry 1885C ------------------------ 1886C Time the redistribution: 1887C ------------------------ 1888 IF ( id%MYID.EQ.MASTER) THEN 1889 CALL MUMPS_SECFIN(TIME) 1890 id%DKEEP(93) = TIME 1891 IF (PROKG) WRITE(MPG,160) TIME 1892 END IF 1893C 1894C TIMINGS: 1895C Next line: elapsed time for factorization 1896 IF (id%MYID.EQ.MASTER) CALL MUMPS_SECDEB(TIME) 1897C 1898C Allocate buffers on the slaves 1899C ============================== 1900C 1901 IF ( I_AM_SLAVE ) THEN 1902 CALL ZMUMPS_BUF_INI_MYID(id%MYID_NODES) 1903C 1904C Some buffers are required to pack/unpack data and for 1905C receiving MPI messages. 1906C For packing/unpacking : the buffer must be large 1907C enough to send several messages while receives might not 1908C be posted yet. 1909C It is assumed that the size of an integer is held in KEEP(34) 1910C while the size of a complex is held in KEEP(35). 1911C BUFR and LBUFR are declared integers, since byte is not 1912C a standard datatype. 1913C We now use KEEP(43) and KEEP(44) as estimated at analysis 1914C to allocate appropriate buffer sizes. 1915C 1916C Reception buffer 1917C ---------------- 1918 ZMUMPS_LBUFR_BYTES8 = int(KEEP( 44 ),8) * int(KEEP( 35 ), 8) 1919C ------------------- 1920C Ensure a reasonable 1921C buffer size 1922C ------------------- 1923 ZMUMPS_LBUFR_BYTES8 = max( ZMUMPS_LBUFR_BYTES8, 1924 & 100000_8 ) 1925C 1926C If there is pivoting, size of the message might still increase. 1927C We use a relaxation (so called PERLU) to increase the estimate. 1928C 1929C Note: PERLU is a global estimate for pivoting. 1930C It may happen that one large contribution block size is increased by more than that. 1931C This is why we use an extra factor 2 relaxation coefficient for the relaxation of 1932C the reception buffer in the case where pivoting is allowed. 1933C A more dynamic strategy could be applied: if message to 1934C be received is larger than expected, reallocate a larger 1935C buffer. (But this won't work with IRECV.) 1936C Finally, one may want (as we are currently doing it for moste messages) 1937C to cut large messages into a series of smaller ones. 1938C 1939 PERLU = KEEP( 12 ) 1940C For hybrid scheduling (strategy 5), Abdou 1941C wants a minimal amount of freedom even for 1942C small/negative PERLU values. 1943C 1944 IF (KEEP(48).EQ.5) THEN 1945 MIN_PERLU=2 1946 ELSE 1947 MIN_PERLU=0 1948 ENDIF 1949C 1950 ZMUMPS_LBUFR_BYTES8 = ZMUMPS_LBUFR_BYTES8 1951 & + int( 2.0D0 * dble(max(PERLU,MIN_PERLU))* 1952 & dble(ZMUMPS_LBUFR_BYTES8)/100D0, 8) 1953 ZMUMPS_LBUFR_BYTES8 = min(ZMUMPS_LBUFR_BYTES8, 1954 & int(huge (KEEP(43))-100,8)) 1955 ZMUMPS_LBUFR_BYTES = int( ZMUMPS_LBUFR_BYTES8 ) 1956 IF (KEEP(48)==5) THEN 1957C Since the buffer is allocated, use 1958C it as the constraint for memory/granularity 1959C in hybrid scheduler 1960C 1961 id%KEEP8(21) = id%KEEP8(22) + 1962 & int( dble(max(PERLU,MIN_PERLU))* 1963 & dble(id%KEEP8(22))/100D0,8) 1964 ENDIF 1965C 1966C Now estimate the size for the buffer for asynchronous 1967C sends of contribution blocks (so called CB). We want to be able to send at 1968C least KEEP(213)/100 (two in general) messages at the 1969C same time. 1970C 1971C Send buffer 1972C ----------- 1973 ZMUMPS_LBUF8 = int( dble(KEEP(213)) / 100.0D0 * 1974 & dble(KEEP(43)) * dble(KEEP(35)), 8 ) 1975 ZMUMPS_LBUF8 = max( ZMUMPS_LBUF8, 100000_8 ) 1976 ZMUMPS_LBUF8 = ZMUMPS_LBUF8 1977 & + int( 2.0D0 * dble(max(PERLU,MIN_PERLU))* 1978 & dble(ZMUMPS_LBUF8)/100D0, 8) 1979C Make ZMUMPS_LBUF8 small enough to be stored in a standard integer 1980 ZMUMPS_LBUF8 = min(ZMUMPS_LBUF8, int(huge (KEEP(43))-100,8)) 1981C 1982C No reason to have send buffer smaller than receive buffer. 1983C This should never occur with the formulas above but just 1984C in case: 1985 ZMUMPS_LBUF8 = max(ZMUMPS_LBUF8, ZMUMPS_LBUFR_BYTES8+3*KEEP(34)) 1986 ZMUMPS_LBUF = int(ZMUMPS_LBUF8) 1987 IF(id%KEEP(48).EQ.4)THEN 1988 ZMUMPS_LBUFR_BYTES=ZMUMPS_LBUFR_BYTES*5 1989 ZMUMPS_LBUF=ZMUMPS_LBUF*5 1990 ENDIF 1991C 1992C Estimate size of buffer for small messages 1993C Each node can send ( NSLAVES - 1 ) messages to (NSLAVES-1) nodes 1994C 1995C KEEP(56) is the number of nodes of level II. 1996C Messages will be sent for the symmetric case 1997C for synchronisation issues. 1998C 1999C We take an upperbound 2000C 2001 ZMUMPS_LBUF_INT = ( KEEP(56) + id%NSLAVES * id%NSLAVES ) * 5 2002 & * KEEP(34) 2003 IF ( KEEP( 38 ) .NE. 0 ) THEN 2004C 2005C 2006 KKKK = MUMPS_PROCNODE( id%PROCNODE_STEPS(id%STEP(KEEP(38))), 2007 & id%NSLAVES ) 2008 IF ( KKKK .EQ. id%MYID_NODES ) THEN 2009 ZMUMPS_LBUF_INT = ZMUMPS_LBUF_INT + 4 * KEEP(34) * 2010 & ( id%NSLAVES + id%NE_STEPS(id%STEP(KEEP(38))) 2011 & + min(KEEP(56), id%NE_STEPS(id%STEP(KEEP(38)))) * id%NSLAVES 2012 & ) 2013 END IF 2014 END IF 2015 IF ( PROK ) THEN 2016 WRITE( MP, 9999 ) ZMUMPS_LBUFR_BYTES, 2017 & ZMUMPS_LBUF, ZMUMPS_LBUF_INT 2018 END IF 2019 9999 FORMAT( /,' Allocated buffers',/,' ------------------',/, 2020 & ' Size of reception buffer in bytes ...... = ', I10, 2021 & /, 2022 & ' Size of async. emission buffer (bytes).. = ', I10,/, 2023 & ' Small emission buffer (bytes) .......... = ', I10) 2024C --------------------------- 2025C Allocate the 2 send buffers 2026C --------------------------- 2027 CALL ZMUMPS_BUF_ALLOC_SMALL_BUF( ZMUMPS_LBUF_INT, IERR ) 2028 IF ( IERR .NE. 0 ) THEN 2029 id%INFO(1)= -13 2030C convert to size in integer id%INFO(2)= ZMUMPS_LBUF_INT 2031 id%INFO(2)= (ZMUMPS_LBUF_INT+KEEP(34)-1)/KEEP(34) 2032 IF (LPOK) THEN 2033 WRITE(LP,*) id%MYID, 2034 & ':Allocation error in ZMUMPS_BUF_ALLOC_SMALL_BUF' 2035 & ,id%INFO(2) 2036 ENDIF 2037 GO TO 110 2038 END IF 2039 CALL ZMUMPS_BUF_ALLOC_CB( ZMUMPS_LBUF, IERR ) 2040 IF ( IERR .NE. 0 ) THEN 2041 id%INFO(1)= -13 2042C convert to size in integer id%INFO(2)= ZMUMPS_LBUF 2043 id%INFO(2)= (ZMUMPS_LBUF+KEEP(34)-1)/KEEP(34) 2044 IF (LPOK) THEN 2045 WRITE(LP,*) id%MYID, 2046 & ': Allocation error in ZMUMPS_BUF_ALLOC_CB' 2047 & ,id%INFO(2) 2048 ENDIF 2049 GO TO 110 2050 END IF 2051C ----------------------------- 2052C Allocate reception buffer and 2053C keep it in the structure 2054C ----------------------------- 2055 id%LBUFR_BYTES = ZMUMPS_LBUFR_BYTES 2056 id%LBUFR = (ZMUMPS_LBUFR_BYTES+KEEP(34)-1)/KEEP(34) 2057 IF (associated(id%BUFR)) DEALLOCATE(id%BUFR) 2058 ALLOCATE( id%BUFR( id%LBUFR ),stat=IERR ) 2059 IF ( IERR .NE. 0 ) THEN 2060 id%INFO(1)=-13 2061 id%INFO(2)=id%LBUFR 2062 NULLIFY(id%BUFR) 2063 IF (LPOK) THEN 2064 WRITE(LP,*) id%MYID, 2065 & ': Allocation error for id%BFUR(', id%LBUFR,')', IERR 2066 ENDIF 2067 GO TO 110 2068 END IF 2069C 2070C The buffers are declared INTEGER, because BYTE is not a 2071C standard data type. The sizes are in bytes, so we allocate 2072C a number of INTEGERs. The allocated size in integer is the 2073C size in bytes divided by KEEP(34) 2074C ------------------------------- 2075C Allocate IS. IS will contain 2076C factors and contribution blocks 2077C ------------------------------- 2078C Relax workspace at facto now 2079C PERLU might have been modified reload initial value 2080 PERLU = KEEP( 12 ) 2081 IF (KEEP(201).GT.0) THEN 2082C OOC panel or non panel (note that 2083C KEEP(15)=KEEP(225) if non panel) 2084 MAXIS_ESTIM = KEEP(225) 2085 ELSE 2086C In-core or reals for factors not stored 2087 MAXIS_ESTIM = KEEP(15) 2088 ENDIF 2089 MAXIS = max( 1, 2090 & MAXIS_ESTIM + 2 * max(PERLU,10) * 2091 & ( MAXIS_ESTIM / 100 + 1 ) 2092 & ) 2093 IF (associated(id%IS)) DEALLOCATE( id%IS ) 2094 ALLOCATE( id%IS( MAXIS ), stat = IERR ) 2095 IF ( IERR .NE. 0 ) THEN 2096 id%INFO(1)=-13 2097 id%INFO(2)=MAXIS 2098 NULLIFY(id%IS) 2099 IF (LPOK) THEN 2100 WRITE(*,*) id%MYID,': Allocation error for id%IS(',MAXIS,')' 2101 ENDIF 2102 GO TO 110 2103 END IF 2104 LIW = MAXIS 2105C ----------------------- 2106C Allocate PTLUST_S. PTLUST_S 2107C is used by solve later 2108C ----------------------- 2109 IF (associated( id%PTLUST_S )) DEALLOCATE(id%PTLUST_S) 2110 ALLOCATE( id%PTLUST_S( id%KEEP(28) ), stat = IERR ) 2111 IF ( IERR .NE. 0 ) THEN 2112 id%INFO(1)=-13 2113 id%INFO(2)=id%KEEP(28) 2114 IF (LPOK) THEN 2115 WRITE(LP,*) id%MYID, 2116 & ': Allocation error for id%PTLUST_S(', id%KEEP(28),')' 2117 ENDIF 2118 NULLIFY(id%PTLUST_S) 2119 GOTO 100 2120 END IF 2121 IF (associated( id%PTRFAC )) DEALLOCATE(id%PTRFAC) 2122 ALLOCATE( id%PTRFAC( id%KEEP(28) ), stat = IERR ) 2123 IF ( IERR .NE. 0 ) THEN 2124 id%INFO(1)=-13 2125 id%INFO(2)=id%KEEP(28) 2126 NULLIFY(id%PTRFAC) 2127 IF (LPOK) THEN 2128 WRITE(LP,*) id%MYID, 2129 & ': Allocation error for id%PTRFAC(', id%KEEP(28),')' 2130 ENDIF 2131 GOTO 100 2132 END IF 2133C ----------------------------- 2134C Reserve temporary workspace : 2135C IPOOL, PTRWB, ITLOC, PTRIST 2136C PTRWB will be subdivided again 2137C in routine ZMUMPS_FAC_B 2138C ----------------------------- 2139 PTRIST = 1 2140 PTRWB = PTRIST + id%KEEP(28) 2141 ITLOC = PTRWB + 3 * id%KEEP(28) 2142C Fwd in facto: ITLOC of size id%N + id%KEEP(253) 2143 IPOOL = ITLOC + id%N + id%KEEP(253) 2144C 2145C -------------------------------- 2146C NA(1) is an upperbound for LPOOL 2147C -------------------------------- 2148C Structure of the pool: 2149C ____________________________________________________ 2150C | Subtrees | | Top nodes | 1 2 3 | 2151C ---------------------------------------------------- 2152 LPOOL = MUMPS_GET_POOL_LENGTH(id%NA(1), id%KEEP(1),id%KEEP8(1)) 2153 ALLOCATE( IWK( IPOOL + LPOOL - 1 ), stat = IERR ) 2154 IF ( IERR .NE. 0 ) THEN 2155 id%INFO(1)=-13 2156 id%INFO(2)=IPOOL + LPOOL - 1 2157 IF (LPOK) THEN 2158 WRITE(LP,*) id%MYID, 2159 & ': Allocation error for IWK(',IPOOL+LPOOL-1,')' 2160 ENDIF 2161 GOTO 110 2162 END IF 2163 ALLOCATE(IWK8( 2 * id%KEEP(28)), stat = IERR) 2164 IF ( IERR .NE. 0 ) THEN 2165 id%INFO(1)=-13 2166 id%INFO(2)=2 * id%KEEP(28) 2167 IF (LPOK) THEN 2168 WRITE(LP,*) id%MYID, 2169 & ': Allocation error for IWKB(', 2*id%KEEP(28),')' 2170 ENDIF 2171 GOTO 110 2172 END IF 2173C 2174C Return to SPMD 2175C 2176 ENDIF 2177C 2178 110 CONTINUE 2179C ---------------- 2180C Broadcast errors 2181C ---------------- 2182 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 2183 & id%COMM, id%MYID ) 2184 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500 2185C 2186 IF ( I_AM_SLAVE ) THEN 2187C 2188C Store size of receive buffers in module 2189 CALL ZMUMPS_BUF_DIST_IRECV_SIZE( id%LBUFR_BYTES ) 2190 IF (PROK) THEN 2191 WRITE( MP, 170 ) MAXS, MAXIS, id%KEEP8(12), KEEP(15), 2192 & id%KEEP8(26), id%KEEP8(27), id%KEEP8(11), KEEP(26), KEEP(27) 2193 ENDIF 2194 END IF 2195C 2196C SPMD 2197C 2198 PERLU_ON = .TRUE. 2199 CALL ZMUMPS_MAX_MEM( id%KEEP(1), id%KEEP8(1), 2200 & id%MYID, id%N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28), 2201 & id%KEEP8(30), 2202 & id%NSLAVES, TOTAL_MBYTES, .FALSE., id%KEEP(201), 2203 & PERLU_ON, TOTAL_BYTES) 2204 id%INFO(16) = TOTAL_MBYTES 2205 IF ( PROK ) THEN 2206 WRITE(MP,'(A,I10) ') 2207 & ' ** Space in MBYTES used during factorization :', 2208 & id%INFO(16) 2209 END IF 2210C 2211C ---------------------------------------------------- 2212C Centralize memory statistics on the host 2213C id%INFOG(18) = size of mem in bytes for facto, 2214C for the processor using largest memory 2215C id%INFOG(19) = size of mem in bytes for facto, 2216C sum over all processors 2217C ---------------------------------------------------- 2218C 2219 CALL MUMPS_MEM_CENTRALIZE( id%MYID, id%COMM, 2220 & id%INFO(16), id%INFOG(18), IRANK ) 2221 IF ( PROKG ) THEN 2222 WRITE( MPG,'(A,I10) ') 2223 & ' ** Memory relaxation parameter ( ICNTL(14) ) :', 2224 & KEEP(12) 2225 WRITE( MPG,'(A,I10) ') 2226 & ' ** Rank of processor needing largest memory in facto :', 2227 & IRANK 2228 WRITE( MPG,'(A,I10) ') 2229 & ' ** Space in MBYTES used by this processor for facto :', 2230 & id%INFOG(18) 2231 IF ( KEEP(46) .eq. 0 ) THEN 2232 WRITE( MPG,'(A,I10) ') 2233 & ' ** Avg. Space in MBYTES per working proc during facto :', 2234 & ( id%INFOG(19)-id%INFO(16) ) / id%NSLAVES 2235 ELSE 2236 WRITE( MPG,'(A,I10) ') 2237 & ' ** Avg. Space in MBYTES per working proc during facto :', 2238 & id%INFOG(19) / id%NSLAVES 2239 END IF 2240 END IF 2241C -------------------------------------------- 2242C Before calling the main driver, ZMUMPS_FAC_B, 2243C some statistics should be initialized to 0, 2244C even on the host node because they will be 2245C used in REDUCE operations afterwards. 2246C -------------------------------------------- 2247C Size of factors written. It will be set to POSFAC in 2248C IC, otherwise we accumulate written factors in it. 2249 id%KEEP8(31)= 0_8 2250C Number of entries in factors 2251 id%KEEP8(10) = 0_8 2252C KEEP8(8) will hold the volume of extra copies due to 2253C in-place stacking in fac_mem_stack.F 2254 id%KEEP8(8)=0_8 2255 id%INFO(9:14)=0 2256 RINFO(2:3)=ZERO 2257 IF ( I_AM_SLAVE ) THEN 2258C ------------------------------------ 2259C Call effective factorization routine 2260C ------------------------------------ 2261 IF ( KEEP(55) .eq. 0 ) THEN 2262 LDPTRAR = id%N 2263 ELSE 2264 LDPTRAR = id%NELT + 1 2265 END IF 2266 IF ( id%KEEP(55) .NE. 0 ) THEN 2267 NELT_arg = id%NELT 2268 ELSE 2269C ------------------------------ 2270C Use size 1 to avoid complaints 2271C when using check bound options 2272C ------------------------------ 2273 NELT_arg = 1 2274 END IF 2275 CALL ZMUMPS_FAC_B( id%N, NSTEPS,id%S(1),MAXS,id%IS(1),LIW, 2276 & id%SYM_PERM(1),id%NA(1),id%LNA,id%NE_STEPS(1), 2277 & id%ND_STEPS(1),id%FILS(1),id%STEP(1),id%FRERE_STEPS(1), 2278 & id%DAD_STEPS(1),id%CANDIDATES(1,1),id%ISTEP_TO_INIV2(1), 2279 & id%TAB_POS_IN_PERE(1,1), 2280 & id%PTRAR(1), 2281 & LDPTRAR,IWK(PTRIST), 2282 & id%PTLUST_S(1), id%PTRFAC(1), IWK(PTRWB), IWK8, IWK(ITLOC), 2283 & RHS_MUMPS(1), IWK(IPOOL), LPOOL, CNTL1, ICNTL(1), id%INFO(1), 2284 & RINFO(1),KEEP(1),id%KEEP8(1), id%PROCNODE_STEPS(1), 2285 & id%NSLAVES, 2286 & id%COMM_NODES, id%MYID, id%MYID_NODES, id%BUFR(1),id%LBUFR, 2287 & id%LBUFR_BYTES, id%INTARR(1),id%DBLARR(1), id%root, NELT_arg, 2288 & id%FRTPTR(1), id%FRTELT(1),id%COMM_LOAD, id%ASS_IRECV,SEUIL, 2289 & SEUIL_LDLT_NIV2, id%MEM_DIST(0), id%DKEEP(1), 2290 & id%PIVNUL_LIST(1),LPN_LIST 2291 & ,id%LRGROUPS(1) 2292 & ) 2293 IF ( PROK .and. KEEP(38) .ne. 0 ) THEN 2294 WRITE( MP, 175 ) KEEP(49) 2295 END IF 2296C 2297C ------------------------------ 2298C Deallocate temporary workspace 2299C ------------------------------ 2300 DEALLOCATE( IWK ) 2301 DEALLOCATE( IWK8 ) 2302 ENDIF 2303C --------------------------------- 2304C Free some workspace corresponding 2305C to the original matrix in 2306C arrowhead or elemental format. 2307C ----- 2308C Note : INTARR was not allocated 2309C during factorization in the case 2310C of an assembled matrix. 2311C --------------------------------- 2312 IF ( KEEP(55) .eq. 0 ) THEN 2313C 2314C ---------------- 2315C Assembled matrix 2316C ---------------- 2317 IF (associated( id%DBLARR)) THEN 2318 DEALLOCATE(id%DBLARR) 2319 NULLIFY(id%DBLARR) 2320 ENDIF 2321C 2322 ELSE 2323C 2324C ---------------- 2325C Elemental matrix 2326C ---------------- 2327 DEALLOCATE( id%INTARR) 2328 NULLIFY( id%INTARR ) 2329C ------------------------------------ 2330C For the master from an hybrid host 2331C execution without scaling, then real 2332C values have not been copied ! 2333C ------------------------------------- 2334 IF ( id%MYID_NODES .eq. MASTER 2335 & .AND. KEEP(46) .eq. 1 2336 & .AND. KEEP(52) .eq. 0 ) THEN 2337 NULLIFY( id%DBLARR ) 2338 ELSE 2339C next line should be enough but ... 2340C DEALLOCATE( id%DBLARR ) 2341 IF (associated( id%DBLARR)) THEN 2342 DEALLOCATE(id%DBLARR) 2343 NULLIFY(id%DBLARR) 2344 ENDIF 2345 END IF 2346 END IF 2347C Memroy statistics 2348C ----------------------------------- 2349C If QR (Keep(19)) is not zero, and if 2350C the host does not have the information 2351C (ie is not slave), send information 2352C computed on the slaves during facto 2353C to the host. 2354C ----------------------------------- 2355 IF ( KEEP(19) .NE. 0 ) THEN 2356 IF ( KEEP(46) .NE. 1 ) THEN 2357C Host was not working during facto_root 2358C Send him the information 2359 IF ( id%MYID .eq. MASTER ) THEN 2360 CALL MPI_RECV( KEEP(17), 1, MPI_INTEGER, 1, DEFIC_TAG, 2361 & id%COMM, STATUS, IERR ) 2362 ELSE IF ( id%MYID .EQ. 1 ) THEN 2363 CALL MPI_SEND( KEEP(17), 1, MPI_INTEGER, 0, DEFIC_TAG, 2364 & id%COMM, IERR ) 2365 END IF 2366 END IF 2367 END IF 2368C --------------------------- 2369C Deallocate send buffers 2370C They will be reallocated 2371C in the solve. 2372C ------------------------ 2373 IF (associated(id%BUFR)) THEN 2374 DEALLOCATE(id%BUFR) 2375 NULLIFY(id%BUFR) 2376 END IF 2377 CALL ZMUMPS_BUF_DEALL_CB( IERR ) 2378 CALL ZMUMPS_BUF_DEALL_SMALL_BUF( IERR ) 2379C//PIV 2380 IF (KEEP(219).NE.0) THEN 2381 CALL ZMUMPS_BUF_DEALL_MAX_ARRAY() 2382 ENDIF 2383C 2384C Check for errors, 2385C every slave is aware of an error. 2386C If master is included in computations, the call below should 2387C not be necessary. 2388 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 2389 & id%COMM, id%MYID ) 2390C 2391 CALL ZMUMPS_EXTRACT_SCHUR_REDRHS(id) 2392 IF (KEEP(201) .GT. 0) THEN 2393 IF ((KEEP(201).EQ.1) .OR. (KEEP(201).EQ.2)) THEN 2394 IF ( I_AM_SLAVE ) THEN 2395 CALL ZMUMPS_OOC_CLEAN_PENDING(IERR) 2396 IF(IERR.LT.0)THEN 2397 id%INFO(1)=IERR 2398 id%INFO(2)=0 2399 ENDIF 2400 ENDIF 2401 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 2402 & id%COMM, id%MYID ) 2403C We want to collect statistics even in case of 2404C error to understand if it is due to numerical 2405C issues 2406CC IF ( id%INFO(1) < 0 ) GOTO 500 2407 END IF 2408 END IF 2409 IF (id%MYID.EQ.MASTER) THEN 2410 CALL MUMPS_SECFIN(TIME) 2411 id%DKEEP(94)=TIME 2412 IF ( PROKG ) THEN 2413 IF (id%INFO(1) .GE.0) THEN 2414 WRITE(MPG,180) TIME 2415 ELSE 2416 WRITE(MPG,185) TIME 2417 ENDIF 2418 ENDIF 2419 ENDIF 2420CC Made available to users on release 4.4 (April 2005) 2421 PERLU_ON = .TRUE. 2422 CALL ZMUMPS_MAX_MEM( id%KEEP(1),id%KEEP8(1), 2423 & id%MYID, N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28), 2424 & id%KEEP8(30), 2425 & id%NSLAVES, TOTAL_MBYTES, .TRUE., id%KEEP(201), 2426 & PERLU_ON, TOTAL_BYTES) 2427 id%KEEP8(7) = TOTAL_BYTES 2428C -- INFO(22) holds the effective space (in Mbytes) used by MUMPS 2429C -- (it includes part of WK_USER used if provided by user) 2430 id%INFO(22) = TOTAL_MBYTES 2431 IF (PROK ) THEN 2432 WRITE(MP,'(A,I10) ') 2433 & ' ** Effective minimum Space in MBYTES for facto :', 2434 & TOTAL_MBYTES 2435 ENDIF 2436C 2437 IF (I_AM_SLAVE) THEN 2438 K67 = id%KEEP8(67) 2439 K68 = id%KEEP8(68) 2440 K69 = id%KEEP8(69) 2441 ELSE 2442 K67 = 0_8 2443 K68 = 0_8 2444 K69 = 0_8 2445 ENDIF 2446C -- Save the number of entries effectively used 2447C in main working array S 2448 CALL MUMPS_SETI8TOI4(K67,id%INFO(21)) 2449C 2450 CALL ZMUMPS_AVGMAX_STAT8(PROKG, MPG, K67, id%NSLAVES, 2451 & id%COMM, "effective space used in S (KEEP8(67)) =") 2452C 2453C ---------------------------------------------------- 2454C Centralize memory statistics on the host 2455C 2456C INFOG(21) = size of mem (Mbytes) for facto, 2457C for the processor using largest memory 2458C INFOG(22) = size of mem (Mbytes) for facto, 2459C sum over all processors 2460C ---------------------------------------------------- 2461C 2462 CALL MUMPS_MEM_CENTRALIZE( id%MYID, id%COMM, 2463 & TOTAL_MBYTES, id%INFOG(21), IRANK ) 2464 IF ( PROKG ) THEN 2465 WRITE( MPG,'(A,I10) ') 2466 & ' ** EFF Min: Rank of processor needing largest memory :', 2467 & IRANK 2468 WRITE( MPG,'(A,I10) ') 2469 & ' ** EFF Min: Space in MBYTES used by this processor :', 2470 & id%INFOG(21) 2471 IF ( KEEP(46) .eq. 0 ) THEN 2472 WRITE( MPG,'(A,I10) ') 2473 & ' ** EFF Min: Avg. Space in MBYTES per working proc :', 2474 & ( id%INFOG(22)-TOTAL_MBYTES ) / id%NSLAVES 2475 ELSE 2476 WRITE( MPG,'(A,I10) ') 2477 & ' ** EFF Min: Avg. Space in MBYTES per working proc :', 2478 & id%INFOG(22) / id%NSLAVES 2479 END IF 2480 END IF 2481* save statistics in KEEP array. 2482 KEEP(33) = id%INFO(11) ! this should be the other way round 2483C 2484C Sum RINFO(2) : total number of flops for assemblies 2485C Sum RINFO(3) : total number of flops for eliminations 2486C 2487C Should work even if the master does some work 2488C 2489 CALL MPI_REDUCE( RINFO(2), RINFOG(2), 2, 2490 & MPI_DOUBLE_PRECISION, 2491 & MPI_SUM, MASTER, id%COMM, IERR) 2492C Reduce needed to dimension small working array 2493C on all procs during ZMUMPS_GATHER_SOLUTION 2494 KEEP(247) = 0 2495 CALL MPI_REDUCE( KEEP(246), KEEP(247), 1, MPI_INTEGER, 2496 & MPI_MAX, MASTER, id%COMM, IERR) 2497C 2498C Reduce compression times: get max compression times 2499 CALL MPI_REDUCE( id%DKEEP(97), id%DKEEP(98), 1, 2500 & MPI_DOUBLE_PRECISION, 2501 & MPI_MAX, MASTER, id%COMM, IERR) 2502C 2503 CALL MPI_REDUCE( RINFO(2), RINFOG(2), 2, 2504 & MPI_DOUBLE_PRECISION, 2505 & MPI_SUM, MASTER, id%COMM, IERR) 2506 CALL MUMPS_REDUCEI8( id%KEEP8(31),id%KEEP8(6), MPI_SUM, 2507 & MASTER, id%COMM ) 2508 CALL MUMPS_SETI8TOI4(id%KEEP8(6), INFOG(9)) 2509 CALL MPI_REDUCE( id%INFO(10), INFOG(10), 1, MPI_INTEGER, 2510 & MPI_SUM, MASTER, id%COMM, IERR) 2511C Use MPI_MAX for this one to get largest front size 2512 CALL MPI_ALLREDUCE( id%INFO(11), INFOG(11), 1, MPI_INTEGER, 2513 & MPI_MAX, id%COMM, IERR) 2514C make maximum effective frontal size available on all procs 2515C for solve phase 2516C (Note that INFO(11) includes root size on root master) 2517 KEEP(133) = INFOG(11) 2518 CALL MPI_REDUCE( id%INFO(12), INFOG(12), 3, MPI_INTEGER, 2519 & MPI_SUM, MASTER, id%COMM, IERR) 2520 CALL MPI_REDUCE( KEEP(103), INFOG(25), 1, MPI_INTEGER, 2521 & MPI_SUM, MASTER, id%COMM, IERR) 2522 KEEP(229) = INFOG(25) 2523 CALL MPI_REDUCE( KEEP(105), INFOG(25), 1, MPI_INTEGER, 2524 & MPI_SUM, MASTER, id%COMM, IERR) 2525 KEEP(230) = INFOG(25) 2526C 2527 id%INFO(25) = KEEP(98) 2528 CALL MPI_ALLREDUCE( id%INFO(25), INFOG(25), 1, MPI_INTEGER, 2529 & MPI_SUM, id%COMM, IERR) 2530C Extra copies due to in-place stacking 2531 CALL MUMPS_REDUCEI8( id%KEEP8(8), id%KEEP8(108), MPI_SUM, 2532 & MASTER, id%COMM ) 2533C Entries in factors 2534 CALL MUMPS_SETI8TOI4(id%KEEP8(10), id%INFO(27)) 2535 CALL MUMPS_REDUCEI8( id%KEEP8(10),id%KEEP8(110), MPI_SUM, 2536 & MASTER, id%COMM ) 2537 CALL MUMPS_SETI8TOI4(id%KEEP8(110), INFOG(29)) 2538C ============================== 2539C LOW-RANK 2540C ============================== 2541 IF ( KEEP(486) .GT. 0 ) THEN !LR is activated 2542 CALL MPI_REDUCE( GLOBAL_BLR_SAVINGS, TMP_GLOBAL_BLR_SAVINGS 2543 & , 1, MPI_DOUBLE_PRECISION, 2544 & MPI_SUM, MASTER, id%COMM, IERR) 2545 CALL MPI_REDUCE( ACC_FR_MRY, TMP_ACC_FR_MRY 2546 & , 1, MPI_DOUBLE_PRECISION, 2547 & MPI_SUM, MASTER, id%COMM, IERR) 2548 CALL MPI_REDUCE( ACC_LR_FLOP_GAIN, TMP_ACC_LR_FLOP_GAIN 2549 & , 1, MPI_DOUBLE_PRECISION, 2550 & MPI_SUM, MASTER, id%COMM, IERR) 2551 CALL MPI_REDUCE( ACC_FLOP_FR_TRSM, TMP_ACC_FLOP_FR_TRSM 2552 & , 1, MPI_DOUBLE_PRECISION, 2553 & MPI_SUM, MASTER, id%COMM, IERR) 2554 CALL MPI_REDUCE( ACC_FLOP_LR_TRSM, TMP_ACC_FLOP_LR_TRSM 2555 & , 1, MPI_DOUBLE_PRECISION, 2556 & MPI_SUM, MASTER, id%COMM, IERR) 2557 CALL MPI_REDUCE( ACC_FLOP_FR_UPDT, TMP_ACC_FLOP_FR_UPDT 2558 & , 1, MPI_DOUBLE_PRECISION, 2559 & MPI_SUM, MASTER, id%COMM, IERR) 2560 CALL MPI_REDUCE( ACC_FLOP_LR_UPDT, TMP_ACC_FLOP_LR_UPDT 2561 & , 1, MPI_DOUBLE_PRECISION, 2562 & MPI_SUM, MASTER, id%COMM, IERR) 2563 CALL MPI_REDUCE( ACC_FLOP_RMB, TMP_ACC_FLOP_RMB 2564 & , 1, MPI_DOUBLE_PRECISION, 2565 & MPI_SUM, MASTER, id%COMM, IERR) 2566 CALL MPI_REDUCE( ACC_FLOP_LR_UPDT_OUT, 2567 & TMP_ACC_FLOP_LR_UPDT_OUT 2568 & , 1, MPI_DOUBLE_PRECISION, 2569 & MPI_SUM, MASTER, id%COMM, IERR) 2570 CALL MPI_REDUCE( ACC_FLOP_DEC_ACC, TMP_ACC_FLOP_DEC_ACC 2571 & , 1, MPI_DOUBLE_PRECISION, 2572 & MPI_SUM, MASTER, id%COMM, IERR) 2573 CALL MPI_REDUCE( ACC_FLOP_REC_ACC, TMP_ACC_FLOP_REC_ACC 2574 & , 1, MPI_DOUBLE_PRECISION, 2575 & MPI_SUM, MASTER, id%COMM, IERR) 2576 CALL MPI_REDUCE( ACC_FLOP_TRSM, TMP_ACC_FLOP_TRSM 2577 & , 1, MPI_DOUBLE_PRECISION, 2578 & MPI_SUM, MASTER, id%COMM, IERR) 2579 CALL MPI_REDUCE( ACC_FLOP_PANEL, TMP_ACC_FLOP_PANEL 2580 & , 1, MPI_DOUBLE_PRECISION, 2581 & MPI_SUM, MASTER, id%COMM, IERR) 2582 CALL MPI_REDUCE( ACC_FLOP_FRFRONTS, TMP_ACC_FLOP_FRFRONTS 2583 & , 1, MPI_DOUBLE_PRECISION, 2584 & MPI_SUM, MASTER, id%COMM, IERR) 2585 CALL MPI_REDUCE( ACC_FLOP_DEMOTE, TMP_ACC_FLOP_DEMOTE 2586 & , 1, MPI_DOUBLE_PRECISION, 2587 & MPI_SUM, MASTER, id%COMM, IERR) 2588 CALL MPI_REDUCE( ACC_FLOP_CB_DEMOTE, TMP_ACC_FLOP_CB_DEMOTE 2589 & , 1, MPI_DOUBLE_PRECISION, 2590 & MPI_SUM, MASTER, id%COMM, IERR) 2591 CALL MPI_REDUCE( ACC_FLOP_CB_PROMOTE,TMP_ACC_FLOP_CB_PROMOTE 2592 & , 1, MPI_DOUBLE_PRECISION, 2593 & MPI_SUM, MASTER, id%COMM, IERR) 2594 CALL MPI_REDUCE( ACC_FLOP_FR_FACTO,TMP_ACC_FLOP_FR_FACTO 2595 & , 1, MPI_DOUBLE_PRECISION, 2596 & MPI_SUM, MASTER, id%COMM, IERR) 2597 CALL MPI_REDUCE( CNT_NODES,TMP_CNT_NODES 2598 & , 1, MPI_INTEGER, 2599 & MPI_SUM, MASTER, id%COMM, IERR) 2600 IF (id%NPROCS.GT.1) THEN 2601 ACC_FLOP_LR_FACTO = ACC_FLOP_FR_FACTO - ACC_LR_FLOP_GAIN 2602 & + ACC_FLOP_DEMOTE + ACC_FLOP_FRFRONTS 2603 CALL MPI_REDUCE( ACC_FLOP_LR_FACTO,AVG_ACC_FLOP_LR_FACTO 2604 & , 1, MPI_DOUBLE_PRECISION, 2605 & MPI_SUM, MASTER, id%COMM, IERR) 2606 IF (id%MYID.EQ.MASTER) THEN 2607 AVG_ACC_FLOP_LR_FACTO = AVG_ACC_FLOP_LR_FACTO/id%NPROCS 2608 ENDIF 2609 CALL MPI_REDUCE( ACC_FLOP_LR_FACTO,MIN_ACC_FLOP_LR_FACTO 2610 & , 1, MPI_DOUBLE_PRECISION, 2611 & MPI_MIN, MASTER, id%COMM, IERR) 2612 CALL MPI_REDUCE( ACC_FLOP_LR_FACTO,MAX_ACC_FLOP_LR_FACTO 2613 & , 1, MPI_DOUBLE_PRECISION, 2614 & MPI_MAX, MASTER, id%COMM, IERR) 2615 ENDIF ! NPROCS > 1 2616 CALL MPI_REDUCE( ACC_UPDT_TIME,TMP_ACC_UPDT_TIME 2617 & , 1, MPI_DOUBLE_PRECISION, 2618 & MPI_SUM, MASTER, id%COMM, IERR) 2619 CALL MPI_REDUCE( ACC_DEMOTING_TIME,TMP_ACC_DEMOTING_TIME 2620 & , 1, MPI_DOUBLE_PRECISION, 2621 & MPI_SUM, MASTER, id%COMM, IERR) 2622 CALL MPI_REDUCE( ACC_CB_DEMOTING_TIME, 2623 & TMP_ACC_CB_DEMOTING_TIME, 1, 2624 & MPI_DOUBLE_PRECISION, MPI_SUM, MASTER, 2625 & id%COMM, IERR) 2626 CALL MPI_REDUCE( ACC_PROMOTING_TIME,TMP_ACC_PROMOTING_TIME 2627 & , 1, MPI_DOUBLE_PRECISION, 2628 & MPI_SUM, MASTER, id%COMM, IERR) 2629 CALL MPI_REDUCE( ACC_FRPANELS_TIME,TMP_ACC_FRPANELS_TIME 2630 & , 1, MPI_DOUBLE_PRECISION, 2631 & MPI_SUM, MASTER, id%COMM, IERR) 2632 CALL MPI_REDUCE( ACC_FAC_I_TIME,TMP_ACC_FAC_I_TIME 2633 & , 1, MPI_DOUBLE_PRECISION, 2634 & MPI_SUM, MASTER, id%COMM, IERR) 2635 CALL MPI_REDUCE( ACC_FAC_MQ_TIME,TMP_ACC_FAC_MQ_TIME 2636 & , 1, MPI_DOUBLE_PRECISION, 2637 & MPI_SUM, MASTER, id%COMM, IERR) 2638 CALL MPI_REDUCE( ACC_FAC_SQ_TIME,TMP_ACC_FAC_SQ_TIME 2639 & , 1, MPI_DOUBLE_PRECISION, 2640 & MPI_SUM, MASTER, id%COMM, IERR) 2641 CALL MPI_REDUCE( ACC_TRSM_TIME,TMP_ACC_TRSM_TIME 2642 & , 1, MPI_DOUBLE_PRECISION, 2643 & MPI_SUM, MASTER, id%COMM, IERR) 2644 CALL MPI_REDUCE( ACC_FRFRONTS_TIME,TMP_ACC_FRFRONTS_TIME 2645 & , 1, MPI_DOUBLE_PRECISION, 2646 & MPI_SUM, MASTER, id%COMM, IERR) 2647 CALL MPI_REDUCE( ACC_LR_MODULE_TIME,TMP_ACC_LR_MODULE_TIME 2648 & , 1, MPI_DOUBLE_PRECISION, 2649 & MPI_SUM, MASTER, id%COMM, IERR) 2650 IF (id%MYID.EQ.MASTER) THEN 2651 IF (id%NPROCS.GT.1) THEN 2652C rename the stat variable so that COMPUTE_GLOBAL_GAINS can work for any 2653C number of procs 2654 GLOBAL_BLR_SAVINGS = TMP_GLOBAL_BLR_SAVINGS 2655 ACC_FR_MRY = TMP_ACC_FR_MRY 2656 ACC_LR_FLOP_GAIN = TMP_ACC_LR_FLOP_GAIN 2657 ACC_FLOP_TRSM = TMP_ACC_FLOP_TRSM 2658 ACC_FLOP_PANEL = TMP_ACC_FLOP_PANEL 2659 ACC_FLOP_LR_TRSM = TMP_ACC_FLOP_LR_TRSM 2660 ACC_FLOP_FR_TRSM = TMP_ACC_FLOP_FR_TRSM 2661 ACC_FLOP_LR_UPDT = TMP_ACC_FLOP_LR_UPDT 2662 ACC_FLOP_LR_UPDT_OUT = TMP_ACC_FLOP_LR_UPDT_OUT 2663 ACC_FLOP_RMB = TMP_ACC_FLOP_RMB 2664 ACC_FLOP_DEC_ACC = TMP_ACC_FLOP_DEC_ACC 2665 ACC_FLOP_REC_ACC = TMP_ACC_FLOP_REC_ACC 2666 ACC_FLOP_FR_UPDT = TMP_ACC_FLOP_FR_UPDT 2667 ACC_FLOP_DEMOTE = TMP_ACC_FLOP_DEMOTE 2668 ACC_FLOP_CB_DEMOTE = TMP_ACC_FLOP_CB_DEMOTE 2669 ACC_FLOP_CB_PROMOTE = TMP_ACC_FLOP_CB_PROMOTE 2670 ACC_FLOP_FRFRONTS = TMP_ACC_FLOP_FRFRONTS 2671 CNT_NODES = TMP_CNT_NODES 2672 ACC_FLOP_FR_FACTO = TMP_ACC_FLOP_FR_FACTO 2673C ACC_FLOP_LR_FACTO = ACC_FLOP_FR_FACTO - ACC_LR_FLOP_GAIN 2674C & + ACC_FLOP_DEMOTE 2675 ACC_UPDT_TIME = TMP_ACC_UPDT_TIME /id%NPROCS 2676 ACC_DEMOTING_TIME = TMP_ACC_DEMOTING_TIME /id%NPROCS 2677 ACC_CB_DEMOTING_TIME = TMP_ACC_CB_DEMOTING_TIME/id%NPROCS 2678 ACC_PROMOTING_TIME = TMP_ACC_PROMOTING_TIME /id%NPROCS 2679 ACC_FRPANELS_TIME = TMP_ACC_FRPANELS_TIME /id%NPROCS 2680 ACC_FAC_I_TIME = TMP_ACC_FAC_I_TIME /id%NPROCS 2681 ACC_FAC_MQ_TIME = TMP_ACC_FAC_MQ_TIME /id%NPROCS 2682 ACC_FAC_SQ_TIME = TMP_ACC_FAC_SQ_TIME /id%NPROCS 2683 ACC_TRSM_TIME = TMP_ACC_TRSM_TIME /id%NPROCS 2684 ACC_FRFRONTS_TIME = TMP_ACC_FRFRONTS_TIME /id%NPROCS 2685 ACC_LR_MODULE_TIME = TMP_ACC_LR_MODULE_TIME /id%NPROCS 2686 ENDIF 2687 CALL COMPUTE_GLOBAL_GAINS(id%KEEP8(110),RINFOG(3),id%NPROCS, 2688 & PROKG, MPG) 2689 FRONTWISE = 0 2690 IF (id%KEEP(486).EQ.1) THEN 2691C BLR was activated 2692C WRITE gains also compute stats stored in DKEEP array 2693 IF (LPOK) THEN 2694 IF (CNTL(7) < 0.0D0) THEN 2695C Warning : using negative values is an experimental and 2696C non recommended setting. 2697 WRITE(LP,'(/A/,A/,A/,A,A)') 2698 & ' WARNING in BLR input setting', 2699 & ' CNTL(7) < 0 is experimental: ', 2700 & ' RRQR precision = |CNTL(7| x ||A_pre||, ', 2701 & ' where A_pre is the preprocessed matrix as defined', 2702 & ' in the Users guide ' 2703 ENDIF 2704 ENDIF 2705 CALL SAVEandWRITE_GAINS(FRONTWISE, 2706 & KEEP(489), id%DKEEP, N, 2707 & KEEP(487), KEEP(488), KEEP(490), 2708 & KEEP(491), KEEP(50), KEEP(486), KEEP(472), 2709 & KEEP(475), KEEP(478), KEEP(480), KEEP(481), 2710 & KEEP(483), KEEP(484), KEEP(485), KEEP(467), 2711 & KEEP(28), id%NPROCS, MPG, PROKG) 2712C flops when BLR activated 2713 RINFOG(14) = id%DKEEP(56) 2714 ELSE 2715 RINFOG(14) = 0.0D00 2716 ENDIF 2717 ENDIF 2718 ENDIF 2719C ============================== 2720C NULL PIVOTS AND RANK-REVEALING 2721C ============================== 2722 IF(KEEP(110) .EQ. 1) THEN 2723C -- make available to users the local number of null pivots detected 2724C -- with ICNTL(24) = 1. 2725 id%INFO(18) = KEEP(109) 2726 CALL MPI_ALLREDUCE( KEEP(109), KEEP(112), 1, MPI_INTEGER, 2727 & MPI_SUM, id%COMM, IERR) 2728 ELSE 2729 id%INFO(18) = 0 2730 KEEP(109) = 0 2731 KEEP(112) = 0 2732 ENDIF 2733C INFOG(28) deficiency resulting from ICNTL(24) and ICNTL(16). 2734C Note that KEEP(17) already has the same value on all procs 2735 INFOG(28)=KEEP(112)+KEEP(17) 2736C ======================================== 2737C We now provide to the host the part of 2738C PIVNUL_LIST resulting from the processing 2739C of the root node and we update id%INFO(18) 2740C on the processor holding the root to 2741C include null pivots relative to the root 2742C ======================================== 2743 IF (KEEP(17) .NE. 0) THEN 2744 IF (id%MYID .EQ. ID_ROOT) THEN 2745C Include in id%INFO(18) null pivots resulting 2746C from deficiency on the root. In this way, 2747C the sum of all id%INFO(18) is equal to INFOG(28). 2748 id%INFO(18)=id%INFO(18)+KEEP(17) 2749 ENDIF 2750 IF (ID_ROOT .EQ. MASTER) THEN 2751 IF (id%MYID.EQ.MASTER) THEN 2752C -------------------------------------------------- 2753C Null pivots of root have been stored in 2754C PIVNUL_LIST(KEEP(109)+1:KEEP(109)+KEEP(17). 2755C Shift them at the end of the list because: 2756C * this is what we need to build the null space 2757C * we would otherwise overwrite them on the host 2758C when gathering null pivots from other processors 2759C -------------------------------------------------- 2760 DO I=1, KEEP(17) 2761 id%PIVNUL_LIST(KEEP(112)+I)=id%PIVNUL_LIST(KEEP(109)+I) 2762 ENDDO 2763 ENDIF 2764 ELSE 2765C --------------------------------- 2766C Null pivots of root must be sent 2767C from the processor responsible of 2768C the root to the host (or MASTER). 2769C --------------------------------- 2770 IF (id%MYID .EQ. ID_ROOT) THEN 2771 CALL MPI_SEND(id%PIVNUL_LIST(KEEP(109)+1), KEEP(17), 2772 & MPI_INTEGER, MASTER, ZERO_PIV, 2773 & id%COMM, IERR) 2774 ELSE IF (id%MYID .EQ. MASTER) THEN 2775 CALL MPI_RECV(id%PIVNUL_LIST(KEEP(112)+1), KEEP(17), 2776 & MPI_INTEGER, ID_ROOT, ZERO_PIV, 2777 & id%COMM, STATUS, IERR ) 2778 ENDIF 2779 ENDIF 2780 ENDIF 2781C =========================== 2782C gather zero pivots indices 2783C on the host node 2784C =========================== 2785C In case of non working host, the following code also 2786C works considering that KEEP(109) is equal to 0 on 2787C the non-working host 2788 IF(KEEP(110) .EQ. 1) THEN 2789 ALLOCATE(ITMP2(id%NPROCS),stat = IERR ) ! deallocated in 490 2790 IF ( IERR .GT. 0 ) THEN 2791 id%INFO(1)=-13 2792 id%INFO(2)=id%NPROCS 2793 END IF 2794 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 2795 & id%COMM, id%MYID ) 2796 IF (id%INFO(1).LT.0) GOTO 490 2797 CALL MPI_GATHER ( KEEP(109),1, MPI_INTEGER, 2798 & ITMP2(1), 1, MPI_INTEGER, 2799 & MASTER, id%COMM, IERR) 2800 IF(id%MYID .EQ. MASTER) THEN 2801 POSBUF = ITMP2(1)+1 2802C First null pivot of master is in 2803C position 1 of global list 2804 KEEP(220)=1 2805 DO I = 1,id%NPROCS-1 2806 CALL MPI_RECV(id%PIVNUL_LIST(POSBUF), ITMP2(I+1), 2807 & MPI_INTEGER,I, 2808 & ZERO_PIV, id%COMM, STATUS, IERR) 2809C Send position POSBUF of first null pivot of proc I 2810C in global list. Will allow to quickly identify during 2811C the solve step if one is concerned by a global position 2812C K, 0 <= K <= INFOG(28). 2813 CALL MPI_SEND(POSBUF, 1, MPI_INTEGER, I, ZERO_PIV, 2814 & id%COMM, IERR) 2815 POSBUF = POSBUF + ITMP2(I+1) 2816 ENDDO 2817 ELSE 2818 CALL MPI_SEND( id%PIVNUL_LIST(1), KEEP(109), MPI_INTEGER, 2819 & MASTER,ZERO_PIV, id%COMM, IERR) 2820 CALL MPI_RECV( KEEP(220), 1, MPI_INTEGER, MASTER, ZERO_PIV, 2821 & id%COMM, STATUS, IERR ) 2822 ENDIF 2823 ENDIF 2824C ===================================== 2825C Statistics concerning the determinant 2826C ===================================== 2827C 2828C 1/ on the host better take into account null pivots if scaling: 2829C 2830C Since null pivots are excluded from the computation 2831C of the determinant, we also exclude the corresponding 2832C scaling entries. Since those entries have already been 2833C taken into account before the factorization, we multiply 2834C the determinant on the host by the scaling values corresponding 2835C to pivots in PIVNUL_LIST. 2836 IF (id%MYID.EQ.MASTER .AND. LSCAL. AND. KEEP(258).NE.0) THEN 2837 DO I = 1, id%INFOG(28) 2838 CALL ZMUMPS_UPDATEDETER( 2839 & cmplx(id%ROWSCA(id%PIVNUL_LIST(I)),0.0D0, 2840 & kind=kind(0.0D0)), 2841 & id%DKEEP(6), KEEP(259)) 2842 CALL ZMUMPS_UPDATEDETER( 2843 & cmplx(id%COLSCA(id%PIVNUL_LIST(I)),0.0D0, 2844 & kind=kind(0.0D0)), 2845 & id%DKEEP(6), KEEP(259)) 2846 ENDDO 2847 ENDIF 2848C 2849C 2/ Swap signs depending on pivoting on each proc 2850C 2851 IF (KEEP(258).NE.0) THEN 2852C Return the determinant in INFOG(34) and RINFOG(12/13) 2853 IF (KEEP(260).EQ.-1) THEN ! Local to each processor 2854 id%DKEEP(6)=-id%DKEEP(6) 2855 id%DKEEP(7)=-id%DKEEP(7) 2856 ENDIF 2857C 2858C 3/ Perform a reduction 2859C 2860 CALL ZMUMPS_DETER_REDUCTION( 2861 & id%COMM, id%DKEEP(6), KEEP(259), 2862 & RINFOG(12), INFOG(34), id%NPROCS) 2863C 2864C 4/ Swap sign if needed 2865C 2866 IF (id%KEEP(50).EQ.0 .AND. id%MYID.EQ. MASTER) THEN 2867C Modify sign of determinant according 2868C to unsymmetric permutation (max-trans 2869C of max-weighted matching) 2870 IF (id%KEEP(23).NE.0) THEN 2871 CALL ZMUMPS_DETER_SIGN_PERM( 2872 & RINFOG(12), id%N, 2873C id%STEP: used as workspace of size N still 2874C allocated on master; restored on exit 2875 & id%STEP(1), 2876 & id%UNS_PERM(1) ) 2877C Remark that RINFOG(12/13) are modified only 2878C on the host but will be broadcast on exit 2879C from MUMPS (see ZMUMPS_DRIVER) 2880 ENDIF 2881 ENDIF 2882 ENDIF 2883 490 IF (allocated(ITMP2)) DEALLOCATE(ITMP2) 2884 IF ( PROKG ) THEN 2885C --------------------------------------- 2886C PRINT STATISTICS (on master) 2887C ----------------------------- 2888 WRITE(MPG,99984) RINFOG(2),RINFOG(3),id%KEEP8(6),INFOG(10), 2889 & INFOG(11), id%KEEP8(110) 2890 IF (id%KEEP(50) == 0) THEN 2891 ! off diag pivots 2892 WRITE(MPG, 99985) INFOG(12) 2893 END IF 2894 IF (id%KEEP(50) .NE. 1) THEN 2895 ! delayed pivots 2896 WRITE(MPG, 99982) INFOG(13) 2897 END IF 2898 IF (KEEP(97) .NE. 0) THEN 2899 ! tiny pivots 2900 WRITE(MPG, 99986) INFOG(25) 2901 ENDIF 2902 IF (id%KEEP(50) == 2) THEN 2903 !number of 2x2 pivots in type 1 nodes 2904 WRITE(MPG, 99988) KEEP(229) 2905 !number of 2x2 pivots in type 2 nodes 2906 WRITE(MPG, 99989) KEEP(230) 2907 ENDIF 2908 !number of zero pivots 2909 IF (KEEP(110) .NE.0) THEN 2910 WRITE(MPG, 99991) KEEP(112) 2911 ENDIF 2912 !Deficiency on root 2913 IF ( KEEP(17) .ne. 0 ) 2914 & WRITE(MPG, 99983) KEEP(17) 2915 !Total deficiency 2916 IF (KEEP(110).NE.0.OR.KEEP(17).NE.0) 2917 & WRITE(MPG, 99992) KEEP(17)+KEEP(112) 2918 ! Memory compress 2919 WRITE(MPG, 99981) INFOG(14) 2920 ! Extra copies due to ip stack in unsym case 2921 ! in core case (or OLD_OOC_PANEL) 2922 IF (id%KEEP8(108) .GT. 0_8) THEN 2923 WRITE(MPG, 99980) id%KEEP8(108) 2924 ENDIF 2925 IF ((KEEP(60).NE.0) .AND. INFOG(25).GT.0) THEN 2926 ! Schur on and tiny pivots set in last level 2927 ! before the SChur 2928 WRITE(MPG, '(A)') 2929 & " ** Warning Static pivoting was necessary" 2930 WRITE(MPG, '(A)') 2931 & " ** to factor interior variables with Schur ON" 2932 ENDIF 2933 IF (KEEP(258).NE.0) THEN 2934 WRITE(MPG,99978) RINFOG(12) 2935 WRITE(MPG,99979) RINFOG(13) 2936 WRITE(MPG,99977) INFOG(34) 2937 ENDIF 2938 END IF 2939* ========================================== 2940* 2941* End of Factorization Phase 2942* 2943* ========================================== 2944C 2945C Goto 500 is done when 2946C LOAD_INIT 2947C OOC_INIT_FACTO 2948C MUMPS_FDM_INIT 2949#if ! defined(NO_FDM_DESCBAND) 2950C MUMPS_FDBD_INIT 2951#endif 2952#if ! defined(NO_FDM_MAPROW) 2953C MUMPS_FMRD_INIT 2954#endif 2955C are all called. 2956C 2957 500 CONTINUE 2958#if ! defined(NO_FDM_DESCBAND) 2959 IF (I_AM_SLAVE) THEN 2960 CALL MUMPS_FDBD_END(id%INFO(1)) ! INFO(1): input only 2961 ENDIF 2962#endif 2963#if ! defined(NO_FDM_MAPROW) 2964 IF (I_AM_SLAVE) THEN 2965 CALL MUMPS_FMRD_END(id%INFO(1)) ! INFO(1): input only 2966 ENDIF 2967#endif 2968 IF (I_AM_SLAVE) THEN 2969 CALL ZMUMPS_BLR_END_MODULE(id%INFO(1), id%KEEP8, .TRUE.) 2970C INFO(1): input only 2971 ENDIF 2972 IF (I_AM_SLAVE) THEN 2973 CALL MUMPS_FDM_END('A') 2974 CALL MUMPS_FDM_END('F') 2975 ENDIF 2976C 2977C Goto 514 is done when an 2978C error occurred in MUMPS_FDM_INIT 2979C or (after FDM_INIT but before 2980C OOC_INIT) 2981C 2982 514 CONTINUE 2983 IF ( I_AM_SLAVE ) THEN 2984 IF ((KEEP(201).EQ.1).OR.(KEEP(201).EQ.2)) THEN 2985 CALL ZMUMPS_OOC_END_FACTO(id,IERR) 2986 IF (id%ASSOCIATED_OOC_FILES) THEN 2987 id%ASSOCIATED_OOC_FILES = .FALSE. 2988 ENDIF 2989 IF (IERR.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = IERR 2990 ENDIF 2991 IF (WK_USER_PROVIDED) THEN 2992C at the end of a phase S is always freed when WK_USER provided 2993 NULLIFY(id%S) 2994 ELSE IF (KEEP(201).NE.0) THEN 2995C ---------------------------------------- 2996C In OOC or if KEEP(201).EQ.-1 we always 2997C free S at end of factorization. As id%S 2998C may be unassociated in case of error 2999C during or before the allocation of id%S, 3000C we only free S when it was associated. 3001C ---------------------------------------- 3002 IF (associated(id%S)) DEALLOCATE(id%S) 3003 NULLIFY(id%S) ! in all cases 3004 id%KEEP8(23)=0_8 3005 ENDIF 3006 ELSE ! host not working 3007 IF (WK_USER_PROVIDED) THEN 3008C at the end of a phase S is always freed when WK_USER provided 3009 NULLIFY(id%S) 3010 ELSE 3011 IF (associated(id%S)) DEALLOCATE(id%S) 3012 NULLIFY(id%S) ! in all cases 3013 id%KEEP8(23)=0_8 3014 END IF 3015 END IF 3016C 3017C Goto 513 is done in case of error where LOAD_INIT was 3018C called but not OOC_INIT_FACTO. 3019 513 CONTINUE 3020 IF ( I_AM_SLAVE ) THEN 3021 CALL ZMUMPS_LOAD_END( id%INFO(1), id%NSLAVES, IERR ) 3022 IF (IERR.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = IERR 3023 ENDIF 3024 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1), 3025 & id%COMM, id%MYID ) 3026C 3027C Goto 530 is done when an error occurs before 3028C the calls to LOAD_INIT and OOC_INIT_FACTO 3029 530 CONTINUE 3030C Fwd in facto: free RHS_MUMPS in case 3031C it was allocated. 3032 IF (RHS_MUMPS_ALLOCATED) DEALLOCATE(RHS_MUMPS) 3033 NULLIFY(RHS_MUMPS) 3034C 3035 id%KEEP8(26) = KEEP826_SAVE 3036 RETURN 3037 120 FORMAT(/' LOCAL REDISTRIB: DATA LOCAL/SENT =',I16,I16) 3038 125 FORMAT(/' REDISTRIB: TOTAL DATA LOCAL/SENT =',I16,I16) 3039 130 FORMAT(/' ****** FACTORIZATION STEP ********'/) 3040 160 FORMAT(' ELAPSED TIME FOR MATRIX DISTRIBUTION =',F12.4) 3041 166 FORMAT(' Convergence error after scaling for ONE-NORM', 3042 & ' (option 7/8) =',D9.2) 3043 170 FORMAT(/' STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/ 3044 & ' Size of internal working array S =',I16/ 3045 & ' Size of internal working array IS =',I16/ 3046 & ' MINIMUM (ICNTL(14)=0) size of S =',I16/ 3047 & ' MINIMUM (ICNTL(14)=0) size of IS =',I16/ 3048 & ' REAL SPACE FOR ORIGINAL MATRIX =',I16/ 3049 & ' INTEGER SPACE FOR ORIGINAL MATRIX =',I16/ 3050 & ' REAL SPACE FOR FACTORS =',I16/ 3051 & ' INTEGER SPACE FOR FACTORS =',I16/ 3052 & ' MAXIMUM FRONTAL SIZE (ESTIMATED) =',I16) 3053 172 FORMAT(/' GLOBAL STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/ 3054 & ' NUMBER OF WORKING PROCESSES =',I16/ 3055 & ' OUT-OF-CORE OPTION (ICNTL(22)) =',I16/ 3056 & ' REAL SPACE FOR FACTORS =',I16/ 3057 & ' INTEGER SPACE FOR FACTORS =',I16/ 3058 & ' MAXIMUM FRONTAL SIZE (ESTIMATED) =',I16/ 3059 & ' NUMBER OF NODES IN THE TREE =',I16/ 3060 & ' MEMORY ALLOWED (MB -- 0: N/A ) =',I16/ 3061 & ' RELATIVE THRESHOLD FOR PIVOTING, CNTL(1) =',D16.4) 3062 173 FORMAT( ' PERFORM FORWARD DURING FACTO, NRHS =',I16) 3063 175 FORMAT(/' NUMBER OF ENTRIES FOR // ROOT =',I16) 3064 180 FORMAT(/' ELAPSED TIME FOR FACTORIZATION =',F12.4) 3065 185 FORMAT(/' ELAPSED TIME FOR (FAILED) FACTORIZATION =',F12.4) 306699977 FORMAT( ' INFOG(34) DETERMINANT (base 2 exponent) =',I16) 306799978 FORMAT( ' RINFOG(12) DETERMINANT (real part) =',F12.4) 306899979 FORMAT( ' RINFOG(12) DETERMINANT (imaginary part) =',F12.4) 306999980 FORMAT( ' Extra copies due to In-Place stacking =',I16) 307099981 FORMAT( ' INFOG(14) NUMBER OF MEMORY COMPRESS =',I16) 307199982 FORMAT( ' INFOG(13) NUMBER OF DELAYED PIVOTS =',I16) 307299983 FORMAT( ' NB OF NULL PIVOTS DETECTED BY ICNTL(16) =',I16) 307399991 FORMAT( ' NB OF NULL PIVOTS DETECTED BY ICNTL(24) =',I16) 307499992 FORMAT( ' INFOG(28) ESTIMATED DEFICIENCY =',I16) 307599984 FORMAT(/' GLOBAL STATISTICS '/ 3076 & ' RINFOG(2) OPERATIONS IN NODE ASSEMBLY =',1PD10.3/ 3077 & ' ------(3) OPERATIONS IN NODE ELIMINATION=',1PD10.3/ 3078 & ' INFOG (9) REAL SPACE FOR FACTORS =',I16/ 3079 & ' INFOG(10) INTEGER SPACE FOR FACTORS =',I16/ 3080 & ' INFOG(11) MAXIMUM FRONT SIZE =',I16/ 3081 & ' INFOG(29) NUMBER OF ENTRIES IN FACTORS =',I16) 308299985 FORMAT( ' INFOG(12) NUMBER OF OFF DIAGONAL PIVOTS =',I16) 308399986 FORMAT( ' INFOG(25) NUMBER OF TINY PIVOTS(STATIC) =',I16) 308499988 FORMAT( ' NUMBER OF 2x2 PIVOTS in type 1 nodes =',I16) 308599989 FORMAT( ' NUMBER OF 2x2 PIVOTS in type 2 nodes =',I16) 3086 END SUBROUTINE ZMUMPS_FAC_DRIVER 3087 SUBROUTINE ZMUMPS_AVGMAX_STAT8(PROKG, MPG, VAL, NSLAVES, 3088 & COMM, MSG) 3089 IMPLICIT NONE 3090 INCLUDE 'mpif.h' 3091 LOGICAL PROKG 3092 INTEGER MPG 3093 INTEGER(8) VAL 3094 INTEGER NSLAVES 3095 INTEGER COMM 3096 CHARACTER*42 MSG 3097C Local 3098 INTEGER(8) MAX_VAL 3099 INTEGER IERR, MASTER 3100 DOUBLE PRECISION LOC_VAL, AVG_VAL 3101 PARAMETER(MASTER=0) 3102C 3103 CALL MUMPS_REDUCEI8( VAL, MAX_VAL, MPI_MAX, MASTER, COMM) 3104 LOC_VAL = dble(VAL)/dble(NSLAVES) 3105 CALL MPI_REDUCE( LOC_VAL, AVG_VAL, 1, MPI_DOUBLE_PRECISION, 3106 & MPI_SUM, MASTER, COMM, IERR ) 3107 IF (PROKG) THEN 3108 WRITE(MPG,100) " Maximum ", MSG, MAX_VAL 3109 WRITE(MPG,100) " Average ", MSG, int(AVG_VAL,8) 3110 ENDIF 3111 RETURN 3112 100 FORMAT(A9,A42,I16) 3113 END SUBROUTINE ZMUMPS_AVGMAX_STAT8 3114C 3115 SUBROUTINE ZMUMPS_EXTRACT_SCHUR_REDRHS(id) 3116 USE ZMUMPS_STRUC_DEF 3117 IMPLICIT NONE 3118C 3119C Purpose 3120C ======= 3121C 3122C Extract the Schur and possibly also the reduced right-hand side 3123C (if Fwd in facto) from the processor working on Schur and copy 3124C it into the user datastructures id%SCHUR and id%REDRHS on the host. 3125C This routine assumes that the integer list of the Schur has not 3126C been permuted and still corresponds to LISTVAR_SCHUR. 3127C 3128C If the Schur is centralized, the master of the Schur holds the 3129C Schur and possibly also the reduced right-hand side. 3130C If the Schur is distribued (already built in user's datastructure), 3131C then the master of the Schur may hold the reduced right-hand side, 3132C in which case it is available in root%RHS_CNTR_MASTER_ROOT. 3133C 3134 TYPE(ZMUMPS_STRUC) :: id 3135C 3136C Local variables 3137C =============== 3138C 3139 INCLUDE 'mpif.h' 3140 INCLUDE 'mumps_tags.h' 3141 INCLUDE 'mumps_headers.h' 3142 INTEGER :: STATUS(MPI_STATUS_SIZE) 3143 INTEGER :: IERR 3144 INTEGER, PARAMETER :: MASTER = 0 3145 INTEGER :: ID_SCHUR, SIZE_SCHUR, LD_SCHUR, IB, BL4 3146 INTEGER :: ROW_LENGTH, I 3147 INTEGER(8) :: SURFSCHUR8, BL8, SHIFT8 3148 INTEGER(8) :: ISCHUR_SRC, ISCHUR_DEST, ISCHUR_SYM, ISCHUR_UNS 3149C 3150C External functions 3151C ================== 3152C 3153 INTEGER MUMPS_PROCNODE 3154 EXTERNAL MUMPS_PROCNODE 3155C Quick return in case factorization did not terminate correctly 3156 IF (id%INFO(1) .LT. 0) RETURN 3157C Quick return if Schur option off 3158 IF (id%KEEP(60) .EQ. 0) RETURN 3159C Get Schur id 3160 ID_SCHUR =MUMPS_PROCNODE( 3161 & id%PROCNODE_STEPS(id%STEP(max(id%KEEP(20),id%KEEP(38)))), 3162 & id%NSLAVES) 3163 IF ( id%KEEP( 46 ) .NE. 1 ) THEN 3164 ID_SCHUR = ID_SCHUR + 1 3165 END IF 3166C Get size of Schur 3167 IF (id%MYID.EQ.ID_SCHUR) THEN 3168 IF (id%KEEP(60).EQ.1) THEN 3169C Sequential Schur 3170 LD_SCHUR = 3171 & id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))+2+id%KEEP(IXSZ)) 3172 SIZE_SCHUR = LD_SCHUR - id%KEEP(253) 3173 ELSE 3174C Parallel Schur 3175 LD_SCHUR = -999999 ! not used 3176 SIZE_SCHUR = id%root%TOT_ROOT_SIZE 3177 ENDIF 3178 ELSE IF (id%MYID .EQ. MASTER) THEN 3179 SIZE_SCHUR = id%KEEP(116) 3180 LD_SCHUR = -44444 ! Not used 3181 ELSE 3182C Proc is not concerned with Schur, return 3183 RETURN 3184 ENDIF 3185 SURFSCHUR8 = int(SIZE_SCHUR,8)*int(SIZE_SCHUR,8) 3186C ================================= 3187C Case of parallel Schur: if REDRHS 3188C was requested, obtain it directly 3189C from id%root%RHS_CNTR_MASTER_ROOT 3190C ================================= 3191 IF (id%KEEP(60) .GT. 1) THEN 3192 IF (id%KEEP(221).EQ.1 .AND. id%KEEP(252).GT.0) THEN 3193 DO I = 1, id%KEEP(253) 3194 IF (ID_SCHUR.EQ.MASTER) THEN ! Necessarily = id%MYID 3195 CALL zcopy(SIZE_SCHUR, 3196 & id%root%RHS_CNTR_MASTER_ROOT((I-1)*SIZE_SCHUR+1), 1, 3197 & id%REDRHS((I-1)*id%LREDRHS+1), 1) 3198 ELSE 3199 IF (id%MYID.EQ.ID_SCHUR) THEN 3200C Send 3201 CALL MPI_SEND( 3202 & id%root%RHS_CNTR_MASTER_ROOT((I-1)*SIZE_SCHUR+1), 3203 & SIZE_SCHUR, 3204 & MPI_DOUBLE_COMPLEX, 3205 & MASTER, TAG_SCHUR, 3206 & id%COMM, IERR ) 3207 ELSE ! MYID.EQ.MASTER 3208C Receive 3209 CALL MPI_RECV( id%REDRHS((I-1)*id%LREDRHS+1), 3210 & SIZE_SCHUR, 3211 & MPI_DOUBLE_COMPLEX, ID_SCHUR, TAG_SCHUR, 3212 & id%COMM, STATUS, IERR ) 3213 ENDIF 3214 ENDIF 3215 ENDDO 3216C ------------------------------ 3217C In case of parallel Schur, we 3218C free root%RHS_CNTR_MASTER_ROOT 3219C ------------------------------ 3220 IF (id%MYID.EQ.ID_SCHUR) THEN 3221 DEALLOCATE(id%root%RHS_CNTR_MASTER_ROOT) 3222 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT) 3223 ENDIF 3224 ENDIF 3225C return because this is all we need to do 3226C in case of parallel Schur complement 3227 RETURN 3228 ENDIF 3229C ============================ 3230C Centralized Schur complement 3231C ============================ 3232C PTRAST has been freed at the moment of calling this 3233C routine. Schur is available through 3234C PTRFAC(IW( PTLUST_S( STEP(KEEP(20)) ) + 4 +KEEP(IXSZ) )) 3235 IF (id%KEEP(252).EQ.0) THEN 3236C CASE 1 (ORIGINAL CODE): 3237C Schur is contiguous on ID_SCHUR 3238 IF ( ID_SCHUR .EQ. MASTER ) THEN ! Necessarily equals id%MYID 3239C --------------------- 3240C Copy Schur complement 3241C --------------------- 3242 CALL ZMUMPS_COPYI8SIZE( SURFSCHUR8, 3243 & id%S(id%PTRFAC(id%STEP(id%KEEP(20)))), 3244 & id%SCHUR(1) ) 3245 ELSE 3246C ----------------------------------------- 3247C The processor responsible of the Schur 3248C complement sends it to the host processor 3249C ----------------------------------------- 3250 BL8=int(huge(BL4)/id%KEEP(35)/10,8) 3251 DO IB=1, int((SURFSCHUR8+BL8-1_8) / BL8) 3252 SHIFT8 = int(IB-1,8) * BL8 ! Where to send 3253 BL4 = int(min(BL8,SURFSCHUR8-SHIFT8)) ! Size of block 3254 IF ( id%MYID .eq. ID_SCHUR ) THEN 3255C Send Schur complement 3256 CALL MPI_SEND( id%S( SHIFT8 + 3257 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20))) 3258 & +4+id%KEEP(IXSZ)))), 3259 & BL4, 3260 & MPI_DOUBLE_COMPLEX, 3261 & MASTER, TAG_SCHUR, 3262 & id%COMM, IERR ) 3263 ELSE IF ( id%MYID .eq. MASTER ) THEN 3264C Receive Schur complement 3265 CALL MPI_RECV( id%SCHUR(1_8 + SHIFT8), 3266 & BL4, 3267 & MPI_DOUBLE_COMPLEX, ID_SCHUR, TAG_SCHUR, 3268 & id%COMM, STATUS, IERR ) 3269 END IF 3270 ENDDO 3271 END IF 3272 ELSE 3273C CASE 2 (Fwd in facto): Schur is not contiguous on ID_SCHUR, 3274C process it row by row. 3275C 3276C 2.1: We first centralize Schur complement into id%SCHUR 3277 ISCHUR_SRC = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20))) 3278 & +4+id%KEEP(IXSZ))) 3279 ISCHUR_DEST= 1_8 3280 DO I=1, SIZE_SCHUR 3281 ROW_LENGTH = SIZE_SCHUR 3282 IF (ID_SCHUR.EQ.MASTER) THEN ! Necessarily = id%MYID 3283 CALL zcopy(ROW_LENGTH, id%S(ISCHUR_SRC), 1, 3284 & id%SCHUR(ISCHUR_DEST),1) 3285 ELSE 3286 IF (id%MYID.EQ.ID_SCHUR) THEN 3287C Send 3288 CALL MPI_SEND( id%S(ISCHUR_SRC), ROW_LENGTH, 3289 & MPI_DOUBLE_COMPLEX, 3290 & MASTER, TAG_SCHUR, 3291 & id%COMM, IERR ) 3292 ELSE 3293C Recv 3294 CALL MPI_RECV( id%SCHUR(ISCHUR_DEST), 3295 & ROW_LENGTH, 3296 & MPI_DOUBLE_COMPLEX, ID_SCHUR, TAG_SCHUR, 3297 & id%COMM, STATUS, IERR ) 3298 ENDIF 3299 ENDIF 3300 ISCHUR_SRC = ISCHUR_SRC+int(LD_SCHUR,8) 3301 ISCHUR_DEST= ISCHUR_DEST+int(SIZE_SCHUR,8) 3302 ENDDO 3303C 2.2: Get REDRHS on host 3304C 2.2.1: Symmetric => REDRHS is available in last KEEP(253) 3305C rows of Schur structure on ID_SCHUR 3306C 2.2.2: Unsymmetric => REDRHS corresponds to last KEEP(253) 3307C columns. However it must be transposed. 3308 IF (id%KEEP(221).EQ.1) THEN ! Implies Fwd in facto 3309 ISCHUR_SYM = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20))) 3310 & +4+id%KEEP(IXSZ))) + int(SIZE_SCHUR,8) * 3311 & int(LD_SCHUR,8) 3312 ISCHUR_UNS = 3313 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20))) 3314 & +4+id%KEEP(IXSZ))) + int(SIZE_SCHUR,8) 3315 ISCHUR_DEST = 1_8 3316 DO I = 1, id%KEEP(253) 3317 IF (ID_SCHUR .EQ. MASTER) THEN ! necessarily = id%MYID 3318 IF (id%KEEP(50) .EQ. 0) THEN 3319 CALL zcopy(SIZE_SCHUR, id%S(ISCHUR_UNS), LD_SCHUR, 3320 & id%REDRHS(ISCHUR_DEST), 1) 3321 ELSE 3322 CALL zcopy(SIZE_SCHUR, id%S(ISCHUR_SYM), 1, 3323 & id%REDRHS(ISCHUR_DEST), 1) 3324 ENDIF 3325 ELSE 3326 IF (id%MYID .NE. MASTER) THEN 3327 IF (id%KEEP(50) .EQ. 0) THEN 3328C Use id%S(ISCHUR_SYM) as temporary contig. workspace 3329C of size SIZE_SCHUR. 3330 CALL zcopy(SIZE_SCHUR, id%S(ISCHUR_UNS), LD_SCHUR, 3331 & id%S(ISCHUR_SYM), 1) 3332 ENDIF 3333 CALL MPI_SEND(id%S(ISCHUR_SYM), SIZE_SCHUR, 3334 & MPI_DOUBLE_COMPLEX, MASTER, TAG_SCHUR, 3335 & id%COMM, IERR ) 3336 ELSE 3337 CALL MPI_RECV(id%REDRHS(ISCHUR_DEST), 3338 & SIZE_SCHUR, MPI_DOUBLE_COMPLEX, ID_SCHUR, TAG_SCHUR, 3339 & id%COMM, STATUS, IERR ) 3340 ENDIF 3341 ENDIF 3342 IF (id%KEEP(50).EQ.0) THEN 3343 ISCHUR_UNS = ISCHUR_UNS + int(LD_SCHUR,8) 3344 ELSE 3345 ISCHUR_SYM = ISCHUR_SYM + int(LD_SCHUR,8) 3346 ENDIF 3347 ISCHUR_DEST = ISCHUR_DEST + int(id%LREDRHS,8) 3348 ENDDO 3349 ENDIF 3350 ENDIF 3351 RETURN 3352 END SUBROUTINE ZMUMPS_EXTRACT_SCHUR_REDRHS 3353