1C 2C This file is part of MUMPS 5.1.2, released 3C on Mon Oct 2 07:37:01 UTC 2017 4C 5C 6C Copyright 1991-2017 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria, 7C University of Bordeaux. 8C 9C This version of MUMPS is provided to you free of charge. It is 10C released under the CeCILL-C license: 11C http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html 12C 13C 14 SUBROUTINE ZMUMPS_ANA_DRIVER(id) 15 USE ZMUMPS_LOAD 16 USE MUMPS_STATIC_MAPPING 17 USE ZMUMPS_STRUC_DEF 18 USE MUMPS_MEMORY_MOD 19 USE ZMUMPS_PARALLEL_ANALYSIS 20 USE ZMUMPS_ANA_LR 21 USE ZMUMPS_LR_CORE 22 USE ZMUMPS_LR_STATS 23 USE MUMPS_LR_COMMON 24 IMPLICIT NONE 25C 26 INCLUDE 'mpif.h' 27 INCLUDE 'mumps_tags.h' 28 INTEGER IERR, MASTER 29 PARAMETER( MASTER = 0 ) 30C 31C Purpose 32C ======= 33C 34C Performs analysis and (if required) Max-trans on the master, then 35C broadcasts information to the slaves. Also includes mapping. 36C 37C 38C Parameters 39C ========== 40C 41 TYPE(ZMUMPS_STRUC), TARGET :: id 42C 43C Local variables 44C =============== 45C 46C 47C Pointers inside integer array, various data 48 INTEGER IKEEP, FILS, FRERE, NFSIZ 49 INTEGER NE, NA 50 INTEGER I, allocok 51 INTEGER MAXIS1_CHECK 52C Other locals 53 INTEGER NB_NIV2, IDEST 54 INTEGER :: STATUS(MPI_STATUS_SIZE) 55 INTEGER LOCAL_M, LOCAL_N 56 INTEGER numroc 57 EXTERNAL numroc 58 INTEGER IRANK 59 INTEGER MP, LP, MPG 60 LOGICAL PROK, PROKG, LISTVAR_SCHUR_2BE_FREED, LPOK 61 INTEGER SIZE_SCHUR_PASSED 62 INTEGER SBUF_SEND, SBUF_REC, TOTAL_MBYTES 63 INTEGER(8) SBUF_RECOLD8, MIN_BUF_SIZE8 64 INTEGER MIN_BUF_SIZE 65 INTEGER(8) MAX_SIZE_FACTOR_TMP 66 INTEGER LEAF, INODE, ISTEP, INN, LPTRAR 67 INTEGER NBLEAF, NBROOT, MYROW_CHECK, INIV2 68C to store the size of the sequencial peak of stack 69C (or an estimation for not calling REORDER_TREE_N ) 70 DOUBLE PRECISION PEAK 71C 72C INTEGER WORKSPACE 73C 74 INTEGER, ALLOCATABLE, DIMENSION(:) :: PAR2_NODES 75 INTEGER, DIMENSION(:), ALLOCATABLE :: IWtemp 76 INTEGER, DIMENSION(:), ALLOCATABLE :: XNODEL, NODEL 77 INTEGER, DIMENSION(:), POINTER :: SSARBR 78C Element matrix entry 79 INTEGER, POINTER :: NELT, LELTVAR 80 INTEGER, DIMENSION(:), POINTER :: KEEP,INFO, INFOG 81 INTEGER(8), DIMENSION(:), POINTER :: KEEP8 82 INTEGER(8) :: ENTRIES_IN_FACTORS_LOC_MASTERS 83 DOUBLE PRECISION, DIMENSION(:), POINTER :: RINFO 84 DOUBLE PRECISION, DIMENSION(:), POINTER :: RINFOG 85 INTEGER, DIMENSION(:), POINTER :: ICNTL 86 LOGICAL I_AM_SLAVE, PERLU_ON, COND 87 INTEGER :: OOC_STAT 88 INTEGER MUMPS_TYPENODE, MUMPS_PROCNODE 89 EXTERNAL MUMPS_TYPENODE, MUMPS_PROCNODE 90 INTEGER K,J, IFS 91 INTEGER SIZE_TEMP_MEM,SIZE_DEPTH_FIRST,SIZE_COST_TRAV 92 LOGICAL IS_BUILD_LOAD_MEM_CALLED 93 DOUBLE PRECISION, DIMENSION (:,:), ALLOCATABLE :: TEMP_MEM 94 INTEGER, DIMENSION (:,:), ALLOCATABLE :: TEMP_ROOT 95 INTEGER, DIMENSION (:,:), ALLOCATABLE :: TEMP_LEAF 96 INTEGER, DIMENSION (:,:), ALLOCATABLE :: TEMP_SIZE 97 INTEGER, DIMENSION (:), ALLOCATABLE :: DEPTH_FIRST 98 INTEGER, DIMENSION (:), ALLOCATABLE :: DEPTH_FIRST_SEQ 99 INTEGER, DIMENSION (:), ALLOCATABLE :: SBTR_ID 100 DOUBLE PRECISION, DIMENSION (:), ALLOCATABLE :: COST_TRAV_TMP 101 INTEGER(8) :: TOTAL_BYTES 102 INTEGER, POINTER, DIMENSION(:) :: WORK1PTR, WORK2PTR, 103 & NFSIZPTR, 104 & FILSPTR, 105 & FREREPTR 106 ! Used because of multithreaded SIM_NP_ 107 INTEGER :: locMYID, locMYID_NODES 108 LOGICAL, POINTER :: locI_AM_CAND(:) 109 INTEGER(kind=8) :: N8, NZ8, LIW8 110 INTEGER :: LIW_ELT 111C 112C Beginning of executable statements 113C 114 IS_BUILD_LOAD_MEM_CALLED=.FALSE. 115 KEEP => id%KEEP 116 KEEP8 => id%KEEP8 117 INFO => id%INFO 118 RINFO => id%RINFO 119 INFOG => id%INFOG 120 RINFOG => id%RINFOG 121 ICNTL => id%ICNTL 122 NELT => id%NELT 123 LELTVAR => id%LELTVAR 124 KEEP8(24) = 0_8 ! reinitialize last used size of WK_USER 125 KEEP(264) = 0 ! reinitialise out-of-range status (0=yes) 126 KEEP(265) = 0 ! reinitialise dupplicates (0=yes) 127C ------------------------------------- 128C Depending on the type of parallelism, 129C the master can now (soon) potentially 130C have the role of a slave 131C ------------------------------------- 132 I_AM_SLAVE = ( id%MYID .ne. MASTER .OR. 133 & ( id%MYID .eq. MASTER .AND. 134 & id%KEEP(46) .eq. 1 ) ) 135 LP = ICNTL( 1 ) 136 MP = ICNTL( 2 ) 137 MPG = ICNTL( 3 ) 138C LP : errors 139C MP : INFO 140 LPOK = ((LP.GT.0).AND.(id%ICNTL(4).GE.1)) 141 PROK = (( MP .GT. 0 ).AND.(ICNTL(4).GE.2)) 142 PROKG = ( MPG .GT. 0 .and. id%MYID .eq. MASTER ) 143 PROKG = (PROKG.AND.(ICNTL(4).GE.2)) 144 IF ( PROK ) THEN 145 IF ( KEEP(50) .eq. 0 ) THEN 146 WRITE(MP, '(A)') 'L U Solver for unsymmetric matrices' 147 ELSE IF ( KEEP(50) .eq. 1 ) THEN 148 WRITE(MP, '(A)') 149 & 'L D L^T Solver for symmetric positive definite matrices' 150 ELSE 151 WRITE(MP, '(A)') 152 & 'L D L^T Solver for general symmetric matrices' 153 END IF 154 IF ( KEEP(46) .eq. 1 ) THEN 155 WRITE(MP, '(A)') 'Type of parallelism: Working host' 156 ELSE 157 WRITE(MP, '(A)') 'Type of parallelism: Host not working' 158 END IF 159 END IF 160 IF ( PROKG .AND. (MP.NE.MPG)) THEN 161 IF ( KEEP(50) .eq. 0 ) THEN 162 WRITE(MPG, '(A)') 'L U Solver for unsymmetric matrices' 163 ELSE IF ( KEEP(50) .eq. 1 ) THEN 164 WRITE(MPG, '(A)') 165 & 'L D L^T Solver for symmetric positive definite matrices' 166 ELSE 167 WRITE(MPG, '(A)') 168 & 'L D L^T Solver for general symmetric matrices' 169 END IF 170 IF ( KEEP(46) .eq. 1 ) THEN 171 WRITE(MPG, '(A)') 'Type of parallelism: Working host' 172 ELSE 173 WRITE(MPG, '(A)') 'Type of parallelism: Host not working' 174 END IF 175 END IF 176 IF (PROK) WRITE( MP, 110 ) 177 IF (PROKG .AND. (MPG.NE.MP)) WRITE( MPG, 110 ) 178C 179C BEGIN CASE OF ALLOCATED DATA FROM PREVIOUS CALLS 180C ---------------------------------------- 181C Free some memory from factorization, 182C if allocated, at least large arrays. 183C ---------------------------------------- 184 IF (id%KEEP8(24).EQ.0_8) THEN 185C -- deallocate only when not provided/allocated by the user 186 IF (associated(id%S)) DEALLOCATE(id%S) 187 ENDIF 188 NULLIFY(id%S) 189 IF (associated(id%IS)) THEN 190 DEALLOCATE(id%IS) 191 NULLIFY(id%IS) 192 ENDIF 193 IF (associated(id%root%RG2L_ROW))THEN 194 DEALLOCATE(id%root%RG2L_ROW) 195 NULLIFY(id%root%RG2L_ROW) 196 ENDIF 197 IF (associated(id%root%RG2L_COL))THEN 198 DEALLOCATE(id%root%RG2L_COL) 199 NULLIFY(id%root%RG2L_COL) 200 ENDIF 201 IF (associated(id%PTRFAC)) THEN 202 DEALLOCATE(id%PTRFAC) 203 NULLIFY(id%PTRFAC) 204 END IF 205 IF (associated(id%RHSCOMP)) THEN 206 DEALLOCATE(id%RHSCOMP) 207 NULLIFY(id%RHSCOMP) 208 id%KEEP8(25)=0_8 209 ENDIF 210 IF (associated(id%POSINRHSCOMP_ROW)) THEN 211 DEALLOCATE(id%POSINRHSCOMP_ROW) 212 NULLIFY(id%POSINRHSCOMP_ROW) 213 ENDIF 214 IF (id%POSINRHSCOMP_COL_ALLOC) THEN 215 DEALLOCATE(id%POSINRHSCOMP_COL) 216 NULLIFY(id%POSINRHSCOMP_COL) 217 id%POSINRHSCOMP_COL_ALLOC = .FALSE. 218 ENDIF 219C -------------------------------------------- 220C If analysis redone, suppress old, 221C meaningless, Step2node array. 222C This is necessary since we could otherwise 223C end up having a wrong Step2node during solve 224C -------------------------------------------- 225 IF (associated(id%Step2node)) THEN 226 DEALLOCATE(id%Step2node) 227 NULLIFY(id%Step2node) 228 ENDIF 229C END CASE OF ALLOCATED DATA FROM PREVIOUS CALLS 230C 231C Decode API (ICNTL parameters, mainly) 232C and check consistency of the KEEP array. 233C Note: ZMUMPS_ANA_CHECK_KEEP also sets 234C some INFOG parameters 235 CALL ZMUMPS_ANA_CHECK_KEEP(id) 236C 237 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 238 & id%COMM, id%MYID ) 239 IF ( INFO(1) .LT. 0 ) RETURN 240C ------------------------------------------- 241C Broadcast KEEP(60) since we need to broadcast 242C related information 243C ------------------------------------------ 244 CALL MPI_BCAST( KEEP(60), 1, MPI_INTEGER, MASTER, id%COMM, IERR ) 245 IF (id%KEEP(60) .EQ. 2 .or. id%KEEP(60). EQ. 3) THEN 246 CALL MPI_BCAST( id%NPROW, 1, 247 & MPI_INTEGER, MASTER, id%COMM, IERR ) 248 CALL MPI_BCAST( id%NPCOL, 1, 249 & MPI_INTEGER, MASTER, id%COMM, IERR ) 250 CALL MPI_BCAST( id%MBLOCK, 1, 251 & MPI_INTEGER, MASTER, id%COMM, IERR ) 252 CALL MPI_BCAST( id%NBLOCK, 1, 253 & MPI_INTEGER, MASTER, id%COMM, IERR ) 254C Note that ZMUMPS_INIT_ROOT_ANA will 255C then use that information. 256 ENDIF 257 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 258 & id%COMM, id%MYID ) 259 IF ( INFO(1) .LT. 0 ) RETURN 260C ---------------------------------------------- 261C Broadcast KEEP(54) now to know if the 262C structure of the graph is intially distributed 263C and should be assembled on the master 264C Broadcast KEEP(55) now to know if the 265C matrix is in assembled or elemental format 266C ---------------------------------------------- 267 CALL MPI_BCAST( KEEP(54), 2, MPI_INTEGER, MASTER, id%COMM, IERR ) 268C ---------------------------------------------- 269C Broadcast KEEP(69) now to know if 270C we will need to communicate during analysis 271C ---------------------------------------------- 272 CALL MPI_BCAST( KEEP(69), 1, MPI_INTEGER, MASTER, id%COMM, IERR ) 273C ---------------------------------------------- 274C Broadcast Out of core strategy (used only on master so far) 275C ---------------------------------------------- 276 CALL MPI_BCAST( KEEP(201), 1, MPI_INTEGER, MASTER, id%COMM, IERR ) 277C ---------------------------------------------- 278C Broadcast analysis strategy (used only on master so far) 279C ---------------------------------------------- 280 CALL MPI_BCAST( KEEP(244), 1, MPI_INTEGER, MASTER, id%COMM, IERR ) 281C --------------------------- 282C Fwd in facto 283C Broadcast KEEP(251,252,253) defined on master so far 284 CALL MPI_BCAST( KEEP(251), 3, MPI_INTEGER,MASTER,id%COMM,IERR) 285C ---------------------------------------------- 286C Broadcast N 287C ---------------------------------------------- 288 CALL MPI_BCAST( id%N, 1, MPI_INTEGER, MASTER, id%COMM, IERR ) 289C ---------------------------------------------- 290C Broadcast NZ for assembled entry 291C ---------------------------------------------- 292 IF ( KEEP(55) .EQ. 0) THEN 293 IF ( KEEP(54) .eq. 3 ) THEN 294C Compute total number of non-zeros 295 CALL MPI_ALLREDUCE( id%KEEP8(29), id%KEEP8(28), 1, 296 & MPI_INTEGER8, 297 & MPI_SUM, id%COMM, IERR ) 298 ELSE 299C Broadcast NZ from the master node 300 CALL MPI_BCAST( id%KEEP8(28), 1, MPI_INTEGER8, MASTER, 301 & id%COMM, IERR ) 302 END IF 303 ELSE 304C Broadcast NA_ELT <=> KEEP8(30) for elemental entry 305 CALL MPI_BCAST( id%KEEP8(30), 1, MPI_INTEGER8, MASTER, 306 & id%COMM, IERR ) 307 ENDIF 308 IF ( associated(id%MEM_DIST) ) deallocate( id%MEM_DIST ) 309 allocate( id%MEM_DIST( 0:id%NSLAVES-1 ), STAT=IERR ) 310 IF ( IERR .GT. 0 ) THEN 311 INFO(1) = -7 312 INFO(2) = id%NSLAVES 313 IF ( LPOK ) THEN 314 WRITE(LP, 150) 'MEM_DIST' 315 END IF 316 END IF 317 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 318 & id%COMM, id%MYID ) 319 IF ( INFO(1) .LT. 0 ) RETURN 320 id%MEM_DIST(0:id%NSLAVES-1) = 0 321 CALL MUMPS_INIT_ARCH_PARAMETERS( 322 & id%COMM,id%COMM_NODES,KEEP(69),KEEP(46), 323 & id%NSLAVES,id%MEM_DIST,INFO) 324C ======================== 325C Write problem to a file, 326C if requested by the user 327C ======================== 328 CALL ZMUMPS_DUMP_PROBLEM(id) 329C 330C ==================================================== 331C TEST FOR SEQUENTIAL OR PARALLEL ANALYSIS (KEEP(244)) 332C ==================================================== 333 IF (KEEP(244) .EQ. 1) THEN 334C Sequential analysis 335 IF ( KEEP(54) .eq. 3 ) THEN 336C ----------------------------------------------- 337C Collect on the host -- if matrix is distributed 338C at analysis -- all integer information. 339C ----------------------------------------------- 340 CALL ZMUMPS_GATHER_MATRIX(id) 341 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 342 & id%COMM, id%MYID ) 343 IF ( INFO(1) .LT. 0 ) RETURN 344 END IF 345C ************************************************ 346C BEGINNING OF MASTER CODE FOR SEQUENTIAL ANALYSIS 347C ************************************************ 348 IF ( id%MYID .eq. MASTER ) THEN 349C Prepare arguments for call to ZMUMPS_ANA_F and 350C ZMUMPS_ANA_F_ELT in case id%SCHUR was not allocated 351C by user. The objective is to avoid passing a null 352C pointer. Done before 1234 label in order to avoid 353C two allocations of size 1 and a memory leak in case 354C there are two passes (see 1234 label below and 355C "GOTO 1234" statement) 356 IF ( .NOT. associated( id%LISTVAR_SCHUR ) ) THEN 357 SIZE_SCHUR_PASSED = 1 358 LISTVAR_SCHUR_2BE_FREED=.TRUE. 359 allocate( id%LISTVAR_SCHUR( 1 ), STAT=allocok ) 360 IF ( allocok .GT. 0 ) THEN 361 WRITE(*,*) 362 & 'PB allocating an array of size 1 in Schur ' 363 CALL MUMPS_ABORT() 364 END IF 365 ELSE 366 SIZE_SCHUR_PASSED=id%SIZE_SCHUR 367 LISTVAR_SCHUR_2BE_FREED = .FALSE. 368 END IF 369 1234 CONTINUE 370 IF ( ( (KEEP(23) .NE. 0) .AND. 371 & ( (KEEP(23).NE.7) .OR. KEEP(50).EQ. 2 ) ) 372 & .OR. 373 & ( associated(id%A) .AND. KEEP(52) .EQ. 77 .AND. 374 & (KEEP(50).EQ.2)) 375 & .OR. 376 & KEEP(52) .EQ. -2 ) THEN 377C MAXIMUM TRANSVERSAL ALGORITHM called on original matrix. 378C KEEP(23) = 7 means that automatic choice 379C of max trans value will be done during Analysis. 380C We compute a permutation of the original matrix to have zero free diagonal 381C the col. Permutation is held in IS1(1, ...,N). 382C Max-trans (ZMUMPS_ANA_O) is not used for element entry. 383 IF (.not.associated(id%A)) THEN 384C -- If maxtrans is required and A not allocated then reset 385C -- it to structural based maxtrans. 386 IF (KEEP(23).GT.2) KEEP(23) = 1 387 ENDIF 388 CALL ZMUMPS_ANA_O(id%N, id%KEEP8(28), KEEP(23), 389 & id%IS1(1), id, 390 & ICNTL(1), INFO(1)) 391 IF (INFO(1) .LT. 0) THEN 392C ----------- 393C Fatal error 394C ----------- 395C Permutation was not computed; reset keep(23) 396 KEEP(23) = 0 397 GOTO 10 398 END IF 399 END IF 400C END OF MAX-TRANS ON THE MASTER 401C 402C ********************************************************** 403C 404C BEGINNING OF ANALYSIS, STILL ON THE MASTER 405C 406C Set up subdivisions of arrays for analysis 407C 408C ------------------------------------------------------ 409C Define the size of a working array 410C that will be used as workspace ZMUMPS_ANA_F. 411C For element entry (KEEP(55).ne.0), we do not know NZ, 412C and so the whole allocation of IW cannot be done at this 413C point and more workspace is declared/allocated/used 414C inside ZMUMPS_ANA_F_ELT. 415C ------------------------------------------------------ 416C 417 N8=int(id%N,8) 418 IF (KEEP(55) .EQ. 0) THEN 419C ---------------- 420C Assembled format 421C ---------------- 422 NZ8=int(id%KEEP8(28),8) 423 IF ( KEEP(256) .EQ. 1 ) THEN ! KEEP(256) <-- ICNTL(7) 424 LIW8 = 2_8 * NZ8 + N8 + 1_8 425 ELSE 426 LIW8 = 2_8 * NZ8 + N8 + 1_8 427 ENDIF 428 ELSE 429C ---------------- 430C Elemental format 431C ---------------- 432C Only available for AMD, METIS, and given ordering 433#if defined(metis) || defined(parmetis) || defined(metis4) || defined(parmetis3) 434 COND = (KEEP(60) .NE. 0) .OR. (KEEP(256) .EQ. 5) 435#else 436 COND = (KEEP(60) .NE. 0) 437#endif 438 IF( COND ) THEN 439C 440C 441C we suppress supervariable detection when Schur 442C is active or when METIS is applied 443C Workspaces for FLAG(N), and either LEN(N) or some pointers(N+1) 444 LIW_ELT = id%N + id%N + 1 445 ELSE 446C Spaces FLAG(N), LEN(N), N+3, SVAR(0:N), 447 LIW_ELT = id%N + id%N + id%N + 3 + id%N + 1 448 ENDIF 449C 450 ENDIF 451C We must ensure that an array of order 452C 3*N is available for ZMUMPS_ANA_LNEW 453 IF (KEEP(55) .EQ. 0) THEN 454 IF (LIW8.LT.3_8*N8) LIW8 = 3_8*N8 455 ELSE 456 IF (LIW_ELT.LT.3*id%N) LIW_ELT = 3*id%N 457 ENDIF 458 IF (KEEP(23) .NE. 0) THEN 459 IKEEP = id%N + 1 460 ELSE 461 IKEEP = 1 462 END IF 463 NA = IKEEP + id%N 464 NE = IKEEP + 2 * id%N 465 FILS = IKEEP + 3 * id%N 466 FRERE = FILS + id%N 467 NFSIZ = FRERE + id%N 468 MAXIS1_CHECK = NFSIZ + id%N - 1 469C 470C ANALYSIS PHASE 471C Some workspace of ZMUMPS_ANA_F can be reused in subsequent phases. 472C IS(IKEEP) OF LENGTH 3*N 473C IS(NFSIZ) OF LENGTH N holds the frontal matrix sizes 474C IS(FILS) and IS(FRERE) OF LENGTH N holds the assembly tree 475C 476 IF ( KEEP(256) .EQ. 1 ) THEN 477C Note that id%PERM_IN has been checked before. 478 DO I = 1, id%N 479 id%IS1( IKEEP + I - 1 ) = id%PERM_IN( I ) 480 END DO 481 END IF 482 INFOG(1) = 0 483 INFOG(2) = 0 484C Initialize structural symmetry value to not yet computed. 485 INFOG(8) = -1 486 IF (KEEP(55) .EQ. 0) THEN 487 CALL ZMUMPS_ANA_F(id%N, id%KEEP8(28), 488 & id%IRN(1), id%JCN(1), 489 & LIW8, id%IS1(IKEEP), 490 & KEEP(256), id%IS1(NFSIZ), 491 & id%IS1(FILS), id%IS1(FRERE), 492 & id%LISTVAR_SCHUR(1), SIZE_SCHUR_PASSED, 493 & ICNTL(1), INFOG(1), KEEP(1),KEEP8(1),id%NSLAVES, 494 & id%IS1(1),id) 495 IF ( (KEEP(23).LE.-1).AND.(KEEP(23).GE.-6) ) THEN 496C -- Perform max trans 497 KEEP(23) = -KEEP(23) 498 IF (.NOT. associated(id%A)) KEEP(23) = 1 499 GOTO 1234 500 ENDIF 501 INFOG(7) = KEEP(256) 502 ELSE 503 allocate( XNODEL ( id%N+1 ), stat = IERR ) 504 IF ( IERR .GT. 0 ) THEN 505 INFO( 1 ) = -7 506 INFO( 2 ) = id%N + 1 507 IF ( LPOK ) THEN 508 WRITE(LP, 150) 'XNODEL' 509 END IF 510 GOTO 10 511 ENDIF 512 IF (LELTVAR.ne.id%ELTPTR(NELT+1)-1) THEN 513C -- internal error 514 INFO(1) = -2002 515 INFO(2) = id%ELTPTR(NELT+1)-1 516 GOTO 10 517 ENDIF 518 allocate( NODEL ( LELTVAR ), stat = IERR ) 519 IF ( IERR .GT. 0 ) THEN 520 INFO( 1 ) = -7 521 INFO( 2 ) = LELTVAR 522 IF ( LPOK ) THEN 523 WRITE(LP, 150) 'NODEL' 524 END IF 525 GOTO 10 526 ENDIF 527 CALL ZMUMPS_ANA_F_ELT(id%N, NELT, 528 & id%ELTPTR(1), id%ELTVAR(1), LIW_ELT, 529 & id%IS1(IKEEP), 530 & KEEP(256), id%IS1(NFSIZ), id%IS1(FILS), 531 & id%IS1(FRERE), id%LISTVAR_SCHUR(1), 532 & SIZE_SCHUR_PASSED, 533 & ICNTL(1), INFOG(1), KEEP(1),KEEP8(1), 534 & id%NSLAVES, 535 & XNODEL(1), NODEL(1)) 536 INFOG(7)=KEEP(256) 537C 538C XNODEL and NODEL as output to ZMUMPS_ANA_F_ELT 539C be used in ZMUMPS_FRTELT and thus 540C cannot be deallocated at this point 541C 542 ENDIF 543 IF ( LISTVAR_SCHUR_2BE_FREED ) THEN 544C We do not want to have LISTVAR_SCHUR 545C allocated of size 1 if Schur is off. 546 deallocate( id%LISTVAR_SCHUR ) 547 NULLIFY ( id%LISTVAR_SCHUR ) 548 ENDIF 549C ------------------------------ 550C Significant error codes should 551C always be in INFO(1/2) 552C ------------------------------ 553 INFO(1)=INFOG(1) 554 INFO(2)=INFOG(2) 555C save statistics in KEEP array. 556 KEEP(28) = INFOG(6) 557C Check error during ZMUMPS_ANA_F OR ZMUMPS_ANA_F_ELT 558 IF ( INFO(1) .LT. 0 ) THEN 559 GO TO 10 560 ENDIF 561 ENDIF 562 ELSE 563C Parallel analysis 564 IKEEP = 1 565 NA = IKEEP + id%N 566 NE = IKEEP + 2 * id%N 567 FILS = IKEEP + 3 * id%N 568 FRERE = FILS + id%N 569 NFSIZ = FRERE + id%N 570 IF (id%MYID .EQ. MASTER) THEN 571C this correspond to the old PTRAR part of IS1 572C WORK2PTR => id%IS1(PTRAR : PTRAR + 4*id%N-1) 573 ALLOCATE(WORK2PTR(4*id%N), stat=IERR) 574 ELSE 575C Because our purpose is to minimize the peak memory consumption, 576C we can afford to allocate on processes other than host 577 ALLOCATE(WORK1PTR(3*id%N),WORK2PTR(4*id%N), stat=IERR ) 578 ENDIF 579 IF (IERR.GT.0) THEN 580 INFO(1) = -7 581 IF (id%MYID .EQ. MASTER) THEN 582 INFO( 2 ) = 4*id%N 583 ELSE 584 INFO( 2 ) = 7*id%N 585 ENDIF 586 ENDIF 587 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), id%COMM, id%MYID ) 588 IF ( INFO(1) < 0 ) RETURN 589 IF(id%MYID .EQ. MASTER) THEN 590 WORK1PTR => id%IS1(IKEEP : IKEEP + 3*id%N-1) 591 NFSIZPTR => id%IS1(NFSIZ : NFSIZ + id%N-1) 592 FILSPTR => id%IS1(FILS : FILS + id%N-1) 593 FREREPTR => id%IS1(FRERE : FRERE + id%N-1) 594 END IF 595 CALL ZMUMPS_ANA_F_PAR(id, 596 & WORK1PTR, 597 & WORK2PTR, 598 & NFSIZPTR, 599 & FILSPTR, 600 & FREREPTR) 601 DEALLOCATE(WORK2PTR) 602 IF(id%MYID .EQ. 0) THEN 603 NULLIFY(WORK1PTR, NFSIZPTR) 604 NULLIFY(FILSPTR, FREREPTR) 605 ELSE 606 DEALLOCATE(WORK1PTR) 607 END IF 608 KEEP(28) = INFOG(6) 609 END IF 610 10 CONTINUE 611 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), id%COMM, id%MYID ) 612 IF ( INFO(1) < 0 ) RETURN 613 IF(id%MYID .EQ. MASTER) THEN 614C Save ICNTL(14) value into KEEP(12) 615 CALL MUMPS_GET_PERLU(KEEP(12),ICNTL(14), 616 & KEEP(50),KEEP(54),ICNTL(6),KEEP(52)) 617 CALL ZMUMPS_ANA_R(id%N, id%IS1(FILS), id%IS1(FRERE), 618 & id%IS1(NE), id%IS1(NA)) 619C ********************************************************** 620C Continue with CALL to MAPPING routine 621C ********************* 622C BEGIN SEQUENTIAL CODE 623C No mapping computed 624C ********************* 625C 626C In sequential, if no special root 627C reset KEEP(20) and KEEP(38) to 0 628C 629 IF (id%NSLAVES .EQ. 1) THEN 630 id%NBSA = 0 631 IF ( (id%KEEP(60).EQ.0). 632 & AND.(id%KEEP(53).EQ.0)) THEN 633C If Schur is on (keep(60).ne.0) 634C or if RR is on (keep (53) > 0 635C then we keep root numbers 636 id%KEEP(20)=0 637 id%KEEP(38)=0 638 ENDIF 639C No type 2 nodes: 640 id%KEEP(56)=0 641C 642 id%PROCNODE = 0 643C It may also happen that KEEP(38) has already been set, 644C in the case of a distributed Schur complement (KEEP(60)=2 or 3). 645C In that case, PROCNODE should be set accordingly and KEEP(38) is 646C not modified. 647 IF (id%KEEP(60) .EQ. 2 .OR. id%KEEP(60).EQ.3) THEN 648 CALL ZMUMPS_SET_PROCNODE(id%KEEP(38), id%PROCNODE(1), 649 & 1+2*id%NSLAVES, id%IS1(FILS),id%N) 650 ENDIF 651C ******************* 652C END SEQUENTIAL CODE 653C ******************* 654 ELSE 655C ***************************** 656C BEGIN MAPPING WITH CANDIDATES 657C (NSLAVES > 1) 658C ***************************** 659C 660C 661C peak is set by default to 1 largest front + One largest CB 662 PEAK = dble(id%INFOG(5))*dble(id%INFOG(5)) + ! front matrix 663 & dble(id%KEEP(2))*dble(id%KEEP(2)) ! cb bloc 664C IKEEP(1:N,1) can be used as a work space since it is set 665C to its final state by the SORT_PERM subroutine below. 666 SSARBR => id%IS1(IKEEP:IKEEP+id%N-1) 667C Map nodes and assign candidates for dynamic scheduling 668 CALL ZMUMPS_DIST_AVOID_COPIES(id%N,id%NSLAVES,ICNTL(1), 669 & INFOG(1), 670 & id%IS1(NE), 671 & id%IS1(NFSIZ), 672 & id%IS1(FRERE), 673 & id%IS1(FILS), 674 & KEEP(1),KEEP8(1),id%PROCNODE(1), 675 & SSARBR(1),id%NBSA,PEAK,IERR 676 & ) 677 NULLIFY(SSARBR) 678 if(IERR.eq.-999) then 679 write(6,*) ' Internal error during static mapping ' 680 INFO(1) = IERR 681 GOTO 11 682 ENDIF 683 IF(IERR.NE.0) THEN 684 INFO(1) = -135 685 INFO(2) = IERR 686 GOTO 11 687 ENDIF 688 CALL ZMUMPS_ANA_R(id%N, id%IS1(FILS), 689 & id%IS1(FRERE), id%IS1(NE), 690 & id%IS1(NA)) 691 ENDIF 692 11 CONTINUE 693 ENDIF 694 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), id%COMM, id%MYID ) 695 IF ( INFO(1) < 0 ) RETURN 696C The following part is done in parallel 697 CALL MPI_BCAST( id%NELT, 1, MPI_INTEGER, MASTER, 698 & id%COMM, IERR ) 699 IF (KEEP(55) .EQ. 0) THEN 700C Assembled matrix format. Fill up the id%PTRAR array 701C Broadcast id%SYM_PERM needed to fill up id%PTRAR 702C postpone to after computation of id%SYM_PERM 703C computed after id%DAD_STEPS 704 if (associated(id%FRTPTR)) DEALLOCATE(id%FRTPTR) 705 if (associated(id%FRTELT)) DEALLOCATE(id%FRTELT) 706 allocate( id%FRTPTR(1), id%FRTELT(1) ) 707 ELSE 708C Element Entry: 709C ------------------------------- 710C COMPUTE THE LIST OF ELEMENTS THAT WILL BE ASSEMBLED 711C AT EACH NODE OF THE ELIMINATION TREE. ALSO COMPUTE 712C FOR EACH ELEMENT THE TREE NODE TO WHICH IT IS ASSIGNED. 713C 714C FRTPTR is an INTEGER array of length N+1 which need not be set by 715C the user. On output, FRTPTR(I) points in FRTELT to first element 716C in the list of elements assigned to node I in the elimination tree. 717C 718C FRTELT is an INTEGER array of length NELT which need not be set by 719C the user. On output, positions FRTELT(FRTPTR(I)) to 720C FRTELT(FRTPTR(I+1)-1) contain the list of elements assigned to 721C node I in the elimination tree. 722C 723 LPTRAR = id%NELT+id%NELT+2 724 CALL MUMPS_I8REALLOC(id%PTRAR, LPTRAR, id%INFO, LP, 725 & FORCE=.TRUE., STRING='id%PTRAR (Analysis)', ERRCODE=-7) 726 CALL MUMPS_REALLOC(id%FRTPTR, id%N+1, id%INFO, LP, 727 & FORCE=.TRUE., STRING='id%FRTPTR (Analysis)', ERRCODE=-7) 728 CALL MUMPS_REALLOC(id%FRTELT, id%NELT, id%INFO, LP, 729 & FORCE=.TRUE., STRING='id%FRTELT (Analysis)', ERRCODE=-7) 730 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), id%COMM, id%MYID ) 731 IF ( INFO(1) < 0 ) RETURN 732 IF(id%MYID .EQ. MASTER) THEN 733C In the elemental format case, PTRAR&friends are still 734C computed sequentially and then broadcasted 735 CALL ZMUMPS_FRTELT( 736 & id%N, NELT, id%ELTPTR(NELT+1)-1, id%IS1(FRERE), 737 & id%IS1(FILS), 738 & id%IS1(IKEEP+id%N), id%IS1(IKEEP+2*id%N), XNODEL, 739 & NODEL, id%FRTPTR(1), id%FRTELT(1), id%ELTPROC(1)) 740 DO I=1, id%NELT+1 741C PTRAR declared 64-bit 742 id%PTRAR(id%NELT+I+1)=int(id%ELTPTR(I),8) 743 ENDDO 744 deallocate(XNODEL) 745 deallocate(NODEL) 746 END IF 747 CALL MPI_BCAST( id%PTRAR(id%NELT+2), id%NELT+1, MPI_INTEGER8, 748 & MASTER, id%COMM, IERR ) 749 CALL MPI_BCAST( id%FRTPTR(1), id%N+1, MPI_INTEGER, 750 & MASTER, id%COMM, IERR ) 751 CALL MPI_BCAST( id%FRTELT(1), id%NELT, MPI_INTEGER, 752 & MASTER, id%COMM, IERR ) 753 ENDIF 754C We switch again to sequential computations on the master node 755 IF(id%MYID .EQ. MASTER) THEN 756 IF ( INFO( 1 ) .LT. 0 ) GOTO 12 757 IF ( KEEP(55) .ne. 0 ) THEN 758C --------------------------------------- 759C Build ELTPROC: correspondance between elements and slave numbers. 760C This is used later in the initial elemental 761C matrix distribution at the beginning of the factorisation phase 762C --------------------------------------- 763 CALL ZMUMPS_ELTPROC(id%N, NELT, id%ELTPROC(1),id%NSLAVES, 764 & id%PROCNODE(1)) 765 END IF 766 NB_NIV2 = KEEP(56) 767 IF ( NB_NIV2.GT.0 ) THEN 768C 769 allocate(PAR2_NODES(NB_NIV2), 770 & STAT=allocok) 771 IF (allocok .GT.0) then 772 INFO(1)= -7 773 INFO(2)= NB_NIV2 774 IF ( LPOK ) THEN 775 WRITE(LP, 150) 'PAR2_NODES' 776 END IF 777 GOTO 12 778 END IF 779 ENDIF 780 IF ((NB_NIV2.GT.0) .AND. (KEEP(24).EQ.0)) THEN 781 INIV2 = 0 782 DO 777 INODE = 1, id%N 783 IF ( ( id%IS1(FRERE+INODE-1) .NE. id%N+1 ) .AND. 784 & ( MUMPS_TYPENODE(id%PROCNODE(INODE),id%NSLAVES) 785 & .eq. 2) ) THEN 786 INIV2 = INIV2 + 1 787 PAR2_NODES(INIV2) = INODE 788 END IF 789 777 CONTINUE 790 IF ( INIV2 .NE. NB_NIV2 ) THEN 791 WRITE(*,*) "Internal Error 2 in ZMUMPS_ANA_DRIVER", 792 & INIV2, NB_NIV2 793 CALL MUMPS_ABORT() 794 ENDIF 795 ENDIF 796 IF ( (KEEP(24) .NE. 0) .AND. (NB_NIV2.GT.0) ) THEN 797C allocate array to store cadidates stategy 798C for each level two nodes 799 IF ( associated(id%CANDIDATES)) deallocate(id%CANDIDATES) 800 allocate( id%CANDIDATES(id%NSLAVES+1,NB_NIV2), 801 & stat=allocok) 802 if (allocok .gt.0) then 803 INFO(1)= -7 804 INFO(2)= NB_NIV2*(id%NSLAVES+1) 805 IF ( LPOK ) THEN 806 WRITE(LP, 150) 'CANDIDATES' 807 END IF 808 GOTO 12 809 END IF 810 CALL MUMPS_RETURN_CANDIDATES 811 & (PAR2_NODES,id%CANDIDATES,IERR) 812 IF(IERR.NE.0) THEN 813 INFO(1) = -2002 814 GOTO 12 815 ENDIF 816C deallocation of variables of module mumps_static_mapping 817 CALL MUMPS_END_ARCH_CV() 818 IF(IERR.NE.0) THEN 819 INFO(1) = -2002 820 GOTO 12 821 ENDIF 822 ELSE 823 IF (associated(id%CANDIDATES)) DEALLOCATE(id%CANDIDATES) 824 allocate(id%CANDIDATES(1,1), stat=allocok) 825 IF (allocok .NE. 0) THEN 826 INFO(1)= -7 827 INFO(2)= 1 828 IF ( LPOK ) THEN 829 WRITE(LP, 150) 'CANDIDATES' 830 END IF 831 GOTO 12 832 ENDIF 833 ENDIF 834C******************************************************************* 835C --------------- 836 12 CONTINUE 837C --------------- 838* 839* =============================== 840* End of analysis phase on master 841* =============================== 842* 843! blocking factor for multiple RHS for ana_distm 844 KEEP(84) = ICNTL(27) 845 END IF 846 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), id%COMM, id%MYID ) 847 IF ( INFO(1) < 0 ) RETURN 848C 849C We now allocate and compute arrays in NSTEPS 850C on the master, as this makes more sense. 851C 852C ============================== 853C PREPARE DATA FOR FACTORIZATION 854C ============================== 855C ------------------ 856 CALL MPI_BCAST( id%KEEP(1), 110, MPI_INTEGER, MASTER, 857 & id%COMM, IERR ) 858C We also need to broadcast KEEP8(21) 859 CALL MUMPS_BCAST_I8( id%KEEP8(21), MASTER, 860 & id%MYID, id%COMM, IERR) 861C -------------------------------------------------- 862C Broadcast KEEP(205) which is outside the first 110 863C KEEP entries but is needed for factorization. 864C -------------------------------------------------- 865 CALL MPI_BCAST( id%KEEP(205), 1, MPI_INTEGER, MASTER, 866 & id%COMM, IERR ) 867C -------------- 868C Broadcast NBSA 869 CALL MPI_BCAST( id%NBSA, 1, MPI_INTEGER, MASTER, 870 & id%COMM, IERR ) 871C ----------------- 872C Global MAXFRT (computed in ZMUMPS_ANA_M) 873C is needed on all the procs during ZMUMPS_ANA_DISTM 874C to evaluate workspace for solve. 875C We could also recompute it in ZMUMPS_ANA_DISTM 876 IF (id%MYID==MASTER) KEEP(127)=INFOG(5) 877 CALL MPI_BCAST( id%KEEP(127), 1, MPI_INTEGER, MASTER, 878 & id%COMM, IERR ) 879C ----------------- 880C Global max panel size KEEP(226) 881 CALL MPI_BCAST( id%KEEP(226), 1, MPI_INTEGER, MASTER, 882 & id%COMM, IERR ) 883C ----------------- 884C Broadcast LR related keep informations KEEP(483-492) 885C if includes MPI_BCAST(idKEEP(486) 886 CALL MPI_BCAST( id%KEEP(483), 10, MPI_INTEGER, MASTER, 887 & id%COMM, IERR ) 888C Save setting (used later during factorization) 889C to enable BLR 890 KEEP(494) = KEEP(486) 891C Number of leaves not belonging to L0 KEEP(262) 892C and KEEP(263) : inner or outer sends for blocked facto 893 CALL MPI_BCAST( id%KEEP(262), 2, MPI_INTEGER, MASTER, 894 & id%COMM, IERR ) 895C 896C 897C ---------------------------------------- 898C Allocate new workspace on all processors 899C ---------------------------------------- 900 CALL MUMPS_REALLOC(id%STEP, id%N, id%INFO, LP, FORCE=.TRUE., 901 & STRING='id%STEP (Analysis)', ERRCODE=-7) 902 IF(INFO(1).LT.0) GOTO 94 903 CALL MUMPS_REALLOC(id%PROCNODE_STEPS, id%KEEP(28), id%INFO, LP, 904 & FORCE=.TRUE., 905 & STRING='id%PROCNODE_STEPS (Analysis)', ERRCODE=-7) 906 IF(INFO(1).LT.0) GOTO 94 907 CALL MUMPS_REALLOC(id%NE_STEPS, id%KEEP(28), id%INFO, LP, 908 & FORCE=.TRUE., 909 & STRING='id%NE_STEPS (Analysis)', ERRCODE=-7) 910 IF(INFO(1).LT.0) GOTO 94 911 CALL MUMPS_REALLOC(id%ND_STEPS, id%KEEP(28), id%INFO, LP, 912 & FORCE=.TRUE., 913 & STRING='id%ND_STEPS (Analysis)', ERRCODE=-7) 914 IF(INFO(1).LT.0) GOTO 94 915 CALL MUMPS_REALLOC(id%FRERE_STEPS, id%KEEP(28), id%INFO, LP, 916 & FORCE=.TRUE., 917 & STRING='id%FRERE_STEPS (Analysis)', ERRCODE=-7) 918 IF(INFO(1).LT.0) GOTO 94 919 CALL MUMPS_REALLOC(id%DAD_STEPS, id%KEEP(28), id%INFO, LP, 920 & FORCE=.TRUE., 921 & STRING='id%DAD_STEPS (Analysis)', ERRCODE=-7) 922 IF(INFO(1).LT.0) GOTO 94 923 CALL MUMPS_REALLOC(id%FILS, id%N, id%INFO, LP, FORCE=.TRUE., 924 & STRING='id%FILS (Analysis)', ERRCODE=-7) 925 IF(INFO(1).LT.0) GOTO 94 926 IF (KEEP(55) .EQ. 0) THEN 927 LPTRAR = id%N+id%N 928 CALL MUMPS_I8REALLOC(id%PTRAR, LPTRAR, id%INFO, LP, 929 & FORCE=.TRUE., STRING='id%PTRAR (Analysis)', ERRCODE=-7) 930 IF(INFO(1).LT.0) GOTO 94 931 ENDIF 932 CALL MUMPS_REALLOC(id%LRGROUPS, id%N, id%INFO, LP, 933 & FORCE=.TRUE. 934 & ,STRING='id%LRGROUPS (Analysis)', ERRCODE=-7) 935 IF(INFO(1).LT.0) GOTO 94 936C Copy data for factorization and/or solve. 937 IF ( associated( id%UNS_PERM ) ) deallocate(id%UNS_PERM) 938C ================================ 939C COMPUTE ON THE MASTER, BROADCAST 940C TO OTHER PROCESSES 941C ================================ 942 IF ( id%MYID == MASTER .AND. id%KEEP(23) .NE. 0 ) THEN 943C This one is only on the master 944 allocate(id%UNS_PERM(id%N),stat=allocok) 945 IF ( allocok .ne. 0) THEN 946 INFO(1) = -7 947 INFO(2) = id%N 948 IF ( LPOK ) THEN 949 WRITE(LP, 150) 'id%UNS_PERM' 950 END IF 951 GOTO 94 952 ENDIF 953C 954 DO I=1,id%N 955 id%UNS_PERM(I) = id%IS1(I) 956 END DO 957 ENDIF 958 94 CONTINUE 959 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 960 & id%COMM, id%MYID ) 961 IF ( id%MYID .EQ. MASTER ) THEN 962 DO I=1,id%N 963 id%FILS(I) = id%IS1(FILS+I-1) 964 ENDDO 965 END IF 966 IF (id%MYID .EQ. MASTER ) THEN 967C NA -> compressed NA containing only list 968C of leaves of the elimination tree and list of roots 969C (the two useful informations for factorization/solve). 970 IF (id%N.eq.1) THEN 971 NBROOT = 1 972 NBLEAF = 1 973 ELSE IF (id%IS1(NA+id%N-1) .LT.0) THEN 974 NBLEAF = id%N 975 NBROOT = id%N 976 ELSE IF (id%IS1(NA+id%N-2) .LT.0) THEN 977 NBLEAF = id%N-1 978 NBROOT = id%IS1(NA+id%N-1) 979 ELSE 980 NBLEAF = id%IS1(NA+id%N-2) 981 NBROOT = id%IS1(NA+id%N-1) 982 ENDIF 983 id%LNA = 2+NBLEAF+NBROOT 984 ENDIF 985 CALL MPI_BCAST( id%LNA, 1, MPI_INTEGER, 986 & MASTER, id%COMM, IERR ) 987 CALL MUMPS_REALLOC(id%NA, id%LNA, id%INFO, LP, FORCE=.TRUE., 988 & STRING='id%NA (Analysis)', ERRCODE=-7) 989 IF(INFO(1).LT.0) GOTO 96 990 IF (id%MYID .EQ.MASTER ) THEN 991C The structure of NA is the following: 992C NA(1) is the number of leaves. 993C NA(2) is the number of roots. 994C NA(3:2+NA(1)) are the leaves. 995C NA(3+NA(1):2+NA(1)+NA(2)) are the roots. 996 id%NA(1) = NBLEAF 997 id%NA(2) = NBROOT 998C 999C Initialize NA with the leaves and roots 1000 LEAF = 3 1001 IF ( id%N == 1 ) THEN 1002 id%NA(LEAF) = 1 1003 LEAF = LEAF + 1 1004 ELSE IF (id%IS1(NA+id%N-1) < 0) THEN 1005 id%NA(LEAF) = - id%IS1(NA+id%N-1)-1 1006 LEAF = LEAF + 1 1007 DO I = 1, NBLEAF - 1 1008 id%NA(LEAF) = id%IS1(NA+I-1) 1009 LEAF = LEAF + 1 1010 ENDDO 1011 ELSE IF (id%IS1(NA+id%N-2) < 0 ) THEN 1012 INODE = - id%IS1(NA+id%N-2) - 1 1013 id%NA(LEAF) = INODE 1014 LEAF =LEAF + 1 1015 IF ( NBLEAF > 1 ) THEN 1016 DO I = 1, NBLEAF - 1 1017 id%NA(LEAF) = id%IS1(NA+I-1) 1018 LEAF = LEAF + 1 1019 ENDDO 1020 ENDIF 1021 ELSE 1022 DO I = 1, NBLEAF 1023 id%NA(LEAF) = id%IS1(NA+I-1) 1024 LEAF = LEAF + 1 1025 ENDDO 1026 END IF 1027 END IF 1028 96 CONTINUE 1029 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 1030 & id%COMM, id%MYID ) 1031 IF ( INFO(1).LT.0 ) RETURN 1032 IF ( id%MYID .EQ. MASTER ) THEN 1033C Build array STEP(1:id%N) to hold step numbers in 1034C range 1..id%KEEP(28), allowing compression of 1035C other arrays from id%N to id%KEEP(28) 1036C (the number of nodes/steps in the assembly tree) 1037 ISTEP = 0 1038 DO I = 1, id%N 1039 IF ( id%IS1(FRERE+I-1) .ne. id%N + 1 ) THEN 1040C New node in the tree. 1041c (Set step( inode_n ) = inode_nsteps for principal 1042C variables and -inode_nsteps for internal variables 1043C of the node) 1044 ISTEP = ISTEP + 1 1045 id%STEP(I)=ISTEP 1046 INN = id%IS1(FILS+I-1) 1047 DO WHILE ( INN .GT. 0 ) 1048 id%STEP(INN) = - ISTEP 1049 INN = id%IS1(FILS + INN -1) 1050 END DO 1051 IF (id%IS1(FRERE+I-1) .eq. 0) THEN 1052C Keep root nodes list in NA 1053 id%NA(LEAF) = I 1054 LEAF = LEAF + 1 1055 ENDIF 1056 ENDIF 1057 END DO 1058 IF ( LEAF - 1 .NE. 2+NBROOT + NBLEAF ) THEN 1059 WRITE(*,*) 'Internal error 2 in ZMUMPS_ANA_DRIVER' 1060 CALL MUMPS_ABORT() 1061 ENDIF 1062 IF ( ISTEP .NE. id%KEEP(28) ) THEN 1063 write(*,*) 'Internal error 3 in ZMUMPS_ANA_DRIVER' 1064 CALL MUMPS_ABORT() 1065 ENDIF 1066C ============ 1067C SET PROCNODE, FRERE, NE 1068C ============ 1069 DO I = 1, id%N 1070 IF (id%IS1(FRERE+I-1) .NE. id%N+1) THEN 1071 id%PROCNODE_STEPS(id%STEP(I)) = id%PROCNODE( I ) 1072 id%FRERE_STEPS(id%STEP(I)) = id%IS1(FRERE+I-1) 1073 id%NE_STEPS(id%STEP(I)) = id%IS1(NE+I-1) 1074 id%ND_STEPS(id%STEP(I)) = id%IS1(NFSIZ+I-1) 1075 ENDIF 1076 ENDDO 1077C =============================== 1078C Algoritme to compute array DAD_STEPS: 1079C ---- 1080C For each node set dad for all of its sons 1081C plus, for root nodes set dad to zero. 1082C 1083C =============================== 1084 DO I = 1, id%N 1085C -- skip non principal nodes 1086 IF ( id%STEP(I) .LE. 0) CYCLE 1087C -- (I) is a principal node 1088 IF (id%IS1(FRERE+I-1) .eq. 0) THEN 1089C -- I is a root node and has no father 1090 id%DAD_STEPS(id%STEP(I)) = 0 1091 ENDIF 1092C -- Find first son node (IFS) 1093 IFS = id%IS1(FILS+I-1) 1094 DO WHILE ( IFS .GT. 0 ) 1095 IFS= id%IS1(FILS + IFS -1) 1096 END DO 1097C -- IFS > 0 if I is not a leave node 1098C -- Go through list of brothers of IFS if any 1099 IFS = -IFS 1100 DO WHILE (IFS.GT.0) 1101C -- I is not a leave node and has a son node IFS 1102 id%DAD_STEPS(id%STEP(IFS)) = I 1103 IFS = id%IS1(FRERE+IFS-1) 1104 ENDDO 1105 END DO 1106C 1107C 1108C Following arrays (PROCNODE and IS1) not used anymore 1109C during analysis 1110 DEALLOCATE(id%PROCNODE) 1111 NULLIFY(id%PROCNODE) 1112 DEALLOCATE(id%IS1) 1113 NULLIFY(id%IS1) 1114C Reorder the tree using a variant of Liu's algorithm. Note that 1115C REORDER_TREE MUST always be called since it sorts NA (the list of 1116C leaves) in a valid order in the sense of a depth-first traversal. 1117 CALL ZMUMPS_REORDER_TREE(id%N, id%FRERE_STEPS(1), 1118 & id%STEP(1),id%FILS(1), id%NA(1), id%LNA, 1119 & id%NE_STEPS(1), id%ND_STEPS(1), id%DAD_STEPS(1), 1120 & id%KEEP(28), .TRUE., id%KEEP(28), id%KEEP(70), 1121 & id%KEEP(50), id%INFO(1), id%ICNTL(1),id%KEEP(215), 1122 & id%KEEP(234), id%KEEP(55), 1123 & id%PROCNODE_STEPS(1),id%NSLAVES,PEAK,id%KEEP(90) 1124 & ) 1125 IF(id%KEEP(261).EQ.1)THEN 1126 CALL MUMPS_SORT_STEP(id%N, id%FRERE_STEPS(1), 1127 & id%STEP(1),id%FILS(1), id%NA(1), id%LNA, 1128 & id%NE_STEPS(1), id%ND_STEPS(1), id%DAD_STEPS(1), 1129 & id%KEEP(28), .TRUE., id%KEEP(28), id%INFO(1), 1130 & id%ICNTL(1),id%PROCNODE_STEPS(1),id%NSLAVES 1131 & ) 1132 ENDIF 1133C Compute and export some global information on the tree needed by 1134C dynamic schedulers during the factorization. The type of 1135C information depends on the selected strategy. 1136 IF ((id%KEEP(76).GE.4).OR.(id%KEEP(76).GE.6).OR. 1137 & (id%KEEP(47).EQ.4).OR.((id%KEEP(81).GT.0) 1138 & .AND.(id%KEEP(47).GE.2)))THEN 1139 IS_BUILD_LOAD_MEM_CALLED=.TRUE. 1140 IF ((id%KEEP(47) .EQ. 4).OR. 1141 & (( id%KEEP(81) .GT. 0).AND.(id%KEEP(47).GE.2))) THEN 1142 IF(id%NSLAVES.GT.1) THEN 1143C NBSA is the total number of subtrees and 1144C is an upperbound of the local number of 1145C subtrees 1146 SIZE_TEMP_MEM = id%NBSA 1147 ELSE 1148C Only one processor, NA(2) is the number of leaves 1149 SIZE_TEMP_MEM = id%NA(2) 1150 ENDIF 1151 ELSE 1152 SIZE_TEMP_MEM = 1 1153 ENDIF 1154 IF((id%KEEP(76).EQ.4).OR.(id%KEEP(76).EQ.6))THEN 1155 SIZE_DEPTH_FIRST=id%KEEP(28) 1156 ELSE 1157 SIZE_DEPTH_FIRST=1 1158 ENDIF 1159 allocate(TEMP_MEM(SIZE_TEMP_MEM,id%NSLAVES),STAT=allocok) 1160 IF (allocok .NE.0) THEN 1161 INFO(1)= -7 1162 INFO(2)= SIZE_TEMP_MEM*id%NSLAVES 1163 IF ( LPOK ) THEN 1164 WRITE(LP, 150) 'TEMP_MEM' 1165 END IF 1166 GOTO 80 1167 END IF 1168 allocate(TEMP_LEAF(SIZE_TEMP_MEM,id%NSLAVES), 1169 & stat=allocok) 1170 IF (allocok .ne.0) then 1171 IF ( LPOK ) THEN 1172 WRITE(LP, 150) 'TEMP_LEAF' 1173 END IF 1174 INFO(1)= -7 1175 INFO(2)= SIZE_TEMP_MEM*id%NSLAVES 1176 GOTO 80 1177 end if 1178 allocate(TEMP_SIZE(SIZE_TEMP_MEM,id%NSLAVES), 1179 & stat=allocok) 1180 IF (allocok .ne.0) then 1181 IF ( LPOK ) THEN 1182 WRITE(LP, 150) 'TEMP_SIZE' 1183 END IF 1184 INFO(1)= -7 1185 INFO(2)= SIZE_TEMP_MEM*id%NSLAVES 1186 GOTO 80 1187 end if 1188 allocate(TEMP_ROOT(SIZE_TEMP_MEM,id%NSLAVES), 1189 & stat=allocok) 1190 IF (allocok .ne.0) then 1191 IF ( LPOK ) THEN 1192 WRITE(LP, 150) 'TEMP_ROOT' 1193 END IF 1194 INFO(1)= -7 1195 INFO(2)= SIZE_TEMP_MEM*id%NSLAVES 1196 GOTO 80 1197 end if 1198 allocate(DEPTH_FIRST(SIZE_DEPTH_FIRST),stat=allocok) 1199 IF (allocok .ne.0) then 1200 IF ( LPOK ) THEN 1201 WRITE(LP, 150) 'DEPTH_FIRST' 1202 END IF 1203 INFO(1)= -7 1204 INFO(2)= SIZE_DEPTH_FIRST 1205 GOTO 80 1206 end if 1207 ALLOCATE(DEPTH_FIRST_SEQ(SIZE_DEPTH_FIRST),stat=allocok) 1208 IF (allocok .ne.0) then 1209 IF ( LPOK ) THEN 1210 WRITE(LP, 150) 'DEPTH_FIRST_SEQ' 1211 END IF 1212 INFO(1)= -7 1213 INFO(2)= SIZE_DEPTH_FIRST 1214 GOTO 80 1215 end if 1216 ALLOCATE(SBTR_ID(SIZE_DEPTH_FIRST),stat=allocok) 1217 IF (allocok .ne.0) then 1218 IF ( LPOK ) THEN 1219 WRITE(LP, 150) 'SBTR_ID' 1220 END IF 1221 INFO(1)= -7 1222 INFO(2)= SIZE_DEPTH_FIRST 1223 GOTO 80 1224 end if 1225 IF(id%KEEP(76).EQ.5)THEN 1226C We reuse the same variable as before 1227 SIZE_COST_TRAV=id%KEEP(28) 1228 ELSE 1229 SIZE_COST_TRAV=1 1230 ENDIF 1231 allocate(COST_TRAV_TMP(SIZE_COST_TRAV),stat=allocok) 1232 IF (allocok .ne.0) then 1233 IF ( LPOK ) THEN 1234 WRITE(LP, 150) 'COST_TRAV_TMP' 1235 END IF 1236 INFO(1)= -7 1237 INFO(2)= SIZE_COST_TRAV 1238 GOTO 80 1239 END IF 1240 IF(id%KEEP(76).EQ.5)THEN 1241 IF(id%KEEP(70).EQ.0)THEN 1242 id%KEEP(70)=5 1243 ENDIF 1244 IF(id%KEEP(70).EQ.1)THEN 1245 id%KEEP(70)=6 1246 ENDIF 1247 ENDIF 1248 IF(id%KEEP(76).EQ.4)THEN 1249 IF(id%KEEP(70).EQ.0)THEN 1250 id%KEEP(70)=3 1251 ENDIF 1252 IF(id%KEEP(70).EQ.1)THEN 1253 id%KEEP(70)=4 1254 ENDIF 1255 ENDIF 1256 CALL ZMUMPS_BUILD_LOAD_MEM_INFO(id%N, id%FRERE_STEPS(1), 1257 & id%STEP(1),id%FILS(1), id%NA(1), id%LNA, 1258 & id%NE_STEPS(1), id%ND_STEPS(1), id%DAD_STEPS(1), 1259 & id%KEEP(28), .TRUE., id%KEEP(28), id%KEEP(70), 1260 & id%KEEP(50), id%INFO(1), id%ICNTL(1),id%KEEP(47), 1261 & id%KEEP(81),id%KEEP(76),id%KEEP(215), 1262 & id%KEEP(234), id%KEEP(55), 1263 & id%PROCNODE_STEPS(1),TEMP_MEM,id%NSLAVES, 1264 & SIZE_TEMP_MEM, PEAK,id%KEEP(90),SIZE_DEPTH_FIRST, 1265 & SIZE_COST_TRAV,DEPTH_FIRST(1),DEPTH_FIRST_SEQ(1), 1266 & COST_TRAV_TMP(1), 1267 & TEMP_LEAF,TEMP_SIZE,TEMP_ROOT,SBTR_ID(1) 1268 & ) 1269 END IF 1270C Compute a grouping of variables for LR approximations. 1271C id%SYM_PERM is used as a work array 1272 IF(KEEP(486) .EQ. 1) THEN 1273 IF ( (KEEP(54).eq.3) .AND. (KEEP(244).eq.2) ) THEN 1274C If the input matrix is distributed and the parallel analysis is 1275C chosen, the graph has to be centralized in order to compute the 1276C clustering. 1277 CALL ZMUMPS_GATHER_MATRIX(id) 1278 END IF 1279 IF (KEEP(469).EQ.0) THEN 1280 CALL ZMUMPS_LR_GROUPING(id%N, id%KEEP8(28), id%KEEP(28), 1281 & id%IRN(1), 1282 & id%JCN(1), id%FILS(1), id%FRERE_STEPS(1), 1283 & id%DAD_STEPS(1), id%NE_STEPS(1), id%STEP(1), id%NA(1), 1284 & id%LNA, id%LRGROUPS(1), 1285 & id%KEEP(50), 1286 & id%ICNTL(1), id%KEEP(487), id%KEEP(488), id%KEEP(489), 1287 & id%KEEP(490), id%KEEP(38), id%KEEP(20), id%KEEP(60), 1288 & id%INFO(1), id%INFO(2), 1289 & id%KEEP(264), id%KEEP(265), id%KEEP(482), id%KEEP(472), 1290 & id%KEEP(127), id%KEEP(10), LPOK, LP) 1291 ELSE 1292 CALL ZMUMPS_LR_GROUPING_NEW(id%N, id%KEEP8(28), 1293 & id%KEEP(28), id%IRN(1), 1294 & id%JCN(1), id%FILS(1), id%FRERE_STEPS(1), 1295 & id%DAD_STEPS(1), id%NE_STEPS(1), id%STEP(1), id%NA(1), 1296 & id%LNA, id%LRGROUPS(1), id%KEEP(50), 1297 & id%ICNTL(1), id%KEEP(487), id%KEEP(488), id%KEEP(489), 1298 & id%KEEP(490), id%KEEP(38), id%KEEP(20), id%KEEP(60), 1299 & id%INFO(1), id%INFO(2), 1300 & id%KEEP(264), id%KEEP(265), id%KEEP(482), id%KEEP(472), 1301 & id%KEEP(127), id%KEEP(469), id%KEEP(10), LPOK, LP) 1302 ENDIF 1303 IF ( (KEEP(54).eq.3) .AND. (KEEP(244).eq.2) ) THEN 1304C Cleanup the irn and jcn arrays filled up by the 1305C cmumps_gather_matrix above 1306 deallocate(id%IRN, id%JCN) 1307 NULLIFY(id%IRN) 1308 NULLIFY(id%JCN) 1309 END IF 1310 END IF 1311 CALL MUMPS_REALLOC(id%SYM_PERM, id%N, id%INFO, LP, 1312 & FORCE=.TRUE., 1313 & STRING='id%SYM_PERM (Analysis)', ERRCODE=-7) 1314 IF(INFO(1).LT.0) GOTO 80 1315 CALL ZMUMPS_SORT_PERM(id%N, id%NA(1), id%LNA, 1316 & id%NE_STEPS(1), id%SYM_PERM(1), 1317 & id%FILS(1), id%DAD_STEPS(1), 1318 & id%STEP(1), id%KEEP(28), id%INFO(1) ) 1319 ELSE ! matches the IF (id%MYID .EQ. MASTER) THEN ... above 1320 CALL MUMPS_REALLOC(id%SYM_PERM, id%N, id%INFO, LP, 1321 & FORCE=.TRUE., 1322 & STRING='id%SYM_PERM (Analysis)', ERRCODE=-7) 1323 IF(INFO(1).LT.0) GOTO 80 1324 IF ( (KEEP(54).EQ.3) .AND. (KEEP(244).EQ.2) 1325 & .AND. (abs(KEEP(486)).EQ.1)) THEN 1326C If the input matrix is distributed and the parallel analysis is 1327C chosen, the graph has to be centralized in order to compute the 1328C clustering. 1329 CALL ZMUMPS_GATHER_MATRIX(id) 1330 END IF 1331 ENDIF 1332C Root principal variable 1333C for scalapack (KEEP(38)) might have been updated 1334C since root variables might have been permuted. 1335C It should thus be redistributed to all procs 1336 IF((abs(KEEP(486)) .EQ. 1).AND.(id%KEEP(38).GT.0)) 1337 & THEN ! grouping at analysis (1 => LR 1338 CALL MPI_BCAST( id%KEEP(38), 1, MPI_INTEGER, MASTER, 1339 & id%COMM, IERR ) 1340 ENDIF 1341 80 CONTINUE 1342C Broadcast errors 1343 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 1344 & id%COMM, id%MYID ) 1345 IF ( INFO(1).LT.0 ) RETURN 1346C --------------------------------------------------- 1347C Broadcast information computed on the master to 1348C the slaves. 1349C The matrix itself with numerical values and 1350C integer data for the arrowhead/element description 1351C will be received at the beginning of FACTO. 1352C --------------------------------------------------- 1353 CALL MPI_BCAST( id%FILS(1), id%N, MPI_INTEGER, 1354 & MASTER, id%COMM, IERR ) 1355 CALL MPI_BCAST( id%NA(1), id%LNA, MPI_INTEGER, 1356 & MASTER, id%COMM, IERR ) 1357 CALL MPI_BCAST( id%STEP(1), id%N, MPI_INTEGER, 1358 & MASTER, id%COMM, IERR ) 1359 CALL MPI_BCAST( id%PROCNODE_STEPS(1), id%KEEP(28), MPI_INTEGER, 1360 & MASTER, id%COMM, IERR ) 1361 CALL MPI_BCAST( id%DAD_STEPS(1), id%KEEP(28), MPI_INTEGER, 1362 & MASTER, id%COMM, IERR ) 1363 CALL MPI_BCAST( id%FRERE_STEPS(1), id%KEEP(28), MPI_INTEGER, 1364 & MASTER, id%COMM, IERR) 1365 CALL MPI_BCAST( id%NE_STEPS(1), id%KEEP(28), MPI_INTEGER, 1366 & MASTER, id%COMM, IERR ) 1367 CALL MPI_BCAST( id%ND_STEPS(1), id%KEEP(28), MPI_INTEGER, 1368 & MASTER, id%COMM, IERR ) 1369 CALL MPI_BCAST( id%SYM_PERM(1), id%N, MPI_INTEGER, 1370 & MASTER, id%COMM, IERR ) 1371 IF(KEEP(486).EQ.1) THEN 1372 CALL MPI_BCAST( id%LRGROUPS(1), id%N, MPI_INTEGER, 1373 & MASTER, id%COMM, IERR ) 1374 END IF 1375 IF (KEEP(55) .EQ. 0) THEN 1376C Assembled matrix format. Fill up the id%PTRAR array 1377C Broadcast id%SYM_PERM needed to fill up id%PTRAR 1378C At the end of ANA_N_PAR, id%PTRAR is already on every processor 1379C because it is computed in a distributed way. 1380C No need to broadcast it again 1381 CALL ZMUMPS_ANA_N_PAR(id, id%PTRAR(1)) 1382 IF(id%MYID .EQ. MASTER) THEN 1383C ----------------------------------- 1384C For distributed structure on entry, 1385C we can now deallocate the complete 1386C structure IRN / JCN. 1387C ----------------------------------- 1388 IF ( (KEEP(244) .EQ. 1) .AND. (KEEP(54) .EQ. 3) ) THEN 1389 DEALLOCATE( id%IRN ) 1390 DEALLOCATE( id%JCN ) 1391 END IF 1392 END IF 1393 ENDIF 1394C 1395C Store size of the stack memory for each 1396C of the sequential subtree. 1397 IF((id%KEEP(76).EQ.4).OR.(id%KEEP(76).EQ.6))THEN 1398 IF(associated(id%DEPTH_FIRST)) 1399 & deallocate(id%DEPTH_FIRST) 1400 allocate(id%DEPTH_FIRST(id%KEEP(28)),stat=allocok) 1401 IF (allocok .ne.0) then 1402 INFO(1)= -7 1403 INFO(2)= id%KEEP(28) 1404 IF ( LPOK ) THEN 1405 WRITE(LP, 150) 'id%DEPTH_FIRST' 1406 END IF 1407 GOTO 87 1408 END IF 1409 IF(associated(id%DEPTH_FIRST_SEQ)) 1410 * DEALLOCATE(id%DEPTH_FIRST_SEQ) 1411 ALLOCATE(id%DEPTH_FIRST_SEQ(id%KEEP(28)),stat=allocok) 1412 IF (allocok .ne.0) then 1413 INFO(1)= -7 1414 INFO(2)= id%KEEP(28) 1415 IF ( LPOK ) THEN 1416 WRITE(LP, 150) 'id%DEPTH_FIRST_SEQ' 1417 END IF 1418 GOTO 87 1419 END IF 1420 IF(associated(id%SBTR_ID)) 1421 * DEALLOCATE(id%SBTR_ID) 1422 ALLOCATE(id%SBTR_ID(id%KEEP(28)),stat=allocok) 1423 IF (allocok .ne.0) then 1424 INFO(1)= -7 1425 INFO(2)= id%KEEP(28) 1426 IF ( LPOK ) THEN 1427 WRITE(LP, 150) 'id%DEPTH_FIRST_SEQ' 1428 END IF 1429 GOTO 87 1430 END IF 1431 IF(id%MYID.EQ.MASTER)THEN 1432 id%DEPTH_FIRST(1:id%KEEP(28))=DEPTH_FIRST(1:id%KEEP(28)) 1433 id%DEPTH_FIRST_SEQ(1:id%KEEP(28))= 1434 & DEPTH_FIRST_SEQ(1:id%KEEP(28)) 1435 id%SBTR_ID(1:KEEP(28))=SBTR_ID(1:KEEP(28)) 1436 ENDIF 1437 CALL MPI_BCAST( id%DEPTH_FIRST(1), id%KEEP(28), MPI_INTEGER, 1438 & MASTER, id%COMM, IERR ) 1439 CALL MPI_BCAST( id%DEPTH_FIRST_SEQ(1), id%KEEP(28), 1440 & MPI_INTEGER,MASTER, id%COMM, IERR ) 1441 CALL MPI_BCAST( id%SBTR_ID(1), id%KEEP(28), 1442 & MPI_INTEGER,MASTER, id%COMM, IERR ) 1443 ELSE 1444 IF(associated(id%DEPTH_FIRST)) 1445 & deallocate(id%DEPTH_FIRST) 1446 allocate(id%DEPTH_FIRST(1),stat=allocok) 1447 IF (allocok .ne.0) then 1448 INFO(1)= -7 1449 INFO(2)= 1 1450 IF ( LPOK ) THEN 1451 WRITE(LP, 150) 'id%DEPTH_FIRST' 1452 END IF 1453 GOTO 87 1454 END IF 1455 IF(associated(id%DEPTH_FIRST_SEQ)) 1456 * DEALLOCATE(id%DEPTH_FIRST_SEQ) 1457 ALLOCATE(id%DEPTH_FIRST_SEQ(1),stat=allocok) 1458 IF (allocok .ne.0) then 1459 INFO(1)= -7 1460 INFO(2)= 1 1461 IF ( LPOK ) THEN 1462 WRITE(LP, 150) 'id%DEPTH_FIRST_SEQ' 1463 END IF 1464 GOTO 87 1465 END IF 1466 IF(associated(id%SBTR_ID)) 1467 * DEALLOCATE(id%SBTR_ID) 1468 ALLOCATE(id%SBTR_ID(1),stat=allocok) 1469 IF (allocok .ne.0) then 1470 INFO(1)= -7 1471 INFO(2)= 1 1472 IF ( LPOK ) THEN 1473 WRITE(LP, 150) 'id%DEPTH_FIRST_SEQ' 1474 END IF 1475 GOTO 87 1476 END IF 1477 id%SBTR_ID(1)=0 1478 id%DEPTH_FIRST(1)=0 1479 id%DEPTH_FIRST_SEQ(1)=0 1480 ENDIF 1481 IF(id%KEEP(76).EQ.5)THEN 1482 IF(associated(id%COST_TRAV)) 1483 & deallocate(id%COST_TRAV) 1484 allocate(id%COST_TRAV(id%KEEP(28)),stat=allocok) 1485 IF (allocok .ne.0) then 1486 IF ( LPOK ) THEN 1487 WRITE(LP, 150) 'id%COST_TRAV' 1488 END IF 1489 INFO(1)= -7 1490 INFO(2)= id%KEEP(28) 1491 GOTO 87 1492 END IF 1493 IF(id%MYID.EQ.MASTER)THEN 1494 id%COST_TRAV(1:id%KEEP(28))= 1495 & dble(COST_TRAV_TMP(1:id%KEEP(28))) 1496 ENDIF 1497 CALL MPI_BCAST( id%COST_TRAV(1), id%KEEP(28), 1498 & MPI_DOUBLE_PRECISION,MASTER, id%COMM, IERR ) 1499 ELSE 1500 IF(associated(id%COST_TRAV)) 1501 & deallocate(id%COST_TRAV) 1502 allocate(id%COST_TRAV(1),stat=allocok) 1503 IF (allocok .ne.0) then 1504 IF ( LPOK ) THEN 1505 WRITE(LP, 150) 'id%COST_TRAV(1)' 1506 END IF 1507 INFO(1)= -7 1508 INFO(2)= 1 1509 GOTO 87 1510 END IF 1511 id%COST_TRAV(1)=0.0d0 1512 ENDIF 1513 IF (id%KEEP(47) .EQ. 4 .OR. 1514 & ((id%KEEP(81) .GT. 0).AND.(id%KEEP(47).GE.2))) THEN 1515 IF(id%MYID .EQ. MASTER)THEN 1516 DO K=1,id%NSLAVES 1517 DO J=1,SIZE_TEMP_MEM 1518 IF(TEMP_MEM(J,K) < 0.0D0) GOTO 666 1519 ENDDO 1520 666 CONTINUE 1521 J=J-1 1522 IF (id%KEEP(46) == 1) THEN 1523 IDEST = K - 1 1524 ELSE 1525 IDEST = K 1526 ENDIF 1527 IF (IDEST .NE. MASTER) THEN 1528 CALL MPI_SEND(J,1,MPI_INTEGER,IDEST,0, 1529 & id%COMM,IERR) 1530 CALL MPI_SEND(TEMP_MEM(1,K),J,MPI_DOUBLE_PRECISION, 1531 & IDEST, 0, id%COMM,IERR) 1532 CALL MPI_SEND(TEMP_LEAF(1,K),J,MPI_INTEGER, 1533 & IDEST, 0, id%COMM,IERR) 1534 CALL MPI_SEND(TEMP_SIZE(1,K),J,MPI_INTEGER, 1535 & IDEST, 0, id%COMM,IERR) 1536 CALL MPI_SEND(TEMP_ROOT(1,K),J,MPI_INTEGER, 1537 & IDEST, 0, id%COMM,IERR) 1538 ELSE 1539 IF(associated(id%MEM_SUBTREE)) 1540 & deallocate(id%MEM_SUBTREE) 1541 allocate(id%MEM_SUBTREE(J),stat=allocok) 1542 IF (allocok .ne.0) then 1543 IF ( LPOK ) THEN 1544 WRITE(LP, 150) 'id%MEM_SUBTREE' 1545 END IF 1546 INFO(1)= -7 1547 INFO(2)= J 1548 GOTO 87 1549 END IF 1550 id%NBSA_LOCAL = J 1551 id%MEM_SUBTREE(1:J)=TEMP_MEM(1:J,1) 1552 IF(associated(id%MY_ROOT_SBTR)) 1553 & deallocate(id%MY_ROOT_SBTR) 1554 allocate(id%MY_ROOT_SBTR(J),stat=allocok) 1555 IF (allocok .ne.0) then 1556 IF ( LPOK ) THEN 1557 WRITE(LP, 150) 'id%MY_ROOT_SBTR' 1558 END IF 1559 INFO(1)= -7 1560 INFO(2)= J 1561 GOTO 87 1562 END IF 1563 id%MY_ROOT_SBTR(1:J)=TEMP_ROOT(1:J,1) 1564 IF(associated(id%MY_FIRST_LEAF)) 1565 & deallocate(id%MY_FIRST_LEAF) 1566 allocate(id%MY_FIRST_LEAF(J),stat=allocok) 1567 IF (allocok .ne.0) then 1568 IF ( LPOK ) THEN 1569 WRITE(LP, 150) 'id%MY_FIRST_LEAF' 1570 END IF 1571 INFO(1)= -7 1572 INFO(2)= J 1573 GOTO 87 1574 END IF 1575 id%MY_FIRST_LEAF(1:J)=TEMP_LEAF(1:J,1) 1576 IF(associated(id%MY_NB_LEAF)) 1577 & deallocate(id%MY_NB_LEAF) 1578 allocate(id%MY_NB_LEAF(J),stat=allocok) 1579 IF (allocok .ne.0) then 1580 IF ( LPOK ) THEN 1581 WRITE(LP, 150) 'id%MY_NB_LEAF' 1582 END IF 1583 INFO(1)= -7 1584 INFO(2)= J 1585 GOTO 87 1586 END IF 1587 id%MY_NB_LEAF(1:J)=TEMP_SIZE(1:J,1) 1588 ENDIF 1589 ENDDO 1590 ELSE 1591 CALL MPI_RECV(id%NBSA_LOCAL,1,MPI_INTEGER, 1592 & MASTER,0,id%COMM,STATUS, IERR) 1593 IF(associated(id%MEM_SUBTREE)) 1594 & deallocate(id%MEM_SUBTREE) 1595 allocate(id%MEM_SUBTREE(id%NBSA_LOCAL),stat=allocok) 1596 IF (allocok .ne.0) then 1597 IF ( LPOK ) THEN 1598 WRITE(LP, 150) 'id%MEM_SUBTREE' 1599 END IF 1600 INFO(1)= -7 1601 INFO(2)= id%NBSA_LOCAL 1602 GOTO 87 1603 END IF 1604 IF(associated(id%MY_ROOT_SBTR)) 1605 & deallocate(id%MY_ROOT_SBTR) 1606 allocate(id%MY_ROOT_SBTR(id%NBSA_LOCAL),stat=allocok) 1607 IF (allocok .ne.0) then 1608 IF ( LPOK ) THEN 1609 WRITE(LP, 150) 'id%MY_ROOT_SBTR' 1610 END IF 1611 INFO(1)= -7 1612 INFO(2)= id%NBSA_LOCAL 1613 GOTO 87 1614 END IF 1615 IF(associated(id%MY_FIRST_LEAF)) 1616 & deallocate(id%MY_FIRST_LEAF) 1617 allocate(id%MY_FIRST_LEAF(id%NBSA_LOCAL),stat=allocok) 1618 IF (allocok .ne.0) then 1619 IF ( LPOK ) THEN 1620 WRITE(LP, 150) 'MY_FIRST_LEAF' 1621 END IF 1622 INFO(1)= -7 1623 INFO(2)= id%NBSA_LOCAL 1624 GOTO 87 1625 END IF 1626 IF(associated(id%MY_NB_LEAF)) 1627 & deallocate(id%MY_NB_LEAF) 1628 allocate(id%MY_NB_LEAF(id%NBSA_LOCAL),stat=allocok) 1629 IF (allocok .ne.0) then 1630 IF ( LPOK ) THEN 1631 WRITE(LP, 150) 'MY_NB_LEAF' 1632 END IF 1633 INFO(1)= -7 1634 INFO(2)= id%NBSA_LOCAL 1635 GOTO 87 1636 END IF 1637 CALL MPI_RECV(id%MEM_SUBTREE(1),id%NBSA_LOCAL, 1638 & MPI_DOUBLE_PRECISION,MASTER,0, 1639 & id%COMM,STATUS,IERR) 1640 CALL MPI_RECV(id%MY_FIRST_LEAF(1),id%NBSA_LOCAL, 1641 & MPI_INTEGER,MASTER,0, 1642 & id%COMM,STATUS,IERR) 1643 CALL MPI_RECV(id%MY_NB_LEAF(1),id%NBSA_LOCAL, 1644 & MPI_INTEGER,MASTER,0, 1645 & id%COMM,STATUS,IERR) 1646 CALL MPI_RECV(id%MY_ROOT_SBTR(1),id%NBSA_LOCAL, 1647 & MPI_INTEGER,MASTER,0, 1648 & id%COMM,STATUS,IERR) 1649 ENDIF 1650 ELSE 1651 id%NBSA_LOCAL = -999999 1652 IF(associated(id%MEM_SUBTREE)) 1653 & deallocate(id%MEM_SUBTREE) 1654 allocate(id%MEM_SUBTREE(1),stat=allocok) 1655 IF (allocok .ne.0) then 1656 IF ( LPOK ) THEN 1657 WRITE(LP, 150) 'id%MEM_SUBTREE(1)' 1658 END IF 1659 INFO(1)= -7 1660 INFO(2)= 1 1661 GOTO 87 1662 END IF 1663 IF(associated(id%MY_ROOT_SBTR)) 1664 & deallocate(id%MY_ROOT_SBTR) 1665 allocate(id%MY_ROOT_SBTR(1),stat=allocok) 1666 IF (allocok .ne.0) then 1667 IF ( LPOK ) THEN 1668 WRITE(LP, 150) 'id%MY_ROOT_SBTR(1)' 1669 END IF 1670 INFO(1)= -7 1671 INFO(2)= 1 1672 GOTO 87 1673 END IF 1674 IF(associated(id%MY_FIRST_LEAF)) 1675 & deallocate(id%MY_FIRST_LEAF) 1676 allocate(id%MY_FIRST_LEAF(1),stat=allocok) 1677 IF (allocok .ne.0) then 1678 IF ( LPOK ) THEN 1679 WRITE(LP, 150) 'id%MY_FIRST_LEAF(1)' 1680 END IF 1681 INFO(1)= -7 1682 INFO(2)= 1 1683 GOTO 87 1684 END IF 1685 IF(associated(id%MY_NB_LEAF)) 1686 & deallocate(id%MY_NB_LEAF) 1687 allocate(id%MY_NB_LEAF(1),stat=allocok) 1688 IF (allocok .ne.0) then 1689 IF ( LPOK ) THEN 1690 WRITE(LP, 150) 'id%MY_NB_LEAF(1)' 1691 END IF 1692 INFO(1)= -7 1693 INFO(2)= 1 1694 GOTO 87 1695 END IF 1696 ENDIF 1697 IF(id%MYID.EQ.MASTER)THEN 1698 IF(IS_BUILD_LOAD_MEM_CALLED)THEN 1699 deallocate(TEMP_MEM) 1700 deallocate(TEMP_SIZE) 1701 deallocate(TEMP_ROOT) 1702 deallocate(TEMP_LEAF) 1703 deallocate(COST_TRAV_TMP) 1704 deallocate(DEPTH_FIRST) 1705 deallocate(DEPTH_FIRST_SEQ) 1706 deallocate(SBTR_ID) 1707 ENDIF 1708 ENDIF 1709 87 CONTINUE 1710 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 1711 & id%COMM, id%MYID ) 1712 IF ( INFO(1).LT.0 ) RETURN 1713C 1714 NB_NIV2 = KEEP(56) ! KEEP(1:110) was broadcast earlier 1715C NB_NIV2 is now available on all processors. 1716 IF ( NB_NIV2.GT.0 ) THEN 1717C Allocate arrays on slaves 1718 if (id%MYID.ne.MASTER) then 1719 IF (associated(id%CANDIDATES)) deallocate(id%CANDIDATES) 1720 allocate(PAR2_NODES(NB_NIV2), 1721 & id%CANDIDATES(id%NSLAVES+1,NB_NIV2), 1722 & STAT=allocok) 1723 IF (allocok .ne.0) then 1724 INFO(1)= -7 1725 INFO(2)= NB_NIV2*(id%NSLAVES+1) 1726 IF ( LPOK ) THEN 1727 WRITE(LP, 150) 'PAR2_NODES/id%CANDIDATES' 1728 END IF 1729 end if 1730 end if 1731 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 1732 & id%COMM, id%MYID ) 1733 IF ( INFO(1).LT.0 ) RETURN 1734 CALL MPI_BCAST(PAR2_NODES(1),NB_NIV2, 1735 & MPI_INTEGER, MASTER, id%COMM, IERR ) 1736 IF (KEEP(24) .NE.0 ) THEN 1737 CALL MPI_BCAST(id%CANDIDATES(1,1), 1738 & (NB_NIV2*(id%NSLAVES+1)), 1739 & MPI_INTEGER, MASTER, id%COMM, IERR ) 1740 ENDIF 1741 ENDIF 1742 IF ( associated(id%ISTEP_TO_INIV2)) THEN 1743 deallocate(id%ISTEP_TO_INIV2) 1744 NULLIFY(id%ISTEP_TO_INIV2) 1745 ENDIF 1746 IF ( associated(id%I_AM_CAND)) THEN 1747 deallocate(id%I_AM_CAND) 1748 NULLIFY(id%I_AM_CAND) 1749 ENDIF 1750 IF (NB_NIV2.EQ.0) THEN 1751C allocate dummy arrays 1752C ISTEP_TO_INIV2 will never be used 1753C Add a parameter SIZE_ISTEP_TO_INIV2 and make 1754C it always available in a keep(71) 1755 id%KEEP(71) = 1 1756 ELSE 1757 id%KEEP(71) = id%KEEP(28) 1758 ENDIF 1759 allocate(id%ISTEP_TO_INIV2(id%KEEP(71)), 1760 & id%I_AM_CAND(max(NB_NIV2,1)), 1761 & stat=allocok) 1762 IF (allocok .gt.0) THEN 1763 IF ( LPOK ) THEN 1764 WRITE(LP, 150) 'id%ISTEP_TO_INIV2' 1765 WRITE(LP, 150) 'id%TAB_POS_IN_PERE' 1766 END IF 1767 INFO(1)= -7 1768 IF (NB_NIV2.EQ.0) THEN 1769 INFO(2)= 2 1770 ELSE 1771 INFO(2)= id%KEEP(28)+NB_NIV2*(id%NSLAVES+2) 1772 END IF 1773 GOTO 321 1774 ENDIF 1775 IF ( NB_NIV2 .GT.0 ) THEN 1776C If BLR grouping was performed then PAR2_NODES(INIV2) 1777C might then point to a non principal variable 1778C for which STEP might be negative 1779C 1780 DO INIV2 = 1, NB_NIV2 1781 INN = PAR2_NODES(INIV2) 1782 id%ISTEP_TO_INIV2(abs(id%STEP(INN))) = INIV2 1783 END DO 1784 CALL ZMUMPS_BUILD_I_AM_CAND( id%NSLAVES, KEEP(79), 1785 & NB_NIV2, id%MYID_NODES, 1786 & id%CANDIDATES(1,1), id%I_AM_CAND(1) ) 1787 ENDIF 1788 IF ( I_AM_SLAVE ) THEN 1789#if ! defined(OLD_LOAD_MECHANISM) 1790 IF (associated(id%FUTURE_NIV2)) THEN 1791 deallocate(id%FUTURE_NIV2) 1792 NULLIFY(id%FUTURE_NIV2) 1793 ENDIF 1794 allocate(id%FUTURE_NIV2(id%NSLAVES), stat=allocok) 1795 IF (allocok .gt.0) THEN 1796 IF ( LPOK ) THEN 1797 WRITE(LP, 150) 'FUTURE_NIV2' 1798 END IF 1799 INFO(1)= -7 1800 INFO(2)= id%NSLAVES 1801 GOTO 321 1802 ENDIF 1803 id%FUTURE_NIV2=0 1804 DO INIV2 = 1, NB_NIV2 1805 IDEST = MUMPS_PROCNODE( 1806 & id%PROCNODE_STEPS(abs(id%STEP(PAR2_NODES(INIV2)))), 1807 & id%NSLAVES) 1808 id%FUTURE_NIV2(IDEST+1)=id%FUTURE_NIV2(IDEST+1)+1 1809 ENDDO 1810#endif 1811C Allocate id%TAB_POS_IN_PERE, 1812C TAB_POS_IN_PERE is an array of size (id%NSLAVES+2,NB_NIV2) 1813C where NB_NIV2 is the number of type 2 nodes in the tree. 1814 IF ( associated(id%TAB_POS_IN_PERE)) THEN 1815 deallocate(id%TAB_POS_IN_PERE) 1816 NULLIFY(id%TAB_POS_IN_PERE) 1817 ENDIF 1818 allocate(id%TAB_POS_IN_PERE(id%NSLAVES+2,max(NB_NIV2,1)), 1819 & stat=allocok) 1820 IF (allocok .gt.0) THEN 1821 IF ( LPOK ) THEN 1822 WRITE(LP, 150) 'id%ISTEP_TO_INIV2' 1823 WRITE(LP, 150) 'id%TAB_POS_IN_PERE' 1824 END IF 1825 INFO(1)= -7 1826 IF (NB_NIV2.EQ.0) THEN 1827 INFO(2)= 2 1828 ELSE 1829 INFO(2)= id%KEEP(28)+NB_NIV2*(id%NSLAVES+2) 1830 END IF 1831 GOTO 321 1832 ENDIF 1833 END IF 1834C deallocate PAR2_NODES that was computed 1835C on master and broadcasted on all slaves 1836 IF (NB_NIV2.GT.0) deallocate (PAR2_NODES) 1837 321 CONTINUE 1838C ---------------- 1839C Check for errors 1840C ---------------- 1841 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 1842 & id%COMM, id%MYID ) 1843 IF ( INFO(1).LT.0 ) RETURN 1844C ------------------------------ 1845C Perform again the subdivision of array 1846C IS1, both on the master and on 1847C the slaves. This is done so to 1848C ease the passage to the model 1849C where master will work. 1850C ------------------------------ 1851C 1852 IF ( KEEP(23).NE.0 .and. id%MYID .EQ. MASTER ) THEN 1853 IKEEP = id%N + 1 1854 ELSE 1855 IKEEP = 1 1856 END IF 1857 FILS = IKEEP + 3 * id%N 1858 NE = IKEEP + 2 * id%N 1859 NA = IKEEP + id%N 1860 FRERE = FILS + id%N 1861 NFSIZ = FRERE + id%N 1862 IF ( KEEP(38) .NE. 0 ) THEN 1863C ------------------------- 1864C Initialize root structure 1865C ------------------------- 1866 CALL ZMUMPS_INIT_ROOT_ANA( id%MYID, 1867 & id%NSLAVES, id%N, id%root, 1868 & id%COMM_NODES, KEEP( 38 ), id%FILS(1), 1869 & id%KEEP(50), id%KEEP(46), 1870 & id%KEEP(51) 1871 & , id%KEEP(60), id%NPROW, id%NPCOL, id%MBLOCK, id%NBLOCK 1872 & ) 1873 ELSE 1874 id%root%yes = .FALSE. 1875 END IF 1876 IF ( KEEP(38) .NE. 0 .and. I_AM_SLAVE ) THEN 1877C ----------------------------------------------- 1878C Check if at least one processor belongs to the 1879C root. In the case where all of them have MYROW 1880C equal to -1, this could be a problem due to the 1881C BLACS. (mpxlf90_r and IBM BLACS). 1882C ----------------------------------------------- 1883 CALL MPI_ALLREDUCE(id%root%MYROW, MYROW_CHECK, 1, 1884 & MPI_INTEGER, MPI_MAX, id%COMM_NODES, IERR) 1885 IF ( MYROW_CHECK .eq. -1) THEN 1886 INFO(1) = -25 1887 INFO(2) = 0 1888 END IF 1889 IF ( id%root%MYROW .LT. -1 .OR. 1890 & id%root%MYCOL .LT. -1 ) THEN 1891 INFO(1) = -25 1892 INFO(2) = 0 1893 END IF 1894 IF ( LPOK .AND. INFO(1) == -25 ) THEN 1895 WRITE(LP, '(A)') 1896 & 'Problem with your version of the BLACS.' 1897 WRITE(LP, '(A)') 'Try using a BLACS version from netlib.' 1898 ENDIF 1899 END IF 1900C ---------------- 1901C Check for errors 1902C ---------------- 1903 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 1904 & id%COMM, id%MYID ) 1905 IF ( INFO(1).LT.0 ) RETURN 1906 IF ( I_AM_SLAVE ) THEN 1907C 1908C 1909 IF (KEEP(55) .EQ. 0) THEN 1910 CALL ZMUMPS_ANA_DIST_ARROWHEADS( id%MYID, 1911 & id%NSLAVES, id%N, id%PROCNODE_STEPS(1), 1912 & id%STEP(1), id%PTRAR(1), 1913 & id%PTRAR(id%N +1), 1914 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1), 1915 & KEEP(1),KEEP8(1), ICNTL(1), id ) 1916 ELSE 1917 CALL ZMUMPS_ANA_DIST_ELEMENTS( id%MYID, 1918 & id%NSLAVES, id%N, id%PROCNODE_STEPS(1), 1919 & id%STEP(1), 1920 & id%PTRAR(1), 1921 & id%PTRAR(id%NELT+2 ), 1922 & id%NELT, 1923 & id%FRTPTR(1), id%FRTELT(1), 1924 & KEEP(1), KEEP8(1), ICNTL(1), id%KEEP(50) ) 1925 ENDIF 1926 ENDIF 1927C ----------------------------------------- 1928C Perform some local analysis on the slaves 1929C to estimate the size of the working space 1930C for factorization 1931C ----------------------------------------- 1932 IF ( I_AM_SLAVE ) THEN 1933 locI_AM_CAND => id%I_AM_CAND 1934 locMYID_NODES = id%MYID_NODES 1935 locMYID = id%MYID 1936C 1937C Precompute estimates of local_m,local_n 1938C (number of rows/columns mapped on each processor) 1939C in case of parallel root node. 1940C 1941 IF ( id%root%yes ) THEN 1942 LOCAL_M = numroc( id%ND_STEPS(id%STEP(KEEP(38))), 1943 & id%root%MBLOCK, id%root%MYROW, 0, 1944 & id%root%NPROW ) 1945 LOCAL_M = max(1, LOCAL_M) 1946 LOCAL_N = numroc( id%ND_STEPS(id%STEP(KEEP(38))), 1947 & id%root%NBLOCK, id%root%MYCOL, 0, 1948 & id%root%NPCOL ) 1949 ELSE 1950 LOCAL_M = 0 1951 LOCAL_N = 0 1952 END IF 1953 IF ( KEEP(60) .EQ. 2 .OR. KEEP(60) .EQ. 3 ) THEN 1954C Return minimum nb rows/cols to user 1955 id%SCHUR_MLOC=LOCAL_M 1956 id%SCHUR_NLOC=LOCAL_N 1957C Also store them in root structure for convenience 1958 id%root%SCHUR_MLOC=LOCAL_M 1959 id%root%SCHUR_NLOC=LOCAL_N 1960 ENDIF 1961 IF ( .NOT. associated(id%CANDIDATES)) THEN 1962 ALLOCATE(id%CANDIDATES(id%NSLAVES+1,1)) 1963 ENDIF 1964 CALL ZMUMPS_ANA_DISTM( locMYID_NODES, id%N, 1965 & id%STEP(1), id%FRERE_STEPS(1), id%FILS(1), 1966 & id%NA(1), id%LNA, id%NE_STEPS(1), id%DAD_STEPS(1), 1967 & id%ND_STEPS(1), id%PROCNODE_STEPS(1), 1968 & id%NSLAVES, 1969 & KEEP8(11), KEEP(26), KEEP(15), 1970 & KEEP8(12), ! formerly KEEP(16), 1971 & KEEP8(14), ! formerly KEEP(200), 1972 & KEEP(224), KEEP(225), 1973 & KEEP(27), RINFO(1), 1974 & KEEP(1), KEEP8(1), LOCAL_M, LOCAL_N, SBUF_RECOLD8, 1975 & SBUF_SEND, SBUF_REC, id%COST_SUBTREES, KEEP(28), 1976 & locI_AM_CAND(1), max(KEEP(56),1), 1977 & id%ISTEP_TO_INIV2(1), id%CANDIDATES(1,1), 1978 & INFO(1), INFO(2) 1979 & ,KEEP8(15) 1980 & ,MAX_SIZE_FACTOR_TMP, KEEP8(9) 1981 & ,ENTRIES_IN_FACTORS_LOC_MASTERS 1982 & ,id%root%yes, id%root%NPROW, id%root%NPCOL 1983 & ) 1984 IF(ASSOCIATED(locI_AM_CAND)) NULLIFY(locI_AM_CAND) 1985 id%MAX_SURF_MASTER = KEEP8(15) 1986C 1987 KEEP8(19)=MAX_SIZE_FACTOR_TMP 1988 KEEP( 29 ) = KEEP(15) + 2* max(KEEP(12),10) 1989 & * ( KEEP(15) / 100 + 1) 1990C Relaxed value of size of IS is not needed internally; 1991C we save it directly in INFO(19) 1992 INFO( 19 ) = KEEP(225) + 2* max(KEEP(12),10) 1993 & * ( KEEP(225) / 100 + 1) 1994C size of S 1995 KEEP8(13) = KEEP8(12) + int(KEEP(12),8) * 1996 & ( KEEP8(12) / 100_8 + 1_8 ) 1997C size of S 1998 KEEP8(17) = KEEP8(14) + int(KEEP(12),8) * 1999 & ( KEEP8(14) /100_8 +1_8) 2000C KEEP8( 22 ) is the OLD maximum size of receive buffer 2001C that includes CB related communications. 2002C KEEP( 43 ) : min size for send buffer 2003C KEEP( 44 ) : min size for receive buffer 2004C KEEP(43-44) kept for allocating buffers during 2005C factorization phase 2006 CALL MUMPS_ALLREDUCEI8 ( SBUF_RECOLD8, KEEP8(22), MPI_MAX, 2007 & id%COMM_NODES ) 2008C We do a max with KEEP(27)=maxfront because for small 2009C buffers, we need at least one row of cb to be sent/ 2010C received. 2011 SBUF_SEND = max(SBUF_SEND,KEEP(27)) 2012 SBUF_REC = max(SBUF_REC ,KEEP(27)) 2013 CALL MPI_ALLREDUCE (SBUF_REC, KEEP(44), 1, 2014 & MPI_INTEGER, MPI_MAX, 2015 & id%COMM_NODES, IERR) 2016 IF (KEEP(48)==5) THEN 2017 KEEP(43)=KEEP(44) 2018 ELSE 2019 KEEP(43)=SBUF_SEND 2020 ENDIF 2021C 2022 MIN_BUF_SIZE8 = KEEP8(22) / int(KEEP(238),8) 2023 MIN_BUF_SIZE8 = min( MIN_BUF_SIZE8, int(huge (KEEP(43)),8)) 2024 MIN_BUF_SIZE = int( MIN_BUF_SIZE8 ) 2025C 2026 KEEP(44) = max(KEEP(44), MIN_BUF_SIZE) 2027 KEEP(43) = max(KEEP(43), MIN_BUF_SIZE) 2028 IF ( PROK ) THEN 2029 WRITE(MP,'(A,I16) ') 2030 & ' Estimated INTEGER space for factors :', 2031 & KEEP(26) 2032 WRITE(MP,'(A,I16) ') 2033 & ' INFO(3), est. complex space to store factors:', 2034 & KEEP8(11) 2035 WRITE(MP,'(A,I16) ') 2036 & ' Estimated number of entries in factors :', 2037 & KEEP8(9) 2038 WRITE(MP,'(A,I16) ') 2039 & ' Current value of space relaxation parameter :', 2040 & KEEP(12) 2041 WRITE(MP,'(A,I16) ') 2042 & ' Estimated size of IS (In Core factorization):', 2043 & KEEP(29) 2044 WRITE(MP,'(A,I16) ') 2045 & ' Estimated size of S (In Core factorization):', 2046 & KEEP8(13) 2047 WRITE(MP,'(A,I16) ') 2048 & ' Estimated size of S (OOC factorization) :', 2049 & KEEP8(17) 2050 END IF 2051 ELSE 2052C --------------------- 2053C Master is not working 2054C --------------------- 2055 ENTRIES_IN_FACTORS_LOC_MASTERS = 0_8 2056 KEEP8(13) = 0_8 2057 KEEP(29) = 0 2058 KEEP8(17)= 0_8 2059 INFO(19) = 0 2060 KEEP8(11) = 0_8 2061 KEEP(26) = 0 2062 KEEP(27) = 0 2063 RINFO(1) = 0.0D0 2064 END IF 2065C -------------------------------------- 2066C KEEP8( 26 ) : Real arrowhead size 2067C KEEP8( 27 ) : Integer arrowhead size 2068C INFO(3)/KEEP8( 11 ) : Estimated real space needed for factors 2069C INFO(4)/KEEP( 26 ) : Estimated integer space needed for factors 2070C INFO(5)/KEEP( 27 ) : Estimated max front size 2071C KEEP8(109) : Estimated number of entries in factor 2072C (based on ENTRIES_IN_FACTORS_LOC_MASTERS computed 2073C during ZMUMPS_ANA_DISTM, where we assume 2074C that each master of a node computes 2075C the complete factor size. 2076C -------------------------------------- 2077 CALL MUMPS_ALLREDUCEI8( ENTRIES_IN_FACTORS_LOC_MASTERS, 2078 & KEEP8(109), MPI_SUM, id%COMM) 2079 CALL MUMPS_ALLREDUCEI8( KEEP8(19), KEEP8(119), 2080 & MPI_MAX, id%COMM) 2081 CALL MPI_ALLREDUCE( KEEP(27), KEEP(127), 1, 2082 & MPI_INTEGER, MPI_MAX, 2083 & id%COMM, IERR) 2084 CALL MPI_ALLREDUCE( KEEP(26), KEEP(126), 1, 2085 & MPI_INTEGER, MPI_SUM, 2086 & id%COMM, IERR) 2087 CALL MUMPS_REDUCEI8( KEEP8(11), KEEP8(111), MPI_SUM, 2088 & MASTER, id%COMM ) 2089 CALL MUMPS_SETI8TOI4( KEEP8(111), INFOG(3) ) 2090C -------------- 2091C Flops estimate 2092C -------------- 2093 CALL MPI_ALLREDUCE( RINFO(1), RINFOG(1), 1, 2094 & MPI_DOUBLE_PRECISION, MPI_SUM, 2095 & id%COMM, IERR) 2096 CALL MUMPS_SETI8TOI4( KEEP8(11), INFO(3) ) 2097 INFO ( 4 ) = KEEP( 26 ) 2098 INFO ( 5 ) = KEEP( 27 ) 2099 INFO ( 7 ) = KEEP( 29 ) 2100 CALL MUMPS_SETI8TOI4( KEEP8(13), INFO(8) ) 2101 CALL MUMPS_SETI8TOI4( KEEP8(17), INFO(20) ) 2102 CALL MUMPS_SETI8TOI4( KEEP8(9), INFO(24) ) 2103 INFOG( 4 ) = KEEP( 126 ) 2104 INFOG( 5 ) = KEEP( 127 ) 2105 CALL MUMPS_SETI8TOI4( KEEP8(109), INFOG(20) ) 2106 CALL ZMUMPS_DIAG_ANA(id%MYID, id%COMM, KEEP(1), KEEP8(1), 2107 & INFO(1), INFOG(1), RINFO(1), RINFOG(1), ICNTL(1)) 2108C ========================= 2109C IN-CORE MEMORY STATISTICS 2110C ========================= 2111 OOC_STAT = KEEP(201) 2112 IF (KEEP(201) .NE. -1) OOC_STAT=0 ! We want in-core statistics 2113 PERLU_ON = .FALSE. ! switch off PERLU to compute KEEP8(2) 2114 CALL ZMUMPS_MAX_MEM( KEEP(1), KEEP8(1), 2115 & id%MYID, id%N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28), 2116 & id%KEEP8(30), 2117 & id%NSLAVES, TOTAL_MBYTES, .FALSE., 2118 & OOC_STAT, PERLU_ON, TOTAL_BYTES) 2119 KEEP8(2) = TOTAL_BYTES 2120 PERLU_ON = .TRUE. 2121 CALL ZMUMPS_MAX_MEM( KEEP(1), KEEP8(1), 2122 & id%MYID, id%N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28), 2123 & id%KEEP8(30), 2124 & id%NSLAVES, TOTAL_MBYTES, .FALSE., 2125 & OOC_STAT, PERLU_ON, TOTAL_BYTES) 2126 IF ( PROK ) THEN 2127 WRITE(MP,'(A,I10) ') 2128 & ' Estimated space in MBYTES for IC factorization :', 2129 & TOTAL_MBYTES 2130 END IF 2131 id%INFO(15) = TOTAL_MBYTES 2132C 2133C Centralize memory statistics on the host 2134C 2135C INFOG(16) = after analysis, est. mem size in Mbytes for facto, 2136C for the processor using largest memory 2137C INFOG(17) = after analysis, est. mem size in Mbytes for facto, 2138C sum over all processors 2139C INFOG(18/19) = idem at facto. 2140C 2141 CALL MUMPS_MEM_CENTRALIZE( id%MYID, id%COMM, 2142 & id%INFO(15), id%INFOG(16), IRANK ) 2143 IF ( PROKG ) THEN 2144 WRITE( MPG,'(A,I16) ') 2145 & ' ** Rank of proc needing largest memory in IC facto :', 2146 & IRANK 2147 WRITE( MPG,'(A,I16) ') 2148 & ' ** Estimated corresponding MBYTES for IC facto :', 2149 & id%INFOG(16) 2150 IF ( KEEP(46) .eq. 0 ) THEN 2151C Host not working 2152 WRITE( MPG,'(A,I16) ') 2153 & ' ** Estimated avg. MBYTES per work. proc at facto (IC) :' 2154 & ,(id%INFOG(17)-id%INFO(15))/id%NSLAVES 2155 ELSE 2156 WRITE( MPG,'(A,I16) ') 2157 & ' ** Estimated avg. MBYTES per work. proc at facto (IC) :' 2158 & ,id%INFOG(17)/id%NSLAVES 2159 END IF 2160 WRITE(MPG,'(A,I16) ') 2161 & ' ** TOTAL space in MBYTES for IC factorization :' 2162 & ,id%INFOG(17) 2163 END IF 2164C ========================================= 2165C NOW COMPUTE OUT-OF-CORE MEMORY STATISTICS 2166C (except when OOC_STAT is equal to -1 in 2167C which case IC and OOC statistics are 2168C identical) 2169C ========================================= 2170 OOC_STAT = KEEP(201) 2171#if defined(OLD_OOC_NOPANEL) 2172 IF (OOC_STAT .NE. -1) OOC_STAT=2 2173#else 2174 IF (OOC_STAT .NE. -1) OOC_STAT=1 2175#endif 2176 PERLU_ON = .FALSE. ! PERLU NOT taken into account 2177C Used to compute KEEP8(3) (minimum number of bytes for OOC) 2178 CALL ZMUMPS_MAX_MEM( KEEP(1), KEEP8(1), 2179 & id%MYID, id%N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28), 2180 & id%KEEP8(30), 2181 & id%NSLAVES, TOTAL_MBYTES, .FALSE., 2182 & OOC_STAT, PERLU_ON, TOTAL_BYTES) 2183 KEEP8(3) = TOTAL_BYTES 2184 PERLU_ON = .TRUE. ! PERLU taken into account 2185 CALL ZMUMPS_MAX_MEM( KEEP(1), KEEP8(1), 2186 & id%MYID, id%N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28), 2187 & id%KEEP8(30), 2188 & id%NSLAVES, TOTAL_MBYTES, .FALSE., 2189 & OOC_STAT, PERLU_ON, TOTAL_BYTES) 2190 id%INFO(17) = TOTAL_MBYTES 2191 CALL MUMPS_MEM_CENTRALIZE( id%MYID, id%COMM, 2192 & id%INFO(17), id%INFOG(26), IRANK ) 2193 IF ( PROKG ) THEN 2194 WRITE( MPG,'(A,I16) ') 2195 & ' ** Rank of proc needing largest memory for OOC facto :', 2196 & IRANK 2197 WRITE( MPG,'(A,I16) ') 2198 & ' ** Estimated corresponding MBYTES for OOC facto :', 2199 & id%INFOG(26) 2200 IF ( KEEP(46) .eq. 0 ) THEN 2201C Host not working 2202 WRITE( MPG,'(A,I16) ') 2203 & ' ** Estimated avg. MBYTES per work. proc at facto (OOC) :' 2204 & ,(id%INFOG(27)-id%INFO(15))/id%NSLAVES 2205 ELSE 2206 WRITE( MPG,'(A,I16) ') 2207 & ' ** Estimated avg. MBYTES per work. proc at facto (OOC) :' 2208 & ,id%INFOG(27)/id%NSLAVES 2209 END IF 2210 WRITE(MPG,'(A,I16) ') 2211 & ' ** TOTAL space in MBYTES for OOC factorization :' 2212 & ,id%INFOG(27) 2213 END IF 2214c #endif 2215C ------------------------- 2216C Define a specific mapping 2217C for the user 2218C ------------------------- 2219 IF ( id%MYID. eq. MASTER .AND. KEEP(54) .eq. 1 ) THEN 2220 IF (associated( id%MAPPING)) 2221 & deallocate( id%MAPPING) 2222 allocate( id%MAPPING(id%KEEP8(28)), stat=allocok) 2223 IF ( allocok .GT. 0 ) THEN 2224 INFO(1) = -7 2225 CALL MUMPS_SETI8TOI4(id%KEEP8(28), INFO(2)) 2226 IF ( LPOK ) THEN 2227 WRITE(LP, 150) 'id%MAPPING' 2228 END IF 2229 GOTO 92 2230 END IF 2231 allocate(IWtemp( id%N ), stat=allocok) 2232 IF ( allocok .GT. 0 ) THEN 2233 INFO(1)=-7 2234 INFO(2)=id%N 2235 IF ( LPOK ) THEN 2236 WRITE(LP, 150) 'IWtemp(N)' 2237 END IF 2238 GOTO 92 2239 END IF 2240 CALL ZMUMPS_BUILD_MAPPING( 2241 & id%N, id%MAPPING(1), id%KEEP8(28), 2242 & id%IRN(1),id%JCN(1), id%PROCNODE_STEPS(1), 2243 & id%STEP(1), 2244 & id%NSLAVES, id%SYM_PERM(1), 2245 & id%FILS(1), IWtemp, id%KEEP(1),id%KEEP8(1), 2246 & id%root%MBLOCK, id%root%NBLOCK, 2247 & id%root%NPROW, id%root%NPCOL ) 2248 deallocate( IWtemp ) 2249 92 CONTINUE 2250 END IF 2251 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 2252 & id%COMM, id%MYID ) 2253 IF ( INFO(1) .LT. 0 ) RETURN 2254 KEEP8(26)=max(1_8,KEEP8(26)) 2255 KEEP8(27)=max(1_8,KEEP8(27)) 2256 RETURN 2257 110 FORMAT(/' ****** ANALYSIS STEP ********'/) 2258 150 FORMAT( 2259 & /' ** FAILURE DURING ZMUMPS_ANA_DRIVER, DYNAMIC ALLOCATION OF', 2260 & A30) 2261 END SUBROUTINE ZMUMPS_ANA_DRIVER 2262 SUBROUTINE ZMUMPS_ANA_CHECK_KEEP(id) 2263C This subroutine decodes the control parameters, 2264C stores them in the KEEP array, and performs a 2265C consistency check on the KEEP array. 2266 USE ZMUMPS_STRUC_DEF 2267 IMPLICIT NONE 2268 TYPE(ZMUMPS_STRUC) :: id 2269C internal variables 2270 INTEGER :: LP, MP, MPG, I 2271 INTEGER :: MASTER 2272 LOGICAL :: PROK, PROKG, LPOK 2273 PARAMETER( MASTER = 0 ) 2274 LP = id%ICNTL( 1 ) 2275 MP = id%ICNTL( 2 ) 2276 MPG = id%ICNTL( 3 ) 2277C LP : errors 2278C MP : INFO 2279 LPOK = ((LP.GT.0).AND.(id%ICNTL(4).GE.1)) 2280 PROK = (( MP .GT. 0 ).AND.(id%ICNTL(4).GE.2)) 2281 PROKG = ( MPG .GT. 0 .and. id%MYID .eq. MASTER ) 2282 PROKG = (PROKG.AND.(id%ICNTL(4).GE.2)) 2283C Fwd in facto 2284 IF (id%MYID.eq.MASTER) THEN 2285 id%KEEP(256) = id%ICNTL(7) ! copy ordering option 2286 id%KEEP(252) = id%ICNTL(32) 2287 IF (id%KEEP(252) < 0 .OR. id%KEEP(252) > 1 ) THEN 2288 id%KEEP(252) = 0 2289 ENDIF 2290C Which factors to store 2291 id%KEEP(251) = id%ICNTL(31) 2292 IF (id%KEEP(251) < 0 .OR. id%KEEP(251) > 2 ) THEN 2293 id%KEEP(251)=0 2294 ENDIF 2295C For unsymmetric matrices, if forward solve 2296C performed during facto, 2297C no reason to store L factors at all. Reset 2298C KEEP(251) accordingly... except if the user 2299C tells that no solve is needed. 2300 IF (id%KEEP(50) .EQ. 0 .AND. id%KEEP(252).EQ.1) THEN 2301 IF (id%KEEP(251) .NE. 1) id%KEEP(251) = 2 2302 ENDIF 2303C Symmetric case, even if no backward needed, 2304C store all factors 2305 IF (id%KEEP(50) .NE.0 .AND. id%KEEP(251) .EQ. 2) THEN 2306 id%KEEP(251) = 0 2307 ENDIF 2308C Case of solve not needed: 2309 IF (id%KEEP(251) .EQ. 1) THEN 2310 id%KEEP(201) = -1 2311C In that case, id%ICNTL(22) will 2312C be ignored in future phases 2313 ENDIF 2314 IF (id%KEEP(252).EQ.1) THEN 2315 id%KEEP(253) = id%NRHS 2316 IF (id%KEEP(253) .LE. 0) THEN 2317 id%INFO(1)=-42 2318 id%INFO(2)=id%NRHS 2319 RETURN 2320 ENDIF 2321 ELSE 2322 id%KEEP(253) = 0 2323 ENDIF 2324 ENDIF 2325 IF ( (id%KEEP(24).NE.0) .AND. 2326 & id%NSLAVES.eq.1 ) THEN 2327 id%KEEP(24) = 0 2328 IF ( PROKG ) THEN 2329 WRITE(MPG, '(A)') 2330 & ' Resetting candidate strategy to 0 because NSLAVES=1' 2331 WRITE(MPG, '(A)') ' ' 2332 END IF 2333 END IF 2334 IF ( (id%KEEP(24).EQ.0) .AND. 2335 & id%NSLAVES.GT.1 ) THEN 2336 id%KEEP(24) = 8 2337 ENDIF 2338 IF ( (id%KEEP(24).NE.0) .AND. (id%KEEP(24).NE.1) .AND. 2339 & (id%KEEP(24).NE.8) .AND. (id%KEEP(24).NE.10) .AND. 2340 & (id%KEEP(24).NE.12) .AND. (id%KEEP(24).NE.14) .AND. 2341 & (id%KEEP(24).NE.16) .AND. (id%KEEP(24).NE.18)) THEN 2342 id%KEEP(24) = 8 2343 IF ( PROKG ) THEN 2344 WRITE(MPG, '(A)') 2345 & ' Resetting candidate strategy to 8 ' 2346 WRITE(MPG, '(A)') ' ' 2347 END IF 2348 END IF 2349C**************************************************** 2350C 2351C The master is doing most of the work 2352C 2353C NOTE: Treatment of the errors on the master= 2354C Go to the next SPMD part of the code in which 2355C the first statement must be a call to PROPINFO 2356C 2357C**************************************************** 2358C ========================================= 2359C Check (raise error or modify) some input 2360C parameters or KEEP values on the master. 2361C ========================================= 2362 id%KEEP8(21) = int(id%KEEP(85),8) 2363 IF ( id%MYID .EQ. MASTER ) THEN 2364C -- OOC/Incore strategy 2365 IF (id%KEEP(201).NE.-1) THEN 2366 id%KEEP(201)=id%ICNTL(22) 2367 IF (id%KEEP(201) .GT. 0) THEN 2368#if defined(OLD_OOC_NOPANEL) 2369 id%KEEP(201)=2 2370#else 2371 id%KEEP(201)=1 2372#endif 2373 ENDIF 2374 ENDIF 2375C 2376C ---------------------------- 2377C Save id%ICNTL(18) (distributed 2378C matrix on entry) in id%KEEP(54) 2379C ---------------------------- 2380 id%KEEP(54) = id%ICNTL(18) 2381 IF ( id%KEEP(54) .LT. 0 .or. id%KEEP(54).GT.3 ) THEN 2382 IF ( PROKG ) THEN 2383 WRITE(MPG, *) ' Out-of-range value for id%ICNTL(18).' 2384 WRITE(MPG, *) ' Used 0 ie matrix not distributed' 2385 END IF 2386 id%KEEP(54) = 0 2387 END IF 2388 IF ( id%KEEP(54) .EQ. 1 ) THEN 2389 IF ( PROKG ) THEN 2390 WRITE(MPG, *) ' Option kept for backward compatibility.' 2391 WRITE(MPG, *) ' We recommend not to use it.' 2392 WRITE(MPG, *) ' It will disappear in a future release' 2393 END IF 2394 END IF 2395C ----------------------------------------- 2396C Save id%ICNTL(5) (matrix format) in id%KEEP(55) 2397C ----------------------------------------- 2398 id%KEEP(55) = id%ICNTL(5) 2399 IF ( id%KEEP(55) .LT. 0 .OR. id%KEEP(55) .GT. 1 ) THEN 2400 IF ( PROKG ) THEN 2401 WRITE(MPG, *) ' Out-of-range value for id%ICNTL(5).' 2402 WRITE(MPG, *) ' Used 0 ie matrix is assembled' 2403 END IF 2404 id%KEEP(55) = 0 2405 END IF 2406 id%KEEP(60) = id%ICNTL(19) 2407 IF ( id%KEEP( 60 ) .LE. 0 ) id%KEEP( 60 ) = 0 2408 IF ( id%KEEP( 60 ) .GT. 3 ) id%KEEP( 60 ) = 0 2409 IF (id%KEEP(60) .NE. 0 .AND. id%SIZE_SCHUR == 0 ) THEN 2410 IF (PROKG) THEN 2411 WRITE(MPG,'(A)') 2412 & ' ** Schur option ignored because SIZE_SCHUR=0' 2413 ENDIF 2414 id%KEEP(60)=0 2415 END IF 2416C --------------------------------------- 2417C Save SIZE_SCHUR in a KEEP, for possible 2418C check at factorization and solve phases 2419C --------------------------------------- 2420 IF ( id%KEEP(60) .NE.0 ) THEN 2421 id%KEEP(116) = id%SIZE_SCHUR 2422 IF (id%SIZE_SCHUR .LT. 0 .OR. id%SIZE_SCHUR .GE. id%N) THEN 2423 id%INFO(1)=-49 2424 id%INFO(2)=id%SIZE_SCHUR 2425 RETURN 2426 ENDIF 2427C List of Schur variables provided by user. 2428 IF ( .NOT. associated( id%LISTVAR_SCHUR ) ) THEN 2429 id%INFO(1) = -22 2430 id%INFO(2) = 8 2431 RETURN 2432 ELSE IF (size(id%LISTVAR_SCHUR)<id%SIZE_SCHUR) THEN 2433 id%INFO(1) = -22 2434 id%INFO(2) = 8 2435 RETURN 2436 END IF 2437 ENDIF 2438 IF (id%KEEP(60) .EQ. 3 .AND. id%KEEP(50).NE.0) THEN 2439 IF (id%MBLOCK > 0 .AND. id%NBLOCK > 0 .AND. 2440 & id%NPROW > 0 .AND. id%NPCOL > 0 ) THEN 2441 IF (id%NPROW *id%NPCOL .LE. id%NSLAVES) THEN 2442C We will eventually have to "symmetrize the 2443C Schur complement. For that NBLOCK and MBLOCK 2444C must be equal. 2445 IF (id%MBLOCK .NE. id%NBLOCK ) THEN 2446 id%INFO(1)=-31 2447 id%INFO(2)=id%MBLOCK - id%NBLOCK 2448 RETURN 2449 ENDIF 2450 ENDIF 2451 ENDIF 2452 ENDIF 2453C Check the ordering strategy and compatibility with 2454C other control parameters 2455 id%KEEP(244) = id%ICNTL(28) 2456 id%KEEP(245) = id%ICNTL(29) 2457#if ! defined(parmetis) && ! defined(parmetis3) 2458 IF ((id%KEEP(244) .EQ. 2) .AND. (id%KEEP(245) .EQ. 2)) THEN 2459 id%INFO(1) = -38 2460 IF ( LPOK ) THEN 2461 WRITE(LP,'("ParMETIS not available.")') 2462 END IF 2463 RETURN 2464 END IF 2465#endif 2466#if ! defined(ptscotch) 2467 IF ((id%KEEP(244) .EQ. 2) .AND. (id%KEEP(245) .EQ. 1)) THEN 2468 id%INFO(1) = -38 2469 IF ( LPOK ) THEN 2470 WRITE(LP,'("PT-SCOTCH not available.")') 2471 END IF 2472 RETURN 2473 END IF 2474#endif 2475C Analysis strategy is set to automatic in case of out-of-range values. 2476 IF((id%KEEP(244) .GT. 2) .OR. 2477 & (id%KEEP(244) .LT. 0)) id%KEEP(244)=0 2478 IF(id%KEEP(244) .EQ. 0) THEN ! Automatic 2479C One could check for availability of parallel ordering 2480C tools, or for possible options incompatible with // 2481C analysis to decide (e.g. avoid returning an error if 2482C // analysis not compatible with some option but user 2483C lets MUMPS decide to choose sequential or paralllel 2484C analysis) 2485C Current strategy for automatic is sequential analysis 2486 id%KEEP(244) = 1 2487 ELSE IF (id%KEEP(244) .EQ. 2) THEN 2488 IF(id%KEEP(55) .NE. 0) THEN 2489 id%INFO(1) = -39 2490 IF (LPOK) THEN 2491 WRITE(LP, 2492 & '("Incompatible values for ICNTL(5), ICNTL(28)")') 2493 WRITE(LP, 2494 & '("Parallel analysis is not possible if the")') 2495 WRITE(LP, 2496 & '("matrix is not assembled")') 2497 ENDIF 2498 RETURN 2499 ELSE IF(id%KEEP(60) .NE. 0) THEN 2500 id%INFO(1) = -39 2501 IF (LPOK) THEN 2502 WRITE(LP, 2503 & '("Incompatible values for ICNTL(19), ICNTL(28)")') 2504 WRITE(LP, 2505 & '("Parallel analysis is not possible if SCHUR")') 2506 WRITE(LP, 2507 & '("complement must be returned")') 2508 ENDIF 2509 RETURN 2510 END IF 2511C In the case where there are too few processes to do 2512C the parallel analysis we simply revert to sequential version 2513 IF(id%NSLAVES .LT. 2) THEN 2514 id%KEEP(244) = 1 2515 IF(PROKG) WRITE(MPG, 2516 & '("Too few processes. 2517 & Reverting to sequential analysis")',advance='no') 2518 IF(id%KEEP(245) .EQ. 1) THEN 2519C Scotch necessarily available because pt-scotch 2520C is, otherwise an error would have occurred 2521 IF(PROKG) WRITE(MPG, '(" with SCOTCH.")') 2522 id%KEEP(256) = 3 2523 ELSE IF(id%KEEP(245) .EQ. 2) THEN 2524C Metis necessarily available because parmetis 2525C is, otherwise an error would have occurred 2526 IF(PROKG) WRITE(MPG, '(" with Metis.")') 2527 id%KEEP(256) = 5 2528 ELSE 2529 IF(PROKG) WRITE(MPG, '(".")') 2530 id%KEEP(256) = 7 2531 END IF 2532 END IF 2533C In the case where there the input matrix is too small to do 2534C the parallel analysis we simply revert to sequential version 2535 IF(id%N .LE. 50) THEN 2536 id%KEEP(244) = 1 2537 IF(PROKG) WRITE(MPG, 2538 & '("Input matrix is too small for the parallel 2539 & analysis. Reverting to sequential analysis")',advance='no') 2540 IF(id%KEEP(245) .EQ. 1) THEN 2541 IF(PROKG) WRITE(MPG, '(" with SCOTCH.")') 2542 id%KEEP(256) = 3 2543 ELSE IF(id%KEEP(245) .EQ. 2) THEN 2544 IF(PROKG) WRITE(MPG, '(" with Metis.")') 2545 id%KEEP(256) = 5 2546 ELSE 2547 IF(PROKG) WRITE(MPG, '(".")') 2548 id%KEEP(256) = 7 2549 END IF 2550 END IF 2551 END IF 2552 id%INFOG(32) = id%KEEP(244) 2553 IF ( (id%KEEP(244) .EQ. 1) .AND. 2554 & (id%KEEP(256) .EQ. 1) ) THEN 2555C ordering given, PERM_IN must be of size N 2556 IF ( .NOT. associated( id%PERM_IN ) ) THEN 2557 id%INFO(1) = -22 2558 id%INFO(2) = 3 2559 RETURN 2560 ELSE IF ( size( id%PERM_IN ) < id%N ) THEN 2561 id%INFO(1) = -22 2562 id%INFO(2) = 3 2563 RETURN 2564 END IF 2565 ENDIF 2566C Check KEEP(9-10) for level 2 2567 IF (id%KEEP(9) .LE. 1 ) id%KEEP(9) = 500 2568 IF ( id%KEEP8(21) .GT. 0_8 ) THEN 2569 IF ((id%KEEP8(21).LE.1_8) .OR. 2570 & (id%KEEP8(21).GT.int(id%KEEP(9),8))) 2571 & id%KEEP8(21) = int(min(id%KEEP(9),100),8) 2572 ENDIF 2573C 2574 IF (id%KEEP(48). EQ. 1 ) id%KEEP(48) = -12345 2575C 2576 IF ( (id%KEEP(48).LT.0) .OR. (id%KEEP(48).GT.5) ) THEN 2577 id%KEEP(48)=5 2578 ENDIF 2579C Schur 2580C Given ordering must be compatible with Schur variables. 2581 IF ( (id%KEEP(60) .NE. 0) .AND. (id%KEEP(256) .EQ. 1) ) THEN 2582 DO I = 1, id%SIZE_SCHUR 2583 IF (id%PERM_IN(id%LISTVAR_SCHUR(I)) 2584 & .EQ. id%N-id%SIZE_SCHUR+I) 2585 & CYCLE 2586C ------------------------------- 2587C Problem with PERM_IN: -22/3 2588C Above constrained explained in 2589C doc of PERM_IN in user guide. 2590C ------------------------------- 2591 id%INFO(1) = -4 2592 id%INFO(2) = id%LISTVAR_SCHUR(I) 2593 RETURN 2594 IF (PROKG) THEN 2595 WRITE(MPG,'(A)') 2596 & ' ** Ignoring user-ordering, because incompatible with Schur.' 2597 WRITE(MPG,'(A)') ' ** id%ICNTL(7) treated as 0.' 2598 END IF 2599 EXIT 2600 ENDDO 2601 END IF 2602C 2603C Note that schur is not compatible with 2604C 2605C 1/Max-trans DONE 2606C 2/Null space 2607C 3/Ordering given DONE 2608C 4/Scaling 2609C 5/Iterative Refinement 2610C 6/Error analysis 2611C 7/Parallel Analysis 2612* 2613* Graph modification prior to ordering (id%ICNTL(12) option) 2614* id%KEEP (95) will hold the eventually modified value of id%ICNTL(12) 2615* 2616 id%KEEP(95) = id%ICNTL(12) 2617 IF (id%KEEP(50).NE.2) id%KEEP(95) = 1 2618 IF ((id%KEEP(95).GT.3).OR.(id%KEEP(95).LT.0)) id%KEEP(95) = 0 2619C MAX-TRANS 2620C 2621C id%KEEP (23) will hold the eventually modified value of id%ICNTL(6) 2622C (maximum transversal if >= 1) 2623C 2624 id%KEEP(23) = id%ICNTL(6) 2625C 2626C 2627C -------------------------------------------- 2628C Avoid max-trans unsymmetric permutation in case of 2629C ordering is given, 2630C or matrix is in element form, or Schur is asked 2631C or initial matrix is distributed 2632C -------------------------------------------- 2633 IF (id%KEEP(23).LT.0.OR.id%KEEP(23).GT.7) id%KEEP(23) = 7 2634C still forbid max trans for LLT 2635 IF ( id%KEEP(50) .EQ. 1 ) THEN 2636 IF (id%KEEP(23) .NE. 0) THEN 2637 IF (PROKG) THEN 2638 WRITE(MPG,'(A)') 2639 & ' ** Max-trans not compatible with LLT factorization' 2640 END IF 2641 id%KEEP(23) = 0 2642 ENDIF 2643 IF (id%KEEP(95) .GT. 1) THEN 2644 IF (PROKG) THEN 2645 WRITE(MPG,'(A)') 2646 & ' ** ICNTL(12) ignored: not compatible with LLT factorization' 2647 END IF 2648 ENDIF 2649 id%KEEP(95) = 1 2650 END IF 2651C 2652 IF (id%KEEP(60) .GT. 0) THEN 2653 IF (id%KEEP(23) .NE. 0) THEN 2654 IF (PROKG) THEN 2655 WRITE(MPG,'(A)') 2656 & ' ** Max-trans not allowed because of Schur' 2657 END IF 2658 id%KEEP(23) = 0 2659 ENDIF 2660 IF (id%KEEP(52).NE.0) THEN 2661 IF (PROKG) THEN 2662 WRITE(MPG,'(A)') 2663 & ' ** Scaling during analysis not allowed because of Schur' 2664 ENDIF 2665 id%KEEP(52) = 0 2666 ENDIF 2667C also forbid compressed/constrained ordering... 2668 IF (id%KEEP(95) .GT. 1) THEN 2669 IF (PROKG) THEN 2670 WRITE(MPG,'(A)') 2671 & ' ** ICNTL(12) option not allowed because of Schur' 2672 END IF 2673 ENDIF 2674 id%KEEP(95) = 1 2675 END IF 2676 IF ( (id%KEEP(23) .NE. 0) .AND. (id%KEEP(256).EQ.1)) THEN 2677 id%KEEP(23) = 0 2678 id%KEEP(95) = 1 2679 IF (PROKG) THEN 2680 WRITE(MPG,'(A)') 2681 & ' ** Max-trans not allowed because ordering is given' 2682 END IF 2683 END IF 2684 IF ( id%KEEP(256) .EQ. 1 ) THEN 2685 IF (id%KEEP(95) > 1 .AND. PROKG) THEN 2686 WRITE(MPG,'(A)') 2687 & ' ** ICNTL(12) option incompatible with given ordering' 2688 END IF 2689 id%KEEP(95) = 1 2690 END IF 2691 IF (id%KEEP(54) .NE. 0) THEN 2692 IF( id%KEEP(23) .NE. 0 ) THEN 2693 IF (PROKG) THEN 2694 WRITE(MPG,'(A)') 2695 & ' ** Max-trans not allowed because matrix is distributed' 2696 END IF 2697 id%KEEP(23) = 0 2698 ENDIF 2699 IF (id%KEEP(52).EQ.-2) THEN 2700C Only Ruiz & Bora scaling available for dist format 2701C (Work supported by ANR-SOLSTICE (ANR-06-CIS6-010)) 2702 IF (PROKG) THEN 2703 WRITE(MPG,'(A)') 2704 & ' ** Scaling during analysis not allowed (matrix is distributed)' 2705 ENDIF 2706 ENDIF 2707 id%KEEP(52) = 0 2708 IF (id%KEEP(95) .GT. 1 .AND. MPG.GT.0) THEN 2709 WRITE(MPG,'(A)') 2710 & ' ** ICNTL(12) option not allowed because matrix is 2711 &distributed' 2712 ENDIF 2713 id%KEEP(95) = 1 2714 END IF 2715 IF ( id%KEEP(55) .NE. 0 ) THEN 2716 IF( id%KEEP(23) .NE. 0 ) THEN 2717 IF (PROKG) THEN 2718 WRITE(MPG,'(A)') 2719 & ' ** Max-trans not allowed for element matrix' 2720 END IF 2721 id%KEEP(23) = 0 2722 ENDIF 2723 IF (PROKG .AND. id%KEEP(52).EQ.-2) THEN 2724 WRITE(MPG,'(A)') 2725 & ' ** Scaling not allowed at analysis for element matrix' 2726 ENDIF 2727 id%KEEP(52) = 0 2728 id%KEEP(95) = 1 2729 ENDIF 2730C In the case where parallel analysis is done, column permutation 2731C is not allowed 2732 IF(id%KEEP(244) .EQ. 2) THEN 2733 IF(id%KEEP(23) .EQ. 7) THEN 2734C Automatic hoice: set it to 0 2735 id%KEEP(23) = 0 2736 ELSE IF (id%KEEP(23) .GT. 0) THEN 2737 id%INFO(1) = -39 2738 id%KEEP(23) = 0 2739 IF (LPOK) THEN 2740 WRITE(LP, 2741 & '("Incompatible values for ICNTL(6), ICNTL(28)")') 2742 WRITE(LP, 2743 & '("Maximum transversal not allowed 2744 & in parallel analysis")') 2745 ENDIF 2746 RETURN 2747 END IF 2748 END IF 2749C -------------------------------------------- 2750C Avoid distributed entry for element matrix. 2751C -------------------------------------------- 2752 IF ( id%KEEP(54) .NE. 0 .AND. id%KEEP(55) .NE. 0 ) THEN 2753 id%KEEP(54) = 0 2754 IF (PROKG) THEN 2755 WRITE(MPG,'(A)') 2756 & ' ** Distributed entry not available for element matrix' 2757 END IF 2758 ENDIF 2759C ---------------------------------- 2760C Choice of symbolic analysis option 2761C ---------------------------------- 2762 IF (id%ICNTL(39).NE.1 .and. id%ICNTL(39).NE.2) THEN 2763 id%KEEP(106)=1 2764C Automatic choice leads to new symbolic 2765C factorization except(see below) if KEEP(256)==1. 2766 ELSE 2767 id%KEEP(106)=id%ICNTL(39) 2768 ENDIF 2769C modify input parameters to avoid incompatible 2770C input data between ordering, scaling and maxtrans 2771C note that if id%ICNTL(12)/id%KEEP(95) = 0 then 2772C the automatic choice will be done in ANA_O 2773 IF(id%KEEP(50) .EQ. 2) THEN 2774C LDLT case 2775 IF( .NOT. associated(id%A) ) THEN 2776C constraint ordering can be computed only if values are 2777C given to analysis 2778 IF(id%KEEP(95) .EQ. 3) THEN 2779 id%KEEP(95) = 2 2780 ENDIF 2781 ENDIF 2782 IF(id%KEEP(95) .EQ. 3 .AND. id%KEEP(256) .NE. 2) THEN 2783C if constraint and ordering is not AMF then use compress 2784 IF (PROK) WRITE(MP,*) 2785 & 'WARNING: ZMUMPS_ANA_O constrained ordering not ', 2786 & 'available with selected ordering' 2787 id%KEEP(95) = 2 2788 ENDIF 2789 IF(id%KEEP(95) .EQ. 3) THEN 2790C if constraint ordering required then we need to compute scaling 2791C and max trans 2792C NOTE that if we enter this condition then 2793C id%A is associated because of the test above: 2794C (IF( .NOT. associated(id%A) ) THEN) 2795 id%KEEP(23) = 5 2796 id%KEEP(52) = -2 2797 ELSE IF(id%KEEP(95) .EQ. 2 .AND. 2798 & (id%KEEP(23) .EQ. 0 .OR. id%KEEP(23) .EQ. 7) ) THEN 2799C compressed ordering requires max trans but not necessary scaling 2800 IF( associated(id%A) ) THEN 2801 id%KEEP(23) = 5 2802 ELSE 2803C we can do compressed ordering without 2804C information on the numerical values: 2805C a maximum transversal already provides 2806C information on the location of off-diagonal 2807C nonzeros which can be candidates for 2x2 2808C pivots 2809 id%KEEP(23) = 1 2810 ENDIF 2811 ELSE IF(id%KEEP(95) .EQ. 1) THEN 2812 id%KEEP(23) = 0 2813 ELSE IF(id%KEEP(95) .EQ. 0 .AND. id%KEEP(23) .EQ. 0) THEN 2814C if max trans desactivated then the automatic choice for type of ord 2815C is set to 1, which means that we will use usual ordering 2816C (no constraints or compression) 2817 id%KEEP(95) = 1 2818 ENDIF 2819 ELSE 2820 id%KEEP(95) = 1 2821 ENDIF 2822C -------------------------------- 2823C Save ICNTL(16) (QR) in KEEP(53) 2824C Will be broadcasted to all other 2825C nodes in routine ZMUMPS_BDCAST 2826C -------------------------------- 2827 id%KEEP(53)=0 2828 IF(id%KEEP(86).EQ.1)THEN 2829C Force the exchange of both the memory and flops information during 2830C the factorization 2831 IF(id%KEEP(47).LT.2) id%KEEP(47)=2 2832 ENDIF 2833 IF(id%KEEP(48).EQ.5)THEN 2834 IF(id%KEEP(50).EQ.0)THEN 2835 id%KEEP(87)=50 2836 id%KEEP(88)=50 2837 ELSE 2838 id%KEEP(87)=70 2839 id%KEEP(88)=70 2840 ENDIF 2841 ENDIF 2842 IF((id%NSLAVES.EQ.1).AND.(id%KEEP(76).GT.3))THEN 2843 id%KEEP(76)=2 2844 ENDIF 2845 IF(id%KEEP(81).GT.0)THEN 2846 IF(id%KEEP(47).LT.2) id%KEEP(47)=2 2847 ENDIF 2848C 2849C -- Block low rank input parameter checking 2850 id%KEEP(486) = id%ICNTL(35) 2851C KEEP(486)!=0,1 => KEEP(486)=0 2852 IF (id%KEEP(486).NE.1) id%KEEP(486) = 0 2853 IF(id%KEEP(486).NE.0) THEN 2854C tests that may switch off BLR 2855C 2856C LR is incompatible with elemental matrices 2857 IF (id%KEEP(55).NE.0) THEN 2858 IF (PROK) WRITE(MP,*) 2859 & "WARNING: BLR feature currently incompatible " 2860 & ,"with elemental matrices" 2861C Switch off BLR 2862 id%KEEP(486)=0 2863 ENDIF 2864C 2865C LR incompatible with forward in facto in facto 2866 IF (id%KEEP(252).NE.0) THEN 2867 IF (PROK) WRITE(MP,*) 2868 & "WARNING: BLR feature currently incompatible " 2869 & ,"with forward during factorization" 2870C Switch off BLR 2871 id%KEEP(486)=0 2872 ENDIF 2873 IF((id%KEEP(492).EQ.0)) THEN 2874 id%KEEP(486)=0 2875 ENDIF 2876 ENDIF 2877C 2878 IF(id%KEEP(486).NE.0) THEN 2879C id%KEEP(469)=0,1,2,3,4 2880 IF ((id%KEEP(469).GT.4).OR.(id%KEEP(469).LT.0)) THEN 2881 id%KEEP(469)=0 2882 ENDIF 2883C Not implemented yet 2884 IF (id%KEEP(469).EQ.4) id%KEEP(469)=0 2885C id%KEEP(470)=0 or 1 2886 IF ((id%KEEP(470).NE.0).AND.(id%KEEP(470).NE.1)) THEN 2887 id%KEEP(470)=1 2888 ENDIF 2889C id%KEEP(471)=-1,0,1 2890 IF ((id%KEEP(471).LT.-1).AND.(id%KEEP(471).GT.1)) THEN 2891 id%KEEP(471)=-1 2892 ENDIF 2893C id%KEEP(472)=0 or 1 2894 IF ((id%KEEP(472).NE.0).AND.(id%KEEP(472).NE.1)) THEN 2895 id%KEEP(472)=1 2896 ENDIF 2897C id%KEEP(473)=0 or 1 2898 IF ((id%KEEP(473).NE.0).AND.(id%KEEP(473).NE.1)) THEN 2899 id%KEEP(473)=0 2900 ENDIF 2901C id%KEEP(479)>0 2902 IF (id%KEEP(479).LE.0) THEN 2903 id%KEEP(479)=4 2904 ENDIF 2905C id%KEEP(474)=0,1,2,3 2906 IF ((id%KEEP(474).GT.3).OR.(id%KEEP(474).LT.0)) THEN 2907 id%KEEP(474)=0 2908 ENDIF 2909 IF (id%KEEP(474).NE.0.AND.id%KEEP(480).EQ.0) THEN 2910 id%KEEP(474) = 0 2911 write(*,*) 'KEEP(480) = 0 => Resetting KEEP(474) to 0' 2912 ENDIF 2913 IF (id%KEEP(478).NE.0.AND.id%KEEP(480).LT.4) THEN 2914 id%KEEP(478) = 0 2915 write(*,*) 'KEEP(480) < 4 => Resetting KEEP(478) to 0' 2916 ENDIF 2917C In LUA strategy KEEP(480)>=5, we exploit LRTRSM to further 2918C reduce the flops. It requires KEEP(475)>=2. 2919 IF (id%KEEP(480).GE.5 .OR. 2920 & (id%KEEP(480).NE.0.AND.id%KEEP(474).EQ.3)) THEN 2921 IF (id%KEEP(475).LT.2) THEN 2922 IF (id%KEEP(474).EQ.3) THEN 2923 write(*,*) 'KEEP(480) = ',id%KEEP(480), 2924 & ' and KEEP(474) = 3 ', 2925 & 'requires KEEP(475) >= 2, but it is = ', id%KEEP(475) 2926 ELSE 2927 write(*,*) 'KEEP(480) = ',id%KEEP(480), 2928 & 'requires KEEP(475) >= 2, but it is = ', id%KEEP(475) 2929 ENDIF 2930C Reset to 3 if 5 or to 4 if 6 2931 id%KEEP(480) = id%KEEP(480) - 2 2932 write(*,*) ' Resetting KEEP(480) to ', id%KEEP(480) 2933 ENDIF 2934 ENDIF 2935C id%KEEP(481)=0,1,2 2936 IF ((id%KEEP(481).GT.2).OR.(id%KEEP(481).LT.0)) THEN 2937 id%KEEP(481)=0 2938 ENDIF 2939C id%KEEP(482)=0,1,2,3 2940 IF ((id%KEEP(482).GT.3).OR.(id%KEEP(482).LT.0)) THEN 2941 id%KEEP(482)=0 2942 ENDIF 2943C id%KEEP(476) \in [1,100] 2944 IF ((id%KEEP(476).GT.100).OR.(id%KEEP(476).LT.1)) THEN 2945 id%KEEP(476)= 50 2946 ENDIF 2947C id%KEEP(477) \in [1,100] 2948 IF ((id%KEEP(477).GT.100).OR.(id%KEEP(477).LT.1)) THEN 2949 id%KEEP(477)= 100 2950 ENDIF 2951C id%KEEP(483) \in [1,100] 2952 IF ((id%KEEP(483).GT.100).OR.(id%KEEP(483).LT.1)) THEN 2953 id%KEEP(483)= 50 2954 ENDIF 2955C id%KEEP(484) \in [1,100] 2956 IF ((id%KEEP(484).GT.100).OR.(id%KEEP(484).LT.1)) THEN 2957 id%KEEP(484)= 50 2958 ENDIF 2959C id%KEEP(485)>0 2960 IF((id%KEEP(485).LT.0)) THEN 2961 id%KEEP(485)= 1 2962 ENDIF 2963 IF((id%KEEP(487).LT.0)) THEN 2964 id%KEEP(487)= 2 ! default value 2965 ENDIF 2966C id%KEEP(488)>0 2967 IF((id%KEEP(488).LE.0)) THEN 2968 id%KEEP(488)= 8*id%KEEP(6) 2969 ENDIF 2970C id%KEEP(489)=0 or 1 2971 IF ((id%KEEP(489).NE.0).AND.(id%KEEP(489).NE.1)) THEN 2972 id%KEEP(489)=0 2973 ENDIF 2974C id%KEEP(490)>0 2975 IF((id%KEEP(490).LE.0)) THEN 2976 id%KEEP(490) = 128 2977 ENDIF 2978C KEEP(491)>0 2979 IF((id%KEEP(491).LE.0)) THEN 2980 id%KEEP(491) = 1000 2981 ENDIF 2982 ENDIF 2983C 2984C end id%MYID.EQ.MASTER 2985 END IF 2986 RETURN 2987 END SUBROUTINE ZMUMPS_ANA_CHECK_KEEP 2988 SUBROUTINE ZMUMPS_GATHER_MATRIX(id) 2989C This subroutine gathers a distributed matrix 2990C on the host node 2991 USE ZMUMPS_STRUC_DEF 2992 IMPLICIT NONE 2993 INCLUDE 'mpif.h' 2994 INCLUDE 'mumps_tags.h' 2995 TYPE(ZMUMPS_STRUC) :: id 2996C local variables 2997 INTEGER, ALLOCATABLE :: REQPTR(:,:) 2998 INTEGER(8), ALLOCATABLE :: MATPTR(:) 2999 INTEGER(8), ALLOCATABLE :: MATPTR_cp(:) 3000 INTEGER(8) :: IBEG8, IEND8 3001 INTEGER :: MASTER, IERR, INDX 3002 INTEGER :: STATUS(MPI_STATUS_SIZE) 3003 INTEGER :: LP, MP, MPG, I, K 3004 INTEGER(8) :: I8 3005 LOGICAL :: PROK, PROKG 3006C 3007C messages are split into blocks of size BLOCKSIZE 3008C (smaller than IOVFLO (=2^31-1)) 3009C on all processors 3010 INTEGER(4) :: IOVFLO 3011 INTEGER :: BLOCKSIZE 3012 INTEGER :: MAX_NBBLOCK_loc, NBBLOCK_loc 3013 INTEGER :: SIZE_SENT, NRECV 3014 LOGICAL :: OMP_FLAG, I_AM_SLAVE 3015 INTEGER(8) :: NZ_loc8 3016C for validation only: 3017 INTEGER :: NB_BLOCKS, NB_BLOCK_SENT 3018 PARAMETER( MASTER = 0 ) 3019 LP = id%ICNTL( 1 ) 3020 MP = id%ICNTL( 2 ) 3021 MPG = id%ICNTL( 3 ) 3022C LP : errors 3023C MP : INFO 3024 PROK = (( MP .GT. 0 ).AND.(id%ICNTL(4).GE.2)) 3025 PROKG = ( MPG .GT. 0 .and. id%MYID .eq. MASTER ) 3026 PROKG = (PROKG.AND.(id%ICNTL(4).GE.2)) 3027 I_AM_SLAVE = ( id%MYID .ne. MASTER .OR. 3028 & ( id%MYID .eq. MASTER .AND. 3029 & id%KEEP(46) .eq. 1 ) ) 3030C test IRN_loc and JCN_loc allocated on working procs 3031 IF (I_AM_SLAVE .AND. id%KEEP8(29).GT.0 .AND. 3032 & ( (.NOT. associated(id%IRN_loc)) .OR. 3033 & (.NOT. associated(id%JCN_loc)) ) 3034 & ) THEN 3035 id%INFO(1) = -22 3036 id%INFO(2) = 16 3037 GOTO 13 3038 ENDIF 3039C iovflo = huge(INTEGER, kind=4) 3040 IOVFLO = huge(IOVFLO) 3041C we do not want too large messages 3042 BLOCKSIZE = int(max(100000_8,int(IOVFLO,8)/20_8)) 3043 IF ( id%KEEP(46) .EQ. 0 .AND. id%MYID .EQ. MASTER ) THEN 3044C host-node mode: master has no entries. 3045 id%KEEP8(29) = 0_8 3046 END IF 3047 IF ( id%MYID .eq. MASTER ) THEN 3048C ----------------------------------- 3049C Allocate small arrays for pointers 3050C into arrays IRN/JCN 3051C ----------------------------------- 3052 ALLOCATE( MATPTR( id%NPROCS ), STAT = IERR ) 3053 IF ( IERR .GT. 0 ) THEN 3054 id%INFO(1) = -7 3055 id%INFO(2) = id%NPROCS 3056 IF ( LP .GT. 0 ) THEN 3057 WRITE(LP, 150) ' array MATPTR' 3058 END IF 3059 GOTO 13 3060 END IF 3061 ALLOCATE( MATPTR_cp( id%NPROCS ), STAT = IERR ) 3062 IF ( IERR .GT. 0 ) THEN 3063 id%INFO(1) = -7 3064 id%INFO(2) = id%NPROCS 3065 IF ( LP .GT. 0 ) THEN 3066 WRITE(LP, 150) ' array MATPTR' 3067 END IF 3068 GOTO 13 3069 END IF 3070C ----------------------------------- 3071C Allocate a small array for requests 3072C ----------------------------------- 3073 ALLOCATE( REQPTR( id%NPROCS-1, 2 ), STAT = IERR ) 3074 IF ( IERR .GT. 0 ) THEN 3075 id%INFO(1) = -7 3076 id%INFO(2) = 2 * (id%NPROCS-1) 3077 IF ( LP .GT. 0 ) THEN 3078 WRITE(LP, 150) 'array REQPTR' 3079 END IF 3080 GOTO 13 3081 END IF 3082C -------------------- 3083C Allocate now IRN/JCN 3084C -------------------- 3085 ALLOCATE( id%IRN( id%KEEP8(28) ), STAT = IERR ) 3086 IF ( IERR .GT. 0 ) THEN 3087 id%INFO(1) = -7 3088 CALL MUMPS_SETI8TOI4(id%KEEP8(28),id%INFO(2)) 3089 IF ( LP .GT. 0 ) THEN 3090 WRITE(LP, 150) 'array IRN' 3091 END IF 3092 GOTO 13 3093 END IF 3094 ALLOCATE( id%JCN( id%KEEP8(28) ), STAT = IERR ) 3095 IF ( IERR .GT. 0 ) THEN 3096 id%INFO(1) = -7 3097 CALL MUMPS_SETI8TOI4(id%KEEP8(28),id%INFO(2)) 3098 IF ( LP .GT. 0 ) THEN 3099 WRITE(LP, 150) 'array JCN' 3100 END IF 3101 GOTO 13 3102 END IF 3103 END IF 3104 13 CONTINUE 3105C Propagate errors 3106 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1), 3107 & id%COMM, id%MYID ) 3108 IF ( id%INFO(1) < 0 ) RETURN 3109C ------------------------------------- 3110C Get numbers of non-zeros for everyone 3111C and count total and maximum 3112C nb of blocks of size BLOCKSIZE 3113C that slaves will sent 3114C ------------------------------------- 3115 IF ( id%MYID .EQ. MASTER ) THEN 3116C each block will correspond to 2 messages (IRN_LOC,JCN_LOC) 3117 NB_BLOCK_SENT = 0 3118 MAX_NBBLOCK_loc = 0 3119 DO I = 1, id%NPROCS - 1 3120 CALL MPI_RECV( MATPTR( I+1 ), 1, 3121 & MPI_INTEGER8, I, 3122 & COLLECT_NZ, id%COMM, STATUS, IERR ) 3123 NBBLOCK_loc = ceiling(dble(MATPTR(I+1))/dble(BLOCKSIZE)) 3124 MAX_NBBLOCK_loc = max(MAX_NBBLOCK_loc, NBBLOCK_loc) 3125 NB_BLOCK_SENT = NB_BLOCK_SENT + NBBLOCK_loc 3126 END DO 3127 IF ( id%KEEP(46) .eq. 0 ) THEN 3128 MATPTR( 1 ) = 1_8 3129 ELSE 3130 NZ_loc8=id%KEEP8(29) 3131 MATPTR( 1 ) = NZ_loc8 + 1_8 3132 END IF 3133C -------------- 3134C Build pointers 3135C -------------- 3136 DO I = 2, id%NPROCS 3137 MATPTR( I ) = MATPTR( I ) + MATPTR( I-1 ) 3138 END DO 3139 ELSE 3140 NZ_loc8=id%KEEP8(29) 3141 CALL MPI_SEND( NZ_loc8, 1, MPI_INTEGER8, MASTER, 3142 & COLLECT_NZ, id%COMM, IERR ) 3143 END IF 3144 IF ( id%MYID .eq. MASTER ) THEN 3145C ----------------------------------------------- 3146C Bottleneck is here master; use synchronous send 3147C for slaves, but asynchronous receives on master 3148C Then while master receives indices do the local 3149C copies for better overlap. 3150C (If master has other things to do, he could try 3151C to do them here.) 3152C ------------------------------------ 3153C copy pointers to position in IRN/JCN 3154 MATPTR_cp = MATPTR 3155 IF ( id%KEEP8(29) .NE. 0_8 ) THEN 3156 OMP_FLAG = ( id%KEEP8(29).GE.50000_8 ) 3157!$OMP PARALLEL DO PRIVATE(I8) 3158!$OMP& IF(OMP_FLAG) 3159 DO I8=1,id%KEEP8(29) 3160 id%IRN(I8) = id%IRN_loc(I8) 3161 id%JCN(I8) = id%JCN_loc(I8) 3162 ENDDO 3163!$OMP END PARALLEL DO 3164 ENDIF 3165C 3166C Compute position for each block to be received 3167C and store it. 3168 NB_BLOCKS = 0 3169C at least one slave will send MAX_NBBLOCK_loc 3170C couple of messages (IRN_loc/JCN_loc) 3171 DO K = 1, MAX_NBBLOCK_loc 3172C Post irecv for all messages from proc I 3173C that have been sent 3174 NRECV = 0 3175 DO I = 1, id%NPROCS - 1 3176C Check if message was sent 3177 IBEG8 = MATPTR_cp( I ) 3178 IF ( IBEG8 .LT. MATPTR(I+1)) THEN 3179C Count number of request in NRECV 3180 NRECV = NRECV + 2 3181 IEND8 = min(IBEG8+int(BLOCKSIZE,8)-1_8, 3182 & MATPTR(I+1)-1_8) 3183C update pointer for receiving messages 3184C from proc I in MATPTR_cp: 3185 MATPTR_cp( I ) = IEND8 + 1_8 3186 SIZE_SENT = int(IEND8 - IBEG8 + 1_8) 3187 NB_BLOCKS = NB_BLOCKS + 1 3188C 3189 CALL MPI_IRECV( id%IRN(IBEG8), SIZE_SENT, MPI_INTEGER, 3190 & I, COLLECT_IRN, id%COMM, REQPTR(I,1), IERR ) 3191C 3192 CALL MPI_IRECV( id%JCN(IBEG8), SIZE_SENT, MPI_INTEGER, 3193 & I, COLLECT_JCN, id%COMM, REQPTR(I,2), IERR ) 3194 ELSE 3195 REQPTR( I,1 ) = MPI_REQUEST_NULL 3196 REQPTR( I,2 ) = MPI_REQUEST_NULL 3197 ENDIF 3198 END DO 3199C Wait set of messages corresponding to current block 3200C ( we dont exploit the fact that 3201C messages are not overtaking 3202C (if sent by one source to the same destination) ) 3203C 3204C Loop on only non MPI_REQUEST_NULL requests 3205 DO I = 1, NRECV 3206 CALL MPI_WAITANY 3207 & ( 2 * (id%NPROCS-1), REQPTR( 1, 1 ), INDX, 3208 & STATUS, IERR ) 3209 ENDDO 3210C 3211C process next block 3212 END DO 3213 DEALLOCATE( REQPTR ) 3214 DEALLOCATE( MATPTR ) 3215 DEALLOCATE( MATPTR_cp ) 3216C end of reception by master 3217 ELSE 3218C ----------------------------- 3219C Send only if size is not zero 3220C ----------------------------- 3221 IF ( id%KEEP8(29) .NE. 0_8 ) THEN 3222 NZ_loc8=id%KEEP8(29) 3223C send by blocks of size BLOCKSIZE 3224 DO I8=1_8, NZ_loc8, int(BLOCKSIZE,8) 3225 SIZE_SENT = BLOCKSIZE 3226 IF (NZ_loc8-I8+1_8.LT.int(BLOCKSIZE,8)) THEN 3227 SIZE_SENT = int(NZ_loc8-I8+1_8) 3228 ENDIF 3229 CALL MPI_SEND( id%IRN_loc(I8), SIZE_SENT, 3230 & MPI_INTEGER, MASTER, 3231 & COLLECT_IRN, id%COMM, IERR ) 3232 CALL MPI_SEND( id%JCN_loc(I8), SIZE_SENT, 3233 & MPI_INTEGER, MASTER, 3234 & COLLECT_JCN, id%COMM, IERR ) 3235 END DO 3236 END IF 3237 END IF 3238 RETURN 3239 150 FORMAT( 3240 &/' ** FAILURE DURING ZMUMPS_GATHER_MATRIX, DYNAMIC ALLOCATION OF', 3241 & A30) 3242 END SUBROUTINE ZMUMPS_GATHER_MATRIX 3243 SUBROUTINE ZMUMPS_DUMP_PROBLEM(id) 3244 USE ZMUMPS_STRUC_DEF 3245 IMPLICIT NONE 3246C 3247C Purpose: 3248C ======= 3249C 3250C If id%WRITE_PROBLEM has been set by the user, 3251C possibly on all processors in case of distributed 3252C matrix, opens a file and dumps the matrix and/or 3253C the right hand side. This subroutine calls 3254C ZMUMPS_DUMP_MATRIX and ZMUMPS_DUMP_RHS. 3255C The routine should be called on all processors. 3256C 3257 INCLUDE 'mpif.h' 3258C Arguments 3259C ========= 3260 TYPE(ZMUMPS_STRUC) :: id 3261C 3262C Local variables 3263C =============== 3264C 3265 INTEGER :: MASTER, IERR 3266 INTEGER :: IUNIT 3267 LOGICAL :: IS_ELEMENTAL 3268 LOGICAL :: IS_DISTRIBUTED 3269 INTEGER :: MM_WRITE 3270 INTEGER :: MM_WRITE_CHECK 3271 CHARACTER(LEN=20) :: MM_IDSTR 3272 LOGICAL :: I_AM_SLAVE, I_AM_MASTER 3273 PARAMETER( MASTER = 0 ) 3274 IUNIT = 69 3275 I_AM_SLAVE = ( id%MYID .NE. MASTER .OR. 3276 & ( id%MYID .EQ. MASTER .AND. 3277 & id%KEEP(46) .EQ. 1 ) ) 3278 I_AM_MASTER = (id%MYID.EQ.MASTER) 3279C Remark: if id%KEEP(54) = 1 or 2, the structure 3280C is centralized at analysis. Since ZMUMPS_DUMP_PROBLEM 3281C is called at analysis phase, we define IS_DISTRIBUTED 3282C as below, which implies that the structure of the problem 3283C is distributed in IRN_loc/JCN_loc at analysis. 3284C equal to 3285 IS_DISTRIBUTED = (id%KEEP(54) .EQ. 3) 3286 IS_ELEMENTAL = (id%KEEP(55) .NE. 0) 3287 IF (id%MYID.EQ.MASTER .AND. .NOT. IS_DISTRIBUTED) THEN 3288C ==================== 3289C Matrix is assembled 3290C and centralized 3291C ==================== 3292 IF (id%WRITE_PROBLEM(1:20) .NE. "NAME_NOT_INITIALIZED")THEN 3293 OPEN(IUNIT,FILE=trim(id%WRITE_PROBLEM)) 3294 CALL ZMUMPS_DUMP_MATRIX( id, IUNIT, I_AM_SLAVE, I_AM_MASTER, 3295 & IS_DISTRIBUTED, ! = .FALSE., centralized 3296 & IS_ELEMENTAL ) ! Elemental or not 3297 CLOSE(IUNIT) 3298 ENDIF 3299 ELSE IF (id%KEEP(54).EQ.3) THEN 3300C ===================== 3301C Matrix is distributed 3302C ===================== 3303 IF (id%WRITE_PROBLEM(1:20) .EQ. "NAME_NOT_INITIALIZED" 3304 & .OR. .NOT. I_AM_SLAVE )THEN 3305 MM_WRITE = 0 3306 ELSE 3307 MM_WRITE = 1 3308 ENDIF 3309 CALL MPI_ALLREDUCE(MM_WRITE, MM_WRITE_CHECK, 1, 3310 & MPI_INTEGER, MPI_SUM, id%COMM, IERR) 3311C ----------------------------------------- 3312C If yes, each processor writes its share 3313C of the matrix in a file in matrix market 3314C format (otherwise nothing written). We 3315C append the process id to the filename. 3316C Safer in case all filenames are the 3317C same if all processors share the same 3318C file system. 3319C ----------------------------------------- 3320 IF (MM_WRITE_CHECK.EQ.id%NSLAVES .AND. I_AM_SLAVE) THEN 3321 WRITE(MM_IDSTR,'(I9)') id%MYID_NODES 3322 OPEN(IUNIT, 3323 & FILE=trim(id%WRITE_PROBLEM)//trim(adjustl(MM_IDSTR))) 3324 CALL ZMUMPS_DUMP_MATRIX(id, IUNIT, I_AM_SLAVE, I_AM_MASTER, 3325 & IS_DISTRIBUTED, ! =.TRUE., distributed 3326 & IS_ELEMENTAL ) ! Elemental or not 3327 CLOSE(IUNIT) 3328 ENDIF 3329C ELSE ... 3330C Nothing written in other cases. 3331 ENDIF 3332C =============== 3333C Right-hand side 3334C =============== 3335 IF ( id%MYID.EQ.MASTER .AND. 3336 & associated(id%RHS) .AND. 3337 & id%WRITE_PROBLEM(1:20) 3338 & .NE. "NAME_NOT_INITIALIZED")THEN 3339 OPEN(IUNIT,FILE=trim(id%WRITE_PROBLEM) //".rhs") 3340 CALL ZMUMPS_DUMP_RHS(IUNIT, id) 3341 CLOSE(IUNIT) 3342 ENDIF 3343 RETURN 3344 END SUBROUTINE ZMUMPS_DUMP_PROBLEM 3345 SUBROUTINE ZMUMPS_DUMP_MATRIX 3346 & (id, IUNIT, I_AM_SLAVE, I_AM_MASTER, 3347 & IS_DISTRIBUTED, IS_ELEMENTAL ) 3348 USE ZMUMPS_STRUC_DEF 3349 IMPLICIT NONE 3350C 3351C Purpose: 3352C ======= 3353C This subroutine dumps a routine in matrix-market format 3354C if the matrix is assembled, and in "MUMPS" format (see 3355C example in the MUMPS users'guide, if the matrix is 3356C centralized and elemental). 3357C The routine can be called on all processors. In case of 3358C distributed assembled matrix, each processor writes its 3359C share as a matrix market file on IUNIT (IUNIT may have 3360C different values on different processors). 3361C 3362C 3363C 3364C Arguments (input parameters) 3365C ============================ 3366C 3367C IUNIT: should be set to the Fortran unit where 3368C data should be written. 3369C I_AM_SLAVE: .TRUE. except on a non working master 3370C IS_DISTRIBUTED: .TRUE. if matrix is distributed, 3371C i.e., if IRN_loc/JCN_loc are provided. 3372C IS_ELEMENTAL : .TRUE. if matrix is elemental 3373C id : main MUMPS structure 3374C 3375 LOGICAL, intent(in) :: I_AM_SLAVE, 3376 & I_AM_MASTER, 3377 & IS_DISTRIBUTED, 3378 & IS_ELEMENTAL 3379 INTEGER, intent(in) :: IUNIT 3380 TYPE(ZMUMPS_STRUC), intent(in) :: id 3381C 3382C Local variables: 3383C =============== 3384C 3385 CHARACTER (LEN=10) :: SYMM 3386 CHARACTER (LEN=8) :: ARITH 3387 INTEGER(8) :: I8, NNZ_i 3388C 3389C Executable statements: 3390C ===================== 3391 IF (I_AM_MASTER .AND. .NOT. IS_DISTRIBUTED .AND. 3392 & .NOT. IS_ELEMENTAL) THEN 3393C ================== 3394C CENTRALIZED MATRIX 3395C ================== 3396 IF (id%KEEP8(28) .EQ. 0_8) THEN 3397 CALL MUMPS_GET_NNZ_INTERNAL(id%NNZ, id%NZ, NNZ_i) 3398 ELSE 3399 NNZ_i=id%KEEP8(28) 3400 ENDIF 3401 IF (associated(id%A)) THEN 3402C Write header line: 3403 ARITH='complex' 3404 ELSE 3405 ARITH='pattern ' 3406 ENDIF 3407 IF (id%KEEP(50) .eq. 0) THEN 3408 SYMM="general" 3409 ELSE 3410 SYMM="symmetric" 3411 END IF 3412 WRITE(IUNIT,FMT=*)'%%MatrixMarket matrix coordinate ', 3413 & trim(ARITH)," ",trim(SYMM) 3414 WRITE(IUNIT,*) id%N, id%N, NNZ_i 3415 IF (associated(id%A)) THEN 3416 DO I8=1_8,NNZ_i 3417 IF (id%KEEP(50).NE.0 .AND. id%IRN(I8).LT.id%JCN(I8)) THEN 3418C permute upper diag entry 3419 WRITE(IUNIT,*) id%JCN(I8), id%IRN(I8), 3420 & dble(id%A(I8)), aimag(id%A(I8)) 3421 ELSE 3422 WRITE(IUNIT,*) id%IRN(I8), id%JCN(I8), 3423 & dble(id%A(I8)), aimag(id%A(I8)) 3424 ENDIF 3425 ENDDO 3426 ELSE 3427C pattern only 3428 DO I8=1_8,id%KEEP8(28) 3429 IF (id%KEEP(50).NE.0 .AND. id%IRN(I8).LT.id%JCN(I8)) THEN 3430C permute upper diag entry 3431 WRITE(IUNIT,*) id%JCN(I8), id%IRN(I8) 3432 ELSE 3433 WRITE(IUNIT,*) id%IRN(I8), id%JCN(I8) 3434 ENDIF 3435 ENDDO 3436 ENDIF 3437 ELSE IF ( IS_DISTRIBUTED .AND. I_AM_SLAVE ) THEN 3438C ================== 3439C DISTRIBUTED MATRIX 3440C ================== 3441 IF (id%KEEP8(29) .EQ. 0_8) THEN 3442 CALL MUMPS_GET_NNZ_INTERNAL(id%NNZ_loc, id%NZ_loc, NNZ_i) 3443 ELSE 3444 NNZ_i=id%KEEP8(29) 3445 ENDIF 3446 IF (associated(id%A_loc)) THEN 3447 ARITH='complex' 3448 ELSE 3449 ARITH='pattern ' 3450 ENDIF 3451 IF (id%KEEP(50) .eq. 0) THEN 3452 SYMM="general" 3453 ELSE 3454 SYMM="symmetric" 3455 END IF 3456 WRITE(IUNIT,FMT=*)'%%MatrixMarket matrix coordinate ', 3457 & trim(ARITH)," ",trim(SYMM) 3458 WRITE(IUNIT,*) id%N, id%N, NNZ_i 3459 IF (associated(id%A_loc)) THEN 3460 DO I8=1_8,NNZ_i 3461 IF (id%KEEP(50).NE.0 .AND. 3462 & id%IRN_loc(I8).LT.id%JCN_loc(I8)) THEN 3463 WRITE(IUNIT,*) id%JCN_loc(I8), id%IRN_loc(I8), 3464 & dble(id%A_loc(I8)), aimag(id%A_loc(I8)) 3465 ELSE 3466 WRITE(IUNIT,*) id%IRN_loc(I8), id%JCN_loc(I8), 3467 & dble(id%A_loc(I8)), aimag(id%A_loc(I8)) 3468 ENDIF 3469 ENDDO 3470 ELSE 3471 DO I8=1_8,NNZ_i 3472 IF (id%KEEP(50).NE.0 .AND. 3473 & id%IRN_loc(I8).LT.id%JCN_loc(I8)) THEN 3474C permute upper diag entry 3475 WRITE(IUNIT,*) id%JCN_loc(I8), id%IRN_loc(I8) 3476 ELSE 3477 WRITE(IUNIT,*) id%IRN_loc(I8), id%JCN_loc(I8) 3478 ENDIF 3479 ENDDO 3480 ENDIF 3481 ELSE IF (IS_ELEMENTAL .AND. I_AM_MASTER) THEN 3482C ================== 3483C ELEMENTAL MATRIX 3484C ================== 3485 WRITE(IUNIT,*) id%N," :: N" 3486 WRITE(IUNIT,*) id%NELT," :: NELT" 3487 WRITE(IUNIT,*) size(id%ELTVAR)," :: NELTVAR" 3488 WRITE(IUNIT,*) size(id%A_ELT)," :: NELTVL" 3489 WRITE(IUNIT,*) id%ELTPTR(:)," ::ELTPTR" 3490 WRITE(IUNIT,*) id%ELTVAR(:)," ::ELTVAR" 3491 WRITE(IUNIT,*) id%A_ELT(:) 3492 ENDIF 3493 RETURN 3494 END SUBROUTINE ZMUMPS_DUMP_MATRIX 3495 SUBROUTINE ZMUMPS_DUMP_RHS(IUNIT, id) 3496C 3497C Purpose: 3498C ======= 3499C Dumps a dense, centralized, 3500C right-hand side in matrix market format on unit 3501C IUNIT. Should be called on the host only. 3502C 3503 USE ZMUMPS_STRUC_DEF 3504 IMPLICIT NONE 3505C Arguments 3506C ========= 3507 TYPE(ZMUMPS_STRUC), intent(in) :: id 3508 INTEGER, intent(in) :: IUNIT 3509C 3510C Local variables 3511C =============== 3512C 3513 CHARACTER (LEN=8) :: ARITH 3514 INTEGER :: I, J, K, LD_RHS 3515C 3516C Executable statements 3517C ===================== 3518C 3519 IF (associated(id%RHS)) THEN 3520 ARITH='complex' 3521 WRITE(IUNIT,FMT=*)'%%MatrixMarket matrix array ', 3522 & trim(ARITH), 3523 & ' general' 3524 WRITE(IUNIT,*) id%N, id%NRHS 3525 IF ( id%NRHS .EQ. 1 ) THEN 3526 LD_RHS = id%N 3527 ELSE 3528 LD_RHS = id%LRHS 3529 ENDIF 3530 DO J = 1, id%NRHS 3531 DO I = 1, id%N 3532 K=(J-1)*LD_RHS+I 3533 WRITE(IUNIT,*) dble(id%RHS(K)), aimag(id%RHS(K)) 3534 ENDDO 3535 ENDDO 3536 ENDIF 3537 RETURN 3538 END SUBROUTINE ZMUMPS_DUMP_RHS 3539 SUBROUTINE ZMUMPS_BUILD_I_AM_CAND( NSLAVES, K79, 3540 & NB_NIV2, MYID_NODES, 3541 & CANDIDATES, I_AM_CAND ) 3542 IMPLICIT NONE 3543C 3544C Purpose: 3545C ======= 3546C Given a list of candidate processors per node, 3547C returns an array of booleans telling whether the 3548C processor is candidate or not for a given node. 3549C 3550C K79 holds splitting strategy (KEEP(79)). If K79>1 then 3551C TPYE4,5,6 nodes might have been introduced and 3552C in this case "hidden" slaves should be taken 3553C into account to enable dynamic redistribution 3554C of the hidden slaves while climbing the chain of 3555C split nodes. The master of the first node in the 3556C chain requires a special treatment and is thus here 3557C not considered as a slave. 3558C 3559 INTEGER, intent(in) :: NSLAVES, NB_NIV2, MYID_NODES, K79 3560 INTEGER, intent(in) :: CANDIDATES( NSLAVES+1, NB_NIV2 ) 3561 LOGICAL, intent(out):: I_AM_CAND( NB_NIV2 ) 3562 INTEGER I, INIV2, NCAND 3563 IF (K79.GT.0) THEN 3564C Because of potential restarting the number of 3565C candidates that will be used to distribute 3566C arrowheads have to include all possible candidates. 3567 DO INIV2=1, NB_NIV2 3568 I_AM_CAND(INIV2)=.FALSE. 3569 NCAND = CANDIDATES(NSLAVES+1,INIV2) 3570C check if some hidden slaves are there 3571C Note that if hidden candidates exists (type 5 or 6 nodes) then 3572C in position CANDIDATES (NCAND+1,INIV2) must be the master 3573C of the first node in the chain (type 4) that we skip here because 3574C a special treatment (it has to be "considered as a master" for all 3575C nodes in the list) is needed. 3576 DO I=1, NSLAVES 3577 IF (CANDIDATES(I,INIV2).LT.0) EXIT ! end of extra slaves 3578 IF (I.EQ.NCAND+1) CYCLE 3579! skip master of associated TYPE 4 node 3580 IF (CANDIDATES(I,INIV2).EQ.MYID_NODES) THEN 3581 I_AM_CAND(INIV2)=.TRUE. 3582 EXIT 3583 ENDIF 3584 ENDDO 3585 END DO 3586 ELSE 3587 DO INIV2=1, NB_NIV2 3588 I_AM_CAND(INIV2)=.FALSE. 3589 NCAND = CANDIDATES(NSLAVES+1,INIV2) 3590 DO I=1, NCAND 3591 IF (CANDIDATES(I,INIV2).EQ.MYID_NODES) THEN 3592 I_AM_CAND(INIV2)=.TRUE. 3593 EXIT 3594 ENDIF 3595 ENDDO 3596 END DO 3597 ENDIF 3598 RETURN 3599 END SUBROUTINE ZMUMPS_BUILD_I_AM_CAND 3600 SUBROUTINE ZMUMPS_LR_GROUPING(N, NZ8, NSTEPS, IRN, JCN, FILS, 3601 & FRERE_STEPS, DAD_STEPS, NE_STEPS, STEP, NA, LNA, 3602 & LRGROUPS, SYM, ICNTL, HALO_DEPTH, GROUP_SIZE, K489, SEP_SIZE, 3603 & K38, K20, K60, 3604 & IFLAG, IERROR, K264, K265, K482, K472, MAXFRONT, K10, 3605 & LPOK, LP) 3606 USE ZMUMPS_ANA_LR 3607C This routine is meant to compute a grouping of the variables in 3608C all the separators. This grouping defines the blocks that will 3609C be compressed by means of low-rank approximations. Because the 3610C principal variables of all separators will be changed, it is 3611C necessary to update the arrays FILS, FRERE_STEPS, DAD_STEPS, STEP, 3612C NA. 3613C 3614C N - the size of the input matrix 3615C NZ8 - the nnz in the input matrix 3616C NSTEPS - the numbers of nodes in the tree 3617C IRN - the row indices of the input matrix 3618C JCN - the col indices of the input matrix 3619C FILS - the fils array of size N. This array will be 3620C modified on output according to the new relative 3621C order computed for the variables in the separators 3622C FRERE_STEPS - the FRERE_STEPS array. Modified on output (as for FILS) 3623C DAD_STEPS - the DAD_STEPS array. Modified on output (as for FILS) 3624C NE_STEPS - the NE_STEPS array. Modified on output (as for FILS) 3625C STEP - the STEP array. Modified on output (as for FILS) 3626C NA - the NA array. Modified on output (as for FILS) 3627C LNA - The length of the NA array 3628C LRGROUPS - the array mapping variables onto groups. 3629C LRGROUPS(i)=k means that variable i belongs to 3630C group k 3631C SYM - the type of matrix (KEEP(50)) 3632C ICNTL - the ICNTL array 3633C HALO_DEPTH - the depth of the halo around the separator subgraph 3634C GROUP_SIZE - the size of variables groups in the separators 3635C K489 - BLR strategy (=3 compress CB) 3636C SEP_SIZE - the minimum size of a separator to be treated with 3637C low-rank approximations 3638C has to be used for computing the clustering 3639C IFLAG - < 0 in case of error 3640C IERROR - complementary information in case of error 3641C e- =0 upon succesful return, > 0 otherwise 3642C 3643C LP, LPOK to control error printing 3644C 3645C 3646C This routine traverses the tree in a DFS fashion using a pool 3647C where nodes are pushed as soon as their parent is treated. Nodes 3648C are pushed in the pool in the same order as FRERE_STEPS and, since 3649C nodes are popped from the head of this pool, this means that 3650C siblings are treated in reverse order. This makes it easier to 3651C modify FRERE_STEPS because it will be always updated wrt a node 3652C which has already been treated. The update of NA relies on the 3653C assumption that a DFS touches the leaves in the same order as they 3654C appear in NA (in reverse order in this case for what said above). 3655C The roots are therefore pushed in the pool in reverse order. 3656C An array of order NSTEPS is allocated to store the principal 3657C variables of all the nodes that have been treated. This array 3658C could be spared at the price of expensive pointer chasing inside 3659C FILS. 3660 IMPLICIT NONE 3661 INTEGER, INTENT(IN) :: N, NSTEPS, LNA, SYM, 3662 & HALO_DEPTH, SEP_SIZE, GROUP_SIZE, K489 3663 INTEGER(8), INTENT(IN) :: NZ8 3664 INTEGER, INTENT(INOUT) :: IFLAG, IERROR 3665 INTEGER, INTENT(INOUT) :: K38, K20, K264, K265 3666 INTEGER, INTENT(IN) :: K482, K10, K60 3667 INTEGER, INTENT(IN) :: LP 3668 LOGICAL, INTENT(IN) :: LPOK 3669 INTEGER, INTENT(IN) :: IRN(NZ8), JCN(NZ8), NE_STEPS(NSTEPS), 3670 & ICNTL(40) 3671 INTEGER, INTENT(INOUT) :: FILS(N), FRERE_STEPS(NSTEPS), STEP(N), 3672 & NA(LNA), DAD_STEPS(NSTEPS), LRGROUPS(N) 3673 INTEGER, INTENT(IN) :: K472, MAXFRONT 3674 INTEGER :: K482_LOC, K38ou20 3675 INTEGER :: I, F, PV, NV, NLEAVES, NROOTS, PP, C, NF, NODE, 3676 & SYMTRY, NBQD, AD 3677 INTEGER(8) :: LW, IWFR, NRORM, NIORM 3678 INTEGER :: LPTR, RPTR, NBGROUPS 3679 LOGICAL :: FIRST 3680 INTEGER, ALLOCATABLE, DIMENSION (:) :: POOL, PVS, WORK 3681 INTEGER, ALLOCATABLE, DIMENSION (:) :: LEN, IW 3682 INTEGER(8), ALLOCATABLE, DIMENSION (:) :: IPE, IQ 3683 INTEGER, ALLOCATABLE, DIMENSION (:) :: TRACE, WORKH, GEN2HALO 3684 INTEGER :: STEP_SCALAPACK_ROOT 3685 INTEGER :: GROUP_SIZE2, IERR 3686 INTERFACE 3687 SUBROUTINE ZMUMPS_ANA_GNEW 3688 & (N, NZ, IRN, ICN, IW, LW, IPE, LEN, 3689 & IQ, FLAG, IWFR, 3690 & NRORM, NIORM, IFLAG,IERROR, ICNTL, 3691 & symmetry, SYM, NBQD, AvgDens, 3692 & KEEP264, KEEP265) 3693 INTEGER, intent(in) :: N, SYM 3694 INTEGER(8), intent(in) :: LW 3695 INTEGER(8), intent(in) :: NZ 3696 INTEGER, intent(in) :: ICNTL(40) 3697 INTEGER, intent(in) :: IRN(NZ), ICN(NZ) 3698 INTEGER, intent(out) :: IERROR, symmetry 3699 INTEGER, intent(out) :: NBQD, AvgDens 3700 INTEGER, intent(out) :: LEN(N), IW(LW) 3701 INTEGER(8), intent(out):: IWFR 3702 INTEGER(8), intent(out):: NRORM, NIORM 3703 INTEGER(8), intent(out):: IPE(N+1) 3704 INTEGER, intent(inout) :: IFLAG, KEEP264, KEEP265 3705 INTEGER(8), intent(out):: IQ(N) 3706 INTEGER, intent(out) :: FLAG(N) 3707 END SUBROUTINE 3708 END INTERFACE 3709C Check for Schur (// or sequential) 3710 K38ou20=max(K38,K20) 3711 IF (K38ou20.GT.0) THEN 3712 STEP_SCALAPACK_ROOT = STEP(K38ou20) 3713 ELSE 3714 STEP_SCALAPACK_ROOT = 0 3715 ENDIF 3716C If automatic choice of partitioning tool is required, then metis 3717C comes first, if available; otherwise scotch; otherwise 3718C permuted matrix is simply split. 3719C If a particular tool 3720C is required, we check for its availability, otherwise we revert to 3721C automatic choice 3722 IF((K482.LE.0) .OR. (K482.GT.3)) THEN 3723#if defined(parmetis) || defined(metis) || defined(parmetis3) || defined(metis4) 3724 K482_LOC = 1 3725#elif defined(ptscotch) || defined(scotch) 3726 K482_LOC = 2 3727#else 3728 K482_LOC = 3 3729#endif 3730 ELSE IF (K482.EQ.1) THEN 3731#if !defined(parmetis) && !defined(metis) && !defined(parmetis3) && !defined(metis4) 3732#if defined(ptscotch) || defined(scotch) 3733 K482_LOC = 2 3734#else 3735 K482_LOC = 3 3736#endif 3737#else 3738 K482_LOC = 1 3739#endif 3740 ELSE IF (K482.EQ.2) THEN 3741#if !defined(ptscotch) && !defined(scotch) 3742#if defined(parmetis) || defined(metis) || defined(parmetis3) || defined(metis4) 3743 K482_LOC = 1 3744#else 3745 K482_LOC = 3 3746#endif 3747#else 3748 K482_LOC = 2 3749#endif 3750 ELSE IF (K482.EQ.3) THEN 3751 K482_LOC = 3 3752 END IF 3753C The global number of groups computed 3754 NBGROUPS = 0 3755C Build the unsymmetrized graph of the input matrix. The LGROUPS 3756C array will be immediately allocated and used as a scratchpad 3757C memory for ZMUMPS_ANA_GNEW 3758 IF (K265.EQ.-1) THEN 3759C unsymmetric matrix, structurally symmetric 3760 LW = NZ8 3761 ELSE 3762C worst case need to double matrix size 3763 LW = 2_8 * NZ8 3764 ENDIF 3765 ALLOCATE(IW(LW), IPE(N+1), LEN(N), IQ(N), 3766 & POOL(NA(1)), PVS(NSTEPS), 3767 & STAT=IERR) 3768 IF (IERR.GT.0) THEN 3769 IF (LPOK) WRITE(LP,*) " Error allocate integer array of size: ", 3770 * LW+int(N,8)+int(K10*(2*N+1),8) 3771 IFLAG = -7 3772 CALL MUMPS_SET_IERROR(LW+int(N,8)+int(K10*(2*N+1),8),IERROR) 3773 GOTO 500 3774 ENDIF 3775 CALL ZMUMPS_ANA_GNEW(N, NZ8, IRN(1), JCN(1), IW(1), LW, IPE(1), 3776 & LEN(1), IQ(1), LRGROUPS, IWFR, NRORM, NIORM, IFLAG, IERROR, 3777 & ICNTL(1) , SYMTRY, SYM, NBQD, AD, K264, K265) 3778 IF (allocated(IQ)) DEALLOCATE(IQ) 3779C LRGROUPS has been used as a workspace in ana_gnew so we should 3780C reinitialize it to -1 to be sure that a variable which is in no 3781C group (ie in no grouped separator) can be identified correctly 3782 LRGROUPS = -1 3783 NLEAVES = NA(1) 3784 NROOTS = NA(2) 3785 LPTR = 2+NLEAVES 3786 RPTR = 2+NLEAVES+NROOTS 3787C Push the roots in the pool in reverse order 3788C DO I = 1, NROOTS 3789C POOL(I) = NA(2+NLEAVES+NROOTS-I+1) 3790C END DO 3791C BUGFIX 18/11/2016 3792C Because the elements from the pool are taken in reverse order and the 3793C NA is also updated in reverse order in MUMPS_UPD_TREE, this was 3794C actually false! The roots should be pushed in the pool in natural 3795C order. Cf email "Bugs L0" 18/11/2016. 3796 DO I = 1, NROOTS 3797 POOL(I) = NA(2+NLEAVES+I) 3798 END DO 3799 PP = NROOTS 3800C arrays of size N used to computed each halo 3801 ALLOCATE(WORK(MAXFRONT), TRACE(N), WORKH(N), GEN2HALO(N), 3802 & STAT=IERR) 3803 IF (IERR.GT.0) THEN 3804 IF (LPOK) WRITE(LP,*) " Error allocate integer array of size: ", 3805 * 3*N+MAXFRONT 3806 IFLAG = -7 3807 IERROR = 3*N+MAXFRONT 3808 RETURN 3809 ENDIF 3810 TRACE = 0 3811C Loop until the pool is empty 3812 DO WHILE(PP .GT. 0) 3813 PV = ABS(POOL(PP)) 3814 NODE = STEP(PV) 3815C This variable tells whether node is the oldest son of its parent. 3816C In this case fils(fils(...fils(dad_steps(node)))) is updated 3817 FIRST = POOL(PP) .LT. 0 3818C Go down until the last variable in this front and make a list of 3819C the fully assembled variables in it inside the work array 3820 NV = 0 3821 F = PV 3822 DO WHILE(F .GT. 0) 3823 NV = NV+1 3824 WORK(NV) = F 3825 F = FILS(F) 3826 END DO 3827C Do the grouping. Upon return, work contains the variable in the 3828C new order and NBGROUPS has been increased by the number of groups 3829C computed in the current separator 3830C Grouping is done if the current node is large enough, i.e. bigger 3831C than the cluster size GROUP_SIZE. The grouping must be done 3832C even if NV is smaller than SEP_SIZE: in that case, we give to all 3833C of its variables a negative group number so that we have grouping 3834C for all the variables which is needed in case we have for example 3835C a chain like (say we do low-rank if nass > 8) father (nass=5) son (nass=10) 3836C in this case we need a clustering of the CB of 'son' which may be partly 3837C inherited from the clustering of the FS of 'father' so this latter 3838C clustering should be done even if 'father' is not eligible for LR. Not 3839C likely to happen often with metis-like ordering but it should be done 3840C for robustness. 3841C Moreover, as a front can be chosen for LR during facto even if the 3842C separator was too small for proper grouping ( this occurs with delayed 3843C pivots), we need the negative sign to avoid trying to do a LR facto in 3844C such a case. 3845 CALL COMPUTE_BLR_VCS(K472, GROUP_SIZE2, GROUP_SIZE, NV) 3846 IF (NV .GE. GROUP_SIZE2) THEN 3847 IF ( (K482_LOC.EQ.3) 3848 & .OR. 3849 & ( (K60.NE.0).AND.(WORK(1).EQ.K38ou20) ) 3850 & ) 3851 & THEN 3852C Disable permutation/clustering. Leaves the ordering unchanged 3853C and simply pack variables into groups of size SIZE_GROUP. 3854C NB: this doesn't care about FS/CB, or about slaves, etc, so 3855C it is useful only for a NIV1 root basically. 3856 DO I=1,NV 3857 LRGROUPS(WORK(I))=NBGROUPS+1+I/GROUP_SIZE2 3858 END DO 3859 NBGROUPS = NBGROUPS + NV/GROUP_SIZE2 + 1 3860 ELSE 3861 CALL SEP_GROUPING(NV, WORK(1), N, NZ8, 3862 & LRGROUPS(1), NBGROUPS, IW(1), LW, IPE(1), LEN(1), 3863 & GROUP_SIZE, HALO_DEPTH, TRACE(1), WORKH(1), NODE, 3864 & GEN2HALO(1), K482_LOC, K472, 0, SEP_SIZE, 3865 & K10, LP, LPOK, IFLAG, IERROR) 3866 IF (IFLAG.LT.0) GOTO 500 3867 END IF 3868 ELSE 3869C If NV is smaller than GROUP_SIZE then all variables are in a 3870C single group, which value is negative if NV is also smaller 3871C than SEP_SIZE. 3872 IF (NV .GE. SEP_SIZE) THEN 3873 DO I = 1, NV 3874 LRGROUPS( WORK(I) ) = (NBGROUPS + 1) 3875 ENDDO 3876 ELSE 3877 DO I = 1, NV 3878 LRGROUPS( WORK(I) ) = -(NBGROUPS + 1) 3879 ENDDO 3880 ENDIF 3881 NBGROUPS = NBGROUPS + 1 3882C be careful, both val and -val are not present in the LRGROUPS array 3883 ENDIF 3884C Update the tree according to the newly computed order 3885 CALL MUMPS_UPD_TREE(NV, NSTEPS, N, FIRST, LPTR, RPTR, F, 3886 & WORK(1), 3887 & FILS(1), FRERE_STEPS(1), STEP(1), DAD_STEPS(1), 3888 & NE_STEPS(1), NA(1), LNA, PVS(1), K38ou20, 3889 & STEP_SCALAPACK_ROOT) 3890 IF (STEP_SCALAPACK_ROOT.GT.0) THEN 3891C Restore potentially modified root number 3892 IF (K38.GT.0) THEN 3893 K38 = K38ou20 3894 ELSE 3895 K20 = K38ou20 3896 ENDIF 3897 ENDIF 3898C Put all the children of node in the pool. The first child is 3899C always pushed with a negative index in order to establish when to 3900C update the FILS array for the last variable in its parent (through 3901C the FIRST variable above) 3902 PP = PP-1 3903 NF = NE_STEPS(NODE) 3904 IF(NF .GT. 0) THEN 3905 PP = PP+1 3906 POOL(PP) = F 3907 C = STEP(-F) 3908 F = FRERE_STEPS(C) 3909 DO WHILE(F .GT. 0) 3910 PP = PP+1 3911 POOL(PP) = F 3912 C = STEP(F) 3913 F = FRERE_STEPS(C) 3914 END DO 3915 END IF 3916 END DO 3917 500 IF (allocated(POOL)) DEALLOCATE(POOL) 3918 IF (allocated(PVS)) DEALLOCATE(PVS) 3919 IF (allocated(WORK)) DEALLOCATE(WORK) 3920 IF (allocated(IPE)) DEALLOCATE(IPE) 3921 IF (allocated(LEN)) DEALLOCATE(LEN) 3922 IF (allocated(TRACE)) DEALLOCATE(TRACE) 3923 IF (allocated(WORKH)) DEALLOCATE(WORKH) 3924 IF (allocated(GEN2HALO)) DEALLOCATE(GEN2HALO) 3925C 3926 RETURN 3927 END SUBROUTINE ZMUMPS_LR_GROUPING 3928 SUBROUTINE ZMUMPS_LR_GROUPING_NEW(N, NZ8, NSTEPS, IRN, JCN, FILS, 3929 & FRERE_STEPS, DAD_STEPS, NE_STEPS, STEP, NA, LNA, LRGROUPS, 3930 & SYM, ICNTL, HALO_DEPTH, GROUP_SIZE, K489, SEP_SIZE, K38, K20, 3931 & K60, IFLAG, IERROR, K264, K265, K482, K472, MAXFRONT, K469, 3932 & K10, LPOK, LP) 3933 USE ZMUMPS_ANA_LR 3934C This routine is meant to compute a grouping of the variables in 3935C all the separators. This grouping defines the blocks that will 3936C be compressed by means of low-rank approximations. Because the 3937C principal variables of all separators will be changed, it is 3938C necessary to update the arrays FILS, FRERE_STEPS, DAD_STEPS, STEP, 3939C NA. 3940C 3941C N - the size of the input matrix 3942C NZ8 - the nnz in the input matrix 3943C NSTEPS - the numbers of nodes in the tree 3944C IRN - the row indices of the input matrix 3945C JCN - the col indices of the input matrix 3946C FILS - the fils array of size N. This array will be 3947C modified on output according to the new relative 3948C order computed for the variables in the separators 3949C FRERE_STEPS - the FRERE_STEPS array. Modified on output (as for FILS) 3950C DAD_STEPS - the DAD_STEPS array. Modified on output (as for FILS) 3951C NE_STEPS - the NE_STEPS array. Modified on output (as for FILS) 3952C STEP - the STEP array. Modified on output (as for FILS) 3953C NA - the NA array. Modified on output (as for FILS) 3954C LNA - The length of the NA array 3955C LRGROUPS - the array mapping variables onto groups. 3956C LRGROUPS(i)=k means that variable i belongs to 3957C group k 3958C SYM - the type of matrix (KEEP(50)) 3959C ICNTL - the ICNTL array 3960C HALO_DEPTH - the depth of the halo around the separator subgraph 3961C GROUP_SIZE - the size of variables groups in the separators 3962C SEP_SIZE - the minimum size of a separator to be treated with 3963C low-rank approximations 3964C has to be used for computing the clustering 3965C IFLAG - < 0 in case of error 3966C IERROR - complementary information in case of error 3967C e- =0 upon succesful return, > 0 otherwise 3968C 3969C LP, LPOK to control error printing 3970C 3971C 3972C This routine traverses the tree in a DFS fashion using a pool 3973C where nodes are pushed as soon as their parent is treated. Nodes 3974C are pushed in the pool in the same order as FRERE_STEPS and, since 3975C nodes are popped from the head of this pool, this means that 3976C siblings are treated in reverse order. This makes it easier to 3977C modify FRERE_STEPS because it will be always updated wrt a node 3978C which has already been treated. The update of NA relies on the 3979C assumption that a DFS touches the leaves in the same order as they 3980C appear in NA (in reverse order in this case for what said above). 3981C The roots are therefore pushed in the pool in reverse order. 3982C An array of order NSTEPS is allocated to store the principal 3983C variables of all the nodes that have been treated. This array 3984C could be spared at the price of expensive pointer chasing inside 3985C FILS. 3986 IMPLICIT NONE 3987 INTEGER, INTENT(IN) :: N, NSTEPS, LNA, SYM, 3988 & HALO_DEPTH, SEP_SIZE, GROUP_SIZE, K489 3989 INTEGER(8), INTENT(IN) :: NZ8 3990 INTEGER, INTENT(INOUT) :: IFLAG, IERROR 3991 INTEGER, INTENT(INOUT) :: K38, K20, K264, K265 3992 INTEGER, INTENT(IN) :: K482, K10, MAXFRONT, K60 3993 INTEGER, INTENT(IN) :: LP 3994 LOGICAL, INTENT(IN) :: LPOK 3995 INTEGER, INTENT(IN) :: IRN(NZ8), JCN(NZ8), NE_STEPS(NSTEPS), 3996 & ICNTL(40) 3997 INTEGER, INTENT(INOUT) :: FILS(N), FRERE_STEPS(NSTEPS), STEP(N), 3998 & NA(LNA), DAD_STEPS(NSTEPS), LRGROUPS(N) 3999 INTEGER, INTENT(IN) :: K472, K469 4000 INTEGER :: K482_LOC, K38ou20 4001 INTEGER :: I, F, PV, NV, NODE, 4002 & SYMTRY, NBQD, AD 4003 LOGICAL :: PVSCHANGED 4004 INTEGER(8) :: LW, IWFR, NRORM, NIORM 4005 INTEGER :: NBGROUPS 4006 INTEGER, ALLOCATABLE, DIMENSION (:) :: PVS, WORK 4007 INTEGER, ALLOCATABLE, DIMENSION (:) :: LEN, IW 4008 INTEGER(8), ALLOCATABLE, DIMENSION (:) :: IPE, IQ 4009 INTEGER, ALLOCATABLE, TARGET, DIMENSION (:) :: TRACE, WORKH, 4010 & GEN2HALO 4011 INTEGER, POINTER, DIMENSION (:) :: TRACE_PTR, WORKH_PTR, 4012 & GEN2HALO_PTR 4013 INTEGER :: STEP_SCALAPACK_ROOT 4014 INTEGER :: GROUP_SIZE2, IERR 4015 INTERFACE 4016 SUBROUTINE ZMUMPS_ANA_GNEW 4017 & (N, NZ, IRN, ICN, IW, LW, IPE, LEN, 4018 & IQ, FLAG, IWFR, 4019 & NRORM, NIORM, IFLAG,IERROR, ICNTL, 4020 & symmetry, SYM, NBQD, AvgDens, 4021 & KEEP264, KEEP265) 4022 INTEGER, intent(in) :: N, SYM 4023 INTEGER(8), intent(in) :: LW 4024 INTEGER(8), intent(in) :: NZ 4025 INTEGER, intent(in) :: ICNTL(40) 4026 INTEGER, intent(in) :: IRN(NZ), ICN(NZ) 4027 INTEGER, intent(out) :: IERROR, symmetry 4028 INTEGER, intent(out) :: NBQD, AvgDens 4029 INTEGER, intent(out) :: LEN(N), IW(LW) 4030 INTEGER(8), intent(out):: IWFR 4031 INTEGER(8), intent(out):: NRORM, NIORM 4032 INTEGER(8), intent(out):: IPE(N+1) 4033 INTEGER, intent(inout) :: IFLAG, KEEP264, KEEP265 4034 INTEGER(8), intent(out):: IQ(N) 4035 INTEGER, intent(out) :: FLAG(N) 4036 END SUBROUTINE 4037 END INTERFACE 4038C Check for Schur (// or sequential) 4039 K38ou20=max(K38,K20) 4040 IF (K38ou20.GT.0) THEN 4041 STEP_SCALAPACK_ROOT = STEP(K38ou20) 4042 ELSE 4043 STEP_SCALAPACK_ROOT = 0 4044 ENDIF 4045C If automatic choice of partitioning tool is required, then metis 4046C comes first, if available; otherwise scotch; otherwise 4047C permuted matrix is simply split. 4048C If a particular tool 4049C is required, we check for its availability, otherwise we revert to 4050C automatic choice 4051 IF((K482.LE.0) .OR. (K482.GT.3)) THEN 4052#if defined(parmetis) || defined(metis) || defined(parmetis3) || defined(metis4) 4053 K482_LOC = 1 4054#elif defined(ptscotch) || defined(scotch) 4055 K482_LOC = 2 4056#else 4057 K482_LOC = 3 4058#endif 4059 ELSE IF (K482.EQ.1) THEN 4060#if !defined(parmetis) && !defined(metis) && !defined(parmetis3) && !defined(metis4) 4061#if defined(ptscotch) || defined(scotch) 4062 K482_LOC = 2 4063#else 4064 K482_LOC = 3 4065#endif 4066#else 4067 K482_LOC = 1 4068#endif 4069 ELSE IF (K482.EQ.2) THEN 4070#if !defined(ptscotch) && !defined(scotch) 4071#if defined(parmetis) || defined(metis) || defined(parmetis3) || defined(metis4) 4072 K482_LOC = 1 4073#else 4074 K482_LOC = 3 4075#endif 4076#else 4077 K482_LOC = 2 4078#endif 4079 ELSE IF (K482.EQ.3) THEN 4080 K482_LOC = 3 4081 END IF 4082C The global number of groups computed 4083 NBGROUPS = 0 4084C Build the unsymmetrized graph of the input matrix. The LGROUPS 4085C array will be immediately allocated and used as a scratchpad 4086C memory for ZMUMPS_ANA_GNEW 4087 LW = 2_8 * NZ8 4088 ALLOCATE(IW(LW), IPE(N+1), LEN(N), IQ(N), 4089 & PVS(NSTEPS), 4090 & STAT=IERR) 4091 IF (IERR.GT.0) THEN 4092 IF (LPOK) WRITE(LP,*) " Error allocate integer array of size: ", 4093 * LW+int(N,8)+int(K10*(2*N+1),8) 4094 IFLAG = -7 4095 CALL MUMPS_SET_IERROR(LW+int(N,8)+int(K10*(2*N+1),8),IERROR) 4096 GOTO 501 4097 ENDIF 4098 CALL ZMUMPS_ANA_GNEW(N, NZ8, IRN(1), JCN(1), IW(1), LW, IPE(1), 4099 & LEN(1), IQ(1), LRGROUPS, IWFR, NRORM, NIORM, IFLAG, IERROR, 4100 & ICNTL(1) , SYMTRY, SYM, NBQD, AD, K264, K265) 4101 IF (allocated(IQ)) DEALLOCATE(IQ) 4102C LRGROUPS has been used as a workspace in ana_gnew so we should 4103C reinitialize it to -1 to be sure that a variable which is in no 4104C group (ie in no grouped separator) can be identified correctly 4105 LRGROUPS = -1 4106 IF (K469.NE.2) THEN 4107C K469=1 or 3: arrays of size N shared by all threads 4108 ALLOCATE(TRACE(N), WORKH(N), GEN2HALO(N), 4109 & STAT=IERR) 4110 IF (IERR.GT.0) THEN 4111 IF (LPOK) WRITE(LP,*) " Error allocate integer array of ", 4112 * "size: ", 3*N 4113 IFLAG = -7 4114 IERROR = 3*N 4115 GOTO 501 4116 ENDIF 4117 ENDIF 4118!$OMP PARALLEL PRIVATE(I, NODE, PV, NV, F, GROUP_SIZE2, WORK, 4119!$OMP& WORKH_PTR, TRACE_PTR, GEN2HALO_PTR) IF(K469.GT.1) 4120 ALLOCATE(WORK(MAXFRONT), STAT=IERR) 4121 IF (IERR.GT.0) THEN 4122 IF (LPOK) WRITE(LP,*) " Error allocate integer array of ", 4123 * "size: ", MAXFRONT 4124 IFLAG = -7 4125 IERROR = MAXFRONT 4126 GOTO 500 4127 ENDIF 4128 IF (K469.EQ.2) THEN 4129C K469=2: arrays of size N allocated on each thread 4130 ALLOCATE(TRACE_PTR(N), WORKH_PTR(N), GEN2HALO_PTR(N), 4131 & STAT=IERR) 4132 IF (IERR.GT.0) THEN 4133 IF (LPOK) WRITE(LP,*) " Error allocate integer array of ", 4134 * "size: ", 3*N 4135 IFLAG = -7 4136 IERROR = 3*N 4137 GOTO 500 4138 ENDIF 4139 ELSE 4140 TRACE_PTR => TRACE 4141 WORKH_PTR => WORKH 4142 GEN2HALO_PTR => GEN2HALO 4143 ENDIF 4144 IF (K469.EQ.2) THEN 4145 TRACE_PTR = 0 4146 ELSE 4147!$OMP SINGLE 4148 TRACE_PTR = 0 4149!$OMP END SINGLE 4150 ENDIF 4151C I) Parcours parallele en N pour initialiser PVS 4152 PVSCHANGED = .FALSE. 4153!$OMP DO 4154 DO I = 1,N 4155 IF (STEP(I).GT.0) PVS(STEP(I)) = I 4156 END DO 4157!$OMP END DO 4158C II) Parcours parallele en NSTEPS pour faire le grouping avec 4159C PVS, STEP et FILS (sauf derniere variable) qui sont mis a jour 4160!$OMP DO SCHEDULE(DYNAMIC,1) 4161 DO NODE=NSTEPS,1,-1 4162 IF (IFLAG.LT.0) CYCLE 4163 PV = PVS(NODE) 4164C Construire VLIST a partir de FILS(PV) 4165C Go down until the last variable in this front and make a list of 4166C the fully assembled variables in it inside the work array 4167 NV = 0 4168 F = PV 4169 DO WHILE(F .GT. 0) 4170 NV = NV+1 4171 WORK(NV) = F 4172 F = FILS(F) 4173 END DO 4174C Appel a SEP_GROUPING sur VLIST: la variable principale de NODE 4175C change et devient PVS(NODE) 4176C Do the grouping. Upon return, work contains the variable in the 4177C new order and NBGROUPS has been increased by the number of groups 4178C computed in the current separator 4179C Grouping is done if the current node is large enough, i.e. bigger 4180C than the cluster size GROUP_SIZE. The grouping must be done 4181C even if NV is smaller than SEP_SIZE: in that case, we give to all 4182C of its variables a negative group number so that we have grouping 4183C for all the variables which is needed in case we have for example 4184C a chain like (say we do low-rank if nass > 8) father (nass=5) son (nass=10) 4185C in this case we need a clustering of the CB of 'son' which may be partly 4186C inherited from the clustering of the FS of 'father' so this latter 4187C clustering should be done even if 'father' is not eligible for LR. Not 4188C likely to happen often with metis-like ordering but it should be done 4189C for robustness. 4190C Moreover, as a front can be chosen for LR during facto even if the 4191C separator was too small for proper grouping ( this occurs with delayed 4192C pivots), we need the negative sign to avoid trying to do a LR facto in 4193C such a case. 4194 CALL COMPUTE_BLR_VCS(K472, GROUP_SIZE2, GROUP_SIZE, NV) 4195 IF (NV .GE. GROUP_SIZE2) THEN 4196 IF ( (K482_LOC.EQ.3) 4197 & .OR. 4198 & ( (K60.NE.0).AND.(WORK(1).EQ.K38ou20) ) 4199 & ) 4200 & THEN 4201C 4202C Disable permutation/clustering. Leaves the ordering unchanged 4203C and simply pack variables into groups of size SIZE_GROUP. 4204C NB: this doesn't care about FS/CB, or about slaves, etc, so 4205C it is useful only for a NIV1 root basically. 4206!$OMP CRITICAL(lrgrouping_cri) 4207 DO I=1,NV 4208 LRGROUPS(WORK(I))=NBGROUPS+1+I/GROUP_SIZE2 4209 END DO 4210 NBGROUPS = NBGROUPS + NV/GROUP_SIZE2 + 1 4211!$OMP END CRITICAL(lrgrouping_cri) 4212 ELSE 4213 CALL SEP_GROUPING(NV, WORK(1), N, NZ8, 4214 & LRGROUPS(1), NBGROUPS, IW(1), LW, IPE(1), LEN(1), 4215 & GROUP_SIZE, HALO_DEPTH, TRACE_PTR, WORKH_PTR, 4216 & NODE, GEN2HALO_PTR, K482_LOC, K472, K469, 4217 & SEP_SIZE, K10, LP, LPOK, IFLAG, IERROR) 4218 IF (IFLAG.LT.0) CYCLE 4219C Maj de PVS 4220 PVS(NODE) = WORK(1) 4221 PVSCHANGED = .TRUE. 4222C Maj de STEP 4223 STEP(WORK(1)) = ABS(STEP(WORK(1))) 4224 IF (STEP(WORK(1)).EQ.STEP_SCALAPACK_ROOT) THEN 4225 IF (K38.GT.0) THEN 4226 K38 = WORK(1) 4227 ELSE 4228 K20 = WORK(1) 4229 ENDIF 4230 ENDIF 4231C Maj de FILS 4232 DO I=1, NV-1 4233 STEP(WORK(I+1)) = -STEP(WORK(1)) 4234 IF (FILS(WORK(I)).LE.0) THEN 4235C La derniere variable de FILS memorise l'ancienne 4236C variable principale pointee 4237 FILS(WORK(NV)) = FILS(WORK(I)) 4238 ENDIF 4239 FILS(WORK(I)) = WORK(I+1) 4240 ENDDO 4241 ENDIF 4242 ELSE 4243C If NV is smaller than GROUP_SIZE then all variables are in a 4244C single group, which value is negative if NV is also smaller 4245C than SEP_SIZE. 4246!$OMP CRITICAL(lrgrouping_cri) 4247 IF (NV .GE. SEP_SIZE) THEN 4248 DO I = 1, NV 4249 LRGROUPS( WORK(I) ) = (NBGROUPS + 1) 4250 ENDDO 4251 ELSE 4252 DO I = 1, NV 4253 LRGROUPS( WORK(I) ) = -(NBGROUPS + 1) 4254 ENDDO 4255 ENDIF 4256 NBGROUPS = NBGROUPS + 1 4257!$OMP END CRITICAL(lrgrouping_cri) 4258 ENDIF 4259 ENDDO 4260!$OMP END DO 4261 IF (IFLAG.LT.0) GOTO 500 4262C <<<< Synchro >>>> 4263C A ce stade tous les noeuds ont ete traites et PVS, STEP et FILS (sauf derniere variable) 4264C sont a jour 4265C On economise les maj suivantes si inutiles 4266 IF (.NOT.PVSCHANGED) GOTO 500 4267C III) Maj de DAD_STEPS, FRERE_STEPS, NA, et derniere variable de chaque noeud de FILS 4268!$OMP DO 4269 DO NODE = 1,NSTEPS 4270 IF(FRERE_STEPS(NODE) .GT. 0) THEN 4271C Node has a younger brother, update frere_steps(node) 4272 FRERE_STEPS(NODE) = PVS(ABS(STEP(FRERE_STEPS(NODE)))) 4273 ELSE IF(FRERE_STEPS(NODE) .LT. 0) THEN 4274C node is the youngest brother, update frere_steps(node) to make 4275C it point to the father 4276 FRERE_STEPS(NODE) = -PVS(ABS(STEP(DAD_STEPS(NODE)))) 4277 ENDIF 4278 IF(DAD_STEPS(NODE) .NE. 0) THEN 4279 DAD_STEPS(NODE) = PVS(ABS(STEP(DAD_STEPS(NODE)))) 4280 END IF 4281 ENDDO 4282!$OMP END DO NOWAIT 4283!$OMP DO 4284 DO I=3,LNA 4285 NA(I) = PVS(ABS(STEP(NA(I)))) 4286 ENDDO 4287!$OMP END DO NOWAIT 4288!$OMP DO 4289 DO I=1,N 4290 IF (FILS(I).LT.0) THEN 4291 FILS(I) = -PVS(ABS(STEP(-FILS(I)))) 4292 ENDIF 4293 ENDDO 4294!$OMP END DO 4295 500 CONTINUE 4296 IF (allocated(WORK)) DEALLOCATE(WORK) 4297 IF (K469.EQ.2) THEN 4298 DEALLOCATE(TRACE_PTR) 4299 DEALLOCATE(WORKH_PTR) 4300 DEALLOCATE(GEN2HALO_PTR) 4301 ENDIF 4302!$OMP END PARALLEL 4303 501 CONTINUE 4304 IF (K469.NE.2) THEN 4305 IF (allocated(TRACE)) DEALLOCATE(TRACE) 4306 IF (allocated(WORKH)) DEALLOCATE(WORKH) 4307 IF (allocated(GEN2HALO)) DEALLOCATE(GEN2HALO) 4308 ENDIF 4309 IF (allocated(PVS)) DEALLOCATE(PVS) 4310 IF (allocated(IPE)) DEALLOCATE(IPE) 4311 IF (allocated(LEN)) DEALLOCATE(LEN) 4312C 4313 RETURN 4314 END SUBROUTINE ZMUMPS_LR_GROUPING_NEW 4315C SUBROUTINE SEP_GROUPING(NV, VLIST, N, NZ, LRGROUPS, NBGROUPS, IW, 4316C & LW, IPE, LEN, GROUP_SIZE, HALO_DEPTH) 4317C IMPLICIT NONE 4318C INTEGER :: NV, N, NZ, LW, NBGROUPS, GROUP_SIZE, HALO_DEPTH 4319C INTEGER :: VLIST(NV), LRGROUPS(N), IW(LW), IPE(N+1), LEN(N) 4320C 4321C INTEGER :: TMP, I 4322C 4323CC Just invert the list 4324C DO I=1, NV/2 4325C TMP = VLIST(I) 4326C VLIST(I) = VLIST(NV-I+1) 4327C VLIST(NV-I+1) = TMP 4328C END DO 4329C 4330C RETURN 4331C END SUBROUTINE SEP_GROUPING 4332