1C 2C This file is part of MUMPS 5.1.2, released 3C on Mon Oct 2 07:37:01 UTC 2017 4C 5C 6C Copyright 1991-2017 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria, 7C University of Bordeaux. 8C 9C This version of MUMPS is provided to you free of charge. It is 10C released under the CeCILL-C license: 11C http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html 12C 13 SUBROUTINE ZMUMPS_SOLVE_DRIVER(id) 14 USE ZMUMPS_STRUC_DEF 15 USE MUMPS_SOL_ES 16C 17C Purpose 18C ======= 19C 20C Performs solution phase (solve), Iterative Refinements 21C and Error analysis. 22C 23C 24C 25C 26 USE ZMUMPS_BUF 27 USE ZMUMPS_OOC 28 USE MUMPS_MEMORY_MOD 29 IMPLICIT NONE 30C ------------------- 31C Explicit interfaces 32C ------------------- 33 INTERFACE 34 SUBROUTINE ZMUMPS_SIZE_IN_STRUCT( id, NB_INT,NB_CMPLX,NB_CHAR ) 35 USE ZMUMPS_STRUC_DEF 36 TYPE (ZMUMPS_STRUC) :: id 37 INTEGER(8) :: NB_INT,NB_CMPLX,NB_CHAR 38 END SUBROUTINE ZMUMPS_SIZE_IN_STRUCT 39 SUBROUTINE ZMUMPS_CHECK_DENSE_RHS 40 &(idRHS, idINFO, idN, idNRHS, idLRHS) 41 COMPLEX(kind=8), DIMENSION(:), POINTER :: idRHS 42 INTEGER, intent(in) :: idN, idNRHS, idLRHS 43 INTEGER, intent(inout) :: idINFO(:) 44 END SUBROUTINE ZMUMPS_CHECK_DENSE_RHS 45 END INTERFACE 46C 47 INCLUDE 'mpif.h' 48 INCLUDE 'mumps_headers.h' 49 INCLUDE 'mumps_tags.h' 50#if defined(V_T) 51 INCLUDE 'VT.inc' 52#endif 53 INTEGER :: STATUS(MPI_STATUS_SIZE) 54 INTEGER :: IERR 55 INTEGER, PARAMETER :: MASTER = 0 56C 57C Parameters 58C ========== 59C 60 TYPE (ZMUMPS_STRUC), TARGET :: id 61C 62C Local variables 63C =============== 64C 65 INTEGER MP,LP, MPG 66 LOGICAL PROK, PROKG, LPOK 67 INTEGER MTYPE, ICNTL21 68 LOGICAL LSCAL, POSTPros, GIVSOL 69 INTEGER ICNTL10, ICNTL11 70 INTEGER I,K,JPERM, J, II, IZ2 71 INTEGER IZ, NZ_THIS_BLOCK 72C pointers in IS 73 INTEGER LIW 74C pointers in id%S 75 INTEGER(8) :: LA, LA_PASSED 76 INTEGER LIW_PASSED 77 INTEGER(8) :: LWCB8_MIN, LWCB8, LWCB8_SOL_C 78C buffer sizes 79 INTEGER ZMUMPS_LBUF, ZMUMPS_LBUF_INT 80 INTEGER(8) :: ZMUMPS_LBUF_8 81 INTEGER MSG_MAX_BYTES_SOLVE, MSG_MAX_BYTES_GTHRSOL 82C null space 83 INTEGER IBEG_ROOT_DEF, IEND_ROOT_DEF, 84 & IBEG_GLOB_DEF, IEND_GLOB_DEF, 85 & IROOT_DEF_RHS_COL1 86C 87 INTEGER NITREF, NOITER, SOLVET, KASE 88C Meaningful only with tree pruning and sparse RHS 89 LOGICAL INTERLEAVE_PAR, DO_PERMUTE_RHS 90C true if ZMUMPS_SOL_C called during postprocessing 91 LOGICAL FROM_PP 92C 93C TIMINGS 94 DOUBLE PRECISION TIMEIT, TIMEEA, TIMEEA1, TIMELCOND 95 DOUBLE PRECISION TIME3 96 DOUBLE PRECISION TIMEC1,TIMEC2 97 DOUBLE PRECISION TIMEGATHER1,TIMEGATHER2 98 DOUBLE PRECISION TIMESCATTER1,TIMESCATTER2 99 DOUBLE PRECISION TIMECOPYSCALE1,TIMECOPYSCALE2 100C ------------------------------------------ 101C Declarations related to exploit sparsity 102C ------------------------------------------ 103 INTEGER :: NRHS_NONEMPTY 104 INTEGER :: STRAT_PERMAM1 105 LOGICAL :: DO_NULL_PIV 106 INTEGER, DIMENSION(:), POINTER :: IRHS_PTR_COPY 107 INTEGER, DIMENSION(:), POINTER :: IRHS_SPARSE_COPY 108 COMPLEX(kind=8), DIMENSION(:), POINTER :: RHS_SPARSE_COPY 109 LOGICAL IRHS_SPARSE_COPY_ALLOCATED, IRHS_PTR_COPY_ALLOCATED, 110 & RHS_SPARSE_COPY_ALLOCATED 111C 112 INTEGER(8) :: NBT 113 INTEGER :: NBCOL, COLSIZE, JBEG_RHS, JEND_RHS, JBEG_NEW, 114 & NBCOL_INBLOC, IPOS, IPOSRHSCOMP 115 INTEGER :: PERM_PIV_LIST(max(id%KEEP(112),1)) 116 INTEGER :: MAP_PIVNUL_LIST(max(id%KEEP(112),1)) 117 INTEGER, DIMENSION(:), ALLOCATABLE :: PERM_RHS 118 INTEGER, DIMENSION(:), POINTER :: PTR_POSINRHSCOMP_FWD, 119 & PTR_POSINRHSCOMP_BWD 120 COMPLEX(kind=8), DIMENSION(:), POINTER :: PTR_RHS 121 INTEGER :: SIZE_IPTR_WORKING, SIZE_WORKING 122C NRHS_NONEMPTY: holds 123C either the original number of RHS (id%NRHS defined on host) 124C or, when the RHS is sparse, it holds the 125C number of non empty columns. 126C it is computed on master and is 127C then broadcasted on all processes. 128C IRHS_PTR_COPY holds a compressed local copy of IRHS_PTR (or points 129C on the master to id%IRHS_PTR if no permutation requested) 130C IRHS_SPARSE_COPY might be allocated or might also point to 131C id%IRHS_SPARSE. To test if we can deallocate it we trace 132C with IRHS_SPARSE_COPY_ALLOCATED when it was effectively 133C allocated. 134C NBCOL_INBLOC total nb columns to process in this block 135c JBEG_RHS global ptr for starting column requested for this block 136c JEND_RHS global ptr for end column_number requested for this block 137C PERM_PIV_LIST permuted array of pivots 138C MAP_PIVNUL_LIST: mapping of permuted list 139C PERM_RHS -- Permutation of RHS computed on master and broadcasted 140C on all procs (of size id%NRHS orginal) 141C PERM_RHS(k) = i means that i is the kth column to be processed 142C Note that PERM_RHS will be used also in case of interleaving 143C ------------------------------------ 144 COMPLEX(kind=8) ONE 145 COMPLEX(kind=8) ZERO 146 PARAMETER( ONE = (1.0D0,0.0D0) ) 147 PARAMETER( ZERO = (0.0D0,0.0D0) ) 148 DOUBLE PRECISION RZERO, RONE 149 PARAMETER( RZERO = 0.0D0, RONE = 1.0D0 ) 150C 151C RHS_IR is internal to ZMUMPS and used for iterative refinement 152C or the error analysis section. It either points to the user's 153C RHS (on the host when the solution is centralized or the RHS 154C is dense), or is a workarray allocated inside this routine 155C of size N. 156 COMPLEX(kind=8), DIMENSION(:), POINTER :: RHS_IR 157 COMPLEX(kind=8), DIMENSION(:), POINTER :: WORK_WCB 158 COMPLEX(kind=8), DIMENSION(:), POINTER :: PTR_RHS_ROOT 159 INTEGER(8) :: LPTR_RHS_ROOT 160C 161C Local workarrays that will be dynamically allocated 162C 163 COMPLEX(kind=8), ALLOCATABLE :: SAVERHS(:), C_RW1(:), 164 & C_RW2(:), 165 & SRW3(:), C_Y(:), 166 & C_W(:) 167 INTEGER :: LCWORK 168 COMPLEX(kind=8), ALLOCATABLE :: CWORK(:) 169 DOUBLE PRECISION, ALLOCATABLE :: R_Y(:), D(:) 170 DOUBLE PRECISION, ALLOCATABLE :: R_W(:) 171C The 2 following workarrays are temporary local 172C arrays only used for distributed matrix input 173C (KEEP(54) .NE. 0). 174 DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: R_LOCWK54 175 COMPLEX(kind=8), ALLOCATABLE, DIMENSION(:) :: C_LOCWK54 176 INTEGER :: NBENT_RHSCOMP, NB_FS_IN_RHSCOMP_F, 177 & NB_FS_IN_RHSCOMP_TOT 178 INTEGER, DIMENSION(:), ALLOCATABLE :: UNS_PERM_INV 179 INTEGER LIWK_SOLVE, LIWCB 180 INTEGER, ALLOCATABLE :: IW1(:), IWK_SOLVE(:), IWCB(:) 181 INTEGER :: LIWK_PTRACB 182 INTEGER(8), ALLOCATABLE :: PTRACB(:) 183C 184C Parameters arising from the structure 185C 186 INTEGER(8) :: MAXS 187 DOUBLE PRECISION, DIMENSION(:), POINTER :: CNTL 188 INTEGER, DIMENSION (:), POINTER :: KEEP,ICNTL,INFO 189 INTEGER(8), DIMENSION (:), POINTER :: KEEP8 190 INTEGER, DIMENSION (:), POINTER :: IS 191 DOUBLE PRECISION, DIMENSION(:),POINTER:: RINFOG 192C =============================================================== 193C SCALING issues: 194C When scaling was performed 195C RHS holds the solution of the scaled system 196C The unscaled second member (b0) was given 197C then we have to scale both rhs adn solution: 198C A(sca) = LU = D1*A*D2 , with D2 = COLSCA 199C D1 = ROWSCA 200C -------------- 201C CASE OF A X =B 202C -------------- 203C (ICNTL(9)=1 or MTYPE=1) 204C A*x0 = b0 205C b(sca) = D1 * b0 = ROWSCA*S(ISTW3) 206C A(sca) [(D2) **(-1)] x0 = b(sca) 207C so the computed solution by Check y0 of LU *y0 = b(sca) 208C is : y0 =[(D2) **(-1)] x0 and so x0= D2*y0 is modified 209C -------------- 210C CASE OF AT X =B 211C -------------- 212C (ICNTL(9).NE.0 or MTYPE=0) 213C A(sca) = LU = D1*A*D2 214C AT*x0 = b0 => D2ATD1 D1-1 x0 = D2b0 215C b(sca) = D2 * b0 = COLSCA*S(ISTW3) 216C A(sca)T [(D1) **(-1)] x0 = b(sca) 217C so the computed solution by Check y0 of LU *y0 = b(sca) 218C is : y0 =[(D1) **(-1)] x0 and so x0= D1*y0 is modified 219C 220C In case of distributed RHS we need 221C scaling information on each processor 222C 223 type scaling_data_t 224 SEQUENCE 225 DOUBLE PRECISION, dimension(:), pointer :: SCALING 226 DOUBLE PRECISION, dimension(:), pointer :: SCALING_LOC 227 end type scaling_data_t 228 type (scaling_data_t) :: scaling_data 229C To scale on the fly during GATHER SOLUTION 230 DOUBLE PRECISION, DIMENSION(:), POINTER :: PT_SCALING 231 DOUBLE PRECISION, TARGET :: Dummy_SCAL(1) 232C 233C ==================== END OF SCALING related data ================ 234C 235C Local variables 236C 237C Interval associated to the subblocks of RHS a node has to process 238 INTEGER, DIMENSION(:), ALLOCATABLE, TARGET :: RHS_BOUNDS 239 INTEGER :: LPTR_RHS_BOUNDS 240 INTEGER, DIMENSION(:), POINTER :: PTR_RHS_BOUNDS 241 LOGICAL :: DO_NBSPARSE, NBSPARSE_LOC 242 DOUBLE PRECISION ARRET 243 COMPLEX(kind=8) C_DUMMY(1) 244 DOUBLE PRECISION R_DUMMY(1) 245 INTEGER IDUMMY(1), JDUMMY(1), KDUMMY(1), LDUMMY(1), MDUMMY(1) 246 INTEGER, TARGET :: IDUMMY_TARGET(1) 247 COMPLEX(kind=8), TARGET :: CDUMMY_TARGET(1) 248 INTEGER JJ 249 INTEGER allocok 250 INTEGER NBRHS, NBRHS_EFF, BEG_RHS, NB_RHSSKIPPED, 251 & LD_RHS, 252 & MASTER_ROOT, MASTER_ROOT_IN_COMM 253 INTEGER SIZE_ROOT, LD_REDRHS 254 INTEGER(8) :: IPT_RHS_ROOT 255 INTEGER(8) :: IBEG, IBEG_RHSCOMP, KDEC, IBEG_loc, IBEG_REDRHS 256 INTEGER LD_RHSCOMP, NCOL_RHS_loc 257 INTEGER LD_RHS_loc, JBEG_RHS_loc 258 INTEGER NB_K133, IRANK, TSIZE 259 INTEGER KMAX_246_247 260 INTEGER IFLAG_IR, IRStep 261 LOGICAL TESTConv 262 LOGICAL WORKSPACE_MINIMAL_PREFERRED, WK_USER_PROVIDED 263 INTEGER(8) NB_BYTES !size of data allocated during solve 264 INTEGER(8) NB_BYTES_MAX !MAX size of data allocated during solve 265 INTEGER(8) NB_BYTES_EXTRA !For Step2Node, which may be freed later 266 INTEGER(8) NB_INT, NB_CMPLX, NB_CHAR, K34_8, K35_8 267 INTEGER(8) K16_8, ITMP8, NB_BYTES_ON_ENTRY 268#if defined(V_T) 269C Vampir 270 INTEGER soln_drive_class, glob_comm_ini, perm_scal_ini, soln_dist, 271 & soln_assem, perm_scal_post 272#endif 273 LOGICAL I_AM_SLAVE, BUILD_POSINRHSCOMP 274 LOGICAL WORK_WCB_ALLOCATED, IS_INIT_OOC_DONE 275 LOGICAL STOP_AT_NEXT_EMPTY_COL 276 INTEGER MTYPE_LOC 277 INTEGER MAT_ALLOC_LOC, MAT_ALLOC 278 INTEGER MUMPS_PROCNODE 279 EXTERNAL MUMPS_PROCNODE 280C 281C First executable statement 282C 283#if defined(V_T) 284 CALL VTCLASSDEF( 'Soln driver',soln_drive_class,IERR) 285 CALL VTFUNCDEF( 'glob_comm_ini',soln_drive_class, 286 & glob_comm_ini,IERR) 287 CALL VTFUNCDEF( 'perm_scal_ini',soln_drive_class, 288 & perm_scal_ini,IERR) 289 CALL VTFUNCDEF( 'soln_dist',soln_drive_class,soln_dist,IERR) 290 CALL VTFUNCDEF( 'soln_assem',soln_drive_class,soln_assem,IERR) 291 CALL VTFUNCDEF( 'perm_scal_post',soln_drive_class, 292 & perm_scal_post,IERR) 293#endif 294C -- The following pointers xxCOPY might be allocated but then 295C -- the associated xxCOPY_ALLOCATED will be set to 296C -- enable deallocation 297 IRHS_PTR_COPY => IDUMMY_TARGET 298 IRHS_PTR_COPY_ALLOCATED = .FALSE. 299 IRHS_SPARSE_COPY => IDUMMY_TARGET 300 IRHS_SPARSE_COPY_ALLOCATED=.FALSE. 301 RHS_SPARSE_COPY => CDUMMY_TARGET 302 RHS_SPARSE_COPY_ALLOCATED=.FALSE. 303 NULLIFY(RHS_IR) 304 NULLIFY(WORK_WCB) 305 IS_INIT_OOC_DONE = .FALSE. 306 WK_USER_PROVIDED = .FALSE. 307 WORK_WCB_ALLOCATED = .FALSE. 308 CNTL =>id%CNTL 309 KEEP =>id%KEEP 310 KEEP8=>id%KEEP8 311 IS =>id%IS 312 ICNTL=>id%ICNTL 313 INFO =>id%INFO 314C ASPK =>id%A 315C COLSCA =>id%COLSCA 316C ROWSCA =>id%ROWSCA 317 RINFOG =>id%RINFOG 318 LP = ICNTL( 1 ) 319 MP = ICNTL( 2 ) 320 MPG = ICNTL( 3 ) 321 LPOK = ((LP.GT.0).AND.(id%ICNTL(4).GE.1)) 322 PROK = ((MP.GT.0).AND.(id%ICNTL(4).GE.2)) 323 PROKG = ( MPG .GT. 0 .and. id%MYID .eq. MASTER ) 324 PROKG = (PROKG.AND.(id%ICNTL(4).GE.2)) 325 IF (.not.PROK) MP =0 326 IF (.not.PROKG) MPG=0 327 IF ( PROK ) WRITE(MP,100) 328 IF ( PROKG ) WRITE(MPG,100) 329 NB_BYTES = 0_8 330 NB_BYTES_MAX = 0_8 331 NB_BYTES_EXTRA = 0_8 332 K34_8 = int(KEEP(34), 8) 333 K35_8 = int(KEEP(35), 8) 334 K16_8 = int(KEEP(16), 8) 335 NBENT_RHSCOMP = 0 336C Used by DISTRIBUTED_SOLUTION to skip empty columns 337C that are skipped (case of sparse RHS) 338 NB_RHSSKIPPED = 0 339C next 4 initialisations needed in case of error 340C to free space allocated 341 LSCAL = .FALSE. 342 WORK_WCB_ALLOCATED = .FALSE. 343 ICNTL21 = -99998 ! will be bcasted later to slaves 344 IBEG_RHSCOMP =-152525_8 ! Should not be used 345 BUILD_POSINRHSCOMP = .TRUE. 346C Not needed anymore (since new version of gather) 347C LD_RHSCOMP = max(KEEP(89),1) ! at the nb of pivots eliminated on 348 ! that proc 349 LD_RHSCOMP = 1 350 NB_FS_IN_RHSCOMP_TOT = KEEP(89) 351! number of FS var of the pruned tree 352! mapped on this proc 353 NB_FS_IN_RHSCOMP_F = NB_FS_IN_RHSCOMP_TOT 354C 355C Depending on the type of parallelism, 356C the master can have the role of a slave 357 I_AM_SLAVE = ( id%MYID .ne. MASTER .OR. 358 & ( id%MYID .eq. MASTER .AND. 359 & KEEP(46) .eq. 1 ) ) 360C 361C Compute the number of integers and nb of reals in the structure 362 CALL ZMUMPS_SIZE_IN_STRUCT (id, NB_INT,NB_CMPLX,NB_CHAR) 363 NB_BYTES = NB_BYTES + NB_INT * K34_8 + NB_CMPLX * K35_8 + NB_CHAR 364 NB_BYTES_ON_ENTRY = NB_BYTES !used to check alloc/dealloc count ok 365 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 366C ====================================== 367C BEGIN CHECK KEEP ENTRIES AND INTERFACE 368C ====================================== 369C The checks below used to be in ZMUMPS_DRIVER. It is much better 370C to have them here in ZMUMPS_SOL_DRIVER because this enables 371C more flexibility in the management of priorities between various 372C checks. 373 IF (id%MYID .EQ. MASTER) THEN 374c subroutine only because called at facto and solve 375 CALL ZMUMPS_SET_K221(id) 376 id%KEEP(111) = id%ICNTL(25) 377C For the case of ICNTL(20)=1 one could 378C switch off exploit sparsity when RHS is too dense. 379 IF (id%ICNTL(20) .EQ. 1) id%KEEP(235) = -1 !automatic 380 IF (id%ICNTL(20) .EQ. 2) id%KEEP(235) = 0 !off 381 IF (id%ICNTL(20) .EQ. 3) id%KEEP(235) = 1 !on 382 IF (id%ICNTL(20).EQ.1.or.id%ICNTL(20).EQ.2.or. 383 & id%ICNTL(20).EQ.3) THEN 384 id%KEEP(248) = 1 !sparse RHS 385 ELSE 386 id%KEEP(248) = 0 !dense RHS 387 ENDIF 388 ICNTL21 = id%ICNTL(21) 389 IF (ICNTL21 .ne.0.and.ICNTL21.ne.1) ICNTL21=0 390 IF ( id%ICNTL(30) .NE.0 ) THEN 391C A-1 is on 392 id%KEEP(237) = 1 393 ELSE 394C A-1 is off 395 id%KEEP(237) = 0 396 ENDIF 397 IF (id%KEEP(248) .eq.0.and. id%KEEP(237).ne.0) THEN 398C For A-1 we have a sparse RHS in the API. 399C Force KEEP(248) accordingly. 400 id%KEEP(248)=1 401 ENDIF 402 IF ((id%KEEP(221).EQ.2 ).AND.(id%KEEP(248).NE.0) ) THEN 403C -- input RHS is in fact effectively 404C -- stored in REDRHS and RHSCOMP 405 id%KEEP(248) = 0 406 ENDIF 407 IF ((id%KEEP(221).EQ.2 ).AND.(id%KEEP(235).NE.0) ) THEN 408C -- input RHS is in fact effectively 409C -- stored in REDRHS and RHSCOMP 410 id%KEEP(235) = 0 411 ENDIF 412 IF ( (id%KEEP(248).EQ.0).AND.(id%KEEP(111).EQ.0) ) THEN 413C RHS is not sparse and thus exploit sparsity is reset to 0 414 id%KEEP(235) = 0 415 ENDIF 416C Case of Automatic setting of exploit sparsity (KEEP(235)=-1) 417C (in MUMPS_DRIVER original value of KEEP(235) is reset) 418 IF(id%KEEP(111).NE.0) id%KEEP(235)=0 419C 420 IF (id%KEEP(235).EQ.-1) THEN 421 IF (id%KEEP(237).NE.0) THEN 422C for A-1 423 id%KEEP(235)=1 424 ELSE 425 id%KEEP(235)=1 426 ENDIF 427 ELSE IF (id%KEEP(235).NE.0) THEN 428 id%KEEP(235)=1 429 ENDIF 430C Setting of KEEP(242) (permute RHS) 431 IF ((KEEP(111).NE.0)) THEN 432C In the context of null space, the null pivots 433C are by default permuted to post-order 434C However for null space there is in this case no need to 435C permute null pivots since they are already in correct order. 436C Setting KEEP(242)=1 would just force to go through 437C part of the code permuting to identity. 438C Apart for validation purposes this is not interesting 439C costly (and more risky). 440 KEEP(242) = 0 441 ENDIF 442 IF (KEEP(248).EQ.0.AND.KEEP(111).EQ.0) THEN 443C Permutation possible if sparse RHS 444C (KEEP(248).NE.0: A-1 or General Sparse) 445C or null space (even if in current version 446C it is deactived) 447 KEEP(242) = 0 448 ENDIF 449 IF ((KEEP(242).NE.0).AND.KEEP(237).EQ.0) THEN 450 IF ((KEEP(242).NE.-9).AND.KEEP(242).NE.1.AND. 451 & KEEP(242).NE.-1) THEN 452 WRITE(6,*) " WARNING !!! A-1 OFF and KEEP(242)= ", 453 & KEEP(242), " is reset to zero (OFF)" 454C Reset it to 0 455 KEEP(242) = 0 456 ENDIF 457 ENDIF 458 IF (KEEP(242).EQ.-9) THEN 459C Automatic setting of permute RHS 460 IF (id%KEEP(237).NE.0) THEN 461 KEEP(242) = 1 ! postorder 462 ELSE 463 KEEP(242) = 0 ! no permutation 464 ENDIF 465 ENDIF 466 IF ( (id%KEEP(221).EQ.1 ).AND.(id%KEEP(235).NE.0) ) THEN 467C -- Do not permute RHS with REDRHS for the time being 468 id%KEEP(242) = 0 469 ENDIF 470 IF (KEEP(242).EQ.0) KEEP(243)=0 ! interleave off 471 IF ((KEEP(237).EQ.0).OR.(KEEP(242).EQ.0)) THEN 472C Interleave (243) possible only 473C when permute RHS (242) is on and with A-1 474 KEEP(243) = 0 475 ENDIF 476 IF (id%KEEP(237).EQ.1) THEN ! A-1 entries 477C Case of automatic setting of KEEP(243), KEEP(493-498) 478C (exploit sparsity parameters) 479 IF (id%NSLAVES.EQ.1) THEN 480 IF (id%KEEP(243).EQ.-1) id%KEEP(243)=0 481 IF (id%KEEP(495).EQ.-1) id%KEEP(495)=1 482 IF (id%KEEP(497).EQ.-1) id%KEEP(497)=1 483 ELSE 484 IF (id%KEEP(243).EQ.-1) id%KEEP(243)=1 485 IF (id%KEEP(495).EQ.-1) id%KEEP(495)=1 486 IF (id%KEEP(497).EQ.-1) id%KEEP(497)=1 487 ENDIF 488 ELSE ! dense or general sparse 489 id%KEEP(243)=0 490 id%KEEP(495)=0 491 IF (id%KEEP(235) .EQ. 1) THEN 492 IF (id%KEEP(497).EQ.-1) id%KEEP(497)=0 493 ENDIF 494 ENDIF 495 MTYPE = id%ICNTL( 9 ) 496 IF (MTYPE.NE.1) MTYPE=0 ! see interface 497 IF ((MTYPE.EQ.0).AND.KEEP(50).NE.0) MTYPE =1 498! suppress option Atx=b for A-1 499 IF (id%KEEP(237).NE.0) MTYPE = 1 500 ENDIF 501 CALL MPI_BCAST(MTYPE,1,MPI_INTEGER,MASTER, 502 & id%COMM,IERR) 503 CALL MPI_BCAST( id%KEEP(111), 1, MPI_INTEGER, MASTER, id%COMM, 504 & IERR ) 505 CALL MPI_BCAST( id%KEEP(221), 1, MPI_INTEGER, MASTER, id%COMM, 506 & IERR ) 507 CALL MPI_BCAST( id%KEEP(235), 1, MPI_INTEGER, MASTER, id%COMM, 508 & IERR ) 509 CALL MPI_BCAST( id%KEEP(237), 1, MPI_INTEGER, MASTER, id%COMM, 510 & IERR ) 511 CALL MPI_BCAST( id%KEEP(242), 2, MPI_INTEGER, MASTER, id%COMM, 512 & IERR ) 513 CALL MPI_BCAST( id%KEEP(248), 1, MPI_INTEGER, MASTER, id%COMM, 514 & IERR ) 515 CALL MPI_BCAST( id%KEEP(350), 1, MPI_INTEGER, MASTER, id%COMM, 516 & IERR ) 517 CALL MPI_BCAST( id%KEEP(495), 3, MPI_INTEGER, MASTER, id%COMM, 518 & IERR ) 519 CALL MPI_BCAST( ICNTL21, 1, MPI_INTEGER, MASTER, id%COMM, IERR ) 520C Broadcast original id%NRHS (used at least for checks on ISOL_loc 521C and to allocate PERM_RHS in case of exploit sparsity) 522 CALL MPI_BCAST( id%NRHS,1, MPI_INTEGER, MASTER, id%COMM,IERR) 523C 524C TIMINGS: reset to 0 525 TIMEC2=0.0D0 526 TIMECOPYSCALE2=0.0D0 527 TIMEGATHER2=0.0D0 528 TIMESCATTER2=0.0D0 529 id%DKEEP(112)=0.0D0 530 id%DKEEP(113)=0.0D0 531C id%DKEEP(114) time for the iterative refinement 532C id%DKEEP(120) time for the error analysis 533C id%DKEEP(121) time for condition number 534C id%DKEEP(122) time for matrix redistribution (copy+scale solution) 535 id%DKEEP(114)=0.0D0 536 id%DKEEP(120)=0.0D0 537 id%DKEEP(121)=0.0D0 538 id%DKEEP(115)=0.0D0 539 id%DKEEP(116)=0.0D0 540 id%DKEEP(122)=0.0D0 541C Time for fwd, bwd and scalapack is 542C accumulated in DKEEP(117-119) within SOL_C 543C If requested time for each call to FWD/BWD 544C might be print but on output to solve 545C phase DKEEP will hold on each proc the accumulated time 546 id%DKEEP(117)=0.0D0 547 id%DKEEP(118)=0.0D0 548 id%DKEEP(119)=0.0D0 549 id%DKEEP(123)=0.0D0 550 id%DKEEP(124)=0.0D0 551 id%DKEEP(125)=0.0D0 552 id%DKEEP(126)=0.0D0 553 id%DKEEP(127)=0.0D0 554 id%DKEEP(128:134)=0.0D0 555 id%DKEEP(140:153)=0.0D0 556C 557 CALL MUMPS_SECDEB(TIME3) 558C ------------------------------ 559C Check parameters on the master 560C ------------------------------ 561 IF ( id%MYID .EQ. MASTER ) THEN 562 IF ((KEEP(23).NE.0).AND.KEEP(50).NE.0) THEN 563C Maximum transversal permutation 564C has not been saved (KEEP(23)>0 and UNS_PERM allocated) 565C when matrix is symmetric. 566 IF (PROKG) WRITE(MPG,'(A)') 567 & ' Internal Error 1 in solution driver ' 568 id%INFO(1)=-444 569 id%INFO(2)=KEEP(23) 570 ENDIF 571C ------------------------------------ 572C Check that factors are available 573C either in-core or on disk, case 574C where factors were discarded during 575C factorization (e.g. useful to simulate 576C an OOC factorization or just get nb of 577C negative pivots or determinant) 578C ------------------------------------ 579 IF (KEEP(201) .EQ. -1) THEN 580 IF (PROKG) WRITE(MPG,'(A)') 581 & ' ERROR: Solve impossible because factors not kept' 582 id%INFO(1)=-44 583 id%INFO(2)=KEEP(251) 584 GOTO 333 585 ELSE IF (KEEP(221).EQ.0 .AND. KEEP(251) .EQ. 2 586 & .AND. KEEP(252).EQ.0) THEN 587 IF (PROKG) WRITE(MPG,'(A)') 588 & ' ERROR: Solve impossible because factors not kept' 589 id%INFO(1)=-44 590 id%INFO(2)=KEEP(251) 591 GOTO 333 592 ENDIF 593C ------------------ 594 IF (KEEP(252).NE.0 .AND. id%NRHS .NE. id%KEEP(253)) THEN 595C Fwd in facto 596C KEEP(252-253) available on all procs since analysis phase 597C Error: id%NRHS is not allowed to change since analysis 598C because fwd has been performed during facto with 599C KEEP(253) RHS 600 IF (PROKG) WRITE(MPG,'(A)') 601 & ' ERROR: id%NRHS not allowed to change when ICNTL(32)=1' 602 id%INFO(1)=-42 603 id%INFO(2)=id%KEEP(253) 604 GOTO 333 605 ENDIF 606C Testing MTYPE instead of ICNTL(9) 607 IF (KEEP(252).NE.0 .AND. MTYPE.NE.1) THEN 608C Fwd in facto is not compatible with transpose system 609 INFO(1) = -43 610 INFO(2) = 9 611 IF (PROKG) WRITE(MPG,'(A)') 612 & ' ERROR: Transpose system (ICNTL(9).NE.0) not ', 613 & ' compatible with forward performed during', 614 & ' factorization (ICNTL(32)=1)' 615 GOTO 333 616 ENDIF 617 IF (KEEP(248) .NE. 0.AND.KEEP(252).NE.0) THEN 618C Fwd during facto incompatible with sparse RHS 619C Forbid sparse RHS when Fwd performed during facto 620C Sparse RHS may be due to A-1 (ICNTL(30) 621 INFO(1) = -43 622 IF (KEEP(237).NE.0) THEN 623 INFO(2) = 30 ! icntl(30) 624 IF (PROKG) WRITE(MPG,'(A)') 625 & ' ERROR: A-1 functionality incompatible with forward', 626 & ' performed during factorization (ICNTL(32)=1)' 627 ELSE 628 INFO(2) = 20 ! ICNTL(20) 629 IF (PROKG) WRITE(MPG,'(A)') 630 & ' ERROR: sparse RHS incompatible with forward', 631 & ' performed during factorization (ICNTL(32)=1)' 632 ENDIF 633 GOTO 333 634 ENDIF 635 IF (KEEP(237) .NE. 0 .AND. ICNTL21.NE.0) THEN 636 IF (PROKG) WRITE(MPG,'(A)') 637 & ' ERROR: A-1 functionality is incompatible', 638 & ' with distributed solution.' 639 INFO(1)=-48 640 INFO(2)=21 641 GOTO 333 642 ENDIF 643 IF (KEEP(237) .NE. 0 .AND. KEEP(60) .NE.0) THEN 644 IF (PROKG) WRITE(MPG,'(A)') 645 & ' ERROR: A-1 functionality is incompatible', 646 & ' with Schur.' 647 INFO(1)=-48 648 INFO(2)=19 649 GOTO 333 650 ENDIF 651 IF (KEEP(237) .NE. 0 .AND. KEEP(111) .NE.0) THEN 652 IF (PROKG) WRITE(MPG,'(A)') 653 & ' ERROR: A-1 functionality is incompatible', 654 & ' with null space.' 655 INFO(1)=-48 656 INFO(2)=25 657 GOTO 333 658 ENDIF 659 IF (id%NRHS .LE. 0) THEN 660 id%INFO(1)=-45 661 id%INFO(2)=id%NRHS 662 GOTO 333 663 ENDIF 664C Entries of A-1 are stored in place of the input sparse RHS 665C thus no need for RHS to be allocated. 666 IF ( (id%KEEP(237).EQ.0) ) THEN 667 IF ((id%KEEP(248) == 0 .AND.KEEP(221).NE.2) 668 & .OR. ICNTL21==0) THEN 669C RHS must be of size N on the master either to 670C store the dense centralized RHS, either to store 671C the dense centralized solution. 672 CALL ZMUMPS_CHECK_DENSE_RHS 673 & (id%RHS,id%INFO,id%N,id%NRHS,id%LRHS) 674 IF (id%INFO(1) .LT. 0) GOTO 333 675 ENDIF 676 ELSE 677C Check that the constraint NRHS=N is respected 678C Check for valid sparse RHS structure done 679 IF (id%NRHS .NE. id%N) THEN 680 id%INFO(1)=-47 681 id%INFO(2)=id%NRHS 682 GOTO 333 683 ENDIF 684 ENDIF 685 IF (id%KEEP(248) == 1) THEN 686C ------------------------------------ 687C RHS_SPARSE, IRHS_SPARSE and IRHS_PTR 688C must be allocated of adequate size 689C ------------------------------------ 690 IF (( id%NZ_RHS .LE.0 ).AND.(KEEP(237).NE.0)) THEN 691C At least one entry of A-1 must be requested 692 id%INFO(1)=-46 693 id%INFO(2)=id%NZ_RHS 694 GOTO 333 695 ENDIF 696 IF (( id%NZ_RHS .LE.0 ).AND.(KEEP(221).EQ.1)) THEN 697C At least one entry of RHS must be nonzero with 698c Schur reduced RHS option 699 id%INFO(1)=-46 700 id%INFO(2)=id%NZ_RHS 701 GOTO 333 702 ENDIF 703 IF ( .not. associated(id%RHS_SPARSE) )THEN 704 id%INFO(1)=-22 705 id%INFO(2)=10 706 GOTO 333 707 ENDIF 708 IF ( .not. associated(id%IRHS_SPARSE) )THEN 709 id%INFO(1)=-22 710 id%INFO(2)=11 711 GOTO 333 712 ENDIF 713 IF ( .not. associated(id%IRHS_PTR) )THEN 714 id%INFO(1)=-22 715 id%INFO(2)=12 716 GOTO 333 717 ENDIF 718C 719 IF (size(id%IRHS_PTR) < id%NRHS + 1) THEN 720 id%INFO(1)=-22 721 id%INFO(2)=12 722 GOTO 333 723 END IF 724 IF (id%IRHS_PTR(id%NRHS + 1).ne.id%NZ_RHS+1) THEN 725 id%INFO(1)=-27 726 id%INFO(2)=id%IRHS_PTR(id%NRHS+1) 727 GOTO 333 728 END IF 729C compare with dble to prevent overflow 730 IF (dble(id%N)*dble(id%NRHS).LT.dble(id%NZ_RHS)) THEN 731C Possible in case of dupplicate entries in Sparse RHS 732 IF (PROKG) THEN 733 write(MPG,*) 734 & " WARNING: many dupplicate entries in ", 735 & " sparse RHS provided by the user ", 736 & " id%NZ_RHS,id%N,id%NRHS =", 737 & id%NZ_RHS,id%N,id%NRHS 738 ENDIF 739 END IF 740 IF (id%IRHS_PTR(1).ne.1) THEN 741 id%INFO(1)=-28 742 id%INFO(2)=id%IRHS_PTR(1) 743 GOTO 333 744 END IF 745 IF (size(id%IRHS_SPARSE) < id%NZ_RHS) THEN 746 id%INFO(1)=-22 747 id%INFO(2)=11 748 GOTO 333 749 END IF 750 IF (size(id%RHS_SPARSE) < id%NZ_RHS) THEN 751 id%INFO(1)=-22 752 id%INFO(2)=10 753 GOTO 333 754 END IF 755 ENDIF 756C -------------------------------- 757C Set null space options for solve 758C -------------------------------- 759 CALL ZMUMPS_GET_NS_OPTIONS_SOLVE(ICNTL(1),KEEP(1),MPG,INFO(1)) 760 IF (INFO(1) .LT. 0) GOTO 333 761 IF (KEEP(111).eq.-1.AND.id%NRHS.NE.KEEP(112)+KEEP(17))THEN 762 INFO(1)=-32 763 INFO(2)=id%NRHS 764 GOTO 333 765 ENDIF 766 IF (KEEP(111).gt.0 .AND. id%NRHS .NE. 1) THEN 767 INFO(1)=-32 768 INFO(2)=id%NRHS 769 GOTO 333 770 ENDIF 771 IF (KEEP(248) .NE.0.AND.KEEP(111).NE.0) THEN 772C Ignore sparse RHS in case we compute 773C vectors of the null space (KEEP(111)).NE.0.) 774 IF (PROKG) WRITE(MPG,'(A)') 775 & ' ERROR: ICNTL(20) and ICNTL(30) functionalities ', 776 & ' incompatible with null space' 777 INFO(1) = -37 778 IF (KEEP(237).NE.0) THEN 779 INFO(2) = 30 ! icntl(30) 780 IF (PROKG) WRITE(MPG,'(A)') 781 & ' ERROR: ICNTL(30) functionality ', 782 & ' incompatible with null space' 783 ELSE 784 IF (PROKG) WRITE(MPG,'(A)') 785 & ' ERROR: ICNTL(20) functionality ', 786 & ' incompatible with null space' 787 INFO(2) = 20 ! inclt(20) 788 ENDIF 789 GOTO 333 790 ENDIF 791 IF (( KEEP(111) .LT. -1 ) .OR. 792 & (KEEP(111).GT.KEEP(112)+KEEP(17)) .OR. 793 & (KEEP(111) .EQ.-1 .AND. KEEP(112)+KEEP(17).EQ.0)) 794 & THEN 795 INFO(1)=-36 796 INFO(2)=KEEP(111) 797 GOTO 333 798 ENDIF 799 IF (KEEP(221).NE.0.AND.KEEP(111).NE.0) THEN 800 INFO(1)=-37 801 INFO(2)=26 802 GOTO 333 803 ENDIF 804 END IF ! MASTER 805C -------------------------------------- 806C Check distributed solution vectors 807C -------------------------------------- 808 IF (ICNTL21==1) THEN 809 IF ( id%MYID .ne. MASTER .OR. 810 & ( id%MYID .eq. MASTER .AND. 811 & id%KEEP(46) .eq. 1 ) ) THEN 812C (I)SOL_loc should be allocated to hold the 813C distributed solution on exit 814 IF ( id%LSOL_loc < id%KEEP(89) ) THEN 815 id%INFO(1)= -29 816 id%INFO(2)= id%LSOL_loc 817 GOTO 333 818 ENDIF 819 IF (id%KEEP(89) .NE. 0) THEN 820 IF ( .not. associated(id%ISOL_loc) )THEN 821 id%INFO(1)=-22 822 id%INFO(2)=13 823 GOTO 333 824 ENDIF 825 IF ( .not. associated(id%SOL_loc) )THEN 826 id%INFO(1)=-22 827 id%INFO(2)=14 828 GOTO 333 829 ENDIF 830 IF (size(id%ISOL_loc) < id%KEEP(89) ) THEN 831 id%INFO(1)=-22 832 id%INFO(2)=13 833 GOTO 333 834 END IF 835# if defined(MUMPS_F2003) 836 IF (size(id%SOL_loc,kind=8) < 837 & int(id%NRHS-1,8)*int(id%LSOL_loc,8)+ 838 & int(id%KEEP(89),8)) THEN 839 id%INFO(1)=-22 840 id%INFO(2)=14 841 GOTO 333 842 END IF 843# else 844C Warning: size returns a standard INTEGER and could 845C overflow if id%SOL_loc was allocated of size > 2^31-1; 846C still we prefer to perform this test since only (1) very 847C large problems with large NRHS and small numbers of MPI 848C can result in such a situation; (2) the test could be 849C suppressed if needed but might be still be ok in case 850C the right-hand side overflows too. 851 IF (size(id%SOL_loc) < 852 & (id%NRHS-1)*id%LSOL_loc+id%KEEP(89)) THEN 853 id%INFO(1)=-22 854 id%INFO(2)=14 855 GOTO 333 856 END IF 857# endif 858 ENDIF 859 ENDIF 860 ENDIF 861 IF (id%MYID .NE. MASTER) THEN 862 IF (id%KEEP(248) == 1) THEN 863C RHS should NOT be associated 864C if I am not master since it is 865C not even used to store the solution 866 IF ( associated( id%RHS ) ) THEN 867 id%INFO( 1 ) = -22 868 id%INFO( 2 ) = 7 869 GOTO 333 870 END IF 871 IF ( associated( id%RHS_SPARSE ) ) THEN 872 id%INFO( 1 ) = -22 873 id%INFO( 2 ) = 10 874 GOTO 333 875 END IF 876 IF ( associated( id%IRHS_SPARSE ) ) THEN 877 id%INFO( 1 ) = -22 878 id%INFO( 2 ) = 11 879 GOTO 333 880 END IF 881 IF ( associated( id%IRHS_PTR ) ) THEN 882 id%INFO( 1 ) = -22 883 id%INFO( 2 ) = 12 884 GOTO 333 885 END IF 886 END IF 887 ENDIF 888 IF (id%MYID.EQ.MASTER) THEN 889C Do some checks (REDRHS), depending on KEEP(221) 890 CALL ZMUMPS_CHECK_REDRHS(id) 891 END IF ! MYID.EQ.MASTER 892 IF (id%INFO(1) .LT. 0) GOTO 333 893C ------------------------- 894C Propagate possible errors 895C ------------------------- 896 333 CONTINUE 897 CALL MUMPS_PROPINFO( id%ICNTL(1), 898 & id%INFO(1), 899 & id%COMM, id%MYID ) 900 IF ( id%INFO(1) .LT. 0 ) GO TO 90 901C ==================================== 902C END CHECK INTERFACE AND KEEP ENTRIES 903C ==================================== 904C ==================================== 905C Process case of NZ_RHS = 0 with 906C sparse RHS and General Sparse (NOT A-1) 907C ----------------------------------- 908 IF ((id%KEEP(248).EQ.1).AND.(id%KEEP(237).EQ.0)) THEN 909C 910 CALL MPI_BCAST(id%NZ_RHS,1,MPI_INTEGER,MASTER, 911 & id%COMM,IERR) 912C 913 IF (id%NZ_RHS.EQ.0) THEN 914C We reset solution to zero and we return 915C (first freeing working space at label 90) 916 IF ((ICNTL21.EQ.1).AND.(I_AM_SLAVE)) THEN 917C ---------------------- 918C SOL_loc reset to zero 919C ---------------------- 920C ---------------------- 921C Prepare ISOL_loc array 922C ---------------------- 923 LIW_PASSED=max(1,KEEP(32)) 924C Only called if more than 1 pivot 925C was eliminated by the processor. 926C Note that LSOL_loc >= KEEP(89) 927 IF (KEEP(89) .GT. 0) THEN 928 CALL ZMUMPS_DISTSOL_INDICES( MTYPE, id%ISOL_loc(1), 929 & id%PTLUST_S(1), 930 & id%KEEP(1),id%KEEP8(1), 931 & id%IS(1), LIW_PASSED,id%MYID_NODES, 932 & id%N, id%STEP(1), id%PROCNODE_STEPS(1), 933 & id%NSLAVES, scaling_data, LSCAL ) 934 DO J=1, id%NRHS 935 DO I=1, KEEP(89) 936 id%SOL_loc((J-1)*id%LSOL_loc + I) =ZERO 937 ENDDO 938 ENDDO 939 ENDIF 940 ENDIF 941 IF (ICNTL21.NE.1) THEN ! centralized solution 942C ---------------------------- 943C RHS reset to zero on master 944C ---------------------------- 945 IF (id%MYID.EQ.MASTER) THEN 946 DO J=1, id%NRHS 947 DO I=1, id%N 948 id%RHS((J-1)*id%LRHS + I) =ZERO 949 ENDDO 950 ENDDO 951 ENDIF 952 ENDIF 953C 954C print solve phase stats if requested 955 IF ( PROKG ) THEN 956C write(6,*) " NZ_RHS is zero " 957 WRITE( MPG, 150 ) 958 & id%NRHS, ICNTL(27), ICNTL(9), ICNTL(10), ICNTL(11), 959 & ICNTL(20), ICNTL(21), ICNTL(30) 960 IF (KEEP(221).NE.0) THEN 961 WRITE (MPG, 152) KEEP(221) 962 ENDIF 963 IF (KEEP(252).GT.0) THEN ! Fwd during facto 964 WRITE (MPG, 153) KEEP(252) 965 ENDIF 966 ENDIF 967C 968C -------- 969 GOTO 90 ! end of solve deallocate what is needed 970C ==================================== 971C END CHECK INTERFACE AND KEEP ENTRIES 972C ==================================== 973 ENDIF ! test NZ_RHS.EQ.0 974C -------- 975 ENDIF ! (id%KEEP(248).EQ.1).AND.(id%KEEP(237).EQ.0) 976 INTERLEAVE_PAR =.FALSE. 977 DO_PERMUTE_RHS =.FALSE. 978C 979 IF ((id%KEEP(235).NE.0).or.(id%KEEP(237).NE.0)) THEN 980C Case of pruned elimination tree or selected entries in A-1 981 IF (id%KEEP(237).NE.0.AND. 982 & id%KEEP(248).EQ.0) THEN 983C When A-1 is requested (keep(237).ne.0) 984C sparse RHS has been forced to be on. 985 IF (LPOK) THEN 986 WRITE(LP,'(A,I4,I4)') 987 & ' Internal Error 2 in solution driver (A-1) ', 988 & id%KEEP(237), id%KEEP(248) 989 ENDIF 990 CALL MUMPS_ABORT() 991 ENDIF 992C NBT is inout in MUMPS_REALLOC and should be initialized. 993 NBT = 0 994C -- Allocate Step2node on each proc 995 CALL MUMPS_REALLOC(id%Step2node, id%KEEP(28), id%INFO, LP, 996 & FORCE=.TRUE., 997 & STRING='id%Step2node (Solve)', MEMCNT=NBT, ERRCODE=-13) 998 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 999 & id%COMM, id%MYID ) 1000 IF ( INFO(1).LT.0 ) RETURN 1001C -- build Step2node on each proc; 1002C -- this is usefull to have at each step a unique 1003C -- representative node (associated with principal variable of 1004C -- that node. 1005 IF (NBT.NE.0) THEN 1006 ! Step2node was reallocated and needs be recomputed 1007 DO I=1, id%N 1008 IF (id%STEP(I).LE.0) CYCLE ! nonprincipal variables 1009 id%Step2node(id%STEP(I)) = I 1010 ENDDO 1011C ELSE 1012C we reuse Step2node computed in a previous solve phase 1013C Step2node is deallocated each time a new analysis is 1014C performed or when job=-2 is called 1015 ENDIF 1016 NB_BYTES = NB_BYTES + NBT*K34_8 1017 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1018 NB_BYTES_EXTRA = NB_BYTES_EXTRA + NBT * K34_8 1019C Mapping information used during solve. In case of several 1020C facto+solve it has to be recomputed. 1021C In case of several solves with the same 1022C facto, it is not recomputed. 1023C It used to compute the interleaving 1024C for A-1, and, in dev_version, passed to sol_c to compute 1025C some stats 1026 IF((KEEP(235).NE.0).OR.(KEEP(237).NE.0)) THEN 1027 IF(.NOT.associated(id%IPTR_WORKING)) THEN 1028 CALL ZMUMPS_BUILD_MAPPING_INFO(id) 1029 END IF 1030 END IF 1031 ENDIF 1032C 1033C Initialize SIZE_OF_BLOCK from MUMPS_SOL_ES module 1034 IF ( I_AM_SLAVE ) 1035 & CALL MUMPS_SOL_ES_INIT(id%OOC_SIZE_OF_BLOCK, id%KEEP(201)) 1036 DO_NULL_PIV = .TRUE. 1037 NBCOL_INBLOC = -9998 1038 NZ_THIS_BLOCK= -9998 1039 JBEG_RHS = -9998 1040c 1041 IF (id%MYID.EQ.MASTER) THEN ! Compute NRHS_NONEMPTY 1042C 1043C -- Sparse RHS does 1044 IF ( KEEP(111)==0 .AND. KEEP(248)==1 1045#if defined(RHSCOMP_BYROWS) 1046C In case of row storage with reduced right hand side, we 1047C do not take into account empty columns during forward. 1048C Therefore NRHS_NONEMPTY will simply be set to id%NRHS 1049 & .AND. KEEP(221) .NE. 1 1050#endif 1051 & ) THEN 1052C -- Note that KEEP(111).NE.0 (null space on) 1053C -- and KEEP(248).NE.0 will be made incompatible 1054C -- When computing entries of A-1 (or SparseRHS only) 1055 NRHS_NONEMPTY = 0 1056 DO I=1, id%NRHS 1057 IF (id%IRHS_PTR(I).LT.id%IRHS_PTR(I+1)) 1058 & NRHS_NONEMPTY = NRHS_NONEMPTY+1 !ith col in non empty 1059 ENDDO 1060 IF (NRHS_NONEMPTY.LE.0) THEN 1061C Internal error: tested before in mumps_driver 1062 IF (LPOK) 1063 & WRITE(LP,*) " Internal Error 3 in solution driver ", 1064 & " NRHS_NONEMPTY= ", 1065 & NRHS_NONEMPTY 1066 CALL MUMPS_ABORT() 1067 ENDIF 1068 ELSE 1069 NRHS_NONEMPTY = id%NRHS 1070 ENDIF 1071 ENDIF 1072C ------------------------------------ 1073C If there is a special root node, 1074C precompute mapping of root's master 1075C ------------------------------------ 1076 SIZE_ROOT = -33333 1077 IF ( KEEP( 38 ) .ne. 0 ) THEN 1078 MASTER_ROOT = MUMPS_PROCNODE( 1079 & id%PROCNODE_STEPS(id%STEP( KEEP(38))), 1080 & id%NSLAVES ) 1081 IF (id%MYID_NODES .eq. MASTER_ROOT) THEN 1082 SIZE_ROOT = id%root%TOT_ROOT_SIZE 1083 ELSE IF ((id%MYID.EQ.MASTER).AND.KEEP(60).NE.0) THEN 1084C SIZE_ROOT also used for KEEP(221).NE.0 1085 SIZE_ROOT=id%KEEP(116) 1086 ENDIF 1087 ELSE IF (KEEP( 20 ) .ne. 0 ) THEN 1088 MASTER_ROOT = MUMPS_PROCNODE( 1089 & id%PROCNODE_STEPS(id%STEP(KEEP(20))), 1090 & id%NSLAVES ) 1091 IF (id%MYID_NODES .eq. MASTER_ROOT) THEN 1092 SIZE_ROOT = id%IS( 1093 & id%PTLUST_S(id%STEP(KEEP(20)))+KEEP(IXSZ) + 3) 1094 ELSE IF ((id%MYID.EQ.MASTER).AND.KEEP(60).NE.0) THEN 1095C SIZE_ROOT also used for KEEP(221).NE.0 1096 SIZE_ROOT=id%KEEP(116) 1097 ENDIF 1098 ELSE 1099 MASTER_ROOT = -44444 1100 END IF 1101C -------------- 1102C Get block size 1103C -------------- 1104C We work on a maximum of NBRHS at a time. 1105C The leading dimension of RHS is id%LRHS on the host process 1106C and it is set to N on slave processes. 1107 IF (id%MYID .eq. MASTER) THEN 1108 KEEP(84) = ICNTL(27) 1109C Treating ICNTL(27)=0 as if ICNTL(27)=1 1110 IF(ICNTL(27).EQ.0) KEEP(84)=1 1111 IF (KEEP(252).NE.0) THEN 1112! Fwd in facto: all rhs (KEEP(253) need be processed in one pass 1113 NBRHS = KEEP(253) 1114 ELSE 1115 IF (KEEP(201) .EQ. 0 .OR. KEEP(84) .GT. 0) THEN 1116 NBRHS = abs(KEEP(84)) 1117 ELSE 1118 NBRHS = -2*KEEP(84) 1119 END IF 1120 IF (NBRHS .GT. NRHS_NONEMPTY ) NBRHS = NRHS_NONEMPTY 1121C 1122 ENDIF 1123C Avoid to have overflows in NFRONT * NBRHS 1124C 32-bit integer compuitations. 1125C Should be hopefully large-enough for a while. 1126 IF(huge(NBRHS)/id%KEEP(133).LT.NBRHS) THEN 1127 IF (PROKG) WRITE(MPG,'(A,I6,A)')'Warning: NBRHS = ',NBRHS, 1128 & ' might be too large.' 1129 NBRHS = huge(NBRHS)/id%KEEP(133)-1 ! -1 to avoid rounding pbs 1130 IF (PROKG) WRITE(MPG,'(A,I6)')'NBRHS reset to ',NBRHS 1131 END IF 1132 ENDIF 1133#if defined(V_T) 1134 CALL VTBEGIN(glob_comm_ini,IERR) 1135#endif 1136C NRHS_NONEMPTY needed on all procs to allocate RHSCOMP on slaves 1137 CALL MPI_BCAST(NRHS_NONEMPTY,1,MPI_INTEGER,MASTER, 1138 & id%COMM,IERR) 1139 CALL MPI_BCAST(NBRHS,1,MPI_INTEGER,MASTER, 1140 & id%COMM,IERR) 1141C 1142 IF (KEEP(201).GT.0) THEN 1143C --- id%KEEP(201) indicates if OOC is on (=1) of not (=0) 1144C -- 107: number of buffers 1145C Define number of types of files (L, possibly U) 1146 WORKSPACE_MINIMAL_PREFERRED = .FALSE. 1147 IF (id%MYID .eq. MASTER) THEN 1148 KEEP(107) = max(0,KEEP(107)) 1149 IF ((KEEP(107).EQ.0).AND. 1150 & (KEEP(204).EQ.0).AND.(KEEP(211).NE.1) ) THEN 1151C -- default setting for release 4.8 1152 ! Case of 1153 ! -Emmergency buffer only and 1154 ! -Synchronous mode 1155 ! -NO_O_DIRECT (because of synchronous choice) 1156 ! THEN 1157 ! "Basic system-based version" 1158 ! We can force to allocate S to a minimal 1159 ! value. 1160 WORKSPACE_MINIMAL_PREFERRED=.TRUE. 1161 ENDIF 1162 ENDIF 1163 CALL MPI_BCAST( KEEP(107), 1, MPI_INTEGER, 1164 & MASTER, id%COMM, IERR ) 1165 CALL MPI_BCAST( KEEP(204), 1, MPI_INTEGER, 1166 & MASTER, id%COMM, IERR ) 1167 CALL MPI_BCAST( KEEP(208), 2, MPI_INTEGER, 1168 & MASTER, id%COMM, IERR ) 1169 CALL MPI_BCAST( WORKSPACE_MINIMAL_PREFERRED, 1, 1170 & MPI_LOGICAL, 1171 & MASTER, id%COMM, IERR ) 1172C --- end of OOC case 1173 ENDIF 1174 IF ( I_AM_SLAVE ) THEN 1175C 1176C NB_K133: Max number of simultaneously processed 1177C active fronts. 1178C Why more than one active node ? 1179C 1/ In parallel when we start a level 2 node 1180C then we do not know exactly when we will 1181C have received all contributions from the 1182C slaves. 1183C This is very critical in OOC since the 1184C size provided to the solve phase is 1185C much smaller and since we need 1186C to determine the size fo the buffers for IO. 1187C We pospone the allocation of the block NFRONT*NB_NRHS 1188C and solve the problem. 1189C 1190C 1191C 2/ While processing a node and sending information 1192C if we have not enough memory in send buffer 1193C then we must receive. 1194C We feel that this is not so critical. 1195C 1196 NB_K133 = 3 1197C 1198C To this we must add one time KEEP(133) to store 1199C the RHS of the root node if the root is local. 1200C Furthermore this quantity has to be multiplied by the 1201C blocking size in case of multiple RHS. 1202C 1203 IF ( KEEP( 38 ) .NE. 0 .OR. KEEP( 20 ) .NE. 0 ) THEN 1204 IF ( MASTER_ROOT .eq. id%MYID_NODES ) THEN 1205 IF ( 1206 & .NOT. associated(id%root%RHS_CNTR_MASTER_ROOT) 1207 & ) THEN 1208 NB_K133 = NB_K133 + 1 1209 ENDIF 1210 END IF 1211 ENDIF 1212 LWCB8_MIN = int(NB_K133,8)*int(KEEP(133),8)*int(NBRHS,8) 1213C 1214C --------------------------------------------------------------- 1215C Set WK_USER_PROVIDED to true when workspace WK_USER is provided 1216C by user 1217C We can accept WK_USER to be provided on only one proc and 1218C different values of WK_USER per processor. Note that we are 1219C inside a block "IF (I_AM_SLAVE)" 1220 WK_USER_PROVIDED = (id%LWK_USER.NE.0) 1221 IF (id%LWK_USER.EQ.0) THEN 1222 ITMP8 = 0_8 1223 ELSE IF (id%LWK_USER.GT.0) THEN 1224 ITMP8= int(id%LWK_USER,8) 1225 ELSE 1226 ITMP8 = -int(id%LWK_USER,8)* 1000000_8 1227 ENDIF 1228C Incore: Check if the provided size is equal to that used during 1229C facto (case of ITMP8/=0 and KEEP8(24)/=ITMP8) 1230C But also check case of space not provided during solve 1231C but was provided during facto 1232C (case of ITMP8=0 and KEEP8(24)/=0) 1233 IF (KEEP(201).EQ.0) THEN ! incore 1234C Compare provided size with previous size 1235 IF (ITMP8.NE.KEEP8(24)) THEN 1236C -- error when reusing space allocated 1237 INFO(1) = -41 1238 INFO(2) = id%LWK_USER 1239 GOTO 99 ! jump to propinfo 1240 ! (S is used in between and not allocated) 1241 ! NO COMM must occur then before next propinfo 1242 ! it happens in Mila's code but only with 1243 ! KEEP(209) > 0 1244 ENDIF 1245 ELSE 1246 KEEP8(24)=ITMP8 1247 ENDIF 1248C KEEP8(24) holds the size of WK_USER provided by user. 1249C 1250 MAXS = 0_8 1251 IF (WK_USER_PROVIDED) THEN 1252 MAXS = KEEP8(24) 1253 IF (MAXS.LT. KEEP8(20)) THEN 1254 INFO(1)= -11 1255 ! MAXS should be increased by at least ITMP8 1256 ITMP8 = KEEP8(20)+1_8-MAXS 1257 CALL MUMPS_SET_IERROR(ITMP8, INFO(2)) 1258 ENDIF 1259 IF (INFO(1) .GE. 0 ) id%S => id%WK_USER(1:KEEP8(24)) 1260 ELSE IF (associated(id%S)) THEN 1261C Avoid the use of "size(id%S)" because it returns 1262C a default integer that may overflow. Also "size(id%S,kind=8)" 1263C will only be available with Fortran 2003 compilers. 1264 MAXS = KEEP8(23) 1265 ELSE 1266 ! S not allocated and WK_USER not provided ==> must be in OOC 1267 IF (KEEP(201).EQ.0) THEN ! incore 1268 WRITE(*,*) ' Working array S not allocated ', 1269 & ' on entry to solve phase (in core) ' 1270 CALL MUMPS_ABORT() 1271 ELSE 1272C -- OOC and WK_USER not provided: 1273C define size (S) and allocate it 1274C ---- modify size of MAXS: in a simple 1275C ---- system-based version, we want to 1276C ---- use a small size for MAXS, to 1277C ---- avoid the system pagecache to be 1278C ---- polluted by 'our memory' 1279C 1280 IF ( KEEP(209).EQ.-1 .AND. WORKSPACE_MINIMAL_PREFERRED) 1281 & THEN 1282C We need space to load at least the largest factor 1283 MAXS = KEEP8(20) + 1_8 1284 ELSE IF ( KEEP(209) .GE.0 ) THEN 1285C Use suggested value of MAXS provided in KEEP(209) 1286 MAXS = max(int(KEEP(209),8), KEEP8(20) + 1_8) 1287 ELSE 1288 MAXS = id%KEEP8(14) ! initial value: do not use more than 1289 ! minimum (non relaxed) size of OOC facto 1290 ENDIF 1291 ALLOCATE (id%S(MAXS), stat = allocok) 1292 KEEP8(23)=MAXS 1293 IF ( allocok .GT. 0 ) THEN 1294 IF (LPOK) THEN 1295 WRITE(LP,*) id%MYID,': problem allocation of S at solve' 1296 ENDIF 1297 INFO(1) = -13 1298 CALL MUMPS_SET_IERROR(MAXS, INFO(2)) 1299 NULLIFY(id%S) 1300 KEEP8(23)=0_8 1301 ENDIF 1302 NB_BYTES = NB_BYTES + KEEP8(23) * K35_8 1303 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1304C --- end of OOC case 1305 ENDIF 1306C -- end of id%S already associated 1307 ENDIF 1308C 1309C On the slaves, S is divided as follows: 1310C S(1..LA) holds the factors, 1311C S(LA+1..MAXS) is free workspace 1312 IF(KEEP(201).EQ.0)THEN 1313 LA = KEEP8(31) 1314 ELSE 1315C MAXS has normally be dimensionned to store only factors. 1316 LA = MAXS 1317 IF(MAXS.GT.KEEP8(31)+KEEP8(20)*int(KEEP(107)+1,8))THEN 1318C If we have a very large MAXS, the size reserved for 1319C loading the factors into memory does not need to exceed the 1320C total size of factors. The (KEEP8(20)*(KEEP(107)+1)) term 1321C is here in order to ensure that even with round-off 1322C problems (linked to the number of solve zones) factors can 1323C all be stored in-core 1324 LA=KEEP8(31)+KEEP8(20)*int(KEEP(107)+1,8) 1325 ENDIF 1326 ENDIF 1327C 1328C We need to allocate a workspace of size LWCB8 for the solve phase. 1329C Either it is available at the end of MAXS, or we perform a 1330C dynamic allocation. 1331 IF ( MAXS-LA .GT. LWCB8_MIN ) THEN 1332 LWCB8 = MAXS - LA 1333 WORK_WCB => id%S(LA+1_8:LA+LWCB8) 1334 WORK_WCB_ALLOCATED=.FALSE. 1335 ELSE 1336 LWCB8 = LWCB8_MIN 1337 ALLOCATE(WORK_WCB(LWCB8), stat = allocok) 1338 IF (allocok < 0 ) THEN 1339 INFO(1)=-13 1340 CALL MUMPS_SET_IERROR(LWCB8,INFO(2)) 1341 ENDIF 1342 WORK_WCB_ALLOCATED=.TRUE. 1343 NB_BYTES = NB_BYTES + LWCB8*K35_8 1344 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1345 ENDIF 1346 ENDIF ! I_AM_SLAVE 1347C ----------------------------------- 1348 99 CONTINUE 1349 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 1350 & id%COMM,id%MYID) 1351 IF (INFO(1) < 0) GOTO 90 1352C ----------------------------------- 1353 IF ( I_AM_SLAVE ) THEN 1354 IF (KEEP(201).GT.0) THEN 1355 CALL ZMUMPS_INIT_FACT_AREA_SIZE_S(LA) 1356C -- This includes thread creation 1357C -- for asynchronous strategies 1358 CALL ZMUMPS_OOC_INIT_SOLVE(id) 1359 IS_INIT_OOC_DONE = .TRUE. 1360 ENDIF ! KEEP(201).GT.0 1361 ENDIF 1362C 1363 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 1364 & id%COMM,id%MYID) 1365 IF (INFO(1) < 0) GOTO 90 1366C 1367 IF (id%MYID.EQ.MASTER) THEN 1368 IF ( PROKG ) THEN 1369 WRITE( MPG, 150 ) 1370 & id%NRHS, NBRHS, ICNTL(9), ICNTL(10), ICNTL(11), 1371 & ICNTL(20), ICNTL(21), ICNTL(30) 1372 IF (KEEP(111).NE.0) THEN 1373 WRITE (MPG, 151) KEEP(111) 1374 ENDIF 1375 IF (KEEP(221).NE.0) THEN 1376 WRITE (MPG, 152) KEEP(221) 1377 ENDIF 1378 IF (KEEP(252).GT.0) THEN ! Fwd during facto 1379 WRITE (MPG, 153) KEEP(252) 1380 ENDIF 1381 ENDIF 1382C 1383C ==================================== 1384C Define LSCAL, ICNTL10 and ICNTL11 1385C ==================================== 1386C 1387 LSCAL = (((KEEP(52) .GT. 0) .AND. (KEEP(52) .LE. 8)) .OR. ( 1388 & KEEP(52) .EQ. -1) .OR. KEEP(52) .EQ. -2) 1389 ICNTL10 = ICNTL(10) 1390 ICNTL11 = ICNTL(11) 1391C Values of ICNTL(11) out of range 1392 IF ((ICNTL11 .LT. 0).OR.(ICNTL11 .GE. 3)) THEN 1393 ICNTL11 = 0 1394 IF (PROKG) WRITE(MPG,'(A)') 1395 & ' WARNING: ICNTL(11) out of range' 1396 ENDIF 1397 POSTPros = .FALSE. 1398 IF (ICNTL11.NE.0 .OR. ICNTL10.NE.0) THEN 1399 POSTPros = .TRUE. 1400C FORBID ERROR ANALYSIS AND ITERATIVE REFINEMENT 1401C if there are options that are not compatible 1402 IF ((KEEP(111).NE.0).OR.(KEEP(237).NE.0).OR. 1403 & (KEEP(252).NE.0) ) THEN 1404C IF WE RETURN A NULL SPACE BASIS or compute entries in A-1 1405C of Fwd in facto 1406C -When only one columns of A-1 is requested then 1407C we could try to reactivate IR even if 1408C -code need be updated 1409C -accuracy could be # when one or more columns are requested 1410 IF (PROKG) WRITE(MPG,'(A)') 1411 & ' WARNING: Incompatible features: null space basis ', 1412 & ' and Iter. Ref and/or Err. Anal.' 1413 POSTPros = .FALSE. 1414 ELSE IF (KEEP(221).NE.0) THEN 1415C Forbid error analysis and iterative refinement 1416C in case of reduced rhs/solution 1417 IF (PROKG) WRITE(MPG,'(A)') 1418 & ' WARNING: Incompatible features: reduced RHS ', 1419 & ' and Iter. Ref and/or Err. Anal.' 1420 POSTPros = .FALSE. 1421 ELSE IF (NBRHS.GT. 1 .OR. ICNTL(21) .GT. 0) THEN 1422C Forbid error analysis and iterative refinement if 1423C the solution is distributed or 1424C in the case where nrhs > 1 1425 IF (PROKG) WRITE(MPG,'(A)') 1426 & ' WARNING: Incompatible features: nrhs>1 or distrib sol', 1427 & ' and Iter. Ref and/or Err. Anal.' 1428 POSTPros = .FALSE. 1429 ENDIF 1430 IF (.NOT.POSTPros) THEN 1431 ICNTL11 = 0 1432 ICNTL10 = 0 1433 ENDIF 1434 ENDIF 1435C Write a warning. 1436 IF ((ICNTL(10) .NE. 0) .AND. (ICNTL10 .EQ. 0)) THEN 1437 IF (PROKG) WRITE(MPG,'(A)') 1438 & ' WARNING: ICNTL(10) treated as if set to 0 ' 1439 ENDIF 1440 IF ((ICNTL(11) .NE. 0) 1441 & .AND.(ICNTL11 .EQ. 0)) THEN 1442 IF (PROKG) WRITE(MPG,'(A)') 1443 & ' WARNING: ICNTL(11) treated as if set to 0 ' 1444 ENDIF 1445C -- end of test master 1446 END IF 1447 CALL MPI_BCAST(POSTPros,1,MPI_LOGICAL,MASTER, 1448 & id%COMM,IERR) 1449C We need the original matrix only in the case of 1450C we want to perform IR or Error Analysis, i.e. if 1451C POSTPros = TRUE 1452 MAT_ALLOC_LOC = 0 1453 IF ( POSTPros ) THEN 1454 MAT_ALLOC_LOC = 1 1455C Check if the original matrix has been allocated. 1456 IF ( KEEP(54) .EQ. 0 ) THEN 1457C The original matrix is centralized 1458 IF ( id%MYID .eq. MASTER ) THEN 1459 IF (KEEP(55).eq.0) THEN 1460C Case of matrix assembled centralized 1461 IF (.NOT.associated(id%A) .OR. 1462 & (.NOT.associated(id%IRN)) .OR. 1463 & ( .NOT.associated(id%JCN))) THEN 1464 IF (PROKG) WRITE(MPG,'(A)') 1465 & ' WARNING: original centralized assembled', 1466 & ' matrix is not allocated ' 1467 MAT_ALLOC_LOC = 0 1468 ENDIF 1469 ELSE 1470C Case of matrix in elemental format 1471 IF (.NOT.associated(id%A_ELT).OR. 1472 & .NOT.associated(id%ELTPTR).OR. 1473 & .NOT.associated(id%ELTVAR)) THEN 1474 IF (PROKG) WRITE(MPG,'(A)') 1475 & ' WARNING: original elemental matrix is not allocated ' 1476 MAT_ALLOC_LOC = 0 1477 ENDIF 1478 ENDIF 1479 ENDIF !end master, centralized matrix 1480 ELSE 1481C The original matrix is assembled distributed 1482 IF ( I_AM_SLAVE .AND. (id%KEEP8(29) .GT. 0_8) ) THEN 1483C If MAT_ALLOC_LOC = 1 the local distributed matrix is 1484C allocated, otherwise MAT_ALLOC_LOC = 0 1485 IF ((.NOT.associated(id%A_loc)) .OR. 1486 & (.NOT.associated(id%IRN_loc)) .OR. 1487 & (.NOT.associated(id%JCN_loc))) THEN 1488 IF (PROKG) WRITE(MPG,'(A)') 1489 & ' WARNING: original distributed assembled', 1490 & ' matrix is not allocated ' 1491 MAT_ALLOC_LOC = 0 1492 ENDIF 1493 ENDIF 1494 ENDIF ! end test allocation matrix (keep(54)) 1495 ENDIF ! POSTPros 1496 CALL MPI_REDUCE( MAT_ALLOC_LOC, MAT_ALLOC, 1, 1497 & MPI_INTEGER, 1498 & MPI_MIN, MASTER, id%COMM, IERR) 1499 IF ( id%MYID .eq. MASTER ) THEN 1500 IF (MAT_ALLOC.EQ.0) THEN 1501 POSTPros = .FALSE. 1502 ICNTL11 = 0 1503 ICNTL10 = 0 1504C Write a warning. 1505 IF ((ICNTL(10) .NE. 0) .AND. (ICNTL10 .EQ. 0)) THEN 1506 IF (PROKG) WRITE(MPG,'(A)') 1507 & ' WARNING: ICNTL(10) treated as if set to 0 ' 1508 ENDIF 1509 IF ((ICNTL(11) .EQ. 1).OR.(ICNTL(11) .EQ. 2) 1510 & .AND.(ICNTL11 .EQ. 0)) THEN 1511 IF (PROKG) WRITE(MPG,'(A)') 1512 & ' WARNING: ICNTL(11) treated as if set to 0 ' 1513 ENDIF 1514 ENDIF 1515 IF (POSTPros) THEN 1516 ALLOCATE(SAVERHS(id%N*NBRHS),stat = allocok) 1517 IF ( allocok .GT. 0 ) THEN 1518 IF (LPOK) 1519 & WRITE(LP,*) id%MYID, 1520 & ':Problem in solve: error allocating SAVERHS' 1521 INFO(1) = -13 1522 INFO(2) = id%N*NBRHS 1523 GOTO 111 1524 END IF 1525 NB_BYTES = NB_BYTES + int(size(SAVERHS),8)*K35_8 1526 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1527 ENDIF 1528C 1529C Forbid entries in a-1, in case of null space computations 1530c 1531 IF (KEEP(237).NE.0 .AND.KEEP(111).NE.0) THEN 1532C Ignore ENTRIES IN A-1 in case we compute 1533C vectors of the null space (KEEP(111)).NE.0.) 1534C We should still allocate IRHS_SPARSE 1535 IF (PROKG) WRITE(MPG,'(A)') 1536 & ' WARNING: KEEP(237) treated as if set to 0 (null space)' 1537 KEEP(237)=0 1538 ENDIF 1539C -- end of test master 1540 END IF 1541C -------------------------------------------------- 1542C Broadcast information to have all processes do the 1543C same thing (error analysis/iterative refinements/ 1544C scaling/distribution of solution) 1545C -------------------------------------------------- 1546 CALL MPI_BCAST(ICNTL10,1,MPI_INTEGER,MASTER, 1547 & id%COMM,IERR) 1548 CALL MPI_BCAST(ICNTL11,1,MPI_INTEGER,MASTER, 1549 & id%COMM,IERR) 1550 CALL MPI_BCAST(ICNTL21,1,MPI_INTEGER,MASTER, 1551 & id%COMM,IERR) 1552 CALL MPI_BCAST(POSTPros,1,MPI_LOGICAL,MASTER, 1553 & id%COMM,IERR) 1554 CALL MPI_BCAST(LSCAL,1,MPI_LOGICAL,MASTER, 1555 & id%COMM,IERR) 1556 CALL MPI_BCAST(KEEP(111),1,MPI_INTEGER,MASTER, 1557 & id%COMM,IERR) 1558C KEEP(248)==1 if not_NullSpace (KEEP(111)=0) 1559C and sparse RHS on input (id%ICNTL(20)/KEEP(248)==1) 1560C (KEEP(248)==1 implies KEEP(111) = 0, otherwise error was raised) 1561C We cant thus isolate the case of 1562C sparse RHS associated to Null space computation because 1563C in this case preparation is different since 1564C -we skip the forward step and 1565C -the pattern of the RHS 1566C of the bwd is related to null pivot indices found and not 1567C to information contained in the sparse rhs input format. 1568 DO_PERMUTE_RHS = (KEEP(242).NE.0) 1569C apply interleaving in parallel (FOR A-1 or Null space only) 1570 IF ( (id%NSLAVES.GT.1) .AND. (KEEP(243).NE.0) 1571 & ) THEN 1572C -- Option to interleave RHS only makes sense when 1573C -- A-1 option is on or Null space compution are on 1574C (note also that KEEP(243).NE.0 only when PERMUTE_RHS is on) 1575 IF ((KEEP(237).NE.0).or.(KEEP(111).GT.0)) THEN 1576 INTERLEAVE_PAR= .TRUE. 1577 ELSE 1578 IF (PROKG) THEN 1579 write(MPG,*) ' Warning incompatible options ', 1580 & ' interleave RHS reset to false ' 1581 ENDIF 1582 ENDIF 1583 ENDIF 1584C -------------------------------------- 1585C Compute an upperbound of message size 1586C for forward and backward solutions: 1587C -------------------------------------- 1588 MSG_MAX_BYTES_SOLVE = ( 4 + KEEP(133) ) * KEEP(34) + 1589 & KEEP(133) * NBRHS * KEEP(35) 1590 & + 16 * KEEP(34) ! for request id, pointer to next + safety 1591C -------------------------------------- 1592C Compute an upperbound of message size 1593C for ZMUMPS_GATHER_SOLUTION 1594C -------------------------------------- 1595C 1596 IF (KEEP(237).EQ.0) THEN 1597C Note that for ZMUMPS_GATHER_SOLUTION LBUFR buffer should 1598C be larger that MAX_inode(NPIV))*NBRHS + NPIV 1599C which is covered by next formula since KMAX_246_247 is larger 1600C than MAX_inode(NPIV)) 1601C 2 integers packed (npiv and termination) 1602 KMAX_246_247 = max(KEEP(246),KEEP(247)) 1603 MSG_MAX_BYTES_GTHRSOL = ( 2 + KMAX_246_247 ) * KEEP(34) + 1604 & KMAX_246_247 * NBRHS * KEEP(35) 1605 ELSE IF (ICNTL21.EQ.0) THEN 1606C Each message from a slave is of size max 4: 1607C 2 integers : I,J 1608C 1 complex : (Aij)-1 1609C 1 terminaison 1610 MSG_MAX_BYTES_GTHRSOL = ( 3 * KEEP(34) + KEEP(35) ) 1611 ELSE 1612C Not needed in case of distributed solution 1613 MSG_MAX_BYTES_GTHRSOL = 0 1614 ENDIF 1615C The buffer is used both for solve and for ZMUMPS_GATHER_SOLUTION 1616 id%LBUFR_BYTES = max(MSG_MAX_BYTES_SOLVE, MSG_MAX_BYTES_GTHRSOL) 1617 TSIZE = int(min(100_8*int(MSG_MAX_BYTES_GTHRSOL,8), 1618 & 10000000_8)) 1619 id%LBUFR_BYTES = max(id%LBUFR_BYTES,TSIZE) 1620 id%LBUFR = ( id%LBUFR_BYTES + KEEP(34) - 1 ) / KEEP(34) 1621 IF ( associated (id%BUFR) ) THEN 1622 NB_BYTES = NB_BYTES - int(size(id%BUFR),8)*K34_8 1623 DEALLOCATE(id%BUFR) 1624 NULLIFY(id%BUFR) 1625 ENDIF 1626 ALLOCATE (id%BUFR(id%LBUFR),stat=allocok) 1627 IF ( allocok .GT. 0 ) THEN 1628 IF (LPOK) 1629 & WRITE(LP,*) id%MYID, 1630 & ' Problem in solve: error allocating BUFR' 1631 INFO(1) = -13 1632 INFO(2) = id%LBUFR 1633 GOTO 111 1634 ENDIF 1635 NB_BYTES = NB_BYTES + int(size(id%BUFR),8)*K34_8 1636 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1637 IF ( I_AM_SLAVE .AND. id%NSLAVES .GT. 1 ) THEN 1638C ------------------------------------------------------ 1639C Dimension send buffer for small integers, e.g. TRACINE 1640C ------------------------------------------------------ 1641 ZMUMPS_LBUF_INT = ( 20 + id%NSLAVES * id%NSLAVES * 4 ) 1642 & * KEEP(34) 1643 CALL ZMUMPS_BUF_ALLOC_SMALL_BUF( ZMUMPS_LBUF_INT, IERR ) 1644 IF ( IERR .NE. 0 ) THEN 1645 INFO(1) = -13 1646 INFO(2) = ZMUMPS_LBUF_INT 1647 IF ( LPOK) THEN 1648 WRITE(LP,*) id%MYID, 1649 & ':Error allocating small Send buffer:IERR=',IERR 1650 END IF 1651 GOTO 111 1652 END IF 1653C 1654C --------------------------------------- 1655C Dimension cyclic send buffer for normal 1656C messages, based on largest message 1657C size during forward and backward solves 1658C --------------------------------------- 1659C Compute buffer size in BYTES (ZMUMPS_LBUF) 1660C using integer8 in ZMUMPS_LBUF_8 1661C then convert it in integer4 and bound it to largest integer value 1662C 1663 ZMUMPS_LBUF_8 = 1664 & (int(MSG_MAX_BYTES_SOLVE,8)+2_8*int(KEEP(34),8))* 1665 & int(id%NSLAVES,8) 1666C Avoid buffers larger than 100 Mbytes ... 1667 ZMUMPS_LBUF_8 = min(ZMUMPS_LBUF_8, 100000000_8) 1668C ... as long as we can send messages to at least 3 1669C destinations simultaneously 1670 ZMUMPS_LBUF_8 = max(ZMUMPS_LBUF_8, 1671 & int((MSG_MAX_BYTES_SOLVE+2*KEEP(34)),8) * 1672 & int(min(id%NSLAVES,3),8) ) 1673 ZMUMPS_LBUF_8 = ZMUMPS_LBUF_8 + 2_8*int(KEEP(34),8) 1674C Convert to integer and bound it to largest integer 1675C and suppress 10 integers (one should be enough!) 1676C to enable computation of integer size. 1677 ZMUMPS_LBUF_8 = min(ZMUMPS_LBUF_8, 1678 & int(huge(ZMUMPS_LBUF),8) 1679 & - 10_8*int(KEEP(34),8) 1680 & ) 1681 ZMUMPS_LBUF = int(ZMUMPS_LBUF_8, kind(ZMUMPS_LBUF)) 1682 CALL ZMUMPS_BUF_ALLOC_CB( ZMUMPS_LBUF, IERR ) 1683 IF ( IERR .NE. 0 ) THEN 1684 INFO(1) = -13 1685 INFO(2) = ZMUMPS_LBUF/KEEP(34) + 1 1686 IF ( LPOK) THEN 1687 WRITE(LP,*) id%MYID, 1688 & ':Error allocating Send buffer:IERR=', IERR 1689 END IF 1690 GOTO 111 1691 END IF 1692C 1693C 1694C -- end of I am slave 1695 ENDIF 1696C 1697 IF ( POSTPros ) THEN 1698C When Iterative refinement of error analysis requested 1699C Allocate RHS_IR on slave processors 1700C (note that on MASTER RHS_IR points to RHS) 1701 IF ( id%MYID .NE. MASTER ) THEN 1702C 1703 ALLOCATE(RHS_IR(id%N),stat=IERR) 1704 NB_BYTES = NB_BYTES + int(size(RHS_IR),8)*K35_8 1705 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1706 IF ( IERR .GT. 0 ) THEN 1707 INFO(1)=-13 1708 INFO(2)=id%N 1709 IF (LPOK) 1710 & WRITE(LP,*) 'ERROR while allocating RHS on a slave' 1711 GOTO 111 1712 END IF 1713 ELSE 1714 RHS_IR=>id%RHS 1715 ENDIF 1716 ENDIF 1717C 1718 CALL MPI_BCAST(KEEP(497),1,MPI_INTEGER,MASTER, 1719 & id%COMM,IERR) 1720C Parallel A-1 or General sparse and 1721C exploit sparsity between columns 1722 DO_NBSPARSE = ( ( (KEEP(237).NE.0).OR.(KEEP(235).NE.0) ) 1723 & .AND. 1724 & ( KEEP(497).NE.0 ) 1725 & ) 1726 IF ( I_AM_SLAVE ) THEN 1727 IF(DO_NBSPARSE) THEN 1728c --- ALLOCATE outside loop RHS_BOUNDS is needed 1729 LPTR_RHS_BOUNDS = 2*KEEP(28) 1730 ALLOCATE(RHS_BOUNDS(LPTR_RHS_BOUNDS), STAT=IERR) 1731 IF (IERR.GT.0) THEN 1732 INFO(1)=-13 1733 INFO(2)=LPTR_RHS_BOUNDS 1734 IF (LPOK) 1735 & WRITE(LP,*) 'ERROR while allocating RHS_BOUNDS on a slave' 1736 GOTO 111 1737 END IF 1738 NB_BYTES = NB_BYTES + 1739 & int(size(RHS_BOUNDS),8)*K34_8 1740 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1741 PTR_RHS_BOUNDS => RHS_BOUNDS 1742 ELSE 1743 LPTR_RHS_BOUNDS = 1 1744 PTR_RHS_BOUNDS => IDUMMY_TARGET 1745 ENDIF 1746 ENDIF 1747C -------------------------------------------------- 1748 IF ( I_AM_SLAVE ) THEN 1749 IF ((KEEP(221).EQ.2 .AND. KEEP(252).EQ.0)) THEN 1750C -- RHSCOMP must have been allocated in 1751C -- previous solve step (with option KEEP(221)=1) 1752 IF (.NOT.associated(id%RHSCOMP)) THEN 1753 INFO(1) = -35 1754 INFO(2) = 1 1755 GOTO 111 1756 ENDIF 1757C IF ((KEEP(248).EQ.0) .OR. (id%NRHS.EQ.1)) THEN 1758C POSINRHSCOMP_ROW/COL are meaningful and could even be reused 1759 IF (.NOT.associated(id%POSINRHSCOMP_ROW) ) ! .OR. 1760! & .NOT.(id%POSINRHSCOMP_COL_ALLOC)) 1761 & THEN 1762 INFO(1) = -35 1763 INFO(2) = 2 1764 GOTO 111 1765 ENDIF 1766 IF (.not.id%POSINRHSCOMP_COL_ALLOC) THEN 1767C POSINRHSCOMP_COL that is kept from 1768C previous call to solve must then (already) 1769C point to id%POSINRHSCOMP_ROW 1770 id%POSINRHSCOMP_COL => id%POSINRHSCOMP_ROW 1771 ENDIF 1772 ELSE 1773C ---------------------- 1774C Allocate POSINRHSCOMP_ROW/COL 1775C ---------------------- 1776C The size of POSINRHSCOMP arrays 1777C does not depend on the block of RHS 1778C POSINRHSCOMP_ROW/COL are initialized in the loop of RHS 1779 IF (associated(id%POSINRHSCOMP_ROW)) THEN 1780 NB_BYTES = NB_BYTES - 1781 & int(size(id%POSINRHSCOMP_ROW),8)*K34_8 1782 DEALLOCATE(id%POSINRHSCOMP_ROW) 1783 ENDIF 1784 ALLOCATE (id%POSINRHSCOMP_ROW(id%N), stat = allocok) 1785 IF ( allocok .GT. 0 ) THEN 1786 INFO(1)=-13 1787 INFO(2)=id%N 1788 GOTO 111 1789 END IF 1790 NB_BYTES = NB_BYTES + 1791 & int(size(id%POSINRHSCOMP_ROW),8)*K34_8 1792 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1793 IF (id%POSINRHSCOMP_COL_ALLOC) THEN 1794 NB_BYTES = NB_BYTES - 1795 & int(size(id%POSINRHSCOMP_COL),8)*K34_8 1796 DEALLOCATE(id%POSINRHSCOMP_COL) 1797 NULLIFY(id%POSINRHSCOMP_COL) 1798 id%POSINRHSCOMP_COL_ALLOC = .FALSE. 1799 ENDIF 1800C 1801 IF ((KEEP(50).EQ.0).OR.KEEP(237).NE.0) THEN 1802 ALLOCATE (id%POSINRHSCOMP_COL(id%N), stat = allocok) 1803 IF ( allocok .GT. 0 ) THEN 1804 INFO(1)=-13 1805 INFO(2)=id%N 1806 GOTO 111 1807 END IF 1808 id%POSINRHSCOMP_COL_ALLOC = .TRUE. 1809 NB_BYTES = NB_BYTES + 1810 & int(size(id%POSINRHSCOMP_COL),8)*K34_8 1811 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1812 ELSE 1813C Do no allocate POSINRHSCOMP_COL 1814 id%POSINRHSCOMP_COL => id%POSINRHSCOMP_ROW 1815 id%POSINRHSCOMP_COL_ALLOC = .FALSE. 1816 ENDIF 1817 IF (KEEP(221).NE.2) THEN 1818C -- only in the case of bwd after reduced RHS 1819C -- we have to keep "old" RHSCOMP 1820 IF (associated(id%RHSCOMP)) THEN 1821 NB_BYTES = NB_BYTES - id%KEEP8(25)*K35_8 1822 DEALLOCATE(id%RHSCOMP) 1823 NULLIFY(id%RHSCOMP) 1824 id%KEEP8(25)=0_8 1825 ENDIF 1826 ENDIF 1827 ENDIF 1828C --------------------------- 1829C Allocate local workspace 1830C for the solve (ZMUMPS_SOL_C) 1831C --------------------------- 1832 LIWK_SOLVE = 3 * KEEP(28) + 1 1833 LIWK_PTRACB= KEEP(28) 1834C KEEP(228)+1 temporary integer positions 1835C will be needed in ZMUMPS_SOL_S 1836 IF (KEEP(201).EQ.1) THEN 1837 LIWK_SOLVE = LIWK_SOLVE + KEEP(228) + 1 1838 ELSE 1839C Reserve 1 position to pass array of size 1 in routines 1840 LIWK_SOLVE = LIWK_SOLVE + 1 1841 ENDIF 1842 ALLOCATE ( IWK_SOLVE(LIWK_SOLVE), 1843 & PTRACB(LIWK_PTRACB), stat = allocok ) 1844 IF (allocok .GT. 0 ) THEN 1845 INFO(1)=-13 1846 INFO(2)=LIWK_SOLVE + LIWK_PTRACB*KEEP(10) 1847 GOTO 111 1848 END IF 1849 NB_BYTES = NB_BYTES + int(LIWK_SOLVE,8)*K34_8 + 1850 & int(LIWK_PTRACB,8)*K34_8 *int(KEEP(10),8) 1851 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1852C array IWCB used temporarily to hold 1853C indices of a front unpacked from a message 1854C and to stack (potentially in a recursive call) 1855C headers of size 2 positions of CB blocks. 1856 LIWCB = 20*NB_K133*2 + KEEP(133) 1857 ALLOCATE ( IWCB( LIWCB), stat = allocok ) 1858 IF (allocok .GT. 0 ) THEN 1859 INFO(1)=-13 1860 INFO(2)=LIWCB 1861 GOTO 111 1862 END IF 1863 NB_BYTES = NB_BYTES + int(LIWCB,8)*K34_8 1864 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1865C 1866C -- Code for a slave 1867C ----------- 1868C Subdivision 1869C of array IS 1870C ----------- 1871 LIW = KEEP(32) 1872C Define a work array of size maximum global frontal 1873C size (KEEP(133)) for the call to ZMUMPS_SOL_C 1874C This used to be of size id%N. 1875 ALLOCATE(SRW3(KEEP(133)), stat = allocok ) 1876 IF ( allocok .GT. 0 ) THEN 1877 INFO(1)=-13 1878 INFO(2)=KEEP(133) 1879 GOTO 111 1880 END IF 1881 NB_BYTES = NB_BYTES + int(size(SRW3),8)*K35_8 1882 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1883C ----------------- 1884C End of slave code 1885C ----------------- 1886 ELSE 1887C I am the master with host not working 1888C 1889C LIW is used on master when calling 1890C the routine ZMUMPS_GATHER_SOLUTION. 1891 LIW=0 1892 END IF 1893C 1894C Precompute inverse of UNS_PERM outside loop 1895 IF (allocated(UNS_PERM_INV)) DEALLOCATE(UNS_PERM_INV) 1896 IF ( ( id%MYID .eq. MASTER.AND.(KEEP(23).GT.0) .AND. 1897 & (MTYPE .NE. 1).AND.(KEEP(248).NE.0) 1898 & ) 1899C Permute UNS_PERM on master only with 1900C sparse RHS (KEEP(248).NE.0 ) when AT x = b is solved 1901 & .OR. ( KEEP(237).NE.0 .AND. KEEP(23).NE.0 ) 1902C When A-1 is active and when the matrix is unsymmetric 1903C and a column permutation has been applied (Max transversal) 1904C then we have performed a 1905C factorization of a column permuted matrix AQ = LU. 1906C In this case, 1907C the permuted entry must be used to select the target 1908C entries for the BWD (note that a diagonal entry of A-1 1909C is not anymore a diagonal of AQ. Thus a diagonal 1910C of A-1 does not correspond to the same path 1911C in the tree during FWD and BWD steps when MAXTRANS is on 1912C and permutation is not identity.) 1913C Note that the inverse permutation 1914C UNS_PERM_INV needs to be allocated on each proc 1915C since it is used in ZMUMPS_SOL_C routine for pruning. 1916C It is allocated only once and its allocation has been 1917C migrated outside the blocking on the right hand sides. 1918 & ) THEN 1919 ALLOCATE(UNS_PERM_INV(id%N),stat=allocok) 1920 if (allocok .GT.0 ) THEN 1921 INFO(1)=-13 1922 INFO(2)=id%N 1923 GOTO 111 1924 endif 1925 NB_BYTES = NB_BYTES + int(id%N,8)*K34_8 1926 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1927 IF (id%MYID.EQ.MASTER) THEN 1928C Build inverse permutation 1929 DO I = 1, id%N 1930 UNS_PERM_INV(id%UNS_PERM(I))=I 1931 ENDDO 1932 ENDIF 1933C 1934 ELSE 1935 ALLOCATE(UNS_PERM_INV(1), stat=allocok) 1936 if (allocok .GT.0 ) THEN 1937 INFO(1)=-13 1938 INFO(2)=1 1939 GOTO 111 1940 endif 1941 NB_BYTES = NB_BYTES + 1_8*K34_8 1942 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1943 ENDIF 1944C 1945 111 CONTINUE 1946#if defined(V_T) 1947 CALL VTEND(glob_comm_ini,IERR) 1948#endif 1949C 1950C Synchro point + Broadcast of errors 1951C 1952C Propagate errors 1953 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 1954 & id%COMM,id%MYID) 1955 IF (INFO(1) .LT.0 ) GOTO 90 1956 IF ( ( KEEP(237).NE.0 ) .AND. (KEEP(23).NE.0) ) THEN 1957C Broadcast UNS_PERM_INV 1958 CALL MPI_BCAST(UNS_PERM_INV,id%N,MPI_INTEGER,MASTER, 1959 & id%COMM,IERR) 1960 ENDIF 1961C ------------------------------------- 1962C BEGIN 1963C Preparation for distributed solution 1964C ------------------------------------- 1965 IF ( ICNTL21==1 ) THEN 1966 IF (LSCAL) THEN 1967C In case of scaling we will need to scale 1968C back the RHS. Put the values of the scaling 1969C arrays needed to do that on each processor. 1970 IF (id%MYID.NE.MASTER) THEN 1971 IF (MTYPE == 1) THEN 1972 ALLOCATE(id%COLSCA(id%N),stat=allocok) 1973 ELSE 1974 ALLOCATE(id%ROWSCA(id%N),stat=allocok) 1975 ENDIF 1976 IF (allocok > 0) THEN 1977 IF (LPOK) THEN 1978 WRITE(LP,*) 'Error allocating temporary scaling array' 1979 ENDIF 1980 INFO(1)=-13 1981 INFO(2)=id%N 1982 GOTO 40 1983 ENDIF 1984 NB_BYTES = NB_BYTES + int(id%N,8)*K16_8 1985 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 1986 ENDIF 1987 IF (MTYPE == 1) THEN 1988 CALL MPI_BCAST(id%COLSCA(1),id%N, 1989 & MPI_DOUBLE_PRECISION,MASTER, 1990 & id%COMM,IERR) 1991 scaling_data%SCALING=>id%COLSCA 1992 ELSE 1993 CALL MPI_BCAST(id%ROWSCA(1),id%N, 1994 & MPI_DOUBLE_PRECISION,MASTER, 1995 & id%COMM,IERR) 1996 scaling_data%SCALING=>id%ROWSCA 1997 ENDIF 1998 IF (I_AM_SLAVE) THEN 1999 ALLOCATE(scaling_data%SCALING_LOC(id%KEEP(89)), 2000 & stat=allocok) 2001 IF (allocok > 0) THEN 2002 IF (LPOK) THEN 2003 WRITE(LP,*) 'Error allocating local scaling array' 2004 ENDIF 2005 INFO(1)=-13 2006 INFO(2)=id%KEEP(89) 2007 GOTO 40 2008 ENDIF 2009 NB_BYTES = NB_BYTES + int(id%KEEP(89),8)*K16_8 2010 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 2011 ENDIF 2012 ENDIF 2013 IF ( I_AM_SLAVE ) THEN 2014C ---------------------- 2015C Prepare ISOL_loc array 2016C ---------------------- 2017 LIW_PASSED=max(1,LIW) 2018C Only called if more than 1 pivot 2019C was eliminated by the processor. 2020C Note that LSOL_loc >= KEEP(89) 2021 IF (KEEP(89) .GT. 0) THEN 2022 CALL ZMUMPS_DISTSOL_INDICES( MTYPE, id%ISOL_loc(1), 2023 & id%PTLUST_S(1), 2024 & id%KEEP(1),id%KEEP8(1), 2025 & id%IS(1), LIW_PASSED,id%MYID_NODES, 2026 & id%N, id%STEP(1), id%PROCNODE_STEPS(1), 2027 & id%NSLAVES, scaling_data, LSCAL ) 2028 ENDIF 2029 IF (id%MYID.NE.MASTER .AND. LSCAL) THEN 2030C --------------------------------- 2031C Local (small) scaling arrays have 2032C been built, free temporary copies 2033C --------------------------------- 2034 IF (MTYPE == 1) THEN 2035 DEALLOCATE(id%COLSCA) 2036 NULLIFY(id%COLSCA) 2037 ELSE 2038 DEALLOCATE(id%ROWSCA) 2039 NULLIFY(id%ROWSCA) 2040 ENDIF 2041 NB_BYTES = NB_BYTES - int(id%N,8)*K16_8 2042 ENDIF 2043 ENDIF 2044 IF (KEEP(23) .NE. 0 .AND. MTYPE==1) THEN 2045C Broadcast the unsymmetric permutation and 2046C permute the indices in ISOL_loc 2047 IF (id%MYID.NE.MASTER) THEN 2048 ALLOCATE(id%UNS_PERM(id%N),stat=allocok) 2049 IF (allocok > 0) THEN 2050 INFO(1)=-13 2051 INFO(2)=id%N 2052 GOTO 40 2053 ENDIF 2054 ENDIF 2055 ENDIF 2056C 2057C ===================== ERROR handling and propagation ================ 2058 40 CONTINUE 2059 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 2060 & id%COMM,id%MYID) 2061 IF (INFO(1) .LT.0 ) GOTO 90 2062C ====================================================================== 2063C 2064 IF (KEEP(23) .NE. 0 .AND. MTYPE==1) THEN 2065 CALL MPI_BCAST(id%UNS_PERM(1),id%N,MPI_INTEGER,MASTER, 2066 & id%COMM,IERR) 2067 IF (I_AM_SLAVE) THEN 2068 DO I=1, KEEP(89) 2069 id%ISOL_loc(I) = id%UNS_PERM(id%ISOL_loc(I)) 2070 ENDDO 2071 ENDIF 2072 IF (id%MYID.NE.MASTER) THEN 2073 DEALLOCATE(id%UNS_PERM) 2074 NULLIFY(id%UNS_PERM) 2075 ENDIF 2076 ENDIF 2077 ENDIF 2078C -------------------------------------- 2079C Preparation for distributed solution 2080C END 2081C -------------------------------------- 2082C ---------------------------- 2083C Preparation for reduced RHS 2084C ---------------------------- 2085 IF ( ( KEEP(221) .EQ. 1 ) .OR. 2086 & ( KEEP(221) .EQ. 2 ) 2087 & ) THEN 2088C -- First compute MASTER_ROOT_IN_COMM proc number in 2089C COMM_NODES on which is mapped the master of the root. 2090 IF (KEEP(46).EQ.1) THEN 2091 MASTER_ROOT_IN_COMM=MASTER_ROOT 2092 ELSE 2093 MASTER_ROOT_IN_COMM =MASTER_ROOT+1 2094 ENDIF 2095 IF ( id%MYID .EQ. MASTER ) THEN 2096C -------------------------------- 2097C Avoid using LREDRHS when id%NRHS is 2098C equal to 1, as was done for RHS 2099C -------------------------------- 2100 IF (id%NRHS.EQ.1) THEN 2101 LD_REDRHS = id%KEEP(116) 2102 ELSE 2103 LD_REDRHS = id%LREDRHS 2104 ENDIF 2105 ENDIF 2106 IF (MASTER.NE.MASTER_ROOT_IN_COMM) THEN 2107C -- Make available LD_REDRHS on MASTER_ROOT_IN_COMM 2108C This will then be used to test if a single 2109C message can be sent 2110C (this is possible if LD_REDRHS=SIZE_SCHUR) 2111 IF ( id%MYID .EQ. MASTER ) THEN 2112C -- send LD_REDRHS to MASTER_ROOT_IN_COMM 2113C using COMM communicator 2114 CALL MPI_SEND(LD_REDRHS,1,MPI_INTEGER, 2115 & MASTER_ROOT_IN_COMM, 0, id%COMM,IERR) 2116 ELSEIF ( id%MYID.EQ.MASTER_ROOT_IN_COMM) THEN 2117C -- recv LD_REDRHS 2118 CALL MPI_RECV(LD_REDRHS,1,MPI_INTEGER, 2119 & MASTER, 0, id%COMM,STATUS,IERR) 2120 ENDIF 2121C -- other procs not concerned 2122 ENDIF 2123 ENDIF 2124C 2125 IF ( KEEP(248)==1 ) THEN ! Sparse RHS (A-1 or general sparse) 2126! JBEG_RHS - current starting column within A-1 or sparse rhs 2127! set in the loop below and used to obtain the 2128! global index of the column of the sparse RHS 2129! Also used to get index in global permutation. 2130! It also allows to skip empty columns; 2131 JEND_RHS = 0 ! last column in current blockin A-1 2132C 2133C Compute and apply permutations 2134 IF (DO_PERMUTE_RHS) THEN 2135C Allocate PERM_RHS 2136 ALLOCATE(PERM_RHS(id%NRHS),stat=allocok) 2137 IF (allocok > 0) THEN 2138 INFO(1) = -13 2139 INFO(2) = id%NRHS 2140 GOTO 109 2141 ENDIF 2142 NB_BYTES = NB_BYTES + int(id%NRHS,8)*K34_8 2143 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 2144 IF (id%MYID.EQ.MASTER) THEN 2145C PERM_RHS is computed on MASTER, it might be modified 2146C in case of interleaving and will thus be distributed 2147C (BCAST) to all slaves only later. 2148C Compute PERM_RHS 2149C on output: PERM_RHS(k) = i means that i is the kth column 2150C to be processed 2151 IF (KEEP(237).EQ.0) THEN 2152C Permute RHS : case of GS (General Sparse) RHS 2153 CALL MUMPS_PERMUTE_RHS_GS( 2154 & LP, LPOK, PROKG, MPG, KEEP(242), 2155 & id%SYM_PERM(1), id%N, id%NRHS, 2156 & id%IRHS_PTR(1), id%NRHS+1, 2157 & id%IRHS_SPARSE(1), id%NZ_RHS, 2158 & PERM_RHS, IERR) 2159 IF (IERR.LT.0) THEN 2160 INFO(1) = -9999 2161 INFO(2) = IERR 2162 GOTO 109 ! propagate error 2163 ENDIF 2164 ELSE 2165C Case of A-1 : 2166C We compute the permutation of the RHS (sparse matrix) 2167C (to compute all inverse entries) 2168C We apply permutation to IRHS_SPARSE ONLY. 2169C Note NRHS_NONEMPTY holds the nb of non empty columns 2170C in A-1. 2171 STRAT_PERMAM1 = KEEP(242) 2172 CALL MUMPS_PERMUTE_RHS_AM1 2173 & (STRAT_PERMAM1, id%SYM_PERM(1), 2174 & id%IRHS_PTR(1), id%NRHS+1, 2175 & PERM_RHS, id%NRHS, 2176 & IERR 2177 & ) 2178 ENDIF 2179 ENDIF 2180 ENDIF 2181 ENDIF 2182C 2183C Note that within ZMUMPS_SOL_C, PERM_RHS could be used 2184C for A-1 case (with DO_PERMUTE_RHS OR INTERLEAVE_RHS 2185C being tested) to get the column index for the 2186C original matrix of RHS (column index in A-1) 2187C of the permuted columns that have been selected. 2188C PERM_RHS is also used in ZMUMPS_GATHER_SOLUTION 2189C in case of sparse RHS awith DO_PERMUTE_RHS. 2190C 2191C Allocate PERM_RHS of size 1 if not allocated 2192 IF (.NOT. allocated(PERM_RHS)) THEN 2193 ALLOCATE(PERM_RHS(1),stat=allocok) 2194 IF (allocok > 0) THEN 2195 INFO(1) = -13 2196 INFO(2) = 1 2197 GOTO 109 2198 ENDIF 2199 NB_BYTES = NB_BYTES + int(size(PERM_RHS),8)*K34_8 2200 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 2201 ENDIF 2202C Propagate errors 2203109 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 2204 & id%COMM,id%MYID) 2205 IF (INFO(1) .LT.0 ) GOTO 90 2206c -------------------------- 2207c -------------------------- 2208 IF (id%NSLAVES .EQ. 1) THEN 2209c - In case of NS/A-1 we may want to permute RHS 2210c - for NS thus is to apply permutation to PIVNUL_LIST 2211* - before starting loop of NBRHS 2212 IF (DO_PERMUTE_RHS .AND. KEEP(111).NE.0 ) THEN 2213C NOTE: 2214C when host not working both master and slaves have 2215C in this case the complete list 2216 WRITE(*,*) id%MYID, ':INTERNAL ERROR 1 : ', 2217 & ' PERMUTE RHS during null space computation ', 2218 & ' not available yet ' 2219 CALL MUMPS_ABORT() 2220 ENDIF ! End Permute_RHS 2221 ELSE 2222C Phase 1 : ZMUMPS_PERMUTE_RHS_NS 2223C local permutations to minimize sequential disk access 2224C with chunck of size KEEP(84)/NSLAVES 2225C Phase 2 : ZMUMPS_SOL_APPLY_PARPERM 2226C parallel redistribution to exploit // disk access feature 2227 IF (DO_PERMUTE_RHS .AND. KEEP(111).NE.0 ) THEN 2228C Phase 1 to be called on each proc 2229 WRITE(*,*) id%MYID, ':INTERNAL ERROR 2 : ', 2230 & ' PERMUTE RHS during null space computation ', 2231 & ' not available yet ' 2232 CALL MUMPS_ABORT() 2233C 2234C 2235 ENDIF ! End DO_PERMUTE_RHS 2236 IF (INTERLEAVE_PAR) THEN 2237 IF ( KEEP(111).NE.0 ) THEN 2238 WRITE(*,*) id%MYID, ':INTERNAL ERROR 3 : ', 2239 & ' INTERLEAVE RHS during null space computation ', 2240 & ' not available yet ' 2241 CALL MUMPS_ABORT() 2242 ELSE 2243C - A-1 + Interleave: 2244C permute RHS on master 2245 IF (id%MYID.EQ.MASTER) THEN 2246C -- PERM_RHS must have been already set or initialized 2247C -- it is then modified in next routine 2248 SIZE_WORKING = id%IPTR_WORKING(id%NPROCS+1)-1 2249 SIZE_IPTR_WORKING = id%NPROCS+1 2250 CALL MUMPS_INTERLEAVE_RHS_AM1( 2251 & PERM_RHS, id%NRHS, 2252 & id%IPTR_WORKING(1), SIZE_IPTR_WORKING, 2253 & id%WORKING(1), SIZE_WORKING, 2254 & id%IRHS_PTR(1), 2255 & id%STEP(1), id%SYM_PERM(1), id%N, NBRHS, 2256 & id%PROCNODE_STEPS(1), KEEP(28), id%NSLAVES, 2257 & KEEP(493).NE.0, 2258 & KEEP(495).NE.0, KEEP(496), PROKG, MPG 2259 & ) 2260 ENDIF ! End Master 2261 ENDIF ! End A-1 / NS 2262 ENDIF ! End INTERLEAVE_PAR 2263C ------------- 2264 ENDIF ! End Parallel Case 2265c -------------------------- 2266c 2267 IF (DO_PERMUTE_RHS.AND.(KEEP(111).EQ.0)) THEN 2268C --- Distribute PERM_RHS before loop of RHS 2269C --- (with null space option PERM_RHS is not allocated / needed 2270C to permute the null column pivot list) 2271 CALL MPI_BCAST(PERM_RHS(1), 2272 & id%NRHS, 2273 & MPI_INTEGER, 2274 & MASTER, id%COMM,IERR) 2275 ENDIF 2276C ============================== 2277C BLOCKING ON the number of RHS 2278C We work on a maximum of NBRHS at a time. 2279C the leading dimension of RHS is id%LRHS on master 2280C and is set to N on slaves 2281C ============================== 2282C We may want to allow to have NBRHS that varies 2283C this is typically the case when a partitionning of 2284C the right hand side is performed and leads to 2285C irregular partitions. 2286C We only have to be sure that the size of each partition 2287C is smaller than NBRHS. 2288 BEG_RHS=1 2289 DO WHILE (BEG_RHS.LE.NRHS_NONEMPTY) 2290C ========================== 2291C -- NBRHS : Original block size 2292C -- BEG_RHS : Column index of the first RHS in the list of 2293C non empty RHS (RHS_LOC) to 2294C be processed during this iteration 2295C -- NBRHS_EFF : Effective block size at current iteration 2296C In case of sparse RHS (KEEP(248)==1) NBRHS_EFF only refers to 2297C non-empty columns and is used to compute NBCOL_INBLOC 2298C -- NBCOL_INBLOC : the number of columns of sparse RHS needed 2299C to get NBRHS_EFF non empty columns columns of 2300C sparse RHS processed at each step 2301C 2302 NBRHS_EFF = min(NRHS_NONEMPTY-BEG_RHS+1, NBRHS) 2303C 2304C Sparse RHS 2305C Free space and reset pointers if needed 2306 IF (IRHS_SPARSE_COPY_ALLOCATED) THEN 2307 NB_BYTES = NB_BYTES - 2308 & int(size(IRHS_SPARSE_COPY),8)*K34_8 2309 DEALLOCATE(IRHS_SPARSE_COPY) 2310 IRHS_SPARSE_COPY_ALLOCATED=.FALSE. 2311 NULLIFY(IRHS_SPARSE_COPY) 2312 ENDIF 2313 IF (IRHS_PTR_COPY_ALLOCATED) THEN 2314 NB_BYTES = NB_BYTES - 2315 & int(size(IRHS_PTR_COPY),8)*K34_8 2316 DEALLOCATE(IRHS_PTR_COPY) 2317 IRHS_PTR_COPY_ALLOCATED=.FALSE. 2318 NULLIFY(IRHS_PTR_COPY) 2319 ENDIF 2320 IF (RHS_SPARSE_COPY_ALLOCATED) THEN 2321 NB_BYTES = NB_BYTES - 2322 & int(size(RHS_SPARSE_COPY),8)*K35_8 2323 DEALLOCATE(RHS_SPARSE_COPY) 2324 RHS_SPARSE_COPY_ALLOCATED=.FALSE. 2325 NULLIFY(RHS_SPARSE_COPY) 2326 ENDIF 2327C 2328C =========================================================== 2329C Set LD_RHS and IBEG for the accesses to id%RHS (in cases 2330C id%RHS is accessed). Remark that IBEG might still be 2331C overwritten later, in case of general sparse right-hand side 2332C and centralized solution to skip empty columns 2333C =========================================================== 2334 IF ( 2335C slave procs 2336 & ( id%MYID .NE. MASTER ) 2337C even on master when RHS not allocated 2338 & .or. 2339C Case of Master working but with distributed sol and 2340C ( sparse RHS or null space ) 2341C -- Allocate not needed on host not working 2342 & ( I_AM_SLAVE .AND. id%MYID .EQ. MASTER .AND. 2343 & ICNTL21 .NE.0 .AND. 2344 & ( KEEP(248).ne.0 .OR. KEEP(221).EQ.2 2345 & .OR. KEEP(111).NE.0 ) 2346 & ) 2347 & .or. 2348C Case of Master and 2349C (compute entries of INV(A)) 2350C Even when I am a master with host not working I 2351C am in charge of gathering solution to scale it 2352C and to copy it back in the sparse RHS format 2353 & ( id%MYID .EQ. MASTER .AND. (KEEP(237).NE.0) ) 2354C 2355 & ) THEN 2356 LD_RHS = id%N 2357 IBEG = 1 2358 ELSE 2359 ! (id%MYID .eq. MASTER) 2360 IF ( associated(id%RHS) ) THEN 2361C Leading dimension of RHS on master is id%LRHS 2362 LD_RHS = max(id%LRHS, id%N) 2363 ELSE 2364C --- LRHS might not be defined (dont use it) 2365 LD_RHS = id%N 2366 ENDIF 2367 IBEG = int(BEG_RHS-1,8) * int(LD_RHS,8) + 1_8 2368 ENDIF 2369C JBEG_RHS might also be used in DISTRIBUTED_SOLUTION 2370C even when RHS is not sparse on input. In this case, 2371C there are no empty columns. (If RHS is sparse JBEG_RHS 2372C is overwritten). 2373 JBEG_RHS = BEG_RHS 2374C ========================================== 2375C Shift empty columns in case of sparse RHS 2376C ========================================== 2377 IF ( (id%MYID.EQ.MASTER) .AND. 2378 & KEEP(248)==1 ) THEN 2379C update position of JBEG_RHS on first non-empty 2380C column of this block 2381 JBEG_RHS = JEND_RHS + 1 2382#if defined(RHSCOMP_BYROWS) 2383C In case RHSCOMP is stored by rows, we need to ensure 2384C that the blocks during forward and backward are the 2385C same. For that, a simple and safe solution consists in 2386C avoiding skipping empty columns during the forward step. 2387 IF (KEEP(221).NE.1) THEN 2388#endif 2389 IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR) THEN 2390 DO WHILE ( id%IRHS_PTR(PERM_RHS(JBEG_RHS)) .EQ. 2391 & id%IRHS_PTR(PERM_RHS(JBEG_RHS)+1) ) 2392C Empty column 2393 IF ((KEEP(237).EQ.0).AND.(ICNTL21.EQ.0).AND. 2394 & (KEEP(221).NE.1) ) THEN 2395C General sparse RHS (NOT A-1) and centralized solution 2396C Set to zero part of the 2397C solution corresponding to empty columns 2398 DO I=1, id%N 2399 id%RHS((PERM_RHS(JBEG_RHS) -1)*LD_RHS+I) 2400 & = ZERO 2401 ENDDO 2402 ENDIF 2403 JBEG_RHS = JBEG_RHS +1 2404 ENDDO 2405 ELSE 2406 DO WHILE( id%IRHS_PTR(JBEG_RHS) .EQ. 2407 & id%IRHS_PTR(JBEG_RHS+1) ) 2408 IF ((KEEP(237).EQ.0).AND.(ICNTL21.EQ.0).AND. 2409 & (KEEP(221).NE.1) ) THEN 2410C Case of general sparse RHS (NOT A-1) and 2411C centralized solution: set to zero part of 2412C the solution corresponding to empty columns 2413 DO I=1, id%N 2414 id%RHS((JBEG_RHS -1)*LD_RHS + I) = ZERO 2415 ENDDO 2416 ENDIF 2417 IF (KEEP(221).EQ.1) THEN 2418C Reduced RHS set to ZERO 2419 DO I = 1, id%SIZE_SCHUR 2420 id%REDRHS((JBEG_RHS-1)*LD_REDRHS + I) = ZERO 2421 ENDDO 2422 ENDIF 2423 JBEG_RHS = JBEG_RHS +1 2424 ENDDO 2425 ENDIF ! End DO_PERMUTE_RHS.OR.INTERLEAVE_PAR 2426#if defined(RHSCOMP_BYROWS) 2427 ENDIF 2428C In that case we will have NB_RHSSKIPPED=0 2429C and we have JBEG_RHS = JEND_RHS+1 2430 IF (KEEP(221).EQ.1) THEN 2431 IF ( id%IRHS_PTR(JBEG_RHS) .EQ. 2432 & id%IRHS_PTR(JBEG_RHS+1) ) THEN 2433 DO J=JBEG_RHS, JBEG_RHS + NBRHS_EFF -1 2434 DO I=1, id%SIZE_SCHUR 2435 id%REDRHS((J-1)*LD_REDRHS + I) = ZERO 2436 ENDDO 2437 ENDDO 2438 ENDIF 2439 ENDIF 2440#endif 2441C Count nb of RHS columns skipped: useful for 2442C * ZMUMPS_DISTRIBUTED_SOLUTION to reset those 2443C columns to zero. 2444C * in case of reduced right-hand side, to set 2445C corresponding entries of RHSCOMP to 0 after 2446C forward phase. 2447 NB_RHSSKIPPED = JBEG_RHS - (JEND_RHS + 1) 2448 IF ((KEEP(248).EQ.1).AND.(KEEP(237).EQ.0) 2449 & .AND. (ICNTL21.EQ.0)) 2450 & THEN 2451 ! case of general sparse rhs with centralized solution, 2452 !set IBEG to shifted columns 2453 ! (after empty columns have been skipped) 2454 IBEG = int(JBEG_RHS-1,8) * int(LD_RHS,8) + 1_8 2455 ENDIF 2456 ENDIF ! of if (id%MYID.EQ.MASTER) .AND. KEEP(248)==1 2457 CALL MPI_BCAST( JBEG_RHS, 1, MPI_INTEGER, 2458 & MASTER, id%COMM, IERR ) 2459C 2460C Shift on REDRHS in reduced RHS functionality 2461C 2462 IF (id%MYID.EQ.MASTER .AND. KEEP(221).NE.0) THEN 2463C Initialize IBEG_REDRHS 2464C Note that REDRHS always has id%NRHS Colmuns 2465 IBEG_REDRHS= int(JBEG_RHS-1,8)*int(LD_REDRHS,8) + 1_8 2466 ELSE 2467 IBEG_REDRHS=-142424_8 ! Should not be used 2468 ENDIF 2469C 2470C ===================== 2471C BEGIN 2472C Prepare RHS on master 2473C 2474#if defined(V_T) 2475 CALL VTBEGIN(perm_scal_ini,IERR) 2476#endif 2477 IF (id%MYID .eq. MASTER) THEN 2478C ====================== 2479 IF (KEEP(248)==1) THEN 2480C ====================== 2481C 2482C Sparse RHS format ( A-1 or sparse input format) 2483C is provided as input by the user (IRHS_SPARSE ...) 2484C -------------------------------------------------- 2485C Compute NZ_THIS_BLOCK and NBCOL_INBLOC 2486C where 2487C NZ_THIS_BLOCK is defined 2488C as the number of entries in the next NBRHS_EFF 2489C non empty columns (note that since they might be permuted 2490C then the following formula is not always valid: 2491C NZ_THIS_BLOCK=id%IRHS_PTR(BEG_RHS+NBRHS_EFF)- 2492C & id%IRHS_PTR(BEG_RHS) 2493C anyway NBCOL_INBLOC also need be computed so going through 2494C columns one at a time is needed. 2495C 2496 NBCOL = 0 2497 NBCOL_INBLOC = 0 2498 NZ_THIS_BLOCK = 0 2499C With exploit sparsity we skip empty rows up to reaching 2500C the first non empty column; then we process a block of 2501C maximum size NBRHS_EFF except if we reach another empty 2502C column. (We are not sure to have a copy allocated 2503C and thus cannot compress on the fly, as done naturally 2504C for A-1). 2505 STOP_AT_NEXT_EMPTY_COL = .FALSE. 2506 DO I=JBEG_RHS, id%NRHS 2507 NBCOL_INBLOC = NBCOL_INBLOC +1 2508 IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR) THEN 2509C PERM_RHS(k) = i means that i is the kth 2510C column to be processed 2511C PERM_RHS should also be defined for 2512C empty columns i in A-1 (PERM_RHS(K) = i) 2513 COLSIZE = id%IRHS_PTR(PERM_RHS(I)+1) 2514 & - id%IRHS_PTR(PERM_RHS(I)) 2515 ELSE 2516 COLSIZE = id%IRHS_PTR(I+1) - id%IRHS_PTR(I) 2517 ENDIF 2518 IF ((.NOT.STOP_AT_NEXT_EMPTY_COL).AND.(COLSIZE.GT.0).AND. 2519 & (KEEP(237).EQ.0)) THEN 2520C -- set STOP_NEXT_EMPTY_COL only for general 2521C -- sparse case (not AM-1) 2522 STOP_AT_NEXT_EMPTY_COL =.TRUE. 2523 ENDIF 2524 IF (COLSIZE.GT.0 2525#if defined(RHSCOMP_BYROWS) 2526C In case of forward-only, we do not skip empty RHS. 2527C This would cause problems during the backward phase: since 2528C each block of RHSCOMP has a row-major storage and inside 2529C each block, data is congiguous, blocks must be the same 2530C during forward and during backward. Hence NB_RHSSKIPPED 2531C will be 0. 2532C 2533 & .OR. KEEP(221) .EQ. 1 2534#endif 2535 & ) THEN 2536 NBCOL = NBCOL+1 2537 NZ_THIS_BLOCK = NZ_THIS_BLOCK + COLSIZE 2538 ELSE IF (STOP_AT_NEXT_EMPTY_COL) THEN 2539C We have reached an empty column with already selected non empty 2540C columns: reduce block size to non empty columns reached so far. 2541 NBCOL_INBLOC = NBCOL_INBLOC -1 2542 NBRHS_EFF = NBCOL 2543 EXIT 2544 ENDIF 2545 IF (NBCOL.EQ.NBRHS_EFF) EXIT 2546 ENDDO 2547#if defined(RHSCOMP_BYROWS) 2548 IF (NZ_THIS_BLOCK .eq. 0) THEN 2549C Skip block, 2550C set REDRHS, RHSCOMP will be set later 2551 IF (KEEP(221).EQ.1) THEN 2552 DO J=JBEG_RHS, JBEG_RHS+ NBRHS_EFF -1 2553 DO I = 1, id%SIZE_SCHUR 2554 id%REDRHS((JBEG_RHS-1)*LD_REDRHS + I) = ZERO 2555 ENDDO 2556 ENDDO 2557 ELSE 2558 WRITE(*,*) "Internal error 15 is sol_driver" 2559 CALL MUMPS_ABORT() 2560 ENDIF 2561 ENDIF 2562#else 2563 IF (NZ_THIS_BLOCK.EQ.0) THEN 2564 WRITE(*,*) " Internal Error 16 in sol driver NZ_THIS_BLOCK=", 2565 & NZ_THIS_BLOCK 2566 CALL MUMPS_ABORT() 2567 ENDIF 2568#endif 2569C 2570 IF (NBCOL.NE.NBRHS_EFF.AND. (KEEP(237).NE.0) 2571 & .AND.KEEP(221).NE.1) THEN 2572C With exploit sparsity for general sparse RHS (Not A-1) 2573C we skip empty rows up to reaching 2574C the first non empty column; then we process a block of 2575C maximum size NBRHS_EFF except if we reach another empty 2576C column. (We are not sure to have a copy allocated 2577C and thus cannot compress on the fly, as done naturally 2578C for A-1). Thus NBCOL might be smaller than NBRHS_EFF 2579 WRITE(6,*) ' Internal Error 8 in solution driver ', 2580 & NBCOL, NBRHS_EFF 2581 call MUMPS_ABORT() 2582 ENDIF 2583C ------------------------------------------------------------- 2584C 2585 IF (NZ_THIS_BLOCK .NE. 0) THEN 2586C ----------------------------------------------------------- 2587C We recall that 2588C NBCOL_INBLOC is the number of columns of sparse RHS needed 2589C to get NBRHS_EFF non empty columns: 2590 ALLOCATE(IRHS_PTR_COPY(NBCOL_INBLOC+1),stat=allocok) 2591 if (allocok .GT.0 ) then 2592 INFO(1)=-13 2593 INFO(2)=NBCOL_INBLOC+1 2594 GOTO 30 2595 endif 2596 IRHS_PTR_COPY_ALLOCATED = .TRUE. 2597 NB_BYTES = NB_BYTES + int(NBCOL_INBLOC+1,8)*K34_8 2598 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 2599C 2600 JEND_RHS =JBEG_RHS + NBCOL_INBLOC - 1 2601C ----------------------------------------------------------- 2602C Initialize IRHS_PTR_COPY 2603C compute local copy (compressed) of id%IRHS_PTR on Master 2604 IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR) THEN 2605 IPOS = 1 2606 J = 0 2607 DO I=JBEG_RHS, JBEG_RHS + NBCOL_INBLOC -1 2608 J = J+1 2609 IRHS_PTR_COPY(J) = IPOS 2610 COLSIZE = id%IRHS_PTR(PERM_RHS(I)+1) 2611 & - id%IRHS_PTR(PERM_RHS(I)) 2612 IPOS = IPOS + COLSIZE 2613 ENDDO 2614 ELSE 2615 IPOS = 1 2616 J = 0 2617 DO I=JBEG_RHS, JBEG_RHS + NBCOL_INBLOC -1 2618 J = J+1 2619 IRHS_PTR_COPY(J) = IPOS 2620 COLSIZE = id%IRHS_PTR(I+1) 2621 & - id%IRHS_PTR(I) 2622 IPOS = IPOS + COLSIZE 2623 ENDDO 2624 ENDIF ! End DO_PERMUTE_RHS.OR.INTERLEAVE_PAR 2625 IRHS_PTR_COPY(NBCOL_INBLOC+1)= IPOS 2626 IF ( IPOS-1 .NE. NZ_THIS_BLOCK ) THEN 2627 WRITE(*,*) "Error in compressed copy of IRHS_PTR" 2628 IERR = 99 2629 call MUMPS_ABORT() 2630 ENDIF 2631C ----------------------------------------------------------- 2632C IRHS_SPARSE : do a copy or point to the original indices 2633C 2634C Check whether IRHS_SPARSE_COPY need be allocated 2635 IF (KEEP(23) .NE. 0 .and. MTYPE .NE. 1) THEN 2636C AP = LU and At x = b ==> b need be permuted 2637 ALLOCATE(IRHS_SPARSE_COPY(NZ_THIS_BLOCK) 2638 & ,stat=allocok) 2639 if (allocok .GT.0 ) then 2640 INFO(1)=-13 2641 INFO(2)=NZ_THIS_BLOCK 2642 GOTO 30 2643 endif 2644 IRHS_SPARSE_COPY_ALLOCATED=.TRUE. 2645 NB_BYTES = NB_BYTES + int(NZ_THIS_BLOCK,8)*K34_8 2646 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 2647 ELSE IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR.OR. 2648 & (KEEP(237).NE.0)) THEN 2649C Columns are not contiguous and need be copied one by one 2650C IRHS_SPARSE_COPY will hold a copy of contiguous permuted 2651C columns so an explicit copy is needed. 2652C IRHS_SPARSE_COPY is also allways allocated with A-1, 2653C to enable receiving during mumps_gather_solution 2654C . on the master in any order. 2655 ALLOCATE(IRHS_SPARSE_COPY(NZ_THIS_BLOCK), 2656 & stat=allocok) 2657 IF (allocok .GT.0 ) THEN 2658 IERR = 99 2659 GOTO 30 2660 ENDIF 2661 IRHS_SPARSE_COPY_ALLOCATED=.TRUE. 2662 NB_BYTES = NB_BYTES + int(NZ_THIS_BLOCK,8)*K34_8 2663 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 2664C 2665 ENDIF 2666C 2667C Initialize IRHS_SPARSE_COPY 2668 IF (IRHS_SPARSE_COPY_ALLOCATED) THEN 2669 IF ( DO_PERMUTE_RHS.OR.INTERLEAVE_PAR ) THEN 2670 IPOS = 1 2671 DO I=JBEG_RHS, JBEG_RHS + NBCOL_INBLOC -1 2672 COLSIZE = id%IRHS_PTR(PERM_RHS(I)+1) 2673 & - id%IRHS_PTR(PERM_RHS(I)) 2674 IRHS_SPARSE_COPY(IPOS:IPOS+COLSIZE-1) = 2675 & id%IRHS_SPARSE(id%IRHS_PTR(PERM_RHS(I)): 2676 & id%IRHS_PTR(PERM_RHS(I)+1) -1) 2677 IPOS = IPOS + COLSIZE 2678 ENDDO 2679 ELSE 2680 IRHS_SPARSE_COPY = id%IRHS_SPARSE( 2681 & id%IRHS_PTR(JBEG_RHS): 2682 & id%IRHS_PTR(JBEG_RHS)+NZ_THIS_BLOCK-1) 2683 ENDIF 2684 ELSE 2685 IRHS_SPARSE_COPY 2686c * (1:NZ_THIS_BLOCK) 2687 & => 2688 & id%IRHS_SPARSE(id%IRHS_PTR(JBEG_RHS): 2689 & id%IRHS_PTR(JBEG_RHS)+NZ_THIS_BLOCK-1) 2690 ENDIF 2691 IF (LSCAL.OR.DO_PERMUTE_RHS.OR.INTERLEAVE_PAR.OR. 2692 & (KEEP(237).NE.0)) THEN 2693C if scaling is on or if columns of the RHS are 2694C permuted then a copy of RHS_SPARSE is needed. 2695C Also always allocated with A-1, 2696c to enable receiving during mumps_gather_solution 2697C on the master in any order. 2698C 2699 ALLOCATE(RHS_SPARSE_COPY(NZ_THIS_BLOCK), 2700 & stat=allocok) 2701 IF (allocok .GT.0 ) THEN 2702 INFO(1)=-13 2703 INFO(2)=NZ_THIS_BLOCK 2704 GOTO 30 2705 ENDIF 2706 RHS_SPARSE_COPY_ALLOCATED = .TRUE. 2707 NB_BYTES = NB_BYTES + int(NZ_THIS_BLOCK,8)*K35_8 2708 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 2709 ELSE 2710 IF ( KEEP(248)==1 ) THEN 2711 RHS_SPARSE_COPY 2712c * (1:NZ_THIS_BLOCK) 2713 & => id%RHS_SPARSE(id%IRHS_PTR(JBEG_RHS): 2714 & id%IRHS_PTR(JBEG_RHS)+NZ_THIS_BLOCK-1) 2715 ELSE 2716 RHS_SPARSE_COPY 2717c * (1:NZ_THIS_BLOCK) 2718 & => id%RHS_SPARSE(id%IRHS_PTR(BEG_RHS): 2719 & id%IRHS_PTR(BEG_RHS)+NZ_THIS_BLOCK-1) 2720 ENDIF 2721 ENDIF 2722 IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR.OR. 2723 & (id%KEEP(237).NE.0)) THEN 2724 IF (id%KEEP(237).NE.0) THEN 2725C --initialized to one; it might be 2726C modified if scaling is on (one first entry in each col is scaled) 2727 RHS_SPARSE_COPY = ONE 2728 ELSE IF (.NOT. LSCAL) THEN 2729C -- Columns are not contiguous and need be copied one by one 2730C -- This need not be done if scaling is on because it 2731C -- will done and scaled later. 2732 IPOS = 1 2733 DO I=JBEG_RHS, JBEG_RHS + NBCOL_INBLOC -1 2734 COLSIZE = id%IRHS_PTR(PERM_RHS(I)+1) 2735 & - id%IRHS_PTR(PERM_RHS(I)) 2736 IF (COLSIZE .EQ. 0) CYCLE 2737 RHS_SPARSE_COPY(IPOS:IPOS+COLSIZE-1) = 2738 & id%RHS_SPARSE(id%IRHS_PTR(PERM_RHS(I)): 2739 & id%IRHS_PTR(PERM_RHS(I)+1) -1) 2740 IPOS = IPOS + COLSIZE 2741 ENDDO 2742 ENDIF 2743 ENDIF 2744C ========================= 2745 IF (KEEP(23) .NE. 0) THEN 2746C ========================= 2747* maximum transversal was performed 2748 IF (MTYPE .NE. 1) THEN 2749* At x = b is asked while 2750* we have AP = LU where P is the column permutation 2751* due to max trans. 2752* Therefore we need to modify rhs: 2753* b' = P-1 b (P-1=Pt) 2754* Apply column permutation to the right hand side RHS 2755* Column J of the permuted matrix corresponds to 2756* column PERMW(J) of the original matrix. 2757* 2758C ========== 2759C SPARSE RHS : permute indices rather than values 2760C ========== 2761C Solve with At X = B should never occur for A-1 2762 IPOS = 1 2763 DO I=1, NBCOL_INBLOC 2764C Note that: (i) IRHS_PTR_COPY is compressed; 2765C (ii) columns might have been permuted 2766 COLSIZE = IRHS_PTR_COPY(I+1) - IRHS_PTR_COPY(I) 2767 DO K = 1, COLSIZE 2768 JPERM = UNS_PERM_INV(IRHS_SPARSE_COPY(IPOS+K-1)) 2769 IRHS_SPARSE_COPY(IPOS+K-1) = JPERM 2770 ENDDO 2771 IPOS = IPOS + COLSIZE 2772 ENDDO 2773 ENDIF ! MTYPE.NE.1 2774 ENDIF ! KEEP(23).NE.0 2775 ENDIF ! NZ_THIS_BLOCK .NE. 0 2776C ----- 2777 ENDIF ! ============ KEEP(248)==1 2778C ----- 2779 ENDIF ! (id%MYID .eq. MASTER) 2780C 2781C ===================== ERROR handling and propagation ================ 2782 30 CONTINUE 2783 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 2784 & id%COMM,id%MYID) 2785 IF (INFO(1) .LT.0 ) GOTO 90 2786C ====================================================================== 2787C 2788C NBCOL_INBLOC depends on loop 2789 IF (KEEP(248)==1) THEN 2790 CALL MPI_BCAST( NBCOL_INBLOC,1, MPI_INTEGER, 2791 & MASTER, id%COMM,IERR) 2792 ELSE 2793 NBCOL_INBLOC = NBRHS_EFF 2794 ENDIF 2795 JEND_RHS =JBEG_RHS + NBCOL_INBLOC - 1 2796 IF ((KEEP(111).eq.0).AND.(KEEP(252).EQ.0) 2797 & .AND.(KEEP(221).NE.2 ).AND.(KEEP(248).NE.0) ) THEN 2798C ---------------------------- 2799C -- SPARSE RIGHT-HAND-SIDE 2800C ---------------------------- 2801 CALL MPI_BCAST( NZ_THIS_BLOCK,1, MPI_INTEGER, 2802 & MASTER, id%COMM,IERR) 2803 IF (id%MYID.NE.MASTER .and. NZ_THIS_BLOCK.NE.0) THEN 2804 ALLOCATE(IRHS_SPARSE_COPY(NZ_THIS_BLOCK), 2805 & stat=allocok) 2806 if (allocok .GT.0 ) then 2807 INFO(1)=-13 2808 INFO(2)=NZ_THIS_BLOCK 2809 GOTO 45 2810 endif 2811 IRHS_SPARSE_COPY_ALLOCATED=.TRUE. 2812C RHS_SPARSE_COPY is broadcasted 2813C for A-1 even if on the slaves the initialisation of the RHS 2814C could be only based on the pattern. Doing so we 2815C broadcast the scaled version of the RHS (scaling arrays 2816C that are not available on slaves). 2817 ALLOCATE(RHS_SPARSE_COPY(NZ_THIS_BLOCK), 2818 & stat=allocok) 2819 if (allocok .GT.0 ) then 2820 INFO(1)=-13 2821 INFO(2)=NZ_THIS_BLOCK 2822 GOTO 45 2823 endif 2824 RHS_SPARSE_COPY_ALLOCATED=.TRUE. 2825 NB_BYTES = NB_BYTES + int(NZ_THIS_BLOCK,8)*(K34_8+K35_8) 2826 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 2827C 2828 ALLOCATE(IRHS_PTR_COPY(NBCOL_INBLOC+1),stat=allocok) 2829 if (allocok .GT.0 ) then 2830 INFO(1)=-13 2831 INFO(2)=NBCOL_INBLOC+1 2832 GOTO 45 2833 endif 2834 IRHS_PTR_COPY_ALLOCATED = .TRUE. 2835 NB_BYTES = NB_BYTES + int(NBCOL_INBLOC+1,8)*K34_8 2836 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 2837 ENDIF 2838C 2839C ===================== ERROR handling and propagation ================ 2840 45 CONTINUE 2841 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 2842 & id%COMM,id%MYID) 2843 IF (INFO(1) .LT.0 ) GOTO 90 2844C ====================================================================== 2845 IF (NZ_THIS_BLOCK > 0) THEN 2846 CALL MPI_BCAST(IRHS_SPARSE_COPY(1), 2847 & NZ_THIS_BLOCK, 2848 & MPI_INTEGER, 2849 & MASTER, id%COMM,IERR) 2850 CALL MPI_BCAST(IRHS_PTR_COPY(1), 2851 & NBCOL_INBLOC+1, 2852 & MPI_INTEGER, 2853 & MASTER, id%COMM,IERR) 2854 IF (IERR.GT.0) THEN 2855 WRITE (*,*)'NOT OK FOR ALLOC PTR ON SLAVES' 2856 call MUMPS_ABORT() 2857 ENDIF 2858 ENDIF 2859 ENDIF 2860C 2861C ========================================================== 2862C INITIALIZE POSINRHSCOMP_ROW/COL, RHSCOMP and related data 2863C ========================================================== 2864 IF ( I_AM_SLAVE ) THEN 2865C -------------------------------------------------- 2866C If I am involved in the solve and if 2867C either 2868C no null space comput (keep(111)=0) and sparse rhs 2869C or 2870C null space computation 2871C then 2872C compute POSINRHSCOMP 2873C endif 2874C 2875C Fwd in facto: in this case only POSINRHSCOMP need be computed 2876C 2877C (POSINRHSCOMP_ROW/COL indirection arrays should 2878C have been allocated once outside loop) 2879C Compute size of RHSCOMP since it might depend 2880C on the process index and of the sparsity of the RHS 2881C if it is exploited. 2882C Initialize POSINRHSCOMP_ROW/COL 2883C 2884C Note that LD_RHSCOMP and id%KEEP8(25) 2885C are not set on the host in this routine in 2886C the case of a non-working host. 2887C Note that POSINRHSCOMP is now always computed in SOL_DRIVER 2888C at least during the first block of RHS when sparsity of RHS 2889C is not exploited. 2890C ------------------------------- 2891C INITTIALZE POSINRHSCOMP_ROW/COL 2892C ------------------------------- 2893C 2894 IF ( KEEP(221).EQ.2 .AND. KEEP(252).EQ.0 2895 & .AND. (KEEP(248).EQ.0 .OR. (id%NRHS.EQ.1)) 2896 & ) THEN 2897C Reduced RHS was already computed during 2898C a previous forward step AND is valid. 2899C By valid we mean: 2900C -no forward in facto (KEEP(252)==0) during which 2901C POSINRHSCOMP was not computed 2902C AND 2903C -no exploit sparsity with multiple RHS 2904C because in this case POSINRHSCOMP would 2905C be valid only for the last block processed during fwd. 2906C In those cases since we only perform backward step, we do not 2907C need to compute POSINRHSCOMP 2908 BUILD_POSINRHSCOMP = .FALSE. 2909 ENDIF 2910C ------------------------ 2911C INITIALIZE POSINRHSCOMP 2912C ------------------------ 2913 IF (BUILD_POSINRHSCOMP) THEN 2914C -- we first set MTYPE_LOC and 2915C -- reset BUILD_POSINRHSCOMP for next iteration in loop 2916C 2917C general case only POSINRHSCOMP is computed 2918 BUILD_POSINRHSCOMP = .FALSE. 2919! POSINRHSCOMP does not change between blocks 2920 MTYPE_LOC = MTYPE 2921C 2922 IF ( (KEEP(111).NE.0) .OR. (KEEP(237).NE.0) .OR. 2923 & (KEEP(252).NE.0) ) THEN 2924C 2925 IF (KEEP(111).NE.0) THEN 2926C -- in the context of null space, we need to 2927C -- build RHSCOMP to skip SOL_R. Therefore 2928C -- we need to know for each concerned 2929C -- row index its position in 2930C -- RHSCOMP 2931C We use row indices, as these are the ones that 2932C were used to detect zero pivots during factorization. 2933C POSINRHSCOMP_ROW will allow to find the (row) index of a 2934C zero in RHSCOMP before calling ZMUMPS_SOL_S. Then 2935C ZMUMPS_SOL_S uses column indices to build the solution 2936C (corresponding to null space vectors) 2937 MTYPE_LOC = 1 2938 ELSE IF (KEEP(252).NE.0) THEN 2939C -- Fwd in facto: since fwd is skipped we need to build POSINRHSCOMP 2940 MTYPE_LOC = 1 ! (no transpose) 2941C BUILD_POSINRHSCOMP = .FALSE. ! POSINRHSCOMP does not change between blocks 2942 ELSE 2943C -- A-1 only 2944 MTYPE_LOC = MTYPE 2945 BUILD_POSINRHSCOMP = .TRUE. 2946 ENDIF 2947 ENDIF 2948C -- compute POSINRHSCOMP 2949 LIW_PASSED=max(1,LIW) 2950 IF (KEEP(237).EQ.0) THEN 2951 CALL ZMUMPS_BUILD_POSINRHSCOMP( 2952 & id%NSLAVES,id%N, 2953 & id%MYID_NODES, id%PTLUST_S(1), 2954 & id%KEEP(1),id%KEEP8(1), 2955 & id%PROCNODE_STEPS(1), id%IS(1), LIW_PASSED, 2956 & id%STEP(1), 2957 & id%POSINRHSCOMP_ROW(1), id%POSINRHSCOMP_COL(1), 2958 & id%POSINRHSCOMP_COL_ALLOC, 2959 & MTYPE_LOC, 2960 & NBENT_RHSCOMP, NB_FS_IN_RHSCOMP_TOT ) 2961 NB_FS_IN_RHSCOMP_F = NB_FS_IN_RHSCOMP_TOT 2962 ELSE 2963 CALL ZMUMPS_BUILD_POSINRHSCOMP_AM1( 2964 & id%NSLAVES,id%N, 2965 & id%MYID_NODES, id%PTLUST_S(1), id%DAD_STEPS(1), 2966 & id%KEEP(1),id%KEEP8(1), 2967 & id%PROCNODE_STEPS(1), id%IS(1), LIW, 2968 & id%STEP(1), 2969 & id%POSINRHSCOMP_ROW(1), id%POSINRHSCOMP_COL(1), 2970 & id%POSINRHSCOMP_COL_ALLOC, 2971 & MTYPE_LOC, 2972 & IRHS_PTR_COPY(1), NBCOL_INBLOC, IRHS_SPARSE_COPY(1), 2973 & NZ_THIS_BLOCK,PERM_RHS, size(PERM_RHS) , JBEG_RHS, 2974 & NBENT_RHSCOMP, 2975 & NB_FS_IN_RHSCOMP_F, NB_FS_IN_RHSCOMP_TOT, 2976 & UNS_PERM_INV, size(UNS_PERM_INV) ! size 1 if not used 2977 & ) 2978 ENDIF 2979 ENDIF ! BUILD_POSINRHSCOMP=.TRUE. 2980 IF (KEEP(221).EQ.1) THEN 2981C we need to save the reduced RHS for all RHS to perform 2982C later the backward phase with an updated reduced RHS 2983C thus we allocate NRHS_NONEMPTY columns in one shot. 2984C Note that RHSCOMP might have been allocated in previous block 2985C and RHSCOMP has been deallocated previous to entering loop on RHS 2986 IF (.not. associated(id%RHSCOMP)) THEN 2987C So far we cannot combine this to exploit sparsity 2988C so that NBENT_RHSCOMP will not change in the loop 2989C and can be used to dimension RHSCOMP 2990C Furthermore, during bwd phase the REDRHS provided 2991C by the user might also have a different non empty 2992C column pattern than the sparse RHS provided on input to 2993C this phase: thus we need to allocate id%NRHS columns too. 2994 LD_RHSCOMP = max(NBENT_RHSCOMP,1) 2995 id%KEEP8(25) = int(LD_RHSCOMP,8)*int(id%NRHS,8) 2996 ALLOCATE (id%RHSCOMP(id%KEEP8(25)), stat = allocok) 2997 IF ( allocok .GT. 0 ) THEN 2998 INFO(1)=-13 2999 CALL MUMPS_SET_IERROR(id%KEEP8(25),INFO(2)) 3000 id%KEEP8(25)=0_8 3001 GOTO 41 3002 END IF 3003 NB_BYTES = NB_BYTES + id%KEEP8(25)*K35_8 3004 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 3005 ENDIF 3006 ENDIF 3007 IF ((KEEP(221).NE.1).AND. 3008 & ((KEEP(221).NE.2).OR.(KEEP(252).NE.0)) 3009 & ) THEN 3010C ------------------ 3011C Allocate RHSCOMP (case of RHSCOMP allocated at each block of RHS) 3012C ------------------ 3013C RHSCOMP allocated per block of maximum size NBRHS 3014 LD_RHSCOMP = max(NBENT_RHSCOMP, LD_RHSCOMP) 3015C NBRHS_EFF could be used instead on NBRHS 3016 IF (associated(id%RHSCOMP)) THEN 3017 IF ( (id%KEEP8(25).LT.int(LD_RHSCOMP,8)*int(NBRHS,8)) 3018 & .OR. (KEEP(235).NE.0).OR.(KEEP(237).NE.0) ) THEN 3019 ! deallocate and reallocate if: 3020 ! _larger array needed 3021 ! OR 3022 ! _exploit sparsity/A-1: since size of RHSCOMP 3023 ! is expected to vary much in these cases 3024 ! this should improve locality 3025 NB_BYTES = NB_BYTES - id%KEEP8(25)*K35_8 3026 DEALLOCATE(id%RHSCOMP) 3027 NULLIFY(id%RHSCOMP) 3028 id%KEEP8(25)=0_8 3029 ENDIF 3030 ENDIF 3031 IF (.not. associated(id%RHSCOMP)) THEN 3032 LD_RHSCOMP = max(NBENT_RHSCOMP, 1) 3033 id%KEEP8(25) = int(LD_RHSCOMP,8)*int(NBRHS,8) 3034 ALLOCATE (id%RHSCOMP(id%KEEP8(25)), stat = allocok ) 3035 IF ( allocok .GT. 0 ) THEN 3036 INFO(1)=-13 3037 CALL MUMPS_SET_IERROR(id%KEEP8(25),INFO(2)) 3038 GOTO 41 3039 END IF 3040 NB_BYTES = NB_BYTES + id%KEEP8(25)*K35_8 3041 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 3042 ENDIF 3043 ENDIF 3044 IF (KEEP(221).EQ.2) THEN 3045C RHSCOMP has been allocated (call with KEEP(221).EQ.1) 3046C even in the case fwd in facto 3047 ! Not correct: LD_RHSCOMP = LENRHSCOMP/id%NRHS_NONEMPTY 3048 LD_RHSCOMP = int(id%KEEP8(25)/int(id%NRHS,8)) 3049 ENDIF 3050C 3051C Shift on RHSCOMP 3052C 3053 IF ( KEEP(221).EQ.0 ) THEN 3054C -- RHSCOMP reused in the loop 3055 IBEG_RHSCOMP= 1_8 3056 ELSE 3057C Initialize IBEG_RHSCOMP 3058#if defined(RHSCOMP_BYROWS) 3059C Stored by rows but only inside each 3060C block. We keep IBEG_RHSCOMP unchanged 3061C for locality since both SCATTER_RHS and 3062C GATHER_SOLUTION will be done block-by-block? 3063 IBEG_RHSCOMP= int(JBEG_RHS-1,8)*int(LD_RHSCOMP,8) + 1_8 3064#else 3065 IBEG_RHSCOMP= int(JBEG_RHS-1,8)*int(LD_RHSCOMP,8) + 1_8 3066#endif 3067 ENDIF 3068 ENDIF ! I_AM_SLAVE 3069C ===================== ERROR handling and propagation ================ 3070 41 CONTINUE 3071 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 3072 & id%COMM,id%MYID) 3073 IF (INFO(1) .LT.0 ) GOTO 90 3074C ====================================================================== 3075C 3076 IF (id%MYID .eq. MASTER) THEN 3077C ========================= 3078 IF (KEEP(23) .NE. 0) THEN 3079C ========================= 3080* maximum transversal was performed 3081 IF (MTYPE .NE. 1) THEN 3082* At x = b is asked while 3083* we have AP = LU where P is the column permutation 3084* due to max trans. 3085* Therefore we need to modify rhs: 3086* b' = P-1 b (P-1=Pt) 3087* Apply column permutation to the right hand side RHS 3088* Column J of the permuted matrix corresponds to 3089* column PERMW(J) of the original matrix. 3090* 3091 IF (KEEP(248)==0) THEN 3092C ========= 3093C DENSE RHS : permute values in RHS 3094C ========= 3095 ALLOCATE( C_RW2( id%N ),stat =allocok ) 3096 IF ( allocok .GT. 0 ) THEN 3097 INFO(1)=-13 3098 INFO(2)=id%N 3099 IF ( LPOK) THEN 3100 WRITE(LP,*) id%MYID, 3101 & ':Error allocating C_RW2 in ZMUMPS_SOLVE_DRIVE' 3102 END IF 3103 GOTO 30 3104 END IF 3105C We directly permute in id%RHS. 3106 DO K = 1, NBRHS_EFF 3107 KDEC = IBEG+int(K-1,8)*int(LD_RHS,8) 3108 DO I = 1, id%N 3109 C_RW2(I)=id%RHS(I-1+KDEC) 3110 END DO 3111 DO I = 1, id%N 3112 JPERM = id%UNS_PERM(I) 3113 id%RHS(I-1+KDEC) = C_RW2(JPERM) 3114 END DO 3115 END DO 3116 DEALLOCATE(C_RW2) 3117 ENDIF 3118 ENDIF 3119 ENDIF 3120C 3121 IF (POSTPros) THEN 3122 IF ( KEEP(248) == 0 ) THEN 3123 DO K = 1, NBRHS_EFF 3124 KDEC = IBEG+int(K-1,8)*int(LD_RHS,8) 3125 DO I = 1, id%N 3126 SAVERHS(I+(K-1)*id%N) = id%RHS(KDEC+I-1) 3127 END DO 3128 ENDDO 3129 ELSE 3130 SAVERHS(:) = ZERO 3131 DO K = 1, NBRHS 3132 DO J = id%IRHS_PTR(K), id%IRHS_PTR(K+1)-1 3133 I = id%IRHS_SPARSE(J) 3134 SAVERHS(I+(K-1)*id%N) = id%RHS_SPARSE(J) 3135 ENDDO 3136 ENDDO 3137 ENDIF 3138 ENDIF 3139C 3140C RHS is set to scaled right hand side 3141C 3142 IF (LSCAL) THEN 3143C scaling was performed 3144 IF (KEEP(248)==0) THEN 3145C dense RHS 3146 IF (MTYPE .EQ. 1) THEN 3147C we solve Ax=b, use ROWSCA to scale the RHS 3148 DO K =1, NBRHS_EFF 3149 KDEC = int(K-1,8) * int(LD_RHS,8) + int(IBEG-1,8) 3150 DO I = 1, id%N 3151 id%RHS(KDEC+I) = id%RHS(KDEC+I) * 3152 & id%ROWSCA(I) 3153 ENDDO 3154 ENDDO 3155 ELSE 3156C we solve Atx=b, use COLSCA to scale the RHS 3157 DO K =1, NBRHS_EFF 3158 KDEC = int(K-1,8) * int(LD_RHS,8) + int(IBEG-1,8) 3159 DO I = 1, id%N 3160 id%RHS(KDEC+I) = id%RHS(KDEC+I) * 3161 & id%COLSCA(I) 3162 ENDDO 3163 ENDDO 3164 ENDIF 3165 ELSE 3166C ------------------------- 3167C KEEP(248)==1 (and MASTER) 3168C ------------------------- 3169 KDEC=int(id%IRHS_PTR(JBEG_RHS),8) 3170C Compute 3171 IF ((KEEP(248)==1) .AND. 3172 & (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR.OR. 3173 & (id%KEEP(237).NE.0)) 3174 & ) THEN 3175C -- copy from RHS_SPARSE need be done per 3176C column following PERM_RHS 3177C Columns are not contiguous and need be copied one by one 3178 IPOS = 1 3179 J = 0 3180 DO I=JBEG_RHS, JBEG_RHS + NBCOL_INBLOC -1 3181 J = J+1 3182C Note that we work here on compressed IRHS_PTR_COPY 3183 COLSIZE = IRHS_PTR_COPY(J+1) - IRHS_PTR_COPY(J) 3184C -- skip empty column 3185 IF (COLSIZE .EQ. 0) CYCLE 3186 IF (id%KEEP(237).NE.0) THEN 3187 IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR) THEN 3188C if A-1 only, then, for each non empty target 3189C column PERM_RHS(I), scale in first position 3190C in column the diagonal entry 3191C build the scaled rhs ej on each slave. 3192 RHS_SPARSE_COPY(IPOS) = id%ROWSCA(PERM_RHS(I)) * 3193 & ONE 3194 ELSE 3195 RHS_SPARSE_COPY(IPOS) = id%ROWSCA(I) * ONE 3196 ENDIF 3197 ELSE 3198C Loop over nonzeros in column 3199 DO K = 1, COLSIZE 3200C Formula for II below is ok, except in case 3201C of maximum transversal (KEEP(23).NE.0) and 3202C transpose system (MTYPE .NE. 1): 3203C II = id%IRHS_SPARSE(id%IRHS_PTR(PERM_RHS(I))+K-1) 3204C In case of maximum transversal + transpose, one 3205C should then apply II=UNS_PERM_INV(II) after the 3206C above definition of II. 3207C 3208C Instead, we rely on IRHS_SPARSE_COPY, whose row 3209C indices have already been permuted in case of 3210C maximum transversal. 3211 II = IRHS_SPARSE_COPY( 3212 & IRHS_PTR_COPY(I-JBEG_RHS+1) 3213 & +K-1) 3214C PERM_RHS(I) corresponds to column in original RHS. 3215C Original IRHS_PTR must be used to access id%RHS_SPARSE 3216 IF (MTYPE.EQ.1) THEN 3217 RHS_SPARSE_COPY(IPOS+K-1) = 3218 & id%RHS_SPARSE(id%IRHS_PTR(PERM_RHS(I))+K-1)* 3219 & id%ROWSCA(II) 3220 ELSE 3221 RHS_SPARSE_COPY(IPOS+K-1) = 3222 & id%RHS_SPARSE(id%IRHS_PTR(PERM_RHS(I))+K-1)* 3223 & id%COLSCA(II) 3224 ENDIF 3225 ENDDO 3226 ENDIF 3227 IPOS = IPOS + COLSIZE 3228 ENDDO 3229 ELSE 3230 ! general sparse RHS 3231 ! without permutation 3232 IF (MTYPE .eq. 1) THEN 3233 DO IZ=1,NZ_THIS_BLOCK 3234 I=IRHS_SPARSE_COPY(IZ) 3235 RHS_SPARSE_COPY(IZ)=id%RHS_SPARSE(KDEC+IZ-1)* 3236 & id%ROWSCA(I) 3237 ENDDO 3238 ELSE 3239 DO IZ=1,NZ_THIS_BLOCK 3240 I=IRHS_SPARSE_COPY(IZ) 3241 RHS_SPARSE_COPY(IZ)=id%RHS_SPARSE(KDEC+IZ-1)* 3242 & id%COLSCA(I) 3243 ENDDO 3244 ENDIF 3245 ENDIF 3246 ENDIF ! KEEP(248)==1 3247 ENDIF ! LSCAL 3248 ENDIF ! id%MYID.EQ.MASTER 3249#if defined(V_T) 3250 CALL VTEND(perm_scal_ini,IERR) 3251#endif 3252C 3253C Prepare RHS on master 3254C END 3255C ===================== 3256 IF ((KEEP(248).EQ.1).AND.(KEEP(237).EQ.0)) THEN 3257 ! case of general sparse: in case of empty columns 3258 ! modifed version of 3259 ! NBRHS_EFF need be broadcasted since it is used 3260 ! to update BEG_RHS at the end of the DO WHILE 3261 CALL MPI_BCAST( NBRHS_EFF,1, MPI_INTEGER, 3262 & MASTER, id%COMM,IERR) 3263 CALL MPI_BCAST(NB_RHSSKIPPED,1,MPI_INTEGER,MASTER, 3264 & id%COMM,IERR) 3265 ENDIF 3266C ----------------------------------- 3267C Two main cases depending on option 3268C for null space computation: 3269C 3270C KEEP(111)=0 : use RHS from user 3271C (sparse or dense) 3272C KEEP(111)!=0: build an RHS on each 3273C proc for null space 3274C computations 3275C ----------------------------------- 3276#if defined(V_T) 3277 CALL VTBEGIN(soln_dist,IERR) 3278#endif 3279 IF(id%MYID.EQ.MASTER) TIMESCATTER1=MPI_WTIME() 3280 IF ((KEEP(111).eq.0).AND.(KEEP(252).EQ.0) 3281 & .AND.(KEEP(221).NE.2 )) THEN 3282C ------------------------ 3283C Use RHS provided by user 3284C when not null space and not Fwd in facto 3285C ------------------------ 3286 IF (KEEP(248) == 0) THEN 3287C ---------------------------- 3288C -- DENSE RIGHT-HAND-SIDE 3289C ---------------------------- 3290 IF ( .NOT.I_AM_SLAVE ) THEN 3291C -- Master not working 3292 CALL ZMUMPS_SCATTER_RHS(id%NSLAVES,id%N, id%MYID, 3293 & id%COMM, 3294 & MTYPE, id%RHS(IBEG), LD_RHS, NBRHS_EFF, 3295 & NBRHS_EFF, 3296 & C_DUMMY, 1, 1, 3297 & IDUMMY, 0, 3298 & JDUMMY, id%KEEP(1), id%KEEP8(1), id%PROCNODE_STEPS(1), 3299 & IDUMMY, 1, 3300 & id%STEP(1), 3301 & id%ICNTL(1),id%INFO(1)) 3302 ELSE 3303 IF (id%MYID .eq. MASTER) THEN 3304 PTR_RHS => id%RHS 3305 LD_RHS_loc = LD_RHS 3306 NCOL_RHS_loc = NBRHS_EFF 3307 IBEG_loc = IBEG 3308 ELSE 3309 PTR_RHS => CDUMMY_TARGET 3310 LD_RHS_loc = 1 3311 NCOL_RHS_loc = 1 3312 IBEG_loc = 1_8 3313 ENDIF 3314 LIW_PASSED = max( LIW, 1 ) 3315 CALL ZMUMPS_SCATTER_RHS(id%NSLAVES,id%N, id%MYID, 3316 & id%COMM, 3317 & MTYPE, PTR_RHS(IBEG_loc),LD_RHS_loc,NCOL_RHS_loc, 3318 & NBRHS_EFF, 3319 & id%RHSCOMP(IBEG_RHSCOMP), LD_RHSCOMP, NBRHS_EFF, 3320 & id%POSINRHSCOMP_ROW(1), NB_FS_IN_RHSCOMP_F, 3321C 3322 & id%PTLUST_S(1), id%KEEP(1), id%KEEP8(1), 3323 & id%PROCNODE_STEPS(1), 3324 & IS(1), LIW_PASSED, 3325 & id%STEP(1), 3326 & id%ICNTL(1),id%INFO(1)) 3327 ENDIF 3328 IF (INFO(1).LT.0) GOTO 90 3329 ELSE 3330C === KEEP(248)==1 ========= 3331C -- SPARSE RIGHT-HAND-SIDE 3332C ---------------------------- 3333 IF (NZ_THIS_BLOCK > 0) THEN 3334 CALL MPI_BCAST(RHS_SPARSE_COPY(1), 3335 & NZ_THIS_BLOCK, 3336 & MPI_DOUBLE_COMPLEX, 3337 & MASTER, id%COMM, IERR) 3338 ENDIF 3339C -- At this point each process has a copy of the 3340C -- sparse RHS. We need to store it into RHSCOMP. 3341C 3342 IF (KEEP(237).NE.0) THEN 3343 IF ( I_AM_SLAVE ) THEN 3344c ----- 3345c case of A-1 3346c ----- 3347c - Take columns with non-zero entry, say j, 3348* - to build Ej and store it in RHSCOMP 3349 K=1 ! Column index in RHSCOMP 3350 id%RHSCOMP(1:NBRHS_EFF*LD_RHSCOMP) = ZERO 3351 IPOS = 1 3352 DO I = 1, NBCOL_INBLOC 3353 COLSIZE = IRHS_PTR_COPY(I+1) - IRHS_PTR_COPY(I) 3354 IF (COLSIZE.GT.0) THEN 3355 ! Find global column index J and set 3356 ! column K of RHSCOMP to ej (here IBEG is one) 3357 J = I - 1 + JBEG_RHS 3358 IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR) THEN 3359 J = PERM_RHS(J) 3360 ENDIF 3361 IPOSRHSCOMP = id%POSINRHSCOMP_ROW(J) 3362C IF ( (IPOSRHSCOMP.LE.NB_FS_IN_RHSCOMP_F) 3363C & .AND.(IPOSRHSCOMP.GT.0) ) THEN 3364 IF (IPOSRHSCOMP.GT.0) THEN 3365C Columns J corresponds to ej and thus to variable j 3366C that is on my proc 3367C Note that : 3368C In first entry in column 3369C we have and MUST have already scaled value of diagonal. 3370C This need have been done on master because we do not 3371C have scaling arrays available on slaves. 3372C Furthermore we know that only one entry is 3373C needed the diagonal entry (for the forward with A-1). 3374C 3375#if defined(RHSCOMP_BYROWS) 3376 id%RHSCOMP((IPOSRHSCOMP-1)*NBRHS_EFF+K) = 3377 & RHS_SPARSE_COPY(IPOS) 3378#else 3379 id%RHSCOMP((K-1)*LD_RHSCOMP+IPOSRHSCOMP) = 3380 & RHS_SPARSE_COPY(IPOS) 3381#endif 3382 ENDIF ! End of J on my proc 3383 K = K + 1 3384 IPOS = IPOS + COLSIZE ! go to next column 3385 ENDIF 3386 ENDDO 3387 IF (K.NE.NBRHS_EFF+1) THEN 3388 WRITE(6,*) 'Internal Error 9 in solution driver ', 3389 & K,NBRHS_EFF 3390 call MUMPS_ABORT() 3391 ENDIF 3392 ENDIF ! I_AM_SLAVE 3393C ------- 3394c END A-1 3395C ------- 3396 ELSE 3397C -------------- 3398C General sparse 3399C -------------- 3400C -- reset to zero RHSCOMP for skipped columns (if any) 3401 IF ((KEEP(221).EQ.1).AND.(NB_RHSSKIPPED.GT.0) 3402 & .AND.I_AM_SLAVE) THEN 3403#if defined(RHSCOMP_BYROWS) 3404 WRITE(*,*) "Internal error 17 is sol driver" 3405 CALL MUMPS_ABORT() 3406#else 3407 DO K = JBEG_RHS-NB_RHSSKIPPED, JBEG_RHS-1 3408 DO I = 1, LD_RHSCOMP 3409 id%RHSCOMP((K-1)*LD_RHSCOMP + I) = ZERO 3410 ENDDO 3411 ENDDO 3412#endif 3413 ENDIF 3414#if defined(RHSCOMP_BYROWS) 3415 IF (I_AM_SLAVE) THEN 3416 DO I=1, NBENT_RHSCOMP 3417 DO K = 1, NBCOL_INBLOC 3418C NBCOL_INBLOC is equal to NBRHS_EFF in this case 3419 id%RHSCOMP(IBEG_RHSCOMP+ 3420 & int(I-1,8)*int(NBRHS_EFF,8)+int(K-1,8))=ZERO 3421 ENDDO 3422 ENDDO 3423 ENDIF 3424C Test below must be done also on non-working host !! 3425 IF (NZ_THIS_BLOCK .EQ. 0 .AND. KEEP(221).EQ.1) THEN 3426C Skip the rest, go to next block. 3427 GOTO 1000 3428 ENDIF 3429 IF (I_AM_SLAVE) THEN 3430 DO K = 1, NBCOL_INBLOC 3431! it is equal to NBRHS_EFF in this case 3432 KDEC = IBEG_RHSCOMP + int(K-1,8) 3433#else 3434 IF (I_AM_SLAVE) THEN 3435 DO K = 1, NBCOL_INBLOC 3436! it is equal to NBRHS_EFF in this case 3437 KDEC = int(K-1,8) * int(LD_RHSCOMP,8) + 3438 & IBEG_RHSCOMP - 1_8 3439 id%RHSCOMP(KDEC+1_8:KDEC+NBENT_RHSCOMP) = ZERO 3440#endif 3441 DO IZ=IRHS_PTR_COPY(K), IRHS_PTR_COPY(K+1)-1 3442 I=IRHS_SPARSE_COPY(IZ) 3443 IPOSRHSCOMP = id%POSINRHSCOMP_ROW(I) 3444C Since all fully summed variables mapped 3445C on each proc are stored at the beginning 3446C of RHSCOMP, we can compare to KEEP(89) 3447C to know if RHSCOMP should be initialized 3448C So far the tree has not been pruned to exploit 3449C sparsity to compress RHSCOMP so we compare to 3450C NB_FS_IN_RHSCOMP_TOT 3451 IF ( (IPOSRHSCOMP.LE.NB_FS_IN_RHSCOMP_TOT) 3452 & .AND.(IPOSRHSCOMP.GT.0) ) THEN 3453C ! I is fully summed var mapped on my proc 3454#if defined(RHSCOMP_BYROWS) 3455 id%RHSCOMP(KDEC+(IPOSRHSCOMP-1)*NBRHS_EFF)= 3456 & id%RHSCOMP(KDEC+(IPOSRHSCOMP-1)*NBRHS_EFF) + 3457 & RHS_SPARSE_COPY(IZ) 3458#else 3459 id%RHSCOMP(KDEC+IPOSRHSCOMP)= 3460 & id%RHSCOMP(KDEC+IPOSRHSCOMP) + 3461 & RHS_SPARSE_COPY(IZ) 3462#endif 3463 ENDIF 3464 ENDDO 3465 ENDDO 3466 END IF ! I_AM_SLAVE 3467 ENDIF ! KEEP(237) 3468 ENDIF ! ==== KEEP(248)==1 ===== 3469C 3470 ELSE IF (I_AM_SLAVE) THEN 3471 ! I_AM_SLAVE AND (null space or Fwd in facto) 3472 IF (KEEP(111).NE.0) THEN 3473C ----------------------- 3474C Null space computations 3475C ----------------------- 3476C 3477C We are working on columns BEG_RHS:BEG_RHS+NBRHS_EFF-1 3478C of RHS. 3479C Columns in 1..KEEP(112): 3480C Put a one in corresponding 3481C position of the right-hand-side, 3482C and zeros in other places. 3483C Columns in KEEP(112)+1: KEEP(112)+KEEP(17): 3484C root node => set 3485C 0 everywhere and compute the local range 3486C corresponding to IBEG/IEND in root 3487C that will be passed to ZMUMPS_SEQ_SOLVE_ROOT_RR 3488C Also keep track of which part of 3489C ZMUMPS_RHS must be passed to 3490C ZMUMPS_SEQ_SOLVE_ROOT_RR. 3491C 3492 IF (KEEP(111).GT.0) THEN 3493 IBEG_GLOB_DEF = KEEP(111) 3494 IEND_GLOB_DEF = KEEP(111) 3495 ELSE 3496 IBEG_GLOB_DEF = BEG_RHS 3497 IEND_GLOB_DEF = BEG_RHS+NBRHS_EFF-1 3498 ENDIF 3499 IF ( id%KEEP(112) .GT. 0 .AND. DO_NULL_PIV) THEN 3500 IF (IBEG_GLOB_DEF .GT.id%KEEP(112)) THEN 3501 id%KEEP(235) = 0 3502 DO_NULL_PIV = .FALSE. 3503 ENDIF 3504 IF (IBEG_GLOB_DEF .LT.id%KEEP(112) 3505 & .AND. IEND_GLOB_DEF .GT.id%KEEP(112) 3506 & .AND. DO_NULL_PIV ) THEN 3507C IEND_GLOB_DEF = id%KEEP(112) 3508C forcing exploit sparsity 3509C - cannot be done at this point 3510C - and is not what the user would have expected the 3511C code to to do anyway !!!! 3512C suppress: id%KEEP(235) = 1 ! End Block of sparsity ON 3513 DO_NULL_PIV = .FALSE. 3514 ENDIF 3515 ENDIF 3516 IF (id%KEEP(235).NE.0) THEN 3517C Exploit Sparsity in null space computations 3518C We build /allocate the sparse RHS on MASTER 3519C based on pivnul_list. Then we broadcast it 3520C on the slaves 3521C In this case we have ONLY ONE ENTRY per RHS 3522C 3523 NZ_THIS_BLOCK=IEND_GLOB_DEF-IBEG_GLOB_DEF+1 3524 ALLOCATE(IRHS_PTR_COPY(NZ_THIS_BLOCK+1),stat=allocok) 3525 IF (allocok .GT.0 ) THEN 3526 INFO(1)=-13 3527 INFO(2)=NZ_THIS_BLOCK 3528 GOTO 50 3529 ENDIF 3530 IRHS_PTR_COPY_ALLOCATED = .TRUE. 3531 ALLOCATE(IRHS_SPARSE_COPY(NZ_THIS_BLOCK),stat=allocok) 3532 IF (allocok .GT.0 ) THEN 3533 INFO(1)=-13 3534 INFO(2)=NZ_THIS_BLOCK 3535 GOTO 50 3536 ENDIF 3537 IRHS_SPARSE_COPY_ALLOCATED=.TRUE. 3538 NB_BYTES = NB_BYTES + 3539 & int(NZ_THIS_BLOCK,8)*(K34_8+K34_8) 3540 & + K34_8 3541 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 3542 IF (id%MYID.eq.MASTER) THEN 3543 ! compute IRHS_PTR and IRHS_SPARSE_COPY 3544 II = 1 3545 DO I = IBEG_GLOB_DEF, IEND_GLOB_DEF 3546 IRHS_PTR_COPY(I-IBEG_GLOB_DEF+1) = I 3547 IF (INTERLEAVE_PAR .AND.(id%NSLAVES .GT. 1) ) THEN 3548 IRHS_SPARSE_COPY(II) = PERM_PIV_LIST(I) 3549 ELSE 3550 IRHS_SPARSE_COPY(II) = id%PIVNUL_LIST(I) 3551 ENDIF 3552 II = II +1 3553 ENDDO 3554 IRHS_PTR_COPY(NZ_THIS_BLOCK+1) = NZ_THIS_BLOCK+1 3555 ENDIF 3556C 3557C ===================== ERROR handling and propagation ================ 3558 50 CONTINUE 3559 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 3560 & id%COMM,id%MYID) 3561 IF (INFO(1) .LT.0 ) GOTO 90 3562C ====================================================================== 3563 CALL MPI_BCAST(IRHS_SPARSE_COPY(1), 3564 & NZ_THIS_BLOCK, 3565 & MPI_INTEGER, 3566 & MASTER, id%COMM,IERR) 3567 CALL MPI_BCAST(IRHS_PTR_COPY(1), 3568 & NZ_THIS_BLOCK+1, 3569 & MPI_INTEGER, 3570 & MASTER, id%COMM,IERR) 3571C End IF Exploit Sparsity 3572 ENDIF 3573c 3574C Initialize RHSCOMP to 0 ! to be suppressed 3575#if defined(RHSCOMP_BYROWS) 3576 id%RHSCOMP(1:NBRHS_EFF*LD_RHSCOMP)=ZERO 3577#else 3578 DO K=1, NBRHS_EFF 3579 KDEC = int(K-1,8) * int(LD_RHSCOMP,8) 3580 id%RHSCOMP(KDEC+1_8:KDEC+int(LD_RHSCOMP,8))=ZERO 3581 END DO 3582#endif 3583C Loop over the columns. 3584C Note that if ( KEEP(220)+KEEP(109)-1 < IBEG_GLOB_DEF 3585C .OR. KEEP(220) > IEND_GLOB_DEF ) then we do not enter 3586C the loop. 3587C Note that local processor has indices 3588C KEEP(220):KEEP(220)+KEEP(109)-1 3589 IF ((KEEP(235).NE.0) .AND. INTERLEAVE_PAR) THEN 3590C When the PIVNUL_LIST has been permuted (in PERM_PIV_LIST) 3591C then to exploit sparsity RHSCOMP need be initialized with 3592c some care; taking into acount the processor localisation 3593C of the indices of the null pivots. 3594 DO I=IBEG_GLOB_DEF,IEND_GLOB_DEF 3595C Local processor is concerned by I-th column of 3596C global right-hand side. 3597 IF (id%MYID_NODES .EQ. MAP_PIVNUL_LIST(I) ) THEN 3598 JJ= id%POSINRHSCOMP_ROW(PERM_PIV_LIST(I)) 3599 IF (JJ.GT.LD_RHSCOMP) THEN 3600 WRITE(6,*) ' Internal Error 10 JJ, LD_RHSCOMP=', 3601 & JJ, LD_RHSCOMP 3602 ENDIF 3603 IF (JJ.GT.0) THEN 3604 IF (KEEP(50).EQ.0) THEN 3605C unsymmetric : always set to fixation used during facto 3606C because during factorization we aimed at preserving the 3607C sign of the diagonal element, sign here may be different 3608C from sign of corresponding diagonal element (not critical) 3609#if defined(RHSCOMP_BYROWS) 3610 id%RHSCOMP(IBEG_RHSCOMP + 3611 & int(I-IBEG_GLOB_DEF,8) + 3612 & int(JJ-1,8)* int(NBRHS_EFF,8)) = 3613#else 3614 id%RHSCOMP(IBEG_RHSCOMP + 3615 & int(I-IBEG_GLOB_DEF,8)*int(LD_RHSCOMP,8) + 3616 & int(JJ-1,8)) = 3617#endif 3618 & cmplx(abs(id%DKEEP(2)),kind=kind(id%RHSCOMP)) 3619 ELSE 3620#if defined(RHSCOMP_BYROWS) 3621 id%RHSCOMP(IBEG_RHSCOMP + 3622 & int(I-IBEG_GLOB_DEF,8) + 3623 & int(JJ-1,8) *int(NBRHS_EFF,8)) = ONE 3624#else 3625 id%RHSCOMP(IBEG_RHSCOMP+ 3626 & int(I-IBEG_GLOB_DEF,8)*int(LD_RHSCOMP,8) + 3627 & int(JJ-1,8)) = ONE 3628#endif 3629 ENDIF 3630 ENDIF 3631 ENDIF 3632 ENDDO 3633 ELSE 3634C 3635C Computation of null space and computation of backward 3636C step incompatible, do one or the other. 3637 DO I=max(IBEG_GLOB_DEF,KEEP(220)), 3638 & min(IEND_GLOB_DEF,KEEP(220)+KEEP(109)-1) 3639C Local processor is concerned by I-th column of 3640C global right-hand side. 3641 JJ= id%POSINRHSCOMP_ROW(id%PIVNUL_LIST(I-KEEP(220)+1)) 3642 IF (JJ.GT.0) THEN 3643 IF (KEEP(50).EQ.0) THEN 3644 ! unsymmetric : always set to fixation 3645#if defined(RHSCOMP_BYROWS) 3646 id%RHSCOMP( IBEG_RHSCOMP+ 3647 & int(I-IBEG_GLOB_DEF,8) + 3648 & int(JJ-1,8)*int(NBRHS_EFF,8) ) = 3649#else 3650 id%RHSCOMP( IBEG_RHSCOMP+ 3651 & int(I-IBEG_GLOB_DEF,8)*int(LD_RHSCOMP,8) + 3652 & int(JJ-1,8) ) = 3653#endif 3654 & cmplx(id%DKEEP(2),kind=kind(id%RHSCOMP)) 3655 ELSE 3656 ! Symmetric: always set to one 3657#if defined(RHSCOMP_BYROWS) 3658 id%RHSCOMP( IBEG_RHSCOMP+int(I-IBEG_GLOB_DEF,8) + 3659 & int(JJ-1,8)*int(NBRHS_EFF,8) )= 3660#else 3661 id%RHSCOMP( IBEG_RHSCOMP+ 3662 & int(I-IBEG_GLOB_DEF,8)*int(LD_RHSCOMP,8)+ 3663 & int(JJ-1,8) )= 3664#endif 3665 & ONE 3666 ENDIF 3667 ENDIF 3668 ENDDO 3669 ENDIF ! exploit sparsity 3670 IF ( KEEP(17).NE.0 .AND. 3671 & id%MYID_NODES.EQ.MASTER_ROOT) THEN 3672C --------------------------- 3673C Deficiency of the root node 3674C Find range relative to root 3675C --------------------------- 3676C Among IBEG_GLOB_DEF:IEND_GLOB_DEF, find 3677C intersection with KEEP(112)+1:KEEP(112)+KEEP(17) 3678 IBEG_ROOT_DEF = max(IBEG_GLOB_DEF,KEEP(112)+1) 3679 IEND_ROOT_DEF = min(IEND_GLOB_DEF,KEEP(112)+KEEP(17)) 3680C First column of right-hand side that must 3681C be passed to ZMUMPS_SEQ_SOLVE_ROOT_RR is: 3682 IROOT_DEF_RHS_COL1 = IBEG_ROOT_DEF-IBEG_GLOB_DEF + 1 3683C We look for indices relatively to the root node, 3684C substract number of null pivots outside root node 3685 IBEG_ROOT_DEF = IBEG_ROOT_DEF-KEEP(112) 3686 IEND_ROOT_DEF = IEND_ROOT_DEF-KEEP(112) 3687C Note that if IBEG_ROOT_DEF > IEND_ROOT_DEF, then this 3688C means that nothing must be done on the root node 3689C for this set of right-hand sides. 3690 ELSE 3691 IBEG_ROOT_DEF = -90999 3692 IEND_ROOT_DEF = -95999 3693 IROOT_DEF_RHS_COL1= 1 3694 ENDIF 3695 ELSE ! End of null space (test on KEEP(111)) 3696C case of Fwd in facto 3697C id%RHSCOMP need not be initialized. It will be set on the fly 3698C to zero for normal fully summed variables of the fronts and 3699C to -1 on the roots for the id%N+KEEP(253) variables added 3700C to the roots. 3701 ENDIF ! End of null space (test on KEEP(111)) 3702 ENDIF ! I am slave 3703 IF(id%MYID.EQ.MASTER) THEN 3704 TIMESCATTER2=MPI_WTIME()-TIMESCATTER1+TIMESCATTER2 3705 ENDIF 3706C ------------------------------------------- 3707C Reserve space at the end of WORK_WCB on the 3708C master of the root node. It will be used to 3709C store the reduced RHS. 3710C ------------------------------------------- 3711 IF ( I_AM_SLAVE ) THEN 3712 LWCB8_SOL_C = LWCB8 3713 IF ( id%MYID_NODES .EQ. MASTER_ROOT ) THEN 3714C This is a special root (otherwise MASTER_ROOT < 0) 3715 IF ( associated(id%root%RHS_CNTR_MASTER_ROOT) ) THEN 3716C RHS_CNTR_MASTER_ROOT may have been allocated 3717C during the factorization phase. 3718 PTR_RHS_ROOT => id%root%RHS_CNTR_MASTER_ROOT 3719# if defined(MUMPS_F2003) 3720 LPTR_RHS_ROOT = size(id%root%RHS_CNTR_MASTER_ROOT,kind=8) 3721# else 3722 LPTR_RHS_ROOT = int(size(id%root%RHS_CNTR_MASTER_ROOT),8) 3723# endif 3724 ELSE 3725C Otherwise, we use workspace in WCB 3726 LPTR_RHS_ROOT = int(NBRHS_EFF,8) * int(SIZE_ROOT,8) 3727 IPT_RHS_ROOT = LWCB8 - LPTR_RHS_ROOT + 1_8 3728 PTR_RHS_ROOT => WORK_WCB(IPT_RHS_ROOT:LWCB8) 3729 LWCB8_SOL_C = LWCB8_SOL_C - LPTR_RHS_ROOT 3730 ENDIF 3731 ELSE 3732 LPTR_RHS_ROOT = 1_8 3733 IPT_RHS_ROOT = LWCB8 ! Will be passed, but not accessed 3734 PTR_RHS_ROOT => WORK_WCB(IPT_RHS_ROOT:LWCB8) 3735 LWCB8_SOL_C = LWCB8_SOL_C - LPTR_RHS_ROOT 3736 ENDIF 3737 ENDIF 3738 IF (KEEP(221) .EQ. 2 ) THEN 3739C Copy/send REDRHS in PTR_RHS_ROOT 3740C (column by column if leading dimension LD_REDRHS 3741C of REDRHS is not equal to SIZE_ROOT). 3742C REDRHS was provided on the host 3743 IF ( ( id%MYID .EQ. MASTER_ROOT_IN_COMM ) .AND. 3744 & ( id%MYID .EQ. MASTER ) ) THEN 3745C -- Same proc : copy is possible: 3746 II = 0 3747 DO K=1, NBRHS_EFF 3748 KDEC = IBEG_REDRHS+int(K-1,8)*int(LD_REDRHS,8)-1_8 3749 DO I = 1, SIZE_ROOT 3750 PTR_RHS_ROOT(II+I) = id%REDRHS(KDEC+I) 3751 ENDDO 3752 II = II+SIZE_ROOT 3753 ENDDO 3754 ELSE 3755C -- send REDRHS 3756 IF ( id%MYID .EQ. MASTER) THEN 3757C -- send to MASTER_ROOT_IN_COMM using COMM communicator 3758C assert: id%KEEP(116).EQ.SIZE_ROOT 3759 IF (LD_REDRHS.EQ.SIZE_ROOT) THEN 3760C -- One send 3761 KDEC = IBEG_REDRHS 3762 CALL MPI_SEND(id%REDRHS(KDEC), 3763 & SIZE_ROOT*NBRHS_EFF, 3764 & MPI_DOUBLE_COMPLEX, 3765 & MASTER_ROOT_IN_COMM, 0, id%COMM,IERR) 3766 ELSE 3767C -- NBRHS_EFF sends 3768 DO K=1, NBRHS_EFF 3769 KDEC = IBEG_REDRHS+int(K-1,8)*int(LD_REDRHS,8) 3770 CALL MPI_SEND(id%REDRHS(KDEC),SIZE_ROOT, 3771 & MPI_DOUBLE_COMPLEX, 3772 & MASTER_ROOT_IN_COMM, 0, id%COMM,IERR) 3773 ENDDO 3774 ENDIF 3775 ELSE IF ( id%MYID .EQ. MASTER_ROOT_IN_COMM ) THEN 3776C -- receive from MASTER 3777 II = 1 3778 IF (LD_REDRHS.EQ.SIZE_ROOT) THEN 3779C -- receive all in on shot 3780 CALL MPI_RECV(PTR_RHS_ROOT(II), 3781 & SIZE_ROOT*NBRHS_EFF, 3782 & MPI_DOUBLE_COMPLEX, 3783 & MASTER, 0, id%COMM,STATUS,IERR) 3784 ELSE 3785 DO K=1, NBRHS_EFF 3786 CALL MPI_RECV(PTR_RHS_ROOT(II),SIZE_ROOT, 3787 & MPI_DOUBLE_COMPLEX, 3788 & MASTER, 0, id%COMM,STATUS,IERR) 3789 II = II + SIZE_ROOT 3790 ENDDO 3791 ENDIF 3792 ENDIF 3793C -- other procs are not concerned 3794 ENDIF 3795 ENDIF 3796 TIMEC1=MPI_WTIME() 3797 IF ( I_AM_SLAVE ) THEN 3798 LIW_PASSED = max( LIW, 1 ) 3799 LA_PASSED = max( LA, 1_8 ) 3800C 3801 IF ((id%KEEP(235).EQ.0).and.(id%KEEP(237).EQ.0) ) THEN 3802C 3803C --- Normal case : we do not exploit sparsity of the RHS 3804C 3805 FROM_PP = .FALSE. 3806 NBSPARSE_LOC = (DO_NBSPARSE.AND.NBRHS_EFF.GT.1) 3807 PRUNED_SIZE_LOADED = 0_8 ! From ZMUMPS_SOL_ES module 3808 CALL ZMUMPS_SOL_C(id%root, id%N, id%S(1), LA_PASSED, 3809 & IS(1), LIW_PASSED, WORK_WCB(1), LWCB8_SOL_C, IWCB, LIWCB, 3810 & NBRHS_EFF, id%NA(1),id%LNA,id%NE_STEPS(1), SRW3, MTYPE, 3811 & ICNTL(1), FROM_PP, id%STEP(1), id%FRERE_STEPS(1), 3812 & id%DAD_STEPS(1), id%FILS(1), id%PTLUST_S(1), id%PTRFAC(1), 3813 & IWK_SOLVE, LIWK_SOLVE, PTRACB, LIWK_PTRACB, 3814 & id%PROCNODE_STEPS(1), 3815 & id%NSLAVES, INFO(1), KEEP(1), KEEP8(1), id%DKEEP(1), 3816 & id%COMM_NODES, id%MYID, id%MYID_NODES, 3817 & id%BUFR(1), id%LBUFR, id%LBUFR_BYTES, 3818 & id%ISTEP_TO_INIV2(1), id%TAB_POS_IN_PERE(1,1), 3819 & IBEG_ROOT_DEF, IEND_ROOT_DEF, IROOT_DEF_RHS_COL1, 3820 & PTR_RHS_ROOT(1), LPTR_RHS_ROOT, SIZE_ROOT, MASTER_ROOT, 3821 & id%RHSCOMP(IBEG_RHSCOMP), LD_RHSCOMP, 3822 & id%POSINRHSCOMP_ROW(1), id%POSINRHSCOMP_COL(1) 3823 & , 1 , 1 , 1 , 1 3824 & , IDUMMY, 1, JDUMMY, KDUMMY, 1, LDUMMY, 1, MDUMMY 3825 & , 1 , 1 , NBSPARSE_LOC, PTR_RHS_BOUNDS(1), LPTR_RHS_BOUNDS 3826 & ) 3827 ELSE 3828C Exploit sparsity of the RHS (all cases) 3829C Remark that JBEG_RHS is already initialized 3830C 3831 FROM_PP = .FALSE. 3832 NBSPARSE_LOC = (DO_NBSPARSE.AND.NBRHS_EFF.GT.1) 3833 CALL ZMUMPS_SOL_C(id%root, id%N, id%S(1), LA_PASSED,IS(1), 3834 & LIW_PASSED, WORK_WCB(1), LWCB8_SOL_C, IWCB, LIWCB, NBRHS_EFF, 3835 & id%NA(1),id%LNA, id%NE_STEPS(1), SRW3, MTYPE, ICNTL(1), 3836 & FROM_PP, id%STEP(1), id%FRERE_STEPS(1), id%DAD_STEPS(1), 3837 & id%FILS(1), id%PTLUST_S(1), id%PTRFAC(1), 3838 & IWK_SOLVE, LIWK_SOLVE, PTRACB, LIWK_PTRACB, 3839 & id%PROCNODE_STEPS(1),id%NSLAVES,INFO(1),KEEP(1), 3840 & KEEP8(1), id%DKEEP(1), id%COMM_NODES, id%MYID, id%MYID_NODES, 3841 & id%BUFR(1), id%LBUFR, id%LBUFR_BYTES, id%ISTEP_TO_INIV2(1), 3842 & id%TAB_POS_IN_PERE(1,1), IBEG_ROOT_DEF, IEND_ROOT_DEF, 3843 & IROOT_DEF_RHS_COL1, PTR_RHS_ROOT(1), LPTR_RHS_ROOT, SIZE_ROOT, 3844 & MASTER_ROOT, id%RHSCOMP(IBEG_RHSCOMP), LD_RHSCOMP, 3845 & id%POSINRHSCOMP_ROW(1), id%POSINRHSCOMP_COL(1), 3846 & NZ_THIS_BLOCK, NBCOL_INBLOC, id%NRHS, JBEG_RHS , 3847 & id%Step2node(1), id%KEEP(28), IRHS_SPARSE_COPY(1), 3848 & IRHS_PTR_COPY(1), size(PERM_RHS), PERM_RHS, size(UNS_PERM_INV) 3849C size 1 if not used 3850 & , UNS_PERM_INV, NB_FS_IN_RHSCOMP_F, NB_FS_IN_RHSCOMP_TOT 3851 & , NBSPARSE_LOC, PTR_RHS_BOUNDS(1), LPTR_RHS_BOUNDS 3852 & ) 3853 ENDIF ! end of exploit sparsity (pruning nodes of the tree) 3854 END IF 3855C ----------------- 3856C End of slave code 3857C ----------------- 3858C 3859 TIMEC2=MPI_WTIME()-TIMEC1+TIMEC2 3860C 3861C Propagate errors 3862 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 3863 & id%COMM,id%MYID) 3864C 3865C Change error code. 3866 IF (INFO(1).eq.-2) then 3867 INFO(1)=-11 3868 IF (LPOK) 3869 & write(LP,*) 3870 & ' WARNING : -11 error code obtained in solve' 3871 END IF 3872 IF (INFO(1).eq.-3) then 3873 INFO(1)=-14 3874 IF (LPOK) 3875 & write(LP,*) 3876 & ' WARNING : -14 error code obtained in solve' 3877 END IF 3878C 3879C Return in case of error. 3880 IF (INFO(1).LT.0) GO TO 90 3881C 3882C ====================================================== 3883C ONLY FORWARD was performed (case of reduced RHS with Schur 3884C option during factorisation) 3885C ====================================================== 3886 IF ( KEEP(221) .EQ. 1 ) THEN ! === Begin OF REDUCED RHS ====== 3887C -------------------------------------- 3888C Send (or copy) reduced RHS from PTR_RHS_ROOT located on 3889C MASTER_ROOT_IN_COMM to REDRHS located on MASTER (host node). 3890C (column by column if leading dimension LD_REDRHS 3891C of REDRHS is not equal to SIZE_ROOT) 3892C -------------------------------------- 3893 IF ( ( id%MYID .EQ. MASTER_ROOT_IN_COMM ) .AND. 3894 & ( id%MYID .EQ. MASTER ) ) THEN 3895C -- same proc --> copy 3896 II = 0 3897 DO K=1, NBRHS_EFF 3898 KDEC = IBEG_REDRHS+int(K-1,8)*int(LD_REDRHS,8) - 1_8 3899 DO I = 1, SIZE_ROOT 3900 id%REDRHS(KDEC+I) = PTR_RHS_ROOT(II+I) 3901 ENDDO 3902 II = II+SIZE_ROOT 3903 ENDDO 3904 ELSE 3905C -- recv in REDRHS 3906 IF ( id%MYID .EQ. MASTER ) THEN 3907C -- recv from MASTER_ROOT_IN_COMM 3908 IF (LD_REDRHS.EQ.SIZE_ROOT) THEN 3909C -- One message to receive 3910 KDEC = IBEG_REDRHS 3911 CALL MPI_RECV(id%REDRHS(KDEC), 3912 & SIZE_ROOT*NBRHS_EFF, 3913 & MPI_DOUBLE_COMPLEX, 3914 & MASTER_ROOT_IN_COMM, 0, id%COMM, 3915 & STATUS,IERR) 3916 ELSE 3917C -- NBRHS_EFF receives 3918 DO K=1, NBRHS_EFF 3919 KDEC = IBEG_REDRHS+int(K-1,8)*int(LD_REDRHS,8) 3920 CALL MPI_RECV(id%REDRHS(KDEC),SIZE_ROOT, 3921 & MPI_DOUBLE_COMPLEX, 3922 & MASTER_ROOT_IN_COMM, 0, id%COMM, 3923 & STATUS,IERR) 3924 ENDDO 3925 ENDIF 3926 ELSE IF ( id%MYID .EQ. MASTER_ROOT_IN_COMM ) THEN 3927C -- send to MASTER 3928 II = 1 3929 IF (LD_REDRHS.EQ.SIZE_ROOT) THEN 3930C -- send all in on shot 3931 CALL MPI_SEND(PTR_RHS_ROOT(II), 3932 & SIZE_ROOT*NBRHS_EFF, 3933 & MPI_DOUBLE_COMPLEX, 3934 & MASTER, 0, id%COMM,IERR) 3935 ELSE 3936 DO K=1, NBRHS_EFF 3937 CALL MPI_SEND(PTR_RHS_ROOT(II),SIZE_ROOT, 3938 & MPI_DOUBLE_COMPLEX, 3939 & MASTER, 0, id%COMM,IERR) 3940 II = II + SIZE_ROOT 3941 ENDDO 3942 ENDIF 3943 ENDIF 3944C -- other procs are not concerned 3945 ENDIF 3946 ENDIF ! ====== END OF REDUCED RHS (Fwd only performed) ====== 3947C ======================================================= 3948C BACKWARD was PERFORMED 3949C Postprocess solution that is distributed 3950 IF ( KEEP(221) .NE. 1 ) THEN ! BACKWARD was PERFORMED 3951C -- KEEP(221).NE.1 => we are sure that backward has been performed 3952 IF (ICNTL21 == 0) THEN ! CENTRALIZED SOLUTION 3953C ======================================================== 3954C GATHER SOLUTION computed during bwd 3955C Each proc holds the pieces of solution corresponding 3956C to all fully summed variables mapped on that processor 3957C (i.e. corresponding to master nodes mapped on that proc) 3958C In case of A-1 we gather directly in RHS_SPARSE 3959C the distributed solution. 3960C Scaling is done in all case on the fly of the reception 3961C Note that when only FORWARD has been performed 3962C RSH_MUMPS holds the solution computed during forward step 3963C (ZMUMPS_SOL_R) 3964C there is no need to copy back in RSH_MUMPS the solution 3965C ======================================================== 3966C centralized solution 3967 IF (KEEP(237).EQ.0) THEN 3968C CWORK not needed for AM1 3969#if defined(RHSCOMP_BYROWS) 3970 LCWORK = NBRHS_EFF 3971#else 3972 LCWORK = max(max(KEEP(247),KEEP(246)),1) 3973#endif 3974 ALLOCATE( CWORK(LCWORK), stat=allocok ) 3975 IF (allocok > 0) THEN 3976 INFO(1)=-13 3977 INFO(2)=max(max(KEEP(247),KEEP(246)),1) 3978 ENDIF 3979 ENDIF 3980C Propagate errors 3981 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 3982 & id%COMM,id%MYID) 3983C Return in case of error. 3984 IF (INFO(1).LT.0) GO TO 90 3985 IF ((id%MYID.NE.MASTER).OR. .NOT.LSCAL) THEN 3986 PT_SCALING => Dummy_SCAL 3987 ELSE 3988 IF (MTYPE.EQ.1) THEN 3989 PT_SCALING => id%COLSCA 3990 ELSE 3991 PT_SCALING => id%ROWSCA 3992 ENDIF 3993 ENDIF 3994 LIW_PASSED = max( LIW, 1 ) 3995 IF(id%MYID.EQ.MASTER) TIMEGATHER1=MPI_WTIME() 3996 IF ( .NOT.I_AM_SLAVE ) THEN 3997C I did not participate to computing part of the solution 3998C (id%RHSCOMP not set/allocate) : receive solution, store 3999C it and scale it. 4000 IF (KEEP(237).EQ.0) THEN 4001C We need a workspace of minimal size KEEP(247) 4002C in order to unpack pieces of the solution. 4003 CALL ZMUMPS_GATHER_SOLUTION(id%NSLAVES,id%N, 4004 & id%MYID, id%COMM, NBRHS_EFF, 4005 & MTYPE, id%RHS(1), LD_RHS, id%NRHS, JBEG_RHS, 4006 & JDUMMY, id%KEEP(1), id%KEEP8(1), 4007 & id%PROCNODE_STEPS(1), IDUMMY, 1, 4008 & id%STEP(1), id%BUFR(1), id%LBUFR, id%LBUFR_BYTES, 4009 & CWORK(1), LCWORK, 4010 & LSCAL, PT_SCALING(1), size(PT_SCALING), 4011 & C_DUMMY, 1 , 1, IDUMMY, 1, 4012 & PERM_RHS, size(PERM_RHS) ! for sparse permuted RHS 4013 & ) 4014 ELSE 4015C only gather target entries of A-1 4016 CALL ZMUMPS_GATHER_SOLUTION_AM1(id%NSLAVES,id%N, 4017 & id%MYID, id%COMM, NBRHS_EFF, 4018 & C_DUMMY, 1, 1, 4019 & id%KEEP(1), id%BUFR(1), id%LBUFR, id%LBUFR_BYTES, 4020 & LSCAL, PT_SCALING(1), size(PT_SCALING) 4021C --- A-1 related entries 4022 & ,IRHS_PTR_COPY(1), size(IRHS_PTR_COPY), 4023 & IRHS_SPARSE_COPY(1), size(IRHS_SPARSE_COPY), 4024 & RHS_SPARSE_COPY(1), size(RHS_SPARSE_COPY), 4025 & UNS_PERM_INV, size(UNS_PERM_INV), 4026 & IDUMMY, 1, 0 4027 & ) 4028 ENDIF 4029 ELSE 4030C Avoid temporary copy (IS(1)) that some old 4031C compilers would do otherwise 4032 IF (KEEP(237).EQ.0) THEN 4033 IF (id%MYID.EQ.MASTER) THEN 4034 PTR_RHS => id%RHS 4035 NCOL_RHS_loc = id%NRHS 4036 LD_RHS_loc = LD_RHS 4037 JBEG_RHS_loc = JBEG_RHS 4038 ELSE 4039 PTR_RHS => CDUMMY_TARGET 4040 NCOL_RHS_loc = 1 4041 LD_RHS_loc = 1 4042 JBEG_RHS_loc = 1 4043 ENDIF 4044 CALL ZMUMPS_GATHER_SOLUTION(id%NSLAVES,id%N, 4045 & id%MYID, id%COMM, NBRHS_EFF, MTYPE, 4046 & PTR_RHS(1), LD_RHS_loc, NCOL_RHS_loc, JBEG_RHS_loc, 4047 & id%PTLUST_S(1), id%KEEP(1), id%KEEP8(1), 4048 & id%PROCNODE_STEPS(1), IS(1), LIW_PASSED, 4049 & id%STEP(1), id%BUFR(1), id%LBUFR, id%LBUFR_BYTES, 4050 & CWORK(1), LCWORK, 4051 & LSCAL, PT_SCALING(1), size(PT_SCALING), 4052 & id%RHSCOMP(IBEG_RHSCOMP), LD_RHSCOMP, NBRHS_EFF, 4053 & id%POSINRHSCOMP_COL(1), id%N, 4054 & PERM_RHS, size(PERM_RHS) ! For sparse permuted RHS 4055 & ) 4056 ELSE ! only gather target entries of A-1 4057 CALL ZMUMPS_GATHER_SOLUTION_AM1(id%NSLAVES,id%N, 4058 & id%MYID, id%COMM, NBRHS_EFF, 4059 & id%RHSCOMP(IBEG_RHSCOMP), LD_RHSCOMP, NBRHS_EFF, 4060 & id%KEEP(1), id%BUFR(1), id%LBUFR, id%LBUFR_BYTES, 4061 & LSCAL, PT_SCALING(1), size(PT_SCALING) 4062C --- A-1 related entries 4063 & , IRHS_PTR_COPY(1), size(IRHS_PTR_COPY), 4064 & IRHS_SPARSE_COPY(1), size(IRHS_SPARSE_COPY), 4065 & RHS_SPARSE_COPY(1), size(RHS_SPARSE_COPY), 4066 & UNS_PERM_INV, size(UNS_PERM_INV), 4067 & id%POSINRHSCOMP_COL(1), id%N, NB_FS_IN_RHSCOMP_TOT 4068 & ) 4069 ENDIF 4070 ENDIF 4071 IF (id%MYID.EQ.MASTER) THEN 4072 TIMEGATHER2=MPI_WTIME()-TIMEGATHER1+TIMEGATHER2 4073 ENDIF 4074 IF (KEEP(237).EQ.0) DEALLOCATE( CWORK ) 4075 IF ( (id%MYID.EQ.MASTER).AND. (KEEP(237).NE.0) 4076 & ) THEN 4077C Copy back solution from RHS_SPARSE_COPY TO RHS_SPARSE 4078 IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR) THEN 4079 DO J = JBEG_RHS, JBEG_RHS+NBCOL_INBLOC-1 4080 COLSIZE = id%IRHS_PTR(PERM_RHS(J)+1) - 4081 & id%IRHS_PTR(PERM_RHS(J)) 4082 IF (COLSIZE.EQ.0) CYCLE 4083 JJ = J-JBEG_RHS+1 4084c IZ - Column index in Sparse RHS 4085 DO IZ= id%IRHS_PTR(PERM_RHS(J)), 4086 & id%IRHS_PTR(PERM_RHS(J)+1)-1 4087 I = id%IRHS_SPARSE (IZ) 4088 DO IZ2 = IRHS_PTR_COPY(JJ),IRHS_PTR_COPY(JJ+1)-1 4089 IF (IRHS_SPARSE_COPY(IZ2).EQ.I) EXIT 4090 IF (IZ2.EQ.IRHS_PTR_COPY(JJ+1)-1) THEN 4091 WRITE(6,*) " Internal Error 13 in solution ", 4092 & " driver, gather " 4093 CALL MUMPS_ABORT() 4094 ENDIF 4095 ENDDO 4096 id%RHS_SPARSE(IZ) = RHS_SPARSE_COPY(IZ2) 4097 ENDDO 4098 ENDDO 4099 ELSE ! Not (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR) 4100 DO J = JBEG_RHS, JBEG_RHS+NBCOL_INBLOC-1 4101 COLSIZE = id%IRHS_PTR(J+1) - id%IRHS_PTR(J) 4102 IF (COLSIZE.EQ.0) CYCLE 4103 JJ = J-JBEG_RHS+1 4104c IZ - Column index in Sparse RHS 4105 DO IZ= id%IRHS_PTR(J), id%IRHS_PTR(J+1) - 1 4106 I = id%IRHS_SPARSE (IZ) 4107 DO IZ2 = IRHS_PTR_COPY(JJ),IRHS_PTR_COPY(JJ+1)-1 4108 IF (IRHS_SPARSE_COPY(IZ2).EQ.I) EXIT 4109 IF (IZ2.EQ.IRHS_PTR_COPY(JJ+1)-1) THEN 4110 WRITE(6,*) " Internal Error 14 in solution", 4111 & " driver, gather " 4112 CALL MUMPS_ABORT() 4113 ENDIF 4114 ENDDO 4115 id%RHS_SPARSE(IZ) = RHS_SPARSE_COPY(IZ2) 4116 ENDDO 4117 ENDDO 4118 ENDIF ! End DO_PERMUTE_RHS.OR.INTERLEAVE_PAR 4119 ENDIF ! end A-1 on master 4120C 4121C -- END of backward was performed with centralized solution 4122 ELSE ! (KEEP(221).NE.1) .AND.(ICNTL21.NE.0)) 4123C 4124C BEGIN of backward performed with distributed solution 4125C time local copy + scaling 4126 TIMECOPYSCALE1=MPI_WTIME() 4127C The non working host should not do this: 4128 IF ( I_AM_SLAVE ) THEN 4129 LIW_PASSED = max( LIW, 1 ) 4130C Only called if more than 1 pivot 4131C was eliminated by the processor. 4132C Note that LSOL_loc >= KEEP(89) 4133 IF ( KEEP(89) .GT. 0 ) THEN 4134 CALL ZMUMPS_DISTRIBUTED_SOLUTION(id%NSLAVES, 4135 & id%N,id%MYID_NODES, 4136 & MTYPE, id%RHSCOMP(IBEG_RHSCOMP), LD_RHSCOMP, 4137 & NBRHS_EFF, id%POSINRHSCOMP_COL(1), 4138 & id%ISOL_loc(1), id%SOL_loc(1), id%NRHS, 4139 & JBEG_RHS-NB_RHSSKIPPED, id%LSOL_loc, 4140 & id%PTLUST_S(1), id%PROCNODE_STEPS(1), 4141 & id%KEEP(1),id%KEEP8(1), 4142 & IS(1), LIW_PASSED, 4143 & id%STEP(1), scaling_data, LSCAL, NB_RHSSKIPPED, 4144 & PERM_RHS, size(PERM_RHS) ) ! For permuted sparse RHS 4145 ENDIF 4146 ENDIF 4147 TIMECOPYSCALE2=MPI_WTIME()-TIMECOPYSCALE1+TIMECOPYSCALE2 4148 ENDIF 4149C === BACKWARD was PERFORMED WITH DISTRIBUTED SOLUTION === 4150C ======================================================== 4151 ENDIF ! ==== END of BACKWARD was PERFORMED (KEEP(221).NE.1) 4152C note that the main DO-loop on blocks is not ended yet 4153C 4154C ============================================ 4155C BEGIN 4156C 4157C ITERATIVE REFINEMENT AND/OR ERROR ANALYSIS 4158C 4159C ============================================ 4160 IF ( ICNTL10 > 0 .AND. NBRHS_EFF > 1 ) THEN 4161C 4162C ---------------------------------- 4163C Multiple RHS: apply a fixed number 4164C of iterative refinement steps 4165C ---------------------------------- 4166C DO I = 1, ICNTL10 4167 write(6,*) ' Internal ERROR 15 in sol_driver ' 4168C Compute residual: Y <- SAVERHS - A * RHS 4169C Solve RHS <- A^-1 Y, Y modified 4170C Assemble in RHS(REDUCE) 4171C RHS <- RHS + Y 4172C END DO 4173 END IF 4174 IF (POSTPros) THEN 4175C 4176C SAVERHS holds the original right hand side 4177C Sparse rhs are saved in SAVERHS as dense rhs 4178C 4179C * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 4180C 4181C Start iterative refinements. The master is managing the 4182C organisation of work, but slaves are used to solve systems of 4183C equations and, in case of distributed matrix, perform 4184C matrix-vector products. It is more complicated to do this with 4185C the SPMD version than it was with the master/slave approach. 4186C 4187C * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 4188c IF ( PROK .AND. ICNTL10 .NE. 0 ) WRITE( MP, 270 ) 4189 IF ( PROKG .AND. ICNTL10 .NE. 0 ) WRITE( MPG, 270 ) 4190C Initializations and allocations 4191 NITREF = abs(ICNTL10) 4192 ALLOCATE(R_Y(id%N), stat = allocok) 4193 IF ( allocok .GT. 0 ) THEN 4194 INFO(1)=-13 4195 INFO(2)=id%N 4196 GOTO 777 4197 ENDIF 4198 NB_BYTES = NB_BYTES + int(id%N,8)*K16_8 4199 ALLOCATE(C_Y(id%N), stat = allocok) 4200 IF ( allocok .GT. 0 ) THEN 4201 INFO(1)=-13 4202 INFO(2)=id%N 4203 GOTO 777 4204 ENDIF 4205 NB_BYTES = NB_BYTES + int(id%N,8)*K35_8 4206 IF ( id%MYID .EQ. MASTER ) THEN 4207 ALLOCATE( IW1( 2 * id%N ),stat = allocok ) 4208 IF ( allocok .GT. 0 ) THEN 4209 INFO(1)=-13 4210 INFO(2)=2 * id%N 4211 GOTO 777 4212 ENDIF 4213 NB_BYTES = NB_BYTES + int(2*id%N,8)*K34_8 4214 ALLOCATE( C_W(id%N), stat = allocok ) 4215 IF ( allocok .GT. 0 ) THEN 4216 INFO(1)=-13 4217 INFO(2)=id%N 4218 GOTO 777 4219 ENDIF 4220 NB_BYTES = NB_BYTES + int(id%N,8)*K35_8 4221 ALLOCATE( R_W(2*id%N), stat = allocok ) 4222 IF ( allocok .GT. 0 ) THEN 4223 INFO(1)=-13 4224 INFO(2)=id%N 4225 GOTO 777 4226 ENDIF 4227 NB_BYTES = NB_BYTES + int(2*id%N,8)*K16_8 4228 IF ( PROKG .AND. ICNTL10 .GT. 0 ) 4229 & WRITE( MPG, 240) 'MAXIMUM NUMBER OF STEPS =', NITREF 4230C end allocations on Master 4231 END IF 4232 ALLOCATE(C_LOCWK54(id%N),stat = allocok) 4233 IF ( allocok .GT. 0 ) THEN 4234 INFO(1)=-13 4235 INFO(2)=id%N 4236 GOTO 777 4237 ENDIF 4238 NB_BYTES = NB_BYTES + int(id%N,8)*K35_8 4239 ALLOCATE(R_LOCWK54(id%N),stat = allocok) 4240 IF ( allocok .GT. 0 ) THEN 4241 INFO(1)=-13 4242 INFO(2)=id%N 4243 GOTO 777 4244 ENDIF 4245 NB_BYTES = NB_BYTES + int(id%N,8)*K16_8 4246 KASE = 0 4247C Synchro point with broadcast of errors 4248 777 CONTINUE 4249 NB_BYTES_MAX = max(NB_BYTES_MAX,NB_BYTES) 4250 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 4251 & id%COMM,id%MYID) 4252 IF ( INFO(1) .LT. 0 ) GOTO 90 4253C TIMEEA needed if EA and IR with stopping criterium 4254C and IR with fixed n.of steps. 4255 TIMEEA = 0.0D0 4256C TIMEEA1 needed if EA and IR with fixed n.of steps 4257 TIMEEA1 = 0.0D0 4258 CALL MUMPS_SECDEB(TIMEIT) 4259C ------------------------- 4260C 4261C RHSOL holds the initial guess for the solution 4262C We start the loop on the Iterative refinement procedure 4263C 4264C 4265C 4266C |- IRefin. L O O P -| 4267C V V 4268C 4269C ========================================================= 4270C Computation of the infinity norm of A 4271C ========================================================= 4272 IF ((ICNTL11.GT.0).OR.(ICNTL10.GT.0)) THEN 4273C We don't get through these lines if ICNTL10<=0 AND ICNTL11<=0 4274 IF ( KEEP(54) .eq. 0 ) THEN 4275C ------------------ 4276C Centralized matrix 4277C ------------------ 4278 IF ( id%MYID .eq. MASTER ) THEN 4279C ----------------------------------------- 4280C Call ZMUMPS_SOL_X outside, if needed, 4281C in order to compute w(i,2)=sum|Aij|,j=1:n 4282C in vector R_W(id%N+i) 4283C ----------------------------------------- 4284 IF (KEEP(55).NE.0) THEN 4285C unassembled matrix and norm of row required 4286 CALL ZMUMPS_SOL_X_ELT(MTYPE, id%N, 4287 & id%NELT, id%ELTPTR(1), 4288 & id%LELTVAR, id%ELTVAR(1), 4289 & id%KEEP8(30), id%A_ELT(1), 4290 & R_W(id%N+1), KEEP(1),KEEP8(1) ) 4291 ELSE 4292C assembled matrix 4293 IF ( MTYPE .eq. 1 ) THEN 4294 CALL ZMUMPS_SOL_X 4295 & ( id%A(1), id%KEEP8(28), id%N, id%IRN(1), id%JCN(1), 4296 & R_W(id%N+1), KEEP(1),KEEP8(1)) 4297 ELSE 4298 CALL ZMUMPS_SOL_X 4299 & ( id%A(1), id%KEEP8(28), id%N, id%JCN(1), id%IRN(1), 4300 & R_W(id%N+1), KEEP(1),KEEP8(1)) 4301 END IF 4302 ENDIF 4303 ENDIF 4304 ELSE 4305C --------------------- 4306C Matrix is distributed 4307C --------------------- 4308 IF ( I_AM_SLAVE .and. 4309 & id%KEEP8(29) .NE. 0_8 ) THEN 4310 IF ( MTYPE .eq. 1 ) THEN 4311 CALL ZMUMPS_SOL_X(id%A_loc(1), 4312 & id%KEEP8(29), id%N, 4313 & id%IRN_loc(1), id%JCN_loc(1), 4314 & R_LOCWK54, id%KEEP(1),id%KEEP8(1) ) 4315 ELSE 4316 CALL ZMUMPS_SOL_X(id%A_loc(1), 4317 & id%KEEP8(29), id%N, 4318 & id%JCN_loc(1), id%IRN_loc(1), 4319 & R_LOCWK54, id%KEEP(1),id%KEEP8(1) ) 4320 END IF 4321 ELSE 4322 R_LOCWK54 = RZERO 4323 END IF 4324C ------------------------- 4325C Assemble result on master 4326C ------------------------- 4327 IF ( id%MYID .eq. MASTER ) THEN 4328 CALL MPI_REDUCE( R_LOCWK54, R_W( id%N + 1 ), 4329 & id%N, MPI_DOUBLE_PRECISION, 4330 & MPI_SUM,MASTER,id%COMM, IERR) 4331 ELSE 4332 CALL MPI_REDUCE( R_LOCWK54, R_DUMMY, 4333 & id%N, MPI_DOUBLE_PRECISION, 4334 & MPI_SUM,MASTER,id%COMM, IERR) 4335 END IF 4336C End if KEEP(54) 4337 END IF 4338C 4339 IF ( id%MYID .eq. MASTER ) THEN 4340C R_W is available on the master process only 4341 RINFOG(4) = dble(ZERO) 4342 DO I = 1, id%N 4343 RINFOG(4) = max(R_W( id%N +I), RINFOG(4)) 4344 ENDDO 4345 ENDIF 4346C end ICNTL11 =/0 v ICNTL10>0 4347 ENDIF 4348C ========================================================= 4349C END norm of A 4350C ========================================================= 4351C Initializations for the IR 4352 NOITER = 0 4353 IFLAG_IR = 0 4354 TESTConv = .FALSE. 4355C Test of convergence should be made 4356 IF (( id%MYID .eq. MASTER ).AND.(ICNTL10.GT.0)) THEN 4357 TESTConv = .TRUE. 4358 ARRET = CNTL(2) 4359 IF (ARRET .LT. 0.0D0) THEN 4360 ARRET = sqrt(epsilon(0.0D0)) 4361 END IF 4362 ENDIF 4363C ========================================================= 4364C Starting IR 4365 DO 22 IRStep = 1, NITREF +1 4366C ========================================================= 4367C 4368C ========================================================= 4369C Refine the solution starting from the second step of do loop 4370C ========================================================= 4371 IF (( id%MYID .eq. MASTER ).AND.(IRStep.GT.1)) THEN 4372 NOITER = NOITER + 1 4373 DO I = 1, id%N 4374 id%RHS(IBEG+I-1) = id%RHS(IBEG+I-1) + C_Y(I) 4375 ENDDO 4376 ENDIF 4377C =========================================== 4378C Computation of the RESIDUAL and of |A||x| 4379C =========================================== 4380 IF ( KEEP(54) .eq. 0 ) THEN 4381 IF ( id%MYID .eq. MASTER ) THEN 4382 IF (KEEP(55).NE.0) THEN 4383C input matrix by element 4384 CALL ZMUMPS_ELTYD( MTYPE, id%N, 4385 & id%NELT, id%ELTPTR(1), id%LELTVAR, 4386 & id%ELTVAR(1), id%KEEP8(30), id%A_ELT(1), 4387 & SAVERHS, id%RHS(IBEG), 4388 & C_Y, R_W, KEEP(50)) 4389 ELSE 4390 IF ( MTYPE .eq. 1 ) THEN 4391 CALL ZMUMPS_SOL_Y(id%A(1), id%KEEP8(28), 4392 & id%N, id%IRN(1), 4393 & id%JCN(1), SAVERHS, 4394 & id%RHS(IBEG), C_Y, R_W, KEEP(1),KEEP8(1)) 4395 ELSE 4396 CALL ZMUMPS_SOL_Y(id%A(1), id%KEEP8(28), 4397 & id%N, id%JCN(1), 4398 & id%IRN(1), SAVERHS, 4399 & id%RHS(IBEG), C_Y, R_W, KEEP(1),KEEP8(1)) 4400 ENDIF 4401 ENDIF 4402 ENDIF 4403 ELSE 4404C --------------------- 4405C Matrix is distributed 4406C --------------------- 4407 CALL MPI_BCAST( RHS_IR(IBEG), id%N, 4408 & MPI_DOUBLE_COMPLEX, MASTER, 4409 & id%COMM, IERR ) 4410C -------------------------------------- 4411C Compute Y = SAVERHS - A * RHS 4412C Y, SAVERHS defined only on master 4413C -------------------------------------- 4414 IF ( I_AM_SLAVE .and. 4415 & id%KEEP8(29) .NE. 0_8 ) THEN 4416 CALL ZMUMPS_LOC_MV8( id%N, id%KEEP8(29), 4417 & id%IRN_loc(1), id%JCN_loc(1), id%A_loc(1), 4418 & RHS_IR(IBEG), C_LOCWK54, KEEP(50), MTYPE ) 4419 ELSE 4420 C_LOCWK54 = ZERO 4421 END IF 4422 IF ( id%MYID .eq. MASTER ) THEN 4423 CALL MPI_REDUCE( C_LOCWK54, C_Y, 4424 & id%N, MPI_DOUBLE_COMPLEX, 4425 & MPI_SUM,MASTER,id%COMM, IERR) 4426C =========================== 4427 C_Y = SAVERHS - C_Y 4428C =========================== 4429 ELSE 4430 CALL MPI_REDUCE( C_LOCWK54, C_DUMMY, 4431 & id%N, MPI_DOUBLE_COMPLEX, 4432 & MPI_SUM,MASTER,id%COMM, IERR) 4433 END IF 4434C -------------------------------------- 4435C Compute 4436C * If MTYPE = 1 4437C W(i) = Sum | Aij | | RHSj | 4438C j 4439C * If MTYPE = 0 4440C W(j) = Sum | Aij | | RHSi | 4441C i 4442C R_LOCWK54 used as local array for W 4443C RHS has been broadcasted 4444C -------------------------------------- 4445 IF ( I_AM_SLAVE .and. id%KEEP8(29) .NE. 0_8 ) THEN 4446 CALL ZMUMPS_LOC_OMEGA1( id%N, id%KEEP8(29), 4447 & id%IRN_loc(1), id%JCN_loc(1), id%A_loc(1), 4448 & RHS_IR(IBEG), R_LOCWK54, KEEP(50), MTYPE ) 4449 ELSE 4450 R_LOCWK54 = RZERO 4451 END IF 4452 IF ( id%MYID .eq. MASTER ) THEN 4453 CALL MPI_REDUCE( R_LOCWK54, R_W, 4454 & id%N, MPI_DOUBLE_PRECISION, 4455 & MPI_SUM,MASTER,id%COMM, IERR) 4456 ELSE 4457 CALL MPI_REDUCE( R_LOCWK54, R_DUMMY, 4458 & id%N, MPI_DOUBLE_PRECISION, 4459 & MPI_SUM, MASTER, id%COMM, IERR) 4460 ENDIF 4461 ENDIF 4462C ===================================== 4463C END computation RESIDUAL and |A||x| 4464C ===================================== 4465 IF ( id%MYID .eq. MASTER ) THEN 4466C 4467 IF ((ICNTL11.GT.0).OR.(ICNTL10.GT.0)) THEN 4468C -------------- 4469C Error analysis and test of convergence, 4470C Compute the sparse componentwise backward error: 4471C - at each step if test of convergence of IR is 4472C requested (ICNTL(10)>0) 4473C - at step 1 and NITREF+1 if error analysis 4474C to be computed (ICNTL(11)>0) and if ICNTL(10)< 0 4475 IF (((ICNTL11.GT.0).OR.((ICNTL10.LT.0).AND. 4476 & ((IRStep.EQ.1).OR.(IRStep.EQ.NITREF+1))) 4477 & .OR.((ICNTL10.EQ.0).AND.(IRStep.EQ.1))) 4478 & .OR.(ICNTL10.GT.0)) THEN 4479C Compute w1 and w2 4480C always if ICNTL10>0 in the other case if ICNTL11>0 4481C ----------------- 4482 IF (ICNTL10.LT.0) CALL MUMPS_SECDEB(TIMEEA1) 4483 CALL ZMUMPS_SOL_OMEGA(id%N,SAVERHS, 4484 & id%RHS(IBEG), C_Y, R_W, C_W, IW1, IFLAG_IR, 4485 & RINFOG(7), NOITER, TESTConv, 4486 & MP, ARRET ) 4487 IF (ICNTL10.LT.0) THEN 4488 CALL MUMPS_SECFIN(TIMEEA1) 4489 id%DKEEP(120)=id%DKEEP(120)+TIMEEA1 4490 ENDIF 4491 ENDIF 4492 IF ((ICNTL11.GT.0).AND.( 4493 & (ICNTL10.LT.0.AND.(IRStep.EQ.1.OR.IRStep.EQ.NITREF+1)) 4494 & .OR.((ICNTL10.GE.0).AND.(IRStep.EQ.1)) 4495 & )) THEN 4496C Error analysis before iterative refinement 4497C or for last if icntl10<0 4498C ------------------------------------------ 4499 CALL MUMPS_SECDEB(TIMEEA) 4500 IF (ICNTL10.EQ.0) THEN 4501C No IR : there will be only the EA of the 1st sol. 4502 IF ( MPG .GT. 0 ) WRITE( MPG, 170 ) 4503 ELSEIF (IRStep.EQ.1) THEN 4504C IR : we print the EA of the 1st sol. 4505 IF ( MPG .GT. 0 ) WRITE( MPG, 55 ) 4506 ELSEIF ((ICNTL10.LT.0).AND.(IRStep.EQ.NITREF+1)) THEN 4507C IR with fixed n. of steps: we print the EA 4508C of the last sol. 4509 IF ( MPG .GT. 0 ) THEN 4510 WRITE( MPG, 81 ) 4511 WRITE( MPG, * ) 4512 WRITE( MPG, 141 ) 4513 & 'NUMBER OF STEPS OF ITERATIVE REFINEMENT REQUESTED =', 4514 & NOITER 4515 ENDIF 4516 ENDIF 4517 GIVSOL = .TRUE. 4518 CALL ZMUMPS_SOL_Q(MTYPE,INFO(1),id%N, 4519 & id%RHS(IBEG), 4520 & SAVERHS,R_W(id%N+1),C_Y,GIVSOL, 4521 & RINFOG(4),RINFOG(5),RINFOG(6),MPG,ICNTL(1), 4522 & KEEP(1),KEEP8(1)) 4523 IF ( MPG .GT. 0 ) THEN 4524C Error analysis before iterative refinement 4525 WRITE( MPG, 115 ) 4526 & 'RINFOG(7):COMPONENTWISE SCALED RESIDUAL(W1)=', 4527 & RINFOG(7) 4528 WRITE( MPG, 115 ) 4529 & '------(8):---------------------------- (W2)=', 4530 & RINFOG(8) 4531 END IF 4532 CALL MUMPS_SECFIN(TIMEEA) 4533 id%DKEEP(120)=id%DKEEP(120)+TIMEEA 4534C end EA of the first solution 4535 END IF 4536 END IF 4537C -------------- 4538 IF (IRStep.EQ.NITREF +1) THEN 4539C If we are at the NITREF+1 step , we have refined the 4540C solution NITREF times so we have to stop. 4541 KASE = 0 4542C If we test the convergence (ICNTL10.GT.0) and 4543C IFLAG_IR = 0 we set a warning : more than NITREF steps 4544C needed 4545 IF ((ICNTL10.GT.0).AND.(IFLAG_IR.EQ.0)) 4546 & id%INFO(1) = id%INFO(1) + 8 4547 ELSE 4548 IF (ICNTL10.GT.0) THEN 4549C ------------------- 4550C Results of the test of convergence. 4551C IFLAG_IR = 0 we should try to improve the solution 4552C = 1 the stopping criterium is satisfied 4553C = 2 the method is diverging, we go back 4554C to the previous iterate 4555C = 3 the convergence is too slow 4556 IF (IFLAG_IR.GT.0) THEN 4557C If the convergence criterion is satisfied 4558C or the convergence too slow 4559C we set KASE=0 (end of the Iterative refinement) 4560 KASE = 0 4561C If the convergence is not improved, 4562C we go back to the previous iterate. 4563C IFLAG_IR can be equal to 2 only if IRStep >= 2 4564 IF (IFLAG_IR.EQ.2) NOITER = NOITER - 1 4565 ELSE 4566C IFLAG_IR=0, try to improve the solution 4567 KASE = 2 4568 ENDIF 4569 ELSEIF (ICNTL10.LT.0) THEN 4570C ------------------- 4571 KASE = 2 4572 ELSE 4573C ICNTL10 = 0, we want to perform only EA and not IR. 4574C ----------------- 4575 KASE = 0 4576 END IF 4577 ENDIF 4578C End Master 4579 ENDIF 4580C -------------- 4581C Broadcast KASE 4582C -------------- 4583 CALL MPI_BCAST( KASE, 1, MPI_INTEGER, MASTER, 4584 & id%COMM, IERR ) 4585C If Kase= 0 we quit the IR process 4586 IF (KASE.LE.0) GOTO 666 4587 IF (KASE.LT.0) THEN 4588 WRITE(*,*) "Internal error 17 in ZMUMPS_SOL_DRIVER" 4589 ENDIF 4590C ========================================================= 4591C COMPUTE the solution of Ay = r 4592C ========================================================= 4593C Call internal routine to avoid code duplication 4594 CALL ZMUMPS_PP_SOLVE() 4595 IF (INFO(1) .LT. 0) GOTO 90 4596C ----------------------- 4597C Go back to beginning of 4598C loop to apply next step 4599C of iterative refinement 4600C ----------------------- 4601 22 CONTINUE 4602 666 CONTINUE 4603C ************************************************ 4604C 4605C End of the iterative refinement procedure 4606C 4607C ************************************************ 4608 CALL MUMPS_SECFIN(TIMEIT) 4609 IF ( id%MYID .EQ. MASTER ) THEN 4610 IF ( NITREF .GT. 0 ) THEN 4611 id%INFOG(15) = NOITER 4612 END IF 4613C id%DKEEP(114) time for the iterative refinement 4614C id%DKEEP(120) time for the error analysis 4615C id%DKEEP(121) time for condition number 4616C these values are meaningful only on the host. 4617 IF (ICNTL10.EQ.0) THEN 4618C No IR has been requested. All the time is needed 4619C for computing EA 4620 id%DKEEP(120)=TIMEIT 4621 ELSE 4622C IR has been requested 4623 id%DKEEP(114)=TIMEIT - id%DKEEP(120) 4624 ENDIF 4625 END IF 4626 IF ( PROKG ) THEN 4627 IF (ICNTL10.GT.0) THEN 4628 WRITE( MPG, 81 ) 4629 WRITE( MPG, * ) 4630 WRITE( MPG, 141 ) 4631 & 'NUMBER OF STEPS OF ITERATIVE REFINEMENTS PERFORMED =', 4632 & NOITER 4633 ENDIF 4634 ENDIF 4635C 4636C ================================================== 4637C BEGIN 4638C Perform error analysis after iterative refinement 4639C ================================================== 4640 IF ((ICNTL11 .GT. 0).AND.(ICNTL10.GT.0)) THEN 4641C If IR is requested with test of convergence, 4642C the EA of the last step of IR is done here, 4643C otherwise EA of the last step is done at the 4644C end of IR 4645 CALL MUMPS_SECDEB(TIMEEA) 4646 KASE = 0 4647 IF (id%MYID .eq. MASTER ) THEN 4648C Test if IFLAG_IR = 2, that is if the the IR was diverging, 4649C we went back to the previous iterate 4650C We have to do EA on the last computed solution. 4651 IF (IFLAG_IR.EQ.2) KASE = 2 4652 ENDIF 4653C -------------- 4654C Broadcast KASE 4655C -------------- 4656 CALL MPI_BCAST( KASE, 1, MPI_INTEGER, MASTER, 4657 & id%COMM, IERR ) 4658 IF (KASE.EQ.2) THEN 4659C We went back to the previous iterate 4660C We have to do EA on the last computed solution. 4661C Compute the residual in C_Y using IRN, JCN, ASPK 4662C and the solution RHS(IBEG) 4663C The norm of the ith row in R_Y(I). 4664 IF ( KEEP(54) .eq. 0 ) THEN 4665C --------------------- 4666C Matrix is centralized 4667C --------------------- 4668 IF (id%MYID .EQ. MASTER) THEN 4669 IF (KEEP(55).EQ.0) THEN 4670 CALL ZMUMPS_QD2( MTYPE, id%N, id%KEEP8(28), id%A(1), 4671 & id%IRN(1), id%JCN(1), 4672 & id%RHS(IBEG), SAVERHS, R_Y, C_Y, KEEP(1),KEEP8(1)) 4673 ELSE 4674 CALL ZMUMPS_ELTQD2( MTYPE, id%N, 4675 & id%NELT, id%ELTPTR(1), 4676 & id%LELTVAR, id%ELTVAR(1), 4677 & id%KEEP8(30), id%A_ELT(1), 4678 & id%RHS(IBEG), SAVERHS, R_Y, C_Y, KEEP(1),KEEP8(1)) 4679 ENDIF 4680 ENDIF 4681 ELSE 4682C --------------------- 4683C Matrix is distributed 4684C --------------------- 4685 CALL MPI_BCAST( RHS_IR(IBEG), id%N, 4686 & MPI_DOUBLE_COMPLEX, MASTER, 4687 & id%COMM, IERR ) 4688C ---------------- 4689C Compute residual 4690C ---------------- 4691 IF ( I_AM_SLAVE .and. 4692 & id%KEEP8(29) .NE. 0_8 ) THEN 4693 CALL ZMUMPS_LOC_MV8( id%N, id%KEEP8(29), 4694 & id%IRN_loc(1), id%JCN_loc(1), id%A_loc(1), 4695 & RHS_IR(IBEG), C_LOCWK54, KEEP(50), MTYPE ) 4696 ELSE 4697 C_LOCWK54 = ZERO 4698 END IF 4699 IF ( id%MYID .eq. MASTER ) THEN 4700 CALL MPI_REDUCE( C_LOCWK54, C_Y, 4701 & id%N, MPI_DOUBLE_COMPLEX, 4702 & MPI_SUM,MASTER,id%COMM, IERR) 4703 C_Y = SAVERHS - C_Y 4704 ELSE 4705 CALL MPI_REDUCE( C_LOCWK54, C_DUMMY, 4706 & id%N, MPI_DOUBLE_COMPLEX, 4707 & MPI_SUM,MASTER,id%COMM, IERR) 4708 END IF 4709 ENDIF 4710 ENDIF ! KASE.EQ.2 4711 IF (id%MYID .EQ. MASTER) THEN 4712C Compute which equations are associated to w1 and which 4713C ones are associated to w2 in case of IFLAG_IR=2. 4714C If IFLAG_IR = 0 or 1 IW1 should be correct 4715 IF (IFLAG_IR.EQ.2) THEN 4716 TESTConv = .FALSE. 4717 CALL ZMUMPS_SOL_OMEGA(id%N,SAVERHS, 4718 & id%RHS(IBEG), C_Y, R_W, C_W, IW1, IFLAG_IR, 4719 & RINFOG(7), 0, TESTConv, 4720 & MP, ARRET ) 4721 ENDIF ! (IFLAG_IR.EQ.2) 4722c Compute some statistics for 4723 GIVSOL = .TRUE. 4724 CALL ZMUMPS_SOL_Q(MTYPE,INFO(1),id%N, 4725 & id%RHS(IBEG), 4726 & SAVERHS,R_W(id%N+1),C_Y,GIVSOL, 4727 & RINFOG(4),RINFOG(5),RINFOG(6),MPG,ICNTL(1), 4728 & KEEP(1),KEEP8(1)) 4729 ENDIF ! Master 4730 CALL MUMPS_SECFIN(TIMEEA) 4731 id%DKEEP(120)=id%DKEEP(120)+TIMEEA 4732 ENDIF ! ICNTL11>0 and ICNTL10>0 4733C ========================================================= 4734C Compute the Condition number associated if requested. 4735C ========================================================= 4736 CALL MUMPS_SECDEB(TIMELCOND) 4737 IF (ICNTL11 .EQ. 1) THEN 4738 IF ( id%MYID .eq. MASTER ) THEN 4739C Notice that D is always the identity 4740 ALLOCATE( D(id%N),stat =allocok ) 4741 IF ( allocok .GT. 0 ) THEN 4742 INFO(1)=-13 4743 INFO(2)=id%N 4744 GOTO 777 4745 ENDIF 4746 NB_BYTES = NB_BYTES + int(id%N,8)*K16_8 4747 DO I = 1, id%N 4748 D( I ) = RONE 4749 END DO 4750 ENDIF 4751 KASE = 0 4752 222 CONTINUE 4753 IF ( id%MYID .EQ. MASTER ) THEN 4754 CALL ZMUMPS_SOL_LCOND(id%N, SAVERHS, 4755 & id%RHS(IBEG), C_Y, D, R_W, C_W, IW1, KASE, 4756 & RINFOG(7), RINFOG(9), RINFOG(10), 4757 & MP, KEEP(1),KEEP8(1)) 4758 ENDIF 4759C -------------- 4760C Broadcast KASE 4761C -------------- 4762 CALL MPI_BCAST( KASE, 1, MPI_INTEGER, MASTER, 4763 & id%COMM, IERR ) 4764C KASE <= 0 4765C We reach the end of iterative method to compute 4766C LCOND1 and LCOND2 4767 IF (KASE.LE.0) GOTO 224 4768 CALL ZMUMPS_PP_SOLVE() 4769 IF (INFO(1) .LT. 0) GOTO 90 4770C --------------------------- 4771C Go back to beginning of 4772C loop to apply next step 4773C of iterative method 4774C ----------------------- 4775 GO TO 222 4776C End ICNTL11 = 1 4777 ENDIF 4778 224 CONTINUE 4779 CALL MUMPS_SECFIN(TIMELCOND) 4780 id%DKEEP(121)=id%DKEEP(121)+TIMELCOND 4781 IF ((id%MYID .EQ. MASTER).AND.(ICNTL11.GT.0)) THEN 4782 IF (ICNTL10.GT.0) THEN 4783C If ICNTL10<0 these stats have been printed before IR 4784 IF ( MPG .GT. 0 ) THEN 4785 WRITE( MPG, 115 ) 4786 & 'RINFOG(7):COMPONENTWISE SCALED RESIDUAL(W1)=', 4787 & RINFOG(7) 4788 WRITE( MPG, 115 ) 4789 & '------(8):---------------------------- (W2)=', 4790 & RINFOG(8) 4791 ENDIF 4792 END IF 4793 IF (ICNTL11.EQ.1) THEN 4794C If ICNTL11/=1 these stats haven't been computed 4795 IF (MPG.GT.0) THEN 4796 WRITE( MPG, 115 ) 4797 & '------(9):Upper bound ERROR ...............=', 4798 & RINFOG(9) 4799 WRITE( MPG, 115 ) 4800 & '-----(10):CONDITION NUMBER (1) ............=', 4801 & RINFOG(10) 4802 WRITE( MPG, 115 ) 4803 & '-----(11):CONDITION NUMBER (2) ............=', 4804 & RINFOG(11) 4805 END IF 4806 END IF 4807 END IF ! MASTER && ICNTL11.GT.0 4808 IF ( PROKG .AND. abs(ICNTL10) .GT.0 ) WRITE( MPG, 131 ) 4809C=================================================== 4810C Perform error analysis after iterative refinements 4811C END 4812C=================================================== 4813C 4814 IF (id%MYID == MASTER) THEN 4815 NB_BYTES = NB_BYTES - int(size(C_W),8)*K35_8 4816 DEALLOCATE(C_W) 4817 NB_BYTES = NB_BYTES - int(size(R_W),8)*K16_8 4818 & - int(size(IW1),8)*K34_8 4819 DEALLOCATE(R_W) 4820 DEALLOCATE(IW1) 4821 IF (ICNTL11 .EQ. 1) THEN 4822C We have used D only for LCOND1,2 4823 NB_BYTES = NB_BYTES - int(size(D ),8)*K16_8 4824 DEALLOCATE(D) 4825 ENDIF 4826 ENDIF 4827 NB_BYTES = NB_BYTES - 4828 & (int(size(R_Y),8)+int(size(R_LOCWK54),8))*K16_8 4829 NB_BYTES = NB_BYTES - 4830 & (int(size(C_Y),8)+int(size(C_LOCWK54),8))*K35_8 4831 DEALLOCATE(R_Y) 4832 DEALLOCATE(C_Y) 4833 DEALLOCATE(R_LOCWK54) 4834 DEALLOCATE(C_LOCWK54) 4835C End POSTPros 4836 END IF 4837C============================================ 4838C 4839C ITERATIVE REFINEMENT AND/OR ERROR ANALYSIS 4840C 4841C END 4842C 4843C============================================ 4844C ========================== 4845C Begin reordering on master 4846C corresponding to maximum transversal permutation 4847C in case of centralized solution 4848C (ICNTL21==0) 4849C 4850 IF ( id%MYID .EQ. MASTER .AND. ICNTL21==0 4851 & .AND. KEEP(23) .NE. 0.AND.KEEP(237).EQ.0) THEN 4852C ((No transpose and backward performed and NO A-1) 4853C or null space computation): permutation 4854C must be done on solution. 4855 IF ((KEEP(221).NE.1 .AND. MTYPE .EQ. 1) 4856 & .OR. KEEP(111) .NE.0 .OR. KEEP(252).NE.0 ) THEN 4857C Permute the solution RHS according to the column 4858C permutation held in UNS_PERM 4859C Column J of the permuted matrix corresponds to 4860C column UNS_PERM(J) of the original matrix. 4861C RHS holds the permuted solution 4862C Note that id%N>1 since KEEP(23)=0 when id%N=1 4863C 4864 ALLOCATE( C_RW1( id%N ),stat =allocok ) 4865! temporary not in NB_BYTES 4866 IF ( allocok .GT. 0 ) THEN 4867 INFO(1)=-13 4868 INFO(2)=id%N 4869 WRITE(*,*) 'could not allocate ', id%N, 'integers.' 4870 CALL MUMPS_ABORT() 4871 END IF 4872 DO K = 1, NBRHS_EFF 4873 IF (KEEP(242).EQ.0) THEN 4874 KDEC = (K-1)*LD_RHS+IBEG-1 4875 ELSE 4876C ------------------------------- 4877C Columns just computed might not 4878C be contiguous in original RHS 4879C ------------------------------- 4880 KDEC = int(PERM_RHS(K-1+JBEG_RHS)-1,8)*int(LD_RHS,8) 4881 ENDIF 4882 DO I = 1, id%N 4883 C_RW1(I) = id%RHS(KDEC+I) 4884 ENDDO 4885 DO I = 1, id%N 4886 JPERM = id%UNS_PERM(I) 4887 id%RHS( KDEC+JPERM ) = C_RW1( I ) 4888 ENDDO 4889 ENDDO 4890 DEALLOCATE( C_RW1 ) !temporary not in NB_BYTES 4891 END IF 4892 END IF 4893C 4894C End reordering on master 4895C ======================== 4896 IF (id%MYID.EQ.MASTER .and.ICNTL21==0.and.KEEP(221).NE.1.AND. 4897 & (KEEP(237).EQ.0) ) THEN 4898* print out the solution 4899 IF ( INFO(1) .GE. 0 .AND. ICNTL(4).GE.3 .AND. ICNTL(3).GT.0) 4900 & THEN 4901 K = min0(10, id%N) 4902 IF (ICNTL(4) .eq. 4 ) K = id%N 4903 J = min0(10,NBRHS_EFF) 4904 IF (ICNTL(4) .eq. 4 ) J = NBRHS_EFF 4905 DO II=1, J 4906 WRITE(ICNTL(3),110) BEG_RHS+II-1 4907 WRITE(ICNTL(3),160) 4908 & (id%RHS(IBEG+(II-1)*LD_RHS+I-1),I=1,K) 4909 ENDDO 4910 END IF 4911 END IF 4912#if defined(RHSCOMP_BYROWS) 4913 1000 CONTINUE 4914#endif 4915C ========================== 4916C blocking for multiple RHS (END OF DO WHILE (BEG_RHS.LE.NBRHS) 4917 IF ((KEEP(248).EQ.1).AND.(KEEP(237).EQ.0)) THEN 4918 ! case of general sparse: in case of empty columns 4919 ! NBRHS_EFF might has been updated and broadcasted 4920 ! and holds the effective size of a contiguous block of 4921 ! non empty columns 4922 BEG_RHS = BEG_RHS + NBRHS_EFF ! nb of nonempty columns 4923 ELSE 4924 BEG_RHS = BEG_RHS + NBRHS 4925 ENDIF 4926 ENDDO 4927C DO WHILE (BEG_RHS.LE.id%NRHS) 4928C ========================== 4929C 4930C ======================================================== 4931C Reset RHS to zero for all remaining columns that 4932C have not been processed because they were emtpy 4933C ======================================================== 4934 IF ( (id%MYID.EQ.MASTER) 4935 & .AND. ( KEEP(248).NE.0 ) ! sparse RHS on input 4936 & .AND. ( KEEP(237).EQ.0 ) ! No A-1 4937 & .AND. ( ICNTL21.EQ.0 ) ! Centralized solution 4938 & .AND. ( KEEP(221) .NE.1 ) ! Not Reduced RHS step of Schur 4939 & .AND. ( JEND_RHS .LT. id%NRHS ) 4940 & ) 4941 & THEN 4942 JBEG_NEW = JEND_RHS + 1 4943 IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR) THEN 4944 DO WHILE ( JBEG_NEW.LE. id%NRHS) 4945 DO I=1, id%N 4946 id%RHS((PERM_RHS(JBEG_NEW) -1)*LD_RHS+I) 4947 & = ZERO 4948 ENDDO 4949 JBEG_NEW = JBEG_NEW +1 4950 CYCLE 4951 ENDDO 4952 ELSE 4953 DO WHILE ( JBEG_NEW.LE. id%NRHS) 4954 DO I=1, id%N 4955 id%RHS((JBEG_NEW -1)*LD_RHS + I) = ZERO 4956 ENDDO 4957 JBEG_NEW = JBEG_NEW +1 4958 ENDDO 4959 ENDIF ! End DO_PERMUTE_RHS.OR.INTERLEAVE_PAR 4960 ENDIF 4961C ======================================================== 4962C Reset id%SOL_loc to zero for all remaining columns that 4963C have not been processed because they were emtpy 4964C ======================================================== 4965 IF ( I_AM_SLAVE .AND. (ICNTL21.NE.0) .AND. 4966 & ( JEND_RHS .LT. id%NRHS ) .AND. KEEP(221).NE.1 ) THEN 4967 JBEG_NEW = JEND_RHS + 1 4968 IF (DO_PERMUTE_RHS.OR.INTERLEAVE_PAR) THEN 4969 DO WHILE ( JBEG_NEW.LE. id%NRHS) 4970 DO I=1, KEEP(89) 4971 id%SOL_loc((PERM_RHS(JBEG_NEW) -1)*id%LSOL_LOC+I) 4972 & = ZERO 4973 ENDDO 4974 JBEG_NEW = JBEG_NEW +1 4975 ENDDO 4976 ELSE 4977C 4978 DO WHILE ( JBEG_NEW.LE. id%NRHS) 4979 DO I=1, KEEP(89) 4980 id%SOL_loc((JBEG_NEW -1)*id%LSOL_loc + I) = ZERO 4981 ENDDO 4982 JBEG_NEW = JBEG_NEW +1 4983 ENDDO 4984 ENDIF 4985 ENDIF 4986C 4987C ================================================================ 4988C Reset id%RHSCOMP and id%REDRHS to zero for all remaining columns 4989C that have not been processed because they were emtpy 4990C ================================================================ 4991 IF ((KEEP(221).EQ.1) .AND. 4992 & ( JEND_RHS .LT. id%NRHS ) ) THEN 4993 IF (id%MYID .EQ. MASTER) THEN 4994 JBEG_NEW = JEND_RHS + 1 4995 DO WHILE ( JBEG_NEW.LE. id%NRHS) 4996 DO I=1, id%SIZE_SCHUR 4997 id%REDRHS((JBEG_NEW -1)*LD_REDRHS + I) = ZERO 4998 ENDDO 4999 JBEG_NEW = JBEG_NEW +1 5000 ENDDO 5001 ENDIF 5002 IF (I_AM_SLAVE) THEN 5003#if defined(RHSCOMP_BYROWS) 5004 DO I=1,NBENT_RHSCOMP 5005 JBEG_NEW = JEND_RHS + 1 5006 DO WHILE ( JBEG_NEW.LE. id%NRHS) 5007 id%RHSCOMP(JBEG_NEW + (I-1)*NBRHS_EFF) = ZERO 5008 JBEG_NEW = JBEG_NEW +1 5009 ENDDO 5010 ENDDO 5011#else 5012 JBEG_NEW = JEND_RHS + 1 5013 DO WHILE ( JBEG_NEW.LE. id%NRHS) 5014 DO I=1,NBENT_RHSCOMP 5015 id%RHSCOMP((JBEG_NEW -1)*LD_RHSCOMP + I) = ZERO 5016 ENDDO 5017 JBEG_NEW = JBEG_NEW +1 5018 ENDDO 5019#endif 5020 ENDIF 5021 ENDIF 5022C 5023C 5024C ! maximum size used on that proc 5025 id%INFO(26) = int(NB_BYTES_MAX / 1000000_8) 5026C Centralize memory statistics on the host 5027C 5028C INFOG(30) = size of mem in bytes for solve 5029C for the processor using largest memory 5030C INFOG(31) = size of mem in bytes for solve 5031C sum over all processors 5032C ---------------------------------------------------- 5033 CALL MUMPS_MEM_CENTRALIZE( id%MYID, id%COMM, 5034 & id%INFO(26), id%INFOG(30), IRANK ) 5035 IF ( PROKG ) THEN 5036 WRITE( MPG,'(A,I10) ') 5037 & ' ** Rank of processor needing largest memory in solve :', 5038 & IRANK 5039 WRITE( MPG,'(A,I10) ') 5040 & ' ** Space in MBYTES used by this processor for solve :', 5041 & id%INFOG(30) 5042 IF ( KEEP(46) .eq. 0 ) THEN 5043 WRITE( MPG,'(A,I10) ') 5044 & ' ** Avg. Space in MBYTES per working proc during solve :', 5045 & ( id%INFOG(31)-id%INFO(26) ) / id%NSLAVES 5046 ELSE 5047 WRITE( MPG,'(A,I10) ') 5048 & ' ** Avg. Space in MBYTES per working proc during solve :', 5049 & id%INFOG(31) / id%NSLAVES 5050 END IF 5051 END IF 5052*=============================== 5053*End of Solve Phase 5054*=============================== 5055C Store and print timings 5056 CALL MUMPS_SECFIN(TIME3) 5057 id%DKEEP(112)=TIME3 5058 id%DKEEP(113)=TIMEC2 5059 id%DKEEP(115)=TIMESCATTER2 5060 id%DKEEP(116)=TIMEGATHER2 5061 id%DKEEP(122)=TIMECOPYSCALE2 5062 IF (PROKG) THEN 5063 WRITE ( MPG, *) 5064 WRITE ( MPG, *) "Global statistics" 5065 WRITE( MPG, 434 ) id%DKEEP(115) 5066 WRITE( MPG, 432 ) id%DKEEP(113) 5067 WRITE( MPG, 435 ) id%DKEEP(117) 5068 IF ((KEEP(38).NE.0).OR.(KEEP(20).NE.0)) 5069 & WRITE( MPG, 437 ) id%DKEEP(119) 5070 WRITE( MPG, 436 ) id%DKEEP(118) 5071 WRITE( MPG, 433 ) id%DKEEP(116) ! non-zero if gather 5072 WRITE( MPG, 431 ) id%DKEEP(122) ! Distributed solution 5073 ENDIF 5074 IF ( PROK ) THEN 5075 WRITE ( MP, *) 5076 WRITE ( MP, *) "Local statistics" 5077 WRITE( MP, 434 ) id%DKEEP(115) 5078 WRITE( MP, 432 ) id%DKEEP(113) 5079 WRITE( MP, 435 ) id%DKEEP(117) 5080 IF ((KEEP(38).NE.0).OR.(KEEP(20).NE.0)) 5081 & WRITE( MP, 437 ) id%DKEEP(119) 5082 WRITE( MP, 436 ) id%DKEEP(118) 5083 WRITE( MP, 433 ) id%DKEEP(116) 5084 WRITE( MP, 431 ) id%DKEEP(122) 5085 END IF 5086 90 CONTINUE 5087 IF (INFO(1) .LT.0 ) THEN 5088 ENDIF 5089 IF (KEEP(201).GT.0)THEN 5090 IF (IS_INIT_OOC_DONE) THEN 5091 CALL ZMUMPS_OOC_END_SOLVE(IERR) 5092 IF (IERR.LT.0 .AND. INFO(1) .GE. 0) INFO(1) = IERR 5093 ENDIF 5094 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 5095 & id%COMM,id%MYID) 5096 ENDIF 5097C ------------------------ 5098C Check allocation before 5099C to deallocate (cases of 5100C errors that could happen 5101C before or after allocate 5102C statement) 5103C 5104C Sparse RHS 5105C Free space and reset pointers if needed 5106 IF (IRHS_SPARSE_COPY_ALLOCATED) THEN 5107 NB_BYTES = NB_BYTES - 5108 & int(size(IRHS_SPARSE_COPY),8)*K34_8 5109 DEALLOCATE(IRHS_SPARSE_COPY) 5110 IRHS_SPARSE_COPY_ALLOCATED=.FALSE. 5111 NULLIFY(IRHS_SPARSE_COPY) 5112 ENDIF 5113 IF (IRHS_PTR_COPY_ALLOCATED) THEN 5114 NB_BYTES = NB_BYTES - 5115 & int(size(IRHS_PTR_COPY),8)*K34_8 5116 DEALLOCATE(IRHS_PTR_COPY) 5117 IRHS_PTR_COPY_ALLOCATED=.FALSE. 5118 NULLIFY(IRHS_PTR_COPY) 5119 ENDIF 5120 IF (RHS_SPARSE_COPY_ALLOCATED) THEN 5121 NB_BYTES = NB_BYTES - 5122 & int(size(RHS_SPARSE_COPY),8)*K35_8 5123 DEALLOCATE(RHS_SPARSE_COPY) 5124 RHS_SPARSE_COPY_ALLOCATED=.FALSE. 5125 NULLIFY(RHS_SPARSE_COPY) 5126 ENDIF 5127 IF (allocated(PERM_RHS)) THEN 5128 NB_BYTES = NB_BYTES - int(size(PERM_RHS),8)*K34_8 5129 DEALLOCATE(PERM_RHS) 5130 ENDIF 5131C END A-1 5132 IF (allocated(UNS_PERM_INV)) THEN 5133 NB_BYTES = NB_BYTES - int(size(UNS_PERM_INV),8)*K34_8 5134 DEALLOCATE(UNS_PERM_INV) 5135 ENDIF 5136 IF (associated(id%BUFR)) THEN 5137 NB_BYTES = NB_BYTES - int(size(id%BUFR),8)*K34_8 5138 DEALLOCATE(id%BUFR) 5139 NULLIFY(id%BUFR) 5140 ENDIF 5141 IF ( I_AM_SLAVE ) THEN 5142 IF (allocated(RHS_BOUNDS)) THEN 5143 NB_BYTES = NB_BYTES - 5144 & int(size(RHS_BOUNDS),8)*K34_8 5145 DEALLOCATE(RHS_BOUNDS) 5146 ENDIF 5147 IF (allocated(IWK_SOLVE)) THEN 5148 NB_BYTES = NB_BYTES - int(size(IWK_SOLVE),8)*K34_8 5149 DEALLOCATE( IWK_SOLVE ) 5150 ENDIF 5151 IF (allocated(PTRACB)) THEN 5152 NB_BYTES = NB_BYTES - int(size(PTRACB),8)*K34_8* 5153 & int(KEEP(10),8) 5154 DEALLOCATE( PTRACB ) 5155 ENDIF 5156 IF (allocated(IWCB)) THEN 5157 NB_BYTES = NB_BYTES - int(size(IWCB),8)*K34_8 5158 DEALLOCATE( IWCB ) 5159 ENDIF 5160C ------------------------ 5161C SLAVE CODE 5162C ----------------------- 5163C Deallocate send buffers 5164C ----------------------- 5165 IF (id%NSLAVES .GT. 1) THEN 5166 CALL ZMUMPS_BUF_DEALL_CB( IERR ) 5167 CALL ZMUMPS_BUF_DEALL_SMALL_BUF( IERR ) 5168 ENDIF 5169 END IF 5170C 5171 IF ( id%MYID .eq. MASTER ) THEN 5172C ------------------------ 5173C SAVERHS may have been 5174C allocated only on master 5175C ------------------------ 5176 IF (allocated(SAVERHS)) THEN 5177 NB_BYTES = NB_BYTES - int(size(SAVERHS),8)*K35_8 5178 DEALLOCATE( SAVERHS) 5179 ENDIF 5180C Nullify RHS_IR might have been pointing to id%RHS 5181 NULLIFY(RHS_IR) 5182 ELSE 5183C -------------------- 5184C Free right-hand-side 5185C on slave processors 5186C -------------------- 5187 IF (associated(RHS_IR)) THEN 5188 NB_BYTES = NB_BYTES - int(size(RHS_IR),8)*K35_8 5189 DEALLOCATE(RHS_IR) 5190 NULLIFY(RHS_IR) 5191 END IF 5192 END IF 5193 IF (I_AM_SLAVE) THEN 5194C Deallocate temporary workspace SRW3 5195 IF (allocated(SRW3)) THEN 5196 NB_BYTES = NB_BYTES - int(size(SRW3),8)*K35_8 5197 DEALLOCATE(SRW3) 5198 ENDIF 5199 IF (LSCAL .AND. ICNTL21==1) THEN 5200C Free local scaling arrays 5201 NB_BYTES = NB_BYTES - 5202 & int(size(scaling_data%SCALING_LOC),8)*K16_8 5203 DEALLOCATE(scaling_data%SCALING_LOC) 5204 NULLIFY(scaling_data%SCALING_LOC) 5205 ENDIF 5206C Free memory until next call to ZMUMPS 5207 IF (WK_USER_PROVIDED) THEN 5208C S points to WK_USER provided by user 5209C KEEP8(24) holds size of WK_USER 5210C it should be saved and is used 5211C in incore to check that size provided is consistent 5212C (see error -41) 5213 NULLIFY(id%S) 5214 ELSE IF (associated(id%S).AND.KEEP(201).GT.0) THEN 5215C OOC: free space for S that was allocated 5216 NB_BYTES = NB_BYTES - KEEP8(23)*K35_8 5217 id%KEEP8(23)=0_8 5218 DEALLOCATE(id%S) 5219 NULLIFY(id%S) 5220 ENDIF 5221 IF (KEEP(221).NE.1) THEN 5222C -- After reduction of RHS to Schur variables 5223C -- keep compressed RHS generated during FWD step 5224C -- to be used for future expansion 5225 IF (associated(id%RHSCOMP)) THEN 5226 NB_BYTES = NB_BYTES - id%KEEP8(25)*K35_8 5227 DEALLOCATE(id%RHSCOMP) 5228 NULLIFY(id%RHSCOMP) 5229 id%KEEP8(25)=0_8 5230 ENDIF 5231 IF (associated(id%POSINRHSCOMP_ROW)) THEN 5232 NB_BYTES = NB_BYTES - 5233 & int(size(id%POSINRHSCOMP_ROW),8)*K34_8 5234 DEALLOCATE(id%POSINRHSCOMP_ROW) 5235 NULLIFY(id%POSINRHSCOMP_ROW) 5236 ENDIF 5237 IF (id%POSINRHSCOMP_COL_ALLOC) THEN 5238 NB_BYTES = NB_BYTES - 5239 & int(size(id%POSINRHSCOMP_COL),8)*K34_8 5240 DEALLOCATE(id%POSINRHSCOMP_COL) 5241 NULLIFY(id%POSINRHSCOMP_COL) 5242 id%POSINRHSCOMP_COL_ALLOC = .FALSE. 5243 ENDIF 5244 ENDIF 5245 IF ( WORK_WCB_ALLOCATED ) THEN 5246 NB_BYTES = NB_BYTES - int(size(WORK_WCB),8)*K35_8 5247 DEALLOCATE( WORK_WCB ) 5248 ENDIF 5249C Otherwise, WORK_WCB may point to some 5250C position inside id%S, nullify it 5251 NULLIFY( WORK_WCB ) 5252 ENDIF 5253 RETURN 5254 55 FORMAT (//' ERROR ANALYSIS BEFORE ITERATIVE REFINEMENT') 5255 100 FORMAT(//' ****** SOLVE & CHECK STEP ********'/) 5256 110 FORMAT (//' VECTOR SOLUTION FOR COLUMN ',I12) 5257 115 FORMAT(1X, A44,1P,D9.2) 5258 434 FORMAT(' TIME to build/scatter RHS =',F15.6) 5259 432 FORMAT(' TIME in solution step (fwd/bwd) =',F15.6) 5260 435 FORMAT(' .. TIME in forward (fwd) step = ',F15.6) 5261 437 FORMAT(' .. TIME in ScaLAPACK root = ',F15.6) 5262 436 FORMAT(' .. TIME in backward (bwd) step = ',F15.6) 5263 433 FORMAT(' TIME to gather solution(cent.sol)=',F15.6) 5264 431 FORMAT(' TIME to copy/scale RHS (dist.sol)=',F15.6) 5265 150 FORMAT(/' STATISTICS PRIOR SOLVE PHASE ...........'/ 5266 & ' NUMBER OF RIGHT-HAND-SIDES =',I12/ 5267 & ' BLOCKING FACTOR FOR MULTIPLE RHS =',I12/ 5268 & ' ICNTL (9) =',I12/ 5269 & ' --- (10) =',I12/ 5270 & ' --- (11) =',I12/ 5271 & ' --- (20) =',I12/ 5272 & ' --- (21) =',I12/ 5273 & ' --- (30) =',I12) 5274 151 FORMAT (' --- (25) =',I12) 5275 152 FORMAT (' --- (26) =',I12) 5276 153 FORMAT (' --- (32) =',I12) 5277 160 FORMAT (' RHS'/(1X,1P,5D14.6)) 5278 170 FORMAT (//' ERROR ANALYSIS' ) 5279 240 FORMAT (1X, A42,I4) 5280 270 FORMAT (//' BEGIN ITERATIVE REFINEMENT' ) 5281 81 FORMAT (/' STATISTICS AFTER ITERATIVE REFINEMENT ') 5282 131 FORMAT (/' END ITERATIVE REFINEMENT ') 5283 141 FORMAT(1X, A52,I4) 5284 CONTAINS 5285 SUBROUTINE ZMUMPS_PP_SOLVE() 5286 IMPLICIT NONE 5287C 5288C Purpose: 5289C ======= 5290C Scatter right-hand side, solve the system, 5291C and gather the solution on the host during 5292C post-processing. 5293C We use an internal subroutine to avoid code 5294C duplication without the complication of adding 5295C new parameters or local variables. All variables 5296C in this routine have the scope of ZMUMPS_SOL_DRIVER. 5297C 5298C 5299 IF (KASE .NE. 1 .AND. KASE .NE. 2) THEN 5300 WRITE(*,*) "Internal error 1 in ZMUMPS_PP_SOLVE" 5301 CALL MUMPS_ABORT() 5302 ENDIF 5303 IF ( id%MYID .eq. MASTER ) THEN 5304C Define matrix B as follows: 5305C MTYPE=1 => B=A other values B=At 5306C The user asked to solve the system Bx=b 5307C 5308C THEN 5309C KASE = 1........ RW1 = INV(TRANSPOSE(B)) * RW1 5310C KASE = 2........ RW1 = INV(B) * RW1 5311 IF ( MTYPE .EQ. 1 ) THEN 5312 SOLVET = KASE - 1 5313 ELSE 5314 SOLVET = KASE 5315 END IF 5316C SOLVET= 1 -> solve A x = B, other values solve Atx=b 5317C We force SOLVET to have value either 0 or 1, in order 5318C to be able to test both values, and also, be able to 5319C test whether SOLVET = MTYPE or not. 5320 IF ( SOLVET.EQ.2 ) SOLVET = 0 5321 IF ( LSCAL ) THEN 5322 IF ( SOLVET .EQ. 1 ) THEN 5323C Apply rowscaling 5324 DO K = 1, id%N 5325 C_Y( K ) = C_Y( K ) * id%ROWSCA( K ) 5326 END DO 5327 ELSE 5328C Apply column scaling 5329 DO K = 1, id%N 5330 C_Y( K ) = C_Y( K ) * id%COLSCA( K ) 5331 END DO 5332 END IF 5333 END IF 5334 END IF ! MYID.EQ.MASTER 5335C ------------------------------ 5336C Broadcast SOLVET to the slaves 5337C ------------------------------ 5338 CALL MPI_BCAST( SOLVET, 1, MPI_INTEGER, MASTER, 5339 & id%COMM, IERR) 5340C -------------------------------------------- 5341C Scatter the right hand side C_Y on all procs 5342C -------------------------------------------- 5343 IF ( .NOT.I_AM_SLAVE ) THEN 5344C -- Master not working 5345 CALL ZMUMPS_SCATTER_RHS(id%NSLAVES,id%N, id%MYID, 5346 & id%COMM, 5347 & SOLVET, C_Y(1), id%N, 1, 5348 & 1, 5349 & C_DUMMY, 1, 1, 5350 & IDUMMY, 0, 5351 & JDUMMY, id%KEEP(1), id%KEEP8(1), id%PROCNODE_STEPS(1), 5352 & IDUMMY, 1, 5353 & id%STEP(1), 5354 & id%ICNTL(1),id%INFO(1)) 5355 ELSE 5356 IF (SOLVET.EQ.MTYPE) THEN 5357C POSINRHSCOMP_ROW is with respect to the 5358C original linear system (transposed or not) 5359 PTR_POSINRHSCOMP_FWD => id%POSINRHSCOMP_ROW 5360 ELSE 5361C Transposed, use column indices of original 5362C system (ie, col indices of A or A^T) 5363 PTR_POSINRHSCOMP_FWD => id%POSINRHSCOMP_COL 5364 ENDIF 5365 LIW_PASSED = max( LIW, 1 ) 5366 CALL ZMUMPS_SCATTER_RHS(id%NSLAVES,id%N, id%MYID, 5367 & id%COMM, 5368 & SOLVET, C_Y(1), id%N, 1, 5369 & 1, 5370 & id%RHSCOMP(IBEG_RHSCOMP), LD_RHSCOMP, 1, 5371 & PTR_POSINRHSCOMP_FWD(1), NB_FS_IN_RHSCOMP_F, 5372C 5373 & id%PTLUST_S(1), id%KEEP(1), id%KEEP8(1), 5374 & id%PROCNODE_STEPS(1), 5375 & IS(1), LIW_PASSED, 5376 & id%STEP(1), 5377 & id%ICNTL(1),id%INFO(1)) 5378 ENDIF 5379 IF (INFO(1).LT.0) GOTO 89 5380C 5381C Solve the system 5382C 5383 IF ( I_AM_SLAVE ) THEN 5384 LIW_PASSED = max( LIW, 1 ) 5385 LA_PASSED = max( LA, 1_8 ) 5386 IF (SOLVET.EQ.MTYPE) THEN 5387 PTR_POSINRHSCOMP_FWD => id%POSINRHSCOMP_ROW 5388 PTR_POSINRHSCOMP_BWD => id%POSINRHSCOMP_COL 5389 ELSE 5390 PTR_POSINRHSCOMP_FWD => id%POSINRHSCOMP_COL 5391 PTR_POSINRHSCOMP_BWD => id%POSINRHSCOMP_ROW 5392 ENDIF 5393 FROM_PP=.TRUE. 5394 NBSPARSE_LOC = .FALSE. 5395 CALL ZMUMPS_SOL_C( id%root, id%N, id%S(1), LA_PASSED, 5396 & id%IS(1), LIW_PASSED, WORK_WCB(1), LWCB8_SOL_C, IWCB, LIWCB, 5397 & NBRHS_EFF, id%NA(1), id%LNA, id%NE_STEPS(1), SRW3, SOLVET, 5398 & ICNTL(1), FROM_PP, id%STEP(1), id%FRERE_STEPS(1), 5399 & id%DAD_STEPS(1), id%FILS(1), id%PTLUST_S(1), id%PTRFAC(1), 5400 & IWK_SOLVE(1), LIWK_SOLVE, PTRACB, LIWK_PTRACB, 5401 & id%PROCNODE_STEPS(1), id%NSLAVES, INFO(1), KEEP(1), KEEP8(1), 5402 & id%DKEEP(1), 5403 & id%COMM_NODES, id%MYID, id%MYID_NODES, id%BUFR(1), id%LBUFR, 5404 & id%LBUFR_BYTES, id%ISTEP_TO_INIV2(1), id%TAB_POS_IN_PERE(1,1), 5405C Next 3 arguments are not used in this call 5406 & IBEG_ROOT_DEF, IEND_ROOT_DEF, IROOT_DEF_RHS_COL1, 5407C Case of special root node 5408 & PTR_RHS_ROOT(1), LPTR_RHS_ROOT, SIZE_ROOT, MASTER_ROOT, 5409 & id%RHSCOMP(IBEG_RHSCOMP), LD_RHSCOMP, 5410 & PTR_POSINRHSCOMP_FWD(1), PTR_POSINRHSCOMP_BWD(1) 5411 & , 1 , 1 , 1 5412 & , 1 5413 & , IDUMMY, 1, JDUMMY, KDUMMY, 1, LDUMMY, 1, MDUMMY 5414 & , 1, 1 5415 & , NBSPARSE_LOC, PTR_RHS_BOUNDS(1), LPTR_RHS_BOUNDS 5416 & ) 5417 END IF 5418C ------------------ 5419C Change error codes 5420C ------------------ 5421 IF (INFO(1).eq.-2) INFO(1)=-12 5422 IF (INFO(1).eq.-3) INFO(1)=-15 5423C 5424 IF (INFO(1) .GE. 0) THEN 5425C We need a workspace of minimal size KEEP(247) 5426C in order to unpack pieces of the solution during 5427C ZMUMPS_GATHER_SOLUTION below 5428C - Avoid allocation if error already occurred. 5429C - DEALLOCATE called after GATHER_SOLUTION 5430C CWORK not needed for AM1 5431 ALLOCATE( CWORK(max(max(KEEP(247),KEEP(246)),1)), 5432 & stat=allocok) 5433 IF (allocok > 0) THEN 5434 INFO(1)=-13 5435 INFO(2)=max(max(KEEP(247),KEEP(246)),1) 5436 ENDIF 5437 ENDIF 5438C ------------------------- 5439C Propagate possible errors 5440C ------------------------- 5441 89 CALL MUMPS_PROPINFO( ICNTL(1), INFO(1), 5442 & id%COMM,id%MYID) 5443C 5444C Return in case of error. 5445 IF (INFO(1).LT.0) RETURN 5446C ------------------------------- 5447C Assemble the solution on master 5448C ------------------------------- 5449C (Note: currently, if this part of code is executed, 5450C then necessarily NBRHS_EFF = 1) 5451C 5452C === GATHER and SCALE solution ============== 5453C 5454 IF ((id%MYID.NE.MASTER).OR. .NOT.LSCAL) THEN 5455 PT_SCALING => Dummy_SCAL 5456 ELSE 5457 IF (SOLVET.EQ.1) THEN 5458 PT_SCALING => id%COLSCA 5459 ELSE 5460 PT_SCALING => id%ROWSCA 5461 ENDIF 5462 ENDIF 5463 LIW_PASSED = max( LIW, 1 ) 5464C Solution computed during ZMUMPS_SOL_C has been stored 5465C in id%RHSCOMP and is gathered on the master in C_Y 5466 IF ( .NOT. I_AM_SLAVE ) THEN 5467C I did not participate to computing part of the solution 5468C (id%RHSCOMP not set/allocate) : receive solution, store 5469C it and scale it. 5470 CALL ZMUMPS_GATHER_SOLUTION(id%NSLAVES,id%N, 5471 & id%MYID, id%COMM, NBRHS_EFF, 5472 & SOLVET, C_Y, id%N, NBRHS_EFF, 1, 5473 & JDUMMY, id%KEEP(1),id%KEEP8(1), id%PROCNODE_STEPS(1), 5474 & IDUMMY, 1, 5475 & id%STEP(1), id%BUFR(1), id%LBUFR, id%LBUFR_BYTES, 5476 & CWORK(1), size(CWORK), 5477 & LSCAL, PT_SCALING(1), size(PT_SCALING), 5478! RHSCOMP not on non-working master 5479 & C_DUMMY, 1 , 1, IDUMMY, 1, 5480! for sparse permuted RHS on host 5481 & PERM_RHS, size(PERM_RHS) 5482 & ) 5483 ELSE 5484 CALL ZMUMPS_GATHER_SOLUTION(id%NSLAVES,id%N, 5485 & id%MYID, id%COMM, NBRHS_EFF, 5486 & SOLVET, C_Y, id%N, NBRHS_EFF, 1, 5487 & id%PTLUST_S(1), id%KEEP(1),id%KEEP8(1), 5488 & id%PROCNODE_STEPS(1), 5489 & IS(1), LIW_PASSED, 5490 & id%STEP(1), id%BUFR(1), id%LBUFR, id%LBUFR_BYTES, 5491 & CWORK(1), size(CWORK), 5492 & LSCAL, PT_SCALING(1), size(PT_SCALING), 5493 & id%RHSCOMP(IBEG_RHSCOMP), LD_RHSCOMP, NBRHS_EFF, 5494 & PTR_POSINRHSCOMP_BWD(1), id%N, 5495 & PERM_RHS, size(PERM_RHS)) ! for sparse permuted RHS on host 5496 ENDIF 5497 DEALLOCATE( CWORK ) 5498 END SUBROUTINE ZMUMPS_PP_SOLVE 5499 END SUBROUTINE ZMUMPS_SOLVE_DRIVER 5500