1 PROGRAM PZDBDRIVER 2* 3* 4* -- ScaLAPACK routine (version 1.7) -- 5* University of Tennessee, Knoxville, Oak Ridge National Laboratory, 6* and University of California, Berkeley. 7* November 15, 1997 8* 9* Purpose 10* ======= 11* 12* PZDBDRIVER is a test program for the 13* ScaLAPACK Band Cholesky routines corresponding to the options 14* indicated by ZDB. This test driver performs an 15* A = L*U factorization 16* and solves a linear system with the factors for 1 or more RHS. 17* 18* The program must be driven by a short data file. 19* Here's an example file: 20*'ScaLAPACK, Version 1.2, banded linear systems input file' 21*'PVM.' 22*'' output file name (if any) 23*6 device out 24*'L' define Lower or Upper 25*9 number of problem sizes 26*1 5 17 28 37 121 200 1023 2048 3073 values of N 27*6 number of bandwidths 28*1 2 4 10 31 64 values of BW 29*1 number of NB's 30*-1 3 4 5 values of NB (-1 for automatic choice) 31*1 number of NRHS's (must be 1) 32*8 values of NRHS 33*1 number of NBRHS's (ignored) 34*1 values of NBRHS (ignored) 35*6 number of process grids 36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" 37*3.0 threshold 38* 39* Internal Parameters 40* =================== 41* 42* TOTMEM INTEGER, default = 6200000. 43* TOTMEM is a machine-specific parameter indicating the 44* maximum amount of available memory in bytes. 45* The user should customize TOTMEM to his platform. Remember 46* to leave room in memory for the operating system, the BLACS 47* buffer, etc. For example, on a system with 8 MB of memory 48* per process (e.g., one processor on an Intel iPSC/860), the 49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, 50* code, BLACS buffer, etc). However, for PVM, we usually set 51* TOTMEM = 2000000. Some experimenting with the maximum value 52* of TOTMEM may be required. 53* 54* INTGSZ INTEGER, default = 4 bytes. 55* ZPLXSZ INTEGER, default = 16 bytes. 56* INTGSZ and ZPLXSZ indicate the length in bytes on the 57* given platform for an integer and a double precision 58* complex. 59* MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ ) 60* All arrays used by ScaLAPACK routines are allocated from 61* this array and referenced by pointers. The integer IPB, 62* for example, is a pointer to the starting element of MEM for 63* the solution vector(s) B. 64* 65* ===================================================================== 66* 67* Code Developer: Andrew J. Cleary, University of Tennessee. 68* Current address: Lawrence Livermore National Labs. 69* This version released: August, 2001. 70* 71* ===================================================================== 72* 73* .. Parameters .. 74 INTEGER TOTMEM 75 PARAMETER ( TOTMEM = 3000000 ) 76 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, 77 $ LLD_, MB_, M_, NB_, N_, RSRC_ 78 PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, 79 $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, 80 $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) 81* 82 DOUBLE PRECISION ZERO 83 INTEGER MEMSIZ, NTESTS, ZPLXSZ 84 COMPLEX*16 PADVAL 85 PARAMETER ( ZPLXSZ = 16, 86 $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, 87 $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), 88 $ ZERO = 0.0D+0 ) 89 INTEGER INT_ONE 90 PARAMETER ( INT_ONE = 1 ) 91* .. 92* .. Local Scalars .. 93 LOGICAL CHECK 94 CHARACTER TRANS 95 CHARACTER*6 PASSED 96 CHARACTER*80 OUTFILE 97 INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, 98 $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, 99 $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, 100 $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, 101 $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, 102 $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, 103 $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, 104 $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, 105 $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ 106 REAL THRESH 107 DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, 108 $ TMFLOPS2 109* .. 110* .. Local Arrays .. 111 INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), 112 $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), 113 $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), 114 $ NRVAL( NTESTS ), NVAL( NTESTS ), 115 $ PVAL( NTESTS ), QVAL( NTESTS ) 116 DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) 117 COMPLEX*16 MEM( MEMSIZ ) 118* .. 119* .. External Subroutines .. 120 EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, 121 $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, 122 $ BLACS_PINFO, DESCINIT, IGSUM2D, PZBMATGEN, 123 $ PZCHEKPAD, PZDBINFO, PZDBLASCHK, PZDBTRF, 124 $ PZDBTRS, PZFILLPAD, PZMATGEN, SLBOOT, 125 $ SLCOMBINE, SLTIMER 126* .. 127* .. External Functions .. 128 INTEGER NUMROC 129 LOGICAL LSAME 130 DOUBLE PRECISION PZLANGE 131 EXTERNAL LSAME, NUMROC, PZLANGE 132* .. 133* .. Intrinsic Functions .. 134 INTRINSIC DBLE, MAX, MIN, MOD 135* .. 136* .. Data Statements .. 137 DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / 138* .. 139* 140* 141* 142* .. Executable Statements .. 143* 144* Get starting information 145* 146 CALL BLACS_PINFO( IAM, NPROCS ) 147 IASEED = 100 148 IBSEED = 200 149* 150 CALL PZDBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, 151 $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, 152 $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, 153 $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) 154* 155 CHECK = ( THRESH.GE.0.0D+0 ) 156* 157* Print headings 158* 159 IF( IAM.EQ.0 ) THEN 160 WRITE( NOUT, FMT = * ) 161 WRITE( NOUT, FMT = 9995 ) 162 WRITE( NOUT, FMT = 9994 ) 163 WRITE( NOUT, FMT = * ) 164 END IF 165* 166* Loop over different process grids 167* 168 DO 60 I = 1, NGRIDS 169* 170 NPROW = PVAL( I ) 171 NPCOL = QVAL( I ) 172* 173* Make sure grid information is correct 174* 175 IERR( 1 ) = 0 176 IF( NPROW.LT.1 ) THEN 177 IF( IAM.EQ.0 ) 178 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW 179 IERR( 1 ) = 1 180 ELSE IF( NPCOL.LT.1 ) THEN 181 IF( IAM.EQ.0 ) 182 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL 183 IERR( 1 ) = 1 184 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN 185 IF( IAM.EQ.0 ) 186 $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS 187 IERR( 1 ) = 1 188 END IF 189* 190 IF( IERR( 1 ).GT.0 ) THEN 191 IF( IAM.EQ.0 ) 192 $ WRITE( NOUT, FMT = 9997 ) 'grid' 193 KSKIP = KSKIP + 1 194 GO TO 50 195 END IF 196* 197* Define process grid 198* 199 CALL BLACS_GET( -1, 0, ICTXT ) 200 CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) 201* 202* 203* Define transpose process grid 204* 205 CALL BLACS_GET( -1, 0, ICTXTB ) 206 CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) 207* 208* Go to bottom of process grid loop if this case doesn't use my 209* process 210* 211 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) 212* 213 IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN 214 GO TO 50 215 ENDIF 216* 217 DO 40 J = 1, NMAT 218* 219 IERR( 1 ) = 0 220* 221 N = NVAL( J ) 222* 223* Make sure matrix information is correct 224* 225 IF( N.LT.1 ) THEN 226 IF( IAM.EQ.0 ) 227 $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N 228 IERR( 1 ) = 1 229 END IF 230* 231* Check all processes for an error 232* 233 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 234 $ -1, 0 ) 235* 236 IF( IERR( 1 ).GT.0 ) THEN 237 IF( IAM.EQ.0 ) 238 $ WRITE( NOUT, FMT = 9997 ) 'size' 239 KSKIP = KSKIP + 1 240 GO TO 40 241 END IF 242* 243* 244 DO 45 BW_NUM = 1, NBW 245* 246 IERR( 1 ) = 0 247* 248 BWL = BWLVAL( BW_NUM ) 249 IF( BWL.LT.1 ) THEN 250 IF( IAM.EQ.0 ) 251 $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL 252 IERR( 1 ) = 1 253 END IF 254* 255 BWU = BWUVAL( BW_NUM ) 256 IF( BWU.LT.1 ) THEN 257 IF( IAM.EQ.0 ) 258 $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU 259 IERR( 1 ) = 1 260 END IF 261* 262 IF( BWL.GT.N-1 ) THEN 263 IF( IAM.EQ.0 ) THEN 264 IERR( 1 ) = 1 265 ENDIF 266 END IF 267* 268 IF( BWU.GT.N-1 ) THEN 269 IF( IAM.EQ.0 ) THEN 270 IERR( 1 ) = 1 271 ENDIF 272 END IF 273* 274* Check all processes for an error 275* 276 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 277 $ -1, 0 ) 278* 279 IF( IERR( 1 ).GT.0 ) THEN 280 KSKIP = KSKIP + 1 281 GO TO 45 282 END IF 283* 284 DO 30 K = 1, NNB 285* 286 IERR( 1 ) = 0 287* 288 NB = NBVAL( K ) 289 IF( NB.LT.0 ) THEN 290 NB =( (N-(NPCOL-1)*MAX(BWL,BWU)-1)/NPCOL + 1 ) 291 $ + MAX(BWL,BWU) 292 NB = MAX( NB, 2*MAX(BWL,BWU) ) 293 NB = MIN( N, NB ) 294 END IF 295* 296* Make sure NB is legal 297* 298 IERR( 1 ) = 0 299 IF( NB.LT.MIN( 2*MAX(BWL,BWU), N ) ) THEN 300 IERR( 1 ) = 1 301 END IF 302* 303* Check all processes for an error 304* 305 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 306 $ -1, 0 ) 307* 308 IF( IERR( 1 ).GT.0 ) THEN 309 KSKIP = KSKIP + 1 310 GO TO 30 311 END IF 312* 313* Padding constants 314* 315 NP = NUMROC( (BWL+BWU+1), (BWL+BWU+1), 316 $ MYROW, 0, NPROW ) 317 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) 318* 319 IF( CHECK ) THEN 320 IPREPAD = ((BWL+BWU+1)+10) 321 IMIDPAD = 10 322 IPOSTPAD = ((BWL+BWU+1)+10) 323 ELSE 324 IPREPAD = 0 325 IMIDPAD = 0 326 IPOSTPAD = 0 327 END IF 328* 329* Initialize the array descriptor for the matrix A 330* 331 CALL DESCINIT( DESCA2D, (BWL+BWU+1), N, 332 $ (BWL+BWU+1), NB, 0, 0, 333 $ ICTXT,((BWL+BWU+1)+10), IERR( 1 ) ) 334* 335* Convert this to 1D descriptor 336* 337 DESCA( 1 ) = 501 338 DESCA( 3 ) = N 339 DESCA( 4 ) = NB 340 DESCA( 5 ) = 0 341 DESCA( 2 ) = ICTXT 342 DESCA( 6 ) = ((BWL+BWU+1)+10) 343 DESCA( 7 ) = 0 344* 345 IERR_TEMP = IERR( 1 ) 346 IERR( 1 ) = 0 347 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) 348* 349* Check all processes for an error 350* 351 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) 352* 353 IF( IERR( 1 ).LT.0 ) THEN 354 IF( IAM.EQ.0 ) 355 $ WRITE( NOUT, FMT = 9997 ) 'descriptor' 356 KSKIP = KSKIP + 1 357 GO TO 30 358 END IF 359* 360* Assign pointers into MEM for SCALAPACK arrays, A is 361* allocated starting at position MEM( IPREPAD+1 ) 362* 363 FREE_PTR = 1 364 IPB = 0 365* 366* Save room for prepadding 367 FREE_PTR = FREE_PTR + IPREPAD 368* 369 IPA = FREE_PTR 370 FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* 371 $ DESCA2D( NB_ ) 372 $ + IPOSTPAD 373* 374* Add memory for fillin 375* Fillin space needs to store: 376* Fillin spike: 377* Contribution to previous proc's diagonal block of 378* reduced system: 379* Off-diagonal block of reduced system: 380* Diagonal block of reduced system: 381* 382 FILLIN_SIZE = 383 $ NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) 384* 385* Claim memory for fillin 386* 387 FREE_PTR = FREE_PTR + IPREPAD 388 IP_FILLIN = FREE_PTR 389 FREE_PTR = FREE_PTR + FILLIN_SIZE 390* 391* Workspace needed by computational routines: 392* 393 IPW_SIZE = 0 394* 395* factorization: 396* 397 IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU) 398* 399* Claim memory for IPW 400* 401 IPW = FREE_PTR 402 FREE_PTR = FREE_PTR + IPW_SIZE 403* 404* Check for adequate memory for problem size 405* 406 IERR( 1 ) = 0 407 IF( FREE_PTR.GT.MEMSIZ ) THEN 408 IF( IAM.EQ.0 ) 409 $ WRITE( NOUT, FMT = 9996 ) 410 $ 'divide and conquer factorization', 411 $ (FREE_PTR )*ZPLXSZ 412 IERR( 1 ) = 1 413 END IF 414* 415* Check all processes for an error 416* 417 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 418 $ 1, -1, 0 ) 419* 420 IF( IERR( 1 ).GT.0 ) THEN 421 IF( IAM.EQ.0 ) 422 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 423 KSKIP = KSKIP + 1 424 GO TO 30 425 END IF 426* 427* Worksize needed for LAPRNT 428 WORKSIZ = MAX( ((BWL+BWU+1)+10), NB ) 429* 430 IF( CHECK ) THEN 431* 432* Calculate the amount of workspace required by 433* the checking routines. 434* 435* PZLANGE 436 WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) 437* 438* PZDBLASCHK 439 WORKSIZ = MAX( WORKSIZ, 440 $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) 441 END IF 442* 443 FREE_PTR = FREE_PTR + IPREPAD 444 IP_DRIVER_W = FREE_PTR 445 FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD 446* 447* 448* Check for adequate memory for problem size 449* 450 IERR( 1 ) = 0 451 IF( FREE_PTR.GT.MEMSIZ ) THEN 452 IF( IAM.EQ.0 ) 453 $ WRITE( NOUT, FMT = 9996 ) 'factorization', 454 $ ( FREE_PTR )*ZPLXSZ 455 IERR( 1 ) = 1 456 END IF 457* 458* Check all processes for an error 459* 460 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 461 $ 1, -1, 0 ) 462* 463 IF( IERR( 1 ).GT.0 ) THEN 464 IF( IAM.EQ.0 ) 465 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 466 KSKIP = KSKIP + 1 467 GO TO 30 468 END IF 469* 470 CALL PZBMATGEN( ICTXT, 'G', 'D', BWL, BWU, N, 471 $ (BWL+BWU+1), NB, MEM( IPA ), 472 $ ((BWL+BWU+1)+10), 0, 0, IASEED, MYROW, 473 $ MYCOL, NPROW, NPCOL ) 474* 475 CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), 476 $ ((BWL+BWU+1)+10), IPREPAD, IPOSTPAD, 477 $ PADVAL ) 478* 479 CALL PZFILLPAD( ICTXT, WORKSIZ, 1, 480 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 481 $ IPREPAD, IPOSTPAD, PADVAL ) 482* 483* Calculate norm of A for residual error-checking 484* 485 IF( CHECK ) THEN 486* 487 ANORM = PZLANGE( '1', (BWL+BWU+1), 488 $ N, MEM( IPA ), 1, 1, 489 $ DESCA2D, MEM( IP_DRIVER_W ) ) 490 CALL PZCHEKPAD( ICTXT, 'PZLANGE', NP, NQ, 491 $ MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), 492 $ IPREPAD, IPOSTPAD, PADVAL ) 493 CALL PZCHEKPAD( ICTXT, 'PZLANGE', 494 $ WORKSIZ, 1, 495 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 496 $ IPREPAD, IPOSTPAD, PADVAL ) 497 END IF 498* 499* 500 CALL SLBOOT() 501 CALL BLACS_BARRIER( ICTXT, 'All' ) 502* 503* Perform factorization 504* 505 CALL SLTIMER( 1 ) 506* 507 CALL PZDBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, 508 $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), 509 $ IPW_SIZE, INFO ) 510* 511 CALL SLTIMER( 1 ) 512* 513 IF( INFO.NE.0 ) THEN 514 IF( IAM.EQ.0 ) THEN 515 WRITE( NOUT, FMT = * ) 'PZDBTRF INFO=', INFO 516 ENDIF 517 KFAIL = KFAIL + 1 518 GO TO 30 519 END IF 520* 521 IF( CHECK ) THEN 522* 523* Check for memory overwrite in factorization 524* 525 CALL PZCHEKPAD( ICTXT, 'PZDBTRF', NP, 526 $ NQ, MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), 527 $ IPREPAD, IPOSTPAD, PADVAL ) 528 END IF 529* 530* 531* Loop over the different values for NRHS 532* 533 DO 20 HH = 1, NNR 534* 535 IERR( 1 ) = 0 536* 537 NRHS = NRVAL( HH ) 538* 539* Initialize Array Descriptor for rhs 540* 541 CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, 542 $ ICTXTB, NB+10, IERR( 1 ) ) 543* 544* Convert this to 1D descriptor 545* 546 DESCB( 1 ) = 502 547 DESCB( 3 ) = N 548 DESCB( 4 ) = NB 549 DESCB( 5 ) = 0 550 DESCB( 2 ) = ICTXT 551 DESCB( 6 ) = DESCB2D( LLD_ ) 552 DESCB( 7 ) = 0 553* 554* reset free_ptr to reuse space for right hand sides 555* 556 IF( IPB .GT. 0 ) THEN 557 FREE_PTR = IPB 558 ENDIF 559* 560 FREE_PTR = FREE_PTR + IPREPAD 561 IPB = FREE_PTR 562 FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) 563 $ + IPOSTPAD 564* 565* Allocate workspace for workspace in TRS routine: 566* 567 IPW_SOLVE_SIZE = (MAX(BWL,BWU)*NRHS) 568* 569 IPW_SOLVE = FREE_PTR 570 FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE 571* 572 IERR( 1 ) = 0 573 IF( FREE_PTR.GT.MEMSIZ ) THEN 574 IF( IAM.EQ.0 ) 575 $ WRITE( NOUT, FMT = 9996 )'solve', 576 $ ( FREE_PTR )*ZPLXSZ 577 IERR( 1 ) = 1 578 END IF 579* 580* Check all processes for an error 581* 582 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, 583 $ IERR, 1, -1, 0 ) 584* 585 IF( IERR( 1 ).GT.0 ) THEN 586 IF( IAM.EQ.0 ) 587 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 588 KSKIP = KSKIP + 1 589 GO TO 15 590 END IF 591* 592 MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) 593* 594* Generate RHS 595* 596 CALL PZMATGEN(ICTXTB, 'No', 'No', 597 $ DESCB2D( M_ ), DESCB2D( N_ ), 598 $ DESCB2D( MB_ ), DESCB2D( NB_ ), 599 $ MEM( IPB ), 600 $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), 601 $ DESCB2D( CSRC_ ), 602 $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, 603 $ MYROW, NPCOL, NPROW ) 604* 605 IF( CHECK ) THEN 606 CALL PZFILLPAD( ICTXTB, NB, NRHS, 607 $ MEM( IPB-IPREPAD ), 608 $ DESCB2D( LLD_ ), 609 $ IPREPAD, IPOSTPAD, 610 $ PADVAL ) 611 CALL PZFILLPAD( ICTXT, WORKSIZ, 1, 612 $ MEM( IP_DRIVER_W-IPREPAD ), 613 $ WORKSIZ, IPREPAD, 614 $ IPOSTPAD, PADVAL ) 615 END IF 616* 617* 618 CALL BLACS_BARRIER( ICTXT, 'All') 619 CALL SLTIMER( 2 ) 620* 621* Solve linear system via factorization 622* 623 CALL PZDBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), 624 $ 1, DESCA, MEM( IPB ), 1, DESCB, 625 $ MEM( IP_FILLIN ), FILLIN_SIZE, 626 $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, 627 $ INFO ) 628* 629 CALL SLTIMER( 2 ) 630* 631 IF( INFO.NE.0 ) THEN 632 IF( IAM.EQ.0 ) 633 $ WRITE( NOUT, FMT = * ) 'PZDBTRS INFO=', INFO 634 KFAIL = KFAIL + 1 635 PASSED = 'FAILED' 636 GO TO 20 637 END IF 638* 639 IF( CHECK ) THEN 640* 641* check for memory overwrite 642* 643 CALL PZCHEKPAD( ICTXT, 'PZDBTRS-work', 644 $ WORKSIZ, 1, 645 $ MEM( IP_DRIVER_W-IPREPAD ), 646 $ WORKSIZ, IPREPAD, 647 $ IPOSTPAD, PADVAL ) 648* 649* check the solution to rhs 650* 651 SRESID = ZERO 652* 653 CALL PZDBLASCHK( 'N', 'D', TRANS, 654 $ N, BWL, BWU, NRHS, 655 $ MEM( IPB ), 1, 1, DESCB2D, 656 $ IASEED, MEM( IPA ), 1, 1, DESCA2D, 657 $ IBSEED, ANORM, SRESID, 658 $ MEM( IP_DRIVER_W ), WORKSIZ ) 659* 660 IF( IAM.EQ.0 ) THEN 661 IF( SRESID.GT.THRESH ) 662 $ WRITE( NOUT, FMT = 9985 ) SRESID 663 END IF 664* 665* The second test is a NaN trap 666* 667 IF( ( SRESID.LE.THRESH ).AND. 668 $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN 669 KPASS = KPASS + 1 670 PASSED = 'PASSED' 671 ELSE 672 KFAIL = KFAIL + 1 673 PASSED = 'FAILED' 674 END IF 675* 676 END IF 677* 678 15 CONTINUE 679* Skipped tests jump to here to print out "SKIPPED" 680* 681* Gather maximum of all CPU and WALL clock timings 682* 683 CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, 684 $ WTIME ) 685 CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, 686 $ CTIME ) 687* 688* Print results 689* 690 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN 691* 692 NOPS = 0 693 NOPS2 = 0 694* 695 N_FIRST = NB 696 NPROCS_REAL = ( N-1 )/NB + 1 697 N_LAST = MOD( N-1, NB ) + 1 698* 699* 2 N bwl bwu + N (bwl) flops 700* for LU factorization 701* 702 NOPS = 2*(DBLE(N)*DBLE(BWL)* 703 $ DBLE(BWU)) + 704 $ (DBLE(N)*DBLE(BWL)) 705* 706* nrhs * 2 N*(bwl+bwu) flops for LU solve. 707* 708 NOPS = NOPS + 709 $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU)) 710 $ *DBLE(NRHS)) 711* 712* Multiply by 4 to get complex count 713* 714 NOPS = NOPS * DBLE(4) 715* 716* Second calc to represent actual hardware speed 717* 718* 2*N_FIRST bwl*bwu Flops for LU 719* factorization in proc 1 720* 721 NOPS2 = 2*( (DBLE(N_FIRST)* 722 $ DBLE(BWL)*DBLE(BWU))) 723* 724 IF ( NPROCS_REAL .GT. 1) THEN 725* 8 N_LAST bwl*bwu 726* flops for LU and spike 727* calc in last processor 728* 729 NOPS2 = NOPS2 + 730 $ 8*( (DBLE(N_LAST)*DBLE(BWL) 731 $ *DBLE(BWU)) ) 732 ENDIF 733* 734 IF ( NPROCS_REAL .GT. 2) THEN 735* 8 NB bwl*bwu flops for LU and spike 736* calc in other processors 737* 738 NOPS2 = NOPS2 + (NPROCS_REAL-2)* 739 $ 8*( (DBLE(NB)*DBLE(BWL) 740 $ *DBLE(BWU)) ) 741 ENDIF 742* 743* Reduced system 744* 745 NOPS2 = NOPS2 + 746 $ 2*( NPROCS_REAL-1 ) * 747 $ ( BWL*BWU*BWL/3 ) 748 IF( NPROCS_REAL .GT. 1 ) THEN 749 NOPS2 = NOPS2 + 750 $ 2*( NPROCS_REAL-2 ) * 751 $ (2*BWL*BWU*BWL) 752 ENDIF 753* 754* Solve stage 755* 756* nrhs*2 n_first* 757* (bwl+bwu) 758* flops for L,U solve in proc 1. 759* 760 NOPS2 = NOPS2 + 761 $ 2* 762 $ DBLE(N_FIRST)* 763 $ DBLE(NRHS) * 764 $ ( DBLE(BWL)+DBLE(BWU)) 765* 766 IF ( NPROCS_REAL .GT. 1 ) THEN 767* 768* 2*nrhs*2 n_last 769* (bwl+bwu) 770* flops for LU solve in other procs 771* 772 NOPS2 = NOPS2 + 773 $ 4* 774 $ (DBLE(N_LAST)*(DBLE(BWL)+ 775 $ DBLE(BWU)))*DBLE(NRHS) 776 ENDIF 777* 778 IF ( NPROCS_REAL .GT. 2 ) THEN 779* 780* 2*nrhs*2 NB 781* (bwl+bwu) 782* flops for LU solve in other procs 783* 784 NOPS2 = NOPS2 + 785 $ ( NPROCS_REAL-2)*2* 786 $ ( (DBLE(NB)*(DBLE(BWL)+ 787 $ DBLE(BWU)))*DBLE(NRHS) ) 788 ENDIF 789* 790* Reduced system 791* 792 NOPS2 = NOPS2 + 793 $ NRHS*( NPROCS_REAL-1)*2*(BWL*BWU ) 794 IF( NPROCS_REAL .GT. 1 ) THEN 795 NOPS2 = NOPS2 + 796 $ NRHS*( NPROCS_REAL-2 ) * 797 $ ( 6 * BWL*BWU ) 798 ENDIF 799* 800* 801* Multiply by 4 to get complex count 802* 803 NOPS2 = NOPS2 * DBLE(4) 804* 805* Calculate total megaflops - factorization and/or 806* solve -- for WALL and CPU time, and print output 807* 808* Print WALL time if machine supports it 809* 810 IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN 811 TMFLOPS = NOPS / 812 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 813 ELSE 814 TMFLOPS = 0.0D+0 815 END IF 816* 817 IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN 818 TMFLOPS2 = NOPS2 / 819 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 820 ELSE 821 TMFLOPS2 = 0.0D+0 822 END IF 823* 824 IF( WTIME( 2 ).GE.0.0D+0 ) 825 $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, 826 $ N, 827 $ BWL, BWU, 828 $ NB, NRHS, NPROW, NPCOL, 829 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, 830 $ TMFLOPS2, PASSED 831* 832* Print CPU time if machine supports it 833* 834 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 835 TMFLOPS = NOPS / 836 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 837 ELSE 838 TMFLOPS = 0.0D+0 839 END IF 840* 841 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 842 TMFLOPS2 = NOPS2 / 843 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 844 ELSE 845 TMFLOPS2 = 0.0D+0 846 END IF 847* 848 IF( CTIME( 2 ).GE.0.0D+0 ) 849 $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, 850 $ N, 851 $ BWL, BWU, 852 $ NB, NRHS, NPROW, NPCOL, 853 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, 854 $ TMFLOPS2, PASSED 855* 856 END IF 857 20 CONTINUE 858* 859* 860 30 CONTINUE 861* NNB loop 862* 863 45 CONTINUE 864* BW[] loop 865* 866 40 CONTINUE 867* NMAT loop 868* 869 CALL BLACS_GRIDEXIT( ICTXT ) 870 CALL BLACS_GRIDEXIT( ICTXTB ) 871* 872 50 CONTINUE 873* NGRIDS DROPOUT 874 60 CONTINUE 875* NGRIDS loop 876* 877* Print ending messages and close output file 878* 879 IF( IAM.EQ.0 ) THEN 880 KTESTS = KPASS + KFAIL + KSKIP 881 WRITE( NOUT, FMT = * ) 882 WRITE( NOUT, FMT = 9992 ) KTESTS 883 IF( CHECK ) THEN 884 WRITE( NOUT, FMT = 9991 ) KPASS 885 WRITE( NOUT, FMT = 9989 ) KFAIL 886 ELSE 887 WRITE( NOUT, FMT = 9990 ) KPASS 888 END IF 889 WRITE( NOUT, FMT = 9988 ) KSKIP 890 WRITE( NOUT, FMT = * ) 891 WRITE( NOUT, FMT = * ) 892 WRITE( NOUT, FMT = 9987 ) 893 IF( NOUT.NE.6 .AND. NOUT.NE.0 ) 894 $ CLOSE ( NOUT ) 895 END IF 896* 897 CALL BLACS_EXIT( 0 ) 898* 899 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, 900 $ '; It should be at least 1' ) 901 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', 902 $ I4 ) 903 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 904 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', 905 $ I11 ) 906 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', 907 $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 908 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', 909 $ '-------- -------- -------- ------' ) 910 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, 911 $ 1X,I4,1X,I4,1X,F9.3, 912 $ F9.4, F9.2, F9.2, 1X, A6 ) 913 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 914 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 915 9990 FORMAT( I5, ' tests completed without checking.' ) 916 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 917 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 918 9987 FORMAT( 'END OF TESTS.' ) 919 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 920 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) 921* 922 STOP 923* 924* End of PZDBTRS_DRIVER 925* 926 END 927* 928