1 PROGRAM PSGBDRIVER 2* 3* 4* -- ScaLAPACK routine (version 1.7) -- 5* University of Tennessee, Knoxville, Oak Ridge National Laboratory, 6* and University of California, Berkeley. 7* November 15, 1997 8* 9* Purpose 10* ======= 11* 12* PSGBDRIVER is a test program for the 13* ScaLAPACK Band Cholesky routines corresponding to the options 14* indicated by SGB. This test driver performs an 15* A = L*U factorization 16* and solves a linear system with the factors for 1 or more RHS. 17* 18* The program must be driven by a short data file. 19* Here's an example file: 20*'ScaLAPACK, Version 1.2, banded linear systems input file' 21*'PVM.' 22*'' output file name (if any) 23*6 device out 24*'L' define Lower or Upper 25*9 number of problem sizes 26*1 5 17 28 37 121 200 1023 2048 3073 values of N 27*6 number of bandwidths 28*1 2 4 10 31 64 values of BW 29*1 number of NB's 30*-1 3 4 5 values of NB (-1 for automatic choice) 31*1 number of NRHS's (must be 1) 32*8 values of NRHS 33*1 number of NBRHS's (ignored) 34*1 values of NBRHS (ignored) 35*6 number of process grids 36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" 37*3.0 threshold 38* 39* Internal Parameters 40* =================== 41* 42* TOTMEM INTEGER, default = 6200000. 43* TOTMEM is a machine-specific parameter indicating the 44* maximum amount of available memory in bytes. 45* The user should customize TOTMEM to his platform. Remember 46* to leave room in memory for the operating system, the BLACS 47* buffer, etc. For example, on a system with 8 MB of memory 48* per process (e.g., one processor on an Intel iPSC/860), the 49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, 50* code, BLACS buffer, etc). However, for PVM, we usually set 51* TOTMEM = 2000000. Some experimenting with the maximum value 52* of TOTMEM may be required. 53* 54* INTMEM INTEGER, default = 2048. 55* INTMEM is the size of the integer workspace used in this 56* driver as input as the IPIV vector. It represents an 57* upper bound on NB, the blocksize of the data 58* distribution. 59* 60* INTGSZ INTEGER, default = 4 bytes. 61* REALSZ INTEGER, default = 4 bytes. 62* INTGSZ and REALSZ indicate the length in bytes on the 63* given platform for an integer and a single precision real. 64* MEM REAL array, dimension ( TOTMEM / REALSZ ) 65* All arrays used by ScaLAPACK routines are allocated from 66* this array and referenced by pointers. The integer IPB, 67* for example, is a pointer to the starting element of MEM for 68* the solution vector(s) B. 69* 70* ===================================================================== 71* 72* Code Developer: Andrew J. Cleary, University of Tennessee. 73* Current address: Lawrence Livermore National Labs. 74* This version released: August, 2001. 75* 76* ===================================================================== 77* 78* .. Parameters .. 79 INTEGER TOTMEM 80 PARAMETER ( TOTMEM = 3000000 ) 81 INTEGER INTMEM 82 PARAMETER ( INTMEM = 2048 ) 83 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, 84 $ LLD_, MB_, M_, NB_, N_, RSRC_ 85 PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, 86 $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, 87 $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) 88* 89 REAL ZERO 90 INTEGER MEMSIZ, NTESTS, REALSZ 91 REAL PADVAL 92 PARAMETER ( REALSZ = 4, 93 $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, 94 $ PADVAL = -9923.0E+0, ZERO = 0.0E+0 ) 95 INTEGER INT_ONE 96 PARAMETER ( INT_ONE = 1 ) 97* .. 98* .. Local Scalars .. 99 LOGICAL CHECK 100 CHARACTER TRANS 101 CHARACTER*6 PASSED 102 CHARACTER*80 OUTFILE 103 INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, 104 $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, 105 $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, 106 $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, 107 $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, 108 $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, 109 $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, 110 $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, 111 $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ 112 REAL ANORM, SRESID, THRESH 113 DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 114* .. 115* .. Local Arrays .. 116 INTEGER IPIV(INTMEM) 117 INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), 118 $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), 119 $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), 120 $ NRVAL( NTESTS ), NVAL( NTESTS ), 121 $ PVAL( NTESTS ), QVAL( NTESTS ) 122 REAL MEM( MEMSIZ ) 123 DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) 124* .. 125* .. External Subroutines .. 126 EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, 127 $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, 128 $ BLACS_PINFO, DESCINIT, IGSUM2D, PSBMATGEN, 129 $ PSCHEKPAD, PSDBLASCHK, PSFILLPAD, PSGBINFO, 130 $ PSGBTRF, PSGBTRS, PSMATGEN, SLBOOT, SLCOMBINE, 131 $ SLTIMER 132* .. 133* .. External Functions .. 134 INTEGER NUMROC 135 LOGICAL LSAME 136 REAL PSLANGE 137 EXTERNAL LSAME, NUMROC, PSLANGE 138* .. 139* .. Intrinsic Functions .. 140 INTRINSIC DBLE, MAX, MIN, MOD 141* .. 142* .. Data Statements .. 143 DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / 144* .. 145* 146* 147* 148* .. Executable Statements .. 149* 150* Get starting information 151* 152 CALL BLACS_PINFO( IAM, NPROCS ) 153 IASEED = 100 154 IBSEED = 200 155* 156 CALL PSGBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, 157 $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, 158 $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, 159 $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) 160* 161 CHECK = ( THRESH.GE.0.0E+0 ) 162* 163* Print headings 164* 165 IF( IAM.EQ.0 ) THEN 166 WRITE( NOUT, FMT = * ) 167 WRITE( NOUT, FMT = 9995 ) 168 WRITE( NOUT, FMT = 9994 ) 169 WRITE( NOUT, FMT = * ) 170 END IF 171* 172* Loop over different process grids 173* 174 DO 60 I = 1, NGRIDS 175* 176 NPROW = PVAL( I ) 177 NPCOL = QVAL( I ) 178* 179* Make sure grid information is correct 180* 181 IERR( 1 ) = 0 182 IF( NPROW.LT.1 ) THEN 183 IF( IAM.EQ.0 ) 184 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW 185 IERR( 1 ) = 1 186 ELSE IF( NPCOL.LT.1 ) THEN 187 IF( IAM.EQ.0 ) 188 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL 189 IERR( 1 ) = 1 190 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN 191 IF( IAM.EQ.0 ) 192 $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS 193 IERR( 1 ) = 1 194 END IF 195* 196 IF( IERR( 1 ).GT.0 ) THEN 197 IF( IAM.EQ.0 ) 198 $ WRITE( NOUT, FMT = 9997 ) 'grid' 199 KSKIP = KSKIP + 1 200 GO TO 50 201 END IF 202* 203* Define process grid 204* 205 CALL BLACS_GET( -1, 0, ICTXT ) 206 CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) 207* 208* 209* Define transpose process grid 210* 211 CALL BLACS_GET( -1, 0, ICTXTB ) 212 CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) 213* 214* Go to bottom of process grid loop if this case doesn't use my 215* process 216* 217 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) 218* 219 IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN 220 GO TO 50 221 ENDIF 222* 223 DO 40 J = 1, NMAT 224* 225 IERR( 1 ) = 0 226* 227 N = NVAL( J ) 228* 229* Make sure matrix information is correct 230* 231 IF( N.LT.1 ) THEN 232 IF( IAM.EQ.0 ) 233 $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N 234 IERR( 1 ) = 1 235 END IF 236* 237* Check all processes for an error 238* 239 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 240 $ -1, 0 ) 241* 242 IF( IERR( 1 ).GT.0 ) THEN 243 IF( IAM.EQ.0 ) 244 $ WRITE( NOUT, FMT = 9997 ) 'size' 245 KSKIP = KSKIP + 1 246 GO TO 40 247 END IF 248* 249* 250 DO 45 BW_NUM = 1, NBW 251* 252 IERR( 1 ) = 0 253* 254 BWL = BWLVAL( BW_NUM ) 255 IF( BWL.LT.1 ) THEN 256 IF( IAM.EQ.0 ) 257 $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL 258 IERR( 1 ) = 1 259 END IF 260* 261 BWU = BWUVAL( BW_NUM ) 262 IF( BWU.LT.1 ) THEN 263 IF( IAM.EQ.0 ) 264 $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU 265 IERR( 1 ) = 1 266 END IF 267* 268 IF( BWL.GT.N-1 ) THEN 269 IF( IAM.EQ.0 ) THEN 270 IERR( 1 ) = 1 271 ENDIF 272 END IF 273* 274 IF( BWU.GT.N-1 ) THEN 275 IF( IAM.EQ.0 ) THEN 276 IERR( 1 ) = 1 277 ENDIF 278 END IF 279* 280* Check all processes for an error 281* 282 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 283 $ -1, 0 ) 284* 285 IF( IERR( 1 ).GT.0 ) THEN 286 KSKIP = KSKIP + 1 287 GO TO 45 288 END IF 289* 290 DO 30 K = 1, NNB 291* 292 IERR( 1 ) = 0 293* 294 NB = NBVAL( K ) 295 IF( NB.LT.0 ) THEN 296 NB =( (N-(NPCOL-1)*(BWL+BWU)-1)/NPCOL + 1 ) 297 $ + (BWL+BWU) 298 NB = MAX( NB, 2*(BWL+BWU) ) 299 NB = MIN( N, NB ) 300 END IF 301* 302* Make sure NB is legal 303* 304 IERR( 1 ) = 0 305* 306 IF( NB.GT.INTMEM ) THEN 307 IERR( 1 ) = 1 308 IF( IAM.EQ.0 ) THEN 309 WRITE( NOUT,* )'You have chosen an ' 310 $ ,'NB > INTMEM in the driver.' 311 WRITE(NOUT, *)'Please edit the driver ' 312 $ ,'and increase the value of INTMEM' 313 ENDIF 314 END IF 315* 316* Check all processes for an error 317* 318 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 319 $ -1, 0 ) 320* 321 IF( IERR( 1 ).GT.0 ) THEN 322 KSKIP = KSKIP + 1 323 GO TO 30 324 END IF 325* 326* Padding constants 327* 328 NP = NUMROC( (2*BWL+2*BWU+1), (2*BWL+2*BWU+1), 329 $ MYROW, 0, NPROW ) 330 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) 331* 332 IF( CHECK ) THEN 333 IPREPAD = ((2*BWL+2*BWU+1)+10) 334 IMIDPAD = 10 335 IPOSTPAD = ((2*BWL+2*BWU+1)+10) 336 ELSE 337 IPREPAD = 0 338 IMIDPAD = 0 339 IPOSTPAD = 0 340 END IF 341* 342* Initialize the array descriptor for the matrix A 343* 344 CALL DESCINIT( DESCA2D, (2*BWL+2*BWU+1), N, 345 $ (2*BWL+2*BWU+1), NB, 0, 0, 346 $ ICTXT,((2*BWL+2*BWU+1)+10), IERR( 1 ) ) 347* 348* Convert this to 1D descriptor 349* 350 DESCA( 1 ) = 501 351 DESCA( 3 ) = N 352 DESCA( 4 ) = NB 353 DESCA( 5 ) = 0 354 DESCA( 2 ) = ICTXT 355 DESCA( 6 ) = ((2*BWL+2*BWU+1)+10) 356 DESCA( 7 ) = 0 357* 358 IERR_TEMP = IERR( 1 ) 359 IERR( 1 ) = 0 360 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) 361* 362* Check all processes for an error 363* 364 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) 365* 366 IF( IERR( 1 ).LT.0 ) THEN 367 IF( IAM.EQ.0 ) 368 $ WRITE( NOUT, FMT = 9997 ) 'descriptor' 369 KSKIP = KSKIP + 1 370 GO TO 30 371 END IF 372* 373* Assign pointers into MEM for SCALAPACK arrays, A is 374* allocated starting at position MEM( IPREPAD+1 ) 375* 376 FREE_PTR = 1 377 IPB = 0 378* 379* Save room for prepadding 380 FREE_PTR = FREE_PTR + IPREPAD 381* 382 IPA = FREE_PTR 383 FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* 384 $ DESCA2D( NB_ ) 385 $ + IPOSTPAD 386* 387* Add memory for fillin 388* Fillin space needs to store: 389* Fillin spike: 390* Contribution to previous proc's diagonal block of 391* reduced system: 392* Off-diagonal block of reduced system: 393* Diagonal block of reduced system: 394* 395 FILLIN_SIZE = 396 $ (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) 397* 398* Claim memory for fillin 399* 400 FREE_PTR = FREE_PTR + IPREPAD 401 IP_FILLIN = FREE_PTR 402 FREE_PTR = FREE_PTR + FILLIN_SIZE 403* 404* Workspace needed by computational routines: 405* 406 IPW_SIZE = 0 407* 408* factorization: 409* 410 IPW_SIZE = 1 411* 412* Claim memory for IPW 413* 414 IPW = FREE_PTR 415 FREE_PTR = FREE_PTR + IPW_SIZE 416* 417* Check for adequate memory for problem size 418* 419 IERR( 1 ) = 0 420 IF( FREE_PTR.GT.MEMSIZ ) THEN 421 IF( IAM.EQ.0 ) 422 $ WRITE( NOUT, FMT = 9996 ) 423 $ 'divide and conquer factorization', 424 $ (FREE_PTR )*REALSZ 425 IERR( 1 ) = 1 426 END IF 427* 428* Check all processes for an error 429* 430 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 431 $ 1, -1, 0 ) 432* 433 IF( IERR( 1 ).GT.0 ) THEN 434 IF( IAM.EQ.0 ) 435 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 436 KSKIP = KSKIP + 1 437 GO TO 30 438 END IF 439* 440* Worksize needed for LAPRNT 441 WORKSIZ = MAX( ((2*BWL+2*BWU+1)+10), NB ) 442* 443 IF( CHECK ) THEN 444* 445* Calculate the amount of workspace required by 446* the checking routines. 447* 448* PSLANGE 449 WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) 450* 451* PSDBLASCHK 452 WORKSIZ = MAX( WORKSIZ, 453 $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) 454 END IF 455* 456 FREE_PTR = FREE_PTR + IPREPAD 457 IP_DRIVER_W = FREE_PTR 458 FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD 459* 460* 461* Check for adequate memory for problem size 462* 463 IERR( 1 ) = 0 464 IF( FREE_PTR.GT.MEMSIZ ) THEN 465 IF( IAM.EQ.0 ) 466 $ WRITE( NOUT, FMT = 9996 ) 'factorization', 467 $ ( FREE_PTR )*REALSZ 468 IERR( 1 ) = 1 469 END IF 470* 471* Check all processes for an error 472* 473 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 474 $ 1, -1, 0 ) 475* 476 IF( IERR( 1 ).GT.0 ) THEN 477 IF( IAM.EQ.0 ) 478 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 479 KSKIP = KSKIP + 1 480 GO TO 30 481 END IF 482* 483 CALL PSBMATGEN( ICTXT, 'G', 'N', BWL, BWU, N, 484 $ (2*BWL+2*BWU+1), NB, MEM( IPA+BWL+BWU ), 485 $ ((2*BWL+2*BWU+1)+10), 0, 0, IASEED, 486 $ MYROW, MYCOL, NPROW, NPCOL ) 487* 488 CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), 489 $ ((2*BWL+2*BWU+1)+10), IPREPAD, IPOSTPAD, 490 $ PADVAL ) 491* 492 CALL PSFILLPAD( ICTXT, WORKSIZ, 1, 493 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 494 $ IPREPAD, IPOSTPAD, PADVAL ) 495* 496* Calculate norm of A for residual error-checking 497* 498 IF( CHECK ) THEN 499* 500 ANORM = PSLANGE( '1', (2*BWL+2*BWU+1), 501 $ N, MEM( IPA ), 1, 1, 502 $ DESCA2D, MEM( IP_DRIVER_W ) ) 503 CALL PSCHEKPAD( ICTXT, 'PSLANGE', NP, NQ, 504 $ MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), 505 $ IPREPAD, IPOSTPAD, PADVAL ) 506 CALL PSCHEKPAD( ICTXT, 'PSLANGE', 507 $ WORKSIZ, 1, 508 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 509 $ IPREPAD, IPOSTPAD, PADVAL ) 510 END IF 511* 512* 513 CALL SLBOOT() 514 CALL BLACS_BARRIER( ICTXT, 'All' ) 515* 516* Perform factorization 517* 518 CALL SLTIMER( 1 ) 519* 520 CALL PSGBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, IPIV, 521 $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), 522 $ IPW_SIZE, INFO ) 523* 524 CALL SLTIMER( 1 ) 525* 526 IF( INFO.NE.0 ) THEN 527 IF( IAM.EQ.0 ) THEN 528 WRITE( NOUT, FMT = * ) 'PSGBTRF INFO=', INFO 529 ENDIF 530 KFAIL = KFAIL + 1 531 GO TO 30 532 END IF 533* 534 IF( CHECK ) THEN 535* 536* Check for memory overwrite in factorization 537* 538 CALL PSCHEKPAD( ICTXT, 'PSGBTRF', NP, 539 $ NQ, MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), 540 $ IPREPAD, IPOSTPAD, PADVAL ) 541 END IF 542* 543* 544* Loop over the different values for NRHS 545* 546 DO 20 HH = 1, NNR 547* 548 IERR( 1 ) = 0 549* 550 NRHS = NRVAL( HH ) 551* 552* Initialize Array Descriptor for rhs 553* 554 CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, 555 $ ICTXTB, NB+10, IERR( 1 ) ) 556* 557* Convert this to 1D descriptor 558* 559 DESCB( 1 ) = 502 560 DESCB( 3 ) = N 561 DESCB( 4 ) = NB 562 DESCB( 5 ) = 0 563 DESCB( 2 ) = ICTXT 564 DESCB( 6 ) = DESCB2D( LLD_ ) 565 DESCB( 7 ) = 0 566* 567* reset free_ptr to reuse space for right hand sides 568* 569 IF( IPB .GT. 0 ) THEN 570 FREE_PTR = IPB 571 ENDIF 572* 573 FREE_PTR = FREE_PTR + IPREPAD 574 IPB = FREE_PTR 575 FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) 576 $ + IPOSTPAD 577* 578* Allocate workspace for workspace in TRS routine: 579* 580 IPW_SOLVE_SIZE = NRHS*(NB+2*BWL+4*BWU) 581* 582 IPW_SOLVE = FREE_PTR 583 FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE 584* 585 IERR( 1 ) = 0 586 IF( FREE_PTR.GT.MEMSIZ ) THEN 587 IF( IAM.EQ.0 ) 588 $ WRITE( NOUT, FMT = 9996 )'solve', 589 $ ( FREE_PTR )*REALSZ 590 IERR( 1 ) = 1 591 END IF 592* 593* Check all processes for an error 594* 595 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, 596 $ IERR, 1, -1, 0 ) 597* 598 IF( IERR( 1 ).GT.0 ) THEN 599 IF( IAM.EQ.0 ) 600 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 601 KSKIP = KSKIP + 1 602 GO TO 15 603 END IF 604* 605 MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) 606* 607* Generate RHS 608* 609 CALL PSMATGEN(ICTXTB, 'No', 'No', 610 $ DESCB2D( M_ ), DESCB2D( N_ ), 611 $ DESCB2D( MB_ ), DESCB2D( NB_ ), 612 $ MEM( IPB ), 613 $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), 614 $ DESCB2D( CSRC_ ), 615 $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, 616 $ MYROW, NPCOL, NPROW ) 617* 618 IF( CHECK ) THEN 619 CALL PSFILLPAD( ICTXTB, NB, NRHS, 620 $ MEM( IPB-IPREPAD ), 621 $ DESCB2D( LLD_ ), 622 $ IPREPAD, IPOSTPAD, 623 $ PADVAL ) 624 CALL PSFILLPAD( ICTXT, WORKSIZ, 1, 625 $ MEM( IP_DRIVER_W-IPREPAD ), 626 $ WORKSIZ, IPREPAD, 627 $ IPOSTPAD, PADVAL ) 628 END IF 629* 630* 631 CALL BLACS_BARRIER( ICTXT, 'All') 632 CALL SLTIMER( 2 ) 633* 634* Solve linear system via factorization 635* 636 CALL PSGBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), 637 $ 1, DESCA, IPIV, MEM( IPB ), 1, DESCB, 638 $ MEM( IP_FILLIN ), FILLIN_SIZE, 639 $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, 640 $ INFO ) 641* 642 CALL SLTIMER( 2 ) 643* 644 IF( INFO.NE.0 ) THEN 645 IF( IAM.EQ.0 ) 646 $ WRITE( NOUT, FMT = * ) 'PSGBTRS INFO=', INFO 647 KFAIL = KFAIL + 1 648 PASSED = 'FAILED' 649 GO TO 20 650 END IF 651* 652 IF( CHECK ) THEN 653* 654* check for memory overwrite 655* 656 CALL PSCHEKPAD( ICTXT, 'PSGBTRS-work', 657 $ WORKSIZ, 1, 658 $ MEM( IP_DRIVER_W-IPREPAD ), 659 $ WORKSIZ, IPREPAD, 660 $ IPOSTPAD, PADVAL ) 661* 662* check the solution to rhs 663* 664 SRESID = ZERO 665* 666 CALL PSDBLASCHK( 'N', 'N', TRANS, 667 $ N, BWL, BWU, NRHS, 668 $ MEM( IPB ), 1, 1, DESCB2D, 669 $ IASEED, MEM( IPA+BWL+BWU ), 1, 1, DESCA2D, 670 $ IBSEED, ANORM, SRESID, 671 $ MEM( IP_DRIVER_W ), WORKSIZ ) 672* 673 IF( IAM.EQ.0 ) THEN 674 IF( SRESID.GT.THRESH ) 675 $ WRITE( NOUT, FMT = 9985 ) SRESID 676 END IF 677* 678* The second test is a NaN trap 679* 680 IF( ( SRESID.LE.THRESH ).AND. 681 $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN 682 KPASS = KPASS + 1 683 PASSED = 'PASSED' 684 ELSE 685 KFAIL = KFAIL + 1 686 PASSED = 'FAILED' 687 END IF 688* 689 END IF 690* 691 15 CONTINUE 692* Skipped tests jump to here to print out "SKIPPED" 693* 694* Gather maximum of all CPU and WALL clock timings 695* 696 CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, 697 $ WTIME ) 698 CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, 699 $ CTIME ) 700* 701* Print results 702* 703 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN 704* 705 NOPS = 0 706 NOPS2 = 0 707* 708 N_FIRST = NB 709 NPROCS_REAL = ( N-1 )/NB + 1 710 N_LAST = MOD( N-1, NB ) + 1 711* 712* 2 N bwl (bwl+bwu) + N (bwl) flops 713* for LU factorization 714* 715 NOPS = 2*(DBLE(N)*DBLE(BWL)* 716 $ DBLE((BWL+BWU))) + 717 $ (DBLE(N)*DBLE(BWL)) 718* 719* nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve. 720* 721 NOPS = NOPS + 722 $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE((BWL+BWU))) 723 $ *DBLE(NRHS)) 724* 725* Second calc to represent actual hardware speed 726* 727* 2*N_FIRST (bwl+bwu)*bwu Flops for LU 728* factorization in proc 1 729* 730 NOPS2 = 2*( (DBLE(N_FIRST)* 731 $ DBLE((BWL+BWU))*DBLE(BWU))) 732* 733 IF ( NPROCS_REAL .GT. 1) THEN 734* 8 N_LAST (bwl+bwu)*(bwl+bwu) 735* flops for LU and spike 736* calc in last processor 737* 738 NOPS2 = NOPS2 + 739 $ 8*( (DBLE(N_LAST)*DBLE((BWL+BWU)) 740 $ *DBLE((BWL+BWU))) ) 741 ENDIF 742* 743 IF ( NPROCS_REAL .GT. 2) THEN 744* 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike 745* calc in other processors 746* 747 NOPS2 = NOPS2 + (NPROCS_REAL-2)* 748 $ 8*( (DBLE(NB)*DBLE((BWL+BWU)) 749 $ *DBLE((BWL+BWU))) ) 750 ENDIF 751* 752* Reduced system 753* 754 NOPS2 = NOPS2 + 755 $ 2*( NPROCS_REAL-1 ) * 756 $ ( (BWL+BWU)*(BWL+BWU)*(BWL+BWU)/3 ) 757 IF( NPROCS_REAL .GT. 1 ) THEN 758 NOPS2 = NOPS2 + 759 $ 2*( NPROCS_REAL-2 ) * 760 $ (2*(BWL+BWU)*(BWL+BWU)*(BWL+BWU)) 761 ENDIF 762* 763* Solve stage 764* 765* nrhs*2 n_first* 766* (bwl+(bwl+bwu)) 767* flops for L,U solve in proc 1. 768* 769 NOPS2 = NOPS2 + 770 $ 2* 771 $ DBLE(N_FIRST)* 772 $ DBLE(NRHS) * 773 $ ( DBLE(BWL)+DBLE((BWL+BWU))) 774* 775 IF ( NPROCS_REAL .GT. 1 ) THEN 776* 777* 2*nrhs*2 n_last 778* ((bwl+bwu)+(bwl+bwu)) 779* flops for LU solve in other procs 780* 781 NOPS2 = NOPS2 + 782 $ 4* 783 $ (DBLE(N_LAST)*(DBLE((BWL+BWU))+ 784 $ DBLE((BWL+BWU))))*DBLE(NRHS) 785 ENDIF 786* 787 IF ( NPROCS_REAL .GT. 2 ) THEN 788* 789* 2*nrhs*2 NB 790* ((bwl+bwu)+(bwl+bwu)) 791* flops for LU solve in other procs 792* 793 NOPS2 = NOPS2 + 794 $ ( NPROCS_REAL-2)*2* 795 $ ( (DBLE(NB)*(DBLE((BWL+BWU))+ 796 $ DBLE((BWL+BWU))))*DBLE(NRHS) ) 797 ENDIF 798* 799* Reduced system 800* 801 NOPS2 = NOPS2 + 802 $ NRHS*( NPROCS_REAL-1)*2*((BWL+BWU)*(BWL+BWU) ) 803 IF( NPROCS_REAL .GT. 1 ) THEN 804 NOPS2 = NOPS2 + 805 $ NRHS*( NPROCS_REAL-2 ) * 806 $ ( 6 * (BWL+BWU)*(BWL+BWU) ) 807 ENDIF 808* 809* 810* Calculate total megaflops - factorization and/or 811* solve -- for WALL and CPU time, and print output 812* 813* Print WALL time if machine supports it 814* 815 IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN 816 TMFLOPS = NOPS / 817 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 818 ELSE 819 TMFLOPS = 0.0D+0 820 END IF 821* 822 IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN 823 TMFLOPS2 = NOPS2 / 824 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 825 ELSE 826 TMFLOPS2 = 0.0D+0 827 END IF 828* 829 IF( WTIME( 2 ).GE.0.0D+0 ) 830 $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, 831 $ N, 832 $ BWL, BWU, 833 $ NB, NRHS, NPROW, NPCOL, 834 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, 835 $ TMFLOPS2, PASSED 836* 837* Print CPU time if machine supports it 838* 839 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 840 TMFLOPS = NOPS / 841 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 842 ELSE 843 TMFLOPS = 0.0D+0 844 END IF 845* 846 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 847 TMFLOPS2 = NOPS2 / 848 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 849 ELSE 850 TMFLOPS2 = 0.0D+0 851 END IF 852* 853 IF( CTIME( 2 ).GE.0.0D+0 ) 854 $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, 855 $ N, 856 $ BWL, BWU, 857 $ NB, NRHS, NPROW, NPCOL, 858 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, 859 $ TMFLOPS2, PASSED 860* 861 END IF 862 20 CONTINUE 863* 864* 865 30 CONTINUE 866* NNB loop 867* 868 45 CONTINUE 869* BW[] loop 870* 871 40 CONTINUE 872* NMAT loop 873* 874 CALL BLACS_GRIDEXIT( ICTXT ) 875 CALL BLACS_GRIDEXIT( ICTXTB ) 876* 877 50 CONTINUE 878* NGRIDS DROPOUT 879 60 CONTINUE 880* NGRIDS loop 881* 882* Print ending messages and close output file 883* 884 IF( IAM.EQ.0 ) THEN 885 KTESTS = KPASS + KFAIL + KSKIP 886 WRITE( NOUT, FMT = * ) 887 WRITE( NOUT, FMT = 9992 ) KTESTS 888 IF( CHECK ) THEN 889 WRITE( NOUT, FMT = 9991 ) KPASS 890 WRITE( NOUT, FMT = 9989 ) KFAIL 891 ELSE 892 WRITE( NOUT, FMT = 9990 ) KPASS 893 END IF 894 WRITE( NOUT, FMT = 9988 ) KSKIP 895 WRITE( NOUT, FMT = * ) 896 WRITE( NOUT, FMT = * ) 897 WRITE( NOUT, FMT = 9987 ) 898 IF( NOUT.NE.6 .AND. NOUT.NE.0 ) 899 $ CLOSE ( NOUT ) 900 END IF 901* 902 CALL BLACS_EXIT( 0 ) 903* 904 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, 905 $ '; It should be at least 1' ) 906 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', 907 $ I4 ) 908 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 909 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', 910 $ I11 ) 911 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', 912 $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 913 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', 914 $ '-------- -------- -------- ------' ) 915 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, 916 $ 1X,I4,1X,I4,1X,F9.3, 917 $ F9.4, F9.2, F9.2, 1X, A6 ) 918 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 919 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 920 9990 FORMAT( I5, ' tests completed without checking.' ) 921 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 922 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 923 9987 FORMAT( 'END OF TESTS.' ) 924 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 925 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) 926* 927 STOP 928* 929* End of PSGBTRS_DRIVER 930* 931 END 932* 933