1 PROGRAM PDPBDRIVER 2* 3* 4* -- ScaLAPACK routine (version 1.7) -- 5* University of Tennessee, Knoxville, Oak Ridge National Laboratory, 6* and University of California, Berkeley. 7* November 15, 1997 8* 9* Purpose 10* ======= 11* 12* PDPBDRIVER is a test program for the 13* ScaLAPACK Band Cholesky routines corresponding to the options 14* indicated by DPB. This test driver performs an 15* A = L*L**T factorization 16* and solves a linear system with the factors for 1 or more RHS. 17* 18* The program must be driven by a short data file. 19* Here's an example file: 20*'ScaLAPACK, Version 1.2, banded linear systems input file' 21*'PVM.' 22*'' output file name (if any) 23*6 device out 24*'L' define Lower or Upper 25*9 number of problem sizes 26*1 5 17 28 37 121 200 1023 2048 3073 values of N 27*6 number of bandwidths 28*1 2 4 10 31 64 values of BW 29*1 number of NB's 30*-1 3 4 5 values of NB (-1 for automatic choice) 31*1 number of NRHS's (must be 1) 32*8 values of NRHS 33*1 number of NBRHS's (ignored) 34*1 values of NBRHS (ignored) 35*6 number of process grids 36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" 37*3.0 threshold 38* 39* Internal Parameters 40* =================== 41* 42* TOTMEM INTEGER, default = 6200000. 43* TOTMEM is a machine-specific parameter indicating the 44* maximum amount of available memory in bytes. 45* The user should customize TOTMEM to his platform. Remember 46* to leave room in memory for the operating system, the BLACS 47* buffer, etc. For example, on a system with 8 MB of memory 48* per process (e.g., one processor on an Intel iPSC/860), the 49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, 50* code, BLACS buffer, etc). However, for PVM, we usually set 51* TOTMEM = 2000000. Some experimenting with the maximum value 52* of TOTMEM may be required. 53* 54* INTGSZ INTEGER, default = 4 bytes. 55* DBLESZ INTEGER, default = 8 bytes. 56* INTGSZ and DBLESZ indicate the length in bytes on the 57* given platform for an integer and a double precision real. 58* MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) 59* All arrays used by ScaLAPACK routines are allocated from 60* this array and referenced by pointers. The integer IPB, 61* for example, is a pointer to the starting element of MEM for 62* the solution vector(s) B. 63* 64* ===================================================================== 65* 66* Code Developer: Andrew J. Cleary, University of Tennessee. 67* Current address: Lawrence Livermore National Labs. 68* This version released: August, 2001. 69* 70* ===================================================================== 71* 72* .. Parameters .. 73 INTEGER TOTMEM 74 PARAMETER ( TOTMEM = 3000000 ) 75 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, 76 $ LLD_, MB_, M_, NB_, N_, RSRC_ 77 PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, 78 $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, 79 $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) 80* 81 DOUBLE PRECISION ZERO 82 INTEGER DBLESZ, MEMSIZ, NTESTS 83 DOUBLE PRECISION PADVAL 84 PARAMETER ( DBLESZ = 8, 85 $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, 86 $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) 87 INTEGER INT_ONE 88 PARAMETER ( INT_ONE = 1 ) 89* .. 90* .. Local Scalars .. 91 LOGICAL CHECK 92 CHARACTER UPLO 93 CHARACTER*6 PASSED 94 CHARACTER*80 OUTFILE 95 INTEGER BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I, 96 $ IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP, 97 $ IMIDPAD, INFO, IPA, IPB, IPOSTPAD, IPREPAD, 98 $ IPW, IPW_SIZE, IPW_SOLVE, IPW_SOLVE_SIZE, 99 $ IP_DRIVER_W, IP_FILLIN, J, K, KFAIL, KPASS, 100 $ KSKIP, KTESTS, MYCOL, MYRHS_SIZE, MYROW, N, NB, 101 $ NBW, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, 102 $ NPCOL, NPROCS, NPROCS_REAL, NPROW, NQ, NRHS, 103 $ N_FIRST, N_LAST, WORKSIZ 104 REAL THRESH 105 DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, 106 $ TMFLOPS2 107* .. 108* .. Local Arrays .. 109 INTEGER BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ), 110 $ DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ), 111 $ NBRVAL( NTESTS ), NBVAL( NTESTS ), 112 $ NRVAL( NTESTS ), NVAL( NTESTS ), 113 $ PVAL( NTESTS ), QVAL( NTESTS ) 114 DOUBLE PRECISION CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 ) 115* .. 116* .. External Subroutines .. 117 EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, 118 $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, 119 $ BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN, 120 $ PDCHEKPAD, PDFILLPAD, PDMATGEN, PDPBINFO, 121 $ PDPBLASCHK, PDPBTRF, PDPBTRS, SLBOOT, 122 $ SLCOMBINE, SLTIMER 123* .. 124* .. External Functions .. 125 INTEGER NUMROC 126 LOGICAL LSAME 127 DOUBLE PRECISION PDLANGE 128 EXTERNAL LSAME, NUMROC, PDLANGE 129* .. 130* .. Intrinsic Functions .. 131 INTRINSIC DBLE, MAX, MIN, MOD 132* .. 133* .. Data Statements .. 134 DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / 135* .. 136* 137* 138* 139* .. Executable Statements .. 140* 141* Get starting information 142* 143 CALL BLACS_PINFO( IAM, NPROCS ) 144 IASEED = 100 145 IBSEED = 200 146* 147 CALL PDPBINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW, 148 $ BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, 149 $ NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, 150 $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) 151* 152 CHECK = ( THRESH.GE.0.0D+0 ) 153* 154* Print headings 155* 156 IF( IAM.EQ.0 ) THEN 157 WRITE( NOUT, FMT = * ) 158 WRITE( NOUT, FMT = 9995 ) 159 WRITE( NOUT, FMT = 9994 ) 160 WRITE( NOUT, FMT = * ) 161 END IF 162* 163* Loop over different process grids 164* 165 DO 60 I = 1, NGRIDS 166* 167 NPROW = PVAL( I ) 168 NPCOL = QVAL( I ) 169* 170* Make sure grid information is correct 171* 172 IERR( 1 ) = 0 173 IF( NPROW.LT.1 ) THEN 174 IF( IAM.EQ.0 ) 175 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW 176 IERR( 1 ) = 1 177 ELSE IF( NPCOL.LT.1 ) THEN 178 IF( IAM.EQ.0 ) 179 $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL 180 IERR( 1 ) = 1 181 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN 182 IF( IAM.EQ.0 ) 183 $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS 184 IERR( 1 ) = 1 185 END IF 186* 187 IF( IERR( 1 ).GT.0 ) THEN 188 IF( IAM.EQ.0 ) 189 $ WRITE( NOUT, FMT = 9997 ) 'grid' 190 KSKIP = KSKIP + 1 191 GO TO 50 192 END IF 193* 194* Define process grid 195* 196 CALL BLACS_GET( -1, 0, ICTXT ) 197 CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) 198* 199* 200* Define transpose process grid 201* 202 CALL BLACS_GET( -1, 0, ICTXTB ) 203 CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) 204* 205* Go to bottom of process grid loop if this case doesn't use my 206* process 207* 208 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) 209* 210 IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN 211 GO TO 50 212 ENDIF 213* 214 DO 40 J = 1, NMAT 215* 216 IERR( 1 ) = 0 217* 218 N = NVAL( J ) 219* 220* Make sure matrix information is correct 221* 222 IF( N.LT.1 ) THEN 223 IF( IAM.EQ.0 ) 224 $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N 225 IERR( 1 ) = 1 226 END IF 227* 228* Check all processes for an error 229* 230 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 231 $ -1, 0 ) 232* 233 IF( IERR( 1 ).GT.0 ) THEN 234 IF( IAM.EQ.0 ) 235 $ WRITE( NOUT, FMT = 9997 ) 'size' 236 KSKIP = KSKIP + 1 237 GO TO 40 238 END IF 239* 240* 241 DO 45 BW_NUM = 1, NBW 242* 243 IERR( 1 ) = 0 244* 245 BW = BWVAL( BW_NUM ) 246 IF( BW.LT.0 ) THEN 247 IF( IAM.EQ.0 ) 248 $ WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW 249 IERR( 1 ) = 1 250 END IF 251* 252 IF( BW.GT.N-1 ) THEN 253 IERR( 1 ) = 1 254 END IF 255* 256* Check all processes for an error 257* 258 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 259 $ -1, 0 ) 260* 261 IF( IERR( 1 ).GT.0 ) THEN 262 KSKIP = KSKIP + 1 263 GO TO 45 264 END IF 265* 266 DO 30 K = 1, NNB 267* 268 IERR( 1 ) = 0 269* 270 NB = NBVAL( K ) 271 IF( NB.LT.0 ) THEN 272 NB =( (N-(NPCOL-1)*BW-1)/NPCOL + 1 ) 273 $ + BW 274 NB = MAX( NB, 2*BW ) 275 NB = MIN( N, NB ) 276 END IF 277* 278* Make sure NB is legal 279* 280 IERR( 1 ) = 0 281 IF( NB.LT.MIN( 2*BW, N ) ) THEN 282 IERR( 1 ) = 1 283 ENDIF 284* 285* Check all processes for an error 286* 287 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, 288 $ -1, 0 ) 289* 290 IF( IERR( 1 ).GT.0 ) THEN 291 KSKIP = KSKIP + 1 292 GO TO 30 293 END IF 294* 295* Padding constants 296* 297 NP = NUMROC( (BW+1), (BW+1), 298 $ MYROW, 0, NPROW ) 299 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) 300* 301 IF( CHECK ) THEN 302 IPREPAD = ((BW+1)+10) 303 IMIDPAD = 10 304 IPOSTPAD = ((BW+1)+10) 305 ELSE 306 IPREPAD = 0 307 IMIDPAD = 0 308 IPOSTPAD = 0 309 END IF 310* 311* Initialize the array descriptor for the matrix A 312* 313 CALL DESCINIT( DESCA2D, (BW+1), N, 314 $ (BW+1), NB, 0, 0, 315 $ ICTXT,((BW+1)+10), IERR( 1 ) ) 316* 317* Convert this to 1D descriptor 318* 319 DESCA( 1 ) = 501 320 DESCA( 3 ) = N 321 DESCA( 4 ) = NB 322 DESCA( 5 ) = 0 323 DESCA( 2 ) = ICTXT 324 DESCA( 6 ) = ((BW+1)+10) 325 DESCA( 7 ) = 0 326* 327 IERR_TEMP = IERR( 1 ) 328 IERR( 1 ) = 0 329 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) 330* 331* Check all processes for an error 332* 333 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) 334* 335 IF( IERR( 1 ).LT.0 ) THEN 336 IF( IAM.EQ.0 ) 337 $ WRITE( NOUT, FMT = 9997 ) 'descriptor' 338 KSKIP = KSKIP + 1 339 GO TO 30 340 END IF 341* 342* Assign pointers into MEM for SCALAPACK arrays, A is 343* allocated starting at position MEM( IPREPAD+1 ) 344* 345 FREE_PTR = 1 346 IPB = 0 347* 348* Save room for prepadding 349 FREE_PTR = FREE_PTR + IPREPAD 350* 351 IPA = FREE_PTR 352 FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* 353 $ DESCA2D( NB_ ) 354 $ + IPOSTPAD 355* 356* Add memory for fillin 357* Fillin space needs to store: 358* Fillin spike: 359* Contribution to previous proc's diagonal block of 360* reduced system: 361* Off-diagonal block of reduced system: 362* Diagonal block of reduced system: 363* 364 FILLIN_SIZE = 365 $ (NB+2*BW)*BW 366* 367* Claim memory for fillin 368* 369 FREE_PTR = FREE_PTR + IPREPAD 370 IP_FILLIN = FREE_PTR 371 FREE_PTR = FREE_PTR + FILLIN_SIZE 372* 373* Workspace needed by computational routines: 374* 375 IPW_SIZE = 0 376* 377* factorization: 378* 379 IPW_SIZE = BW*BW 380* 381* Claim memory for IPW 382* 383 IPW = FREE_PTR 384 FREE_PTR = FREE_PTR + IPW_SIZE 385* 386* Check for adequate memory for problem size 387* 388 IERR( 1 ) = 0 389 IF( FREE_PTR.GT.MEMSIZ ) THEN 390 IF( IAM.EQ.0 ) 391 $ WRITE( NOUT, FMT = 9996 ) 392 $ 'divide and conquer factorization', 393 $ (FREE_PTR )*DBLESZ 394 IERR( 1 ) = 1 395 END IF 396* 397* Check all processes for an error 398* 399 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 400 $ 1, -1, 0 ) 401* 402 IF( IERR( 1 ).GT.0 ) THEN 403 IF( IAM.EQ.0 ) 404 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 405 KSKIP = KSKIP + 1 406 GO TO 30 407 END IF 408* 409* Worksize needed for LAPRNT 410 WORKSIZ = MAX( ((BW+1)+10), NB ) 411* 412 IF( CHECK ) THEN 413* 414* Calculate the amount of workspace required by 415* the checking routines. 416* 417* PDLANGE 418 WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) 419* 420* PDPBLASCHK 421 WORKSIZ = MAX( WORKSIZ, 422 $ MAX(5,MAX(BW*(BW+2),NB))+2*NB ) 423 END IF 424* 425 FREE_PTR = FREE_PTR + IPREPAD 426 IP_DRIVER_W = FREE_PTR 427 FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD 428* 429* 430* Check for adequate memory for problem size 431* 432 IERR( 1 ) = 0 433 IF( FREE_PTR.GT.MEMSIZ ) THEN 434 IF( IAM.EQ.0 ) 435 $ WRITE( NOUT, FMT = 9996 ) 'factorization', 436 $ ( FREE_PTR )*DBLESZ 437 IERR( 1 ) = 1 438 END IF 439* 440* Check all processes for an error 441* 442 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 443 $ 1, -1, 0 ) 444* 445 IF( IERR( 1 ).GT.0 ) THEN 446 IF( IAM.EQ.0 ) 447 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 448 KSKIP = KSKIP + 1 449 GO TO 30 450 END IF 451* 452 CALL PDBMATGEN( ICTXT, UPLO, 'B', BW, BW, N, (BW+1), NB, 453 $ MEM( IPA ), ((BW+1)+10), 0, 0, IASEED, 454 $ MYROW, MYCOL, NPROW, NPCOL ) 455* 456 CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), 457 $ ((BW+1)+10), IPREPAD, IPOSTPAD, 458 $ PADVAL ) 459* 460 CALL PDFILLPAD( ICTXT, WORKSIZ, 1, 461 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 462 $ IPREPAD, IPOSTPAD, PADVAL ) 463* 464* Calculate norm of A for residual error-checking 465* 466 IF( CHECK ) THEN 467* 468 ANORM = PDLANGE( '1', (BW+1), 469 $ N, MEM( IPA ), 1, 1, 470 $ DESCA2D, MEM( IP_DRIVER_W ) ) 471 CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ, 472 $ MEM( IPA-IPREPAD ), ((BW+1)+10), 473 $ IPREPAD, IPOSTPAD, PADVAL ) 474 CALL PDCHEKPAD( ICTXT, 'PDLANGE', 475 $ WORKSIZ, 1, 476 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, 477 $ IPREPAD, IPOSTPAD, PADVAL ) 478 END IF 479* 480* 481 CALL SLBOOT() 482 CALL BLACS_BARRIER( ICTXT, 'All' ) 483* 484* Perform factorization 485* 486 CALL SLTIMER( 1 ) 487* 488 CALL PDPBTRF( UPLO, N, BW, MEM( IPA ), 1, DESCA, 489 $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), 490 $ IPW_SIZE, INFO ) 491* 492 CALL SLTIMER( 1 ) 493* 494 IF( INFO.NE.0 ) THEN 495 IF( IAM.EQ.0 ) THEN 496 WRITE( NOUT, FMT = * ) 'PDPBTRF INFO=', INFO 497 ENDIF 498 KFAIL = KFAIL + 1 499 GO TO 30 500 END IF 501* 502 IF( CHECK ) THEN 503* 504* Check for memory overwrite in factorization 505* 506 CALL PDCHEKPAD( ICTXT, 'PDPBTRF', NP, 507 $ NQ, MEM( IPA-IPREPAD ), ((BW+1)+10), 508 $ IPREPAD, IPOSTPAD, PADVAL ) 509 END IF 510* 511* 512* Loop over the different values for NRHS 513* 514 DO 20 HH = 1, NNR 515* 516 IERR( 1 ) = 0 517* 518 NRHS = NRVAL( HH ) 519* 520* Initialize Array Descriptor for rhs 521* 522 CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, 523 $ ICTXTB, NB+10, IERR( 1 ) ) 524* 525* Convert this to 1D descriptor 526* 527 DESCB( 1 ) = 502 528 DESCB( 3 ) = N 529 DESCB( 4 ) = NB 530 DESCB( 5 ) = 0 531 DESCB( 2 ) = ICTXT 532 DESCB( 6 ) = DESCB2D( LLD_ ) 533 DESCB( 7 ) = 0 534* 535* reset free_ptr to reuse space for right hand sides 536* 537 IF( IPB .GT. 0 ) THEN 538 FREE_PTR = IPB 539 ENDIF 540* 541 FREE_PTR = FREE_PTR + IPREPAD 542 IPB = FREE_PTR 543 FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) 544 $ + IPOSTPAD 545* 546* Allocate workspace for workspace in TRS routine: 547* 548 IPW_SOLVE_SIZE = (BW*NRHS) 549* 550 IPW_SOLVE = FREE_PTR 551 FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE 552* 553 IERR( 1 ) = 0 554 IF( FREE_PTR.GT.MEMSIZ ) THEN 555 IF( IAM.EQ.0 ) 556 $ WRITE( NOUT, FMT = 9996 )'solve', 557 $ ( FREE_PTR )*DBLESZ 558 IERR( 1 ) = 1 559 END IF 560* 561* Check all processes for an error 562* 563 CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, 564 $ IERR, 1, -1, 0 ) 565* 566 IF( IERR( 1 ).GT.0 ) THEN 567 IF( IAM.EQ.0 ) 568 $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' 569 KSKIP = KSKIP + 1 570 GO TO 15 571 END IF 572* 573 MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) 574* 575* Generate RHS 576* 577 CALL PDMATGEN(ICTXTB, 'No', 'No', 578 $ DESCB2D( M_ ), DESCB2D( N_ ), 579 $ DESCB2D( MB_ ), DESCB2D( NB_ ), 580 $ MEM( IPB ), 581 $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), 582 $ DESCB2D( CSRC_ ), 583 $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, 584 $ MYROW, NPCOL, NPROW ) 585* 586 IF( CHECK ) THEN 587 CALL PDFILLPAD( ICTXTB, NB, NRHS, 588 $ MEM( IPB-IPREPAD ), 589 $ DESCB2D( LLD_ ), 590 $ IPREPAD, IPOSTPAD, 591 $ PADVAL ) 592 CALL PDFILLPAD( ICTXT, WORKSIZ, 1, 593 $ MEM( IP_DRIVER_W-IPREPAD ), 594 $ WORKSIZ, IPREPAD, 595 $ IPOSTPAD, PADVAL ) 596 END IF 597* 598* 599 CALL BLACS_BARRIER( ICTXT, 'All') 600 CALL SLTIMER( 2 ) 601* 602* Solve linear system via factorization 603* 604 CALL PDPBTRS( UPLO, N, BW, NRHS, MEM( IPA ), 1, 605 $ DESCA, MEM( IPB ), 1, DESCB, 606 $ MEM( IP_FILLIN ), FILLIN_SIZE, 607 $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, 608 $ INFO ) 609* 610 CALL SLTIMER( 2 ) 611* 612 IF( INFO.NE.0 ) THEN 613 IF( IAM.EQ.0 ) 614 $ WRITE( NOUT, FMT = * ) 'PDPBTRS INFO=', INFO 615 KFAIL = KFAIL + 1 616 PASSED = 'FAILED' 617 GO TO 20 618 END IF 619* 620 IF( CHECK ) THEN 621* 622* check for memory overwrite 623* 624 CALL PDCHEKPAD( ICTXT, 'PDPBTRS-work', 625 $ WORKSIZ, 1, 626 $ MEM( IP_DRIVER_W-IPREPAD ), 627 $ WORKSIZ, IPREPAD, 628 $ IPOSTPAD, PADVAL ) 629* 630* check the solution to rhs 631* 632 SRESID = ZERO 633* 634 CALL PDPBLASCHK( 'S', UPLO, N, BW, BW, NRHS, 635 $ MEM( IPB ), 1, 1, DESCB2D, 636 $ IASEED, MEM( IPA ), 1, 1, DESCA2D, 637 $ IBSEED, ANORM, SRESID, 638 $ MEM( IP_DRIVER_W ), WORKSIZ ) 639* 640 IF( IAM.EQ.0 ) THEN 641 IF( SRESID.GT.THRESH ) 642 $ WRITE( NOUT, FMT = 9985 ) SRESID 643 END IF 644* 645* The second test is a NaN trap 646* 647 IF( ( SRESID.LE.THRESH ).AND. 648 $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN 649 KPASS = KPASS + 1 650 PASSED = 'PASSED' 651 ELSE 652 KFAIL = KFAIL + 1 653 PASSED = 'FAILED' 654 END IF 655* 656 END IF 657* 658 15 CONTINUE 659* Skipped tests jump to here to print out "SKIPPED" 660* 661* Gather maximum of all CPU and WALL clock timings 662* 663 CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, 664 $ WTIME ) 665 CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, 666 $ CTIME ) 667* 668* Print results 669* 670 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN 671* 672 NOPS = 0 673 NOPS2 = 0 674* 675 N_FIRST = NB 676 NPROCS_REAL = ( N-1 )/NB + 1 677 N_LAST = MOD( N-1, NB ) + 1 678* 679* 680 NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)* 681 $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) + 682 $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 / 683 $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) ) 684 NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW) 685 $ *( -1.D0 /2.D0+DBLE(BW) 686 $ *( -1.D0 / 3.D0 ) ) ) + 687 $ DBLE(N)*( DBLE(BW) / 688 $ 2.D0*( 1.D0+DBLE(BW) ) ) 689* 690 NOPS = NOPS + 691 $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )* 692 $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)* 693 $ ( DBLE(BW)*( 2*DBLE(N)- 694 $ ( DBLE(BW)+1.D0 ) ) ) 695* 696* 697* Second calc to represent actual hardware speed 698* 699* NB bw^2 flops for LLt factorization in 1st proc 700* 701 NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 ) 702* 703 IF ( NPROCS_REAL .GT. 1) THEN 704* 4 NB bw^2 flops for LLt factorization and 705* spike calc in last processor 706* 707 NOPS2 = NOPS2 + 708 $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) ) 709 ENDIF 710* 711 IF ( NPROCS_REAL .GT. 2) THEN 712* 4 NB bw^2 flops for LLt factorization and 713* spike calc in other processors 714* 715 NOPS2 = NOPS2 + (NPROCS_REAL-2)* 716 $ 4*( (DBLE(NB)*DBLE(BW)**2) ) 717 ENDIF 718* 719* Reduced system 720* 721 NOPS2 = NOPS2 + 722 $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 ) 723 IF( NPROCS_REAL .GT. 1 ) THEN 724 NOPS2 = NOPS2 + 725 $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW ) 726 ENDIF 727* 728* 729* nrhs * 4 n_first*bw flops for LLt solve in proc 1. 730* 731 NOPS2 = NOPS2 + 732 $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) ) 733* 734 IF ( NPROCS_REAL .GT. 1 ) THEN 735* 736* 2*nrhs*4 n_last*bw flops for LLt solve in last. 737* 738 NOPS2 = NOPS2 + 739 $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) ) 740 ENDIF 741* 742 IF ( NPROCS_REAL .GT. 2 ) THEN 743* 744* 2 * nrhs * 4 NB*bw flops for LLt solve in others. 745* 746 NOPS2 = NOPS2 + 747 $ ( NPROCS_REAL-2)*2* 748 $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) ) 749 ENDIF 750* 751* Reduced system 752* 753 NOPS2 = NOPS2 + 754 $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW ) 755 IF( NPROCS_REAL .GT. 1 ) THEN 756 NOPS2 = NOPS2 + 757 $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW ) 758 ENDIF 759* 760* 761* Calculate total megaflops - factorization and/or 762* solve -- for WALL and CPU time, and print output 763* 764* Print WALL time if machine supports it 765* 766 IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN 767 TMFLOPS = NOPS / 768 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 769 ELSE 770 TMFLOPS = 0.0D+0 771 END IF 772* 773 IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN 774 TMFLOPS2 = NOPS2 / 775 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) 776 ELSE 777 TMFLOPS2 = 0.0D+0 778 END IF 779* 780 IF( WTIME( 2 ).GE.0.0D+0 ) 781 $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, 782 $ N, 783 $ BW, 784 $ NB, NRHS, NPROW, NPCOL, 785 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, 786 $ TMFLOPS2, PASSED 787* 788* Print CPU time if machine supports it 789* 790 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 791 TMFLOPS = NOPS / 792 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 793 ELSE 794 TMFLOPS = 0.0D+0 795 END IF 796* 797 IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN 798 TMFLOPS2 = NOPS2 / 799 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) 800 ELSE 801 TMFLOPS2 = 0.0D+0 802 END IF 803* 804 IF( CTIME( 2 ).GE.0.0D+0 ) 805 $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, 806 $ N, 807 $ BW, 808 $ NB, NRHS, NPROW, NPCOL, 809 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, 810 $ TMFLOPS2, PASSED 811* 812 END IF 813 20 CONTINUE 814* 815* 816 30 CONTINUE 817* NNB loop 818* 819 45 CONTINUE 820* BW[] loop 821* 822 40 CONTINUE 823* NMAT loop 824* 825 CALL BLACS_GRIDEXIT( ICTXT ) 826 CALL BLACS_GRIDEXIT( ICTXTB ) 827* 828 50 CONTINUE 829* NGRIDS DROPOUT 830 60 CONTINUE 831* NGRIDS loop 832* 833* Print ending messages and close output file 834* 835 IF( IAM.EQ.0 ) THEN 836 KTESTS = KPASS + KFAIL + KSKIP 837 WRITE( NOUT, FMT = * ) 838 WRITE( NOUT, FMT = 9992 ) KTESTS 839 IF( CHECK ) THEN 840 WRITE( NOUT, FMT = 9991 ) KPASS 841 WRITE( NOUT, FMT = 9989 ) KFAIL 842 ELSE 843 WRITE( NOUT, FMT = 9990 ) KPASS 844 END IF 845 WRITE( NOUT, FMT = 9988 ) KSKIP 846 WRITE( NOUT, FMT = * ) 847 WRITE( NOUT, FMT = * ) 848 WRITE( NOUT, FMT = 9987 ) 849 IF( NOUT.NE.6 .AND. NOUT.NE.0 ) 850 $ CLOSE ( NOUT ) 851 END IF 852* 853 CALL BLACS_EXIT( 0 ) 854* 855 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, 856 $ '; It should be at least 1' ) 857 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', 858 $ I4 ) 859 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 860 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', 861 $ I11 ) 862 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ', 863 $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 864 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ', 865 $ '-------- ------ ------ ------' ) 866 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X, 867 $ I5, 1X, I2, 1X, 868 $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 ) 869 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 870 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 871 9990 FORMAT( I5, ' tests completed without checking.' ) 872 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 873 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 874 9987 FORMAT( 'END OF TESTS.' ) 875 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 876 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) 877* 878 STOP 879* 880* End of PDPBTRS_DRIVER 881* 882 END 883* 884