1      PROGRAM PDPBDRIVER
2*
3*
4*  -- ScaLAPACK routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     November 15, 1997
8*
9*  Purpose
10*  =======
11*
12*  PDPBDRIVER is a test program for the
13*  ScaLAPACK Band Cholesky routines corresponding to the options
14*  indicated by DPB.  This test driver performs an
15*  A = L*L**T factorization
16*  and solves a linear system with the factors for 1 or more RHS.
17*
18*  The program must be driven by a short data file.
19*  Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*''                              output file name (if any)
23*6                               device out
24*'L'                             define Lower or Upper
25*9                               number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073     values of N
27*6                               number of bandwidths
28*1 2 4 10 31 64                values of BW
29*1                               number of NB's
30*-1 3 4 5                        values of NB (-1 for automatic choice)
31*1                               number of NRHS's (must be 1)
32*8                               values of NRHS
33*1                               number of NBRHS's (ignored)
34*1                               values of NBRHS (ignored)
35*6                               number of process grids
36*1 2 3 4 5 7 8 15 26 47 64       values of "Number of Process Columns"
37*3.0                             threshold
38*
39*  Internal Parameters
40*  ===================
41*
42*  TOTMEM   INTEGER, default = 6200000.
43*           TOTMEM is a machine-specific parameter indicating the
44*           maximum amount of available memory in bytes.
45*           The user should customize TOTMEM to his platform.  Remember
46*           to leave room in memory for the operating system, the BLACS
47*           buffer, etc.  For example, on a system with 8 MB of memory
48*           per process (e.g., one processor on an Intel iPSC/860), the
49*           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50*           code, BLACS buffer, etc).  However, for PVM, we usually set
51*           TOTMEM = 2000000.  Some experimenting with the maximum value
52*           of TOTMEM may be required.
53*
54*  INTGSZ   INTEGER, default = 4 bytes.
55*  DBLESZ   INTEGER, default = 8 bytes.
56*           INTGSZ and DBLESZ indicate the length in bytes on the
57*           given platform for an integer and a double precision real.
58*  MEM      DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
59*           All arrays used by ScaLAPACK routines are allocated from
60*           this array and referenced by pointers.  The integer IPB,
61*           for example, is a pointer to the starting element of MEM for
62*           the solution vector(s) B.
63*
64*  =====================================================================
65*
66*  Code Developer: Andrew J. Cleary, University of Tennessee.
67*    Current address: Lawrence Livermore National Labs.
68*  This version released: August, 2001.
69*
70*  =====================================================================
71*
72*     .. Parameters ..
73      INTEGER            TOTMEM
74      PARAMETER          ( TOTMEM = 3000000 )
75      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
76     $                   LLD_, MB_, M_, NB_, N_, RSRC_
77      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
78     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
79     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
80*
81      DOUBLE PRECISION   ZERO
82      INTEGER            DBLESZ, MEMSIZ, NTESTS
83      DOUBLE PRECISION   PADVAL
84      PARAMETER          ( DBLESZ = 8,
85     $                     MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20,
86     $                     PADVAL = -9923.0D+0, ZERO = 0.0D+0 )
87      INTEGER            INT_ONE
88      PARAMETER          ( INT_ONE = 1 )
89*     ..
90*     .. Local Scalars ..
91      LOGICAL            CHECK
92      CHARACTER          UPLO
93      CHARACTER*6        PASSED
94      CHARACTER*80       OUTFILE
95      INTEGER            BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I,
96     $                   IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP,
97     $                   IMIDPAD, INFO, IPA, IPB, IPOSTPAD, IPREPAD,
98     $                   IPW, IPW_SIZE, IPW_SOLVE, IPW_SOLVE_SIZE,
99     $                   IP_DRIVER_W, IP_FILLIN, J, K, KFAIL, KPASS,
100     $                   KSKIP, KTESTS, MYCOL, MYRHS_SIZE, MYROW, N, NB,
101     $                   NBW, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP,
102     $                   NPCOL, NPROCS, NPROCS_REAL, NPROW, NQ, NRHS,
103     $                   N_FIRST, N_LAST, WORKSIZ
104      REAL               THRESH
105            DOUBLE PRECISION    ANORM, NOPS, NOPS2, SRESID, TMFLOPS,
106     $                          TMFLOPS2
107*     ..
108*     .. Local Arrays ..
109      INTEGER            BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ),
110     $                   DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ),
111     $                   NBRVAL( NTESTS ), NBVAL( NTESTS ),
112     $                   NRVAL( NTESTS ), NVAL( NTESTS ),
113     $                   PVAL( NTESTS ), QVAL( NTESTS )
114      DOUBLE PRECISION   CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 )
115*     ..
116*     .. External Subroutines ..
117      EXTERNAL           BLACS_BARRIER, BLACS_EXIT, BLACS_GET,
118     $                   BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT,
119     $                   BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN,
120     $                   PDCHEKPAD, PDFILLPAD, PDMATGEN, PDPBINFO,
121     $                   PDPBLASCHK, PDPBTRF, PDPBTRS, SLBOOT,
122     $                   SLCOMBINE, SLTIMER
123*     ..
124*     .. External Functions ..
125      INTEGER            NUMROC
126      LOGICAL            LSAME
127      DOUBLE PRECISION   PDLANGE
128      EXTERNAL           LSAME, NUMROC, PDLANGE
129*     ..
130*     .. Intrinsic Functions ..
131      INTRINSIC          DBLE, MAX, MIN, MOD
132*     ..
133*     .. Data Statements ..
134      DATA               KFAIL, KPASS, KSKIP, KTESTS / 4*0 /
135*     ..
136*
137*
138*
139*     .. Executable Statements ..
140*
141*     Get starting information
142*
143      CALL BLACS_PINFO( IAM, NPROCS )
144      IASEED = 100
145      IBSEED = 200
146*
147      CALL PDPBINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW,
148     $               BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL,
149     $               NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS,
150     $               QVAL, NTESTS, THRESH, MEM, IAM, NPROCS )
151*
152      CHECK = ( THRESH.GE.0.0D+0 )
153*
154*     Print headings
155*
156      IF( IAM.EQ.0 ) THEN
157         WRITE( NOUT, FMT = * )
158         WRITE( NOUT, FMT = 9995 )
159         WRITE( NOUT, FMT = 9994 )
160         WRITE( NOUT, FMT = * )
161      END IF
162*
163*     Loop over different process grids
164*
165      DO 60 I = 1, NGRIDS
166*
167         NPROW = PVAL( I )
168         NPCOL = QVAL( I )
169*
170*        Make sure grid information is correct
171*
172         IERR( 1 ) = 0
173         IF( NPROW.LT.1 ) THEN
174            IF( IAM.EQ.0 )
175     $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW
176            IERR( 1 ) = 1
177         ELSE IF( NPCOL.LT.1 ) THEN
178            IF( IAM.EQ.0 )
179     $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL
180            IERR( 1 ) = 1
181         ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN
182            IF( IAM.EQ.0 )
183     $         WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS
184            IERR( 1 ) = 1
185         END IF
186*
187         IF( IERR( 1 ).GT.0 ) THEN
188            IF( IAM.EQ.0 )
189     $         WRITE( NOUT, FMT = 9997 ) 'grid'
190            KSKIP = KSKIP + 1
191            GO TO 50
192         END IF
193*
194*        Define process grid
195*
196         CALL BLACS_GET( -1, 0, ICTXT )
197         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
198*
199*
200*        Define transpose process grid
201*
202         CALL BLACS_GET( -1, 0, ICTXTB )
203         CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW )
204*
205*        Go to bottom of process grid loop if this case doesn't use my
206*        process
207*
208         CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
209*
210         IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN
211            GO TO 50
212         ENDIF
213*
214         DO 40 J = 1, NMAT
215*
216           IERR( 1 ) = 0
217*
218           N = NVAL( J )
219*
220*          Make sure matrix information is correct
221*
222           IF( N.LT.1 ) THEN
223               IF( IAM.EQ.0 )
224     $            WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N
225               IERR( 1 ) = 1
226           END IF
227*
228*          Check all processes for an error
229*
230           CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
231     $                    -1, 0 )
232*
233           IF( IERR( 1 ).GT.0 ) THEN
234               IF( IAM.EQ.0 )
235     $            WRITE( NOUT, FMT = 9997 ) 'size'
236               KSKIP = KSKIP + 1
237               GO TO 40
238           END IF
239*
240*
241           DO 45 BW_NUM = 1, NBW
242*
243             IERR( 1 ) = 0
244*
245             BW = BWVAL( BW_NUM )
246             IF( BW.LT.0 ) THEN
247               IF( IAM.EQ.0 )
248     $            WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW
249               IERR( 1 ) = 1
250             END IF
251*
252             IF( BW.GT.N-1 ) THEN
253               IERR( 1 ) = 1
254             END IF
255*
256*            Check all processes for an error
257*
258             CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
259     $                    -1, 0 )
260*
261             IF( IERR( 1 ).GT.0 ) THEN
262               KSKIP = KSKIP + 1
263               GO TO 45
264             END IF
265*
266             DO 30 K = 1, NNB
267*
268               IERR( 1 ) = 0
269*
270               NB = NBVAL( K )
271               IF( NB.LT.0 ) THEN
272                  NB =( (N-(NPCOL-1)*BW-1)/NPCOL + 1 )
273     $               + BW
274                  NB = MAX( NB, 2*BW )
275                  NB = MIN( N, NB )
276               END IF
277*
278*              Make sure NB is legal
279*
280               IERR( 1 ) = 0
281               IF( NB.LT.MIN( 2*BW, N ) ) THEN
282                  IERR( 1 ) = 1
283               ENDIF
284*
285*              Check all processes for an error
286*
287               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
288     $                       -1, 0 )
289*
290               IF( IERR( 1 ).GT.0 ) THEN
291                  KSKIP = KSKIP + 1
292                  GO TO 30
293               END IF
294*
295*              Padding constants
296*
297               NP = NUMROC( (BW+1), (BW+1),
298     $                      MYROW, 0, NPROW )
299               NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
300*
301               IF( CHECK ) THEN
302                  IPREPAD  = ((BW+1)+10)
303                  IMIDPAD  = 10
304                  IPOSTPAD = ((BW+1)+10)
305               ELSE
306                  IPREPAD  = 0
307                  IMIDPAD  = 0
308                  IPOSTPAD = 0
309               END IF
310*
311*              Initialize the array descriptor for the matrix A
312*
313               CALL DESCINIT( DESCA2D, (BW+1), N,
314     $                       (BW+1), NB, 0, 0,
315     $                       ICTXT,((BW+1)+10), IERR( 1 ) )
316*
317*              Convert this to 1D descriptor
318*
319               DESCA( 1 ) = 501
320               DESCA( 3 ) = N
321               DESCA( 4 ) = NB
322               DESCA( 5 ) = 0
323               DESCA( 2 ) = ICTXT
324               DESCA( 6 ) = ((BW+1)+10)
325               DESCA( 7 ) = 0
326*
327               IERR_TEMP = IERR( 1 )
328               IERR( 1 ) = 0
329               IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP )
330*
331*              Check all processes for an error
332*
333               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
334*
335               IF( IERR( 1 ).LT.0 ) THEN
336                  IF( IAM.EQ.0 )
337     $               WRITE( NOUT, FMT = 9997 ) 'descriptor'
338                  KSKIP = KSKIP + 1
339                  GO TO 30
340               END IF
341*
342*              Assign pointers into MEM for SCALAPACK arrays, A is
343*              allocated starting at position MEM( IPREPAD+1 )
344*
345               FREE_PTR = 1
346               IPB = 0
347*
348*              Save room for prepadding
349               FREE_PTR = FREE_PTR + IPREPAD
350*
351               IPA = FREE_PTR
352               FREE_PTR = FREE_PTR + DESCA2D( LLD_ )*
353     $                     DESCA2D( NB_ )
354     $                     + IPOSTPAD
355*
356*              Add memory for fillin
357*              Fillin space needs to store:
358*                Fillin spike:
359*                Contribution to previous proc's diagonal block of
360*                  reduced system:
361*                Off-diagonal block of reduced system:
362*                Diagonal block of reduced system:
363*
364               FILLIN_SIZE =
365     $            (NB+2*BW)*BW
366*
367*              Claim memory for fillin
368*
369               FREE_PTR = FREE_PTR + IPREPAD
370               IP_FILLIN = FREE_PTR
371               FREE_PTR = FREE_PTR + FILLIN_SIZE
372*
373*              Workspace needed by computational routines:
374*
375               IPW_SIZE = 0
376*
377*              factorization:
378*
379               IPW_SIZE = BW*BW
380*
381*              Claim memory for IPW
382*
383               IPW = FREE_PTR
384               FREE_PTR = FREE_PTR + IPW_SIZE
385*
386*              Check for adequate memory for problem size
387*
388               IERR( 1 ) = 0
389               IF( FREE_PTR.GT.MEMSIZ ) THEN
390                  IF( IAM.EQ.0 )
391     $               WRITE( NOUT, FMT = 9996 )
392     $               'divide and conquer factorization',
393     $               (FREE_PTR )*DBLESZ
394                  IERR( 1 ) = 1
395               END IF
396*
397*              Check all processes for an error
398*
399               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
400     $                       1, -1, 0 )
401*
402               IF( IERR( 1 ).GT.0 ) THEN
403                  IF( IAM.EQ.0 )
404     $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
405                  KSKIP = KSKIP + 1
406                  GO TO 30
407               END IF
408*
409*              Worksize needed for LAPRNT
410               WORKSIZ = MAX( ((BW+1)+10), NB )
411*
412               IF( CHECK ) THEN
413*
414*                 Calculate the amount of workspace required by
415*                 the checking routines.
416*
417*                 PDLANGE
418                  WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) )
419*
420*                 PDPBLASCHK
421                  WORKSIZ = MAX( WORKSIZ,
422     $                   MAX(5,MAX(BW*(BW+2),NB))+2*NB )
423               END IF
424*
425               FREE_PTR = FREE_PTR + IPREPAD
426               IP_DRIVER_W = FREE_PTR
427               FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD
428*
429*
430*              Check for adequate memory for problem size
431*
432               IERR( 1 ) = 0
433               IF( FREE_PTR.GT.MEMSIZ ) THEN
434                  IF( IAM.EQ.0 )
435     $               WRITE( NOUT, FMT = 9996 ) 'factorization',
436     $               ( FREE_PTR )*DBLESZ
437                  IERR( 1 ) = 1
438               END IF
439*
440*              Check all processes for an error
441*
442               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
443     $                       1, -1, 0 )
444*
445               IF( IERR( 1 ).GT.0 ) THEN
446                  IF( IAM.EQ.0 )
447     $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
448                  KSKIP = KSKIP + 1
449                  GO TO 30
450               END IF
451*
452               CALL PDBMATGEN( ICTXT, UPLO, 'B', BW, BW, N, (BW+1), NB,
453     $                         MEM( IPA ), ((BW+1)+10), 0, 0, IASEED,
454     $                         MYROW, MYCOL, NPROW, NPCOL )
455*
456               CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
457     $                          ((BW+1)+10), IPREPAD, IPOSTPAD,
458     $                          PADVAL )
459*
460               CALL PDFILLPAD( ICTXT, WORKSIZ, 1,
461     $                          MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
462     $                          IPREPAD, IPOSTPAD, PADVAL )
463*
464*              Calculate norm of A for residual error-checking
465*
466               IF( CHECK ) THEN
467*
468                  ANORM = PDLANGE( '1', (BW+1),
469     $                            N, MEM( IPA ), 1, 1,
470     $                            DESCA2D, MEM( IP_DRIVER_W ) )
471                  CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ,
472     $                         MEM( IPA-IPREPAD ), ((BW+1)+10),
473     $                         IPREPAD, IPOSTPAD, PADVAL )
474                  CALL PDCHEKPAD( ICTXT, 'PDLANGE',
475     $                            WORKSIZ, 1,
476     $                            MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
477     $                            IPREPAD, IPOSTPAD, PADVAL )
478               END IF
479*
480*
481               CALL SLBOOT()
482               CALL BLACS_BARRIER( ICTXT, 'All' )
483*
484*              Perform factorization
485*
486               CALL SLTIMER( 1 )
487*
488               CALL PDPBTRF( UPLO, N, BW, MEM( IPA ), 1, DESCA,
489     $                       MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ),
490     $                       IPW_SIZE, INFO )
491*
492               CALL SLTIMER( 1 )
493*
494               IF( INFO.NE.0 ) THEN
495                  IF( IAM.EQ.0 ) THEN
496                    WRITE( NOUT, FMT = * ) 'PDPBTRF INFO=', INFO
497                  ENDIF
498                  KFAIL = KFAIL + 1
499                  GO TO 30
500               END IF
501*
502               IF( CHECK ) THEN
503*
504*                 Check for memory overwrite in factorization
505*
506                  CALL PDCHEKPAD( ICTXT, 'PDPBTRF', NP,
507     $                    NQ, MEM( IPA-IPREPAD ), ((BW+1)+10),
508     $                    IPREPAD, IPOSTPAD, PADVAL )
509               END IF
510*
511*
512*              Loop over the different values for NRHS
513*
514               DO 20 HH = 1, NNR
515*
516                  IERR( 1 ) = 0
517*
518                  NRHS = NRVAL( HH )
519*
520*                    Initialize Array Descriptor for rhs
521*
522                     CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0,
523     $                             ICTXTB, NB+10, IERR( 1 ) )
524*
525*                    Convert this to 1D descriptor
526*
527                     DESCB( 1 ) = 502
528                     DESCB( 3 ) = N
529                     DESCB( 4 ) = NB
530                     DESCB( 5 ) = 0
531                     DESCB( 2 ) = ICTXT
532                     DESCB( 6 ) = DESCB2D( LLD_ )
533                     DESCB( 7 ) = 0
534*
535*                    reset free_ptr to reuse space for right hand sides
536*
537                     IF( IPB .GT. 0 ) THEN
538                       FREE_PTR = IPB
539                     ENDIF
540*
541                     FREE_PTR = FREE_PTR + IPREPAD
542                     IPB = FREE_PTR
543                     FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ )
544     $                          + IPOSTPAD
545*
546*                    Allocate workspace for workspace in TRS routine:
547*
548                     IPW_SOLVE_SIZE = (BW*NRHS)
549*
550                     IPW_SOLVE = FREE_PTR
551                     FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE
552*
553                     IERR( 1 ) = 0
554                     IF( FREE_PTR.GT.MEMSIZ ) THEN
555                        IF( IAM.EQ.0 )
556     $                     WRITE( NOUT, FMT = 9996 )'solve',
557     $                            ( FREE_PTR )*DBLESZ
558                        IERR( 1 ) = 1
559                     END IF
560*
561*                    Check all processes for an error
562*
563                     CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1,
564     $                             IERR, 1, -1, 0 )
565*
566                     IF( IERR( 1 ).GT.0 ) THEN
567                        IF( IAM.EQ.0 )
568     $                     WRITE( NOUT, FMT = 9997 ) 'MEMORY'
569                        KSKIP = KSKIP + 1
570                        GO TO 15
571                     END IF
572*
573                     MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL )
574*
575*                    Generate RHS
576*
577                     CALL PDMATGEN(ICTXTB, 'No', 'No',
578     $                        DESCB2D( M_ ), DESCB2D( N_ ),
579     $                        DESCB2D( MB_ ), DESCB2D( NB_ ),
580     $                        MEM( IPB ),
581     $                        DESCB2D( LLD_ ), DESCB2D( RSRC_ ),
582     $                        DESCB2D( CSRC_ ),
583     $                        IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL,
584     $                        MYROW, NPCOL, NPROW )
585*
586                     IF( CHECK ) THEN
587                        CALL PDFILLPAD( ICTXTB, NB, NRHS,
588     $                                  MEM( IPB-IPREPAD ),
589     $                                  DESCB2D( LLD_ ),
590     $                                  IPREPAD, IPOSTPAD,
591     $                                  PADVAL )
592                        CALL PDFILLPAD( ICTXT, WORKSIZ, 1,
593     $                                  MEM( IP_DRIVER_W-IPREPAD ),
594     $                                  WORKSIZ, IPREPAD,
595     $                                  IPOSTPAD, PADVAL )
596                     END IF
597*
598*
599                     CALL BLACS_BARRIER( ICTXT, 'All')
600                     CALL SLTIMER( 2 )
601*
602*                    Solve linear system via factorization
603*
604                     CALL PDPBTRS( UPLO, N, BW, NRHS, MEM( IPA ), 1,
605     $                             DESCA, MEM( IPB ), 1, DESCB,
606     $                             MEM( IP_FILLIN ), FILLIN_SIZE,
607     $                             MEM( IPW_SOLVE ), IPW_SOLVE_SIZE,
608     $                             INFO )
609*
610                     CALL SLTIMER( 2 )
611*
612                     IF( INFO.NE.0 ) THEN
613                       IF( IAM.EQ.0 )
614     $  WRITE( NOUT, FMT = * ) 'PDPBTRS INFO=', INFO
615                       KFAIL = KFAIL + 1
616                       PASSED = 'FAILED'
617                       GO TO 20
618                     END IF
619*
620                     IF( CHECK ) THEN
621*
622*                       check for memory overwrite
623*
624                        CALL PDCHEKPAD( ICTXT, 'PDPBTRS-work',
625     $                                  WORKSIZ, 1,
626     $                                  MEM( IP_DRIVER_W-IPREPAD ),
627     $                                  WORKSIZ, IPREPAD,
628     $                                  IPOSTPAD, PADVAL )
629*
630*                       check the solution to rhs
631*
632                        SRESID = ZERO
633*
634                        CALL PDPBLASCHK( 'S', UPLO, N, BW, BW, NRHS,
635     $                              MEM( IPB ), 1, 1, DESCB2D,
636     $                              IASEED, MEM( IPA ), 1, 1, DESCA2D,
637     $                              IBSEED, ANORM, SRESID,
638     $                              MEM( IP_DRIVER_W ), WORKSIZ )
639*
640                        IF( IAM.EQ.0 ) THEN
641                           IF( SRESID.GT.THRESH )
642     $                        WRITE( NOUT, FMT = 9985 ) SRESID
643                        END IF
644*
645*                       The second test is a NaN trap
646*
647                        IF( ( SRESID.LE.THRESH          ).AND.
648     $                      ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN
649                           KPASS = KPASS + 1
650                           PASSED = 'PASSED'
651                        ELSE
652                           KFAIL = KFAIL + 1
653                           PASSED = 'FAILED'
654                        END IF
655*
656                     END IF
657*
658   15                CONTINUE
659*                    Skipped tests jump to here to print out "SKIPPED"
660*
661*                    Gather maximum of all CPU and WALL clock timings
662*
663                     CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1,
664     $                               WTIME )
665                     CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1,
666     $                               CTIME )
667*
668*                    Print results
669*
670                     IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
671*
672                        NOPS = 0
673                        NOPS2 = 0
674*
675                        N_FIRST = NB
676                        NPROCS_REAL = ( N-1 )/NB + 1
677                        N_LAST = MOD( N-1, NB ) + 1
678*
679*
680                        NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)*
681     $                        ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) +
682     $                        DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 /
683     $                        2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) )
684                        NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW)
685     $                        *( -1.D0 /2.D0+DBLE(BW)
686     $                        *( -1.D0 / 3.D0 ) ) ) +
687     $                        DBLE(N)*( DBLE(BW) /
688     $                        2.D0*( 1.D0+DBLE(BW) ) )
689*
690                        NOPS = NOPS +
691     $                         DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )*
692     $                         ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)*
693     $                         ( DBLE(BW)*( 2*DBLE(N)-
694     $                         ( DBLE(BW)+1.D0 ) ) )
695*
696*
697*                       Second calc to represent actual hardware speed
698*
699*                     NB bw^2  flops for LLt factorization in 1st proc
700*
701                      NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2  )
702*
703                      IF ( NPROCS_REAL .GT. 1) THEN
704*                       4 NB bw^2  flops for LLt factorization and
705*                         spike calc in last processor
706*
707                        NOPS2 = NOPS2 +
708     $                          4*( (DBLE(N_LAST)*DBLE(BW)**2) )
709                      ENDIF
710*
711                      IF ( NPROCS_REAL .GT. 2) THEN
712*                       4 NB bw^2  flops for LLt factorization and
713*                         spike calc in other processors
714*
715                        NOPS2 = NOPS2 + (NPROCS_REAL-2)*
716     $                          4*( (DBLE(NB)*DBLE(BW)**2) )
717                      ENDIF
718*
719*                     Reduced system
720*
721                      NOPS2 = NOPS2 +
722     $                  ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 )
723                      IF( NPROCS_REAL .GT. 1 ) THEN
724                        NOPS2 = NOPS2 +
725     $                     ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW )
726                      ENDIF
727*
728*
729*                     nrhs * 4 n_first*bw flops for LLt solve in proc 1.
730*
731                      NOPS2 = NOPS2 +
732     $                    ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) )
733*
734                      IF ( NPROCS_REAL .GT. 1 ) THEN
735*
736*                     2*nrhs*4 n_last*bw flops for LLt solve in last.
737*
738                        NOPS2 = NOPS2 +
739     $                  2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) )
740                      ENDIF
741*
742                      IF ( NPROCS_REAL .GT. 2 ) THEN
743*
744*                     2 * nrhs * 4 NB*bw flops for LLt solve in others.
745*
746                        NOPS2 = NOPS2 +
747     $                    ( NPROCS_REAL-2)*2*
748     $                    ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) )
749                      ENDIF
750*
751*                     Reduced system
752*
753                      NOPS2 = NOPS2 +
754     $                  NRHS*( NPROCS_REAL-1 ) * ( BW*BW )
755                      IF( NPROCS_REAL .GT. 1 ) THEN
756                        NOPS2 = NOPS2 +
757     $                   NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW )
758                      ENDIF
759*
760*
761*                       Calculate total megaflops - factorization and/or
762*                       solve -- for WALL and CPU time, and print output
763*
764*                       Print WALL time if machine supports it
765*
766                        IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN
767                           TMFLOPS = NOPS /
768     $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
769                        ELSE
770                           TMFLOPS = 0.0D+0
771                        END IF
772*
773                        IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN
774                           TMFLOPS2 = NOPS2 /
775     $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
776                        ELSE
777                           TMFLOPS2 = 0.0D+0
778                        END IF
779*
780                        IF( WTIME( 2 ).GE.0.0D+0 )
781     $                     WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO,
782     $                            N,
783     $                            BW,
784     $                            NB, NRHS, NPROW, NPCOL,
785     $                            WTIME( 1 ), WTIME( 2 ), TMFLOPS,
786     $                            TMFLOPS2, PASSED
787*
788*                       Print CPU time if machine supports it
789*
790                        IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
791                           TMFLOPS = NOPS /
792     $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
793                        ELSE
794                           TMFLOPS = 0.0D+0
795                        END IF
796*
797                        IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
798                           TMFLOPS2 = NOPS2 /
799     $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
800                        ELSE
801                           TMFLOPS2 = 0.0D+0
802                        END IF
803*
804                        IF( CTIME( 2 ).GE.0.0D+0 )
805     $                     WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO,
806     $                            N,
807     $                            BW,
808     $                            NB, NRHS, NPROW, NPCOL,
809     $                            CTIME( 1 ), CTIME( 2 ), TMFLOPS,
810     $                            TMFLOPS2, PASSED
811*
812                     END IF
813   20          CONTINUE
814*
815*
816   30       CONTINUE
817*           NNB loop
818*
819   45      CONTINUE
820*          BW[] loop
821*
822   40   CONTINUE
823*       NMAT loop
824*
825        CALL BLACS_GRIDEXIT( ICTXT )
826        CALL BLACS_GRIDEXIT( ICTXTB )
827*
828   50   CONTINUE
829*       NGRIDS DROPOUT
830   60 CONTINUE
831*     NGRIDS loop
832*
833*     Print ending messages and close output file
834*
835      IF( IAM.EQ.0 ) THEN
836         KTESTS = KPASS + KFAIL + KSKIP
837         WRITE( NOUT, FMT = * )
838         WRITE( NOUT, FMT = 9992 ) KTESTS
839         IF( CHECK ) THEN
840            WRITE( NOUT, FMT = 9991 ) KPASS
841            WRITE( NOUT, FMT = 9989 ) KFAIL
842         ELSE
843            WRITE( NOUT, FMT = 9990 ) KPASS
844         END IF
845         WRITE( NOUT, FMT = 9988 ) KSKIP
846         WRITE( NOUT, FMT = * )
847         WRITE( NOUT, FMT = * )
848         WRITE( NOUT, FMT = 9987 )
849         IF( NOUT.NE.6 .AND. NOUT.NE.0 )
850     $      CLOSE ( NOUT )
851      END IF
852*
853      CALL BLACS_EXIT( 0 )
854*
855 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3,
856     $        '; It should be at least 1' )
857 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most',
858     $        I4 )
859 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
860 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
861     $        I11 )
862 9995 FORMAT( 'TIME UL      N  BW   NB  NRHS  P    Q L*U Time ',
863     $        'Slv Time   MFLOPS   MFLOP2  CHECK' )
864 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ',
865     $        '--------   ------   ------ ------' )
866 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X,
867     $        I5, 1X, I2, 1X,
868     $        I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 )
869 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' )
870 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
871 9990 FORMAT( I5, ' tests completed without checking.' )
872 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
873 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
874 9987 FORMAT( 'END OF TESTS.' )
875 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 )
876 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 )
877*
878      STOP
879*
880*     End of PDPBTRS_DRIVER
881*
882      END
883*
884