1      PROGRAM PSGBDRIVER
2*
3*
4*  -- ScaLAPACK routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     November 15, 1997
8*
9*  Purpose
10*  =======
11*
12*  PSGBDRIVER is a test program for the
13*  ScaLAPACK Band Cholesky routines corresponding to the options
14*  indicated by SGB.  This test driver performs an
15*  A = L*U factorization
16*  and solves a linear system with the factors for 1 or more RHS.
17*
18*  The program must be driven by a short data file.
19*  Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*''                              output file name (if any)
23*6                               device out
24*'L'                             define Lower or Upper
25*9                               number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073     values of N
27*6                               number of bandwidths
28*1 2 4 10 31 64                values of BW
29*1                               number of NB's
30*-1 3 4 5                        values of NB (-1 for automatic choice)
31*1                               number of NRHS's (must be 1)
32*8                               values of NRHS
33*1                               number of NBRHS's (ignored)
34*1                               values of NBRHS (ignored)
35*6                               number of process grids
36*1 2 3 4 5 7 8 15 26 47 64       values of "Number of Process Columns"
37*3.0                             threshold
38*
39*  Internal Parameters
40*  ===================
41*
42*  TOTMEM   INTEGER, default = 6200000.
43*           TOTMEM is a machine-specific parameter indicating the
44*           maximum amount of available memory in bytes.
45*           The user should customize TOTMEM to his platform.  Remember
46*           to leave room in memory for the operating system, the BLACS
47*           buffer, etc.  For example, on a system with 8 MB of memory
48*           per process (e.g., one processor on an Intel iPSC/860), the
49*           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50*           code, BLACS buffer, etc).  However, for PVM, we usually set
51*           TOTMEM = 2000000.  Some experimenting with the maximum value
52*           of TOTMEM may be required.
53*
54*  INTMEM   INTEGER, default = 2048.
55*           INTMEM is the size of the integer workspace used in this
56*           driver as input as the IPIV vector. It represents an
57*           upper bound on NB, the blocksize of the data
58*           distribution.
59*
60*  INTGSZ   INTEGER, default = 4 bytes.
61*  REALSZ   INTEGER, default = 4 bytes.
62*           INTGSZ and REALSZ indicate the length in bytes on the
63*           given platform for an integer and a single precision real.
64*  MEM      REAL array, dimension ( TOTMEM / REALSZ )
65*           All arrays used by ScaLAPACK routines are allocated from
66*           this array and referenced by pointers.  The integer IPB,
67*           for example, is a pointer to the starting element of MEM for
68*           the solution vector(s) B.
69*
70*  =====================================================================
71*
72*  Code Developer: Andrew J. Cleary, University of Tennessee.
73*    Current address: Lawrence Livermore National Labs.
74*  This version released: August, 2001.
75*
76*  =====================================================================
77*
78*     .. Parameters ..
79      INTEGER            TOTMEM
80      PARAMETER          ( TOTMEM = 3000000 )
81      INTEGER            INTMEM
82      PARAMETER          ( INTMEM = 2048 )
83      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
84     $                   LLD_, MB_, M_, NB_, N_, RSRC_
85      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
86     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
87     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
88*
89      REAL               ZERO
90      INTEGER            MEMSIZ, NTESTS, REALSZ
91      REAL               PADVAL
92      PARAMETER          ( REALSZ = 4,
93     $                     MEMSIZ = TOTMEM / REALSZ, NTESTS = 20,
94     $                     PADVAL = -9923.0E+0, ZERO = 0.0E+0 )
95      INTEGER            INT_ONE
96      PARAMETER          ( INT_ONE = 1 )
97*     ..
98*     .. Local Scalars ..
99      LOGICAL            CHECK
100      CHARACTER          TRANS
101      CHARACTER*6        PASSED
102      CHARACTER*80       OUTFILE
103      INTEGER            BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH,
104     $                   I, IAM, IASEED, IBSEED, ICTXT, ICTXTB,
105     $                   IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD,
106     $                   IPREPAD, IPW, IPW_SIZE, IPW_SOLVE,
107     $                   IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K,
108     $                   KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE,
109     $                   MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR,
110     $                   NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL,
111     $                   NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ
112      REAL               ANORM, SRESID, THRESH
113      DOUBLE PRECISION   NOPS, NOPS2, TMFLOPS, TMFLOPS2
114*     ..
115*     .. Local Arrays ..
116      INTEGER    IPIV(INTMEM)
117      INTEGER            BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ),
118     $                   DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ),
119     $                   IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ),
120     $                   NRVAL( NTESTS ), NVAL( NTESTS ),
121     $                   PVAL( NTESTS ), QVAL( NTESTS )
122      REAL               MEM( MEMSIZ )
123      DOUBLE PRECISION   CTIME( 2 ), WTIME( 2 )
124*     ..
125*     .. External Subroutines ..
126      EXTERNAL           BLACS_BARRIER, BLACS_EXIT, BLACS_GET,
127     $                   BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT,
128     $                   BLACS_PINFO, DESCINIT, IGSUM2D, PSBMATGEN,
129     $                   PSCHEKPAD, PSDBLASCHK, PSFILLPAD, PSGBINFO,
130     $                   PSGBTRF, PSGBTRS, PSMATGEN, SLBOOT, SLCOMBINE,
131     $                   SLTIMER
132*     ..
133*     .. External Functions ..
134      INTEGER            NUMROC
135      LOGICAL            LSAME
136      REAL               PSLANGE
137      EXTERNAL           LSAME, NUMROC, PSLANGE
138*     ..
139*     .. Intrinsic Functions ..
140      INTRINSIC          DBLE, MAX, MIN, MOD
141*     ..
142*     .. Data Statements ..
143      DATA               KFAIL, KPASS, KSKIP, KTESTS / 4*0 /
144*     ..
145*
146*
147*
148*     .. Executable Statements ..
149*
150*     Get starting information
151*
152      CALL BLACS_PINFO( IAM, NPROCS )
153      IASEED = 100
154      IBSEED = 200
155*
156      CALL PSGBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW,
157     $               BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR,
158     $               NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL,
159     $               NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS )
160*
161      CHECK = ( THRESH.GE.0.0E+0 )
162*
163*     Print headings
164*
165      IF( IAM.EQ.0 ) THEN
166         WRITE( NOUT, FMT = * )
167         WRITE( NOUT, FMT = 9995 )
168         WRITE( NOUT, FMT = 9994 )
169         WRITE( NOUT, FMT = * )
170      END IF
171*
172*     Loop over different process grids
173*
174      DO 60 I = 1, NGRIDS
175*
176         NPROW = PVAL( I )
177         NPCOL = QVAL( I )
178*
179*        Make sure grid information is correct
180*
181         IERR( 1 ) = 0
182         IF( NPROW.LT.1 ) THEN
183            IF( IAM.EQ.0 )
184     $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW
185            IERR( 1 ) = 1
186         ELSE IF( NPCOL.LT.1 ) THEN
187            IF( IAM.EQ.0 )
188     $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL
189            IERR( 1 ) = 1
190         ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN
191            IF( IAM.EQ.0 )
192     $         WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS
193            IERR( 1 ) = 1
194         END IF
195*
196         IF( IERR( 1 ).GT.0 ) THEN
197            IF( IAM.EQ.0 )
198     $         WRITE( NOUT, FMT = 9997 ) 'grid'
199            KSKIP = KSKIP + 1
200            GO TO 50
201         END IF
202*
203*        Define process grid
204*
205         CALL BLACS_GET( -1, 0, ICTXT )
206         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
207*
208*
209*        Define transpose process grid
210*
211         CALL BLACS_GET( -1, 0, ICTXTB )
212         CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW )
213*
214*        Go to bottom of process grid loop if this case doesn't use my
215*        process
216*
217         CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
218*
219         IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN
220            GO TO 50
221         ENDIF
222*
223         DO 40 J = 1, NMAT
224*
225           IERR( 1 ) = 0
226*
227           N = NVAL( J )
228*
229*          Make sure matrix information is correct
230*
231           IF( N.LT.1 ) THEN
232               IF( IAM.EQ.0 )
233     $            WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N
234               IERR( 1 ) = 1
235           END IF
236*
237*          Check all processes for an error
238*
239           CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
240     $                    -1, 0 )
241*
242           IF( IERR( 1 ).GT.0 ) THEN
243               IF( IAM.EQ.0 )
244     $            WRITE( NOUT, FMT = 9997 ) 'size'
245               KSKIP = KSKIP + 1
246               GO TO 40
247           END IF
248*
249*
250           DO 45 BW_NUM = 1, NBW
251*
252             IERR( 1 ) = 0
253*
254             BWL = BWLVAL( BW_NUM )
255             IF( BWL.LT.1 ) THEN
256               IF( IAM.EQ.0 )
257     $            WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL
258               IERR( 1 ) = 1
259             END IF
260*
261             BWU = BWUVAL( BW_NUM )
262             IF( BWU.LT.1 ) THEN
263               IF( IAM.EQ.0 )
264     $            WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU
265               IERR( 1 ) = 1
266             END IF
267*
268             IF( BWL.GT.N-1 ) THEN
269               IF( IAM.EQ.0 ) THEN
270                 IERR( 1 ) = 1
271               ENDIF
272             END IF
273*
274             IF( BWU.GT.N-1 ) THEN
275               IF( IAM.EQ.0 ) THEN
276                 IERR( 1 ) = 1
277               ENDIF
278             END IF
279*
280*            Check all processes for an error
281*
282             CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
283     $                    -1, 0 )
284*
285             IF( IERR( 1 ).GT.0 ) THEN
286               KSKIP = KSKIP + 1
287               GO TO 45
288             END IF
289*
290             DO 30 K = 1, NNB
291*
292               IERR( 1 ) = 0
293*
294               NB = NBVAL( K )
295               IF( NB.LT.0 ) THEN
296                  NB =( (N-(NPCOL-1)*(BWL+BWU)-1)/NPCOL + 1 )
297     $               + (BWL+BWU)
298                  NB = MAX( NB, 2*(BWL+BWU) )
299                  NB = MIN( N, NB )
300               END IF
301*
302*              Make sure NB is legal
303*
304               IERR( 1 ) = 0
305*
306               IF( NB.GT.INTMEM ) THEN
307                  IERR( 1 ) = 1
308                  IF( IAM.EQ.0 ) THEN
309                     WRITE( NOUT,* )'You have chosen an '
310     $            ,'NB > INTMEM in the driver.'
311                     WRITE(NOUT, *)'Please edit the driver '
312     $            ,'and increase the value of INTMEM'
313                  ENDIF
314               END IF
315*
316*              Check all processes for an error
317*
318               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
319     $                       -1, 0 )
320*
321               IF( IERR( 1 ).GT.0 ) THEN
322                  KSKIP = KSKIP + 1
323                  GO TO 30
324               END IF
325*
326*              Padding constants
327*
328               NP = NUMROC( (2*BWL+2*BWU+1), (2*BWL+2*BWU+1),
329     $                      MYROW, 0, NPROW )
330               NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
331*
332               IF( CHECK ) THEN
333                  IPREPAD  = ((2*BWL+2*BWU+1)+10)
334                  IMIDPAD  = 10
335                  IPOSTPAD = ((2*BWL+2*BWU+1)+10)
336               ELSE
337                  IPREPAD  = 0
338                  IMIDPAD  = 0
339                  IPOSTPAD = 0
340               END IF
341*
342*              Initialize the array descriptor for the matrix A
343*
344               CALL DESCINIT( DESCA2D, (2*BWL+2*BWU+1), N,
345     $                       (2*BWL+2*BWU+1), NB, 0, 0,
346     $                       ICTXT,((2*BWL+2*BWU+1)+10), IERR( 1 ) )
347*
348*              Convert this to 1D descriptor
349*
350               DESCA( 1 ) = 501
351               DESCA( 3 ) = N
352               DESCA( 4 ) = NB
353               DESCA( 5 ) = 0
354               DESCA( 2 ) = ICTXT
355               DESCA( 6 ) = ((2*BWL+2*BWU+1)+10)
356               DESCA( 7 ) = 0
357*
358               IERR_TEMP = IERR( 1 )
359               IERR( 1 ) = 0
360               IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP )
361*
362*              Check all processes for an error
363*
364               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
365*
366               IF( IERR( 1 ).LT.0 ) THEN
367                  IF( IAM.EQ.0 )
368     $               WRITE( NOUT, FMT = 9997 ) 'descriptor'
369                  KSKIP = KSKIP + 1
370                  GO TO 30
371               END IF
372*
373*              Assign pointers into MEM for SCALAPACK arrays, A is
374*              allocated starting at position MEM( IPREPAD+1 )
375*
376               FREE_PTR = 1
377               IPB = 0
378*
379*              Save room for prepadding
380               FREE_PTR = FREE_PTR + IPREPAD
381*
382               IPA = FREE_PTR
383               FREE_PTR = FREE_PTR + DESCA2D( LLD_ )*
384     $                     DESCA2D( NB_ )
385     $                     + IPOSTPAD
386*
387*              Add memory for fillin
388*              Fillin space needs to store:
389*                Fillin spike:
390*                Contribution to previous proc's diagonal block of
391*                  reduced system:
392*                Off-diagonal block of reduced system:
393*                Diagonal block of reduced system:
394*
395               FILLIN_SIZE =
396     $            (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU)
397*
398*              Claim memory for fillin
399*
400               FREE_PTR = FREE_PTR + IPREPAD
401               IP_FILLIN = FREE_PTR
402               FREE_PTR = FREE_PTR + FILLIN_SIZE
403*
404*              Workspace needed by computational routines:
405*
406               IPW_SIZE = 0
407*
408*              factorization:
409*
410               IPW_SIZE = 1
411*
412*              Claim memory for IPW
413*
414               IPW = FREE_PTR
415               FREE_PTR = FREE_PTR + IPW_SIZE
416*
417*              Check for adequate memory for problem size
418*
419               IERR( 1 ) = 0
420               IF( FREE_PTR.GT.MEMSIZ ) THEN
421                  IF( IAM.EQ.0 )
422     $               WRITE( NOUT, FMT = 9996 )
423     $               'divide and conquer factorization',
424     $               (FREE_PTR )*REALSZ
425                  IERR( 1 ) = 1
426               END IF
427*
428*              Check all processes for an error
429*
430               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
431     $                       1, -1, 0 )
432*
433               IF( IERR( 1 ).GT.0 ) THEN
434                  IF( IAM.EQ.0 )
435     $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
436                  KSKIP = KSKIP + 1
437                  GO TO 30
438               END IF
439*
440*              Worksize needed for LAPRNT
441               WORKSIZ = MAX( ((2*BWL+2*BWU+1)+10), NB )
442*
443               IF( CHECK ) THEN
444*
445*                 Calculate the amount of workspace required by
446*                 the checking routines.
447*
448*                 PSLANGE
449                  WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) )
450*
451*                 PSDBLASCHK
452                  WORKSIZ = MAX( WORKSIZ,
453     $          MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB )
454               END IF
455*
456               FREE_PTR = FREE_PTR + IPREPAD
457               IP_DRIVER_W = FREE_PTR
458               FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD
459*
460*
461*              Check for adequate memory for problem size
462*
463               IERR( 1 ) = 0
464               IF( FREE_PTR.GT.MEMSIZ ) THEN
465                  IF( IAM.EQ.0 )
466     $               WRITE( NOUT, FMT = 9996 ) 'factorization',
467     $               ( FREE_PTR )*REALSZ
468                  IERR( 1 ) = 1
469               END IF
470*
471*              Check all processes for an error
472*
473               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
474     $                       1, -1, 0 )
475*
476               IF( IERR( 1 ).GT.0 ) THEN
477                  IF( IAM.EQ.0 )
478     $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
479                  KSKIP = KSKIP + 1
480                  GO TO 30
481               END IF
482*
483               CALL PSBMATGEN( ICTXT, 'G', 'N', BWL, BWU, N,
484     $                         (2*BWL+2*BWU+1), NB, MEM( IPA+BWL+BWU ),
485     $                         ((2*BWL+2*BWU+1)+10), 0, 0, IASEED,
486     $                         MYROW, MYCOL, NPROW, NPCOL )
487*
488               CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
489     $                          ((2*BWL+2*BWU+1)+10), IPREPAD, IPOSTPAD,
490     $                          PADVAL )
491*
492               CALL PSFILLPAD( ICTXT, WORKSIZ, 1,
493     $                          MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
494     $                          IPREPAD, IPOSTPAD, PADVAL )
495*
496*              Calculate norm of A for residual error-checking
497*
498               IF( CHECK ) THEN
499*
500                  ANORM = PSLANGE( '1', (2*BWL+2*BWU+1),
501     $                            N, MEM( IPA ), 1, 1,
502     $                            DESCA2D, MEM( IP_DRIVER_W ) )
503                  CALL PSCHEKPAD( ICTXT, 'PSLANGE', NP, NQ,
504     $                         MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10),
505     $                         IPREPAD, IPOSTPAD, PADVAL )
506                  CALL PSCHEKPAD( ICTXT, 'PSLANGE',
507     $                            WORKSIZ, 1,
508     $                            MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
509     $                            IPREPAD, IPOSTPAD, PADVAL )
510               END IF
511*
512*
513               CALL SLBOOT()
514               CALL BLACS_BARRIER( ICTXT, 'All' )
515*
516*              Perform factorization
517*
518               CALL SLTIMER( 1 )
519*
520               CALL PSGBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, IPIV,
521     $                       MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ),
522     $                       IPW_SIZE, INFO )
523*
524               CALL SLTIMER( 1 )
525*
526               IF( INFO.NE.0 ) THEN
527                  IF( IAM.EQ.0 ) THEN
528                    WRITE( NOUT, FMT = * ) 'PSGBTRF INFO=', INFO
529                  ENDIF
530                  KFAIL = KFAIL + 1
531                  GO TO 30
532               END IF
533*
534               IF( CHECK ) THEN
535*
536*                 Check for memory overwrite in factorization
537*
538                  CALL PSCHEKPAD( ICTXT, 'PSGBTRF', NP,
539     $                    NQ, MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10),
540     $                    IPREPAD, IPOSTPAD, PADVAL )
541               END IF
542*
543*
544*              Loop over the different values for NRHS
545*
546               DO 20 HH = 1, NNR
547*
548                  IERR( 1 ) = 0
549*
550                  NRHS = NRVAL( HH )
551*
552*                    Initialize Array Descriptor for rhs
553*
554                     CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0,
555     $                             ICTXTB, NB+10, IERR( 1 ) )
556*
557*                    Convert this to 1D descriptor
558*
559                     DESCB( 1 ) = 502
560                     DESCB( 3 ) = N
561                     DESCB( 4 ) = NB
562                     DESCB( 5 ) = 0
563                     DESCB( 2 ) = ICTXT
564                     DESCB( 6 ) = DESCB2D( LLD_ )
565                     DESCB( 7 ) = 0
566*
567*                    reset free_ptr to reuse space for right hand sides
568*
569                     IF( IPB .GT. 0 ) THEN
570                       FREE_PTR = IPB
571                     ENDIF
572*
573                     FREE_PTR = FREE_PTR + IPREPAD
574                     IPB = FREE_PTR
575                     FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ )
576     $                          + IPOSTPAD
577*
578*                    Allocate workspace for workspace in TRS routine:
579*
580                     IPW_SOLVE_SIZE = NRHS*(NB+2*BWL+4*BWU)
581*
582                     IPW_SOLVE = FREE_PTR
583                     FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE
584*
585                     IERR( 1 ) = 0
586                     IF( FREE_PTR.GT.MEMSIZ ) THEN
587                        IF( IAM.EQ.0 )
588     $                     WRITE( NOUT, FMT = 9996 )'solve',
589     $                            ( FREE_PTR )*REALSZ
590                        IERR( 1 ) = 1
591                     END IF
592*
593*                    Check all processes for an error
594*
595                     CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1,
596     $                             IERR, 1, -1, 0 )
597*
598                     IF( IERR( 1 ).GT.0 ) THEN
599                        IF( IAM.EQ.0 )
600     $                     WRITE( NOUT, FMT = 9997 ) 'MEMORY'
601                        KSKIP = KSKIP + 1
602                        GO TO 15
603                     END IF
604*
605                     MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL )
606*
607*                    Generate RHS
608*
609                     CALL PSMATGEN(ICTXTB, 'No', 'No',
610     $                        DESCB2D( M_ ), DESCB2D( N_ ),
611     $                        DESCB2D( MB_ ), DESCB2D( NB_ ),
612     $                        MEM( IPB ),
613     $                        DESCB2D( LLD_ ), DESCB2D( RSRC_ ),
614     $                        DESCB2D( CSRC_ ),
615     $                        IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL,
616     $                        MYROW, NPCOL, NPROW )
617*
618                     IF( CHECK ) THEN
619                        CALL PSFILLPAD( ICTXTB, NB, NRHS,
620     $                                  MEM( IPB-IPREPAD ),
621     $                                  DESCB2D( LLD_ ),
622     $                                  IPREPAD, IPOSTPAD,
623     $                                  PADVAL )
624                        CALL PSFILLPAD( ICTXT, WORKSIZ, 1,
625     $                                  MEM( IP_DRIVER_W-IPREPAD ),
626     $                                  WORKSIZ, IPREPAD,
627     $                                  IPOSTPAD, PADVAL )
628                     END IF
629*
630*
631                     CALL BLACS_BARRIER( ICTXT, 'All')
632                     CALL SLTIMER( 2 )
633*
634*                    Solve linear system via factorization
635*
636                     CALL PSGBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ),
637     $                             1, DESCA, IPIV, MEM( IPB ), 1, DESCB,
638     $                             MEM( IP_FILLIN ), FILLIN_SIZE,
639     $                             MEM( IPW_SOLVE ), IPW_SOLVE_SIZE,
640     $                             INFO )
641*
642                     CALL SLTIMER( 2 )
643*
644                     IF( INFO.NE.0 ) THEN
645                       IF( IAM.EQ.0 )
646     $  WRITE( NOUT, FMT = * ) 'PSGBTRS INFO=', INFO
647                       KFAIL = KFAIL + 1
648                       PASSED = 'FAILED'
649                       GO TO 20
650                     END IF
651*
652                     IF( CHECK ) THEN
653*
654*                       check for memory overwrite
655*
656                        CALL PSCHEKPAD( ICTXT, 'PSGBTRS-work',
657     $                                  WORKSIZ, 1,
658     $                                  MEM( IP_DRIVER_W-IPREPAD ),
659     $                                  WORKSIZ, IPREPAD,
660     $                                  IPOSTPAD, PADVAL )
661*
662*                       check the solution to rhs
663*
664                        SRESID = ZERO
665*
666                        CALL PSDBLASCHK( 'N', 'N', TRANS,
667     $                       N, BWL, BWU, NRHS,
668     $                       MEM( IPB ), 1, 1, DESCB2D,
669     $                       IASEED, MEM( IPA+BWL+BWU ), 1, 1, DESCA2D,
670     $                       IBSEED, ANORM, SRESID,
671     $                       MEM( IP_DRIVER_W ), WORKSIZ )
672*
673                        IF( IAM.EQ.0 ) THEN
674                           IF( SRESID.GT.THRESH )
675     $                        WRITE( NOUT, FMT = 9985 ) SRESID
676                        END IF
677*
678*                       The second test is a NaN trap
679*
680                        IF( ( SRESID.LE.THRESH          ).AND.
681     $                      ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN
682                           KPASS = KPASS + 1
683                           PASSED = 'PASSED'
684                        ELSE
685                           KFAIL = KFAIL + 1
686                           PASSED = 'FAILED'
687                        END IF
688*
689                     END IF
690*
691   15                CONTINUE
692*                    Skipped tests jump to here to print out "SKIPPED"
693*
694*                    Gather maximum of all CPU and WALL clock timings
695*
696                     CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1,
697     $                               WTIME )
698                     CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1,
699     $                               CTIME )
700*
701*                    Print results
702*
703                     IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
704*
705                        NOPS = 0
706                        NOPS2 = 0
707*
708                        N_FIRST = NB
709                        NPROCS_REAL = ( N-1 )/NB + 1
710                        N_LAST = MOD( N-1, NB ) + 1
711*
712*                       2 N bwl (bwl+bwu) + N (bwl) flops
713*                          for LU factorization
714*
715                        NOPS = 2*(DBLE(N)*DBLE(BWL)*
716     $                         DBLE((BWL+BWU))) +
717     $                         (DBLE(N)*DBLE(BWL))
718*
719*                       nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve.
720*
721                        NOPS = NOPS +
722     $                  2 * (DBLE(N)*(DBLE(BWL)+DBLE((BWL+BWU)))
723     $                   *DBLE(NRHS))
724*
725*                       Second calc to represent actual hardware speed
726*
727*                     2*N_FIRST (bwl+bwu)*bwu Flops for LU
728*                       factorization in proc 1
729*
730                      NOPS2 = 2*( (DBLE(N_FIRST)*
731     $                  DBLE((BWL+BWU))*DBLE(BWU)))
732*
733                      IF ( NPROCS_REAL .GT. 1) THEN
734*                       8 N_LAST (bwl+bwu)*(bwl+bwu)
735*                         flops for LU and spike
736*                         calc in last processor
737*
738                        NOPS2 = NOPS2 +
739     $                          8*( (DBLE(N_LAST)*DBLE((BWL+BWU))
740     $                          *DBLE((BWL+BWU))) )
741                      ENDIF
742*
743                      IF ( NPROCS_REAL .GT. 2) THEN
744*                       8 NB (bwl+bwu)*(bwl+bwu)  flops for LU and spike
745*                         calc in other processors
746*
747                        NOPS2 = NOPS2 + (NPROCS_REAL-2)*
748     $                          8*( (DBLE(NB)*DBLE((BWL+BWU))
749     $                          *DBLE((BWL+BWU))) )
750                      ENDIF
751*
752*                     Reduced system
753*
754                      NOPS2 = NOPS2 +
755     $                  2*( NPROCS_REAL-1 ) *
756     $                  ( (BWL+BWU)*(BWL+BWU)*(BWL+BWU)/3 )
757                      IF( NPROCS_REAL .GT. 1 ) THEN
758                        NOPS2 = NOPS2 +
759     $                    2*( NPROCS_REAL-2 ) *
760     $                    (2*(BWL+BWU)*(BWL+BWU)*(BWL+BWU))
761                      ENDIF
762*
763*                     Solve stage
764*
765*                     nrhs*2 n_first*
766*                        (bwl+(bwl+bwu))
767*                        flops for L,U solve in proc 1.
768*
769                      NOPS2 = NOPS2 +
770     $                  2*
771     $                  DBLE(N_FIRST)*
772     $                  DBLE(NRHS) *
773     $                  ( DBLE(BWL)+DBLE((BWL+BWU)))
774*
775                      IF ( NPROCS_REAL .GT. 1 ) THEN
776*
777*                       2*nrhs*2 n_last
778*                        ((bwl+bwu)+(bwl+bwu))
779*                       flops for LU solve in other procs
780*
781                        NOPS2 = NOPS2 +
782     $                    4*
783     $                    (DBLE(N_LAST)*(DBLE((BWL+BWU))+
784     $                    DBLE((BWL+BWU))))*DBLE(NRHS)
785                      ENDIF
786*
787                      IF ( NPROCS_REAL .GT. 2 ) THEN
788*
789*                       2*nrhs*2 NB
790*                        ((bwl+bwu)+(bwl+bwu))
791*                        flops for LU solve in other procs
792*
793                        NOPS2 = NOPS2 +
794     $                    ( NPROCS_REAL-2)*2*
795     $                 ( (DBLE(NB)*(DBLE((BWL+BWU))+
796     $                 DBLE((BWL+BWU))))*DBLE(NRHS) )
797                      ENDIF
798*
799*                     Reduced system
800*
801                      NOPS2 = NOPS2 +
802     $                  NRHS*( NPROCS_REAL-1)*2*((BWL+BWU)*(BWL+BWU) )
803                      IF( NPROCS_REAL .GT. 1 ) THEN
804                        NOPS2 = NOPS2 +
805     $                   NRHS*( NPROCS_REAL-2 ) *
806     $                   ( 6 * (BWL+BWU)*(BWL+BWU) )
807                      ENDIF
808*
809*
810*                       Calculate total megaflops - factorization and/or
811*                       solve -- for WALL and CPU time, and print output
812*
813*                       Print WALL time if machine supports it
814*
815                        IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN
816                           TMFLOPS = NOPS /
817     $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
818                        ELSE
819                           TMFLOPS = 0.0D+0
820                        END IF
821*
822                        IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN
823                           TMFLOPS2 = NOPS2 /
824     $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
825                        ELSE
826                           TMFLOPS2 = 0.0D+0
827                        END IF
828*
829                        IF( WTIME( 2 ).GE.0.0D+0 )
830     $                     WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS,
831     $                            N,
832     $                            BWL, BWU,
833     $                            NB, NRHS, NPROW, NPCOL,
834     $                            WTIME( 1 ), WTIME( 2 ), TMFLOPS,
835     $                            TMFLOPS2, PASSED
836*
837*                       Print CPU time if machine supports it
838*
839                        IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
840                           TMFLOPS = NOPS /
841     $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
842                        ELSE
843                           TMFLOPS = 0.0D+0
844                        END IF
845*
846                        IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
847                           TMFLOPS2 = NOPS2 /
848     $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
849                        ELSE
850                           TMFLOPS2 = 0.0D+0
851                        END IF
852*
853                        IF( CTIME( 2 ).GE.0.0D+0 )
854     $                     WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS,
855     $                            N,
856     $                            BWL, BWU,
857     $                            NB, NRHS, NPROW, NPCOL,
858     $                            CTIME( 1 ), CTIME( 2 ), TMFLOPS,
859     $                            TMFLOPS2, PASSED
860*
861                     END IF
862   20          CONTINUE
863*
864*
865   30       CONTINUE
866*           NNB loop
867*
868   45      CONTINUE
869*          BW[] loop
870*
871   40   CONTINUE
872*       NMAT loop
873*
874        CALL BLACS_GRIDEXIT( ICTXT )
875        CALL BLACS_GRIDEXIT( ICTXTB )
876*
877   50   CONTINUE
878*       NGRIDS DROPOUT
879   60 CONTINUE
880*     NGRIDS loop
881*
882*     Print ending messages and close output file
883*
884      IF( IAM.EQ.0 ) THEN
885         KTESTS = KPASS + KFAIL + KSKIP
886         WRITE( NOUT, FMT = * )
887         WRITE( NOUT, FMT = 9992 ) KTESTS
888         IF( CHECK ) THEN
889            WRITE( NOUT, FMT = 9991 ) KPASS
890            WRITE( NOUT, FMT = 9989 ) KFAIL
891         ELSE
892            WRITE( NOUT, FMT = 9990 ) KPASS
893         END IF
894         WRITE( NOUT, FMT = 9988 ) KSKIP
895         WRITE( NOUT, FMT = * )
896         WRITE( NOUT, FMT = * )
897         WRITE( NOUT, FMT = 9987 )
898         IF( NOUT.NE.6 .AND. NOUT.NE.0 )
899     $      CLOSE ( NOUT )
900      END IF
901*
902      CALL BLACS_EXIT( 0 )
903*
904 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3,
905     $        '; It should be at least 1' )
906 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most',
907     $        I4 )
908 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
909 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
910     $        I11 )
911 9995 FORMAT( 'TIME TR      N  BWL BWU    NB  NRHS    P    Q L*U Time ',
912     $        'Slv Time   MFLOPS   MFLOP2  CHECK' )
913 9994 FORMAT( '---- -- ------  --- ---  ---- ----- ---- ---- -------- ',
914     $        '-------- -------- -------- ------' )
915 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5,
916     $                                          1X,I4,1X,I4,1X,F9.3,
917     $        F9.4,        F9.2,    F9.2, 1X, A6 )
918 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' )
919 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
920 9990 FORMAT( I5, ' tests completed without checking.' )
921 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
922 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
923 9987 FORMAT( 'END OF TESTS.' )
924 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 )
925 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 )
926*
927      STOP
928*
929*     End of PSGBTRS_DRIVER
930*
931      END
932*
933