1      PROGRAM PZDBDRIVER
2*
3*
4*  -- ScaLAPACK routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     November 15, 1997
8*
9*  Purpose
10*  =======
11*
12*  PZDBDRIVER is a test program for the
13*  ScaLAPACK Band Cholesky routines corresponding to the options
14*  indicated by ZDB.  This test driver performs an
15*  A = L*U factorization
16*  and solves a linear system with the factors for 1 or more RHS.
17*
18*  The program must be driven by a short data file.
19*  Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*''                              output file name (if any)
23*6                               device out
24*'L'                             define Lower or Upper
25*9                               number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073     values of N
27*6                               number of bandwidths
28*1 2 4 10 31 64                values of BW
29*1                               number of NB's
30*-1 3 4 5                        values of NB (-1 for automatic choice)
31*1                               number of NRHS's (must be 1)
32*8                               values of NRHS
33*1                               number of NBRHS's (ignored)
34*1                               values of NBRHS (ignored)
35*6                               number of process grids
36*1 2 3 4 5 7 8 15 26 47 64       values of "Number of Process Columns"
37*3.0                             threshold
38*
39*  Internal Parameters
40*  ===================
41*
42*  TOTMEM   INTEGER, default = 6200000.
43*           TOTMEM is a machine-specific parameter indicating the
44*           maximum amount of available memory in bytes.
45*           The user should customize TOTMEM to his platform.  Remember
46*           to leave room in memory for the operating system, the BLACS
47*           buffer, etc.  For example, on a system with 8 MB of memory
48*           per process (e.g., one processor on an Intel iPSC/860), the
49*           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50*           code, BLACS buffer, etc).  However, for PVM, we usually set
51*           TOTMEM = 2000000.  Some experimenting with the maximum value
52*           of TOTMEM may be required.
53*
54*  INTGSZ   INTEGER, default = 4 bytes.
55*  ZPLXSZ   INTEGER, default = 16 bytes.
56*           INTGSZ and ZPLXSZ indicate the length in bytes on the
57*           given platform for an integer and a double precision
58*           complex.
59*  MEM      COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ )
60*           All arrays used by ScaLAPACK routines are allocated from
61*           this array and referenced by pointers.  The integer IPB,
62*           for example, is a pointer to the starting element of MEM for
63*           the solution vector(s) B.
64*
65*  =====================================================================
66*
67*  Code Developer: Andrew J. Cleary, University of Tennessee.
68*    Current address: Lawrence Livermore National Labs.
69*  This version released: August, 2001.
70*
71*  =====================================================================
72*
73*     .. Parameters ..
74      INTEGER            TOTMEM
75      PARAMETER          ( TOTMEM = 3000000 )
76      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
77     $                   LLD_, MB_, M_, NB_, N_, RSRC_
78      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
79     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
80     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
81*
82      DOUBLE PRECISION   ZERO
83      INTEGER            MEMSIZ, NTESTS, ZPLXSZ
84      COMPLEX*16         PADVAL
85      PARAMETER          ( ZPLXSZ = 16,
86     $                     MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20,
87     $                     PADVAL = ( -9923.0D+0, -9923.0D+0 ),
88     $                     ZERO = 0.0D+0 )
89      INTEGER            INT_ONE
90      PARAMETER          ( INT_ONE = 1 )
91*     ..
92*     .. Local Scalars ..
93      LOGICAL            CHECK
94      CHARACTER          TRANS
95      CHARACTER*6        PASSED
96      CHARACTER*80       OUTFILE
97      INTEGER            BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH,
98     $                   I, IAM, IASEED, IBSEED, ICTXT, ICTXTB,
99     $                   IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD,
100     $                   IPREPAD, IPW, IPW_SIZE, IPW_SOLVE,
101     $                   IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K,
102     $                   KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE,
103     $                   MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR,
104     $                   NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL,
105     $                   NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ
106      REAL               THRESH
107            DOUBLE PRECISION    ANORM, NOPS, NOPS2, SRESID, TMFLOPS,
108     $                          TMFLOPS2
109*     ..
110*     .. Local Arrays ..
111      INTEGER            BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ),
112     $                   DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ),
113     $                   IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ),
114     $                   NRVAL( NTESTS ), NVAL( NTESTS ),
115     $                   PVAL( NTESTS ), QVAL( NTESTS )
116      DOUBLE PRECISION   CTIME( 2 ), WTIME( 2 )
117      COMPLEX*16         MEM( MEMSIZ )
118*     ..
119*     .. External Subroutines ..
120      EXTERNAL           BLACS_BARRIER, BLACS_EXIT, BLACS_GET,
121     $                   BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT,
122     $                   BLACS_PINFO, DESCINIT, IGSUM2D, PZBMATGEN,
123     $                   PZCHEKPAD, PZDBINFO, PZDBLASCHK, PZDBTRF,
124     $                   PZDBTRS, PZFILLPAD, PZMATGEN, SLBOOT,
125     $                   SLCOMBINE, SLTIMER
126*     ..
127*     .. External Functions ..
128      INTEGER            NUMROC
129      LOGICAL            LSAME
130      DOUBLE PRECISION   PZLANGE
131      EXTERNAL           LSAME, NUMROC, PZLANGE
132*     ..
133*     .. Intrinsic Functions ..
134      INTRINSIC          DBLE, MAX, MIN, MOD
135*     ..
136*     .. Data Statements ..
137      DATA               KFAIL, KPASS, KSKIP, KTESTS / 4*0 /
138*     ..
139*
140*
141*
142*     .. Executable Statements ..
143*
144*     Get starting information
145*
146      CALL BLACS_PINFO( IAM, NPROCS )
147      IASEED = 100
148      IBSEED = 200
149*
150      CALL PZDBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW,
151     $               BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR,
152     $               NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL,
153     $               NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS )
154*
155      CHECK = ( THRESH.GE.0.0D+0 )
156*
157*     Print headings
158*
159      IF( IAM.EQ.0 ) THEN
160         WRITE( NOUT, FMT = * )
161         WRITE( NOUT, FMT = 9995 )
162         WRITE( NOUT, FMT = 9994 )
163         WRITE( NOUT, FMT = * )
164      END IF
165*
166*     Loop over different process grids
167*
168      DO 60 I = 1, NGRIDS
169*
170         NPROW = PVAL( I )
171         NPCOL = QVAL( I )
172*
173*        Make sure grid information is correct
174*
175         IERR( 1 ) = 0
176         IF( NPROW.LT.1 ) THEN
177            IF( IAM.EQ.0 )
178     $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW
179            IERR( 1 ) = 1
180         ELSE IF( NPCOL.LT.1 ) THEN
181            IF( IAM.EQ.0 )
182     $         WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL
183            IERR( 1 ) = 1
184         ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN
185            IF( IAM.EQ.0 )
186     $         WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS
187            IERR( 1 ) = 1
188         END IF
189*
190         IF( IERR( 1 ).GT.0 ) THEN
191            IF( IAM.EQ.0 )
192     $         WRITE( NOUT, FMT = 9997 ) 'grid'
193            KSKIP = KSKIP + 1
194            GO TO 50
195         END IF
196*
197*        Define process grid
198*
199         CALL BLACS_GET( -1, 0, ICTXT )
200         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
201*
202*
203*        Define transpose process grid
204*
205         CALL BLACS_GET( -1, 0, ICTXTB )
206         CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW )
207*
208*        Go to bottom of process grid loop if this case doesn't use my
209*        process
210*
211         CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
212*
213         IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN
214            GO TO 50
215         ENDIF
216*
217         DO 40 J = 1, NMAT
218*
219           IERR( 1 ) = 0
220*
221           N = NVAL( J )
222*
223*          Make sure matrix information is correct
224*
225           IF( N.LT.1 ) THEN
226               IF( IAM.EQ.0 )
227     $            WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N
228               IERR( 1 ) = 1
229           END IF
230*
231*          Check all processes for an error
232*
233           CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
234     $                    -1, 0 )
235*
236           IF( IERR( 1 ).GT.0 ) THEN
237               IF( IAM.EQ.0 )
238     $            WRITE( NOUT, FMT = 9997 ) 'size'
239               KSKIP = KSKIP + 1
240               GO TO 40
241           END IF
242*
243*
244           DO 45 BW_NUM = 1, NBW
245*
246             IERR( 1 ) = 0
247*
248             BWL = BWLVAL( BW_NUM )
249             IF( BWL.LT.1 ) THEN
250               IF( IAM.EQ.0 )
251     $            WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL
252               IERR( 1 ) = 1
253             END IF
254*
255             BWU = BWUVAL( BW_NUM )
256             IF( BWU.LT.1 ) THEN
257               IF( IAM.EQ.0 )
258     $            WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU
259               IERR( 1 ) = 1
260             END IF
261*
262             IF( BWL.GT.N-1 ) THEN
263               IF( IAM.EQ.0 ) THEN
264                 IERR( 1 ) = 1
265               ENDIF
266             END IF
267*
268             IF( BWU.GT.N-1 ) THEN
269               IF( IAM.EQ.0 ) THEN
270                 IERR( 1 ) = 1
271               ENDIF
272             END IF
273*
274*            Check all processes for an error
275*
276             CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
277     $                    -1, 0 )
278*
279             IF( IERR( 1 ).GT.0 ) THEN
280               KSKIP = KSKIP + 1
281               GO TO 45
282             END IF
283*
284             DO 30 K = 1, NNB
285*
286               IERR( 1 ) = 0
287*
288               NB = NBVAL( K )
289               IF( NB.LT.0 ) THEN
290                  NB =( (N-(NPCOL-1)*MAX(BWL,BWU)-1)/NPCOL + 1 )
291     $               + MAX(BWL,BWU)
292                  NB = MAX( NB, 2*MAX(BWL,BWU) )
293                  NB = MIN( N, NB )
294               END IF
295*
296*              Make sure NB is legal
297*
298               IERR( 1 ) = 0
299               IF( NB.LT.MIN( 2*MAX(BWL,BWU), N ) ) THEN
300                  IERR( 1 ) = 1
301               END IF
302*
303*              Check all processes for an error
304*
305               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1,
306     $                       -1, 0 )
307*
308               IF( IERR( 1 ).GT.0 ) THEN
309                  KSKIP = KSKIP + 1
310                  GO TO 30
311               END IF
312*
313*              Padding constants
314*
315               NP = NUMROC( (BWL+BWU+1), (BWL+BWU+1),
316     $                      MYROW, 0, NPROW )
317               NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
318*
319               IF( CHECK ) THEN
320                  IPREPAD  = ((BWL+BWU+1)+10)
321                  IMIDPAD  = 10
322                  IPOSTPAD = ((BWL+BWU+1)+10)
323               ELSE
324                  IPREPAD  = 0
325                  IMIDPAD  = 0
326                  IPOSTPAD = 0
327               END IF
328*
329*              Initialize the array descriptor for the matrix A
330*
331               CALL DESCINIT( DESCA2D, (BWL+BWU+1), N,
332     $                       (BWL+BWU+1), NB, 0, 0,
333     $                       ICTXT,((BWL+BWU+1)+10), IERR( 1 ) )
334*
335*              Convert this to 1D descriptor
336*
337               DESCA( 1 ) = 501
338               DESCA( 3 ) = N
339               DESCA( 4 ) = NB
340               DESCA( 5 ) = 0
341               DESCA( 2 ) = ICTXT
342               DESCA( 6 ) = ((BWL+BWU+1)+10)
343               DESCA( 7 ) = 0
344*
345               IERR_TEMP = IERR( 1 )
346               IERR( 1 ) = 0
347               IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP )
348*
349*              Check all processes for an error
350*
351               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
352*
353               IF( IERR( 1 ).LT.0 ) THEN
354                  IF( IAM.EQ.0 )
355     $               WRITE( NOUT, FMT = 9997 ) 'descriptor'
356                  KSKIP = KSKIP + 1
357                  GO TO 30
358               END IF
359*
360*              Assign pointers into MEM for SCALAPACK arrays, A is
361*              allocated starting at position MEM( IPREPAD+1 )
362*
363               FREE_PTR = 1
364               IPB = 0
365*
366*              Save room for prepadding
367               FREE_PTR = FREE_PTR + IPREPAD
368*
369               IPA = FREE_PTR
370               FREE_PTR = FREE_PTR + DESCA2D( LLD_ )*
371     $                     DESCA2D( NB_ )
372     $                     + IPOSTPAD
373*
374*              Add memory for fillin
375*              Fillin space needs to store:
376*                Fillin spike:
377*                Contribution to previous proc's diagonal block of
378*                  reduced system:
379*                Off-diagonal block of reduced system:
380*                Diagonal block of reduced system:
381*
382               FILLIN_SIZE =
383     $            NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU)
384*
385*              Claim memory for fillin
386*
387               FREE_PTR = FREE_PTR + IPREPAD
388               IP_FILLIN = FREE_PTR
389               FREE_PTR = FREE_PTR + FILLIN_SIZE
390*
391*              Workspace needed by computational routines:
392*
393               IPW_SIZE = 0
394*
395*              factorization:
396*
397               IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU)
398*
399*              Claim memory for IPW
400*
401               IPW = FREE_PTR
402               FREE_PTR = FREE_PTR + IPW_SIZE
403*
404*              Check for adequate memory for problem size
405*
406               IERR( 1 ) = 0
407               IF( FREE_PTR.GT.MEMSIZ ) THEN
408                  IF( IAM.EQ.0 )
409     $               WRITE( NOUT, FMT = 9996 )
410     $               'divide and conquer factorization',
411     $               (FREE_PTR )*ZPLXSZ
412                  IERR( 1 ) = 1
413               END IF
414*
415*              Check all processes for an error
416*
417               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
418     $                       1, -1, 0 )
419*
420               IF( IERR( 1 ).GT.0 ) THEN
421                  IF( IAM.EQ.0 )
422     $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
423                  KSKIP = KSKIP + 1
424                  GO TO 30
425               END IF
426*
427*              Worksize needed for LAPRNT
428               WORKSIZ = MAX( ((BWL+BWU+1)+10), NB )
429*
430               IF( CHECK ) THEN
431*
432*                 Calculate the amount of workspace required by
433*                 the checking routines.
434*
435*                 PZLANGE
436                  WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) )
437*
438*                 PZDBLASCHK
439                  WORKSIZ = MAX( WORKSIZ,
440     $          MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB )
441               END IF
442*
443               FREE_PTR = FREE_PTR + IPREPAD
444               IP_DRIVER_W = FREE_PTR
445               FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD
446*
447*
448*              Check for adequate memory for problem size
449*
450               IERR( 1 ) = 0
451               IF( FREE_PTR.GT.MEMSIZ ) THEN
452                  IF( IAM.EQ.0 )
453     $               WRITE( NOUT, FMT = 9996 ) 'factorization',
454     $               ( FREE_PTR )*ZPLXSZ
455                  IERR( 1 ) = 1
456               END IF
457*
458*              Check all processes for an error
459*
460               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR,
461     $                       1, -1, 0 )
462*
463               IF( IERR( 1 ).GT.0 ) THEN
464                  IF( IAM.EQ.0 )
465     $               WRITE( NOUT, FMT = 9997 ) 'MEMORY'
466                  KSKIP = KSKIP + 1
467                  GO TO 30
468               END IF
469*
470               CALL PZBMATGEN( ICTXT, 'G', 'D', BWL, BWU, N,
471     $                         (BWL+BWU+1), NB, MEM( IPA ),
472     $                         ((BWL+BWU+1)+10), 0, 0, IASEED, MYROW,
473     $                         MYCOL, NPROW, NPCOL )
474*
475               CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
476     $                          ((BWL+BWU+1)+10), IPREPAD, IPOSTPAD,
477     $                          PADVAL )
478*
479               CALL PZFILLPAD( ICTXT, WORKSIZ, 1,
480     $                          MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
481     $                          IPREPAD, IPOSTPAD, PADVAL )
482*
483*              Calculate norm of A for residual error-checking
484*
485               IF( CHECK ) THEN
486*
487                  ANORM = PZLANGE( '1', (BWL+BWU+1),
488     $                            N, MEM( IPA ), 1, 1,
489     $                            DESCA2D, MEM( IP_DRIVER_W ) )
490                  CALL PZCHEKPAD( ICTXT, 'PZLANGE', NP, NQ,
491     $                         MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10),
492     $                         IPREPAD, IPOSTPAD, PADVAL )
493                  CALL PZCHEKPAD( ICTXT, 'PZLANGE',
494     $                            WORKSIZ, 1,
495     $                            MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
496     $                            IPREPAD, IPOSTPAD, PADVAL )
497               END IF
498*
499*
500               CALL SLBOOT()
501               CALL BLACS_BARRIER( ICTXT, 'All' )
502*
503*              Perform factorization
504*
505               CALL SLTIMER( 1 )
506*
507               CALL PZDBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA,
508     $                       MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ),
509     $                       IPW_SIZE, INFO )
510*
511               CALL SLTIMER( 1 )
512*
513               IF( INFO.NE.0 ) THEN
514                  IF( IAM.EQ.0 ) THEN
515                    WRITE( NOUT, FMT = * ) 'PZDBTRF INFO=', INFO
516                  ENDIF
517                  KFAIL = KFAIL + 1
518                  GO TO 30
519               END IF
520*
521               IF( CHECK ) THEN
522*
523*                 Check for memory overwrite in factorization
524*
525                  CALL PZCHEKPAD( ICTXT, 'PZDBTRF', NP,
526     $                    NQ, MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10),
527     $                    IPREPAD, IPOSTPAD, PADVAL )
528               END IF
529*
530*
531*              Loop over the different values for NRHS
532*
533               DO 20 HH = 1, NNR
534*
535                  IERR( 1 ) = 0
536*
537                  NRHS = NRVAL( HH )
538*
539*                    Initialize Array Descriptor for rhs
540*
541                     CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0,
542     $                             ICTXTB, NB+10, IERR( 1 ) )
543*
544*                    Convert this to 1D descriptor
545*
546                     DESCB( 1 ) = 502
547                     DESCB( 3 ) = N
548                     DESCB( 4 ) = NB
549                     DESCB( 5 ) = 0
550                     DESCB( 2 ) = ICTXT
551                     DESCB( 6 ) = DESCB2D( LLD_ )
552                     DESCB( 7 ) = 0
553*
554*                    reset free_ptr to reuse space for right hand sides
555*
556                     IF( IPB .GT. 0 ) THEN
557                       FREE_PTR = IPB
558                     ENDIF
559*
560                     FREE_PTR = FREE_PTR + IPREPAD
561                     IPB = FREE_PTR
562                     FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ )
563     $                          + IPOSTPAD
564*
565*                    Allocate workspace for workspace in TRS routine:
566*
567                     IPW_SOLVE_SIZE = (MAX(BWL,BWU)*NRHS)
568*
569                     IPW_SOLVE = FREE_PTR
570                     FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE
571*
572                     IERR( 1 ) = 0
573                     IF( FREE_PTR.GT.MEMSIZ ) THEN
574                        IF( IAM.EQ.0 )
575     $                     WRITE( NOUT, FMT = 9996 )'solve',
576     $                            ( FREE_PTR )*ZPLXSZ
577                        IERR( 1 ) = 1
578                     END IF
579*
580*                    Check all processes for an error
581*
582                     CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1,
583     $                             IERR, 1, -1, 0 )
584*
585                     IF( IERR( 1 ).GT.0 ) THEN
586                        IF( IAM.EQ.0 )
587     $                     WRITE( NOUT, FMT = 9997 ) 'MEMORY'
588                        KSKIP = KSKIP + 1
589                        GO TO 15
590                     END IF
591*
592                     MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL )
593*
594*                    Generate RHS
595*
596                     CALL PZMATGEN(ICTXTB, 'No', 'No',
597     $                        DESCB2D( M_ ), DESCB2D( N_ ),
598     $                        DESCB2D( MB_ ), DESCB2D( NB_ ),
599     $                        MEM( IPB ),
600     $                        DESCB2D( LLD_ ), DESCB2D( RSRC_ ),
601     $                        DESCB2D( CSRC_ ),
602     $                        IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL,
603     $                        MYROW, NPCOL, NPROW )
604*
605                     IF( CHECK ) THEN
606                        CALL PZFILLPAD( ICTXTB, NB, NRHS,
607     $                                  MEM( IPB-IPREPAD ),
608     $                                  DESCB2D( LLD_ ),
609     $                                  IPREPAD, IPOSTPAD,
610     $                                  PADVAL )
611                        CALL PZFILLPAD( ICTXT, WORKSIZ, 1,
612     $                                  MEM( IP_DRIVER_W-IPREPAD ),
613     $                                  WORKSIZ, IPREPAD,
614     $                                  IPOSTPAD, PADVAL )
615                     END IF
616*
617*
618                     CALL BLACS_BARRIER( ICTXT, 'All')
619                     CALL SLTIMER( 2 )
620*
621*                    Solve linear system via factorization
622*
623                     CALL PZDBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ),
624     $                             1, DESCA, MEM( IPB ), 1, DESCB,
625     $                             MEM( IP_FILLIN ), FILLIN_SIZE,
626     $                             MEM( IPW_SOLVE ), IPW_SOLVE_SIZE,
627     $                             INFO )
628*
629                     CALL SLTIMER( 2 )
630*
631                     IF( INFO.NE.0 ) THEN
632                       IF( IAM.EQ.0 )
633     $  WRITE( NOUT, FMT = * ) 'PZDBTRS INFO=', INFO
634                       KFAIL = KFAIL + 1
635                       PASSED = 'FAILED'
636                       GO TO 20
637                     END IF
638*
639                     IF( CHECK ) THEN
640*
641*                       check for memory overwrite
642*
643                        CALL PZCHEKPAD( ICTXT, 'PZDBTRS-work',
644     $                                  WORKSIZ, 1,
645     $                                  MEM( IP_DRIVER_W-IPREPAD ),
646     $                                  WORKSIZ, IPREPAD,
647     $                                  IPOSTPAD, PADVAL )
648*
649*                       check the solution to rhs
650*
651                        SRESID = ZERO
652*
653                        CALL PZDBLASCHK( 'N', 'D', TRANS,
654     $                       N, BWL, BWU, NRHS,
655     $                       MEM( IPB ), 1, 1, DESCB2D,
656     $                       IASEED, MEM( IPA ), 1, 1, DESCA2D,
657     $                       IBSEED, ANORM, SRESID,
658     $                       MEM( IP_DRIVER_W ), WORKSIZ )
659*
660                        IF( IAM.EQ.0 ) THEN
661                           IF( SRESID.GT.THRESH )
662     $                        WRITE( NOUT, FMT = 9985 ) SRESID
663                        END IF
664*
665*                       The second test is a NaN trap
666*
667                        IF( ( SRESID.LE.THRESH          ).AND.
668     $                      ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN
669                           KPASS = KPASS + 1
670                           PASSED = 'PASSED'
671                        ELSE
672                           KFAIL = KFAIL + 1
673                           PASSED = 'FAILED'
674                        END IF
675*
676                     END IF
677*
678   15                CONTINUE
679*                    Skipped tests jump to here to print out "SKIPPED"
680*
681*                    Gather maximum of all CPU and WALL clock timings
682*
683                     CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1,
684     $                               WTIME )
685                     CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1,
686     $                               CTIME )
687*
688*                    Print results
689*
690                     IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
691*
692                        NOPS = 0
693                        NOPS2 = 0
694*
695                        N_FIRST = NB
696                        NPROCS_REAL = ( N-1 )/NB + 1
697                        N_LAST = MOD( N-1, NB ) + 1
698*
699*                       2 N bwl bwu + N (bwl) flops
700*                          for LU factorization
701*
702                        NOPS = 2*(DBLE(N)*DBLE(BWL)*
703     $                         DBLE(BWU)) +
704     $                         (DBLE(N)*DBLE(BWL))
705*
706*                       nrhs * 2 N*(bwl+bwu) flops for LU solve.
707*
708                        NOPS = NOPS +
709     $                  2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU))
710     $                   *DBLE(NRHS))
711*
712*                       Multiply by 4 to get complex count
713*
714                        NOPS = NOPS * DBLE(4)
715*
716*                       Second calc to represent actual hardware speed
717*
718*                     2*N_FIRST bwl*bwu Flops for LU
719*                       factorization in proc 1
720*
721                      NOPS2 = 2*( (DBLE(N_FIRST)*
722     $                  DBLE(BWL)*DBLE(BWU)))
723*
724                      IF ( NPROCS_REAL .GT. 1) THEN
725*                       8 N_LAST bwl*bwu
726*                         flops for LU and spike
727*                         calc in last processor
728*
729                        NOPS2 = NOPS2 +
730     $                          8*( (DBLE(N_LAST)*DBLE(BWL)
731     $                          *DBLE(BWU)) )
732                      ENDIF
733*
734                      IF ( NPROCS_REAL .GT. 2) THEN
735*                       8 NB bwl*bwu  flops for LU and spike
736*                         calc in other processors
737*
738                        NOPS2 = NOPS2 + (NPROCS_REAL-2)*
739     $                          8*( (DBLE(NB)*DBLE(BWL)
740     $                          *DBLE(BWU)) )
741                      ENDIF
742*
743*                     Reduced system
744*
745                      NOPS2 = NOPS2 +
746     $                  2*( NPROCS_REAL-1 ) *
747     $                  ( BWL*BWU*BWL/3 )
748                      IF( NPROCS_REAL .GT. 1 ) THEN
749                        NOPS2 = NOPS2 +
750     $                    2*( NPROCS_REAL-2 ) *
751     $                    (2*BWL*BWU*BWL)
752                      ENDIF
753*
754*                     Solve stage
755*
756*                     nrhs*2 n_first*
757*                        (bwl+bwu)
758*                        flops for L,U solve in proc 1.
759*
760                      NOPS2 = NOPS2 +
761     $                  2*
762     $                  DBLE(N_FIRST)*
763     $                  DBLE(NRHS) *
764     $                  ( DBLE(BWL)+DBLE(BWU))
765*
766                      IF ( NPROCS_REAL .GT. 1 ) THEN
767*
768*                       2*nrhs*2 n_last
769*                        (bwl+bwu)
770*                       flops for LU solve in other procs
771*
772                        NOPS2 = NOPS2 +
773     $                    4*
774     $                    (DBLE(N_LAST)*(DBLE(BWL)+
775     $                    DBLE(BWU)))*DBLE(NRHS)
776                      ENDIF
777*
778                      IF ( NPROCS_REAL .GT. 2 ) THEN
779*
780*                       2*nrhs*2 NB
781*                        (bwl+bwu)
782*                        flops for LU solve in other procs
783*
784                        NOPS2 = NOPS2 +
785     $                    ( NPROCS_REAL-2)*2*
786     $                 ( (DBLE(NB)*(DBLE(BWL)+
787     $                 DBLE(BWU)))*DBLE(NRHS) )
788                      ENDIF
789*
790*                     Reduced system
791*
792                      NOPS2 = NOPS2 +
793     $                  NRHS*( NPROCS_REAL-1)*2*(BWL*BWU )
794                      IF( NPROCS_REAL .GT. 1 ) THEN
795                        NOPS2 = NOPS2 +
796     $                   NRHS*( NPROCS_REAL-2 ) *
797     $                   ( 6 * BWL*BWU )
798                      ENDIF
799*
800*
801*                     Multiply by 4 to get complex count
802*
803                      NOPS2 = NOPS2 * DBLE(4)
804*
805*                       Calculate total megaflops - factorization and/or
806*                       solve -- for WALL and CPU time, and print output
807*
808*                       Print WALL time if machine supports it
809*
810                        IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN
811                           TMFLOPS = NOPS /
812     $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
813                        ELSE
814                           TMFLOPS = 0.0D+0
815                        END IF
816*
817                        IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN
818                           TMFLOPS2 = NOPS2 /
819     $                            ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
820                        ELSE
821                           TMFLOPS2 = 0.0D+0
822                        END IF
823*
824                        IF( WTIME( 2 ).GE.0.0D+0 )
825     $                     WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS,
826     $                            N,
827     $                            BWL, BWU,
828     $                            NB, NRHS, NPROW, NPCOL,
829     $                            WTIME( 1 ), WTIME( 2 ), TMFLOPS,
830     $                            TMFLOPS2, PASSED
831*
832*                       Print CPU time if machine supports it
833*
834                        IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
835                           TMFLOPS = NOPS /
836     $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
837                        ELSE
838                           TMFLOPS = 0.0D+0
839                        END IF
840*
841                        IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN
842                           TMFLOPS2 = NOPS2 /
843     $                            ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
844                        ELSE
845                           TMFLOPS2 = 0.0D+0
846                        END IF
847*
848                        IF( CTIME( 2 ).GE.0.0D+0 )
849     $                     WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS,
850     $                            N,
851     $                            BWL, BWU,
852     $                            NB, NRHS, NPROW, NPCOL,
853     $                            CTIME( 1 ), CTIME( 2 ), TMFLOPS,
854     $                            TMFLOPS2, PASSED
855*
856                     END IF
857   20          CONTINUE
858*
859*
860   30       CONTINUE
861*           NNB loop
862*
863   45      CONTINUE
864*          BW[] loop
865*
866   40   CONTINUE
867*       NMAT loop
868*
869        CALL BLACS_GRIDEXIT( ICTXT )
870        CALL BLACS_GRIDEXIT( ICTXTB )
871*
872   50   CONTINUE
873*       NGRIDS DROPOUT
874   60 CONTINUE
875*     NGRIDS loop
876*
877*     Print ending messages and close output file
878*
879      IF( IAM.EQ.0 ) THEN
880         KTESTS = KPASS + KFAIL + KSKIP
881         WRITE( NOUT, FMT = * )
882         WRITE( NOUT, FMT = 9992 ) KTESTS
883         IF( CHECK ) THEN
884            WRITE( NOUT, FMT = 9991 ) KPASS
885            WRITE( NOUT, FMT = 9989 ) KFAIL
886         ELSE
887            WRITE( NOUT, FMT = 9990 ) KPASS
888         END IF
889         WRITE( NOUT, FMT = 9988 ) KSKIP
890         WRITE( NOUT, FMT = * )
891         WRITE( NOUT, FMT = * )
892         WRITE( NOUT, FMT = 9987 )
893         IF( NOUT.NE.6 .AND. NOUT.NE.0 )
894     $      CLOSE ( NOUT )
895      END IF
896*
897      CALL BLACS_EXIT( 0 )
898*
899 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3,
900     $        '; It should be at least 1' )
901 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most',
902     $        I4 )
903 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
904 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
905     $        I11 )
906 9995 FORMAT( 'TIME TR      N  BWL BWU    NB  NRHS    P    Q L*U Time ',
907     $        'Slv Time   MFLOPS   MFLOP2  CHECK' )
908 9994 FORMAT( '---- -- ------  --- ---  ---- ----- ---- ---- -------- ',
909     $        '-------- -------- -------- ------' )
910 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5,
911     $                                          1X,I4,1X,I4,1X,F9.3,
912     $        F9.4,        F9.2,    F9.2, 1X, A6 )
913 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' )
914 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
915 9990 FORMAT( I5, ' tests completed without checking.' )
916 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
917 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
918 9987 FORMAT( 'END OF TESTS.' )
919 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 )
920 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 )
921*
922      STOP
923*
924*     End of PZDBTRS_DRIVER
925*
926      END
927*
928