1      SUBROUTINE PDQRT16( TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX,
2     $                    JX, DESCX, B, IB, JB, DESCB, RWORK, RESID )
3*
4*  -- ScaLAPACK routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 1, 1997
8*
9*     .. Scalar Arguments ..
10      CHARACTER          TRANS
11      INTEGER            IA, IB, IX, JA, JB, JX, M, N, NRHS
12      DOUBLE PRECISION   RESID
13*     ..
14*     .. Array Arguments ..
15      INTEGER            DESCA( * ), DESCB( * ), DESCX( * )
16      DOUBLE PRECISION   A( * ), B( * ), RWORK( * ), X( * )
17*     ..
18*
19*  Purpose
20*  =======
21*
22*  PDQRT16 computes the residual for a solution of a system of linear
23*  equations  sub( A )*sub( X ) = B  or  sub( A' )*sub( X ) = B:
24*     RESID = norm(B - sub( A )*sub( X ) ) /
25*             ( max(m,n) * norm(sub( A ) ) * norm(sub( X ) ) * EPS ),
26*  where EPS is the machine epsilon, sub( A ) denotes
27*  A(IA:IA+N-1,JA,JA+N-1), and sub( X ) denotes
28*  X(IX:IX+N-1, JX:JX+NRHS-1).
29*
30*  Notes
31*  =====
32*
33*  Each global data object is described by an associated description
34*  vector.  This vector stores the information required to establish
35*  the mapping between an object element and its corresponding process
36*  and memory location.
37*
38*  Let A be a generic term for any 2D block cyclicly distributed array.
39*  Such a global array has an associated description vector DESCA.
40*  In the following comments, the character _ should be read as
41*  "of the global array".
42*
43*  NOTATION        STORED IN      EXPLANATION
44*  --------------- -------------- --------------------------------------
45*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
46*                                 DTYPE_A = 1.
47*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48*                                 the BLACS process grid A is distribu-
49*                                 ted over. The context itself is glo-
50*                                 bal, but the handle (the integer
51*                                 value) may vary.
52*  M_A    (global) DESCA( M_ )    The number of rows in the global
53*                                 array A.
54*  N_A    (global) DESCA( N_ )    The number of columns in the global
55*                                 array A.
56*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
57*                                 the rows of the array.
58*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
59*                                 the columns of the array.
60*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61*                                 row of the array A is distributed.
62*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63*                                 first column of the array A is
64*                                 distributed.
65*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
66*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
67*
68*  Let K be the number of rows or columns of a distributed matrix,
69*  and assume that its process grid has dimension p x q.
70*  LOCr( K ) denotes the number of elements of K that a process
71*  would receive if K were distributed over the p processes of its
72*  process column.
73*  Similarly, LOCc( K ) denotes the number of elements of K that a
74*  process would receive if K were distributed over the q processes of
75*  its process row.
76*  The values of LOCr() and LOCc() may be determined via a call to the
77*  ScaLAPACK tool function, NUMROC:
78*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80*  An upper bound for these quantities may be computed by:
81*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83*
84*  Arguments
85*  =========
86*
87*  TRANS   (global input) CHARACTER*1
88*          Specifies the form of the system of equations:
89*          = 'N':  sub( A )*sub( X ) = sub( B )
90*          = 'T':  sub( A' )*sub( X )= sub( B ), where A' is the
91*                  transpose of sub( A ).
92*          = 'C':  sub( A' )*sub( X )= B, where A' is the transpose
93*                  of sub( A ).
94*
95*  M       (global input) INTEGER
96*          The number of rows to be operated on, i.e. the number of rows
97*          of the distributed submatrix sub( A ). M >= 0.
98*
99*  N       (global input) INTEGER
100*          The number of columns to be operated on, i.e. the number of
101*          columns of the distributed submatrix sub( A ). N >= 0.
102*
103*  NRHS    (global input) INTEGER
104*          The number of right hand sides, i.e., the number of columns
105*          of the distributed submatrix sub( B ). NRHS >= 0.
106*
107*  A       (local input) DOUBLE PRECISION pointer into the local
108*          memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
109*          The original M x N matrix A.
110*
111*  IA      (global input) INTEGER
112*          The row index in the global array A indicating the first
113*          row of sub( A ).
114*
115*  JA      (global input) INTEGER
116*          The column index in the global array A indicating the
117*          first column of sub( A ).
118*
119*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
120*          The array descriptor for the distributed matrix A.
121*
122*  X       (local input) DOUBLE PRECISION pointer into the local
123*          memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). This
124*          array contains the local pieces of the computed solution
125*          distributed vectors for the system of linear equations.
126*
127*  IX      (global input) INTEGER
128*          The row index in the global array X indicating the first
129*          row of sub( X ).
130*
131*  JX      (global input) INTEGER
132*          The column index in the global array X indicating the
133*          first column of sub( X ).
134*
135*  DESCX   (global and local input) INTEGER array of dimension DLEN_.
136*          The array descriptor for the distributed matrix X.
137*
138*  B       (local input/local output) DOUBLE PRECISION pointer into
139*          the local memory to an array of dimension
140*          (LLD_B,LOCc(JB+NRHS-1)).  On entry, this array contains the
141*          local pieces of the distributes right hand side vectors for
142*          the system of linear equations. On exit, sub( B ) is over-
143*          written with the difference sub( B ) - sub( A )*sub( X ) or
144*          sub( B ) - sub( A )'*sub( X ).
145*
146*  IB      (global input) INTEGER
147*          The row index in the global array B indicating the first
148*          row of sub( B ).
149*
150*  JB      (global input) INTEGER
151*          The column index in the global array B indicating the
152*          first column of sub( B ).
153*
154*  DESCB   (global and local input) INTEGER array of dimension DLEN_.
155*          The array descriptor for the distributed matrix B.
156*
157*  RWORK   (local workspace) DOUBLE PRECISION array, dimension (LRWORK)
158*          LWORK >= Nq0 if TRANS = 'N', and LRWORK >= Mp0 otherwise.
159*
160*          where
161*
162*          IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
163*          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
164*          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
165*          Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
166*          Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
167*
168*          INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
169*          MYCOL, NPROW and NPCOL can be determined by calling the
170*          subroutine BLACS_GRIDINFO.
171*
172*  RESID   (global output) DOUBLE PRECISION
173*          The maximum over the number of right hand sides of
174*          norm( sub( B )- sub( A )*sub( X ) ) /
175*          ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ).
176*
177*  =====================================================================
178*
179*     .. Parameters ..
180      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
181     $                   LLD_, MB_, M_, NB_, N_, RSRC_
182      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
183     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
184     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
185      DOUBLE PRECISION   ZERO, ONE
186      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
187*     ..
188*     .. Local Scalars ..
189      INTEGER            ICTXT, IDUMM, J, MYCOL, MYROW, N1, N2, NPCOL,
190     $                   NPROW
191      DOUBLE PRECISION   ANORM, BNORM, EPS, XNORM
192*     ..
193*     .. Local Arrays ..
194      DOUBLE PRECISION   TEMP( 2 )
195*     ..
196*     .. External Functions ..
197      LOGICAL            LSAME
198      DOUBLE PRECISION   PDLAMCH, PDLANGE
199      EXTERNAL           LSAME, PDLAMCH, PDLANGE
200*     ..
201*     .. External Subroutines ..
202      EXTERNAL           BLACS_GRIDINFO, DGAMX2D, PDASUM, PDGEMM
203*     ..
204*     .. Intrinsic Functions ..
205      INTRINSIC          MAX
206*     ..
207*     .. Executable Statements ..
208*
209*     Get grid parameters
210*
211      ICTXT = DESCA( CTXT_ )
212      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
213*
214*     Quick exit if M = 0 or N = 0 or NRHS = 0
215*
216      IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.EQ.0 ) THEN
217         RESID = ZERO
218         RETURN
219      END IF
220*
221      IF( LSAME( TRANS, 'T' ) .OR. LSAME( TRANS, 'C' ) ) THEN
222         ANORM = PDLANGE( 'I', M, N, A, IA, JA, DESCA, RWORK )
223         N1 = N
224         N2 = M
225      ELSE
226         ANORM = PDLANGE( '1', M, N, A, IA, JA, DESCA, RWORK )
227         N1 = M
228         N2 = N
229      END IF
230*
231      EPS = PDLAMCH( ICTXT, 'Epsilon' )
232*
233*     Compute  B - sub( A )*sub( X )  (or  B - sub( A' )*sub( X ) ) and
234*     store in B.
235*
236      CALL PDGEMM( TRANS, 'No transpose', N1, NRHS, N2, -ONE, A, IA,
237     $            JA, DESCA, X, IX, JX, DESCX, ONE, B, IB, JB, DESCB )
238*
239*     Compute the maximum over the number of right hand sides of
240*        norm( sub( B ) - sub( A )*sub( X ) ) /
241*        ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ).
242*
243      RESID = ZERO
244      DO 10 J = 1, NRHS
245*
246         CALL PDASUM( N1, BNORM, B, IB, JB+J-1, DESCB, 1 )
247         CALL PDASUM( N2, XNORM, X, IX, JX+J-1, DESCX, 1 )
248*
249*        Only the process columns owning the vector operands will have
250*        the correct result, the other will have zero.
251*
252         TEMP( 1 ) = BNORM
253         TEMP( 2 ) = XNORM
254         IDUMM = 0
255         CALL DGAMX2D( ICTXT, 'All', ' ', 2, 1, TEMP, 2, IDUMM, IDUMM,
256     $                 -1, -1, IDUMM )
257         BNORM = TEMP( 1 )
258         XNORM = TEMP( 2 )
259*
260*        Every processes have ANORM, BNORM and XNORM now.
261*
262         IF( ANORM.EQ.ZERO .AND. BNORM.EQ.ZERO ) THEN
263            RESID = ZERO
264         ELSE IF( ANORM.LE.ZERO .OR. XNORM.LE.ZERO ) THEN
265            RESID = ONE / EPS
266         ELSE
267            RESID = MAX( RESID, ( ( BNORM / ANORM ) / XNORM ) /
268     $              ( MAX( M, N )*EPS ) )
269         END IF
270*
271   10 CONTINUE
272*
273      RETURN
274*
275*     End of PDQRT16
276*
277      END
278