1      SUBROUTINE PCLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED,
2     $                     ANORM, FRESID, WORK )
3*
4*  -- ScaLAPACK auxiliary routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 1, 1997
8*
9*     .. Scalar Arguments ..
10      CHARACTER          AFORM, DIAG
11      INTEGER            IA, IASEED, JA, M, N
12      REAL               ANORM, FRESID
13*     ..
14*     .. Array Arguments ..
15      INTEGER            DESCA( * )
16      COMPLEX            A( * ), WORK( * )
17*     ..
18*
19*  Purpose
20*  =======
21*
22*  PCLAFCHK computes the residual
23*       || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)),
24*  where Ao will be regenerated by the parallel random matrix generator,
25*  sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the infini-
26*  ty norm.
27*
28*  Notes
29*  =====
30*
31*  Each global data object is described by an associated description
32*  vector.  This vector stores the information required to establish
33*  the mapping between an object element and its corresponding process
34*  and memory location.
35*
36*  Let A be a generic term for any 2D block cyclicly distributed array.
37*  Such a global array has an associated description vector DESCA.
38*  In the following comments, the character _ should be read as
39*  "of the global array".
40*
41*  NOTATION        STORED IN      EXPLANATION
42*  --------------- -------------- --------------------------------------
43*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
44*                                 DTYPE_A = 1.
45*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
46*                                 the BLACS process grid A is distribu-
47*                                 ted over. The context itself is glo-
48*                                 bal, but the handle (the integer
49*                                 value) may vary.
50*  M_A    (global) DESCA( M_ )    The number of rows in the global
51*                                 array A.
52*  N_A    (global) DESCA( N_ )    The number of columns in the global
53*                                 array A.
54*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
55*                                 the rows of the array.
56*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
57*                                 the columns of the array.
58*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
59*                                 row of the array A is distributed.
60*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
61*                                 first column of the array A is
62*                                 distributed.
63*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
64*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
65*
66*  Let K be the number of rows or columns of a distributed matrix,
67*  and assume that its process grid has dimension p x q.
68*  LOCr( K ) denotes the number of elements of K that a process
69*  would receive if K were distributed over the p processes of its
70*  process column.
71*  Similarly, LOCc( K ) denotes the number of elements of K that a
72*  process would receive if K were distributed over the q processes of
73*  its process row.
74*  The values of LOCr() and LOCc() may be determined via a call to the
75*  ScaLAPACK tool function, NUMROC:
76*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
77*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
78*  An upper bound for these quantities may be computed by:
79*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
80*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
81*
82*  Arguments
83*  =========
84*
85*  AFORM   (global input) CHARACTER
86*          sub( A ) is overwritten with:
87*             - a symmetric matrix, if AFORM = 'S';
88*             - a Hermitian matrix, if AFORM = 'H';
89*             - the transpose of what would normally be generated,
90*               if AFORM = 'T';
91*             - the conjugate transpose of what would normally be
92*               generated, if AFORM = 'C';
93*             - otherwise a random matrix.
94*
95*  DIAG    (global input) CHARACTER
96*          if DIAG = 'D' : sub( A ) is diagonally dominant.
97*
98*  M       (global input) INTEGER
99*          The number of rows to be operated on, i.e. the number of rows
100*          of the distributed submatrix sub( A ). M >= 0.
101*
102*  N       (global input) INTEGER
103*          The number of columns to be operated on, i.e. the number of
104*          columns of the distributed submatrix sub( A ). N >= 0.
105*
106*  A       (local input/local output) COMPLEX pointer into the
107*          local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
108*          On entry, this array contains the local pieces of the M-by-N
109*          distributed matrix sub( A ) to be checked. On exit, this
110*          array contains the local pieces of the difference
111*          sub( A ) - sub( Ao ).
112*
113*  IA      (global input) INTEGER
114*          The row index in the global array A indicating the first
115*          row of sub( A ).
116*
117*  JA      (global input) INTEGER
118*          The column index in the global array A indicating the
119*          first column of sub( A ).
120*
121*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
122*          The array descriptor for the distributed matrix A.
123*
124*  IASEED  (global input) INTEGER
125*          The seed number to generate the original matrix Ao.
126*
127*  ANORM   (global input) REAL
128*          The Infinity norm of sub( A ).
129*
130*  FRESID  (global output) REAL
131*          The maximum (worst) factorizational error.
132*
133*  WORK    (local workspace) COMPLEX array, dimension (LWORK).
134*          LWORK >= MpA0 * NB_A, where
135*
136*          IROFFA = MOD( IA-1, MB_A ),
137*          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
138*          MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
139*
140*          WORK is used to store a block of columns of sub( A ).
141*          INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
142*          MYCOL, NPROW and NPCOL can be determined by calling the
143*          subroutine BLACS_GRIDINFO.
144*
145*  =====================================================================
146*
147*     .. Parameters ..
148      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
149     $                   LLD_, MB_, M_, NB_, N_, RSRC_
150      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
151     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
152     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
153      COMPLEX            ONE
154      PARAMETER          ( ONE = (1.0E+0, 0.0E+0) )
155*     ..
156*     .. Local Scalars ..
157      INTEGER            IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW,
158     $                   II, IIA, IOFFA, IROFF, JB, JJ, JJA, JN, KK,
159     $                   LDA, LDW, LDWP1, MP, MYCOL, MYROW, NPCOL,
160     $                   NPROW, NQ
161      REAL               EPS
162*     ..
163*     .. External Subroutines ..
164      EXTERNAL           BLACS_GRIDINFO, CMATADD, INFOG2L, PCMATGEN
165*     ..
166*     .. External Functions ..
167      LOGICAL            LSAME
168      INTEGER            ICEIL, NUMROC
169      REAL               PSLAMCH, PCLANGE
170      EXTERNAL           ICEIL, LSAME, NUMROC, PCLANGE, PSLAMCH
171*     ..
172*     .. Intrinsic Functions ..
173      INTRINSIC          MAX, MIN, MOD, REAL
174*     ..
175*     .. Executable Statements ..
176*
177      ICTXT = DESCA( CTXT_ )
178      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
179      EPS = PSLAMCH( ICTXT, 'eps' )
180      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
181     $              IAROW, IACOL )
182*
183*     Compute sub( A ) := sub( A ) - sub( Ao )
184*
185      IROFF = MOD( IA-1, DESCA( MB_ ) )
186      ICOFF = MOD( JA-1, DESCA( NB_ ) )
187      MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW )
188      NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
189      IF( MYROW.EQ.IAROW )
190     $   MP = MP-IROFF
191      IF( MYCOL.EQ.IACOL )
192     $   NQ = NQ-ICOFF
193      JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 )
194      LDW = MAX( 1, MP )
195      LDWP1 = LDW + 1
196      LDA = DESCA( LLD_ )
197      IOFFA = IIA + ( JJA - 1 )*LDA
198*
199      IF( LSAME( AFORM, 'H' ) ) THEN
200*
201*        Handle first block of columns separately
202*
203         II = 1
204         ICURROW = IAROW
205         ICURCOL = IACOL
206         JB = JN - JA + 1
207*
208         IF( MYCOL.EQ.ICURCOL ) THEN
209            CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ),
210     $                     DESCA( MB_ ), DESCA( NB_ ), WORK, LDW,
211     $                     DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED,
212     $                     IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW,
213     $                     NPCOL )
214            IF( MYROW.EQ.ICURROW ) THEN
215               DO 10, KK = 0, JB-1
216                  WORK( II+KK*LDWP1 ) = REAL( WORK( II+KK*LDWP1 ) )
217   10          CONTINUE
218            END IF
219            CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ),
220     $                    LDA )
221            JJA = JJA + JB
222            IOFFA = IOFFA + JB*LDA
223         END IF
224*
225         IF( MYROW.EQ.ICURROW )
226     $      II = II + JB
227         ICURROW = MOD( ICURROW+1, NPROW )
228         ICURCOL = MOD( ICURCOL+1, NPCOL )
229*
230         DO 30, JJ = JN+1, JA+N-1, DESCA( NB_ )
231            JB = MIN( JA+N-JJ, DESCA( NB_ ) )
232*
233            IF( MYCOL.EQ.ICURCOL ) THEN
234               CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ),
235     $                        DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ),
236     $                        WORK, LDW, DESCA( RSRC_ ), DESCA( CSRC_ ),
237     $                        IASEED, IIA-1, MP, JJA-1, JB, MYROW,
238     $                        MYCOL, NPROW, NPCOL )
239               IF( MYROW.EQ.ICURROW ) THEN
240                  DO 20, KK = 0, JB-1
241                     WORK( II+KK*LDWP1 ) = REAL( WORK( II+KK*LDWP1 ) )
242   20             CONTINUE
243               END IF
244               CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ),
245     $                       LDA )
246               JJA = JJA + JB
247               IOFFA = IOFFA + JB*LDA
248            END IF
249            IF( MYROW.EQ.ICURROW )
250     $         II = II + JB
251            ICURROW = MOD( ICURROW+1, NPROW )
252            ICURCOL = MOD( ICURCOL+1, NPCOL )
253   30    CONTINUE
254*
255      ELSE
256*
257*        Handle first block of columns separately
258*
259         IF( MYCOL.EQ.IACOL ) THEN
260            JB = JN-JA+1
261            CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ),
262     $                     DESCA( MB_ ), DESCA( NB_ ), WORK, LDW,
263     $                     DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED,
264     $                     IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW,
265     $                     NPCOL )
266            CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ),
267     $                    LDA )
268            JJA = JJA + JB
269            NQ  = NQ - JB
270            IOFFA = IOFFA + JB * LDA
271         END IF
272*
273*        Handle the remaning blocks of columns
274*
275         DO 40 JJ = JJA, JJA+NQ-1, DESCA( NB_ )
276            JB = MIN( DESCA( NB_ ), JJA+NQ-JJ )
277            IOFFA = IIA + ( JJ - 1 )*LDA
278            CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ),
279     $                     DESCA( MB_ ), DESCA( NB_ ), WORK, LDW,
280     $                     DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED,
281     $                     IIA-1, MP, JJ-1, JB, MYROW, MYCOL, NPROW,
282     $                     NPCOL )
283            CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ),
284     $                    LDA )
285   40    CONTINUE
286*
287      END IF
288*
289*     Calculate factor residual
290*
291      FRESID = PCLANGE( 'I', M, N, A, IA, JA, DESCA, WORK ) /
292     $                  ( MAX( M, N ) * EPS * ANORM )
293*
294      RETURN
295*
296*     End PCLAFCHK
297*
298      END
299