1      DOUBLE PRECISION FUNCTION PDQRT14( TRANS, M, N, NRHS, A, IA, JA,
2     $                                   DESCA, X, IX, JX, DESCX, WORK )
3*
4*  -- ScaLAPACK routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 1, 1997
8*
9*     .. Scalar Arguments ..
10      CHARACTER          TRANS
11      INTEGER            IA, IX, JA, JX, M, N, NRHS
12*     ..
13*     .. Array Arguments ..
14      INTEGER            DESCA( * ), DESCX( * )
15      DOUBLE PRECISION   A( * ), WORK( * ), X( * )
16*     ..
17*
18*  Purpose
19*  =======
20*
21*  PDQRT14 checks whether sub( X ) is in the row space of sub( A ) or
22*  sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and
23*  sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and
24*  X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise.  It does so by scaling both
25*  sub( X ) and sub( A ) such that their norms are in the range
26*  [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of
27*  [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of
28*  [sub( A ),sub( X )] otherwise, and returning the norm of the trailing
29*  triangle, scaled by MAX(M,N,NRHS)*eps.
30*
31*  Notes
32*  =====
33*
34*  Each global data object is described by an associated description
35*  vector.  This vector stores the information required to establish
36*  the mapping between an object element and its corresponding process
37*  and memory location.
38*
39*  Let A be a generic term for any 2D block cyclicly distributed array.
40*  Such a global array has an associated description vector DESCA.
41*  In the following comments, the character _ should be read as
42*  "of the global array".
43*
44*  NOTATION        STORED IN      EXPLANATION
45*  --------------- -------------- --------------------------------------
46*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
47*                                 DTYPE_A = 1.
48*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49*                                 the BLACS process grid A is distribu-
50*                                 ted over. The context itself is glo-
51*                                 bal, but the handle (the integer
52*                                 value) may vary.
53*  M_A    (global) DESCA( M_ )    The number of rows in the global
54*                                 array A.
55*  N_A    (global) DESCA( N_ )    The number of columns in the global
56*                                 array A.
57*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
58*                                 the rows of the array.
59*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
60*                                 the columns of the array.
61*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62*                                 row of the array A is distributed.
63*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64*                                 first column of the array A is
65*                                 distributed.
66*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
67*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
68*
69*  Let K be the number of rows or columns of a distributed matrix,
70*  and assume that its process grid has dimension p x q.
71*  LOCr( K ) denotes the number of elements of K that a process
72*  would receive if K were distributed over the p processes of its
73*  process column.
74*  Similarly, LOCc( K ) denotes the number of elements of K that a
75*  process would receive if K were distributed over the q processes of
76*  its process row.
77*  The values of LOCr() and LOCc() may be determined via a call to the
78*  ScaLAPACK tool function, NUMROC:
79*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81*  An upper bound for these quantities may be computed by:
82*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84*
85*  Arguments
86*  =========
87*
88*  TRANS   (global input) CHARACTER*1
89*          = 'N':  No transpose, check for sub( X ) in the row space of
90*                  sub( A ),
91*          = 'T':  Transpose, check for sub( X ) in row space of
92*                  sub( A )'.
93*
94*  M       (global input) INTEGER
95*          The number of rows to be operated on, i.e. the number of rows
96*          of the distributed submatrix sub( A ). M >= 0.
97*
98*  N       (global input) INTEGER
99*          The number of columns to be operated on, i.e. the number of
100*          columns of the distributed submatrix sub( A ). N >= 0.
101*
102*  NRHS    (global input) INTEGER
103*          The number of right hand sides, i.e., the number of columns
104*          of the distributed submatrix sub( X ). NRHS >= 0.
105*
106*  A       (local input) DOUBLE PRECISION pointer into the local memory
107*          to an array of dimension (LLD_A, LOCc(JA+N-1)). This array
108*          contains the local pieces of the M-by-N distributed matrix
109*          sub( A ).
110*
111*  IA      (global input) INTEGER
112*          The row index in the global array A indicating the first
113*          row of sub( A ).
114*
115*  JA      (global input) INTEGER
116*          The column index in the global array A indicating the
117*          first column of sub( A ).
118*
119*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
120*          The array descriptor for the distributed matrix A.
121*
122*  X       (local input) DOUBLE PRECISION pointer into the local
123*          memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)).
124*          On entry, this array contains the local pieces of the
125*          N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N',
126*          and the M-by-NRHS distributed submatrix sub( X ) otherwise.
127*
128*  IX      (global input) INTEGER
129*          The row index in the global array X indicating the first
130*          row of sub( X ).
131*
132*  JX      (global input) INTEGER
133*          The column index in the global array X indicating the
134*          first column of sub( X ).
135*
136*  DESCX   (global and local input) INTEGER array of dimension DLEN_.
137*          The array descriptor for the distributed matrix X.
138*
139*  WORK    (local workspace) DOUBLE PRECISION array dimension (LWORK)
140*          If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and
141*          LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where
142*
143*          IF TRANS='N', (LQ fact)
144*            MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW,
145*                             NPROW )
146*            LTAU   = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW,
147*                             RSRC_A, NPROW )
148*            LWF    = MB_A * ( MB_A + MNRHSP + NQ0 )
149*          ELSE         (QR fact)
150*            NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL,
151*                             NPCOL )
152*            LTAU   = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL,
153*                             CSRC_A, NPCOL )
154*            LWF    = NB_A * ( NB_A + MP0 + NNRHSQ )
155*          END IF
156*
157*          and,
158*
159*          IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
160*          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
161*          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
162*          MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
163*          NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ).
164*
165*          INDXG2P and NUMROC are ScaLAPACK tool functions;
166*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
167*          the subroutine BLACS_GRIDINFO.
168*
169*
170*  =====================================================================
171*
172*     .. Parameters ..
173      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
174     $                   LLD_, MB_, M_, NB_, N_, RSRC_
175      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
176     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
177     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
178      DOUBLE PRECISION   ONE, ZERO
179      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
180*     ..
181*     .. Local Scalars ..
182      LOGICAL            TPSD
183      INTEGER            IACOL, IAROW, ICOFFA, ICTXT, IDUM, IIA, INFO,
184     $                   IPTAU, IPW, IPWA, IROFFA, IWA, IWX, J, JJA,
185     $                   JWA, JWX, LDW, LWORK, MPWA, MPW, MQW, MYCOL,
186     $                   MYROW, NPCOL, NPROW, NPW, NQWA, NQW
187      DOUBLE PRECISION   AMAX, ANRM, ERR, XNRM
188*     ..
189*     .. Local Arrays ..
190      INTEGER            DESCW( DLEN_ ), IDUM1( 1 ), IDUM2( 1 )
191      DOUBLE PRECISION   RWORK( 1 )
192*     ..
193*     .. External Functions ..
194      LOGICAL            LSAME
195      INTEGER            NUMROC
196      DOUBLE PRECISION   PDLANGE, PDLAMCH
197      EXTERNAL           LSAME, NUMROC, PDLANGE, PDLAMCH
198*     ..
199*     .. External Subroutines ..
200      EXTERNAL           BLACS_GRIDINFO, DESCSET, DGAMX2D, INFOG2L,
201     $                   PDAMAX, PDCOPY, PDGELQF, PDGEQRF,
202     $                   PDLACPY, PDLASCL, PXERBLA
203*     ..
204*     .. Intrinsic Functions ..
205      INTRINSIC          ABS, DBLE, MAX, MIN, MOD
206*     ..
207*     .. Executable Statements ..
208*
209*     Get grid parameters
210*
211      ICTXT = DESCA( CTXT_ )
212      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
213*
214      PDQRT14 = ZERO
215*
216      IPWA = 1
217      IROFFA = MOD( IA-1, DESCA( MB_ ) )
218      ICOFFA = MOD( JA-1, DESCA( NB_ ) )
219      IWA = IROFFA + 1
220      JWA = ICOFFA + 1
221      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA,
222     $              JJA, IAROW, IACOL )
223      MPWA = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW )
224      NQWA = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
225*
226      INFO = 0
227      IF( LSAME( TRANS, 'N' ) ) THEN
228         IF( N.LE.0 .OR. NRHS.LE.0 )
229     $      RETURN
230         TPSD = .FALSE.
231         MPW = NUMROC( M+NRHS+IROFFA, DESCA( MB_ ), MYROW, IAROW,
232     $                 NPROW )
233         NQW = NQWA
234*
235*        Assign descriptor DESCW for workspace WORK and pointers to
236*        matrices sub( A ) and sub( X ) in workspace
237*
238         IWX = IWA + M
239         JWX = JWA
240         LDW = MAX( 1, MPW )
241         CALL DESCSET( DESCW, M+NRHS+IROFFA, N+ICOFFA, DESCA( MB_ ),
242     $                 DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW )
243*
244      ELSE IF( LSAME( TRANS, 'T' ) ) THEN
245         IF( M.LE.0 .OR. NRHS.LE.0 )
246     $      RETURN
247         TPSD = .TRUE.
248         MPW = MPWA
249         NQW = NUMROC( N+NRHS+ICOFFA, DESCA( NB_ ), MYCOL, IACOL,
250     $                 NPCOL )
251*
252*        Assign descriptor DESCW for workspace WORK and pointers to
253*        matrices sub( A ) and sub( X ) in workspace
254*
255         IWX = IWA
256         JWX = JWA + N
257         LDW = MAX( 1, MPW )
258         CALL DESCSET( DESCW, M+IROFFA, N+NRHS+ICOFFA, DESCA( MB_ ),
259     $                 DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW )
260      ELSE
261         CALL PXERBLA( ICTXT, 'PDQRT14', -1 )
262         RETURN
263      END IF
264*
265*     Copy and scale sub( A )
266*
267      IPTAU = IPWA + MPW*NQW
268      CALL PDLACPY( 'All', M, N, A, IA, JA, DESCA, WORK( IPWA ), IWA,
269     $              JWA, DESCW )
270      RWORK( 1 ) = ZERO
271      ANRM = PDLANGE( 'M', M, N, WORK( IPWA ), IWA, JWA, DESCW, RWORK )
272      IF( ANRM.NE.ZERO )
273     $   CALL PDLASCL( 'G', ANRM, ONE, M, N, WORK( IPWA ), IWA,
274     $                 JWA, DESCW, INFO )
275*
276*     Copy sub( X ) or sub( X )' into the right place and scale it
277*
278      IF( TPSD ) THEN
279*
280*        Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work
281*
282         DO 10 J = 1, NRHS
283            CALL PDCOPY( M, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), IWX,
284     $                   JWX+J-1, DESCW, 1 )
285   10    CONTINUE
286         XNRM = PDLANGE( 'M', M, NRHS, WORK( IPWA ), IWX, JWX, DESCW,
287     $                   RWORK )
288         IF( XNRM.NE.ZERO )
289     $      CALL PDLASCL( 'G', XNRM, ONE, M, NRHS, WORK( IPWA ), IWX,
290     $                    JWX, DESCW, INFO )
291*
292*        Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1)
293*
294         MQW = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
295         IPW = IPTAU + MIN( MQW, NQW )
296         LWORK = DESCW( NB_ ) * ( MPW + NQW + DESCW( NB_ ) )
297         CALL PDGEQRF( M, N+NRHS, WORK( IPWA ), IWA, JWA, DESCW,
298     $                WORK( IPTAU ), WORK( IPW ), LWORK, INFO )
299*
300*        Compute largest entry in upper triangle of
301*        work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1)
302*
303         ERR = ZERO
304         IF( N.LT.M ) THEN
305            DO 20 J = JWX, JWA+N+NRHS-1
306               CALL PDAMAX( MIN(M-N,J-JWX+1), AMAX, IDUM, WORK( IPWA ),
307     $                      IWA+N, J, DESCW, 1 )
308               ERR = MAX( ERR, ABS( AMAX ) )
309   20       CONTINUE
310         END IF
311         CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2,
312     $                 -1, -1, 0 )
313*
314      ELSE
315*
316*        Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work
317*
318         DO 30 J = 1, NRHS
319            CALL PDCOPY( N, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ),
320     $                   IWX+J-1, JWX, DESCW, DESCW( M_ ) )
321   30    CONTINUE
322*
323         XNRM = PDLANGE( 'M', NRHS, N, WORK( IPWA ), IWX, JWX, DESCW,
324     $                   RWORK )
325         IF( XNRM.NE.ZERO )
326     $      CALL PDLASCL( 'G', XNRM, ONE, NRHS, N, WORK( IPWA ), IWX,
327     $                    JWX, DESCW, INFO )
328*
329*        Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1)
330*
331         NPW = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW )
332         IPW = IPTAU + MIN( MPW, NPW )
333         LWORK = DESCW( MB_ ) * ( MPW + NQW + DESCW( MB_ ) )
334         CALL PDGELQF( M+NRHS, N, WORK( IPWA ), IWA, JWA, DESCW,
335     $                 WORK( IPTAU ), WORK( IPW ), LWORK, INFO )
336*
337*        Compute largest entry in lower triangle in
338*        work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1)
339*
340         ERR = ZERO
341         DO 40 J = JWA+M, MIN( JWA+N-1, JWA+M+NRHS-1 )
342            CALL PDAMAX( JWA+M+NRHS-J, AMAX, IDUM, WORK( IPWA ),
343     $                   IWX+J-JWA-M, J, DESCW, 1 )
344            ERR = MAX( ERR, ABS( AMAX ) )
345   40    CONTINUE
346         CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2,
347     $                 -1, -1, 0 )
348*
349      END IF
350*
351      PDQRT14 = ERR / ( DBLE( MAX( M, N, NRHS ) ) *
352     $          PDLAMCH( ICTXT, 'Epsilon' ) )
353*
354      RETURN
355*
356*     End of PDQRT14
357*
358      END
359