1      SUBROUTINE PCLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK )
2*
3*  -- ScaLAPACK routine (version 1.7) --
4*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5*     and University of California, Berkeley.
6*     July 31, 2001
7*
8*     .. Scalar Arguments ..
9      INTEGER            I, K, L, LWORK
10      REAL               SMLNUM
11*     ..
12*     .. Array Arguments ..
13      INTEGER            DESCA( * )
14      COMPLEX            A( * ), BUF( * )
15*     ..
16*
17*  Purpose
18*  =======
19*
20*  PCLASMSUB looks for a small subdiagonal element from the bottom
21*     of the matrix that it can safely set to zero.
22*
23*  Notes
24*  =====
25*
26*  Each global data object is described by an associated description
27*  vector.  This vector stores the information required to establish
28*  the mapping between an object element and its corresponding process
29*  and memory location.
30*
31*  Let A be a generic term for any 2D block cyclicly distributed array.
32*  Such a global array has an associated description vector DESCA.
33*  In the following comments, the character _ should be read as
34*  "of the global array".
35*
36*  NOTATION        STORED IN      EXPLANATION
37*  --------------- -------------- --------------------------------------
38*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
39*                                 DTYPE_A = 1.
40*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
41*                                 the BLACS process grid A is distribu-
42*                                 ted over. The context itself is glo-
43*                                 bal, but the handle (the integer
44*                                 value) may vary.
45*  M_A    (global) DESCA( M_ )    The number of rows in the global
46*                                 array A.
47*  N_A    (global) DESCA( N_ )    The number of columns in the global
48*                                 array A.
49*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
50*                                 the rows of the array.
51*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
52*                                 the columns of the array.
53*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
54*                                 row of the array A is distributed.
55*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
56*                                 first column of the array A is
57*                                 distributed.
58*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
59*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
60*
61*  Let K be the number of rows or columns of a distributed matrix,
62*  and assume that its process grid has dimension p x q.
63*  LOCr( K ) denotes the number of elements of K that a process
64*  would receive if K were distributed over the p processes of its
65*  process column.
66*  Similarly, LOCc( K ) denotes the number of elements of K that a
67*  process would receive if K were distributed over the q processes of
68*  its process row.
69*  The values of LOCr() and LOCc() may be determined via a call to the
70*  ScaLAPACK tool function, NUMROC:
71*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
72*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
73*  An upper bound for these quantities may be computed by:
74*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
75*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
76*
77*  Arguments
78*  =========
79*
80*  A       (global input) COMPLEX array, dimension (DESCA(LLD_),*)
81*          On entry, the Hessenberg matrix whose tridiagonal part is
82*          being scanned.
83*          Unchanged on exit.
84*
85*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
86*          The array descriptor for the distributed matrix A.
87*
88*  I       (global input) INTEGER
89*          The global location of the bottom of the unreduced
90*          submatrix of A.
91*          Unchanged on exit.
92*
93*  L       (global input) INTEGER
94*          The global location of the top of the unreduced submatrix
95*          of A.
96*          Unchanged on exit.
97*
98*  K       (global output) INTEGER
99*          On exit, this yields the bottom portion of the unreduced
100*          submatrix.  This will satisfy: L <= M  <= I-1.
101*
102*  SMLNUM  (global input) REAL
103*          On entry, a "small number" for the given matrix.
104*          Unchanged on exit.
105*
106*  BUF     (local output) COMPLEX array of size LWORK.
107*
108*  LWORK   (global input) INTEGER
109*          On exit, LWORK is the size of the work buffer.
110*          This must be at least 2*Ceil( Ceil( (I-L)/HBL ) /
111*                                        LCM(NPROW,NPCOL) )
112*          Here LCM is least common multiple, and NPROWxNPCOL is the
113*          logical grid size.
114*
115*   Notes:
116*
117*     This routine does a global maximum and must be called by all
118*     processes.
119*
120*     This code is basically a parallelization of the following snip
121*     of LAPACK code from CLAHQR:
122*
123*        Look for a single small subdiagonal element.
124*
125*        DO 20 K = I, L + 1, -1
126*           TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) )
127*           IF( TST1.EQ.ZERO )
128*    $         TST1 = CLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
129*           IF( CABS1( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
130*    $         GO TO 30
131*  20    CONTINUE
132*  30    CONTINUE
133*
134*  Further Details
135*  ===============
136*
137*  Implemented by:  M. Fahey, May 28, 1999
138*
139*  =====================================================================
140*
141*     .. Parameters ..
142      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
143     $                   LLD_, MB_, M_, NB_, N_, RSRC_
144      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
145     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
146     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
147      REAL               ZERO
148      PARAMETER          ( ZERO = 0.0E+0 )
149*     ..
150*     .. Local Scalars ..
151      INTEGER            CONTXT, DOWN, HBL, IBUF1, IBUF2, ICOL1, ICOL2,
152     $                   II, III, IRCV1, IRCV2, IROW1, IROW2, ISRC,
153     $                   ISTR1, ISTR2, ITMP1, ITMP2, JJ, JJJ, JSRC, LDA,
154     $                   LEFT, MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM,
155     $                   RIGHT, UP
156      REAL               TST1, ULP
157      COMPLEX            CDUM, H10, H11, H22
158*     ..
159*     .. External Functions ..
160      INTEGER            ILCM, NUMROC
161      REAL               PSLAMCH
162      EXTERNAL           ILCM, NUMROC, PSLAMCH
163*     ..
164*     .. External Subroutines ..
165      EXTERNAL           BLACS_GRIDINFO, IGAMX2D, INFOG1L, INFOG2L,
166     $                   CGERV2D, CGESD2D
167*     ..
168*     .. Intrinsic Functions ..
169      INTRINSIC          ABS, REAL, AIMAG, MAX, MOD
170*     ..
171*     .. Statement Functions ..
172      REAL               CABS1
173*     ..
174*     .. Statement Function definitions ..
175      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
176*     ..
177*     .. Executable Statements ..
178*
179      HBL = DESCA( MB_ )
180      CONTXT = DESCA( CTXT_ )
181      LDA = DESCA( LLD_ )
182      ULP = PSLAMCH( CONTXT, 'PRECISION' )
183      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
184      LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
185      RIGHT = MOD( MYCOL+1, NPCOL )
186      UP = MOD( MYROW+NPROW-1, NPROW )
187      DOWN = MOD( MYROW+1, NPROW )
188      NUM = NPROW*NPCOL
189*
190*     BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS
191*     BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS
192*
193      ISTR1 = 0
194      ISTR2 = ( ( I-L ) / HBL )
195      IF( ISTR2*HBL.LT.( I-L ) )
196     $   ISTR2 = ISTR2 + 1
197      II = ISTR2 / ILCM( NPROW, NPCOL )
198      IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN
199         ISTR2 = II + 1
200      ELSE
201         ISTR2 = II
202      END IF
203      IF( LWORK.LT.2*ISTR2 ) THEN
204*
205*        Error!
206*
207         RETURN
208      END IF
209      CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1,
210     $              ICOL1, II, JJ )
211      MODKM1 = MOD( I-1+HBL, HBL )
212*
213*     COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO
214*     2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY
215*     UP THE TRIDIAGONAL
216*
217      IBUF1 = 0
218      IBUF2 = 0
219      IRCV1 = 0
220      IRCV2 = 0
221      DO 10 K = I, L + 1, -1
222         IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND.
223     $       ( RIGHT.EQ.JJ ) ) THEN
224*
225*           WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN
226*
227            IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN
228               CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW,
229     $                       MYCOL, IROW1, ICOL1, ISRC, JSRC )
230               IBUF1 = IBUF1 + 1
231               BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 )
232            END IF
233         END IF
234         IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND.
235     $       ( RIGHT.EQ.JJ ) ) THEN
236*
237*           WE MUST PACK H(K  ,K-1) AND SEND IT RIGHT
238*
239            IF( NPCOL.GT.1 ) THEN
240               CALL INFOG2L( K, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
241     $                       IROW1, ICOL1, ISRC, JSRC )
242               IBUF2 = IBUF2 + 1
243               BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 )
244            END IF
245         END IF
246*
247*        ADD UP THE RECEIVES
248*
249         IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
250            IF( ( MODKM1.EQ.0 ) .AND. ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.
251     $          1 ) ) ) THEN
252*
253*              WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP
254*
255               IRCV1 = IRCV1 + 1
256            END IF
257            IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) ) THEN
258*
259*              WE MUST RECEIVE H(K  ,K-1) FROM LEFT
260*
261               IRCV2 = IRCV2 + 1
262            END IF
263         END IF
264*
265*        POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0)
266*
267         IF( MODKM1.EQ.0 ) THEN
268            II = II - 1
269            JJ = JJ - 1
270            IF( II.LT.0 )
271     $         II = NPROW - 1
272            IF( JJ.LT.0 )
273     $         JJ = NPCOL - 1
274         END IF
275         MODKM1 = MODKM1 - 1
276         IF( MODKM1.LT.0 )
277     $      MODKM1 = HBL - 1
278   10 CONTINUE
279*
280*     SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND
281*
282      IF( IBUF1.GT.0 ) THEN
283         CALL CGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN,
284     $                 RIGHT )
285      END IF
286      IF( IBUF2.GT.0 ) THEN
287         CALL CGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, MYROW,
288     $                 RIGHT )
289      END IF
290*
291*     RECEIVE APPROPRIATE DATA IF THERE IS ANY
292*
293      IF( IRCV1.GT.0 ) THEN
294         CALL CGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP,
295     $                 LEFT )
296      END IF
297      IF( IRCV2.GT.0 ) THEN
298         CALL CGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, MYROW,
299     $                 LEFT )
300      END IF
301*
302*     START MAIN LOOP
303*
304      IBUF1 = 0
305      IBUF2 = 0
306      CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1,
307     $              ICOL1, II, JJ )
308      MODKM1 = MOD( I-1+HBL, HBL )
309*
310*        LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT.
311*
312*        Start loop for subdiagonal search
313*
314      DO 40 K = I, L + 1, -1
315         IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
316            IF( MODKM1.EQ.0 ) THEN
317*
318*                 Grab information from WORK array
319*
320               IF( NUM.GT.1 ) THEN
321                  IBUF1 = IBUF1 + 1
322                  H11 = BUF( ISTR1+IBUF1 )
323               ELSE
324                  H11 = A( ( ICOL1-2 )*LDA+IROW1-1 )
325               END IF
326               IF( NPCOL.GT.1 ) THEN
327                  IBUF2 = IBUF2 + 1
328                  H10 = BUF( ISTR2+IBUF2 )
329               ELSE
330                  H10 = A( ( ICOL1-2 )*LDA+IROW1 )
331               END IF
332            ELSE
333*
334*                 Information is local
335*
336               H11 = A( ( ICOL1-2 )*LDA+IROW1-1 )
337               H10 = A( ( ICOL1-2 )*LDA+IROW1 )
338            END IF
339            H22 = A( ( ICOL1-1 )*LDA+IROW1 )
340            TST1 = CABS1( H11 ) + CABS1( H22 )
341            IF( TST1.EQ.ZERO ) THEN
342*
343*                 FIND SOME NORM OF THE LOCAL H(L:I,L:I)
344*
345               CALL INFOG1L( L, HBL, NPROW, MYROW, 0, ITMP1, III )
346               IROW2 = NUMROC( I, HBL, MYROW, 0, NPROW )
347               CALL INFOG1L( L, HBL, NPCOL, MYCOL, 0, ITMP2, III )
348               ICOL2 = NUMROC( I, HBL, MYCOL, 0, NPCOL )
349               DO 30 III = ITMP1, IROW2
350                  DO 20 JJJ = ITMP2, ICOL2
351                     TST1 = TST1 + CABS1( A( ( JJJ-1 )*LDA+III ) )
352   20             CONTINUE
353   30          CONTINUE
354            END IF
355            IF( CABS1( H10 ).LE.MAX( ULP*TST1, SMLNUM ) )
356     $         GO TO 50
357            IROW1 = IROW1 - 1
358            ICOL1 = ICOL1 - 1
359         END IF
360         MODKM1 = MODKM1 - 1
361         IF( MODKM1.LT.0 )
362     $      MODKM1 = HBL - 1
363         IF( ( MODKM1.EQ.HBL-1 ) .AND. ( K.GT.2 ) ) THEN
364            II = MOD( II+NPROW-1, NPROW )
365            JJ = MOD( JJ+NPCOL-1, NPCOL )
366            CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
367     $                    IROW1, ICOL1, ITMP1, ITMP2 )
368         END IF
369   40 CONTINUE
370   50 CONTINUE
371      CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, K, 1, ITMP1, ITMP2, -1,
372     $              -1, -1 )
373      RETURN
374*
375*     End of PCLASMSUB
376*
377      END
378