1      SUBROUTINE PSLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
2     $                    SR, SI, H, DESCH, ILOZ, IHIZ, Z, DESCZ, WORK,
3     $                    LWORK, IWORK, LIWORK )
4*
5*     Contribution from the Department of Computing Science and HPC2N,
6*     Umea University, Sweden
7*
8*  -- ScaLAPACK auxiliary routine (version 2.0.2) --
9*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10*     May 1 2012
11*
12      IMPLICIT NONE
13*
14*     .. Scalar Arguments ..
15      INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, N, NSHFTS,
16     $                   LWORK, LIWORK
17      LOGICAL            WANTT, WANTZ
18*     ..
19*     .. Array Arguments ..
20      INTEGER            DESCH( * ), DESCZ( * ), IWORK( * )
21      REAL               H( * ), SI( * ), SR( * ), Z( * ), WORK( * )
22*     ..
23*
24*  Purpose
25*  =======
26*
27*  This auxiliary subroutine called by PSLAQR0 performs a
28*  single small-bulge multi-shift QR sweep by chasing separated
29*  groups of bulges along the main block diagonal of H.
30*
31*   WANTT  (global input) logical scalar
32*          WANTT = .TRUE. if the quasi-triangular Schur factor
33*          is being computed.  WANTT is set to .FALSE. otherwise.
34*
35*   WANTZ  (global input) logical scalar
36*          WANTZ = .TRUE. if the orthogonal Schur factor is being
37*          computed.  WANTZ is set to .FALSE. otherwise.
38*
39*   KACC22 (global input) integer with value 0, 1, or 2.
40*          Specifies the computation mode of far-from-diagonal
41*          orthogonal updates.
42*     = 1: PSLAQR5 accumulates reflections and uses matrix-matrix
43*          multiply to update the far-from-diagonal matrix entries.
44*     = 2: PSLAQR5 accumulates reflections, uses matrix-matrix
45*          multiply to update the far-from-diagonal matrix entries,
46*          and takes advantage of 2-by-2 block structure during
47*          matrix multiplies.
48*
49*   N      (global input) integer scalar
50*          N is the order of the Hessenberg matrix H upon which this
51*          subroutine operates.
52*
53*   KTOP   (global input) integer scalar
54*   KBOT   (global input) integer scalar
55*          These are the first and last rows and columns of an
56*          isolated diagonal block upon which the QR sweep is to be
57*          applied. It is assumed without a check that
58*                    either KTOP = 1  or   H(KTOP,KTOP-1) = 0
59*          and
60*                    either KBOT = N  or   H(KBOT+1,KBOT) = 0.
61*
62*   NSHFTS (global input) integer scalar
63*          NSHFTS gives the number of simultaneous shifts.  NSHFTS
64*          must be positive and even.
65*
66*   SR     (global input) REAL             array of size (NSHFTS)
67*   SI     (global input) REAL             array of size (NSHFTS)
68*          SR contains the real parts and SI contains the imaginary
69*          parts of the NSHFTS shifts of origin that define the
70*          multi-shift QR sweep.
71*
72*   H      (local input/output) REAL             array of size
73*          (DESCH(LLD_),*)
74*          On input H contains a Hessenberg matrix.  On output a
75*          multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
76*          to the isolated diagonal block in rows and columns KTOP
77*          through KBOT.
78*
79*   DESCH  (global and local input) INTEGER array of dimension DLEN_.
80*          The array descriptor for the distributed matrix H.
81*
82*   ILOZ   (global input) INTEGER
83*   IHIZ   (global input) INTEGER
84*          Specify the rows of Z to which transformations must be
85*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
86*
87*   Z      (local input/output) REAL             array of size
88*          (DESCZ(LLD_),*)
89*          If WANTZ = .TRUE., then the QR Sweep orthogonal
90*          similarity transformation is accumulated into
91*          Z(ILOZ:IHIZ,ILO:IHI) from the right.
92*          If WANTZ = .FALSE., then Z is unreferenced.
93*
94*   DESCZ  (global and local input) INTEGER array of dimension DLEN_.
95*          The array descriptor for the distributed matrix Z.
96*
97*   WORK   (local workspace) REAL             array, dimension(DWORK)
98*
99*   LWORK  (local input) INTEGER
100*          The length of the workspace array WORK.
101*
102*   IWORK  (local workspace) INTEGER array, dimension (LIWORK)
103*
104*   LIWORK (local input) INTEGER
105*          The length of the workspace array IWORK.
106*
107*     ================================================================
108*     Based on contributions by
109*        Robert Granat, Department of Computing Science and HPC2N,
110*        University of Umea, Sweden.
111*
112*     ============================================================
113*     References:
114*       K. Braman, R. Byers, and R. Mathias,
115*       The Multi-Shift QR Algorithm Part I: Maintaining Well Focused
116*       Shifts, and Level 3 Performance.
117*       SIAM J. Matrix Anal. Appl., 23(4):929--947, 2002.
118*
119*       R. Granat, B. Kagstrom, and D. Kressner,
120*       A Novel Parallel QR Algorithm for Hybrid Distributed Momory HPC
121*       Systems.
122*       SIAM J. Sci. Comput., 32(4):2345--2378, 2010.
123*
124*     ============================================================
125*     .. Parameters ..
126      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
127     $                   LLD_, MB_, M_, NB_, N_, RSRC_
128      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
129     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
130     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
131      REAL               ZERO, ONE
132      PARAMETER          ( ZERO = 0.0e0, ONE = 1.0e0 )
133      INTEGER            NTINY
134      PARAMETER          ( NTINY = 11 )
135*     ..
136*     .. Local Scalars ..
137      REAL               ALPHA, BETA, H11, H12, H21, H22, REFSUM,
138     $                   SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
139     $                   ULP, TAU, ELEM, STAMP, DDUM, ORTH
140      INTEGER            I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
141     $                   JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
142     $                   M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
143     $                   NS, NU, LLDH, LLDZ, LLDU, LLDV, LLDW, LLDWH,
144     $                   INFO, ICTXT, NPROW, NPCOL, NB, IROFFH, ITOP,
145     $                   NWIN, MYROW, MYCOL, LNS, NUMWIN, LKACC22,
146     $                   LCHAIN, WIN, IDONEJOB, IPNEXT, ANMWIN, LENRBUF,
147     $                   LENCBUF, ICHOFF, LRSRC, LCSRC, LKTOP, LKBOT,
148     $                   II, JJ, SWIN, EWIN, LNWIN, DIM, LLKTOP, LLKBOT,
149     $                   IPV, IPU, IPH, IPW, KU, KWH, KWV, NVE, LKS,
150     $                   IDUM, NHO, DIR, WINID, INDX, ILOC, JLOC, RSRC1,
151     $                   CSRC1, RSRC2, CSRC2, RSRC3, CSRC3, RSRC4, IPUU,
152     $                   CSRC4, LROWS, LCOLS, INDXS, KS, JLOC1, ILOC1,
153     $                   LKTOP1, LKTOP2, WCHUNK, NUMCHUNK, ODDEVEN,
154     $                   CHUNKNUM, DIM1, DIM4, IPW3, HROWS, ZROWS,
155     $                   HCOLS, IPW1, IPW2, RSRC, EAST, JLOC4, ILOC4,
156     $                   WEST, CSRC, SOUTH, NORHT, INDXE, NORTH,
157     $                   IHH, IPIW, LKBOT1, NPROCS, LIROFFH,
158     $                   WINFIN, RWS3, CLS3, INDX2, HROWS2,
159     $                   ZROWS2, HCOLS2, MNRBUF,
160     $                   MXRBUF, MNCBUF, MXCBUF, LWKOPT
161      LOGICAL            BLK22, BMP22, INTRO, DONEJOB, ODDNPROW,
162     $                   ODDNPCOL, LQUERY, BCDONE
163      CHARACTER          JBCMPZ*2, JOB
164*     ..
165*     .. External Functions ..
166      LOGICAL            LSAME
167      INTEGER            PILAENVX, ICEIL, INDXG2P, INDXG2L, NUMROC
168      REAL               SLAMCH, SLANGE
169      EXTERNAL           SLAMCH, PILAENVX, ICEIL, INDXG2P, INDXG2L,
170     $                   NUMROC, LSAME, SLANGE
171*     ..
172*     .. Intrinsic Functions ..
173      INTRINSIC          ABS, FLOAT, MAX, MIN, MOD
174*     ..
175*     .. Local Arrays ..
176      REAL               VT( 3 )
177*     ..
178*     .. External Subroutines ..
179      EXTERNAL           SGEMM, SLABAD, SLAMOV, SLAQR1, SLARFG, SLASET,
180     $                   STRMM, SLAQR6
181*     ..
182*     .. Executable Statements ..
183*
184      INFO = 0
185      ICTXT = DESCH( CTXT_ )
186      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
187      NPROCS = NPROW*NPCOL
188      LLDH = DESCH( LLD_ )
189      LLDZ = DESCZ( LLD_ )
190      NB = DESCH( MB_ )
191      IROFFH = MOD( KTOP - 1, NB )
192      LQUERY = LWORK.EQ.-1 .OR. LIWORK.EQ.-1
193*
194*     If there are no shifts, then there is nothing to do.
195*
196      IF( .NOT. LQUERY .AND. NSHFTS.LT.2 )
197     $   RETURN
198*
199*     If the active block is empty or 1-by-1, then there
200*     is nothing to do.
201*
202      IF( .NOT. LQUERY .AND. KTOP.GE.KBOT )
203     $   RETURN
204*
205*     Shuffle shifts into pairs of real shifts and pairs of
206*     complex conjugate shifts assuming complex conjugate
207*     shifts are already adjacent to one another.
208*
209      IF( .NOT. LQUERY ) THEN
210         DO 10 I = 1, NSHFTS - 2, 2
211            IF( SI( I ).NE.-SI( I+1 ) ) THEN
212*
213               SWAP = SR( I )
214               SR( I ) = SR( I+1 )
215               SR( I+1 ) = SR( I+2 )
216               SR( I+2 ) = SWAP
217*
218               SWAP = SI( I )
219               SI( I ) = SI( I+1 )
220               SI( I+1 ) = SI( I+2 )
221               SI( I+2 ) = SWAP
222            END IF
223   10    CONTINUE
224      END IF
225*
226*     NSHFTS is supposed to be even, but if is odd,
227*     then simply reduce it by one.  The shuffle above
228*     ensures that the dropped shift is real and that
229*     the remaining shifts are paired.
230*
231      NS = NSHFTS - MOD( NSHFTS, 2 )
232*
233*     Extract the size of the computational window.
234*
235      NWIN = PILAENVX( ICTXT, 19, 'PSLAQR5', JBCMPZ, N, NB, NB, NB )
236      NWIN = MIN( NWIN, KBOT-KTOP+1 )
237*
238*     Adjust number of simultaneous shifts if it exceeds the limit
239*     set by the number of diagonal blocks in the active submatrix
240*     H(KTOP:KBOT,KTOP:KBOT).
241*
242      NS = MAX( 2, MIN( NS, ICEIL( KBOT-KTOP+1, NB )*NWIN/3 ) )
243      NS = NS - MOD( NS, 2 )
244
245*
246*     Decide the number of simultaneous computational windows
247*     from the number of shifts - each window should contain up to
248*     (NWIN / 3) shifts. Also compute the number of shifts per
249*     window and make sure that number is even.
250*
251      LNS = MIN( MAX( 2, NWIN / 3 ), MAX( 2, NS / MIN(NPROW,NPCOL) ) )
252      LNS = LNS - MOD( LNS, 2 )
253      NUMWIN = MAX( 1, MIN( ICEIL( NS, LNS ),
254     $     ICEIL( KBOT-KTOP+1, NB ) - 1 ) )
255      IF( NPROW.NE.NPCOL ) THEN
256         NUMWIN = MIN( NUMWIN, MIN(NPROW,NPCOL) )
257         LNS = MIN( LNS, MAX( 2, NS / MIN(NPROW,NPCOL) ) )
258         LNS = LNS - MOD( LNS, 2 )
259      END IF
260*
261*     Machine constants for deflation.
262*
263      SAFMIN = SLAMCH( 'SAFE MINIMUM' )
264      SAFMAX = ONE / SAFMIN
265      CALL SLABAD( SAFMIN, SAFMAX )
266      ULP = SLAMCH( 'PRECISION' )
267      SMLNUM = SAFMIN*( FLOAT( N ) / ULP )
268*
269*     Use accumulated reflections to update far-from-diagonal
270*     entries on a local level?
271*
272      IF( LNS.LT.14 ) THEN
273         LKACC22 = 1
274      ELSE
275         LKACC22 = 2
276      END IF
277*
278*     If so, exploit the 2-by-2 block structure?
279*     ( Usually it is not efficient to exploit the 2-by-2 structure
280*       because the block size is too small. )
281*
282      BLK22 = ( LNS.GT.2 ) .AND. ( KACC22.EQ.2 )
283*
284*     Clear trash.
285*
286      IF( .NOT. LQUERY .AND. KTOP+2.LE.KBOT )
287     $   CALL PSELSET( H, KTOP+2, KTOP, DESCH, ZERO )
288*
289*     NBMPS = number of 2-shift bulges in each chain
290*
291      NBMPS = LNS / 2
292*
293*     KDU = width of slab
294*
295      KDU = 6*NBMPS - 3
296*
297*     LCHAIN = length of each chain
298*
299      LCHAIN = 3 * NBMPS + 1
300*
301*     Check if workspace query.
302*
303      IF( LQUERY ) THEN
304         HROWS = NUMROC( N, NB, MYROW, DESCH(RSRC_), NPROW )
305         HCOLS = NUMROC( N, NB, MYCOL, DESCH(CSRC_), NPCOL )
306         LWKOPT = (5+2*NUMWIN)*NB**2 + 2*HROWS*NB + HCOLS*NB +
307     $        MAX( HROWS*NB, HCOLS*NB )
308         WORK(1)  = FLOAT(LWKOPT)
309         IWORK(1) = 5*NUMWIN
310         RETURN
311      END IF
312*
313*     Check if KTOP and KBOT are valid.
314*
315      IF( KTOP.LT.1 .OR. KBOT.GT.N ) RETURN
316*
317*     Create and chase NUMWIN chains of NBMPS bulges.
318*
319*     Set up window introduction.
320*
321      ANMWIN = 0
322      INTRO = .TRUE.
323      IPIW = 1
324*
325*     Main loop:
326*     While-loop over the computational windows which is
327*     terminated when all windows have been introduced,
328*     chased down to the bottom of the considered submatrix
329*     and chased off.
330*
331 20   CONTINUE
332*
333*     Set up next window as long as we have less than the prescribed
334*     number of windows. Each window is described an integer quadruple:
335*     1. Local value of KTOP (below denoted by LKTOP)
336*     2. Local value of KBOT (below denoted by LKBOT)
337*     3-4. Processor indices (LRSRC,LCSRC) associated with the window.
338*     (5. Mark that decides if a window is fully processed or not)
339*
340*     Notice - the next window is only introduced if the first block
341*     in the active submatrix does not contain any other windows.
342*
343      IF( ANMWIN.GT.0 ) THEN
344         LKTOP = IWORK( 1+(ANMWIN-1)*5 )
345      ELSE
346         LKTOP = KTOP
347      END IF
348      IF( INTRO .AND. (ANMWIN.EQ.0 .OR. LKTOP.GT.ICEIL(KTOP,NB)*NB) )
349     $     THEN
350         ANMWIN = ANMWIN + 1
351*
352*        Structure of IWORK:
353*        IWORK( 1+(WIN-1)*5 ): start position
354*        IWORK( 2+(WIN-1)*5 ): stop position
355*        IWORK( 3+(WIN-1)*5 ): processor row id
356*        IWORK( 4+(WIN-1)*5 ): processor col id
357*        IWORK( 5+(WIN-1)*5 ): window status (0, 1, or 2)
358*
359         IWORK( 1+(ANMWIN-1)*5 ) = KTOP
360         IWORK( 2+(ANMWIN-1)*5 ) = KTOP +
361     $                             MIN( NWIN,NB-IROFFH,KBOT-KTOP+1 ) - 1
362         IWORK( 3+(ANMWIN-1)*5 ) = INDXG2P( IWORK(1+(ANMWIN-1)*5), NB,
363     $                             MYROW, DESCH(RSRC_), NPROW )
364         IWORK( 4+(ANMWIN-1)*5 ) = INDXG2P( IWORK(2+(ANMWIN-1)*5), NB,
365     $                             MYCOL, DESCH(CSRC_), NPCOL )
366         IWORK( 5+(ANMWIN-1)*5 ) = 0
367         IPIW = 6+(ANMWIN-1)*5
368         IF( ANMWIN.EQ.NUMWIN ) INTRO = .FALSE.
369      END IF
370*
371*     Do-loop over the number of windows.
372*
373      IPNEXT = 1
374      DONEJOB = .FALSE.
375      IDONEJOB = 0
376      LENRBUF = 0
377      LENCBUF = 0
378      ICHOFF = 0
379      DO 40 WIN = 1, ANMWIN
380*
381*        Extract window information to simplify the rest.
382*
383         LRSRC = IWORK( 3+(WIN-1)*5 )
384         LCSRC = IWORK( 4+(WIN-1)*5 )
385         LKTOP = IWORK( 1+(WIN-1)*5 )
386         LKBOT = IWORK( 2+(WIN-1)*5 )
387         LNWIN = LKBOT - LKTOP + 1
388*
389*        Check if anything to do for current window, i.e., if the local
390*        chain of bulges has reached the next block border etc.
391*
392         IF( IWORK(5+(WIN-1)*5).LT.2 .AND. LNWIN.GT.1 .AND.
393     $        (LNWIN.GT.LCHAIN .OR. LKBOT.EQ.KBOT ) ) THEN
394            LIROFFH = MOD(LKTOP-1,NB)
395            SWIN = LKTOP-LIROFFH
396            EWIN = MIN(KBOT,LKTOP-LIROFFH+NB-1)
397            DIM = EWIN-SWIN+1
398            IF( DIM.LE.NTINY .AND. .NOT.LKBOT.EQ.KBOT ) THEN
399               IWORK( 5+(WIN-1)*5 ) = 2
400               GO TO 45
401            END IF
402            IDONEJOB = 1
403            IF( IWORK(5+(WIN-1)*5).EQ.0 ) THEN
404               IWORK(5+(WIN-1)*5) = 1
405            END IF
406*
407*           Let the process that owns the corresponding window do the
408*           local bulge chase.
409*
410            IF( MYROW.EQ.LRSRC .AND. MYCOL.EQ.LCSRC ) THEN
411*
412*              Set the kind of job to do in SLAQR6:
413*              1. JOB = 'I': Introduce and chase bulges in window WIN
414*              2. JOB = 'C': Chase bulges from top to bottom of window WIN
415*              3. JOB = 'O': Chase bulges off window WIN
416*              4. JOB = 'A': All of 1-3 above is done - this will for
417*                            example happen for very small active
418*                            submatrices (like 2-by-2)
419*
420               LLKBOT = LLKTOP + LNWIN - 1
421               IF( LKTOP.EQ.KTOP .AND. LKBOT.EQ.KBOT ) THEN
422                  JOB = 'All steps'
423                  ICHOFF = 1
424               ELSEIF( LKTOP.EQ.KTOP ) THEN
425                  JOB = 'Introduce and chase'
426               ELSEIF( LKBOT.EQ.KBOT ) THEN
427                  JOB = 'Off-chase bulges'
428                  ICHOFF = 1
429               ELSE
430                  JOB = 'Chase bulges'
431               END IF
432*
433*              Copy submatrix of H corresponding to window WIN into
434*              workspace and set out additional workspace for storing
435*              orthogonal transformations. This submatrix must be at
436*              least (NTINY+1)-by-(NTINY+1) to fit into SLAQR6 - if not,
437*              abort and go for cross border bulge chasing with this
438*              particular window.
439*
440               II = INDXG2L( SWIN, NB, MYROW, DESCH(RSRC_), NPROW )
441               JJ = INDXG2L( SWIN, NB, MYCOL, DESCH(CSRC_), NPCOL )
442               LLKTOP = 1 + LIROFFH
443               LLKBOT = LLKTOP + LNWIN - 1
444*
445               IPU = IPNEXT
446               IPH = IPU + LNWIN**2
447               IPUU = IPH + MAX(NTINY+1,DIM)**2
448               IPV = IPUU + MAX(NTINY+1,DIM)**2
449               IPNEXT = IPH
450*
451               IF( LSAME( JOB, 'A' ) .OR. LSAME( JOB, 'O' ) .AND.
452     $              DIM.LT.NTINY+1 ) THEN
453                  CALL SLASET( 'All', NTINY+1, NTINY+1, ZERO, ONE,
454     $                 WORK(IPH), NTINY+1 )
455               END IF
456               CALL SLAMOV( 'Upper', DIM, DIM, H(II+(JJ-1)*LLDH), LLDH,
457     $              WORK(IPH), MAX(NTINY+1,DIM) )
458               CALL SCOPY(  DIM-1, H(II+(JJ-1)*LLDH+1), LLDH+1,
459     $              WORK(IPH+1), MAX(NTINY+1,DIM)+1 )
460               IF( LSAME( JOB, 'C' ) .OR. LSAME( JOB, 'O') ) THEN
461                  CALL SCOPY(  DIM-2, H(II+(JJ-1)*LLDH+2), LLDH+1,
462     $                 WORK(IPH+2), MAX(NTINY+1,DIM)+1 )
463                  CALL SCOPY(  DIM-3, H(II+(JJ-1)*LLDH+3), LLDH+1,
464     $                 WORK(IPH+3), MAX(NTINY+1,DIM)+1 )
465                  CALL SLASET( 'Lower', DIM-4, DIM-4, ZERO,
466     $                 ZERO, WORK(IPH+4), MAX(NTINY+1,DIM) )
467               ELSE
468                  CALL SLASET( 'Lower', DIM-2, DIM-2, ZERO,
469     $                 ZERO, WORK(IPH+2), MAX(NTINY+1,DIM) )
470               END IF
471*
472               KU = MAX(NTINY+1,DIM) - KDU + 1
473               KWH = KDU + 1
474               NHO = ( MAX(NTINY+1,DIM)-KDU+1-4 ) - ( KDU+1 ) + 1
475               KWV = KDU + 4
476               NVE = MAX(NTINY+1,DIM) - KDU - KWV + 1
477               CALL SLASET( 'All', MAX(NTINY+1,DIM),
478     $              MAX(NTINY+1,DIM), ZERO, ONE, WORK(IPUU),
479     $              MAX(NTINY+1,DIM) )
480*
481*              Small-bulge multi-shift QR sweep.
482*
483               LKS = MAX( 1, NS - WIN*LNS + 1 )
484               CALL SLAQR6( JOB, WANTT, .TRUE., LKACC22,
485     $              MAX(NTINY+1,DIM), LLKTOP, LLKBOT, LNS, SR( LKS ),
486     $              SI( LKS ), WORK(IPH), MAX(NTINY+1,DIM), LLKTOP,
487     $              LLKBOT, WORK(IPUU), MAX(NTINY+1,DIM), WORK(IPU),
488     $              3, WORK( IPH+KU-1 ),
489     $              MAX(NTINY+1,DIM), NVE, WORK( IPH+KWV-1 ),
490     $              MAX(NTINY+1,DIM), NHO, WORK( IPH-1+KU+(KWH-1)*
491     $              MAX(NTINY+1,DIM) ), MAX(NTINY+1,DIM) )
492*
493*              Copy submatrix of H back.
494*
495               CALL SLAMOV( 'Upper', DIM, DIM, WORK(IPH),
496     $              MAX(NTINY+1,DIM), H(II+(JJ-1)*LLDH), LLDH )
497               CALL SCOPY( DIM-1, WORK(IPH+1), MAX(NTINY+1,DIM)+1,
498     $              H(II+(JJ-1)*LLDH+1), LLDH+1 )
499               IF( LSAME( JOB, 'I' ) .OR. LSAME( JOB, 'C' ) ) THEN
500                  CALL SCOPY( DIM-2, WORK(IPH+2), DIM+1,
501     $                 H(II+(JJ-1)*LLDH+2), LLDH+1 )
502                  CALL SCOPY( DIM-3, WORK(IPH+3), DIM+1,
503     $                 H(II+(JJ-1)*LLDH+3), LLDH+1 )
504               ELSE
505                  CALL SLASET( 'Lower', DIM-2, DIM-2, ZERO,
506     $                 ZERO, H(II+(JJ-1)*LLDH+2), LLDH )
507               END IF
508*
509*              Copy actual submatrix of U to the correct place
510*              of the buffer.
511*
512               CALL SLAMOV( 'All', LNWIN, LNWIN,
513     $              WORK(IPUU+(MAX(NTINY+1,DIM)*LIROFFH)+LIROFFH),
514     $              MAX(NTINY+1,DIM), WORK(IPU), LNWIN )
515            END IF
516*
517*           In case the local submatrix was smaller than
518*           (NTINY+1)-by-(NTINY+1) we go here and proceed.
519*
520 45         CONTINUE
521         ELSE
522            IWORK( 5+(WIN-1)*5 ) = 2
523         END IF
524*
525*        Increment counter for buffers of orthogonal transformations.
526*
527         IF( MYROW.EQ.LRSRC .OR. MYCOL.EQ.LCSRC ) THEN
528            IF( IDONEJOB.EQ.1 .AND. IWORK(5+(WIN-1)*5).LT.2 ) THEN
529               IF( MYROW.EQ.LRSRC ) LENRBUF = LENRBUF + LNWIN*LNWIN
530               IF( MYCOL.EQ.LCSRC ) LENCBUF = LENCBUF + LNWIN*LNWIN
531            END IF
532         END IF
533 40   CONTINUE
534*
535*     Did some work in the above do-loop?
536*
537      CALL IGSUM2D( ICTXT, 'All', '1-Tree', 1, 1, IDONEJOB, 1, -1, -1 )
538      DONEJOB = IDONEJOB.GT.0
539*
540*     Chased off bulges from first window?
541*
542      IF( NPROCS.GT.1 )
543     $   CALL IGAMX2D( ICTXT, 'All', '1-Tree', 1, 1, ICHOFF, 1, -1,
544     $        -1, -1, -1, -1 )
545*
546*     If work was done in the do-loop over local windows, perform
547*     updates, otherwise go for cross border bulge chasing and updates.
548*
549      IF( DONEJOB ) THEN
550*
551*        Broadcast orthogonal transformations.
552*
553 49      CONTINUE
554         IF( LENRBUF.GT.0 .OR. LENCBUF.GT.0 ) THEN
555            DO 50 DIR = 1, 2
556               BCDONE = .FALSE.
557               DO 60 WIN = 1, ANMWIN
558                  IF( ( LENRBUF.EQ.0 .AND. LENCBUF.EQ.0 ) .OR.
559     $                 BCDONE ) GO TO 62
560                  LRSRC = IWORK( 3+(WIN-1)*5 )
561                  LCSRC = IWORK( 4+(WIN-1)*5 )
562                  IF( MYROW.EQ.LRSRC .AND. MYCOL.EQ.LCSRC ) THEN
563                     IF( DIR.EQ.1 .AND. LENRBUF.GT.0 .AND.
564     $                    NPCOL.GT.1 ) THEN
565                        CALL SGEBS2D( ICTXT, 'Row', '1-Tree', LENRBUF,
566     $                       1, WORK, LENRBUF )
567                     ELSEIF( DIR.EQ.2 .AND. LENCBUF.GT.0 .AND.
568     $                    NPROW.GT.1 ) THEN
569                        CALL SGEBS2D( ICTXT, 'Col', '1-Tree', LENCBUF,
570     $                       1, WORK, LENCBUF )
571                     END IF
572                     IF( LENRBUF.GT.0 )
573     $                  CALL SLAMOV( 'All', LENRBUF, 1, WORK, LENRBUF,
574     $                       WORK(1+LENRBUF), LENCBUF )
575                     BCDONE = .TRUE.
576                  ELSEIF( MYROW.EQ.LRSRC .AND. DIR.EQ.1 ) THEN
577                     IF( LENRBUF.GT.0 .AND. NPCOL.GT.1 ) THEN
578                        CALL SGEBR2D( ICTXT, 'Row', '1-Tree', LENRBUF,
579     $                       1, WORK, LENRBUF, LRSRC, LCSRC )
580                        BCDONE = .TRUE.
581                     END IF
582                  ELSEIF( MYCOL.EQ.LCSRC .AND. DIR.EQ.2 ) THEN
583                     IF( LENCBUF.GT.0 .AND. NPROW.GT.1 ) THEN
584                        CALL SGEBR2D( ICTXT, 'Col', '1-Tree', LENCBUF,
585     $                       1, WORK(1+LENRBUF), LENCBUF, LRSRC, LCSRC )
586                        BCDONE = .TRUE.
587                     END IF
588                  END IF
589 62               CONTINUE
590 60            CONTINUE
591 50         CONTINUE
592         END IF
593*
594*        Compute updates - make sure to skip windows that was skipped
595*        regarding local bulge chasing.
596*
597         DO 65 DIR = 1, 2
598            WINID = 0
599            IF( DIR.EQ.1 ) THEN
600               IPNEXT = 1
601            ELSE
602               IPNEXT = 1 + LENRBUF
603            END IF
604            DO 70 WIN = 1, ANMWIN
605               IF( IWORK( 5+(WIN-1)*5 ).EQ.2 ) GO TO 75
606               LRSRC = IWORK( 3+(WIN-1)*5 )
607               LCSRC = IWORK( 4+(WIN-1)*5 )
608               LKTOP = IWORK( 1+(WIN-1)*5 )
609               LKBOT = IWORK( 2+(WIN-1)*5 )
610               LNWIN = LKBOT - LKTOP + 1
611               IF( (MYROW.EQ.LRSRC.AND.LENRBUF.GT.0.AND.DIR.EQ.1) .OR.
612     $              (MYCOL.EQ.LCSRC.AND.LENCBUF.GT.0.AND.DIR.EQ.2 ) )
613     $              THEN
614*
615*                 Set up workspaces.
616*
617                  IPU = IPNEXT
618                  IPNEXT = IPU + LNWIN*LNWIN
619                  IPW = 1 + LENRBUF + LENCBUF
620                  LIROFFH = MOD(LKTOP-1,NB)
621                  WINID = WINID + 1
622*
623*                 Recompute JOB to see if block structure of U could
624*                 possibly be exploited or not.
625*
626                  IF( LKTOP.EQ.KTOP .AND. LKBOT.EQ.KBOT ) THEN
627                     JOB = 'All steps'
628                  ELSEIF( LKTOP.EQ.KTOP ) THEN
629                     JOB = 'Introduce and chase'
630                  ELSEIF( LKBOT.EQ.KBOT ) THEN
631                     JOB = 'Off-chase bulges'
632                  ELSE
633                     JOB = 'Chase bulges'
634                  END IF
635               END IF
636*
637*              Use U to update far-from-diagonal entries in H.
638*              If required, use U to update Z as well.
639*
640               IF( .NOT. BLK22 .OR. .NOT. LSAME(JOB,'C')
641     $              .OR. LNS.LE.2 ) THEN
642*
643                  IF( DIR.EQ.2 .AND. LENCBUF.GT.0 .AND.
644     $                 MYCOL.EQ.LCSRC ) THEN
645                     IF( WANTT ) THEN
646                        DO 80 INDX = 1, LKTOP-LIROFFH-1, NB
647                           CALL INFOG2L( INDX, LKTOP, DESCH, NPROW,
648     $                          NPCOL, MYROW, MYCOL, ILOC, JLOC, RSRC1,
649     $                          CSRC1 )
650                           IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 ) THEN
651                              LROWS = MIN( NB, LKTOP-INDX )
652                              CALL SGEMM('No transpose', 'No transpose',
653     $                             LROWS, LNWIN, LNWIN, ONE,
654     $                             H((JLOC-1)*LLDH+ILOC), LLDH,
655     $                             WORK( IPU ), LNWIN, ZERO,
656     $                             WORK(IPW),
657     $                             LROWS )
658                              CALL SLAMOV( 'All', LROWS, LNWIN,
659     $                             WORK(IPW), LROWS,
660     $                             H((JLOC-1)*LLDH+ILOC), LLDH )
661                           END IF
662 80                     CONTINUE
663                     END IF
664                     IF( WANTZ ) THEN
665                        DO 90 INDX = 1, N, NB
666                           CALL INFOG2L( INDX, LKTOP, DESCZ, NPROW,
667     $                          NPCOL, MYROW, MYCOL, ILOC, JLOC, RSRC1,
668     $                          CSRC1 )
669                           IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 ) THEN
670                              LROWS = MIN(NB,N-INDX+1)
671                              CALL SGEMM( 'No transpose',
672     $                             'No transpose', LROWS, LNWIN, LNWIN,
673     $                             ONE, Z((JLOC-1)*LLDZ+ILOC), LLDZ,
674     $                             WORK( IPU ), LNWIN, ZERO,
675     $                             WORK(IPW), LROWS )
676                              CALL SLAMOV( 'All', LROWS, LNWIN,
677     $                             WORK(IPW), LROWS,
678     $                             Z((JLOC-1)*LLDZ+ILOC), LLDZ )
679                           END IF
680 90                     CONTINUE
681                     END IF
682                  END IF
683*
684*                 Update the rows of H affected by the bulge-chase.
685*
686                  IF( DIR.EQ.1 .AND. LENRBUF.GT.0 .AND.
687     $                 MYROW.EQ.LRSRC ) THEN
688                     IF( WANTT ) THEN
689                        IF( ICEIL(LKBOT,NB).EQ.ICEIL(KBOT,NB) ) THEN
690                           LCOLS = MIN(ICEIL(KBOT,NB)*NB,N) - KBOT
691                        ELSE
692                           LCOLS = 0
693                        END IF
694                        IF( LCOLS.GT.0 ) THEN
695                           INDX = KBOT + 1
696                           CALL INFOG2L( LKTOP, INDX, DESCH, NPROW,
697     $                          NPCOL, MYROW, MYCOL, ILOC, JLOC,
698     $                          RSRC1, CSRC1 )
699                           IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 ) THEN
700                              CALL SGEMM( 'Transpose', 'No Transpose',
701     $                             LNWIN, LCOLS, LNWIN, ONE, WORK(IPU),
702     $                             LNWIN, H((JLOC-1)*LLDH+ILOC), LLDH,
703     $                             ZERO, WORK(IPW), LNWIN )
704                              CALL SLAMOV( 'All', LNWIN, LCOLS,
705     $                             WORK(IPW), LNWIN,
706     $                             H((JLOC-1)*LLDH+ILOC), LLDH )
707                           END IF
708                        END IF
709 93                     CONTINUE
710                        INDXS = ICEIL(LKBOT,NB)*NB + 1
711                        DO 95 INDX = INDXS, N, NB
712                           CALL INFOG2L( LKTOP, INDX,
713     $                          DESCH, NPROW, NPCOL, MYROW, MYCOL,
714     $                          ILOC, JLOC, RSRC1, CSRC1 )
715                           IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 ) THEN
716                              LCOLS = MIN( NB, N-INDX+1 )
717                              CALL SGEMM( 'Transpose', 'No Transpose',
718     $                             LNWIN, LCOLS, LNWIN, ONE, WORK(IPU),
719     $                             LNWIN, H((JLOC-1)*LLDH+ILOC), LLDH,
720     $                             ZERO, WORK(IPW),
721     $                             LNWIN )
722                              CALL SLAMOV( 'All', LNWIN, LCOLS,
723     $                             WORK(IPW), LNWIN,
724     $                             H((JLOC-1)*LLDH+ILOC), LLDH )
725                           END IF
726 95                     CONTINUE
727                     END IF
728                  END IF
729               ELSE
730                  KS = LNWIN-LNS/2*3
731*
732*                 The LNWIN-by-LNWIN matrix U containing the accumulated
733*                 orthogonal transformations has the following structure:
734*
735*                     [ U11  U12 ]
736*                 U = [          ],
737*                     [ U21  U22 ]
738*
739*                 where U21 is KS-by-KS upper triangular and U12 is
740*                 (LNWIN-KS)-by-(LNWIN-KS) lower triangular.
741*                 Here, KS = LNS.
742*
743*                 Update the columns of H and Z affected by the bulge
744*                 chasing.
745*
746*                 Compute H2*U21 + H1*U11 in workspace.
747*
748                  IF( DIR.EQ.2 .AND. LENCBUF.GT.0 .AND.
749     $                 MYCOL.EQ.LCSRC ) THEN
750                     IF( WANTT ) THEN
751                        DO 100 INDX = 1, LKTOP-LIROFFH-1, NB
752                           CALL INFOG2L( INDX, LKTOP, DESCH, NPROW,
753     $                          NPCOL, MYROW, MYCOL, ILOC, JLOC, RSRC1,
754     $                          CSRC1 )
755                           IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 ) THEN
756                              JLOC1 = INDXG2L( LKTOP+LNWIN-KS, NB,
757     $                             MYCOL, DESCH( CSRC_ ), NPCOL )
758                              LROWS = MIN( NB, LKTOP-INDX )
759                              CALL SLAMOV( 'All', LROWS, KS,
760     $                             H((JLOC1-1)*LLDH+ILOC ), LLDH,
761     $                             WORK(IPW), LROWS )
762                              CALL STRMM( 'Right', 'Upper',
763     $                             'No transpose','Non-unit', LROWS,
764     $                             KS, ONE, WORK( IPU+LNWIN-KS ), LNWIN,
765     $                             WORK(IPW), LROWS )
766                              CALL SGEMM('No transpose', 'No transpose',
767     $                             LROWS, KS, LNWIN-KS, ONE,
768     $                             H((JLOC-1)*LLDH+ILOC), LLDH,
769     $                             WORK( IPU ), LNWIN, ONE, WORK(IPW),
770     $                             LROWS )
771*
772*                             Compute H1*U12 + H2*U22 in workspace.
773*
774                              CALL SLAMOV( 'All', LROWS, LNWIN-KS,
775     $                             H((JLOC-1)*LLDH+ILOC), LLDH,
776     $                             WORK( IPW+KS*LROWS ), LROWS )
777                              CALL STRMM( 'Right', 'Lower',
778     $                             'No transpose', 'Non-Unit',
779     $                             LROWS, LNWIN-KS, ONE,
780     $                             WORK( IPU+LNWIN*KS ), LNWIN,
781     $                             WORK( IPW+KS*LROWS ), LROWS )
782                              CALL SGEMM('No transpose', 'No transpose',
783     $                             LROWS, LNWIN-KS, KS, ONE,
784     $                             H((JLOC1-1)*LLDH+ILOC), LLDH,
785     $                             WORK( IPU+LNWIN*KS+LNWIN-KS ), LNWIN,
786     $                             ONE, WORK( IPW+KS*LROWS ), LROWS )
787*
788*                             Copy workspace to H.
789*
790                              CALL SLAMOV( 'All', LROWS, LNWIN,
791     $                             WORK(IPW), LROWS,
792     $                             H((JLOC-1)*LLDH+ILOC), LLDH )
793                           END IF
794 100                    CONTINUE
795                     END IF
796*
797                     IF( WANTZ ) THEN
798*
799*                       Compute Z2*U21 + Z1*U11 in workspace.
800*
801                        DO 110 INDX = 1, N, NB
802                           CALL INFOG2L( INDX, LKTOP, DESCZ, NPROW,
803     $                          NPCOL, MYROW, MYCOL, ILOC, JLOC, RSRC1,
804     $                          CSRC1 )
805                           IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 ) THEN
806                              JLOC1 = INDXG2L( LKTOP+LNWIN-KS, NB,
807     $                             MYCOL, DESCZ( CSRC_ ), NPCOL )
808                              LROWS = MIN(NB,N-INDX+1)
809                              CALL SLAMOV( 'All', LROWS, KS,
810     $                             Z((JLOC1-1)*LLDZ+ILOC ), LLDZ,
811     $                             WORK(IPW), LROWS )
812                              CALL STRMM( 'Right', 'Upper',
813     $                             'No transpose', 'Non-unit',
814     $                             LROWS, KS, ONE, WORK( IPU+LNWIN-KS ),
815     $                             LNWIN, WORK(IPW), LROWS )
816                              CALL SGEMM( 'No transpose',
817     $                             'No transpose', LROWS, KS, LNWIN-KS,
818     $                             ONE, Z((JLOC-1)*LLDZ+ILOC), LLDZ,
819     $                             WORK( IPU ), LNWIN, ONE, WORK(IPW),
820     $                             LROWS )
821*
822*                             Compute Z1*U12 + Z2*U22 in workspace.
823*
824                              CALL SLAMOV( 'All', LROWS, LNWIN-KS,
825     $                             Z((JLOC-1)*LLDZ+ILOC), LLDZ,
826     $                             WORK( IPW+KS*LROWS ), LROWS)
827                              CALL STRMM( 'Right', 'Lower',
828     $                             'No transpose', 'Non-unit',
829     $                             LROWS, LNWIN-KS, ONE,
830     $                             WORK( IPU+LNWIN*KS ), LNWIN,
831     $                             WORK( IPW+KS*LROWS ), LROWS )
832                              CALL SGEMM( 'No transpose',
833     $                             'No transpose', LROWS, LNWIN-KS, KS,
834     $                             ONE, Z((JLOC1-1)*LLDZ+ILOC), LLDZ,
835     $                             WORK( IPU+LNWIN*KS+LNWIN-KS ), LNWIN,
836     $                             ONE, WORK( IPW+KS*LROWS ),
837     $                             LROWS )
838*
839*                             Copy workspace to Z.
840*
841                              CALL SLAMOV( 'All', LROWS, LNWIN,
842     $                             WORK(IPW), LROWS,
843     $                             Z((JLOC-1)*LLDZ+ILOC), LLDZ )
844                           END IF
845 110                    CONTINUE
846                     END IF
847                  END IF
848*
849                  IF( DIR.EQ.1 .AND. LENRBUF.GT.0 .AND.
850     $                 MYROW.EQ.LRSRC ) THEN
851                     IF( WANTT ) THEN
852                        INDXS = ICEIL(LKBOT,NB)*NB + 1
853                        DO 120 INDX = INDXS, N, NB
854                           CALL INFOG2L( LKTOP, INDX,
855     $                          DESCH, NPROW, NPCOL, MYROW, MYCOL, ILOC,
856     $                          JLOC, RSRC1, CSRC1 )
857                           IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC1 ) THEN
858*
859*                             Compute U21**T*H2 + U11**T*H1 in workspace.
860*
861                              ILOC1 = INDXG2L( LKTOP+LNWIN-KS, NB,
862     $                             MYROW, DESCH( RSRC_ ), NPROW )
863                              LCOLS = MIN( NB, N-INDX+1 )
864                              CALL SLAMOV( 'All', KS, LCOLS,
865     $                             H((JLOC-1)*LLDH+ILOC1), LLDH,
866     $                             WORK(IPW), LNWIN )
867                              CALL STRMM( 'Left', 'Upper', 'Transpose',
868     $                             'Non-unit', KS, LCOLS, ONE,
869     $                             WORK( IPU+LNWIN-KS ), LNWIN,
870     $                             WORK(IPW), LNWIN )
871                              CALL SGEMM( 'Transpose', 'No transpose',
872     $                             KS, LCOLS, LNWIN-KS, ONE, WORK(IPU),
873     $                             LNWIN, H((JLOC-1)*LLDH+ILOC), LLDH,
874     $                             ONE, WORK(IPW), LNWIN )
875*
876*                             Compute U12**T*H1 + U22**T*H2 in workspace.
877*
878                              CALL SLAMOV( 'All', LNWIN-KS, LCOLS,
879     $                             H((JLOC-1)*LLDH+ILOC), LLDH,
880     $                             WORK( IPW+KS ), LNWIN )
881                              CALL STRMM( 'Left', 'Lower', 'Transpose',
882     $                             'Non-unit', LNWIN-KS, LCOLS, ONE,
883     $                             WORK( IPU+LNWIN*KS ), LNWIN,
884     $                             WORK( IPW+KS ), LNWIN )
885                              CALL SGEMM( 'Transpose', 'No Transpose',
886     $                             LNWIN-KS, LCOLS, KS, ONE,
887     $                             WORK( IPU+LNWIN*KS+LNWIN-KS ), LNWIN,
888     $                             H((JLOC-1)*LLDH+ILOC1), LLDH,
889     $                             ONE, WORK( IPW+KS ), LNWIN )
890*
891*                             Copy workspace to H.
892*
893                              CALL SLAMOV( 'All', LNWIN, LCOLS,
894     $                             WORK(IPW), LNWIN,
895     $                             H((JLOC-1)*LLDH+ILOC), LLDH )
896                           END IF
897 120                    CONTINUE
898                     END IF
899                  END IF
900               END IF
901*
902*              Update position information about current window.
903*
904               IF( DIR.EQ.2 ) THEN
905                  IF( LKBOT.EQ.KBOT ) THEN
906                     LKTOP = KBOT+1
907                     LKBOT = KBOT+1
908                     IWORK( 1+(WIN-1)*5 ) = LKTOP
909                     IWORK( 2+(WIN-1)*5 ) = LKBOT
910                     IWORK( 5+(WIN-1)*5 ) = 2
911                  ELSE
912                     LKTOP = MIN( LKTOP + LNWIN - LCHAIN,
913     $                    ICEIL( LKTOP, NB )*NB - LCHAIN + 1,
914     $                    KBOT )
915                     IWORK( 1+(WIN-1)*5 ) = LKTOP
916                     LKBOT = MIN( LKBOT + LNWIN - LCHAIN,
917     $                    ICEIL( LKBOT, NB )*NB, KBOT )
918                     IWORK( 2+(WIN-1)*5 ) = LKBOT
919                     LNWIN = LKBOT-LKTOP+1
920                     IF( LNWIN.EQ.LCHAIN ) IWORK(5+(WIN-1)*5) = 2
921                  END IF
922               END IF
923 75            CONTINUE
924 70         CONTINUE
925 65      CONTINUE
926*
927*        If bulges were chasen off from first window, the window is
928*        removed.
929*
930         IF( ICHOFF.GT.0 ) THEN
931            DO 128 WIN = 2, ANMWIN
932               IWORK( 1+(WIN-2)*5 ) = IWORK( 1+(WIN-1)*5 )
933               IWORK( 2+(WIN-2)*5 ) = IWORK( 2+(WIN-1)*5 )
934               IWORK( 3+(WIN-2)*5 ) = IWORK( 3+(WIN-1)*5 )
935               IWORK( 4+(WIN-2)*5 ) = IWORK( 4+(WIN-1)*5 )
936               IWORK( 5+(WIN-2)*5 ) = IWORK( 5+(WIN-1)*5 )
937 128        CONTINUE
938            ANMWIN = ANMWIN - 1
939            IPIW = 6+(ANMWIN-1)*5
940         END IF
941*
942*        If we have no more windows, return.
943*
944         IF( ANMWIN.LT.1 ) RETURN
945*
946      ELSE
947*
948*        Set up windows such that as many bulges as possible can be
949*        moved over the border to the next block. Make sure that the
950*        cross border window is at least (NTINY+1)-by-(NTINY+1), unless
951*        we are chasing off the bulges from the last window. This is
952*        accomplished by setting the bottom index LKBOT such that the
953*        local window has the correct size.
954*
955*        If LKBOT then becomes larger than KBOT, the endpoint of the whole
956*        global submatrix, or LKTOP from a window located already residing
957*        at the other side of the border, this is taken care of by some
958*        dirty tricks.
959*
960         DO 130 WIN = 1, ANMWIN
961            LKTOP1 = IWORK( 1+(WIN-1)*5 )
962            LKBOT = IWORK( 2+(WIN-1)*5 )
963            LNWIN = MAX( 6, MIN( LKBOT - LKTOP1 + 1, LCHAIN ) )
964            LKBOT1 = MAX( MIN( KBOT, ICEIL(LKTOP1,NB)*NB+LCHAIN),
965     $           MIN( KBOT, MIN( LKTOP1+2*LNWIN-1,
966     $           (ICEIL(LKTOP1,NB)+1)*NB ) ) )
967            IWORK( 2+(WIN-1)*5 ) = LKBOT1
968 130     CONTINUE
969         ICHOFF = 0
970*
971*        Keep a record over what windows that were moved over the borders
972*        such that we can delay some windows due to lack of space on the
973*        other side of the border; we do not want to leave any of the
974*        bulges behind...
975*
976*        IWORK( 5+(WIN-1)*5 ) = 0: window WIN has not been processed
977*        IWORK( 5+(WIN-1)*5 ) = 1: window WIN is being processed (need to
978*                                  know for updates)
979*        IWORK( 5+(WIN-1)*5 ) = 2: window WIN has been fully processed
980*
981*        So, start by marking all windows as not processed.
982*
983         DO 135 WIN = 1, ANMWIN
984            IWORK( 5+(WIN-1)*5 ) = 0
985 135     CONTINUE
986*
987*        Do the cross border bulge-chase as follows: Start from the
988*        first window (the one that is closest to be chased off the
989*        diagonal of H) and take the odd windows first followed by the
990*        even ones. To not get into hang-problems on processor meshes
991*        with at least one odd dimension, the windows will in such a case
992*        be processed in chunks of {the minimum odd process dimension}-1
993*        windows to avoid overlapping processor scopes in forming the
994*        cross border computational windows and the cross border update
995*        regions.
996*
997         WCHUNK = MAX( 1, MIN( ANMWIN, NPROW-1, NPCOL-1 ) )
998         NUMCHUNK = ICEIL( ANMWIN, WCHUNK )
999*
1000*        Based on the computed chunk of windows, start working with
1001*        crossborder bulge-chasing. Repeat this as long as there is
1002*        still work left to do (137 is a kind of do-while statement).
1003*
1004 137     CONTINUE
1005*
1006*        Zero out LENRBUF and LENCBUF each time we restart this loop.
1007*
1008         LENRBUF = 0
1009         LENCBUF = 0
1010*
1011         DO 140 ODDEVEN = 1, MIN( 2, ANMWIN )
1012         DO 150 CHUNKNUM = 1, NUMCHUNK
1013            IPNEXT = 1
1014            DO 160 WIN = ODDEVEN+(CHUNKNUM-1)*WCHUNK,
1015     $           MIN(ANMWIN,MAX(1,ODDEVEN+(CHUNKNUM)*WCHUNK-1)), 2
1016*
1017*              Get position and size of the WIN:th active window and
1018*              make sure that we skip the cross border bulge for this
1019*              window if the window is not shared between several data
1020*              layout blocks (and processors).
1021*
1022*              Also, delay windows that do not have sufficient size of
1023*              the other side of the border. Moreover, make sure to skip
1024*              windows that was already processed in the last round of
1025*              the do-while loop (137).
1026*
1027               IF( IWORK( 5+(WIN-1)*5 ).EQ.2 ) GO TO 165
1028               LKTOP = IWORK( 1+(WIN-1)*5 )
1029               LKBOT = IWORK( 2+(WIN-1)*5 )
1030               IF( WIN.GT.1 ) THEN
1031                  LKTOP2 = IWORK( 1+(WIN-2)*5 )
1032               ELSE
1033                  LKTOP2 = KBOT+1
1034               END IF
1035               IF( ICEIL(LKTOP,NB).EQ.ICEIL(LKBOT,NB) .OR.
1036     $              LKBOT.GE.LKTOP2 ) GO TO 165
1037               LNWIN = LKBOT - LKTOP + 1
1038               IF( LNWIN.LE.NTINY .AND. LKBOT.NE.KBOT .AND.
1039     $              .NOT. MOD(LKBOT,NB).EQ.0  ) GO TO 165
1040*
1041*              If window is going to be processed, mark it as processed.
1042*
1043               IWORK( 5+(WIN-1)*5 ) = 1
1044*
1045*              Extract processors for current cross border window,
1046*              as below:
1047*
1048*                        1 | 2
1049*                        --+--
1050*                        3 | 4
1051*
1052               RSRC1 = IWORK( 3+(WIN-1)*5 )
1053               CSRC1 = IWORK( 4+(WIN-1)*5 )
1054               RSRC2 = RSRC1
1055               CSRC2 = MOD( CSRC1+1, NPCOL )
1056               RSRC3 = MOD( RSRC1+1, NPROW )
1057               CSRC3 = CSRC1
1058               RSRC4 = MOD( RSRC1+1, NPROW )
1059               CSRC4 = MOD( CSRC1+1, NPCOL )
1060*
1061*              Form group of four processors for cross border window.
1062*
1063               IF( ( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) .OR.
1064     $              ( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) .OR.
1065     $              ( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) .OR.
1066     $              ( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) ) THEN
1067*
1068*                 Compute the upper and lower parts of the active
1069*                 window.
1070*
1071                  DIM1 = NB - MOD(LKTOP-1,NB)
1072                  DIM4 = LNWIN - DIM1
1073*
1074*                 Temporarily compute a new value of the size of the
1075*                 computational window that is larger than or equal to
1076*                 NTINY+1; call the *real* value DIM.
1077*
1078                  DIM = LNWIN
1079                  LNWIN = MAX(NTINY+1,LNWIN)
1080*
1081*                 Divide workspace.
1082*
1083                  IPU = IPNEXT
1084                  IPH = IPU + DIM**2
1085                  IPUU = IPH + LNWIN**2
1086                  IPV = IPUU + LNWIN**2
1087                  IPNEXT = IPH
1088                  IF( DIM.LT.LNWIN ) THEN
1089                     CALL SLASET( 'All', LNWIN, LNWIN, ZERO,
1090     $                    ONE, WORK( IPH ), LNWIN )
1091                  ELSE
1092                     CALL SLASET( 'All', DIM, DIM, ZERO,
1093     $                    ZERO, WORK( IPH ), LNWIN )
1094                  END IF
1095*
1096*                 Form the active window.
1097*
1098                  IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
1099                     ILOC = INDXG2L( LKTOP, NB, MYROW,
1100     $                    DESCH( RSRC_ ), NPROW )
1101                     JLOC = INDXG2L( LKTOP, NB, MYCOL,
1102     $                    DESCH( CSRC_ ), NPCOL )
1103                     CALL SLAMOV( 'All', DIM1, DIM1,
1104     $                    H((JLOC-1)*LLDH+ILOC), LLDH, WORK(IPH),
1105     $                    LNWIN )
1106                     IF( RSRC1.NE.RSRC4 .OR. CSRC1.NE.CSRC4 ) THEN
1107*                       Proc#1 <==> Proc#4
1108                        CALL SGESD2D( ICTXT, DIM1, DIM1,
1109     $                       WORK(IPH), LNWIN, RSRC4, CSRC4 )
1110                        CALL SGERV2D( ICTXT, DIM4, DIM4,
1111     $                       WORK(IPH+DIM1*LNWIN+DIM1),
1112     $                       LNWIN, RSRC4, CSRC4 )
1113                     END IF
1114                  END IF
1115                  IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
1116                     ILOC = INDXG2L( LKTOP+DIM1, NB, MYROW,
1117     $                    DESCH( RSRC_ ), NPROW )
1118                     JLOC = INDXG2L( LKTOP+DIM1, NB, MYCOL,
1119     $                    DESCH( CSRC_ ), NPCOL )
1120                     CALL SLAMOV( 'All', DIM4, DIM4,
1121     $                    H((JLOC-1)*LLDH+ILOC), LLDH,
1122     $                    WORK(IPH+DIM1*LNWIN+DIM1),
1123     $                    LNWIN )
1124                     IF( RSRC4.NE.RSRC1 .OR. CSRC4.NE.CSRC1 ) THEN
1125*                       Proc#4 <==> Proc#1
1126                        CALL SGESD2D( ICTXT, DIM4, DIM4,
1127     $                       WORK(IPH+DIM1*LNWIN+DIM1),
1128     $                       LNWIN, RSRC1, CSRC1 )
1129                        CALL SGERV2D( ICTXT, DIM1, DIM1,
1130     $                       WORK(IPH), LNWIN, RSRC1, CSRC1 )
1131                     END IF
1132                  END IF
1133                  IF( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) THEN
1134                     ILOC = INDXG2L( LKTOP, NB, MYROW,
1135     $                    DESCH( RSRC_ ), NPROW )
1136                     JLOC = INDXG2L( LKTOP+DIM1, NB, MYCOL,
1137     $                    DESCH( CSRC_ ), NPCOL )
1138                     CALL SLAMOV( 'All', DIM1, DIM4,
1139     $                    H((JLOC-1)*LLDH+ILOC), LLDH,
1140     $                    WORK(IPH+DIM1*LNWIN), LNWIN )
1141                     IF( RSRC2.NE.RSRC1 .OR. CSRC2.NE.CSRC1 ) THEN
1142*                       Proc#2 ==> Proc#1
1143                        CALL SGESD2D( ICTXT, DIM1, DIM4,
1144     $                       WORK(IPH+DIM1*LNWIN),
1145     $                       LNWIN, RSRC1, CSRC1 )
1146                     END IF
1147                  END IF
1148                  IF( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) THEN
1149                     IF( RSRC2.NE.RSRC4 .OR. CSRC2.NE.CSRC4 ) THEN
1150*                       Proc#2 ==> Proc#4
1151                        CALL SGESD2D( ICTXT, DIM1, DIM4,
1152     $                       WORK(IPH+DIM1*LNWIN),
1153     $                       LNWIN, RSRC4, CSRC4 )
1154                     END IF
1155                  END IF
1156                  IF( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) THEN
1157                     ILOC = INDXG2L( LKTOP+DIM1, NB, MYROW,
1158     $                    DESCH( RSRC_ ), NPROW )
1159                     JLOC = INDXG2L( LKTOP+DIM1-1, NB, MYCOL,
1160     $                    DESCH( CSRC_ ), NPCOL )
1161                     CALL SLAMOV( 'All', 1, 1,
1162     $                    H((JLOC-1)*LLDH+ILOC), LLDH,
1163     $                    WORK(IPH+(DIM1-1)*LNWIN+DIM1),
1164     $                    LNWIN )
1165                     IF( RSRC3.NE.RSRC1 .OR. CSRC3.NE.CSRC1 ) THEN
1166*                       Proc#3 ==> Proc#1
1167                        CALL SGESD2D( ICTXT, 1, 1,
1168     $                       WORK(IPH+(DIM1-1)*LNWIN+DIM1),
1169     $                       LNWIN, RSRC1, CSRC1 )
1170                     END IF
1171                  END IF
1172                  IF( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) THEN
1173                     IF( RSRC3.NE.RSRC4 .OR. CSRC3.NE.CSRC4 ) THEN
1174*                       Proc#3 ==> Proc#4
1175                        CALL SGESD2D( ICTXT, 1, 1,
1176     $                       WORK(IPH+(DIM1-1)*LNWIN+DIM1),
1177     $                       LNWIN, RSRC4, CSRC4 )
1178                     END IF
1179                  END IF
1180                  IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
1181                     IF( RSRC1.NE.RSRC2 .OR. CSRC1.NE.CSRC2 ) THEN
1182*                       Proc#1 <== Proc#2
1183                        CALL SGERV2D( ICTXT, DIM1, DIM4,
1184     $                       WORK(IPH+DIM1*LNWIN),
1185     $                       LNWIN, RSRC2, CSRC2 )
1186                     END IF
1187                     IF( RSRC1.NE.RSRC3 .OR. CSRC1.NE.CSRC3 ) THEN
1188*                       Proc#1 <== Proc#3
1189                        CALL SGERV2D( ICTXT, 1, 1,
1190     $                       WORK(IPH+(DIM1-1)*LNWIN+DIM1),
1191     $                       LNWIN, RSRC3, CSRC3 )
1192                     END IF
1193                  END IF
1194                  IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
1195                     IF( RSRC4.NE.RSRC2 .OR. CSRC4.NE.CSRC2 ) THEN
1196*                       Proc#4 <== Proc#2
1197                        CALL SGERV2D( ICTXT, DIM1, DIM4,
1198     $                       WORK(IPH+DIM1*LNWIN),
1199     $                       LNWIN, RSRC2, CSRC2 )
1200                     END IF
1201                     IF( RSRC4.NE.RSRC3 .OR. CSRC4.NE.CSRC3 ) THEN
1202*                       Proc#4 <== Proc#3
1203                        CALL SGERV2D( ICTXT, 1, 1,
1204     $                       WORK(IPH+(DIM1-1)*LNWIN+DIM1),
1205     $                       LNWIN, RSRC3, CSRC3 )
1206                     END IF
1207                  END IF
1208*
1209*                 Prepare for call to SLAQR6 - it could happen that no
1210*                 bulges where introduced in the pre-cross border step
1211*                 since the chain was too long to fit in the top-left
1212*                 part of the cross border window. In such a case, the
1213*                 bulges are introduced here instead.  It could also
1214*                 happen that the bottom-right part is too small to hold
1215*                 the whole chain -- in such a case, the bulges are
1216*                 chasen off immediately, as well.
1217*
1218                  IF( (MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1) .OR.
1219     $                 (MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4) ) THEN
1220                     IF( LKTOP.EQ.KTOP .AND. LKBOT.EQ.KBOT .AND.
1221     $                    (DIM1.LE.LCHAIN .OR. DIM1.LE.NTINY ) ) THEN
1222                        JOB = 'All steps'
1223                        ICHOFF = 1
1224                     ELSEIF( LKTOP.EQ.KTOP .AND.
1225     $                    ( DIM1.LE.LCHAIN .OR. DIM1.LE.NTINY ) ) THEN
1226                        JOB = 'Introduce and chase'
1227                     ELSEIF( LKBOT.EQ.KBOT ) THEN
1228                        JOB = 'Off-chase bulges'
1229                        ICHOFF = 1
1230                     ELSE
1231                        JOB = 'Chase bulges'
1232                     END IF
1233                     KU = LNWIN - KDU + 1
1234                     KWH = KDU + 1
1235                     NHO = ( LNWIN-KDU+1-4 ) - ( KDU+1 ) + 1
1236                     KWV = KDU + 4
1237                     NVE = LNWIN - KDU - KWV + 1
1238                     CALL SLASET( 'All', LNWIN, LNWIN,
1239     $                    ZERO, ONE, WORK(IPUU), LNWIN )
1240*
1241*                    Small-bulge multi-shift QR sweep.
1242*
1243                     LKS = MAX(1, NS - WIN*LNS + 1)
1244                     CALL SLAQR6( JOB, WANTT, .TRUE., LKACC22, LNWIN,
1245     $                    1, DIM, LNS, SR( LKS ), SI( LKS ),
1246     $                    WORK(IPH), LNWIN, 1, DIM,
1247     $                    WORK(IPUU), LNWIN, WORK(IPU), 3,
1248     $                    WORK( IPH+KU-1 ), LNWIN, NVE,
1249     $                    WORK( IPH+KWV-1 ), LNWIN, NHO,
1250     $                    WORK( IPH-1+KU+(KWH-1)*LNWIN ), LNWIN )
1251*
1252*                    Copy local submatrices of H back to global matrix.
1253*
1254                     IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
1255                        ILOC = INDXG2L( LKTOP, NB, MYROW,
1256     $                       DESCH( RSRC_ ), NPROW )
1257                        JLOC = INDXG2L( LKTOP, NB, MYCOL,
1258     $                       DESCH( CSRC_ ), NPCOL )
1259                        CALL SLAMOV( 'All', DIM1, DIM1, WORK(IPH),
1260     $                       LNWIN, H((JLOC-1)*LLDH+ILOC),
1261     $                       LLDH )
1262                     END IF
1263                     IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
1264                        ILOC = INDXG2L( LKTOP+DIM1, NB, MYROW,
1265     $                       DESCH( RSRC_ ), NPROW )
1266                        JLOC = INDXG2L( LKTOP+DIM1, NB, MYCOL,
1267     $                       DESCH( CSRC_ ), NPCOL )
1268                        CALL SLAMOV( 'All', DIM4, DIM4,
1269     $                       WORK(IPH+DIM1*LNWIN+DIM1),
1270     $                       LNWIN, H((JLOC-1)*LLDH+ILOC), LLDH )
1271                     END IF
1272*
1273*                    Copy actual submatrix of U to the correct place of
1274*                    the buffer.
1275*
1276                     CALL SLAMOV( 'All', DIM, DIM,
1277     $                    WORK(IPUU), LNWIN, WORK(IPU), DIM )
1278                  END IF
1279*
1280*                 Return data to process 2 and 3.
1281*
1282                  RWS3 = MIN(3,DIM4)
1283                  CLS3 = MIN(3,DIM1)
1284                  IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN
1285                     IF( RSRC1.NE.RSRC3 .OR. CSRC1.NE.CSRC3 ) THEN
1286*                       Proc#1 ==> Proc#3
1287                        CALL SGESD2D( ICTXT, RWS3, CLS3,
1288     $                       WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1 ),
1289     $                       LNWIN, RSRC3, CSRC3 )
1290                     END IF
1291                  END IF
1292                  IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN
1293                     IF( RSRC4.NE.RSRC2 .OR. CSRC4.NE.CSRC2 ) THEN
1294*                       Proc#4 ==> Proc#2
1295                        CALL SGESD2D( ICTXT, DIM1, DIM4,
1296     $                       WORK( IPH+DIM1*LNWIN),
1297     $                       LNWIN, RSRC2, CSRC2 )
1298                     END IF
1299                  END IF
1300                  IF( MYROW.EQ.RSRC2 .AND. MYCOL.EQ.CSRC2 ) THEN
1301                     ILOC = INDXG2L( LKTOP, NB, MYROW,
1302     $                    DESCH( RSRC_ ), NPROW )
1303                     JLOC = INDXG2L( LKTOP+DIM1, NB, MYCOL,
1304     $                    DESCH( CSRC_ ), NPCOL )
1305                     IF( RSRC2.NE.RSRC4 .OR. CSRC2.NE.CSRC4 ) THEN
1306*                       Proc#2 <== Proc#4
1307                        CALL SGERV2D( ICTXT, DIM1, DIM4,
1308     $                       WORK(IPH+DIM1*LNWIN),
1309     $                       LNWIN, RSRC4, CSRC4 )
1310                     END IF
1311                     CALL SLAMOV( 'All', DIM1, DIM4,
1312     $                    WORK( IPH+DIM1*LNWIN ), LNWIN,
1313     $                    H((JLOC-1)*LLDH+ILOC), LLDH )
1314                  END IF
1315                  IF( MYROW.EQ.RSRC3 .AND. MYCOL.EQ.CSRC3 ) THEN
1316                     ILOC = INDXG2L( LKTOP+DIM1, NB, MYROW,
1317     $                    DESCH( RSRC_ ), NPROW )
1318                     JLOC = INDXG2L( LKTOP+DIM1-CLS3, NB, MYCOL,
1319     $                    DESCH( CSRC_ ), NPCOL )
1320                     IF( RSRC3.NE.RSRC1 .OR. CSRC3.NE.CSRC1 ) THEN
1321*                       Proc#3 <== Proc#1
1322                        CALL SGERV2D( ICTXT, RWS3, CLS3,
1323     $                       WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1 ),
1324     $                       LNWIN, RSRC1, CSRC1 )
1325                     END IF
1326                     CALL SLAMOV( 'Upper', RWS3, CLS3,
1327     $                    WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1 ),
1328     $                    LNWIN, H((JLOC-1)*LLDH+ILOC),
1329     $                    LLDH )
1330                     IF( RWS3.GT.1 .AND. CLS3.GT.1 ) THEN
1331                        ELEM = WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1+1 )
1332                        IF( ELEM.NE.ZERO ) THEN
1333                           CALL SLAMOV( 'Lower', RWS3-1, CLS3-1,
1334     $                          WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1+1 ),
1335     $                          LNWIN, H((JLOC-1)*LLDH+ILOC+1), LLDH )
1336                        END IF
1337                     END IF
1338                  END IF
1339*
1340*                 Restore correct value of LNWIN.
1341*
1342                  LNWIN = DIM
1343*
1344               END IF
1345*
1346*              Increment counter for buffers of orthogonal
1347*              transformations.
1348*
1349               IF( MYROW.EQ.RSRC1 .OR. MYCOL.EQ.CSRC1 .OR.
1350     $              MYROW.EQ.RSRC4 .OR. MYCOL.EQ.CSRC4 ) THEN
1351                  IF( MYROW.EQ.RSRC1 .OR. MYROW.EQ.RSRC4 )
1352     $               LENRBUF = LENRBUF + LNWIN*LNWIN
1353                  IF( MYCOL.EQ.CSRC1 .OR. MYCOL.EQ.CSRC4 )
1354     $               LENCBUF = LENCBUF + LNWIN*LNWIN
1355               END IF
1356*
1357*              If no cross border bulge chasing was performed for the
1358*              current WIN:th window, the processor jump to this point
1359*              and consider the next one.
1360*
1361 165           CONTINUE
1362*
1363 160        CONTINUE
1364*
1365*           Broadcast orthogonal transformations -- this will only happen
1366*           if the buffer associated with the orthogonal transformations
1367*           is not empty (controlled by LENRBUF, for row-wise
1368*           broadcasts, and LENCBUF, for column-wise broadcasts).
1369*
1370            DO 170 DIR = 1, 2
1371               BCDONE = .FALSE.
1372               DO 180 WIN = ODDEVEN+(CHUNKNUM-1)*WCHUNK,
1373     $              MIN(ANMWIN,MAX(1,ODDEVEN+(CHUNKNUM)*WCHUNK-1)), 2
1374                  IF( ( LENRBUF.EQ.0 .AND. LENCBUF.EQ.0 ) .OR.
1375     $                 BCDONE ) GO TO 185
1376                  RSRC1 = IWORK( 3+(WIN-1)*5 )
1377                  CSRC1 = IWORK( 4+(WIN-1)*5 )
1378                  RSRC4 = MOD( RSRC1+1, NPROW )
1379                  CSRC4 = MOD( CSRC1+1, NPCOL )
1380                  IF( ( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) .OR.
1381     $                 ( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) ) THEN
1382                     IF( DIR.EQ.1 .AND. LENRBUF.GT.0 .AND.
1383     $                    NPCOL.GT.1 .AND. NPROCS.GT.2 ) THEN
1384                        IF( MYROW.EQ.RSRC1 .OR. ( MYROW.EQ.RSRC4
1385     $                       .AND. RSRC4.NE.RSRC1 ) ) THEN
1386                           CALL SGEBS2D( ICTXT, 'Row', '1-Tree',
1387     $                          LENRBUF, 1, WORK, LENRBUF )
1388                        ELSE
1389                           CALL SGEBR2D( ICTXT, 'Row', '1-Tree',
1390     $                          LENRBUF, 1, WORK, LENRBUF, RSRC1,
1391     $                          CSRC1 )
1392                        END IF
1393                     ELSEIF( DIR.EQ.2 .AND. LENCBUF.GT.0 .AND.
1394     $                       NPROW.GT.1 .AND. NPROCS.GT.2 ) THEN
1395                        IF( MYCOL.EQ.CSRC1 .OR. ( MYCOL.EQ.CSRC4
1396     $                       .AND. CSRC4.NE.CSRC1 ) ) THEN
1397                           CALL SGEBS2D( ICTXT, 'Col', '1-Tree',
1398     $                          LENCBUF, 1, WORK, LENCBUF )
1399                        ELSE
1400                           CALL SGEBR2D( ICTXT, 'Col', '1-Tree',
1401     $                          LENCBUF, 1, WORK(1+LENRBUF), LENCBUF,
1402     $                          RSRC1, CSRC1 )
1403                        END IF
1404                     END IF
1405                     IF( LENRBUF.GT.0 .AND. ( MYCOL.EQ.CSRC1 .OR.
1406     $                    ( MYCOL.EQ.CSRC4 .AND. CSRC4.NE.CSRC1 ) ) )
1407     $                  CALL SLAMOV( 'All', LENRBUF, 1, WORK, LENRBUF,
1408     $                       WORK(1+LENRBUF), LENCBUF )
1409                     BCDONE = .TRUE.
1410                  ELSEIF( MYROW.EQ.RSRC1 .AND. DIR.EQ.1 ) THEN
1411                     IF( LENRBUF.GT.0 .AND. NPCOL.GT.1 )
1412     $                  CALL SGEBR2D( ICTXT, 'Row', '1-Tree', LENRBUF,
1413     $                       1, WORK, LENRBUF, RSRC1, CSRC1 )
1414                     BCDONE = .TRUE.
1415                  ELSEIF( MYCOL.EQ.CSRC1 .AND. DIR.EQ.2 ) THEN
1416                     IF( LENCBUF.GT.0 .AND. NPROW.GT.1 )
1417     $                  CALL SGEBR2D( ICTXT, 'Col', '1-Tree', LENCBUF,
1418     $                       1, WORK(1+LENRBUF), LENCBUF, RSRC1, CSRC1 )
1419                     BCDONE = .TRUE.
1420                  ELSEIF( MYROW.EQ.RSRC4 .AND. DIR.EQ.1 ) THEN
1421                     IF( LENRBUF.GT.0 .AND. NPCOL.GT.1 )
1422     $                  CALL SGEBR2D( ICTXT, 'Row', '1-Tree', LENRBUF,
1423     $                       1, WORK, LENRBUF, RSRC4, CSRC4 )
1424                     BCDONE = .TRUE.
1425                  ELSEIF( MYCOL.EQ.CSRC4 .AND. DIR.EQ.2 ) THEN
1426                     IF( LENCBUF.GT.0 .AND. NPROW.GT.1 )
1427     $                  CALL SGEBR2D( ICTXT, 'Col', '1-Tree', LENCBUF,
1428     $                       1, WORK(1+LENRBUF), LENCBUF, RSRC4, CSRC4 )
1429                     BCDONE = .TRUE.
1430                  END IF
1431 185              CONTINUE
1432 180           CONTINUE
1433 170        CONTINUE
1434*
1435*           Prepare for computing cross border updates by exchanging
1436*           data in cross border update regions in H and Z.
1437*
1438            DO 190 DIR = 1, 2
1439               WINID = 0
1440               IPW3 = 1
1441               DO 200 WIN = ODDEVEN+(CHUNKNUM-1)*WCHUNK,
1442     $              MIN(ANMWIN,MAX(1,ODDEVEN+(CHUNKNUM)*WCHUNK-1)), 2
1443                  IF( IWORK( 5+(WIN-1)*5 ).NE.1 ) GO TO 205
1444*
1445*                 Make sure this part of the code is only executed when
1446*                 there has been some work performed on the WIN:th
1447*                 window.
1448*
1449                  LKTOP = IWORK( 1+(WIN-1)*5 )
1450                  LKBOT = IWORK( 2+(WIN-1)*5 )
1451*
1452*                 Extract processor indices associated with
1453*                 the current window.
1454*
1455                  RSRC1 = IWORK( 3+(WIN-1)*5 )
1456                  CSRC1 = IWORK( 4+(WIN-1)*5 )
1457                  RSRC4 = MOD( RSRC1+1, NPROW )
1458                  CSRC4 = MOD( CSRC1+1, NPCOL )
1459*
1460*                 Compute local number of rows and columns
1461*                 of H and Z to exchange.
1462*
1463                  IF(((MYCOL.EQ.CSRC1.OR.MYCOL.EQ.CSRC4).AND.DIR.EQ.2)
1464     $                 .OR.((MYROW.EQ.RSRC1.OR.MYROW.EQ.RSRC4).AND.
1465     $                 DIR.EQ.1)) THEN
1466                     WINID = WINID + 1
1467                     LNWIN = LKBOT - LKTOP + 1
1468                     IPU = IPNEXT
1469                     DIM1 = NB - MOD(LKTOP-1,NB)
1470                     DIM4 = LNWIN - DIM1
1471                     IPNEXT = IPU + LNWIN*LNWIN
1472                     IF( DIR.EQ.2 ) THEN
1473                        IF( WANTZ ) THEN
1474                           ZROWS = NUMROC( N, NB, MYROW, DESCZ( RSRC_ ),
1475     $                          NPROW )
1476                        ELSE
1477                           ZROWS = 0
1478                        END IF
1479                        IF( WANTT ) THEN
1480                           HROWS = NUMROC( LKTOP-1, NB, MYROW,
1481     $                          DESCH( RSRC_ ), NPROW )
1482                        ELSE
1483                           HROWS = 0
1484                        END IF
1485                     ELSE
1486                        ZROWS = 0
1487                        HROWS = 0
1488                     END IF
1489                     IF( DIR.EQ.1 ) THEN
1490                        IF( WANTT ) THEN
1491                           HCOLS = NUMROC( N - (LKTOP+DIM1-1), NB,
1492     $                          MYCOL, CSRC4, NPCOL )
1493                           IF( MYCOL.EQ.CSRC4 ) HCOLS = HCOLS - DIM4
1494                        ELSE
1495                           HCOLS = 0
1496                        END IF
1497                     ELSE
1498                        HCOLS = 0
1499                     END IF
1500                     IPW = MAX( 1 + LENRBUF + LENCBUF, IPW3 )
1501                     IPW1 = IPW + HROWS * LNWIN
1502                     IF( WANTZ ) THEN
1503                        IPW2 = IPW1 + LNWIN * HCOLS
1504                        IPW3 = IPW2 + ZROWS * LNWIN
1505                     ELSE
1506                        IPW3 = IPW1 + LNWIN * HCOLS
1507                     END IF
1508                  END IF
1509*
1510*                 Let each process row and column involved in the updates
1511*                 exchange data in H and Z with their neighbours.
1512*
1513                  IF( DIR.EQ.2 .AND. WANTT .AND. LENCBUF.GT.0 ) THEN
1514                     IF( MYCOL.EQ.CSRC1 .OR. MYCOL.EQ.CSRC4 ) THEN
1515                        DO 210 INDX = 1, NPROW
1516                           IF( MYCOL.EQ.CSRC1 ) THEN
1517                              CALL INFOG2L( 1+(INDX-1)*NB, LKTOP, DESCH,
1518     $                             NPROW, NPCOL, MYROW, MYCOL, ILOC,
1519     $                             JLOC1, RSRC, CSRC1 )
1520                              IF( MYROW.EQ.RSRC ) THEN
1521                                 CALL SLAMOV( 'All', HROWS, DIM1,
1522     $                                H((JLOC1-1)*LLDH+ILOC), LLDH,
1523     $                                WORK(IPW), HROWS )
1524                                 IF( NPCOL.GT.1 ) THEN
1525                                    EAST = MOD( MYCOL + 1, NPCOL )
1526                                    CALL SGESD2D( ICTXT, HROWS, DIM1,
1527     $                                   WORK(IPW), HROWS, RSRC, EAST )
1528                                    CALL SGERV2D( ICTXT, HROWS, DIM4,
1529     $                                   WORK(IPW+HROWS*DIM1), HROWS,
1530     $                                   RSRC, EAST )
1531                                 END IF
1532                              END IF
1533                           END IF
1534                           IF( MYCOL.EQ.CSRC4 ) THEN
1535                              CALL INFOG2L( 1+(INDX-1)*NB, LKTOP+DIM1,
1536     $                             DESCH, NPROW, NPCOL, MYROW, MYCOL,
1537     $                             ILOC, JLOC4, RSRC, CSRC4 )
1538                              IF( MYROW.EQ.RSRC ) THEN
1539                                 CALL SLAMOV( 'All', HROWS, DIM4,
1540     $                                H((JLOC4-1)*LLDH+ILOC), LLDH,
1541     $                                WORK(IPW+HROWS*DIM1), HROWS )
1542                                 IF( NPCOL.GT.1 ) THEN
1543                                    WEST = MOD( MYCOL - 1 + NPCOL,
1544     $                                   NPCOL )
1545                                    CALL SGESD2D( ICTXT, HROWS, DIM4,
1546     $                                   WORK(IPW+HROWS*DIM1), HROWS,
1547     $                                   RSRC, WEST )
1548                                    CALL SGERV2D( ICTXT, HROWS, DIM1,
1549     $                                   WORK(IPW), HROWS, RSRC, WEST )
1550                                 END IF
1551                              END IF
1552                           END IF
1553 210                    CONTINUE
1554                     END IF
1555                  END IF
1556*
1557                  IF( DIR.EQ.1 .AND. WANTT .AND. LENRBUF.GT.0 ) THEN
1558                     IF( MYROW.EQ.RSRC1 .OR. MYROW.EQ.RSRC4 ) THEN
1559                        DO 220 INDX = 1, NPCOL
1560                           IF( MYROW.EQ.RSRC1 ) THEN
1561                              IF( INDX.EQ.1 ) THEN
1562                                 IF( LKBOT.LT.N ) THEN
1563                                    CALL INFOG2L( LKTOP, LKBOT+1, DESCH,
1564     $                                   NPROW, NPCOL, MYROW, MYCOL,
1565     $                                   ILOC1, JLOC, RSRC1, CSRC )
1566                                 ELSE
1567                                    CSRC = -1
1568                                 END IF
1569                              ELSEIF( MOD(LKBOT,NB).NE.0 ) THEN
1570                                 CALL INFOG2L( LKTOP,
1571     $                                (ICEIL(LKBOT,NB)+(INDX-2))*NB+1,
1572     $                                DESCH, NPROW, NPCOL, MYROW, MYCOL,
1573     $                                ILOC1, JLOC, RSRC1, CSRC )
1574                              ELSE
1575                                 CALL INFOG2L( LKTOP,
1576     $                                (ICEIL(LKBOT,NB)+(INDX-1))*NB+1,
1577     $                                DESCH, NPROW, NPCOL, MYROW, MYCOL,
1578     $                                ILOC1, JLOC, RSRC1, CSRC )
1579                              END IF
1580                              IF( MYCOL.EQ.CSRC ) THEN
1581                                 CALL SLAMOV( 'All', DIM1, HCOLS,
1582     $                                H((JLOC-1)*LLDH+ILOC1), LLDH,
1583     $                                WORK(IPW1), LNWIN )
1584                                 IF( NPROW.GT.1 ) THEN
1585                                    SOUTH = MOD( MYROW + 1, NPROW )
1586                                    CALL SGESD2D( ICTXT, DIM1, HCOLS,
1587     $                                   WORK(IPW1), LNWIN, SOUTH,
1588     $                                   CSRC )
1589                                    CALL SGERV2D( ICTXT, DIM4, HCOLS,
1590     $                                   WORK(IPW1+DIM1), LNWIN, SOUTH,
1591     $                                   CSRC )
1592                                 END IF
1593                              END IF
1594                           END IF
1595                           IF( MYROW.EQ.RSRC4 ) THEN
1596                              IF( INDX.EQ.1 ) THEN
1597                                 IF( LKBOT.LT.N ) THEN
1598                                    CALL INFOG2L( LKTOP+DIM1, LKBOT+1,
1599     $                                   DESCH, NPROW, NPCOL, MYROW,
1600     $                                   MYCOL, ILOC4, JLOC, RSRC4,
1601     $                                   CSRC )
1602                                 ELSE
1603                                    CSRC = -1
1604                                 END IF
1605                              ELSEIF( MOD(LKBOT,NB).NE.0 ) THEN
1606                                 CALL INFOG2L( LKTOP+DIM1,
1607     $                                (ICEIL(LKBOT,NB)+(INDX-2))*NB+1,
1608     $                                DESCH, NPROW, NPCOL, MYROW, MYCOL,
1609     $                                ILOC4, JLOC, RSRC4, CSRC )
1610                              ELSE
1611                                 CALL INFOG2L( LKTOP+DIM1,
1612     $                                (ICEIL(LKBOT,NB)+(INDX-1))*NB+1,
1613     $                                DESCH, NPROW, NPCOL, MYROW, MYCOL,
1614     $                                ILOC4, JLOC, RSRC4, CSRC )
1615                              END IF
1616                              IF( MYCOL.EQ.CSRC ) THEN
1617                                 CALL SLAMOV( 'All', DIM4, HCOLS,
1618     $                                H((JLOC-1)*LLDH+ILOC4), LLDH,
1619     $                                WORK(IPW1+DIM1), LNWIN )
1620                                 IF( NPROW.GT.1 ) THEN
1621                                    NORTH = MOD( MYROW - 1 + NPROW,
1622     $                                   NPROW )
1623                                    CALL SGESD2D( ICTXT, DIM4, HCOLS,
1624     $                                   WORK(IPW1+DIM1), LNWIN, NORTH,
1625     $                                   CSRC )
1626                                    CALL SGERV2D( ICTXT, DIM1, HCOLS,
1627     $                                   WORK(IPW1), LNWIN, NORTH,
1628     $                                   CSRC )
1629                                 END IF
1630                              END IF
1631                           END IF
1632 220                    CONTINUE
1633                     END IF
1634                  END IF
1635*
1636                  IF( DIR.EQ.2 .AND. WANTZ .AND. LENCBUF.GT.0) THEN
1637                     IF( MYCOL.EQ.CSRC1 .OR. MYCOL.EQ.CSRC4 ) THEN
1638                        DO 230 INDX = 1, NPROW
1639                           IF( MYCOL.EQ.CSRC1 ) THEN
1640                              CALL INFOG2L( 1+(INDX-1)*NB, LKTOP,
1641     $                             DESCZ, NPROW, NPCOL, MYROW, MYCOL,
1642     $                             ILOC, JLOC1, RSRC, CSRC1 )
1643                              IF( MYROW.EQ.RSRC ) THEN
1644                                 CALL SLAMOV( 'All', ZROWS, DIM1,
1645     $                                Z((JLOC1-1)*LLDZ+ILOC), LLDZ,
1646     $                                WORK(IPW2), ZROWS )
1647                                 IF( NPCOL.GT.1 ) THEN
1648                                    EAST = MOD( MYCOL + 1, NPCOL )
1649                                    CALL SGESD2D( ICTXT, ZROWS, DIM1,
1650     $                                   WORK(IPW2), ZROWS, RSRC,
1651     $                                   EAST )
1652                                    CALL SGERV2D( ICTXT, ZROWS, DIM4,
1653     $                                   WORK(IPW2+ZROWS*DIM1),
1654     $                                   ZROWS, RSRC, EAST )
1655                                 END IF
1656                              END IF
1657                           END IF
1658                           IF( MYCOL.EQ.CSRC4 ) THEN
1659                              CALL INFOG2L( 1+(INDX-1)*NB,
1660     $                             LKTOP+DIM1, DESCZ, NPROW, NPCOL,
1661     $                             MYROW, MYCOL, ILOC, JLOC4, RSRC,
1662     $                             CSRC4 )
1663                              IF( MYROW.EQ.RSRC ) THEN
1664                                 CALL SLAMOV( 'All', ZROWS, DIM4,
1665     $                                Z((JLOC4-1)*LLDZ+ILOC), LLDZ,
1666     $                                WORK(IPW2+ZROWS*DIM1), ZROWS )
1667                                 IF( NPCOL.GT.1 ) THEN
1668                                    WEST = MOD( MYCOL - 1 + NPCOL,
1669     $                                   NPCOL )
1670                                    CALL SGESD2D( ICTXT, ZROWS, DIM4,
1671     $                                   WORK(IPW2+ZROWS*DIM1),
1672     $                                   ZROWS, RSRC, WEST )
1673                                    CALL SGERV2D( ICTXT, ZROWS, DIM1,
1674     $                                   WORK(IPW2), ZROWS, RSRC,
1675     $                                   WEST )
1676                                 END IF
1677                              END IF
1678                           END IF
1679 230                    CONTINUE
1680                     END IF
1681                  END IF
1682*
1683*                 If no exchanges was performed for the current window,
1684*                 all processors jump to this point and try the next
1685*                 one.
1686*
1687 205              CONTINUE
1688*
1689 200           CONTINUE
1690*
1691*              Compute crossborder bulge-chase updates.
1692*
1693               WINID = 0
1694               IF( DIR.EQ.1 ) THEN
1695                  IPNEXT = 1
1696               ELSE
1697                  IPNEXT = 1 + LENRBUF
1698               END IF
1699               IPW3 = 1
1700               DO 240 WIN = ODDEVEN+(CHUNKNUM-1)*WCHUNK,
1701     $              MIN(ANMWIN,MAX(1,ODDEVEN+(CHUNKNUM)*WCHUNK-1)), 2
1702                  IF( IWORK( 5+(WIN-1)*5 ).NE.1 ) GO TO 245
1703*
1704*                 Only perform this part of the code if there was really
1705*                 some work performed on the WIN:th window.
1706*
1707                  LKTOP = IWORK( 1+(WIN-1)*5 )
1708                  LKBOT = IWORK( 2+(WIN-1)*5 )
1709                  LNWIN = LKBOT - LKTOP + 1
1710*
1711*                 Extract the processor indices associated with
1712*                 the current window.
1713*
1714                  RSRC1 = IWORK( 3+(WIN-1)*5 )
1715                  CSRC1 = IWORK( 4+(WIN-1)*5 )
1716                  RSRC4 = MOD( RSRC1+1, NPROW )
1717                  CSRC4 = MOD( CSRC1+1, NPCOL )
1718*
1719                  IF(((MYCOL.EQ.CSRC1.OR.MYCOL.EQ.CSRC4).AND.DIR.EQ.2)
1720     $                 .OR.((MYROW.EQ.RSRC1.OR.MYROW.EQ.RSRC4).AND.
1721     $                 DIR.EQ.1)) THEN
1722*
1723*                    Set up workspaces.
1724*
1725                     WINID = WINID + 1
1726                     LKTOP = IWORK( 1+(WIN-1)*5 )
1727                     LKBOT = IWORK( 2+(WIN-1)*5 )
1728                     LNWIN = LKBOT - LKTOP + 1
1729                     DIM1 = NB - MOD(LKTOP-1,NB)
1730                     DIM4 = LNWIN - DIM1
1731                     IPU = IPNEXT + (WINID-1)*LNWIN*LNWIN
1732                     IF( DIR.EQ.2 ) THEN
1733                        IF( WANTZ ) THEN
1734                           ZROWS = NUMROC( N, NB, MYROW, DESCZ( RSRC_ ),
1735     $                          NPROW )
1736                        ELSE
1737                           ZROWS = 0
1738                        END IF
1739                        IF( WANTT ) THEN
1740                           HROWS = NUMROC( LKTOP-1, NB, MYROW,
1741     $                          DESCH( RSRC_ ), NPROW )
1742                        ELSE
1743                           HROWS = 0
1744                        END IF
1745                     ELSE
1746                        ZROWS = 0
1747                        HROWS = 0
1748                     END IF
1749                     IF( DIR.EQ.1 ) THEN
1750                        IF( WANTT ) THEN
1751                           HCOLS = NUMROC( N - (LKTOP+DIM1-1), NB,
1752     $                          MYCOL, CSRC4, NPCOL )
1753                           IF( MYCOL.EQ.CSRC4 ) HCOLS = HCOLS - DIM4
1754                        ELSE
1755                           HCOLS = 0
1756                        END IF
1757                     ELSE
1758                        HCOLS = 0
1759                     END IF
1760*
1761*                    IPW  = local copy of overlapping column block of H
1762*                    IPW1 = local copy of overlapping row block of H
1763*                    IPW2 = local copy of overlapping column block of Z
1764*                    IPW3 = workspace for right hand side of matrix
1765*                           multiplication
1766*
1767                     IPW = MAX( 1 + LENRBUF + LENCBUF, IPW3 )
1768                     IPW1 = IPW + HROWS * LNWIN
1769                     IF( WANTZ ) THEN
1770                        IPW2 = IPW1 + LNWIN * HCOLS
1771                        IPW3 = IPW2 + ZROWS * LNWIN
1772                     ELSE
1773                        IPW3 = IPW1 + LNWIN * HCOLS
1774                     END IF
1775*
1776*                    Recompute job to see if special structure of U
1777*                    could possibly be exploited.
1778*
1779                     IF( LKTOP.EQ.KTOP .AND. LKBOT.EQ.KBOT ) THEN
1780                        JOB = 'All steps'
1781                     ELSEIF( LKTOP.EQ.KTOP .AND.
1782     $                    ( DIM1.LT.LCHAIN+1 .OR. DIM1.LE.NTINY ) )
1783     $                    THEN
1784                        JOB = 'Introduce and chase'
1785                     ELSEIF( LKBOT.EQ.KBOT ) THEN
1786                        JOB = 'Off-chase bulges'
1787                     ELSE
1788                        JOB = 'Chase bulges'
1789                     END IF
1790                  END IF
1791*
1792*                 Test if to exploit sparsity structure of
1793*                 orthogonal matrix U.
1794*
1795                  KS = DIM1+DIM4-LNS/2*3
1796                  IF( .NOT. BLK22 .OR. DIM1.NE.KS .OR.
1797     $                 DIM4.NE.KS .OR. LSAME(JOB,'I') .OR.
1798     $                 LSAME(JOB,'O') .OR. LNS.LE.2 ) THEN
1799*
1800*                    Update the columns of H and Z.
1801*
1802                     IF( DIR.EQ.2 .AND. WANTT .AND. LENCBUF.GT.0 ) THEN
1803                        DO 250 INDX = 1, MIN(LKTOP-1,1+(NPROW-1)*NB), NB
1804                           IF( MYCOL.EQ.CSRC1 ) THEN
1805                              CALL INFOG2L( INDX, LKTOP, DESCH, NPROW,
1806     $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
1807     $                             RSRC, CSRC1 )
1808                              IF( MYROW.EQ.RSRC ) THEN
1809                                 CALL SGEMM( 'No transpose',
1810     $                                'No transpose', HROWS, DIM1,
1811     $                                LNWIN, ONE, WORK( IPW ), HROWS,
1812     $                                WORK( IPU ), LNWIN, ZERO,
1813     $                                WORK(IPW3), HROWS )
1814                                 CALL SLAMOV( 'All', HROWS, DIM1,
1815     $                                WORK(IPW3), HROWS,
1816     $                                H((JLOC-1)*LLDH+ILOC), LLDH )
1817                              END IF
1818                           END IF
1819                           IF( MYCOL.EQ.CSRC4 ) THEN
1820                              CALL INFOG2L( INDX, LKTOP+DIM1, DESCH,
1821     $                             NPROW, NPCOL, MYROW, MYCOL, ILOC,
1822     $                             JLOC, RSRC, CSRC4 )
1823                              IF( MYROW.EQ.RSRC ) THEN
1824                                 CALL SGEMM( 'No transpose',
1825     $                                'No transpose', HROWS, DIM4,
1826     $                                LNWIN, ONE, WORK( IPW ), HROWS,
1827     $                                WORK( IPU+LNWIN*DIM1 ), LNWIN,
1828     $                                ZERO, WORK(IPW3), HROWS )
1829                                 CALL SLAMOV( 'All', HROWS, DIM4,
1830     $                                WORK(IPW3), HROWS,
1831     $                                H((JLOC-1)*LLDH+ILOC), LLDH )
1832                              END IF
1833                           END IF
1834 250                    CONTINUE
1835                     END IF
1836*
1837                     IF( DIR.EQ.2 .AND. WANTZ .AND. LENCBUF.GT.0 ) THEN
1838                        DO 260 INDX = 1, MIN(N,1+(NPROW-1)*NB), NB
1839                           IF( MYCOL.EQ.CSRC1 ) THEN
1840                              CALL INFOG2L( INDX, LKTOP, DESCZ, NPROW,
1841     $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
1842     $                             RSRC, CSRC1 )
1843                              IF( MYROW.EQ.RSRC ) THEN
1844                                 CALL SGEMM( 'No transpose',
1845     $                                'No transpose', ZROWS, DIM1,
1846     $                                LNWIN, ONE, WORK( IPW2 ),
1847     $                                ZROWS, WORK( IPU ), LNWIN,
1848     $                                ZERO, WORK(IPW3), ZROWS )
1849                                 CALL SLAMOV( 'All', ZROWS, DIM1,
1850     $                                WORK(IPW3), ZROWS,
1851     $                                Z((JLOC-1)*LLDZ+ILOC), LLDZ )
1852                              END IF
1853                           END IF
1854                           IF( MYCOL.EQ.CSRC4 ) THEN
1855                              CALL INFOG2L( INDX, LKTOP+DIM1, DESCZ,
1856     $                             NPROW, NPCOL, MYROW, MYCOL, ILOC,
1857     $                             JLOC, RSRC, CSRC4 )
1858                              IF( MYROW.EQ.RSRC ) THEN
1859                                 CALL SGEMM( 'No transpose',
1860     $                                'No transpose', ZROWS, DIM4,
1861     $                                LNWIN, ONE, WORK( IPW2 ),
1862     $                                ZROWS,
1863     $                                WORK( IPU+LNWIN*DIM1 ), LNWIN,
1864     $                                ZERO, WORK(IPW3), ZROWS )
1865                                 CALL SLAMOV( 'All', ZROWS, DIM4,
1866     $                                WORK(IPW3), ZROWS,
1867     $                                Z((JLOC-1)*LLDZ+ILOC), LLDZ )
1868                              END IF
1869                           END IF
1870 260                    CONTINUE
1871                     END IF
1872*
1873*                    Update the rows of H.
1874*
1875                     IF( DIR.EQ.1 .AND. WANTT .AND. LENRBUF.GT.0 ) THEN
1876                        IF( LKBOT.LT.N ) THEN
1877                           IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC4 .AND.
1878     $                          MOD(LKBOT,NB).NE.0 ) THEN
1879                              INDX = LKBOT + 1
1880                              CALL INFOG2L( LKTOP, INDX, DESCH, NPROW,
1881     $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
1882     $                             RSRC1, CSRC4 )
1883                              CALL SGEMM( 'Transpose', 'No Transpose',
1884     $                             DIM1, HCOLS, LNWIN, ONE, WORK(IPU),
1885     $                             LNWIN, WORK( IPW1 ), LNWIN, ZERO,
1886     $                             WORK(IPW3), DIM1 )
1887                              CALL SLAMOV( 'All', DIM1, HCOLS,
1888     $                             WORK(IPW3), DIM1,
1889     $                             H((JLOC-1)*LLDH+ILOC), LLDH )
1890                           END IF
1891                           IF( MYROW.EQ.RSRC4.AND.MYCOL.EQ.CSRC4 .AND.
1892     $                          MOD(LKBOT,NB).NE.0 ) THEN
1893                              INDX = LKBOT + 1
1894                              CALL INFOG2L( LKTOP+DIM1, INDX, DESCH,
1895     $                             NPROW, NPCOL, MYROW, MYCOL, ILOC,
1896     $                             JLOC, RSRC4, CSRC4 )
1897                              CALL SGEMM( 'Transpose', 'No Transpose',
1898     $                             DIM4, HCOLS, LNWIN, ONE,
1899     $                             WORK( IPU+DIM1*LNWIN ), LNWIN,
1900     $                             WORK( IPW1), LNWIN, ZERO,
1901     $                             WORK(IPW3), DIM4 )
1902                              CALL SLAMOV( 'All', DIM4, HCOLS,
1903     $                             WORK(IPW3), DIM4,
1904     $                             H((JLOC-1)*LLDH+ILOC), LLDH )
1905                           END IF
1906                           INDXS = ICEIL(LKBOT,NB)*NB + 1
1907                           IF( MOD(LKBOT,NB).NE.0 ) THEN
1908                              INDXE = MIN(N,INDXS+(NPCOL-2)*NB)
1909                           ELSE
1910                              INDXE = MIN(N,INDXS+(NPCOL-1)*NB)
1911                           END IF
1912                           DO 270 INDX = INDXS, INDXE, NB
1913                              IF( MYROW.EQ.RSRC1 ) THEN
1914                                 CALL INFOG2L( LKTOP, INDX, DESCH,
1915     $                                NPROW, NPCOL, MYROW, MYCOL, ILOC,
1916     $                                JLOC, RSRC1, CSRC )
1917                                 IF( MYCOL.EQ.CSRC ) THEN
1918                                    CALL SGEMM( 'Transpose',
1919     $                                   'No Transpose', DIM1, HCOLS,
1920     $                                   LNWIN, ONE, WORK( IPU ), LNWIN,
1921     $                                   WORK( IPW1 ), LNWIN, ZERO,
1922     $                                   WORK(IPW3), DIM1 )
1923                                    CALL SLAMOV( 'All', DIM1, HCOLS,
1924     $                                   WORK(IPW3), DIM1,
1925     $                                   H((JLOC-1)*LLDH+ILOC), LLDH )
1926                                 END IF
1927                              END IF
1928                              IF( MYROW.EQ.RSRC4 ) THEN
1929                                 CALL INFOG2L( LKTOP+DIM1, INDX, DESCH,
1930     $                                NPROW, NPCOL, MYROW, MYCOL, ILOC,
1931     $                                JLOC, RSRC4, CSRC )
1932                                 IF( MYCOL.EQ.CSRC ) THEN
1933                                    CALL SGEMM( 'Transpose',
1934     $                                   'No Transpose', DIM4, HCOLS,
1935     $                                   LNWIN, ONE,
1936     $                                   WORK( IPU+LNWIN*DIM1 ), LNWIN,
1937     $                                   WORK( IPW1 ), LNWIN,
1938     $                                   ZERO, WORK(IPW3), DIM4 )
1939                                    CALL SLAMOV( 'All', DIM4, HCOLS,
1940     $                                   WORK(IPW3), DIM4,
1941     $                                   H((JLOC-1)*LLDH+ILOC), LLDH )
1942                                 END IF
1943                              END IF
1944 270                       CONTINUE
1945                        END IF
1946                     END IF
1947                  ELSE
1948*
1949*                    Update the columns of H and Z.
1950*
1951*                    Compute H2*U21 + H1*U11 on the left side of the border.
1952*
1953                     IF( DIR.EQ.2 .AND. WANTT .AND. LENCBUF.GT.0 ) THEN
1954                        INDXE = MIN(LKTOP-1,1+(NPROW-1)*NB)
1955                        DO 280 INDX = 1, INDXE, NB
1956                           IF( MYCOL.EQ.CSRC1 ) THEN
1957                              CALL INFOG2L( INDX, LKTOP, DESCH, NPROW,
1958     $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
1959     $                             RSRC, CSRC1 )
1960                              IF( MYROW.EQ.RSRC ) THEN
1961                                 CALL SLAMOV( 'All', HROWS, KS,
1962     $                                WORK( IPW+HROWS*DIM4), HROWS,
1963     $                                WORK(IPW3), HROWS )
1964                                 CALL STRMM( 'Right', 'Upper',
1965     $                                'No transpose',
1966     $                                'Non-unit', HROWS, KS, ONE,
1967     $                                WORK( IPU+DIM4 ), LNWIN,
1968     $                                WORK(IPW3), HROWS )
1969                                 CALL SGEMM( 'No transpose',
1970     $                                'No transpose', HROWS, KS, DIM4,
1971     $                                ONE, WORK( IPW ), HROWS,
1972     $                                WORK( IPU ), LNWIN, ONE,
1973     $                                WORK(IPW3), HROWS )
1974                                 CALL SLAMOV( 'All', HROWS, KS,
1975     $                                WORK(IPW3), HROWS,
1976     $                                H((JLOC-1)*LLDH+ILOC), LLDH )
1977                              END IF
1978                           END IF
1979*
1980*                          Compute H1*U12 + H2*U22 on the right side of
1981*                          the border.
1982*
1983                           IF( MYCOL.EQ.CSRC4 ) THEN
1984                              CALL INFOG2L( INDX, LKTOP+DIM1, DESCH,
1985     $                             NPROW, NPCOL, MYROW, MYCOL, ILOC,
1986     $                             JLOC, RSRC, CSRC4 )
1987                              IF( MYROW.EQ.RSRC ) THEN
1988                                 CALL SLAMOV( 'All', HROWS, DIM4,
1989     $                                WORK(IPW), HROWS, WORK( IPW3 ),
1990     $                                HROWS )
1991                                 CALL STRMM( 'Right', 'Lower',
1992     $                                'No transpose',
1993     $                                'Non-unit', HROWS, DIM4, ONE,
1994     $                                WORK( IPU+LNWIN*KS ), LNWIN,
1995     $                                WORK( IPW3 ), HROWS )
1996                                 CALL SGEMM( 'No transpose',
1997     $                                'No transpose', HROWS, DIM4, KS,
1998     $                                ONE, WORK( IPW+HROWS*DIM4),
1999     $                                HROWS,
2000     $                                WORK( IPU+LNWIN*KS+DIM4 ), LNWIN,
2001     $                                ONE, WORK( IPW3 ), HROWS )
2002                                 CALL SLAMOV( 'All', HROWS, DIM4,
2003     $                                WORK(IPW3), HROWS,
2004     $                                H((JLOC-1)*LLDH+ILOC), LLDH )
2005                              END IF
2006                           END IF
2007 280                    CONTINUE
2008                     END IF
2009*
2010                     IF( DIR.EQ.2 .AND. WANTZ .AND. LENCBUF.GT.0 ) THEN
2011*
2012*                       Compute Z2*U21 + Z1*U11 on the left side
2013*                       of border.
2014*
2015                        INDXE = MIN(N,1+(NPROW-1)*NB)
2016                        DO 290 INDX = 1, INDXE, NB
2017                           IF( MYCOL.EQ.CSRC1 ) THEN
2018                              CALL INFOG2L( INDX, I, DESCZ, NPROW,
2019     $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
2020     $                             RSRC, CSRC1 )
2021                              IF( MYROW.EQ.RSRC ) THEN
2022                                 CALL SLAMOV( 'All', ZROWS, KS,
2023     $                                WORK( IPW2+ZROWS*DIM4),
2024     $                                ZROWS, WORK(IPW3), ZROWS )
2025                                 CALL STRMM( 'Right', 'Upper',
2026     $                                'No transpose',
2027     $                                'Non-unit', ZROWS, KS, ONE,
2028     $                                WORK( IPU+DIM4 ), LNWIN,
2029     $                                WORK(IPW3), ZROWS )
2030                                 CALL SGEMM( 'No transpose',
2031     $                                'No transpose', ZROWS, KS,
2032     $                                DIM4, ONE, WORK( IPW2 ),
2033     $                                ZROWS, WORK( IPU ), LNWIN,
2034     $                                ONE, WORK(IPW3), ZROWS )
2035                                 CALL SLAMOV( 'All', ZROWS, KS,
2036     $                                WORK(IPW3), ZROWS,
2037     $                                Z((JLOC-1)*LLDZ+ILOC), LLDZ )
2038                              END IF
2039                           END IF
2040*
2041*                          Compute Z1*U12 + Z2*U22 on the right side
2042*                          of border.
2043*
2044                           IF( MYCOL.EQ.CSRC4 ) THEN
2045                              CALL INFOG2L( INDX, I+DIM1, DESCZ,
2046     $                             NPROW, NPCOL, MYROW, MYCOL, ILOC,
2047     $                             JLOC, RSRC, CSRC4 )
2048                              IF( MYROW.EQ.RSRC ) THEN
2049                                 CALL SLAMOV( 'All', ZROWS, DIM4,
2050     $                                WORK(IPW2), ZROWS,
2051     $                                WORK( IPW3 ), ZROWS )
2052                                 CALL STRMM( 'Right', 'Lower',
2053     $                                'No transpose',
2054     $                                'Non-unit', ZROWS, DIM4,
2055     $                                ONE, WORK( IPU+LNWIN*KS ),
2056     $                                LNWIN, WORK( IPW3 ), ZROWS )
2057                                 CALL SGEMM( 'No transpose',
2058     $                                'No transpose', ZROWS, DIM4,
2059     $                                KS, ONE,
2060     $                                WORK( IPW2+ZROWS*(DIM4)),
2061     $                                ZROWS,
2062     $                                WORK( IPU+LNWIN*KS+DIM4 ),
2063     $                                LNWIN, ONE, WORK( IPW3 ),
2064     $                                ZROWS )
2065                                 CALL SLAMOV( 'All', ZROWS, DIM4,
2066     $                                WORK(IPW3), ZROWS,
2067     $                                Z((JLOC-1)*LLDZ+ILOC), LLDZ )
2068                              END IF
2069                           END IF
2070 290                    CONTINUE
2071                     END IF
2072*
2073                     IF( DIR.EQ.1 .AND. WANTT .AND. LENRBUF.GT.0) THEN
2074                        IF ( LKBOT.LT.N ) THEN
2075*
2076*                          Compute U21**T*H2 + U11**T*H1 on the upper
2077*                          side of the border.
2078*
2079                           IF( MYROW.EQ.RSRC1.AND.MYCOL.EQ.CSRC4.AND.
2080     $                          MOD(LKBOT,NB).NE.0 ) THEN
2081                              INDX = LKBOT + 1
2082                              CALL INFOG2L( LKTOP, INDX, DESCH, NPROW,
2083     $                             NPCOL, MYROW, MYCOL, ILOC, JLOC,
2084     $                             RSRC1, CSRC4 )
2085                              CALL SLAMOV( 'All', KS, HCOLS,
2086     $                             WORK( IPW1+DIM4 ), LNWIN,
2087     $                             WORK(IPW3), KS )
2088                              CALL STRMM( 'Left', 'Upper', 'Transpose',
2089     $                             'Non-unit', KS, HCOLS, ONE,
2090     $                             WORK( IPU+DIM4 ), LNWIN,
2091     $                             WORK(IPW3), KS )
2092                              CALL SGEMM( 'Transpose', 'No transpose',
2093     $                             KS, HCOLS, DIM4, ONE, WORK(IPU),
2094     $                             LNWIN, WORK(IPW1), LNWIN,
2095     $                             ONE, WORK(IPW3), KS )
2096                              CALL SLAMOV( 'All', KS, HCOLS,
2097     $                             WORK(IPW3), KS,
2098     $                             H((JLOC-1)*LLDH+ILOC), LLDH )
2099                           END IF
2100*
2101*                          Compute U12**T*H1 + U22**T*H2 one the lower
2102*                          side of the border.
2103*
2104                           IF( MYROW.EQ.RSRC4.AND.MYCOL.EQ.CSRC4.AND.
2105     $                          MOD(LKBOT,NB).NE.0 ) THEN
2106                              INDX = LKBOT + 1
2107                              CALL INFOG2L( LKTOP+DIM1, INDX, DESCH,
2108     $                             NPROW, NPCOL, MYROW, MYCOL, ILOC,
2109     $                             JLOC, RSRC4, CSRC4 )
2110                              CALL SLAMOV( 'All', DIM4, HCOLS,
2111     $                             WORK( IPW1 ), LNWIN,
2112     $                             WORK( IPW3 ), DIM4 )
2113                              CALL STRMM( 'Left', 'Lower', 'Transpose',
2114     $                             'Non-unit', DIM4, HCOLS, ONE,
2115     $                             WORK( IPU+LNWIN*KS ), LNWIN,
2116     $                             WORK( IPW3 ), DIM4 )
2117                              CALL SGEMM( 'Transpose', 'No Transpose',
2118     $                             DIM4, HCOLS, KS, ONE,
2119     $                             WORK( IPU+LNWIN*KS+DIM4 ), LNWIN,
2120     $                             WORK( IPW1+DIM1 ), LNWIN,
2121     $                             ONE, WORK( IPW3), DIM4 )
2122                              CALL SLAMOV( 'All', DIM4, HCOLS,
2123     $                             WORK(IPW3), DIM4,
2124     $                             H((JLOC-1)*LLDH+ILOC), LLDH )
2125                           END IF
2126*
2127*                          Compute U21**T*H2 + U11**T*H1 on upper side
2128*                          on border.
2129*
2130                           INDXS = ICEIL(LKBOT,NB)*NB+1
2131                           IF( MOD(LKBOT,NB).NE.0 ) THEN
2132                              INDXE = MIN(N,INDXS+(NPCOL-2)*NB)
2133                           ELSE
2134                              INDXE = MIN(N,INDXS+(NPCOL-1)*NB)
2135                           END IF
2136                           DO 300 INDX = INDXS, INDXE, NB
2137                              IF( MYROW.EQ.RSRC1 ) THEN
2138                                 CALL INFOG2L( LKTOP, INDX, DESCH,
2139     $                                NPROW, NPCOL, MYROW, MYCOL, ILOC,
2140     $                                JLOC, RSRC1, CSRC )
2141                                 IF( MYCOL.EQ.CSRC ) THEN
2142                                    CALL SLAMOV( 'All', KS, HCOLS,
2143     $                                   WORK( IPW1+DIM4 ), LNWIN,
2144     $                                   WORK(IPW3), KS )
2145                                    CALL STRMM( 'Left', 'Upper',
2146     $                                   'Transpose', 'Non-unit',
2147     $                                   KS, HCOLS, ONE,
2148     $                                   WORK( IPU+DIM4 ), LNWIN,
2149     $                                   WORK(IPW3), KS )
2150                                    CALL SGEMM( 'Transpose',
2151     $                                   'No transpose', KS, HCOLS,
2152     $                                   DIM4, ONE, WORK(IPU), LNWIN,
2153     $                                   WORK(IPW1), LNWIN, ONE,
2154     $                                   WORK(IPW3), KS )
2155                                    CALL SLAMOV( 'All', KS, HCOLS,
2156     $                                   WORK(IPW3), KS,
2157     $                                   H((JLOC-1)*LLDH+ILOC), LLDH )
2158                                 END IF
2159                              END IF
2160*
2161*                             Compute U12**T*H1 + U22**T*H2 on lower
2162*                             side of border.
2163*
2164                              IF( MYROW.EQ.RSRC4 ) THEN
2165                                 CALL INFOG2L( LKTOP+DIM1, INDX, DESCH,
2166     $                                NPROW, NPCOL, MYROW, MYCOL, ILOC,
2167     $                                JLOC, RSRC4, CSRC )
2168                                 IF( MYCOL.EQ.CSRC ) THEN
2169                                    CALL SLAMOV( 'All', DIM4, HCOLS,
2170     $                                   WORK( IPW1 ), LNWIN,
2171     $                                   WORK( IPW3 ), DIM4 )
2172                                    CALL STRMM( 'Left', 'Lower',
2173     $                                   'Transpose','Non-unit',
2174     $                                   DIM4, HCOLS, ONE,
2175     $                                   WORK( IPU+LNWIN*KS ), LNWIN,
2176     $                                   WORK( IPW3 ), DIM4 )
2177                                    CALL SGEMM( 'Transpose',
2178     $                                   'No Transpose', DIM4, HCOLS,
2179     $                                   KS, ONE,
2180     $                                   WORK( IPU+LNWIN*KS+DIM4 ),
2181     $                                   LNWIN, WORK( IPW1+DIM1 ),
2182     $                                   LNWIN, ONE, WORK( IPW3),
2183     $                                   DIM4 )
2184                                    CALL SLAMOV( 'All', DIM4, HCOLS,
2185     $                                   WORK(IPW3), DIM4,
2186     $                                   H((JLOC-1)*LLDH+ILOC), LLDH )
2187                                 END IF
2188                              END IF
2189 300                       CONTINUE
2190                        END IF
2191                     END IF
2192                  END IF
2193*
2194*                 Update window information - mark processed windows are
2195*                 completed.
2196*
2197                  IF( DIR.EQ.2 ) THEN
2198                     IF( LKBOT.EQ.KBOT ) THEN
2199                        LKTOP = KBOT+1
2200                        LKBOT = KBOT+1
2201                        IWORK( 1+(WIN-1)*5 ) = LKTOP
2202                        IWORK( 2+(WIN-1)*5 ) = LKBOT
2203                     ELSE
2204                        LKTOP = MIN( LKTOP + LNWIN - LCHAIN,
2205     $                       MIN( KBOT, ICEIL( LKBOT, NB )*NB ) -
2206     $                       LCHAIN + 1 )
2207                        IWORK( 1+(WIN-1)*5 ) = LKTOP
2208                        LKBOT = MIN( MAX( LKBOT + LNWIN - LCHAIN,
2209     $                       LKTOP + NWIN - 1), MIN( KBOT,
2210     $                       ICEIL( LKBOT, NB )*NB ) )
2211                        IWORK( 2+(WIN-1)*5 ) = LKBOT
2212                     END IF
2213                     IF( IWORK( 5+(WIN-1)*5 ).EQ.1 )
2214     $                    IWORK( 5+(WIN-1)*5 ) = 2
2215                     IWORK( 3+(WIN-1)*5 ) = RSRC4
2216                     IWORK( 4+(WIN-1)*5 ) = CSRC4
2217                  END IF
2218*
2219*                 If nothing was done for the WIN:th window, all
2220*                 processors come here and consider the next one
2221*                 instead.
2222*
2223 245              CONTINUE
2224 240           CONTINUE
2225 190        CONTINUE
2226 150     CONTINUE
2227 140     CONTINUE
2228*
2229*        Chased off bulges from first window?
2230*
2231         IF( NPROCS.GT.1 )
2232     $      CALL IGAMX2D( ICTXT, 'All', '1-Tree', 1, 1, ICHOFF, 1,
2233     $           -1, -1, -1, -1, -1 )
2234*
2235*        If the bulge was chasen off from first window it is removed.
2236*
2237         IF( ICHOFF.GT.0 ) THEN
2238            DO 198 WIN = 2, ANMWIN
2239               IWORK( 1+(WIN-2)*5 ) = IWORK( 1+(WIN-1)*5 )
2240               IWORK( 2+(WIN-2)*5 ) = IWORK( 2+(WIN-1)*5 )
2241               IWORK( 3+(WIN-2)*5 ) = IWORK( 3+(WIN-1)*5 )
2242               IWORK( 4+(WIN-2)*5 ) = IWORK( 4+(WIN-1)*5 )
2243 198        CONTINUE
2244            ANMWIN = ANMWIN - 1
2245            IPIW = 6+(ANMWIN-1)*5
2246         END IF
2247*
2248*        If we have no more windows, return.
2249*
2250         IF( ANMWIN.LT.1 ) RETURN
2251*
2252*        Check for any more windows to bring over the border.
2253*
2254         WINFIN = 0
2255         DO 199 WIN = 1, ANMWIN
2256            WINFIN = WINFIN+IWORK( 5+(WIN-1)*5 )
2257 199     CONTINUE
2258         IF( WINFIN.LT.2*ANMWIN ) GO TO 137
2259*
2260*        Zero out process mark for each window - this is legal now when
2261*        the process starts over with local bulge-chasing etc.
2262*
2263         DO 201 WIN = 1, ANMWIN
2264            IWORK( 5+(WIN-1)*5 ) = 0
2265 201     CONTINUE
2266*
2267      END IF
2268*
2269*     Go back to local bulge-chase and see if there is more work to do.
2270*
2271      GO TO 20
2272*
2273*     End of PSLAQR5
2274*
2275      END
2276