1*> \brief \b DLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc.
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DLASD2 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd2.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd2.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd2.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
22*                          LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
23*                          IDXC, IDXQ, COLTYP, INFO )
24*
25*       .. Scalar Arguments ..
26*       INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
27*       DOUBLE PRECISION   ALPHA, BETA
28*       ..
29*       .. Array Arguments ..
30*       INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
31*      $                   IDXQ( * )
32*       DOUBLE PRECISION   D( * ), DSIGMA( * ), U( LDU, * ),
33*      $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
34*      $                   Z( * )
35*       ..
36*
37*
38*> \par Purpose:
39*  =============
40*>
41*> \verbatim
42*>
43*> DLASD2 merges the two sets of singular values together into a single
44*> sorted set.  Then it tries to deflate the size of the problem.
45*> There are two ways in which deflation can occur:  when two or more
46*> singular values are close together or if there is a tiny entry in the
47*> Z vector.  For each such occurrence the order of the related secular
48*> equation problem is reduced by one.
49*>
50*> DLASD2 is called from DLASD1.
51*> \endverbatim
52*
53*  Arguments:
54*  ==========
55*
56*> \param[in] NL
57*> \verbatim
58*>          NL is INTEGER
59*>         The row dimension of the upper block.  NL >= 1.
60*> \endverbatim
61*>
62*> \param[in] NR
63*> \verbatim
64*>          NR is INTEGER
65*>         The row dimension of the lower block.  NR >= 1.
66*> \endverbatim
67*>
68*> \param[in] SQRE
69*> \verbatim
70*>          SQRE is INTEGER
71*>         = 0: the lower block is an NR-by-NR square matrix.
72*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
73*>
74*>         The bidiagonal matrix has N = NL + NR + 1 rows and
75*>         M = N + SQRE >= N columns.
76*> \endverbatim
77*>
78*> \param[out] K
79*> \verbatim
80*>          K is INTEGER
81*>         Contains the dimension of the non-deflated matrix,
82*>         This is the order of the related secular equation. 1 <= K <=N.
83*> \endverbatim
84*>
85*> \param[in,out] D
86*> \verbatim
87*>          D is DOUBLE PRECISION array, dimension(N)
88*>         On entry D contains the singular values of the two submatrices
89*>         to be combined.  On exit D contains the trailing (N-K) updated
90*>         singular values (those which were deflated) sorted into
91*>         increasing order.
92*> \endverbatim
93*>
94*> \param[out] Z
95*> \verbatim
96*>          Z is DOUBLE PRECISION array, dimension(N)
97*>         On exit Z contains the updating row vector in the secular
98*>         equation.
99*> \endverbatim
100*>
101*> \param[in] ALPHA
102*> \verbatim
103*>          ALPHA is DOUBLE PRECISION
104*>         Contains the diagonal element associated with the added row.
105*> \endverbatim
106*>
107*> \param[in] BETA
108*> \verbatim
109*>          BETA is DOUBLE PRECISION
110*>         Contains the off-diagonal element associated with the added
111*>         row.
112*> \endverbatim
113*>
114*> \param[in,out] U
115*> \verbatim
116*>          U is DOUBLE PRECISION array, dimension(LDU,N)
117*>         On entry U contains the left singular vectors of two
118*>         submatrices in the two square blocks with corners at (1,1),
119*>         (NL, NL), and (NL+2, NL+2), (N,N).
120*>         On exit U contains the trailing (N-K) updated left singular
121*>         vectors (those which were deflated) in its last N-K columns.
122*> \endverbatim
123*>
124*> \param[in] LDU
125*> \verbatim
126*>          LDU is INTEGER
127*>         The leading dimension of the array U.  LDU >= N.
128*> \endverbatim
129*>
130*> \param[in,out] VT
131*> \verbatim
132*>          VT is DOUBLE PRECISION array, dimension(LDVT,M)
133*>         On entry VT**T contains the right singular vectors of two
134*>         submatrices in the two square blocks with corners at (1,1),
135*>         (NL+1, NL+1), and (NL+2, NL+2), (M,M).
136*>         On exit VT**T contains the trailing (N-K) updated right singular
137*>         vectors (those which were deflated) in its last N-K columns.
138*>         In case SQRE =1, the last row of VT spans the right null
139*>         space.
140*> \endverbatim
141*>
142*> \param[in] LDVT
143*> \verbatim
144*>          LDVT is INTEGER
145*>         The leading dimension of the array VT.  LDVT >= M.
146*> \endverbatim
147*>
148*> \param[out] DSIGMA
149*> \verbatim
150*>          DSIGMA is DOUBLE PRECISION array, dimension (N)
151*>         Contains a copy of the diagonal elements (K-1 singular values
152*>         and one zero) in the secular equation.
153*> \endverbatim
154*>
155*> \param[out] U2
156*> \verbatim
157*>          U2 is DOUBLE PRECISION array, dimension(LDU2,N)
158*>         Contains a copy of the first K-1 left singular vectors which
159*>         will be used by DLASD3 in a matrix multiply (DGEMM) to solve
160*>         for the new left singular vectors. U2 is arranged into four
161*>         blocks. The first block contains a column with 1 at NL+1 and
162*>         zero everywhere else; the second block contains non-zero
163*>         entries only at and above NL; the third contains non-zero
164*>         entries only below NL+1; and the fourth is dense.
165*> \endverbatim
166*>
167*> \param[in] LDU2
168*> \verbatim
169*>          LDU2 is INTEGER
170*>         The leading dimension of the array U2.  LDU2 >= N.
171*> \endverbatim
172*>
173*> \param[out] VT2
174*> \verbatim
175*>          VT2 is DOUBLE PRECISION array, dimension(LDVT2,N)
176*>         VT2**T contains a copy of the first K right singular vectors
177*>         which will be used by DLASD3 in a matrix multiply (DGEMM) to
178*>         solve for the new right singular vectors. VT2 is arranged into
179*>         three blocks. The first block contains a row that corresponds
180*>         to the special 0 diagonal element in SIGMA; the second block
181*>         contains non-zeros only at and before NL +1; the third block
182*>         contains non-zeros only at and after  NL +2.
183*> \endverbatim
184*>
185*> \param[in] LDVT2
186*> \verbatim
187*>          LDVT2 is INTEGER
188*>         The leading dimension of the array VT2.  LDVT2 >= M.
189*> \endverbatim
190*>
191*> \param[out] IDXP
192*> \verbatim
193*>          IDXP is INTEGER array, dimension(N)
194*>         This will contain the permutation used to place deflated
195*>         values of D at the end of the array. On output IDXP(2:K)
196*>         points to the nondeflated D-values and IDXP(K+1:N)
197*>         points to the deflated singular values.
198*> \endverbatim
199*>
200*> \param[out] IDX
201*> \verbatim
202*>          IDX is INTEGER array, dimension(N)
203*>         This will contain the permutation used to sort the contents of
204*>         D into ascending order.
205*> \endverbatim
206*>
207*> \param[out] IDXC
208*> \verbatim
209*>          IDXC is INTEGER array, dimension(N)
210*>         This will contain the permutation used to arrange the columns
211*>         of the deflated U matrix into three groups:  the first group
212*>         contains non-zero entries only at and above NL, the second
213*>         contains non-zero entries only below NL+2, and the third is
214*>         dense.
215*> \endverbatim
216*>
217*> \param[in,out] IDXQ
218*> \verbatim
219*>          IDXQ is INTEGER array, dimension(N)
220*>         This contains the permutation which separately sorts the two
221*>         sub-problems in D into ascending order.  Note that entries in
222*>         the first hlaf of this permutation must first be moved one
223*>         position backward; and entries in the second half
224*>         must first have NL+1 added to their values.
225*> \endverbatim
226*>
227*> \param[out] COLTYP
228*> \verbatim
229*>          COLTYP is INTEGER array, dimension(N)
230*>         As workspace, this will contain a label which will indicate
231*>         which of the following types a column in the U2 matrix or a
232*>         row in the VT2 matrix is:
233*>         1 : non-zero in the upper half only
234*>         2 : non-zero in the lower half only
235*>         3 : dense
236*>         4 : deflated
237*>
238*>         On exit, it is an array of dimension 4, with COLTYP(I) being
239*>         the dimension of the I-th type columns.
240*> \endverbatim
241*>
242*> \param[out] INFO
243*> \verbatim
244*>          INFO is INTEGER
245*>          = 0:  successful exit.
246*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
247*> \endverbatim
248*
249*  Authors:
250*  ========
251*
252*> \author Univ. of Tennessee
253*> \author Univ. of California Berkeley
254*> \author Univ. of Colorado Denver
255*> \author NAG Ltd.
256*
257*> \ingroup OTHERauxiliary
258*
259*> \par Contributors:
260*  ==================
261*>
262*>     Ming Gu and Huan Ren, Computer Science Division, University of
263*>     California at Berkeley, USA
264*>
265*  =====================================================================
266      SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
267     $                   LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
268     $                   IDXC, IDXQ, COLTYP, INFO )
269*
270*  -- LAPACK auxiliary routine --
271*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
272*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
273*
274*     .. Scalar Arguments ..
275      INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
276      DOUBLE PRECISION   ALPHA, BETA
277*     ..
278*     .. Array Arguments ..
279      INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
280     $                   IDXQ( * )
281      DOUBLE PRECISION   D( * ), DSIGMA( * ), U( LDU, * ),
282     $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
283     $                   Z( * )
284*     ..
285*
286*  =====================================================================
287*
288*     .. Parameters ..
289      DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT
290      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
291     $                   EIGHT = 8.0D+0 )
292*     ..
293*     .. Local Arrays ..
294      INTEGER            CTOT( 4 ), PSM( 4 )
295*     ..
296*     .. Local Scalars ..
297      INTEGER            CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
298     $                   N, NLP1, NLP2
299      DOUBLE PRECISION   C, EPS, HLFTOL, S, TAU, TOL, Z1
300*     ..
301*     .. External Functions ..
302      DOUBLE PRECISION   DLAMCH, DLAPY2
303      EXTERNAL           DLAMCH, DLAPY2
304*     ..
305*     .. External Subroutines ..
306      EXTERNAL           DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA
307*     ..
308*     .. Intrinsic Functions ..
309      INTRINSIC          ABS, MAX
310*     ..
311*     .. Executable Statements ..
312*
313*     Test the input parameters.
314*
315      INFO = 0
316*
317      IF( NL.LT.1 ) THEN
318         INFO = -1
319      ELSE IF( NR.LT.1 ) THEN
320         INFO = -2
321      ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
322         INFO = -3
323      END IF
324*
325      N = NL + NR + 1
326      M = N + SQRE
327*
328      IF( LDU.LT.N ) THEN
329         INFO = -10
330      ELSE IF( LDVT.LT.M ) THEN
331         INFO = -12
332      ELSE IF( LDU2.LT.N ) THEN
333         INFO = -15
334      ELSE IF( LDVT2.LT.M ) THEN
335         INFO = -17
336      END IF
337      IF( INFO.NE.0 ) THEN
338         CALL XERBLA( 'DLASD2', -INFO )
339         RETURN
340      END IF
341*
342      NLP1 = NL + 1
343      NLP2 = NL + 2
344*
345*     Generate the first part of the vector Z; and move the singular
346*     values in the first part of D one position backward.
347*
348      Z1 = ALPHA*VT( NLP1, NLP1 )
349      Z( 1 ) = Z1
350      DO 10 I = NL, 1, -1
351         Z( I+1 ) = ALPHA*VT( I, NLP1 )
352         D( I+1 ) = D( I )
353         IDXQ( I+1 ) = IDXQ( I ) + 1
354   10 CONTINUE
355*
356*     Generate the second part of the vector Z.
357*
358      DO 20 I = NLP2, M
359         Z( I ) = BETA*VT( I, NLP2 )
360   20 CONTINUE
361*
362*     Initialize some reference arrays.
363*
364      DO 30 I = 2, NLP1
365         COLTYP( I ) = 1
366   30 CONTINUE
367      DO 40 I = NLP2, N
368         COLTYP( I ) = 2
369   40 CONTINUE
370*
371*     Sort the singular values into increasing order
372*
373      DO 50 I = NLP2, N
374         IDXQ( I ) = IDXQ( I ) + NLP1
375   50 CONTINUE
376*
377*     DSIGMA, IDXC, IDXC, and the first column of U2
378*     are used as storage space.
379*
380      DO 60 I = 2, N
381         DSIGMA( I ) = D( IDXQ( I ) )
382         U2( I, 1 ) = Z( IDXQ( I ) )
383         IDXC( I ) = COLTYP( IDXQ( I ) )
384   60 CONTINUE
385*
386      CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
387*
388      DO 70 I = 2, N
389         IDXI = 1 + IDX( I )
390         D( I ) = DSIGMA( IDXI )
391         Z( I ) = U2( IDXI, 1 )
392         COLTYP( I ) = IDXC( IDXI )
393   70 CONTINUE
394*
395*     Calculate the allowable deflation tolerance
396*
397      EPS = DLAMCH( 'Epsilon' )
398      TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
399      TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
400*
401*     There are 2 kinds of deflation -- first a value in the z-vector
402*     is small, second two (or more) singular values are very close
403*     together (their difference is small).
404*
405*     If the value in the z-vector is small, we simply permute the
406*     array so that the corresponding singular value is moved to the
407*     end.
408*
409*     If two values in the D-vector are close, we perform a two-sided
410*     rotation designed to make one of the corresponding z-vector
411*     entries zero, and then permute the array so that the deflated
412*     singular value is moved to the end.
413*
414*     If there are multiple singular values then the problem deflates.
415*     Here the number of equal singular values are found.  As each equal
416*     singular value is found, an elementary reflector is computed to
417*     rotate the corresponding singular subspace so that the
418*     corresponding components of Z are zero in this new basis.
419*
420      K = 1
421      K2 = N + 1
422      DO 80 J = 2, N
423         IF( ABS( Z( J ) ).LE.TOL ) THEN
424*
425*           Deflate due to small z component.
426*
427            K2 = K2 - 1
428            IDXP( K2 ) = J
429            COLTYP( J ) = 4
430            IF( J.EQ.N )
431     $         GO TO 120
432         ELSE
433            JPREV = J
434            GO TO 90
435         END IF
436   80 CONTINUE
437   90 CONTINUE
438      J = JPREV
439  100 CONTINUE
440      J = J + 1
441      IF( J.GT.N )
442     $   GO TO 110
443      IF( ABS( Z( J ) ).LE.TOL ) THEN
444*
445*        Deflate due to small z component.
446*
447         K2 = K2 - 1
448         IDXP( K2 ) = J
449         COLTYP( J ) = 4
450      ELSE
451*
452*        Check if singular values are close enough to allow deflation.
453*
454         IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
455*
456*           Deflation is possible.
457*
458            S = Z( JPREV )
459            C = Z( J )
460*
461*           Find sqrt(a**2+b**2) without overflow or
462*           destructive underflow.
463*
464            TAU = DLAPY2( C, S )
465            C = C / TAU
466            S = -S / TAU
467            Z( J ) = TAU
468            Z( JPREV ) = ZERO
469*
470*           Apply back the Givens rotation to the left and right
471*           singular vector matrices.
472*
473            IDXJP = IDXQ( IDX( JPREV )+1 )
474            IDXJ = IDXQ( IDX( J )+1 )
475            IF( IDXJP.LE.NLP1 ) THEN
476               IDXJP = IDXJP - 1
477            END IF
478            IF( IDXJ.LE.NLP1 ) THEN
479               IDXJ = IDXJ - 1
480            END IF
481            CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
482            CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
483     $                 S )
484            IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
485               COLTYP( J ) = 3
486            END IF
487            COLTYP( JPREV ) = 4
488            K2 = K2 - 1
489            IDXP( K2 ) = JPREV
490            JPREV = J
491         ELSE
492            K = K + 1
493            U2( K, 1 ) = Z( JPREV )
494            DSIGMA( K ) = D( JPREV )
495            IDXP( K ) = JPREV
496            JPREV = J
497         END IF
498      END IF
499      GO TO 100
500  110 CONTINUE
501*
502*     Record the last singular value.
503*
504      K = K + 1
505      U2( K, 1 ) = Z( JPREV )
506      DSIGMA( K ) = D( JPREV )
507      IDXP( K ) = JPREV
508*
509  120 CONTINUE
510*
511*     Count up the total number of the various types of columns, then
512*     form a permutation which positions the four column types into
513*     four groups of uniform structure (although one or more of these
514*     groups may be empty).
515*
516      DO 130 J = 1, 4
517         CTOT( J ) = 0
518  130 CONTINUE
519      DO 140 J = 2, N
520         CT = COLTYP( J )
521         CTOT( CT ) = CTOT( CT ) + 1
522  140 CONTINUE
523*
524*     PSM(*) = Position in SubMatrix (of types 1 through 4)
525*
526      PSM( 1 ) = 2
527      PSM( 2 ) = 2 + CTOT( 1 )
528      PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
529      PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
530*
531*     Fill out the IDXC array so that the permutation which it induces
532*     will place all type-1 columns first, all type-2 columns next,
533*     then all type-3's, and finally all type-4's, starting from the
534*     second column. This applies similarly to the rows of VT.
535*
536      DO 150 J = 2, N
537         JP = IDXP( J )
538         CT = COLTYP( JP )
539         IDXC( PSM( CT ) ) = J
540         PSM( CT ) = PSM( CT ) + 1
541  150 CONTINUE
542*
543*     Sort the singular values and corresponding singular vectors into
544*     DSIGMA, U2, and VT2 respectively.  The singular values/vectors
545*     which were not deflated go into the first K slots of DSIGMA, U2,
546*     and VT2 respectively, while those which were deflated go into the
547*     last N - K slots, except that the first column/row will be treated
548*     separately.
549*
550      DO 160 J = 2, N
551         JP = IDXP( J )
552         DSIGMA( J ) = D( JP )
553         IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
554         IF( IDXJ.LE.NLP1 ) THEN
555            IDXJ = IDXJ - 1
556         END IF
557         CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
558         CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
559  160 CONTINUE
560*
561*     Determine DSIGMA(1), DSIGMA(2) and Z(1)
562*
563      DSIGMA( 1 ) = ZERO
564      HLFTOL = TOL / TWO
565      IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
566     $   DSIGMA( 2 ) = HLFTOL
567      IF( M.GT.N ) THEN
568         Z( 1 ) = DLAPY2( Z1, Z( M ) )
569         IF( Z( 1 ).LE.TOL ) THEN
570            C = ONE
571            S = ZERO
572            Z( 1 ) = TOL
573         ELSE
574            C = Z1 / Z( 1 )
575            S = Z( M ) / Z( 1 )
576         END IF
577      ELSE
578         IF( ABS( Z1 ).LE.TOL ) THEN
579            Z( 1 ) = TOL
580         ELSE
581            Z( 1 ) = Z1
582         END IF
583      END IF
584*
585*     Move the rest of the updating row to Z.
586*
587      CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
588*
589*     Determine the first column of U2, the first row of VT2 and the
590*     last row of VT.
591*
592      CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
593      U2( NLP1, 1 ) = ONE
594      IF( M.GT.N ) THEN
595         DO 170 I = 1, NLP1
596            VT( M, I ) = -S*VT( NLP1, I )
597            VT2( 1, I ) = C*VT( NLP1, I )
598  170    CONTINUE
599         DO 180 I = NLP2, M
600            VT2( 1, I ) = S*VT( M, I )
601            VT( M, I ) = C*VT( M, I )
602  180    CONTINUE
603      ELSE
604         CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
605      END IF
606      IF( M.GT.N ) THEN
607         CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
608      END IF
609*
610*     The deflated singular values and their corresponding vectors go
611*     into the back of D, U, and V respectively.
612*
613      IF( N.GT.K ) THEN
614         CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
615         CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
616     $                LDU )
617         CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
618     $                LDVT )
619      END IF
620*
621*     Copy CTOT into COLTYP for referencing in DLASD3.
622*
623      DO 190 J = 1, 4
624         COLTYP( J ) = CTOT( J )
625  190 CONTINUE
626*
627      RETURN
628*
629*     End of DLASD2
630*
631      END
632