1*> \brief \b SLASD3 finds all square roots of the roots of the secular equation, as defined by the values in D and Z, and then updates the singular vectors by matrix multiplication. Used by sbdsdc.
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SLASD3 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slasd3.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slasd3.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slasd3.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE SLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,
22*                          LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z,
23*                          INFO )
24*
25*       .. Scalar Arguments ..
26*       INTEGER            INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR,
27*      $                   SQRE
28*       ..
29*       .. Array Arguments ..
30*       INTEGER            CTOT( * ), IDXC( * )
31*       REAL               D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ),
32*      $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
33*      $                   Z( * )
34*       ..
35*
36*
37*> \par Purpose:
38*  =============
39*>
40*> \verbatim
41*>
42*> SLASD3 finds all the square roots of the roots of the secular
43*> equation, as defined by the values in D and Z.  It makes the
44*> appropriate calls to SLASD4 and then updates the singular
45*> vectors by matrix multiplication.
46*>
47*> This code makes very mild assumptions about floating point
48*> arithmetic. It will work on machines with a guard digit in
49*> add/subtract, or on those binary machines without guard digits
50*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
51*> It could conceivably fail on hexadecimal or decimal machines
52*> without guard digits, but we know of none.
53*>
54*> SLASD3 is called from SLASD1.
55*> \endverbatim
56*
57*  Arguments:
58*  ==========
59*
60*> \param[in] NL
61*> \verbatim
62*>          NL is INTEGER
63*>         The row dimension of the upper block.  NL >= 1.
64*> \endverbatim
65*>
66*> \param[in] NR
67*> \verbatim
68*>          NR is INTEGER
69*>         The row dimension of the lower block.  NR >= 1.
70*> \endverbatim
71*>
72*> \param[in] SQRE
73*> \verbatim
74*>          SQRE is INTEGER
75*>         = 0: the lower block is an NR-by-NR square matrix.
76*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
77*>
78*>         The bidiagonal matrix has N = NL + NR + 1 rows and
79*>         M = N + SQRE >= N columns.
80*> \endverbatim
81*>
82*> \param[in] K
83*> \verbatim
84*>          K is INTEGER
85*>         The size of the secular equation, 1 =< K = < N.
86*> \endverbatim
87*>
88*> \param[out] D
89*> \verbatim
90*>          D is REAL array, dimension(K)
91*>         On exit the square roots of the roots of the secular equation,
92*>         in ascending order.
93*> \endverbatim
94*>
95*> \param[out] Q
96*> \verbatim
97*>          Q is REAL array,
98*>                     dimension at least (LDQ,K).
99*> \endverbatim
100*>
101*> \param[in] LDQ
102*> \verbatim
103*>          LDQ is INTEGER
104*>         The leading dimension of the array Q.  LDQ >= K.
105*> \endverbatim
106*>
107*> \param[in,out] DSIGMA
108*> \verbatim
109*>          DSIGMA is REAL array, dimension(K)
110*>         The first K elements of this array contain the old roots
111*>         of the deflated updating problem.  These are the poles
112*>         of the secular equation.
113*> \endverbatim
114*>
115*> \param[out] U
116*> \verbatim
117*>          U is REAL array, dimension (LDU, N)
118*>         The last N - K columns of this matrix contain the deflated
119*>         left singular vectors.
120*> \endverbatim
121*>
122*> \param[in] LDU
123*> \verbatim
124*>          LDU is INTEGER
125*>         The leading dimension of the array U.  LDU >= N.
126*> \endverbatim
127*>
128*> \param[in] U2
129*> \verbatim
130*>          U2 is REAL array, dimension (LDU2, N)
131*>         The first K columns of this matrix contain the non-deflated
132*>         left singular vectors for the split problem.
133*> \endverbatim
134*>
135*> \param[in] LDU2
136*> \verbatim
137*>          LDU2 is INTEGER
138*>         The leading dimension of the array U2.  LDU2 >= N.
139*> \endverbatim
140*>
141*> \param[out] VT
142*> \verbatim
143*>          VT is REAL array, dimension (LDVT, M)
144*>         The last M - K columns of VT**T contain the deflated
145*>         right singular vectors.
146*> \endverbatim
147*>
148*> \param[in] LDVT
149*> \verbatim
150*>          LDVT is INTEGER
151*>         The leading dimension of the array VT.  LDVT >= N.
152*> \endverbatim
153*>
154*> \param[in,out] VT2
155*> \verbatim
156*>          VT2 is REAL array, dimension (LDVT2, N)
157*>         The first K columns of VT2**T contain the non-deflated
158*>         right singular vectors for the split problem.
159*> \endverbatim
160*>
161*> \param[in] LDVT2
162*> \verbatim
163*>          LDVT2 is INTEGER
164*>         The leading dimension of the array VT2.  LDVT2 >= N.
165*> \endverbatim
166*>
167*> \param[in] IDXC
168*> \verbatim
169*>          IDXC is INTEGER array, dimension (N)
170*>         The permutation used to arrange the columns of U (and rows of
171*>         VT) into three groups:  the first group contains non-zero
172*>         entries only at and above (or before) NL +1; the second
173*>         contains non-zero entries only at and below (or after) NL+2;
174*>         and the third is dense. The first column of U and the row of
175*>         VT are treated separately, however.
176*>
177*>         The rows of the singular vectors found by SLASD4
178*>         must be likewise permuted before the matrix multiplies can
179*>         take place.
180*> \endverbatim
181*>
182*> \param[in] CTOT
183*> \verbatim
184*>          CTOT is INTEGER array, dimension (4)
185*>         A count of the total number of the various types of columns
186*>         in U (or rows in VT), as described in IDXC. The fourth column
187*>         type is any column which has been deflated.
188*> \endverbatim
189*>
190*> \param[in,out] Z
191*> \verbatim
192*>          Z is REAL array, dimension (K)
193*>         The first K elements of this array contain the components
194*>         of the deflation-adjusted updating row vector.
195*> \endverbatim
196*>
197*> \param[out] INFO
198*> \verbatim
199*>          INFO is INTEGER
200*>         = 0:  successful exit.
201*>         < 0:  if INFO = -i, the i-th argument had an illegal value.
202*>         > 0:  if INFO = 1, a singular value did not converge
203*> \endverbatim
204*
205*  Authors:
206*  ========
207*
208*> \author Univ. of Tennessee
209*> \author Univ. of California Berkeley
210*> \author Univ. of Colorado Denver
211*> \author NAG Ltd.
212*
213*> \date November 2015
214*
215*> \ingroup auxOTHERauxiliary
216*
217*> \par Contributors:
218*  ==================
219*>
220*>     Ming Gu and Huan Ren, Computer Science Division, University of
221*>     California at Berkeley, USA
222*>
223*  =====================================================================
224      SUBROUTINE SLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,
225     $                   LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z,
226     $                   INFO )
227*
228*  -- LAPACK auxiliary routine (version 3.6.0) --
229*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
230*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
231*     November 2015
232*
233*     .. Scalar Arguments ..
234      INTEGER            INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR,
235     $                   SQRE
236*     ..
237*     .. Array Arguments ..
238      INTEGER            CTOT( * ), IDXC( * )
239      REAL               D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ),
240     $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
241     $                   Z( * )
242*     ..
243*
244*  =====================================================================
245*
246*     .. Parameters ..
247      REAL               ONE, ZERO, NEGONE
248      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0,
249     $                     NEGONE = -1.0E+0 )
250*     ..
251*     .. Local Scalars ..
252      INTEGER            CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1
253      REAL               RHO, TEMP
254*     ..
255*     .. External Functions ..
256      REAL               SLAMC3, SNRM2
257      EXTERNAL           SLAMC3, SNRM2
258*     ..
259*     .. External Subroutines ..
260      EXTERNAL           SCOPY, SGEMM, SLACPY, SLASCL, SLASD4, XERBLA
261*     ..
262*     .. Intrinsic Functions ..
263      INTRINSIC          ABS, SIGN, SQRT
264*     ..
265*     .. Executable Statements ..
266*
267*     Test the input parameters.
268*
269      INFO = 0
270*
271      IF( NL.LT.1 ) THEN
272         INFO = -1
273      ELSE IF( NR.LT.1 ) THEN
274         INFO = -2
275      ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
276         INFO = -3
277      END IF
278*
279      N = NL + NR + 1
280      M = N + SQRE
281      NLP1 = NL + 1
282      NLP2 = NL + 2
283*
284      IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN
285         INFO = -4
286      ELSE IF( LDQ.LT.K ) THEN
287         INFO = -7
288      ELSE IF( LDU.LT.N ) THEN
289         INFO = -10
290      ELSE IF( LDU2.LT.N ) THEN
291         INFO = -12
292      ELSE IF( LDVT.LT.M ) THEN
293         INFO = -14
294      ELSE IF( LDVT2.LT.M ) THEN
295         INFO = -16
296      END IF
297      IF( INFO.NE.0 ) THEN
298         CALL XERBLA( 'SLASD3', -INFO )
299         RETURN
300      END IF
301*
302*     Quick return if possible
303*
304      IF( K.EQ.1 ) THEN
305         D( 1 ) = ABS( Z( 1 ) )
306         CALL SCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT )
307         IF( Z( 1 ).GT.ZERO ) THEN
308            CALL SCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 )
309         ELSE
310            DO 10 I = 1, N
311               U( I, 1 ) = -U2( I, 1 )
312   10       CONTINUE
313         END IF
314         RETURN
315      END IF
316*
317*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
318*     be computed with high relative accuracy (barring over/underflow).
319*     This is a problem on machines without a guard digit in
320*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
321*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
322*     which on any of these machines zeros out the bottommost
323*     bit of DSIGMA(I) if it is 1; this makes the subsequent
324*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
325*     occurs. On binary machines with a guard digit (almost all
326*     machines) it does not change DSIGMA(I) at all. On hexadecimal
327*     and decimal machines with a guard digit, it slightly
328*     changes the bottommost bits of DSIGMA(I). It does not account
329*     for hexadecimal or decimal machines without guard digits
330*     (we know of none). We use a subroutine call to compute
331*     2*DSIGMA(I) to prevent optimizing compilers from eliminating
332*     this code.
333*
334      DO 20 I = 1, K
335         DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
336   20 CONTINUE
337*
338*     Keep a copy of Z.
339*
340      CALL SCOPY( K, Z, 1, Q, 1 )
341*
342*     Normalize Z.
343*
344      RHO = SNRM2( K, Z, 1 )
345      CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
346      RHO = RHO*RHO
347*
348*     Find the new singular values.
349*
350      DO 30 J = 1, K
351         CALL SLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ),
352     $                VT( 1, J ), INFO )
353*
354*        If the zero finder fails, report the convergence failure.
355*
356         IF( INFO.NE.0 ) THEN
357            RETURN
358         END IF
359   30 CONTINUE
360*
361*     Compute updated Z.
362*
363      DO 60 I = 1, K
364         Z( I ) = U( I, K )*VT( I, K )
365         DO 40 J = 1, I - 1
366            Z( I ) = Z( I )*( U( I, J )*VT( I, J ) /
367     $               ( DSIGMA( I )-DSIGMA( J ) ) /
368     $               ( DSIGMA( I )+DSIGMA( J ) ) )
369   40    CONTINUE
370         DO 50 J = I, K - 1
371            Z( I ) = Z( I )*( U( I, J )*VT( I, J ) /
372     $               ( DSIGMA( I )-DSIGMA( J+1 ) ) /
373     $               ( DSIGMA( I )+DSIGMA( J+1 ) ) )
374   50    CONTINUE
375         Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) )
376   60 CONTINUE
377*
378*     Compute left singular vectors of the modified diagonal matrix,
379*     and store related information for the right singular vectors.
380*
381      DO 90 I = 1, K
382         VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I )
383         U( 1, I ) = NEGONE
384         DO 70 J = 2, K
385            VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I )
386            U( J, I ) = DSIGMA( J )*VT( J, I )
387   70    CONTINUE
388         TEMP = SNRM2( K, U( 1, I ), 1 )
389         Q( 1, I ) = U( 1, I ) / TEMP
390         DO 80 J = 2, K
391            JC = IDXC( J )
392            Q( J, I ) = U( JC, I ) / TEMP
393   80    CONTINUE
394   90 CONTINUE
395*
396*     Update the left singular vector matrix.
397*
398      IF( K.EQ.2 ) THEN
399         CALL SGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U,
400     $               LDU )
401         GO TO 100
402      END IF
403      IF( CTOT( 1 ).GT.0 ) THEN
404         CALL SGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2,
405     $               Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU )
406         IF( CTOT( 3 ).GT.0 ) THEN
407            KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
408            CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ),
409     $                  LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU )
410         END IF
411      ELSE IF( CTOT( 3 ).GT.0 ) THEN
412         KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
413         CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ),
414     $               LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU )
415      ELSE
416         CALL SLACPY( 'F', NL, K, U2, LDU2, U, LDU )
417      END IF
418      CALL SCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU )
419      KTEMP = 2 + CTOT( 1 )
420      CTEMP = CTOT( 2 ) + CTOT( 3 )
421      CALL SGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2,
422     $            Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU )
423*
424*     Generate the right singular vectors.
425*
426  100 CONTINUE
427      DO 120 I = 1, K
428         TEMP = SNRM2( K, VT( 1, I ), 1 )
429         Q( I, 1 ) = VT( 1, I ) / TEMP
430         DO 110 J = 2, K
431            JC = IDXC( J )
432            Q( I, J ) = VT( JC, I ) / TEMP
433  110    CONTINUE
434  120 CONTINUE
435*
436*     Update the right singular vector matrix.
437*
438      IF( K.EQ.2 ) THEN
439         CALL SGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO,
440     $               VT, LDVT )
441         RETURN
442      END IF
443      KTEMP = 1 + CTOT( 1 )
444      CALL SGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ,
445     $            VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT )
446      KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
447      IF( KTEMP.LE.LDVT2 )
448     $   CALL SGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ),
449     $               LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ),
450     $               LDVT )
451*
452      KTEMP = CTOT( 1 ) + 1
453      NRP1 = NR + SQRE
454      IF( KTEMP.GT.1 ) THEN
455         DO 130 I = 1, K
456            Q( I, KTEMP ) = Q( I, 1 )
457  130    CONTINUE
458         DO 140 I = NLP2, M
459            VT2( KTEMP, I ) = VT2( 1, I )
460  140    CONTINUE
461      END IF
462      CTEMP = 1 + CTOT( 2 ) + CTOT( 3 )
463      CALL SGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ,
464     $            VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT )
465*
466      RETURN
467*
468*     End of SLASD3
469*
470      END
471