1      SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
2     $                   WORK, LWORK, INFO )
3*
4*  -- LAPACK driver routine (instrumented to count ops, version 3.0) --
5*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
6*     Courant Institute, Argonne National Lab, and Rice University
7*     June 30, 1999
8*
9*     .. Scalar Arguments ..
10      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
11      DOUBLE PRECISION   RCOND
12*     ..
13*     .. Array Arguments ..
14      INTEGER            JPVT( * )
15      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * )
16*     ..
17*     Common block to return operation counts and timings
18*     .. Common blocks ..
19      COMMON             / LATIME / OPS, ITCNT
20      COMMON             / LSTIME / OPCNT, TIMNG
21*     ..
22*     .. Scalars in Common ..
23      DOUBLE PRECISION   ITCNT, OPS
24*     ..
25*     .. Arrays in Common ..
26      DOUBLE PRECISION   OPCNT( 6 ), TIMNG( 6 )
27*     ..
28*
29*  Purpose
30*  =======
31*
32*  DGELSY computes the minimum-norm solution to a real linear least
33*  squares problem:
34*      min || A * X - B ||
35*  using a complete orthogonal factorization of A.  A is an M-by-N
36*  matrix which may be rank-deficient.
37*
38*  Several right hand side vectors b and solution vectors x can be
39*  handled in a single call; they are stored as the columns of the
40*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
41*  matrix X.
42*
43*  The routine first computes a QR factorization with column pivoting:
44*      A * P = Q * [ R11 R12 ]
45*                  [  0  R22 ]
46*  with R11 defined as the largest leading submatrix whose estimated
47*  condition number is less than 1/RCOND.  The order of R11, RANK,
48*  is the effective rank of A.
49*
50*  Then, R22 is considered to be negligible, and R12 is annihilated
51*  by orthogonal transformations from the right, arriving at the
52*  complete orthogonal factorization:
53*     A * P = Q * [ T11 0 ] * Z
54*                 [  0  0 ]
55*  The minimum-norm solution is then
56*     X = P * Z' [ inv(T11)*Q1'*B ]
57*                [        0       ]
58*  where Q1 consists of the first RANK columns of Q.
59*
60*  This routine is basically identical to the original xGELSX except
61*  three differences:
62*    o The call to the subroutine xGEQPF has been substituted by the
63*      the call to the subroutine xGEQP3. This subroutine is a Blas-3
64*      version of the QR factorization with column pivoting.
65*    o Matrix B (the right hand side) is updated with Blas-3.
66*    o The permutation of matrix B (the right hand side) is faster and
67*      more simple.
68*
69*  Arguments
70*  =========
71*
72*  M       (input) INTEGER
73*          The number of rows of the matrix A.  M >= 0.
74*
75*  N       (input) INTEGER
76*          The number of columns of the matrix A.  N >= 0.
77*
78*  NRHS    (input) INTEGER
79*          The number of right hand sides, i.e., the number of
80*          columns of matrices B and X. NRHS >= 0.
81*
82*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
83*          On entry, the M-by-N matrix A.
84*          On exit, A has been overwritten by details of its
85*          complete orthogonal factorization.
86*
87*  LDA     (input) INTEGER
88*          The leading dimension of the array A.  LDA >= max(1,M).
89*
90*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
91*          On entry, the M-by-NRHS right hand side matrix B.
92*          On exit, the N-by-NRHS solution matrix X.
93*
94*  LDB     (input) INTEGER
95*          The leading dimension of the array B. LDB >= max(1,M,N).
96*
97*  JPVT    (input/output) INTEGER array, dimension (N)
98*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
99*          to the front of AP, otherwise column i is a free column.
100*          On exit, if JPVT(i) = k, then the i-th column of AP
101*          was the k-th column of A.
102*
103*  RCOND   (input) DOUBLE PRECISION
104*          RCOND is used to determine the effective rank of A, which
105*          is defined as the order of the largest leading triangular
106*          submatrix R11 in the QR factorization with pivoting of A,
107*          whose estimated condition number < 1/RCOND.
108*
109*  RANK    (output) INTEGER
110*          The effective rank of A, i.e., the order of the submatrix
111*          R11.  This is the same as the order of the submatrix T11
112*          in the complete orthogonal factorization of A.
113*
114*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
115*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
116*
117*  LWORK   (input) INTEGER
118*          The dimension of the array WORK.
119*          The unblocked strategy requires that:
120*             LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ),
121*          where MN = min( M, N ).
122*          The block algorithm requires that:
123*             LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ),
124*          where NB is an upper bound on the blocksize returned
125*          by ILAENV for the routines DGEQP3, DTZRZF, STZRQF, DORMQR,
126*          and DORMRZ.
127*
128*          If LWORK = -1, then a workspace query is assumed; the routine
129*          only calculates the optimal size of the WORK array, returns
130*          this value as the first entry of the WORK array, and no error
131*          message related to LWORK is issued by XERBLA.
132*
133*  INFO    (output) INTEGER
134*          = 0: successful exit
135*          < 0: If INFO = -i, the i-th argument had an illegal value.
136*
137*  Further Details
138*  ===============
139*
140*  Based on contributions by
141*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
142*    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
143*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
144*
145*  =====================================================================
146*
147*     .. Parameters ..
148      INTEGER            IMAX, IMIN
149      PARAMETER          ( IMAX = 1, IMIN = 2 )
150      DOUBLE PRECISION   ZERO, ONE
151      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
152*     ..
153*     .. Local Scalars ..
154      LOGICAL            LQUERY
155      INTEGER            GELSY, GEQP3, I, IASCL, IBSCL, ISMAX, ISMIN, J,
156     $                   LWKOPT, MN, NB, NB1, NB2, NB3, NB4, ORMQR,
157     $                   ORMRZ, TRSM, TZRZF
158      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX,
159     $                   SMAXPR, SMIN, SMINPR, SMLNUM, T1, T2, WSIZE
160*     ..
161*     .. External Functions ..
162      INTEGER            ILAENV
163      DOUBLE PRECISION   DLAMCH, DLANGE, DOPBL3, DOPLA, DSECND
164      EXTERNAL           ILAENV, DLAMCH, DLANGE, DOPBL3, DOPLA, DSECND
165*     ..
166*     .. External Subroutines ..
167      EXTERNAL           DCOPY, DGEQP3, DLABAD, DLAIC1, DLASCL, DLASET,
168     $                   DORMQR, DORMRZ, DTRSM, DTZRZF, XERBLA
169*     ..
170*     .. Intrinsic Functions ..
171      INTRINSIC          ABS, DBLE, MAX, MIN
172*     ..
173*     .. Data statements ..
174      DATA               GELSY / 1 / , GEQP3 / 2 / , ORMQR / 4 / ,
175     $                   ORMRZ / 6 / , TRSM / 5 / , TZRZF / 3 /
176*     ..
177*     .. Executable Statements ..
178*
179      MN = MIN( M, N )
180      ISMIN = MN + 1
181      ISMAX = 2*MN + 1
182*
183*     Test the input arguments.
184*
185      INFO = 0
186      NB1 = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
187      NB2 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 )
188      NB3 = ILAENV( 1, 'DORMQR', ' ', M, N, NRHS, -1 )
189      NB4 = ILAENV( 1, 'DORMRQ', ' ', M, N, NRHS, -1 )
190      NB = MAX( NB1, NB2, NB3, NB4 )
191      LWKOPT = MAX( 1, MN+2*N+NB*( N+1 ), 2*MN+NB*NRHS )
192      WORK( 1 ) = DBLE( LWKOPT )
193      LQUERY = ( LWORK.EQ.-1 )
194      IF( M.LT.0 ) THEN
195         INFO = -1
196      ELSE IF( N.LT.0 ) THEN
197         INFO = -2
198      ELSE IF( NRHS.LT.0 ) THEN
199         INFO = -3
200      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
201         INFO = -5
202      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
203         INFO = -7
204      ELSE IF( LWORK.LT.MAX( 1, MN+3*N+1, 2*MN+NRHS ) .AND. .NOT.
205     $         LQUERY ) THEN
206         INFO = -12
207      END IF
208*
209      IF( INFO.NE.0 ) THEN
210         CALL XERBLA( 'DGELSY', -INFO )
211         RETURN
212      ELSE IF( LQUERY ) THEN
213         RETURN
214      END IF
215*
216*     Quick return if possible
217*
218      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
219         RANK = 0
220         RETURN
221      END IF
222*
223*     Get machine parameters
224*
225      OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( 2 )
226      SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' )
227      BIGNUM = ONE / SMLNUM
228      CALL DLABAD( SMLNUM, BIGNUM )
229*
230*     Scale A, B if max entries outside range [SMLNUM,BIGNUM]
231*
232      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
233      IASCL = 0
234      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
235*
236*        Scale matrix norm up to SMLNUM
237*
238         OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( M*N )
239         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
240         IASCL = 1
241      ELSE IF( ANRM.GT.BIGNUM ) THEN
242*
243*        Scale matrix norm down to BIGNUM
244*
245         OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( M*N )
246         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
247         IASCL = 2
248      ELSE IF( ANRM.EQ.ZERO ) THEN
249*
250*        Matrix all zero. Return zero solution.
251*
252         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
253         RANK = 0
254         GO TO 70
255      END IF
256*
257      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
258      IBSCL = 0
259      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
260*
261*        Scale matrix norm up to SMLNUM
262*
263         OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( M*NRHS )
264         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
265         IBSCL = 1
266      ELSE IF( BNRM.GT.BIGNUM ) THEN
267*
268*        Scale matrix norm down to BIGNUM
269*
270         OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( M*NRHS )
271         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
272         IBSCL = 2
273      END IF
274*
275*     Compute QR factorization with column pivoting of A:
276*        A * P = Q * R
277*
278      OPCNT( GEQP3 ) = OPCNT( GEQP3 ) + DOPLA( 'DGEQPF', M, N, 0, 0, 0 )
279      T1 = DSECND( )
280      CALL DGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ),
281     $             LWORK-MN, INFO )
282      T2 = DSECND( )
283      TIMNG( GEQP3 ) = TIMNG( GEQP3 ) + ( T2-T1 )
284      WSIZE = MN + WORK( MN+1 )
285*
286*     workspace: MN+2*N+NB*(N+1).
287*     Details of Householder rotations stored in WORK(1:MN).
288*
289*     Determine RANK using incremental condition estimation
290*
291      WORK( ISMIN ) = ONE
292      WORK( ISMAX ) = ONE
293      SMAX = ABS( A( 1, 1 ) )
294      SMIN = SMAX
295      IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
296         RANK = 0
297         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
298         GO TO 70
299      ELSE
300         RANK = 1
301      END IF
302*
303   10 CONTINUE
304      IF( RANK.LT.MN ) THEN
305         I = RANK + 1
306         OPS = 0
307         CALL DLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
308     $                A( I, I ), SMINPR, S1, C1 )
309         CALL DLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
310     $                A( I, I ), SMAXPR, S2, C2 )
311         OPCNT( GELSY ) = OPCNT( GELSY ) + OPS + DBLE( 1 )
312*
313         IF( SMAXPR*RCOND.LE.SMINPR ) THEN
314            OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( RANK*2 )
315            DO 20 I = 1, RANK
316               WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
317               WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
318   20       CONTINUE
319            WORK( ISMIN+RANK ) = C1
320            WORK( ISMAX+RANK ) = C2
321            SMIN = SMINPR
322            SMAX = SMAXPR
323            RANK = RANK + 1
324            GO TO 10
325         END IF
326      END IF
327*
328*     workspace: 3*MN.
329*
330*     Logically partition R = [ R11 R12 ]
331*                             [  0  R22 ]
332*     where R11 = R(1:RANK,1:RANK)
333*
334*     [R11,R12] = [ T11, 0 ] * Y
335*
336      IF( RANK.LT.N ) THEN
337         OPCNT( TZRZF ) = OPCNT( TZRZF ) +
338     $                    DOPLA( 'DTZRQF', RANK, N, 0, 0, 0 )
339         T1 = DSECND( )
340         CALL DTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ),
341     $                LWORK-2*MN, INFO )
342         T2 = DSECND( )
343         TIMNG( TZRZF ) = TIMNG( TZRZF ) + ( T2-T1 )
344      END IF
345*
346*     workspace: 2*MN.
347*     Details of Householder rotations stored in WORK(MN+1:2*MN)
348*
349*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
350*
351      OPCNT( ORMQR ) = OPCNT( ORMQR ) +
352     $                 DOPLA( 'DORMQR', M, NRHS, MN, 0, 0 )
353      T1 = DSECND( )
354      CALL DORMQR( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ),
355     $             B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO )
356      T2 = DSECND( )
357      TIMNG( ORMQR ) = TIMNG( ORMQR ) + ( T2-T1 )
358      WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) )
359*
360*     workspace: 2*MN+NB*NRHS.
361*
362*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
363*
364      OPCNT( TRSM ) = OPCNT( TRSM ) + DOPBL3( 'DTRSM ', RANK, NRHS, 0 )
365      T1 = DSECND( )
366      CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
367     $            NRHS, ONE, A, LDA, B, LDB )
368      T2 = DSECND( )
369      TIMNG( TRSM ) = TIMNG( TRSM ) + ( T2-T1 )
370*
371      DO 40 J = 1, NRHS
372         DO 30 I = RANK + 1, N
373            B( I, J ) = ZERO
374   30    CONTINUE
375   40 CONTINUE
376*
377*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
378*
379      IF( RANK.LT.N ) THEN
380         NB = ILAENV( 1, 'DORMRQ', 'LT', N, NRHS, RANK, -1 )
381         OPCNT( ORMRZ ) = OPCNT( ORMRZ ) +
382     $                    DOPLA( 'DORMRQ', N, NRHS, RANK, 0, NB )
383         T1 = DSECND( )
384         CALL DORMRZ( 'Left', 'Transpose', N, NRHS, RANK, N-RANK, A,
385     $                LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ),
386     $                LWORK-2*MN, INFO )
387         T2 = DSECND( )
388         TIMNG( ORMRZ ) = TIMNG( ORMRZ ) + ( T2-T1 )
389      END IF
390*
391*     workspace: 2*MN+NRHS.
392*
393*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
394*
395      DO 60 J = 1, NRHS
396         DO 50 I = 1, N
397            WORK( JPVT( I ) ) = B( I, J )
398   50    CONTINUE
399         CALL DCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 )
400   60 CONTINUE
401*
402*     workspace: N.
403*
404*     Undo scaling
405*
406      IF( IASCL.EQ.1 ) THEN
407         OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( N*NRHS+RANK*RANK )
408         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
409         CALL DLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
410     $                INFO )
411      ELSE IF( IASCL.EQ.2 ) THEN
412         OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( N*NRHS+RANK*RANK )
413         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
414         CALL DLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
415     $                INFO )
416      END IF
417      IF( IBSCL.EQ.1 ) THEN
418         OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( N*NRHS )
419         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
420      ELSE IF( IBSCL.EQ.2 ) THEN
421         OPCNT( GELSY ) = OPCNT( GELSY ) + DBLE( N*NRHS )
422         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
423      END IF
424*
425   70 CONTINUE
426      WORK( 1 ) = DBLE( LWKOPT )
427*
428      RETURN
429*
430*     End of DGELSY
431*
432      END
433