1 /*
2  * $Id: dgelss.c,v 1.1 2005-09-18 22:04:43 dhmunro Exp $
3  * LAPACK matrix solver using SVD.
4  */
5 /* Copyright (c) 2005, The Regents of the University of California.
6  * All rights reserved.
7  * This file is part of yorick (http://yorick.sourceforge.net).
8  * Read the accompanying LICENSE file for details.
9  */
10 
11 #include "dg.h"
12 
13 
14 /*---blas routines---*/
15 /* dcopy dgemv dgemm */
16 
17 
18 /*-----Fortran intrinsics converted-----*/
19 #define min(x,y) ((x)<(y)?(x):(y))
20 #define max(x,y) ((x)>(y)?(x):(y))
21 /*-----end of Fortran intrinsics-----*/
22 
23 
24 
dgelss(long m,long n,long nrhs,double a[],long lda,double b[],long ldb,double s[],double rcond,long * rank,double work[],long lwork,long * info)25 void dgelss( long m, long n, long nrhs, double a[], long lda,
26             double b[], long ldb, double s[], double rcond,
27             long *rank,double work[], long lwork, long *info )
28 {
29   /**
30    *  -- LAPACK driver routine (version 1.1) --
31    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
32    *     Courant Institute, Argonne National Lab, and Rice University
33    *     March 31, 1993
34    *
35    *     .. Scalar Arguments ..*/
36   /**     ..
37    *     .. Array Arguments ..*/
38 #undef work_1
39 #define work_1(a1) work[a1-1]
40 #undef s_1
41 #define s_1(a1) s[a1-1]
42 #undef b_2
43 #define b_2(a1,a2) b[a1-1+ldb*(a2-1)]
44 #undef a_2
45 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
46   /**     ..
47    *
48    *  Purpose
49    *  =======
50    *
51    *  DGELSS computes the minimum norm solution to a real linear least
52    *  squares problem:
53    *
54    *  Minimize 2-norm(| b - A*x |).
55    *
56    *  using the singular value decomposition (SVD) of A. A is an M-by-N
57    *  matrix which may be rank-deficient.
58    *
59    *  Several right hand side vectors b and solution vectors x can be
60    *  handled in a single call; they are stored as the columns of the
61    *  M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
62    *  X.
63    *
64    *  The effective rank of A is determined by treating as zero those
65    *  singular values which are less than RCOND times the largest singular
66    *  value.
67    *
68    *  Arguments
69    *  =========
70    *
71    *  M       (input) INTEGER
72    *          The number of rows of the matrix A. M >= 0.
73    *
74    *  N       (input) INTEGER
75    *          The number of columns of the matrix A. N >= 0.
76    *
77    *  NRHS    (input) INTEGER
78    *          The number of right hand sides, i.e., the number of columns
79    *          of the matrices B and X. NRHS >= 0.
80    *
81    *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
82    *          On entry, the M-by-N matrix A.
83    *          On exit, the first min(m,n) rows of A are overwritten with
84    *          its right singular vectors, stored rowwise.
85    *
86    *  LDA     (input) INTEGER
87    *          The leading dimension of the array A.  LDA >= max(1,M).
88    *
89    *  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
90    *          On entry, the M-by-NRHS right hand side matrix B.
91    *          On exit, B is overwritten by the N-by-NRHS solution
92    *          matrix X.  If m >= n and RANK = n, the residual
93    *          sum-of-squares for the solution in the i-th column is given
94    *          by the sum of squares of elements n+1:m in that column.
95    *
96    *  LDB     (input) INTEGER
97    *          The leading dimension of the array B. LDB >= max(1,MAX(M,N)).
98    *
99    *  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
100    *          The singular values of A in decreasing order.
101    *          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
102    *
103    *  RCOND   (input) DOUBLE PRECISION
104    *          RCOND is used to determine the effective rank of A.
105    *          Singular values S(i) <= RCOND*S(1) are treated as zero.
106    *          If RCOND $<$ 0, machine precision is used instead.
107    *
108    *  RANK    (output) INTEGER
109    *          The effective rank of A, i.e., the number of singular values
110    *          which are greater than RCOND*S(1).
111    *
112    *  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
113    *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
114    *
115    *  LWORK   (input) INTEGER
116    *          The dimension of the array WORK. LWORK >= 1, and also:
117    *          LWORK >= 3*N+MAX(2*N,NRHS,M) if M >= N,
118    *          LWORK >= 3*M+MAX(2*M,NRHS,N) if M < N.
119    *          For good performance, LWORK should generally be larger.
120    *
121    *  INFO    (output) INTEGER
122    *          = 0:  successful exit
123    *          < 0:  if INFO = -i, the i-th argument had an illegal value.
124    *          > 0:  the algorithm for computing the SVD failed to converge;
125    *                if INFO = i, i off-diagonal elements of an intermediate
126    *                bidiagonal form did not converge to zero.
127    *
128    *  =====================================================================
129    *
130    *     .. Parameters ..*/
131 #undef zero
132 #define zero 0.0e0
133 #undef one
134 #define one 1.0e0
135   /**     ..
136    *     .. Local Scalars ..*/
137   long    bl, chunk, i, iascl, ibscl, ie, il, itau,
138           itaup, itauq, iwork, ldwork, maxmn, maxwrk=0,
139           minmn, minwrk, mm, mnthr;
140   double    anrm, bignum, bnrm, eps, sfmin, smlnum, thr;
141   /**     ..
142    *     .. Local Arrays ..*/
143   double    vdum[1];
144 #undef vdum_1
145 #define vdum_1(a1) [a1-1]
146   /**     ..
147    *     .. Intrinsic Functions ..*/
148   /*      intrinsic          max, min;*/
149   /**     ..
150    *     .. Executable Statements ..
151    *
152    *     Test the input arguments
153    **/
154   /*-----implicit-declarations-----*/
155   /*-----end-of-declarations-----*/
156   *info = 0;
157   minmn = min( m, n );
158   maxmn = max( m, n );
159   mnthr = ilaenv( 6, "dgelss", " ", m, n, nrhs, -1 );
160   if( m<0 ) {
161     *info = -1;
162   } else if( n<0 ) {
163     *info = -2;
164   } else if( nrhs<0 ) {
165     *info = -3;
166   } else if( lda<max( 1, m ) ) {
167     *info = -5;
168   } else if( ldb<max( 1, maxmn ) ) {
169     *info = -7;
170   }
171   /**
172    *     Compute workspace
173    *      (Note: Comments in the code beginning "Workspace:" describe the
174    *       minimal amount of workspace needed at that point in the code,
175    *       as well as the preferred amount for good performance.
176    *       NB refers to the optimal block size for the immediately
177    *       following subroutine, as returned by ILAENV.)
178    **/
179   minwrk = 1;
180   if( *info==0 && lwork>=1 ) {
181     maxwrk = 0;
182     mm = m;
183     if( m>=n && m>=mnthr ) {
184       /**
185        *           Path 1a - overdetermined, with many more rows than columns
186        **/
187       mm = n;
188       maxwrk = max( maxwrk, n+n*ilaenv( 1, "dgeqrf", " ", m, n,
189                                        -1, -1 ) );
190       maxwrk = max( maxwrk, n+nrhs*
191                    ilaenv( 1, "dormqr", "lt", m, nrhs, n, -1 ) );
192     }
193     if( m>=n ) {
194       /**
195        *           Path 1 - overdetermined or exactly determined
196        **/
197       maxwrk = max( maxwrk, 3*n+( mm+n )*
198                    ilaenv( 1, "dgebrd", " ", mm, n, -1, -1 ) );
199       maxwrk = max( maxwrk, 3*n+nrhs*
200                    ilaenv( 1, "dormbr", "qlt", mm, nrhs, n, -1 ) );
201       maxwrk = max( maxwrk, 3*n+( n-1 )*
202                    ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) );
203       maxwrk = max( maxwrk, 5*n-4 );
204       maxwrk = max( maxwrk, n*nrhs );
205       minwrk = max( max(3*n+mm, 3*n+nrhs), 5*n-4 );
206     }
207     if( n>m ) {
208       minwrk = max( max(3*m+nrhs, 3*m+n), 5*m-4 );
209       if( n>=mnthr ) {
210         /**
211          *              Path 2a - underdetermined, with many more columns
212          *              than rows
213          **/
214         maxwrk = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
215         maxwrk = max( maxwrk, m*m+4*m+2*m*
216                      ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
217         maxwrk = max( maxwrk, m*m+4*m+nrhs*
218                      ilaenv( 1, "dormbr", "qlt", m, nrhs, m, -1 ) );
219         maxwrk = max( maxwrk, m*m+4*m+( m-1 )*
220                      ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
221         maxwrk = max( maxwrk, m*m+6*m-4 );
222         if( nrhs>1 ) {
223           maxwrk = max( maxwrk, m*m+m+m*nrhs );
224         } else {
225           maxwrk = max( maxwrk, m*m+2*m );
226         }
227         maxwrk = max( maxwrk, m+nrhs*
228                      ilaenv( 1, "dormlq", "lt", n, nrhs, m, -1 ) );
229       } else {
230         /**
231          *              Path 2 - underdetermined
232          **/
233         maxwrk = 3*m + ( n+m )*ilaenv( 1, "dgebrd", " ", m, n,
234                                       -1, -1 );
235         maxwrk = max( maxwrk, 3*m+nrhs*
236                      ilaenv( 1, "dormbr", "qlt", m, nrhs, m, -1 ) );
237         maxwrk = max( maxwrk, 3*m+m*
238                      ilaenv( 1, "dorgbr", "p", m, n, m, -1 ) );
239         maxwrk = max( maxwrk, 5*m-4 );
240         maxwrk = max( maxwrk, n*nrhs );
241       }
242     }
243     minwrk = min( minwrk, maxwrk );
244     work_1( 1 ) = maxwrk;
245   }
246 
247   minwrk = max( minwrk, 1 );
248   if( lwork<minwrk )
249     *info = -12;
250   if( *info!=0 ) {
251     xerbla( "dgelss", -*info );
252     return;
253   }
254   /**
255    *     Quick return if possible
256    **/
257   if( m==0 || n==0 ) {
258     *rank = 0;
259     return;
260   }
261   /**
262    *     Get machine parameters
263    **/
264   eps = dlamch( 'p' );
265   sfmin = dlamch( 's' );
266   smlnum = sfmin / eps;
267   bignum = one / smlnum;
268   dlabad( &smlnum, &bignum );
269   /**
270    *     Scale A if max entry outside range [SMLNUM,BIGNUM]
271    **/
272   anrm = dlange( 'm', m, n, a, lda, work );
273   iascl = 0;
274   if( anrm>zero && anrm<smlnum ) {
275     /**
276      *        Scale matrix norm up to SMLNUM
277      **/
278     dlascl( 'g', 0, 0, anrm, smlnum, m, n, a, lda, info );
279     iascl = 1;
280   } else if( anrm>bignum ) {
281     /**
282      *        Scale matrix norm down to BIGNUM
283      **/
284     dlascl( 'g', 0, 0, anrm, bignum, m, n, a, lda, info );
285     iascl = 2;
286   } else if( anrm==zero ) {
287     /**
288      *        Matrix all zero. Return zero solution.
289      **/
290     dlaset( 'f', max( m, n ), nrhs, zero, zero, b, ldb );
291     dlaset( 'f', minmn, 1, zero, zero, s, 1 );
292     *rank = 0;
293     goto L_70;
294   }
295   /**
296    *     Scale B if max entry outside range [SMLNUM,BIGNUM]
297    **/
298   bnrm = dlange( 'm', m, nrhs, b, ldb, work );
299   ibscl = 0;
300   if( bnrm>zero && bnrm<smlnum ) {
301     /**
302      *        Scale matrix norm up to SMLNUM
303      **/
304     dlascl( 'g', 0, 0, bnrm, smlnum, m, nrhs, b, ldb, info );
305     ibscl = 1;
306   } else if( bnrm>bignum ) {
307     /**
308      *        Scale matrix norm down to BIGNUM
309      **/
310     dlascl( 'g', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info );
311     ibscl = 2;
312   }
313   /**
314    *     Overdetermined case
315    **/
316   if( m>=n ) {
317     /**
318      *        Path 1 - overdetermined or exactly determined
319      **/
320     mm = m;
321     if( m>=mnthr ) {
322       /**
323        *           Path 1a - overdetermined, with many more rows than columns
324        **/
325       mm = n;
326       itau = 1;
327       iwork = itau + n;
328       /**
329        *           Compute A=Q*R
330        *           (Workspace: need 2*N, prefer N+N*NB)
331        **/
332       dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ),
333              lwork-iwork+1, info );
334       /**
335        *           Multiply B by transpose(Q)
336        *           (Workspace: need N+NRHS, prefer N+NRHS*NB)
337        **/
338       dormqr( 'l', 't', m, nrhs, n, a, lda, &work_1( itau ), b,
339              ldb, &work_1( iwork ), lwork-iwork+1, info );
340       /**
341        *           Zero out below R
342        **/
343       if( n>1 )
344         dlaset( 'l', n-1, n-1, zero, zero, &a_2( 2, 1 ), lda );
345     }
346 
347     ie = 1;
348     itauq = ie + n;
349     itaup = itauq + n;
350     iwork = itaup + n;
351     /**
352      *        Bidiagonalize R in A
353      *        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
354      **/
355     dgebrd( mm, n, a, lda, s, &work_1( ie ), &work_1( itauq ),
356            &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
357            info );
358     /**
359      *        Multiply B by transpose of left bidiagonalizing vectors of R
360      *        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
361      **/
362     dormbr( 'q', 'l', 't', mm, nrhs, n, a, lda, &work_1( itauq ),
363            b, ldb, &work_1( iwork ), lwork-iwork+1, info );
364     /**
365      *        Generate right bidiagonalizing vectors of R in A
366      *        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
367      **/
368     dorgbr( 'p', n, n, n, a, lda, &work_1( itaup ),
369            &work_1( iwork ), lwork-iwork+1, info );
370     iwork = ie + n;
371     /**
372      *        Perform bidiagonal QR iteration
373      *          multiply B by transpose of left singular vectors
374      *          compute right singular vectors in A
375      *        (Workspace: need 5*N-4)
376      **/
377     dbdsqr( 'u', n, n, 0, nrhs, s, &work_1( ie ), a, lda, vdum,
378            1, b, ldb, &work_1( iwork ), info );
379     if( *info!=0 )
380       goto L_70;
381     /**
382      *        Multiply B by reciprocals of singular values
383      **/
384     thr = max( rcond*s_1( 1 ), sfmin );
385     if( thr<zero )
386       thr = max( eps*s_1( 1 ), sfmin );
387     *rank = 0;
388     for (i=1 ; i<=n ; i+=1) {
389       if( s_1( i )>thr ) {
390         drscl( nrhs, s_1( i ), &b_2( i, 1 ), ldb );
391         *rank = *rank + 1;
392       } else {
393         dlaset( 'f', 1, nrhs, zero, zero, &b_2( i, 1 ), ldb );
394       }
395     }
396     /**
397      *        Multiply B by right singular vectors
398      *        (Workspace: need N, prefer N*NRHS)
399      **/
400     if( lwork>=ldb*nrhs && nrhs>1 ) {
401       cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, nrhs, n,
402                   one, a, lda, b, ldb, zero, work, ldb );
403       dlacpy( 'g', n, nrhs, work, ldb, b, ldb );
404     } else if( nrhs>1 ) {
405       chunk = lwork / n;
406       for (i=1 ; chunk>0?i<=nrhs:i>=nrhs ; i+=chunk) {
407         bl = min( nrhs-i+1, chunk );
408         cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, bl, n,
409                     one, a, lda, b, ldb, zero, work, n );
410         dlacpy( 'g', n, bl, work, n, b, ldb );
411       }
412     } else {
413       cblas_dgemv(CblasColMajor, CblasTrans, n, n, one, a, lda, b, 1, zero,
414                   work, 1 );
415       cblas_dcopy( n, work, 1, b, 1 );
416     }
417 
418   } else if( n>=mnthr && lwork>=4*m+m*m+
419             max( max(m, 2*m-4), max(nrhs, n-3*m) ) ) {
420     /**
421      *        Path 2a - underdetermined, with many more columns than rows
422      *        and sufficient workspace for an efficient algorithm
423      **/
424     ldwork = m;
425     if( lwork>=max( 4*m+m*lda+max( max(m, 2*m-4), max(nrhs, n-3*m) ),
426                    m*lda+m+m*nrhs ) )ldwork = lda;
427     itau = 1;
428     iwork = m + 1;
429     /**
430      *        Compute A=L*Q
431      *        (Workspace: need 2*M, prefer M+M*NB)
432      **/
433     dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ),
434            lwork-iwork+1, info );
435     il = iwork;
436     /**
437      *        Copy L to WORK(IL), zeroing out above it
438      **/
439     dlacpy( 'l', m, m, a, lda, &work_1( il ), ldwork );
440     dlaset( 'u', m-1, m-1, zero, zero, &work_1( il+ldwork ),
441            ldwork );
442     ie = il + ldwork*m;
443     itauq = ie + m;
444     itaup = itauq + m;
445     iwork = itaup + m;
446     /**
447      *        Bidiagonalize L in WORK(IL)
448      *        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
449      **/
450     dgebrd( m, m, &work_1( il ), ldwork, s, &work_1( ie ),
451            &work_1( itauq ), &work_1( itaup ), &work_1( iwork ),
452            lwork-iwork+1, info );
453     /**
454      *        Multiply B by transpose of left bidiagonalizing vectors of L
455      *        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
456      **/
457     dormbr( 'q', 'l', 't', m, nrhs, m, &work_1( il ), ldwork,
458            &work_1( itauq ), b, ldb, &work_1( iwork ),
459            lwork-iwork+1, info );
460     /**
461      *        Generate right bidiagonalizing vectors of R in WORK(IL)
462      *        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
463      **/
464     dorgbr( 'p', m, m, m, &work_1( il ), ldwork, &work_1( itaup ),
465            &work_1( iwork ), lwork-iwork+1, info );
466     iwork = ie + m;
467     /**
468      *        Perform bidiagonal QR iteration,
469      *           computing right singular vectors of L in WORK(IL) and
470      *           multiplying B by transpose of left singular vectors
471      *        (Workspace: need M*M+6*M-4)
472      **/
473     dbdsqr( 'u', m, m, 0, nrhs, s, &work_1( ie ), &work_1( il ),
474            ldwork, a, lda, b, ldb, &work_1( iwork ), info );
475     if( *info!=0 )
476       goto L_70;
477     /**
478      *        Multiply B by reciprocals of singular values
479      **/
480     thr = max( rcond*s_1( 1 ), sfmin );
481     if( thr<zero )
482       thr = max( eps*s_1( 1 ), sfmin );
483     *rank = 0;
484     for (i=1 ; i<=m ; i+=1) {
485       if( s_1( i )>thr ) {
486         drscl( nrhs, s_1( i ), &b_2( i, 1 ), ldb );
487         *rank = *rank + 1;
488       } else {
489         dlaset( 'f', 1, nrhs, zero, zero, &b_2( i, 1 ), ldb );
490       }
491     }
492     iwork = ie;
493     /**
494      *        Multiply B by right singular vectors of L in WORK(IL)
495      *        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
496      **/
497     if( lwork>=ldb*nrhs+iwork-1 && nrhs>1 ) {
498       cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, m, nrhs, m,
499                   one, &work_1( il ), ldwork, b, ldb, zero,
500                   &work_1( iwork ), ldb );
501       dlacpy( 'g', m, nrhs, &work_1( iwork ), ldb, b, ldb );
502     } else if( nrhs>1 ) {
503       chunk = ( lwork-iwork+1 ) / m;
504       for (i=1 ; chunk>0?i<=nrhs:i>=nrhs ; i+=chunk) {
505         bl = min( nrhs-i+1, chunk );
506         cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, m, bl, m,
507                     one, &work_1( il ), ldwork, &b_2( 1, i ), ldb, zero,
508                     &work_1( iwork ), n );
509         dlacpy( 'g', m, bl, &work_1( iwork ), n, b, ldb );
510       }
511     } else {
512       cblas_dgemv(CblasColMajor, CblasTrans, m, m, one, &work_1( il ),
513                   ldwork, &b_2( 1, 1 ), 1, zero, &work_1( iwork ), 1 );
514       cblas_dcopy( m, &work_1( iwork ), 1, &b_2( 1, 1 ), 1 );
515     }
516     /**
517      *        Zero out below first M rows of B
518      **/
519     dlaset( 'f', n-m, nrhs, zero, zero, &b_2( m+1, 1 ), ldb );
520     iwork = itau + m;
521     /**
522      *        Multiply transpose(Q) by B
523      *        (Workspace: need M+NRHS, prefer M+NRHS*NB)
524      **/
525     dormlq( 'l', 't', n, nrhs, m, a, lda, &work_1( itau ), b,
526            ldb, &work_1( iwork ), lwork-iwork+1, info );
527 
528   } else {
529     /**
530      *        Path 2 - remaining underdetermined cases
531      **/
532     ie = 1;
533     itauq = ie + m;
534     itaup = itauq + m;
535     iwork = itaup + m;
536     /**
537      *        Bidiagonalize A
538      *        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
539      **/
540     dgebrd( m, n, a, lda, s, &work_1( ie ), &work_1( itauq ),
541            &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
542            info );
543     /**
544      *        Multiply B by transpose of left bidiagonalizing vectors
545      *        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
546      **/
547     dormbr( 'q', 'l', 't', m, nrhs, n, a, lda, &work_1( itauq ),
548            b, ldb, &work_1( iwork ), lwork-iwork+1, info );
549     /**
550      *        Generate right bidiagonalizing vectors in A
551      *        (Workspace: need 4*M, prefer 3*M+M*NB)
552      **/
553     dorgbr( 'p', m, n, m, a, lda, &work_1( itaup ),
554            &work_1( iwork ), lwork-iwork+1, info );
555     iwork = ie + m;
556     /**
557      *        Perform bidiagonal QR iteration,
558      *           computing right singular vectors of A in A and
559      *           multiplying B by transpose of left singular vectors
560      *        (Workspace: need 5*M-4)
561      **/
562     dbdsqr( 'l', m, n, 0, nrhs, s, &work_1( ie ), a, lda, vdum,
563            1, b, ldb, &work_1( iwork ), info );
564     if( *info!=0 )
565       goto L_70;
566     /**
567      *        Multiply B by reciprocals of singular values
568      **/
569     thr = max( rcond*s_1( 1 ), sfmin );
570     if( thr<zero )
571       thr = max( eps*s_1( 1 ), sfmin );
572     *rank = 0;
573     for (i=1 ; i<=m ; i+=1) {
574       if( s_1( i )>thr ) {
575         drscl( nrhs, s_1( i ), &b_2( i, 1 ), ldb );
576         *rank = *rank + 1;
577       } else {
578         dlaset( 'f', 1, nrhs, zero, zero, &b_2( i, 1 ), ldb );
579       }
580     }
581     /**
582      *        Multiply B by right singular vectors of A
583      *        (Workspace: need N, prefer N*NRHS)
584      **/
585     if( lwork>=ldb*nrhs && nrhs>1 ) {
586       cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, nrhs, m,
587                   one, a, lda, b, ldb, zero, work, ldb );
588       dlacpy( 'f', n, nrhs, work, ldb, b, ldb );
589     } else if( nrhs>1 ) {
590       chunk = lwork / n;
591       for (i=1 ; chunk>0?i<=nrhs:i>=nrhs ; i+=chunk) {
592         bl = min( nrhs-i+1, chunk );
593         cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, bl, m, one,
594                     a, lda, &b_2( 1, i ), ldb, zero, work, n );
595         dlacpy( 'f', n, bl, work, n, &b_2( 1, i ), ldb );
596       }
597     } else {
598       cblas_dgemv(CblasColMajor, CblasTrans, m, n, one, a, lda, b, 1, zero,
599                   work, 1 );
600       cblas_dcopy( n, work, 1, b, 1 );
601     }
602   }
603   /**
604    *     Undo scaling
605    **/
606   if( iascl==1 ) {
607     dlascl( 'g', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info );
608     dlascl( 'g', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
609            info );
610   } else if( iascl==2 ) {
611     dlascl( 'g', 0, 0, anrm, bignum, n, nrhs, b, ldb, info );
612     dlascl( 'g', 0, 0, bignum, anrm, minmn, 1, s, minmn,
613            info );
614   }
615   if( ibscl==1 ) {
616     dlascl( 'g', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info );
617   } else if( ibscl==2 ) {
618     dlascl( 'g', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info );
619   }
620 
621  L_70:
622   work_1( 1 ) = maxwrk;
623   return;
624   /**
625    *     End of DGELSS
626    **/
627 }
628