1*> \brief <b> DSGESV computes the solution to system of linear equations A * X = B for GE matrices</b> (mixed precision with iterative refinement)
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DSGESV + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsgesv.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsgesv.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsgesv.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
22*                          SWORK, ITER, INFO )
23*
24*       .. Scalar Arguments ..
25*       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
26*       ..
27*       .. Array Arguments ..
28*       INTEGER            IPIV( * )
29*       REAL               SWORK( * )
30*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
31*      $                   X( LDX, * )
32*       ..
33*
34*
35*> \par Purpose:
36*  =============
37*>
38*> \verbatim
39*>
40*> DSGESV computes the solution to a real system of linear equations
41*>    A * X = B,
42*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
43*>
44*> DSGESV first attempts to factorize the matrix in SINGLE PRECISION
45*> and use this factorization within an iterative refinement procedure
46*> to produce a solution with DOUBLE PRECISION normwise backward error
47*> quality (see below). If the approach fails the method switches to a
48*> DOUBLE PRECISION factorization and solve.
49*>
50*> The iterative refinement is not going to be a winning strategy if
51*> the ratio SINGLE PRECISION performance over DOUBLE PRECISION
52*> performance is too small. A reasonable strategy should take the
53*> number of right-hand sides and the size of the matrix into account.
54*> This might be done with a call to ILAENV in the future. Up to now, we
55*> always try iterative refinement.
56*>
57*> The iterative refinement process is stopped if
58*>     ITER > ITERMAX
59*> or for all the RHS we have:
60*>     RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
61*> where
62*>     o ITER is the number of the current iteration in the iterative
63*>       refinement process
64*>     o RNRM is the infinity-norm of the residual
65*>     o XNRM is the infinity-norm of the solution
66*>     o ANRM is the infinity-operator-norm of the matrix A
67*>     o EPS is the machine epsilon returned by DLAMCH('Epsilon')
68*> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
69*> respectively.
70*> \endverbatim
71*
72*  Arguments:
73*  ==========
74*
75*> \param[in] N
76*> \verbatim
77*>          N is INTEGER
78*>          The number of linear equations, i.e., the order of the
79*>          matrix A.  N >= 0.
80*> \endverbatim
81*>
82*> \param[in] NRHS
83*> \verbatim
84*>          NRHS is INTEGER
85*>          The number of right hand sides, i.e., the number of columns
86*>          of the matrix B.  NRHS >= 0.
87*> \endverbatim
88*>
89*> \param[in,out] A
90*> \verbatim
91*>          A is DOUBLE PRECISION array,
92*>          dimension (LDA,N)
93*>          On entry, the N-by-N coefficient matrix A.
94*>          On exit, if iterative refinement has been successfully used
95*>          (INFO = 0 and ITER >= 0, see description below), then A is
96*>          unchanged, if double precision factorization has been used
97*>          (INFO = 0 and ITER < 0, see description below), then the
98*>          array A contains the factors L and U from the factorization
99*>          A = P*L*U; the unit diagonal elements of L are not stored.
100*> \endverbatim
101*>
102*> \param[in] LDA
103*> \verbatim
104*>          LDA is INTEGER
105*>          The leading dimension of the array A.  LDA >= max(1,N).
106*> \endverbatim
107*>
108*> \param[out] IPIV
109*> \verbatim
110*>          IPIV is INTEGER array, dimension (N)
111*>          The pivot indices that define the permutation matrix P;
112*>          row i of the matrix was interchanged with row IPIV(i).
113*>          Corresponds either to the single precision factorization
114*>          (if INFO = 0 and ITER >= 0) or the double precision
115*>          factorization (if INFO = 0 and ITER < 0).
116*> \endverbatim
117*>
118*> \param[in] B
119*> \verbatim
120*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
121*>          The N-by-NRHS right hand side matrix B.
122*> \endverbatim
123*>
124*> \param[in] LDB
125*> \verbatim
126*>          LDB is INTEGER
127*>          The leading dimension of the array B.  LDB >= max(1,N).
128*> \endverbatim
129*>
130*> \param[out] X
131*> \verbatim
132*>          X is DOUBLE PRECISION array, dimension (LDX,NRHS)
133*>          If INFO = 0, the N-by-NRHS solution matrix X.
134*> \endverbatim
135*>
136*> \param[in] LDX
137*> \verbatim
138*>          LDX is INTEGER
139*>          The leading dimension of the array X.  LDX >= max(1,N).
140*> \endverbatim
141*>
142*> \param[out] WORK
143*> \verbatim
144*>          WORK is DOUBLE PRECISION array, dimension (N,NRHS)
145*>          This array is used to hold the residual vectors.
146*> \endverbatim
147*>
148*> \param[out] SWORK
149*> \verbatim
150*>          SWORK is REAL array, dimension (N*(N+NRHS))
151*>          This array is used to use the single precision matrix and the
152*>          right-hand sides or solutions in single precision.
153*> \endverbatim
154*>
155*> \param[out] ITER
156*> \verbatim
157*>          ITER is INTEGER
158*>          < 0: iterative refinement has failed, double precision
159*>               factorization has been performed
160*>               -1 : the routine fell back to full precision for
161*>                    implementation- or machine-specific reasons
162*>               -2 : narrowing the precision induced an overflow,
163*>                    the routine fell back to full precision
164*>               -3 : failure of SGETRF
165*>               -31: stop the iterative refinement after the 30th
166*>                    iterations
167*>          > 0: iterative refinement has been successfully used.
168*>               Returns the number of iterations
169*> \endverbatim
170*>
171*> \param[out] INFO
172*> \verbatim
173*>          INFO is INTEGER
174*>          = 0:  successful exit
175*>          < 0:  if INFO = -i, the i-th argument had an illegal value
176*>          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
177*>                exactly zero.  The factorization has been completed,
178*>                but the factor U is exactly singular, so the solution
179*>                could not be computed.
180*> \endverbatim
181*
182*  Authors:
183*  ========
184*
185*> \author Univ. of Tennessee
186*> \author Univ. of California Berkeley
187*> \author Univ. of Colorado Denver
188*> \author NAG Ltd.
189*
190*> \ingroup doubleGEsolve
191*
192*  =====================================================================
193      SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
194     $                   SWORK, ITER, INFO )
195*
196*  -- LAPACK driver routine --
197*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
198*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
199*
200*     .. Scalar Arguments ..
201      INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
202*     ..
203*     .. Array Arguments ..
204      INTEGER            IPIV( * )
205      REAL               SWORK( * )
206      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
207     $                   X( LDX, * )
208*     ..
209*
210*  =====================================================================
211*
212*     .. Parameters ..
213      LOGICAL            DOITREF
214      PARAMETER          ( DOITREF = .TRUE. )
215*
216      INTEGER            ITERMAX
217      PARAMETER          ( ITERMAX = 30 )
218*
219      DOUBLE PRECISION   BWDMAX
220      PARAMETER          ( BWDMAX = 1.0E+00 )
221*
222      DOUBLE PRECISION   NEGONE, ONE
223      PARAMETER          ( NEGONE = -1.0D+0, ONE = 1.0D+0 )
224*
225*     .. Local Scalars ..
226      INTEGER            I, IITER, PTSA, PTSX
227      DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM
228*
229*     .. External Subroutines ..
230      EXTERNAL           DAXPY, DGEMM, DLACPY, DLAG2S, DGETRF, DGETRS,
231     $                   SGETRF, SGETRS, SLAG2D, XERBLA
232*     ..
233*     .. External Functions ..
234      INTEGER            IDAMAX
235      DOUBLE PRECISION   DLAMCH, DLANGE
236      EXTERNAL           IDAMAX, DLAMCH, DLANGE
237*     ..
238*     .. Intrinsic Functions ..
239      INTRINSIC          ABS, DBLE, MAX, SQRT
240*     ..
241*     .. Executable Statements ..
242*
243      INFO = 0
244      ITER = 0
245*
246*     Test the input parameters.
247*
248      IF( N.LT.0 ) THEN
249         INFO = -1
250      ELSE IF( NRHS.LT.0 ) THEN
251         INFO = -2
252      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
253         INFO = -4
254      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
255         INFO = -7
256      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
257         INFO = -9
258      END IF
259      IF( INFO.NE.0 ) THEN
260         CALL XERBLA( 'DSGESV', -INFO )
261         RETURN
262      END IF
263*
264*     Quick return if (N.EQ.0).
265*
266      IF( N.EQ.0 )
267     $   RETURN
268*
269*     Skip single precision iterative refinement if a priori slower
270*     than double precision factorization.
271*
272      IF( .NOT.DOITREF ) THEN
273         ITER = -1
274         GO TO 40
275      END IF
276*
277*     Compute some constants.
278*
279      ANRM = DLANGE( 'I', N, N, A, LDA, WORK )
280      EPS = DLAMCH( 'Epsilon' )
281      CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX
282*
283*     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
284*
285      PTSA = 1
286      PTSX = PTSA + N*N
287*
288*     Convert B from double precision to single precision and store the
289*     result in SX.
290*
291      CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )
292*
293      IF( INFO.NE.0 ) THEN
294         ITER = -2
295         GO TO 40
296      END IF
297*
298*     Convert A from double precision to single precision and store the
299*     result in SA.
300*
301      CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO )
302*
303      IF( INFO.NE.0 ) THEN
304         ITER = -2
305         GO TO 40
306      END IF
307*
308*     Compute the LU factorization of SA.
309*
310      CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO )
311*
312      IF( INFO.NE.0 ) THEN
313         ITER = -3
314         GO TO 40
315      END IF
316*
317*     Solve the system SA*SX = SB.
318*
319      CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
320     $             SWORK( PTSX ), N, INFO )
321*
322*     Convert SX back to double precision
323*
324      CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )
325*
326*     Compute R = B - AX (R is WORK).
327*
328      CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
329*
330      CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A,
331     $            LDA, X, LDX, ONE, WORK, N )
332*
333*     Check whether the NRHS normwise backward errors satisfy the
334*     stopping criterion. If yes, set ITER=0 and return.
335*
336      DO I = 1, NRHS
337         XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
338         RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
339         IF( RNRM.GT.XNRM*CTE )
340     $      GO TO 10
341      END DO
342*
343*     If we are here, the NRHS normwise backward errors satisfy the
344*     stopping criterion. We are good to exit.
345*
346      ITER = 0
347      RETURN
348*
349   10 CONTINUE
350*
351      DO 30 IITER = 1, ITERMAX
352*
353*        Convert R (in WORK) from double precision to single precision
354*        and store the result in SX.
355*
356         CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )
357*
358         IF( INFO.NE.0 ) THEN
359            ITER = -2
360            GO TO 40
361         END IF
362*
363*        Solve the system SA*SX = SR.
364*
365         CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
366     $                SWORK( PTSX ), N, INFO )
367*
368*        Convert SX back to double precision and update the current
369*        iterate.
370*
371         CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )
372*
373         DO I = 1, NRHS
374            CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )
375         END DO
376*
377*        Compute R = B - AX (R is WORK).
378*
379         CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
380*
381         CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE,
382     $               A, LDA, X, LDX, ONE, WORK, N )
383*
384*        Check whether the NRHS normwise backward errors satisfy the
385*        stopping criterion. If yes, set ITER=IITER>0 and return.
386*
387         DO I = 1, NRHS
388            XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
389            RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
390            IF( RNRM.GT.XNRM*CTE )
391     $         GO TO 20
392         END DO
393*
394*        If we are here, the NRHS normwise backward errors satisfy the
395*        stopping criterion, we are good to exit.
396*
397         ITER = IITER
398*
399         RETURN
400*
401   20    CONTINUE
402*
403   30 CONTINUE
404*
405*     If we are at this place of the code, this is because we have
406*     performed ITER=ITERMAX iterations and never satisfied the
407*     stopping criterion, set up the ITER flag accordingly and follow up
408*     on double precision routine.
409*
410      ITER = -ITERMAX - 1
411*
412   40 CONTINUE
413*
414*     Single-precision iterative refinement failed to converge to a
415*     satisfactory solution, so we resort to double precision.
416*
417      CALL DGETRF( N, N, A, LDA, IPIV, INFO )
418*
419      IF( INFO.NE.0 )
420     $   RETURN
421*
422      CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )
423      CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX,
424     $             INFO )
425*
426      RETURN
427*
428*     End of DSGESV
429*
430      END
431