1*> \brief <b> DSGESV computes the solution to system of linear equations A * X = B for GE matrices</b> (mixed precision with iterative refinement)
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DSGESV + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsgesv.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsgesv.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsgesv.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
22*                          SWORK, ITER, INFO )
23*
24*       .. Scalar Arguments ..
25*       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
26*       ..
27*       .. Array Arguments ..
28*       INTEGER            IPIV( * )
29*       REAL               SWORK( * )
30*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
31*      $                   X( LDX, * )
32*       ..
33*
34*
35*> \par Purpose:
36*  =============
37*>
38*> \verbatim
39*>
40*> DSGESV computes the solution to a real system of linear equations
41*>    A * X = B,
42*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
43*>
44*> DSGESV first attempts to factorize the matrix in SINGLE PRECISION
45*> and use this factorization within an iterative refinement procedure
46*> to produce a solution with DOUBLE PRECISION normwise backward error
47*> quality (see below). If the approach fails the method switches to a
48*> DOUBLE PRECISION factorization and solve.
49*>
50*> The iterative refinement is not going to be a winning strategy if
51*> the ratio SINGLE PRECISION performance over DOUBLE PRECISION
52*> performance is too small. A reasonable strategy should take the
53*> number of right-hand sides and the size of the matrix into account.
54*> This might be done with a call to ILAENV in the future. Up to now, we
55*> always try iterative refinement.
56*>
57*> The iterative refinement process is stopped if
58*>     ITER > ITERMAX
59*> or for all the RHS we have:
60*>     RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
61*> where
62*>     o ITER is the number of the current iteration in the iterative
63*>       refinement process
64*>     o RNRM is the infinity-norm of the residual
65*>     o XNRM is the infinity-norm of the solution
66*>     o ANRM is the infinity-operator-norm of the matrix A
67*>     o EPS is the machine epsilon returned by DLAMCH('Epsilon')
68*> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
69*> respectively.
70*> \endverbatim
71*
72*  Arguments:
73*  ==========
74*
75*> \param[in] N
76*> \verbatim
77*>          N is INTEGER
78*>          The number of linear equations, i.e., the order of the
79*>          matrix A.  N >= 0.
80*> \endverbatim
81*>
82*> \param[in] NRHS
83*> \verbatim
84*>          NRHS is INTEGER
85*>          The number of right hand sides, i.e., the number of columns
86*>          of the matrix B.  NRHS >= 0.
87*> \endverbatim
88*>
89*> \param[in,out] A
90*> \verbatim
91*>          A is DOUBLE PRECISION array,
92*>          dimension (LDA,N)
93*>          On entry, the N-by-N coefficient matrix A.
94*>          On exit, if iterative refinement has been successfully used
95*>          (INFO = 0 and ITER >= 0, see description below), then A is
96*>          unchanged, if double precision factorization has been used
97*>          (INFO = 0 and ITER < 0, see description below), then the
98*>          array A contains the factors L and U from the factorization
99*>          A = P*L*U; the unit diagonal elements of L are not stored.
100*> \endverbatim
101*>
102*> \param[in] LDA
103*> \verbatim
104*>          LDA is INTEGER
105*>          The leading dimension of the array A.  LDA >= max(1,N).
106*> \endverbatim
107*>
108*> \param[out] IPIV
109*> \verbatim
110*>          IPIV is INTEGER array, dimension (N)
111*>          The pivot indices that define the permutation matrix P;
112*>          row i of the matrix was interchanged with row IPIV(i).
113*>          Corresponds either to the single precision factorization
114*>          (if INFO = 0 and ITER >= 0) or the double precision
115*>          factorization (if INFO = 0 and ITER < 0).
116*> \endverbatim
117*>
118*> \param[in] B
119*> \verbatim
120*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
121*>          The N-by-NRHS right hand side matrix B.
122*> \endverbatim
123*>
124*> \param[in] LDB
125*> \verbatim
126*>          LDB is INTEGER
127*>          The leading dimension of the array B.  LDB >= max(1,N).
128*> \endverbatim
129*>
130*> \param[out] X
131*> \verbatim
132*>          X is DOUBLE PRECISION array, dimension (LDX,NRHS)
133*>          If INFO = 0, the N-by-NRHS solution matrix X.
134*> \endverbatim
135*>
136*> \param[in] LDX
137*> \verbatim
138*>          LDX is INTEGER
139*>          The leading dimension of the array X.  LDX >= max(1,N).
140*> \endverbatim
141*>
142*> \param[out] WORK
143*> \verbatim
144*>          WORK is DOUBLE PRECISION array, dimension (N,NRHS)
145*>          This array is used to hold the residual vectors.
146*> \endverbatim
147*>
148*> \param[out] SWORK
149*> \verbatim
150*>          SWORK is REAL array, dimension (N*(N+NRHS))
151*>          This array is used to use the single precision matrix and the
152*>          right-hand sides or solutions in single precision.
153*> \endverbatim
154*>
155*> \param[out] ITER
156*> \verbatim
157*>          ITER is INTEGER
158*>          < 0: iterative refinement has failed, double precision
159*>               factorization has been performed
160*>               -1 : the routine fell back to full precision for
161*>                    implementation- or machine-specific reasons
162*>               -2 : narrowing the precision induced an overflow,
163*>                    the routine fell back to full precision
164*>               -3 : failure of SGETRF
165*>               -31: stop the iterative refinement after the 30th
166*>                    iterations
167*>          > 0: iterative refinement has been successfully used.
168*>               Returns the number of iterations
169*> \endverbatim
170*>
171*> \param[out] INFO
172*> \verbatim
173*>          INFO is INTEGER
174*>          = 0:  successful exit
175*>          < 0:  if INFO = -i, the i-th argument had an illegal value
176*>          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
177*>                exactly zero.  The factorization has been completed,
178*>                but the factor U is exactly singular, so the solution
179*>                could not be computed.
180*> \endverbatim
181*
182*  Authors:
183*  ========
184*
185*> \author Univ. of Tennessee
186*> \author Univ. of California Berkeley
187*> \author Univ. of Colorado Denver
188*> \author NAG Ltd.
189*
190*> \date June 2016
191*
192*> \ingroup doubleGEsolve
193*
194*  =====================================================================
195      SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
196     $                   SWORK, ITER, INFO )
197*
198*  -- LAPACK driver routine (version 3.8.0) --
199*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
200*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
201*     June 2016
202*
203*     .. Scalar Arguments ..
204      INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
205*     ..
206*     .. Array Arguments ..
207      INTEGER            IPIV( * )
208      REAL               SWORK( * )
209      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
210     $                   X( LDX, * )
211*     ..
212*
213*  =====================================================================
214*
215*     .. Parameters ..
216      LOGICAL            DOITREF
217      PARAMETER          ( DOITREF = .TRUE. )
218*
219      INTEGER            ITERMAX
220      PARAMETER          ( ITERMAX = 30 )
221*
222      DOUBLE PRECISION   BWDMAX
223      PARAMETER          ( BWDMAX = 1.0E+00 )
224*
225      DOUBLE PRECISION   NEGONE, ONE
226      PARAMETER          ( NEGONE = -1.0D+0, ONE = 1.0D+0 )
227*
228*     .. Local Scalars ..
229      INTEGER            I, IITER, PTSA, PTSX
230      DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM
231*
232*     .. External Subroutines ..
233      EXTERNAL           DAXPY, DGEMM, DLACPY, DLAG2S, DGETRF, DGETRS,
234     $                   SGETRF, SGETRS, SLAG2D, XERBLA
235*     ..
236*     .. External Functions ..
237      INTEGER            IDAMAX
238      DOUBLE PRECISION   DLAMCH, DLANGE
239      EXTERNAL           IDAMAX, DLAMCH, DLANGE
240*     ..
241*     .. Intrinsic Functions ..
242      INTRINSIC          ABS, DBLE, MAX, SQRT
243*     ..
244*     .. Executable Statements ..
245*
246      INFO = 0
247      ITER = 0
248*
249*     Test the input parameters.
250*
251      IF( N.LT.0 ) THEN
252         INFO = -1
253      ELSE IF( NRHS.LT.0 ) THEN
254         INFO = -2
255      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
256         INFO = -4
257      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
258         INFO = -7
259      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
260         INFO = -9
261      END IF
262      IF( INFO.NE.0 ) THEN
263         CALL XERBLA( 'DSGESV', -INFO )
264         RETURN
265      END IF
266*
267*     Quick return if (N.EQ.0).
268*
269      IF( N.EQ.0 )
270     $   RETURN
271*
272*     Skip single precision iterative refinement if a priori slower
273*     than double precision factorization.
274*
275      IF( .NOT.DOITREF ) THEN
276         ITER = -1
277         GO TO 40
278      END IF
279*
280*     Compute some constants.
281*
282      ANRM = DLANGE( 'I', N, N, A, LDA, WORK )
283      EPS = DLAMCH( 'Epsilon' )
284      CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX
285*
286*     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
287*
288      PTSA = 1
289      PTSX = PTSA + N*N
290*
291*     Convert B from double precision to single precision and store the
292*     result in SX.
293*
294      CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )
295*
296      IF( INFO.NE.0 ) THEN
297         ITER = -2
298         GO TO 40
299      END IF
300*
301*     Convert A from double precision to single precision and store the
302*     result in SA.
303*
304      CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO )
305*
306      IF( INFO.NE.0 ) THEN
307         ITER = -2
308         GO TO 40
309      END IF
310*
311*     Compute the LU factorization of SA.
312*
313      CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO )
314*
315      IF( INFO.NE.0 ) THEN
316         ITER = -3
317         GO TO 40
318      END IF
319*
320*     Solve the system SA*SX = SB.
321*
322      CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
323     $             SWORK( PTSX ), N, INFO )
324*
325*     Convert SX back to double precision
326*
327      CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )
328*
329*     Compute R = B - AX (R is WORK).
330*
331      CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
332*
333      CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A,
334     $            LDA, X, LDX, ONE, WORK, N )
335*
336*     Check whether the NRHS normwise backward errors satisfy the
337*     stopping criterion. If yes, set ITER=0 and return.
338*
339      DO I = 1, NRHS
340         XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
341         RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
342         IF( RNRM.GT.XNRM*CTE )
343     $      GO TO 10
344      END DO
345*
346*     If we are here, the NRHS normwise backward errors satisfy the
347*     stopping criterion. We are good to exit.
348*
349      ITER = 0
350      RETURN
351*
352   10 CONTINUE
353*
354      DO 30 IITER = 1, ITERMAX
355*
356*        Convert R (in WORK) from double precision to single precision
357*        and store the result in SX.
358*
359         CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )
360*
361         IF( INFO.NE.0 ) THEN
362            ITER = -2
363            GO TO 40
364         END IF
365*
366*        Solve the system SA*SX = SR.
367*
368         CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
369     $                SWORK( PTSX ), N, INFO )
370*
371*        Convert SX back to double precision and update the current
372*        iterate.
373*
374         CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )
375*
376         DO I = 1, NRHS
377            CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )
378         END DO
379*
380*        Compute R = B - AX (R is WORK).
381*
382         CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
383*
384         CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE,
385     $               A, LDA, X, LDX, ONE, WORK, N )
386*
387*        Check whether the NRHS normwise backward errors satisfy the
388*        stopping criterion. If yes, set ITER=IITER>0 and return.
389*
390         DO I = 1, NRHS
391            XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
392            RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
393            IF( RNRM.GT.XNRM*CTE )
394     $         GO TO 20
395         END DO
396*
397*        If we are here, the NRHS normwise backward errors satisfy the
398*        stopping criterion, we are good to exit.
399*
400         ITER = IITER
401*
402         RETURN
403*
404   20    CONTINUE
405*
406   30 CONTINUE
407*
408*     If we are at this place of the code, this is because we have
409*     performed ITER=ITERMAX iterations and never satisfied the
410*     stopping criterion, set up the ITER flag accordingly and follow up
411*     on double precision routine.
412*
413      ITER = -ITERMAX - 1
414*
415   40 CONTINUE
416*
417*     Single-precision iterative refinement failed to converge to a
418*     satisfactory solution, so we resort to double precision.
419*
420      CALL DGETRF( N, N, A, LDA, IPIV, INFO )
421*
422      IF( INFO.NE.0 )
423     $   RETURN
424*
425      CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )
426      CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX,
427     $             INFO )
428*
429      RETURN
430*
431*     End of DSGESV.
432*
433      END
434