1*> \brief \b SLAORHR_COL_GETRFNP2
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DLAORHR_GETRF2NP + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaorhr_col_getrfnp2.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaorhr_col_getrfnp2.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaorhr_col_getrfnp2.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       RECURSIVE SUBROUTINE SLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
22*
23*       .. Scalar Arguments ..
24*       INTEGER            INFO, LDA, M, N
25*       ..
26*       .. Array Arguments ..
27*       REAL               A( LDA, * ), D( * )
28*       ..
29*
30*
31*> \par Purpose:
32*  =============
33*>
34*> \verbatim
35*>
36*> SLAORHR_COL_GETRFNP2 computes the modified LU factorization without
37*> pivoting of a real general M-by-N matrix A. The factorization has
38*> the form:
39*>
40*>     A - S = L * U,
41*>
42*> where:
43*>    S is a m-by-n diagonal sign matrix with the diagonal D, so that
44*>    D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
45*>    as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
46*>    i-1 steps of Gaussian elimination. This means that the diagonal
47*>    element at each step of "modified" Gaussian elimination is at
48*>    least one in absolute value (so that division-by-zero not
49*>    possible during the division by the diagonal element);
50*>
51*>    L is a M-by-N lower triangular matrix with unit diagonal elements
52*>    (lower trapezoidal if M > N);
53*>
54*>    and U is a M-by-N upper triangular matrix
55*>    (upper trapezoidal if M < N).
56*>
57*> This routine is an auxiliary routine used in the Householder
58*> reconstruction routine SORHR_COL. In SORHR_COL, this routine is
59*> applied to an M-by-N matrix A with orthonormal columns, where each
60*> element is bounded by one in absolute value. With the choice of
61*> the matrix S above, one can show that the diagonal element at each
62*> step of Gaussian elimination is the largest (in absolute value) in
63*> the column on or below the diagonal, so that no pivoting is required
64*> for numerical stability [1].
65*>
66*> For more details on the Householder reconstruction algorithm,
67*> including the modified LU factorization, see [1].
68*>
69*> This is the recursive version of the LU factorization algorithm.
70*> Denote A - S by B. The algorithm divides the matrix B into four
71*> submatrices:
72*>
73*>        [  B11 | B12  ]  where B11 is n1 by n1,
74*>    B = [ -----|----- ]        B21 is (m-n1) by n1,
75*>        [  B21 | B22  ]        B12 is n1 by n2,
76*>                               B22 is (m-n1) by n2,
77*>                               with n1 = min(m,n)/2, n2 = n-n1.
78*>
79*>
80*> The subroutine calls itself to factor B11, solves for B21,
81*> solves for B12, updates B22, then calls itself to factor B22.
82*>
83*> For more details on the recursive LU algorithm, see [2].
84*>
85*> SLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked
86*> routine SLAORHR_COL_GETRFNP, which uses blocked code calling
87*> Level 3 BLAS to update the submatrix. However, SLAORHR_COL_GETRFNP2
88*> is self-sufficient and can be used without SLAORHR_COL_GETRFNP.
89*>
90*> [1] "Reconstructing Householder vectors from tall-skinny QR",
91*>     G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
92*>     E. Solomonik, J. Parallel Distrib. Comput.,
93*>     vol. 85, pp. 3-31, 2015.
94*>
95*> [2] "Recursion leads to automatic variable blocking for dense linear
96*>     algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
97*>     vol. 41, no. 6, pp. 737-755, 1997.
98*> \endverbatim
99*
100*  Arguments:
101*  ==========
102*
103*> \param[in] M
104*> \verbatim
105*>          M is INTEGER
106*>          The number of rows of the matrix A.  M >= 0.
107*> \endverbatim
108*>
109*> \param[in] N
110*> \verbatim
111*>          N is INTEGER
112*>          The number of columns of the matrix A.  N >= 0.
113*> \endverbatim
114*>
115*> \param[in,out] A
116*> \verbatim
117*>          A is REAL array, dimension (LDA,N)
118*>          On entry, the M-by-N matrix to be factored.
119*>          On exit, the factors L and U from the factorization
120*>          A-S=L*U; the unit diagonal elements of L are not stored.
121*> \endverbatim
122*>
123*> \param[in] LDA
124*> \verbatim
125*>          LDA is INTEGER
126*>          The leading dimension of the array A.  LDA >= max(1,M).
127*> \endverbatim
128*>
129*> \param[out] D
130*> \verbatim
131*>          D is REAL array, dimension min(M,N)
132*>          The diagonal elements of the diagonal M-by-N sign matrix S,
133*>          D(i) = S(i,i), where 1 <= i <= min(M,N). The elements can
134*>          be only plus or minus one.
135*> \endverbatim
136*>
137*> \param[out] INFO
138*> \verbatim
139*>          INFO is INTEGER
140*>          = 0:  successful exit
141*>          < 0:  if INFO = -i, the i-th argument had an illegal value
142*> \endverbatim
143*>
144*  Authors:
145*  ========
146*
147*> \author Univ. of Tennessee
148*> \author Univ. of California Berkeley
149*> \author Univ. of Colorado Denver
150*> \author NAG Ltd.
151*
152*> \ingroup realGEcomputational
153*
154*> \par Contributors:
155*  ==================
156*>
157*> \verbatim
158*>
159*> November 2019, Igor Kozachenko,
160*>                Computer Science Division,
161*>                University of California, Berkeley
162*>
163*> \endverbatim
164*
165*  =====================================================================
166      RECURSIVE SUBROUTINE SLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
167      IMPLICIT NONE
168*
169*  -- LAPACK computational routine --
170*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
171*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
172*
173*     .. Scalar Arguments ..
174      INTEGER            INFO, LDA, M, N
175*     ..
176*     .. Array Arguments ..
177      REAL               A( LDA, * ), D( * )
178*     ..
179*
180*  =====================================================================
181*
182*     .. Parameters ..
183      REAL               ONE
184      PARAMETER          ( ONE = 1.0E+0 )
185*     ..
186*     .. Local Scalars ..
187      REAL               SFMIN
188      INTEGER            I, IINFO, N1, N2
189*     ..
190*     .. External Functions ..
191      REAL               SLAMCH
192      EXTERNAL           SLAMCH
193*     ..
194*     .. External Subroutines ..
195      EXTERNAL           SGEMM, SSCAL, STRSM, XERBLA
196*     ..
197*     .. Intrinsic Functions ..
198      INTRINSIC          ABS, SIGN, MAX, MIN
199*     ..
200*     .. Executable Statements ..
201*
202*     Test the input parameters
203*
204      INFO = 0
205      IF( M.LT.0 ) THEN
206         INFO = -1
207      ELSE IF( N.LT.0 ) THEN
208         INFO = -2
209      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
210         INFO = -4
211      END IF
212      IF( INFO.NE.0 ) THEN
213         CALL XERBLA( 'SLAORHR_COL_GETRFNP2', -INFO )
214         RETURN
215      END IF
216*
217*     Quick return if possible
218*
219      IF( MIN( M, N ).EQ.0 )
220     $   RETURN
221
222      IF ( M.EQ.1 ) THEN
223*
224*        One row case, (also recursion termination case),
225*        use unblocked code
226*
227*        Transfer the sign
228*
229         D( 1 ) = -SIGN( ONE, A( 1, 1 ) )
230*
231*        Construct the row of U
232*
233         A( 1, 1 ) = A( 1, 1 ) - D( 1 )
234*
235      ELSE IF( N.EQ.1 ) THEN
236*
237*        One column case, (also recursion termination case),
238*        use unblocked code
239*
240*        Transfer the sign
241*
242         D( 1 ) = -SIGN( ONE, A( 1, 1 ) )
243*
244*        Construct the row of U
245*
246         A( 1, 1 ) = A( 1, 1 ) - D( 1 )
247*
248*        Scale the elements 2:M of the column
249*
250*        Determine machine safe minimum
251*
252         SFMIN = SLAMCH('S')
253*
254*        Construct the subdiagonal elements of L
255*
256         IF( ABS( A( 1, 1 ) ) .GE. SFMIN ) THEN
257            CALL SSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
258         ELSE
259            DO I = 2, M
260               A( I, 1 ) = A( I, 1 ) / A( 1, 1 )
261            END DO
262         END IF
263*
264      ELSE
265*
266*        Divide the matrix B into four submatrices
267*
268         N1 = MIN( M, N ) / 2
269         N2 = N-N1
270
271*
272*        Factor B11, recursive call
273*
274         CALL SLAORHR_COL_GETRFNP2( N1, N1, A, LDA, D, IINFO )
275*
276*        Solve for B21
277*
278         CALL STRSM( 'R', 'U', 'N', 'N', M-N1, N1, ONE, A, LDA,
279     $               A( N1+1, 1 ), LDA )
280*
281*        Solve for B12
282*
283         CALL STRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
284     $               A( 1, N1+1 ), LDA )
285*
286*        Update B22, i.e. compute the Schur complement
287*        B22 := B22 - B21*B12
288*
289         CALL SGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
290     $               A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
291*
292*        Factor B22, recursive call
293*
294         CALL SLAORHR_COL_GETRFNP2( M-N1, N2, A( N1+1, N1+1 ), LDA,
295     $                          D( N1+1 ), IINFO )
296*
297      END IF
298      RETURN
299*
300*     End of SLAORHR_COL_GETRFNP2
301*
302      END
303