1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Sv_2x2(FLA_Obj alpha11,FLA_Obj alpha12,FLA_Obj alpha22,FLA_Obj sigma1,FLA_Obj sigma2)13 FLA_Error FLA_Sv_2x2( FLA_Obj alpha11, FLA_Obj alpha12, FLA_Obj alpha22,
14 FLA_Obj sigma1, FLA_Obj sigma2 )
15 /*
16 Compute the singular value decomposition of a 2x2 triangular matrix A
17 such that
18
19 / alpha11 alpha12 \
20 \ 0 alpha22 /
21
22 is equal to
23
24 / gammaL -sigmaL \ / sigma1 0 \ / gammaR -sigmaR \'
25 \ sigmaL gammaL / \ 0 sigma2 / \ sigmaR gammaR /
26
27 Upon completion, sigma1 and sigma2 are overwritten with the
28 singular values of smaller and larger absolute values, respectively.
29 gammaL, sigmaL, gammaR, and sigmaR are not computed.
30
31 This routine is a nearly-verbatim translation of slas2() and dlas2()
32 from the netlib distribution of LAPACK.
33
34 -FGVZ
35 */
36 {
37 FLA_Datatype datatype;
38
39 datatype = FLA_Obj_datatype( alpha11 );
40
41 switch ( datatype )
42 {
43 case FLA_FLOAT:
44 {
45 float* buff_alpha11 = FLA_FLOAT_PTR( alpha11 );
46 float* buff_alpha12 = FLA_FLOAT_PTR( alpha12 );
47 float* buff_alpha22 = FLA_FLOAT_PTR( alpha22 );
48 float* buff_sigma1 = FLA_FLOAT_PTR( sigma1 );
49 float* buff_sigma2 = FLA_FLOAT_PTR( sigma2 );
50
51 FLA_Sv_2x2_ops( buff_alpha11,
52 buff_alpha12,
53 buff_alpha22,
54 buff_sigma1,
55 buff_sigma2 );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_alpha11 = FLA_DOUBLE_PTR( alpha11 );
63 double* buff_alpha12 = FLA_DOUBLE_PTR( alpha12 );
64 double* buff_alpha22 = FLA_DOUBLE_PTR( alpha22 );
65 double* buff_sigma1 = FLA_DOUBLE_PTR( sigma1 );
66 double* buff_sigma2 = FLA_DOUBLE_PTR( sigma2 );
67
68 FLA_Sv_2x2_opd( buff_alpha11,
69 buff_alpha12,
70 buff_alpha22,
71 buff_sigma1,
72 buff_sigma2 );
73
74 break;
75 }
76 }
77
78 return FLA_SUCCESS;
79 }
80
81
82
FLA_Sv_2x2_ops(float * alpha11,float * alpha12,float * alpha22,float * sigma1,float * sigma2)83 FLA_Error FLA_Sv_2x2_ops( float* alpha11,
84 float* alpha12,
85 float* alpha22,
86 float* sigma1,
87 float* sigma2 )
88 {
89 float zero = 0.0F;
90 float one = 1.0F;
91 float two = 2.0F;
92
93 float f, g, h;
94 float as, at, au, c, fa, fhmin, fhmax, ga, ha;
95 float ssmin, ssmax;
96 float temp, temp2;
97
98 f = *alpha11;
99 g = *alpha12;
100 h = *alpha22;
101
102 fa = fabsf( f );
103 ga = fabsf( g );
104 ha = fabsf( h );
105
106 fhmin = min( fa, ha );
107 fhmax = max( fa, ha );
108
109 if ( fhmin == zero )
110 {
111 ssmin = zero;
112
113 if ( fhmax == zero )
114 ssmax = ga;
115 else
116 {
117 temp = min( fhmax, ga ) / max( fhmax, ga );
118 ssmax = max( fhmax, ga ) * sqrtf( one + temp * temp );
119 }
120 }
121 else
122 {
123 if ( ga < fhmax )
124 {
125 as = one + fhmin / fhmax;
126 at = ( fhmax - fhmin ) / fhmax;
127 au = ( ga / fhmax ) * ( ga / fhmax );
128 c = two / ( sqrtf( as * as + au ) + sqrtf( at * at + au ) );
129 ssmin = fhmin * c;
130 ssmax = fhmax / c;
131 }
132 else
133 {
134 au = fhmax / ga;
135
136 if ( au == zero )
137 {
138 ssmin = ( fhmin * fhmax ) / ga;
139 ssmax = ga;
140 }
141 else
142 {
143 as = one + fhmin / fhmax;
144 at = ( fhmax - fhmin ) / fhmax;
145 temp = as * au;
146 temp2 = at * au;
147 c = one / ( sqrtf( one + temp * temp ) +
148 sqrtf( one + temp2 * temp2 ) );
149 ssmin = ( fhmin * c ) * au;
150 ssmin = ssmin + ssmin;
151 ssmax = ga / ( c + c );
152 }
153 }
154 }
155
156 // Save the output values.
157
158 *sigma1 = ssmin;
159 *sigma2 = ssmax;
160
161 return FLA_SUCCESS;
162 }
163
164
165
FLA_Sv_2x2_opd(double * alpha11,double * alpha12,double * alpha22,double * sigma1,double * sigma2)166 FLA_Error FLA_Sv_2x2_opd( double* alpha11,
167 double* alpha12,
168 double* alpha22,
169 double* sigma1,
170 double* sigma2 )
171 {
172 double zero = 0.0;
173 double one = 1.0;
174 double two = 2.0;
175
176 double f, g, h;
177 double as, at, au, c, fa, fhmin, fhmax, ga, ha;
178 double ssmin, ssmax;
179 double temp, temp2;
180
181 f = *alpha11;
182 g = *alpha12;
183 h = *alpha22;
184
185 fa = fabs( f );
186 ga = fabs( g );
187 ha = fabs( h );
188
189 fhmin = min( fa, ha );
190 fhmax = max( fa, ha );
191
192 if ( fhmin == zero )
193 {
194 ssmin = zero;
195
196 if ( fhmax == zero )
197 ssmax = ga;
198 else
199 {
200 temp = min( fhmax, ga ) / max( fhmax, ga );
201 ssmax = max( fhmax, ga ) * sqrt( one + temp * temp );
202 }
203 }
204 else
205 {
206 if ( ga < fhmax )
207 {
208 as = one + fhmin / fhmax;
209 at = ( fhmax - fhmin ) / fhmax;
210 au = ( ga / fhmax ) * ( ga / fhmax );
211 c = two / ( sqrt( as * as + au ) + sqrt( at * at + au ) );
212 ssmin = fhmin * c;
213 ssmax = fhmax / c;
214 }
215 else
216 {
217 au = fhmax / ga;
218
219 if ( au == zero )
220 {
221 ssmin = ( fhmin * fhmax ) / ga;
222 ssmax = ga;
223 }
224 else
225 {
226 as = one + fhmin / fhmax;
227 at = ( fhmax - fhmin ) / fhmax;
228 temp = as * au;
229 temp2 = at * au;
230 c = one / ( sqrt( one + temp * temp ) +
231 sqrt( one + temp2 * temp2 ) );
232 ssmin = ( fhmin * c ) * au;
233 ssmin = ssmin + ssmin;
234 ssmax = ga / ( c + c );
235 }
236 }
237 }
238
239 // Save the output values.
240
241 *sigma1 = ssmin;
242 *sigma2 = ssmax;
243
244 return FLA_SUCCESS;
245 }
246
247