1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_CAQR2_UT_opt_var1(FLA_Obj U,FLA_Obj D,FLA_Obj T)13 FLA_Error FLA_CAQR2_UT_opt_var1( FLA_Obj U,
14 FLA_Obj D, FLA_Obj T )
15 {
16 FLA_Datatype datatype;
17 int mn_UT, m_D;
18 int rs_U, cs_U;
19 int rs_D, cs_D;
20 int rs_T, cs_T;
21
22 datatype = FLA_Obj_datatype( U );
23
24 mn_UT = FLA_Obj_width( U );
25 m_D = FLA_Obj_length( D );
26
27 rs_U = FLA_Obj_row_stride( U );
28 cs_U = FLA_Obj_col_stride( U );
29 rs_D = FLA_Obj_row_stride( D );
30 cs_D = FLA_Obj_col_stride( D );
31 rs_T = FLA_Obj_row_stride( T );
32 cs_T = FLA_Obj_col_stride( T );
33
34
35 switch ( datatype )
36 {
37 case FLA_FLOAT:
38 {
39 float* buff_U = FLA_FLOAT_PTR( U );
40 float* buff_D = FLA_FLOAT_PTR( D );
41 float* buff_T = FLA_FLOAT_PTR( T );
42
43 FLA_CAQR2_UT_ops_var1( mn_UT,
44 m_D,
45 buff_U, rs_U, cs_U,
46 buff_D, rs_D, cs_D,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_U = FLA_DOUBLE_PTR( U );
55 double* buff_D = FLA_DOUBLE_PTR( D );
56 double* buff_T = FLA_DOUBLE_PTR( T );
57
58 FLA_CAQR2_UT_opd_var1( mn_UT,
59 m_D,
60 buff_U, rs_U, cs_U,
61 buff_D, rs_D, cs_D,
62 buff_T, rs_T, cs_T );
63
64 break;
65 }
66
67 case FLA_COMPLEX:
68 {
69 scomplex* buff_U = FLA_COMPLEX_PTR( U );
70 scomplex* buff_D = FLA_COMPLEX_PTR( D );
71 scomplex* buff_T = FLA_COMPLEX_PTR( T );
72
73 FLA_CAQR2_UT_opc_var1( mn_UT,
74 m_D,
75 buff_U, rs_U, cs_U,
76 buff_D, rs_D, cs_D,
77 buff_T, rs_T, cs_T );
78
79 break;
80 }
81
82 case FLA_DOUBLE_COMPLEX:
83 {
84 dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
85 dcomplex* buff_D = FLA_DOUBLE_COMPLEX_PTR( D );
86 dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
87
88 FLA_CAQR2_UT_opz_var1( mn_UT,
89 m_D,
90 buff_U, rs_U, cs_U,
91 buff_D, rs_D, cs_D,
92 buff_T, rs_T, cs_T );
93
94 break;
95 }
96 }
97
98 return FLA_SUCCESS;
99 }
100
101
102
FLA_CAQR2_UT_ops_var1(int mn_UT,int m_D,float * buff_U,int rs_U,int cs_U,float * buff_D,int rs_D,int cs_D,float * buff_T,int rs_T,int cs_T)103 FLA_Error FLA_CAQR2_UT_ops_var1( int mn_UT,
104 int m_D,
105 float* buff_U, int rs_U, int cs_U,
106 float* buff_D, int rs_D, int cs_D,
107 float* buff_T, int rs_T, int cs_T )
108 {
109 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
110 int i, j;
111 int m_DT = m_D - mn_UT;
112
113 for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
114 {
115 float* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
116 float* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
117
118 float* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
119 float* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
120 float* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
121
122 float* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
123 float* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
124
125 float* d1B = d1 + (m_DT)*rs_D;
126 float* D00B = D00 + (m_DT)*rs_D;
127
128 int m_behind = i;
129 int n_behind = j;
130 int mn_ahead = mn_UT - j - 1;
131
132 //------------------------------------------------------------//
133
134 // FLA_Househ2_UT( FLA_LEFT,
135 // upsilon11,
136 // d1, tau11 );
137 FLA_Househ2_UT_l_ops( m_behind + 1,
138 upsilon11,
139 d1, rs_D,
140 tau11 );
141
142 // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
143 // D2 );
144 FLA_Apply_H2_UT_l_ops_var1( m_behind + 1,
145 mn_ahead,
146 tau11,
147 d1, rs_D,
148 u12t, cs_U,
149 D2, rs_D, cs_D );
150
151 // FLA_Copy_external( d01B, t01 );
152 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
153 // D00B, t01 );
154 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
155 bl1_scopyv( BLIS1_NO_CONJUGATE,
156 n_behind,
157 d1B, rs_D,
158 t01, rs_T );
159 bl1_strmv( BLIS1_UPPER_TRIANGULAR,
160 BLIS1_CONJ_TRANSPOSE,
161 BLIS1_NONUNIT_DIAG,
162 n_behind,
163 D00B, rs_D, cs_D,
164 t01, rs_T );
165 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
166 BLIS1_NO_CONJUGATE,
167 m_DT,
168 n_behind,
169 buff_1,
170 D00, rs_D, cs_D,
171 d1, rs_D,
172 buff_1,
173 t01, rs_T );
174
175 //------------------------------------------------------------//
176
177 }
178
179 return FLA_SUCCESS;
180 }
181
182
183
FLA_CAQR2_UT_opd_var1(int mn_UT,int m_D,double * buff_U,int rs_U,int cs_U,double * buff_D,int rs_D,int cs_D,double * buff_T,int rs_T,int cs_T)184 FLA_Error FLA_CAQR2_UT_opd_var1( int mn_UT,
185 int m_D,
186 double* buff_U, int rs_U, int cs_U,
187 double* buff_D, int rs_D, int cs_D,
188 double* buff_T, int rs_T, int cs_T )
189 {
190 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
191 int i, j;
192 int m_DT = m_D - mn_UT;
193
194 for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
195 {
196 double* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
197 double* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
198
199 double* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
200 double* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
201 double* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
202
203 double* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
204 double* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
205
206 double* d1B = d1 + (m_DT)*rs_D;
207 double* D00B = D00 + (m_DT)*rs_D;
208
209 int m_behind = i;
210 int n_behind = j;
211 int mn_ahead = mn_UT - j - 1;
212
213 //------------------------------------------------------------//
214
215 // FLA_Househ2_UT( FLA_LEFT,
216 // upsilon11,
217 // d1, tau11 );
218 FLA_Househ2_UT_l_opd( m_behind + 1,
219 upsilon11,
220 d1, rs_D,
221 tau11 );
222
223 // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
224 // D2 );
225 FLA_Apply_H2_UT_l_opd_var1( m_behind + 1,
226 mn_ahead,
227 tau11,
228 d1, rs_D,
229 u12t, cs_U,
230 D2, rs_D, cs_D );
231
232 // FLA_Copy_external( d01B, t01 );
233 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
234 // D00B, t01 );
235 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
236 bl1_dcopyv( BLIS1_NO_CONJUGATE,
237 n_behind,
238 d1B, rs_D,
239 t01, rs_T );
240 bl1_dtrmv( BLIS1_UPPER_TRIANGULAR,
241 BLIS1_CONJ_TRANSPOSE,
242 BLIS1_NONUNIT_DIAG,
243 n_behind,
244 D00B, rs_D, cs_D,
245 t01, rs_T );
246 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
247 BLIS1_NO_CONJUGATE,
248 m_DT,
249 n_behind,
250 buff_1,
251 D00, rs_D, cs_D,
252 d1, rs_D,
253 buff_1,
254 t01, rs_T );
255
256 //------------------------------------------------------------//
257
258 }
259
260 return FLA_SUCCESS;
261 }
262
263
264
FLA_CAQR2_UT_opc_var1(int mn_UT,int m_D,scomplex * buff_U,int rs_U,int cs_U,scomplex * buff_D,int rs_D,int cs_D,scomplex * buff_T,int rs_T,int cs_T)265 FLA_Error FLA_CAQR2_UT_opc_var1( int mn_UT,
266 int m_D,
267 scomplex* buff_U, int rs_U, int cs_U,
268 scomplex* buff_D, int rs_D, int cs_D,
269 scomplex* buff_T, int rs_T, int cs_T )
270 {
271 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
272 int i, j;
273 int m_DT = m_D - mn_UT;
274
275 for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
276 {
277 scomplex* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
278 scomplex* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
279
280 scomplex* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
281 scomplex* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
282 scomplex* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
283
284 scomplex* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
285 scomplex* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
286
287 scomplex* d1B = d1 + (m_DT)*rs_D;
288 scomplex* D00B = D00 + (m_DT)*rs_D;
289
290 int m_behind = i;
291 int n_behind = j;
292 int mn_ahead = mn_UT - j - 1;
293
294 //------------------------------------------------------------//
295
296 // FLA_Househ2_UT( FLA_LEFT,
297 // upsilon11,
298 // d1, tau11 );
299 FLA_Househ2_UT_l_opc( m_behind + 1,
300 upsilon11,
301 d1, rs_D,
302 tau11 );
303
304 // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
305 // D2 );
306 FLA_Apply_H2_UT_l_opc_var1( m_behind + 1,
307 mn_ahead,
308 tau11,
309 d1, rs_D,
310 u12t, cs_U,
311 D2, rs_D, cs_D );
312
313 // FLA_Copy_external( d01B, t01 );
314 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
315 // D00B, t01 );
316 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
317 bl1_ccopyv( BLIS1_NO_CONJUGATE,
318 n_behind,
319 d1B, rs_D,
320 t01, rs_T );
321 bl1_ctrmv( BLIS1_UPPER_TRIANGULAR,
322 BLIS1_CONJ_TRANSPOSE,
323 BLIS1_NONUNIT_DIAG,
324 n_behind,
325 D00B, rs_D, cs_D,
326 t01, rs_T );
327 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
328 BLIS1_NO_CONJUGATE,
329 m_DT,
330 n_behind,
331 buff_1,
332 D00, rs_D, cs_D,
333 d1, rs_D,
334 buff_1,
335 t01, rs_T );
336
337 //------------------------------------------------------------//
338
339 }
340
341 return FLA_SUCCESS;
342 }
343
344
345
FLA_CAQR2_UT_opz_var1(int mn_UT,int m_D,dcomplex * buff_U,int rs_U,int cs_U,dcomplex * buff_D,int rs_D,int cs_D,dcomplex * buff_T,int rs_T,int cs_T)346 FLA_Error FLA_CAQR2_UT_opz_var1( int mn_UT,
347 int m_D,
348 dcomplex* buff_U, int rs_U, int cs_U,
349 dcomplex* buff_D, int rs_D, int cs_D,
350 dcomplex* buff_T, int rs_T, int cs_T )
351 {
352 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
353 int i, j;
354 int m_DT = m_D - mn_UT;
355
356 for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
357 {
358 dcomplex* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
359 dcomplex* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
360
361 dcomplex* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
362 dcomplex* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
363 dcomplex* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
364
365 dcomplex* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
366 dcomplex* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
367
368 dcomplex* d1B = d1 + (m_DT)*rs_D;
369 dcomplex* D00B = D00 + (m_DT)*rs_D;
370
371 int m_behind = i;
372 int n_behind = j;
373 int mn_ahead = mn_UT - j - 1;
374
375 //------------------------------------------------------------//
376
377 // FLA_Househ2_UT( FLA_LEFT,
378 // upsilon11,
379 // d1, tau11 );
380 FLA_Househ2_UT_l_opz( m_behind + 1,
381 upsilon11,
382 d1, rs_D,
383 tau11 );
384
385 // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
386 // D2 );
387 FLA_Apply_H2_UT_l_opz_var1( m_behind + 1,
388 mn_ahead,
389 tau11,
390 d1, rs_D,
391 u12t, cs_U,
392 D2, rs_D, cs_D );
393
394 // FLA_Copy_external( d01B, t01 );
395 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
396 // D00B, t01 );
397 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
398 bl1_zcopyv( BLIS1_NO_CONJUGATE,
399 n_behind,
400 d1B, rs_D,
401 t01, rs_T );
402 bl1_ztrmv( BLIS1_UPPER_TRIANGULAR,
403 BLIS1_CONJ_TRANSPOSE,
404 BLIS1_NONUNIT_DIAG,
405 n_behind,
406 D00B, rs_D, cs_D,
407 t01, rs_T );
408 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
409 BLIS1_NO_CONJUGATE,
410 m_DT,
411 n_behind,
412 buff_1,
413 D00, rs_D, cs_D,
414 d1, rs_D,
415 buff_1,
416 t01, rs_T );
417
418 //------------------------------------------------------------//
419
420 }
421
422 return FLA_SUCCESS;
423 }
424
425