1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
FLA_CAQR2_UT_opt_var1(FLA_Obj U,FLA_Obj D,FLA_Obj T)13 FLA_Error FLA_CAQR2_UT_opt_var1( FLA_Obj U,
14                                  FLA_Obj D, FLA_Obj T )
15 {
16   FLA_Datatype datatype;
17   int          mn_UT, m_D;
18   int          rs_U, cs_U;
19   int          rs_D, cs_D;
20   int          rs_T, cs_T;
21 
22   datatype = FLA_Obj_datatype( U );
23 
24   mn_UT    = FLA_Obj_width( U );
25   m_D      = FLA_Obj_length( D );
26 
27   rs_U     = FLA_Obj_row_stride( U );
28   cs_U     = FLA_Obj_col_stride( U );
29   rs_D     = FLA_Obj_row_stride( D );
30   cs_D     = FLA_Obj_col_stride( D );
31   rs_T     = FLA_Obj_row_stride( T );
32   cs_T     = FLA_Obj_col_stride( T );
33 
34 
35   switch ( datatype )
36   {
37     case FLA_FLOAT:
38     {
39       float* buff_U = FLA_FLOAT_PTR( U );
40       float* buff_D = FLA_FLOAT_PTR( D );
41       float* buff_T = FLA_FLOAT_PTR( T );
42 
43       FLA_CAQR2_UT_ops_var1( mn_UT,
44                              m_D,
45                              buff_U, rs_U, cs_U,
46                              buff_D, rs_D, cs_D,
47                              buff_T, rs_T, cs_T );
48 
49       break;
50     }
51 
52     case FLA_DOUBLE:
53     {
54       double* buff_U = FLA_DOUBLE_PTR( U );
55       double* buff_D = FLA_DOUBLE_PTR( D );
56       double* buff_T = FLA_DOUBLE_PTR( T );
57 
58       FLA_CAQR2_UT_opd_var1( mn_UT,
59                              m_D,
60                              buff_U, rs_U, cs_U,
61                              buff_D, rs_D, cs_D,
62                              buff_T, rs_T, cs_T );
63 
64       break;
65     }
66 
67     case FLA_COMPLEX:
68     {
69       scomplex* buff_U = FLA_COMPLEX_PTR( U );
70       scomplex* buff_D = FLA_COMPLEX_PTR( D );
71       scomplex* buff_T = FLA_COMPLEX_PTR( T );
72 
73       FLA_CAQR2_UT_opc_var1( mn_UT,
74                              m_D,
75                              buff_U, rs_U, cs_U,
76                              buff_D, rs_D, cs_D,
77                              buff_T, rs_T, cs_T );
78 
79       break;
80     }
81 
82     case FLA_DOUBLE_COMPLEX:
83     {
84       dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
85       dcomplex* buff_D = FLA_DOUBLE_COMPLEX_PTR( D );
86       dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
87 
88       FLA_CAQR2_UT_opz_var1( mn_UT,
89                              m_D,
90                              buff_U, rs_U, cs_U,
91                              buff_D, rs_D, cs_D,
92                              buff_T, rs_T, cs_T );
93 
94       break;
95     }
96   }
97 
98   return FLA_SUCCESS;
99 }
100 
101 
102 
FLA_CAQR2_UT_ops_var1(int mn_UT,int m_D,float * buff_U,int rs_U,int cs_U,float * buff_D,int rs_D,int cs_D,float * buff_T,int rs_T,int cs_T)103 FLA_Error FLA_CAQR2_UT_ops_var1( int mn_UT,
104                                  int m_D,
105                                  float* buff_U, int rs_U, int cs_U,
106                                  float* buff_D, int rs_D, int cs_D,
107                                  float* buff_T, int rs_T, int cs_T )
108 {
109   float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
110   int       i, j;
111   int       m_DT = m_D - mn_UT;
112 
113   for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
114   {
115     float*    upsilon11 = buff_U + (j  )*cs_U + (j  )*rs_U;
116     float*    u12t      = buff_U + (j+1)*cs_U + (j  )*rs_U;
117 
118     float*    D00       = buff_D + (0  )*cs_D + (0  )*rs_D;
119     float*    d1        = buff_D + (j  )*cs_D + (0  )*rs_D;
120     float*    D2        = buff_D + (j+1)*cs_D + (0  )*rs_D;
121 
122     float*    tau11     = buff_T + (j  )*cs_T + (j  )*rs_T;
123     float*    t01       = buff_T + (j  )*cs_T + (0  )*rs_T;
124 
125     float*    d1B       = d1  + (m_DT)*rs_D;
126     float*    D00B      = D00 + (m_DT)*rs_D;
127 
128     int       m_behind = i;
129     int       n_behind = j;
130     int       mn_ahead = mn_UT - j - 1;
131 
132     //------------------------------------------------------------//
133 
134     // FLA_Househ2_UT( FLA_LEFT,
135     //                 upsilon11,
136     //                 d1, tau11 );
137     FLA_Househ2_UT_l_ops( m_behind + 1,
138                           upsilon11,
139                           d1, rs_D,
140                           tau11 );
141 
142     // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
143     //                                       D2 );
144     FLA_Apply_H2_UT_l_ops_var1( m_behind + 1,
145                                 mn_ahead,
146                                 tau11,
147                                 d1, rs_D,
148                                 u12t, cs_U,
149                                 D2, rs_D, cs_D );
150 
151     // FLA_Copy_external( d01B, t01 );
152     // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
153     //                    D00B, t01 );
154     // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
155     bl1_scopyv( BLIS1_NO_CONJUGATE,
156                 n_behind,
157                 d1B, rs_D,
158                 t01, rs_T );
159     bl1_strmv( BLIS1_UPPER_TRIANGULAR,
160                BLIS1_CONJ_TRANSPOSE,
161                BLIS1_NONUNIT_DIAG,
162                n_behind,
163                D00B, rs_D, cs_D,
164                t01, rs_T );
165     bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
166                BLIS1_NO_CONJUGATE,
167                m_DT,
168                n_behind,
169                buff_1,
170                D00, rs_D, cs_D,
171                d1,  rs_D,
172                buff_1,
173                t01, rs_T );
174 
175     //------------------------------------------------------------//
176 
177   }
178 
179   return FLA_SUCCESS;
180 }
181 
182 
183 
FLA_CAQR2_UT_opd_var1(int mn_UT,int m_D,double * buff_U,int rs_U,int cs_U,double * buff_D,int rs_D,int cs_D,double * buff_T,int rs_T,int cs_T)184 FLA_Error FLA_CAQR2_UT_opd_var1( int mn_UT,
185                                  int m_D,
186                                  double* buff_U, int rs_U, int cs_U,
187                                  double* buff_D, int rs_D, int cs_D,
188                                  double* buff_T, int rs_T, int cs_T )
189 {
190   double*   buff_1  = FLA_DOUBLE_PTR( FLA_ONE );
191   int       i, j;
192   int       m_DT = m_D - mn_UT;
193 
194   for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
195   {
196     double*   upsilon11 = buff_U + (j  )*cs_U + (j  )*rs_U;
197     double*   u12t      = buff_U + (j+1)*cs_U + (j  )*rs_U;
198 
199     double*   D00       = buff_D + (0  )*cs_D + (0  )*rs_D;
200     double*   d1        = buff_D + (j  )*cs_D + (0  )*rs_D;
201     double*   D2        = buff_D + (j+1)*cs_D + (0  )*rs_D;
202 
203     double*   tau11     = buff_T + (j  )*cs_T + (j  )*rs_T;
204     double*   t01       = buff_T + (j  )*cs_T + (0  )*rs_T;
205 
206     double*   d1B       = d1  + (m_DT)*rs_D;
207     double*   D00B      = D00 + (m_DT)*rs_D;
208 
209     int       m_behind = i;
210     int       n_behind = j;
211     int       mn_ahead = mn_UT - j - 1;
212 
213     //------------------------------------------------------------//
214 
215     // FLA_Househ2_UT( FLA_LEFT,
216     //                 upsilon11,
217     //                 d1, tau11 );
218     FLA_Househ2_UT_l_opd( m_behind + 1,
219                           upsilon11,
220                           d1, rs_D,
221                           tau11 );
222 
223     // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
224     //                                       D2 );
225     FLA_Apply_H2_UT_l_opd_var1( m_behind + 1,
226                                 mn_ahead,
227                                 tau11,
228                                 d1, rs_D,
229                                 u12t, cs_U,
230                                 D2, rs_D, cs_D );
231 
232     // FLA_Copy_external( d01B, t01 );
233     // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
234     //                    D00B, t01 );
235     // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
236     bl1_dcopyv( BLIS1_NO_CONJUGATE,
237                 n_behind,
238                 d1B, rs_D,
239                 t01, rs_T );
240     bl1_dtrmv( BLIS1_UPPER_TRIANGULAR,
241                BLIS1_CONJ_TRANSPOSE,
242                BLIS1_NONUNIT_DIAG,
243                n_behind,
244                D00B, rs_D, cs_D,
245                t01, rs_T );
246     bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
247                BLIS1_NO_CONJUGATE,
248                m_DT,
249                n_behind,
250                buff_1,
251                D00, rs_D, cs_D,
252                d1,  rs_D,
253                buff_1,
254                t01, rs_T );
255 
256     //------------------------------------------------------------//
257 
258   }
259 
260   return FLA_SUCCESS;
261 }
262 
263 
264 
FLA_CAQR2_UT_opc_var1(int mn_UT,int m_D,scomplex * buff_U,int rs_U,int cs_U,scomplex * buff_D,int rs_D,int cs_D,scomplex * buff_T,int rs_T,int cs_T)265 FLA_Error FLA_CAQR2_UT_opc_var1( int mn_UT,
266                                  int m_D,
267                                  scomplex* buff_U, int rs_U, int cs_U,
268                                  scomplex* buff_D, int rs_D, int cs_D,
269                                  scomplex* buff_T, int rs_T, int cs_T )
270 {
271   scomplex* buff_1  = FLA_COMPLEX_PTR( FLA_ONE );
272   int       i, j;
273   int       m_DT = m_D - mn_UT;
274 
275   for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
276   {
277     scomplex* upsilon11 = buff_U + (j  )*cs_U + (j  )*rs_U;
278     scomplex* u12t      = buff_U + (j+1)*cs_U + (j  )*rs_U;
279 
280     scomplex* D00       = buff_D + (0  )*cs_D + (0  )*rs_D;
281     scomplex* d1        = buff_D + (j  )*cs_D + (0  )*rs_D;
282     scomplex* D2        = buff_D + (j+1)*cs_D + (0  )*rs_D;
283 
284     scomplex* tau11     = buff_T + (j  )*cs_T + (j  )*rs_T;
285     scomplex* t01       = buff_T + (j  )*cs_T + (0  )*rs_T;
286 
287     scomplex* d1B       = d1  + (m_DT)*rs_D;
288     scomplex* D00B      = D00 + (m_DT)*rs_D;
289 
290     int       m_behind = i;
291     int       n_behind = j;
292     int       mn_ahead = mn_UT - j - 1;
293 
294     //------------------------------------------------------------//
295 
296     // FLA_Househ2_UT( FLA_LEFT,
297     //                 upsilon11,
298     //                 d1, tau11 );
299     FLA_Househ2_UT_l_opc( m_behind + 1,
300                           upsilon11,
301                           d1, rs_D,
302                           tau11 );
303 
304     // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
305     //                                       D2 );
306     FLA_Apply_H2_UT_l_opc_var1( m_behind + 1,
307                                 mn_ahead,
308                                 tau11,
309                                 d1, rs_D,
310                                 u12t, cs_U,
311                                 D2, rs_D, cs_D );
312 
313     // FLA_Copy_external( d01B, t01 );
314     // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
315     //                    D00B, t01 );
316     // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
317     bl1_ccopyv( BLIS1_NO_CONJUGATE,
318                 n_behind,
319                 d1B, rs_D,
320                 t01, rs_T );
321     bl1_ctrmv( BLIS1_UPPER_TRIANGULAR,
322                BLIS1_CONJ_TRANSPOSE,
323                BLIS1_NONUNIT_DIAG,
324                n_behind,
325                D00B, rs_D, cs_D,
326                t01, rs_T );
327     bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
328                BLIS1_NO_CONJUGATE,
329                m_DT,
330                n_behind,
331                buff_1,
332                D00, rs_D, cs_D,
333                d1,  rs_D,
334                buff_1,
335                t01, rs_T );
336 
337     //------------------------------------------------------------//
338 
339   }
340 
341   return FLA_SUCCESS;
342 }
343 
344 
345 
FLA_CAQR2_UT_opz_var1(int mn_UT,int m_D,dcomplex * buff_U,int rs_U,int cs_U,dcomplex * buff_D,int rs_D,int cs_D,dcomplex * buff_T,int rs_T,int cs_T)346 FLA_Error FLA_CAQR2_UT_opz_var1( int mn_UT,
347                                  int m_D,
348                                  dcomplex* buff_U, int rs_U, int cs_U,
349                                  dcomplex* buff_D, int rs_D, int cs_D,
350                                  dcomplex* buff_T, int rs_T, int cs_T )
351 {
352   dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
353   int       i, j;
354   int       m_DT = m_D - mn_UT;
355 
356   for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
357   {
358     dcomplex* upsilon11 = buff_U + (j  )*cs_U + (j  )*rs_U;
359     dcomplex* u12t      = buff_U + (j+1)*cs_U + (j  )*rs_U;
360 
361     dcomplex* D00       = buff_D + (0  )*cs_D + (0  )*rs_D;
362     dcomplex* d1        = buff_D + (j  )*cs_D + (0  )*rs_D;
363     dcomplex* D2        = buff_D + (j+1)*cs_D + (0  )*rs_D;
364 
365     dcomplex* tau11     = buff_T + (j  )*cs_T + (j  )*rs_T;
366     dcomplex* t01       = buff_T + (j  )*cs_T + (0  )*rs_T;
367 
368     dcomplex* d1B       = d1  + (m_DT)*rs_D;
369     dcomplex* D00B      = D00 + (m_DT)*rs_D;
370 
371     int       m_behind = i;
372     int       n_behind = j;
373     int       mn_ahead = mn_UT - j - 1;
374 
375     //------------------------------------------------------------//
376 
377     // FLA_Househ2_UT( FLA_LEFT,
378     //                 upsilon11,
379     //                 d1, tau11 );
380     FLA_Househ2_UT_l_opz( m_behind + 1,
381                           upsilon11,
382                           d1, rs_D,
383                           tau11 );
384 
385     // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
386     //                                       D2 );
387     FLA_Apply_H2_UT_l_opz_var1( m_behind + 1,
388                                 mn_ahead,
389                                 tau11,
390                                 d1, rs_D,
391                                 u12t, cs_U,
392                                 D2, rs_D, cs_D );
393 
394     // FLA_Copy_external( d01B, t01 );
395     // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
396     //                    D00B, t01 );
397     // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
398     bl1_zcopyv( BLIS1_NO_CONJUGATE,
399                 n_behind,
400                 d1B, rs_D,
401                 t01, rs_T );
402     bl1_ztrmv( BLIS1_UPPER_TRIANGULAR,
403                BLIS1_CONJ_TRANSPOSE,
404                BLIS1_NONUNIT_DIAG,
405                n_behind,
406                D00B, rs_D, cs_D,
407                t01, rs_T );
408     bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
409                BLIS1_NO_CONJUGATE,
410                m_DT,
411                n_behind,
412                buff_1,
413                D00, rs_D, cs_D,
414                d1,  rs_D,
415                buff_1,
416                t01, rs_T );
417 
418     //------------------------------------------------------------//
419 
420   }
421 
422   return FLA_SUCCESS;
423 }
424 
425