1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Bidiag_UT_u_opt_var5(FLA_Obj A,FLA_Obj TU,FLA_Obj TV)13 FLA_Error FLA_Bidiag_UT_u_opt_var5( FLA_Obj A, FLA_Obj TU, FLA_Obj TV )
14 {
15 FLA_Error r_val;
16 FLA_Obj Y, Z;
17 FLA_Datatype datatype_A;
18 dim_t m_A, n_A;
19
20 datatype_A = FLA_Obj_datatype( A );
21 m_A = FLA_Obj_length( A );
22 n_A = FLA_Obj_width( A );
23
24 FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25 FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26
27 r_val = FLA_Bidiag_UT_u_step_opt_var5( A, Y, Z, TU, TV );
28
29 FLA_Obj_free( &Y );
30 FLA_Obj_free( &Z );
31
32 return r_val;
33 }
34
FLA_Bidiag_UT_u_step_opt_var5(FLA_Obj A,FLA_Obj Y,FLA_Obj Z,FLA_Obj T,FLA_Obj S)35 FLA_Error FLA_Bidiag_UT_u_step_opt_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S )
36 {
37 FLA_Datatype datatype;
38 int m_A, n_A, m_TS;
39 int rs_A, cs_A;
40 int rs_Y, cs_Y;
41 int rs_Z, cs_Z;
42 int rs_T, cs_T;
43 int rs_S, cs_S;
44
45 datatype = FLA_Obj_datatype( A );
46
47 m_A = FLA_Obj_length( A );
48 n_A = FLA_Obj_width( A );
49 m_TS = FLA_Obj_length( T );
50
51 rs_A = FLA_Obj_row_stride( A );
52 cs_A = FLA_Obj_col_stride( A );
53
54 rs_Y = FLA_Obj_row_stride( Y );
55 cs_Y = FLA_Obj_col_stride( Y );
56
57 rs_Z = FLA_Obj_row_stride( Z );
58 cs_Z = FLA_Obj_col_stride( Z );
59
60 rs_T = FLA_Obj_row_stride( T );
61 cs_T = FLA_Obj_col_stride( T );
62
63 rs_S = FLA_Obj_row_stride( S );
64 cs_S = FLA_Obj_col_stride( S );
65
66
67 switch ( datatype )
68 {
69 case FLA_FLOAT:
70 {
71 float* buff_A = FLA_FLOAT_PTR( A );
72 float* buff_Y = FLA_FLOAT_PTR( Y );
73 float* buff_Z = FLA_FLOAT_PTR( Z );
74 float* buff_T = FLA_FLOAT_PTR( T );
75 float* buff_S = FLA_FLOAT_PTR( S );
76
77 FLA_Bidiag_UT_u_step_ops_var5( m_A,
78 n_A,
79 m_TS,
80 buff_A, rs_A, cs_A,
81 buff_Y, rs_Y, cs_Y,
82 buff_Z, rs_Z, cs_Z,
83 buff_T, rs_T, cs_T,
84 buff_S, rs_S, cs_S );
85
86 break;
87 }
88
89 case FLA_DOUBLE:
90 {
91 double* buff_A = FLA_DOUBLE_PTR( A );
92 double* buff_Y = FLA_DOUBLE_PTR( Y );
93 double* buff_Z = FLA_DOUBLE_PTR( Z );
94 double* buff_T = FLA_DOUBLE_PTR( T );
95 double* buff_S = FLA_DOUBLE_PTR( S );
96
97 FLA_Bidiag_UT_u_step_opd_var5( m_A,
98 n_A,
99 m_TS,
100 buff_A, rs_A, cs_A,
101 buff_Y, rs_Y, cs_Y,
102 buff_Z, rs_Z, cs_Z,
103 buff_T, rs_T, cs_T,
104 buff_S, rs_S, cs_S );
105
106 break;
107 }
108
109 case FLA_COMPLEX:
110 {
111 scomplex* buff_A = FLA_COMPLEX_PTR( A );
112 scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
113 scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
114 scomplex* buff_T = FLA_COMPLEX_PTR( T );
115 scomplex* buff_S = FLA_COMPLEX_PTR( S );
116
117 FLA_Bidiag_UT_u_step_opc_var5( m_A,
118 n_A,
119 m_TS,
120 buff_A, rs_A, cs_A,
121 buff_Y, rs_Y, cs_Y,
122 buff_Z, rs_Z, cs_Z,
123 buff_T, rs_T, cs_T,
124 buff_S, rs_S, cs_S );
125
126 break;
127 }
128
129 case FLA_DOUBLE_COMPLEX:
130 {
131 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
132 dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
133 dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
134 dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
135 dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
136
137 FLA_Bidiag_UT_u_step_opz_var5( m_A,
138 n_A,
139 m_TS,
140 buff_A, rs_A, cs_A,
141 buff_Y, rs_Y, cs_Y,
142 buff_Z, rs_Z, cs_Z,
143 buff_T, rs_T, cs_T,
144 buff_S, rs_S, cs_S );
145
146 break;
147 }
148 }
149
150 return FLA_SUCCESS;
151 }
152
153
154
FLA_Bidiag_UT_u_step_ops_var5(int m_A,int n_A,int m_TS,float * buff_A,int rs_A,int cs_A,float * buff_Y,int rs_Y,int cs_Y,float * buff_Z,int rs_Z,int cs_Z,float * buff_T,int rs_T,int cs_T,float * buff_S,int rs_S,int cs_S)155 FLA_Error FLA_Bidiag_UT_u_step_ops_var5( int m_A,
156 int n_A,
157 int m_TS,
158 float* buff_A, int rs_A, int cs_A,
159 float* buff_Y, int rs_Y, int cs_Y,
160 float* buff_Z, int rs_Z, int cs_Z,
161 float* buff_T, int rs_T, int cs_T,
162 float* buff_S, int rs_S, int cs_S )
163 {
164 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
166 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
167
168 float beta;
169 float last_elem;
170 int i;
171
172 // b_alg = FLA_Obj_length( T );
173 int b_alg = m_TS;
174
175 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
176 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
177 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
178 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
179 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
180 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
181 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
182 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
183 float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
184 float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
185 float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
186 float* buff_g = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
187 int inc_u = 1;
188 int inc_v = 1;
189 int inc_d = 1;
190 int inc_e = 1;
191 int inc_f = 1;
192 int inc_g = 1;
193
194 // FLA_Set( FLA_ZERO, Y );
195 // FLA_Set( FLA_ZERO, Z );
196 bl1_ssetm( n_A,
197 b_alg,
198 buff_0,
199 buff_Y, rs_Y, cs_Y );
200 bl1_ssetm( m_A,
201 b_alg,
202 buff_0,
203 buff_Z, rs_Z, cs_Z );
204
205 for ( i = 0; i < b_alg; ++i )
206 {
207 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
208 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
209 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
210 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
211 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
212 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
213 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
214 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
215
216 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
217 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
218 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
219
220 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
221 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
222 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
223
224 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
225 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
226
227 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
228 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
229
230 float* u21 = buff_u + (i+1)*inc_u;
231
232 float* v21 = buff_v + (i+1)*inc_v;
233
234 float* d0 = buff_d + (0 )*inc_d;
235
236 float* e0 = buff_e + (0 )*inc_e;
237
238 float* f0 = buff_f + (0 )*inc_f;
239
240 float* g0 = buff_g + (0 )*inc_g;
241
242 float* v21_t = v21 + (0 )*inc_v;
243 float* v21_b = v21 + (1 )*inc_v;
244
245 float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
246
247 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
248 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
249
250 float* ABL = a10t;
251 float* ZBL = z10t;
252
253 float* a2 = alpha11;
254
255 int m_ahead = m_A - i - 1;
256 int n_ahead = n_A - i - 1;
257 int m_behind = i;
258 int n_behind = i;
259
260 /*------------------------------------------------------------*/
261
262 if ( m_behind > 0 )
263 {
264 // FLA_Copy( a01_b, last_elem );
265 // FLA_Set( FLA_ONE, a01_b );
266 last_elem = *a01_b;
267 *a01_b = *buff_1;
268 }
269
270 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
271 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
272 bl1_sgemv( BLIS1_NO_TRANSPOSE,
273 BLIS1_CONJUGATE,
274 m_ahead + 1,
275 n_behind,
276 buff_m1,
277 ABL, rs_A, cs_A,
278 y10t, cs_Y,
279 buff_1,
280 a2, rs_A );
281 bl1_sgemv( BLIS1_NO_TRANSPOSE,
282 BLIS1_CONJUGATE,
283 m_ahead + 1,
284 n_behind,
285 buff_m1,
286 ZBL, rs_Z, cs_Z,
287 a01, rs_A,
288 buff_1,
289 a2, rs_A );
290
291 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
292 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
293 bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
294 BLIS1_NO_CONJUGATE,
295 n_ahead,
296 n_behind,
297 buff_m1,
298 Y20, rs_Y, cs_Y,
299 a10t, cs_A,
300 buff_1,
301 a12t, cs_A );
302 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
303 BLIS1_NO_CONJUGATE,
304 m_behind,
305 n_ahead,
306 buff_m1,
307 A02, rs_A, cs_A,
308 z10t, cs_Z,
309 buff_1,
310 a12t, cs_A );
311
312 if ( m_behind > 0 )
313 {
314 // FLA_Copy( last_elem, a01_b );
315 *a01_b = last_elem;
316 }
317
318 // FLA_Househ2_UT( FLA_LEFT,
319 // alpha11,
320 // a21, tau11 );
321 // FLA_Copy( a21, u21 );
322 FLA_Househ2_UT_l_ops( m_ahead,
323 alpha11,
324 a21, rs_A,
325 tau11 );
326 bl1_scopyv( BLIS1_NO_CONJUGATE,
327 m_ahead,
328 a21, rs_A,
329 u21, inc_u );
330
331 if ( n_ahead > 0 )
332 {
333 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
334 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
335 bl1_scopyv( BLIS1_CONJUGATE,
336 n_ahead,
337 a12t, cs_A,
338 y21, rs_Y );
339 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
340 BLIS1_NO_CONJUGATE,
341 m_ahead,
342 n_ahead,
343 buff_1,
344 A22, rs_A, cs_A,
345 u21, inc_u,
346 buff_1,
347 y21, rs_Y );
348
349 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
350 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
351 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
352 BLIS1_NO_CONJUGATE,
353 m_ahead,
354 n_behind,
355 buff_1,
356 A20, rs_A, cs_A,
357 u21, inc_u,
358 buff_0,
359 d0, inc_d );
360 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
361 BLIS1_NO_CONJUGATE,
362 m_ahead,
363 n_behind,
364 buff_1,
365 Z20, rs_Z, cs_Z,
366 u21, inc_u,
367 buff_0,
368 e0, inc_e );
369
370 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
371 // FLA_Axpy( FLA_ONE, d0, t01 );
372 bl1_scopyv( BLIS1_CONJUGATE,
373 n_behind,
374 a10t, cs_A,
375 t01, rs_T );
376 bl1_saxpyv( BLIS1_NO_CONJUGATE,
377 n_behind,
378 buff_1,
379 d0, inc_d,
380 t01, rs_T );
381
382 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
383 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
384 bl1_sgemv( BLIS1_NO_TRANSPOSE,
385 BLIS1_NO_CONJUGATE,
386 n_ahead,
387 n_behind,
388 buff_m1,
389 Y20, rs_Y, cs_Y,
390 d0, inc_d,
391 buff_1,
392 y21, rs_Y );
393 bl1_sgemv( BLIS1_TRANSPOSE,
394 BLIS1_NO_CONJUGATE,
395 m_behind,
396 n_ahead,
397 buff_m1,
398 A02, rs_A, cs_A,
399 e0, inc_e,
400 buff_1,
401 y21, rs_Y );
402
403 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
404 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
405 n_ahead,
406 tau11,
407 y21, rs_Y );
408
409 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
410 bl1_saxpyv( BLIS1_CONJUGATE,
411 n_ahead,
412 buff_m1,
413 y21, rs_Y,
414 a12t, cs_A );
415
416 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
417 FLA_Househ2_UT_r_ops( n_ahead - 1,
418 a12t_l,
419 a12t_r, cs_A,
420 sigma11 );
421
422 // FLA_Set( FLA_ONE, v21_t );
423 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
424 *v21_t = *buff_1;
425 bl1_scopyv( BLIS1_NO_CONJUGATE,
426 n_ahead - 1,
427 a12t_r, cs_A,
428 v21_b, inc_v );
429
430 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
431 // FLA_Scal( FLA_MINUS_ONE, beta );
432 bl1_sdot( BLIS1_CONJUGATE,
433 n_ahead,
434 y21, rs_Y,
435 v21, inc_v,
436 &beta );
437 bl1_sscals( buff_m1, &beta );
438
439 // FLA_Copy( u21, z21 );
440 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
441 bl1_scopyv( BLIS1_NO_CONJUGATE,
442 m_ahead,
443 u21, inc_u,
444 z21, rs_Z );
445 bl1_sgemv( BLIS1_NO_TRANSPOSE,
446 BLIS1_NO_CONJUGATE,
447 m_ahead,
448 n_ahead,
449 buff_1,
450 A22, rs_A, cs_A,
451 v21, inc_v,
452 &beta,
453 z21, rs_Z );
454
455 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
456 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
457 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
458 BLIS1_NO_CONJUGATE,
459 n_ahead,
460 m_behind,
461 buff_1,
462 Y20, rs_Y, cs_Y,
463 v21, inc_v,
464 buff_0,
465 f0, inc_f );
466 bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
467 BLIS1_NO_CONJUGATE,
468 m_behind,
469 n_ahead,
470 buff_1,
471 A02, rs_A, cs_A,
472 v21, inc_v,
473 buff_0,
474 g0, inc_g );
475
476 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
477 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
478 bl1_sgemv( BLIS1_NO_TRANSPOSE,
479 BLIS1_NO_CONJUGATE,
480 m_ahead,
481 n_behind,
482 buff_m1,
483 A20, rs_A, cs_A,
484 f0, inc_f,
485 buff_1,
486 z21, rs_Z );
487 bl1_sgemv( BLIS1_NO_TRANSPOSE,
488 BLIS1_NO_CONJUGATE,
489 m_ahead,
490 n_behind,
491 buff_m1,
492 Z20, rs_Z, cs_Z,
493 g0, inc_g,
494 buff_1,
495 z21, rs_Z );
496
497 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
498 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
499 m_ahead,
500 sigma11,
501 z21, rs_Z );
502
503 // FLA_Copy( g0, s01 );
504 bl1_scopyv( BLIS1_NO_CONJUGATE,
505 n_behind,
506 g0, inc_g,
507 s01, rs_S );
508 }
509 else // if ( n_ahead == 0 )
510 {
511 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
512 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
513 bl1_scopyv( BLIS1_CONJUGATE,
514 n_behind,
515 a10t, cs_A,
516 t01, rs_T );
517 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
518 BLIS1_NO_CONJUGATE,
519 m_ahead,
520 n_behind,
521 buff_1,
522 A20, rs_A, cs_A,
523 u21, inc_u,
524 buff_1,
525 t01, rs_T );
526 }
527
528 /*------------------------------------------------------------*/
529
530 }
531
532 // FLA_Obj_free( &u );
533 // FLA_Obj_free( &v );
534 // FLA_Obj_free( &d );
535 // FLA_Obj_free( &e );
536 // FLA_Obj_free( &f );
537 // FLA_Obj_free( &g );
538 FLA_free( buff_u );
539 FLA_free( buff_v );
540 FLA_free( buff_d );
541 FLA_free( buff_e );
542 FLA_free( buff_f );
543 FLA_free( buff_g );
544
545 return FLA_SUCCESS;
546 }
547
548
549
FLA_Bidiag_UT_u_step_opd_var5(int m_A,int n_A,int m_TS,double * buff_A,int rs_A,int cs_A,double * buff_Y,int rs_Y,int cs_Y,double * buff_Z,int rs_Z,int cs_Z,double * buff_T,int rs_T,int cs_T,double * buff_S,int rs_S,int cs_S)550 FLA_Error FLA_Bidiag_UT_u_step_opd_var5( int m_A,
551 int n_A,
552 int m_TS,
553 double* buff_A, int rs_A, int cs_A,
554 double* buff_Y, int rs_Y, int cs_Y,
555 double* buff_Z, int rs_Z, int cs_Z,
556 double* buff_T, int rs_T, int cs_T,
557 double* buff_S, int rs_S, int cs_S )
558 {
559 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
560 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
561 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
562
563 double beta;
564 double last_elem;
565 int i;
566
567 // b_alg = FLA_Obj_length( T );
568 int b_alg = m_TS;
569
570 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
571 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
572 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
573 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
574 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
575 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
576 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
577 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
578 double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
579 double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
580 double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
581 double* buff_g = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
582 int inc_u = 1;
583 int inc_v = 1;
584 int inc_d = 1;
585 int inc_e = 1;
586 int inc_f = 1;
587 int inc_g = 1;
588
589 // FLA_Set( FLA_ZERO, Y );
590 // FLA_Set( FLA_ZERO, Z );
591 bl1_dsetm( n_A,
592 b_alg,
593 buff_0,
594 buff_Y, rs_Y, cs_Y );
595 bl1_dsetm( m_A,
596 b_alg,
597 buff_0,
598 buff_Z, rs_Z, cs_Z );
599
600 for ( i = 0; i < b_alg; ++i )
601 {
602 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
603 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
604 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
605 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
606 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
607 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
608 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
609 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
610
611 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
612 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
613 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
614
615 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
616 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
617 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
618
619 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
620 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
621
622 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
623 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
624
625 double* u21 = buff_u + (i+1)*inc_u;
626
627 double* v21 = buff_v + (i+1)*inc_v;
628
629 double* d0 = buff_d + (0 )*inc_d;
630
631 double* e0 = buff_e + (0 )*inc_e;
632
633 double* f0 = buff_f + (0 )*inc_f;
634
635 double* g0 = buff_g + (0 )*inc_g;
636
637 double* v21_t = v21 + (0 )*inc_v;
638 double* v21_b = v21 + (1 )*inc_v;
639
640 double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
641
642 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
643 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
644
645 double* ABL = a10t;
646 double* ZBL = z10t;
647
648 double* a2 = alpha11;
649
650 int m_ahead = m_A - i - 1;
651 int n_ahead = n_A - i - 1;
652 int m_behind = i;
653 int n_behind = i;
654
655 /*------------------------------------------------------------*/
656
657 if ( m_behind > 0 )
658 {
659 // FLA_Copy( a01_b, last_elem );
660 // FLA_Set( FLA_ONE, a01_b );
661 last_elem = *a01_b;
662 *a01_b = *buff_1;
663 }
664
665 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
666 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
667 bl1_dgemv( BLIS1_NO_TRANSPOSE,
668 BLIS1_CONJUGATE,
669 m_ahead + 1,
670 n_behind,
671 buff_m1,
672 ABL, rs_A, cs_A,
673 y10t, cs_Y,
674 buff_1,
675 a2, rs_A );
676 bl1_dgemv( BLIS1_NO_TRANSPOSE,
677 BLIS1_CONJUGATE,
678 m_ahead + 1,
679 n_behind,
680 buff_m1,
681 ZBL, rs_Z, cs_Z,
682 a01, rs_A,
683 buff_1,
684 a2, rs_A );
685
686 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
687 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
688 bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
689 BLIS1_NO_CONJUGATE,
690 n_ahead,
691 n_behind,
692 buff_m1,
693 Y20, rs_Y, cs_Y,
694 a10t, cs_A,
695 buff_1,
696 a12t, cs_A );
697 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
698 BLIS1_NO_CONJUGATE,
699 m_behind,
700 n_ahead,
701 buff_m1,
702 A02, rs_A, cs_A,
703 z10t, cs_Z,
704 buff_1,
705 a12t, cs_A );
706
707 if ( m_behind > 0 )
708 {
709 // FLA_Copy( last_elem, a01_b );
710 *a01_b = last_elem;
711 }
712
713 // FLA_Househ2_UT( FLA_LEFT,
714 // alpha11,
715 // a21, tau11 );
716 // FLA_Copy( a21, u21 );
717 FLA_Househ2_UT_l_opd( m_ahead,
718 alpha11,
719 a21, rs_A,
720 tau11 );
721 bl1_dcopyv( BLIS1_NO_CONJUGATE,
722 m_ahead,
723 a21, rs_A,
724 u21, inc_u );
725
726 if ( n_ahead > 0 )
727 {
728 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
729 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
730 bl1_dcopyv( BLIS1_CONJUGATE,
731 n_ahead,
732 a12t, cs_A,
733 y21, rs_Y );
734 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
735 BLIS1_NO_CONJUGATE,
736 m_ahead,
737 n_ahead,
738 buff_1,
739 A22, rs_A, cs_A,
740 u21, inc_u,
741 buff_1,
742 y21, rs_Y );
743
744 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
745 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
746 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
747 BLIS1_NO_CONJUGATE,
748 m_ahead,
749 n_behind,
750 buff_1,
751 A20, rs_A, cs_A,
752 u21, inc_u,
753 buff_0,
754 d0, inc_d );
755 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
756 BLIS1_NO_CONJUGATE,
757 m_ahead,
758 n_behind,
759 buff_1,
760 Z20, rs_Z, cs_Z,
761 u21, inc_u,
762 buff_0,
763 e0, inc_e );
764
765 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
766 // FLA_Axpy( FLA_ONE, d0, t01 );
767 bl1_dcopyv( BLIS1_CONJUGATE,
768 n_behind,
769 a10t, cs_A,
770 t01, rs_T );
771 bl1_daxpyv( BLIS1_NO_CONJUGATE,
772 n_behind,
773 buff_1,
774 d0, inc_d,
775 t01, rs_T );
776
777 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
778 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
779 bl1_dgemv( BLIS1_NO_TRANSPOSE,
780 BLIS1_NO_CONJUGATE,
781 n_ahead,
782 n_behind,
783 buff_m1,
784 Y20, rs_Y, cs_Y,
785 d0, inc_d,
786 buff_1,
787 y21, rs_Y );
788 bl1_dgemv( BLIS1_TRANSPOSE,
789 BLIS1_NO_CONJUGATE,
790 m_behind,
791 n_ahead,
792 buff_m1,
793 A02, rs_A, cs_A,
794 e0, inc_e,
795 buff_1,
796 y21, rs_Y );
797
798 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
799 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
800 n_ahead,
801 tau11,
802 y21, rs_Y );
803
804 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
805 bl1_daxpyv( BLIS1_CONJUGATE,
806 n_ahead,
807 buff_m1,
808 y21, rs_Y,
809 a12t, cs_A );
810
811 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
812 FLA_Househ2_UT_r_opd( n_ahead - 1,
813 a12t_l,
814 a12t_r, cs_A,
815 sigma11 );
816
817 // FLA_Set( FLA_ONE, v21_t );
818 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
819 *v21_t = *buff_1;
820 bl1_dcopyv( BLIS1_NO_CONJUGATE,
821 n_ahead - 1,
822 a12t_r, cs_A,
823 v21_b, inc_v );
824
825 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
826 // FLA_Scal( FLA_MINUS_ONE, beta );
827 bl1_ddot( BLIS1_CONJUGATE,
828 n_ahead,
829 y21, rs_Y,
830 v21, inc_v,
831 &beta );
832 bl1_dscals( buff_m1, &beta );
833
834 // FLA_Copy( u21, z21 );
835 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
836 bl1_dcopyv( BLIS1_NO_CONJUGATE,
837 m_ahead,
838 u21, inc_u,
839 z21, rs_Z );
840 bl1_dgemv( BLIS1_NO_TRANSPOSE,
841 BLIS1_NO_CONJUGATE,
842 m_ahead,
843 n_ahead,
844 buff_1,
845 A22, rs_A, cs_A,
846 v21, inc_v,
847 &beta,
848 z21, rs_Z );
849
850 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
851 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
852 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
853 BLIS1_NO_CONJUGATE,
854 n_ahead,
855 m_behind,
856 buff_1,
857 Y20, rs_Y, cs_Y,
858 v21, inc_v,
859 buff_0,
860 f0, inc_f );
861 bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
862 BLIS1_NO_CONJUGATE,
863 m_behind,
864 n_ahead,
865 buff_1,
866 A02, rs_A, cs_A,
867 v21, inc_v,
868 buff_0,
869 g0, inc_g );
870
871 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
872 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
873 bl1_dgemv( BLIS1_NO_TRANSPOSE,
874 BLIS1_NO_CONJUGATE,
875 m_ahead,
876 n_behind,
877 buff_m1,
878 A20, rs_A, cs_A,
879 f0, inc_f,
880 buff_1,
881 z21, rs_Z );
882 bl1_dgemv( BLIS1_NO_TRANSPOSE,
883 BLIS1_NO_CONJUGATE,
884 m_ahead,
885 n_behind,
886 buff_m1,
887 Z20, rs_Z, cs_Z,
888 g0, inc_g,
889 buff_1,
890 z21, rs_Z );
891
892 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
893 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
894 m_ahead,
895 sigma11,
896 z21, rs_Z );
897
898 // FLA_Copy( g0, s01 );
899 bl1_dcopyv( BLIS1_NO_CONJUGATE,
900 n_behind,
901 g0, inc_g,
902 s01, rs_S );
903 }
904 else // if ( n_ahead == 0 )
905 {
906 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
907 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
908 bl1_dcopyv( BLIS1_CONJUGATE,
909 n_behind,
910 a10t, cs_A,
911 t01, rs_T );
912 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
913 BLIS1_NO_CONJUGATE,
914 m_ahead,
915 n_behind,
916 buff_1,
917 A20, rs_A, cs_A,
918 u21, inc_u,
919 buff_1,
920 t01, rs_T );
921 }
922
923 /*------------------------------------------------------------*/
924
925 }
926
927 // FLA_Obj_free( &u );
928 // FLA_Obj_free( &v );
929 // FLA_Obj_free( &d );
930 // FLA_Obj_free( &e );
931 // FLA_Obj_free( &f );
932 // FLA_Obj_free( &g );
933 FLA_free( buff_u );
934 FLA_free( buff_v );
935 FLA_free( buff_d );
936 FLA_free( buff_e );
937 FLA_free( buff_f );
938 FLA_free( buff_g );
939
940 return FLA_SUCCESS;
941 }
942
943
944
FLA_Bidiag_UT_u_step_opc_var5(int m_A,int n_A,int m_TS,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_Y,int rs_Y,int cs_Y,scomplex * buff_Z,int rs_Z,int cs_Z,scomplex * buff_T,int rs_T,int cs_T,scomplex * buff_S,int rs_S,int cs_S)945 FLA_Error FLA_Bidiag_UT_u_step_opc_var5( int m_A,
946 int n_A,
947 int m_TS,
948 scomplex* buff_A, int rs_A, int cs_A,
949 scomplex* buff_Y, int rs_Y, int cs_Y,
950 scomplex* buff_Z, int rs_Z, int cs_Z,
951 scomplex* buff_T, int rs_T, int cs_T,
952 scomplex* buff_S, int rs_S, int cs_S )
953 {
954 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
955 scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
956 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
957
958 scomplex beta;
959 scomplex last_elem;
960 int i;
961
962 // b_alg = FLA_Obj_length( T );
963 int b_alg = m_TS;
964
965 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
966 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
967 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
968 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
969 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
970 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
971 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
972 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
973 scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
974 scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
975 scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
976 scomplex* buff_g = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
977 int inc_u = 1;
978 int inc_v = 1;
979 int inc_d = 1;
980 int inc_e = 1;
981 int inc_f = 1;
982 int inc_g = 1;
983
984 // FLA_Set( FLA_ZERO, Y );
985 // FLA_Set( FLA_ZERO, Z );
986 bl1_csetm( n_A,
987 b_alg,
988 buff_0,
989 buff_Y, rs_Y, cs_Y );
990 bl1_csetm( m_A,
991 b_alg,
992 buff_0,
993 buff_Z, rs_Z, cs_Z );
994
995 for ( i = 0; i < b_alg; ++i )
996 {
997 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
998 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
999 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1000 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1001 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1002 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1003 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1004 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1005
1006 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1007 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1008 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1009
1010 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1011 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1012 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1013
1014 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1015 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1016
1017 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1018 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1019
1020 scomplex* u21 = buff_u + (i+1)*inc_u;
1021
1022 scomplex* v21 = buff_v + (i+1)*inc_v;
1023
1024 scomplex* d0 = buff_d + (0 )*inc_d;
1025
1026 scomplex* e0 = buff_e + (0 )*inc_e;
1027
1028 scomplex* f0 = buff_f + (0 )*inc_f;
1029
1030 scomplex* g0 = buff_g + (0 )*inc_g;
1031
1032 scomplex* v21_t = v21 + (0 )*inc_v;
1033 scomplex* v21_b = v21 + (1 )*inc_v;
1034
1035 scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1036
1037 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1038 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1039
1040 scomplex* ABL = a10t;
1041 scomplex* ZBL = z10t;
1042
1043 scomplex* a2 = alpha11;
1044
1045 int m_ahead = m_A - i - 1;
1046 int n_ahead = n_A - i - 1;
1047 int m_behind = i;
1048 int n_behind = i;
1049
1050 /*------------------------------------------------------------*/
1051
1052 if ( m_behind > 0 )
1053 {
1054 // FLA_Copy( a01_b, last_elem );
1055 // FLA_Set( FLA_ONE, a01_b );
1056 last_elem = *a01_b;
1057 *a01_b = *buff_1;
1058 }
1059
1060 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1061 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1062 bl1_cgemv( BLIS1_NO_TRANSPOSE,
1063 BLIS1_CONJUGATE,
1064 m_ahead + 1,
1065 n_behind,
1066 buff_m1,
1067 ABL, rs_A, cs_A,
1068 y10t, cs_Y,
1069 buff_1,
1070 a2, rs_A );
1071 bl1_cgemv( BLIS1_NO_TRANSPOSE,
1072 BLIS1_CONJUGATE,
1073 m_ahead + 1,
1074 n_behind,
1075 buff_m1,
1076 ZBL, rs_Z, cs_Z,
1077 a01, rs_A,
1078 buff_1,
1079 a2, rs_A );
1080
1081 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1082 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1083 bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1084 BLIS1_NO_CONJUGATE,
1085 n_ahead,
1086 n_behind,
1087 buff_m1,
1088 Y20, rs_Y, cs_Y,
1089 a10t, cs_A,
1090 buff_1,
1091 a12t, cs_A );
1092 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1093 BLIS1_NO_CONJUGATE,
1094 m_behind,
1095 n_ahead,
1096 buff_m1,
1097 A02, rs_A, cs_A,
1098 z10t, cs_Z,
1099 buff_1,
1100 a12t, cs_A );
1101
1102 if ( m_behind > 0 )
1103 {
1104 // FLA_Copy( last_elem, a01_b );
1105 *a01_b = last_elem;
1106 }
1107
1108 // FLA_Househ2_UT( FLA_LEFT,
1109 // alpha11,
1110 // a21, tau11 );
1111 // FLA_Copy( a21, u21 );
1112 FLA_Househ2_UT_l_opc( m_ahead,
1113 alpha11,
1114 a21, rs_A,
1115 tau11 );
1116 bl1_ccopyv( BLIS1_NO_CONJUGATE,
1117 m_ahead,
1118 a21, rs_A,
1119 u21, inc_u );
1120
1121 if ( n_ahead > 0 )
1122 {
1123 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
1124 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
1125 bl1_ccopyv( BLIS1_CONJUGATE,
1126 n_ahead,
1127 a12t, cs_A,
1128 y21, rs_Y );
1129 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1130 BLIS1_NO_CONJUGATE,
1131 m_ahead,
1132 n_ahead,
1133 buff_1,
1134 A22, rs_A, cs_A,
1135 u21, inc_u,
1136 buff_1,
1137 y21, rs_Y );
1138
1139 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
1140 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
1141 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1142 BLIS1_NO_CONJUGATE,
1143 m_ahead,
1144 n_behind,
1145 buff_1,
1146 A20, rs_A, cs_A,
1147 u21, inc_u,
1148 buff_0,
1149 d0, inc_d );
1150 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1151 BLIS1_NO_CONJUGATE,
1152 m_ahead,
1153 n_behind,
1154 buff_1,
1155 Z20, rs_Z, cs_Z,
1156 u21, inc_u,
1157 buff_0,
1158 e0, inc_e );
1159
1160 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1161 // FLA_Axpy( FLA_ONE, d0, t01 );
1162 bl1_ccopyv( BLIS1_CONJUGATE,
1163 n_behind,
1164 a10t, cs_A,
1165 t01, rs_T );
1166 bl1_caxpyv( BLIS1_NO_CONJUGATE,
1167 n_behind,
1168 buff_1,
1169 d0, inc_d,
1170 t01, rs_T );
1171
1172 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1173 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1174 bl1_cgemv( BLIS1_NO_TRANSPOSE,
1175 BLIS1_NO_CONJUGATE,
1176 n_ahead,
1177 n_behind,
1178 buff_m1,
1179 Y20, rs_Y, cs_Y,
1180 d0, inc_d,
1181 buff_1,
1182 y21, rs_Y );
1183 bl1_cgemv( BLIS1_TRANSPOSE,
1184 BLIS1_NO_CONJUGATE,
1185 m_behind,
1186 n_ahead,
1187 buff_m1,
1188 A02, rs_A, cs_A,
1189 e0, inc_e,
1190 buff_1,
1191 y21, rs_Y );
1192
1193 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1194 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1195 n_ahead,
1196 tau11,
1197 y21, rs_Y );
1198
1199 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
1200 bl1_caxpyv( BLIS1_CONJUGATE,
1201 n_ahead,
1202 buff_m1,
1203 y21, rs_Y,
1204 a12t, cs_A );
1205
1206 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
1207 FLA_Househ2_UT_r_opc( n_ahead - 1,
1208 a12t_l,
1209 a12t_r, cs_A,
1210 sigma11 );
1211
1212 // FLA_Set( FLA_ONE, v21_t );
1213 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
1214 *v21_t = *buff_1;
1215 bl1_ccopyv( BLIS1_NO_CONJUGATE,
1216 n_ahead - 1,
1217 a12t_r, cs_A,
1218 v21_b, inc_v );
1219
1220 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1221 // FLA_Scal( FLA_MINUS_ONE, beta );
1222 bl1_cdot( BLIS1_CONJUGATE,
1223 n_ahead,
1224 y21, rs_Y,
1225 v21, inc_v,
1226 &beta );
1227 bl1_cscals( buff_m1, &beta );
1228
1229 // FLA_Copy( u21, z21 );
1230 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
1231 bl1_ccopyv( BLIS1_NO_CONJUGATE,
1232 m_ahead,
1233 u21, inc_u,
1234 z21, rs_Z );
1235 bl1_cgemv( BLIS1_NO_TRANSPOSE,
1236 BLIS1_NO_CONJUGATE,
1237 m_ahead,
1238 n_ahead,
1239 buff_1,
1240 A22, rs_A, cs_A,
1241 v21, inc_v,
1242 &beta,
1243 z21, rs_Z );
1244
1245 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
1246 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
1247 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1248 BLIS1_NO_CONJUGATE,
1249 n_ahead,
1250 m_behind,
1251 buff_1,
1252 Y20, rs_Y, cs_Y,
1253 v21, inc_v,
1254 buff_0,
1255 f0, inc_f );
1256 bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1257 BLIS1_NO_CONJUGATE,
1258 m_behind,
1259 n_ahead,
1260 buff_1,
1261 A02, rs_A, cs_A,
1262 v21, inc_v,
1263 buff_0,
1264 g0, inc_g );
1265
1266 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
1267 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
1268 bl1_cgemv( BLIS1_NO_TRANSPOSE,
1269 BLIS1_NO_CONJUGATE,
1270 m_ahead,
1271 n_behind,
1272 buff_m1,
1273 A20, rs_A, cs_A,
1274 f0, inc_f,
1275 buff_1,
1276 z21, rs_Z );
1277 bl1_cgemv( BLIS1_NO_TRANSPOSE,
1278 BLIS1_NO_CONJUGATE,
1279 m_ahead,
1280 n_behind,
1281 buff_m1,
1282 Z20, rs_Z, cs_Z,
1283 g0, inc_g,
1284 buff_1,
1285 z21, rs_Z );
1286
1287 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1288 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1289 m_ahead,
1290 sigma11,
1291 z21, rs_Z );
1292
1293 // FLA_Copy( g0, s01 );
1294 bl1_ccopyv( BLIS1_NO_CONJUGATE,
1295 n_behind,
1296 g0, inc_g,
1297 s01, rs_S );
1298 }
1299 else // if ( n_ahead == 0 )
1300 {
1301 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1302 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1303 bl1_ccopyv( BLIS1_CONJUGATE,
1304 n_behind,
1305 a10t, cs_A,
1306 t01, rs_T );
1307 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1308 BLIS1_NO_CONJUGATE,
1309 m_ahead,
1310 n_behind,
1311 buff_1,
1312 A20, rs_A, cs_A,
1313 u21, inc_u,
1314 buff_1,
1315 t01, rs_T );
1316 }
1317
1318 /*------------------------------------------------------------*/
1319
1320 }
1321
1322 // FLA_Obj_free( &u );
1323 // FLA_Obj_free( &v );
1324 // FLA_Obj_free( &d );
1325 // FLA_Obj_free( &e );
1326 // FLA_Obj_free( &f );
1327 // FLA_Obj_free( &g );
1328 FLA_free( buff_u );
1329 FLA_free( buff_v );
1330 FLA_free( buff_d );
1331 FLA_free( buff_e );
1332 FLA_free( buff_f );
1333 FLA_free( buff_g );
1334
1335 return FLA_SUCCESS;
1336 }
1337
1338
1339
FLA_Bidiag_UT_u_step_opz_var5(int m_A,int n_A,int m_TS,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_Y,int rs_Y,int cs_Y,dcomplex * buff_Z,int rs_Z,int cs_Z,dcomplex * buff_T,int rs_T,int cs_T,dcomplex * buff_S,int rs_S,int cs_S)1340 FLA_Error FLA_Bidiag_UT_u_step_opz_var5( int m_A,
1341 int n_A,
1342 int m_TS,
1343 dcomplex* buff_A, int rs_A, int cs_A,
1344 dcomplex* buff_Y, int rs_Y, int cs_Y,
1345 dcomplex* buff_Z, int rs_Z, int cs_Z,
1346 dcomplex* buff_T, int rs_T, int cs_T,
1347 dcomplex* buff_S, int rs_S, int cs_S )
1348 {
1349 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1350 dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1351 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1352
1353 dcomplex beta;
1354 dcomplex last_elem;
1355 int i;
1356
1357 // b_alg = FLA_Obj_length( T );
1358 int b_alg = m_TS;
1359
1360 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1361 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1362 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1363 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1364 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1365 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1366 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1367 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1368 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1369 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1370 dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1371 dcomplex* buff_g = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1372 int inc_u = 1;
1373 int inc_v = 1;
1374 int inc_d = 1;
1375 int inc_e = 1;
1376 int inc_f = 1;
1377 int inc_g = 1;
1378
1379 // FLA_Set( FLA_ZERO, Y );
1380 // FLA_Set( FLA_ZERO, Z );
1381 bl1_zsetm( n_A,
1382 b_alg,
1383 buff_0,
1384 buff_Y, rs_Y, cs_Y );
1385 bl1_zsetm( m_A,
1386 b_alg,
1387 buff_0,
1388 buff_Z, rs_Z, cs_Z );
1389
1390 for ( i = 0; i < b_alg; ++i )
1391 {
1392 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1393 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1394 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1395 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1396 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1397 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1398 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1399 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1400
1401 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1402 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1403 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1404
1405 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1406 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1407 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1408
1409 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1410 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1411
1412 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1413 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1414
1415 dcomplex* u21 = buff_u + (i+1)*inc_u;
1416
1417 dcomplex* v21 = buff_v + (i+1)*inc_v;
1418
1419 dcomplex* d0 = buff_d + (0 )*inc_d;
1420
1421 dcomplex* e0 = buff_e + (0 )*inc_e;
1422
1423 dcomplex* f0 = buff_f + (0 )*inc_f;
1424
1425 dcomplex* g0 = buff_g + (0 )*inc_g;
1426
1427 dcomplex* v21_t = v21 + (0 )*inc_v;
1428 dcomplex* v21_b = v21 + (1 )*inc_v;
1429
1430 dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1431
1432 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1433 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1434
1435 dcomplex* ABL = a10t;
1436 dcomplex* ZBL = z10t;
1437
1438 dcomplex* a2 = alpha11;
1439
1440 int m_ahead = m_A - i - 1;
1441 int n_ahead = n_A - i - 1;
1442 int m_behind = i;
1443 int n_behind = i;
1444
1445 /*------------------------------------------------------------*/
1446
1447 if ( m_behind > 0 )
1448 {
1449 // FLA_Copy( a01_b, last_elem );
1450 // FLA_Set( FLA_ONE, a01_b );
1451 last_elem = *a01_b;
1452 *a01_b = *buff_1;
1453 }
1454
1455 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1456 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1457 bl1_zgemv( BLIS1_NO_TRANSPOSE,
1458 BLIS1_CONJUGATE,
1459 m_ahead + 1,
1460 n_behind,
1461 buff_m1,
1462 ABL, rs_A, cs_A,
1463 y10t, cs_Y,
1464 buff_1,
1465 a2, rs_A );
1466 bl1_zgemv( BLIS1_NO_TRANSPOSE,
1467 BLIS1_CONJUGATE,
1468 m_ahead + 1,
1469 n_behind,
1470 buff_m1,
1471 ZBL, rs_Z, cs_Z,
1472 a01, rs_A,
1473 buff_1,
1474 a2, rs_A );
1475
1476 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1477 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1478 bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
1479 BLIS1_NO_CONJUGATE,
1480 n_ahead,
1481 n_behind,
1482 buff_m1,
1483 Y20, rs_Y, cs_Y,
1484 a10t, cs_A,
1485 buff_1,
1486 a12t, cs_A );
1487 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1488 BLIS1_NO_CONJUGATE,
1489 m_behind,
1490 n_ahead,
1491 buff_m1,
1492 A02, rs_A, cs_A,
1493 z10t, cs_Z,
1494 buff_1,
1495 a12t, cs_A );
1496
1497 if ( m_behind > 0 )
1498 {
1499 // FLA_Copy( last_elem, a01_b );
1500 *a01_b = last_elem;
1501 }
1502
1503 // FLA_Househ2_UT( FLA_LEFT,
1504 // alpha11,
1505 // a21, tau11 );
1506 // FLA_Copy( a21, u21 );
1507 FLA_Househ2_UT_l_opz( m_ahead,
1508 alpha11,
1509 a21, rs_A,
1510 tau11 );
1511 bl1_zcopyv( BLIS1_NO_CONJUGATE,
1512 m_ahead,
1513 a21, rs_A,
1514 u21, inc_u );
1515
1516 if ( n_ahead > 0 )
1517 {
1518 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
1519 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
1520 bl1_zcopyv( BLIS1_CONJUGATE,
1521 n_ahead,
1522 a12t, cs_A,
1523 y21, rs_Y );
1524 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1525 BLIS1_NO_CONJUGATE,
1526 m_ahead,
1527 n_ahead,
1528 buff_1,
1529 A22, rs_A, cs_A,
1530 u21, inc_u,
1531 buff_1,
1532 y21, rs_Y );
1533
1534 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
1535 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
1536 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1537 BLIS1_NO_CONJUGATE,
1538 m_ahead,
1539 n_behind,
1540 buff_1,
1541 A20, rs_A, cs_A,
1542 u21, inc_u,
1543 buff_0,
1544 d0, inc_d );
1545 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1546 BLIS1_NO_CONJUGATE,
1547 m_ahead,
1548 n_behind,
1549 buff_1,
1550 Z20, rs_Z, cs_Z,
1551 u21, inc_u,
1552 buff_0,
1553 e0, inc_e );
1554
1555 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1556 // FLA_Axpy( FLA_ONE, d0, t01 );
1557 bl1_zcopyv( BLIS1_CONJUGATE,
1558 n_behind,
1559 a10t, cs_A,
1560 t01, rs_T );
1561 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1562 n_behind,
1563 buff_1,
1564 d0, inc_d,
1565 t01, rs_T );
1566
1567 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1568 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1569 bl1_zgemv( BLIS1_NO_TRANSPOSE,
1570 BLIS1_NO_CONJUGATE,
1571 n_ahead,
1572 n_behind,
1573 buff_m1,
1574 Y20, rs_Y, cs_Y,
1575 d0, inc_d,
1576 buff_1,
1577 y21, rs_Y );
1578 bl1_zgemv( BLIS1_TRANSPOSE,
1579 BLIS1_NO_CONJUGATE,
1580 m_behind,
1581 n_ahead,
1582 buff_m1,
1583 A02, rs_A, cs_A,
1584 e0, inc_e,
1585 buff_1,
1586 y21, rs_Y );
1587
1588 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1589 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1590 n_ahead,
1591 tau11,
1592 y21, rs_Y );
1593
1594 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
1595 bl1_zaxpyv( BLIS1_CONJUGATE,
1596 n_ahead,
1597 buff_m1,
1598 y21, rs_Y,
1599 a12t, cs_A );
1600
1601 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
1602 FLA_Househ2_UT_r_opz( n_ahead - 1,
1603 a12t_l,
1604 a12t_r, cs_A,
1605 sigma11 );
1606
1607 // FLA_Set( FLA_ONE, v21_t );
1608 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
1609 *v21_t = *buff_1;
1610 bl1_zcopyv( BLIS1_NO_CONJUGATE,
1611 n_ahead - 1,
1612 a12t_r, cs_A,
1613 v21_b, inc_v );
1614
1615 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1616 // FLA_Scal( FLA_MINUS_ONE, beta );
1617 bl1_zdot( BLIS1_CONJUGATE,
1618 n_ahead,
1619 y21, rs_Y,
1620 v21, inc_v,
1621 &beta );
1622 bl1_zscals( buff_m1, &beta );
1623
1624 // FLA_Copy( u21, z21 );
1625 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
1626 bl1_zcopyv( BLIS1_NO_CONJUGATE,
1627 m_ahead,
1628 u21, inc_u,
1629 z21, rs_Z );
1630 bl1_zgemv( BLIS1_NO_TRANSPOSE,
1631 BLIS1_NO_CONJUGATE,
1632 m_ahead,
1633 n_ahead,
1634 buff_1,
1635 A22, rs_A, cs_A,
1636 v21, inc_v,
1637 &beta,
1638 z21, rs_Z );
1639
1640 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
1641 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
1642 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1643 BLIS1_NO_CONJUGATE,
1644 n_ahead,
1645 m_behind,
1646 buff_1,
1647 Y20, rs_Y, cs_Y,
1648 v21, inc_v,
1649 buff_0,
1650 f0, inc_f );
1651 bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
1652 BLIS1_NO_CONJUGATE,
1653 m_behind,
1654 n_ahead,
1655 buff_1,
1656 A02, rs_A, cs_A,
1657 v21, inc_v,
1658 buff_0,
1659 g0, inc_g );
1660
1661 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
1662 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
1663 bl1_zgemv( BLIS1_NO_TRANSPOSE,
1664 BLIS1_NO_CONJUGATE,
1665 m_ahead,
1666 n_behind,
1667 buff_m1,
1668 A20, rs_A, cs_A,
1669 f0, inc_f,
1670 buff_1,
1671 z21, rs_Z );
1672 bl1_zgemv( BLIS1_NO_TRANSPOSE,
1673 BLIS1_NO_CONJUGATE,
1674 m_ahead,
1675 n_behind,
1676 buff_m1,
1677 Z20, rs_Z, cs_Z,
1678 g0, inc_g,
1679 buff_1,
1680 z21, rs_Z );
1681
1682 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1683 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1684 m_ahead,
1685 sigma11,
1686 z21, rs_Z );
1687
1688 // FLA_Copy( g0, s01 );
1689 bl1_zcopyv( BLIS1_NO_CONJUGATE,
1690 n_behind,
1691 g0, inc_g,
1692 s01, rs_S );
1693 }
1694 else // if ( n_ahead == 0 )
1695 {
1696 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1697 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1698 bl1_zcopyv( BLIS1_CONJUGATE,
1699 n_behind,
1700 a10t, cs_A,
1701 t01, rs_T );
1702 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1703 BLIS1_NO_CONJUGATE,
1704 m_ahead,
1705 n_behind,
1706 buff_1,
1707 A20, rs_A, cs_A,
1708 u21, inc_u,
1709 buff_1,
1710 t01, rs_T );
1711 }
1712
1713 /*------------------------------------------------------------*/
1714
1715 }
1716
1717 // FLA_Obj_free( &u );
1718 // FLA_Obj_free( &v );
1719 // FLA_Obj_free( &d );
1720 // FLA_Obj_free( &e );
1721 // FLA_Obj_free( &f );
1722 // FLA_Obj_free( &g );
1723 FLA_free( buff_u );
1724 FLA_free( buff_v );
1725 FLA_free( buff_d );
1726 FLA_free( buff_e );
1727 FLA_free( buff_f );
1728 FLA_free( buff_g );
1729
1730 return FLA_SUCCESS;
1731 }
1732
1733