1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Lyap_n_opt_var2(FLA_Obj isgn,FLA_Obj A,FLA_Obj C)13 FLA_Error FLA_Lyap_n_opt_var2( FLA_Obj isgn, FLA_Obj A, FLA_Obj C )
14 {
15 FLA_Datatype datatype;
16 int m_AC;
17 int rs_A, cs_A;
18 int rs_W, cs_W;
19 int rs_C, cs_C;
20 FLA_Obj W;
21
22 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &W );
23
24 datatype = FLA_Obj_datatype( A );
25
26 m_AC = FLA_Obj_length( A );
27
28 rs_A = FLA_Obj_row_stride( A );
29 cs_A = FLA_Obj_col_stride( A );
30
31 rs_W = FLA_Obj_row_stride( W );
32 cs_W = FLA_Obj_col_stride( W );
33
34 rs_C = FLA_Obj_row_stride( C );
35 cs_C = FLA_Obj_col_stride( C );
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_W = FLA_FLOAT_PTR( W );
43 float* buff_C = FLA_FLOAT_PTR( C );
44 float* buff_sgn = FLA_FLOAT_PTR( isgn );
45
46 FLA_Lyap_n_ops_var2( m_AC,
47 buff_sgn,
48 buff_A, rs_A, cs_A,
49 buff_W, rs_W, cs_W,
50 buff_C, rs_C, cs_C );
51
52 break;
53 }
54
55 case FLA_DOUBLE:
56 {
57 double* buff_A = FLA_DOUBLE_PTR( A );
58 double* buff_W = FLA_DOUBLE_PTR( W );
59 double* buff_C = FLA_DOUBLE_PTR( C );
60 double* buff_sgn = FLA_DOUBLE_PTR( isgn );
61
62 FLA_Lyap_n_opd_var2( m_AC,
63 buff_sgn,
64 buff_A, rs_A, cs_A,
65 buff_W, rs_W, cs_W,
66 buff_C, rs_C, cs_C );
67
68 break;
69 }
70
71 case FLA_COMPLEX:
72 {
73 scomplex* buff_A = FLA_COMPLEX_PTR( A );
74 scomplex* buff_W = FLA_COMPLEX_PTR( W );
75 scomplex* buff_C = FLA_COMPLEX_PTR( C );
76 scomplex* buff_sgn = FLA_COMPLEX_PTR( isgn );
77
78 FLA_Lyap_n_opc_var2( m_AC,
79 buff_sgn,
80 buff_A, rs_A, cs_A,
81 buff_W, rs_W, cs_W,
82 buff_C, rs_C, cs_C );
83
84 break;
85 }
86
87 case FLA_DOUBLE_COMPLEX:
88 {
89 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
90 dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
91 dcomplex* buff_C = FLA_DOUBLE_COMPLEX_PTR( C );
92 dcomplex* buff_sgn = FLA_DOUBLE_COMPLEX_PTR( isgn );
93
94 FLA_Lyap_n_opz_var2( m_AC,
95 buff_sgn,
96 buff_A, rs_A, cs_A,
97 buff_W, rs_W, cs_W,
98 buff_C, rs_C, cs_C );
99
100 break;
101 }
102 }
103
104 FLA_Obj_free( &W );
105
106 return FLA_SUCCESS;
107 }
108
109
110
FLA_Lyap_n_ops_var2(int m_AC,float * buff_sgn,float * buff_A,int rs_A,int cs_A,float * buff_W,int rs_W,int cs_W,float * buff_C,int rs_C,int cs_C)111 FLA_Error FLA_Lyap_n_ops_var2( int m_AC,
112 float* buff_sgn,
113 float* buff_A, int rs_A, int cs_A,
114 float* buff_W, int rs_W, int cs_W,
115 float* buff_C, int rs_C, int cs_C )
116 {
117 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
118 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
119 int i;
120
121 bl1_sscalm( BLIS1_NO_CONJUGATE,
122 m_AC,
123 m_AC,
124 buff_sgn,
125 buff_C, rs_C, cs_C );
126
127 for ( i = m_AC - 1; i >= 0; --i )
128 {
129 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
130 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
131 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
132 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
133 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
134
135 float* c01 = buff_C + (i )*cs_C + (0 )*rs_C;
136 float* gamma11 = buff_C + (i )*cs_C + (i )*rs_C;
137 float* C02 = buff_C + (i+1)*cs_C + (0 )*rs_C;
138 float* c12t = buff_C + (i+1)*cs_C + (i )*rs_C;
139
140 float* W22 = buff_W + (i+1)*cs_W + (i+1)*rs_W;
141
142 float omega;
143
144 int m_behind = i;
145 int m_ahead = m_AC - i - 1;
146
147 /*------------------------------------------------------------*/
148
149 // FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, A22, W22 );
150 // FLA_Shift_diag( FLA_NO_CONJUGATE, alpha11, W22 );
151 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, W22, c12t );;
152 bl1_scopymrt( BLIS1_UPPER_TRIANGULAR,
153 BLIS1_CONJ_NO_TRANSPOSE,
154 m_ahead,
155 m_ahead,
156 A22, rs_A, cs_A,
157 W22, rs_W, cs_W );
158
159 bl1_sshiftdiag( BLIS1_NO_CONJUGATE,
160 0,
161 m_ahead,
162 m_ahead,
163 alpha11,
164 W22, rs_W, cs_W );
165
166 bl1_strsv( BLIS1_UPPER_TRIANGULAR,
167 BLIS1_NO_TRANSPOSE,
168 BLIS1_NONUNIT_DIAG,
169 m_ahead,
170 W22, rs_W, cs_W,
171 c12t, cs_C );
172
173 // FLA_Dot2cs( FLA_CONJUGATE, FLA_MINUS_ONE, a12t, c12t, FLA_ONE, gamma11 );
174 bl1_sdot2s( BLIS1_CONJUGATE,
175 m_ahead,
176 buff_m1,
177 a12t, cs_A,
178 c12t, cs_C,
179 buff_1,
180 gamma11 );
181
182 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega );
183 // FLA_Mult_add( FLA_ONE, alpha11, omega );
184 // FLA_Inv_scal( omega, gamma11 );
185 bl1_scopyconj( alpha11, &omega );
186 bl1_sadd3( alpha11, &omega, &omega );
187 bl1_sinvscals( &omega, gamma11 );
188
189 // FLA_Ger( FLA_MINUS_ONE, a01, c12t, C02 );
190 bl1_sger( BLIS1_NO_CONJUGATE,
191 BLIS1_NO_CONJUGATE,
192 m_behind,
193 m_ahead,
194 buff_m1,
195 a01, rs_A,
196 c12t, cs_C,
197 C02, rs_C, cs_C );
198
199 // FLA_Axpys( FLA_MINUS_ONE, gamma11, a01, FLA_ONE, c01 ););
200 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A02, c12t, FLA_ONE, c01 );
201 bl1_saxpysv( m_behind,
202 buff_m1,
203 gamma11,
204 a01, rs_A,
205 buff_1,
206 c01, rs_C );
207
208 bl1_sgemv( BLIS1_NO_TRANSPOSE,
209 BLIS1_CONJUGATE,
210 m_behind,
211 m_ahead,
212 buff_m1,
213 A02, rs_A, cs_A,
214 c12t, cs_C,
215 buff_1,
216 c01, rs_C );
217
218 /*------------------------------------------------------------*/
219 }
220
221 return FLA_SUCCESS;
222 }
223
224
225
FLA_Lyap_n_opd_var2(int m_AC,double * buff_sgn,double * buff_A,int rs_A,int cs_A,double * buff_W,int rs_W,int cs_W,double * buff_C,int rs_C,int cs_C)226 FLA_Error FLA_Lyap_n_opd_var2( int m_AC,
227 double* buff_sgn,
228 double* buff_A, int rs_A, int cs_A,
229 double* buff_W, int rs_W, int cs_W,
230 double* buff_C, int rs_C, int cs_C )
231 {
232 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
233 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
234 int i;
235
236 bl1_dscalm( BLIS1_NO_CONJUGATE,
237 m_AC,
238 m_AC,
239 buff_sgn,
240 buff_C, rs_C, cs_C );
241
242 for ( i = m_AC - 1; i >= 0; --i )
243 {
244 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
245 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
246 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
247 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
248 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
249
250 double* c01 = buff_C + (i )*cs_C + (0 )*rs_C;
251 double* gamma11 = buff_C + (i )*cs_C + (i )*rs_C;
252 double* C02 = buff_C + (i+1)*cs_C + (0 )*rs_C;
253 double* c12t = buff_C + (i+1)*cs_C + (i )*rs_C;
254
255 double* W22 = buff_W + (i+1)*cs_W + (i+1)*rs_W;
256
257 double omega;
258
259 int m_behind = i;
260 int m_ahead = m_AC - i - 1;
261
262 /*------------------------------------------------------------*/
263
264 // FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, A22, W22 );
265 // FLA_Shift_diag( FLA_NO_CONJUGATE, alpha11, W22 );
266 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, W22, c12t );;
267 bl1_dcopymrt( BLIS1_UPPER_TRIANGULAR,
268 BLIS1_CONJ_NO_TRANSPOSE,
269 m_ahead,
270 m_ahead,
271 A22, rs_A, cs_A,
272 W22, rs_W, cs_W );
273
274 bl1_dshiftdiag( BLIS1_NO_CONJUGATE,
275 0,
276 m_ahead,
277 m_ahead,
278 alpha11,
279 W22, rs_W, cs_W );
280
281 bl1_dtrsv( BLIS1_UPPER_TRIANGULAR,
282 BLIS1_NO_TRANSPOSE,
283 BLIS1_NONUNIT_DIAG,
284 m_ahead,
285 W22, rs_W, cs_W,
286 c12t, cs_C );
287
288 // FLA_Dot2cs( FLA_CONJUGATE, FLA_MINUS_ONE, a12t, c12t, FLA_ONE, gamma11 );
289 bl1_ddot2s( BLIS1_CONJUGATE,
290 m_ahead,
291 buff_m1,
292 a12t, cs_A,
293 c12t, cs_C,
294 buff_1,
295 gamma11 );
296
297 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega );
298 // FLA_Mult_add( FLA_ONE, alpha11, omega );
299 // FLA_Inv_scal( omega, gamma11 );
300 bl1_dcopyconj( alpha11, &omega );
301 bl1_dadd3( alpha11, &omega, &omega );
302 bl1_dinvscals( &omega, gamma11 );
303
304 // FLA_Ger( FLA_MINUS_ONE, a01, c12t, C02 );
305 bl1_dger( BLIS1_NO_CONJUGATE,
306 BLIS1_NO_CONJUGATE,
307 m_behind,
308 m_ahead,
309 buff_m1,
310 a01, rs_A,
311 c12t, cs_C,
312 C02, rs_C, cs_C );
313
314 // FLA_Axpys( FLA_MINUS_ONE, gamma11, a01, FLA_ONE, c01 ););
315 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A02, c12t, FLA_ONE, c01 );
316 bl1_daxpysv( m_behind,
317 buff_m1,
318 gamma11,
319 a01, rs_A,
320 buff_1,
321 c01, rs_C );
322
323 bl1_dgemv( BLIS1_NO_TRANSPOSE,
324 BLIS1_CONJUGATE,
325 m_behind,
326 m_ahead,
327 buff_m1,
328 A02, rs_A, cs_A,
329 c12t, cs_C,
330 buff_1,
331 c01, rs_C );
332
333 /*------------------------------------------------------------*/
334 }
335
336 return FLA_SUCCESS;
337 }
338
339
340
FLA_Lyap_n_opc_var2(int m_AC,scomplex * buff_sgn,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_W,int rs_W,int cs_W,scomplex * buff_C,int rs_C,int cs_C)341 FLA_Error FLA_Lyap_n_opc_var2( int m_AC,
342 scomplex* buff_sgn,
343 scomplex* buff_A, int rs_A, int cs_A,
344 scomplex* buff_W, int rs_W, int cs_W,
345 scomplex* buff_C, int rs_C, int cs_C )
346 {
347 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
348 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
349 int i;
350
351 bl1_cscalm( BLIS1_NO_CONJUGATE,
352 m_AC,
353 m_AC,
354 buff_sgn,
355 buff_C, rs_C, cs_C );
356
357 for ( i = m_AC - 1; i >= 0; --i )
358 {
359 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
360 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
361 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
362 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
363 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
364
365 scomplex* c01 = buff_C + (i )*cs_C + (0 )*rs_C;
366 scomplex* gamma11 = buff_C + (i )*cs_C + (i )*rs_C;
367 scomplex* C02 = buff_C + (i+1)*cs_C + (0 )*rs_C;
368 scomplex* c12t = buff_C + (i+1)*cs_C + (i )*rs_C;
369
370 scomplex* W22 = buff_W + (i+1)*cs_W + (i+1)*rs_W;
371
372 scomplex omega;
373
374 int m_behind = i;
375 int m_ahead = m_AC - i - 1;
376
377 /*------------------------------------------------------------*/
378
379 // FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, A22, W22 );
380 // FLA_Shift_diag( FLA_NO_CONJUGATE, alpha11, W22 );
381 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, W22, c12t );;
382 bl1_ccopymrt( BLIS1_UPPER_TRIANGULAR,
383 BLIS1_CONJ_NO_TRANSPOSE,
384 m_ahead,
385 m_ahead,
386 A22, rs_A, cs_A,
387 W22, rs_W, cs_W );
388
389 bl1_cshiftdiag( BLIS1_NO_CONJUGATE,
390 0,
391 m_ahead,
392 m_ahead,
393 alpha11,
394 W22, rs_W, cs_W );
395
396 bl1_ctrsv( BLIS1_UPPER_TRIANGULAR,
397 BLIS1_NO_TRANSPOSE,
398 BLIS1_NONUNIT_DIAG,
399 m_ahead,
400 W22, rs_W, cs_W,
401 c12t, cs_C );
402
403 // FLA_Dot2cs( FLA_CONJUGATE, FLA_MINUS_ONE, a12t, c12t, FLA_ONE, gamma11 );
404 bl1_cdot2s( BLIS1_CONJUGATE,
405 m_ahead,
406 buff_m1,
407 a12t, cs_A,
408 c12t, cs_C,
409 buff_1,
410 gamma11 );
411
412 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega );
413 // FLA_Mult_add( FLA_ONE, alpha11, omega );
414 // FLA_Inv_scal( omega, gamma11 );
415 bl1_ccopyconj( alpha11, &omega );
416 bl1_cadd3( alpha11, &omega, &omega );
417 bl1_cinvscals( &omega, gamma11 );
418
419 // FLA_Ger( FLA_MINUS_ONE, a01, c12t, C02 );
420 bl1_cger( BLIS1_NO_CONJUGATE,
421 BLIS1_NO_CONJUGATE,
422 m_behind,
423 m_ahead,
424 buff_m1,
425 a01, rs_A,
426 c12t, cs_C,
427 C02, rs_C, cs_C );
428
429 // FLA_Axpys( FLA_MINUS_ONE, gamma11, a01, FLA_ONE, c01 ););
430 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A02, c12t, FLA_ONE, c01 );
431 bl1_caxpysv( m_behind,
432 buff_m1,
433 gamma11,
434 a01, rs_A,
435 buff_1,
436 c01, rs_C );
437
438 bl1_cgemv( BLIS1_NO_TRANSPOSE,
439 BLIS1_CONJUGATE,
440 m_behind,
441 m_ahead,
442 buff_m1,
443 A02, rs_A, cs_A,
444 c12t, cs_C,
445 buff_1,
446 c01, rs_C );
447
448 /*------------------------------------------------------------*/
449 }
450
451 return FLA_SUCCESS;
452 }
453
454
455
FLA_Lyap_n_opz_var2(int m_AC,dcomplex * buff_sgn,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_W,int rs_W,int cs_W,dcomplex * buff_C,int rs_C,int cs_C)456 FLA_Error FLA_Lyap_n_opz_var2( int m_AC,
457 dcomplex* buff_sgn,
458 dcomplex* buff_A, int rs_A, int cs_A,
459 dcomplex* buff_W, int rs_W, int cs_W,
460 dcomplex* buff_C, int rs_C, int cs_C )
461 {
462 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
463 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
464 int i;
465
466 bl1_zscalm( BLIS1_NO_CONJUGATE,
467 m_AC,
468 m_AC,
469 buff_sgn,
470 buff_C, rs_C, cs_C );
471
472 for ( i = m_AC - 1; i >= 0; --i )
473 {
474 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
475 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
476 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
477 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
478 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
479
480 dcomplex* c01 = buff_C + (i )*cs_C + (0 )*rs_C;
481 dcomplex* gamma11 = buff_C + (i )*cs_C + (i )*rs_C;
482 dcomplex* C02 = buff_C + (i+1)*cs_C + (0 )*rs_C;
483 dcomplex* c12t = buff_C + (i+1)*cs_C + (i )*rs_C;
484
485 dcomplex* W22 = buff_W + (i+1)*cs_W + (i+1)*rs_W;
486
487 dcomplex omega;
488
489 int m_behind = i;
490 int m_ahead = m_AC - i - 1;
491
492 /*------------------------------------------------------------*/
493
494 // FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, A22, W22 );
495 // FLA_Shift_diag( FLA_NO_CONJUGATE, alpha11, W22 );
496 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, W22, c12t );;
497 bl1_zcopymrt( BLIS1_UPPER_TRIANGULAR,
498 BLIS1_CONJ_NO_TRANSPOSE,
499 m_ahead,
500 m_ahead,
501 A22, rs_A, cs_A,
502 W22, rs_W, cs_W );
503
504 bl1_zshiftdiag( BLIS1_NO_CONJUGATE,
505 0,
506 m_ahead,
507 m_ahead,
508 alpha11,
509 W22, rs_W, cs_W );
510
511 bl1_ztrsv( BLIS1_UPPER_TRIANGULAR,
512 BLIS1_NO_TRANSPOSE,
513 BLIS1_NONUNIT_DIAG,
514 m_ahead,
515 W22, rs_W, cs_W,
516 c12t, cs_C );
517
518 // FLA_Dot2cs( FLA_CONJUGATE, FLA_MINUS_ONE, a12t, c12t, FLA_ONE, gamma11 );
519 bl1_zdot2s( BLIS1_CONJUGATE,
520 m_ahead,
521 buff_m1,
522 a12t, cs_A,
523 c12t, cs_C,
524 buff_1,
525 gamma11 );
526
527 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega );
528 // FLA_Mult_add( FLA_ONE, alpha11, omega );
529 // FLA_Inv_scal( omega, gamma11 );
530 bl1_zcopyconj( alpha11, &omega );
531 bl1_zadd3( alpha11, &omega, &omega );
532 bl1_zinvscals( &omega, gamma11 );
533
534 // FLA_Ger( FLA_MINUS_ONE, a01, c12t, C02 );
535 bl1_zger( BLIS1_NO_CONJUGATE,
536 BLIS1_NO_CONJUGATE,
537 m_behind,
538 m_ahead,
539 buff_m1,
540 a01, rs_A,
541 c12t, cs_C,
542 C02, rs_C, cs_C );
543
544 // FLA_Axpys( FLA_MINUS_ONE, gamma11, a01, FLA_ONE, c01 ););
545 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A02, c12t, FLA_ONE, c01 );
546 bl1_zaxpysv( m_behind,
547 buff_m1,
548 gamma11,
549 a01, rs_A,
550 buff_1,
551 c01, rs_C );
552
553 bl1_zgemv( BLIS1_NO_TRANSPOSE,
554 BLIS1_CONJUGATE,
555 m_behind,
556 m_ahead,
557 buff_m1,
558 A02, rs_A, cs_A,
559 c12t, cs_C,
560 buff_1,
561 c01, rs_C );
562
563 /*------------------------------------------------------------*/
564 }
565
566 return FLA_SUCCESS;
567 }
568
569