1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLA_Lyap_h_opt_var3(FLA_Obj isgn,FLA_Obj A,FLA_Obj C)13 FLA_Error FLA_Lyap_h_opt_var3( FLA_Obj isgn, FLA_Obj A, FLA_Obj C )
14 {
15 FLA_Datatype datatype;
16 int m_AC;
17 int rs_A, cs_A;
18 int rs_W, cs_W;
19 int rs_C, cs_C;
20 FLA_Obj W;
21
22 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &W );
23
24 datatype = FLA_Obj_datatype( A );
25
26 m_AC = FLA_Obj_length( A );
27
28 rs_A = FLA_Obj_row_stride( A );
29 cs_A = FLA_Obj_col_stride( A );
30
31 rs_W = FLA_Obj_row_stride( W );
32 cs_W = FLA_Obj_col_stride( W );
33
34 rs_C = FLA_Obj_row_stride( C );
35 cs_C = FLA_Obj_col_stride( C );
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_W = FLA_FLOAT_PTR( W );
43 float* buff_C = FLA_FLOAT_PTR( C );
44 float* buff_sgn = FLA_FLOAT_PTR( isgn );
45
46 FLA_Lyap_h_ops_var3( m_AC,
47 buff_sgn,
48 buff_A, rs_A, cs_A,
49 buff_W, rs_W, cs_W,
50 buff_C, rs_C, cs_C );
51
52 break;
53 }
54
55 case FLA_DOUBLE:
56 {
57 double* buff_A = FLA_DOUBLE_PTR( A );
58 double* buff_W = FLA_DOUBLE_PTR( W );
59 double* buff_C = FLA_DOUBLE_PTR( C );
60 double* buff_sgn = FLA_DOUBLE_PTR( isgn );
61
62 FLA_Lyap_h_opd_var3( m_AC,
63 buff_sgn,
64 buff_A, rs_A, cs_A,
65 buff_W, rs_W, cs_W,
66 buff_C, rs_C, cs_C );
67
68 break;
69 }
70
71 case FLA_COMPLEX:
72 {
73 scomplex* buff_A = FLA_COMPLEX_PTR( A );
74 scomplex* buff_W = FLA_COMPLEX_PTR( W );
75 scomplex* buff_C = FLA_COMPLEX_PTR( C );
76 scomplex* buff_sgn = FLA_COMPLEX_PTR( isgn );
77
78 FLA_Lyap_h_opc_var3( m_AC,
79 buff_sgn,
80 buff_A, rs_A, cs_A,
81 buff_W, rs_W, cs_W,
82 buff_C, rs_C, cs_C );
83
84 break;
85 }
86
87 case FLA_DOUBLE_COMPLEX:
88 {
89 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
90 dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
91 dcomplex* buff_C = FLA_DOUBLE_COMPLEX_PTR( C );
92 dcomplex* buff_sgn = FLA_DOUBLE_COMPLEX_PTR( isgn );
93
94 FLA_Lyap_h_opz_var3( m_AC,
95 buff_sgn,
96 buff_A, rs_A, cs_A,
97 buff_W, rs_W, cs_W,
98 buff_C, rs_C, cs_C );
99
100 break;
101 }
102 }
103
104 FLA_Obj_free( &W );
105
106 return FLA_SUCCESS;
107 }
108
109
110
FLA_Lyap_h_ops_var3(int m_AC,float * buff_sgn,float * buff_A,int rs_A,int cs_A,float * buff_W,int rs_W,int cs_W,float * buff_C,int rs_C,int cs_C)111 FLA_Error FLA_Lyap_h_ops_var3( int m_AC,
112 float* buff_sgn,
113 float* buff_A, int rs_A, int cs_A,
114 float* buff_W, int rs_W, int cs_W,
115 float* buff_C, int rs_C, int cs_C )
116 {
117 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
118 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
119 int i;
120
121 bl1_sscalm( BLIS1_NO_CONJUGATE,
122 m_AC,
123 m_AC,
124 buff_sgn,
125 buff_C, rs_C, cs_C );
126
127 for ( i = 0; i < m_AC; ++i )
128 {
129 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
130 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
131 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
132 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
133 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
134
135 float* c01 = buff_C + (i )*cs_C + (0 )*rs_C;
136 float* gamma11 = buff_C + (i )*cs_C + (i )*rs_C;
137 float* C02 = buff_C + (i+1)*cs_C + (0 )*rs_C;
138 float* c12t = buff_C + (i+1)*cs_C + (i )*rs_C;
139
140 float* W22 = buff_W + (i+1)*cs_W + (i+1)*rs_W;
141
142 float omega;
143
144 int m_behind = i;
145 int m_ahead = m_AC - i - 1;
146
147 /*------------------------------------------------------------*/
148
149 // FLA_Dot2cs( FLA_CONJUGATE, FLA_MINUS_ONE, a01, c01, FLA_ONE, gamma11 );
150 bl1_sdot2s( BLIS1_CONJUGATE,
151 m_behind,
152 buff_m1,
153 a01, rs_A,
154 c01, rs_C,
155 buff_1,
156 gamma11 );
157
158 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega );
159 // FLA_Mult_add( FLA_ONE, alpha11, omega );
160 // FLA_Inv_scal( omega, gamma11 );
161 bl1_scopyconj( alpha11, &omega );
162 bl1_sadd3( alpha11, &omega, &omega );
163 bl1_sinvscals( &omega, gamma11 );
164
165 // FLA_Axpys( FLA_MINUS_ONE, gamma11, a12t, FLA_ONE, c12t );
166 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A02, c01, FLA_ONE, c12t );
167 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, C02, a01, FLA_ONE, c12t );
168 bl1_saxpysv( m_ahead,
169 buff_m1,
170 gamma11,
171 a12t, cs_A,
172 buff_1,
173 c12t, cs_C );
174
175 bl1_sgemv( BLIS1_TRANSPOSE,
176 BLIS1_CONJUGATE,
177 m_behind,
178 m_ahead,
179 buff_m1,
180 A02, rs_A, cs_A,
181 c01, rs_C,
182 buff_1,
183 c12t, cs_C );
184
185 bl1_sgemv( BLIS1_TRANSPOSE,
186 BLIS1_CONJUGATE,
187 m_behind,
188 m_ahead,
189 buff_m1,
190 C02, rs_C, cs_C,
191 a01, rs_A,
192 buff_1,
193 c12t, cs_C );
194
195 // FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, A22, W22 );
196 // FLA_Shift_diag( FLA_CONJUGATE, alpha11, W22 );
197 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, W22, c12t );
198 bl1_scopymrt( BLIS1_UPPER_TRIANGULAR,
199 BLIS1_NO_TRANSPOSE,
200 m_ahead,
201 m_ahead,
202 A22, rs_A, cs_A,
203 W22, rs_W, cs_W );
204
205 bl1_sshiftdiag( BLIS1_CONJUGATE,
206 0,
207 m_ahead,
208 m_ahead,
209 alpha11,
210 W22, rs_W, cs_W );
211
212 bl1_strsv( BLIS1_UPPER_TRIANGULAR,
213 BLIS1_TRANSPOSE,
214 BLIS1_NONUNIT_DIAG,
215 m_ahead,
216 W22, rs_W, cs_W,
217 c12t, cs_C );
218
219 /*------------------------------------------------------------*/
220 }
221
222 return FLA_SUCCESS;
223 }
224
225
226
FLA_Lyap_h_opd_var3(int m_AC,double * buff_sgn,double * buff_A,int rs_A,int cs_A,double * buff_W,int rs_W,int cs_W,double * buff_C,int rs_C,int cs_C)227 FLA_Error FLA_Lyap_h_opd_var3( int m_AC,
228 double* buff_sgn,
229 double* buff_A, int rs_A, int cs_A,
230 double* buff_W, int rs_W, int cs_W,
231 double* buff_C, int rs_C, int cs_C )
232 {
233 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
234 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
235 int i;
236
237 bl1_dscalm( BLIS1_NO_CONJUGATE,
238 m_AC,
239 m_AC,
240 buff_sgn,
241 buff_C, rs_C, cs_C );
242
243 for ( i = 0; i < m_AC; ++i )
244 {
245 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
246 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
247 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
248 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
249 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
250
251 double* c01 = buff_C + (i )*cs_C + (0 )*rs_C;
252 double* gamma11 = buff_C + (i )*cs_C + (i )*rs_C;
253 double* C02 = buff_C + (i+1)*cs_C + (0 )*rs_C;
254 double* c12t = buff_C + (i+1)*cs_C + (i )*rs_C;
255
256 double* W22 = buff_W + (i+1)*cs_W + (i+1)*rs_W;
257
258 double omega;
259
260 int m_behind = i;
261 int m_ahead = m_AC - i - 1;
262
263 /*------------------------------------------------------------*/
264
265 // FLA_Dot2cs( FLA_CONJUGATE, FLA_MINUS_ONE, a01, c01, FLA_ONE, gamma11 );
266 bl1_ddot2s( BLIS1_CONJUGATE,
267 m_behind,
268 buff_m1,
269 a01, rs_A,
270 c01, rs_C,
271 buff_1,
272 gamma11 );
273
274 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega );
275 // FLA_Mult_add( FLA_ONE, alpha11, omega );
276 // FLA_Inv_scal( omega, gamma11 );
277 bl1_dcopyconj( alpha11, &omega );
278 bl1_dadd3( alpha11, &omega, &omega );
279 bl1_dinvscals( &omega, gamma11 );
280
281 // FLA_Axpys( FLA_MINUS_ONE, gamma11, a12t, FLA_ONE, c12t );
282 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A02, c01, FLA_ONE, c12t );
283 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, C02, a01, FLA_ONE, c12t );
284 bl1_daxpysv( m_ahead,
285 buff_m1,
286 gamma11,
287 a12t, cs_A,
288 buff_1,
289 c12t, cs_C );
290
291 bl1_dgemv( BLIS1_TRANSPOSE,
292 BLIS1_CONJUGATE,
293 m_behind,
294 m_ahead,
295 buff_m1,
296 A02, rs_A, cs_A,
297 c01, rs_C,
298 buff_1,
299 c12t, cs_C );
300
301 bl1_dgemv( BLIS1_TRANSPOSE,
302 BLIS1_CONJUGATE,
303 m_behind,
304 m_ahead,
305 buff_m1,
306 C02, rs_C, cs_C,
307 a01, rs_A,
308 buff_1,
309 c12t, cs_C );
310
311 // FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, A22, W22 );
312 // FLA_Shift_diag( FLA_CONJUGATE, alpha11, W22 );
313 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, W22, c12t );
314 bl1_dcopymrt( BLIS1_UPPER_TRIANGULAR,
315 BLIS1_NO_TRANSPOSE,
316 m_ahead,
317 m_ahead,
318 A22, rs_A, cs_A,
319 W22, rs_W, cs_W );
320
321 bl1_dshiftdiag( BLIS1_CONJUGATE,
322 0,
323 m_ahead,
324 m_ahead,
325 alpha11,
326 W22, rs_W, cs_W );
327
328 bl1_dtrsv( BLIS1_UPPER_TRIANGULAR,
329 BLIS1_TRANSPOSE,
330 BLIS1_NONUNIT_DIAG,
331 m_ahead,
332 W22, rs_W, cs_W,
333 c12t, cs_C );
334
335 /*------------------------------------------------------------*/
336 }
337
338 return FLA_SUCCESS;
339 }
340
341
342
FLA_Lyap_h_opc_var3(int m_AC,scomplex * buff_sgn,scomplex * buff_A,int rs_A,int cs_A,scomplex * buff_W,int rs_W,int cs_W,scomplex * buff_C,int rs_C,int cs_C)343 FLA_Error FLA_Lyap_h_opc_var3( int m_AC,
344 scomplex* buff_sgn,
345 scomplex* buff_A, int rs_A, int cs_A,
346 scomplex* buff_W, int rs_W, int cs_W,
347 scomplex* buff_C, int rs_C, int cs_C )
348 {
349 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
350 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
351 int i;
352
353 bl1_cscalm( BLIS1_NO_CONJUGATE,
354 m_AC,
355 m_AC,
356 buff_sgn,
357 buff_C, rs_C, cs_C );
358
359 for ( i = 0; i < m_AC; ++i )
360 {
361 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
362 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
363 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
364 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
365 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
366
367 scomplex* c01 = buff_C + (i )*cs_C + (0 )*rs_C;
368 scomplex* gamma11 = buff_C + (i )*cs_C + (i )*rs_C;
369 scomplex* C02 = buff_C + (i+1)*cs_C + (0 )*rs_C;
370 scomplex* c12t = buff_C + (i+1)*cs_C + (i )*rs_C;
371
372 scomplex* W22 = buff_W + (i+1)*cs_W + (i+1)*rs_W;
373
374 scomplex omega;
375
376 int m_behind = i;
377 int m_ahead = m_AC - i - 1;
378
379 /*------------------------------------------------------------*/
380
381 // FLA_Dot2cs( FLA_CONJUGATE, FLA_MINUS_ONE, a01, c01, FLA_ONE, gamma11 );
382 bl1_cdot2s( BLIS1_CONJUGATE,
383 m_behind,
384 buff_m1,
385 a01, rs_A,
386 c01, rs_C,
387 buff_1,
388 gamma11 );
389
390 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega );
391 // FLA_Mult_add( FLA_ONE, alpha11, omega );
392 // FLA_Inv_scal( omega, gamma11 );
393 bl1_ccopyconj( alpha11, &omega );
394 bl1_cadd3( alpha11, &omega, &omega );
395 bl1_cinvscals( &omega, gamma11 );
396
397 // FLA_Axpys( FLA_MINUS_ONE, gamma11, a12t, FLA_ONE, c12t );
398 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A02, c01, FLA_ONE, c12t );
399 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, C02, a01, FLA_ONE, c12t );
400 bl1_caxpysv( m_ahead,
401 buff_m1,
402 gamma11,
403 a12t, cs_A,
404 buff_1,
405 c12t, cs_C );
406
407 bl1_cgemv( BLIS1_TRANSPOSE,
408 BLIS1_CONJUGATE,
409 m_behind,
410 m_ahead,
411 buff_m1,
412 A02, rs_A, cs_A,
413 c01, rs_C,
414 buff_1,
415 c12t, cs_C );
416
417 bl1_cgemv( BLIS1_TRANSPOSE,
418 BLIS1_CONJUGATE,
419 m_behind,
420 m_ahead,
421 buff_m1,
422 C02, rs_C, cs_C,
423 a01, rs_A,
424 buff_1,
425 c12t, cs_C );
426
427 // FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, A22, W22 );
428 // FLA_Shift_diag( FLA_CONJUGATE, alpha11, W22 );
429 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, W22, c12t );
430 bl1_ccopymrt( BLIS1_UPPER_TRIANGULAR,
431 BLIS1_NO_TRANSPOSE,
432 m_ahead,
433 m_ahead,
434 A22, rs_A, cs_A,
435 W22, rs_W, cs_W );
436
437 bl1_cshiftdiag( BLIS1_CONJUGATE,
438 0,
439 m_ahead,
440 m_ahead,
441 alpha11,
442 W22, rs_W, cs_W );
443
444 bl1_ctrsv( BLIS1_UPPER_TRIANGULAR,
445 BLIS1_TRANSPOSE,
446 BLIS1_NONUNIT_DIAG,
447 m_ahead,
448 W22, rs_W, cs_W,
449 c12t, cs_C );
450
451 /*------------------------------------------------------------*/
452 }
453
454 return FLA_SUCCESS;
455 }
456
457
458
FLA_Lyap_h_opz_var3(int m_AC,dcomplex * buff_sgn,dcomplex * buff_A,int rs_A,int cs_A,dcomplex * buff_W,int rs_W,int cs_W,dcomplex * buff_C,int rs_C,int cs_C)459 FLA_Error FLA_Lyap_h_opz_var3( int m_AC,
460 dcomplex* buff_sgn,
461 dcomplex* buff_A, int rs_A, int cs_A,
462 dcomplex* buff_W, int rs_W, int cs_W,
463 dcomplex* buff_C, int rs_C, int cs_C )
464 {
465 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
466 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
467 int i;
468
469 bl1_zscalm( BLIS1_NO_CONJUGATE,
470 m_AC,
471 m_AC,
472 buff_sgn,
473 buff_C, rs_C, cs_C );
474
475 for ( i = 0; i < m_AC; ++i )
476 {
477 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
478 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
479 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
480 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
481 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
482
483 dcomplex* c01 = buff_C + (i )*cs_C + (0 )*rs_C;
484 dcomplex* gamma11 = buff_C + (i )*cs_C + (i )*rs_C;
485 dcomplex* C02 = buff_C + (i+1)*cs_C + (0 )*rs_C;
486 dcomplex* c12t = buff_C + (i+1)*cs_C + (i )*rs_C;
487
488 dcomplex* W22 = buff_W + (i+1)*cs_W + (i+1)*rs_W;
489
490 dcomplex omega;
491
492 int m_behind = i;
493 int m_ahead = m_AC - i - 1;
494
495 /*------------------------------------------------------------*/
496
497 // FLA_Dot2cs( FLA_CONJUGATE, FLA_MINUS_ONE, a01, c01, FLA_ONE, gamma11 );
498 bl1_zdot2s( BLIS1_CONJUGATE,
499 m_behind,
500 buff_m1,
501 a01, rs_A,
502 c01, rs_C,
503 buff_1,
504 gamma11 );
505
506 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega );
507 // FLA_Mult_add( FLA_ONE, alpha11, omega );
508 // FLA_Inv_scal( omega, gamma11 );
509 bl1_zcopyconj( alpha11, &omega );
510 bl1_zadd3( alpha11, &omega, &omega );
511 bl1_zinvscals( &omega, gamma11 );
512
513 // FLA_Axpys( FLA_MINUS_ONE, gamma11, a12t, FLA_ONE, c12t );
514 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A02, c01, FLA_ONE, c12t );
515 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, C02, a01, FLA_ONE, c12t );
516 bl1_zaxpysv( m_ahead,
517 buff_m1,
518 gamma11,
519 a12t, cs_A,
520 buff_1,
521 c12t, cs_C );
522
523 bl1_zgemv( BLIS1_TRANSPOSE,
524 BLIS1_CONJUGATE,
525 m_behind,
526 m_ahead,
527 buff_m1,
528 A02, rs_A, cs_A,
529 c01, rs_C,
530 buff_1,
531 c12t, cs_C );
532
533 bl1_zgemv( BLIS1_TRANSPOSE,
534 BLIS1_CONJUGATE,
535 m_behind,
536 m_ahead,
537 buff_m1,
538 C02, rs_C, cs_C,
539 a01, rs_A,
540 buff_1,
541 c12t, cs_C );
542
543 // FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, A22, W22 );
544 // FLA_Shift_diag( FLA_CONJUGATE, alpha11, W22 );
545 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, W22, c12t );
546 bl1_zcopymrt( BLIS1_UPPER_TRIANGULAR,
547 BLIS1_NO_TRANSPOSE,
548 m_ahead,
549 m_ahead,
550 A22, rs_A, cs_A,
551 W22, rs_W, cs_W );
552
553 bl1_zshiftdiag( BLIS1_CONJUGATE,
554 0,
555 m_ahead,
556 m_ahead,
557 alpha11,
558 W22, rs_W, cs_W );
559
560 bl1_ztrsv( BLIS1_UPPER_TRIANGULAR,
561 BLIS1_TRANSPOSE,
562 BLIS1_NONUNIT_DIAG,
563 m_ahead,
564 W22, rs_W, cs_W,
565 c12t, cs_C );
566
567 /*------------------------------------------------------------*/
568 }
569
570 return FLA_SUCCESS;
571 }
572
573