/*

    Copyright (C) 2014, The University of Texas at Austin

    This file is part of libflame and is available under the 3-Clause
    BSD license, which can be found in the LICENSE file at the top-level
    directory, or at http://opensource.org/licenses/BSD-3-Clause

*/
10 #include "FLAME.h"
11
/* Integer codes naming algorithm variants. Neither macro is referenced in
   the visible part of this file; presumably the accompanying test driver
   uses them to select what to time -- TODO confirm against the driver. */
#define FLA_ALG_REFERENCE 0
#define FLA_ALG_FRONT     1
14
15
16 void time_Apply_QUD_UT_inc(
17 int n_repeats, int mB, int mC, int mD, int n, int n_rhs, dim_t b_alg,
18 FLA_Obj R_BC, FLA_Obj R_BD, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W,
19 FLA_Obj bR_BC, FLA_Obj bR_BD, FLA_Obj bC, FLA_Obj bD,
20 double *dtime, double *diff, double *gflops );
21
time_Apply_QUD_UT_inc(int n_repeats,int mB,int mC,int mD,int n,int n_rhs,dim_t b_alg,FLA_Obj R_BC,FLA_Obj R_BD,FLA_Obj C,FLA_Obj D,FLA_Obj T,FLA_Obj W,FLA_Obj bR_BC,FLA_Obj bR_BD,FLA_Obj bC,FLA_Obj bD,double * dtime,double * diff,double * gflops)22 void time_Apply_QUD_UT_inc(
23 int n_repeats, int mB, int mC, int mD, int n, int n_rhs, dim_t b_alg,
24 FLA_Obj R_BC, FLA_Obj R_BD, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W,
25 FLA_Obj bR_BC, FLA_Obj bR_BD, FLA_Obj bC, FLA_Obj bD,
26 double *dtime, double *diff, double *gflops )
27 {
28 int
29 irep;
30
31 double
32 dtime_old = 1.0e9;
33
34 FLA_Obj
35 bR_BD_save, bC_save, bD_save;
36
37 FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bR_BD, &bR_BD_save );
38 FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bC, &bC_save );
39 FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bD, &bD_save );
40
41 for ( irep = 0 ; irep < n_repeats; irep++ ){
42
43 FLASH_Copy( bR_BD_save, bR_BD );
44 FLASH_Copy( bC_save, bC );
45 FLASH_Copy( bD_save, bD );
46
47 *dtime = FLA_Clock();
48
49 FLASH_Apply_QUD_UT_inc( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
50 T, W,
51 bR_BD,
52 C, bC,
53 D, bD );
54
55 *dtime = FLA_Clock() - *dtime;
56 dtime_old = min( *dtime, dtime_old );
57
58 }
59
60 {
61
62 FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
63 FLA_ONE, R_BD, bR_BD );
64
65 FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
66 FLA_ONE, R_BC, bR_BC );
67
68 *diff = FLASH_Max_elemwise_diff( bR_BD, bR_BC );
69 }
70
71 *gflops = n * n_rhs * ( 2.0 * mC + 2.0 * mD + 0.5 * b_alg + 0.5 ) /
72 dtime_old / 1e9;
73 if ( FLA_Obj_is_complex( R_BD ) )
74 *gflops *= 4.0;
75
76 *dtime = dtime_old;
77
78 FLASH_Obj_free( &bR_BD_save );
79 FLASH_Obj_free( &bC_save );
80 FLASH_Obj_free( &bD_save );
81 }
82
83