1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 #include "FLAME.h"
11 
12 #define FLA_ALG_REFERENCE 0
13 #define FLA_ALG_FRONT     1
14 
15 
16 void time_Apply_QUD_UT_inc(
17                  int n_repeats, int mB, int mC, int mD, int n, int n_rhs, dim_t b_alg,
18                  FLA_Obj R_BC, FLA_Obj R_BD, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W,
19                  FLA_Obj bR_BC, FLA_Obj bR_BD, FLA_Obj bC, FLA_Obj bD,
20                  double *dtime, double *diff, double *gflops );
21 
time_Apply_QUD_UT_inc(int n_repeats,int mB,int mC,int mD,int n,int n_rhs,dim_t b_alg,FLA_Obj R_BC,FLA_Obj R_BD,FLA_Obj C,FLA_Obj D,FLA_Obj T,FLA_Obj W,FLA_Obj bR_BC,FLA_Obj bR_BD,FLA_Obj bC,FLA_Obj bD,double * dtime,double * diff,double * gflops)22 void time_Apply_QUD_UT_inc(
23                  int n_repeats, int mB, int mC, int mD, int n, int n_rhs, dim_t b_alg,
24                  FLA_Obj R_BC, FLA_Obj R_BD, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W,
25                  FLA_Obj bR_BC, FLA_Obj bR_BD, FLA_Obj bC, FLA_Obj bD,
26                  double *dtime, double *diff, double *gflops )
27 {
28   int
29     irep;
30 
31   double
32     dtime_old = 1.0e9;
33 
34   FLA_Obj
35     bR_BD_save, bC_save, bD_save;
36 
37   FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bR_BD, &bR_BD_save );
38   FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bC, &bC_save );
39   FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bD, &bD_save );
40 
41   for ( irep = 0 ; irep < n_repeats; irep++ ){
42 
43     FLASH_Copy( bR_BD_save, bR_BD );
44     FLASH_Copy( bC_save, bC );
45     FLASH_Copy( bD_save, bD );
46 
47     *dtime = FLA_Clock();
48 
49     FLASH_Apply_QUD_UT_inc( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
50                             T, W,
51                                bR_BD,
52                             C, bC,
53                             D, bD );
54 
55     *dtime = FLA_Clock() - *dtime;
56     dtime_old = min( *dtime, dtime_old );
57 
58   }
59 
60   {
61 
62     FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
63                 FLA_ONE, R_BD, bR_BD );
64 
65     FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
66                 FLA_ONE, R_BC, bR_BC );
67 
68     *diff = FLASH_Max_elemwise_diff( bR_BD, bR_BC );
69   }
70 
71   *gflops = n * n_rhs * ( 2.0 * mC + 2.0 * mD + 0.5 * b_alg + 0.5 ) /
72             dtime_old / 1e9;
73   if ( FLA_Obj_is_complex( R_BD ) )
74     *gflops *= 4.0;
75 
76   *dtime = dtime_old;
77 
78   FLASH_Obj_free( &bR_BD_save );
79   FLASH_Obj_free( &bC_save );
80   FLASH_Obj_free( &bD_save );
81 }
82 
83