1 2 // ================================================================================================= 3 // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 // width of 100 characters per line. 6 // 7 // Author(s): 8 // Cedric Nugteren <www.cedricnugteren.nl> 9 // 10 // This file implements a class with static methods to describe the Xspmv routine. Examples of 11 // such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These 12 // static methods are used by the correctness tester and the performance tester. 13 // 14 // ================================================================================================= 15 16 #ifndef CLBLAST_TEST_ROUTINES_XSPMV_H_ 17 #define CLBLAST_TEST_ROUTINES_XSPMV_H_ 18 19 #include "test/routines/common.hpp" 20 21 namespace clblast { 22 // ================================================================================================= 23 24 // See comment at top of file for a description of the class 25 template <typename T> 26 class TestXspmv { 27 public: 28 29 // The BLAS level: 1, 2, or 3 BLASLevel()30 static size_t BLASLevel() { return 2; } 31 32 // The list of arguments relevant for this routine GetOptions()33 static std::vector<std::string> GetOptions() { 34 return {kArgN, 35 kArgLayout, kArgTriangle, 36 kArgXInc, kArgYInc, 37 kArgAPOffset, kArgXOffset, kArgYOffset, 38 kArgAlpha, kArgBeta}; 39 } BuffersIn()40 static std::vector<std::string> BuffersIn() { return {kBufMatAP, kBufVecX, kBufVecY}; } BuffersOut()41 static std::vector<std::string> BuffersOut() { return {kBufVecY}; } 42 43 // Describes how to obtain the sizes of the buffers GetSizeX(const Arguments<T> & args)44 static size_t GetSizeX(const Arguments<T> &args) { 45 return args.n * args.x_inc + args.x_offset; 46 } GetSizeY(const Arguments<T> & args)47 static size_t GetSizeY(const Arguments<T> &args) { 48 return args.n * args.y_inc + args.y_offset; 49 } GetSizeAP(const Arguments<T> & args)50 static size_t GetSizeAP(const Arguments<T> &args) { 51 return ((args.n*(args.n+1)) / 2) + args.ap_offset; 52 } 53 54 // Describes how to set the sizes of all the buffers SetSizes(Arguments<T> & args)55 static void SetSizes(Arguments<T> &args) { 56 args.ap_size = GetSizeAP(args); 57 args.x_size = GetSizeX(args); 58 args.y_size = GetSizeY(args); 59 } 60 61 // Describes what the default values of the leading dimensions of the matrices are DefaultLDA(const Arguments<T> &)62 static size_t DefaultLDA(const Arguments<T> &) { return 1; } // N/A for this routine DefaultLDB(const Arguments<T> &)63 static size_t DefaultLDB(const Arguments<T> &) { return 1; } // N/A for this routine DefaultLDC(const Arguments<T> &)64 static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine 65 66 // Describes which transpose options are relevant for this routine 67 using Transposes = std::vector<Transpose>; GetATransposes(const Transposes &)68 static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine GetBTransposes(const Transposes &)69 static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine 70 71 // Describes how to prepare the input data PrepareData(const Arguments<T> &,Queue &,const int,std::vector<T> &,std::vector<T> &,std::vector<T> &,std::vector<T> &,std::vector<T> &,std::vector<T> &,std::vector<T> &)72 static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, 73 std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, 74 std::vector<T>&, std::vector<T>&) {} // N/A for this routine 75 76 // Describes how to run the CLBlast routine RunRoutine(const Arguments<T> & args,Buffers<T> & buffers,Queue & queue)77 static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { 78 auto queue_plain = queue(); 79 auto event = cl_event{}; 80 auto status = Spmv(args.layout, args.triangle, 81 args.n, args.alpha, 82 buffers.ap_mat(), args.ap_offset, 83 buffers.x_vec(), args.x_offset, args.x_inc, args.beta, 84 buffers.y_vec(), args.y_offset, args.y_inc, 85 &queue_plain, &event); 86 if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } 87 return status; 88 } 89 90 // Describes how to run the clBLAS routine (for correctness/performance comparison) 91 #ifdef CLBLAST_REF_CLBLAS RunReference1(const Arguments<T> & args,Buffers<T> & buffers,Queue & queue)92 static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { 93 auto queue_plain = queue(); 94 auto event = cl_event{}; 95 auto status = clblasXspmv(convertToCLBLAS(args.layout), 96 convertToCLBLAS(args.triangle), 97 args.n, args.alpha, 98 buffers.ap_mat, args.ap_offset, 99 buffers.x_vec, args.x_offset, args.x_inc, args.beta, 100 buffers.y_vec, args.y_offset, args.y_inc, 101 1, &queue_plain, 0, nullptr, &event); 102 clWaitForEvents(1, &event); 103 return static_cast<StatusCode>(status); 104 } 105 #endif 106 107 // Describes how to run the CPU BLAS routine (for correctness/performance comparison) 108 #ifdef CLBLAST_REF_CBLAS RunReference2(const Arguments<T> & args,BuffersHost<T> & buffers_host,Queue &)109 static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) { 110 cblasXspmv(convertToCBLAS(args.layout), 111 convertToCBLAS(args.triangle), 112 args.n, args.alpha, 113 buffers_host.ap_mat, args.ap_offset, 114 buffers_host.x_vec, args.x_offset, args.x_inc, args.beta, 115 buffers_host.y_vec, args.y_offset, args.y_inc); 116 return StatusCode::kSuccess; 117 } 118 #endif 119 120 // Describes how to run the cuBLAS routine (for correctness/performance comparison) 121 #ifdef CLBLAST_REF_CUBLAS RunReference3(const Arguments<T> & args,BuffersCUDA<T> & buffers,Queue &)122 static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) { 123 auto status = cublasXspmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout, 124 convertToCUBLAS(args.triangle), 125 args.n, args.alpha, 126 buffers.ap_mat, args.ap_offset, 127 buffers.x_vec, args.x_offset, args.x_inc, args.beta, 128 buffers.y_vec, args.y_offset, args.y_inc); 129 if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } 130 } 131 #endif 132 133 // Describes how to download the results of the computation (more importantly: which buffer) DownloadResult(const Arguments<T> & args,Buffers<T> & buffers,Queue & queue)134 static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { 135 std::vector<T> result(args.y_size, static_cast<T>(0)); 136 buffers.y_vec.Read(queue, args.y_size, result); 137 return result; 138 } 139 140 // Describes how to compute the indices of the result buffer ResultID1(const Arguments<T> & args)141 static size_t ResultID1(const Arguments<T> &args) { 142 return args.n; 143 } ResultID2(const Arguments<T> &)144 static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine GetResultIndex(const Arguments<T> & args,const size_t id1,const size_t)145 static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) { 146 return id1*args.y_inc + args.y_offset; 147 } 148 149 // Describes how to compute performance metrics GetFlops(const Arguments<T> & args)150 static size_t GetFlops(const Arguments<T> &args) { 151 return 2 * args.n * args.n; 152 } GetBytes(const Arguments<T> & args)153 static size_t GetBytes(const Arguments<T> &args) { 154 return (((args.n*(args.n+1)) / 2) + 2*args.n + args.n) * sizeof(T); 155 } 156 }; 157 158 // ================================================================================================= 159 } // namespace clblast 160 161 // CLBLAST_TEST_ROUTINES_XSPMV_H_ 162 #endif 163