1 2 // ================================================================================================= 3 // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This 4 // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- 5 // width of 100 characters per line. 6 // 7 // Author(s): 8 // Cedric Nugteren <www.cedricnugteren.nl> 9 // 10 // This file implements a class with static methods to describe the Xhemm routine. Examples of 11 // such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These 12 // static methods are used by the correctness tester and the performance tester. 13 // 14 // ================================================================================================= 15 16 #ifndef CLBLAST_TEST_ROUTINES_XHEMM_H_ 17 #define CLBLAST_TEST_ROUTINES_XHEMM_H_ 18 19 #include "test/routines/common.hpp" 20 21 namespace clblast { 22 // ================================================================================================= 23 24 // See comment at top of file for a description of the class 25 template <typename T> 26 class TestXhemm { 27 public: 28 29 // The BLAS level: 1, 2, or 3 BLASLevel()30 static size_t BLASLevel() { return 3; } 31 32 // The list of arguments relevant for this routine GetOptions()33 static std::vector<std::string> GetOptions() { 34 return {kArgM, kArgN, 35 kArgLayout, kArgSide, kArgTriangle, 36 kArgALeadDim, kArgBLeadDim, kArgCLeadDim, 37 kArgAOffset, kArgBOffset, kArgCOffset, 38 kArgAlpha, kArgBeta}; 39 } BuffersIn()40 static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; } BuffersOut()41 static std::vector<std::string> BuffersOut() { return {kBufMatC}; } 42 43 // Describes how to obtain the sizes of the buffers GetSizeA(const Arguments<T> & args)44 static size_t GetSizeA(const Arguments<T> &args) { 45 size_t k_value = (args.side == Side::kLeft) ? args.m : args.n; 46 auto a_rotated = (args.layout == Layout::kRowMajor); 47 auto a_two = (a_rotated) ? args.m : k_value; 48 return a_two * args.a_ld + args.a_offset; 49 } GetSizeB(const Arguments<T> & args)50 static size_t GetSizeB(const Arguments<T> &args) { 51 size_t k_value = (args.side == Side::kLeft) ? args.m : args.n; 52 auto b_rotated = (args.layout == Layout::kRowMajor); 53 auto b_two = (b_rotated) ? k_value : args.n; 54 return b_two * args.b_ld + args.b_offset; 55 } GetSizeC(const Arguments<T> & args)56 static size_t GetSizeC(const Arguments<T> &args) { 57 auto c_rotated = (args.layout == Layout::kRowMajor); 58 auto c_two = (c_rotated) ? args.m : args.n; 59 return c_two * args.c_ld + args.c_offset; 60 } 61 62 // Describes how to set the sizes of all the buffers SetSizes(Arguments<T> & args)63 static void SetSizes(Arguments<T> &args) { 64 args.a_size = GetSizeA(args); 65 args.b_size = GetSizeB(args); 66 args.c_size = GetSizeC(args); 67 } 68 69 // Describes what the default values of the leading dimensions of the matrices are DefaultLDA(const Arguments<T> & args)70 static size_t DefaultLDA(const Arguments<T> &args) { return args.m; } DefaultLDB(const Arguments<T> & args)71 static size_t DefaultLDB(const Arguments<T> &args) { return args.n; } DefaultLDC(const Arguments<T> & args)72 static size_t DefaultLDC(const Arguments<T> &args) { return args.n; } 73 74 // Describes which transpose options are relevant for this routine 75 using Transposes = std::vector<Transpose>; GetATransposes(const Transposes &)76 static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine GetBTransposes(const Transposes &)77 static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine 78 79 // Describes how to prepare the input data PrepareData(const Arguments<T> &,Queue &,const int,std::vector<T> &,std::vector<T> &,std::vector<T> &,std::vector<T> &,std::vector<T> &,std::vector<T> &,std::vector<T> &)80 static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, 81 std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, 82 std::vector<T>&, std::vector<T>&) {} // N/A for this routine 83 84 // Describes how to run the CLBlast routine RunRoutine(const Arguments<T> & args,Buffers<T> & buffers,Queue & queue)85 static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { 86 auto queue_plain = queue(); 87 auto event = cl_event{}; 88 auto status = Hemm(args.layout, args.side, args.triangle, 89 args.m, args.n, args.alpha, 90 buffers.a_mat(), args.a_offset, args.a_ld, 91 buffers.b_mat(), args.b_offset, args.b_ld, args.beta, 92 buffers.c_mat(), args.c_offset, args.c_ld, 93 &queue_plain, &event); 94 if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } 95 return status; 96 } 97 98 // Describes how to run the clBLAS routine (for correctness/performance comparison) 99 #ifdef CLBLAST_REF_CLBLAS RunReference1(const Arguments<T> & args,Buffers<T> & buffers,Queue & queue)100 static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { 101 auto queue_plain = queue(); 102 auto event = cl_event{}; 103 auto status = clblasXhemm(convertToCLBLAS(args.layout), 104 convertToCLBLAS(args.side), 105 convertToCLBLAS(args.triangle), 106 args.m, args.n, args.alpha, 107 buffers.a_mat, args.a_offset, args.a_ld, 108 buffers.b_mat, args.b_offset, args.b_ld, args.beta, 109 buffers.c_mat, args.c_offset, args.c_ld, 110 1, &queue_plain, 0, nullptr, &event); 111 clWaitForEvents(1, &event); 112 return static_cast<StatusCode>(status); 113 } 114 #endif 115 116 // Describes how to run the CPU BLAS routine (for correctness/performance comparison) 117 #ifdef CLBLAST_REF_CBLAS RunReference2(const Arguments<T> & args,BuffersHost<T> & buffers_host,Queue &)118 static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) { 119 cblasXhemm(convertToCBLAS(args.layout), 120 convertToCBLAS(args.side), 121 convertToCBLAS(args.triangle), 122 args.m, args.n, args.alpha, 123 buffers_host.a_mat, args.a_offset, args.a_ld, 124 buffers_host.b_mat, args.b_offset, args.b_ld, args.beta, 125 buffers_host.c_mat, args.c_offset, args.c_ld); 126 return StatusCode::kSuccess; 127 } 128 #endif 129 130 // Describes how to run the cuBLAS routine (for correctness/performance comparison) 131 #ifdef CLBLAST_REF_CUBLAS RunReference3(const Arguments<T> & args,BuffersCUDA<T> & buffers,Queue &)132 static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) { 133 auto status = cublasXhemm(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout, 134 convertToCUBLAS(args.side), 135 convertToCUBLAS(args.triangle), 136 args.m, args.n, args.alpha, 137 buffers.a_mat, args.a_offset, args.a_ld, 138 buffers.b_mat, args.b_offset, args.b_ld, args.beta, 139 buffers.c_mat, args.c_offset, args.c_ld); 140 if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } 141 } 142 #endif 143 144 // Describes how to download the results of the computation (more importantly: which buffer) DownloadResult(const Arguments<T> & args,Buffers<T> & buffers,Queue & queue)145 static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { 146 std::vector<T> result(args.c_size, static_cast<T>(0)); 147 buffers.c_mat.Read(queue, args.c_size, result); 148 return result; 149 } 150 151 // Describes how to compute the indices of the result buffer ResultID1(const Arguments<T> & args)152 static size_t ResultID1(const Arguments<T> &args) { return args.m; } ResultID2(const Arguments<T> & args)153 static size_t ResultID2(const Arguments<T> &args) { return args.n; } GetResultIndex(const Arguments<T> & args,const size_t id1,const size_t id2)154 static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t id2) { 155 return (args.layout == Layout::kRowMajor) ? 156 id1*args.c_ld + id2 + args.c_offset: 157 id2*args.c_ld + id1 + args.c_offset; 158 } 159 160 // Describes how to compute performance metrics GetFlops(const Arguments<T> & args)161 static size_t GetFlops(const Arguments<T> &args) { 162 return 2 * args.m * args.n * args.m; 163 } GetBytes(const Arguments<T> & args)164 static size_t GetBytes(const Arguments<T> &args) { 165 return (args.m*args.m + args.m*args.n + 2*args.m*args.n) * sizeof(T); 166 } 167 }; 168 169 // ================================================================================================= 170 } // namespace clblast 171 172 // CLBLAST_TEST_ROUTINES_XHEMM_H_ 173 #endif 174