1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17 #ifndef CLBLAS_BENCHMARK_COMMON_HXX__
18 #define CLBLAS_BENCHMARK_COMMON_HXX__
19
20 #include <string>
21 #include <iostream>
22 #include <sstream>
23 #include <stdexcept>
24 #include <cstdlib>
25
26 #include "blas-math.h"
27 #include "test-limits.h"
28 #include "dis_warning.h"
29
30 #include "clBLAS.h"
31 #if defined(__APPLE__) || defined(__MACOSX)
32 #include <OpenCL/cl_ext.h>
33 #else
34 #include <CL/cl_ext.h>
35 #endif
36
/**
 * Builds a scalar of type T from a double.
 *
 * The generic version simply converts the value with static_cast.
 *
 * @param val  value to convert
 * @return     val converted to T
 */
template<typename T>
static T
makeScalar(double val)
{
    T converted = static_cast<T>(val);
    return converted;
}
43
44 template<>
45 __template_static FloatComplex
makeScalar(double val)46 makeScalar(double val)
47 {
48 FloatComplex c;
49
50 c.s[0] = static_cast<float>(val);
51 c.s[1] = 0;
52
53 return c;
54 }
55
56 template<>
57 __template_static DoubleComplex
makeScalar(double val)58 makeScalar(double val)
59 {
60 DoubleComplex c;
61
62 c.s[0] = val;
63 c.s[1] = 0;
64
65 return c;
66 }
67
68 template<typename T>
69 static T
randomScale()70 randomScale()
71 {
72 T t = random<T>(UPPER_BOUND<T>());
73 if (module(t) == 0) {
74 t = t + ONE<T>();
75 }
76
77 return t;
78 }
79
80 std::string
prettyPrintClStatus(const cl_int & status)81 prettyPrintClStatus( const cl_int& status )
82 {
83 switch( status )
84 {
85 case CL_INVALID_GLOBAL_WORK_SIZE:
86 return "CL_INVALID_GLOBAL_WORK_SIZE";
87 case CL_INVALID_MIP_LEVEL:
88 return "CL_INVALID_MIP_LEVEL";
89 case CL_INVALID_BUFFER_SIZE:
90 return "CL_INVALID_BUFFER_SIZE";
91 case CL_INVALID_GL_OBJECT:
92 return "CL_INVALID_GL_OBJECT";
93 case CL_INVALID_OPERATION:
94 return "CL_INVALID_OPERATION";
95 case CL_INVALID_EVENT:
96 return "CL_INVALID_EVENT";
97 case CL_INVALID_EVENT_WAIT_LIST:
98 return "CL_INVALID_EVENT_WAIT_LIST";
99 case CL_INVALID_GLOBAL_OFFSET:
100 return "CL_INVALID_GLOBAL_OFFSET";
101 case CL_INVALID_WORK_ITEM_SIZE:
102 return "CL_INVALID_WORK_ITEM_SIZE";
103 case CL_INVALID_WORK_GROUP_SIZE:
104 return "CL_INVALID_WORK_GROUP_SIZE";
105 case CL_INVALID_WORK_DIMENSION:
106 return "CL_INVALID_WORK_DIMENSION";
107 case CL_INVALID_KERNEL_ARGS:
108 return "CL_INVALID_KERNEL_ARGS";
109 case CL_INVALID_ARG_SIZE:
110 return "CL_INVALID_ARG_SIZE";
111 case CL_INVALID_ARG_VALUE:
112 return "CL_INVALID_ARG_VALUE";
113 case CL_INVALID_ARG_INDEX:
114 return "CL_INVALID_ARG_INDEX";
115 case CL_INVALID_KERNEL:
116 return "CL_INVALID_KERNEL";
117 case CL_INVALID_KERNEL_DEFINITION:
118 return "CL_INVALID_KERNEL_DEFINITION";
119 case CL_INVALID_KERNEL_NAME:
120 return "CL_INVALID_KERNEL_NAME";
121 case CL_INVALID_PROGRAM_EXECUTABLE:
122 return "CL_INVALID_PROGRAM_EXECUTABLE";
123 case CL_INVALID_PROGRAM:
124 return "CL_INVALID_PROGRAM";
125 case CL_INVALID_BUILD_OPTIONS:
126 return "CL_INVALID_BUILD_OPTIONS";
127 case CL_INVALID_BINARY:
128 return "CL_INVALID_BINARY";
129 case CL_INVALID_SAMPLER:
130 return "CL_INVALID_SAMPLER";
131 case CL_INVALID_IMAGE_SIZE:
132 return "CL_INVALID_IMAGE_SIZE";
133 case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
134 return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
135 case CL_INVALID_MEM_OBJECT:
136 return "CL_INVALID_MEM_OBJECT";
137 case CL_INVALID_HOST_PTR:
138 return "CL_INVALID_HOST_PTR";
139 case CL_INVALID_COMMAND_QUEUE:
140 return "CL_INVALID_COMMAND_QUEUE";
141 case CL_INVALID_QUEUE_PROPERTIES:
142 return "CL_INVALID_QUEUE_PROPERTIES";
143 case CL_INVALID_CONTEXT:
144 return "CL_INVALID_CONTEXT";
145 case CL_INVALID_DEVICE:
146 return "CL_INVALID_DEVICE";
147 case CL_INVALID_PLATFORM:
148 return "CL_INVALID_PLATFORM";
149 case CL_INVALID_DEVICE_TYPE:
150 return "CL_INVALID_DEVICE_TYPE";
151 case CL_INVALID_VALUE:
152 return "CL_INVALID_VALUE";
153 case CL_MAP_FAILURE:
154 return "CL_MAP_FAILURE";
155 case CL_BUILD_PROGRAM_FAILURE:
156 return "CL_BUILD_PROGRAM_FAILURE";
157 case CL_IMAGE_FORMAT_NOT_SUPPORTED:
158 return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
159 case CL_IMAGE_FORMAT_MISMATCH:
160 return "CL_IMAGE_FORMAT_MISMATCH";
161 case CL_MEM_COPY_OVERLAP:
162 return "CL_MEM_COPY_OVERLAP";
163 case CL_PROFILING_INFO_NOT_AVAILABLE:
164 return "CL_PROFILING_INFO_NOT_AVAILABLE";
165 case CL_OUT_OF_HOST_MEMORY:
166 return "CL_OUT_OF_HOST_MEMORY";
167 case CL_OUT_OF_RESOURCES:
168 return "CL_OUT_OF_RESOURCES";
169 case CL_MEM_OBJECT_ALLOCATION_FAILURE:
170 return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
171 case CL_COMPILER_NOT_AVAILABLE:
172 return "CL_COMPILER_NOT_AVAILABLE";
173 case CL_DEVICE_NOT_AVAILABLE:
174 return "CL_DEVICE_NOT_AVAILABLE";
175 case CL_DEVICE_NOT_FOUND:
176 return "CL_DEVICE_NOT_FOUND";
177 case CL_SUCCESS:
178 return "CL_SUCCESS";
179 default:
180 return "Error code not defined";
181 break;
182 }
183 }
184
185 // This is used to either wrap an OpenCL function call, or to
186 // explicitly check a variable for an OpenCL error condition.
187 // If an error occurs, we throw.
188 // Note: std::runtime_error does not take unicode strings as input, so
189 // only strings supported
190 inline cl_int
OpenCL_V_Throw(cl_int res,const std::string & msg,size_t lineno)191 OpenCL_V_Throw( cl_int res, const std::string& msg, size_t lineno )
192 {
193 switch( res )
194 {
195 case CL_SUCCESS: /**< No error */
196 break;
197 default:
198 {
199 std::stringstream tmp;
200
201 tmp << "OPENCL_V_THROWERROR< ";
202 tmp << prettyPrintClStatus(res) ;
203 tmp << " > (";
204 tmp << lineno;
205 tmp << "): ";
206 tmp << msg;
207 std::string errorm(tmp.str());
208 std::cout << errorm<< std::endl;
209 throw std::runtime_error( errorm );
210 }
211 }
212
213 return res;
214 }
215
// Forwards to OpenCL_V_Throw, supplying the line of the call site (__LINE__)
// as the third argument.
#define OPENCL_V_THROW(_status,_message) \
    OpenCL_V_Throw(_status, _message, __LINE__)
218
219 inline cl_ulong
queryMemAllocSize(cl_device_id device_)220 queryMemAllocSize( cl_device_id device_ )
221 {
222 cl_int err;
223 cl_ulong rc = 0;
224
225 err = clGetDeviceInfo(device_, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
226 sizeof(rc), &rc, NULL);
227
228 return rc;
229 }
230
231 class clblasFunc
232 {
233 public:
clblasFunc(StatisticalTimer & _timer,cl_device_type devType)234 clblasFunc(StatisticalTimer& _timer, cl_device_type devType)
235 : timer(_timer)
236 {
237 cl_int err;
238
239 /* Setup OpenCL environment. */
240 OPENCL_V_THROW(clGetPlatformIDs(1, &platform_, NULL),
241 "getting platform IDs");
242 OPENCL_V_THROW(clGetDeviceIDs(platform_, devType, 1,
243 &device_, NULL), "getting device IDs");
244 props_[0] = CL_CONTEXT_PLATFORM;
245 props_[1] = (cl_context_properties)platform_;
246 props_[2] = 0;
247 ctx_ = clCreateContext(props_, 1, &device_, NULL, NULL, &err);
248 OPENCL_V_THROW(err, "creating context");
249 for (unsigned int i = 0; i < numQueues; i++) {
250 queues_[i] = clCreateCommandQueue(ctx_, device_, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
251 }
252
253 timer_id = timer.getUniqueID( "clfunc", 0 );
254
255
256 maxMemAllocSize = queryMemAllocSize( device_ );
257
258 /* Setup clblas. */
259 err = clblasSetup();
260 if (err != CL_SUCCESS) {
261 std::cerr << "clblasSetup() failed with %d\n";
262 for (unsigned int i = 0; i < numQueues; i++) {
263 clReleaseCommandQueue(queues_[i]);
264 }
265 clReleaseContext(ctx_);
266 }
267 }
268
~clblasFunc()269 virtual ~clblasFunc()
270 {
271 clblasTeardown();
272
273 for (unsigned int i = 0; i < numQueues; i++) {
274 OPENCL_V_THROW( clReleaseCommandQueue(queues_[i]), "releasing command queue" );
275 }
276 OPENCL_V_THROW( clReleaseContext(ctx_), "releasing context" );
277 }
278
wait_and_check()279 void wait_and_check()
280 {
281 cl_int err;
282 cl_int wait_status = clWaitForEvents(1, &event_);
283
284 if( wait_status != CL_SUCCESS )
285 {
286 if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST )
287 {
288 clGetEventInfo( event_, CL_EVENT_COMMAND_EXECUTION_STATUS,
289 sizeof(cl_int), &err, NULL );
290 std::cout << "blas function execution status error: " << err << std::endl;
291 exit(1);
292 }
293 else
294 {
295 std::cout << "blas function wait status error: " << wait_status << std::endl;
296 exit(1);
297 }
298 }
299 }
300
time_in_ns()301 double time_in_ns()
302 {
303 StatisticalTimer& timer = StatisticalTimer::getInstance( );
304 return timer.getAverageTime( timer_id ) * 1e9;
305 }
306
307 virtual void call_func() = 0;
308 virtual double gflops() = 0;
309 virtual std::string gflops_formula() = 0;
setup_apiCallCount(cl_uint apiCallCount)310 virtual void setup_apiCallCount(cl_uint apiCallCount){}
311 virtual void setup_buffer(int order_option, int side_option,
312 int uplo_option, int diag_option, int
313 transA_option, int transB_option,
314 size_t M, size_t N, size_t K, size_t lda,
315 size_t ldb, size_t ldc, size_t offA, size_t offBX,
316 size_t offCY, double alpha, double beta) = 0;
317 virtual void initialize_cpu_buffer() = 0;
318 virtual void initialize_gpu_buffer() = 0;
319 virtual void reset_gpu_write_buffer() = 0;
320 virtual void read_gpu_buffer() = 0;
321 virtual void roundtrip_func() = 0;
roundtrip_func_rect()322 virtual void roundtrip_func_rect() {}
allochostptr_roundtrip_func()323 virtual void allochostptr_roundtrip_func() {}
usehostptr_roundtrip_func()324 virtual void usehostptr_roundtrip_func() {}
copyhostptr_roundtrip_func()325 virtual void copyhostptr_roundtrip_func() {}
usepersismem_roundtrip_func()326 virtual void usepersismem_roundtrip_func() {}
327 virtual void roundtrip_setup_buffer(int order_option, int side_option,
328 int uplo_option, int diag_option, int
329 transA_option, int transB_option,
330 size_t M, size_t N, size_t K, size_t lda,
331 size_t ldb, size_t ldc, size_t offA, size_t offBX,
332 size_t offCY, double alpha, double beta) = 0;
333 virtual void releaseGPUBuffer_deleteCPUBuffer()=0;
334 StatisticalTimer& timer;
335 StatisticalTimer::sTimerID timer_id;
336
337 protected:
338 virtual void initialize_scalars(double alpha, double beta) = 0;
339
340 protected:
341 cl_platform_id platform_;
342 cl_device_id device_;
343 cl_context_properties props_[3];
344 cl_context ctx_;
345 static const unsigned int numQueues = 4;
346 cl_command_queue queues_[numQueues];
347 clblasOrder order_;
348 cl_event event_;
349 size_t maxMemAllocSize;
350 }; // class clblasFunc
351
352 #endif // ifndef CLBLAS_BENCHMARK_COMMON_HXX__
353
354