1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 #ifndef CLBLAS_BENCHMARK_COMMON_HXX__
18 #define CLBLAS_BENCHMARK_COMMON_HXX__
19 
20 #include <string>
21 #include <iostream>
22 #include <sstream>
23 #include <stdexcept>
24 #include <cstdlib>
25 
26 #include "blas-math.h"
27 #include "test-limits.h"
28 #include "dis_warning.h"
29 
30 #include "clBLAS.h"
31 #if defined(__APPLE__) || defined(__MACOSX)
32 #include <OpenCL/cl_ext.h>
33 #else
34 #include <CL/cl_ext.h>
35 #endif
36 
/**
 * Builds a scalar of type T from a double.
 *
 * The primary template simply converts the value with static_cast;
 * element types that need different construction provide explicit
 * specializations.
 *
 * @param val  value to convert
 * @return     val converted to T
 */
template<typename T>
static T
makeScalar(double val)
{
    const T converted = static_cast<T>(val);
    return converted;
}
43 
44 template<>
45 __template_static FloatComplex
makeScalar(double val)46 makeScalar(double val)
47 {
48     FloatComplex c;
49 
50     c.s[0] = static_cast<float>(val);
51     c.s[1] = 0;
52 
53     return c;
54 }
55 
56 template<>
57 __template_static DoubleComplex
makeScalar(double val)58 makeScalar(double val)
59 {
60     DoubleComplex c;
61 
62     c.s[0] = val;
63     c.s[1] = 0;
64 
65     return c;
66 }
67 
68 template<typename T>
69 static T
randomScale()70 randomScale()
71 {
72     T t = random<T>(UPPER_BOUND<T>());
73     if (module(t) == 0) {
74         t = t + ONE<T>();
75     }
76 
77     return t;
78 }
79 
80 std::string
prettyPrintClStatus(const cl_int & status)81 prettyPrintClStatus( const cl_int& status )
82 {
83     switch( status )
84     {
85     case CL_INVALID_GLOBAL_WORK_SIZE:
86         return "CL_INVALID_GLOBAL_WORK_SIZE";
87     case CL_INVALID_MIP_LEVEL:
88         return "CL_INVALID_MIP_LEVEL";
89     case CL_INVALID_BUFFER_SIZE:
90         return "CL_INVALID_BUFFER_SIZE";
91     case CL_INVALID_GL_OBJECT:
92         return "CL_INVALID_GL_OBJECT";
93     case CL_INVALID_OPERATION:
94         return "CL_INVALID_OPERATION";
95     case CL_INVALID_EVENT:
96         return "CL_INVALID_EVENT";
97     case CL_INVALID_EVENT_WAIT_LIST:
98         return "CL_INVALID_EVENT_WAIT_LIST";
99     case CL_INVALID_GLOBAL_OFFSET:
100         return "CL_INVALID_GLOBAL_OFFSET";
101     case CL_INVALID_WORK_ITEM_SIZE:
102         return "CL_INVALID_WORK_ITEM_SIZE";
103     case CL_INVALID_WORK_GROUP_SIZE:
104         return "CL_INVALID_WORK_GROUP_SIZE";
105     case CL_INVALID_WORK_DIMENSION:
106         return "CL_INVALID_WORK_DIMENSION";
107     case CL_INVALID_KERNEL_ARGS:
108         return "CL_INVALID_KERNEL_ARGS";
109     case CL_INVALID_ARG_SIZE:
110         return "CL_INVALID_ARG_SIZE";
111     case CL_INVALID_ARG_VALUE:
112         return "CL_INVALID_ARG_VALUE";
113     case CL_INVALID_ARG_INDEX:
114         return "CL_INVALID_ARG_INDEX";
115     case CL_INVALID_KERNEL:
116         return "CL_INVALID_KERNEL";
117     case CL_INVALID_KERNEL_DEFINITION:
118         return "CL_INVALID_KERNEL_DEFINITION";
119     case CL_INVALID_KERNEL_NAME:
120         return "CL_INVALID_KERNEL_NAME";
121     case CL_INVALID_PROGRAM_EXECUTABLE:
122         return "CL_INVALID_PROGRAM_EXECUTABLE";
123     case CL_INVALID_PROGRAM:
124         return "CL_INVALID_PROGRAM";
125     case CL_INVALID_BUILD_OPTIONS:
126         return "CL_INVALID_BUILD_OPTIONS";
127     case CL_INVALID_BINARY:
128         return "CL_INVALID_BINARY";
129     case CL_INVALID_SAMPLER:
130         return "CL_INVALID_SAMPLER";
131     case CL_INVALID_IMAGE_SIZE:
132         return "CL_INVALID_IMAGE_SIZE";
133     case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
134         return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
135     case CL_INVALID_MEM_OBJECT:
136         return "CL_INVALID_MEM_OBJECT";
137     case CL_INVALID_HOST_PTR:
138         return "CL_INVALID_HOST_PTR";
139     case CL_INVALID_COMMAND_QUEUE:
140         return "CL_INVALID_COMMAND_QUEUE";
141     case CL_INVALID_QUEUE_PROPERTIES:
142         return "CL_INVALID_QUEUE_PROPERTIES";
143     case CL_INVALID_CONTEXT:
144         return "CL_INVALID_CONTEXT";
145     case CL_INVALID_DEVICE:
146         return "CL_INVALID_DEVICE";
147     case CL_INVALID_PLATFORM:
148         return "CL_INVALID_PLATFORM";
149     case CL_INVALID_DEVICE_TYPE:
150         return "CL_INVALID_DEVICE_TYPE";
151     case CL_INVALID_VALUE:
152         return "CL_INVALID_VALUE";
153     case CL_MAP_FAILURE:
154         return "CL_MAP_FAILURE";
155     case CL_BUILD_PROGRAM_FAILURE:
156         return "CL_BUILD_PROGRAM_FAILURE";
157     case CL_IMAGE_FORMAT_NOT_SUPPORTED:
158         return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
159     case CL_IMAGE_FORMAT_MISMATCH:
160         return "CL_IMAGE_FORMAT_MISMATCH";
161     case CL_MEM_COPY_OVERLAP:
162         return "CL_MEM_COPY_OVERLAP";
163     case CL_PROFILING_INFO_NOT_AVAILABLE:
164         return "CL_PROFILING_INFO_NOT_AVAILABLE";
165     case CL_OUT_OF_HOST_MEMORY:
166         return "CL_OUT_OF_HOST_MEMORY";
167     case CL_OUT_OF_RESOURCES:
168         return "CL_OUT_OF_RESOURCES";
169     case CL_MEM_OBJECT_ALLOCATION_FAILURE:
170         return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
171     case CL_COMPILER_NOT_AVAILABLE:
172         return "CL_COMPILER_NOT_AVAILABLE";
173     case CL_DEVICE_NOT_AVAILABLE:
174         return "CL_DEVICE_NOT_AVAILABLE";
175     case CL_DEVICE_NOT_FOUND:
176         return "CL_DEVICE_NOT_FOUND";
177     case CL_SUCCESS:
178         return "CL_SUCCESS";
179     default:
180         return "Error code not defined";
181         break;
182     }
183 }
184 
185 // This is used to either wrap an OpenCL function call, or to
186 // explicitly check a variable for an OpenCL error condition.
187 // If an error occurs, we throw.
188 // Note: std::runtime_error does not take unicode strings as input, so
189 // only strings supported
190 inline cl_int
OpenCL_V_Throw(cl_int res,const std::string & msg,size_t lineno)191 OpenCL_V_Throw( cl_int res, const std::string& msg, size_t lineno )
192 {
193     switch( res )
194     {
195     case CL_SUCCESS: /**< No error */
196         break;
197     default:
198         {
199             std::stringstream tmp;
200 
201             tmp << "OPENCL_V_THROWERROR< ";
202             tmp << prettyPrintClStatus(res) ;
203             tmp << " > (";
204             tmp << lineno;
205             tmp << "): ";
206             tmp << msg;
207             std::string errorm(tmp.str());
208             std::cout << errorm<< std::endl;
209             throw std::runtime_error( errorm );
210         }
211     }
212 
213     return res;
214 }
215 
// Forwards a status and message to OpenCL_V_Throw, supplying the line
// number of the macro's use site via __LINE__.
#define OPENCL_V_THROW(_status,_message) \
    OpenCL_V_Throw(_status, _message, __LINE__)
218 
219 inline cl_ulong
queryMemAllocSize(cl_device_id device_)220 queryMemAllocSize( cl_device_id device_ )
221 {
222     cl_int err;
223     cl_ulong rc = 0;
224 
225     err = clGetDeviceInfo(device_, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
226                           sizeof(rc), &rc, NULL);
227 
228     return rc;
229 }
230 
231 class clblasFunc
232 {
233 public:
clblasFunc(StatisticalTimer & _timer,cl_device_type devType)234     clblasFunc(StatisticalTimer& _timer, cl_device_type devType)
235           : timer(_timer)
236     {
237         cl_int err;
238 
239         /* Setup OpenCL environment. */
240         OPENCL_V_THROW(clGetPlatformIDs(1, &platform_, NULL),
241                        "getting platform IDs");
242         OPENCL_V_THROW(clGetDeviceIDs(platform_, devType, 1,
243                                       &device_, NULL), "getting device IDs");
244         props_[0] = CL_CONTEXT_PLATFORM;
245         props_[1] = (cl_context_properties)platform_;
246         props_[2] = 0;
247         ctx_ = clCreateContext(props_, 1, &device_, NULL, NULL, &err);
248         OPENCL_V_THROW(err, "creating context");
249         for (unsigned int i = 0; i < numQueues; i++) {
250           queues_[i] = clCreateCommandQueue(ctx_, device_, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
251         }
252 
253         timer_id = timer.getUniqueID( "clfunc", 0 );
254 
255 
256         maxMemAllocSize = queryMemAllocSize( device_ );
257 
258     /* Setup clblas. */
259         err = clblasSetup();
260         if (err != CL_SUCCESS) {
261             std::cerr << "clblasSetup() failed with %d\n";
262             for (unsigned int i = 0; i < numQueues; i++) {
263               clReleaseCommandQueue(queues_[i]);
264             }
265             clReleaseContext(ctx_);
266         }
267     }
268 
~clblasFunc()269     virtual ~clblasFunc()
270     {
271         clblasTeardown();
272 
273         for (unsigned int i = 0; i < numQueues; i++) {
274           OPENCL_V_THROW( clReleaseCommandQueue(queues_[i]), "releasing command queue" );
275         }
276         OPENCL_V_THROW( clReleaseContext(ctx_), "releasing context" );
277     }
278 
wait_and_check()279     void wait_and_check()
280     {
281 		cl_int err;
282         cl_int wait_status = clWaitForEvents(1, &event_);
283 
284         if( wait_status != CL_SUCCESS )
285         {
286     	    if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST )
287     	    {
288     	    	clGetEventInfo( event_, CL_EVENT_COMMAND_EXECUTION_STATUS,
289                                 sizeof(cl_int), &err, NULL );
290     	    	std::cout << "blas function execution status error: " << err << std::endl;
291                 exit(1);
292     	    }
293             else
294             {
295     	    	std::cout << "blas function wait status error: " << wait_status << std::endl;
296                 exit(1);
297             }
298         }
299     }
300 
time_in_ns()301     double time_in_ns()
302     {
303 	    StatisticalTimer& timer = StatisticalTimer::getInstance( );
304         return timer.getAverageTime( timer_id ) * 1e9;
305     }
306 
307     virtual void call_func() = 0;
308     virtual double gflops() = 0;
309     virtual std::string gflops_formula() = 0;
setup_apiCallCount(cl_uint apiCallCount)310 	virtual void setup_apiCallCount(cl_uint apiCallCount){}
311     virtual void setup_buffer(int order_option, int side_option,
312                               int uplo_option, int diag_option, int
313                               transA_option, int transB_option,
314                               size_t M, size_t N, size_t K, size_t lda,
315                               size_t ldb, size_t ldc, size_t offA, size_t offBX,
316                               size_t offCY, double alpha, double beta) = 0;
317     virtual void initialize_cpu_buffer() = 0;
318     virtual void initialize_gpu_buffer() = 0;
319     virtual void reset_gpu_write_buffer() = 0;
320 	virtual void read_gpu_buffer() = 0;
321 	virtual void roundtrip_func() = 0;
roundtrip_func_rect()322 	virtual void roundtrip_func_rect() {}
allochostptr_roundtrip_func()323 	virtual void allochostptr_roundtrip_func() {}
usehostptr_roundtrip_func()324 	virtual void usehostptr_roundtrip_func() {}
copyhostptr_roundtrip_func()325 	virtual void copyhostptr_roundtrip_func() {}
usepersismem_roundtrip_func()326 	virtual void usepersismem_roundtrip_func() {}
327 	virtual void roundtrip_setup_buffer(int order_option, int side_option,
328                               int uplo_option, int diag_option, int
329                               transA_option, int transB_option,
330                               size_t M, size_t N, size_t K, size_t lda,
331                               size_t ldb, size_t ldc, size_t offA, size_t offBX,
332                               size_t offCY, double alpha, double beta) = 0;
333 	virtual void releaseGPUBuffer_deleteCPUBuffer()=0;
334     StatisticalTimer& timer;
335     StatisticalTimer::sTimerID timer_id;
336 
337 protected:
338     virtual void initialize_scalars(double alpha, double beta) = 0;
339 
340 protected:
341     cl_platform_id platform_;
342     cl_device_id device_;
343     cl_context_properties props_[3];
344     cl_context ctx_;
345     static const unsigned int numQueues = 4;
346     cl_command_queue queues_[numQueues];
347     clblasOrder order_;
348     cl_event event_;
349     size_t maxMemAllocSize;
350 }; // class clblasFunc
351 
352 #endif // ifndef CLBLAS_BENCHMARK_COMMON_HXX__
353 
354