1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17 #include <stdlib.h> // srand()
18 #include <string.h> // memcpy()
19 #include <gtest/gtest.h>
20 #include <clBLAS.h>
21
22 #include <common.h>
23 #include <clBLAS-wrapper.h>
24 #include <BlasBase.h>
25 #include <dot.h>
26 #include <blas-random.h>
27
28 #ifdef PERF_TEST_WITH_ACML
29 #include <blas-internal.h>
30 #include <blas-wrapper.h>
31 #endif
32
33 #include "PerformanceTest.h"
34
35 using namespace std;
36 using namespace clMath;
37
/*
 * Check the integer result of a performance run.
 *
 * A negative value trips the fatal ASSERT_GE (resource allocation or an
 * OpenCL request failed); any other non-zero value trips the non-fatal
 * EXPECT_EQ (the OpenCL version was slower). Wrapped in do { } while (0)
 * so that the macro behaves as a single statement.
 */
#define CHECK_RESULT(ret)                                                \
do {                                                                     \
    ASSERT_GE(ret, 0) << "Fatal error: can not allocate resources or "   \
                         "perform an OpenCL request!" << endl;           \
    EXPECT_EQ(0, ret) << "The OpenCL version is slower in the case" <<   \
                         endl;                                           \
} while (0)
45
46 namespace clMath {
47
48 template <typename ElemType> class DotPerformanceTest : public PerformanceTest
49 {
50 public:
51 virtual ~DotPerformanceTest();
52
53 virtual int prepare(void);
54 virtual nano_time_t etalonPerfSingle(void);
55 virtual nano_time_t clblasPerfSingle(void);
56
runInstance(BlasFunction fn,TestParams * params)57 static void runInstance(BlasFunction fn, TestParams *params)
58 {
59 DotPerformanceTest<ElemType> perfCase(fn, params);
60 int ret = 0;
61 int opFactor;
62 BlasBase *base;
63
64 base = clMath::BlasBase::getInstance();
65
66 opFactor =1;
67
68 if (((fn == FN_DDOT) || (fn == FN_ZDOTU)) &&
69 !base->isDevSupportDoublePrecision()) {
70
71 std::cerr << ">> WARNING: The target device doesn't support native "
72 "double precision floating point arithmetic" <<
73 std::endl << ">> Test skipped" << std::endl;
74 return;
75 }
76
77 if (!perfCase.areResourcesSufficient(params)) {
78 std::cerr << ">> RESOURCE CHECK: Skip due to insufficient resources" <<
79 std::endl;
80 return;
81 }
82 else {
83 ret = perfCase.run(opFactor);
84 }
85
86 ASSERT_GE(ret, 0) << "Fatal error: can not allocate resources or "
87 "perform an OpenCL request!" << endl;
88 EXPECT_EQ(0, ret) << "The OpenCL version is slower in the case" << endl;
89 }
90
91 private:
92 DotPerformanceTest(BlasFunction fn, TestParams *params);
93
94 bool areResourcesSufficient(TestParams *params);
95
96 TestParams params_;
97 ElemType *blasX_;
98 ElemType *blasY_;
99 cl_mem mobjX_;
100 cl_mem mobjY_;
101 cl_mem mobjDP_;
102 cl_mem scratchBuff;
103 size_t lengthX;
104 size_t lengthY;
105 ::clMath::BlasBase *base_;
106 };
107
108 template <typename ElemType>
DotPerformanceTest(BlasFunction fn,TestParams * params)109 DotPerformanceTest<ElemType>::DotPerformanceTest(
110 BlasFunction fn,
111 TestParams *params) : PerformanceTest(fn,(problem_size_t) ( (2 * params->N) * sizeof(ElemType) ) ), params_(*params), mobjX_(NULL), mobjY_(NULL),mobjDP_(NULL)
112 {
113
114 blasX_ = NULL;
115 blasY_ = NULL;
116 mobjX_= mobjY_ = mobjDP_= scratchBuff = NULL;
117 lengthX = 1 + (params->N - 1) * abs(params_.incx);
118 lengthY = 1 + (params->N - 1) * abs(params_.incy);
119
120 try
121 {
122 blasX_ = new ElemType[lengthX + params_.offBX];
123 blasY_ = new ElemType[lengthY + params_.offCY];
124 }
125 catch(bad_alloc& ba) {
126 blasX_ = blasY_ = NULL; // areResourcesSufficient() will handle the rest and return
127 mobjX_= mobjY_ = mobjDP_= scratchBuff = NULL;
128 ba = ba;
129 }
130
131 base_ = ::clMath::BlasBase::getInstance();
132 }
133
134 template <typename ElemType>
~DotPerformanceTest()135 DotPerformanceTest<ElemType>::~DotPerformanceTest()
136 {
137 if(blasX_ != NULL)
138 {
139 delete[] blasX_;
140 }
141 if(blasY_ != NULL)
142 {
143 delete[] blasY_;
144 }
145 if( mobjX_ != NULL )
146 {
147 clReleaseMemObject(mobjX_);
148 }
149 if( mobjY_ != NULL )
150 {
151 clReleaseMemObject(mobjY_);
152 }
153 if( mobjDP_ != NULL )
154 {
155 clReleaseMemObject(mobjDP_);
156 }
157 if( scratchBuff!= NULL )
158 {
159 clReleaseMemObject(scratchBuff);
160 }
161
162 }
163
164 /*
165 * Check if available OpenCL resources are sufficient to
166 * run the test case
167 */
168 template <typename ElemType> bool
areResourcesSufficient(TestParams * params)169 DotPerformanceTest<ElemType>::areResourcesSufficient(TestParams *params)
170 {
171 clMath::BlasBase *base;
172 size_t gmemSize, allocSize;
173 bool ret;
174 size_t sizeX, sizeY, sizeDP;
175
176 if((blasX_ == NULL) || (blasY_ == NULL) ) {
177 return 0;
178 }
179
180 base = clMath::BlasBase::getInstance();
181 gmemSize = (size_t)base->availGlobalMemSize( 0 );
182 allocSize = (size_t)base->maxMemAllocSize();
183 sizeX = (lengthX + params->offBX) * sizeof(ElemType);
184 sizeY = (lengthY + params->offCY) * sizeof(ElemType);
185 sizeDP = (1 + params->offa) * sizeof(ElemType);
186
187 ret = ((sizeX < allocSize) && (sizeY < allocSize) && (sizeDP < allocSize));
188 ret = (ret && ((sizeX + sizeY + sizeDP) < gmemSize));
189
190 return ret;
191 }
192
193 template <typename ElemType> int
prepare(void)194 DotPerformanceTest<ElemType>::prepare(void)
195 {
196
197 randomVectors(params_.N, (blasX_ + params_.offBX), params_.incx, (blasY_ + params_.offCY), params_.incy, true);
198
199 mobjX_ = base_->createEnqueueBuffer(blasX_, ((lengthX + params_.offBX) * sizeof(ElemType)), 0, CL_MEM_READ_WRITE);
200 mobjY_ = base_->createEnqueueBuffer(blasY_, ((lengthY + params_.offCY) * sizeof(ElemType)), 0, CL_MEM_READ_WRITE);
201 mobjDP_ = base_->createEnqueueBuffer(NULL, ((1 + params_.offa) * sizeof(ElemType)), 0, CL_MEM_READ_WRITE);
202 scratchBuff = base_->createEnqueueBuffer(NULL, ((lengthY) * sizeof(ElemType)), 0, CL_MEM_READ_WRITE);
203
204 return ((mobjX_ != NULL) && (mobjY_ != NULL) && (mobjDP_ != NULL)&& (scratchBuff != NULL) )? 0 : -1;
205 }
206
207 template <typename ElemType> nano_time_t
etalonPerfSingle(void)208 DotPerformanceTest<ElemType>::etalonPerfSingle(void)
209 {
210 nano_time_t time = 0;
211
212 #ifdef PERF_TEST_WITH_ACML
213
214 time = getCurrentTime();
215 clMath::blas::dot(params_.N, blasX_, params_.offBX, params_.incx, blasY_, params_.offCY, params_.incy);
216 time = getCurrentTime() - time;
217
218 #endif // PERF_TEST_WITH_ACML
219
220 return time;
221 }
222
223
224 template <typename ElemType> nano_time_t
clblasPerfSingle(void)225 DotPerformanceTest<ElemType>::clblasPerfSingle(void)
226 {
227 nano_time_t time;
228 cl_event event;
229 cl_int status;
230 cl_command_queue queue = base_->commandQueues()[0];
231
232 DataType type;
233 type = ( typeid(ElemType) == typeid(float))? TYPE_FLOAT:( typeid(ElemType) == typeid(double))? TYPE_DOUBLE:
234 ( typeid(ElemType) == typeid(FloatComplex))? TYPE_COMPLEX_FLOAT: TYPE_COMPLEX_DOUBLE;
235
236 event = NULL;
237 clFinish( queue);
238 time = getCurrentTime();
239
240 #define TIMING
241 #ifdef TIMING
242 int iter = 100;
243 for ( int i=1; i <= iter; i++)
244 {
245 #endif
246
247 status = (cl_int)clMath::clblas::dot( type, params_.N, mobjDP_, params_.offa, mobjX_, params_.offBX, params_.incx,
248 mobjY_, params_.offCY, params_.incy, scratchBuff, 1, &queue, 0, NULL, &event);
249 if (status != CL_SUCCESS) {
250 cerr << "The CLBLAS DOT function failed, status = " <<
251 status << endl;
252
253 return NANOTIME_ERR;
254 }
255 #ifdef TIMING
256 } // iter loop
257 clFinish( queue);
258 time = getCurrentTime() - time;
259 time /= iter;
260 #else
261
262 status = flushAll(1, &queue);
263 if (status != CL_SUCCESS) {
264 cerr << "clFlush() failed, status = " << status << endl;
265 return NANOTIME_ERR;
266 }
267
268 time = getCurrentTime();
269 status = waitForSuccessfulFinish(1, &queue, &event);
270 if (status == CL_SUCCESS) {
271 time = getCurrentTime() - time;
272 }
273 else {
274 cerr << "Waiting for completion of commands to the queue failed, "
275 "status = " << status << endl;
276 time = NANOTIME_ERR;
277 }
278 #endif
279 return time;
280 }
281
282 } // namespace clMath
283
// DOT performance case with float elements (FN_SDOT).
TEST_P(DOT, sdot)
{
    TestParams params;

    getParams(&params);
    DotPerformanceTest<float>::runInstance(FN_SDOT, &params);
}
291
292
// DOT performance case with double elements (FN_DDOT).
TEST_P(DOT, ddot)
{
    TestParams params;

    getParams(&params);
    DotPerformanceTest<double>::runInstance(FN_DDOT, &params);
}
300
// DOT performance case with FloatComplex elements (FN_CDOTU).
TEST_P(DOT, cdotu)
{
    TestParams params;

    getParams(&params);
    DotPerformanceTest<FloatComplex>::runInstance(FN_CDOTU, &params);
}
308
309
// DOT performance case with DoubleComplex elements (FN_ZDOTU).
TEST_P(DOT, zdotu)
{
    TestParams params;

    getParams(&params);
    DotPerformanceTest<DoubleComplex>::runInstance(FN_ZDOTU, &params);
}
317