1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
18 #include <stdlib.h> // srand()
19 #include <string.h> // memcpy()
20 #include <gtest/gtest.h>
21 #include <clBLAS.h>
22 #include <common.h>
23 #include <blas-internal.h>
24 #include <blas-wrapper.h>
25 #include <clBLAS-wrapper.h>
26 #include <BlasBase.h>
27 #include <blas-random.h>
28 #include <dot.h>
29
30 static void
releaseMemObjects(cl_mem objX,cl_mem objY,cl_mem objDP,cl_mem objScratch)31 releaseMemObjects(cl_mem objX, cl_mem objY, cl_mem objDP, cl_mem objScratch)
32 {
33 if(objX != NULL)
34 {
35 clReleaseMemObject(objX);
36 }
37 if(objY != NULL)
38 {
39 clReleaseMemObject(objY);
40 }
41 if(objDP != NULL)
42 {
43 clReleaseMemObject(objDP);
44 }
45 if(objScratch != NULL)
46 {
47 clReleaseMemObject(objScratch);
48 }
49 }
50
/*
 * Free the host-side buffers used by the DOT test.
 *
 * All four pointers must have been obtained from array new (new T[n]) or be
 * null. The caller in this file allocates blasDP with `new T[1]`, so it is
 * released with delete[] like the others; releasing it with scalar delete
 * would be undefined behaviour.
 *
 * blasX, blasY - input vectors
 * blasDP       - reference result (array of one element)
 * clblasDP     - result read back from the device
 */
template <typename T> static void
deleteBuffers(T *blasX, T *blasY, T *blasDP, T *clblasDP)
{
    // delete[] on a null pointer is a no-op, so no explicit guards are needed.
    delete[] blasX;
    delete[] blasY;
    delete[] clblasDP;
    delete[] blasDP;
}
71
72 template <typename T>
73 void
dotCorrectnessTest(TestParams * params)74 dotCorrectnessTest(TestParams *params)
75 {
76 cl_int err;
77 T *blasX, *blasY, *clblasDP, *blasDP;
78 cl_mem bufX, bufY, bufDP, scratchBuff;
79 clMath::BlasBase *base;
80 cl_event *events;
81
82 base = clMath::BlasBase::getInstance();
83
84 if ((typeid(T) == typeid(cl_double) ||
85 typeid(T) == typeid(DoubleComplex)) &&
86 !base->isDevSupportDoublePrecision()) {
87
88 std::cerr << ">> WARNING: The target device doesn't support native "
89 "double precision floating point arithmetic" <<
90 std::endl << ">> Test skipped" << std::endl;
91 SUCCEED();
92 return;
93 }
94
95 printf("number of command queues : %d\n\n", params->numCommandQueues);
96
97 events = new cl_event[params->numCommandQueues];
98 memset(events, 0, params->numCommandQueues * sizeof(cl_event));
99
100 size_t lengthX = (1 + ((params->N -1) * abs(params->incx)));
101 size_t lengthY = (1 + ((params->N -1) * abs(params->incy)));
102
103 blasX = new T[lengthX + params->offBX ];
104 blasY = new T[lengthY + params->offCY ];
105 blasDP = new T[1];
106 clblasDP = new T[1 + params->offa];
107
108 if((blasX == NULL) || (blasY == NULL) || (clblasDP == NULL) || (blasDP == NULL))
109 {
110 ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl;
111 deleteBuffers<T>(blasX, blasY, blasDP, clblasDP);
112 delete[] events;
113 SUCCEED();
114 return;
115 }
116
117 srand(params->seed);
118 ::std::cerr << "Generating input data... ";
119
120 randomVectors(params->N, (blasX + params->offBX), params->incx, (blasY + params->offCY), params->incy, true);
121 ::std::cerr << "Done" << ::std::endl;
122
123 // Allocate buffers
124 bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE);
125 bufY = base->createEnqueueBuffer(blasY, (lengthY + params->offCY)* sizeof(*blasY), 0, CL_MEM_READ_WRITE);
126 bufDP = base->createEnqueueBuffer(NULL, (1 + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE);
127 scratchBuff = base->createEnqueueBuffer(NULL, (lengthX * sizeof(T)), 0, CL_MEM_READ_WRITE);
128
129 ::std::cerr << "Calling reference xDOT routine... ";
130
131 *blasDP = ::clMath::blas::dot( params->N, blasX, params->offBX, params->incx, blasY, params->offCY, params->incy);
132 ::std::cerr << "Done" << ::std::endl;
133
134 if ((bufX == NULL) || (bufY == NULL) || (bufDP == NULL) || (scratchBuff == NULL)) {
135 releaseMemObjects(bufX, bufY, bufDP, scratchBuff);
136 deleteBuffers<T>(blasX, blasY, blasDP, clblasDP);
137 delete[] events;
138 ::std::cerr << ">> Failed to create/enqueue buffer for a matrix."
139 << ::std::endl
140 << ">> Can't execute the test, because data is not transfered to GPU."
141 << ::std::endl
142 << ">> Test skipped." << ::std::endl;
143 SUCCEED();
144 return;
145 }
146
147 ::std::cerr << "Calling clblas xDOT routine... ";
148
149 DataType type;
150 type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE;
151
152 // Should use bufXTemp as well
153 err = (cl_int)::clMath::clblas::dot( type, params->N, bufDP, params->offa, bufX,
154 params->offBX, params->incx, bufY, params->offCY, params->incy, scratchBuff, params->numCommandQueues, base->commandQueues(),
155 0, NULL, events);
156
157 if (err != CL_SUCCESS) {
158 releaseMemObjects(bufX, bufY, bufDP, scratchBuff);
159 deleteBuffers<T>(blasX, blasY, blasDP, clblasDP);
160 delete[] events;
161 ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::DOT() failed";
162 }
163
164 err = waitForSuccessfulFinish(params->numCommandQueues,
165 base->commandQueues(), events);
166 if (err != CL_SUCCESS) {
167 releaseMemObjects(bufX, bufY, bufDP, scratchBuff);
168 deleteBuffers<T>(blasX, blasY, blasDP, clblasDP);
169 delete[] events;
170 ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()";
171 }
172 ::std::cerr << "Done" << ::std::endl;
173
174
175 err = clEnqueueReadBuffer(base->commandQueues()[0], bufDP, CL_TRUE, 0,
176 (1 + params->offa) * sizeof(*clblasDP), clblasDP, 0,
177 NULL, NULL);
178 if (err != CL_SUCCESS)
179 {
180 ::std::cerr << "DOT: Reading results failed...." << std::endl;
181 }
182 releaseMemObjects(bufX, bufY, bufDP, scratchBuff);
183
184 compareMatrices<T>(clblasColumnMajor, 1 , 1, (blasDP), (clblasDP+params->offa), 1);
185 deleteBuffers<T>(blasX, blasY, blasDP, clblasDP);
186 delete[] events;
187 }
188
189 // Instantiate the test
190
// sdot: runs the DOT correctness check with cl_float elements.
TEST_P(DOT, sdot)
{
    TestParams testCase;
    getParams(&testCase);

    dotCorrectnessTest<cl_float>(&testCase);
}
197
// ddot: runs the DOT correctness check with cl_double elements.
TEST_P(DOT, ddot)
{
    TestParams testCase;
    getParams(&testCase);

    dotCorrectnessTest<cl_double>(&testCase);
}
204
// cdotu: runs the DOT correctness check with FloatComplex elements.
TEST_P(DOT, cdotu)
{
    TestParams testCase;
    getParams(&testCase);

    dotCorrectnessTest<FloatComplex>(&testCase);
}
211
// zdotu: runs the DOT correctness check with DoubleComplex elements.
TEST_P(DOT, zdotu)
{
    TestParams testCase;
    getParams(&testCase);

    dotCorrectnessTest<DoubleComplex>(&testCase);
}
218