1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #include <stdlib.h>             // srand()
19 #include <string.h>             // memcpy()
20 #include <gtest/gtest.h>
21 #include <clBLAS.h>
22 #include <common.h>
23 #include <blas-internal.h>
24 #include <blas-wrapper.h>
25 #include <clBLAS-wrapper.h>
26 #include <BlasBase.h>
27 #include <blas-random.h>
28 #include <dot.h>
29 
30 static void
releaseMemObjects(cl_mem objX,cl_mem objY,cl_mem objDP,cl_mem objScratch)31 releaseMemObjects(cl_mem objX, cl_mem objY, cl_mem objDP, cl_mem objScratch)
32 {
33     if(objX != NULL)
34  	{
35         clReleaseMemObject(objX);
36 	}
37 	if(objY != NULL)
38     {
39         clReleaseMemObject(objY);
40 	}
41 	if(objDP != NULL)
42     {
43         clReleaseMemObject(objDP);
44     }
45 	if(objScratch != NULL)
46     {
47         clReleaseMemObject(objScratch);
48     }
49 }
50 
/*
 * Frees the host-side arrays used by the DOT test.
 *
 * Every argument must be either NULL or a pointer obtained from an array
 * allocation (new T[...]). NULL arguments are ignored.
 *
 *   blasX, blasY - host copies of the input vectors
 *   blasDP       - reference result
 *   clblasDP     - result read back from the device
 */
template <typename T> static void
deleteBuffers(T *blasX, T *blasY, T *blasDP, T *clblasDP)
{
    // delete[] on a null pointer is a no-op, so explicit NULL guards are
    // unnecessary.
    delete[] blasX;
    delete[] blasY;
    delete[] clblasDP;
    // The caller allocates blasDP with new T[1], so the array form of delete
    // is required; pairing new[] with scalar delete is undefined behaviour.
    delete[] blasDP;
}
71 
72 template <typename T>
73 void
dotCorrectnessTest(TestParams * params)74 dotCorrectnessTest(TestParams *params)
75 {
76     cl_int err;
77     T *blasX, *blasY, *clblasDP, *blasDP;
78     cl_mem bufX, bufY, bufDP, scratchBuff;
79     clMath::BlasBase *base;
80     cl_event *events;
81 
82     base = clMath::BlasBase::getInstance();
83 
84     if ((typeid(T) == typeid(cl_double) ||
85          typeid(T) == typeid(DoubleComplex)) &&
86         !base->isDevSupportDoublePrecision()) {
87 
88         std::cerr << ">> WARNING: The target device doesn't support native "
89                      "double precision floating point arithmetic" <<
90                      std::endl << ">> Test skipped" << std::endl;
91         SUCCEED();
92         return;
93     }
94 
95 	printf("number of command queues : %d\n\n", params->numCommandQueues);
96 
97     events = new cl_event[params->numCommandQueues];
98     memset(events, 0, params->numCommandQueues * sizeof(cl_event));
99 
100     size_t lengthX = (1 + ((params->N -1) * abs(params->incx)));
101 	size_t lengthY = (1 + ((params->N -1) * abs(params->incy)));
102 
103     blasX 	= new T[lengthX + params->offBX ];
104     blasY 	= new T[lengthY + params->offCY ];
105 	blasDP = new T[1];
106     clblasDP = new T[1 + params->offa];
107 
108 	if((blasX == NULL) || (blasY == NULL) || (clblasDP == NULL) || (blasDP == NULL))
109 	{
110 		::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl;
111         deleteBuffers<T>(blasX, blasY, blasDP,  clblasDP);
112 		delete[] events;
113 		SUCCEED();
114         return;
115 	}
116 
117     srand(params->seed);
118     ::std::cerr << "Generating input data... ";
119 
120 	randomVectors(params->N, (blasX + params->offBX), params->incx, (blasY + params->offCY), params->incy, true);
121     ::std::cerr << "Done" << ::std::endl;
122 
123 	// Allocate buffers
124     bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE);
125     bufY = base->createEnqueueBuffer(blasY, (lengthY + params->offCY)* sizeof(*blasY), 0, CL_MEM_READ_WRITE);
126     bufDP = base->createEnqueueBuffer(NULL, (1 + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE);
127 	scratchBuff = base->createEnqueueBuffer(NULL, (lengthX * sizeof(T)), 0, CL_MEM_READ_WRITE);
128 
129     ::std::cerr << "Calling reference xDOT routine... ";
130 
131 	*blasDP  = ::clMath::blas::dot( params->N, blasX, params->offBX, params->incx, blasY, params->offCY, params->incy);
132     ::std::cerr << "Done" << ::std::endl;
133 
134     if ((bufX == NULL) || (bufY == NULL) || (bufDP == NULL) || (scratchBuff == NULL)) {
135         releaseMemObjects(bufX, bufY, bufDP, scratchBuff);
136         deleteBuffers<T>(blasX, blasY, blasDP, clblasDP);
137         delete[] events;
138         ::std::cerr << ">> Failed to create/enqueue buffer for a matrix."
139             << ::std::endl
140             << ">> Can't execute the test, because data is not transfered to GPU."
141             << ::std::endl
142             << ">> Test skipped." << ::std::endl;
143         SUCCEED();
144         return;
145     }
146 
147     ::std::cerr << "Calling clblas xDOT routine... ";
148 
149     DataType type;
150     type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE;
151 
152     // Should use bufXTemp as well
153     err = (cl_int)::clMath::clblas::dot( type, params->N,  bufDP, params->offa, bufX,
154     					params->offBX, params->incx, bufY, params->offCY, params->incy, scratchBuff, params->numCommandQueues, base->commandQueues(),
155     					0, NULL, events);
156 
157     if (err != CL_SUCCESS) {
158         releaseMemObjects(bufX, bufY, bufDP, scratchBuff);
159         deleteBuffers<T>(blasX, blasY, blasDP, clblasDP);
160         delete[] events;
161         ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::DOT() failed";
162     }
163 
164     err = waitForSuccessfulFinish(params->numCommandQueues,
165         base->commandQueues(), events);
166     if (err != CL_SUCCESS) {
167         releaseMemObjects(bufX, bufY, bufDP, scratchBuff);
168         deleteBuffers<T>(blasX, blasY, blasDP, clblasDP);
169         delete[] events;
170         ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()";
171     }
172     ::std::cerr << "Done" << ::std::endl;
173 
174 
175     err = clEnqueueReadBuffer(base->commandQueues()[0], bufDP, CL_TRUE, 0,
176         (1 + params->offa) * sizeof(*clblasDP), clblasDP, 0,
177         NULL, NULL);
178 	if (err != CL_SUCCESS)
179 	{
180 		::std::cerr << "DOT: Reading results failed...." << std::endl;
181 	}
182     releaseMemObjects(bufX, bufY, bufDP, scratchBuff);
183 
184     compareMatrices<T>(clblasColumnMajor, 1 , 1, (blasDP), (clblasDP+params->offa), 1);
185     deleteBuffers<T>(blasX, blasY, blasDP, clblasDP);
186     delete[] events;
187 }
188 
// Value-parameterized DOT test cases, one per element type
190 
// Runs the DOT correctness check with cl_float elements.
TEST_P(DOT, sdot)
{
    TestParams testParams;

    getParams(&testParams);
    dotCorrectnessTest<cl_float>(&testParams);
}
197 
// Runs the DOT correctness check with cl_double elements.
TEST_P(DOT, ddot)
{
    TestParams testParams;

    getParams(&testParams);
    dotCorrectnessTest<cl_double>(&testParams);
}
204 
// Runs the DOT correctness check with FloatComplex elements.
TEST_P(DOT, cdotu)
{
    TestParams testParams;

    getParams(&testParams);
    dotCorrectnessTest<FloatComplex>(&testParams);
}
211 
// Runs the DOT correctness check with DoubleComplex elements.
TEST_P(DOT, zdotu)
{
    TestParams testParams;

    getParams(&testParams);
    dotCorrectnessTest<DoubleComplex>(&testParams);
}
218