1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
18 #include <stdlib.h> // srand()
19 #include <string.h> // memcpy()
20 #include <gtest/gtest.h>
21 #include <clBLAS.h>
22
23 #include <common.h>
24 #include <blas-internal.h>
25 #include <blas-wrapper.h>
26 #include <clBLAS-wrapper.h>
27 #include <BlasBase.h>
28 #include <blas-random.h>
29 #include <trsv.h>
30 #include <cltypes.h>
31
32 #include "trsv-delta.h"
33
34 static void
releaseMemObjects(cl_mem objA,cl_mem objX)35 releaseMemObjects(cl_mem objA, cl_mem objX)
36 {
37 if (objA != NULL)
38 clReleaseMemObject(objA);
39 if (objX != NULL)
40 clReleaseMemObject(objX);
41 }
42
43 template <typename T> static void
deleteBuffers(T * A,T * blasX,T * backX,cl_double * deltaX)44 deleteBuffers(T *A, T *blasX, T *backX, cl_double *deltaX)
45 {
46 if( A != NULL )
47 {
48 delete[] A;
49 }
50 if( blasX != NULL )
51 {
52 delete[] blasX;
53 }
54 if( backX != NULL )
55 {
56 delete[] backX;
57 }
58 if( deltaX != NULL )
59 {
60 delete[] deltaX;
61 }
62 }
63
64 template <typename T>
65 void
trsvCorrectnessTest(TestParams * params)66 trsvCorrectnessTest(TestParams *params)
67 {
68 cl_int err;
69 T *A, *blasX, *backX;
70 cl_double *deltaX;
71 cl_mem bufA, bufX;
72 clMath::BlasBase *base;
73 cl_event *events;
74
75 base = clMath::BlasBase::getInstance();
76
77 if ((typeid(T) == typeid(cl_double) ||
78 typeid(T) == typeid(DoubleComplex)) &&
79 !base->isDevSupportDoublePrecision()) {
80
81 std::cerr << ">> WARNING: The target device doesn't support native "
82 "double precision floating point arithmetic" <<
83 std::endl << ">> Test skipped" << std::endl;
84 SUCCEED();
85 return;
86 }
87
88 events = new cl_event[params->numCommandQueues];
89 memset(events, 0, params->numCommandQueues * sizeof(cl_event));
90
91 size_t lengthA = params->N * params->lda;
92 size_t lengthX = (1 + ((params->N -1) * abs(params->incx)));
93
94 A = new T[lengthA + params->offa];
95 blasX = new T[lengthX + params->offBX];
96 backX = new T[lengthX + params->offBX];
97 deltaX = new cl_double[lengthX + params->offBX];
98
99 if ((A==NULL) || (blasX == NULL) || (backX == NULL) || (deltaX == NULL))
100 {
101 ::std::cerr << "Unable to allocate matrices in Host memory" << std::endl;
102 deleteBuffers<T>(A, blasX, backX, deltaX);
103 delete[] events;
104 SUCCEED();
105 return;
106 }
107 memset( deltaX, 0, lengthX*sizeof(cl_double) );
108 memset( blasX, 0, lengthX*sizeof(T) );
109
110 srand(params->seed);
111
112 ::std::cerr << "Generating input data... ";
113
114 //custom generation function in blas-random.h
115 randomTrsvMatrices<T>( params->order, params->uplo, params->diag, params->N, (A + params->offa), params->lda, (blasX + params->offBX), params->incx);
116
117 // Generate delta X for result comparison
118 trsvDelta<T>( params->order, params->uplo, params->transA, params->diag, params->N, (A + params->offa), params->lda, (blasX + params->offBX), params->incx, (deltaX + params->offBX) );
119
120 /*printf("\n\n before acml call\nA\n");
121 printMatrixBlock( params->order, 0, 0, params->N, params->N, params->lda, A);
122 printf("\nX\n");
123 printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, blasX);*/
124
125 // Copy blasX to clblasX
126 memcpy(backX, blasX, (lengthX + params->offBX) * sizeof(T));
127 // Allocate buffers
128 bufA = base->createEnqueueBuffer(A, (lengthA + params->offa)* sizeof(T), 0, CL_MEM_READ_ONLY);
129 bufX = base->createEnqueueBuffer(backX, (lengthX + params->offBX)* sizeof(T), 0, CL_MEM_WRITE_ONLY);
130 ::std::cerr << "Done" << ::std::endl;
131
132 ::std::cerr << "Calling reference xTRSV routine... ";
133
134 clblasOrder order;
135 clblasUplo fUplo;
136 clblasTranspose fTrans;
137
138 order = params->order;
139 fUplo = params->uplo;
140 fTrans = params->transA;
141
142
143 if (order != clblasColumnMajor)
144 {
145 order = clblasColumnMajor;
146 fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper;
147 fTrans = (params->transA == clblasNoTrans)? clblasTrans : clblasNoTrans;
148
149 if( params->transA == clblasConjTrans )
150 doConjugate((A + params->offa), params->N, params->N, params->lda );
151 }
152 ::clMath::blas::trsv( order, fUplo, fTrans, params->diag, params->N, A, params->offa, params->lda, blasX, params->offBX, params->incx);
153 ::std::cerr << "Done" << ::std::endl;
154
155 /*
156 printf("\n\n acml result X\n");
157 printf("\nblasX\n");
158 printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, blasX);*/
159
160 if ((bufA == NULL) || (bufX == NULL)) {
161 /* Skip the test, the most probable reason is
162 * matrix too big for a device.
163 */
164 releaseMemObjects(bufA, bufX);
165 deleteBuffers<T>(A, blasX, backX, deltaX);
166 delete[] events;
167 ::std::cerr << ">> Failed to create/enqueue buffer for a matrix."
168 << ::std::endl
169 << ">> Can't execute the test, because data is not transfered to GPU."
170 << ::std::endl
171 << ">> Test skipped." << ::std::endl;
172 SUCCEED();
173 return;
174 }
175
176 ::std::cerr << "Calling clblas xTRSV routine... ";
177
178 DataType type;
179 type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? TYPE_COMPLEX_FLOAT: TYPE_COMPLEX_DOUBLE;
180
181 // Should use bufXTemp as well
182 err = (cl_int)::clMath::clblas::trsv(type, params->order, params->uplo, params->transA, params->diag, params->N, bufA,
183 params->offa, params->lda, bufX, params->offBX, params->incx, params->numCommandQueues, base->commandQueues(),
184 0, NULL, events);
185
186 if (err != CL_SUCCESS) {
187
188 deleteBuffers<T>(A, blasX, backX, deltaX);
189 delete[] events;
190 ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TRSV() failed";
191 }
192
193 err = waitForSuccessfulFinish(params->numCommandQueues,
194 base->commandQueues(), events);
195 if (err != CL_SUCCESS) {
196
197 deleteBuffers<T>(A, blasX, backX, deltaX);
198 delete[] events;
199 ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()";
200 }
201 ::std::cerr << "Done" << ::std::endl;
202
203 clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0,
204 lengthX * sizeof(*backX), backX, 0,
205 NULL, NULL);
206
207 releaseMemObjects(bufA, bufX);
208
209 /*
210 printf("\n\n clblas result X\n");
211 printf("\nclBlasX\n");
212 printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, backX);
213
214 printf("\n\n delta X\n\n");
215 printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, deltaX);*/
216
217 // handle lda correctly based on row-major/col-major..
218 compareMatrices<T>( clblasColumnMajor, lengthX , 1, blasX, backX,
219 lengthX, deltaX );
220 deleteBuffers<T>(A, blasX, backX, deltaX);
221 delete[] events;
222 }
223
224 // Instantiate the test
225
// Runs the TRSV correctness test with T = cl_float for each parameter set
// of the TRSV fixture.
TEST_P(TRSV, strsv) {
    TestParams params;

    // params is passed by address; presumably getParams() fills it from the
    // fixture's current parameters -- confirm against the fixture.
    getParams(&params);
    trsvCorrectnessTest<cl_float>(&params);
}
232
// Runs the TRSV correctness test with T = cl_double for each parameter set
// of the TRSV fixture.
TEST_P(TRSV, dtrsv) {
    TestParams params;

    // params is passed by address; presumably getParams() fills it from the
    // fixture's current parameters -- confirm against the fixture.
    getParams(&params);
    trsvCorrectnessTest<cl_double>(&params);
}
239
// Runs the TRSV correctness test with T = FloatComplex for each parameter
// set of the TRSV fixture.
TEST_P(TRSV, ctrsv) {
    TestParams params;

    // params is passed by address; presumably getParams() fills it from the
    // fixture's current parameters -- confirm against the fixture.
    getParams(&params);
    trsvCorrectnessTest<FloatComplex>(&params);
}
246
// Runs the TRSV correctness test with T = DoubleComplex for each parameter
// set of the TRSV fixture.
TEST_P(TRSV, ztrsv) {
    TestParams params;

    // params is passed by address; presumably getParams() fills it from the
    // fixture's current parameters -- confirm against the fixture.
    getParams(&params);
    trsvCorrectnessTest<DoubleComplex>(&params);
}
253