1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #include <stdlib.h>             // srand()
19 #include <string.h>             // memcpy()
20 #include <gtest/gtest.h>
21 #include <clBLAS.h>
22 
23 #include <common.h>
24 #include <blas-internal.h>
25 #include <blas-wrapper.h>
26 #include <clBLAS-wrapper.h>
27 #include <BlasBase.h>
28 #include <blas-random.h>
29 #include <trsv.h>
30 #include <cltypes.h>
31 
32 #include "trsv-delta.h"
33 
34 static void
releaseMemObjects(cl_mem objA,cl_mem objX)35 releaseMemObjects(cl_mem objA, cl_mem objX)
36 {
37 	if (objA != NULL)
38     clReleaseMemObject(objA);
39 	if (objX != NULL)
40     clReleaseMemObject(objX);
41 }
42 
43 template <typename T> static void
deleteBuffers(T * A,T * blasX,T * backX,cl_double * deltaX)44 deleteBuffers(T *A, T *blasX, T *backX, cl_double *deltaX)
45 {
46     if( A != NULL )
47 	{
48     delete[] A;
49 	}
50 	if( blasX != NULL )
51 	{
52     delete[] blasX;
53 	}
54 	if( backX != NULL )
55 	{
56 		delete[] backX;
57 	}
58 	if( deltaX != NULL )
59 	{
60 	delete[] deltaX;
61 }
62 }
63 
64 template <typename T>
65 void
trsvCorrectnessTest(TestParams * params)66 trsvCorrectnessTest(TestParams *params)
67 {
68     cl_int err;
69     T *A, *blasX, *backX;
70 	cl_double *deltaX;
71     cl_mem bufA, bufX;
72     clMath::BlasBase *base;
73     cl_event *events;
74 
75     base = clMath::BlasBase::getInstance();
76 
77     if ((typeid(T) == typeid(cl_double) ||
78          typeid(T) == typeid(DoubleComplex)) &&
79         !base->isDevSupportDoublePrecision()) {
80 
81         std::cerr << ">> WARNING: The target device doesn't support native "
82                      "double precision floating point arithmetic" <<
83                      std::endl << ">> Test skipped" << std::endl;
84         SUCCEED();
85         return;
86     }
87 
88     events = new cl_event[params->numCommandQueues];
89     memset(events, 0, params->numCommandQueues * sizeof(cl_event));
90 
91     size_t lengthA = params->N * params->lda;
92     size_t lengthX = (1 + ((params->N -1) * abs(params->incx)));
93 
94     A 		= new T[lengthA + params->offa];
95     blasX 	= new T[lengthX + params->offBX];
96     backX 	= new T[lengthX + params->offBX];
97 	deltaX	= new cl_double[lengthX + params->offBX];
98 
99 	if ((A==NULL) || (blasX == NULL) || (backX == NULL) || (deltaX == NULL))
100 	{
101 		::std::cerr << "Unable to allocate matrices in Host memory" << std::endl;
102 		deleteBuffers<T>(A, blasX, backX, deltaX);
103 		delete[] events;
104 		SUCCEED();
105 		return;
106 	}
107 	memset( deltaX, 0, lengthX*sizeof(cl_double) );
108 	memset( blasX, 0, lengthX*sizeof(T) );
109 
110     srand(params->seed);
111 
112     ::std::cerr << "Generating input data... ";
113 
114 	//custom generation function in blas-random.h
115 	randomTrsvMatrices<T>( params->order, params->uplo, params->diag, params->N, (A + params->offa), params->lda, (blasX + params->offBX), params->incx);
116 
117 	// Generate delta X for result comparison
118 	trsvDelta<T>( params->order, params->uplo, params->transA, params->diag, params->N, (A + params->offa), params->lda, (blasX + params->offBX), params->incx, (deltaX + params->offBX) );
119 
120 	/*printf("\n\n before acml call\nA\n");
121 	printMatrixBlock( params->order, 0, 0, params->N, params->N, params->lda, A);
122 	printf("\nX\n");
123 	printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, blasX);*/
124 
125     // Copy blasX to clblasX
126     memcpy(backX, blasX, (lengthX + params->offBX) * sizeof(T));
127 	// Allocate buffers
128     bufA = base->createEnqueueBuffer(A, (lengthA + params->offa)* sizeof(T), 0, CL_MEM_READ_ONLY);
129     bufX = base->createEnqueueBuffer(backX, (lengthX + params->offBX)* sizeof(T), 0, CL_MEM_WRITE_ONLY);
130     ::std::cerr << "Done" << ::std::endl;
131 
132     ::std::cerr << "Calling reference xTRSV routine... ";
133 
134     clblasOrder order;
135     clblasUplo fUplo;
136     clblasTranspose fTrans;
137 
138     order = params->order;
139     fUplo = params->uplo;
140     fTrans = params->transA;
141 
142 
143     if (order != clblasColumnMajor)
144     {
145         order = clblasColumnMajor;
146         fUplo =  (params->uplo == clblasUpper)? clblasLower : clblasUpper;
147         fTrans = (params->transA == clblasNoTrans)? clblasTrans : clblasNoTrans;
148 
149         if( params->transA == clblasConjTrans )
150             doConjugate((A + params->offa), params->N, params->N, params->lda );
151     }
152 	::clMath::blas::trsv( order, fUplo, fTrans, params->diag, params->N, A, params->offa, params->lda, blasX, params->offBX, params->incx);
153 	::std::cerr << "Done" << ::std::endl;
154 
155 	/*
156 	printf("\n\n acml result X\n");
157 	printf("\nblasX\n");
158 	printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, blasX);*/
159 
160     if ((bufA == NULL) || (bufX == NULL)) {
161         /* Skip the test, the most probable reason is
162          *     matrix too big for a device.
163          */
164         releaseMemObjects(bufA, bufX);
165         deleteBuffers<T>(A, blasX, backX, deltaX);
166         delete[] events;
167         ::std::cerr << ">> Failed to create/enqueue buffer for a matrix."
168             << ::std::endl
169             << ">> Can't execute the test, because data is not transfered to GPU."
170             << ::std::endl
171             << ">> Test skipped." << ::std::endl;
172         SUCCEED();
173         return;
174     }
175 
176     ::std::cerr << "Calling clblas xTRSV routine... ";
177 
178     DataType type;
179     type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? TYPE_COMPLEX_FLOAT: TYPE_COMPLEX_DOUBLE;
180 
181     // Should use bufXTemp as well
182     err = (cl_int)::clMath::clblas::trsv(type, params->order, params->uplo, params->transA, params->diag, params->N, bufA,
183     					params->offa, params->lda, bufX, params->offBX, params->incx, params->numCommandQueues, base->commandQueues(),
184     					0, NULL, events);
185 
186     if (err != CL_SUCCESS) {
187 
188         deleteBuffers<T>(A, blasX, backX, deltaX);
189         delete[] events;
190         ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TRSV() failed";
191     }
192 
193     err = waitForSuccessfulFinish(params->numCommandQueues,
194         base->commandQueues(), events);
195     if (err != CL_SUCCESS) {
196 
197         deleteBuffers<T>(A, blasX, backX, deltaX);
198         delete[] events;
199         ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()";
200     }
201     ::std::cerr << "Done" << ::std::endl;
202 
203     clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0,
204         lengthX * sizeof(*backX), backX, 0,
205         NULL, NULL);
206 
207     releaseMemObjects(bufA, bufX);
208 
209 	/*
210 	printf("\n\n clblas result X\n");
211 	printf("\nclBlasX\n");
212 	printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, backX);
213 
214 	printf("\n\n delta X\n\n");
215 	printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, deltaX);*/
216 
217     // handle lda correctly based on row-major/col-major..
218     compareMatrices<T>( clblasColumnMajor, lengthX , 1, blasX, backX,
219                        lengthX, deltaX );
220     deleteBuffers<T>(A, blasX, backX, deltaX);
221     delete[] events;
222 }
223 
224 // Instantiate the test
225 
// Runs the TRSV correctness check with cl_float elements, using the
// parameters supplied by getParams().
TEST_P(TRSV, strsv)
{
    TestParams testParams;
    getParams(&testParams);

    trsvCorrectnessTest<cl_float>(&testParams);
}
232 
// Runs the TRSV correctness check with cl_double elements, using the
// parameters supplied by getParams().
TEST_P(TRSV, dtrsv)
{
    TestParams testParams;
    getParams(&testParams);

    trsvCorrectnessTest<cl_double>(&testParams);
}
239 
// Runs the TRSV correctness check with FloatComplex elements, using the
// parameters supplied by getParams().
TEST_P(TRSV, ctrsv)
{
    TestParams testParams;
    getParams(&testParams);

    trsvCorrectnessTest<FloatComplex>(&testParams);
}
246 
// Runs the TRSV correctness check with DoubleComplex elements, using the
// parameters supplied by getParams().
TEST_P(TRSV, ztrsv)
{
    TestParams testParams;
    getParams(&testParams);

    trsvCorrectnessTest<DoubleComplex>(&testParams);
}
253