1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #include <stdlib.h>             // srand()
19 #include <string.h>             // memcpy()
20 #include <gtest/gtest.h>
21 #include <clBLAS.h>
22 
23 #include <common.h>
24 #include <blas-internal.h>
25 #include <blas-wrapper.h>
26 #include <clBLAS-wrapper.h>
27 #include <BlasBase.h>
28 #include <blas-random.h>
29 #include <her.h>
30 
31 static void
releaseMemObjects(cl_mem objA,cl_mem objX)32 releaseMemObjects(cl_mem objA, cl_mem objX)
33 {
34 
35     if( objA!=NULL)
36     clReleaseMemObject(objA);
37     if( objX!=NULL)
38     clReleaseMemObject(objX);
39 }
40 
/*
 * Frees the three host arrays used by the HER test.
 * Each pointer must be NULL or the result of new T[]; applying delete[]
 * to a null pointer does nothing, so no explicit NULL checks are needed.
 */
template <typename T> static void
deleteBuffers(T *A, T *X, T *backA)
{
    delete[] A;
    delete[] X;
    delete[] backA;
}
57 
58 template <typename T>
59 void
herCorrectnessTest(TestParams * params)60 herCorrectnessTest(TestParams *params)
61 {
62     cl_int err;
63     T *A, *X, *backA;
64 	T alpha_;
65     cl_mem bufA, bufX;
66     clMath::BlasBase *base;
67     cl_event *events;
68 
69     base = clMath::BlasBase::getInstance();
70 
71     if ((typeid(T) == typeid(DoubleComplex)) &&
72         !base->isDevSupportDoublePrecision()) {
73 
74         std::cerr << ">> WARNING: The target device doesn't support native "
75                      "double precision floating point arithmetic" <<
76                      std::endl << ">> Test skipped" << std::endl;
77         SUCCEED();
78         return;
79     }
80 
81     printf("number of command queues : %d\n\n", params->numCommandQueues);
82 
83     events = new cl_event[params->numCommandQueues];
84     memset(events, 0, params->numCommandQueues * sizeof(cl_event));
85 
86     size_t lengthA = params->N * params->lda;
87     size_t lengthX = (1 + ((params->N -1) * abs(params->incx)));
88     alpha_ = convertMultiplier<T>(params->alpha);
89 
90     A 	    = new T[lengthA + params->offa ];
91     backA 	= new T[lengthA + params->offa ];
92     X		= new T[lengthX + params->offBX ];
93 
94 	if((A == NULL) || (backA == NULL) || (X == NULL))
95     {
96         ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl;
97         deleteBuffers<T>(A, X, backA);
98         delete[] events;
99 		SUCCEED();
100         return;
101     }
102     srand(params->seed);
103 
104     ::std::cerr << "Generating input data... ";
105 	randomHerMatrices( params->order, params->uplo, params->N, &alpha_, (A + params->offa), params->lda, (X + params->offBX), params->incx );
106     memcpy(backA, A, (lengthA + params->offa)* sizeof(*A));
107 	::std::cerr << "Done" << ::std::endl;
108 
109 	// Allocate buffers
110     bufA = base->createEnqueueBuffer(A, (lengthA + params->offa) * sizeof(*A), 0, CL_MEM_READ_WRITE);
111     bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX) * sizeof(*X), 0, CL_MEM_READ_ONLY);
112 
113     ::std::cerr << "Calling reference xHER routine... ";
114 
115     clblasOrder fOrder;
116     clblasUplo fUplo;
117     fOrder = params->order;
118     fUplo = params->uplo;
119 
120     if (fOrder != clblasColumnMajor) {
121 
122         doConjugate( (X + params->offBX), (1 + (params->N-1) * abs(params->incx)), 1, 1 );
123         fOrder = clblasColumnMajor;
124 		fUplo = (fUplo == clblasLower)? clblasUpper : clblasLower;
125 	}
126 	clMath::blas::her( fOrder, fUplo, params->N, CREAL(alpha_), X , params->offBX, params->incx, A, params->offa, params->lda );
127     ::std::cerr << "Done" << ::std::endl;
128 
129     if ((bufA == NULL) || (bufX == NULL) ) {
130         /* Skip the test, the most probable reason is
131          *     matrix too big for a device.
132          */
133         releaseMemObjects(bufA, bufX);
134         deleteBuffers<T>(backA, A, X);
135         delete[] events;
136 		if(bufA == NULL)
137 		{
138 			::std::cerr << "BufA is null, lengthA is " << lengthA << ::std::endl;
139 		}
140 		if(bufX == NULL)
141 		{
142 			::std::cerr << "BufX is null, lengthX is  " << lengthX << ::std::endl;
143 		}
144 
145         ::std::cerr << ">> Failed to create/enqueue buffer for a matrix."
146             << ::std::endl
147             << ">> Can't execute the test, because data is not transfered to GPU."
148             << ::std::endl
149             << ">> Test skipped." << ::std::endl;
150         SUCCEED();
151         return;
152     }
153 
154     ::std::cerr << "Calling clblas xHER routine... ";
155 
156     err = (cl_int)::clMath::clblas::her( params->order, params->uplo, params->N, CREAL(alpha_),
157 						bufX, params->offBX, params->incx, bufA, params->offa, params->lda,
158 						params->numCommandQueues, base->commandQueues(),
159     					0, NULL, events);
160 
161     if (err != CL_SUCCESS) {
162         releaseMemObjects(bufA, bufX);
163         deleteBuffers<T>(backA, A, X);
164         delete[] events;
165         ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HER() failed";
166     }
167 
168     err = waitForSuccessfulFinish(params->numCommandQueues,
169         base->commandQueues(), events);
170     if (err != CL_SUCCESS) {
171         releaseMemObjects(bufA, bufX);
172         deleteBuffers<T>(backA, A, X);
173         delete[] events;
174         ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()";
175     }
176     ::std::cerr << "Done" << ::std::endl;
177 
178     err = clEnqueueReadBuffer(base->commandQueues()[0], bufA, CL_TRUE, 0,
179         (lengthA + params->offa) * sizeof(*A), backA, 0,
180         NULL, NULL);
181 	if (err != CL_SUCCESS)
182 	{
183 		::std::cerr << "HER: Reading results failed...." << std::endl;
184 	}
185 
186     releaseMemObjects(bufA, bufX);
187 
188 	printf("Comparing the results\n");
189 	compareMatrices<T>(params->order, params->N , params->N, (A + params->offa), (backA + params->offa),
190                        params->lda);
191 
192 	deleteBuffers<T>( A, backA, X);
193     delete[] events;
194 }
195 
196 // Instantiate the test
197 
// cher: runs the HER correctness test with FloatComplex elements, using the
// parameters supplied by the fixture's getParams().
TEST_P(HER, cher) {
    TestParams testParams;

    getParams(&testParams);
    herCorrectnessTest<FloatComplex>(&testParams);
}
204 
// zher: runs the HER correctness test with DoubleComplex elements, using the
// parameters supplied by the fixture's getParams().
TEST_P(HER, zher) {
    TestParams testParams;

    getParams(&testParams);
    herCorrectnessTest<DoubleComplex>(&testParams);
}
211