1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #include <stdlib.h>             // srand()
19 #include <string.h>             // memcpy()
20 #include <gtest/gtest.h>
21 #include <clBLAS.h>
22 
23 #include <common.h>
24 #include <blas-internal.h>
25 #include <blas-wrapper.h>
26 #include <clBLAS-wrapper.h>
27 #include <BlasBase.h>
28 #include <blas-random.h>
29 #include <hpr.h>
30 
31 static void
releaseMemObjects(cl_mem objA,cl_mem objX)32 releaseMemObjects(cl_mem objA, cl_mem objX)
33 {
34     if( objA!=NULL)
35         clReleaseMemObject(objA);
36     if( objX!=NULL)
37         clReleaseMemObject(objX);
38 }
39 
/*
 * Free the three host-side arrays used by the test.
 * Each pointer must be NULL or have been obtained from new[];
 * delete[] on a null pointer does nothing, so no explicit guard is needed.
 */
template <typename T> static void
deleteBuffers(T *A, T *X, T *backA)
{
    delete[] A;
    delete[] X;
    delete[] backA;
}
56 
57 template <typename T>
58 void
hprCorrectnessTest(TestParams * params)59 hprCorrectnessTest(TestParams *params)
60 {
61     cl_int err;
62     T *AP, *X, *backA;
63 	T alpha_;
64     cl_mem bufAP, bufX;
65     clMath::BlasBase *base;
66     cl_event *events;
67 
68     base = clMath::BlasBase::getInstance();
69 
70     if ((typeid(T) == typeid(cl_double2)) &&
71         !base->isDevSupportDoublePrecision()) {
72 
73         std::cerr << ">> WARNING: The target device doesn't support native "
74                      "double precision floating point arithmetic" <<
75                      std::endl << ">> Test skipped" << std::endl;
76         SUCCEED();
77         return;
78     }
79 
80     printf("number of command queues : %d\n\n", params->numCommandQueues);
81 
82     events = new cl_event[params->numCommandQueues];
83     memset(events, 0, params->numCommandQueues * sizeof(cl_event));
84 
85     size_t lengthAP = (params->N *( params->N + 1 ))/2 ;
86     size_t lengthX = (1 + ((params->N -1) * abs(params->incx)));
87     alpha_ = convertMultiplier<T>(params->alpha);
88 
89     AP 	    = new T[lengthAP + params->offa ];
90     backA 	= new T[lengthAP + params->offa ];
91     X		= new T[lengthX + params->offBX ];
92 
93 	if((AP == NULL) || (backA == NULL) || (X == NULL))
94     {
95         ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl;
96         deleteBuffers<T>(AP, X, backA);
97         delete[] events;
98 		SUCCEED();
99         return;
100     }
101     srand(params->seed);
102 
103     ::std::cerr << "Generating input data... ";
104     randomHerMatrices( params->order, params->uplo, params->N, &alpha_, (AP + params->offa), params->lda, (X + params->offBX), params->incx );
105     memcpy(backA, AP, (lengthAP + params->offa)* sizeof(T));
106 	::std::cerr << "Done" << ::std::endl;
107 
108 	// Allocate buffers
109     bufAP = base->createEnqueueBuffer(AP, (lengthAP + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE);
110     bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX) * sizeof(*X), 0, CL_MEM_READ_ONLY);
111 
112     ::std::cerr << "Calling reference xHPR routine... ";
113 
114     clblasOrder fOrder;
115     clblasUplo fUplo;
116     fOrder = params->order;
117     fUplo = params->uplo;
118 
119     if (fOrder != clblasColumnMajor) {
120 
121         doConjugate( (X + params->offBX), (1 + (params->N-1) * abs(params->incx)), 1, 1 );
122         fOrder = clblasColumnMajor;
123 		fUplo = (fUplo == clblasLower)? clblasUpper : clblasLower;
124 	}
125 	clMath::blas::hpr( fOrder, fUplo, params->N, CREAL(alpha_), X , params->offBX, params->incx, AP, params->offa);
126     ::std::cerr << "Done" << ::std::endl;
127 
128     if ((bufAP == NULL) || (bufX == NULL) ) {
129         /* Skip the test, the most probable reason is
130          *     matrix too big for a device.
131          */
132         releaseMemObjects(bufAP, bufX);
133         deleteBuffers<T>(backA, AP, X);
134         delete[] events;
135 		if(bufAP == NULL)
136 		{
137 			::std::cerr << "BufA is null, lengthA is " << lengthAP << ::std::endl;
138 		}
139 		if(bufX == NULL)
140 		{
141 			::std::cerr << "BufX is null, lengthX is  " << lengthX << ::std::endl;
142 		}
143 
144         ::std::cerr << ">> Failed to create/enqueue buffer for a matrix."
145             << ::std::endl
146             << ">> Can't execute the test, because data is not transfered to GPU."
147             << ::std::endl
148             << ">> Test skipped." << ::std::endl;
149         SUCCEED();
150         return;
151     }
152 
153     ::std::cerr << "Calling clblas xHPR routine... ";
154 
155     err = (cl_int)::clMath::clblas::hpr( params->order, params->uplo, params->N, CREAL(alpha_),
156 						bufX, params->offBX, params->incx, bufAP, params->offa,
157 						params->numCommandQueues, base->commandQueues(),
158     					0, NULL, events);
159 
160     if (err != CL_SUCCESS) {
161         releaseMemObjects(bufAP, bufX);
162         deleteBuffers<T>(backA, AP, X);
163         delete[] events;
164         ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HPR() failed";
165     }
166 
167     err = waitForSuccessfulFinish(params->numCommandQueues,
168         base->commandQueues(), events);
169     if (err != CL_SUCCESS) {
170         releaseMemObjects(bufAP, bufX);
171         deleteBuffers<T>(backA, AP, X);
172         delete[] events;
173         ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()";
174     }
175     ::std::cerr << "Done" << ::std::endl;
176 
177     err = clEnqueueReadBuffer(base->commandQueues()[0], bufAP, CL_TRUE, 0,
178         (lengthAP + params->offa) * sizeof(T), backA, 0,
179         NULL, NULL);
180 	if (err != CL_SUCCESS)
181 	{
182 		::std::cerr << "HPR: Reading results failed...." << std::endl;
183 	}
184 
185     releaseMemObjects(bufAP, bufX);
186 
187 	printf("Comparing the results\n");
188 
189     compareMatrices<T>(clblasColumnMajor, lengthAP, 1, (AP + params->offa), (backA + params->offa), lengthAP);
190 
191 	deleteBuffers<T>( AP, backA, X);
192     delete[] events;
193 }
194 
195 // Instantiate the test
196 
// Runs the HPR correctness test with FloatComplex elements, using the
// parameters supplied by the fixture.
TEST_P(HPR, chpr) {
    TestParams testParams;
    getParams(&testParams);

    hprCorrectnessTest<FloatComplex>(&testParams);
}
203 
// Runs the HPR correctness test with DoubleComplex elements, using the
// parameters supplied by the fixture.
TEST_P(HPR, zhpr) {
    TestParams testParams;
    getParams(&testParams);

    hprCorrectnessTest<DoubleComplex>(&testParams);
}
210