1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
18 #include <stdlib.h> // srand()
19 #include <string.h> // memcpy()
20 #include <gtest/gtest.h>
21 #include <clBLAS.h>
22
23 #include <common.h>
24 #include <blas-internal.h>
25 #include <blas-wrapper.h>
26 #include <clBLAS-wrapper.h>
27 #include <BlasBase.h>
28 #include <blas-random.h>
29 #include <hpr.h>
30
31 static void
releaseMemObjects(cl_mem objA,cl_mem objX)32 releaseMemObjects(cl_mem objA, cl_mem objX)
33 {
34 if( objA!=NULL)
35 clReleaseMemObject(objA);
36 if( objX!=NULL)
37 clReleaseMemObject(objX);
38 }
39
/*
 * Frees the three host-side arrays used by the test.
 *
 * Each argument must either be NULL or point to storage obtained with
 * new[]. All three are released with delete[], so the order in which the
 * caller passes them does not matter.
 */
template <typename T> static void
deleteBuffers(T *A, T *X, T *backA)
{
    // delete[] applied to a null pointer does nothing, so no explicit
    // NULL checks are required here.
    delete[] A;
    delete[] X;
    delete[] backA;
}
56
57 template <typename T>
58 void
hprCorrectnessTest(TestParams * params)59 hprCorrectnessTest(TestParams *params)
60 {
61 cl_int err;
62 T *AP, *X, *backA;
63 T alpha_;
64 cl_mem bufAP, bufX;
65 clMath::BlasBase *base;
66 cl_event *events;
67
68 base = clMath::BlasBase::getInstance();
69
70 if ((typeid(T) == typeid(cl_double2)) &&
71 !base->isDevSupportDoublePrecision()) {
72
73 std::cerr << ">> WARNING: The target device doesn't support native "
74 "double precision floating point arithmetic" <<
75 std::endl << ">> Test skipped" << std::endl;
76 SUCCEED();
77 return;
78 }
79
80 printf("number of command queues : %d\n\n", params->numCommandQueues);
81
82 events = new cl_event[params->numCommandQueues];
83 memset(events, 0, params->numCommandQueues * sizeof(cl_event));
84
85 size_t lengthAP = (params->N *( params->N + 1 ))/2 ;
86 size_t lengthX = (1 + ((params->N -1) * abs(params->incx)));
87 alpha_ = convertMultiplier<T>(params->alpha);
88
89 AP = new T[lengthAP + params->offa ];
90 backA = new T[lengthAP + params->offa ];
91 X = new T[lengthX + params->offBX ];
92
93 if((AP == NULL) || (backA == NULL) || (X == NULL))
94 {
95 ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl;
96 deleteBuffers<T>(AP, X, backA);
97 delete[] events;
98 SUCCEED();
99 return;
100 }
101 srand(params->seed);
102
103 ::std::cerr << "Generating input data... ";
104 randomHerMatrices( params->order, params->uplo, params->N, &alpha_, (AP + params->offa), params->lda, (X + params->offBX), params->incx );
105 memcpy(backA, AP, (lengthAP + params->offa)* sizeof(T));
106 ::std::cerr << "Done" << ::std::endl;
107
108 // Allocate buffers
109 bufAP = base->createEnqueueBuffer(AP, (lengthAP + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE);
110 bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX) * sizeof(*X), 0, CL_MEM_READ_ONLY);
111
112 ::std::cerr << "Calling reference xHPR routine... ";
113
114 clblasOrder fOrder;
115 clblasUplo fUplo;
116 fOrder = params->order;
117 fUplo = params->uplo;
118
119 if (fOrder != clblasColumnMajor) {
120
121 doConjugate( (X + params->offBX), (1 + (params->N-1) * abs(params->incx)), 1, 1 );
122 fOrder = clblasColumnMajor;
123 fUplo = (fUplo == clblasLower)? clblasUpper : clblasLower;
124 }
125 clMath::blas::hpr( fOrder, fUplo, params->N, CREAL(alpha_), X , params->offBX, params->incx, AP, params->offa);
126 ::std::cerr << "Done" << ::std::endl;
127
128 if ((bufAP == NULL) || (bufX == NULL) ) {
129 /* Skip the test, the most probable reason is
130 * matrix too big for a device.
131 */
132 releaseMemObjects(bufAP, bufX);
133 deleteBuffers<T>(backA, AP, X);
134 delete[] events;
135 if(bufAP == NULL)
136 {
137 ::std::cerr << "BufA is null, lengthA is " << lengthAP << ::std::endl;
138 }
139 if(bufX == NULL)
140 {
141 ::std::cerr << "BufX is null, lengthX is " << lengthX << ::std::endl;
142 }
143
144 ::std::cerr << ">> Failed to create/enqueue buffer for a matrix."
145 << ::std::endl
146 << ">> Can't execute the test, because data is not transfered to GPU."
147 << ::std::endl
148 << ">> Test skipped." << ::std::endl;
149 SUCCEED();
150 return;
151 }
152
153 ::std::cerr << "Calling clblas xHPR routine... ";
154
155 err = (cl_int)::clMath::clblas::hpr( params->order, params->uplo, params->N, CREAL(alpha_),
156 bufX, params->offBX, params->incx, bufAP, params->offa,
157 params->numCommandQueues, base->commandQueues(),
158 0, NULL, events);
159
160 if (err != CL_SUCCESS) {
161 releaseMemObjects(bufAP, bufX);
162 deleteBuffers<T>(backA, AP, X);
163 delete[] events;
164 ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HPR() failed";
165 }
166
167 err = waitForSuccessfulFinish(params->numCommandQueues,
168 base->commandQueues(), events);
169 if (err != CL_SUCCESS) {
170 releaseMemObjects(bufAP, bufX);
171 deleteBuffers<T>(backA, AP, X);
172 delete[] events;
173 ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()";
174 }
175 ::std::cerr << "Done" << ::std::endl;
176
177 err = clEnqueueReadBuffer(base->commandQueues()[0], bufAP, CL_TRUE, 0,
178 (lengthAP + params->offa) * sizeof(T), backA, 0,
179 NULL, NULL);
180 if (err != CL_SUCCESS)
181 {
182 ::std::cerr << "HPR: Reading results failed...." << std::endl;
183 }
184
185 releaseMemObjects(bufAP, bufX);
186
187 printf("Comparing the results\n");
188
189 compareMatrices<T>(clblasColumnMajor, lengthAP, 1, (AP + params->offa), (backA + params->offa), lengthAP);
190
191 deleteBuffers<T>( AP, backA, X);
192 delete[] events;
193 }
194
195 // Instantiate the test
196
// HPR correctness test for FloatComplex: fetches the current test
// parameters with getParams() and runs the shared check on them.
TEST_P(HPR, chpr) {
    TestParams params;

    getParams(&params);
    hprCorrectnessTest<FloatComplex>(&params);
}
203
// HPR correctness test for DoubleComplex: fetches the current test
// parameters with getParams() and runs the shared check on them.
TEST_P(HPR, zhpr) {
    TestParams params;

    getParams(&params);
    hprCorrectnessTest<DoubleComplex>(&params);
}
210