1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
#include <stdlib.h>     // srand()
#include <string.h>     // memcpy()
#include <new>          // std::nothrow
#include <gtest/gtest.h>
#include <clBLAS.h>

#include <common.h>
#include <blas-internal.h>
#include <blas-wrapper.h>
#include <clBLAS-wrapper.h>
#include <BlasBase.h>
#include <blas-random.h>
#include <her.h>
30
31 static void
releaseMemObjects(cl_mem objA,cl_mem objX)32 releaseMemObjects(cl_mem objA, cl_mem objX)
33 {
34
35 if( objA!=NULL)
36 clReleaseMemObject(objA);
37 if( objX!=NULL)
38 clReleaseMemObject(objX);
39 }
40
/*
 * Frees the three host arrays that the test allocated with new[].
 * Any of the pointers may be NULL.
 */
template <typename T> static void
deleteBuffers(T *A, T *X, T *backA)
{
    // delete[] applied to a null pointer does nothing, so no explicit
    // NULL checks are required here.
    delete[] A;
    delete[] X;
    delete[] backA;
}
57
58 template <typename T>
59 void
herCorrectnessTest(TestParams * params)60 herCorrectnessTest(TestParams *params)
61 {
62 cl_int err;
63 T *A, *X, *backA;
64 T alpha_;
65 cl_mem bufA, bufX;
66 clMath::BlasBase *base;
67 cl_event *events;
68
69 base = clMath::BlasBase::getInstance();
70
71 if ((typeid(T) == typeid(DoubleComplex)) &&
72 !base->isDevSupportDoublePrecision()) {
73
74 std::cerr << ">> WARNING: The target device doesn't support native "
75 "double precision floating point arithmetic" <<
76 std::endl << ">> Test skipped" << std::endl;
77 SUCCEED();
78 return;
79 }
80
81 printf("number of command queues : %d\n\n", params->numCommandQueues);
82
83 events = new cl_event[params->numCommandQueues];
84 memset(events, 0, params->numCommandQueues * sizeof(cl_event));
85
86 size_t lengthA = params->N * params->lda;
87 size_t lengthX = (1 + ((params->N -1) * abs(params->incx)));
88 alpha_ = convertMultiplier<T>(params->alpha);
89
90 A = new T[lengthA + params->offa ];
91 backA = new T[lengthA + params->offa ];
92 X = new T[lengthX + params->offBX ];
93
94 if((A == NULL) || (backA == NULL) || (X == NULL))
95 {
96 ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl;
97 deleteBuffers<T>(A, X, backA);
98 delete[] events;
99 SUCCEED();
100 return;
101 }
102 srand(params->seed);
103
104 ::std::cerr << "Generating input data... ";
105 randomHerMatrices( params->order, params->uplo, params->N, &alpha_, (A + params->offa), params->lda, (X + params->offBX), params->incx );
106 memcpy(backA, A, (lengthA + params->offa)* sizeof(*A));
107 ::std::cerr << "Done" << ::std::endl;
108
109 // Allocate buffers
110 bufA = base->createEnqueueBuffer(A, (lengthA + params->offa) * sizeof(*A), 0, CL_MEM_READ_WRITE);
111 bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX) * sizeof(*X), 0, CL_MEM_READ_ONLY);
112
113 ::std::cerr << "Calling reference xHER routine... ";
114
115 clblasOrder fOrder;
116 clblasUplo fUplo;
117 fOrder = params->order;
118 fUplo = params->uplo;
119
120 if (fOrder != clblasColumnMajor) {
121
122 doConjugate( (X + params->offBX), (1 + (params->N-1) * abs(params->incx)), 1, 1 );
123 fOrder = clblasColumnMajor;
124 fUplo = (fUplo == clblasLower)? clblasUpper : clblasLower;
125 }
126 clMath::blas::her( fOrder, fUplo, params->N, CREAL(alpha_), X , params->offBX, params->incx, A, params->offa, params->lda );
127 ::std::cerr << "Done" << ::std::endl;
128
129 if ((bufA == NULL) || (bufX == NULL) ) {
130 /* Skip the test, the most probable reason is
131 * matrix too big for a device.
132 */
133 releaseMemObjects(bufA, bufX);
134 deleteBuffers<T>(backA, A, X);
135 delete[] events;
136 if(bufA == NULL)
137 {
138 ::std::cerr << "BufA is null, lengthA is " << lengthA << ::std::endl;
139 }
140 if(bufX == NULL)
141 {
142 ::std::cerr << "BufX is null, lengthX is " << lengthX << ::std::endl;
143 }
144
145 ::std::cerr << ">> Failed to create/enqueue buffer for a matrix."
146 << ::std::endl
147 << ">> Can't execute the test, because data is not transfered to GPU."
148 << ::std::endl
149 << ">> Test skipped." << ::std::endl;
150 SUCCEED();
151 return;
152 }
153
154 ::std::cerr << "Calling clblas xHER routine... ";
155
156 err = (cl_int)::clMath::clblas::her( params->order, params->uplo, params->N, CREAL(alpha_),
157 bufX, params->offBX, params->incx, bufA, params->offa, params->lda,
158 params->numCommandQueues, base->commandQueues(),
159 0, NULL, events);
160
161 if (err != CL_SUCCESS) {
162 releaseMemObjects(bufA, bufX);
163 deleteBuffers<T>(backA, A, X);
164 delete[] events;
165 ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HER() failed";
166 }
167
168 err = waitForSuccessfulFinish(params->numCommandQueues,
169 base->commandQueues(), events);
170 if (err != CL_SUCCESS) {
171 releaseMemObjects(bufA, bufX);
172 deleteBuffers<T>(backA, A, X);
173 delete[] events;
174 ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()";
175 }
176 ::std::cerr << "Done" << ::std::endl;
177
178 err = clEnqueueReadBuffer(base->commandQueues()[0], bufA, CL_TRUE, 0,
179 (lengthA + params->offa) * sizeof(*A), backA, 0,
180 NULL, NULL);
181 if (err != CL_SUCCESS)
182 {
183 ::std::cerr << "HER: Reading results failed...." << std::endl;
184 }
185
186 releaseMemObjects(bufA, bufX);
187
188 printf("Comparing the results\n");
189 compareMatrices<T>(params->order, params->N , params->N, (A + params->offa), (backA + params->offa),
190 params->lda);
191
192 deleteBuffers<T>( A, backA, X);
193 delete[] events;
194 }
195
196 // Instantiate the test
197
// Runs the HER correctness test with FloatComplex elements, using the
// parameters obtained from getParams().
TEST_P(HER, cher) {
    TestParams params;

    getParams(&params);
    herCorrectnessTest<FloatComplex>(&params);
}
204
// Runs the HER correctness test with DoubleComplex elements, using the
// parameters obtained from getParams().
TEST_P(HER, zher) {
    TestParams params;

    getParams(&params);
    herCorrectnessTest<DoubleComplex>(&params);
}
211