1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
18 // $Id
19
20 #ifndef CLBLAS_BENCHMARK_XSYR_HXX__
21 #define CLBLAS_BENCHMARK_XSYR_HXX__
22
23 #include "clfunc_common.hpp"
24
25 template <typename T>
26 struct xSyrBuffer
27 {
28 clblasOrder order;
29 clblasUplo uplo;
30 size_t N;
31 T alpha;
32 T* cpuX;
33 cl_mem X;
34 size_t offx;
35 int incx;
36 T* cpuA;
37 cl_mem A;
38 size_t offa;
39 size_t lda;
40 }; // struct buffer
41
42 template <typename T>
43 class xSyr : public clblasFunc
44 {
45 public:
xSyr(StatisticalTimer & timer,cl_device_type devType)46 xSyr(StatisticalTimer& timer, cl_device_type devType) : clblasFunc(timer, devType)
47 {
48 timer.getUniqueID("clSyr", 0);
49 }
50
~xSyr()51 ~xSyr()
52 {
53 delete buffer.cpuA;
54 delete buffer.cpuX;
55 OPENCL_V_THROW( clReleaseMemObject(buffer.A), "releasing buffer A");
56 OPENCL_V_THROW( clReleaseMemObject(buffer.X), "releasing buffer C");
57 }
58
gflops()59 double gflops()
60 {
61 return static_cast<double>((buffer.N * buffer.N)/time_in_ns());
62 }
63
gflops_formula()64 std::string gflops_formula()
65 {
66 return "N*N/time";
67 }
68
69 void setup_buffer(int order_option, int side_option, int
70 uplo_option, int diag_option, int transA_option, int
71 transB_option, size_t M, size_t N, size_t K,
72 size_t lda, size_t ldb, size_t ldc,size_t offA,
73 size_t offB, size_t offC, double alpha,
74 double beta);
75 void initialize_cpu_buffer();
76 void initialize_gpu_buffer();
77 void reset_gpu_write_buffer();
78 void call_func();
read_gpu_buffer()79 void read_gpu_buffer()
80 {
81 //cl_int err;
82 //to-do need to fill up
83 }
roundtrip_func()84 void roundtrip_func()
85 {//to-do need to fill up
86 }
roundtrip_setup_buffer(int order_option,int side_option,int uplo_option,int diag_option,int transA_option,int transB_option,size_t M,size_t N,size_t K,size_t lda,size_t ldb,size_t ldc,size_t offA,size_t offBX,size_t offCY,double alpha,double beta)87 void roundtrip_setup_buffer(int order_option, int side_option, int uplo_option,
88 int diag_option, int transA_option, int transB_option,
89 size_t M, size_t N, size_t K, size_t lda, size_t ldb,
90 size_t ldc, size_t offA, size_t offBX, size_t offCY,
91 double alpha, double beta)
92 {}
releaseGPUBuffer_deleteCPUBuffer()93 void releaseGPUBuffer_deleteCPUBuffer()
94 {
95 //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
96 //need to do this before we eventually hit the destructor
97 //to-do
98 }
99
100 protected:
101 protected:
initialize_scalars(double alpha,double beta)102 void initialize_scalars(double alpha, double beta)
103 {
104 buffer.alpha = alpha;
105 }
106
107 private:
108 xSyrBuffer<T> buffer;
109 };
110
111 template <typename T>
setup_buffer(int order_option,int side_option,int uplo_option,int diag_option,int transA_option,int transB_option,size_t M,size_t N,size_t K,size_t lda,size_t ldb,size_t ldc,size_t offA,size_t offB,size_t offC,double alpha,double beta)112 void xSyr<T>::setup_buffer(int order_option, int side_option, int
113 uplo_option, int diag_option, int transA_option, int
114 transB_option, size_t M, size_t N, size_t K,
115 size_t lda, size_t ldb, size_t ldc,size_t offA,
116 size_t offB, size_t offC, double alpha,
117 double beta)
118 {
119 initialize_scalars(alpha, beta);
120 buffer.offa = offA;
121 buffer.offx = offB;
122 buffer.incx = 1;
123 buffer.N = M;
124 if (order_option == 0)
125 {
126 buffer.order = clblasRowMajor;
127 }
128 else
129 {
130 buffer.order = clblasColumnMajor;
131 }
132 if (uplo_option == 0)
133 {
134 buffer.uplo = clblasUpper;
135 }
136 else
137 {
138 buffer.uplo = clblasLower;
139 }
140 if (lda == 0)
141 {
142 buffer.lda = buffer.N;
143 }
144 else if (lda < buffer.N)
145 {
146 std::cerr << "lda:wrong size\n";
147 exit(1);
148 }
149 else
150 {
151 buffer.lda = lda;
152 }
153 buffer.cpuX = new T[buffer.N];
154 buffer.cpuA = new T[buffer.N * buffer.lda];
155 cl_int err;
156 buffer.A = clCreateBuffer(ctx_, CL_MEM_READ_ONLY,
157 buffer.N * buffer.lda*sizeof(T),
158 NULL, &err);
159
160 buffer.X = clCreateBuffer(ctx_, CL_MEM_READ_WRITE,
161 buffer.N*sizeof(T),
162 NULL, &err);
163 }
164
165 template <typename T>
initialize_cpu_buffer()166 void xSyr<T>::initialize_cpu_buffer()
167 {
168 srand(10);
169 for (size_t i = 0; i < buffer.N; ++i)
170 {
171 for (size_t j = 0; j < buffer.lda; ++j)
172 {
173 buffer.cpuA[i*buffer.lda+j] = random<T>(UPPER_BOUND<T>()) /
174 randomScale<T>();
175 }
176 }
177
178 for (size_t i = 0; i < buffer.N; ++i)
179 {
180 buffer.cpuX[i] = random<T>(UPPER_BOUND<T>()) /
181 randomScale<T>();
182 }
183 }
184
185 template <typename T>
initialize_gpu_buffer()186 void xSyr<T>::initialize_gpu_buffer()
187 {
188 cl_int err;
189
190 err = clEnqueueWriteBuffer(queues_[0], buffer.A, CL_TRUE,
191 buffer.offa * sizeof(T),
192 buffer.N * buffer.lda*sizeof(T),
193 buffer.cpuA, 0, NULL, NULL);
194
195 err = clEnqueueWriteBuffer(queues_[0], buffer.X, CL_TRUE, 0,
196 buffer.N*sizeof(T),
197 buffer.cpuX, 0, NULL, NULL);
198 }
199
200 template <typename T>
reset_gpu_write_buffer()201 void xSyr<T>::reset_gpu_write_buffer()
202 {
203 cl_int err;
204 err = clEnqueueWriteBuffer(queues_[0], buffer.A, CL_TRUE,
205 buffer.offa * sizeof(T),
206 buffer.N * buffer.lda*sizeof(T),
207 buffer.cpuA, 0, NULL, NULL);;
208 }
209
210 template <>
call_func()211 void xSyr<cl_float>::call_func()
212 {
213 timer.Start(timer_id);
214 clblasSsyr(buffer.order, buffer.uplo, buffer.N, buffer.alpha, buffer.X, buffer.offx,
215 buffer.incx, buffer.A, buffer.offa, buffer.lda, numQueues, queues_, 0, NULL,&event_);
216 clWaitForEvents(1, &event_);
217 timer.Stop(timer_id);
218 }
219
220 template <>
call_func()221 void xSyr<cl_double>::call_func()
222 {
223 timer.Start(timer_id);
224 clblasSsyr(buffer.order, buffer.uplo, buffer.N, buffer.alpha, buffer.X, buffer.offx,
225 buffer.incx, buffer.A, buffer.offa, buffer.lda, numQueues, queues_, 0, NULL,&event_);
226 clWaitForEvents(1, &event_);
227 timer.Stop(timer_id);
228 }
229
230 #endif // ifndef CLBLAS_BENCHMARK_XSYR_HXX__