1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 // $Id
19 
20 #ifndef CLBLAS_BENCHMARK_XSYR_HXX__
21 #define CLBLAS_BENCHMARK_XSYR_HXX__
22 
23 #include "clfunc_common.hpp"
24 
25 template <typename T>
26 struct xSyrBuffer
27 {
28 	clblasOrder order;
29   clblasUplo uplo;
30   size_t N;
31   T alpha;
32   T* cpuX;
33   cl_mem X;
34   size_t offx;
35   int incx;
36   T* cpuA;
37   cl_mem A;
38   size_t offa;
39   size_t lda;
40 }; // struct buffer
41 
42 template <typename T>
43 class xSyr : public clblasFunc
44 {
45 public:
xSyr(StatisticalTimer & timer,cl_device_type devType)46   xSyr(StatisticalTimer& timer, cl_device_type devType) : clblasFunc(timer,  devType)
47   {
48     timer.getUniqueID("clSyr", 0);
49   }
50 
~xSyr()51   ~xSyr()
52   {
53     delete buffer.cpuA;
54     delete buffer.cpuX;
55     OPENCL_V_THROW( clReleaseMemObject(buffer.A), "releasing buffer A");
56     OPENCL_V_THROW( clReleaseMemObject(buffer.X), "releasing buffer C");
57   }
58 
gflops()59   double gflops()
60   {
61     return static_cast<double>((buffer.N * buffer.N)/time_in_ns());
62   }
63 
gflops_formula()64   std::string gflops_formula()
65   {
66     return "N*N/time";
67   }
68 
69   void setup_buffer(int order_option, int side_option, int
70                     uplo_option, int diag_option, int transA_option, int
71                     transB_option, size_t M, size_t N, size_t K,
72                     size_t lda, size_t ldb, size_t ldc,size_t offA,
73 					          size_t offB, size_t offC, double alpha,
74                     double beta);
75   void initialize_cpu_buffer();
76   void initialize_gpu_buffer();
77   void reset_gpu_write_buffer();
78   void call_func();
read_gpu_buffer()79   	void read_gpu_buffer()
80 	{
81 		//cl_int err;
82 		//to-do need to fill up
83 	}
roundtrip_func()84 	void roundtrip_func()
85 	{//to-do need to fill up
86 	}
roundtrip_setup_buffer(int order_option,int side_option,int uplo_option,int diag_option,int transA_option,int transB_option,size_t M,size_t N,size_t K,size_t lda,size_t ldb,size_t ldc,size_t offA,size_t offBX,size_t offCY,double alpha,double beta)87 	void roundtrip_setup_buffer(int order_option, int side_option, int uplo_option,
88                       int diag_option, int transA_option, int  transB_option,
89                       size_t M, size_t N, size_t K, size_t lda, size_t ldb,
90                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
91                       double alpha, double beta)
92 		{}
releaseGPUBuffer_deleteCPUBuffer()93 	void releaseGPUBuffer_deleteCPUBuffer()
94 	{
95 		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
96 		//need to do this before we eventually hit the destructor
97         //to-do
98 	}
99 
100 protected:
101 protected:
initialize_scalars(double alpha,double beta)102   void initialize_scalars(double alpha, double beta)
103   {
104     buffer.alpha = alpha;
105   }
106 
107 private:
108   xSyrBuffer<T> buffer;
109 };
110 
111 template <typename T>
setup_buffer(int order_option,int side_option,int uplo_option,int diag_option,int transA_option,int transB_option,size_t M,size_t N,size_t K,size_t lda,size_t ldb,size_t ldc,size_t offA,size_t offB,size_t offC,double alpha,double beta)112 void xSyr<T>::setup_buffer(int order_option, int side_option, int
113                     uplo_option, int diag_option, int transA_option, int
114                     transB_option, size_t M, size_t N, size_t K,
115                     size_t lda, size_t ldb, size_t ldc,size_t offA,
116 					          size_t offB, size_t offC, double alpha,
117                     double beta)
118 {
119   initialize_scalars(alpha, beta);
120   buffer.offa = offA;
121   buffer.offx = offB;
122   buffer.incx = 1;
123   buffer.N = M;
124   if (order_option == 0)
125   {
126   buffer.order = clblasRowMajor;
127   }
128   else
129   {
130   buffer.order = clblasColumnMajor;
131   }
132   if (uplo_option == 0)
133   {
134       buffer.uplo = clblasUpper;
135   }
136   else
137   {
138       buffer.uplo = clblasLower;
139   }
140   if (lda == 0)
141   {
142     buffer.lda = buffer.N;
143   }
144   else if (lda < buffer.N)
145   {
146     std::cerr << "lda:wrong size\n";
147     exit(1);
148   }
149   else
150   {
151     buffer.lda = lda;
152   }
153   buffer.cpuX = new T[buffer.N];
154   buffer.cpuA = new T[buffer.N * buffer.lda];
155   cl_int err;
156   buffer.A = clCreateBuffer(ctx_, CL_MEM_READ_ONLY,
157                                 buffer.N * buffer.lda*sizeof(T),
158                                 NULL, &err);
159 
160   buffer.X = clCreateBuffer(ctx_, CL_MEM_READ_WRITE,
161                                     buffer.N*sizeof(T),
162                                     NULL, &err);
163 }
164 
165 template <typename T>
initialize_cpu_buffer()166 void xSyr<T>::initialize_cpu_buffer()
167 {
168   srand(10);
169   for (size_t i = 0; i < buffer.N; ++i)
170   {
171     for (size_t j = 0; j < buffer.lda; ++j)
172     {
173         buffer.cpuA[i*buffer.lda+j] = random<T>(UPPER_BOUND<T>()) /
174                                       randomScale<T>();
175     }
176   }
177 
178   for (size_t i = 0; i < buffer.N; ++i)
179   {
180     buffer.cpuX[i] = random<T>(UPPER_BOUND<T>()) /
181                                       randomScale<T>();
182   }
183 }
184 
185 template <typename T>
initialize_gpu_buffer()186 void xSyr<T>::initialize_gpu_buffer()
187 {
188   cl_int err;
189 
190   err = clEnqueueWriteBuffer(queues_[0], buffer.A, CL_TRUE,
191                               buffer.offa * sizeof(T),
192                               buffer.N * buffer.lda*sizeof(T),
193                               buffer.cpuA, 0, NULL, NULL);
194 
195   err = clEnqueueWriteBuffer(queues_[0], buffer.X, CL_TRUE, 0,
196                               buffer.N*sizeof(T),
197                               buffer.cpuX, 0, NULL, NULL);
198 }
199 
200 template <typename T>
reset_gpu_write_buffer()201 void xSyr<T>::reset_gpu_write_buffer()
202 {
203   cl_int err;
204   err = clEnqueueWriteBuffer(queues_[0], buffer.A, CL_TRUE,
205                               buffer.offa * sizeof(T),
206                               buffer.N * buffer.lda*sizeof(T),
207                               buffer.cpuA, 0, NULL, NULL);;
208 }
209 
210 template <>
call_func()211 void xSyr<cl_float>::call_func()
212 {
213   timer.Start(timer_id);
214   clblasSsyr(buffer.order, buffer.uplo, buffer.N, buffer.alpha, buffer.X, buffer.offx,
215               buffer.incx, buffer.A, buffer.offa, buffer.lda, numQueues, queues_, 0, NULL,&event_);
216   clWaitForEvents(1, &event_);
217   timer.Stop(timer_id);
218 }
219 
220 template <>
call_func()221 void xSyr<cl_double>::call_func()
222 {
223   timer.Start(timer_id);
224   clblasSsyr(buffer.order, buffer.uplo, buffer.N, buffer.alpha, buffer.X, buffer.offx,
225               buffer.incx, buffer.A, buffer.offa, buffer.lda, numQueues, queues_, 0, NULL,&event_);
226   clWaitForEvents(1, &event_);
227   timer.Stop(timer_id);
228 }
229 
230 #endif // ifndef CLBLAS_BENCHMARK_XSYR_HXX__