1 // Boost.uBLAS
2 //
3 // Copyright (c) 2018 Fady Essam
4 // Copyright (c) 2018 Stefan Seefeld
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // (See accompanying file LICENSE_1_0.txt or
8 // copy at http://www.boost.org/LICENSE_1_0.txt)
9 
10 #ifndef boost_numeric_ublas_opencl_transpose_hpp_
11 #define boost_numeric_ublas_opencl_transpose_hpp_
12 
13 #include <boost/numeric/ublas/opencl/library.hpp>
14 #include <boost/numeric/ublas/opencl/vector.hpp>
15 #include <boost/numeric/ublas/opencl/matrix.hpp>
16 
17 // Kernel for transposition of various data types
18 #define OPENCL_TRANSPOSITION_KERNEL(DATA_TYPE)	\
19 "__kernel void transpose(__global "  #DATA_TYPE "* in, __global " #DATA_TYPE "* result, unsigned int width, unsigned int height) \n"                       \
20 "{ \n"								        \
21 "  unsigned int column_index = get_global_id(0); \n"			\
22 "  unsigned int row_index = get_global_id(1); \n"			\
23 "  if (column_index < width && row_index < height) \n"			\
24 "  { \n"							      	\
25 "    unsigned int index_in = column_index + width * row_index; \n"	\
26 "    unsigned int index_result = row_index + height * column_index; \n"	\
27 "    result[index_result] = in[index_in]; \n"				\
28 "  } \n"								\
29 "} \n"
30 
31 
32 namespace boost { namespace numeric { namespace ublas { namespace opencl {
33 
34 template<class T, class L1, class L2>
35 typename std::enable_if<is_numeric<T>::value>::type
change_layout(ublas::matrix<T,L1,opencl::storage> const & m,ublas::matrix<T,L2,opencl::storage> & result,compute::command_queue & queue)36 change_layout(ublas::matrix<T, L1, opencl::storage> const &m,
37 	      ublas::matrix<T, L2, opencl::storage> &result,
38 	      compute::command_queue& queue)
39 {
40   assert(m.size1() == result.size1() && m.size2() == result.size2());
41   assert(m.device() == result.device() && m.device() == queue.get_device());
42   assert(!(std::is_same<L1, L2>::value));
43   char const *kernel;
44   if (std::is_same<T, float>::value)
45     kernel = OPENCL_TRANSPOSITION_KERNEL(float);
46   else if (std::is_same<T, double>::value)
47     kernel = OPENCL_TRANSPOSITION_KERNEL(double);
48   else if (std::is_same<T, std::complex<float>>::value)
49     kernel = OPENCL_TRANSPOSITION_KERNEL(float2);
50   else if (std::is_same<T, std::complex<double>>::value)
51     kernel = OPENCL_TRANSPOSITION_KERNEL(double2);
52   size_t len = strlen(kernel);
53   cl_int err;
54   cl_context c_context = queue.get_context().get();
55   cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err);
56   clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL);
57   cl_kernel c_kernel = clCreateKernel(program, "transpose", &err);
58   size_t width = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size2() : m.size1();
59   size_t height = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size1() : m.size2();
60   size_t global_size[2] = { width , height };
61   clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get());
62   clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get());
63   clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width);
64   clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height);
65   cl_command_queue c_queue = queue.get();
66   cl_event event = NULL;
67   clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event);
68   clWaitForEvents(1, &event);
69 }
70 
71 template<class T, class L1, class L2, class A>
72 typename std::enable_if<is_numeric<T>::value>::type
change_layout(ublas::matrix<T,L1,A> const & m,ublas::matrix<T,L2,A> & result,compute::command_queue & queue)73 change_layout(ublas::matrix<T, L1, A> const &m,
74 	      ublas::matrix<T, L2, A> &result,
75 	      compute::command_queue& queue)
76 {
77   ublas::matrix<T, L1, opencl::storage> mdev(m, queue);
78   ublas::matrix<T, L2, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
79   change_layout(mdev, rdev, queue);
80   rdev.to_host(result, queue);
81 }
82 
83 template<class T, class L>
84 typename std::enable_if<is_numeric<T>::value>::type
trans(ublas::matrix<T,L,opencl::storage> const & m,ublas::matrix<T,L,opencl::storage> & result,compute::command_queue & queue)85 trans(ublas::matrix<T, L, opencl::storage> const &m,
86       ublas::matrix<T, L, opencl::storage> &result,
87       compute::command_queue& queue)
88 {
89   assert(m.size1() == result.size2() && m.size2() == result.size1());
90   assert(m.device() == result.device() && m.device() == queue.get_device());
91   char const *kernel;
92   if (std::is_same<T, float>::value)
93     kernel = OPENCL_TRANSPOSITION_KERNEL(float);
94   else if (std::is_same<T, double>::value)
95     kernel = OPENCL_TRANSPOSITION_KERNEL(double);
96   else if (std::is_same<T, std::complex<float>>::value)
97     kernel = OPENCL_TRANSPOSITION_KERNEL(float2);
98   else if (std::is_same<T, std::complex<double>>::value)
99     kernel = OPENCL_TRANSPOSITION_KERNEL(double2);
100   size_t len = strlen(kernel);
101   cl_int err;
102   cl_context c_context = queue.get_context().get();
103   cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err);
104   clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL);
105   cl_kernel c_kernel = clCreateKernel(program, "transpose", &err);
106   size_t width = std::is_same <L, ublas::basic_row_major<>>::value ? m.size2() : m.size1();
107   size_t height = std::is_same <L, ublas::basic_row_major<>>::value ? m.size1() : m.size2();
108   size_t global_size[2] = { width , height };
109   clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get());
110   clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get());
111   clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width);
112   clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height);
113   cl_command_queue c_queue = queue.get();
114   cl_event event = NULL;
115   clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event);
116   clWaitForEvents(1, &event);
117 }
118 
119 template<class T, class L, class A>
120 typename std::enable_if<is_numeric<T>::value>::type
trans(ublas::matrix<T,L,A> const & m,ublas::matrix<T,L,A> & result,compute::command_queue & queue)121 trans(ublas::matrix<T, L, A> const &m,
122       ublas::matrix<T, L, A> &result,
123       compute::command_queue& queue)
124 {
125   ublas::matrix<T, L, opencl::storage> mdev(m, queue);
126   ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
127   trans(mdev, rdev, queue);
128   rdev.to_host(result, queue);
129 }
130 
131 template<class T, class L, class A>
132 typename std::enable_if<is_numeric<T>::value, ublas::matrix<T, L, A>>::type
trans(ublas::matrix<T,L,A> & m,compute::command_queue & queue)133 trans(ublas::matrix<T, L, A>& m, compute::command_queue& queue)
134 {
135   ublas::matrix<T, L, A> result(m.size2(), m.size1());
136   trans(m, result, queue);
137   return result;
138 }
139 
140 }}}}
141 
142 #endif
143