1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
3
4 /* =========================================================================
5 Copyright (c) 2010-2016, Institute for Microelectronics,
6 Institute for Analysis and Scientific Computing,
7 TU Wien.
8 Portions of this software are copyright by UChicago Argonne, LLC.
9
10 -----------------
11 ViennaCL - The Vienna Computing Library
12 -----------------
13
14 Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15
16 (A list of authors and contributors can be found in the manual)
17
18 License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20
21 #include "viennacl/tools/tools.hpp"
22 #include "viennacl/ocl/kernel.hpp"
23 #include "viennacl/ocl/platform.hpp"
24 #include "viennacl/ocl/utils.hpp"
25
26 /** @file viennacl/linalg/opencl/kernels/scalar.hpp
27 * @brief OpenCL kernel file for scalar operations */
28 namespace viennacl
29 {
30 namespace linalg
31 {
32 namespace opencl
33 {
34 namespace kernels
35 {
36
37 //////////////////////////// Part 1: Kernel generation routines ////////////////////////////////////
38
39 /** @brief Enumeration for the scalar type in avbv-like operations */
40 enum asbs_scalar_type
41 {
42 VIENNACL_ASBS_NONE = 0, // scalar does not exist/contribute
43 VIENNACL_ASBS_CPU,
44 VIENNACL_ASBS_GPU
45 };
46
47 /** @brief Configuration struct for generating OpenCL kernels for linear combinations of viennacl::scalar<> objects */
48 struct asbs_config
49 {
asbs_configviennacl::linalg::opencl::kernels::asbs_config50 asbs_config() : with_stride_and_range(true), a(VIENNACL_ASBS_CPU), b(VIENNACL_ASBS_NONE) {}
51
52 bool with_stride_and_range;
53 std::string assign_op;
54 asbs_scalar_type a;
55 asbs_scalar_type b;
56 };
57
58 // just returns the assignment string
59 template<typename StringT>
generate_asbs_impl3(StringT & source,char sign_a,char sign_b,asbs_config const & cfg,bool mult_alpha,bool mult_beta)60 void generate_asbs_impl3(StringT & source, char sign_a, char sign_b, asbs_config const & cfg, bool mult_alpha, bool mult_beta)
61 {
62 source.append(" *s1 "); source.append(cfg.assign_op); source.append(1, sign_a); source.append(" *s2 ");
63 if (mult_alpha)
64 source.append("* alpha ");
65 else
66 source.append("/ alpha ");
67 if (cfg.b != VIENNACL_ASBS_NONE)
68 {
69 source.append(1, sign_b); source.append(" *s3 ");
70 if (mult_beta)
71 source.append("* beta");
72 else
73 source.append("/ beta");
74 }
75 source.append("; \n");
76 }
77
78 template<typename StringT>
generate_asbs_impl2(StringT & source,char sign_a,char sign_b,asbs_config const & cfg)79 void generate_asbs_impl2(StringT & source, char sign_a, char sign_b, asbs_config const & cfg)
80 {
81 source.append(" if (options2 & (1 << 1)) { \n");
82 if (cfg.b != VIENNACL_ASBS_NONE)
83 {
84 source.append(" if (options3 & (1 << 1)) \n");
85 generate_asbs_impl3(source, sign_a, sign_b, cfg, false, false);
86 source.append(" else \n");
87 generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
88 }
89 else
90 generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
91 source.append(" } else { \n");
92 if (cfg.b != VIENNACL_ASBS_NONE)
93 {
94 source.append(" if (options3 & (1 << 1)) \n");
95 generate_asbs_impl3(source, sign_a, sign_b, cfg, true, false);
96 source.append(" else \n");
97 generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
98 }
99 else
100 generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
101 source.append(" } \n");
102
103 }
104
105 template<typename StringT>
generate_asbs_impl(StringT & source,std::string const & numeric_string,asbs_config const & cfg)106 void generate_asbs_impl(StringT & source, std::string const & numeric_string, asbs_config const & cfg)
107 {
108 source.append("__kernel void as");
109 if (cfg.b != VIENNACL_ASBS_NONE)
110 source.append("bs");
111 if (cfg.assign_op != "=")
112 source.append("_s");
113
114 if (cfg.a == VIENNACL_ASBS_CPU)
115 source.append("_cpu");
116 else if (cfg.a == VIENNACL_ASBS_GPU)
117 source.append("_gpu");
118
119 if (cfg.b == VIENNACL_ASBS_CPU)
120 source.append("_cpu");
121 else if (cfg.b == VIENNACL_ASBS_GPU)
122 source.append("_gpu");
123 source.append("( \n");
124 source.append(" __global "); source.append(numeric_string); source.append(" * s1, \n");
125 source.append(" \n");
126 if (cfg.a == VIENNACL_ASBS_CPU)
127 {
128 source.append(" "); source.append(numeric_string); source.append(" fac2, \n");
129 }
130 else if (cfg.a == VIENNACL_ASBS_GPU)
131 {
132 source.append(" __global "); source.append(numeric_string); source.append(" * fac2, \n");
133 }
134 source.append(" unsigned int options2, \n"); // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
135 source.append(" __global const "); source.append(numeric_string); source.append(" * s2");
136
137 if (cfg.b != VIENNACL_ASBS_NONE)
138 {
139 source.append(", \n\n");
140 if (cfg.b == VIENNACL_ASBS_CPU)
141 {
142 source.append(" "); source.append(numeric_string); source.append(" fac3, \n");
143 }
144 else if (cfg.b == VIENNACL_ASBS_GPU)
145 {
146 source.append(" __global "); source.append(numeric_string); source.append(" * fac3, \n");
147 }
148 source.append(" unsigned int options3, \n"); // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
149 source.append(" __global const "); source.append(numeric_string); source.append(" * s3");
150 }
151 source.append(") \n{ \n");
152
153 if (cfg.a == VIENNACL_ASBS_CPU)
154 {
155 source.append(" "); source.append(numeric_string); source.append(" alpha = fac2; \n");
156 }
157 else if (cfg.a == VIENNACL_ASBS_GPU)
158 {
159 source.append(" "); source.append(numeric_string); source.append(" alpha = fac2[0]; \n");
160 }
161 source.append(" \n");
162
163 if (cfg.b == VIENNACL_ASBS_CPU)
164 {
165 source.append(" "); source.append(numeric_string); source.append(" beta = fac3; \n");
166 }
167 else if (cfg.b == VIENNACL_ASBS_GPU)
168 {
169 source.append(" "); source.append(numeric_string); source.append(" beta = fac3[0]; \n");
170 }
171
172 source.append(" if (options2 & (1 << 0)) { \n");
173 if (cfg.b != VIENNACL_ASBS_NONE)
174 {
175 source.append(" if (options3 & (1 << 0)) { \n");
176 generate_asbs_impl2(source, '-', '-', cfg);
177 source.append(" } else { \n");
178 generate_asbs_impl2(source, '-', '+', cfg);
179 source.append(" } \n");
180 }
181 else
182 generate_asbs_impl2(source, '-', '+', cfg);
183 source.append(" } else { \n");
184 if (cfg.b != VIENNACL_ASBS_NONE)
185 {
186 source.append(" if (options3 & (1 << 0)) { \n");
187 generate_asbs_impl2(source, '+', '-', cfg);
188 source.append(" } else { \n");
189 generate_asbs_impl2(source, '+', '+', cfg);
190 source.append(" } \n");
191 }
192 else
193 generate_asbs_impl2(source, '+', '+', cfg);
194
195 source.append(" } \n");
196 source.append("} \n");
197 }
198
199 template<typename StringT>
generate_asbs(StringT & source,std::string const & numeric_string)200 void generate_asbs(StringT & source, std::string const & numeric_string)
201 {
202 asbs_config cfg;
203 cfg.assign_op = "=";
204 cfg.with_stride_and_range = true;
205
206 // as
207 cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
208 cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
209
210 // asbs
211 cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
212 cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
213 cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
214 cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
215
216 // asbs
217 cfg.assign_op = "+=";
218
219 cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
220 cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
221 cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
222 cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
223 }
224
225 template<typename StringT>
generate_scalar_swap(StringT & source,std::string const & numeric_string)226 void generate_scalar_swap(StringT & source, std::string const & numeric_string)
227 {
228 source.append("__kernel void swap( \n");
229 source.append(" __global "); source.append(numeric_string); source.append(" * s1, \n");
230 source.append(" __global "); source.append(numeric_string); source.append(" * s2) \n");
231 source.append("{ \n");
232 source.append(" "); source.append(numeric_string); source.append(" tmp = *s2; \n");
233 source.append(" *s2 = *s1; \n");
234 source.append(" *s1 = tmp; \n");
235 source.append("} \n");
236 }
237
238 //////////////////////////// Part 2: Main kernel class ////////////////////////////////////
239
240 // main kernel class
241 /** @brief Main kernel class for generating OpenCL kernels for operations involving viennacl::scalar<>, but not viennacl::vector<> or viennacl::matrix<>. */
242 template<typename NumericT>
243 struct scalar
244 {
program_nameviennacl::linalg::opencl::kernels::scalar245 static std::string program_name()
246 {
247 return viennacl::ocl::type_to_string<NumericT>::apply() + "_scalar";
248 }
249
initviennacl::linalg::opencl::kernels::scalar250 static void init(viennacl::ocl::context & ctx)
251 {
252 static std::map<cl_context, bool> init_done;
253 if (!init_done[ctx.handle().get()])
254 {
255 viennacl::ocl::DOUBLE_PRECISION_CHECKER<NumericT>::apply(ctx);
256 std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
257
258 std::string source;
259 source.reserve(8192);
260
261 viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
262
263 // fully parametrized kernels:
264 generate_asbs(source, numeric_string);
265 generate_scalar_swap(source, numeric_string);
266
267
268 std::string prog_name = program_name();
269 #ifdef VIENNACL_BUILD_INFO
270 std::cout << "Creating program " << prog_name << std::endl;
271 #endif
272 ctx.add_program(source, prog_name);
273 init_done[ctx.handle().get()] = true;
274 } //if
275 } //init
276 };
277
278 } // namespace kernels
279 } // namespace opencl
280 } // namespace linalg
281 } // namespace viennacl
282 #endif
283
284