1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
3 
4 /* =========================================================================
5    Copyright (c) 2010-2016, Institute for Microelectronics,
6                             Institute for Analysis and Scientific Computing,
7                             TU Wien.
8    Portions of this software are copyright by UChicago Argonne, LLC.
9 
10                             -----------------
11                   ViennaCL - The Vienna Computing Library
12                             -----------------
13 
14    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
15 
16    (A list of authors and contributors can be found in the manual)
17 
18    License:         MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include "viennacl/tools/tools.hpp"
22 #include "viennacl/ocl/kernel.hpp"
23 #include "viennacl/ocl/platform.hpp"
24 #include "viennacl/ocl/utils.hpp"
25 
26 /** @file viennacl/linalg/opencl/kernels/scalar.hpp
27  *  @brief OpenCL kernel file for scalar operations */
28 namespace viennacl
29 {
30 namespace linalg
31 {
32 namespace opencl
33 {
34 namespace kernels
35 {
36 
37 //////////////////////////// Part 1: Kernel generation routines ////////////////////////////////////
38 
39 /** @brief Enumeration for the scalar type in avbv-like operations */
40 enum asbs_scalar_type
41 {
42   VIENNACL_ASBS_NONE = 0, // scalar does not exist/contribute
43   VIENNACL_ASBS_CPU,
44   VIENNACL_ASBS_GPU
45 };
46 
47 /** @brief Configuration struct for generating OpenCL kernels for linear combinations of viennacl::scalar<> objects */
48 struct asbs_config
49 {
asbs_configviennacl::linalg::opencl::kernels::asbs_config50   asbs_config() : with_stride_and_range(true), a(VIENNACL_ASBS_CPU), b(VIENNACL_ASBS_NONE) {}
51 
52   bool with_stride_and_range;
53   std::string      assign_op;
54   asbs_scalar_type a;
55   asbs_scalar_type b;
56 };
57 
58 // just returns the assignment string
59 template<typename StringT>
generate_asbs_impl3(StringT & source,char sign_a,char sign_b,asbs_config const & cfg,bool mult_alpha,bool mult_beta)60 void generate_asbs_impl3(StringT & source, char sign_a, char sign_b, asbs_config const & cfg, bool mult_alpha, bool mult_beta)
61 {
62   source.append("      *s1 "); source.append(cfg.assign_op); source.append(1, sign_a); source.append(" *s2 ");
63   if (mult_alpha)
64     source.append("* alpha ");
65   else
66     source.append("/ alpha ");
67   if (cfg.b != VIENNACL_ASBS_NONE)
68   {
69     source.append(1, sign_b); source.append(" *s3 ");
70     if (mult_beta)
71       source.append("* beta");
72     else
73       source.append("/ beta");
74   }
75   source.append("; \n");
76 }
77 
78 template<typename StringT>
generate_asbs_impl2(StringT & source,char sign_a,char sign_b,asbs_config const & cfg)79 void generate_asbs_impl2(StringT & source, char sign_a, char sign_b, asbs_config const & cfg)
80 {
81   source.append("    if (options2 & (1 << 1)) { \n");
82   if (cfg.b != VIENNACL_ASBS_NONE)
83   {
84     source.append("     if (options3 & (1 << 1)) \n");
85     generate_asbs_impl3(source, sign_a, sign_b, cfg, false, false);
86     source.append("     else \n");
87     generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
88   }
89   else
90     generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
91   source.append("    } else { \n");
92   if (cfg.b != VIENNACL_ASBS_NONE)
93   {
94     source.append("     if (options3 & (1 << 1)) \n");
95     generate_asbs_impl3(source, sign_a, sign_b, cfg, true, false);
96     source.append("     else \n");
97     generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
98   }
99   else
100     generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
101   source.append("    } \n");
102 
103 }
104 
105 template<typename StringT>
generate_asbs_impl(StringT & source,std::string const & numeric_string,asbs_config const & cfg)106 void generate_asbs_impl(StringT & source, std::string const & numeric_string, asbs_config const & cfg)
107 {
108   source.append("__kernel void as");
109   if (cfg.b != VIENNACL_ASBS_NONE)
110     source.append("bs");
111   if (cfg.assign_op != "=")
112     source.append("_s");
113 
114   if (cfg.a == VIENNACL_ASBS_CPU)
115     source.append("_cpu");
116   else if (cfg.a == VIENNACL_ASBS_GPU)
117     source.append("_gpu");
118 
119   if (cfg.b == VIENNACL_ASBS_CPU)
120     source.append("_cpu");
121   else if (cfg.b == VIENNACL_ASBS_GPU)
122     source.append("_gpu");
123   source.append("( \n");
124   source.append("  __global "); source.append(numeric_string); source.append(" * s1, \n");
125   source.append(" \n");
126   if (cfg.a == VIENNACL_ASBS_CPU)
127   {
128     source.append("  "); source.append(numeric_string); source.append(" fac2, \n");
129   }
130   else if (cfg.a == VIENNACL_ASBS_GPU)
131   {
132     source.append("  __global "); source.append(numeric_string); source.append(" * fac2, \n");
133   }
134   source.append("  unsigned int options2, \n");  // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
135   source.append("  __global const "); source.append(numeric_string); source.append(" * s2");
136 
137   if (cfg.b != VIENNACL_ASBS_NONE)
138   {
139     source.append(", \n\n");
140     if (cfg.b == VIENNACL_ASBS_CPU)
141     {
142       source.append("  "); source.append(numeric_string); source.append(" fac3, \n");
143     }
144     else if (cfg.b == VIENNACL_ASBS_GPU)
145     {
146       source.append("  __global "); source.append(numeric_string); source.append(" * fac3, \n");
147     }
148     source.append("  unsigned int options3, \n");  // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
149     source.append("  __global const "); source.append(numeric_string); source.append(" * s3");
150   }
151   source.append(") \n{ \n");
152 
153   if (cfg.a == VIENNACL_ASBS_CPU)
154   {
155     source.append("  "); source.append(numeric_string); source.append(" alpha = fac2; \n");
156   }
157   else if (cfg.a == VIENNACL_ASBS_GPU)
158   {
159     source.append("  "); source.append(numeric_string); source.append(" alpha = fac2[0]; \n");
160   }
161   source.append(" \n");
162 
163   if (cfg.b == VIENNACL_ASBS_CPU)
164   {
165     source.append("  "); source.append(numeric_string); source.append(" beta = fac3; \n");
166   }
167   else if (cfg.b == VIENNACL_ASBS_GPU)
168   {
169     source.append("  "); source.append(numeric_string); source.append(" beta = fac3[0]; \n");
170   }
171 
172   source.append("  if (options2 & (1 << 0)) { \n");
173   if (cfg.b != VIENNACL_ASBS_NONE)
174   {
175     source.append("   if (options3 & (1 << 0)) { \n");
176     generate_asbs_impl2(source, '-', '-', cfg);
177     source.append("   } else { \n");
178     generate_asbs_impl2(source, '-', '+', cfg);
179     source.append("   } \n");
180   }
181   else
182     generate_asbs_impl2(source, '-', '+', cfg);
183   source.append("  } else { \n");
184   if (cfg.b != VIENNACL_ASBS_NONE)
185   {
186     source.append("   if (options3 & (1 << 0)) { \n");
187     generate_asbs_impl2(source, '+', '-', cfg);
188     source.append("   } else { \n");
189     generate_asbs_impl2(source, '+', '+', cfg);
190     source.append("   } \n");
191   }
192   else
193     generate_asbs_impl2(source, '+', '+', cfg);
194 
195   source.append("  } \n");
196   source.append("} \n");
197 }
198 
199 template<typename StringT>
generate_asbs(StringT & source,std::string const & numeric_string)200 void generate_asbs(StringT & source, std::string const & numeric_string)
201 {
202   asbs_config cfg;
203   cfg.assign_op = "=";
204   cfg.with_stride_and_range = true;
205 
206   // as
207   cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
208   cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
209 
210   // asbs
211   cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
212   cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
213   cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
214   cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
215 
216   // asbs
217   cfg.assign_op = "+=";
218 
219   cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
220   cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
221   cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
222   cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
223 }
224 
225 template<typename StringT>
generate_scalar_swap(StringT & source,std::string const & numeric_string)226 void generate_scalar_swap(StringT & source, std::string const & numeric_string)
227 {
228   source.append("__kernel void swap( \n");
229   source.append("          __global "); source.append(numeric_string); source.append(" * s1, \n");
230   source.append("          __global "); source.append(numeric_string); source.append(" * s2) \n");
231   source.append("{ \n");
232   source.append("  "); source.append(numeric_string); source.append(" tmp = *s2; \n");
233   source.append("  *s2 = *s1; \n");
234   source.append("  *s1 = tmp; \n");
235   source.append("} \n");
236 }
237 
238 //////////////////////////// Part 2: Main kernel class ////////////////////////////////////
239 
240 // main kernel class
241 /** @brief Main kernel class for generating OpenCL kernels for operations involving viennacl::scalar<>, but not viennacl::vector<> or viennacl::matrix<>. */
242 template<typename NumericT>
243 struct scalar
244 {
program_nameviennacl::linalg::opencl::kernels::scalar245   static std::string program_name()
246   {
247     return viennacl::ocl::type_to_string<NumericT>::apply() + "_scalar";
248   }
249 
initviennacl::linalg::opencl::kernels::scalar250   static void init(viennacl::ocl::context & ctx)
251   {
252     static std::map<cl_context, bool> init_done;
253     if (!init_done[ctx.handle().get()])
254     {
255       viennacl::ocl::DOUBLE_PRECISION_CHECKER<NumericT>::apply(ctx);
256       std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
257 
258       std::string source;
259       source.reserve(8192);
260 
261       viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
262 
263       // fully parametrized kernels:
264       generate_asbs(source, numeric_string);
265       generate_scalar_swap(source, numeric_string);
266 
267 
268       std::string prog_name = program_name();
269       #ifdef VIENNACL_BUILD_INFO
270       std::cout << "Creating program " << prog_name << std::endl;
271       #endif
272       ctx.add_program(source, prog_name);
273       init_done[ctx.handle().get()] = true;
274     } //if
275   } //init
276 };
277 
278 }  // namespace kernels
279 }  // namespace opencl
280 }  // namespace linalg
281 }  // namespace viennacl
282 #endif
283 
284