1 #ifndef VEXCL_GENERATOR_HPP
2 #define VEXCL_GENERATOR_HPP
3 
4 /*
5 The MIT License
6 
7 Copyright (c) 2012-2018 Denis Demidov <dennis.demidov@gmail.com>
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 */
27 
28 /**
29  * \file   generator.hpp
30  * \author Denis Demidov <dennis.demidov@gmail.com>
31  * \brief  OpenCL kernel generator.
32  */
33 
34 #include <iostream>
35 #include <iomanip>
36 #include <sstream>
37 #include <string>
38 #include <stdexcept>
39 #include <memory>
40 
41 #include <boost/proto/proto.hpp>
42 #include <boost/function_types/parameter_types.hpp>
43 #include <boost/function_types/result_type.hpp>
44 #include <boost/function_types/function_arity.hpp>
45 
46 #include <boost/fusion/include/for_each.hpp>
47 #include <boost/fusion/include/vector_tie.hpp>
48 
49 #include <vexcl/util.hpp>
50 #include <vexcl/operations.hpp>
51 #include <vexcl/function.hpp>
52 #include <vexcl/vector.hpp>
53 
54 #include <boost/preprocessor/repetition.hpp>
55 #ifndef VEXCL_MAX_ARITY
56 #  define VEXCL_MAX_ARITY BOOST_PROTO_MAX_ARITY
57 #endif
58 
59 /// Vector expression template library for OpenCL.
60 namespace vex {
61 
// Forward declaration: the symbolic variable class is defined further down in
// this header, after the generator namespace that it depends on.
template <typename T> class symbolic;

/// Sends name of the symbolic variable to output stream.
/** The name has the form "var<id>", where <id> is the value of sym.id(). */
template <typename T>
std::ostream& operator<<(std::ostream &os, const symbolic<T> &sym);
67 
68 /// Kernel generation interface.
69 namespace generator {
70 
71 //---------------------------------------------------------------------------
72 // The recorder class. Holds static output stream for kernel recording and
73 // static variable index (used in variable names).
74 //---------------------------------------------------------------------------
75 template <bool dummy = true>
76 class recorder {
77     static_assert(dummy, "dummy parameter should be true");
78 
79     public:
set(std::ostream & s)80         static void set(std::ostream &s) {
81             os = &s;
82 
83             // Reset preamble and state.
84             preamble.reset(new backend::source_generator);
85             state = vex::detail::empty_state();
86         }
87 
get()88         static std::ostream& get() {
89             return os ? *os : std::cout;
90         }
91 
get_preamble()92         static backend::source_generator& get_preamble() {
93             return *preamble;
94         }
95 
get_state()96         static vex::detail::kernel_generator_state_ptr get_state() {
97             return state;
98         }
99 
var_id()100         static size_t var_id() {
101             return ++index;
102         }
103     private:
104         static size_t index;
105         static std::ostream *os;
106         static std::unique_ptr<backend::source_generator> preamble;
107         static vex::detail::kernel_generator_state_ptr state;
108 };
109 
110 template <bool dummy>
111 size_t recorder<dummy>::index = 0;
112 
113 template <bool dummy>
114 std::ostream *recorder<dummy>::os = 0;
115 
116 template <bool dummy>
117 std::unique_ptr<backend::source_generator> recorder<dummy>::preamble;
118 
119 template <bool dummy>
120 vex::detail::kernel_generator_state_ptr recorder<dummy>::state;
121 
var_id()122 inline size_t var_id() {
123     return recorder<>::var_id();
124 }
125 
get_recorder()126 inline std::ostream& get_recorder() {
127     return recorder<>::get();
128 }
129 
get_preamble()130 inline backend::source_generator& get_preamble() {
131     return recorder<>::get_preamble();
132 }
133 
get_state()134 inline vex::detail::kernel_generator_state_ptr get_state() {
135     return recorder<>::get_state();
136 }
137 
138 /// Set output stream for the kernel recorder.
set_recorder(std::ostream & os)139 inline void set_recorder(std::ostream &os) {
140     recorder<>::set(os);
141 }
142 
143 //---------------------------------------------------------------------------
144 // Setting up boost::proto.
145 //---------------------------------------------------------------------------
/// Empty tag type: value of the proto terminals that represent symbolic
/// variables (see symbolic_grammar, index_expr and symbolic).
struct variable {};
147 
148 // --- The grammar ----------------------------------------------------------
/// Grammar of the expressions that may be recorded.
/**
 * An expression matches if it is
 *  - a terminal holding generator::variable, or
 *  - any other terminal whose value satisfies is_cl_native, or
 *  - one of the operations listed by VEXCL_BUILTIN_OPERATIONS or
 *    VEXCL_USER_FUNCTIONS, applied recursively to this grammar.
 */
struct symbolic_grammar
    : boost::proto::or_<
          boost::proto::or_<
              boost::proto::terminal< variable >,
              boost::proto::and_<
                  boost::proto::terminal< boost::proto::_ >,
                  boost::proto::if_< is_cl_native< boost::proto::_value >() >
              >
          >,
          VEXCL_BUILTIN_OPERATIONS(symbolic_grammar),
          VEXCL_USER_FUNCTIONS(symbolic_grammar)
      >
{};
162 
// Forward declaration: symbolic_domain needs the name of the expression
// wrapper for its generator before the wrapper itself can be defined.
template <class Expr>
struct symbolic_expr;
165 
/// Proto domain of symbolic expressions.
/**
 * Expressions in this domain are wrapped into symbolic_expr and have to match
 * symbolic_grammar. The nested as_child templates select how operands are
 * stored inside a larger expression.
 */
struct symbolic_domain
    : boost::proto::domain< boost::proto::generator< symbolic_expr >, symbolic_grammar >
{
    // Store everything by value inside expressions...
    template <typename T, class Enable = void>
    struct as_child : proto_base_domain::as_expr<T> {};

    // ... except for symbolic variables:
    // (selected when T, converted to a proto expression, is a terminal
    // holding generator::variable; such operands use proto's as_child)
    template <typename T>
    struct as_child< T,
        typename std::enable_if<
            boost::proto::matches<
                typename boost::proto::result_of::as_expr< T >::type,
                boost::proto::terminal< variable >
                >::value
            >::type
        > : proto_base_domain::as_child< T > {};
};
184 
/// Expression wrapper that places a proto expression into symbolic_domain.
template <class Expr>
struct symbolic_expr
    : boost::proto::extends< Expr, symbolic_expr< Expr >, symbolic_domain >
{
    typedef boost::proto::extends< Expr, symbolic_expr< Expr >, symbolic_domain > base_type;

    /// Wraps the given expression; default-constructs it when omitted.
    symbolic_expr(const Expr &expr = Expr()) : base_type(expr) {}
};
193 
194 //---------------------------------------------------------------------------
/// Terminal that stands for the element index inside the generated code.
/** The evaluation context prints it as the identifier "idx". */
struct index_expr
    : public generator::symbolic_expr< boost::proto::terminal< generator::variable >::type >
{};
198 
/// Returns an expression that is recorded as "idx".
/**
 * "idx" is the same identifier that symbolic::init() and symbolic::write()
 * use to index vector parameters.
 */
inline auto index()
    -> boost::proto::result_of::as_expr<index_expr, symbolic_domain>::type const
{
    return boost::proto::as_expr<symbolic_domain>(index_expr());
}
204 
205 //---------------------------------------------------------------------------
206 namespace detail {
207 
/// Proto evaluation context that prints an expression tree to the current
/// recorder stream (generator::get_recorder()) as source text.
/**
 * Evaluation returns nothing; the only effect is the text written to the
 * recorder and, for user functions, the function definition written to the
 * preamble.
 */
struct symbolic_context {
    // Primary template is empty: only expressions whose tag has one of the
    // specializations below can be evaluated.
    template <typename Expr, typename Tag = typename Expr::proto_tag>
    struct eval {};

// Binary operations are written as "( <left> <op> <right> )".
#define VEXCL_BINARY_OPERATION(bin_tag, bin_op)                                \
  template <typename Expr> struct eval<Expr, boost::proto::tag::bin_tag> {     \
    typedef void result_type;                                                  \
    void operator()(const Expr &expr, symbolic_context &ctx) const {           \
      get_recorder() << "( ";                                                  \
      boost::proto::eval(boost::proto::left(expr), ctx);                       \
      get_recorder() << " " #bin_op " ";                                       \
      boost::proto::eval(boost::proto::right(expr), ctx);                      \
      get_recorder() << " )";                                                  \
    }                                                                          \
  }

    VEXCL_BINARY_OPERATION(plus,          +);
    VEXCL_BINARY_OPERATION(minus,         -);
    VEXCL_BINARY_OPERATION(multiplies,    *);
    VEXCL_BINARY_OPERATION(divides,       /);
    VEXCL_BINARY_OPERATION(modulus,       %);
    VEXCL_BINARY_OPERATION(shift_left,   <<);
    VEXCL_BINARY_OPERATION(shift_right,  >>);
    VEXCL_BINARY_OPERATION(less,          <);
    VEXCL_BINARY_OPERATION(greater,       >);
    VEXCL_BINARY_OPERATION(less_equal,   <=);
    VEXCL_BINARY_OPERATION(greater_equal,>=);
    VEXCL_BINARY_OPERATION(equal_to,     ==);
    VEXCL_BINARY_OPERATION(not_equal_to, !=);
    VEXCL_BINARY_OPERATION(logical_and,  &&);
    VEXCL_BINARY_OPERATION(logical_or,   ||);
    VEXCL_BINARY_OPERATION(bitwise_and,   &);
    VEXCL_BINARY_OPERATION(bitwise_or,    |);
    VEXCL_BINARY_OPERATION(bitwise_xor,   ^);

#undef VEXCL_BINARY_OPERATION

// Prefix unary operations are written as "( <op>( <operand> ) )".
#define VEXCL_UNARY_PRE_OPERATION(the_tag, the_op)                             \
  template <typename Expr> struct eval<Expr, boost::proto::tag::the_tag> {     \
    typedef void result_type;                                                  \
    void operator()(const Expr &expr, symbolic_context &ctx) const {           \
      get_recorder() << "( " #the_op "( ";                                     \
      boost::proto::eval(boost::proto::child(expr), ctx);                      \
      get_recorder() << " ) )";                                                \
    }                                                                          \
  }

    VEXCL_UNARY_PRE_OPERATION(unary_plus,   +);
    VEXCL_UNARY_PRE_OPERATION(negate,       -);
    VEXCL_UNARY_PRE_OPERATION(logical_not,  !);
    VEXCL_UNARY_PRE_OPERATION(pre_inc,     ++);
    VEXCL_UNARY_PRE_OPERATION(pre_dec,     --);
    VEXCL_UNARY_PRE_OPERATION(address_of,   &);
    VEXCL_UNARY_PRE_OPERATION(dereference,  *);

#undef VEXCL_UNARY_PRE_OPERATION

// Postfix unary operations are written as "( ( <operand> )<op> )".
#define VEXCL_UNARY_POST_OPERATION(the_tag, the_op)                            \
  template <typename Expr> struct eval<Expr, boost::proto::tag::the_tag> {     \
    typedef void result_type;                                                  \
    void operator()(const Expr &expr, symbolic_context &ctx) const {           \
      get_recorder() << "( ( ";                                                \
      boost::proto::eval(boost::proto::child(expr), ctx);                      \
      get_recorder() << " )" #the_op " )";                                     \
    }                                                                          \
  }

    VEXCL_UNARY_POST_OPERATION(post_inc, ++);
    VEXCL_UNARY_POST_OPERATION(post_dec, --);

#undef VEXCL_UNARY_POST_OPERATION

    // Conditional expression: "( <cond> ? <then> : <else> )".
    template <typename Expr>
    struct eval<Expr, boost::proto::tag::if_else_> {
        typedef void result_type;
        void operator()(const Expr &expr, symbolic_context &ctx) const {
            get_recorder() << "( ";
            boost::proto::eval(boost::proto::child_c<0>(expr), ctx);
            get_recorder() << " ? ";
            boost::proto::eval(boost::proto::child_c<1>(expr), ctx);
            get_recorder() << " : ";
            boost::proto::eval(boost::proto::child_c<2>(expr), ctx);
            get_recorder() << " )";
        }
    };

    // Function call: "<name>( <arg>, <arg>, ... )". Child 0 of the call
    // expression holds the function object; the remaining children are the
    // arguments.
    template <class Expr>
    struct eval<Expr, boost::proto::tag::function> {
        typedef void result_type;

        // Fusion functor that prints the visited arguments separated by ", ".
        struct display {
            // Number of arguments printed so far. Mutable because the
            // call operator is const.
            mutable int pos;
            symbolic_context &ctx;

            display(symbolic_context &ctx) : pos(0), ctx(ctx) {}

            template <class Arg>
            void operator()(const Arg &arg) const {
                // The separator is skipped for the first argument only.
                if (pos++) get_recorder() << ", ";
                boost::proto::eval(arg, ctx);
            }
        };

        // Builtin functions: the name is taken from the function object
        // stored in the expression; no definition is emitted.
        template <class FunCall>
        typename std::enable_if<
            std::is_base_of<
                builtin_function,
                typename boost::proto::result_of::value<
                    typename boost::proto::result_of::child_c<FunCall,0>::type
                >::type
            >::value,
            void
        >::type
        operator()(const FunCall &expr, symbolic_context &ctx) const {
            get_recorder() << boost::proto::value(boost::proto::child_c<0>(expr)).name() << "( ";

            boost::fusion::for_each(
                    boost::fusion::pop_front(expr),
                    display(ctx)
                    );

            get_recorder() << " )";
        }

        // User functions: the name comes from the static fun::name(), and the
        // function definition is written to the preamble the first time the
        // name is seen.
        template <class FunCall>
        typename std::enable_if<
            std::is_base_of<
                user_function,
                typename boost::proto::result_of::value<
                    typename boost::proto::result_of::child_c<FunCall,0>::type
                >::type
            >::value,
            void
        >::type
        operator()(const FunCall &expr, symbolic_context &ctx) const {
            typedef typename boost::proto::result_of::value<
                typename boost::proto::result_of::child_c<FunCall,0>::type
            >::type fun;

            // Output function definition (once).
            // The names already defined are kept in the generator state under
            // the "user_functions" key; the entry is created on first use.
            auto s = get_state()->find("user_functions");
            if (s == get_state()->end()) {
                s = get_state()->insert(std::make_pair(
                            std::string("user_functions"),
                            boost::any( std::set<std::string>() )
                            )).first;
            }
            auto &seen = boost::any_cast< std::set<std::string>& >(s->second);

            std::string fname = fun::name();

            if (seen.find(fname) == seen.end()) {
                seen.insert(fname);
                fun::define(get_preamble());
            }

            get_recorder() << fun::name() << "( ";

            boost::fusion::for_each(
                    boost::fusion::pop_front(expr),
                    display(ctx)
                    );

            get_recorder() << " )";
        }
    };

    // Terminals: literal values, symbolic variables and the index terminal.
    template <typename Expr>
    struct eval<Expr, boost::proto::tag::terminal> {
        typedef void result_type;

        // Generic terminal: prints the stored value.
        // NOTE(review): std::scientific and the precision are set on the
        // recorder stream itself and are not restored afterwards.
        template <typename Term>
        void operator()(const Term &term, symbolic_context &) const {
            get_recorder() << std::scientific << std::setprecision(12)
                << boost::proto::value(term);
        }

        // Symbolic variable: prints the variable name.
        template <typename T>
        void operator()(const symbolic<T> &v, symbolic_context &) const {
            get_recorder() << v;
        }

        // Index terminal: prints the identifier "idx".
        void operator()(const index_expr&, symbolic_context&) const {
            get_recorder() << "idx";
        }
    };
};
395 
396 } // namespace detail
397 
398 } // namespace generator
399 
400 //---------------------------------------------------------------------------
401 // The symbolic class.
402 //---------------------------------------------------------------------------
403 /// Symbolic variable
404 template <typename T>
405 class symbolic
406     : public generator::symbolic_expr< boost::proto::terminal< generator::variable >::type >
407 {
408     public:
409         typedef T value_type;
410 
411         /// Scope/Type of the symbolic variable.
412         enum scope_type {
413             LocalVar        = 0, ///< Local variable.
414             VectorParameter = 1, ///< Vector kernel parameter.
415             ScalarParameter = 2  ///< Scalar kernel parameter.
416         };
417 
418         /// Constness of vector parameter.
419         enum constness_type {
420             NonConst = 0,   ///< Parameter should be written back at kernel exit.
421             Const = 1       ///< Parameter is readonly.
422         };
423 
424         /// Default constructor. Results in a local variable declaration.
symbolic()425         symbolic() : num(generator::var_id()), scope(LocalVar), constness(NonConst)
426         {
427             generator::get_recorder() << "\t\t" << type_name<T>() << " " << *this << " = " << T() << ";\n";
428         }
429 
430         /// Constructor.
symbolic(scope_type scope,constness_type constness=NonConst)431         explicit symbolic(scope_type scope, constness_type constness = NonConst)
432             : num(generator::var_id()), scope(scope), constness(constness)
433         {
434             if (scope == LocalVar) {
435                 generator::get_recorder() << "\t\t" << type_name<T>() << " " << *this << ";\n";
436             }
437         }
438 
439         /// Copy constructor.
symbolic(const symbolic & expr)440         symbolic(const symbolic &expr)
441             : num(generator::var_id()), scope(LocalVar), constness(NonConst)
442         {
443             generator::get_recorder() << "\t\t" << type_name<T>() << " " << *this << " = ";
444             record(expr);
445             generator::get_recorder() << ";\n";
446         }
447 
448         /// Expression constructor. Results in a local variable declaration initialized by the expression.
449         template <class Expr>
symbolic(const Expr & expr)450         symbolic(const Expr &expr)
451             : num(generator::var_id()), scope(LocalVar), constness(NonConst)
452         {
453             generator::get_recorder() << "\t\t" << type_name<T>() << " " << *this << " = ";
454             record(expr);
455             generator::get_recorder() << ";\n";
456         }
457 
458         /// Assignment operator. Results in the assignment expression written to the recorder.
operator =(const symbolic & c) const459         const symbolic& operator=(const symbolic &c) const {
460             generator::get_recorder() << "\t\t" << *this << " = ";
461             record(c);
462             generator::get_recorder() << ";\n";
463             return *this;
464         }
465 
466 #define VEXCL_ASSIGNMENT(cop, op)                                              \
467   /** Assignment operator.
468    Results in the assignment expression written to the recorder. */            \
469   template <class Expr> const symbolic &operator cop(const Expr & expr) {      \
470     generator::get_recorder() << "\t\t" << *this << " " #cop " ";              \
471     record(expr);                                                              \
472     generator::get_recorder() << ";\n";                                        \
473     return *this;                                                              \
474   }
475 
VEXCL_ASSIGNMENTS(VEXCL_ASSIGNMENT)476         VEXCL_ASSIGNMENTS(VEXCL_ASSIGNMENT)
477 
478 #undef VEXCL_ASSIGNMENT
479 
480         size_t id() const {
481             return num;
482         }
483 
484         // Initialize local variable at kernel enter.
init() const485         std::string init() const {
486             std::ostringstream s;
487 
488             if (scope == VectorParameter) {
489                 s << "\t\t" << type_name<T>() << " " << *this
490                     << " = p_" << *this << "[idx];\n";
491             } else if (scope == ScalarParameter) {
492                 s << "\t\t" << type_name<T>() << " " << *this
493                     << " = p_" << *this << ";\n";
494             }
495 
496             return s.str();
497         }
498 
499         // Write local variable to parameter at kernel exit.
write() const500         std::string write() const {
501             std::ostringstream s;
502 
503             if (scope == VectorParameter && constness == NonConst)
504                 s << "\t\tp_" << *this << "[idx] = " << *this << ";\n";
505 
506             return s.str();
507         }
508 
509         // Returns parameter type and name as strings.
prmdecl() const510         std::tuple<std::string, std::string> prmdecl() const {
511             std::ostringstream name;
512             name << "p_" << *this;
513 
514             std::string prm_type;
515 
516             if (scope == VectorParameter) {
517                 if (constness == Const)
518                     prm_type = type_name< global_ptr<const T> >();
519                 else
520                     prm_type = type_name< global_ptr<T> >();
521             } else {
522                 prm_type = type_name<T>();
523             }
524 
525             return std::make_tuple(prm_type, name.str());
526         }
527     private:
528         size_t         num;
529         scope_type     scope;
530         constness_type constness;
531 
532         template <class Expr>
record(const Expr & expr)533         static void record(const Expr &expr) {
534             generator::detail::symbolic_context ctx;
535             boost::proto::eval(boost::proto::as_child(expr), ctx);
536         }
537 };
538 
539 template <typename T>
operator <<(std::ostream & os,const symbolic<T> & sym)540 std::ostream& operator<<(std::ostream &os, const symbolic<T> &sym) {
541     return os << "var" << sym.id();
542 }
543 
544 namespace generator {
545 
546 /// Autogenerated kernel.
547 class kernel {
548     public:
kernel(const std::vector<backend::command_queue> & queue,const std::string & name)549         kernel(
550                 const std::vector<backend::command_queue> &queue,
551                 const std::string &name
552               ) : queue(queue), name(name), psize(queue.size(), 0)
553         {
554             prm_read.reset(new std::ostringstream);
555             prm_save.reset(new std::ostringstream);
556         }
557 
558         template <class SymVar>
add_param(const SymVar & var)559         void add_param(const SymVar &var) {
560             prm_decl.push_back(var.prmdecl());
561             *prm_read << var.init();
562             *prm_save << var.write();
563         }
564 
build(const std::string & body)565         void build(const std::string &body) {
566             for(auto q = queue.begin(); q != queue.end(); q++) {
567                 backend::source_generator source(*q);
568 
569                 source << get_preamble().str();
570 
571                 source.begin_kernel(name);
572                 source.begin_kernel_parameters();
573 
574                 for(auto p = prm_decl.begin(); p != prm_decl.end(); ++p)
575                     source.parameter(std::get<0>(*p), std::get<1>(*p));
576 
577                 source.parameter<size_t>("n");
578 
579                 source.end_kernel_parameters();
580                 source.grid_stride_loop().open("{");
581 
582                 source.new_line() << prm_read->str() << body << prm_save->str();
583 
584                 source.close("}");
585                 source.end_kernel();
586 
587                 backend::select_context(*q);
588                 cache.insert(std::make_pair(
589                             backend::get_context_id(*q),
590                             backend::kernel(*q, source.str(), name.c_str())
591                             ));
592             }
593         }
594 
595         template <class T>
push_arg(const T & v)596         void push_arg(const T &v) {
597             for(unsigned d = 0; d < queue.size(); d++) {
598                 cache.find(backend::get_context_id(queue[d]))->second.push_arg(v);
599             }
600         }
601 
602         template <class T>
push_arg(const vector<T> & v)603         void push_arg(const vector<T> &v) {
604             for(unsigned d = 0; d < queue.size(); d++) {
605                 cache.find(backend::get_context_id(queue[d]))->second.push_arg(v(d));
606                 psize[d] = std::max(psize[d], v.part_size(d));
607             }
608         }
609 
610         template <class T>
push_arg(const std::vector<T> & args)611         void push_arg(const std::vector<T> &args) {
612             for(unsigned d = 0; d < queue.size(); d++) {
613                 cache.find(backend::get_context_id(queue[d]))->second.push_arg(args[d]);
614             }
615         }
616 
operator ()()617         void operator()() {
618             for(unsigned d = 0; d < queue.size(); d++) {
619                 auto &K = cache.find(backend::get_context_id(queue[d]))->second;
620 
621                 if (psize[d]) {
622                     K.push_arg(psize[d]);
623                     K(queue[d]);
624 
625                     psize[d] = 0;
626                 } else {
627                     K.reset();
628                 }
629             }
630         }
631 
632 #ifndef BOOST_NO_VARIADIC_TEMPLATES
633         /// Launches the kernel with the provided parameters.
634         template <class Head, class... Tail>
operator ()(const Head & head,const Tail &...tail)635         void operator()(const Head &head, const Tail&... tail) {
636             push_arg(head);
637             (*this)(tail...);
638         }
639 #else
640 
641 #define VEXCL_FUNCALL_OPERATOR(z, n, data)                                     \
642   template <BOOST_PP_ENUM_PARAMS(n, class Param)>                              \
643   void operator()(BOOST_PP_ENUM_BINARY_PARAMS(n, const Param, &param)) {       \
644     boost::fusion::for_each(                                                   \
645             boost::fusion::vector_tie(BOOST_PP_ENUM_PARAMS(n, param)),         \
646             push_args(*this)                                                   \
647             );                                                                 \
648     (*this)();                                                                 \
649   }
650 
651 BOOST_PP_REPEAT_FROM_TO(1, VEXCL_MAX_ARITY, VEXCL_FUNCALL_OPERATOR, ~)
652 
653 #undef VEXCL_FUNCALL_OPERATOR
654 
655 #endif
656 
add_params(kernel &)657         static void add_params(kernel &) {}
658 
659         template <class Head, class... Tail>
add_params(kernel & K,const Head & head,const Tail &...tail)660         static void add_params(kernel &K, const Head &head, const Tail&... tail) {
661             K.add_param(head);
662             add_params(K, tail...);
663         }
664     private:
665         std::vector<backend::command_queue> queue;
666         std::string name;
667         std::vector<size_t> psize;
668         std::vector< std::tuple<std::string, std::string> > prm_decl;
669         std::unique_ptr<std::ostringstream> prm_read, prm_save;
670         std::map<vex::backend::context_id, vex::backend::kernel> cache;
671 
672         struct push_args {
673             kernel &K;
push_argsvex::generator::kernel::push_args674             push_args(kernel &K) : K(K) {}
675 
676             template <class T>
operator ()vex::generator::kernel::push_args677             void operator()(const T &p) const {
678                 K.push_arg(p);
679             }
680         };
681 };
682 
683 /// Function body generator.
684 class Function {
685     public:
686         template <class Ret, class ArgTuple>
Function(const std::string & body,const Ret & ret,const ArgTuple & arg)687         Function(const std::string &body, const Ret &ret, const ArgTuple &arg)
688         {
689             boost::fusion::for_each(arg, read_params(source));
690 
691             source << body;
692 
693             source << "\t\treturn " << ret << ";\n";
694         }
695 
get() const696         std::string get() const {
697             return source.str();
698         }
699     private:
700         std::ostringstream source;
701 
702         struct read_params {
703             std::ostream &os;
704             mutable int prm_idx;
705 
read_paramsvex::generator::Function::read_params706             read_params(std::ostream &os) : os(os), prm_idx(0) {}
707 
708             template <class T>
operator ()vex::generator::Function::read_params709             void operator()(const T &v) const {
710                 os << "\t\t" << type_name<typename T::value_type>() << " "
711                    << v << " = prm" << ++prm_idx << ";\n";
712             }
713         };
714 };
715 
716 #ifndef BOOST_NO_VARIADIC_TEMPLATES
717 /// Builds kernel from the recorded expression sequence and the symbolic parameter list.
718 /** The symbolic variables passed to the function should have participated in
719  * the recorded algorithm and will be converted to the generated kernel
720  * arguments.
721  */
722 template <class... Args>
build_kernel(const std::vector<backend::command_queue> & queue,const std::string & name,const std::string & body,const Args &...args)723 kernel build_kernel(
724         const std::vector<backend::command_queue> &queue,
725         const std::string &name, const std::string& body, const Args&... args
726         )
727 {
728     kernel K(queue, name);
729     kernel::add_params(K, args...);
730     K.build(body);
731     return K;
732 }
733 
734 /// Builds function body from the recorded expression.
735 /** The symbolic variables passed to the function should have participated in
736  * the recorded algorithm and will be converted to the output value and the
737  * input arguments of the generated function.
738  */
739 template <class Ret, class... Args>
make_function(std::string body,const Ret & ret,const Args &...args)740 std::string make_function(std::string body, const Ret &ret, const Args&... args) {
741     return Function(body, ret, boost::fusion::vector_tie(args...)).get();
742 }
743 #else
744 
745 #define VEXCL_BUILD_KERNEL(z, n, data)                                         \
746   template <BOOST_PP_ENUM_PARAMS(n, class Arg)>                                \
747   kernel build_kernel(const std::vector<backend::command_queue> & queue,    \
748                          const std::string & name, const std::string & body,   \
749                          BOOST_PP_ENUM_BINARY_PARAMS(n, const Arg, &arg)) {    \
750     kernel K(queue, name);                                                     \
751     boost::fusion::for_each(                                                   \
752             boost::fusion::vector_tie(BOOST_PP_ENUM_PARAMS(n, arg)),           \
753             detail::kernel_add_param(K));                                      \
754     K.build(body);                                                             \
755     return K;                                                                  \
756   }
757 
758 #define VEXCL_MAKE_FUNCTION(z, n, data)                                        \
759   template <class Ret, BOOST_PP_ENUM_PARAMS(n, class Arg)>                     \
760   std::string make_function(std::string body, const Ret &ret,                  \
761                             BOOST_PP_ENUM_BINARY_PARAMS(n, const Arg, &arg)) { \
762     return Function(body, ret, boost::fusion::vector_tie(                      \
763                                    BOOST_PP_ENUM_PARAMS(n, arg))).get();       \
764   }
765 
766 BOOST_PP_REPEAT_FROM_TO(1, VEXCL_MAX_ARITY, VEXCL_BUILD_KERNEL, ~)
767 BOOST_PP_REPEAT_FROM_TO(1, VEXCL_MAX_ARITY, VEXCL_MAKE_FUNCTION, ~)
768 
769 #undef VEXCL_BUILD_KERNEL
770 #undef VEXCL_MAKE_FUNCTION
771 
772 #endif
773 
// UserFunction implementation from a generic functor
//
// The functor is called once, at construction, with symbolic arguments; the
// statements recorded during that call become the body of the user function.
// Name and body are kept in static members, so they are shared by all
// instances of the same <Signature, Functor> specialization.
template <class Signature, class Functor>
struct FunctorAdapter : UserFunction<FunctorAdapter<Signature, Functor>, Signature>
{
    static std::string name_string;
    static std::string body_string;

    // Records the body of f and stores it together with the function name.
    // Overwrites the name and body stored by an earlier instance of the same
    // specialization.
    FunctorAdapter(Functor &&f, std::string fname) {
        using boost::function_types::function_arity;

        name_string = fname;
        // The arity tag selects the get_body() overload generated for the
        // number of parameters in Signature.
        body_string = get_body(std::forward<Functor>(f),
                boost::mpl::size_t< function_arity<Signature>::value >() );
    }

    // Empty constructor. Used in UserFunction::operator(). Hopefully the body
    // string is already constructed by the time the constructor is called.
    FunctorAdapter() {}

    static std::string name() { return name_string; }
    static std::string body() { return body_string; }

// Declares the symbolic variable prm<n> for the n-th parameter of Signature
// and records "<type> <var> = prm<n+1>;". The declaration is written here
// because variables with ScalarParameter scope do not declare themselves.
#define VEXCL_PRINT_PRM(z, n, data)                                            \
  typedef symbolic<                                                            \
      typename boost::mpl::at<params, boost::mpl::int_<n> >::type> Prm##n;     \
  Prm##n prm##n(Prm##n::ScalarParameter);                                      \
  source << "\t\t" << type_name<typename Prm##n::value_type>() << " "          \
         << prm##n << " = prm" << n + 1 << ";\n";

// Generates get_body() for functors taking n arguments: redirects the
// recorder to a local stream, calls f with n symbolic arguments, records
// "return <result>;" and returns the recorded text.
// NOTE(review): set_recorder(source) leaves the global recorder pointing at
// the function-local stream after get_body() returns; callers apparently have
// to set a new recorder before recording anything else -- confirm.
#define VEXCL_BODY_GETTER(z, n, data)                                          \
  static std::string get_body(Functor && f, boost::mpl::size_t<n>) {           \
    typedef typename boost::function_types::result_type<Signature>::type       \
        result;                                                                \
    typedef typename boost::function_types::parameter_types<Signature>::type   \
        params;                                                                \
    std::ostringstream source;                                                 \
    set_recorder(source);                                                      \
    BOOST_PP_REPEAT(n, VEXCL_PRINT_PRM, ~) symbolic<result> ret =              \
        f(BOOST_PP_ENUM_PARAMS(n, prm));                                       \
    source << "\t\treturn " << ret << ";\n";                                   \
    return source.str();                                                       \
  }

    // Overloads exist for 1 to VEXCL_MAX_ARITY - 1 parameters.
    BOOST_PP_REPEAT_FROM_TO(1, VEXCL_MAX_ARITY, VEXCL_BODY_GETTER, ~)

#undef VEXCL_BODY_GETTER
#undef VEXCL_PRINT_PRM
};

// Definitions of the static members declared in FunctorAdapter.
template <class Signature, class Functor>
std::string FunctorAdapter<Signature, Functor>::name_string;

template <class Signature, class Functor>
std::string FunctorAdapter<Signature, Functor>::body_string;
828 
/// Returns a running number for generated functions: 0 on the first call,
/// then 1, 2, ...
inline size_t get_gen_fun_id() {
    static size_t next_id = 0;

    const size_t current = next_id;
    ++next_id;
    return current;
}
833 
834 /// Generates a user-defined function from a generic functor.
835 /**
836  * Takes the function signature as template parameter and a generic functor as
837  * a single argument.
838  * Returns user-defined function ready to be used in vector expressions.
839  */
840 template <class Signature, class Functor>
make_function(Functor && f)841 auto make_function(Functor &&f) ->
842     FunctorAdapter<Signature, Functor>
843 {
844     size_t id = get_gen_fun_id();
845     std::ostringstream name;
846     name << "generated_function_" << id;
847     return FunctorAdapter<Signature, Functor>(std::forward<Functor>(f), name.str());
848 }
849 
850 } // namespace generator;
851 
852 } // namespace vex;
853 
854 #endif
855