1 #pragma once
2
3 #include "sycl_task.hpp"
4
5 /**
6 @file sycl_flow.hpp
7 @brief syclFlow include file
8 */
9
10 namespace tf {
11
12 // ----------------------------------------------------------------------------
13 // class definition: syclFlow
14 // ----------------------------------------------------------------------------
15
16 /**
17 @class syclFlow
18
19 @brief class for building a SYCL task dependency graph
20
21 */
22 class syclFlow {
23
24 friend class Executor;
25
26 struct External {
27 syclGraph graph;
28 };
29
30 struct Internal {
31 Executor& executor;
Internaltf::syclFlow::Internal32 Internal(Executor& e) : executor {e} {}
33 };
34
35 using handle_t = std::variant<External, Internal>;
36
37 public:
38
39 /**
40 @brief constructs a standalone %syclFlow from the default queue
41
42 A standalone %syclFlow does not go through any taskflow and
43 can be run by the caller thread using explicit offload methods
44 (e.g., tf::syclFlow::offload).
45 */
46 syclFlow();
47
48 /**
49 @brief constructs a standalone %syclFlow from the given queue
50
51 A standalone %syclFlow does not go through any taskflow and
52 can be run by the caller thread using explicit offload methods
53 (e.g., tf::syclFlow::offload).
54 */
55 syclFlow(sycl::queue queue);
56
57 /**
58 @brief destroys the %syclFlow
59 */
60 ~syclFlow() = default;
61
62 /**
63 @brief queries the emptiness of the graph
64 */
65 bool empty() const;
66
67 /**
68 @brief queries the number of tasks
69 */
70 size_t num_tasks() const;
71
72 /**
73 @brief dumps the %syclFlow graph into a DOT format through an
74 output stream
75 */
76 void dump(std::ostream& os) const;
77
78 /**
79 @brief clear the associated graph
80 */
81 void clear();
82
83 // ------------------------------------------------------------------------
84 // Generic device operations
85 // ------------------------------------------------------------------------
86
87 /**
88 @brief creates a task that launches the given command group function object
89
90 @tparam F type of command group function object
91 @param func function object that is constructible from
92 std::function<void(sycl::handler&)>
93
94 Creates a task that is associated from the given command group.
95 In SYCL, each command group function object is given a unique
96 command group handler object to perform all the necessary work
97 required to correctly process data on a device using a kernel.
98 */
99 template <typename F, std::enable_if_t<
100 std::is_invocable_r_v<void, F, sycl::handler&>, void>* = nullptr
101 >
102 syclTask on(F&& func);
103
104 /**
105 @brief updates the task to the given command group function object
106
107 Similar to tf::syclFlow::on but operates on an existing task.
108 */
109 template <typename F, std::enable_if_t<
110 std::is_invocable_r_v<void, F, sycl::handler&>, void>* = nullptr
111 >
112 void on(syclTask task, F&& func);
113
114 // TODO
115 template <typename F, std::enable_if_t<
116 std::is_invocable_r_v<
117 sycl::event, F, sycl::queue&, std::vector<sycl::event>>, void
118 >* = nullptr
119 >
120 syclTask on(F&& func);
121
122 // TODO
123 template <typename F, std::enable_if_t<
124 std::is_invocable_r_v<
125 sycl::event, F, sycl::queue&, std::vector<sycl::event>>, void
126 >* = nullptr
127 >
128 void on(syclTask task, F&& func);
129
130
131 /**
132 @brief creates a memcpy task that copies untyped data in bytes
133
134 @param tgt pointer to the target memory block
135 @param src pointer to the source memory block
136 @param bytes bytes to copy
137
138 @return a tf::syclTask handle
139
140 A memcpy task transfers @c bytes of data from a source locationA @c src
141 to a target location @c tgt. Both @c src and @c tgt may be either host
142 or USM pointers.
143 */
144 syclTask memcpy(void* tgt, const void* src, size_t bytes);
145
146 /**
147 @brief creates a memset task that fills untyped data with a byte value
148
149 @param ptr pointer to the destination device memory area
150 @param value value to set for each byte of specified memory
151 @param bytes number of bytes to set
152
153 @return a tf::syclTask handle
154
155 Fills @c bytes of memory beginning at address @c ptr with @c value.
156 @c ptr must be a USM allocation.
157 @c value is interpreted as an unsigned char.
158 */
159 syclTask memset(void* ptr, int value, size_t bytes);
160
161 /**
162 @brief creates a fill task that fills typed data with the given value
163
164 @tparam T trivially copyable value type
165
166 @param ptr pointer to the memory to fill
167 @param pattern pattern value to fill into the memory
168 @param count number of items to fill the value
169
170 Creates a task that fills the specified memory with the
171 specified value.
172 */
173 template <typename T>
174 syclTask fill(void* ptr, const T& pattern, size_t count);
175
176 /**
177 @brief creates a copy task that copies typed data from a source to a target
178 memory block
179
180 @tparam T trivially copyable value type
181
182 @param target pointer to the memory to fill
183 @param source pointer to the pattern value to fill into the memory
184 @param count number of items to fill the value
185
186 Creates a task that copies @c count items of type @c T from a source memory
187 location to a target memory location.
188 */
189 template <typename T,
190 std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr
191 >
192 syclTask copy(T* target, const T* source, size_t count);
193
194 /**
195 @brief creates a kernel task
196
197 @tparam ArgsT arguments types
198
199 @param args arguments to forward to the parallel_for methods defined
200 in the handler object
201
202 Creates a kernel task from a parallel_for method through the handler
203 object associated with a command group.
204 */
205 template <typename...ArgsT>
206 syclTask parallel_for(ArgsT&&... args);
207
208 // ------------------------------------------------------------------------
209 // algorithms
210 // ------------------------------------------------------------------------
211
212 /**
213 @brief invokes a SYCL kernel function using only one thread
214
215 @tparam F kernel function type
216 @param func kernel function
217
218 Creates a task that launches the given function object using only one
219 kernel thread.
220 */
221 template <typename F>
222 syclTask single_task(F&& func);
223
224 /**
225 @brief applies a callable to each dereferenced element of the data array
226
227 @tparam I iterator type
228 @tparam C callable type
229
230 @param first iterator to the beginning (inclusive)
231 @param last iterator to the end (exclusive)
232 @param callable a callable object to apply to the dereferenced iterator
233
234 @return a tf::syclTask handle
235
236 This method is equivalent to the parallel execution of the following loop on a GPU:
237
238 @code{.cpp}
239 for(auto itr = first; itr != last; itr++) {
240 callable(*itr);
241 }
242 @endcode
243 */
244 template <typename I, typename C>
245 syclTask for_each(I first, I last, C&& callable);
246
247 /**
248 @brief applies a callable to each index in the range with the step size
249
250 @tparam I index type
251 @tparam C callable type
252
253 @param first beginning index
254 @param last last index
255 @param step step size
256 @param callable the callable to apply to each element in the data array
257
258 @return a tf::syclTask handle
259
260 This method is equivalent to the parallel execution of the following loop on a GPU:
261
262 @code{.cpp}
263 // step is positive [first, last)
264 for(auto i=first; i<last; i+=step) {
265 callable(i);
266 }
267
268 // step is negative [first, last)
269 for(auto i=first; i>last; i+=step) {
270 callable(i);
271 }
272 @endcode
273 */
274 template <typename I, typename C>
275 syclTask for_each_index(I first, I last, I step, C&& callable);
276
277 /**
278 @brief applies a callable to a source range and stores the result in a target range
279
280 @tparam I iterator type
281 @tparam C callable type
282 @tparam S source types
283
284 @param first iterator to the beginning (inclusive)
285 @param last iterator to the end (exclusive)
286 @param callable the callable to apply to each element in the range
287 @param srcs iterators to the source ranges
288
289 @return a tf::syclTask handle
290
291 This method is equivalent to the parallel execution of the following
292 loop on a SYCL device:
293
294 @code{.cpp}
295 while (first != last) {
296 *first++ = callable(*src1++, *src2++, *src3++, ...);
297 }
298 @endcode
299 */
300 template <typename I, typename C, typename... S>
301 syclTask transform(I first, I last, C&& callable, S... srcs);
302
303 /**
304 @brief performs parallel reduction over a range of items
305
306 @tparam I input iterator type
307 @tparam T value type
308 @tparam C callable type
309
310 @param first iterator to the beginning (inclusive)
311 @param last iterator to the end (exclusive)
312 @param result pointer to the result with an initialized value
313 @param op binary reduction operator
314
315 @return a tf::syclTask handle
316
317 This method is equivalent to the parallel execution of the following loop
318 on a SYCL device:
319
320 @code{.cpp}
321 while (first != last) {
322 *result = op(*result, *first++);
323 }
324 @endcode
325 */
326 template <typename I, typename T, typename C>
327 syclTask reduce(I first, I last, T* result, C&& op);
328
329 /**
330 @brief similar to tf::syclFlow::reduce but does not assume any initial
331 value to reduce
332
333 This method is equivalent to the parallel execution of the following loop
334 on a SYCL device:
335
336 @code{.cpp}
337 *result = *first++; // no initial values partitipcate in the loop
338 while (first != last) {
339 *result = op(*result, *first++);
340 }
341 @endcode
342 */
343 template <typename I, typename T, typename C>
344 syclTask uninitialized_reduce(I first, I last, T* result, C&& op);
345
346 // ------------------------------------------------------------------------
347 // offload methods
348 // ------------------------------------------------------------------------
349
350 /**
351 @brief offloads the %syclFlow onto a GPU and repeatedly runs it until
352 the predicate becomes true
353
354 @tparam P predicate type (a binary callable)
355
356 @param predicate a binary predicate (returns @c true for stop)
357
358 Repetitively executes the present %syclFlow through the given queue object
359 until the predicate returns @c true.
360
361 By default, if users do not offload the %syclFlow,
362 the executor will offload it once.
363 */
364 template <typename P>
365 void offload_until(P&& predicate);
366
367 /**
368 @brief offloads the %syclFlow and executes it by the given times
369
370 @param N number of executions
371 */
372 void offload_n(size_t N);
373
374 /**
375 @brief offloads the %syclFlow and executes it once
376 */
377 void offload();
378
379 // ------------------------------------------------------------------------
380 // rebind methods
381 // ------------------------------------------------------------------------
382
383
384 /**
385 @brief rebinds the task to a memcpy task
386
387 Similar to tf::syclFlow::memcpy but operates on an existing task.
388 */
389 void memcpy(syclTask task, void* tgt, const void* src, size_t bytes);
390
391 /**
392 @brief rebinds the task to a memset task
393
394 Similar to tf::syclFlow::memset but operates on an existing task.
395 */
396 void memset(syclTask task, void* ptr, int value, size_t bytes);
397
398 /**
399 @brief rebinds the task to a fill task
400
401 Similar to tf::syclFlow::fill but operates on an existing task.
402 */
403 template <typename T>
404 void fill(syclTask task, void* ptr, const T& pattern, size_t count);
405
406 /**
407 @brief rebinds the task to a copy task
408
409 Similar to tf::syclFlow::copy but operates on an existing task.
410 */
411 template <typename T,
412 std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr
413 >
414 void copy(syclTask task, T* target, const T* source, size_t count);
415
416 /**
417 @brief rebinds the task to a parallel-for kernel task
418
419 Similar to tf::syclFlow::parallel_for but operates on an existing task.
420 */
421 template <typename...ArgsT>
422 void parallel_for(syclTask task, ArgsT&&... args);
423
424 /**
425 @brief rebinds the task to a single-threaded kernel task
426
427 Similar to tf::syclFlow::single_task but operates on an existing task.
428 */
429 template <typename F>
430 void single_task(syclTask task, F&& func);
431
432 /**
433 @brief rebinds the task to a for-each task
434
435 Similar to tf::syclFlow::for_each but operates on an existing task.
436 */
437 template <typename I, typename C>
438 void for_each(syclTask task, I first, I last, C&& callable);
439
440 /**
441 @brief rebinds the task to a for-each-index task
442
443 Similar to tf::syclFlow::for_each_index but operates on an existing task.
444 */
445 template <typename I, typename C>
446 void for_each_index(
447 syclTask task, I first, I last, I step, C&& callable
448 );
449
450 /**
451 @brief rebinds the task to a transform task
452
453 Similar to tf::syclFlow::transform but operates on an existing task.
454 */
455 template <typename I, typename C, typename... S>
456 void transform(
457 syclTask task, I first, I last, C&& callable, S... srcs
458 );
459
460 /**
461 @brief rebinds the task to a reduce task
462
463 Similar to tf::syclFlow::reduce but operates on an existing task.
464 */
465 template <typename I, typename T, typename C>
466 void reduce(
467 syclTask task, I first, I last, T* result, C&& op
468 );
469
470 /**
471 @brief rebinds the task to an unitialized reduce task
472
473 Similar to tf::syclFlow::uninitialized_reduce but operates on an existing task.
474 */
475 template <typename I, typename T, typename C>
476 void uninitialized_reduce(
477 syclTask task, I first, I last, T* result, C&& op
478 );
479
480 private:
481
482 syclFlow(Executor&, syclGraph&, sycl::queue&);
483
484 sycl::queue _queue;
485
486 const size_t _MAX_WORK_GROUP_SIZE;
487
488 handle_t _handle;
489
490 syclGraph& _graph;
491
492 std::vector<syclNode*> _tpg;
493 std::queue<syclNode*> _bfs;
494
495 size_t _default_group_size(size_t N) const;
496
497 template <typename I, typename C>
498 auto _for_each_cgh(I, I, C&&);
499
500 template <typename I, typename C>
501 auto _for_each_index_cgh(I, I, I, C&&);
502
503 template <typename I, typename T, typename C, bool>
504 auto _reduce_cgh(I, I, T*, C&&);
505
506 template <typename I, typename C, typename... S>
507 auto _transform_cgh(I, I, C&&, S...);
508 };
509
510 // constructor
syclFlow()511 inline syclFlow::syclFlow() :
512 _MAX_WORK_GROUP_SIZE {
513 _queue.get_device().get_info<sycl::info::device::max_work_group_size>()
514 },
515 _handle {std::in_place_type_t<External>{}},
516 _graph {std::get<External>(_handle).graph} {
517 }
518
519 // constructor
syclFlow(sycl::queue queue)520 inline syclFlow::syclFlow(sycl::queue queue) :
521 _queue {std::move(queue)},
522 _MAX_WORK_GROUP_SIZE {
523 _queue.get_device().get_info<sycl::info::device::max_work_group_size>()
524 },
525 _handle {std::in_place_type_t<External>{}},
526 _graph {std::get<External>(_handle).graph} {
527 }
528
529 // Construct the syclFlow from executor (internal graph)
syclFlow(Executor & e,syclGraph & g,sycl::queue & queue)530 inline syclFlow::syclFlow(Executor& e, syclGraph& g, sycl::queue& queue) :
531 _queue {queue},
532 _MAX_WORK_GROUP_SIZE {
533 _queue.get_device().get_info<sycl::info::device::max_work_group_size>()
534 } ,
535 _handle {std::in_place_type_t<Internal>{}, e},
536 _graph {g} {
537 }
538
539 // Function: _default_group_size
_default_group_size(size_t N) const540 inline size_t syclFlow::_default_group_size(size_t N) const {
541 return N <= 32u ? 32u : std::min(_MAX_WORK_GROUP_SIZE, next_pow2(N));
542 }
543
544 // Function: empty
empty() const545 inline bool syclFlow::empty() const {
546 return _graph._nodes.empty();
547 }
548
549 // Function: num_tasks
num_tasks() const550 inline size_t syclFlow::num_tasks() const {
551 return _graph._nodes.size();
552 }
553
554 // Procedure: dump
dump(std::ostream & os) const555 inline void syclFlow::dump(std::ostream& os) const {
556 _graph.dump(os, nullptr, "");
557 }
558
559 // Procedure: clear
clear()560 inline void syclFlow::clear() {
561 _graph.clear();
562 }
563
564 // Function: memcpy
memcpy(void * tgt,const void * src,size_t bytes)565 inline syclTask syclFlow::memcpy(void* tgt, const void* src, size_t bytes) {
566 return on([=](sycl::handler& h){ h.memcpy(tgt, src, bytes); });
567 }
568
569 // Function: memset
memset(void * ptr,int value,size_t bytes)570 inline syclTask syclFlow::memset(void* ptr, int value, size_t bytes) {
571 return on([=](sycl::handler& h){ h.memset(ptr, value, bytes); });
572 }
573
574 // Function: fill
575 template <typename T>
fill(void * ptr,const T & pattern,size_t count)576 syclTask syclFlow::fill(void* ptr, const T& pattern, size_t count) {
577 return on([=](sycl::handler& h){ h.fill(ptr, pattern, count); });
578 }
579
580 // Function: copy
581 template <typename T,
582 std::enable_if_t<!std::is_same_v<T, void>, void>*
583 >
copy(T * target,const T * source,size_t count)584 syclTask syclFlow::copy(T* target, const T* source, size_t count) {
585 return on([=](sycl::handler& h){ h.memcpy(target, source, count*sizeof(T)); });
586 }
587
588 // Function: on
589 template <typename F, std::enable_if_t<
590 std::is_invocable_r_v<void, F, sycl::handler&>, void>*
591 >
on(F && f)592 syclTask syclFlow::on(F&& f) {
593 auto node = _graph.emplace_back(_graph,
594 std::in_place_type_t<syclNode::CommandGroupHandler>{}, std::forward<F>(f)
595 );
596 return syclTask(node);
597 }
598
599 // Function: on
600 template <typename F, std::enable_if_t<std::is_invocable_r_v<
601 sycl::event, F, sycl::queue&, std::vector<sycl::event>>, void
602 >*>
on(F && f)603 syclTask syclFlow::on(F&& f) {
604 auto node = _graph.emplace_back(_graph,
605 std::in_place_type_t<syclNode::DependentSubmit>{}, std::forward<F>(f)
606 );
607 return syclTask(node);
608 }
609
610 // Function: single_task
611 template <typename F>
single_task(F && func)612 syclTask syclFlow::single_task(F&& func) {
613 return on([f=std::forward<F>(func)] (sycl::handler& h) {
614 h.single_task(f);
615 });
616 }
617
618 // Function: parallel_for
619 template <typename...ArgsT>
parallel_for(ArgsT &&...args)620 syclTask syclFlow::parallel_for(ArgsT&&... args) {
621 return on([args...] (sycl::handler& h) { h.parallel_for(args...); });
622 }
623
624 // Procedure: offload_until
625 template <typename P>
offload_until(P && predicate)626 void syclFlow::offload_until(P&& predicate) {
627
628 if(!(_graph._state & syclGraph::TOPOLOGY_CHANGED)) {
629 goto offload;
630 }
631
632 // levelize the graph
633 _tpg.clear();
634
635 // insert the first level of nodes into the queue
636 for(auto& u : _graph._nodes) {
637 u->_level = u->_dependents.size();
638 if(u->_level == 0) {
639 _bfs.push(u.get());
640 }
641 }
642
643 while(!_bfs.empty()) {
644 auto u = _bfs.front();
645 _bfs.pop();
646 _tpg.push_back(u);
647 for(auto v : u->_successors) {
648 if(--(v->_level) == 0) {
649 v->_level = u->_level + 1;
650 _bfs.push(v);
651 }
652 }
653 }
654
655 offload:
656
657 // offload the syclFlow graph
658 bool in_order = _queue.is_in_order();
659
660 while(!predicate()) {
661
662 // traverse node in a topological order
663 for(auto u : _tpg) {
664
665 switch(u->_handle.index()) {
666 // task type 1: command group handler
667 case syclNode::COMMAND_GROUP_HANDLER:
668 u->_event = _queue.submit([u, in_order](sycl::handler& h){
669 // wait on all predecessors
670 if(!in_order) {
671 for(auto p : u->_dependents) {
672 h.depends_on(p->_event);
673 }
674 }
675 std::get<syclNode::CommandGroupHandler>(u->_handle).work(h);
676 });
677 break;
678
679 // task type 2: dependent submit
680 case syclNode::DEPENDENT_SUBMIT:
681 std::vector<sycl::event> events;
682 if(!in_order) {
683 events.reserve(u->_dependents.size());
684 for(auto p : u->_dependents) {
685 events.push_back(p->_event);
686 }
687 }
688 u->_event = std::get<syclNode::DependentSubmit>(u->_handle).work(
689 _queue, std::move(events)
690 );
691 break;
692 }
693 }
694
695 // synchronize the execution
696 _queue.wait();
697 }
698
699 _graph._state = syclGraph::OFFLOADED;
700 }
701
702 // Procedure: offload_n
offload_n(size_t n)703 inline void syclFlow::offload_n(size_t n) {
704 offload_until([repeat=n] () mutable { return repeat-- == 0; });
705 }
706
707 // Procedure: offload
offload()708 inline void syclFlow::offload() {
709 offload_until([repeat=1] () mutable { return repeat-- == 0; });
710 }
711
712 // Function: on
713 template <typename F, std::enable_if_t<
714 std::is_invocable_r_v<void, F, sycl::handler&>, void>*
715 >
on(syclTask task,F && f)716 void syclFlow::on(syclTask task, F&& f) {
717 std::get<syclNode::CommandGroupHandler>((task._node)->_handle).work =
718 std::forward<F>(f);
719 }
720
721 // Function: on
722 template <typename F, std::enable_if_t<std::is_invocable_r_v<
723 sycl::event, F, sycl::queue&, std::vector<sycl::event>>, void
724 >*>
on(syclTask task,F && f)725 void syclFlow::on(syclTask task, F&& f) {
726 std::get<syclNode::DependentSubmit>((task._node)->_handle).work =
727 std::forward<F>(f);
728 }
729
730 // Function: memcpy
memcpy(syclTask task,void * tgt,const void * src,size_t bytes)731 inline void syclFlow::memcpy(
732 syclTask task, void* tgt, const void* src, size_t bytes
733 ) {
734 on(task, [=](sycl::handler& h){ h.memcpy(tgt, src, bytes); });
735 }
736
737 // Function: memset
memset(syclTask task,void * ptr,int value,size_t bytes)738 inline void syclFlow::memset(
739 syclTask task, void* ptr, int value, size_t bytes
740 ) {
741 on(task, [=](sycl::handler& h){ h.memset(ptr, value, bytes); });
742 }
743
744 // Function: fill
745 template <typename T>
fill(syclTask task,void * ptr,const T & pattern,size_t count)746 void syclFlow::fill(
747 syclTask task, void* ptr, const T& pattern, size_t count
748 ) {
749 on(task, [=](sycl::handler& h){ h.fill(ptr, pattern, count); });
750 }
751
752 // Function: copy
753 template <typename T,
754 std::enable_if_t<!std::is_same_v<T, void>, void>*
755 >
copy(syclTask task,T * target,const T * source,size_t count)756 void syclFlow::copy(
757 syclTask task, T* target, const T* source, size_t count
758 ) {
759 on(task, [=](sycl::handler& h){h.memcpy(target, source, count*sizeof(T));});
760 }
761
762 // Function: parallel_for
763 template <typename...ArgsT>
parallel_for(syclTask task,ArgsT &&...args)764 void syclFlow::parallel_for(syclTask task, ArgsT&&... args) {
765 on(task, [args...] (sycl::handler& h) { h.parallel_for(args...); });
766 }
767
768 // Function: single_task
769 template <typename F>
single_task(syclTask task,F && func)770 void syclFlow::single_task(syclTask task, F&& func) {
771 on(task, [f=std::forward<F>(func)] (sycl::handler& h) { h.single_task(f); });
772 }
773
774 // ############################################################################
775 // Forward declaration: FlowBuilder
776 // ############################################################################
777
778 // FlowBuilder::emplace_on
779 template <typename C, typename Q, std::enable_if_t<is_syclflow_task_v<C>, void>*>
emplace_on(C && callable,Q && q)780 Task FlowBuilder::emplace_on(C&& callable, Q&& q) {
781 auto n = _graph.emplace_back(
782 std::in_place_type_t<Node::syclFlow>{},
783 [c=std::forward<C>(callable), queue=std::forward<Q>(q)]
784 (Executor& e, Node* p) mutable {
785 e._invoke_syclflow_task_entry(p, c, queue);
786 },
787 std::make_unique<syclGraph>()
788 );
789 return Task(n);
790 }
791
792 // FlowBuilder::emplace
793 template <typename C, std::enable_if_t<is_syclflow_task_v<C>, void>*>
emplace(C && callable)794 Task FlowBuilder::emplace(C&& callable) {
795 return emplace_on(std::forward<C>(callable), sycl::queue{});
796 }
797
798 // ############################################################################
799 // Forward declaration: Executor
800 // ############################################################################
801
802 // Procedure: _invoke_syclflow_task_entry (syclFlow)
803 template <typename C, typename Q,
804 std::enable_if_t<is_syclflow_task_v<C>, void>*
805 >
_invoke_syclflow_task_entry(Node * node,C && c,Q & queue)806 void Executor::_invoke_syclflow_task_entry(Node* node, C&& c, Q& queue) {
807
808 auto& h = std::get<Node::syclFlow>(node->_handle);
809
810 syclGraph* g = dynamic_cast<syclGraph*>(h.graph.get());
811
812 g->clear();
813
814 syclFlow sf(*this, *g, queue);
815
816 c(sf);
817
818 if(!(g->_state & syclGraph::OFFLOADED)) {
819 sf.offload();
820 }
821 }
822
823 } // end of namespace tf -----------------------------------------------------
824
825
826