/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef _TBB_scheduler_H
#define _TBB_scheduler_H

#include "scheduler_common.h"
#include "tbb/spin_mutex.h"
#include "mailbox.h"
#include "tbb_misc.h" // for FastRandom
#include "itt_notify.h"
#include "../rml/include/rml_tbb.h"

#include "intrusive_list.h"

#if __TBB_SURVIVE_THREAD_SWITCH
#include "cilk-tbb-interop.h"
#endif /* __TBB_SURVIVE_THREAD_SWITCH */

#if __TBB_PREVIEW_RESUMABLE_TASKS
#include "co_context.h"
#endif

namespace tbb {
namespace internal {

template<typename SchedulerTraits> class custom_scheduler;

//------------------------------------------------------------------------
// generic_scheduler
//------------------------------------------------------------------------

#define EmptyTaskPool ((task**)0)
#define LockedTaskPool ((task**)~(intptr_t)0)
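// Note (added, not from the original source): an arena slot's task_pool pointer acts as a
// small state machine built from the two sentinel values above. A rough sketch of how a
// thief might interpret it, using only names declared in this header:
//
//     task** tp = victim_slot->task_pool;
//     if ( tp == EmptyTaskPool ) {
//         // nothing published; do not attempt to steal
//     } else if ( tp == LockedTaskPool ) {
//         // the owner or another thief holds the pool lock; back off and retry
//     } else {
//         // published pool; entries in [head, tail) may be inspected via lock_task_pool()
//     }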

//! Bit-field representing properties of a scheduler
struct scheduler_properties {
    static const bool worker = false;
    static const bool master = true;
    //! Indicates that a scheduler acts as a master or a worker.
    bool type : 1;
    //! Indicates that a scheduler is on the outermost level.
    /**  Note that the explicit execute method will set this property. **/
    bool outermost : 1;
#if __TBB_PREVIEW_CRITICAL_TASKS
    //! Indicates that a scheduler is in the process of executing critical task(s).
    bool has_taken_critical_task : 1;
#endif
#if __TBB_PREVIEW_RESUMABLE_TASKS
    //! Indicates that the scheduler is bound to an original thread stack.
    bool genuine : 1;
#endif
    //! Reserved bits
    unsigned char :
#if __TBB_PREVIEW_RESUMABLE_TASKS
                    4;
#elif __TBB_PREVIEW_CRITICAL_TASKS
                    5;
#else
                    6;
#endif
};

struct scheduler_state {
    //! Index of the arena slot the scheduler occupies now, or occupied last time.
    size_t my_arena_index; // TODO: make it unsigned and pair with my_affinity_id to fit into cache line

    //! Pointer to the slot in the arena we own at the moment.
    arena_slot* my_arena_slot;

    //! The arena that I own (if master) or am servicing at the moment (if worker)
    arena* my_arena;

    //! Innermost task whose task::execute() is running. A dummy task on the outermost level.
    task* my_innermost_running_task;

    mail_inbox my_inbox;

    //! The mailbox id assigned to this scheduler.
    /** The id is assigned upon first entry into the arena.
        TODO: how are id's being garbage collected?
        TODO: master thread may enter arena and leave and then reenter.
                We want to give it the same affinity_id upon reentry, if practical.
        TODO: investigate if it makes sense to merge this field into scheduler_properties.
      */
    affinity_id my_affinity_id;

    scheduler_properties my_properties;

#if __TBB_SCHEDULER_OBSERVER
    //! Last observer in the global observers list processed by this scheduler
    observer_proxy* my_last_global_observer;
#endif

#if __TBB_ARENA_OBSERVER
    //! Last observer in the local observers list processed by this scheduler
    observer_proxy* my_last_local_observer;
#endif
#if __TBB_TASK_PRIORITY
    //! Latest known highest priority of tasks in the market or arena.
    /** Master threads currently track only tasks in their arenas, while workers
        take into account the global top priority (among all arenas in the market). **/
    volatile intptr_t *my_ref_top_priority;

    //! Pointer to market's (for workers) or current arena's (for the master) reload epoch counter.
    volatile uintptr_t *my_ref_reload_epoch;
#endif /* __TBB_TASK_PRIORITY */
#if __TBB_PREVIEW_RESUMABLE_TASKS
    //! The task currently being waited for.
    task* my_wait_task;

    //! Pointer to the recall flag of the stack currently in use.
    tbb::atomic<bool>* my_current_is_recalled;
#endif
};

//! Work stealing task scheduler.
/** None of the fields here are ever read or written by threads other than
    the thread that creates the instance.

    Class generic_scheduler is an abstract base class that contains most of the scheduler,
    except for tweaks specific to processors and tools (e.g. VTune(TM) Performance Tools).
    The derived template class custom_scheduler<SchedulerTraits> fills in the tweaks. */
class generic_scheduler: public scheduler
                       , public ::rml::job
                       , public intrusive_list_node
                       , public scheduler_state {
public: // almost every class in TBB uses generic_scheduler

    //! If sizeof(task) is <=quick_task_size, it is handled on a free list instead of malloc'd.
    static const size_t quick_task_size = 256-task_prefix_reservation_size;

    static bool is_version_3_task( task& t ) {
#if __TBB_PREVIEW_CRITICAL_TASKS
        return (t.prefix().extra_state & 0x7)>=0x1;
#else
        return (t.prefix().extra_state & 0x0F)>=0x1;
#endif
    }

    //! Position in the call stack specifying its maximal filling when stealing is still allowed
    uintptr_t my_stealing_threshold;
#if __TBB_ipf
    //! Position in the RSE backup area specifying its maximal filling when stealing is still allowed
    uintptr_t my_rsb_stealing_threshold;
#endif

    static const size_t null_arena_index = ~size_t(0);

    inline bool is_task_pool_published () const;

    inline bool is_local_task_pool_quiescent () const;

    inline bool is_quiescent_local_task_pool_empty () const;

    inline bool is_quiescent_local_task_pool_reset () const;

    //! The market I am in
    market* my_market;

    //! Random number generator used for picking a random victim from which to steal.
    FastRandom my_random;

    //! Free list of small tasks that can be reused.
    task* my_free_list;

#if __TBB_HOARD_NONLOCAL_TASKS
    //! Free list of small non-local tasks that should be returned or can be reused.
    task* my_nonlocal_free_list;
#endif
    //! Fake root task created by slave threads.
    /** The task is used as the "parent" argument to method wait_for_all. */
    task* my_dummy_task;

    //! Reference count for scheduler
    /** Number of task_scheduler_init objects that point to this scheduler */
    long my_ref_count;

    inline void attach_mailbox( affinity_id id );

    /* A couple of bools can be located here because space is otherwise just padding after my_affinity_id. */

    //! True if *this was created by automatic TBB initialization
    bool my_auto_initialized;

#if __TBB_COUNT_TASK_NODES
    //! Net number of big task objects that have been allocated but not yet freed.
    intptr_t my_task_node_count;
#endif /* __TBB_COUNT_TASK_NODES */

#if __TBB_PREVIEW_RESUMABLE_TASKS
    //! The list of possible post resume actions.
    enum post_resume_action {
        PRA_INVALID,
        PRA_ABANDON,
        PRA_CALLBACK,
        PRA_CLEANUP,
        PRA_NOTIFY,
        PRA_NONE
    };
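    // Added summary (not from the original source), inferred from prepare_resume() and
    // do_post_resume_action() below: PRA_ABANDON marks the waited task as abandoned so its
    // suspended scheduler can be resumed once the wait completes; PRA_CALLBACK invokes the
    // user callback wrapped in a callback_t; PRA_CLEANUP returns a finished coroutine
    // scheduler to the arena's coroutine cache; PRA_NOTIFY raises the recall flag of the
    // original stack; PRA_NONE means no action is pending.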

    //! The suspend callback function type.
    typedef void(*suspend_callback_t)(void*, task::suspend_point);

    //! Wraps the user callback passed to tbb::suspend.
    struct callback_t {
        suspend_callback_t suspend_callback;
        void* user_callback;
        task::suspend_point tag;

        void operator()() {
            if (suspend_callback) {
                __TBB_ASSERT(suspend_callback && user_callback && tag, NULL);
                suspend_callback(user_callback, tag);
            }
        }
    };
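    // Illustrative sketch (not part of the original source): how a callback_t might be
    // carried through the post-resume mechanism. The scheduler pointer `s` and the
    // particular callback values are hypothetical; the flow mirrors the PRA_CALLBACK
    // branch of do_post_resume_action() defined later in this header.
    //
    //     callback_t cb = { user_suspend_callback, user_data, tag };
    //     s->set_post_resume_action( PRA_CALLBACK, &cb );
    //     // ... after the coroutine switch, the resumed side runs:
    //     s->do_post_resume_action();  // invokes cb(), i.e. user_suspend_callback(user_data, tag)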

    //! The coroutine context associated with the current scheduler.
    co_context my_co_context;

    //! The post resume action requested for the current scheduler.
    post_resume_action my_post_resume_action;

    //! The post resume action argument.
    void* my_post_resume_arg;

    //! The scheduler to resume on exit.
    generic_scheduler* my_target_on_exit;

    //! Set post resume action to perform after resume.
    void set_post_resume_action(post_resume_action, void* arg);

    //! Performs post resume action.
    void do_post_resume_action();

    //! Decides how to switch and sets post resume action.
    /** Returns false if the caller should finish the coroutine and then resume the target scheduler.
        Returns true if the caller should resume the target scheduler immediately. **/
    bool prepare_resume(generic_scheduler& target);

    //! Resumes the original scheduler of the calling thread.
    /** Returns false if the current stack should be left to perform the resume.
        Returns true if the current stack is resumed. **/
    bool resume_original_scheduler();

    //! Resumes the target scheduler. prepare_resume must be called for the target scheduler in advance.
    void resume(generic_scheduler& target);

    friend void recall_function(task::suspend_point tag);
#endif /* __TBB_PREVIEW_RESUMABLE_TASKS */

    //! Sets up the data necessary for the stealing limiting heuristics
    void init_stack_info ();

    //! Returns true if stealing is allowed
    bool can_steal () {
        int anchor;
        // TODO IDEA: Add performance warning?
#if __TBB_ipf
        return my_stealing_threshold < (uintptr_t)&anchor && (uintptr_t)__TBB_get_bsp() < my_rsb_stealing_threshold;
#else
        return my_stealing_threshold < (uintptr_t)&anchor;
#endif
    }
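    // Explanatory note (added, not from the original source): on platforms where the call
    // stack grows downward, init_stack_info() presumably places my_stealing_threshold some
    // distance below the stack base. The address of the local `anchor` above tracks the
    // current stack depth, so stealing stops once the dispatch recursion has consumed the
    // allowed portion of the stack, e.g.:
    //
    //     stack base (high address)
    //         | ... dispatch/execute frames ...
    //         | &anchor                <- current depth; stealing allowed while above threshold
    //         | my_stealing_threshold  <- deepest point at which stealing is still permitted
    //         v stack limit (low address)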

    //! Used by workers to enter the task pool
    /** Does not lock the task pool if the arena slot has been successfully grabbed. **/
    void publish_task_pool();

    //! Leave the task pool
    /** Leaving task pool automatically releases the task pool if it is locked. **/
    void leave_task_pool();

    //! Resets head and tail indices to 0, and leaves task pool
    /** The task pool must be locked by the owner (via acquire_task_pool).**/
    inline void reset_task_pool_and_leave ();

    //! Locks victim's task pool, and returns pointer to it. The pointer can be NULL.
    /** Garbles victim_arena_slot->task_pool for the duration of the lock. **/
    task** lock_task_pool( arena_slot* victim_arena_slot ) const;

    //! Unlocks victim's task pool
    /** Restores victim_arena_slot->task_pool munged by lock_task_pool. **/
    void unlock_task_pool( arena_slot* victim_arena_slot, task** victim_task_pool ) const;

    //! Locks the local task pool
    /** Garbles my_arena_slot->task_pool for the duration of the lock. Requires
        correctly set my_arena_slot->task_pool_ptr. **/
    void acquire_task_pool() const;

    //! Unlocks the local task pool
    /** Restores my_arena_slot->task_pool munged by acquire_task_pool. Requires
        correctly set my_arena_slot->task_pool_ptr. **/
    void release_task_pool() const;

    //! Checks if t is affinitized to another thread, and if so, bundles it as proxy.
    /** Returns either t or proxy containing t. **/
    task* prepare_for_spawning( task* t );

    //! Makes newly spawned tasks visible to thieves
    inline void commit_spawned_tasks( size_t new_tail );

    //! Makes relocated tasks visible to thieves and releases the local task pool.
    /** Obviously, the task pool must be locked when calling this method. **/
    inline void commit_relocated_tasks( size_t new_tail );

    //! Get a task from the local pool.
    /** Called only by the pool owner.
        Returns the pointer to the task or NULL if a suitable task is not found.
        Resets the pool if it is empty. **/
    task* get_task( __TBB_ISOLATION_EXPR( isolation_tag isolation ) );

    //! Get a task from the local pool at specified location T.
    /** Returns the pointer to the task or NULL if the task cannot be executed,
        e.g. proxy has been deallocated or isolation constraint is not met.
        tasks_omitted tells if some tasks have been omitted.
        Called only by the pool owner. The caller should guarantee that the
        position T is not available for a thief. **/
#if __TBB_TASK_ISOLATION
    task* get_task( size_t T, isolation_tag isolation, bool& tasks_omitted );
#else
    task* get_task( size_t T );
#endif /* __TBB_TASK_ISOLATION */
    //! Attempt to get a task from the mailbox.
    /** Gets a task only if it has not been executed by its sender or a thief
        that has stolen it from the sender's task pool. Otherwise returns NULL.

        This method is intended to be used only by the thread extracting the proxy
        from its mailbox. (In contrast to the local task pool, the mailbox can be read
        only by its owner). **/
    task* get_mailbox_task( __TBB_ISOLATION_EXPR( isolation_tag isolation ) );

    //! True if t is a task_proxy
    static bool is_proxy( const task& t ) {
        return t.prefix().extra_state==es_task_proxy;
    }

    //! Attempts to steal a task from a randomly chosen thread/scheduler
    task* steal_task( __TBB_ISOLATION_EXPR(isolation_tag isolation) );

    //! Steal task from another scheduler's ready pool.
    task* steal_task_from( __TBB_ISOLATION_ARG( arena_slot& victim_arena_slot, isolation_tag isolation ) );

#if __TBB_PREVIEW_CRITICAL_TASKS
    //! Tries to find critical task in critical task stream
    task* get_critical_task( __TBB_ISOLATION_EXPR(isolation_tag isolation) );

    //! Pushes the task to the critical task stream if it is a critical task and returns
    //! true. Otherwise does nothing and returns false.
    bool handled_as_critical( task& t );
#endif

    /** Initial size of the task deque sufficient to serve without reallocation
        4 nested parallel_for calls with iteration space of 65535 grains each. **/
    static const size_t min_task_pool_size = 64;
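    // Added note on the figure above (an informal back-of-the-envelope reading, not from
    // the original source): recursive binary splitting of a 65535-grain iteration space is
    // about 16 levels deep (2^16 = 65536), and at any moment roughly one spawned sibling
    // per level sits in the deque, so 4 nested parallel_for calls need on the order of
    // 4 * 16 = 64 slots, which matches min_task_pool_size.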

    //! Makes sure that the task pool can accommodate at least n more elements
    /** If necessary relocates existing task pointers or grows the ready task deque.
        Returns (possibly updated) tail index (not accounting for n). **/
    size_t prepare_task_pool( size_t n );

    //! Initialize a scheduler for a master thread.
    static generic_scheduler* create_master( arena* a );

    //! Perform necessary cleanup when a master thread stops using TBB.
    bool cleanup_master( bool blocking_terminate );

    //! Initialize a scheduler for a worker thread.
    static generic_scheduler* create_worker( market& m, size_t index, bool genuine );

    //! Perform necessary cleanup when a worker thread finishes.
    static void cleanup_worker( void* arg, bool worker );

protected:
    template<typename SchedulerTraits> friend class custom_scheduler;
    generic_scheduler( market &, bool );

public:
#if TBB_USE_ASSERT > 1
    //! Check that internal data structures are in consistent state.
    /** Raises __TBB_ASSERT failure if inconsistency is found. */
    void assert_task_pool_valid() const;
#else
    void assert_task_pool_valid() const {}
#endif /* TBB_USE_ASSERT <= 1 */

    void attach_arena( arena*, size_t index, bool is_master );
    void nested_arena_entry( arena*, size_t );
    void nested_arena_exit();
    void wait_until_empty();

    void spawn( task& first, task*& next ) __TBB_override;

    void spawn_root_and_wait( task& first, task*& next ) __TBB_override;

    void enqueue( task&, void* reserved ) __TBB_override;

    void local_spawn( task* first, task*& next );
    void local_spawn_root_and_wait( task* first, task*& next );
    virtual void local_wait_for_all( task& parent, task* child ) = 0;

    //! Destroy and deallocate this scheduler object.
    void destroy();

    //! Cleans up this scheduler (the scheduler might be destroyed).
    void cleanup_scheduler();

    //! Allocate task object, either from the heap or a free list.
    /** Returns uninitialized task object with initialized prefix. */
    task& allocate_task( size_t number_of_bytes,
                       __TBB_CONTEXT_ARG(task* parent, task_group_context* context) );

    //! Put task on free list.
    /** Does not call destructor. */
    template<free_task_hint h>
    void free_task( task& t );

    //! Return task object to the memory allocator.
    inline void deallocate_task( task& t );

    //! True if running on a worker thread, false otherwise.
    inline bool is_worker() const;

    //! True if the scheduler is on the outermost dispatch level.
    inline bool outermost_level() const;

    //! True if the scheduler is on the outermost dispatch level in a master thread.
    /** Returns true when this scheduler instance is associated with an application
        thread, and is not executing any TBB task. This includes being in a TBB
        dispatch loop (one of wait_for_all methods) invoked directly from that thread. **/
    inline bool master_outermost_level () const;

    //! True if the scheduler is on the outermost dispatch level in a worker thread.
    inline bool worker_outermost_level () const;

    //! Returns the concurrency limit of the current arena.
    unsigned max_threads_in_arena();

#if __TBB_COUNT_TASK_NODES
    intptr_t get_task_node_count( bool count_arena_workers = false );
#endif /* __TBB_COUNT_TASK_NODES */

    //! Special value used to mark my_return_list as not taking any more entries.
    static task* plugged_return_list() {return (task*)(intptr_t)(-1);}

    //! Number of small tasks that have been allocated by this scheduler.
    __TBB_atomic intptr_t my_small_task_count;

    //! List of small tasks that have been returned to this scheduler by other schedulers.
    // TODO IDEA: see if putting my_return_list on separate cache line improves performance
    task* my_return_list;

    //! Try getting a task from other threads (via mailbox, stealing, FIFO queue, orphans adoption).
    /** Returns obtained task or NULL if all attempts fail. */
    virtual task* receive_or_steal_task( __TBB_ISOLATION_ARG( __TBB_atomic reference_count& completion_ref_count, isolation_tag isolation ) ) = 0;

    //! Free a small task t that was allocated by a different scheduler
    void free_nonlocal_small_task( task& t );

#if __TBB_TASK_GROUP_CONTEXT
    //! Returns task group context used by this scheduler instance.
    /** This context is associated with root tasks created by a master thread
        without an explicitly specified context object, outside of any running task.

        Note that the default context of a worker thread is never accessed by
        user code (directly or indirectly). **/
    inline task_group_context* default_context ();

    //! Padding isolating thread-local members from members that can be written to by other threads.
    char _padding1[NFS_MaxLineSize - sizeof(context_list_node_t)];

    //! Head of the thread specific list of task group contexts.
    context_list_node_t my_context_list_head;

    //! Mutex protecting access to the list of task group contexts.
    // TODO: check whether it can be deadly preempted and replace by spinning/sleeping mutex
    spin_mutex my_context_list_mutex;

    //! Last state propagation epoch known to this thread
    /** Together with the_context_state_propagation_epoch it constitutes a synchronization
        protocol that keeps the hot path of task group context construction/destruction
        mostly lock-free.
        When the local epoch equals the global one, the state of task group contexts
        registered with this thread is consistent with that of the task group trees
        they belong to. **/
    uintptr_t my_context_state_propagation_epoch;

    //! Flag indicating that a context is being destructed by its owner thread
    /** Together with my_nonlocal_ctx_list_update it constitutes a synchronization
        protocol that keeps the hot path of context destruction (by the owner thread)
        mostly lock-free. **/
    tbb::atomic<uintptr_t> my_local_ctx_list_update;

#if __TBB_TASK_PRIORITY
    //! Returns reference priority used to decide whether a task should be offloaded.
    inline intptr_t effective_reference_priority () const;

    // TODO: move into slots and fix is_out_of_work
    //! Task pool for offloading tasks with priorities lower than the current top priority.
    task* my_offloaded_tasks;

    //! Points to the last offloaded task in the my_offloaded_tasks list.
    task** my_offloaded_task_list_tail_link;

    //! Indicator of how recently the offload area was checked for the presence of top priority tasks.
    uintptr_t my_local_reload_epoch;

    //! Indicates that the pool is likely non-empty even if it appears empty from the outside
    volatile bool my_pool_reshuffling_pending;

    //! Searches offload area for top priority tasks and reloads found ones into primary task pool.
    /** Returns one of the found tasks or NULL. **/
    task* reload_tasks( __TBB_ISOLATION_EXPR( isolation_tag isolation ) );

    task* reload_tasks( task*& offloaded_tasks, task**& offloaded_task_list_link, __TBB_ISOLATION_ARG( intptr_t top_priority, isolation_tag isolation ) );

    //! Moves tasks with priority below the top one from primary task pool into offload area.
    /** Returns the next execution candidate task or NULL. **/
    task* winnow_task_pool ( __TBB_ISOLATION_EXPR( isolation_tag isolation ) );

    //! Get a task from locked or empty pool in range [H0, T0). Releases or unlocks the task pool.
    /** Returns the found task or NULL. **/
    task *get_task_and_activate_task_pool( size_t H0 , __TBB_ISOLATION_ARG( size_t T0, isolation_tag isolation ) );

    //! Unconditionally moves the task into offload area.
    inline void offload_task ( task& t, intptr_t task_priority );
#endif /* __TBB_TASK_PRIORITY */

    //! Detaches abandoned contexts
    /** These contexts must be destroyed by other threads. **/
    void cleanup_local_context_list ();

    //! Finds all contexts registered by this scheduler affected by the state change
    //! and propagates the new state to them.
    template <typename T>
    void propagate_task_group_state ( T task_group_context::*mptr_state, task_group_context& src, T new_state );

    // check consistency
    static void assert_context_valid(const task_group_context *tgc) {
        suppress_unused_warning(tgc);
#if TBB_USE_ASSERT
        __TBB_ASSERT(tgc, NULL);
        uintptr_t ctx = tgc->my_version_and_traits;
        __TBB_ASSERT(is_alive(ctx), "referenced task_group_context was destroyed");
        static const char *msg = "task_group_context is invalid";
        __TBB_ASSERT(!(ctx&~(3|(7<<task_group_context::traits_offset))), msg); // the value fits known values of versions and traits
        __TBB_ASSERT(tgc->my_kind < task_group_context::dying, msg);
        __TBB_ASSERT(tgc->my_cancellation_requested == 0 || tgc->my_cancellation_requested == 1, msg);
        __TBB_ASSERT(tgc->my_state < task_group_context::low_unused_state_bit, msg);
        if(tgc->my_kind != task_group_context::isolated) {
            __TBB_ASSERT(tgc->my_owner, msg);
            __TBB_ASSERT(tgc->my_node.my_next && tgc->my_node.my_prev, msg);
        }
#if __TBB_TASK_PRIORITY
        assert_priority_valid(tgc->my_priority);
#endif
        if(tgc->my_parent)
#if TBB_USE_ASSERT > 1
            assert_context_valid(tgc->my_parent);
#else
            __TBB_ASSERT(is_alive(tgc->my_parent->my_version_and_traits), msg);
#endif
#endif
    }
#endif /* __TBB_TASK_GROUP_CONTEXT */

#if _WIN32||_WIN64
private:
    //! Handle returned by RML when registering a master with RML
    ::rml::server::execution_resource_t master_exec_resource;
public:
#endif /* _WIN32||_WIN64 */

#if __TBB_TASK_GROUP_CONTEXT
    //! Flag indicating that a context is being destructed by non-owner thread.
    /** See also my_local_ctx_list_update. **/
    tbb::atomic<uintptr_t> my_nonlocal_ctx_list_update;
#endif /* __TBB_TASK_GROUP_CONTEXT */

#if __TBB_SURVIVE_THREAD_SWITCH
    __cilk_tbb_unwatch_thunk my_cilk_unwatch_thunk;
#if TBB_USE_ASSERT
    //! State values used to check interface contract with cilkrts.
    /** Names of cs_running...cs_freed derived from state machine diagram in cilk-tbb-interop.h */
    enum cilk_state_t {
        cs_none=0xF000, // Start at nonzero value so that we can detect use of zeroed memory.
        cs_running,
        cs_limbo,
        cs_freed
    };
    cilk_state_t my_cilk_state;
#endif /* TBB_USE_ASSERT */
#endif /* __TBB_SURVIVE_THREAD_SWITCH */

#if __TBB_STATISTICS
    //! Set of counters to track internal statistics on per thread basis
    /** Placed at the end of the class definition to minimize the disturbance of
        the core logic memory operations. **/
    mutable statistics_counters my_counters;
#endif /* __TBB_STATISTICS */

}; // class generic_scheduler


} // namespace internal
} // namespace tbb

#include "arena.h"
#include "governor.h"

namespace tbb {
namespace internal {

inline bool generic_scheduler::is_task_pool_published () const {
    __TBB_ASSERT(my_arena_slot, 0);
    return my_arena_slot->task_pool != EmptyTaskPool;
}

inline bool generic_scheduler::is_local_task_pool_quiescent () const {
    __TBB_ASSERT(my_arena_slot, 0);
    task** tp = my_arena_slot->task_pool;
    return tp == EmptyTaskPool || tp == LockedTaskPool;
}

inline bool generic_scheduler::is_quiescent_local_task_pool_empty () const {
    __TBB_ASSERT( is_local_task_pool_quiescent(), "Task pool is not quiescent" );
    return __TBB_load_relaxed(my_arena_slot->head) == __TBB_load_relaxed(my_arena_slot->tail);
}

inline bool generic_scheduler::is_quiescent_local_task_pool_reset () const {
    __TBB_ASSERT( is_local_task_pool_quiescent(), "Task pool is not quiescent" );
    return __TBB_load_relaxed(my_arena_slot->head) == 0 && __TBB_load_relaxed(my_arena_slot->tail) == 0;
}

inline bool generic_scheduler::outermost_level () const {
    return my_properties.outermost;
}

inline bool generic_scheduler::master_outermost_level () const {
    return !is_worker() && outermost_level();
}

inline bool generic_scheduler::worker_outermost_level () const {
    return is_worker() && outermost_level();
}

#if __TBB_TASK_GROUP_CONTEXT
inline task_group_context* generic_scheduler::default_context () {
    return my_dummy_task->prefix().context;
}
#endif /* __TBB_TASK_GROUP_CONTEXT */

inline void generic_scheduler::attach_mailbox( affinity_id id ) {
    __TBB_ASSERT(id>0,NULL);
    my_inbox.attach( my_arena->mailbox(id) );
    my_affinity_id = id;
}

inline bool generic_scheduler::is_worker() const {
    return my_properties.type == scheduler_properties::worker;
}

inline unsigned generic_scheduler::max_threads_in_arena() {
    __TBB_ASSERT(my_arena, NULL);
    return my_arena->my_num_slots;
}

//! Return task object to the memory allocator.
inline void generic_scheduler::deallocate_task( task& t ) {
#if TBB_USE_ASSERT
    task_prefix& p = t.prefix();
    p.state = 0xFF;
    p.extra_state = 0xFF;
    poison_pointer(p.next);
#endif /* TBB_USE_ASSERT */
    NFS_Free((char*)&t-task_prefix_reservation_size);
#if __TBB_COUNT_TASK_NODES
    --my_task_node_count;
#endif /* __TBB_COUNT_TASK_NODES */
}

#if __TBB_COUNT_TASK_NODES
inline intptr_t generic_scheduler::get_task_node_count( bool count_arena_workers ) {
    return my_task_node_count + (count_arena_workers? my_arena->workers_task_node_count(): 0);
}
#endif /* __TBB_COUNT_TASK_NODES */

inline void generic_scheduler::reset_task_pool_and_leave () {
    __TBB_ASSERT( my_arena_slot->task_pool == LockedTaskPool, "Task pool must be locked when resetting task pool" );
    __TBB_store_relaxed( my_arena_slot->tail, 0 );
    __TBB_store_relaxed( my_arena_slot->head, 0 );
    leave_task_pool();
}

//TODO: move to arena_slot
inline void generic_scheduler::commit_spawned_tasks( size_t new_tail ) {
    __TBB_ASSERT ( new_tail <= my_arena_slot->my_task_pool_size, "task deque end was overwritten" );
    // emit "task was released" signal
    ITT_NOTIFY(sync_releasing, (void*)((uintptr_t)my_arena_slot+sizeof(uintptr_t)));
    // Release fence is necessary to make sure that previously stored task pointers
    // are visible to thieves.
    __TBB_store_with_release( my_arena_slot->tail, new_tail );
}

void generic_scheduler::commit_relocated_tasks ( size_t new_tail ) {
    __TBB_ASSERT( is_local_task_pool_quiescent(),
                  "Task pool must be locked when calling commit_relocated_tasks()" );
    __TBB_store_relaxed( my_arena_slot->head, 0 );
    // Tail is updated last to minimize the probability that a thread taking an arena
    // snapshot is misled into thinking that this task pool is empty.
    __TBB_store_release( my_arena_slot->tail, new_tail );
    release_task_pool();
}

template<free_task_hint hint>
void generic_scheduler::free_task( task& t ) {
#if __TBB_HOARD_NONLOCAL_TASKS
    static const int h = hint&(~local_task);
#else
    static const free_task_hint h = hint;
#endif
    GATHER_STATISTIC(--my_counters.active_tasks);
    task_prefix& p = t.prefix();
    // Verify that optimization hints are correct.
    __TBB_ASSERT( h!=small_local_task || p.origin==this, NULL );
    __TBB_ASSERT( !(h&small_task) || p.origin, NULL );
    __TBB_ASSERT( !(h&local_task) || (!p.origin || uintptr_t(p.origin) > uintptr_t(4096)), "local_task means allocated");
    poison_value(p.depth);
    poison_value(p.ref_count);
    poison_pointer(p.owner);
#if __TBB_PREVIEW_RESUMABLE_TASKS
    __TBB_ASSERT(1L << t.state() & (1L << task::executing | 1L << task::allocated | 1 << task::to_resume), NULL);
#else
    __TBB_ASSERT(1L << t.state() & (1L << task::executing | 1L << task::allocated), NULL);
#endif
    p.state = task::freed;
    if( h==small_local_task || p.origin==this ) {
        GATHER_STATISTIC(++my_counters.free_list_length);
        p.next = my_free_list;
        my_free_list = &t;
    } else if( !(h&local_task) && p.origin && uintptr_t(p.origin) < uintptr_t(4096) ) {
        // a special value reserved for future use, do nothing since
        // origin is not pointing to a scheduler instance
    } else if( !(h&local_task) && p.origin ) {
        GATHER_STATISTIC(++my_counters.free_list_length);
#if __TBB_HOARD_NONLOCAL_TASKS
        if( !(h&no_cache) ) {
            p.next = my_nonlocal_free_list;
            my_nonlocal_free_list = &t;
        } else
#endif
        free_nonlocal_small_task(t);
    } else {
        GATHER_STATISTIC(--my_counters.big_tasks);
        deallocate_task(t);
    }
}
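// Added summary (not from the original source) of the dispatch in free_task() above:
// a small task owned by this scheduler goes back onto my_free_list; a small-integer
// origin value is a reserved sentinel and is simply ignored; a small task that
// originated in another scheduler is either hoarded in my_nonlocal_free_list (when
// __TBB_HOARD_NONLOCAL_TASKS is set and caching is not suppressed) or handed back via
// free_nonlocal_small_task(); anything else is a big task and is deallocated outright.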

#if __TBB_TASK_PRIORITY
inline intptr_t generic_scheduler::effective_reference_priority () const {
    // Workers on the outermost dispatch level (i.e. with empty stack) use the market's
    // priority as a reference point (to speed up discovering process-level priority
    // changes). But when there are enough workers to service (even if only partially)
    // a lower priority arena, they should use the arena's priority as a reference, lest
    // they be trapped in futile spinning (because the market's priority would prohibit
    // executing ANY tasks in this arena).
    return !worker_outermost_level() ||
        my_arena->my_num_workers_allotted < my_arena->num_workers_active() ? *my_ref_top_priority : my_arena->my_top_priority;
}

inline void generic_scheduler::offload_task ( task& t, intptr_t /*priority*/ ) {
    GATHER_STATISTIC( ++my_counters.prio_tasks_offloaded );
    __TBB_ASSERT( !is_proxy(t), "The proxy task cannot be offloaded" );
    __TBB_ASSERT( my_offloaded_task_list_tail_link && !*my_offloaded_task_list_tail_link, NULL );
#if TBB_USE_ASSERT
    t.prefix().state = task::ready;
#endif /* TBB_USE_ASSERT */
    t.prefix().next_offloaded = my_offloaded_tasks;
    my_offloaded_tasks = &t;
}
#endif /* __TBB_TASK_PRIORITY */

#if __TBB_PREVIEW_RESUMABLE_TASKS
inline void generic_scheduler::set_post_resume_action(post_resume_action pra, void* arg) {
    __TBB_ASSERT(my_post_resume_action == PRA_NONE, "Post resume action has already been set.");
    __TBB_ASSERT(!my_post_resume_arg, NULL);

    my_post_resume_action = pra;
    my_post_resume_arg = arg;
}

inline bool generic_scheduler::prepare_resume(generic_scheduler& target) {
    // The second condition holds for a worker, or for a master during cleanup.
    if (my_properties.outermost && my_wait_task == my_dummy_task) {
        if (my_properties.genuine) {
            // We are in someone's original scheduler.
            target.set_post_resume_action(PRA_NOTIFY, my_current_is_recalled);
            return true;
        }
        // We are in a coroutine on the outermost level.
        target.set_post_resume_action(PRA_CLEANUP, this);
        my_target_on_exit = &target;
        // Request to finish the coroutine instead of an immediate resume.
        return false;
    }
    __TBB_ASSERT(my_wait_task != my_dummy_task, NULL);
    // We are in a coroutine on a nested level.
    my_wait_task->prefix().abandoned_scheduler = this;
    target.set_post_resume_action(PRA_ABANDON, my_wait_task);
    return true;
}

inline bool generic_scheduler::resume_original_scheduler() {
    generic_scheduler& target = *my_arena_slot->my_scheduler;
    if (!prepare_resume(target)) {
        // We should return and finish the current coroutine.
        return false;
    }
    resume(target);
    return true;
}

inline void generic_scheduler::resume(generic_scheduler& target) {
    // Do not create non-trivial objects on the stack of this function. They might never be destroyed.
    __TBB_ASSERT(governor::is_set(this), NULL);
    __TBB_ASSERT(target.my_post_resume_action != PRA_NONE,
        "The post resume action is not set. Has prepare_resume been called?");
    __TBB_ASSERT(target.my_post_resume_arg, NULL);
    __TBB_ASSERT(&target != this, NULL);
    __TBB_ASSERT(target.my_arena == my_arena, "Cross-arena switch is forbidden.");

    // Transfer thread related data.
    target.my_arena_index = my_arena_index;
    target.my_arena_slot = my_arena_slot;
#if __TBB_SCHEDULER_OBSERVER
    target.my_last_global_observer = my_last_global_observer;
#endif
#if __TBB_ARENA_OBSERVER
    target.my_last_local_observer = my_last_local_observer;
#endif
    target.attach_mailbox(affinity_id(target.my_arena_index + 1));

#if __TBB_TASK_PRIORITY
    if (my_offloaded_tasks)
        my_arena->orphan_offloaded_tasks(*this);
#endif /* __TBB_TASK_PRIORITY */

    governor::assume_scheduler(&target);
    my_co_context.resume(target.my_co_context);
    __TBB_ASSERT(governor::is_set(this), NULL);

    do_post_resume_action();
    if (this == my_arena_slot->my_scheduler) {
        my_arena_slot->my_scheduler_is_recalled->store<tbb::relaxed>(false);
    }
}

inline void generic_scheduler::do_post_resume_action() {
    __TBB_ASSERT(my_post_resume_action != PRA_NONE, "The post resume action is not set.");
    __TBB_ASSERT(my_post_resume_arg, NULL);

    switch (my_post_resume_action) {
    case PRA_ABANDON:
    {
        task_prefix& wait_task_prefix = static_cast<task*>(my_post_resume_arg)->prefix();
        reference_count old_ref_count = __TBB_FetchAndAddW(&wait_task_prefix.ref_count, internal::abandon_flag);
        __TBB_ASSERT(old_ref_count > 0, NULL);
        if (old_ref_count == 1) {
            // Remove the abandon flag.
            __TBB_store_with_release(wait_task_prefix.ref_count, 1);
            // The wait has been completed. Spawn a resume task.
            tbb::task::resume(wait_task_prefix.abandoned_scheduler);
        }
        break;
    }
    case PRA_CALLBACK:
    {
        callback_t callback = *static_cast<callback_t*>(my_post_resume_arg);
        callback();
        break;
    }
    case PRA_CLEANUP:
    {
        generic_scheduler* to_cleanup = static_cast<generic_scheduler*>(my_post_resume_arg);
        __TBB_ASSERT(!to_cleanup->my_properties.genuine, NULL);
        // Release the coroutine's reference to my_arena.
        to_cleanup->my_arena->on_thread_leaving<arena::ref_external>();
        // Cache the coroutine for possible later reuse.
        to_cleanup->my_arena->my_co_cache.push(to_cleanup);
        break;
    }
    case PRA_NOTIFY:
    {
        tbb::atomic<bool>& scheduler_recall_flag = *static_cast<tbb::atomic<bool>*>(my_post_resume_arg);
        scheduler_recall_flag = true;
        // Do not access recall_flag because it can be destroyed after the notification.
        break;
    }
    default:
        __TBB_ASSERT(false, NULL);
    }

    my_post_resume_action = PRA_NONE;
    my_post_resume_arg = NULL;
}

struct recall_functor {
    tbb::atomic<bool>* scheduler_recall_flag;

    recall_functor(tbb::atomic<bool>* recall_flag_) :
        scheduler_recall_flag(recall_flag_) {}

    void operator()(task::suspend_point /*tag*/) {
        *scheduler_recall_flag = true;
    }
};

#if _WIN32
/* [[noreturn]] */ inline void __stdcall co_local_wait_for_all(void* arg) {
#else
/* [[noreturn]] */ inline void co_local_wait_for_all(void* arg) {
#endif
    // Do not create non-trivial objects on the stack of this function. They will never be destroyed.
    generic_scheduler& s = *static_cast<generic_scheduler*>(arg);
    __TBB_ASSERT(governor::is_set(&s), NULL);
    // For a correct task stealing threshold, calculate the stack bounds at coroutine start.
    s.init_stack_info();
    // Basically calls the user callback passed to the tbb::task::suspend function
    s.do_post_resume_action();
    // Endless loop here because the coroutine could be reused
    for( ;; ) {
        __TBB_ASSERT(s.my_innermost_running_task == s.my_dummy_task, NULL);
        __TBB_ASSERT(s.worker_outermost_level(), NULL);
        s.local_wait_for_all(*s.my_dummy_task, NULL);
        __TBB_ASSERT(s.my_target_on_exit, NULL);
        __TBB_ASSERT(s.my_wait_task == NULL, NULL);
        s.resume(*s.my_target_on_exit);
    }
    // This code is unreachable
}
#endif /* __TBB_PREVIEW_RESUMABLE_TASKS */

#if __TBB_TASK_GROUP_CONTEXT
//! Helper class for tracking floating point context and task group context switches
/** Assuming presence of an itt collector, in addition to keeping track of floating
    point context, this class emits itt events to indicate begin and end of task group
    context execution **/
template <bool report_tasks>
class context_guard_helper {
    const task_group_context *curr_ctx;
#if __TBB_FP_CONTEXT
    cpu_ctl_env guard_cpu_ctl_env;
    cpu_ctl_env curr_cpu_ctl_env;
#endif
public:
    context_guard_helper() : curr_ctx( NULL ) {
#if __TBB_FP_CONTEXT
        guard_cpu_ctl_env.get_env();
        curr_cpu_ctl_env = guard_cpu_ctl_env;
#endif
    }
    ~context_guard_helper() {
#if __TBB_FP_CONTEXT
        if ( curr_cpu_ctl_env != guard_cpu_ctl_env )
            guard_cpu_ctl_env.set_env();
#endif
        if ( report_tasks && curr_ctx )
            ITT_TASK_END;
    }
    // The function is called from bypass dispatch loop on the hot path.
    // Consider performance issues when refactoring.
    void set_ctx( const task_group_context *ctx ) {
        generic_scheduler::assert_context_valid( ctx );
#if __TBB_FP_CONTEXT
        const cpu_ctl_env &ctl = *punned_cast<cpu_ctl_env*>( &ctx->my_cpu_ctl_env );
        // Compare the FPU settings directly because the context can be reused between parallel algorithms.
        if ( ctl != curr_cpu_ctl_env ) {
            curr_cpu_ctl_env = ctl;
            curr_cpu_ctl_env.set_env();
        }
#endif
        if ( report_tasks && ctx != curr_ctx ) {
            // if task group context was active, report end of current execution frame.
            if ( curr_ctx )
                ITT_TASK_END;
            // reporting begin of new task group context execution frame.
            // using address of task group context object to group tasks (parent).
            // id of task execution frame is NULL and reserved for future use.
            ITT_TASK_BEGIN( ctx,ctx->my_name, NULL );
            curr_ctx = ctx;
        }
    }
    void restore_default() {
#if __TBB_FP_CONTEXT
        if ( curr_cpu_ctl_env != guard_cpu_ctl_env ) {
            guard_cpu_ctl_env.set_env();
            curr_cpu_ctl_env = guard_cpu_ctl_env;
        }
#endif
    }
};
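// Illustrative sketch (not from the original source) of the intended use of
// context_guard_helper in a dispatch loop; get_next_task() and the loop shape are
// hypothetical, only the guard's own methods are taken from the class above:
//
//     context_guard_helper</*report_tasks=*/false> guard;
//     while ( task* t = get_next_task() ) {
//         guard.set_ctx( t->prefix().context );  // switch FPU env / ITT frame if the context changed
//         t->execute();
//     }
//     guard.restore_default();
//     // the guard's destructor restores the captured FPU environment if needed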
#else
template <bool T>
struct context_guard_helper {
    void set_ctx() {}
    void restore_default() {}
};
#endif /* __TBB_TASK_GROUP_CONTEXT */

} // namespace internal
} // namespace tbb

#endif /* _TBB_scheduler_H */