/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef _TBB_scheduler_H
#define _TBB_scheduler_H

#include "scheduler_common.h"
#include "tbb/spin_mutex.h"
#include "mailbox.h"
#include "tbb_misc.h" // for FastRandom
#include "itt_notify.h"
#include "../rml/include/rml_tbb.h"

#include "intrusive_list.h"

#if __TBB_SURVIVE_THREAD_SWITCH
#include "cilk-tbb-interop.h"
#endif /* __TBB_SURVIVE_THREAD_SWITCH */

#if __TBB_PREVIEW_RESUMABLE_TASKS
#include "co_context.h"
#endif

namespace tbb {
namespace internal {

template<typename SchedulerTraits> class custom_scheduler;

//------------------------------------------------------------------------
// generic_scheduler
//------------------------------------------------------------------------

#define EmptyTaskPool ((task**)0)
#define LockedTaskPool ((task**)~(intptr_t)0)

//! Bit-field representing properties of a scheduler
struct scheduler_properties {
    static const bool worker = false;
    static const bool master = true;
    //! Indicates that a scheduler acts as a master or a worker.
    bool type : 1;
    //! Indicates that a scheduler is on the outermost level.
    /** Note that the explicit execute method will set this property. **/
    bool outermost : 1;
#if __TBB_PREVIEW_CRITICAL_TASKS
    //! Indicates that a scheduler is in the process of executing critical task(s).
    bool has_taken_critical_task : 1;
#endif
#if __TBB_PREVIEW_RESUMABLE_TASKS
    //! Indicates that the scheduler is bound to an original thread stack.
    bool genuine : 1;
#endif
    //! Reserved bits
    unsigned char :
#if __TBB_PREVIEW_RESUMABLE_TASKS
        4;
#elif __TBB_PREVIEW_CRITICAL_TASKS
        5;
#else
        6;
#endif
};
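
// Illustration (not part of the original source): the named bits plus the
// reserved bits above always sum to 8, so the properties are intended to pack
// into a single byte. A build could verify this assumption with, e.g.:
//     static_assert( sizeof(scheduler_properties) == 1,
//                    "scheduler_properties is expected to fit into one byte" );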

struct scheduler_state {
    //! Index of the arena slot the scheduler occupies now, or occupied last time.
    size_t my_arena_index; // TODO: make it unsigned and pair with my_affinity_id to fit into cache line

    //! Pointer to the slot in the arena we own at the moment.
    arena_slot* my_arena_slot;

    //! The arena that I own (if master) or am servicing at the moment (if worker)
    arena* my_arena;

    //! Innermost task whose task::execute() is running. A dummy task on the outermost level.
    task* my_innermost_running_task;

    mail_inbox my_inbox;

    //! The mailbox id assigned to this scheduler.
    /** The id is assigned upon first entry into the arena.
        TODO: how are id's being garbage collected?
        TODO: master thread may enter arena and leave and then reenter.
            We want to give it the same affinity_id upon reentry, if practical.
        TODO: investigate if it makes sense to merge this field into scheduler_properties.
    */
    affinity_id my_affinity_id;

    scheduler_properties my_properties;

#if __TBB_SCHEDULER_OBSERVER
    //! Last observer in the global observers list processed by this scheduler
    observer_proxy* my_last_global_observer;
#endif

#if __TBB_ARENA_OBSERVER
    //! Last observer in the local observers list processed by this scheduler
    observer_proxy* my_last_local_observer;
#endif
#if __TBB_TASK_PRIORITY
    //! Latest known highest priority of tasks in the market or arena.
    /** Master threads currently track only tasks in their arenas, while workers
        take into account global top priority (among all arenas in the market). **/
    volatile intptr_t *my_ref_top_priority;

    //! Pointer to market's (for workers) or current arena's (for the master) reload epoch counter.
    volatile uintptr_t *my_ref_reload_epoch;
#endif /* __TBB_TASK_PRIORITY */
#if __TBB_PREVIEW_RESUMABLE_TASKS
    //! The task currently being waited for.
    task* my_wait_task;

    //! Pointer to the flag signaling that the current stack has been recalled.
    tbb::atomic<bool>* my_current_is_recalled;
#endif
};

//! Work stealing task scheduler.
/** None of the fields here are ever read or written by threads other than
    the thread that creates the instance.

    Class generic_scheduler is an abstract base class that contains most of the scheduler,
    except for tweaks specific to processors and tools (e.g. VTune(TM) Performance Tools).
    The derived template class custom_scheduler<SchedulerTraits> fills in the tweaks. */
class generic_scheduler: public scheduler
                       , public ::rml::job
                       , public intrusive_list_node
                       , public scheduler_state {
public: // almost every class in TBB uses generic_scheduler

    //! If sizeof(task) is <=quick_task_size, it is handled on a free list instead of malloc'd.
    static const size_t quick_task_size = 256-task_prefix_reservation_size;
    static bool is_version_3_task( task& t ) {
#if __TBB_PREVIEW_CRITICAL_TASKS
        return (t.prefix().extra_state & 0x7)>=0x1;
#else
        return (t.prefix().extra_state & 0x0F)>=0x1;
#endif
    }

    //! Position in the call stack specifying its maximal filling when stealing is still allowed
    uintptr_t my_stealing_threshold;
#if __TBB_ipf
    //! Position in the RSE backup area specifying its maximal filling when stealing is still allowed
    uintptr_t my_rsb_stealing_threshold;
#endif

    static const size_t null_arena_index = ~size_t(0);

    inline bool is_task_pool_published () const;

    inline bool is_local_task_pool_quiescent () const;

    inline bool is_quiescent_local_task_pool_empty () const;

    inline bool is_quiescent_local_task_pool_reset () const;

    //! The market I am in
    market* my_market;

    //! Random number generator used for picking a random victim from which to steal.
    FastRandom my_random;

    //! Free list of small tasks that can be reused.
    task* my_free_list;

#if __TBB_HOARD_NONLOCAL_TASKS
    //! Free list of small non-local tasks that should be returned or can be reused.
    task* my_nonlocal_free_list;
#endif
    //! Fake root task created by slave threads.
    /** The task is used as the "parent" argument to method wait_for_all. */
    task* my_dummy_task;

    //! Reference count for scheduler
    /** Number of task_scheduler_init objects that point to this scheduler */
    long my_ref_count;

    inline void attach_mailbox( affinity_id id );

    /* A couple of bools can be located here because space is otherwise just padding after my_affinity_id. */

    //! True if *this was created by automatic TBB initialization
    bool my_auto_initialized;

#if __TBB_COUNT_TASK_NODES
    //! Net number of big task objects that have been allocated but not yet freed.
    intptr_t my_task_node_count;
#endif /* __TBB_COUNT_TASK_NODES */

#if __TBB_PREVIEW_RESUMABLE_TASKS
    //! The list of possible post resume actions.
    enum post_resume_action {
        PRA_INVALID,
        PRA_ABANDON,
        PRA_CALLBACK,
        PRA_CLEANUP,
        PRA_NOTIFY,
        PRA_NONE
    };

    //! The suspend callback function type.
    typedef void(*suspend_callback_t)(void*, task::suspend_point);

    //! The callback to call the user callback passed to tbb::task::suspend.
    struct callback_t {
        suspend_callback_t suspend_callback;
        void* user_callback;
        task::suspend_point tag;

        void operator()() {
            if (suspend_callback) {
                __TBB_ASSERT(suspend_callback && user_callback && tag, NULL);
                suspend_callback(user_callback, tag);
            }
        }
    };
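
    // Illustration (an assumption, not code from this file): tbb::task::suspend
    // is expected to type-erase the user functor into a callback_t roughly like
    //     callback_t cb = { &callback_trampoline<F>, &user_functor, tag };
    //     s->set_post_resume_action(PRA_CALLBACK, &cb);
    // where callback_trampoline<F> is a hypothetical helper that casts the void*
    // back to F and invokes it with the suspend_point tag.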

    //! The coroutine context associated with the current scheduler.
    co_context my_co_context;

    //! The post resume action requested for the current scheduler.
    post_resume_action my_post_resume_action;

    //! The post resume action argument.
    void* my_post_resume_arg;

    //! The scheduler to resume on exit.
    generic_scheduler* my_target_on_exit;

    //! Set post resume action to perform after resume.
    void set_post_resume_action(post_resume_action, void* arg);

    //! Performs post resume action.
    void do_post_resume_action();

    //! Decides how to switch and sets post resume action.
    /** Returns false if the caller should finish the coroutine and then resume the target scheduler.
        Returns true if the caller should resume the target scheduler immediately. **/
    bool prepare_resume(generic_scheduler& target);

    //! Resumes the original scheduler of the calling thread.
    /** Returns false if the current stack should be left to perform the resume.
        Returns true if the current stack is resumed. **/
    bool resume_original_scheduler();

    //! Resumes the target scheduler. prepare_resume must be called for the target scheduler in advance.
    void resume(generic_scheduler& target);

    friend void recall_function(task::suspend_point tag);
#endif /* __TBB_PREVIEW_RESUMABLE_TASKS */

    //! Sets up the data necessary for the stealing limiting heuristics
    void init_stack_info ();

    //! Returns true if stealing is allowed
    bool can_steal () {
        int anchor;
        // TODO IDEA: Add performance warning?
#if __TBB_ipf
        return my_stealing_threshold < (uintptr_t)&anchor && (uintptr_t)__TBB_get_bsp() < my_rsb_stealing_threshold;
#else
        return my_stealing_threshold < (uintptr_t)&anchor;
#endif
    }
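
    // Illustration (an assumption about init_stack_info, which is defined
    // elsewhere): on a downward-growing stack the threshold is derived from the
    // stack base minus the allowed depth, conceptually
    //     my_stealing_threshold = stack_base - stack_size / 2;
    // so the address-of-local test above succeeds only while enough stack space
    // remains, bounding the recursion depth that stealing can introduce.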

    //! Used by workers to enter the task pool
    /** Does not lock the task pool in case the arena slot has been successfully grabbed. **/
    void publish_task_pool();

    //! Leave the task pool
    /** Leaving task pool automatically releases the task pool if it is locked. **/
    void leave_task_pool();

    //! Resets head and tail indices to 0, and leaves task pool
    /** The task pool must be locked by the owner (via acquire_task_pool).**/
    inline void reset_task_pool_and_leave ();

    //! Locks victim's task pool, and returns pointer to it. The pointer can be NULL.
    /** Garbles victim_arena_slot->task_pool for the duration of the lock. **/
    task** lock_task_pool( arena_slot* victim_arena_slot ) const;

    //! Unlocks victim's task pool
    /** Restores victim_arena_slot->task_pool munged by lock_task_pool. **/
    void unlock_task_pool( arena_slot* victim_arena_slot, task** victim_task_pool ) const;

    //! Locks the local task pool
    /** Garbles my_arena_slot->task_pool for the duration of the lock. Requires
        correctly set my_arena_slot->task_pool_ptr. **/
    void acquire_task_pool() const;

    //! Unlocks the local task pool
    /** Restores my_arena_slot->task_pool munged by acquire_task_pool. Requires
        correctly set my_arena_slot->task_pool_ptr. **/
    void release_task_pool() const;

    //! Checks if t is affinitized to another thread, and if so, bundles it as proxy.
    /** Returns either t or proxy containing t. **/
    task* prepare_for_spawning( task* t );

    //! Makes newly spawned tasks visible to thieves
    inline void commit_spawned_tasks( size_t new_tail );

    //! Makes relocated tasks visible to thieves and releases the local task pool.
    /** Obviously, the task pool must be locked when calling this method. **/
    inline void commit_relocated_tasks( size_t new_tail );

    //! Get a task from the local pool.
    /** Called only by the pool owner.
        Returns the pointer to the task or NULL if a suitable task is not found.
        Resets the pool if it is empty. **/
    task* get_task( __TBB_ISOLATION_EXPR( isolation_tag isolation ) );

    //! Get a task from the local pool at specified location T.
    /** Returns the pointer to the task or NULL if the task cannot be executed,
        e.g. proxy has been deallocated or isolation constraint is not met.
        tasks_omitted tells if some tasks have been omitted.
        Called only by the pool owner. The caller should guarantee that the
        position T is not available for a thief. **/
#if __TBB_TASK_ISOLATION
    task* get_task( size_t T, isolation_tag isolation, bool& tasks_omitted );
#else
    task* get_task( size_t T );
#endif /* __TBB_TASK_ISOLATION */
    //! Attempt to get a task from the mailbox.
    /** Gets a task only if it has not been executed by its sender or a thief
        that has stolen it from the sender's task pool. Otherwise returns NULL.

        This method is intended to be used only by the thread extracting the proxy
        from its mailbox. (In contrast to the local task pool, the mailbox can be
        read only by its owner.) **/
    task* get_mailbox_task( __TBB_ISOLATION_EXPR( isolation_tag isolation ) );

    //! True if t is a task_proxy
    static bool is_proxy( const task& t ) {
        return t.prefix().extra_state==es_task_proxy;
    }

    //! Attempts to steal a task from a randomly chosen thread/scheduler
    task* steal_task( __TBB_ISOLATION_EXPR(isolation_tag isolation) );

    //! Steal task from another scheduler's ready pool.
    task* steal_task_from( __TBB_ISOLATION_ARG( arena_slot& victim_arena_slot, isolation_tag isolation ) );

#if __TBB_PREVIEW_CRITICAL_TASKS
    //! Tries to find critical task in critical task stream
    task* get_critical_task( __TBB_ISOLATION_EXPR(isolation_tag isolation) );

    //! Pushes the task to the critical task stream if it appears to be such a task,
    //! and returns true. Otherwise does nothing and returns false.
    bool handled_as_critical( task& t );
#endif

    /** Initial size of the task deque sufficient to serve without reallocation
        4 nested parallel_for calls with iteration space of 65535 grains each. **/
    static const size_t min_task_pool_size = 64;
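
    // Worked arithmetic behind the constant above (an interpretation, not from
    // the original source): recursively halving a 65535-grain iteration space
    // down to single grains keeps at most ~16 pending halves on the deque
    // (2^16 > 65535), so 4 nested parallel_for calls need about 4*16 = 64 slots.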

    //! Makes sure that the task pool can accommodate at least n more elements
    /** If necessary relocates existing task pointers or grows the ready task deque.
        Returns (possibly updated) tail index (not accounting for n). **/
    size_t prepare_task_pool( size_t n );

    //! Initialize a scheduler for a master thread.
    static generic_scheduler* create_master( arena* a );

    //! Perform necessary cleanup when a master thread stops using TBB.
    bool cleanup_master( bool blocking_terminate );

    //! Initialize a scheduler for a worker thread.
    static generic_scheduler* create_worker( market& m, size_t index, bool genuine );

    //! Perform necessary cleanup when a worker thread finishes.
    static void cleanup_worker( void* arg, bool worker );

protected:
    template<typename SchedulerTraits> friend class custom_scheduler;
    generic_scheduler( market &, bool );

public:
#if TBB_USE_ASSERT > 1
    //! Check that internal data structures are in consistent state.
    /** Raises __TBB_ASSERT failure if inconsistency is found. */
    void assert_task_pool_valid() const;
#else
    void assert_task_pool_valid() const {}
#endif /* TBB_USE_ASSERT <= 1 */

    void attach_arena( arena*, size_t index, bool is_master );
    void nested_arena_entry( arena*, size_t );
    void nested_arena_exit();
    void wait_until_empty();

    void spawn( task& first, task*& next ) __TBB_override;

    void spawn_root_and_wait( task& first, task*& next ) __TBB_override;

    void enqueue( task&, void* reserved ) __TBB_override;

    void local_spawn( task* first, task*& next );
    void local_spawn_root_and_wait( task* first, task*& next );
    virtual void local_wait_for_all( task& parent, task* child ) = 0;

    //! Destroy and deallocate this scheduler object.
    void destroy();

    //! Cleans up this scheduler (the scheduler might be destroyed).
    void cleanup_scheduler();

    //! Allocate task object, either from the heap or a free list.
    /** Returns uninitialized task object with initialized prefix. */
    task& allocate_task( size_t number_of_bytes,
                         __TBB_CONTEXT_ARG(task* parent, task_group_context* context) );

    //! Put task on free list.
    /** Does not call destructor. */
    template<free_task_hint h>
    void free_task( task& t );

    //! Return task object to the memory allocator.
    inline void deallocate_task( task& t );

    //! True if running on a worker thread, false otherwise.
    inline bool is_worker() const;

    //! True if the scheduler is on the outermost dispatch level.
    inline bool outermost_level() const;

    //! True if the scheduler is on the outermost dispatch level in a master thread.
    /** Returns true when this scheduler instance is associated with an application
        thread, and is not executing any TBB task. This includes being in a TBB
        dispatch loop (one of wait_for_all methods) invoked directly from that thread. **/
    inline bool master_outermost_level () const;

    //! True if the scheduler is on the outermost dispatch level in a worker thread.
    inline bool worker_outermost_level () const;

    //! Returns the concurrency limit of the current arena.
    unsigned max_threads_in_arena();

#if __TBB_COUNT_TASK_NODES
    intptr_t get_task_node_count( bool count_arena_workers = false );
#endif /* __TBB_COUNT_TASK_NODES */

    //! Special value used to mark my_return_list as not taking any more entries.
    static task* plugged_return_list() {return (task*)(intptr_t)(-1);}

    //! Number of small tasks that have been allocated by this scheduler.
    __TBB_atomic intptr_t my_small_task_count;

    //! List of small tasks that have been returned to this scheduler by other schedulers.
    // TODO IDEA: see if putting my_return_list on separate cache line improves performance
    task* my_return_list;

    //! Try getting a task from other threads (via mailbox, stealing, FIFO queue, orphans adoption).
    /** Returns obtained task or NULL if all attempts fail. */
    virtual task* receive_or_steal_task( __TBB_ISOLATION_ARG( __TBB_atomic reference_count& completion_ref_count, isolation_tag isolation ) ) = 0;

    //! Free a small task t that was allocated by a different scheduler
    void free_nonlocal_small_task( task& t );

#if __TBB_TASK_GROUP_CONTEXT
    //! Returns task group context used by this scheduler instance.
    /** This context is associated with root tasks created by a master thread
        without an explicitly specified context object, outside of any running task.

        Note that the default context of a worker thread is never accessed by
        user code (directly or indirectly). **/
    inline task_group_context* default_context ();

    //! Padding isolating thread-local members from members that can be written to by other threads.
    char _padding1[NFS_MaxLineSize - sizeof(context_list_node_t)];

    //! Head of the thread specific list of task group contexts.
    context_list_node_t my_context_list_head;

    //! Mutex protecting access to the list of task group contexts.
    // TODO: check whether it can be deadly preempted and replace by spinning/sleeping mutex
    spin_mutex my_context_list_mutex;

    //! Last state propagation epoch known to this thread
    /** Together with the_context_state_propagation_epoch constitutes a synchronization
        protocol that keeps the hot path of task group context construction/destruction
        mostly lock-free.
        When the local epoch equals the global one, the state of task group contexts
        registered with this thread is consistent with that of the task group trees
        they belong to. **/
    uintptr_t my_context_state_propagation_epoch;

    //! Flag indicating that a context is being destructed by its owner thread
    /** Together with my_nonlocal_ctx_list_update constitutes a synchronization
        protocol that keeps the hot path of context destruction (by the owner thread)
        mostly lock-free. **/
    tbb::atomic<uintptr_t> my_local_ctx_list_update;

#if __TBB_TASK_PRIORITY
    //! Returns reference priority used to decide whether a task should be offloaded.
    inline intptr_t effective_reference_priority () const;

    // TODO: move into slots and fix is_out_of_work
    //! Task pool for offloading tasks with priorities lower than the current top priority.
    task* my_offloaded_tasks;

    //! Points to the last offloaded task in the my_offloaded_tasks list.
    task** my_offloaded_task_list_tail_link;

    //! Indicator of how recently the offload area was checked for the presence of top priority tasks.
    uintptr_t my_local_reload_epoch;

    //! Indicates that the pool is likely non-empty even if it appears empty from the outside
    volatile bool my_pool_reshuffling_pending;

    //! Searches offload area for top priority tasks and reloads found ones into primary task pool.
    /** Returns one of the found tasks or NULL. **/
    task* reload_tasks( __TBB_ISOLATION_EXPR( isolation_tag isolation ) );

    task* reload_tasks( task*& offloaded_tasks, task**& offloaded_task_list_link, __TBB_ISOLATION_ARG( intptr_t top_priority, isolation_tag isolation ) );

    //! Moves tasks with priority below the top one from primary task pool into offload area.
    /** Returns the next execution candidate task or NULL. **/
    task* winnow_task_pool ( __TBB_ISOLATION_EXPR( isolation_tag isolation ) );

    //! Get a task from locked or empty pool in range [H0, T0). Releases or unlocks the task pool.
    /** Returns the found task or NULL. **/
    task* get_task_and_activate_task_pool( size_t H0, __TBB_ISOLATION_ARG( size_t T0, isolation_tag isolation ) );

    //! Unconditionally moves the task into offload area.
    inline void offload_task ( task& t, intptr_t task_priority );
#endif /* __TBB_TASK_PRIORITY */

    //! Detaches abandoned contexts
    /** These contexts must be destroyed by other threads. **/
    void cleanup_local_context_list ();

    //! Finds all contexts registered by this scheduler affected by the state change
    //! and propagates the new state to them.
    template <typename T>
    void propagate_task_group_state ( T task_group_context::*mptr_state, task_group_context& src, T new_state );

    // check consistency
    static void assert_context_valid(const task_group_context *tgc) {
        suppress_unused_warning(tgc);
#if TBB_USE_ASSERT
        __TBB_ASSERT(tgc, NULL);
        uintptr_t ctx = tgc->my_version_and_traits;
        __TBB_ASSERT(is_alive(ctx), "referenced task_group_context was destroyed");
        static const char *msg = "task_group_context is invalid";
        __TBB_ASSERT(!(ctx&~(3|(7<<task_group_context::traits_offset))), msg); // the value fits known values of versions and traits
        __TBB_ASSERT(tgc->my_kind < task_group_context::dying, msg);
        __TBB_ASSERT(tgc->my_cancellation_requested == 0 || tgc->my_cancellation_requested == 1, msg);
        __TBB_ASSERT(tgc->my_state < task_group_context::low_unused_state_bit, msg);
        if(tgc->my_kind != task_group_context::isolated) {
            __TBB_ASSERT(tgc->my_owner, msg);
            __TBB_ASSERT(tgc->my_node.my_next && tgc->my_node.my_prev, msg);
        }
#if __TBB_TASK_PRIORITY
        assert_priority_valid(tgc->my_priority);
#endif
        if(tgc->my_parent)
#if TBB_USE_ASSERT > 1
            assert_context_valid(tgc->my_parent);
#else
            __TBB_ASSERT(is_alive(tgc->my_parent->my_version_and_traits), msg);
#endif
#endif
    }
#endif /* __TBB_TASK_GROUP_CONTEXT */

#if _WIN32||_WIN64
private:
    //! Handle returned by RML when registering a master with RML
    ::rml::server::execution_resource_t master_exec_resource;
public:
#endif /* _WIN32||_WIN64 */

#if __TBB_TASK_GROUP_CONTEXT
    //! Flag indicating that a context is being destructed by non-owner thread.
    /** See also my_local_ctx_list_update. **/
    tbb::atomic<uintptr_t> my_nonlocal_ctx_list_update;
#endif /* __TBB_TASK_GROUP_CONTEXT */

#if __TBB_SURVIVE_THREAD_SWITCH
    __cilk_tbb_unwatch_thunk my_cilk_unwatch_thunk;
#if TBB_USE_ASSERT
    //! State values used to check interface contract with cilkrts.
    /** Names of cs_running...cs_freed derived from state machine diagram in cilk-tbb-interop.h */
    enum cilk_state_t {
        cs_none=0xF000, // Start at nonzero value so that we can detect use of zeroed memory.
        cs_running,
        cs_limbo,
        cs_freed
    };
    cilk_state_t my_cilk_state;
#endif /* TBB_USE_ASSERT */
#endif /* __TBB_SURVIVE_THREAD_SWITCH */

#if __TBB_STATISTICS
    //! Set of counters to track internal statistics on per thread basis
    /** Placed at the end of the class definition to minimize the disturbance of
        the core logic memory operations. **/
    mutable statistics_counters my_counters;
#endif /* __TBB_STATISTICS */

}; // class generic_scheduler


} // namespace internal
} // namespace tbb

#include "arena.h"
#include "governor.h"

namespace tbb {
namespace internal {

inline bool generic_scheduler::is_task_pool_published () const {
    __TBB_ASSERT(my_arena_slot, 0);
    return my_arena_slot->task_pool != EmptyTaskPool;
}

inline bool generic_scheduler::is_local_task_pool_quiescent () const {
    __TBB_ASSERT(my_arena_slot, 0);
    task** tp = my_arena_slot->task_pool;
    return tp == EmptyTaskPool || tp == LockedTaskPool;
}

inline bool generic_scheduler::is_quiescent_local_task_pool_empty () const {
    __TBB_ASSERT( is_local_task_pool_quiescent(), "Task pool is not quiescent" );
    return __TBB_load_relaxed(my_arena_slot->head) == __TBB_load_relaxed(my_arena_slot->tail);
}

inline bool generic_scheduler::is_quiescent_local_task_pool_reset () const {
    __TBB_ASSERT( is_local_task_pool_quiescent(), "Task pool is not quiescent" );
    return __TBB_load_relaxed(my_arena_slot->head) == 0 && __TBB_load_relaxed(my_arena_slot->tail) == 0;
}

inline bool generic_scheduler::outermost_level () const {
    return my_properties.outermost;
}

inline bool generic_scheduler::master_outermost_level () const {
    return !is_worker() && outermost_level();
}

inline bool generic_scheduler::worker_outermost_level () const {
    return is_worker() && outermost_level();
}

#if __TBB_TASK_GROUP_CONTEXT
inline task_group_context* generic_scheduler::default_context () {
    return my_dummy_task->prefix().context;
}
#endif /* __TBB_TASK_GROUP_CONTEXT */

inline void generic_scheduler::attach_mailbox( affinity_id id ) {
    __TBB_ASSERT(id>0,NULL);
    my_inbox.attach( my_arena->mailbox(id) );
    my_affinity_id = id;
}

inline bool generic_scheduler::is_worker() const {
    return my_properties.type == scheduler_properties::worker;
}

inline unsigned generic_scheduler::max_threads_in_arena() {
    __TBB_ASSERT(my_arena, NULL);
    return my_arena->my_num_slots;
}

//! Return task object to the memory allocator.
inline void generic_scheduler::deallocate_task( task& t ) {
#if TBB_USE_ASSERT
    task_prefix& p = t.prefix();
    p.state = 0xFF;
    p.extra_state = 0xFF;
    poison_pointer(p.next);
#endif /* TBB_USE_ASSERT */
    NFS_Free((char*)&t-task_prefix_reservation_size);
#if __TBB_COUNT_TASK_NODES
    --my_task_node_count;
#endif /* __TBB_COUNT_TASK_NODES */
}

#if __TBB_COUNT_TASK_NODES
inline intptr_t generic_scheduler::get_task_node_count( bool count_arena_workers ) {
    return my_task_node_count + (count_arena_workers? my_arena->workers_task_node_count(): 0);
}
#endif /* __TBB_COUNT_TASK_NODES */

inline void generic_scheduler::reset_task_pool_and_leave () {
    __TBB_ASSERT( my_arena_slot->task_pool == LockedTaskPool, "Task pool must be locked when resetting task pool" );
    __TBB_store_relaxed( my_arena_slot->tail, 0 );
    __TBB_store_relaxed( my_arena_slot->head, 0 );
    leave_task_pool();
}

// TODO: move to arena_slot
inline void generic_scheduler::commit_spawned_tasks( size_t new_tail ) {
    __TBB_ASSERT ( new_tail <= my_arena_slot->my_task_pool_size, "task deque end was overwritten" );
    // emit "task was released" signal
    ITT_NOTIFY(sync_releasing, (void*)((uintptr_t)my_arena_slot+sizeof(uintptr_t)));
    // Release fence is necessary to make sure that previously stored task pointers
    // are visible to thieves.
    __TBB_store_with_release( my_arena_slot->tail, new_tail );
}
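
// Illustration of the pairing side (an assumption, not code from this file):
// a thief is expected to read the tail with matching acquire semantics, e.g.
//     size_t tail = __TBB_load_with_acquire( victim_slot.tail );
// so that the release store above publishes the task pointers written before it.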

inline void generic_scheduler::commit_relocated_tasks ( size_t new_tail ) {
    __TBB_ASSERT( is_local_task_pool_quiescent(),
                  "Task pool must be locked when calling commit_relocated_tasks()" );
    __TBB_store_relaxed( my_arena_slot->head, 0 );
    // Tail is updated last to minimize probability of a thread making arena
    // snapshot being misguided into thinking that this task pool is empty.
    __TBB_store_release( my_arena_slot->tail, new_tail );
    release_task_pool();
}

template<free_task_hint hint>
void generic_scheduler::free_task( task& t ) {
#if __TBB_HOARD_NONLOCAL_TASKS
    static const int h = hint&(~local_task);
#else
    static const free_task_hint h = hint;
#endif
    GATHER_STATISTIC(--my_counters.active_tasks);
    task_prefix& p = t.prefix();
    // Verify that optimization hints are correct.
    __TBB_ASSERT( h!=small_local_task || p.origin==this, NULL );
    __TBB_ASSERT( !(h&small_task) || p.origin, NULL );
    __TBB_ASSERT( !(h&local_task) || (!p.origin || uintptr_t(p.origin) > uintptr_t(4096)), "local_task means allocated");
    poison_value(p.depth);
    poison_value(p.ref_count);
    poison_pointer(p.owner);
#if __TBB_PREVIEW_RESUMABLE_TASKS
    __TBB_ASSERT(1L << t.state() & (1L << task::executing | 1L << task::allocated | 1L << task::to_resume), NULL);
#else
    __TBB_ASSERT(1L << t.state() & (1L << task::executing | 1L << task::allocated), NULL);
#endif
    p.state = task::freed;
    if( h==small_local_task || p.origin==this ) {
        GATHER_STATISTIC(++my_counters.free_list_length);
        p.next = my_free_list;
        my_free_list = &t;
    } else if( !(h&local_task) && p.origin && uintptr_t(p.origin) < uintptr_t(4096) ) {
        // a special value reserved for future use, do nothing since
        // origin is not pointing to a scheduler instance
    } else if( !(h&local_task) && p.origin ) {
        GATHER_STATISTIC(++my_counters.free_list_length);
#if __TBB_HOARD_NONLOCAL_TASKS
        if( !(h&no_cache) ) {
            p.next = my_nonlocal_free_list;
            my_nonlocal_free_list = &t;
        } else
#endif
            free_nonlocal_small_task(t);
    } else {
        GATHER_STATISTIC(--my_counters.big_tasks);
        deallocate_task(t);
    }
}
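
// Usage sketch (an assumption about call sites elsewhere in the scheduler):
// the hint encodes what the caller statically knows about the task, e.g.
//     s->free_task<small_local_task>( t ); // t allocated by s and fits the free list
//     s->free_task<no_hint>( t );          // nothing known; all checks happen at runtime
// so for hinted calls the branches above fold away at compile time.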

#if __TBB_TASK_PRIORITY
inline intptr_t generic_scheduler::effective_reference_priority () const {
    // Workers on the outermost dispatch level (i.e. with empty stack) use market's
    // priority as a reference point (to speedup discovering process level priority
    // changes). But when there are enough workers to service (even if only partially)
    // a lower priority arena, they should use arena's priority as a reference, lest
    // they be trapped in futile spinning (because market's priority would prohibit
    // executing ANY tasks in this arena).
    return !worker_outermost_level() ||
        my_arena->my_num_workers_allotted < my_arena->num_workers_active() ? *my_ref_top_priority : my_arena->my_top_priority;
}

inline void generic_scheduler::offload_task ( task& t, intptr_t /*priority*/ ) {
    GATHER_STATISTIC( ++my_counters.prio_tasks_offloaded );
    __TBB_ASSERT( !is_proxy(t), "The proxy task cannot be offloaded" );
    __TBB_ASSERT( my_offloaded_task_list_tail_link && !*my_offloaded_task_list_tail_link, NULL );
#if TBB_USE_ASSERT
    t.prefix().state = task::ready;
#endif /* TBB_USE_ASSERT */
    t.prefix().next_offloaded = my_offloaded_tasks;
    my_offloaded_tasks = &t;
}
#endif /* __TBB_TASK_PRIORITY */

#if __TBB_PREVIEW_RESUMABLE_TASKS
inline void generic_scheduler::set_post_resume_action(post_resume_action pra, void* arg) {
    __TBB_ASSERT(my_post_resume_action == PRA_NONE, "Post resume action has already been set.");
    __TBB_ASSERT(!my_post_resume_arg, NULL);

    my_post_resume_action = pra;
    my_post_resume_arg = arg;
}

inline bool generic_scheduler::prepare_resume(generic_scheduler& target) {
    // The second condition holds for a worker, or for the cleanup operation of a master.
    if (my_properties.outermost && my_wait_task == my_dummy_task) {
        if (my_properties.genuine) {
            // We are in someone's original scheduler.
            target.set_post_resume_action(PRA_NOTIFY, my_current_is_recalled);
            return true;
        }
        // We are in a coroutine on the outermost level.
        target.set_post_resume_action(PRA_CLEANUP, this);
        my_target_on_exit = &target;
        // Request to finish the coroutine instead of immediate resume.
        return false;
    }
    __TBB_ASSERT(my_wait_task != my_dummy_task, NULL);
    // We are in a coroutine on a nested level.
    my_wait_task->prefix().abandoned_scheduler = this;
    target.set_post_resume_action(PRA_ABANDON, my_wait_task);
    return true;
}

inline bool generic_scheduler::resume_original_scheduler() {
    generic_scheduler& target = *my_arena_slot->my_scheduler;
    if (!prepare_resume(target)) {
        // We should return and finish the current coroutine.
        return false;
    }
    resume(target);
    return true;
}

inline void generic_scheduler::resume(generic_scheduler& target) {
    // Do not create non-trivial objects on the stack of this function. They might never be destroyed.
    __TBB_ASSERT(governor::is_set(this), NULL);
    __TBB_ASSERT(target.my_post_resume_action != PRA_NONE,
        "The post resume action is not set. Has prepare_resume been called?");
    __TBB_ASSERT(target.my_post_resume_arg, NULL);
    __TBB_ASSERT(&target != this, NULL);
    __TBB_ASSERT(target.my_arena == my_arena, "Cross-arena switch is forbidden.");

    // Transfer thread related data.
    target.my_arena_index = my_arena_index;
    target.my_arena_slot = my_arena_slot;
#if __TBB_SCHEDULER_OBSERVER
    target.my_last_global_observer = my_last_global_observer;
#endif
#if __TBB_ARENA_OBSERVER
    target.my_last_local_observer = my_last_local_observer;
#endif
    target.attach_mailbox(affinity_id(target.my_arena_index + 1));

#if __TBB_TASK_PRIORITY
    if (my_offloaded_tasks)
        my_arena->orphan_offloaded_tasks(*this);
#endif /* __TBB_TASK_PRIORITY */

    governor::assume_scheduler(&target);
    my_co_context.resume(target.my_co_context);
    __TBB_ASSERT(governor::is_set(this), NULL);

    do_post_resume_action();
    if (this == my_arena_slot->my_scheduler) {
        my_arena_slot->my_scheduler_is_recalled->store<tbb::relaxed>(false);
    }
}

inline void generic_scheduler::do_post_resume_action() {
    __TBB_ASSERT(my_post_resume_action != PRA_NONE, "The post resume action is not set.");
    __TBB_ASSERT(my_post_resume_arg, NULL);

    switch (my_post_resume_action) {
    case PRA_ABANDON:
    {
        task_prefix& wait_task_prefix = static_cast<task*>(my_post_resume_arg)->prefix();
        reference_count old_ref_count = __TBB_FetchAndAddW(&wait_task_prefix.ref_count, internal::abandon_flag);
        __TBB_ASSERT(old_ref_count > 0, NULL);
        if (old_ref_count == 1) {
            // Remove the abandon flag.
            __TBB_store_with_release(wait_task_prefix.ref_count, 1);
            // The wait has been completed. Spawn a resume task.
            tbb::task::resume(wait_task_prefix.abandoned_scheduler);
        }
        break;
    }
    case PRA_CALLBACK:
    {
        callback_t callback = *static_cast<callback_t*>(my_post_resume_arg);
        callback();
        break;
    }
    case PRA_CLEANUP:
    {
        generic_scheduler* to_cleanup = static_cast<generic_scheduler*>(my_post_resume_arg);
        __TBB_ASSERT(!to_cleanup->my_properties.genuine, NULL);
        // Release coroutine's reference to my_arena.
        to_cleanup->my_arena->on_thread_leaving<arena::ref_external>();
        // Cache the coroutine for possible later reuse
        to_cleanup->my_arena->my_co_cache.push(to_cleanup);
        break;
    }
    case PRA_NOTIFY:
    {
        tbb::atomic<bool>& scheduler_recall_flag = *static_cast<tbb::atomic<bool>*>(my_post_resume_arg);
        scheduler_recall_flag = true;
        // Do not access recall_flag because it can be destroyed after the notification.
        break;
    }
    default:
        __TBB_ASSERT(false, NULL);
    }

    my_post_resume_action = PRA_NONE;
    my_post_resume_arg = NULL;
}
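
// Note on the PRA_ABANDON branch above (an interpretation, not from the original
// source): the fetch-and-add publishes the abandon flag atomically with reading
// the old reference count. If the wait had already completed (old count == 1),
// the abandoning side itself clears the flag and resumes the abandoned scheduler;
// otherwise the thread that later observes the flag while dropping the count is
// expected to perform the resume.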

struct recall_functor {
    tbb::atomic<bool>* scheduler_recall_flag;

    recall_functor(tbb::atomic<bool>* recall_flag_) :
        scheduler_recall_flag(recall_flag_) {}

    void operator()(task::suspend_point /*tag*/) {
        *scheduler_recall_flag = true;
    }
};
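
// Usage sketch (an assumption about the recall path, not code from this file):
// a dispatch loop that detects its original thread stack is wanted back could do
//     tbb::task::suspend( recall_functor( slot.my_scheduler_is_recalled ) );
// suspending the coroutine and raising the flag checked by the owning thread.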

#if _WIN32
/* [[noreturn]] */ inline void __stdcall co_local_wait_for_all(void* arg) {
#else
/* [[noreturn]] */ inline void co_local_wait_for_all(void* arg) {
#endif
    // Do not create non-trivial objects on the stack of this function. They will never be destroyed.
    generic_scheduler& s = *static_cast<generic_scheduler*>(arg);
    __TBB_ASSERT(governor::is_set(&s), NULL);
    // For a correct task stealing threshold, calculate the stack bounds when the coroutine starts.
    s.init_stack_info();
    // Basically calls the user callback passed to the tbb::task::suspend function
    s.do_post_resume_action();
    // Endless loop here because the coroutine can be reused
    for( ;; ) {
        __TBB_ASSERT(s.my_innermost_running_task == s.my_dummy_task, NULL);
        __TBB_ASSERT(s.worker_outermost_level(), NULL);
        s.local_wait_for_all(*s.my_dummy_task, NULL);
        __TBB_ASSERT(s.my_target_on_exit, NULL);
        __TBB_ASSERT(s.my_wait_task == NULL, NULL);
        s.resume(*s.my_target_on_exit);
    }
    // This code is unreachable
}
#endif /* __TBB_PREVIEW_RESUMABLE_TASKS */

#if __TBB_TASK_GROUP_CONTEXT
//! Helper class for tracking floating point context and task group context switches
/** Assuming presence of an itt collector, in addition to keeping track of floating
    point context, this class emits itt events to indicate begin and end of task group
    context execution **/
template <bool report_tasks>
class context_guard_helper {
    const task_group_context *curr_ctx;
#if __TBB_FP_CONTEXT
    cpu_ctl_env guard_cpu_ctl_env;
    cpu_ctl_env curr_cpu_ctl_env;
#endif
public:
    context_guard_helper() : curr_ctx( NULL ) {
#if __TBB_FP_CONTEXT
        guard_cpu_ctl_env.get_env();
        curr_cpu_ctl_env = guard_cpu_ctl_env;
#endif
    }
    ~context_guard_helper() {
#if __TBB_FP_CONTEXT
        if ( curr_cpu_ctl_env != guard_cpu_ctl_env )
            guard_cpu_ctl_env.set_env();
#endif
        if ( report_tasks && curr_ctx )
            ITT_TASK_END;
    }
    // The function is called from the bypass dispatch loop on the hot path.
    // Consider performance issues when refactoring.
    void set_ctx( const task_group_context *ctx ) {
        generic_scheduler::assert_context_valid( ctx );
#if __TBB_FP_CONTEXT
        const cpu_ctl_env &ctl = *punned_cast<cpu_ctl_env*>( &ctx->my_cpu_ctl_env );
        // Compare the FPU settings directly because the context can be reused between parallel algorithms.
        if ( ctl != curr_cpu_ctl_env ) {
            curr_cpu_ctl_env = ctl;
            curr_cpu_ctl_env.set_env();
        }
#endif
        if ( report_tasks && ctx != curr_ctx ) {
            // If a task group context was active, report the end of the current execution frame.
            if ( curr_ctx )
                ITT_TASK_END;
            // Report the beginning of a new task group context execution frame.
            // The address of the task group context object is used to group tasks (parent).
            // The id of the task execution frame is NULL and reserved for future use.
            ITT_TASK_BEGIN( ctx, ctx->my_name, NULL );
            curr_ctx = ctx;
        }
    }
    void restore_default() {
#if __TBB_FP_CONTEXT
        if ( curr_cpu_ctl_env != guard_cpu_ctl_env ) {
            guard_cpu_ctl_env.set_env();
            curr_cpu_ctl_env = guard_cpu_ctl_env;
        }
#endif
    }
};
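
// Usage sketch (an assumption about the dispatch loop, not code from this file):
//     context_guard_helper</*report_tasks=*/true> guard;
//     guard.set_ctx( t->prefix().context ); // switch FPU state and ITT frame
//     t->execute();
//     guard.restore_default();              // restore the FPU state on exit
// The destructor also restores the captured FPU environment if it was changed.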
#else
template <bool T>
struct context_guard_helper {
    void set_ctx() {}
    void restore_default() {}
};
#endif /* __TBB_TASK_GROUP_CONTEXT */

} // namespace internal
} // namespace tbb

#endif /* _TBB_scheduler_H */