1 /*
2 * kmp_wait_release.h -- Wait/Release implementation
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22
23 /*!
24 @defgroup WAIT_RELEASE Wait/Release operations
25
26 The definitions and functions here implement the lowest level thread
27 synchronizations of suspending a thread and awaking it. They are used to build
28 higher level operations such as barriers and fork/join.
29 */
30
31 /*!
32 @ingroup WAIT_RELEASE
33 @{
34 */
35
/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};
44
/*!
 * Base class for wait/release volatile flag.
 *
 * Wraps a raw pointer to the flag word plus a flag_type tag, using plain
 * volatile reads/writes (no C11 atomics).
 * NOTE(review): member order (loc first, then t) appears to be relied upon by
 * __kmp_null_resume_wrapper, which casts an untyped flag pointer to
 * kmp_flag_64* just to read the type tag — keep the layout in sync with
 * kmp_flag<> below.
 */
template <typename P> class kmp_flag_native {
  volatile P *loc; /**< Pointer to flag storage modified by another thread */
  flag_type t; /**< "Type" of the flag in loc */

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  /*! @result the pointer to the actual flag */
  volatile P *get() { return loc; }
  /*! @result void* pointer to the actual flag */
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  /*! @param new_loc in set loc to point at new_loc */
  void set(volatile P *new_loc) { loc = new_loc; }
  /*! @result the flag_type */
  flag_type get_type() { return t; }
  /*! @result flag value; plain volatile read */
  P load() { return *loc; }
  /*! @param val the new flag value to be stored; plain volatile write */
  void store(P val) { *loc = val; }
};
62
/*!
 * Base class for wait/release atomic flag.
 *
 * Same shape as kmp_flag_native but the storage is a std::atomic word:
 * load() uses acquire ordering and store() uses release ordering.
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_type t; /**< "Type" of the flag in loc */
public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return t; }
  /*!
   * @result flag value (acquire load)
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val the new flag value to be stored (release store)
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};
119
#if OMPT_SUPPORT
OMPT_NOINLINE
// Report the end of the implicit task to an attached OMPT tool and move this
// thread's OMPT state out of the implicit-barrier wait. Only acts when the
// thread is currently in ompt_state_wait_barrier_implicit; otherwise a no-op.
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    // Fire barrier-wait-end then barrier-end for the implicit barrier.
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        // A league (teams) thread ends an initial task; a team worker ends an
        // implicit task.
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      // Master stays in overhead until it leaves the barrier code.
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif
157
158 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
159 __kmp_wait_* must make certain that another thread calls __kmp_release
160 to wake it back up to prevent deadlocks!
161
162 NOTE: We may not belong to a team at this point. */
163 template <class C, int final_spin, bool cancellable = false,
164 bool sleepable = true>
165 static inline bool
__kmp_wait_template(kmp_info_t * this_thr,C * flag USE_ITT_BUILD_ARG (void * itt_sync_obj))166 __kmp_wait_template(kmp_info_t *this_thr,
167 C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
168 #if USE_ITT_BUILD && USE_ITT_NOTIFY
169 volatile void *spin = flag->get();
170 #endif
171 kmp_uint32 spins;
172 int th_gtid;
173 int tasks_completed = FALSE;
174 int oversubscribed;
175 #if !KMP_USE_MONITOR
176 kmp_uint64 poll_count;
177 kmp_uint64 hibernate_goal;
178 #else
179 kmp_uint32 hibernate;
180 #endif
181
182 KMP_FSYNC_SPIN_INIT(spin, NULL);
183 if (flag->done_check()) {
184 KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
185 return false;
186 }
187 th_gtid = this_thr->th.th_info.ds.ds_gtid;
188 if (cancellable) {
189 kmp_team_t *team = this_thr->th.th_team;
190 if (team && team->t.t_cancel_request == cancel_parallel)
191 return true;
192 }
193 #if KMP_OS_UNIX
194 if (final_spin)
195 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
196 #endif
197 KA_TRACE(20,
198 ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
199 #if KMP_STATS_ENABLED
200 stats_state_e thread_state = KMP_GET_THREAD_STATE();
201 #endif
202
203 /* OMPT Behavior:
204 THIS function is called from
205 __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
206 these have join / fork behavior
207
208 In these cases, we don't change the state or trigger events in THIS
209 function.
210 Events are triggered in the calling code (__kmp_barrier):
211
212 state := ompt_state_overhead
213 barrier-begin
214 barrier-wait-begin
215 state := ompt_state_wait_barrier
216 call join-barrier-implementation (finally arrive here)
217 {}
218 call fork-barrier-implementation (finally arrive here)
219 {}
220 state := ompt_state_overhead
221 barrier-wait-end
222 barrier-end
223 state := ompt_state_work_parallel
224
225
226 __kmp_fork_barrier (after thread creation, before executing implicit task)
227 call fork-barrier-implementation (finally arrive here)
228 {} // worker arrive here with state = ompt_state_idle
229
230
231 __kmp_join_barrier (implicit barrier at end of parallel region)
232 state := ompt_state_barrier_implicit
233 barrier-begin
234 barrier-wait-begin
235 call join-barrier-implementation (finally arrive here
236 final_spin=FALSE)
237 {
238 }
239 __kmp_fork_barrier (implicit barrier at end of parallel region)
240 call fork-barrier-implementation (finally arrive here final_spin=TRUE)
241
242 Worker after task-team is finished:
243 barrier-wait-end
244 barrier-end
245 implicit-task-end
246 idle-begin
247 state := ompt_state_idle
248
249 Before leaving, if state = ompt_state_idle
250 idle-end
251 state := ompt_state_overhead
252 */
253 #if OMPT_SUPPORT
254 ompt_state_t ompt_entry_state;
255 ompt_data_t *tId;
256 if (ompt_enabled.enabled) {
257 ompt_entry_state = this_thr->th.ompt_thread_info.state;
258 if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
259 KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
260 ompt_lw_taskteam_t *team =
261 this_thr->th.th_team->t.ompt_serialized_team_info;
262 if (team) {
263 tId = &(team->ompt_task_info.task_data);
264 } else {
265 tId = OMPT_CUR_TASK_DATA(this_thr);
266 }
267 } else {
268 tId = &(this_thr->th.ompt_thread_info.task_data);
269 }
270 if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
271 this_thr->th.th_task_team == NULL)) {
272 // implicit task is done. Either no taskqueue, or task-team finished
273 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
274 }
275 }
276 #endif
277
278 KMP_INIT_YIELD(spins); // Setup for waiting
279
280 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
281 __kmp_pause_status == kmp_soft_paused) {
282 #if KMP_USE_MONITOR
283 // The worker threads cannot rely on the team struct existing at this point.
284 // Use the bt values cached in the thread struct instead.
285 #ifdef KMP_ADJUST_BLOCKTIME
286 if (__kmp_pause_status == kmp_soft_paused ||
287 (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
288 // Force immediate suspend if not set by user and more threads than
289 // available procs
290 hibernate = 0;
291 else
292 hibernate = this_thr->th.th_team_bt_intervals;
293 #else
294 hibernate = this_thr->th.th_team_bt_intervals;
295 #endif /* KMP_ADJUST_BLOCKTIME */
296
297 /* If the blocktime is nonzero, we want to make sure that we spin wait for
298 the entirety of the specified #intervals, plus up to one interval more.
299 This increment make certain that this thread doesn't go to sleep too
300 soon. */
301 if (hibernate != 0)
302 hibernate++;
303
304 // Add in the current time value.
305 hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
306 KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
307 th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
308 hibernate - __kmp_global.g.g_time.dt.t_value));
309 #else
310 if (__kmp_pause_status == kmp_soft_paused) {
311 // Force immediate suspend
312 hibernate_goal = KMP_NOW();
313 } else
314 hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
315 poll_count = 0;
316 #endif // KMP_USE_MONITOR
317 }
318
319 oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
320 KMP_MB();
321
322 // Main wait spin loop
323 while (flag->notdone_check()) {
324 kmp_task_team_t *task_team = NULL;
325 if (__kmp_tasking_mode != tskm_immediate_exec) {
326 task_team = this_thr->th.th_task_team;
327 /* If the thread's task team pointer is NULL, it means one of 3 things:
328 1) A newly-created thread is first being released by
329 __kmp_fork_barrier(), and its task team has not been set up yet.
330 2) All tasks have been executed to completion.
331 3) Tasking is off for this region. This could be because we are in a
332 serialized region (perhaps the outer one), or else tasking was manually
333 disabled (KMP_TASKING=0). */
334 if (task_team != NULL) {
335 if (TCR_SYNC_4(task_team->tt.tt_active)) {
336 if (KMP_TASKING_ENABLED(task_team))
337 flag->execute_tasks(
338 this_thr, th_gtid, final_spin,
339 &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
340 else
341 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
342 } else {
343 KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
344 #if OMPT_SUPPORT
345 // task-team is done now, other cases should be catched above
346 if (final_spin && ompt_enabled.enabled)
347 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
348 #endif
349 this_thr->th.th_task_team = NULL;
350 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
351 }
352 } else {
353 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
354 } // if
355 } // if
356
357 KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
358 if (TCR_4(__kmp_global.g.g_done)) {
359 if (__kmp_global.g.g_abort)
360 __kmp_abort_thread();
361 break;
362 }
363
364 // If we are oversubscribed, or have waited a bit (and
365 // KMP_LIBRARY=throughput), then yield
366 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
367
368 #if KMP_STATS_ENABLED
369 // Check if thread has been signalled to idle state
370 // This indicates that the logical "join-barrier" has finished
371 if (this_thr->th.th_stats->isIdle() &&
372 KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
373 KMP_SET_THREAD_STATE(IDLE);
374 KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
375 }
376 #endif
377 // Check if the barrier surrounding this wait loop has been cancelled
378 if (cancellable) {
379 kmp_team_t *team = this_thr->th.th_team;
380 if (team && team->t.t_cancel_request == cancel_parallel)
381 break;
382 }
383
384 // Don't suspend if KMP_BLOCKTIME is set to "infinite"
385 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
386 __kmp_pause_status != kmp_soft_paused)
387 continue;
388
389 // Don't suspend if there is a likelihood of new tasks being spawned.
390 if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
391 continue;
392
393 #if KMP_USE_MONITOR
394 // If we have waited a bit more, fall asleep
395 if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
396 continue;
397 #else
398 if (KMP_BLOCKING(hibernate_goal, poll_count++))
399 continue;
400 #endif
401 // Don't suspend if wait loop designated non-sleepable
402 // in template parameters
403 if (!sleepable)
404 continue;
405
406 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
407 __kmp_pause_status != kmp_soft_paused)
408 continue;
409
410 KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
411
412 #if KMP_OS_UNIX
413 if (final_spin)
414 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
415 #endif
416 flag->suspend(th_gtid);
417 #if KMP_OS_UNIX
418 if (final_spin)
419 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
420 #endif
421
422 if (TCR_4(__kmp_global.g.g_done)) {
423 if (__kmp_global.g.g_abort)
424 __kmp_abort_thread();
425 break;
426 } else if (__kmp_tasking_mode != tskm_immediate_exec &&
427 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
428 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
429 }
430 // TODO: If thread is done with work and times out, disband/free
431 }
432
433 #if OMPT_SUPPORT
434 ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
435 if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
436 #if OMPT_OPTIONAL
437 if (final_spin) {
438 __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
439 ompt_exit_state = this_thr->th.ompt_thread_info.state;
440 }
441 #endif
442 if (ompt_exit_state == ompt_state_idle) {
443 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
444 }
445 }
446 #endif
447 #if KMP_STATS_ENABLED
448 // If we were put into idle state, pop that off the state stack
449 if (KMP_GET_THREAD_STATE() == IDLE) {
450 KMP_POP_PARTITIONED_TIMER();
451 KMP_SET_THREAD_STATE(thread_state);
452 this_thr->th.th_stats->resetIdleFlag();
453 }
454 #endif
455
456 #if KMP_OS_UNIX
457 if (final_spin)
458 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
459 #endif
460 KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
461 if (cancellable) {
462 kmp_team_t *team = this_thr->th.th_team;
463 if (team && team->t.t_cancel_request == cancel_parallel) {
464 if (tasks_completed) {
465 // undo the previous decrement of unfinished_threads so that the
466 // thread can decrement at the join barrier with no problem
467 kmp_task_team_t *task_team = this_thr->th.th_task_team;
468 std::atomic<kmp_int32> *unfinished_threads =
469 &(task_team->tt.tt_unfinished_threads);
470 KMP_ATOMIC_INC(unfinished_threads);
471 }
472 return true;
473 }
474 }
475 return false;
476 }
477
478 /* Release any threads specified as waiting on the flag by releasing the flag
479 and resume the waiting thread if indicated by the sleep bit(s). A thread that
480 calls __kmp_wait_template must call this function to wake up the potentially
481 sleeping thread and prevent deadlocks! */
__kmp_release_template(C * flag)482 template <class C> static inline void __kmp_release_template(C *flag) {
483 #ifdef KMP_DEBUG
484 int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
485 #endif
486 KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
487 KMP_DEBUG_ASSERT(flag->get());
488 KMP_FSYNC_RELEASING(flag->get_void_p());
489
490 flag->internal_release();
491
492 KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
493 flag->load()));
494
495 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
496 // Only need to check sleep stuff if infinite block time not set.
497 // Are *any* threads waiting on flag sleeping?
498 if (flag->is_any_sleeping()) {
499 for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
500 // if sleeping waiter exists at i, sets current_waiter to i inside flag
501 kmp_info_t *waiter = flag->get_waiter(i);
502 if (waiter) {
503 int wait_gtid = waiter->th.th_info.ds.ds_gtid;
504 // Wake up thread if needed
505 KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
506 "flag(%p) set\n",
507 gtid, wait_gtid, flag->get()));
508 flag->resume(wait_gtid); // unsets flag's current_waiter when done
509 }
510 }
511 }
512 }
513 }
514
// Primary template is intentionally empty; only the kmp_uint32 / kmp_uint64
// specializations below are ever instantiated.
template <typename FlagType> struct flag_traits {};
516
// Traits mapping 32-bit flag words to the project's 4-byte primitives.
template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  // Read of the flag word via the TCR_4 project macro.
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  // Atomically add 4 to the flag; returns the value before the add.
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  // Atomic fetch-OR (used to set sleep bits); returns the previous value.
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  // Atomic fetch-AND (used to clear sleep bits); returns the previous value.
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};
531
// Traits mapping 64-bit flag words to the project's 8-byte primitives.
template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  // Read of the flag word via the TCR_8 project macro.
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  // Atomically add 4 to the flag; returns the value before the add.
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  // Atomic fetch-OR (used to set sleep bits); returns the previous value.
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  // Atomic fetch-AND (used to clear sleep bits); returns the previous value.
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
546
// Basic flag that does not use C11 Atomics: all synchronization goes through
// the volatile base class and the flag_traits test_then_* primitives.
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag to to check if flag has been
                       released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  // NOTE(review): the first two constructors leave checker uninitialized;
  // done_check()/done_check_val() are presumably only called on flags built
  // with the (p, c) overload — confirm at call sites.
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
645
// Atomic counterpart of kmp_basic_flag_native: same protocol, but the flag
// word is a std::atomic and the KMP_ATOMIC_* helpers are used.
template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag to to check if flag has been
                       released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  // NOTE(review): the first two constructors leave checker uninitialized;
  // done_check()/done_check_val() are presumably only called on flags built
  // with the (p, c) overload — confirm at call sites.
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return this->load() == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
738
// 32-bit flag over std::atomic storage; binds the generic wait/release
// machinery to the 32-bit suspend/resume/task-execution entry points.
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  // final_spin is a runtime value; dispatch to the matching compile-time
  // instantiation of the wait template.
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};
767
// 64-bit flag over a plain volatile word (native, non-C11-atomic base, unlike
// kmp_flag_32); binds the generic wait/release machinery to the 64-bit
// suspend/resume/task-execution entry points.
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  // final_spin is a runtime value; dispatch to the matching compile-time
  // instantiation of the wait template.
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  // Cancellable, non-sleeping wait; returns true if the enclosing parallel
  // region was cancelled while waiting.
  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
                                int final_spin
                                    USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    bool retval = false;
    if (final_spin)
      retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    return retval;
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
808
// Hierarchical 64-bit on-core barrier instantiation: each byte of the 64-bit
// flag word belongs to a different thread (selected by `offset`), so one cache
// line serves a whole group of threads.
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker; /**< Byte value that signals release for this thread. */
  kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this flag. */
  kmp_uint32 num_waiting_threads; /**< Count of sleeping threads. */
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                           location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  // View byte `offset` of the 64-bit word at loc. Relies on the byte order of
  // the flag word matching the per-thread byte assignment.
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  // NOTE(review): the first two constructors leave checker/bt/this_thr (and,
  // for the first, offset) uninitialized; operations that read them are
  // presumably only used on fully-constructed flags — confirm at call sites.
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  // Released when this thread's byte of the word equals checker.
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      // Redirect the wait to this thread's own b_go flag.
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      // Infinite blocktime: no sleep bits possible, plain byte store suffices.
      byteref(get(), offset) = 1;
    } else {
      // Atomic OR so concurrent sleep-bit updates are not lost.
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
911
912 // Used to wake up threads, volatile void* flag is usually the th_sleep_loc
913 // associated with int gtid.
914 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
915 if (!flag)
916 return;
917
918 switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
919 case flag32:
920 __kmp_resume_32(gtid, NULL);
921 break;
922 case flag64:
923 __kmp_resume_64(gtid, NULL);
924 break;
925 case flag_oncore:
926 __kmp_resume_oncore(gtid, NULL);
927 break;
928 }
929 }
930
931 /*!
932 @}
933 */
934
935 #endif // KMP_WAIT_RELEASE_H
936