1 //===------------ sync.cu - GPU OpenMP synchronizations ---------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Include all synchronization.
10 //
11 //===----------------------------------------------------------------------===//
12 #pragma omp declare target
13
14 #include "common/omptarget.h"
15 #include "target_impl.h"
16
17 ////////////////////////////////////////////////////////////////////////////////
18 // KMP Ordered calls
19 ////////////////////////////////////////////////////////////////////////////////
20
// Entry point for an OpenMP "ordered" region. On this device runtime it only
// logs the call and performs no synchronization — presumably ordered execution
// is already guaranteed by the loop-scheduling implementation (NOTE(review):
// confirm; see also the FIXME on barriers above).
EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_ordered\n");
}
24
// Exit point for an OpenMP "ordered" region. Matches __kmpc_ordered above:
// logging only, no synchronization performed.
EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_end_ordered\n");
}
28
29 ////////////////////////////////////////////////////////////////////////////////
30 // KMP Barriers
31 ////////////////////////////////////////////////////////////////////////////////
32
33 // a team is a block: we can use CUDA native synchronization mechanism
34 // FIXME: what if not all threads (warps) participate to the barrier?
35 // We may need to implement it differently
36
// Cancellation-point barrier. Performs a full team barrier and always
// returns 0, i.e. cancellation is never observed/reported by this runtime.
EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_cancel_barrier\n");
  __kmpc_barrier(loc_ref, tid);
  PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");
  // 0 == "not cancelled"; cancellation is not supported here.
  return 0;
}
43
// Full OpenMP barrier across the threads of the current team, handling both
// SPMD and generic (non-SPMD) execution modes as well as the case where the
// runtime was never initialized.
EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
  if (isRuntimeUninitialized()) {
    // An uninitialized runtime is only expected in SPMD mode, where all
    // threads of the block participate and a plain block barrier suffices.
    ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(),
            "Expected SPMD mode with uninitialized runtime.");
    __kmpc_barrier_simple_spmd(loc_ref, tid);
  } else {
    // The incoming tid is deliberately discarded; the logical id derived
    // from the hardware thread id is used instead.
    tid = GetLogicalThreadIdInBlock();
    int numberOfActiveOMPThreads =
        GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode());
    if (numberOfActiveOMPThreads > 1) {
      if (__kmpc_is_spmd_exec_mode()) {
        // SPMD mode: every thread of the block takes part.
        __kmpc_barrier_simple_spmd(loc_ref, tid);
      } else {
        // Generic mode: use a named barrier sized to the participating
        // threads only.
        // The #threads parameter must be rounded up to the WARPSIZE.
        int threads =
            WARPSIZE * ((numberOfActiveOMPThreads + WARPSIZE - 1) / WARPSIZE);

        PRINT(LD_SYNC,
              "call kmpc_barrier with %d omp threads, sync parameter %d\n",
              (int)numberOfActiveOMPThreads, (int)threads);
        __kmpc_impl_named_sync(threads);
      }
    } else {
      // Single-thread team: no rendezvous needed, but the OpenMP standard
      // still requires a barrier to imply a flush.
      // Still need to flush the memory per the standard.
      __kmpc_flush(loc_ref);
    } // numberOfActiveOMPThreads > 1
    PRINT0(LD_SYNC, "completed kmpc_barrier\n");
  }
}
73
// Emit a simple barrier call in SPMD mode. Assumes the caller is in an L0
// parallel region and that all worker threads participate.
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
  // Block-wide synchronization; loc_ref and tid are ignored.
  __kmpc_impl_syncthreads();
  PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
}
81
82 ////////////////////////////////////////////////////////////////////////////////
83 // KMP MASTER
84 ////////////////////////////////////////////////////////////////////////////////
85
// Begin an OpenMP "master" region: nonzero iff the calling thread is the
// team master (per IsTeamMaster), so only that thread executes the region.
EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_master\n");
  const int32_t amTeamMaster = IsTeamMaster(global_tid);
  return amTeamMaster;
}
90
// End an OpenMP "master" region. Only sanity-checks (in fussy builds) that
// the caller really is the team master; no synchronization is performed.
EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_master\n");
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}
95
96 ////////////////////////////////////////////////////////////////////////////////
97 // KMP SINGLE
98 ////////////////////////////////////////////////////////////////////////////////
99
// Begin an OpenMP "single" region. Implemented on top of "master": the team
// master is the thread elected to execute the region, everyone else gets 0.
EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_single\n");
  // Single is mapped onto master; the master thread wins the election.
  const int32_t executesSingle = IsTeamMaster(global_tid);
  return executesSingle;
}
105
// End an OpenMP "single" region. Like the begin, single is mapped onto
// master, so only the team master should arrive here (asserted in fussy
// builds). No barrier is emitted here — the compiler emits the closing
// barrier explicitly, so that is not a problem.
EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_single\n");
  // decide to implement single with master: master get the single
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
  // sync barrier is explicitly called... so that is not a problem
}
112
113 ////////////////////////////////////////////////////////////////////////////////
114 // Flush
115 ////////////////////////////////////////////////////////////////////////////////
116
// OpenMP flush: make this thread's pending writes visible device-wide via a
// memory fence. No thread synchronization is implied, only ordering.
EXTERN void __kmpc_flush(kmp_Ident *loc) {
  PRINT0(LD_IO, "call kmpc_flush\n");
  __kmpc_impl_threadfence();
}
121
122 ////////////////////////////////////////////////////////////////////////////////
123 // Vote
124 ////////////////////////////////////////////////////////////////////////////////
125
// Return the mask of lanes in the calling warp that are currently active
// (converged), as reported by the target's activemask primitive.
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
  PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
  const __kmpc_impl_lanemask_t activeLanes = __kmpc_impl_activemask();
  return activeLanes;
}
130
131 ////////////////////////////////////////////////////////////////////////////////
132 // Syncwarp
133 ////////////////////////////////////////////////////////////////////////////////
134
// Synchronize the warp lanes named in Mask (thin wrapper over the target's
// syncwarp primitive). The caller is responsible for passing the correct
// participant mask.
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
  PRINT0(LD_IO, "call __kmpc_syncwarp\n");
  __kmpc_impl_syncwarp(Mask);
}
139
140 #pragma omp end declare target
141