//===------------ sync.cu - GPU OpenMP synchronizations ---------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Include all synchronization.
//
//===----------------------------------------------------------------------===//
12 #pragma omp declare target
13 
14 #include "common/omptarget.h"
15 #include "target_impl.h"
16 
////////////////////////////////////////////////////////////////////////////////
// KMP Ordered calls
////////////////////////////////////////////////////////////////////////////////
20 
// Enter an OpenMP "ordered" region.  On this target the entry point does
// nothing beyond emitting the I/O trace; presumably ordering is enforced by
// the loop-scheduling code rather than here — TODO confirm with the
// dispatch implementation.
EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_ordered\n");
}
24 
// Leave an OpenMP "ordered" region.  Mirror of __kmpc_ordered: only the
// debug trace is emitted; no device-side state is touched here.
EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_end_ordered\n");
}
28 
////////////////////////////////////////////////////////////////////////////////
// KMP Barriers
////////////////////////////////////////////////////////////////////////////////

// A team maps to a CUDA thread block, so we can use the native block-level
// synchronization mechanism.
// FIXME: what if not all threads (warps) participate in the barrier?
// We may need to implement it differently.
36 
// Cancellation-aware barrier.  Delegates to the regular __kmpc_barrier and
// always reports "not cancelled": no cancellation request is consulted on
// this code path, so callers unconditionally receive 0.
EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_cancel_barrier\n");
  __kmpc_barrier(loc_ref, tid);
  PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");
  // 0 == no cancellation observed.
  return 0;
}
43 
// Team barrier.
//
// Three cases:
//   * Uninitialized runtime: only legal in SPMD mode (asserted); use the
//     plain block-wide barrier.
//   * Initialized runtime, more than one OpenMP thread: block-wide barrier in
//     SPMD mode, otherwise a named barrier sized to the participating
//     threads (rounded up to a whole number of warps).
//   * Single OpenMP thread: nothing to synchronize with, but the OpenMP
//     standard still requires a memory flush.
EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
  if (isRuntimeUninitialized()) {
    ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(),
            "Expected SPMD mode with uninitialized runtime.");
    __kmpc_barrier_simple_spmd(loc_ref, tid);
    return;
  }

  tid = GetLogicalThreadIdInBlock();
  int NumOmpThreads = GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode());
  if (NumOmpThreads <= 1) {
    // Still need to flush the memory per the standard.
    __kmpc_flush(loc_ref);
  } else if (__kmpc_is_spmd_exec_mode()) {
    __kmpc_barrier_simple_spmd(loc_ref, tid);
  } else {
    // The named-barrier thread count must be a multiple of WARPSIZE.
    int SyncThreads = WARPSIZE * ((NumOmpThreads + WARPSIZE - 1) / WARPSIZE);
    PRINT(LD_SYNC,
          "call kmpc_barrier with %d omp threads, sync parameter %d\n",
          (int)NumOmpThreads, (int)SyncThreads);
    __kmpc_impl_named_sync(SyncThreads);
  }
  PRINT0(LD_SYNC, "completed kmpc_barrier\n");
}
73 
// Emit a simple barrier call in SPMD mode.  Assumes the caller is in an L0
// parallel region and that all worker threads participate; every thread in
// the block must reach this call.
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
  // Block-wide barrier provided by the target implementation layer.
  __kmpc_impl_syncthreads();
  PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
}
81 
////////////////////////////////////////////////////////////////////////////////
// KMP MASTER
////////////////////////////////////////////////////////////////////////////////
85 
// Enter a "master" region: non-zero only for the team-master thread, so
// exactly one thread per team executes the guarded region.
EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_master\n");
  int32_t AmMaster = IsTeamMaster(global_tid);
  return AmMaster;
}
90 
// Leave a "master" region.  Purely a sanity check: only the team master is
// expected to reach this call.
EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_master\n");
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}
95 
////////////////////////////////////////////////////////////////////////////////
// KMP SINGLE
////////////////////////////////////////////////////////////////////////////////
99 
// Enter a "single" region.  Implemented on top of "master": the team-master
// thread is always the one that wins the single region.
EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_single\n");
  // Single is implemented via master: the master thread takes the region.
  return IsTeamMaster(global_tid);
}
105 
// Leave a "single" region.  Since single is implemented via master, only the
// team master may arrive here (asserted).  No barrier is needed at this
// point: the compiler emits the synchronizing barrier explicitly.
EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_single\n");
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}
112 
////////////////////////////////////////////////////////////////////////////////
// Flush
////////////////////////////////////////////////////////////////////////////////
116 
// OpenMP flush: make this thread's memory writes visible device-wide via the
// implementation-layer thread fence.
EXTERN void __kmpc_flush(kmp_Ident *loc) {
  PRINT0(LD_IO, "call kmpc_flush\n");
  __kmpc_impl_threadfence();
}
121 
////////////////////////////////////////////////////////////////////////////////
// Vote
////////////////////////////////////////////////////////////////////////////////
125 
// Return the lane mask of currently active threads in the calling warp, as
// reported by the implementation-layer activemask query.
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
  PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
  __kmpc_impl_lanemask_t Mask = __kmpc_impl_activemask();
  return Mask;
}
130 
////////////////////////////////////////////////////////////////////////////////
// Syncwarp
////////////////////////////////////////////////////////////////////////////////
134 
// Synchronize the warp lanes named in Mask via the implementation-layer
// warp barrier.  All lanes in Mask must reach this call.
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
  PRINT0(LD_IO, "call __kmpc_syncwarp\n");
  __kmpc_impl_syncwarp(Mask);
}
139 
140 #pragma omp end declare target
141